diff --git a/.env.example b/.env.example index 137c079..d6636bc 100644 --- a/.env.example +++ b/.env.example @@ -18,11 +18,15 @@ # Note: CHECK_KEYS is disabled by default in local development mode, but enabled # by default in production mode. -# Optional settings for user management and access control. See -# `docs/user-management.md` to learn how to use these. +# Optional settings for user management, access control, and quota enforcement. +# See `docs/user-management.md` to learn how to use these. # GATEKEEPER=none # GATEKEEPER_STORE=memory # MAX_IPS_PER_USER=20 +# TOKEN_QUOTA_TURBO=0 +# TOKEN_QUOTA_GPT4=0 +# TOKEN_QUOTA_CLAUDE=0 +# QUOTA_REFRESH_PERIOD=hourly # Optional settings for prompt logging. See docs/logging-sheets.md. # PROMPT_LOGGING=false diff --git a/docs/huggingface-createspace.png b/docs/assets/huggingface-createspace.png similarity index 100% rename from docs/huggingface-createspace.png rename to docs/assets/huggingface-createspace.png diff --git a/docs/huggingface-dockerfile.png b/docs/assets/huggingface-dockerfile.png similarity index 100% rename from docs/huggingface-dockerfile.png rename to docs/assets/huggingface-dockerfile.png diff --git a/docs/huggingface-savedockerfile.png b/docs/assets/huggingface-savedockerfile.png similarity index 100% rename from docs/huggingface-savedockerfile.png rename to docs/assets/huggingface-savedockerfile.png diff --git a/docs/openapi-admin-users.yaml b/docs/assets/openapi-admin-users.yaml similarity index 100% rename from docs/openapi-admin-users.yaml rename to docs/assets/openapi-admin-users.yaml diff --git a/docs/deploy-huggingface.md b/docs/deploy-huggingface.md index 9060399..d25509a 100644 --- a/docs/deploy-huggingface.md +++ b/docs/deploy-huggingface.md @@ -12,12 +12,12 @@ This repository can be deployed to a [Huggingface Space](https://huggingface.co/ - Provide a name for your Space and select "Docker" as the SDK. Select "Blank" for the template. - Click "Create Space" and wait for the Space to be created. -![Create Space](huggingface-createspace.png) +![Create Space](assets/huggingface-createspace.png) ### 3. Create an empty Dockerfile - Once your Space is created, you'll see an option to "Create the Dockerfile in your browser". Click that link. -![Create Dockerfile](huggingface-dockerfile.png) +![Create Dockerfile](assets/huggingface-dockerfile.png) - Paste the following into the text editor and click "Save". ```dockerfile FROM node:18-bullseye-slim @@ -34,7 +34,7 @@ CMD [ "npm", "start" ] ``` - Click "Commit new file to `main`" to save the Dockerfile. -![Commit](huggingface-savedockerfile.png) +![Commit](assets/huggingface-savedockerfile.png) ### 4. Set your API key as a secret - Click the Settings button in the top right corner of your repository. @@ -82,8 +82,6 @@ MAX_OUTPUT_TOKENS_ANTHROPIC=512 # Block prompts containing disallowed characters REJECT_DISALLOWED=false REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy." -# Show exact quota usage on the Server Info page -QUOTA_DISPLAY_MODE=full ``` See `.env.example` for a full list of available settings, or check `config.ts` for details on what each setting does. diff --git a/docs/user-management.md b/docs/user-management.md index 5be7fe5..adeebf6 100644 --- a/docs/user-management.md +++ b/docs/user-management.md @@ -11,6 +11,7 @@ Several of these features require you to set secrets in your environment. 
If us
 - [Memory](#memory)
 - [Firebase Realtime Database](#firebase-realtime-database)
   - [Firebase setup instructions](#firebase-setup-instructions)
+- [User quotas](#user-quotas)
 
 ## No user management (`GATEKEEPER=none`)
@@ -59,3 +60,7 @@ To use Firebase Realtime Database to persist user data, set the following enviro
 8. Set `GATEKEEPER_STORE` to `firebase_rtdb` in your environment if you haven't already.
 
 The proxy server will attempt to connect to your Firebase Realtime Database at startup and will throw an error if it cannot connect. If you see this error, check that your `FIREBASE_RTDB_URL` and `FIREBASE_KEY` secrets are set correctly.
+
+# User quotas
+
+See [`user-quotas.md`](user-quotas.md) for instructions on setting and automatically refreshing users' token quotas.
diff --git a/docs/user-quotas.md b/docs/user-quotas.md
new file mode 100644
index 0000000..01b2702
--- /dev/null
+++ b/docs/user-quotas.md
@@ -0,0 +1,36 @@
+# User Quotas
+
+When using `user_token` authentication, you can set per-model token quotas for users. These quotas are enforced by the proxy server and are separate from the quotas enforced by OpenAI.
+
+You can set the default quota via environment variables. Quotas are enforced on a per-model basis, and count both prompt tokens and completion tokens. By default, all quotas are disabled.
+
+Use the following environment variables to set the default quotas:
+- `TOKEN_QUOTA_TURBO`
+- `TOKEN_QUOTA_GPT4`
+- `TOKEN_QUOTA_CLAUDE`
+
+Quotas only apply to `normal`-type users; `special`-type users are exempt from quotas. You can change users' types via the REST API.
+
+**Note that changes to these environment variables will only apply to newly created users.** To modify existing users' quotas, use the REST API or the admin UI.
+
+## Automatically refreshing quotas
+
+You can use the `QUOTA_REFRESH_PERIOD` environment variable to automatically refresh users' quotas periodically. This is useful if you want to give users a certain number of tokens per day, for example. The entire quota is refreshed at the start of the specified period; any tokens a user has not used are not carried over.
+
+Quotas for all users will be refreshed. If you haven't set `TOKEN_QUOTA_*` for a particular model, quotas for that model will not be refreshed (so any limits you have set manually for that model will not be overwritten).
+
+Set the `QUOTA_REFRESH_PERIOD` environment variable to one of the following values:
+- `daily` (at midnight)
+- `hourly`
+- leave unset to disable automatic refreshing
+
+You can also use a cron expression, for example:
+- Every 45 seconds: `"*/45 * * * * *"`
+- Every 30 minutes: `"*/30 * * * *"`
+- Every 6 hours: `"0 */6 * * *"`
+- Every 3 days: `"0 0 */3 * *"`
+- Daily, at midday: `"0 12 * * *"`
+
+Make sure to enclose the cron expression in quotation marks.
+
+All times are in the server's local time zone. Refer to [crontab.guru](https://crontab.guru/) for more examples.
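The quota arithmetic described above is easiest to see in isolation. Below is a minimal sketch of the bookkeeping, mirroring the `hasAvailableQuota` and `refreshQuota` logic introduced in `src/proxy/auth/user-store.ts` later in this diff; the `QuotaState` shape is a simplified stand-in for the real `User` record.

```typescript
type QuotaModel = "turbo" | "gpt4" | "claude";

// Simplified stand-in for the per-user fields the user store tracks.
interface QuotaState {
  tokenCounts: Record<QuotaModel, number>;
  tokenLimits: Record<QuotaModel, number>;
}

// A limit of 0 (the default) means "no quota", so the request is allowed.
function hasAvailableQuota(
  user: QuotaState,
  model: QuotaModel,
  requested: number
): boolean {
  const limit = user.tokenLimits[model];
  if (!limit) return true;
  return user.tokenCounts[model] + requested < limit;
}

// Refreshing sets the limit to "already consumed + a full quota", which is
// equivalent to resetting the user's remaining allowance. Models without a
// configured TOKEN_QUOTA_* are skipped so manually set limits survive.
function refreshQuota(user: QuotaState, quotas: Record<QuotaModel, number>) {
  for (const [model, quota] of Object.entries(quotas) as [QuotaModel, number][]) {
    if (quota > 0) {
      user.tokenLimits[model] = user.tokenCounts[model] + quota;
    }
  }
}
```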
diff --git a/package-lock.json b/package-lock.json index 25683dd..f6eb51d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,6 +22,7 @@ "googleapis": "^122.0.0", "http-proxy-middleware": "^3.0.0-beta.1", "multer": "^1.4.5-lts.1", + "node-schedule": "^2.1.1", "pino": "^8.11.0", "pino-http": "^8.3.3", "showdown": "^2.1.0", @@ -35,6 +36,7 @@ "@types/cors": "^2.8.13", "@types/express": "^4.17.17", "@types/multer": "^1.4.7", + "@types/node-schedule": "^2.1.0", "@types/showdown": "^2.0.0", "@types/uuid": "^9.0.1", "concurrently": "^8.0.1", @@ -911,6 +913,15 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-18.15.11.tgz", "integrity": "sha512-E5Kwq2n4SbMzQOn6wnmBjuK9ouqlURrcZDVfbo9ftDDTFt3nk7ZKK4GMOzoYgnpQJKcxwQw+lGaBvvlMo0qN/Q==" }, + "node_modules/@types/node-schedule": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@types/node-schedule/-/node-schedule-2.1.0.tgz", + "integrity": "sha512-NiTwl8YN3v/1YCKrDFSmCTkVxFDylueEqsOFdgF+vPsm+AlyJKGAo5yzX1FiOxPsZiN6/r8gJitYx2EaSuBmmg==", + "dev": true, + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/qs": { "version": "6.9.7", "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.7.tgz", @@ -1677,6 +1688,17 @@ "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", "dev": true }, + "node_modules/cron-parser": { + "version": "4.9.0", + "resolved": "https://registry.npmjs.org/cron-parser/-/cron-parser-4.9.0.tgz", + "integrity": "sha512-p0SaNjrHOnQeR8/VnfGbmg9te2kfyYSQ7Sc/j/6DtPL3JQvKxmjO9TSjNFpujqV3vEYYBvNNvXSxzyksBWAx1Q==", + "dependencies": { + "luxon": "^3.2.1" + }, + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/csrf-csrf": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/csrf-csrf/-/csrf-csrf-2.3.0.tgz", @@ -3075,6 +3097,11 @@ "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==", "optional": true }, + "node_modules/long-timeout": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/long-timeout/-/long-timeout-0.1.1.tgz", + "integrity": "sha512-BFRuQUqc7x2NWxfJBCyUrN8iYUYznzL9JROmRz1gZ6KlOIgmoD+njPVbb+VNn2nGMKggMsK79iUNErillsrx7w==" + }, "node_modules/lru-cache": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", @@ -3109,6 +3136,14 @@ "resolved": "https://registry.npmjs.org/yallist/-/yallist-2.1.2.tgz", "integrity": "sha512-ncTzHV7NvsQZkYe1DW7cbDLm0YpzHmZF5r/iyP3ZnQtMiJ+pjzisCiMNI+Sj+xQF5pXhSHxSB3uDbsBTzY/c2A==" }, + "node_modules/luxon": { + "version": "3.4.2", + "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.4.2.tgz", + "integrity": "sha512-uBoAVCVcajsrqy3pv7eo5jEUz1oeLmCcnMv8n4AJpT5hbpN9lUssAXibNElpbLce3Mhm9dyBzwYLs9zctM/0tA==", + "engines": { + "node": ">=12" + } + }, "node_modules/make-error": { "version": "1.3.6", "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", @@ -3320,6 +3355,19 @@ "node": ">= 6.13.0" } }, + "node_modules/node-schedule": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/node-schedule/-/node-schedule-2.1.1.tgz", + "integrity": "sha512-OXdegQq03OmXEjt2hZP33W2YPs/E5BcFQks46+G2gAxs4gHOIVD1u7EqlYLYSKsaIpyKCK9Gbk0ta1/gjRSMRQ==", + "dependencies": { + "cron-parser": "^4.2.0", + "long-timeout": "0.1.1", + "sorted-array-functions": "^1.3.0" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/nodemon": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/nodemon/-/nodemon-3.0.1.tgz", @@ -4011,6 +4059,11 @@ 
"atomic-sleep": "^1.0.0" } }, + "node_modules/sorted-array-functions": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/sorted-array-functions/-/sorted-array-functions-1.3.0.tgz", + "integrity": "sha512-2sqgzeFlid6N4Z2fUQ1cvFmTOLRi/sEDzSQ0OKYchqgoPmQBVyM3959qYx3fpS6Esef80KjmpgPeEr028dP3OA==" + }, "node_modules/source-map": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", diff --git a/package.json b/package.json index f11d8f7..6b4d18b 100644 --- a/package.json +++ b/package.json @@ -29,6 +29,7 @@ "googleapis": "^122.0.0", "http-proxy-middleware": "^3.0.0-beta.1", "multer": "^1.4.5-lts.1", + "node-schedule": "^2.1.1", "pino": "^8.11.0", "pino-http": "^8.3.3", "showdown": "^2.1.0", @@ -42,6 +43,7 @@ "@types/cors": "^2.8.13", "@types/express": "^4.17.17", "@types/multer": "^1.4.7", + "@types/node-schedule": "^2.1.0", "@types/showdown": "^2.0.0", "@types/uuid": "^9.0.1", "concurrently": "^8.0.1", diff --git a/src/admin/common.ts b/src/admin/common.ts index 36a273f..6f72a17 100644 --- a/src/admin/common.ts +++ b/src/admin/common.ts @@ -1,5 +1,7 @@ import { z } from "zod"; +import { RequestHandler } from "express"; import { Query } from "express-serve-static-core"; +import { config } from "../config"; export function parseSort(sort: Query["sort"]) { if (!sort) return null; @@ -45,7 +47,15 @@ export const UserSchema = z ip: z.array(z.string()).optional(), type: z.enum(["normal", "special"]).optional(), promptCount: z.number().optional(), - tokenCount: z.number().optional(), + tokenCount: z.any().optional(), // never used, but remains for compatibility + tokenCounts: z + .object({ turbo: z.number(), gpt4: z.number(), claude: z.number() }) + .strict() + .optional(), + tokenLimits: z + .object({ turbo: z.number(), gpt4: z.number(), claude: z.number() }) + .strict() + .optional(), createdAt: z.number().optional(), lastUsedAt: z.number().optional(), disabledAt: z.number().optional(), @@ -56,3 +66,13 @@ export const UserSchema = z export const UserSchemaWithToken = UserSchema.extend({ token: z.string(), }).strict(); + +export const injectLocals: RequestHandler = (_req, res, next) => { + const quota = config.tokenQuota; + res.locals.quotasEnabled = + quota.turbo > 0 || quota.gpt4 > 0 || quota.claude > 0; + + res.locals.persistenceEnabled = config.gatekeeperStore !== "memory"; + + next(); +}; diff --git a/src/admin/routes.ts b/src/admin/routes.ts index e441ee0..a08b242 100644 --- a/src/admin/routes.ts +++ b/src/admin/routes.ts @@ -1,10 +1,11 @@ import express, { Router } from "express"; import cookieParser from "cookie-parser"; import { authorize } from "./auth"; +import { injectLocals } from "./common"; import { injectCsrfToken, checkCsrfToken } from "./csrf"; +import { loginRouter } from "./login"; import { usersApiRouter as apiRouter } from "./api/users"; import { usersUiRouter as uiRouter } from "./ui/users"; -import { loginRouter } from "./login"; const adminRouter = Router(); @@ -18,6 +19,7 @@ adminRouter.use(injectCsrfToken); adminRouter.use("/users", authorize({ via: "header" }), apiRouter); adminRouter.use(checkCsrfToken); // All UI routes require CSRF token +adminRouter.use(injectLocals); adminRouter.use("/", loginRouter); adminRouter.use("/manage", authorize({ via: "cookie" }), uiRouter); diff --git a/src/admin/ui/users.ts b/src/admin/ui/users.ts index 53529a0..a15f7e0 100644 --- a/src/admin/ui/users.ts +++ b/src/admin/ui/users.ts @@ -53,7 +53,13 @@ router.get("/list-users", (req, res) => { const requestedPageSize = 
Number(req.query.perPage) || Number(req.cookies.perPage) || 20; const perPage = Math.max(1, Math.min(1000, requestedPageSize)); - const users = userStore.getUsers().sort(sortBy(sort, false)); + const users = userStore + .getUsers() + .map((user) => { + const sum = Object.values(user.tokenCounts).reduce((a, b) => a + b, 0); // TODO: cache + return { ...user, sumTokenCounts: sum }; + }) + .sort(sortBy(sort, false)); const page = Number(req.query.page) || 1; const { items, ...pagination } = paginate(users, page, perPage); @@ -95,9 +101,7 @@ router.get("/export-users.json", (_req, res) => { }); router.get("/", (_req, res) => { - res.render("admin/index", { - isPersistenceEnabled: config.gatekeeperStore !== "memory", - }); + res.render("admin/index"); }); router.post("/edit-user/:token", (req, res) => { @@ -129,7 +133,23 @@ router.post("/disable-user/:token", (req, res) => { } userStore.disableUser(req.params.token, req.body.reason); return res.sendStatus(204); -}); - +}); + +router.post("/refresh-user-quota", (req, res) => { + const user = userStore.getUser(req.body.token); + if (!user) { + return res.status(404).send("User not found"); + } + userStore.refreshQuota(req.body.token); + return res.redirect(`/admin/manage/view-user/${req.body.token}`); +}); + +router.post("/refresh-all-quotas", (_req, res) => { + const users = userStore.getUsers(); + + users.forEach((user) => userStore.refreshQuota(user.token)); + + return res.send(`Refreshed ${users.length} quotas`); +}); export { router as usersUiRouter }; diff --git a/src/config.ts b/src/config.ts index 305f2ca..55ed519 100644 --- a/src/config.ts +++ b/src/config.ts @@ -117,6 +117,30 @@ type Config = { * prevent excessive spend. Applies only to OpenAI. */ turboOnly?: boolean; + /** + * The number of (LLM) tokens a user can consume before requests are rejected. + * Limits include both prompt and response tokens. `special` users are exempt. + * Defaults to 0, which means no limit. + * + * Note: Changes are not automatically applied to existing users. Use the + * admin API or UI to update existing users, or use the QUOTA_REFRESH_PERIOD + * setting to periodically set all users' quotas to these values. + */ + tokenQuota: { + /** Token allowance for GPT-3.5 Turbo models. */ + turbo: number; + /** Token allowance for GPT-4 models. */ + gpt4: number; + /** Token allowance for Claude models. */ + claude: number; + }; + /** + * The period over which to enforce token quotas. Quotas will be fully reset + * at the start of each period, server time. Unused quota does not roll over. + * You can also provide a cron expression for a custom schedule. + * Defaults to no automatic quota refresh. + */ + quotaRefreshPeriod?: "hourly" | "daily" | string; }; // To change configs, create a file called .env in the root directory. 
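The `tokenQuota` block above is populated by `getEnvWithDefault`, which the hunks below generalize to accept a list of environment variable names; the first one that is set wins, which is how the deprecated `MAX_OUTPUT_TOKENS` keeps working as a fallback. A condensed sketch of that resolution order, assuming the helper's existing JSON-parse-with-raw-fallback behavior:

```typescript
function getEnvWithDefault<T>(env: string | string[], defaultValue: T): T {
  // Try each name in order; the first defined variable wins.
  const value = Array.isArray(env)
    ? env.map((e) => process.env[e]).find((v) => v !== undefined)
    : process.env[env];
  if (value === undefined) return defaultValue;
  try {
    return JSON.parse(value) as T;
  } catch {
    // Unparseable values (e.g. bare strings) are returned as-is.
    return value as unknown as T;
  }
}

// MAX_OUTPUT_TOKENS_OPENAI takes precedence; MAX_OUTPUT_TOKENS is the
// deprecated fallback, and 300 is the final default.
const maxOutputTokensOpenAI = getEnvWithDefault(
  ["MAX_OUTPUT_TOKENS_OPENAI", "MAX_OUTPUT_TOKENS"],
  300
);
```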
@@ -138,9 +162,12 @@ export const config: Config = { "MAX_CONTEXT_TOKENS_ANTHROPIC", 0 ), - maxOutputTokensOpenAI: getEnvWithDefault("MAX_OUTPUT_TOKENS_OPENAI", 300), + maxOutputTokensOpenAI: getEnvWithDefault( + ["MAX_OUTPUT_TOKENS_OPENAI", "MAX_OUTPUT_TOKENS"], + 300 + ), maxOutputTokensAnthropic: getEnvWithDefault( - "MAX_OUTPUT_TOKENS_ANTHROPIC", + ["MAX_OUTPUT_TOKENS_ANTHROPIC", "MAX_OUTPUT_TOKENS"], 400 ), rejectDisallowed: getEnvWithDefault("REJECT_DISALLOWED", false), @@ -164,66 +191,39 @@ export const config: Config = { ), blockRedirect: getEnvWithDefault("BLOCK_REDIRECT", "https://www.9gag.com"), turboOnly: getEnvWithDefault("TURBO_ONLY", false), + tokenQuota: { + turbo: getEnvWithDefault("TOKEN_QUOTA_TURBO", 0), + gpt4: getEnvWithDefault("TOKEN_QUOTA_GPT4", 0), + claude: getEnvWithDefault("TOKEN_QUOTA_CLAUDE", 0), + }, + quotaRefreshPeriod: getEnvWithDefault("QUOTA_REFRESH_PERIOD", undefined), } as const; -function migrateConfigs() { - let migrated = false; - const deprecatedMax = process.env.MAX_OUTPUT_TOKENS; - - if (!process.env.MAX_OUTPUT_TOKENS_OPENAI && deprecatedMax) { - migrated = true; - config.maxOutputTokensOpenAI = parseInt(deprecatedMax); - } - if (!process.env.MAX_OUTPUT_TOKENS_ANTHROPIC && deprecatedMax) { - migrated = true; - config.maxOutputTokensAnthropic = parseInt(deprecatedMax); - } - - if (migrated) { - startupLogger.warn( - { - MAX_OUTPUT_TOKENS: deprecatedMax, - MAX_OUTPUT_TOKENS_OPENAI: config.maxOutputTokensOpenAI, - MAX_OUTPUT_TOKENS_ANTHROPIC: config.maxOutputTokensAnthropic, - }, - "`MAX_OUTPUT_TOKENS` has been replaced with separate `MAX_OUTPUT_TOKENS_OPENAI` and `MAX_OUTPUT_TOKENS_ANTHROPIC` configs. You should update your .env file to remove `MAX_OUTPUT_TOKENS` and set the new configs." - ); - } -} - -/** Prevents the server from starting if config state is invalid. */ export async function assertConfigIsValid() { - migrateConfigs(); - - // Ensure gatekeeper mode is valid. if (!["none", "proxy_key", "user_token"].includes(config.gatekeeper)) { throw new Error( `Invalid gatekeeper mode: ${config.gatekeeper}. Must be one of: none, proxy_key, user_token.` ); } - // Don't allow `user_token` mode without `ADMIN_KEY`. if (config.gatekeeper === "user_token" && !config.adminKey) { throw new Error( "`user_token` gatekeeper mode requires an `ADMIN_KEY` to be set." ); } - // Don't allow `proxy_key` mode without `PROXY_KEY`. if (config.gatekeeper === "proxy_key" && !config.proxyKey) { throw new Error( "`proxy_key` gatekeeper mode requires a `PROXY_KEY` to be set." ); } - // Don't allow `PROXY_KEY` to be set for other modes. if (config.gatekeeper !== "proxy_key" && config.proxyKey) { throw new Error( "`PROXY_KEY` is set, but gatekeeper mode is not `proxy_key`. Make sure to set `GATEKEEPER=proxy_key`." ); } - // Require appropriate firebase config if using firebase store. 
  if (
    config.gatekeeperStore === "firebase_rtdb" &&
    (!config.firebaseKey || !config.firebaseRtdbUrl)
@@ -279,10 +279,10 @@ export const OMITTED_KEYS: (keyof Config)[] = [
 
 const getKeys = Object.keys as <T extends object>(obj: T) => Array<keyof T>;
 
-export function listConfig(): Record<string, string> {
-  const result: Record<string, string> = {};
-  for (const key of getKeys(config)) {
-    const value = config[key]?.toString() || "";
+export function listConfig(obj: Config = config): Record<string, string> {
+  const result: Record<string, string> = {};
+  for (const key of getKeys(obj)) {
+    const value = obj[key]?.toString() || "";
     const shouldOmit =
       OMITTED_KEYS.includes(key) || value === "" || value === "undefined";
@@ -297,17 +297,27 @@ export function listConfig(): Record<string, string> {
     } else {
       result[key] = value;
     }
+
+    if (typeof obj[key] === "object") {
+      result[key] = listConfig(obj[key] as unknown as Config);
+    }
   }
   return result;
 }
 
-function getEnvWithDefault<T>(name: string, defaultValue: T): T {
-  const value = process.env[name];
+/**
+ * Tries to get a config value from one or more environment variables (in
+ * order), falling back to a default value if none are set.
+ */
+function getEnvWithDefault<T>(env: string | string[], defaultValue: T): T {
+  const value = Array.isArray(env)
+    ? env.map((e) => process.env[e]).find((v) => v !== undefined)
+    : process.env[env];
   if (value === undefined) {
     return defaultValue;
   }
   try {
-    if (name === "OPENAI_KEY" || name === "ANTHROPIC_KEY") {
+    if (env === "OPENAI_KEY" || env === "ANTHROPIC_KEY") {
       return value as unknown as T;
     }
     return JSON.parse(value) as T;
diff --git a/src/info-page.ts b/src/info-page.ts
index ce8e922..f3b86e6 100644
--- a/src/info-page.ts
+++ b/src/info-page.ts
@@ -36,7 +36,7 @@ function cacheInfoPageHtml(baseUrl: string) {
   const anthropicKeys = keys.filter((k) => k.service === "anthropic").length;
 
   const info = {
-    uptime: process.uptime(),
+    uptime: Math.floor(process.uptime()),
     endpoints: {
       ...(openaiKeys ? { openai: baseUrl + "/proxy/openai" } : {}),
       ...(anthropicKeys ? { anthropic: baseUrl + "/proxy/anthropic" } : {}),
diff --git a/src/key-management/openai/checker.ts b/src/key-management/openai/checker.ts
index a398e8d..a80b0f3 100644
--- a/src/key-management/openai/checker.ts
+++ b/src/key-management/openai/checker.ts
@@ -164,10 +164,7 @@ export class OpenAIKeyChecker {
       };
       this.updateKey(key.hash, updates);
     }
-    this.log.info(
-      { key: key.hash, hardLimit: key.hardLimit },
-      "Key check complete."
-    );
+    this.log.info({ key: key.hash }, "Key check complete.");
   } catch (error) {
     // touch the key so we don't check it again for a while
     this.updateKey(key.hash, {});
@@ -308,7 +305,7 @@ export class OpenAIKeyChecker {
    * is actually not valid, but keys which are revoked or out of quota will fail
    * with a 401 or 429 error instead of the expected 400 Bad Request error.
    * This lets us avoid test keys without spending any quota.
-   * 
+   *
    * We use the rate limit header to determine whether it's a trial key.
   */
  private async testLiveness(key: OpenAIKey): Promise<{ rateLimit: number }> {
diff --git a/src/proxy/anthropic.ts b/src/proxy/anthropic.ts
index d8b2233..19abb85 100644
--- a/src/proxy/anthropic.ts
+++ b/src/proxy/anthropic.ts
@@ -8,6 +8,7 @@ import { ipLimiter } from "./rate-limit";
 import { handleProxyError } from "./middleware/common";
 import {
   addKey,
+  applyQuotaLimits,
   addAnthropicPreamble,
   blockZoomerOrigins,
   createPreprocessorMiddleware,
@@ -72,6 +73,7 @@ const rewriteAnthropicRequest = (
   res: http.ServerResponse
 ) => {
   const rewriterPipeline = [
+    applyQuotaLimits,
     addKey,
     addAnthropicPreamble,
     languageFilter,
@@ -108,7 +110,7 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
   if (req.inboundApi === "openai") {
     req.log.info("Transforming Anthropic response to OpenAI format");
-    body = transformAnthropicResponse(body);
+    body = transformAnthropicResponse(body, req);
   }
 
   // TODO: Remove once tokenization is stable
@@ -126,17 +128,19 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
  * on-the-fly.
  */
 function transformAnthropicResponse(
-  anthropicBody: Record<string, any>
+  anthropicBody: Record<string, any>,
+  req: Request
 ): Record<string, any> {
+  const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
   return {
     id: "ant-" + anthropicBody.log_id,
     object: "chat.completion",
     created: Date.now(),
     model: anthropicBody.model,
     usage: {
-      prompt_tokens: 0,
-      completion_tokens: 0,
-      total_tokens: 0,
+      prompt_tokens: req.promptTokens,
+      completion_tokens: req.outputTokens,
+      total_tokens: totalTokens,
     },
     choices: [
       {
diff --git a/src/proxy/auth/gatekeeper.ts b/src/proxy/auth/gatekeeper.ts
index 18f640d..feab8b3 100644
--- a/src/proxy/auth/gatekeeper.ts
+++ b/src/proxy/auth/gatekeeper.ts
@@ -1,6 +1,6 @@
 import type { Request, RequestHandler } from "express";
 import { config } from "../../config";
-import { authenticate, getUser } from "./user-store";
+import { authenticate, getUser, hasAvailableQuota } from "./user-store";
 
 const GATEKEEPER = config.gatekeeper;
 const PROXY_KEY = config.proxyKey;
diff --git a/src/proxy/auth/user-store.ts b/src/proxy/auth/user-store.ts
index 53651aa..ab38f6b 100644
--- a/src/proxy/auth/user-store.ts
+++ b/src/proxy/auth/user-store.ts
@@ -8,10 +8,16 @@
  */
 
 import admin from "firebase-admin";
+import schedule from "node-schedule";
 import { v4 as uuid } from "uuid";
 import { config, getFirebaseApp } from "../../config";
 import { logger } from "../../logger";
 
+const log = logger.child({ module: "users" });
+
+// TODO: Consolidate model families with QueuePartition and KeyProvider.
+type QuotaModel = "claude" | "turbo" | "gpt4";
+
 export interface User {
   /** The user's personal access token. */
   token: string;
@@ -21,8 +27,12 @@ export interface User {
   type: UserType;
   /** The number of prompts the user has made. */
   promptCount: number;
-  /** The number of tokens the user has consumed. Not yet implemented. */
-  tokenCount: number;
+  /** @deprecated Use `tokenCounts` instead. */
+  tokenCount?: never;
+  /** The number of tokens the user has consumed, by model family. */
+  tokenCounts: Record<QuotaModel, number>;
+  /** The maximum number of tokens the user can consume, by model family. */
+  tokenLimits: Record<QuotaModel, number>;
   /** The time at which the user was created. */
   createdAt: number;
   /** The time at which the user last connected. */
@@ -37,7 +47,6 @@
 /**
  * Possible privilege levels for a user.
  * - `normal`: Default role. Subject to usual rate limits and quotas.
 * - `special`: Special role. Higher quotas and exempt from auto-ban/lockout.
- * TODO: implement auto-ban/lockout for normal users when they do naughty shit
  */
 export type UserType = "normal" | "special";
 
 const users: Map<string, User> = new Map();
 const usersToFlush = new Set<string>();
 
 export async function init() {
-  logger.info({ store: config.gatekeeperStore }, "Initializing user store...");
+  log.info({ store: config.gatekeeperStore }, "Initializing user store...");
   if (config.gatekeeperStore === "firebase_rtdb") {
     await initFirebase();
   }
-  logger.info("User store initialized.");
+  if (config.quotaRefreshPeriod) {
+    const quotaRefreshJob = schedule.scheduleJob(getRefreshCrontab(), () => {
+      for (const user of users.values()) {
+        refreshQuota(user.token);
+      }
+      log.info(
+        { users: users.size, nextRefresh: quotaRefreshJob.nextInvocation() },
+        "Token quotas refreshed."
+      );
+    });
+
+    if (!quotaRefreshJob) {
+      throw new Error(
+        "Unable to schedule quota refresh. Is QUOTA_REFRESH_PERIOD set correctly?"
+      );
+    }
+    log.debug(
+      { nextRefresh: quotaRefreshJob.nextInvocation() },
+      "Scheduled token quota refresh."
+    );
+  }
+  log.info("User store initialized.");
 }
 
 /** Creates a new user and returns their token. */
@@ -64,7 +94,8 @@ export function createUser() {
     ip: [],
     type: "normal",
     promptCount: 0,
-    tokenCount: 0,
+    tokenCounts: { turbo: 0, gpt4: 0, claude: 0 },
+    tokenLimits: { ...config.tokenQuota },
     createdAt: Date.now(),
   });
   usersToFlush.add(token);
@@ -86,12 +117,14 @@
  * user information via JSON. Use other functions for more specific operations.
  */
 export function upsertUser(user: UserUpdate) {
+  // TODO: May need better merging for nested objects
   const existing: User = users.get(user.token) ?? {
     token: user.token,
     ip: [],
     type: "normal",
     promptCount: 0,
-    tokenCount: 0,
+    tokenCounts: { turbo: 0, gpt4: 0, claude: 0 },
+    tokenLimits: { ...config.tokenQuota },
     createdAt: Date.now(),
   };
 
@@ -117,11 +150,16 @@ export function incrementPromptCount(token: string) {
   usersToFlush.add(token);
 }
 
-/** Increments the token count for the given user by the given amount. */
-export function incrementTokenCount(token: string, amount = 1) {
+/** Increments token consumption for the given user and model. */
+export function incrementTokenCount(
+  token: string,
+  model: string,
+  consumption: number
+) {
   const user = users.get(token);
   if (!user) return;
-  user.tokenCount += amount;
+  const modelFamily = getModelFamily(model);
+  user.tokenCounts[modelFamily] += consumption;
   usersToFlush.add(token);
 }
 
@@ -148,6 +186,40 @@ export function authenticate(token: string, ip: string) {
   return user;
 }
 
+export function hasAvailableQuota(
+  token: string,
+  model: string,
+  requested: number
+) {
+  const user = users.get(token);
+  if (!user) return false;
+  if (user.type === "special") return true;
+
+  const modelFamily = getModelFamily(model);
+  const { tokenCounts, tokenLimits } = user;
+  const tokenLimit = tokenLimits[modelFamily];
+
+  if (!tokenLimit) return true;
+
+  const tokensConsumed = tokenCounts[modelFamily] + requested;
+  return tokensConsumed < tokenLimit;
+}
+
+export function refreshQuota(token: string) {
+  const user = users.get(token);
+  if (!user) return;
+  const { tokenCounts, tokenLimits } = user;
+  const quotas = Object.entries(config.tokenQuota) as [QuotaModel, number][];
+  quotas
+    // If a quota is not configured, don't touch any existing limits a user may
+    // already have been assigned manually.
+    .filter(([, quota]) => quota > 0)
+    .forEach(
+      ([model, quota]) => (tokenLimits[model] = tokenCounts[model] + quota)
+    );
+  usersToFlush.add(token);
+}
+
 /** Disables the given user, optionally providing a reason. */
 export function disableUser(token: string, reason?: string) {
   const user = users.get(token);
@@ -163,7 +235,7 @@
 let firebaseTimeout: NodeJS.Timeout | undefined;
 
 async function initFirebase() {
-  logger.info("Connecting to Firebase...");
+  log.info("Connecting to Firebase...");
   const app = getFirebaseApp();
   const db = admin.database(app);
   const usersRef = db.ref("users");
@@ -171,7 +243,7 @@
   const users: Record<string, User> | null = snapshot.val();
   firebaseTimeout = setInterval(flushUsers, 20 * 1000);
   if (!users) {
-    logger.info("No users found in Firebase.");
+    log.info("No users found in Firebase.");
     return;
   }
   for (const token in users) {
@@ -179,7 +251,7 @@
   }
   usersToFlush.clear();
   const numUsers = Object.keys(users).length;
-  logger.info({ users: numUsers }, "Loaded users from Firebase");
+  log.info({ users: numUsers }, "Loaded users from Firebase");
 }
 
 async function flushUsers() {
@@ -204,8 +276,27 @@
   }
 
   await usersRef.update(updates);
-  logger.info(
-    { users: Object.keys(updates).length },
-    "Flushed users to Firebase"
-  );
+  log.info({ users: Object.keys(updates).length }, "Flushed users to Firebase");
+}
+
+function getModelFamily(model: string): QuotaModel {
+  if (model.startsWith("gpt-4")) {
+    // TODO: add 32k models
+    return "gpt4";
+  }
+  if (model.startsWith("gpt-3.5")) {
+    return "turbo";
+  }
+  return "claude";
+}
+
+function getRefreshCrontab() {
+  switch (config.quotaRefreshPeriod!) {
+    case "hourly":
+      return "0 * * * *";
+    case "daily":
+      return "0 0 * * *";
+    default:
+      return config.quotaRefreshPeriod ?? "0 0 * * *";
+  }
 }
diff --git a/src/proxy/middleware/common.ts b/src/proxy/middleware/common.ts
index 78020cd..ab87d82 100644
--- a/src/proxy/middleware/common.ts
+++ b/src/proxy/middleware/common.ts
@@ -1,6 +1,8 @@
 import { Request, Response } from "express";
 import httpProxy from "http-proxy";
 import { ZodError } from "zod";
+import { AIService } from "../../key-management";
+import { QuotaExceededError } from "./request/apply-quota-limits";
 
 const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
 const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
@@ -63,9 +65,7 @@ export const handleInternalError = (
   res: Response
 ) => {
   try {
-    const isZod = err instanceof ZodError;
-    const isForbidden = err.name === "ForbiddenError";
-    if (isZod) {
+    if (err instanceof ZodError) {
       writeErrorResponse(req, res, 400, {
         error: {
           type: "proxy_validation_error",
@@ -75,7 +75,7 @@
           message: err.message,
         },
       });
-    } else if (isForbidden) {
+    } else if (err.name === "ForbiddenError") {
      // Spoofs a vaguely threatening OpenAI error message. Only invoked by the
      // block-zoomers rewriter to scare off tiktokers.
      writeErrorResponse(req, res, 403, {
@@ -86,6 +86,16 @@
           message: err.message,
         },
       });
+    } else if (err instanceof QuotaExceededError) {
+      writeErrorResponse(req, res, 429, {
+        error: {
+          type: "proxy_quota_exceeded",
+          code: "quota_exceeded",
+          message: `You've exceeded your token quota for this model type.`,
+          info: err.quotaInfo,
+          stack: err.stack,
+        },
+      });
     } else {
       writeErrorResponse(req, res, 500, {
         error: {
@@ -141,3 +151,17 @@
   }
   return `data: ${JSON.stringify(fakeEvent)}\n\n`;
 }
+
+export function getCompletionForService({
+  service,
+  body,
+}: {
+  service: AIService;
+  body: Record<string, any>;
+}): { completion: string; model: string } {
+  if (service === "anthropic") {
+    return { completion: body.completion.trim(), model: body.model };
+  } else {
+    return { completion: body.choices[0].message.content, model: body.model };
+  }
+}
diff --git a/src/proxy/middleware/request/apply-quota-limits.ts b/src/proxy/middleware/request/apply-quota-limits.ts
new file mode 100644
index 0000000..f5555ce
--- /dev/null
+++ b/src/proxy/middleware/request/apply-quota-limits.ts
@@ -0,0 +1,30 @@
+import { hasAvailableQuota } from "../../auth/user-store";
+import { isCompletionRequest } from "../common";
+import { ProxyRequestMiddleware } from ".";
+
+export class QuotaExceededError extends Error {
+  public quotaInfo: any;
+  constructor(message: string, quotaInfo: any) {
+    super(message);
+    this.name = "QuotaExceededError";
+    this.quotaInfo = quotaInfo;
+  }
+}
+
+export const applyQuotaLimits: ProxyRequestMiddleware = (_proxyReq, req) => {
+  if (!isCompletionRequest(req) || !req.user) {
+    return;
+  }
+
+  const requestedTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
+  if (!hasAvailableQuota(req.user.token, req.body.model, requestedTokens)) {
+    throw new QuotaExceededError(
+      "You have exceeded your proxy token quota for this model.",
+      {
+        quota: req.user.tokenLimits,
+        used: req.user.tokenCounts,
+        requested: requestedTokens,
+      }
+    );
+  }
+};
diff --git a/src/proxy/middleware/request/check-context-size.ts b/src/proxy/middleware/request/check-context-size.ts
index bfdc2fa..31174a0 100644
--- a/src/proxy/middleware/request/check-context-size.ts
+++ b/src/proxy/middleware/request/check-context-size.ts
@@ -1,7 +1,7 @@
 import { Request } from "express";
 import { z } from "zod";
 import { config } from "../../../config";
-import { countTokens } from "../../../tokenization";
+import { OpenAIPromptMessage, countTokens } from "../../../tokenization";
 import { RequestPreprocessor } from ".";
 
 const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
@@ -15,22 +15,26 @@ const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
  * request body.
*/ export const checkContextSize: RequestPreprocessor = async (req) => { - let prompt; + const service = req.outboundApi; + let result; - switch (req.outboundApi) { - case "openai": + switch (service) { + case "openai": { req.outputTokens = req.body.max_tokens; - prompt = req.body.messages; + const prompt: OpenAIPromptMessage[] = req.body.messages; + result = await countTokens({ req, prompt, service }); break; - case "anthropic": + } + case "anthropic": { req.outputTokens = req.body.max_tokens_to_sample; - prompt = req.body.prompt; + const prompt: string = req.body.prompt; + result = await countTokens({ req, prompt, service }); break; + } default: throw new Error(`Unknown outbound API: ${req.outboundApi}`); } - const result = await countTokens({ req, prompt, service: req.outboundApi }); req.promptTokens = result.token_count; // TODO: Remove once token counting is stable @@ -89,6 +93,7 @@ function validateContextSize(req: Request) { ); req.debug.prompt_tokens = promptTokens; + req.debug.completion_tokens = outputTokens; req.debug.max_model_tokens = modelMax; req.debug.max_proxy_tokens = proxyMax; } @@ -101,7 +106,7 @@ function assertRequestHasTokenCounts( outputTokens: z.number().int().min(1), }) .nonstrict() - .parse(req); + .parse({ promptTokens: req.promptTokens, outputTokens: req.outputTokens }); } /** diff --git a/src/proxy/middleware/request/index.ts b/src/proxy/middleware/request/index.ts index 4f61405..6bde8cb 100644 --- a/src/proxy/middleware/request/index.ts +++ b/src/proxy/middleware/request/index.ts @@ -3,6 +3,7 @@ import type { ClientRequest } from "http"; import type { ProxyReqCallback } from "http-proxy"; // Express middleware (runs before http-proxy-middleware, can be async) +export { applyQuotaLimits } from "./apply-quota-limits"; export { createPreprocessorMiddleware } from "./preprocess"; export { checkContextSize } from "./check-context-size"; export { setApiFormat } from "./set-api-format"; diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts index 7ba3c6d..b7b9305 100644 --- a/src/proxy/middleware/response/index.ts +++ b/src/proxy/middleware/response/index.ts @@ -3,14 +3,21 @@ import { Request, Response } from "express"; import * as http from "http"; import util from "util"; import zlib from "zlib"; -import { config } from "../../../config"; import { logger } from "../../../logger"; import { keyPool } from "../../../key-management"; import { enqueue, trackWaitTime } from "../../queue"; -import { incrementPromptCount } from "../../auth/user-store"; -import { isCompletionRequest, writeErrorResponse } from "../common"; +import { + incrementPromptCount, + incrementTokenCount, +} from "../../auth/user-store"; +import { + getCompletionForService, + isCompletionRequest, + writeErrorResponse, +} from "../common"; import { handleStreamedResponse } from "./handle-streamed-response"; import { logPrompt } from "./log-prompt"; +import { countTokens } from "../../../tokenization"; const DECODER_MAP = { gzip: util.promisify(zlib.gunzip), @@ -84,12 +91,18 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => { if (req.isStreaming) { // `handleStreamedResponse` writes to the response and ends it, so // we can only execute middleware that doesn't write to the response. 
- middlewareStack.push(trackRateLimit, incrementKeyUsage, logPrompt); + middlewareStack.push( + trackRateLimit, + countResponseTokens, + incrementUsage, + logPrompt + ); } else { middlewareStack.push( trackRateLimit, handleUpstreamErrors, - incrementKeyUsage, + countResponseTokens, + incrementUsage, copyHttpHeaders, logPrompt, ...apiMiddleware @@ -394,15 +407,56 @@ function handleOpenAIRateLimitError( return errorPayload; } -const incrementKeyUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => { +const incrementUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => { if (isCompletionRequest(req)) { keyPool.incrementPrompt(req.key!); if (req.user) { incrementPromptCount(req.user.token); + const model = req.body.model; + const tokensUsed = req.promptTokens! + req.outputTokens!; + incrementTokenCount(req.user.token, model, tokensUsed); } } }; +const countResponseTokens: ProxyResHandlerWithBody = async ( + _proxyRes, + req, + _res, + body +) => { + // This function is prone to breaking if the upstream API makes even minor + // changes to the response format, especially for SSE responses. If you're + // seeing errors in this function, check the reassembled response body from + // handleStreamedResponse to see if the upstream API has changed. + try { + if (typeof body !== "object") { + throw new Error("Expected body to be an object"); + } + + const service = req.outboundApi; + const { completion } = getCompletionForService({ service, body }); + const tokens = await countTokens({ req, completion, service }); + + req.log.debug( + { service, tokens, prevOutputTokens: req.outputTokens }, + `Counted tokens for completion` + ); + if (req.debug) { + req.debug.completion_tokens = tokens; + } + + req.outputTokens = tokens.token_count; + } catch (error) { + req.log.error( + error, + "Error while counting completion tokens; assuming `max_output_tokens`" + ); + // req.outputTokens will already be set to `max_output_tokens` from the + // prompt counting middleware, so we don't need to do anything here. + } +}; + const trackRateLimit: ProxyResHandlerWithBody = async (proxyRes, req) => { keyPool.updateRateLimits(req.key!, proxyRes.headers); }; diff --git a/src/proxy/middleware/response/log-prompt.ts b/src/proxy/middleware/response/log-prompt.ts index e11feb9..bdaf1a0 100644 --- a/src/proxy/middleware/response/log-prompt.ts +++ b/src/proxy/middleware/response/log-prompt.ts @@ -1,10 +1,8 @@ import { Request } from "express"; import { config } from "../../../config"; -import { AIService } from "../../../key-management"; import { logQueue } from "../../../prompt-logging"; -import { isCompletionRequest } from "../common"; +import { getCompletionForService, isCompletionRequest } from "../common"; import { ProxyResHandlerWithBody } from "."; -import { logger } from "../../../logger"; /** If prompt logging is enabled, enqueues the prompt for logging. 
  */
 export const logPrompt: ProxyResHandlerWithBody = async (
@@ -26,7 +24,7 @@
   const promptPayload = getPromptForRequest(req);
   const promptFlattened = flattenMessages(promptPayload);
-  const response = getResponseForService({
+  const response = getCompletionForService({
     service: req.outboundApi,
     body: responseBody,
   });
@@ -62,17 +60,3 @@ const flattenMessages = (messages: string | OaiMessage[]): string => {
   }
   return messages.map((m) => `${m.role}: ${m.content}`).join("\n");
 };
-
-const getResponseForService = ({
-  service,
-  body,
-}: {
-  service: AIService;
-  body: Record<string, any>;
-}): { completion: string; model: string } => {
-  if (service === "anthropic") {
-    return { completion: body.completion.trim(), model: body.model };
-  } else {
-    return { completion: body.choices[0].message.content, model: body.model };
-  }
-};
diff --git a/src/proxy/openai.ts b/src/proxy/openai.ts
index 0f33122..59202b6 100644
--- a/src/proxy/openai.ts
+++ b/src/proxy/openai.ts
@@ -9,6 +9,7 @@ import { ipLimiter } from "./rate-limit";
 import { handleProxyError } from "./middleware/common";
 import {
   addKey,
+  applyQuotaLimits,
   blockZoomerOrigins,
   createPreprocessorMiddleware,
   finalizeBody,
@@ -90,6 +91,7 @@ const rewriteRequest = (
   res: http.ServerResponse
 ) => {
   const rewriterPipeline = [
+    applyQuotaLimits,
     addKey,
     languageFilter,
     limitCompletions,
diff --git a/src/tokenization/openai.ts b/src/tokenization/openai.ts
index fad8e9b..b71585d 100644
--- a/src/tokenization/openai.ts
+++ b/src/tokenization/openai.ts
@@ -15,7 +15,14 @@ export function init() {
 
 // Tested against:
 // https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
-export function getTokenCount(messages: any[], model: string) {
+export function getTokenCount(
+  prompt: string | OpenAIPromptMessage[],
+  model: string
+) {
+  if (typeof prompt === "string") {
+    return getTextTokenCount(prompt);
+  }
+
   const gpt4 = model.startsWith("gpt-4");
 
   const tokensPerMessage = gpt4 ? 3 : 4;
@@ -23,11 +30,12 @@
 
   let numTokens = 0;
 
-  for (const message of messages) {
+  for (const message of prompt) {
     numTokens += tokensPerMessage;
     for (const key of Object.keys(message)) {
       {
-        const value = message[key];
+        const value = message[key as keyof OpenAIPromptMessage];
+        if (!value || typeof value !== "string") continue;
 
        // Break if we get a huge message or exceed the token limit to prevent
        // DoS.
        // 100k tokens allows for future 100k GPT-4 models and 500k characters
@@ -40,7 +48,7 @@
         };
       }
 
-      numTokens += encoder.encode(message[key]).length;
+      numTokens += encoder.encode(value).length;
       if (key === "name") {
         numTokens += tokensPerName;
       }
@@ -51,6 +59,20 @@
   return { tokenizer: "tiktoken", token_count: numTokens };
 }
 
+function getTextTokenCount(prompt: string) {
+  if (prompt.length > 500000) {
+    return {
+      tokenizer: "length fallback",
+      token_count: 100000,
+    };
+  }
+
+  return {
+    tokenizer: "tiktoken",
+    token_count: encoder.encode(prompt).length,
+  };
+}
+
 export type OpenAIPromptMessage = {
   name?: string;
   content: string;
diff --git a/src/tokenization/tokenizer.ts b/src/tokenization/tokenizer.ts
index 3f04dc9..379b828 100644
--- a/src/tokenization/tokenizer.ts
+++ b/src/tokenization/tokenizer.ts
@@ -24,27 +24,28 @@ type TokenCountResult = {
   tokenizer: string;
   tokenization_duration_ms: number;
 };
 
-type TokenCountRequest = {
-  req: Request;
-} & (
-  | { prompt: string; service: "anthropic" }
-  | { prompt: OpenAIPromptMessage[]; service: "openai" }
+type TokenCountRequest = { req: Request } & (
+  | { prompt: OpenAIPromptMessage[]; completion?: never; service: "openai" }
+  | { prompt: string; completion?: never; service: "anthropic" }
+  | { prompt?: never; completion: string; service: "openai" }
+  | { prompt?: never; completion: string; service: "anthropic" }
 );
 
 export async function countTokens({
   req,
   service,
   prompt,
+  completion,
 }: TokenCountRequest): Promise<TokenCountResult> {
   const time = process.hrtime();
   switch (service) {
     case "anthropic":
       return {
-        ...getClaudeTokenCount(prompt, req.body.model),
+        ...getClaudeTokenCount(prompt ?? completion, req.body.model),
         tokenization_duration_ms: getElapsedMs(time),
       };
     case "openai":
       return {
-        ...getOpenAITokenCount(prompt, req.body.model),
+        ...getOpenAITokenCount(prompt ?? completion, req.body.model),
         tokenization_duration_ms: getElapsedMs(time),
       };
     default:
diff --git a/src/views/admin/index.ejs b/src/views/admin/index.ejs
index 75bab20..660bf2b 100644
--- a/src/views/admin/index.ejs
+++ b/src/views/admin/index.ejs
@@ -1,6 +1,6 @@
 <%- include("../_partials/admin-header", { title: "OAI Reverse Proxy Admin" }) %>

OAI Reverse Proxy Admin

-<% if (!isPersistenceEnabled) { %>
+<% if (!persistenceEnabled) { %>

⚠️ Users will be lost when the server restarts because persistence is not configured.
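The `persistenceEnabled` local used above (and `quotasEnabled` in the templates below) comes from the `injectLocals` middleware added in `src/admin/common.ts`, so individual routes no longer pass these flags to `res.render`. Condensed here for reference:

```typescript
import { RequestHandler } from "express";
import { config } from "../config";

// Mounted before the admin UI routes; every EJS view can then read
// `quotasEnabled` and `persistenceEnabled` from res.locals.
export const injectLocals: RequestHandler = (_req, res, next) => {
  const quota = config.tokenQuota;
  res.locals.quotasEnabled =
    quota.turbo > 0 || quota.gpt4 > 0 || quota.claude > 0;
  res.locals.persistenceEnabled = config.gatekeeperStore !== "memory";
  next();
};
```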
diff --git a/src/views/admin/list-users.ejs b/src/views/admin/list-users.ejs index 7da5aa2..ebc9a06 100644 --- a/src/views/admin/list-users.ejs +++ b/src/views/admin/list-users.ejs @@ -1,17 +1,17 @@ <%- include("../_partials/admin-header", { title: "Users - OAI Reverse Proxy Admin" }) %>

User Token List

- - + <% if (users.length === 0) { %>

No users found.

<% } else { %> - + + @@ -26,6 +26,7 @@ + @@ -40,6 +41,13 @@ <% }); %>
-      <th>Token</th>
+      <th>User Token</th>
       <th <% if (sort === "ip") { %>class="active"<% } %> >IPs</th>
       <th <% if (sort === "promptCount") { %>class="active"<% } %> >Prompts</th>
+      <th <% if (sort === "sumTokenCounts") { %>class="active"<% } %> >Tokens</th>
       <th>Type</th>
       <th <% if (sort === "createdAt") { %>class="active"<% } %> >Created (UTC)</th>
       <th <% if (sort === "lastUsedAt") { %>class="active"<% } %> >Last Used (UTC)</th>
@@ -26,6 +26,7 @@
       <td><%= user.ip.length %></td>
       <td><%= user.promptCount %></td>
+      <td><%= user.sumTokenCounts %></td>
       <td><%= user.type %></td>
       <td><%= user.createdAt %></td>
       <td><%= user.lastUsedAt ?? "never" %></td>
@@ -40,6 +41,13 @@
 <% }); %>
+
+<% if (quotasEnabled) { %>
+  <form action="/admin/manage/refresh-all-quotas" method="post">
+    <button type="submit">Refresh all quotas</button>
+  </form>
+<% } %>
+
 <% if (page > 1) { %>
diff --git a/src/views/admin/view-user.ejs b/src/views/admin/view-user.ejs
index c802897..846ea65 100644
--- a/src/views/admin/view-user.ejs
+++ b/src/views/admin/view-user.ejs
@@ -7,10 +7,14 @@
       <th>Key</th>
       <th>Value</th>
     </tr>
+    <tr>
+      <td>Token</td>
+      <td><%- user.token %></td>
+    </tr>
     <tr>
       <td>Type</td>
       <td><%- user.type %></td>
@@ -20,8 +24,24 @@
       <td><%- user.promptCount %></td>
     </tr>
     <tr>
-      <td>Token Count</td>
-      <td><%- user.tokenCount %></td>
+      <td>Token Counts</td>
+      <td>
+        <ul>
+          <% Object.entries(user.tokenCounts).forEach(([key, count]) => { %>
+          <li><%- key %>: <%- count %></li>
+          <% }) %>
+        </ul>
+      </td>
+    </tr>
+    <tr>
+      <td>Token Limits</td>
+      <td>
+        <ul>
+          <% Object.entries(user.tokenLimits).forEach(([key, count]) => { %>
+          <li><%- key %>: <%- count %></li>
+          <% }) %>
+        </ul>
+      </td>
     </tr>
     <tr>
       <td>Created At</td>
@@ -37,22 +57,30 @@
     <tr>
       <td>Disabled Reason</td>
-      <td><%- user.disabledReason %></td>
+      <td><%- user.disabledReason %></td>
     </tr>
     <tr>
       <td>IPs</td>
       <td>Show all (<%- user.ip.length %>)</td>
 
+<% if (quotasEnabled) { %>
+  <form action="/admin/manage/refresh-user-quota" method="post">
+    <input type="hidden" name="token" value="<%= user.token %>" />
+    <button type="submit">Refresh quota</button>
+  </form>
+<% } %>
 
 <%- include("../_partials/admin-footer") %>
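Taken together, quota enforcement in this change is two-phase: `applyQuotaLimits` rejects a request up front using a pessimistic estimate (prompt tokens plus the full `max_tokens` output budget), and after the upstream responds, `countResponseTokens` replaces that estimate with the actual completion size before `incrementUsage` records it against the user. A self-contained sketch of the two phases; the types and helpers here are simplified stand-ins for the proxy's own:

```typescript
type ModelFamily = "turbo" | "gpt4" | "claude";

// Simplified stand-in for the User record kept by the user store.
interface QuotaUser {
  type: "normal" | "special";
  tokenCounts: Record<ModelFamily, number>;
  tokenLimits: Record<ModelFamily, number>;
}

// Phase 1: pre-flight. Charge the worst case so a request is never forwarded
// if even its maximum possible usage would exceed the quota.
function checkQuota(
  user: QuotaUser,
  family: ModelFamily,
  promptTokens: number,
  maxOutputTokens: number
): void {
  if (user.type === "special") return; // special users are exempt
  const limit = user.tokenLimits[family];
  if (!limit) return; // 0 means no quota configured
  if (user.tokenCounts[family] + promptTokens + maxOutputTokens >= limit) {
    throw new Error("quota_exceeded");
  }
}

// Phase 2: reconciliation. Record actual usage once the completion has been
// tokenized; if tokenization fails, the caller falls back to max_tokens.
function recordUsage(
  user: QuotaUser,
  family: ModelFamily,
  promptTokens: number,
  completionTokens: number
): void {
  user.tokenCounts[family] += promptTokens + completionTokens;
}
```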