diff --git a/README.md b/README.md
index 77b73cb..485cb91 100644
--- a/README.md
+++ b/README.md
@@ -49,4 +49,10 @@ To set up token auth, add rows to the `token_auth` table in the SQLite database.
 
 `expire`: UNIX timestamp of when this token expires and is not longer valid.
 
-`disabled`: mark the token as disabled.
\ No newline at end of file
+`disabled`: mark the token as disabled.
+
+### To Do
+
+- Implement streaming
+- Add `huggingface/text-generation-inference`
+- Convince Oobabooga to implement concurrent generation
diff --git a/config/config.yml.sample b/config/config.yml.sample
index a20682a..04efc52 100644
--- a/config/config.yml.sample
+++ b/config/config.yml.sample
@@ -7,8 +7,9 @@ backend_url: http://x.x.x.x:5000
 mode: oobabooga
 
 # How many concurrent generation requests will be processed at the same time.
-# Oobabooga only supports one.
-concurrent_gens: 3
+# Oobabooga only supports one. If you're using Oobabooga, you MUST set this to 1
+# or else your estimated wait time will be incorrect.
+concurrent_gens: 1
 
 # The configured token limit of your backend.
 # This number is shown to clients and on the home page. (may be important later)
@@ -27,6 +28,7 @@ verify_ssl: false
 # Reject all requests if they aren't authenticated with a token.
 auth_required: false
 
+# JS tracking code to add to the home page.
 #analytics_tracking_code: |
 #  alert("hello");
 
diff --git a/llm_server/routes/stats.py b/llm_server/routes/stats.py
index 806baaf..1454a5f 100644
--- a/llm_server/routes/stats.py
+++ b/llm_server/routes/stats.py
@@ -29,6 +29,8 @@ generation_elapsed_lock = Lock()
 
 
 def calculate_avg_gen_time():
+    # TODO: calculate the average from the database. Have this be set by an option in the config
+
     # Get the average generation time from Redis
     average_generation_time = redis.get('average_generation_time')
     if average_generation_time is None:
diff --git a/llm_server/routes/v1/generate_stats.py b/llm_server/routes/v1/generate_stats.py
index d9da9cc..8104397 100644
--- a/llm_server/routes/v1/generate_stats.py
+++ b/llm_server/routes/v1/generate_stats.py
@@ -1,5 +1,5 @@
-from datetime import datetime
 import time
+from datetime import datetime
 
 from llm_server import opts
 from llm_server.llm.info import get_running_model
@@ -23,9 +23,11 @@ def generate_stats():
     average_generation_time = int(calculate_avg_gen_time())
     proompters_in_queue = len(priority_queue) + get_active_gen_workers()
 
+
+    # TODO: https://stackoverflow.com/questions/22721579/sorting-a-nested-ordereddict-by-key-recursively
     return {
         'stats': {
-            'prompts_in_queue': proompters_in_queue,
+            'proompts_in_queue': proompters_in_queue,
             'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
             'total_proompts': get_total_proompts() if opts.show_num_prompts else None,
             'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
@@ -37,12 +39,13 @@
         'endpoints': {
             'blocking': opts.full_client_api,
         },
-        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
+        'estimated_wait_sec': int((average_generation_time * proompters_in_queue) / opts.concurrent_gens),
         'timestamp': int(time.time()),
         'openaiKeys': '∞',
         'anthropicKeys': '∞',
         'config': {
             'gatekeeper': 'none' if opts.auth_required is False else 'token',
             'context_size': opts.context_size,
+            'queue_size': opts.concurrent_gens,
         }
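
For clarity, here is a minimal sketch (not part of the patch) of the wait-time estimate this change introduces; the standalone `estimated_wait_sec` helper below is hypothetical, since the patch computes the value inline in `generate_stats()`:

```python
def estimated_wait_sec(average_generation_time: float,
                       proompters_in_queue: int,
                       concurrent_gens: int) -> int:
    # With concurrent_gens workers draining the queue in parallel, the expected
    # wait is (queue length * average generation time) divided by the worker count.
    return int((average_generation_time * proompters_in_queue) / concurrent_gens)


# Example: 12 s average generation time and 4 queued prompts.
# With Oobabooga (concurrent_gens = 1) the estimate is 48 s;
# with 3 concurrent workers it would be 16 s.
assert estimated_wait_sec(12, 4, 1) == 48
assert estimated_wait_sec(12, 4, 3) == 16
```

This is also why the sample config now forces `concurrent_gens: 1` for Oobabooga: with a larger value the divisor would understate the real wait.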