daemon printer: Calculate the queue size the same way it's done on the stats

2023-10-20 17:41:47 -06:00 · 2023-10-20 17:41:47 -06:00 · f39e976b34
parent 1a15232400
commit f39e976b34
1 changed files with 9 additions and 1 deletions
--- a/llm_server/workers/printer.py
+++ b/llm_server/workers/printer.py
@ -1,6 +1,7 @@
 import logging
 import time

+from llm_server.cluster.backend import get_running_models
 from llm_server.cluster.cluster_config import cluster_config
 from llm_server.custom_redis import redis
 from llm_server.routes.queue import priority_queue
@ -24,8 +25,15 @@ def console_printer():
            for k in processing:
                processing_count += redis.get(k, default=0, dtype=int)
        backends = [k for k, v in cluster_config.all().items() if v['online']]
+
+        # Calculate the queue size the same way it's done on the stats.
+        queue_size = 0
+        running_models = get_running_models()
+        for model in running_models:
+            queue_size += priority_queue.len(model)
+
        activity = priority_queue.activity()

        # Active Workers and Processing should read the same. If not, that's an issue.
-        logger.info(f'REQUEST QUEUE -> Active Workers: {len([i for i in activity if i[1]])} | Processing: {processing_count} | Queued: {len(priority_queue)} | Backends Online: {len(backends)}')
+        logger.info(f'REQUEST QUEUE -> Active Workers: {len([i for i in activity if i[1]])} | Processing: {processing_count} | Queued: {queue_size} | Backends Online: {len(backends)}')
        time.sleep(2)