From f39e976b34988539e0dfdb27b6e15bf20beb13f5 Mon Sep 17 00:00:00 2001 From: Cyberes Date: Fri, 20 Oct 2023 17:41:47 -0600 Subject: [PATCH] daemon printer: Calculate the queue size the same way it's done on the stats --- llm_server/workers/printer.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/llm_server/workers/printer.py b/llm_server/workers/printer.py index c9c421e..a62f85c 100644 --- a/llm_server/workers/printer.py +++ b/llm_server/workers/printer.py @@ -1,6 +1,7 @@ import logging import time +from llm_server.cluster.backend import get_running_models from llm_server.cluster.cluster_config import cluster_config from llm_server.custom_redis import redis from llm_server.routes.queue import priority_queue @@ -24,8 +25,15 @@ def console_printer(): for k in processing: processing_count += redis.get(k, default=0, dtype=int) backends = [k for k, v in cluster_config.all().items() if v['online']] + + # Calculate the queue size the same way it's done on the stats. + queue_size = 0 + running_models = get_running_models() + for model in running_models: + queue_size += priority_queue.len(model) + activity = priority_queue.activity() # Active Workers and Processing should read the same. If not, that's an issue. - logger.info(f'REQUEST QUEUE -> Active Workers: {len([i for i in activity if i[1]])} | Processing: {processing_count} | Queued: {len(priority_queue)} | Backends Online: {len(backends)}') + logger.info(f'REQUEST QUEUE -> Active Workers: {len([i for i in activity if i[1]])} | Processing: {processing_count} | Queued: {queue_size} | Backends Online: {len(backends)}') time.sleep(2)