daemon printer: Calculate the queue size the same way it's done on the stats
This commit is contained in:
parent
1a15232400
commit
f39e976b34
|
@ -1,6 +1,7 @@
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
from llm_server.cluster.backend import get_running_models
|
||||||
from llm_server.cluster.cluster_config import cluster_config
|
from llm_server.cluster.cluster_config import cluster_config
|
||||||
from llm_server.custom_redis import redis
|
from llm_server.custom_redis import redis
|
||||||
from llm_server.routes.queue import priority_queue
|
from llm_server.routes.queue import priority_queue
|
||||||
|
@ -24,8 +25,15 @@ def console_printer():
|
||||||
for k in processing:
|
for k in processing:
|
||||||
processing_count += redis.get(k, default=0, dtype=int)
|
processing_count += redis.get(k, default=0, dtype=int)
|
||||||
backends = [k for k, v in cluster_config.all().items() if v['online']]
|
backends = [k for k, v in cluster_config.all().items() if v['online']]
|
||||||
|
|
||||||
|
# Calculate the queue size the same way it's done on the stats.
|
||||||
|
queue_size = 0
|
||||||
|
running_models = get_running_models()
|
||||||
|
for model in running_models:
|
||||||
|
queue_size += priority_queue.len(model)
|
||||||
|
|
||||||
activity = priority_queue.activity()
|
activity = priority_queue.activity()
|
||||||
|
|
||||||
# Active Workers and Processing should read the same. If not, that's an issue.
|
# Active Workers and Processing should read the same. If not, that's an issue.
|
||||||
logger.info(f'REQUEST QUEUE -> Active Workers: {len([i for i in activity if i[1]])} | Processing: {processing_count} | Queued: {len(priority_queue)} | Backends Online: {len(backends)}')
|
logger.info(f'REQUEST QUEUE -> Active Workers: {len([i for i in activity if i[1]])} | Processing: {processing_count} | Queued: {queue_size} | Backends Online: {len(backends)}')
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
Reference in New Issue