import time
import traceback

from llm_server.cluster.backend import get_running_models
from llm_server.cluster.cluster_config import cluster_config
from llm_server.custom_redis import redis
from llm_server.logging import create_logger
from llm_server.routes.queue import priority_queue


def console_printer():
    """Periodically log a one-line summary of worker, queue, and backend status."""
    logger = create_logger('console_printer')
    time.sleep(3)
    while True:
        try:
            processing = redis.keys('active_gen_workers:http*')  # Backends always start with http.
            processing_count = 0
            if len(processing):
                for k in processing:
                    processing_count += redis.get(k, default=0, dtype=int)
            backends = [k for k, v in cluster_config.all().items() if v['online']]
            activity = priority_queue.activity()

            # Calculate the queue size the same way it's done on the stats.
            queue_size = 0
            running_models = get_running_models()
            for model in running_models:
                queue_size += priority_queue.len(model)

            # Active Workers and Processing should read the same. If not, that's an issue.
            logger.info(f'Active Workers: {len([i for i in activity if (i[1] and i[1] != "waiting...")])} | Processing: {processing_count} | Queued: {queue_size} | Backends Online: {len(backends)}')
        except Exception:
            logger.error(traceback.format_exc())
        time.sleep(10)
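

# Minimal usage sketch (an assumption, not part of this module): console_printer()
# blocks forever, so the server would typically launch it on a daemon thread at
# startup rather than calling it directly, e.g.:
#
#     import threading
#     threading.Thread(target=console_printer, name='console_printer', daemon=True).start()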