# local-llm-server/llm_server/workers/printer.py

import time
import traceback

from llm_server.cluster.backend import get_running_models
from llm_server.cluster.cluster_config import cluster_config
from llm_server.custom_redis import redis
from llm_server.logging import create_logger
from llm_server.routes.queue import priority_queue


def console_printer():
    """Log a one-line cluster status summary every 10 seconds, forever.

    Each cycle gathers four figures and writes them to the
    ``console_printer`` logger at INFO level:

    * Active Workers -- entries of ``priority_queue.activity()`` whose second
      element is truthy and is not the placeholder string ``"waiting..."``.
    * Processing -- the sum of the integer values stored in Redis under keys
      matching ``active_gen_workers:http*``.
    * Queued -- the sum of ``priority_queue.len(model)`` over every model
      returned by ``get_running_models()``.
    * Backends Online -- the number of ``cluster_config`` entries whose
      ``'online'`` value is truthy.

    Any ordinary exception raised while gathering or logging the figures is
    logged with its traceback and the loop carries on with the next cycle.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately *not* caught so
    the worker can still be stopped.

    This function never returns; it is meant to be run as a long-lived
    background worker.
    """
    logger = create_logger('console_printer')
    time.sleep(3)
    while True:
        try:
            # Backends always start with http, hence the key pattern.
            processing_count = sum(
                redis.get(key, default=0, dtype=int)
                for key in redis.keys('active_gen_workers:http*')
            )
            backends = [name for name, info in cluster_config.all().items() if info['online']]
            activity = priority_queue.activity()

            # Calculate the queue size the same way it's done on the stats.
            queue_size = sum(priority_queue.len(model) for model in get_running_models())

            # An activity entry counts as active when its second field is set
            # to something other than the "waiting..." placeholder.
            active_workers = sum(1 for entry in activity if entry[1] and entry[1] != "waiting...")

            # Active Workers and Processing should read the same. If not, that's an issue.
            logger.info(f'Active Workers: {active_workers} | Processing: {processing_count} | Queued: {queue_size} | Backends Online: {len(backends)}')
        except Exception:
            # Catch Exception rather than using a bare except: a bare except
            # would also swallow KeyboardInterrupt/SystemExit and make this
            # infinite loop impossible to stop cleanly.
            logger.error(traceback.format_exc())
        time.sleep(10)