33 lines
1.2 KiB
Python
33 lines
1.2 KiB
Python
import logging
|
|
import time
|
|
|
|
from llm_server.cluster.cluster_config import cluster_config
|
|
from llm_server.custom_redis import redis
|
|
from llm_server.routes.queue import priority_queue
|
|
|
|
# One-time setup of the console printer's dedicated logger: attach a single
# INFO-level stream handler with a timestamped format. The handler check
# guards against stacking duplicate handlers if this module is imported twice.
logger = logging.getLogger('console_printer')

if not logger.handlers:
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    stream_handler.setFormatter(logging.Formatter("%(asctime)s: %(levelname)s:%(name)s - %(message)s"))
    logger.setLevel(logging.INFO)
    logger.addHandler(stream_handler)
|
|
|
|
|
|
def console_printer():
    """Periodically log request-queue and backend stats to the console.

    Runs forever, emitting one INFO line every 10 seconds with the number of
    active workers, requests currently being processed, queued requests, and
    online backends. Intended to run in a background/daemon thread; never
    returns.
    """
    # Give the rest of the server a moment to come up before the first report.
    time.sleep(3)
    while True:
        try:
            # Backend keys always start with "http", so this pattern matches
            # every per-backend active-generation-worker counter.
            processing = redis.keys('active_gen_workers:http*')

            # Sum in-flight requests across all backends. Iterating an empty
            # key list is a no-op, so no emptiness guard is needed.
            processing_count = 0
            for key in processing:
                processing_count += redis.get(key, default=0, dtype=int)

            backends = [name for name, info in cluster_config.all().items() if info['online']]
            activity = priority_queue.activity()

            # TODO: Active Workers and Processing should read the same. If not, that's an issue
            active_workers = len([i for i in activity if i[1]])

            # Lazy %-args: the message is only formatted if INFO is enabled.
            logger.info(
                'REQUEST QUEUE -> Active Workers: %s | Processing: %s | Queued: %s | Backends Online: %s',
                active_workers, processing_count, len(priority_queue), len(backends),
            )
        except Exception:
            # A transient Redis/config hiccup must not kill the printer thread
            # for good; log the traceback and retry on the next cycle.
            logger.exception('console_printer iteration failed')
        time.sleep(10)
|