# local-llm-server/llm_server/routes/stats.py

import time
from datetime import datetime
from threading import Thread
from llm_server.routes.cache import redis
# Timestamp captured once at import time; used to report server uptime.
# NOTE(review): naive local time — confirm whether UTC is expected by callers.
server_start_time = datetime.now()
def calculate_avg_gen_time() -> float:
    """Return the rolling average generation time stored in Redis.

    Returns:
        The value of the 'average_generation_time' key as a float, or
        0.0 when the key has not been set yet.
    """
    average_generation_time = redis.get('average_generation_time')
    # Redis returns None for a missing key; treat that as "no data yet".
    # Fix: return 0.0 (not int 0) so the return type is consistently float.
    if average_generation_time is None:
        return 0.0
    return float(average_generation_time)
def get_total_proompts():
    """Return the total prompt count tracked under the Redis key 'proompts'.

    A missing key (Redis returned None) counts as zero.
    """
    raw = redis.get('proompts')
    return int(raw) if raw is not None else 0
def get_active_gen_workers():
    """Return the number of active generation workers from Redis.

    Reads the 'active_gen_workers' key; a missing key counts as zero.
    """
    raw = redis.get('active_gen_workers')
    if raw is None:
        return 0
    return int(raw)
class SemaphoreCheckerThread(Thread):
    """Daemon thread that maintains a 5-minute rolling count of prompters.

    Every second it prunes the Redis hash 'recent_prompters' of entries
    older than 300 seconds and publishes the surviving entry count under
    the Redis key 'proompters_5_min'.
    """

    # Runs once at class-definition (module import) time: reset the shared
    # prompter map so stale entries from a previous process don't carry over.
    redis.set_dict('recent_prompters', {})

    def __init__(self):
        Thread.__init__(self)
        # Daemon thread so it never blocks interpreter shutdown.
        self.daemon = True

    def run(self):
        """Loop forever, pruning and republishing the recent-prompter count.

        Assumes each 'recent_prompters' value is a (timestamp, token)
        pair keyed by client IP — TODO confirm against the writer side.
        """
        while True:
            current_time = time.time()
            recent_prompters = redis.get_dict('recent_prompters')
            new_recent_prompters = {}
            for ip, (timestamp, token) in recent_prompters.items():
                # Tokens prefixed 'SYSTEM__' are excluded from the count.
                if token and token.startswith('SYSTEM__'):
                    continue
                # Keep only entries seen within the last 5 minutes.
                if current_time - timestamp <= 300:
                    new_recent_prompters[ip] = timestamp, token
            # NOTE(review): this read-modify-write is not atomic; a
            # concurrent writer between get_dict and set_dict could be
            # lost — presumably acceptable for stats, but confirm.
            redis.set_dict('recent_prompters', new_recent_prompters)
            redis.set('proompters_5_min', len(new_recent_prompters))
            time.sleep(1)