import time
from datetime import datetime
from threading import Lock, Thread

from llm_server.routes.cache import redis

# proompters_1_min = 0
# concurrent_semaphore = Semaphore(concurrent_gens)

# Wall-clock time at which this module was first imported.
server_start_time = datetime.now()

# TODO: have a background thread put the averages in a variable so we don't end up with massive arrays
# wait_in_queue_elapsed = []
# wait_in_queue_elapsed_lock = Lock()

# (end, elapsed) pairs: `end` is compared against time.time() below, so it is
# an epoch timestamp in seconds; `elapsed` is the value that gets averaged.
# Entries are added elsewhere (not in this file) -- writers should hold
# generation_elapsed_lock while doing so.
generation_elapsed = []
generation_elapsed_lock = Lock()


# TODO: do I need this?
# def elapsed_times_cleanup():
#     global wait_in_queue_elapsed
#     while True:
#         current_time = time.time()
#         with wait_in_queue_elapsed_lock:
#             global wait_in_queue_elapsed
#             wait_in_queue_elapsed = [(end_time, elapsed_time) for end_time, elapsed_time in wait_in_queue_elapsed if current_time - end_time <= 60]
#         time.sleep(1)


def calculate_avg_gen_time() -> float:
    """Return the average generation time last published to Redis.

    Reads the ``average_generation_time`` key, which
    ``process_avg_gen_time`` writes.

    Returns:
        The stored value converted with ``float()``, or ``0`` when the key
        is not set.
    """
    # TODO: calculate the average from the database. Have this be set by an option in the config

    # Get the average generation time from Redis
    average_generation_time = redis.get('average_generation_time')
    if average_generation_time is None:
        return 0
    return float(average_generation_time)


def process_avg_gen_time() -> None:
    """Continuously publish the one-minute average generation time.

    Runs forever, so it is meant to be the target of a background thread.
    Every 5 seconds it:

    1. drops ``generation_elapsed`` samples whose end time is more than
       3 minutes old,
    2. averages the ``elapsed`` values of samples that ended within the
       last minute (``0`` when there are none), and
    3. stores the result under the ``average_generation_time`` Redis key.
    """
    while True:
        with generation_elapsed_lock:
            # Get the current time
            current_time = time.time()

            # Remove data older than 3 minutes. Slice assignment mutates the
            # existing list, so other holders of the reference see the prune
            # and no `global` declaration is needed.
            three_minutes_ago = current_time - 180
            generation_elapsed[:] = [
                (end, elapsed)
                for end, elapsed in generation_elapsed
                if end >= three_minutes_ago
            ]

            # Get the data from the last minute
            one_minute_ago = current_time - 60
            recent_data = [
                elapsed
                for end, elapsed in generation_elapsed
                if end >= one_minute_ago
            ]

        # Calculate the average
        if recent_data:
            average_generation_time = sum(recent_data) / len(recent_data)
        else:
            average_generation_time = 0

        # Publish after releasing the lock so the Redis call never blocks
        # threads waiting on generation_elapsed_lock.
        redis.set('average_generation_time', average_generation_time)
        time.sleep(5)


def get_total_proompts() -> int:
    """Return the integer stored under the ``proompts`` Redis key.

    Returns:
        The stored value converted with ``int()``, or ``0`` when the key is
        not set.
    """
    count = redis.get('proompts')
    if count is None:
        return 0
    return int(count)


def get_active_gen_workers() -> int:
    """Return the integer stored under the ``active_gen_workers`` Redis key.

    Returns:
        The stored value converted with ``int()``, or ``0`` when the key is
        not set.
    """
    active_gen_workers = redis.get('active_gen_workers')
    if active_gen_workers is None:
        return 0
    return int(active_gen_workers)


class SemaphoreCheckerThread(Thread):
    """Daemon thread that tracks how many distinct IPs were seen recently.

    State lives on the class rather than on instances, so it is read and
    written as ``SemaphoreCheckerThread.<attr>``:

    * ``recent_prompters`` maps an IP to the epoch timestamp recorded for
      it. Entries are presumably added by request-handling code elsewhere
      (not visible in this file).
    * ``proompters_1_min`` is the number of entries that are at most
      60 seconds old, refreshed about once per second by ``run``.
    """

    proompters_1_min = 0
    recent_prompters = {}

    def __init__(self):
        super().__init__(daemon=True)

    def run(self):
        """Prune entries older than 60 seconds and refresh the count, forever."""
        while True:
            current_time = time.time()

            # Filter a snapshot, not the live dict: if another thread inserts
            # a key while a comprehension iterates the dict itself, Python
            # raises "RuntimeError: dictionary changed size during
            # iteration", which would silently kill this thread and freeze
            # the counter.
            snapshot = list(SemaphoreCheckerThread.recent_prompters.items())
            SemaphoreCheckerThread.recent_prompters = {
                ip: timestamp
                for ip, timestamp in snapshot
                if current_time - timestamp <= 60
            }
            SemaphoreCheckerThread.proompters_1_min = len(SemaphoreCheckerThread.recent_prompters)
            time.sleep(1)