107 lines
3.2 KiB
Python
107 lines
3.2 KiB
Python
import time
|
|
from datetime import datetime
|
|
from threading import Lock, Thread
|
|
|
|
from llm_server.routes.cache import redis
|
|
|
|
# proompters_5_min = 0
|
|
# concurrent_semaphore = Semaphore(concurrent_gens)
|
|
|
|
# Naive local datetime captured once, when this module is first imported.
server_start_time = datetime.now()
|
|
|
|
# TODO: have a background thread put the averages in a variable so we don't end up with massive arrays
|
|
|
|
# wait_in_queue_elapsed = []
|
|
# wait_in_queue_elapsed_lock = Lock()
|
|
|
|
# Shared list of (end, elapsed) tuples. process_avg_gen_time() compares `end`
# against time.time() and prunes/averages the list in place.
generation_elapsed = []
|
|
# Held by process_avg_gen_time() while it prunes and reads generation_elapsed.
generation_elapsed_lock = Lock()
|
|
|
|
|
|
# TODO: do I need this?
|
|
# def elapsed_times_cleanup():
|
|
# global wait_in_queue_elapsed
|
|
# while True:
|
|
# current_time = time.time()
|
|
# with wait_in_queue_elapsed_lock:
|
|
# global wait_in_queue_elapsed
|
|
# wait_in_queue_elapsed = [(end_time, elapsed_time) for end_time, elapsed_time in wait_in_queue_elapsed if current_time - end_time <= 60]
|
|
# time.sleep(1)
|
|
|
|
|
|
def calculate_avg_gen_time():
    """Return the average generation time currently stored in Redis.

    Reads the ``average_generation_time`` key.

    Returns:
        ``0`` when the key is absent, otherwise the stored value converted
        with ``float()``.
    """
    # TODO: calculate the average from the database. Have this be set by an option in the config
    stored = redis.get('average_generation_time')
    return 0 if stored is None else float(stored)
|
|
|
|
|
|
def process_avg_gen_time():
    """Publish the recent average generation time to Redis, forever.

    Each iteration:

    1. Drops every ``(end, elapsed)`` sample in ``generation_elapsed`` whose
       ``end`` is more than 180 seconds behind ``time.time()``.
    2. Averages ``elapsed`` over the samples whose ``end`` falls within the
       last 60 seconds (``0`` when there are none).
    3. Stores the result under the ``average_generation_time`` Redis key.
    4. Sleeps 5 seconds.

    The function never returns; presumably it is meant to be the target of a
    background thread (verify against the caller).
    """
    # No ``global`` statement is needed: the list is mutated in place through
    # slice assignment and the name is never rebound.
    while True:
        with generation_elapsed_lock:
            now = time.time()

            # Remove data older than 3 minutes. Slice assignment keeps the
            # same list object, so other holders of a reference see the prune.
            retention_cutoff = now - 180
            generation_elapsed[:] = [
                (end, elapsed)
                for end, elapsed in generation_elapsed
                if end >= retention_cutoff
            ]

            # Get the data from the last minute.
            window_cutoff = now - 60
            recent = [elapsed for end, elapsed in generation_elapsed if end >= window_cutoff]

        # ``recent`` is a private snapshot, so the arithmetic and the Redis
        # round-trip happen after the lock is released; threads contending
        # for the lock are not blocked on network I/O.
        average_generation_time = sum(recent) / len(recent) if recent else 0
        redis.set('average_generation_time', average_generation_time)

        time.sleep(5)
|
|
|
|
|
|
def get_total_proompts():
    """Return the integer stored under the ``proompts`` Redis key.

    Returns:
        ``0`` when the key is absent, otherwise the stored value converted
        with ``int()``.
    """
    raw = redis.get('proompts')
    return 0 if raw is None else int(raw)
|
|
|
|
|
|
def get_active_gen_workers():
    """Return the integer stored under the ``active_gen_workers`` Redis key.

    Returns:
        ``0`` when the key is absent, otherwise the stored value converted
        with ``int()``.
    """
    raw = redis.get('active_gen_workers')
    return 0 if raw is None else int(raw)
|
|
|
|
|
|
class SemaphoreCheckerThread(Thread):
    """Daemon thread that prunes the ``recent_prompters`` mapping in Redis.

    Once per second the thread rewrites ``recent_prompters`` so that it holds
    only entries seen within the last 300 seconds whose token does not start
    with ``SYSTEM__``, and stores the number of surviving entries under
    ``proompters_5_min``.
    """

    # Runs once, when the class body is evaluated (i.e. at import of this
    # module): resets the stored mapping to an empty dict.
    redis.set_dict('recent_prompters', {})

    def __init__(self):
        super().__init__()
        # Daemon threads do not keep the interpreter alive at shutdown.
        self.daemon = True

    def run(self):
        """Prune and republish the recent-prompter data forever."""
        while True:
            now = time.time()
            prompters = redis.get_dict('recent_prompters')

            # Each value unpacks to (timestamp, token). Entries with a
            # ``SYSTEM__``-prefixed token are always dropped; the rest are
            # kept only while they are at most 300 seconds old.
            still_recent = {
                ip: (timestamp, token)
                for ip, (timestamp, token) in prompters.items()
                if not (token and token.startswith('SYSTEM__')) and now - timestamp <= 300
            }

            redis.set_dict('recent_prompters', still_recent)
            redis.set('proompters_5_min', len(still_recent))
            time.sleep(1)
|