import time
from threading import Thread

import requests

from llm_server import opts
from llm_server.database import average_column
from llm_server.routes.cache import redis


class MainBackgroundThread(Thread):
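    """Background daemon that refreshes the cached aggregate stats in Redis
    every 60 seconds and tracks whether the backend is online."""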
    backend_online = False

    # TODO: do I really need to put everything in Redis?
    # TODO: call generate_stats() every minute, cache the results, put results in a DB table, then have other parts of code call this cache

    def __init__(self):
        Thread.__init__(self)
        self.daemon = True
        # Seed the cached stats so other parts of the code never hit a missing key.
        redis.set('average_generation_elapsed_sec', 0)
        redis.set('average_tps', 0)
        redis.set('average_output_tokens', 0)
        redis.set('backend_online', 0)

    def run(self):
        while True:
            # average_column() can return None (e.g. an empty table), so fall back to 0.
            average_generation_elapsed_sec = average_column('prompts', 'generation_time') or 0
            redis.set('average_generation_elapsed_sec', average_generation_elapsed_sec)

            average_output_tokens = average_column('prompts', 'response_tokens') or 0
            redis.set('average_output_tokens', average_output_tokens)

            # Guard against division by zero before any prompts have been recorded.
            average_tps = round(average_output_tokens / average_generation_elapsed_sec, 2) if average_generation_elapsed_sec else 0
            redis.set('average_tps', average_tps)

            if opts.mode == 'oobabooga':
                try:
                    r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
                    opts.running_model = r.json()['result']
                    redis.set('backend_online', 1)
                except Exception as e:
                    redis.set('backend_online', 0)
                    # TODO: handle error
                    print(e)
            elif opts.mode == 'hf-textgen':
                pass
            else:
                raise Exception(f'Unknown backend mode: {opts.mode}')
            time.sleep(60)
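
# Usage sketch (an assumption; not part of this file): the server's entry point
# would construct and start the thread before serving requests, e.g.:
#
#     background_thread = MainBackgroundThread()
#     background_thread.start()  # daemon=True, so it exits with the main process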