local-llm-server/llm_server/threads.py

import time
from threading import Thread
import requests
from llm_server import opts
from llm_server.database import average_column
from llm_server.routes.cache import redis


class MainBackgroundThread(Thread):
    backend_online = False
    # TODO: do I really need to put everything in Redis?
    # TODO: call generate_stats() every minute, cache the results, put results in a DB table, then have other parts of code call this cache

    def __init__(self):
        Thread.__init__(self)
        self.daemon = True

        # Seed the shared stats in Redis so readers always find a value.
        redis.set('average_generation_elapsed_sec', 0)
        redis.set('average_tps', 0)
        redis.set('average_output_tokens', 0)
        redis.set('backend_online', 0)

    def run(self):
        while True:
            # average_column() can return None (e.g. before any prompts exist), so fall back to 0.
            average_generation_elapsed_sec = average_column('prompts', 'generation_time') or 0
            redis.set('average_generation_elapsed_sec', average_generation_elapsed_sec)

            average_output_tokens = average_column('prompts', 'response_tokens') or 0
            redis.set('average_output_tokens', average_output_tokens)

            # Guard against division by zero on the first iterations.
            average_tps = round(average_output_tokens / average_generation_elapsed_sec, 2) if average_generation_elapsed_sec else 0
            redis.set('average_tps', average_tps)
            if opts.mode == 'oobabooga':
                try:
                    r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
                    opts.running_model = r.json()['result']
                    redis.set('backend_online', 1)
                except Exception as e:
                    redis.set('backend_online', 0)
                    # TODO: handle error
                    print(e)
            elif opts.mode == 'hf-textgen':
                pass
            else:
                raise Exception(f'Unknown backend mode: {opts.mode}')

            time.sleep(60)
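

# Usage sketch (an assumption, not part of this module): the server's entry
# point presumably constructs and starts this thread once before it begins
# serving requests, along the lines of:
#
#   from llm_server.threads import MainBackgroundThread
#
#   background_thread = MainBackgroundThread()
#   background_thread.start()  # daemon thread, so it exits with the main process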