# local-llm-server/llm_server/threads.py
import time
from threading import Thread
import requests
from llm_server import opts
from llm_server.database import average_column
from llm_server.routes.cache import redis
class BackendHealthCheck(Thread):
    """Daemon thread that periodically refreshes cached backend statistics.

    Every 60 seconds it recomputes average generation time, output tokens,
    and tokens-per-second from the ``prompts`` table, stores them in redis,
    and probes the configured backend to update the ``backend_online`` flag.

    Raises:
        Exception: from ``run()`` if ``opts.mode`` is not a recognized mode.
    """

    # Class-level default; the live status is published via the redis
    # 'backend_online' key rather than this attribute.
    backend_online = False

    def __init__(self):
        Thread.__init__(self)
        self.daemon = True  # don't block interpreter shutdown
        # Seed the cached stats so readers never see a missing key before
        # the first refresh cycle completes.
        redis.set('average_generation_elapsed_sec', 0)
        redis.set('average_tps', 0)
        redis.set('average_output_tokens', 0)
        redis.set('backend_online', 0)

    def run(self):
        while True:
            # BUG FIX: the original used `average_column(...) if not None else 0`.
            # `not None` is always truthy, so the `else 0` branch was dead and a
            # None result (empty table) would be written to redis and then crash
            # the division below. Bind the result and test it explicitly.
            elapsed = average_column('prompts', 'generation_time')
            average_generation_elapsed_sec = elapsed if elapsed is not None else 0
            redis.set('average_generation_elapsed_sec', average_generation_elapsed_sec)

            tokens = average_column('prompts', 'response_tokens')
            average_output_tokens = tokens if tokens is not None else 0
            redis.set('average_output_tokens', average_output_tokens)

            # BUG FIX: guard against ZeroDivisionError when no prompts have
            # been recorded yet (average elapsed time is 0).
            if average_generation_elapsed_sec:
                average_tps = round(average_output_tokens / average_generation_elapsed_sec, 2)
            else:
                average_tps = 0
            redis.set('average_tps', average_tps)

            if opts.mode == 'oobabooga':
                try:
                    r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
                    opts.running_model = r.json()['result']
                    redis.set('backend_online', 1)
                except Exception as e:
                    # Best-effort health check: mark offline but keep polling.
                    redis.set('backend_online', 0)
                    # TODO: handle error (log instead of print)
                    print(e)
            elif opts.mode == 'hf-textgen':
                pass  # no health probe implemented for this backend yet
            else:
                raise Exception(f'Unknown backend mode: {opts.mode!r}')

            time.sleep(60)