This repository has been archived on 2024-10-27. You can view files and clone it, but cannot push or open issues or pull requests.
local-llm-server/llm_server/threads.py

49 lines
1.8 KiB
Python

import time
from threading import Thread
import requests
from llm_server import opts
from llm_server.database import average_column
from llm_server.routes.cache import redis
class MainBackgroundThread(Thread):
    """Daemon thread that periodically refreshes cached statistics in Redis.

    Each iteration recomputes the average generation time and average output
    token count via ``average_column('prompts', ...)``, derives a
    tokens-per-second figure, writes all three to Redis and, when
    ``opts.mode`` is ``'oobabooga'``, probes the backend to record whether it
    is reachable. The loop then sleeps for 60 seconds.
    """

    backend_online = False

    # TODO: do I really need to put everything in Redis?
    # TODO: call generate_stats() every minute, cache the results, put results in a DB table, then have other parts of code call this cache

    def __init__(self):
        """Mark the thread as a daemon and zero the Redis keys it maintains."""
        Thread.__init__(self)
        self.daemon = True
        redis.set('average_generation_elapsed_sec', 0)
        redis.set('average_tps', 0)
        redis.set('average_output_tokens', 0)
        redis.set('backend_online', 0)

    @staticmethod
    def _compute_average_tps(average_output_tokens, average_generation_elapsed_sec):
        """Return average tokens per second rounded to two decimals.

        Returns 0 when either average is missing or zero, so the caller never
        hits a ``ZeroDivisionError`` (or a ``TypeError`` on ``None``), which
        would otherwise terminate the background thread.
        """
        if not average_output_tokens or not average_generation_elapsed_sec:
            return 0
        return round(average_output_tokens / average_generation_elapsed_sec, 2)

    def _update_averages(self):
        """Recompute the averaged statistics and store them in Redis."""
        # ``x if not None else 0`` always evaluates to ``x`` because
        # ``not None`` is constantly True; ``or 0`` is the fallback that was
        # intended. NOTE(review): presumably average_column() yields None
        # when there are no rows yet -- confirm against llm_server.database.
        average_generation_elapsed_sec = average_column('prompts', 'generation_time') or 0
        redis.set('average_generation_elapsed_sec', average_generation_elapsed_sec)

        average_output_tokens = average_column('prompts', 'response_tokens') or 0
        redis.set('average_output_tokens', average_output_tokens)

        average_tps = self._compute_average_tps(average_output_tokens, average_generation_elapsed_sec)
        redis.set('average_tps', average_tps)

    def _check_backend(self):
        """Probe the backend for the current mode and record its status.

        Raises:
            Exception: if ``opts.mode`` is not a supported mode.
        """
        if opts.mode == 'oobabooga':
            try:
                r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
                opts.running_model = r.json()['result']
                redis.set('backend_online', 1)
            except Exception as e:
                # Deliberately broad: any failure (network error, bad JSON,
                # missing key) means the backend is treated as offline, and
                # the polling loop must keep running.
                redis.set('backend_online', 0)
                # TODO: handle error
                print(e)
        elif opts.mode == 'hf-textgen':
            # No health probe is implemented for this mode.
            pass
        else:
            raise Exception(f'Unsupported backend mode: {opts.mode!r}')

    def run(self):
        """Refresh the statistics and the backend status once a minute, forever."""
        while True:
            self._update_averages()
            self._check_backend()
            time.sleep(60)