# local-llm-server/llm_server/threads.py

import time
from threading import Thread

from llm_server import opts
from llm_server.database.database import weighted_average_column_for_model
from llm_server.llm.info import get_running_model
from llm_server.routes.cache import redis
from llm_server.routes.v1.generate_stats import generate_stats


class MainBackgroundThread(Thread):
    backend_online = False

    # TODO: do I really need to put everything in Redis?
    # TODO: call generate_stats() every minute, cache the results, put the results in a DB table,
    #       then have other parts of the code read from that cache.

    def __init__(self):
        Thread.__init__(self)
        self.daemon = True
        # Seed the cached stats so readers never see missing keys.
        redis.set('average_generation_elapsed_sec', 0)
        redis.set('estimated_avg_tps', 0)
        redis.set('average_output_tokens', 0)
        redis.set('backend_online', 0)
        redis.set_dict('backend_info', {})

    def run(self):
        while True:
            # Check that the backend is reachable and record which model it is running.
            if opts.mode == 'oobabooga':
                model, err = get_running_model()
                if err:
                    print(err)
                    redis.set('backend_online', 0)
                else:
                    opts.running_model = model
                    redis.set('backend_online', 1)
            elif opts.mode == 'vllm':
                model, err = get_running_model()
                if err:
                    print(err)
                    redis.set('backend_online', 0)
                else:
                    opts.running_model = model
                    redis.set('backend_online', 1)
            else:
                raise Exception(f'Unknown backend mode: {opts.mode}')

            # exclude_zeros=True filters out rows where an error message was returned. Previously, if
            # there was an error, 0 was entered into the column. The new code enters NULL instead, but
            # we need to stay backwards compatible for now.
            average_generation_elapsed_sec = weighted_average_column_for_model('prompts', 'generation_time', opts.running_model, opts.mode, opts.backend_url, exclude_zeros=True) or 0
            if average_generation_elapsed_sec > -1:
                redis.set('average_generation_elapsed_sec', average_generation_elapsed_sec)
            # overall = average_column_for_model('prompts', 'generation_time', opts.running_model)
            # print(f'Weighted: {average_generation_elapsed_sec}, overall: {overall}')

            average_output_tokens = weighted_average_column_for_model('prompts', 'response_tokens', opts.running_model, opts.mode, opts.backend_url, exclude_zeros=True) or 0
            if average_output_tokens > -1:
                redis.set('average_output_tokens', average_output_tokens)
            # overall = average_column_for_model('prompts', 'response_tokens', opts.running_model)
            # print(f'Weighted: {average_output_tokens}, overall: {overall}')

            # Estimated tokens per second for the currently running model.
            estimated_avg_tps = round(average_output_tokens / average_generation_elapsed_sec, 2) if average_generation_elapsed_sec > 0 else 0  # Avoid division by zero
            redis.set('estimated_avg_tps', estimated_avg_tps)
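            # For example (illustrative numbers, not measurements from this project): with a weighted
            # average of 180 output tokens over 12.5 seconds, estimated_avg_tps = round(180 / 12.5, 2) == 14.4.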

            time.sleep(60)


def cache_stats():
    while True:
        # If opts.base_client_api is null, no one has visited the site yet and base_client_api
        # hasn't been set. Do nothing until then.
        if redis.get('base_client_api'):
            generate_stats()
        time.sleep(5)
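

# A minimal sketch of how these two workers are presumably started from the server's startup code.
# The helper name below is hypothetical and only for illustration; the real entry point in this
# repository may wire things up differently.
def _example_start_background_workers():
    # MainBackgroundThread is already a daemon Thread subclass, so start() is enough.
    MainBackgroundThread().start()
    # cache_stats() is a plain blocking loop, so it gets its own daemon thread.
    Thread(target=cache_stats, daemon=True).start()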