import time
from threading import Thread

import requests

from llm_server import opts
from llm_server.database import average_column
from llm_server.routes.cache import redis


class BackendHealthCheck(Thread):
    backend_online = False

    def __init__(self):
        Thread.__init__(self)
        self.daemon = True
        redis.set('average_generation_elapsed_sec', 0)
        redis.set('average_tps', 0)
        redis.set('average_output_tokens', 0)
        redis.set('backend_online', 0)

    def run(self):
        while True:
            # average_column() returns None when no rows exist yet, so fall back to 0.
            average_generation_elapsed_sec = average_column('prompts', 'generation_time') or 0
            redis.set('average_generation_elapsed_sec', average_generation_elapsed_sec)

            average_output_tokens = average_column('prompts', 'response_tokens') or 0
            redis.set('average_output_tokens', average_output_tokens)

            # Guard against division by zero before any prompts have been recorded.
            if average_generation_elapsed_sec > 0:
                average_tps = round(average_output_tokens / average_generation_elapsed_sec, 2)
            else:
                average_tps = 0
            redis.set('average_tps', average_tps)

            if opts.mode == 'oobabooga':
                try:
                    r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
                    opts.running_model = r.json()['result']
                    redis.set('backend_online', 1)
                except Exception as e:
                    redis.set('backend_online', 0)
                    # TODO: handle error
                    print(e)
            elif opts.mode == 'hf-textgen':
                pass
            else:
                raise Exception(f'Unknown backend mode: {opts.mode}')

            time.sleep(60)
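
# A minimal usage sketch (assumption: the server's actual entry point is not
# shown in this file, and the import path below is hypothetical). The thread
# is a daemon, so it polls the backend every 60 seconds until the process
# exits:
#
#     from llm_server.threads import BackendHealthCheck
#
#     health_check = BackendHealthCheck()
#     health_check.start()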