33 lines
896 B
Python
33 lines
896 B
Python
|
import time
|
||
|
from threading import Thread
|
||
|
|
||
|
import requests
|
||
|
|
||
|
from llm_server import opts
|
||
|
from llm_server.routes.cache import redis
|
||
|
|
||
|
|
||
|
class BackendHealthCheck(Thread):
    """Daemon thread that polls the backend about once per second.

    In ``oobabooga`` mode each poll requests
    ``{opts.backend_url}/api/v1/model``. On success the ``result`` field of
    the JSON reply is stored in ``opts.running_model`` and the Redis key
    ``backend_online`` is set to ``1``. On any failure (connection error,
    timeout, HTTP error status, malformed reply) the key is set to ``0`` and
    the error is printed.

    In ``hf-textgen`` mode no check is performed; the loop just sleeps.

    Raises:
        Exception: from ``run`` if ``opts.mode`` is not a supported mode,
            which terminates the thread.
    """

    # This class attribute is never updated by the code below; the live
    # status is published through the Redis key 'backend_online' instead.
    backend_online = False

    def __init__(self):
        Thread.__init__(self)
        # Daemon thread: must not keep the interpreter alive at shutdown.
        self.daemon = True

    def run(self):
        """Poll forever, sleeping one second between iterations."""
        while True:
            if opts.mode == 'oobabooga':
                try:
                    r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
                    # Treat 4xx/5xx replies as "offline" instead of trusting
                    # whatever JSON body an error page may carry.
                    r.raise_for_status()
                    opts.running_model = r.json()['result']
                    redis.set('backend_online', 1)
                except Exception as e:
                    # Deliberately broad: any failure means the backend is
                    # unusable, and the checker must keep running.
                    redis.set('backend_online', 0)
                    # TODO: handle error
                    print(e)
            elif opts.mode == 'hf-textgen':
                # No health check implemented for this mode.
                pass
            else:
                # Same exception type as before, now with a diagnosable message.
                raise Exception(f'Unsupported backend mode: {opts.mode!r}')
            time.sleep(1)
|