43 lines
1.4 KiB
Python
43 lines
1.4 KiB
Python
import time
|
|
from threading import Thread
|
|
|
|
from llm_server.cluster.cluster_config import cluster_config
|
|
from llm_server.cluster.backend import test_backend
|
|
from llm_server.cluster.stores import redis_running_models
|
|
|
|
|
|
def cluster_worker(*, poll_interval=15, test_prompt_every=4):
    """Poll every configured backend forever, one thread per backend.

    Each cycle starts a ``check_backend`` thread for every entry returned by
    ``cluster_config.all()``, waits for all of them to finish, then sleeps
    for ``poll_interval`` seconds. This function never returns.

    Args:
        poll_interval: Seconds to sleep after each cycle completes.
        test_prompt_every: Ask ``check_backend`` to send a test prompt on
            every N-th cycle, starting with the very first one. Must be
            at least 1.

    Raises:
        ValueError: If ``test_prompt_every`` is less than 1.
    """
    if test_prompt_every < 1:
        raise ValueError('test_prompt_every must be >= 1')

    counter = 0
    while True:
        # Only send a test prompt on every N-th cycle. With the defaults
        # (every 4th cycle, 15 second sleep) that is roughly once every
        # 60 seconds plus however long the checks themselves take.
        test_prompt = counter % test_prompt_every == 0

        threads = []
        for n, v in cluster_config.all().items():
            thread = Thread(target=check_backend, args=(n, v, test_prompt))
            thread.start()
            threads.append(thread)

        # Wait for every check to finish so that cycles never overlap.
        for thread in threads:
            thread.join()

        time.sleep(poll_interval)
        counter += 1
|
|
|
|
|
|
def check_backend(n, v, test_prompt):
    """Probe a single backend and record the result in the shared stores.

    Args:
        n: Identifier of the backend; used as the key for
            ``cluster_config.set_backend_value`` and as the member added to
            or removed from the ``redis_running_models`` entries.
        v: Mapping describing the backend; only ``v['backend_url']`` is read.
        test_prompt: Forwarded unchanged to ``test_backend``.
    """
    is_online, info = test_backend(v['backend_url'], test_prompt=test_prompt)

    if not is_online:
        # Offline: drop this backend from every model entry in the store.
        for model_name in redis_running_models.keys():
            redis_running_models.srem(model_name, n)
    else:
        model_name = info['model']
        # Copy every field the backend reported into its config entry.
        for field, value in info.items():
            cluster_config.set_backend_value(n, field, value)
        # NOTE(review): this only adds the backend to its current model's
        # entry; nothing here removes it from other models' entries, so a
        # backend that switched models may linger there -- confirm intended.
        redis_running_models.sadd(model_name, n)

    # Always record the latest reachability result, online or not.
    cluster_config.set_backend_value(n, 'online', is_online)
|