local-llm-server/llm_server/cluster/worker.py

50 lines
1.4 KiB
Python

import time
from threading import Thread
from llm_server.cluster.backend import test_backend
from llm_server.cluster.stores import redis_running_models
from llm_server.cluster.cluster_config import cluster_config
"""
The definition for the cluster worker used to test the backends.
"""
def cluster_worker():
    """
    Poll every configured backend in an endless loop.

    On each pass one thread per entry of ``cluster_config.all()`` is started
    to run ``check_backend``; the pass waits for all of them to finish, then
    sleeps 15 seconds before the next one. This function never returns.
    """
    cycle = 0
    while True:
        # A test prompt is only requested on every fourth pass (including the
        # very first one); the other passes run the check without it.
        # NOTE(review): with the 15 s sleep this works out to roughly once a
        # minute plus however long the checks take -- confirm the intended cadence.
        send_test_prompt = cycle % 4 == 0

        checkers = [
            Thread(target=check_backend, args=(name, settings, send_test_prompt))
            for name, settings in cluster_config.all().items()
        ]
        for checker in checkers:
            checker.start()
        # Wait for the whole batch so passes never overlap.
        for checker in checkers:
            checker.join()

        time.sleep(15)
        cycle += 1
def check_backend(n, v, test_prompt):
    """
    Test a single backend and record the outcome.

    Run by the cluster worker, once per backend per pass.

    :param n: The backend's key in ``cluster_config``; also the member that is
        added to / removed from the ``redis_running_models`` entries.
    :param v: The backend's config mapping. Only ``v['backend_url']`` is read.
    :param test_prompt: Forwarded unchanged to ``test_backend`` as its
        ``test_prompt`` keyword argument.
    :return: None. Results are written to ``cluster_config`` and
        ``redis_running_models``.
    """
    is_online, info = test_backend(v['backend_url'], test_prompt=test_prompt)

    if not is_online:
        # Backend is down: drop it from every model entry currently tracked.
        for model_name in redis_running_models.keys():
            redis_running_models.srem(model_name, n)
    else:
        # Read the model first so a missing key fails before anything is written.
        current_model = info['model']
        # Copy every field reported by the backend into its config entry.
        for field, value in info.items():
            cluster_config.set_backend_value(n, field, value)
        # NOTE(review): the backend is not removed from a previously reported
        # model's entry here -- confirm whether a model switch can leave it
        # listed under the old model.
        redis_running_models.sadd(current_model, n)

    # Recorded in both cases, after any reported fields have been written.
    cluster_config.set_backend_value(n, 'online', is_online)