# local-llm-server/llm_server/cluster/worker.py

import time
from threading import Thread

from llm_server.cluster.cluster_config import cluster_config
from llm_server.cluster.backend import test_backend
from llm_server.cluster.stores import redis_running_models


def cluster_worker():
    """Health-check every configured backend in an endless loop.

    Each pass reads the backends from ``cluster_config.all()``, runs
    ``check_backend`` for every entry in its own thread, waits for all of
    those threads to finish and then sleeps for 15 seconds. This function
    never returns.
    """
    cycle = 0
    while True:
        # A test prompt is requested on every 4th pass, starting with the
        # very first one. With the 15-second sleep below that is at least
        # 60 seconds between test prompts (longer when the checks are slow).
        send_test_prompt = cycle % 4 == 0

        workers = []
        for backend_name, backend_conf in cluster_config.all().items():
            worker = Thread(
                target=check_backend,
                args=(backend_name, backend_conf, send_test_prompt),
            )
            worker.start()
            workers.append(worker)

        # Block until the whole batch is done so passes never overlap.
        for worker in workers:
            worker.join()

        time.sleep(15)
        cycle += 1


def check_backend(n, v, test_prompt):
    """Probe a single backend and record the outcome in the cluster state.

    Args:
        n: Backend identifier. Used as the key for
            ``cluster_config.set_backend_value`` and as the member stored in
            the ``redis_running_models`` sets.
        v: Backend configuration mapping; only ``v['backend_url']`` is read.
        test_prompt: Forwarded unchanged to ``test_backend``.

    When the backend is online, every item of the info mapping returned by
    ``test_backend`` is copied into ``cluster_config`` and the backend is
    registered under the model it reports (``backend_info['model']``) and
    removed from every other model's set. When it is offline, it is removed
    from all model sets. In both cases the ``'online'`` flag is written last.

    Raises:
        KeyError: If the backend is online but its info has no ``'model'``
            entry, or if ``v`` has no ``'backend_url'`` entry.
    """
    online, backend_info = test_backend(v['backend_url'], test_prompt=test_prompt)

    if online:
        running_model = backend_info['model']

        # Use distinct loop names so the ``v`` parameter is not overwritten.
        for key, value in backend_info.items():
            cluster_config.set_backend_value(n, key, value)

        # Drop memberships left over from a previously loaded model, otherwise
        # a backend that switched models stays listed under the old one until
        # it goes offline. The current model is skipped and the removal runs
        # *before* the add, so the backend ends up in its own set either way.
        # NOTE(review): assumes keys() yields names comparable with
        # backend_info['model'] (same type/encoding) -- confirm.
        for model in redis_running_models.keys():
            if model != running_model:
                redis_running_models.srem(model, n)
        redis_running_models.sadd(running_model, n)
    else:
        # Offline: the backend must not be listed under any model.
        for model in redis_running_models.keys():
            redis_running_models.srem(model, n)

    cluster_config.set_backend_value(n, 'online', online)