from llm_server.cluster.cluster_config import cluster_config
from llm_server.cluster.redis_cycle import add_backend_cycler, redis_cycle
from llm_server.cluster.stores import redis_running_models
from llm_server.llm.generator import generator
from llm_server.llm.info import get_info


def test_backend(backend_url: str, test_prompt: bool = False):
    """Check that a backend is online and responding. Returns (success, backend info dict)."""
    backend_info = cluster_config.get_backend(backend_url)
    if test_prompt:
        data = {
            "prompt": "Test prompt",
            "stream": False,
            "temperature": 0,
            "max_new_tokens": 3,
        }
        try:
            success, response, err = generator(data, backend_url, timeout=10)
            if not success or not response or err:
                return False, {}
        except Exception:
            return False, {}
    i = get_info(backend_url, backend_info['mode'])
    if not i.get('model'):
        return False, {}
    return True, i


def get_backends():
    """Return ([online backend URLs], [offline backend URLs]), each sorted by priority (ascending)."""
    backends = cluster_config.all()
    online_backends = sorted(
        ((url, info) for url, info in backends.items() if info['online']),
        key=lambda kv: kv[1]['priority']
    )
    offline_backends = sorted(
        ((url, info) for url, info in backends.items() if not info['online']),
        key=lambda kv: kv[1]['priority']
    )
    return [url for url, info in online_backends], [url for url, info in offline_backends]


def get_a_cluster_backend(model=None):
    """
    Get a backend from Redis. If there are no online backends, return None.
    If `model` is not supplied, we will pick one ourselves.
    """
    if model:
        # First, determine if there are multiple backends hosting the same model.
        backends_hosting_model = [i.decode('utf-8') for i in redis_running_models.smembers(model)]

        # If so, create an iterator for those backends and take the next one in the cycle.
        if backends_hosting_model:
            add_backend_cycler(model, backends_hosting_model)
            cycled = redis_cycle(model)
            if cycled:
                return cycled[0]
        else:
            # No backend is hosting that model.
            return None
    else:
        online, _ = get_backends()
        if online:
            return online[0]
    return None


def get_backends_from_model(model_name: str):
    return [x.decode('utf-8') for x in redis_running_models.smembers(model_name)]


# def verify_context_size(model_name: str):
#     b = get_backends_from_model(model_name)
#     for backend_url in b:
#         backend_info = cluster_config.get_backend(backend_url)
#         backend_info.get()


def get_running_models():
    return redis_running_models.keys()


def purge_backend_from_running_models(backend_url: str):
    # Remove the backend from every model's set of running backends.
    keys = redis_running_models.keys()
    pipeline = redis_running_models.pipeline()
    for model in keys:
        pipeline.srem(model, backend_url)
    pipeline.execute()


def is_valid_model(model_name: str):
    return bool(redis_running_models.exists(model_name))
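
# --- Illustrative usage sketch (not part of the original module's public surface) ---
# A minimal example of how these helpers could be exercised together, assuming
# cluster_config has already been loaded and Redis is reachable. The model name
# below is hypothetical.
if __name__ == '__main__':
    example_model = 'example-13b'  # hypothetical model name

    if is_valid_model(example_model):
        # Round-robin between the backends currently hosting this model.
        backend_url = get_a_cluster_backend(example_model)
    else:
        # Fall back to the first online backend in priority order, if any.
        backend_url = get_a_cluster_backend()

    if backend_url:
        ok, info = test_backend(backend_url, test_prompt=False)
        print(backend_url, 'online' if ok else 'offline', info.get('model'))
    else:
        print('No online backends available.')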