96 lines
3.1 KiB
Python
96 lines
3.1 KiB
Python
from llm_server.cluster.cluster_config import cluster_config
|
|
from llm_server.cluster.redis_cycle import add_backend_cycler, redis_cycle
|
|
from llm_server.cluster.stores import redis_running_models
|
|
from llm_server.llm.generator import generator
|
|
from llm_server.llm.info import get_info
|
|
|
|
|
|
def test_backend(backend_url: str, test_prompt: bool = False):
    """
    Probe a backend and report whether it looks usable.

    When `test_prompt` is true, a short fixed prompt is first sent through
    `generator` (with `timeout=10`). The probe fails if that call reports
    failure, yields a falsy response, or yields an error.

    The backend's info is then fetched via `get_info`, using the `mode` stored
    in the backend's cluster config entry.

    :param backend_url: URL of the backend to probe.
    :param test_prompt: whether to run a test generation before fetching info.
    :return: `(True, info)` if the info has a truthy `model`, else `(False, {})`.
    """
    backend_info = cluster_config.get_backend(backend_url)

    if test_prompt:
        payload = {
            "prompt": "Test prompt",
            "stream": False,
            "temperature": 0,
            "max_new_tokens": 16,
        }
        success, response, err = generator(payload, backend_url, timeout=10)
        generation_ok = success and response and not err
        if not generation_ok:
            return False, {}

    info = get_info(backend_url, backend_info['mode'])
    if info.get('model'):
        return True, info
    # The backend did not report a model.
    return False, {}
|
|
|
|
|
|
def get_backends():
    """
    Return the URLs of every backend in the cluster config, split by status.

    Within each list, backends are ordered by ascending `priority` value.
    Backends with equal priority keep the order in which
    `cluster_config.all()` yields them.

    :return: a tuple `(online_urls, offline_urls)` of two lists of URLs.
    """
    # One stable sort followed by a partition gives exactly the same ordering
    # as sorting the online and offline groups separately. The plain ascending
    # key replaces the earlier "negated key + reverse=True" double negation.
    ordered = sorted(cluster_config.all().items(), key=lambda kv: kv[1]['priority'])

    online_urls = []
    offline_urls = []
    for url, info in ordered:
        if info['online']:
            online_urls.append(url)
        else:
            offline_urls.append(url)
    return online_urls, offline_urls
|
|
|
|
|
|
def get_a_cluster_backend(model=None):
    """
    Pick one backend to use, or return None when nothing suitable exists.

    With `model` given, the backends registered for that model in
    `redis_running_models` are handed to the Redis cycler and the first entry
    returned by `redis_cycle` is used.
    Without `model`, the first online backend reported by `get_backends()` is
    returned.
    """
    if not model:
        # No model requested: take the first online backend.
        online_urls, _ = get_backends()
        return online_urls[0] if online_urls else None

    # Backends hosting this model; set members are decoded from bytes to str.
    hosts = [raw.decode('utf-8') for raw in redis_running_models.smembers(model)]
    if not hosts:
        return None

    # Register the hosts with the cycler, then take whichever comes first.
    add_backend_cycler(model, hosts)
    rotation = redis_cycle(model)
    if len(rotation) == 0:
        # No backend hosting that model
        return None
    return rotation[0]
|
|
|
|
|
|
def get_backends_from_model(model_name: str):
    """
    Return the members of the `redis_running_models` set stored under
    `model_name`, each decoded from UTF-8 bytes to `str`.
    """
    decoded = []
    for member in redis_running_models.smembers(model_name):
        decoded.append(member.decode('utf-8'))
    return decoded
|
|
|
|
|
|
# TODO: unfinished stub, kept for reference only.
# def verify_context_size(model_name: str):
#     b = get_backends_from_model(model_name)
#     for backend_url in b:
#         backend_info = cluster_config.get_backend(backend_url)
#         backend_info.get()
|
|
|
|
|
|
def get_running_models():
    """
    Return every key currently present in the `redis_running_models` store.

    The keys are returned exactly as the store provides them (no decoding).
    """
    model_keys = redis_running_models.keys()
    return model_keys
|
|
|
|
|
|
def purge_backend_from_running_models(backend_url: str):
    """
    Remove `backend_url` from the set stored under every key in
    `redis_running_models`.

    All removals are queued on one pipeline and sent with a single
    `execute()` call.
    """
    model_keys = redis_running_models.keys()
    pipe = redis_running_models.pipeline()
    for model_key in model_keys:
        pipe.srem(model_key, backend_url)
    pipe.execute()
|
|
|
|
|
|
def is_valid_model(model_name: str):
    """
    Report whether `model_name` exists as a key in `redis_running_models`.

    The store's `exists()` result is returned unchanged (not coerced to bool).
    """
    key_exists = redis_running_models.exists(model_name)
    return key_exists
|