diff --git a/llm_server/cluster/cluster_config.py b/llm_server/cluster/cluster_config.py index 453fa33..891dfc1 100644 --- a/llm_server/cluster/cluster_config.py +++ b/llm_server/cluster/cluster_config.py @@ -1,5 +1,6 @@ import hashlib import pickle +import traceback from llm_server import opts from llm_server.cluster.redis_cycle import add_backend_cycler, redis_cycle @@ -75,24 +76,28 @@ def get_backends(): priority = b['priority'] result[k] = {'status': status, 'priority': priority} - if not opts.prioritize_by_size: - online_backends = sorted( - ((url, info) for url, info in backends.items() if info['online']), + try: + if not opts.prioritize_by_size: + online_backends = sorted( + ((url, info) for url, info in backends.items() if info['online']), + key=lambda kv: -kv[1]['priority'], + reverse=True + ) + else: + online_backends = sorted( + ((url, info) for url, info in backends.items() if info['online']), + key=lambda kv: estimate_model_size(kv[1]['model_config']), + reverse=True + ) + offline_backends = sorted( + ((url, info) for url, info in backends.items() if not info['online']), key=lambda kv: -kv[1]['priority'], reverse=True ) - else: - online_backends = sorted( - ((url, info) for url, info in backends.items() if info['online']), - key=lambda kv: estimate_model_size(kv[1]['model_config']), - reverse=True - ) - offline_backends = sorted( - ((url, info) for url, info in backends.items() if not info['online']), - key=lambda kv: -kv[1]['priority'], - reverse=True - ) - return [url for url, info in online_backends], [url for url, info in offline_backends] + return [url for url, info in online_backends], [url for url, info in offline_backends] + except KeyError: + traceback.print_exc() + print(backends) def get_a_cluster_backend(model=None):