handle requests to offline backends
This commit is contained in:
parent
b0089859d7
commit
4f226ae38e
|
@ -1,6 +1,7 @@
|
|||
import threading
|
||||
import time
|
||||
|
||||
from llm_server.cluster.backend import get_a_cluster_backend
|
||||
from llm_server.cluster.cluster_config import cluster_config
|
||||
from llm_server.custom_redis import redis
|
||||
from llm_server.llm.generator import generator
|
||||
|
@ -10,8 +11,17 @@ from llm_server.routes.queue import DataEvent, decr_active_workers, decrement_ip
|
|||
def worker():
|
||||
while True:
|
||||
(request_json_body, client_ip, token, parameters, backend_url), event_id, selected_model = priority_queue.get()
|
||||
backend_info = cluster_config.get_backend(backend_url)
|
||||
|
||||
if not backend_info['online']:
|
||||
old = backend_url
|
||||
backend_url = get_a_cluster_backend()
|
||||
backend_info = cluster_config.get_backend(backend_url)
|
||||
print(f'Backend {old} offline. Request was redirected to {backend_url}')
|
||||
del old
|
||||
|
||||
if not selected_model:
|
||||
selected_model = cluster_config.get_backend(backend_url)['model']
|
||||
selected_model = backend_info['model']
|
||||
|
||||
# This wait time is "invisible", meaning the worker may as
|
||||
# well still be waiting to get an item from the queue.
|
||||
|
|
Reference in New Issue