handle requests to offline backends

This commit is contained in:
Cyberes 2023-10-02 11:11:48 -06:00
parent b0089859d7
commit 4f226ae38e
1 changed file with 11 additions and 1 deletion

View File

@@ -1,6 +1,7 @@
import threading
import time
from llm_server.cluster.backend import get_a_cluster_backend
from llm_server.cluster.cluster_config import cluster_config
from llm_server.custom_redis import redis
from llm_server.llm.generator import generator
@@ -10,8 +11,17 @@ from llm_server.routes.queue import DataEvent, decr_active_workers, decrement_ip
def worker():
while True:
(request_json_body, client_ip, token, parameters, backend_url), event_id, selected_model = priority_queue.get()
backend_info = cluster_config.get_backend(backend_url)
if not backend_info['online']:
old = backend_url
backend_url = get_a_cluster_backend()
backend_info = cluster_config.get_backend(backend_url)
print(f'Backend {old} offline. Request was redirected to {backend_url}')
del old
if not selected_model:
selected_model = cluster_config.get_backend(backend_url)['model']
selected_model = backend_info['model']
# This wait time is "invisible", meaning the worker may as
# well be still waiting to get an item from the queue.