handle requests to offline backends
This commit is contained in:
parent
b0089859d7
commit
4f226ae38e
|
@@ -1,6 +1,7 @@
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
from llm_server.cluster.backend import get_a_cluster_backend
|
||||||
from llm_server.cluster.cluster_config import cluster_config
|
from llm_server.cluster.cluster_config import cluster_config
|
||||||
from llm_server.custom_redis import redis
|
from llm_server.custom_redis import redis
|
||||||
from llm_server.llm.generator import generator
|
from llm_server.llm.generator import generator
|
||||||
|
@@ -10,8 +11,17 @@ from llm_server.routes.queue import DataEvent, decr_active_workers, decrement_ip
|
||||||
def worker():
|
def worker():
|
||||||
while True:
|
while True:
|
||||||
(request_json_body, client_ip, token, parameters, backend_url), event_id, selected_model = priority_queue.get()
|
(request_json_body, client_ip, token, parameters, backend_url), event_id, selected_model = priority_queue.get()
|
||||||
|
backend_info = cluster_config.get_backend(backend_url)
|
||||||
|
|
||||||
|
if not backend_info['online']:
|
||||||
|
old = backend_url
|
||||||
|
backend_url = get_a_cluster_backend()
|
||||||
|
backend_info = cluster_config.get_backend(backend_url)
|
||||||
|
print(f'Backend {old} offline. Request was redirected to {backend_url}')
|
||||||
|
del old
|
||||||
|
|
||||||
if not selected_model:
|
if not selected_model:
|
||||||
selected_model = cluster_config.get_backend(backend_url)['model']
|
selected_model = backend_info['model']
|
||||||
|
|
||||||
# This wait time is "invisible", meaning the worker may as
|
# This wait time is "invisible", meaning the worker may as
|
||||||
# well be still waiting to get an item from the queue.
|
# well be still waiting to get an item from the queue.
|
||||||
|
|
Reference in New Issue