This commit is contained in:
parent
c4cc7bbaa0
commit
8df667bc0a
|
@ -12,17 +12,10 @@ def worker():
|
||||||
(request_json_body, client_ip, token, parameters, backend_url), event_id, selected_model = priority_queue.get()
|
(request_json_body, client_ip, token, parameters, backend_url), event_id, selected_model = priority_queue.get()
|
||||||
if not backend_url:
|
if not backend_url:
|
||||||
backend_url = get_a_cluster_backend(selected_model)
|
backend_url = get_a_cluster_backend(selected_model)
|
||||||
|
else:
|
||||||
|
backend_url = cluster_config.validate_backend(backend_url)
|
||||||
backend_info = cluster_config.get_backend(backend_url)
|
backend_info = cluster_config.get_backend(backend_url)
|
||||||
|
|
||||||
# The backend could have died between when the request was
|
|
||||||
# submitted and now, so let's double check it's still online.
|
|
||||||
if not backend_info['online']:
|
|
||||||
old = backend_url
|
|
||||||
backend_url = get_a_cluster_backend()
|
|
||||||
backend_info = cluster_config.get_backend(backend_url)
|
|
||||||
print(f'Backend {old} offline. Request was redirected to {backend_url}')
|
|
||||||
del old # gc
|
|
||||||
|
|
||||||
if not selected_model:
|
if not selected_model:
|
||||||
selected_model = backend_info['model']
|
selected_model = backend_info['model']
|
||||||
|
|
||||||
|
@ -67,7 +60,7 @@ def need_to_wait(backend_url: str):
|
||||||
active_workers = redis.get(f'active_gen_workers:{backend_url}', 0, dtype=int)
|
active_workers = redis.get(f'active_gen_workers:{backend_url}', 0, dtype=int)
|
||||||
concurrent_gens = cluster_config.get_backend(backend_url).get('concurrent_gens', 1)
|
concurrent_gens = cluster_config.get_backend(backend_url).get('concurrent_gens', 1)
|
||||||
s = time.time()
|
s = time.time()
|
||||||
print(active_workers >= concurrent_gens, active_workers, concurrent_gens)
|
print(active_workers)
|
||||||
while active_workers >= concurrent_gens:
|
while active_workers >= concurrent_gens:
|
||||||
time.sleep(0.01)
|
time.sleep(0.01)
|
||||||
e = time.time()
|
e = time.time()
|
||||||
|
|
|
@ -30,6 +30,7 @@ from llm_server.routes.v1 import bp
|
||||||
from llm_server.routes.v1.generate_stats import generate_stats
|
from llm_server.routes.v1.generate_stats import generate_stats
|
||||||
from llm_server.sock import init_socketio
|
from llm_server.sock import init_socketio
|
||||||
|
|
||||||
|
# TODO: queue item timeout
|
||||||
# TODO: return an `error: True`, error code, and error message rather than just a formatted message
|
# TODO: return an `error: True`, error code, and error message rather than just a formatted message
|
||||||
# TODO: what happens when all backends are offline? What about the "online" key in the stats page?
|
# TODO: what happens when all backends are offline? What about the "online" key in the stats page?
|
||||||
# TODO: redis SCAN vs KEYS??
|
# TODO: redis SCAN vs KEYS??
|
||||||
|
|
Reference in New Issue