This commit is contained in:
Cyberes 2023-10-05 19:25:08 -06:00
parent c4cc7bbaa0
commit 8df667bc0a
2 changed files with 4 additions and 10 deletions

View File

@@ -12,17 +12,10 @@ def worker():
(request_json_body, client_ip, token, parameters, backend_url), event_id, selected_model = priority_queue.get() (request_json_body, client_ip, token, parameters, backend_url), event_id, selected_model = priority_queue.get()
if not backend_url: if not backend_url:
backend_url = get_a_cluster_backend(selected_model) backend_url = get_a_cluster_backend(selected_model)
else:
backend_url = cluster_config.validate_backend(backend_url)
backend_info = cluster_config.get_backend(backend_url) backend_info = cluster_config.get_backend(backend_url)
# The backend could have died between when the request was
# submitted and now, so let's double check it's still online.
if not backend_info['online']:
old = backend_url
backend_url = get_a_cluster_backend()
backend_info = cluster_config.get_backend(backend_url)
print(f'Backend {old} offline. Request was redirected to {backend_url}')
del old # gc
if not selected_model: if not selected_model:
selected_model = backend_info['model'] selected_model = backend_info['model']
@@ -67,7 +60,7 @@ def need_to_wait(backend_url: str):
active_workers = redis.get(f'active_gen_workers:{backend_url}', 0, dtype=int) active_workers = redis.get(f'active_gen_workers:{backend_url}', 0, dtype=int)
concurrent_gens = cluster_config.get_backend(backend_url).get('concurrent_gens', 1) concurrent_gens = cluster_config.get_backend(backend_url).get('concurrent_gens', 1)
s = time.time() s = time.time()
print(active_workers >= concurrent_gens, active_workers, concurrent_gens) print(active_workers)
while active_workers >= concurrent_gens: while active_workers >= concurrent_gens:
time.sleep(0.01) time.sleep(0.01)
e = time.time() e = time.time()

View File

@@ -30,6 +30,7 @@ from llm_server.routes.v1 import bp
from llm_server.routes.v1.generate_stats import generate_stats from llm_server.routes.v1.generate_stats import generate_stats
from llm_server.sock import init_socketio from llm_server.sock import init_socketio
# TODO: queue item timeout
# TODO: return an `error: True`, error code, and error message rather than just a formatted message # TODO: return an `error: True`, error code, and error message rather than just a formatted message
# TODO: what happens when all backends are offline? What about the "online" key in the stats page? # TODO: what happens when all backends are offline? What about the "online" key in the stats page?
# TODO: redis SCAN vs KEYS?? # TODO: redis SCAN vs KEYS??