diff --git a/llm_server/workers/inferencer.py b/llm_server/workers/inferencer.py index f1a1a4f..4ec85b5 100644 --- a/llm_server/workers/inferencer.py +++ b/llm_server/workers/inferencer.py @@ -65,8 +65,8 @@ def need_to_wait(backend_url: str): active_workers = redis.get(f'active_gen_workers:{backend_url}', 0, dtype=int) concurrent_gens = cluster_config.get_backend(backend_url).get('concurrent_gens', 1) s = time.time() + print(active_workers >= concurrent_gens, active_workers, concurrent_gens) while active_workers >= concurrent_gens: - print('worker waiting') time.sleep(0.01) e = time.time() if e - s > 0.5: