From 94e845cd1a7c1102a6bee33d28dcd3d324252105 Mon Sep 17 00:00:00 2001 From: Cyberes Date: Sat, 23 Sep 2023 21:09:21 -0600 Subject: [PATCH] if there's less than num concurrent wait time is 0 --- llm_server/routes/v1/generate_stats.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llm_server/routes/v1/generate_stats.py b/llm_server/routes/v1/generate_stats.py index 94d663d..17b8bd4 100644 --- a/llm_server/routes/v1/generate_stats.py +++ b/llm_server/routes/v1/generate_stats.py @@ -13,7 +13,9 @@ from llm_server.routes.stats import calculate_avg_gen_time, get_active_gen_worke def calculate_wait_time(gen_time_calc, proompters_in_queue, concurrent_gens, active_gen_workers): workers_running = gen_time_calc if active_gen_workers > 0 else 0 - if proompters_in_queue > 0: + if proompters_in_queue < concurrent_gens: + return 0 + elif proompters_in_queue >= concurrent_gens: # Calculate how long it will take to complete the currently running gens and the queued requests. # If the proompters in the queue are equal to the number of workers, just use the calculated generation time. # Otherwise, use how many requests we can process concurrently times the calculated generation time. Then, round