diff --git a/llm_server/routes/v1/generate_stats.py b/llm_server/routes/v1/generate_stats.py index ccd24a3..ca24b74 100644 --- a/llm_server/routes/v1/generate_stats.py +++ b/llm_server/routes/v1/generate_stats.py @@ -22,8 +22,8 @@ def calculate_wait_time(gen_time_calc, proompters_in_queue, concurrent_gens, act # This assumes that all active workers will finish at the same time, which is unlikely. # Regardless, this is the most accurate estimate we can get without tracking worker elapsed times. proompters_in_queue_wait_time = gen_time_calc if (proompters_in_queue / concurrent_gens) <= 1 \ - else round_up_base(((proompters_in_queue / concurrent_gens) * gen_time_calc), base=gen_time_calc) + workers_running - return proompters_in_queue_wait_time + else round_up_base(((proompters_in_queue / concurrent_gens) * gen_time_calc), base=gen_time_calc) + return proompters_in_queue_wait_time + workers_running elif proompters_in_queue == 0 and active_gen_workers == 0: # No queue, no workers return 0