If fewer prompters are queued than the number of concurrent generations, the wait time is 0
This commit is contained in:
parent
41e622d19c
commit
94e845cd1a
|
@ -13,7 +13,9 @@ from llm_server.routes.stats import calculate_avg_gen_time, get_active_gen_worke
|
||||||
|
|
||||||
def calculate_wait_time(gen_time_calc, proompters_in_queue, concurrent_gens, active_gen_workers):
|
def calculate_wait_time(gen_time_calc, proompters_in_queue, concurrent_gens, active_gen_workers):
|
||||||
workers_running = gen_time_calc if active_gen_workers > 0 else 0
|
workers_running = gen_time_calc if active_gen_workers > 0 else 0
|
||||||
if proompters_in_queue > 0:
|
if proompters_in_queue < concurrent_gens:
|
||||||
|
return 0
|
||||||
|
elif proompters_in_queue >= concurrent_gens:
|
||||||
# Calculate how long it will take to complete the currently running gens and the queued requests.
|
# Calculate how long it will take to complete the currently running gens and the queued requests.
|
||||||
# If the proompters in the queue are equal to the number of workers, just use the calculated generation time.
|
# If the proompters in the queue are equal to the number of workers, just use the calculated generation time.
|
||||||
# Otherwise, use how many requests we can process concurrently times the calculated generation time. Then, round
|
# Otherwise, use how many requests we can process concurrently times the calculated generation time. Then, round
|
||||||
|
|
Reference in New Issue