actually wait again
This commit is contained in:
parent
e9f6fdf65e
commit
da20d1807b
|
@ -19,6 +19,8 @@ def worker():
|
||||||
if not selected_model:
|
if not selected_model:
|
||||||
selected_model = backend_info['model']
|
selected_model = backend_info['model']
|
||||||
|
|
||||||
|
need_to_wait(backend_url)
|
||||||
|
|
||||||
increment_ip_count(client_ip, 'processing_ips')
|
increment_ip_count(client_ip, 'processing_ips')
|
||||||
incr_active_workers(selected_model, backend_url)
|
incr_active_workers(selected_model, backend_url)
|
||||||
|
|
||||||
|
@ -56,3 +58,16 @@ def start_workers(num_workers: int):
|
||||||
t.start()
|
t.start()
|
||||||
i += 1
|
i += 1
|
||||||
print(f'Started {i} inference workers.')
|
print(f'Started {i} inference workers.')
|
||||||
|
|
||||||
|
|
||||||
|
def need_to_wait(backend_url: str):
|
||||||
|
# We need to check the number of active workers since the streaming endpoint may be doing something.
|
||||||
|
active_workers = redis.get(f'active_gen_workers:{backend_url}', 0, dtype=int)
|
||||||
|
concurrent_gens = cluster_config.get_backend(backend_url).get('concurrent_gens', 1)
|
||||||
|
s = time.time()
|
||||||
|
print(active_workers)
|
||||||
|
while active_workers >= concurrent_gens:
|
||||||
|
time.sleep(0.01)
|
||||||
|
e = time.time()
|
||||||
|
if e - s > 0.1:
|
||||||
|
print(f'Worker was delayed {e - s} seconds.')
|
||||||
|
|
Reference in New Issue