rename average_tps to estimated_avg_tps
This commit is contained in:
parent
c45e68a8c8
commit
e79b206e1a
|
@ -30,12 +30,12 @@ def generate_stats():
|
||||||
|
|
||||||
active_gen_workers = get_active_gen_workers()
|
active_gen_workers = get_active_gen_workers()
|
||||||
proompters_in_queue = len(priority_queue)
|
proompters_in_queue = len(priority_queue)
|
||||||
average_tps = float(redis.get('average_tps'))
|
estimated_avg_tps = float(redis.get('estimated_avg_tps'))
|
||||||
|
|
||||||
if opts.average_generation_time_mode == 'database':
|
if opts.average_generation_time_mode == 'database':
|
||||||
average_generation_time = float(redis.get('average_generation_elapsed_sec'))
|
average_generation_time = float(redis.get('average_generation_elapsed_sec'))
|
||||||
# average_output_tokens = float(redis.get('average_output_tokens'))
|
# average_output_tokens = float(redis.get('average_output_tokens'))
|
||||||
# average_generation_time_from_tps = (average_output_tokens / average_tps)
|
# average_generation_time_from_tps = (average_output_tokens / estimated_avg_tps)
|
||||||
|
|
||||||
# What to use in our math that calculates the wait time.
|
# What to use in our math that calculates the wait time.
|
||||||
# We could use the average TPS but we don't know the exact TPS value, only
|
# We could use the average TPS but we don't know the exact TPS value, only
|
||||||
|
@ -46,7 +46,7 @@ def generate_stats():
|
||||||
(gen_time_calc * proompters_in_queue) / opts.concurrent_gens # Calculate wait time for items in queue
|
(gen_time_calc * proompters_in_queue) / opts.concurrent_gens # Calculate wait time for items in queue
|
||||||
) + (
|
) + (
|
||||||
active_gen_workers * gen_time_calc # Calculate wait time for in-process items
|
active_gen_workers * gen_time_calc # Calculate wait time for in-process items
|
||||||
) if average_tps > 0 else 0
|
) if estimated_avg_tps > 0 else 0
|
||||||
elif opts.average_generation_time_mode == 'minute':
|
elif opts.average_generation_time_mode == 'minute':
|
||||||
average_generation_time = calculate_avg_gen_time()
|
average_generation_time = calculate_avg_gen_time()
|
||||||
gen_time_calc = average_generation_time
|
gen_time_calc = average_generation_time
|
||||||
|
@ -75,7 +75,7 @@ def generate_stats():
|
||||||
'proompts_total': get_total_proompts() if opts.show_num_prompts else None,
|
'proompts_total': get_total_proompts() if opts.show_num_prompts else None,
|
||||||
'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
|
'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
|
||||||
'average_generation_elapsed_sec': int(gen_time_calc),
|
'average_generation_elapsed_sec': int(gen_time_calc),
|
||||||
'average_tps': average_tps,
|
'estimated_avg_tps': estimated_avg_tps,
|
||||||
'tokens_generated': sum_column('prompts', 'response_tokens') if opts.show_total_output_tokens else None,
|
'tokens_generated': sum_column('prompts', 'response_tokens') if opts.show_total_output_tokens else None,
|
||||||
'nvidia': netdata_stats
|
'nvidia': netdata_stats
|
||||||
},
|
},
|
||||||
|
|
|
@ -17,7 +17,7 @@ class MainBackgroundThread(Thread):
|
||||||
Thread.__init__(self)
|
Thread.__init__(self)
|
||||||
self.daemon = True
|
self.daemon = True
|
||||||
redis.set('average_generation_elapsed_sec', 0)
|
redis.set('average_generation_elapsed_sec', 0)
|
||||||
redis.set('average_tps', 0)
|
redis.set('estimated_avg_tps', 0)
|
||||||
redis.set('average_output_tokens', 0)
|
redis.set('average_output_tokens', 0)
|
||||||
redis.set('backend_online', 0)
|
redis.set('backend_online', 0)
|
||||||
redis.set_dict('backend_info', {})
|
redis.set_dict('backend_info', {})
|
||||||
|
@ -66,6 +66,6 @@ class MainBackgroundThread(Thread):
|
||||||
# print(f'Weighted: {average_output_tokens}, overall: {overall}')
|
# print(f'Weighted: {average_output_tokens}, overall: {overall}')
|
||||||
|
|
||||||
# Avoid division by zero
|
# Avoid division by zero
|
||||||
average_tps = round(average_output_tokens / average_generation_elapsed_sec, 2) if average_generation_elapsed_sec > 0 else 0
|
estimated_avg_tps = round(average_output_tokens / average_generation_elapsed_sec, 2) if average_generation_elapsed_sec > 0 else 0
|
||||||
redis.set('average_tps', average_tps)
|
redis.set('estimated_avg_tps', estimated_avg_tps)
|
||||||
time.sleep(60)
|
time.sleep(60)
|
||||||
|
|
Reference in New Issue