adjust stats
parent 91ba2fad1b
commit 1151bb5475
@@ -54,7 +54,7 @@ def get_model_choices(regen: bool = False):
             'estimated_wait': estimated_wait_sec,
             'queued': proompters_in_queue,
             'processing': active_gen_workers,
-            'avg_generation_time': average_generation_elapsed_sec
+            'avg_generation_time': average_generation_elapsed_sec,
         }
 
         if len(context_size):
@@ -63,25 +63,28 @@ def get_model_choices(regen: bool = False):
     model_choices = dict(sorted(model_choices.items()))
 
     default_backend = get_a_cluster_backend()
-    default_backend_info = cluster_config.get_backend(default_backend)
-    default_context_size = default_backend_info['model_config']['max_position_embeddings']
-    default_average_generation_elapsed_sec = default_backend_info.get('average_generation_elapsed_sec')
-    default_active_gen_workers = redis.get(f'active_gen_workers:{default_backend}', dtype=int, default=0)
-    default_proompters_in_queue = priority_queue.len(default_backend_info['model'])
-    default_estimated_wait_sec = calculate_wait_time(default_average_generation_elapsed_sec, default_proompters_in_queue, default_backend_info['concurrent_gens'], default_active_gen_workers)
-
-    default_backend_dict = {
-        'client_api': f'https://{base_client_api}/v2',
-        'ws_client_api': f'wss://{base_client_api}/v2' if opts.enable_streaming else None,
-        'openai_client_api': f'https://{base_client_api}/openai/v2' if opts.enable_openi_compatible_backend else 'disabled',
-        'estimated_wait': default_estimated_wait_sec,
-        'queued': default_proompters_in_queue,
-        'processing': default_active_gen_workers,
-        'context_size': default_context_size,
-        'hash': default_backend_info['hash'],
-        'model': default_backend_info['model'],
-        'avg_generation_time': default_average_generation_elapsed_sec
-    }
+    default_backend_dict = {}
+    if default_backend:
+        default_backend_info = cluster_config.get_backend(default_backend)
+        default_context_size = default_backend_info['model_config']['max_position_embeddings']
+        default_average_generation_elapsed_sec = default_backend_info.get('average_generation_elapsed_sec')
+        default_active_gen_workers = redis.get(f'active_gen_workers:{default_backend}', dtype=int, default=0)
+        default_proompters_in_queue = priority_queue.len(default_backend_info['model'])
+        default_estimated_wait_sec = calculate_wait_time(default_average_generation_elapsed_sec, default_proompters_in_queue, default_backend_info['concurrent_gens'], default_active_gen_workers)
+
+        default_backend_dict = {
+            'client_api': f'https://{base_client_api}/v2',
+            'ws_client_api': f'wss://{base_client_api}/v2' if opts.enable_streaming else None,
+            'openai_client_api': f'https://{base_client_api}/openai/v2' if opts.enable_openi_compatible_backend else 'disabled',
+            'estimated_wait': default_estimated_wait_sec,
+            'queued': default_proompters_in_queue,
+            'processing': default_active_gen_workers,
+            'context_size': default_context_size,
+            'hash': default_backend_info['hash'],
+            'model': default_backend_info['model'],
+            'avg_generation_time': default_average_generation_elapsed_sec,
+            'online': True
+        }
 
     redis.setp('model_choices', (model_choices, default_backend_dict))
 
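The core change in this hunk: default_backend_dict now starts out empty and is only filled in when get_a_cluster_backend() actually returns a backend, and the populated entry is tagged with 'online': True. A minimal sketch of how a consumer of the (model_choices, default_backend_dict) tuple written to 'model_choices' might handle both cases (the function name and output text below are illustrative, not part of the project):

def summarize_default_backend(default_backend_dict: dict) -> str:
    # After this commit, an empty dict means "no cluster backend is online"
    # instead of the stats code raising while looking up a None backend.
    if not default_backend_dict:
        return 'no default backend online'
    return (f"{default_backend_dict['model']}: "
            f"{default_backend_dict['queued']} queued, "
            f"~{default_backend_dict['estimated_wait']}s estimated wait")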
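calculate_wait_time() itself is untouched by this commit, but for context, one plausible way an estimate with that signature could be computed is sketched below; the project's actual formula may differ:

def estimate_wait_sketch(avg_generation_sec, queued, concurrent_gens, active_workers):
    # Hedged sketch only; not the project's calculate_wait_time().
    if not avg_generation_sec:
        return 0  # no timing data collected yet
    if queued == 0 and active_workers < concurrent_gens:
        return 0  # a free generation slot means no wait
    # Requests ahead of us (queued plus in flight) drain through
    # concurrent_gens slots, each taking roughly avg_generation_sec.
    return round((queued + active_workers) / concurrent_gens * avg_generation_sec)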
@@ -39,6 +39,10 @@ class RequestHandler:
         self.token_priority, self.token_simultaneous_ip = self.get_token_ratelimit()
         self.backend_url = get_a_cluster_backend(selected_model)
         self.cluster_backend_info = cluster_config.get_backend(self.backend_url)
+
+        if not self.cluster_backend_info.get('mode'):
+            print(self.backend_url, self.cluster_backend_info)
+
         self.backend = get_backend_handler(self.cluster_backend_info['mode'], self.backend_url)
         self.parameters = None
         self.used = False
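The new check fires when cluster_config.get_backend() returns an entry without a 'mode' key, since the very next line indexes cluster_backend_info['mode'] and would raise KeyError. A small sketch of that failure case, with hypothetical names, assuming the config maps backend URLs to info dicts:

def check_backend_entry(cluster_backends: dict, backend_url: str) -> None:
    # Illustrative only; mirrors the debug print added in this commit.
    info = cluster_backends.get(backend_url, {})
    if not info.get('mode'):
        # Dump what we actually got before a later info['mode'] access fails.
        print(backend_url, info)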