This repository has been archived on 2024-10-27. You can view files and clone it, but cannot push or open issues or pull requests.
local-llm-server/llm_server/routes/v1/generate_stats.py

85 lines
3.3 KiB
Python
Raw Normal View History

2023-08-23 23:11:12 -06:00
import time
2023-08-24 12:19:59 -06:00
from datetime import datetime
2023-08-23 23:11:12 -06:00
from llm_server import opts
2023-09-29 00:09:44 -06:00
from llm_server.cluster.cluster_config import cluster_config
2023-09-30 19:41:50 -06:00
from llm_server.cluster.model_choices import get_model_choices
2023-09-29 00:09:44 -06:00
from llm_server.custom_redis import redis
from llm_server.database.database import get_distinct_ips_24h, sum_column
2023-09-30 19:41:50 -06:00
from llm_server.helpers import deep_sort
from llm_server.routes.stats import get_total_proompts, server_start_time
2023-09-17 18:33:57 -06:00
def generate_stats(regen: bool = False):
    """Build the proxy statistics payload, optionally serving it from cache.

    Args:
        regen: When false, the value stored under the ``proxy_stats`` Redis
            key is returned if it is truthy. When true, that lookup is
            skipped and the payload is rebuilt.

    Returns:
        One of:
        * the cached ``proxy_stats`` value (only when ``regen`` is false),
        * the string ``'Please wait for Redis to be populated...'`` when
          ``get_model_choices`` yields no choices or no default model,
        * the freshly built payload after it has been passed through
          ``deep_sort``.
    """
    if not regen:
        cached = redis.getp('proxy_stats')
        if cached:
            return cached

    model_choices, default_model = get_model_choices(regen=True)
    if not model_choices or not default_model:
        return 'Please wait for Redis to be populated...'

    base_client_api = redis.get('base_client_api', dtype=str)
    # Count the members of 'recent_prompters' scored within the last 5 minutes.
    proompters_5_min = len(redis.zrangebyscore('recent_prompters', time.time() - 5 * 60, '+inf'))

    output = {
        'models': {
            'choices': model_choices,
            'default': default_model,
        },
        'stats': {
            'proompters': {
                '5_min': proompters_5_min,
                '24_hrs': get_distinct_ips_24h(),
            },
            'proompts_total': get_total_proompts() if opts.show_num_prompts else None,
            'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
            # 'estimated_avg_tps': estimated_avg_tps,
            'tokens_generated': sum_column('prompts', 'response_tokens') if opts.show_total_output_tokens else None,
            'num_backends': len(cluster_config.all()) if opts.show_backends else None,
        },
        'endpoints': {
            'blocking': f'https://{base_client_api}',
            'streaming': f'wss://{base_client_api}/v1/stream' if opts.enable_streaming else None,
        },
        'timestamp': int(time.time()),
        'config': {
            'gatekeeper': 'none' if opts.auth_required is False else 'token',
            'simultaneous_requests_per_ip': opts.simultaneous_requests_per_ip,
            'api_mode': opts.frontend_api_mode
        },
        'keys': {
            'openaiKeys': '',
            'anthropicKeys': '',
        },
        'backend_info': redis.get_dict('backend_info') if opts.show_backend_info else None,
    }

    # TODO: have get_model_choices() return all the info so we don't have to loop over the backends ourself
    if opts.show_backends:
        for backend_url, _ in cluster_config.all().items():
            backend_info = cluster_config.get_backend(backend_url)
            if not backend_info['online']:
                continue
            backend_uptime = int((datetime.now() - datetime.fromtimestamp(backend_info['startup_time'])).total_seconds()) if opts.show_uptime else None
            # 'backend_info' is None when opts.show_backend_info is off (and
            # possibly when Redis has nothing stored). Create the mapping on
            # first use so the per-backend entry below cannot hit a TypeError.
            if output['backend_info'] is None:
                output['backend_info'] = {}
            output['backend_info'][backend_info['hash']] = {
                'uptime': backend_uptime,
                'max_tokens': backend_info['model_config']['max_position_embeddings'],
                'model': backend_info['model'],
                'mode': backend_info['mode'],
                'nvidia': backend_info['nvidia'],
                'priority': backend_info['priority'],
            }
    else:
        output['backend_info'] = {}

    result = deep_sort(output)

    # It may take a bit to get the base client API, so don't cache until then.
    if base_client_api:
        redis.setp('proxy_stats', result)
    return result