import time
from datetime import datetime

from llm_server import opts
from llm_server.cluster.backend import get_a_cluster_backend
from llm_server.cluster.cluster_config import cluster_config
from llm_server.cluster.model_choices import get_model_choices
from llm_server.custom_redis import redis
from llm_server.database.database import get_distinct_ips_24h, sum_column
from llm_server.helpers import deep_sort
from llm_server.routes.stats import get_total_proompts, server_start_time


def generate_stats(regen: bool = False):
    """Build the public proxy stats payload, serving the cached copy unless a regen is forced."""
    if not regen:
        cached = redis.getp('proxy_stats')
        if cached:
            return cached

    default_backend_url = get_a_cluster_backend()
    default_backend_info = cluster_config.get_backend(default_backend_url)
    if not default_backend_info.get('mode'):
        # The cluster daemon hasn't finished initializing this backend yet.
        return
    base_client_api = redis.get('base_client_api', dtype=str)
    proompters_5_min = len(redis.zrangebyscore('recent_prompters', time.time() - 5 * 60, '+inf'))

    output = {
        'default': {
            'model': default_backend_info['model'],
            'backend': default_backend_url,
        },
        'stats': {
            'proompters': {
                '5_min': proompters_5_min,
                '24_hrs': get_distinct_ips_24h(),
            },
            'proompts_total': get_total_proompts() if opts.show_num_prompts else None,
            'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
            # 'estimated_avg_tps': estimated_avg_tps,
            'tokens_generated': sum_column('prompts', 'response_tokens') if opts.show_total_output_tokens else None,
            'num_backends': len(cluster_config.all()) if opts.show_backends else None,
        },
        'endpoints': {
            'blocking': f'https://{base_client_api}',
            'streaming': f'wss://{base_client_api}/v1/stream' if opts.enable_streaming else None,
        },
        'timestamp': int(time.time()),
        'config': {
            'gatekeeper': 'none' if opts.auth_required is False else 'token',
            'concurrent': opts.concurrent_gens,
            'simultaneous_requests_per_ip': opts.simultaneous_requests_per_ip,
        },
        'keys': {
            'openaiKeys': '∞',
            'anthropicKeys': '∞',
        },
        # Fall back to an empty dict rather than None so the per-backend loop
        # below can always assign into it without raising a TypeError.
        'backend_info': redis.get_dict('backend_info') if opts.show_backend_info else {},
    }

    # TODO: have get_model_choices() return all the info so we don't have to loop over the backends ourselves.
    if opts.show_backends:
        for backend_url in cluster_config.all():
            backend_info = cluster_config.get_backend(backend_url)
            if not backend_info['online']:
                continue
            backend_uptime = int((datetime.now() - datetime.fromtimestamp(backend_info['startup_time'])).total_seconds()) if opts.show_uptime else None
            output['backend_info'][backend_info['hash']] = {
                'uptime': backend_uptime,
                'max_tokens': backend_info['model_config']['max_position_embeddings'],
                'model': backend_info['model'],
                'mode': backend_info['mode'],
                'nvidia': backend_info['nvidia'],
            }
    else:
        output['backend_info'] = {}

    # Replace the placeholder default built above with the fully-resolved model choice.
    output['default'] = get_model_choices(regen=True)[1]

    result = deep_sort(output)

    # It may take a bit to get the base client API, so don't cache until then.
    if base_client_api:
        redis.setp('proxy_stats', result)

    return result
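

# Hedged usage sketch (not part of the original module): dump a freshly
# generated stats payload for inspection. This assumes the Redis instance
# configured in llm_server.custom_redis is reachable and the cluster daemon
# has already populated the backend config; `default=str` guards against any
# values in the payload that aren't natively JSON-serializable.
if __name__ == '__main__':
    import json
    print(json.dumps(generate_stats(regen=True), indent=2, default=str))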