import time
from datetime import datetime

from flask import jsonify, request

from llm_server import opts
from llm_server.routes.v1.generate import concurrent_semaphore

from . import bp
from .. import stats
from ..stats import SemaphoreCheckerThread
from ...llm.info import get_running_model


@bp.route('/stats', methods=['GET'])
# @cache.cached(timeout=5, query_string=True)
# @cache_control(5)
def get_stats():
    # get_running_model() returns False when the fetch fails, so a bool
    # result means the backend is offline. Fetch once and reuse the value
    # rather than calling it a second time for the 'model' field.
    model = get_running_model()
    online = not isinstance(model, bool)
    return jsonify({
        'stats': {
            # Generations in flight: semaphore slots currently taken.
            'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
            'total_proompts': stats.proompts.value,
            'uptime': int((datetime.now() - stats.start_time).total_seconds()),
        },
        'online': online,
        'mode': opts.mode,
        'model': model,
        'endpoints': {
            'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
        },
        'timestamp': int(time.time()),
        'openaiKeys': '∞',
        'anthropicKeys': '∞',
        'config': {
            'gatekeeper': 'none' if opts.auth_required is False else 'token',
            'context_size': opts.context_size,
        },
    }), 200
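
# Example response for GET /stats (shape derived from the handler above;
# the values shown are hypothetical placeholders, not real output):
#
# {
#   "stats": {
#     "proompters_now": 2,
#     "proompters_1_min": 14,
#     "total_proompts": 1024,
#     "uptime": 86400
#   },
#   "online": true,
#   "mode": "oobabooga",
#   "model": "example-model-13b",
#   "endpoints": {
#     "blocking": "https://example.com/api/v1"
#   },
#   "timestamp": 1693526400,
#   "openaiKeys": "∞",
#   "anthropicKeys": "∞",
#   "config": {
#     "gatekeeper": "none",
#     "context_size": 4096
#   }
# }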