local-llm-server/llm_server/routes/v1/proxy.py
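
"""Stats endpoint for the v1 proxy API: reports queue depth, request rate,
uptime, and whether the backend model is reachable."""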

import time
from datetime import datetime

from flask import jsonify, request

from llm_server import opts
from . import bp
from .. import stats
from ..cache import cache
from ..queue import priority_queue
from ..stats import SemaphoreCheckerThread, calculate_avg_gen_time
from ...llm.info import get_running_model


@bp.route('/stats', methods=['GET'])
@cache.cached(timeout=5, query_string=True)
def get_stats():
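    """Return a JSON snapshot of server state.

    Includes queue depth, per-minute requester count, total prompts served,
    uptime, average generation time, and a naive wait estimate (average
    generation time multiplied by current queue length). Responses are
    cached for 5 seconds via the decorator above.
    """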
    model_list, error = get_running_model()  # will return False when the fetch fails
    if isinstance(model_list, bool):
        online = False
    else:
        online = True
    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
    # if len(t) == 0:
    #     estimated_wait = 0
    # else:
    #     waits = [elapsed for end, elapsed in t]
    #     estimated_wait = int(sum(waits) / len(waits))
    average_generation_time = int(calculate_avg_gen_time())
    return jsonify({
        'stats': {
            'prompts_in_queue': len(priority_queue),
            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
            'total_proompts': stats.get_count(),
            'uptime': int((datetime.now() - stats.server_start_time).total_seconds()),
            'average_generation_elapsed_sec': average_generation_time,
        },
        'online': online,
        'mode': opts.mode,
        'model': model_list,
        'endpoints': {
            'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
        },
        'estimated_wait_sec': int(average_generation_time * len(priority_queue)),
        'timestamp': int(time.time()),
        'openaiKeys': '',
        'anthropicKeys': '',
        'config': {
            'gatekeeper': 'none' if opts.auth_required is False else 'token',
            'context_size': opts.context_size,
        }
    }), 200
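
# Example response shape (all values illustrative, not from a live server;
# 'mode' and 'model' depend on the configured backend):
# {
#   "stats": {"prompts_in_queue": 0, "proompters_1_min": 2, "total_proompts": 1481,
#             "uptime": 86400, "average_generation_elapsed_sec": 12},
#   "online": true,
#   "mode": "...",
#   "model": "...",
#   "endpoints": {"blocking": "https://example.com/api"},
#   "estimated_wait_sec": 0,
#   "timestamp": 1692672000,
#   "openaiKeys": "",
#   "anthropicKeys": "",
#   "config": {"gatekeeper": "none", "context_size": 4096}
# }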