import time
from datetime import datetime

from flask import jsonify, request

from llm_server import opts
from . import bp
from .. import stats
from ..queue import priority_queue
from ..stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers
from ...llm.info import get_running_model


@bp.route('/stats', methods=['GET'])
# @cache.cached(timeout=5, query_string=True)
def get_stats():
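    """Return service status and aggregate usage statistics as JSON."""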
    model_list, error = get_running_model()  # will return False when the fetch fails
    if isinstance(model_list, bool):
        online = False
    else:
        online = True

    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
    # if len(t) == 0:
    #     estimated_wait = 0
    # else:
    #     waits = [elapsed for end, elapsed in t]
    #     estimated_wait = int(sum(waits) / len(waits))

    average_generation_time = int(calculate_avg_gen_time())
    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
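
    # The estimated wait reported below is a rough heuristic: the average
    # generation time multiplied by the number of prompts that are queued
    # or currently being generated.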
    return jsonify({
        'stats': {
            'prompts_in_queue': proompters_in_queue,
            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
            'total_proompts': stats.get_total_proompts() if opts.show_num_prompts else None,
            'uptime': int((datetime.now() - stats.server_start_time).total_seconds()) if opts.show_uptime else None,
            'average_generation_elapsed_sec': average_generation_time,
        },
        'online': online,
        'mode': opts.mode,
        'model': model_list,
        'endpoints': {
            'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
        },
        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
        'timestamp': int(time.time()),
        'openaiKeys': '∞',
        'anthropicKeys': '∞',
        'config': {
            'gatekeeper': 'none' if opts.auth_required is False else 'token',
            'context_size': opts.context_size,
        }
    }), 200
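

# Example response shape (illustrative only; the values below are hypothetical
# and depend on runtime state and configuration, and the full URL path depends
# on the blueprint's prefix):
#
#   GET <api prefix>/stats
#   {
#       "stats": {
#           "prompts_in_queue": 0,
#           "proompters_1_min": 3,
#           "total_proompts": 1234,
#           "uptime": 86400,
#           "average_generation_elapsed_sec": 12
#       },
#       "online": true,
#       "mode": "...",
#       "model": "...",
#       "endpoints": {"blocking": "https://example.com/..."},
#       "estimated_wait_sec": 0,
#       "timestamp": 1692849600,
#       "openaiKeys": "∞",
#       "anthropicKeys": "∞",
#       "config": {"gatekeeper": "none", "context_size": 4096}
#   }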