local-llm-server/llm_server/routes/v1/proxy.py

import time
from datetime import datetime
from flask import jsonify
from llm_server import opts
from llm_server.routes.v1.generate import concurrent_semaphore
from . import bp
from .. import stats
from ..cache import cache
from ..helpers.http import cache_control
from ..stats import proompters_1_min
from ...llm.oobabooga.info import get_running_model


@bp.route('/stats', methods=['GET'])
@cache.cached(timeout=60, query_string=True)
@cache_control(60)
def get_stats():
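    """Return public usage stats for this proxy as JSON (response is cached for 60 seconds)."""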
    model_list = get_running_model()
    if isinstance(model_list, bool):
        # get_running_model() returns False when the model info fetch fails
        online = False
    else:
        online = True
    return jsonify({
        # The semaphore's internal counter is the number of free generation slots,
        # so subtracting it from the configured limit gives the active request count.
        'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
        'proompters_1_min': proompters_1_min,
        'total_proompts': stats.proompts.value,
        'uptime': int((datetime.now() - stats.start_time).total_seconds()),
        'online': online,
        'timestamp': int(time.time())
    }), 200
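# Example response shape (illustrative values only, assuming the backend is online):
# {
#   "proompters_now": 2,
#   "proompters_1_min": 5,
#   "total_proompts": 1234,
#   "uptime": 3600,
#   "online": true,
#   "timestamp": 1692700000
# }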