local-llm-server/llm_server/routes/v1/proxy.py

import time
from datetime import datetime

from flask import jsonify

from llm_server import opts
from llm_server.routes.v1.generate import concurrent_semaphore
from . import bp
from .. import stats
from ..cache import cache
from ..helpers.http import cache_control
from ..stats import proompters_1_min
from ...llm.oobabooga.info import get_running_model


@bp.route('/stats', methods=['GET'])
@cache.cached(timeout=60, query_string=True)
@cache_control(60)
def get_stats():
    """Return a JSON snapshot of the proxy's usage statistics.

    The view is wrapped in ``cache.cached(timeout=60)``, so ``uptime`` and
    ``timestamp`` describe the moment the snapshot was generated, which may
    be up to a minute before the request that receives it.

    Returns:
        A ``(response, status)`` tuple: the ``jsonify``-built body with the
        keys ``proompters_now``, ``proompters_1_min``, ``total_proompts``,
        ``uptime``, ``online`` and ``timestamp``, and HTTP status 200.
    """
    # get_running_model() will return False when the fetch fails, so any
    # non-bool result is treated as "backend reachable".
    online = not isinstance(get_running_model(), bool)

    return jsonify({
        # Configured generation limit minus the semaphore's counter.
        # NOTE(review): `_value` is a private attribute; presumably it is the
        # number of free slots -- confirm against the semaphore type used.
        'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
        # Looked up on the module at request time. A name brought in with
        # `from ..stats import proompters_1_min` stays bound to the object
        # that existed at import time and would miss any later rebinding of
        # the module global.
        'proompters_1_min': stats.proompters_1_min,
        'total_proompts': stats.proompts.value,
        # Whole seconds elapsed since stats.start_time.
        'uptime': int((datetime.now() - stats.start_time).total_seconds()),
        'online': online,
        # Unix time (whole seconds) at which this snapshot was built.
        'timestamp': int(time.time())
    }), 200