local-llm-server/llm_server/routes/v1/info.py

import time
from flask import jsonify
from . import bp
from ..helpers.http import cache_control
from ...llm.oobabooga.info import get_running_model
from ..cache import cache

# cache = Cache(bp, config={'CACHE_TYPE': 'simple'})

# @bp.route('/info', methods=['GET'])
# # @cache.cached(timeout=3600, query_string=True)
# def get_info():
#     # requests.get()
#     return 'yes'

@bp.route('/model', methods=['GET'])
@cache.cached(timeout=60, query_string=True)
@cache_control(60)
def get_model():
    # Report which model the backend is currently running; the result is
    # cached for 60 seconds so repeated polls don't hit the backend each time.
    model = get_running_model()
    if not model:
        return jsonify({
            'code': 500,
            'error': 'failed to reach backend'
        }), 500
    else:
        return jsonify({
            'result': model,
            'timestamp': int(time.time())
        }), 200
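
# Example client call -- a sketch only: the host, port, and URL prefix below
# are assumptions, since the mount point of this blueprint is defined elsewhere.
#
#   curl http://localhost:5000/api/v1/model
#   # -> {"result": "<running model name>", "timestamp": 1700000000}
#
# On a backend failure the endpoint instead returns HTTP 500 with
# {"code": 500, "error": "failed to reach backend"}.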

# @openai_bp.route('/models', methods=['GET'])
# # @cache.cached(timeout=3600, query_string=True)
# def get_openai_models():
#     model = get_running_model()
#     return {
#         "object": "list",
#         "data": [{
#             "id": model,
#             "object": "model",
#             "created": stats.start_time,
#             "owned_by": "openai",
#             "permission": [{
#                 "id": f"modelperm-{model}",
#                 "object": "model_permission",
#                 "created": stats.start_time,
#                 "organization": "*",
#                 "group": None,
#                 "is_blocking": False
#             }],
#             "root": model,
#             "parent": None
#         }]
#     }
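
# Note: the disabled OpenAI-compatible route above references `openai_bp` and
# `stats`, neither of which is imported in this module. If it were re-enabled,
# those would need to be imported from wherever they live in this project --
# the exact module paths are not shown here and would have to be checked.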