local-llm-server/llm_server/routes/openai/models.py

import openai
from flask import jsonify, request

from . import openai_bp
from ..cache import ONE_MONTH_SECONDS, cache, redis
from ..stats import server_start_time
from ... import opts
from ...llm.info import get_running_model


@openai_bp.route('/models', methods=['GET'])
@cache.cached(timeout=60, query_string=True)
def openai_list_models():
    cache_key = 'openai_model_cache::' + request.url
    cached_response = cache.get(cache_key)
    if cached_response:
        return cached_response
    model, error = get_running_model()
    if not model:
        response = jsonify({
            'code': 502,
            'msg': 'failed to reach backend',
            'type': error.__class__.__name__
        }), 500  # return 500 so Cloudflare doesn't intercept us
    else:
        oai = fetch_openai_models()
        # Advertise the locally running model using OpenAI's /v1/models response schema.
        r = {
            "object": "list",
            "data": [
                {
                    "id": opts.running_model,
                    "object": "model",
                    "created": int(server_start_time.timestamp()),
                    "owned_by": opts.llm_middleware_name,
                    "permission": [
                        {
                            "id": opts.running_model,
                            "object": "model_permission",
                            "created": int(server_start_time.timestamp()),
                            "allow_create_engine": False,
                            "allow_sampling": False,
                            "allow_logprobs": False,
                            "allow_search_indices": False,
                            "allow_view": True,
                            "allow_fine_tuning": False,
                            "organization": "*",
                            "group": None,
                            "is_blocking": False
                        }
                    ],
                    "root": None,
                    "parent": None
                }
            ]
        }
        # Combine with the upstream OpenAI listing (upstream keys overwrite local ones).
        response = jsonify({**r, **oai}), 200
        # Only successful responses are cached; error responses fall through uncached.
        cache.set(cache_key, response, timeout=60)
    return response


@cache.memoize(timeout=ONE_MONTH_SECONDS)
def fetch_openai_models():
    # The upstream model list changes rarely, so memoize it for a month.
    return openai.Model.list()
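
A minimal usage sketch for the route above, assuming openai_bp is mounted under an OpenAI-compatible prefix (the prefix is configured where the blueprint is registered, not in this file); the host, port, and prefix below are placeholders.

# usage_sketch.py -- hypothetical client; not part of models.py
import requests

# Placeholder base URL: the real prefix depends on how openai_bp is registered.
BASE_URL = 'http://localhost:5000/api/openai/v1'

resp = requests.get(f'{BASE_URL}/models', timeout=10)
resp.raise_for_status()
for m in resp.json().get('data', []):
    print(m.get('id'), m.get('owned_by'))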