local-llm-server/llm_server/routes/openai/models.py

import openai
from flask import jsonify, request

from . import openai_bp
from ..cache import ONE_MONTH_SECONDS, cache, redis
from ..stats import server_start_time
from ... import opts
from ...llm.info import get_running_model


@openai_bp.route('/models', methods=['GET'])
@cache.cached(timeout=60, query_string=True)
def openai_list_models():
    cache_key = 'openai_model_cache::' + request.url
    cached_response = cache.get(cache_key)
    if cached_response:
        return cached_response
    model, error = get_running_model()
    if not model:
        response = jsonify({
            'code': 502,
            'msg': 'failed to reach backend',
            'type': error.__class__.__name__
        }), 500  # return 500 so Cloudflare doesn't intercept us
    else:
        oai = fetch_openai_models()
        # Advertise the locally running model using OpenAI's /v1/models response schema.
        r = {
            "object": "list",
            "data": [
                {
                    "id": opts.running_model,
                    "object": "model",
                    "created": int(server_start_time.timestamp()),
                    "owned_by": opts.llm_middleware_name,
                    "permission": [
                        {
                            "id": opts.running_model,
                            "object": "model_permission",
                            "created": int(server_start_time.timestamp()),
                            "allow_create_engine": False,
                            "allow_sampling": False,
                            "allow_logprobs": False,
                            "allow_search_indices": False,
                            "allow_view": True,
                            "allow_fine_tuning": False,
                            "organization": "*",
                            "group": None,
                            "is_blocking": False
                        }
                    ],
                    "root": None,
                    "parent": None
                }
            ]
        }
        # Combine with the upstream OpenAI listing (upstream keys overwrite local ones).
        response = jsonify({**r, **oai}), 200
        # Only successful responses are cached; error responses fall through uncached.
        cache.set(cache_key, response, timeout=60)
    return response


@cache.memoize(timeout=ONE_MONTH_SECONDS)
def fetch_openai_models():
    # The upstream model list changes rarely, so memoize it for a month.
    return openai.Model.list()
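
A minimal usage sketch for the route above, assuming openai_bp is mounted under an OpenAI-compatible prefix (the prefix is configured where the blueprint is registered, not in this file); the host, port, and prefix below are placeholders.

# usage_sketch.py -- hypothetical client; not part of models.py
import requests

# Placeholder base URL: the real prefix depends on how openai_bp is registered.
BASE_URL = 'http://localhost:5000/api/openai/v1'

resp = requests.get(f'{BASE_URL}/models', timeout=10)
resp.raise_for_status()
for m in resp.json().get('data', []):
    print(m.get('id'), m.get('owned_by'))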