local-llm-server/llm_server/routes/openai/models.py

import traceback
import requests
from flask import jsonify
from . import openai_bp
from ..cache import ONE_MONTH_SECONDS, cache
from ..stats import server_start_time
from ... import opts
from ...helpers import jsonify_pretty
from ...llm.info import get_running_model
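

# Mimics OpenAI's GET /v1/models endpoint: advertises the locally running
# model (when exposed via config) alongside any models fetched from the
# upstream OpenAI API.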
@openai_bp.route('/models', methods=['GET'])
@cache.cached(timeout=60, query_string=True)
def openai_list_models():
    model, error = get_running_model()
    if not model:
        response = jsonify({
            'code': 502,
            'msg': 'failed to reach backend',
            'type': error.__class__.__name__
        }), 500  # return 500 so Cloudflare doesn't intercept us
    else:
        oai = fetch_openai_models()
        r = []
        if opts.openai_expose_our_model:
            # Describe the locally running model in OpenAI's model-object format.
            r = [{
                "id": opts.running_model,
                "object": "model",
                "created": int(server_start_time.timestamp()),
                "owned_by": opts.llm_middleware_name,
                "permission": [
                    {
                        "id": opts.running_model,
                        "object": "model_permission",
                        "created": int(server_start_time.timestamp()),
                        "allow_create_engine": False,
                        "allow_sampling": False,
                        "allow_logprobs": False,
                        "allow_search_indices": False,
                        "allow_view": True,
                        "allow_fine_tuning": False,
                        "organization": "*",
                        "group": None,
                        "is_blocking": False
                    }
                ],
                "root": None,
                "parent": None
            }]
        # Return a single OpenAI-style list object containing our model plus
        # any models fetched from the upstream OpenAI API.
        response = jsonify_pretty({
            "object": "list",
            "data": r + oai
        }), 200
    return response


@cache.memoize(timeout=ONE_MONTH_SECONDS)
def fetch_openai_models():
    # Cache the upstream model list for a month; it changes rarely and this
    # avoids hitting the OpenAI API on every /models request.
    if opts.openai_api_key:
        try:
            response = requests.get('https://api.openai.com/v1/models', headers={'Authorization': f"Bearer {opts.openai_api_key}"}, timeout=10)
            return response.json()['data']
        except Exception:
            traceback.print_exc()
            return []
    else:
        return []
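

# A minimal usage sketch for this route, assuming the server listens on
# localhost:5000 and the blueprint is mounted under /api/openai/v1 (both
# assumptions; adjust to your deployment):
#
#   import requests
#   resp = requests.get('http://localhost:5000/api/openai/v1/models', timeout=10)
#   for m in resp.json()['data']:
#       print(m['id'])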