local-llm-server/llm_server/routes/openai/models.py


import traceback

import requests
from flask import jsonify

from llm_server.custom_redis import ONE_MONTH_SECONDS, flask_cache, redis

from . import openai_bp
from ..stats import server_start_time
from ... import opts
from ...cluster.backend import get_a_cluster_backend
from ...cluster.cluster_config import cluster_config
from ...helpers import jsonify_pretty
from ...llm.openai.transform import generate_oai_string


@openai_bp.route('/models', methods=['GET'])
@flask_cache.cached(timeout=60, query_string=True)
def openai_list_models():
    """Return an OpenAI-compatible model list for this proxy."""
    model_name = cluster_config.get_backend(get_a_cluster_backend()).get('model')
    if not model_name:
        response = jsonify({
            'code': 502,
            'msg': 'failed to reach backend',
        }), 500  # return 500 so Cloudflare doesn't intercept us
    else:
        running_model = redis.get('running_model', 'ERROR', dtype=str)
        oai = fetch_openai_models()
        r = {
            "object": "list",
            "data": oai
        }
        # TODO: verify this works
        if opts.openai_expose_our_model:
            r["data"].insert(0, {
                "id": running_model,
                "object": "model",
                "created": int(server_start_time.timestamp()),
                "owned_by": opts.llm_middleware_name,
                "permission": [
                    {
                        "id": running_model,
                        "object": "model_permission",
                        "created": int(server_start_time.timestamp()),
                        "allow_create_engine": False,
                        "allow_sampling": False,
                        "allow_logprobs": False,
                        "allow_search_indices": False,
                        "allow_view": True,
                        "allow_fine_tuning": False,
                        "organization": "*",
                        "group": None,
                        "is_blocking": False
                    }
                ],
                "root": None,
                "parent": None
            })
        response = jsonify_pretty(r), 200
    return response
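

# Illustrative sketch of the response shape: with opts.openai_expose_our_model
# enabled, the body assembled above looks roughly like the JSON below. The
# values are placeholders, and the upstream OpenAI entries returned by
# fetch_openai_models() follow our own entry in "data".
#
#   {
#       "object": "list",
#       "data": [
#           {
#               "id": "<running_model>",
#               "object": "model",
#               "created": <server start time, Unix seconds>,
#               "owned_by": "<opts.llm_middleware_name>",
#               "permission": [{"id": "<running_model>", "object": "model_permission", ...}],
#               "root": null,
#               "parent": null
#           },
#           ...
#       ]
#   }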


@flask_cache.memoize(timeout=ONE_MONTH_SECONDS)
def fetch_openai_models():
    """Fetch the upstream OpenAI model list (cached for up to a month)."""
    if opts.openai_api_key:
        try:
            response = requests.get('https://api.openai.com/v1/models', headers={'Authorization': f"Bearer {opts.openai_api_key}"}, timeout=10)
            j = response.json()['data']
            # The "modelperm" string appears to be user-specific, so we'll
            # randomize it just to be safe.
            for model in range(len(j)):
                for p in range(len(j[model]['permission'])):
                    j[model]['permission'][p]['id'] = f'modelperm-{generate_oai_string(24)}'
            return j
        except Exception:
            traceback.print_exc()
            return []
    else:
        return []
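

# A minimal client-side usage sketch. The base URL and the '/api/openai/v1'
# prefix below are assumptions: the real prefix depends on where openai_bp is
# registered on the Flask app, which this module does not show.
if __name__ == '__main__':
    base_url = 'http://localhost:5000/api/openai/v1'  # hypothetical mount point
    resp = requests.get(f'{base_url}/models', timeout=10)
    resp.raise_for_status()
    for m in resp.json()['data']:
        print(m['id'])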