This repository has been archived on 2024-10-27. You can view files and clone it, but cannot push or open issues or pull requests.
local-llm-server/llm_server/routes/v1/info.py

53 lines
1.6 KiB
Python
Raw Normal View History

2023-08-21 21:28:52 -06:00
import time
2023-08-23 16:40:20 -06:00
from flask import jsonify, request
2023-08-21 21:28:52 -06:00
2023-09-29 00:09:44 -06:00
from llm_server.custom_redis import flask_cache
2023-08-21 21:28:52 -06:00
from . import bp
from ..auth import requires_auth
from ... import opts
2023-09-29 00:09:44 -06:00
from ...cluster.backend import get_a_cluster_backend, get_backends, get_backends_from_model, is_valid_model
from ...cluster.cluster_config import cluster_config
2023-08-21 21:28:52 -06:00
2023-09-30 19:41:50 -06:00
@bp.route('/v1/model', methods=['GET'])
@bp.route('/<model_name>/v1/model', methods=['GET'])
def get_model(model_name=None):
    """Report the model name for this route and how many backends serve it.

    Args:
        model_name: Model taken from the URL prefix. When it is missing,
            the model is read from the config of whichever backend
            ``get_a_cluster_backend()`` returns.

    Returns:
        A ``(response, status)`` tuple: a 200 JSON body with ``result``,
        ``model_backend_count`` and ``timestamp`` keys, or a 400 JSON body
        when ``is_valid_model`` rejects the model name.
    """
    # Caching is done by hand rather than with a decorator so that failure
    # states are not cached while a backend is down. Also, Cloudflare
    # won't cache 500 errors.
    cache_key = 'model_cache::' + request.url
    hit = flask_cache.get(cache_key)
    if hit:
        return hit

    if not model_name:
        # No model in the URL: fall back to the model of a cluster backend.
        backend = get_a_cluster_backend()
        model_name = cluster_config.get_backend(backend).get('model')

    if not is_valid_model(model_name):
        # Error responses are returned directly and never stored in the cache.
        return jsonify({
            'code': 400,
            'msg': 'Model does not exist.',
        }), 400

    backend_count = len(get_backends_from_model(model_name))
    payload = {
        # A manually configured display name overrides the real model name.
        'result': opts.manual_model_name or model_name,
        'model_backend_count': backend_count,
        'timestamp': int(time.time())
    }
    response = jsonify(payload), 200
    flask_cache.set(cache_key, response, timeout=60)
    return response
2023-09-29 00:09:44 -06:00
@bp.route('/backends', methods=['GET'])
@requires_auth
def get_backend():
    """Return the config of every online and offline backend as JSON.

    The route is wrapped by the ``requires_auth`` decorator.

    Returns:
        A ``(response, 200)`` tuple whose JSON body maps each backend's
        ``hash`` field to the full info mapping returned by
        ``cluster_config.get_backend``. If two backends share a hash, the
        later one (online first, then offline) wins.
    """
    online, offline = get_backends()
    # Lazy lookup keeps the fetch-then-store order of a plain loop.
    backend_infos = (cluster_config.get_backend(name) for name in online + offline)
    by_hash = {entry['hash']: entry for entry in backend_infos}
    return jsonify(by_hash), 200