import time

from flask import jsonify, request

from . import bp
from ..auth import requires_auth
from ..cache import cache
from ... import opts
from ...llm.info import get_running_model
# @bp.route('/info', methods=['GET'])
# # @cache.cached(timeout=3600, query_string=True)
# def get_info():
# # requests.get()
# return 'yes'
@bp.route('/model', methods=['GET'])
def get_model():
    """Return the name of the currently running model as JSON.

    Caching is managed manually (instead of @cache.cached) because we
    must not cache a response taken while the backend is down, and
    Cloudflare won't cache 500 errors anyway.

    Returns:
        200 with {'result': <model name>, 'timestamp': <unix time>} on
        success, or 500 with an error payload when the backend is
        unreachable.
    """
    cache_key = 'model_cache::' + request.url
    cached_response = cache.get(cache_key)
    if cached_response:
        return cached_response

    model_name, error = get_running_model()
    if not model_name:
        # Backend unreachable: return immediately WITHOUT caching, so a
        # transient outage isn't served from cache for the next 60s.
        # Body advertises 502 but we send HTTP 500 so Cloudflare doesn't
        # intercept the error page.
        return jsonify({
            'code': 502,
            'msg': 'failed to reach backend',
            'type': error.__class__.__name__
        }), 500

    # manual_model_name (if set) overrides whatever the backend reports.
    response = jsonify({
        'result': opts.manual_model_name if opts.manual_model_name else model_name,
        'timestamp': int(time.time())
    }), 200
    # Only successful responses are cached.
    cache.set(cache_key, response, timeout=60)
    return response
@bp.route('/backend', methods=['GET'])
@requires_auth
def get_backend():
    """Return the configured backend URL and mode (authenticated only)."""
    payload = {'backend': opts.backend_url, 'mode': opts.mode}
    return jsonify(payload), 200