"""Flask routes exposing model and backend information."""
import time

from flask import jsonify, request

from llm_server.custom_redis import flask_cache
from . import bp
from ..auth import requires_auth
from ... import opts
from ...cluster.backend import get_a_cluster_backend, get_backends, get_backends_from_model, is_valid_model
from ...cluster.cluster_config import cluster_config


@bp.route('/v1/model', methods=['GET'])
@bp.route('/<model_name>/v1/model', methods=['GET'])
def get_model(model_name=None):
    """Return the name of the served model and how many backends host it.

    Args:
        model_name: Model taken from the URL path. When omitted, the model
            of the backend returned by ``get_a_cluster_backend()`` is used.

    Returns:
        A ``(response, status)`` tuple. On success the JSON body holds
        ``result``, ``model_backend_count`` and ``timestamp`` with status
        200; if ``is_valid_model`` rejects the name, the body holds
        ``code`` and ``msg`` with status 400.
    """
    # We manage caching ourselves since we don't want to cache
    # when the backend is down. Also, Cloudflare won't cache 500 errors.
    cache_key = 'model_cache::' + request.url
    cached_response = flask_cache.get(cache_key)
    if cached_response:
        return cached_response

    if not model_name:
        # No model in the path: fall back to whatever model the selected
        # cluster backend reports.
        model_name = cluster_config.get_backend(get_a_cluster_backend()).get('model')

    if not is_valid_model(model_name):
        # Error responses are intentionally not cached.
        return jsonify({
            'code': 400,
            'msg': 'Model does not exist.',
        }), 400

    num_backends = len(get_backends_from_model(model_name))
    response = jsonify({
        # A manually configured name, when set, overrides the real one.
        'result': opts.manual_model_name or model_name,
        'model_backend_count': num_backends,
        'timestamp': int(time.time()),
    }), 200
    # Only successful lookups are cached, for 60 seconds.
    flask_cache.set(cache_key, response, timeout=60)
    return response


@bp.route('/backends', methods=['GET'])
@requires_auth
def get_backend():
    """Return the stored info of every backend, keyed by its ``hash`` field.

    Both the online and offline backends reported by ``get_backends()``
    are included.

    Returns:
        A ``(response, status)`` tuple with a JSON object and status 200.
    """
    online, offline = get_backends()
    result = {}
    for backend in online + offline:
        info = cluster_config.get_backend(backend)
        result[info['hash']] = info
    return jsonify(result), 200