Patch for llm_server/routes/v1/info.py (get_model, the '/model' route).

Summary of the change carried by this patch:
  * Drops the @cache.cached(timeout=60, query_string=True) decorator and caches
    by hand with cache.get/cache.set under the key 'model_cache::' + request.url.
  * When get_running_model() yields no model, the JSON error is now sent with
    HTTP status 500 instead of 200 and no longer carries 'result': None.
  * Imports flask.request, which the new cache key needs.

NOTE(review): the line breaks and indentation below were reconstructed from a
copy of this patch that had been flattened onto one line. Line counts agree
with both hunk headers (1,6 / 1,6 and 20,18 / 20,27). The 4-space indentation,
and cache.set(...) sitting inside the else branch (so that only the success
response is cached, as the added comment describes), are assumptions; confirm
against the target file before applying.

diff --git a/llm_server/routes/v1/info.py b/llm_server/routes/v1/info.py
index 9727431..6f202a7 100644
--- a/llm_server/routes/v1/info.py
+++ b/llm_server/routes/v1/info.py
@@ -1,6 +1,6 @@
 import time
 
-from flask import jsonify
+from flask import jsonify, request
 
 from . import bp
 from ..helpers.http import cache_control
@@ -20,18 +20,27 @@ from ..cache import cache
 
 
 @bp.route('/model', methods=['GET'])
-@cache.cached(timeout=60, query_string=True)
 def get_model():
+    # We will manage caching ourself since we don't want to cache
+    # when the backend is down. Also, Cloudflare won't cache 500 errors.
+    cache_key = 'model_cache::' + request.url
+    cached_response = cache.get(cache_key)
+
+    if cached_response:
+        return cached_response
+
     model, error = get_running_model()
     if not model:
-        return jsonify({
-            'result': None,
+        response = jsonify({
             'code': 502,
             'error': 'failed to reach backend',
             'type': error.__class__.__name__
-        }), 200  # return 200 so Cloudflare caches the response
+        }), 500  # return 500 so Cloudflare doesn't intercept us
     else:
-        return jsonify({
+        response = jsonify({
             'result': model,
             'timestamp': int(time.time())
         }), 200
+        cache.set(cache_key, response, timeout=60)
+
+    return response