From 9f14b166ddf7f19c3f31a5c6e3d46b3eb9e48f5f Mon Sep 17 00:00:00 2001
From: Cyberes
Date: Tue, 22 Aug 2023 22:32:29 -0600
Subject: [PATCH] fix proompters_1_min, other minor changes

---
 llm_server/routes/stats.py       | 10 ++++------
 llm_server/routes/v1/generate.py |  7 +++----
 llm_server/routes/v1/proxy.py    |  4 ++--
 server.py                        | 11 ++++++-----
 4 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/llm_server/routes/stats.py b/llm_server/routes/stats.py
index 8e8a004..1a2bfc3 100644
--- a/llm_server/routes/stats.py
+++ b/llm_server/routes/stats.py
@@ -18,19 +18,17 @@ class SemaphoreCheckerThread(Thread):
         Thread.__init__(self)
         self.semaphore = semaphore
         self.values = collections.deque(maxlen=60)
+        self.prev_semaphore_value = self.semaphore._value
         self.daemon = True
 
     def run(self):
         global proompters_1_min
         while True:
-            # If the semaphore value is less than the maximum, a prompter has sent a prompt
-            if opts.concurrent_gens > self.semaphore._value:
+            current_semaphore_value = self.semaphore._value
+            if current_semaphore_value < opts.concurrent_gens and current_semaphore_value != self.prev_semaphore_value:
                 self.values.append(1)
             else:
                 self.values.append(0)
+            self.prev_semaphore_value = current_semaphore_value
            proompters_1_min = sum(self.values)
             time.sleep(1)
-
-
-thread = SemaphoreCheckerThread(concurrent_semaphore)
-thread.start()
diff --git a/llm_server/routes/v1/generate.py b/llm_server/routes/v1/generate.py
index b8968ab..b1af72d 100644
--- a/llm_server/routes/v1/generate.py
+++ b/llm_server/routes/v1/generate.py
@@ -64,15 +64,14 @@ def generate():
     backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
     if not backend_response:
         if opts.mode == 'oobabooga':
-            backend_response = format_sillytavern_err(f'Backend ({opts.mode}) returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}.', 'error')
+            backend_response = format_sillytavern_err(
+                f'Backend (oobabooga) returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}. Furthermore, oobabooga does not support concurrent requests so all users have to wait in line and the backend server may have glitched for a moment. Please try again.',
+                'error')
             response_json_body['results'][0]['text'] = backend_response
         else:
             raise Exception
 
     log_prompt(opts.database_path, client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
-
-    print(response_json_body)
-
     return jsonify({
         **response_json_body
     }), 200
diff --git a/llm_server/routes/v1/proxy.py b/llm_server/routes/v1/proxy.py
index b5e69c4..1149c08 100644
--- a/llm_server/routes/v1/proxy.py
+++ b/llm_server/routes/v1/proxy.py
@@ -14,8 +14,8 @@ from ...llm.info import get_running_model
 
 
 @bp.route('/stats', methods=['GET'])
-@cache.cached(timeout=60, query_string=True)
-@cache_control(60)
+@cache.cached(timeout=5, query_string=True)
+@cache_control(5)
 def get_stats():
     model_list = get_running_model()  # will return False when the fetch fails
     if isinstance(model_list, bool):
diff --git a/server.py b/server.py
index fd17771..28db475 100644
--- a/server.py
+++ b/server.py
@@ -2,16 +2,15 @@ import os
 import sys
 from pathlib import Path
 
-import tiktoken
-from flask import Flask, current_app, jsonify
+from flask import Flask, jsonify
 
 from llm_server import opts
 from llm_server.config import ConfigLoader
 from llm_server.database import init_db
 from llm_server.helpers import resolve_path
-from llm_server.llm.info import get_running_model
 from llm_server.routes.cache import cache
 from llm_server.routes.helpers.http import cache_control
+from llm_server.routes.stats import SemaphoreCheckerThread, concurrent_semaphore
 from llm_server.routes.v1 import bp
 
 script_path = os.path.dirname(os.path.realpath(__file__))
@@ -48,9 +47,11 @@ opts.concurrent_gens = config['concurrent_gens']
 opts.frontend_api_client = config['frontend_api_client']
 opts.context_size = config['token_limit']
 
+SemaphoreCheckerThread(concurrent_semaphore).start()
+
 app = Flask(__name__)
 cache.init_app(app)
-cache.clear() # clear redis cache
+cache.clear()  # clear redis cache
 # with app.app_context():
 #     current_app.tokenizer = tiktoken.get_encoding("cl100k_base")
 app.register_blueprint(bp, url_prefix='/api/v1/')
@@ -71,4 +72,4 @@ def fallback(first=None, rest=None):
 
 
 if __name__ == "__main__":
-    app.run(host='0.0.0.0', debug=True)
+    app.run(host='0.0.0.0')