fix proompters_1_min, other minor changes

This commit is contained in:
Cyberes 2023-08-22 22:32:29 -06:00
parent 3d6ba923bf
commit 9f14b166dd
4 changed files with 15 additions and 17 deletions

View File

@@ -18,19 +18,17 @@ class SemaphoreCheckerThread(Thread):
Thread.__init__(self)
self.semaphore = semaphore
self.values = collections.deque(maxlen=60)
self.prev_semaphore_value = self.semaphore._value
self.daemon = True
def run(self):
    """Poll the generation semaphore once per second and keep a 60-second
    sliding window of prompt activity in the module-global proompters_1_min.

    NOTE(review): the SOURCE diff left the pre-commit condition
    (`opts.concurrent_gens > self.semaphore._value`) interleaved with the
    post-commit lines; this body is the resolved post-commit logic.
    """
    global proompters_1_min
    while True:
        current_semaphore_value = self.semaphore._value
        # A prompt is in flight when the semaphore value is below the
        # configured concurrency limit (opts.concurrent_gens). Only count
        # it when the value CHANGED since the previous poll, so a single
        # long-running generation is not counted once per second.
        if current_semaphore_value < opts.concurrent_gens and current_semaphore_value != self.prev_semaphore_value:
            self.values.append(1)
        else:
            self.values.append(0)
        self.prev_semaphore_value = current_semaphore_value
        # self.values is a deque(maxlen=60), so this sum is the number of
        # detected prompts over the last 60 one-second samples.
        proompters_1_min = sum(self.values)
        time.sleep(1)
thread = SemaphoreCheckerThread(concurrent_semaphore)
thread.start()

View File

@@ -64,15 +64,14 @@ def generate():
backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
if not backend_response:
if opts.mode == 'oobabooga':
backend_response = format_sillytavern_err(f'Backend ({opts.mode}) returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}.', 'error')
backend_response = format_sillytavern_err(
f'Backend (oobabooga) returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}. Furthermore, oobabooga does not support concurrent requests so all users have to wait in line and the backend server may have glitched for a moment. Please try again.',
'error')
response_json_body['results'][0]['text'] = backend_response
else:
raise Exception
log_prompt(opts.database_path, client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
print(response_json_body)
return jsonify({
**response_json_body
}), 200

View File

@@ -14,8 +14,8 @@ from ...llm.info import get_running_model
@bp.route('/stats', methods=['GET'])
@cache.cached(timeout=60, query_string=True)
@cache_control(60)
@cache.cached(timeout=5, query_string=True)
@cache_control(5)
def get_stats():
model_list = get_running_model() # will return False when the fetch fails
if isinstance(model_list, bool):

View File

@@ -2,16 +2,15 @@ import os
import sys
from pathlib import Path
import tiktoken
from flask import Flask, current_app, jsonify
from flask import Flask, jsonify
from llm_server import opts
from llm_server.config import ConfigLoader
from llm_server.database import init_db
from llm_server.helpers import resolve_path
from llm_server.llm.info import get_running_model
from llm_server.routes.cache import cache
from llm_server.routes.helpers.http import cache_control
from llm_server.routes.stats import SemaphoreCheckerThread, concurrent_semaphore
from llm_server.routes.v1 import bp
script_path = os.path.dirname(os.path.realpath(__file__))
@@ -48,6 +47,8 @@ opts.concurrent_gens = config['concurrent_gens']
opts.frontend_api_client = config['frontend_api_client']
opts.context_size = config['token_limit']
SemaphoreCheckerThread(concurrent_semaphore).start()
app = Flask(__name__)
cache.init_app(app)
cache.clear() # clear redis cache
@@ -71,4 +72,4 @@ def fallback(first=None, rest=None):
if __name__ == "__main__":
app.run(host='0.0.0.0', debug=True)
app.run(host='0.0.0.0')