fix proompters_1_min, other minor changes
This commit is contained in:
parent
3d6ba923bf
commit
9f14b166dd
|
@ -18,19 +18,17 @@ class SemaphoreCheckerThread(Thread):
|
|||
Thread.__init__(self)
|
||||
self.semaphore = semaphore
|
||||
self.values = collections.deque(maxlen=60)
|
||||
self.prev_semaphore_value = self.semaphore._value
|
||||
self.daemon = True
|
||||
|
||||
def run(self):
|
||||
global proompters_1_min
|
||||
while True:
|
||||
# If the semaphore value is less than the maximum, a prompter has sent a prompt
|
||||
if opts.concurrent_gens > self.semaphore._value:
|
||||
current_semaphore_value = self.semaphore._value
|
||||
if current_semaphore_value < opts.concurrent_gens and current_semaphore_value != self.prev_semaphore_value:
|
||||
self.values.append(1)
|
||||
else:
|
||||
self.values.append(0)
|
||||
self.prev_semaphore_value = current_semaphore_value
|
||||
proompters_1_min = sum(self.values)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
thread = SemaphoreCheckerThread(concurrent_semaphore)
|
||||
thread.start()
|
||||
|
|
|
@ -64,15 +64,14 @@ def generate():
|
|||
backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
|
||||
if not backend_response:
|
||||
if opts.mode == 'oobabooga':
|
||||
backend_response = format_sillytavern_err(f'Backend ({opts.mode}) returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}.', 'error')
|
||||
backend_response = format_sillytavern_err(
|
||||
f'Backend (oobabooga) returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}. Furthermore, oobabooga does not support concurrent requests so all users have to wait in line and the backend server may have glitched for a moment. Please try again.',
|
||||
'error')
|
||||
response_json_body['results'][0]['text'] = backend_response
|
||||
else:
|
||||
raise Exception
|
||||
|
||||
log_prompt(opts.database_path, client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
|
||||
|
||||
print(response_json_body)
|
||||
|
||||
return jsonify({
|
||||
**response_json_body
|
||||
}), 200
|
||||
|
|
|
@ -14,8 +14,8 @@ from ...llm.info import get_running_model
|
|||
|
||||
|
||||
@bp.route('/stats', methods=['GET'])
|
||||
@cache.cached(timeout=60, query_string=True)
|
||||
@cache_control(60)
|
||||
@cache.cached(timeout=5, query_string=True)
|
||||
@cache_control(5)
|
||||
def get_stats():
|
||||
model_list = get_running_model() # will return False when the fetch fails
|
||||
if isinstance(model_list, bool):
|
||||
|
|
|
@ -2,16 +2,15 @@ import os
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import tiktoken
|
||||
from flask import Flask, current_app, jsonify
|
||||
from flask import Flask, jsonify
|
||||
|
||||
from llm_server import opts
|
||||
from llm_server.config import ConfigLoader
|
||||
from llm_server.database import init_db
|
||||
from llm_server.helpers import resolve_path
|
||||
from llm_server.llm.info import get_running_model
|
||||
from llm_server.routes.cache import cache
|
||||
from llm_server.routes.helpers.http import cache_control
|
||||
from llm_server.routes.stats import SemaphoreCheckerThread, concurrent_semaphore
|
||||
from llm_server.routes.v1 import bp
|
||||
|
||||
script_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
@ -48,6 +47,8 @@ opts.concurrent_gens = config['concurrent_gens']
|
|||
opts.frontend_api_client = config['frontend_api_client']
|
||||
opts.context_size = config['token_limit']
|
||||
|
||||
SemaphoreCheckerThread(concurrent_semaphore).start()
|
||||
|
||||
app = Flask(__name__)
|
||||
cache.init_app(app)
|
||||
cache.clear() # clear redis cache
|
||||
|
@ -71,4 +72,4 @@ def fallback(first=None, rest=None):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host='0.0.0.0', debug=True)
|
||||
app.run(host='0.0.0.0')
|
||||
|
|
Reference in New Issue