fix proompters_1_min, other minor changes
parent 3d6ba923bf
commit 9f14b166dd
@@ -18,19 +18,17 @@ class SemaphoreCheckerThread(Thread):
         Thread.__init__(self)
         self.semaphore = semaphore
         self.values = collections.deque(maxlen=60)
+        self.prev_semaphore_value = self.semaphore._value
         self.daemon = True

     def run(self):
         global proompters_1_min
         while True:
-            # If the semaphore value is less than the maximum, a prompter has sent a prompt
-            if opts.concurrent_gens > self.semaphore._value:
+            current_semaphore_value = self.semaphore._value
+            if current_semaphore_value < opts.concurrent_gens and current_semaphore_value != self.prev_semaphore_value:
                 self.values.append(1)
             else:
                 self.values.append(0)
+            self.prev_semaphore_value = current_semaphore_value
             proompters_1_min = sum(self.values)
             time.sleep(1)
-
-
-thread = SemaphoreCheckerThread(concurrent_semaphore)
-thread.start()
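
The rewritten check only records a tick when a semaphore slot is held and the value has changed since the previous second, so a single long-running generation is no longer re-counted sixty times. Below is a self-contained sketch of the same polling pattern; CONCURRENT_GENS and the module-level semaphore stand in for opts.concurrent_gens and the project's concurrent_semaphore, and reading Semaphore._value is a CPython implementation detail that the patch itself relies on.

import collections
import threading
import time

CONCURRENT_GENS = 3  # stand-in for opts.concurrent_gens
concurrent_semaphore = threading.BoundedSemaphore(CONCURRENT_GENS)

proompters_1_min = 0


class SemaphoreCheckerThread(threading.Thread):
    # Sample the semaphore once per second and sum the last 60 samples.
    def __init__(self, semaphore):
        threading.Thread.__init__(self)
        self.semaphore = semaphore
        self.values = collections.deque(maxlen=60)
        self.prev_semaphore_value = self.semaphore._value
        self.daemon = True

    def run(self):
        global proompters_1_min
        while True:
            current = self.semaphore._value
            # Count a 1 only when a slot is held AND the value just changed,
            # so an in-flight generation is not counted again every second.
            if current < CONCURRENT_GENS and current != self.prev_semaphore_value:
                self.values.append(1)
            else:
                self.values.append(0)
            self.prev_semaphore_value = current
            proompters_1_min = sum(self.values)
            time.sleep(1)


if __name__ == '__main__':
    SemaphoreCheckerThread(concurrent_semaphore).start()
    with concurrent_semaphore:   # simulate one prompt being processed
        time.sleep(2)
    time.sleep(1)
    print(proompters_1_min)      # typically prints 1: the held slot was counted once
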
@@ -64,15 +64,14 @@ def generate():
         backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
         if not backend_response:
             if opts.mode == 'oobabooga':
-                backend_response = format_sillytavern_err(f'Backend ({opts.mode}) returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}.', 'error')
+                backend_response = format_sillytavern_err(
+                    f'Backend (oobabooga) returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}. Furthermore, oobabooga does not support concurrent requests so all users have to wait in line and the backend server may have glitched for a moment. Please try again.',
+                    'error')
                 response_json_body['results'][0]['text'] = backend_response
             else:
                 raise Exception

         log_prompt(opts.database_path, client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
-
-        print(response_json_body)
-
         return jsonify({
             **response_json_body
         }), 200
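
For context, the fallback above hinges on safe_list_get returning a default when the backend sends back nothing usable. The sketch below is a hypothetical reconstruction of that helper, assuming a safe_list_get(lst, idx, default) signature and treating format_sillytavern_err as an opaque formatter; both are stand-ins, not the project's actual implementations.

def safe_list_get(lst, idx, default=None):
    # Hypothetical reconstruction: return lst[idx] if it exists, else the default.
    try:
        return lst[idx]
    except (IndexError, TypeError):
        return default


def format_sillytavern_err(msg, level='error'):
    # Stub standing in for the project's SillyTavern-style error formatter.
    return f'[{level.upper()}] {msg}'


response_json_body = {'results': [{'text': ''}]}  # backend replied, but with an empty string
backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
if not backend_response:
    backend_response = format_sillytavern_err('Backend (oobabooga) returned an empty string. Please try again.', 'error')
    response_json_body['results'][0]['text'] = backend_response
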
@@ -14,8 +14,8 @@ from ...llm.info import get_running_model


 @bp.route('/stats', methods=['GET'])
-@cache.cached(timeout=60, query_string=True)
-@cache_control(60)
+@cache.cached(timeout=5, query_string=True)
+@cache_control(5)
 def get_stats():
     model_list = get_running_model()  # will return False when the fetch fails
     if isinstance(model_list, bool):
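
Dropping both timeouts from 60 to 5 seconds keeps /stats close to real time now that proompters_1_min can change every second. Here is a minimal sketch of the caching setup assumed here, using Flask-Caching's cached decorator; the cache_control helper below is only an assumption about what llm_server.routes.helpers.http provides (a Cache-Control max-age header), not its actual code.

import functools

from flask import Flask, jsonify
from flask_caching import Cache

app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'SimpleCache'})


def cache_control(seconds):
    # Assumed behaviour: attach a max-age hint to the response.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            response = app.make_response(func(*args, **kwargs))
            response.headers['Cache-Control'] = f'public, max-age={seconds}'
            return response
        return wrapper
    return decorator


@app.route('/stats', methods=['GET'])
@cache.cached(timeout=5, query_string=True)  # server-side cache, 5 s
@cache_control(5)                            # client-side hint, 5 s
def get_stats():
    return jsonify({'proompters_1_min': 0})
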
server.py
@@ -2,16 +2,15 @@ import os
 import sys
 from pathlib import Path

-import tiktoken
-from flask import Flask, current_app, jsonify
+from flask import Flask, jsonify

 from llm_server import opts
 from llm_server.config import ConfigLoader
 from llm_server.database import init_db
 from llm_server.helpers import resolve_path
-from llm_server.llm.info import get_running_model
 from llm_server.routes.cache import cache
 from llm_server.routes.helpers.http import cache_control
+from llm_server.routes.stats import SemaphoreCheckerThread, concurrent_semaphore
 from llm_server.routes.v1 import bp

 script_path = os.path.dirname(os.path.realpath(__file__))
@@ -48,9 +47,11 @@ opts.concurrent_gens = config['concurrent_gens']
 opts.frontend_api_client = config['frontend_api_client']
 opts.context_size = config['token_limit']

+SemaphoreCheckerThread(concurrent_semaphore).start()
+
 app = Flask(__name__)
 cache.init_app(app)
 cache.clear()  # clear redis cache
 # with app.app_context():
 #     current_app.tokenizer = tiktoken.get_encoding("cl100k_base")
 app.register_blueprint(bp, url_prefix='/api/v1/')
@@ -71,4 +72,4 @@ def fallback(first=None, rest=None):


 if __name__ == "__main__":
-    app.run(host='0.0.0.0', debug=True)
+    app.run(host='0.0.0.0')
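
Dropping debug=True also matters because the monitor thread is now started at module level: Werkzeug's debug reloader re-executes the script in a child process, so two SemaphoreCheckerThread instances would be created, one in the reloader parent and one in the serving child. If debug output is ever wanted again, disabling just the reloader is one option (a sketch, not part of this commit):

if __name__ == "__main__":
    app.run(host='0.0.0.0', debug=True, use_reloader=False)
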