reorganize stats, add 24 hr proompters, adjust logging on error

Cyberes 2023-08-25 12:20:16 -06:00
parent 26a0a13aa7
commit 839bb115c6
3 changed files with 25 additions and 8 deletions

View File

@@ -110,3 +110,14 @@ def sum_column(table_name, column_name):
    result = cursor.fetchone()
    conn.close()
    return result[0] if result[0] else 0


def get_distinct_ips_24h():
    # Get the current time and subtract 24 hours (in seconds)
    past_24_hours = int(time.time()) - 24 * 60 * 60
    conn = sqlite3.connect(opts.database_path)
    cur = conn.cursor()
    cur.execute("SELECT COUNT(DISTINCT ip) FROM prompts WHERE timestamp >= ?", (past_24_hours,))
    result = cur.fetchone()
    conn.close()
    return result[0] if result else 0
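
The helper assumes the prompts table records timestamp as Unix epoch seconds, so the 24-hour window is just the current time minus 86,400. A rough standalone sketch of the query's behavior, using an in-memory database and made-up rows purely for illustration:

import sqlite3
import time

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE prompts (ip TEXT, timestamp INTEGER)')
now = int(time.time())
# two recent requests from the same IP, plus one request older than 24 hours from another IP
conn.executemany('INSERT INTO prompts VALUES (?, ?)',
                 [('10.0.0.1', now - 60), ('10.0.0.1', now - 3600), ('10.0.0.2', now - 90000)])
cutoff = now - 24 * 60 * 60
count = conn.execute('SELECT COUNT(DISTINCT ip) FROM prompts WHERE timestamp >= ?', (cutoff,)).fetchone()[0]
print(count)  # 1 -- the stale IP falls outside the window, the repeat visitor counts once
conn.close()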

View File

@@ -75,7 +75,7 @@ def generate():
else:
raise Exception
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response if response else 0)
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, None, parameters, dict(request.headers), response if response else 0)
return jsonify({
'code': 500,
'error': 'failed to reach backend',
@@ -90,13 +90,13 @@ def generate():
if opts.mode == 'oobabooga':
backend_err = True
backend_response = format_sillytavern_err(
f'Backend (oobabooga) returned an empty string. This is usually due to an error on the backend during inference. Make sure your context size is no greater than {opts.context_size}. Please try again.',
f'Backend (oobabooga) returned an empty string. This is usually due to an error on the backend during inference. Please check your parameters and try again.',
'error')
response_json_body['results'][0]['text'] = backend_response
else:
raise Exception
log_prompt(client_ip, token, request_json_body['prompt'], backend_response if not backend_err else '', elapsed_time, parameters, dict(request.headers), response.status_code)
log_prompt(client_ip, token, request_json_body['prompt'], backend_response if not backend_err else '', elapsed_time if not backend_err else None, parameters, dict(request.headers), response.status_code)
return jsonify({
**response_json_body
}), 200
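
Logging None for the elapsed time on backend failures presumably keeps failed requests from dragging down any timing averages computed from the database. A hedged sketch of how such rows could be excluded, assuming a hypothetical generation_time column that ends up NULL when log_prompt receives None:

import sqlite3

def average_generation_time_sec(db_path):
    # AVG() already skips NULLs; the explicit filter just makes the intent visible
    conn = sqlite3.connect(db_path)
    try:
        row = conn.execute('SELECT AVG(generation_time) FROM prompts WHERE generation_time IS NOT NULL').fetchone()
        return float(row[0]) if row[0] is not None else 0.0
    finally:
        conn.close()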

View File

@@ -2,7 +2,7 @@ import time
from datetime import datetime
from llm_server import opts
from llm_server.database import sum_column
from llm_server.database import get_distinct_ips_24h, sum_column
from llm_server.helpers import deep_sort
from llm_server.llm.info import get_running_model
from llm_server.netdata import get_power_states
@@ -28,7 +28,7 @@ def generate_stats():
    # waits = [elapsed for end, elapsed in t]
    # estimated_wait = int(sum(waits) / len(waits))
    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
    proompters_in_queue = len(priority_queue)
    average_tps = float(redis.get('average_tps'))
    if opts.average_generation_time_mode == 'database':
@@ -48,9 +48,15 @@ def generate_stats():
    output = {
        'stats': {
            'proompts_in_queue': proompters_in_queue,
            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
            'proompts': get_total_proompts() if opts.show_num_prompts else None,
            'proompters': {
                '1_min': SemaphoreCheckerThread.proompters_1_min,
                '24_hrs': get_distinct_ips_24h(),
            },
            'proompts': {
                'processing': get_active_gen_workers(),
                'queued': proompters_in_queue,
                'total': get_total_proompts() if opts.show_num_prompts else None,
            },
            'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
            'average_generation_elapsed_sec': average_generation_time,
            'average_tps': average_tps,
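
After the reorganization, the prompter and prompt counters are nested under their own keys instead of sitting flat in 'stats'. A hypothetical payload might look roughly like this (values invented, other keys omitted):

example_stats = {
    'stats': {
        'proompters': {
            '1_min': 3,
            '24_hrs': 117,
        },
        'proompts': {
            'processing': 2,
            'queued': 5,
            'total': 48213,
        },
        'uptime': 86400,
        'average_generation_elapsed_sec': 12,
        'average_tps': 4.2,
    },
}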