reorganize stats, add 24 hr proompters, adjust logging when error

This commit is contained in:
Cyberes 2023-08-25 12:20:16 -06:00
parent 26a0a13aa7
commit 839bb115c6
3 changed files with 25 additions and 8 deletions

View File

@ -110,3 +110,14 @@ def sum_column(table_name, column_name):
result = cursor.fetchone() result = cursor.fetchone()
conn.close() conn.close()
return result[0] if result[0] else 0 return result[0] if result[0] else 0
def get_distinct_ips_24h():
    """Return the number of distinct client IPs that sent a prompt in the last 24 hours.

    Returns 0 when no rows match. Reads the SQLite database at
    ``opts.database_path``; the ``timestamp`` column is compared as epoch
    seconds — assumed to be stored that way by the insert path (TODO confirm).
    """
    # Unix timestamp for exactly 24 hours ago.
    past_24_hours = int(time.time()) - 24 * 60 * 60
    conn = sqlite3.connect(opts.database_path)
    try:
        cur = conn.cursor()
        # Parameterized query — never interpolate values into SQL text.
        cur.execute("SELECT COUNT(DISTINCT ip) FROM prompts WHERE timestamp >= ?", (past_24_hours,))
        result = cur.fetchone()
    finally:
        # Close even if execute/fetchone raises; the original leaked the
        # connection handle on any database error.
        conn.close()
    return result[0] if result else 0

View File

@ -75,7 +75,7 @@ def generate():
else: else:
raise Exception raise Exception
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response if response else 0) log_prompt(client_ip, token, request_json_body['prompt'], backend_response, None, parameters, dict(request.headers), response if response else 0)
return jsonify({ return jsonify({
'code': 500, 'code': 500,
'error': 'failed to reach backend', 'error': 'failed to reach backend',
@ -90,13 +90,13 @@ def generate():
if opts.mode == 'oobabooga': if opts.mode == 'oobabooga':
backend_err = True backend_err = True
backend_response = format_sillytavern_err( backend_response = format_sillytavern_err(
f'Backend (oobabooga) returned an empty string. This is usually due to an error on the backend during inference. Make sure your context size is no greater than {opts.context_size}. Please try again.', f'Backend (oobabooga) returned an empty string. This is usually due to an error on the backend during inference. Please check your parameters and try again.',
'error') 'error')
response_json_body['results'][0]['text'] = backend_response response_json_body['results'][0]['text'] = backend_response
else: else:
raise Exception raise Exception
log_prompt(client_ip, token, request_json_body['prompt'], backend_response if not backend_err else '', elapsed_time, parameters, dict(request.headers), response.status_code) log_prompt(client_ip, token, request_json_body['prompt'], backend_response if not backend_err else '', elapsed_time if not backend_err else None, parameters, dict(request.headers), response.status_code)
return jsonify({ return jsonify({
**response_json_body **response_json_body
}), 200 }), 200

View File

@ -2,7 +2,7 @@ import time
from datetime import datetime from datetime import datetime
from llm_server import opts from llm_server import opts
from llm_server.database import sum_column from llm_server.database import get_distinct_ips_24h, sum_column
from llm_server.helpers import deep_sort from llm_server.helpers import deep_sort
from llm_server.llm.info import get_running_model from llm_server.llm.info import get_running_model
from llm_server.netdata import get_power_states from llm_server.netdata import get_power_states
@ -28,7 +28,7 @@ def generate_stats():
# waits = [elapsed for end, elapsed in t] # waits = [elapsed for end, elapsed in t]
# estimated_wait = int(sum(waits) / len(waits)) # estimated_wait = int(sum(waits) / len(waits))
proompters_in_queue = len(priority_queue) + get_active_gen_workers() proompters_in_queue = len(priority_queue)
average_tps = float(redis.get('average_tps')) average_tps = float(redis.get('average_tps'))
if opts.average_generation_time_mode == 'database': if opts.average_generation_time_mode == 'database':
@ -48,9 +48,15 @@ def generate_stats():
output = { output = {
'stats': { 'stats': {
'proompts_in_queue': proompters_in_queue, 'proompters': {
'proompters_1_min': SemaphoreCheckerThread.proompters_1_min, '1_min': SemaphoreCheckerThread.proompters_1_min,
'proompts': get_total_proompts() if opts.show_num_prompts else None, '24_hrs': get_distinct_ips_24h(),
},
'proompts': {
'processing': get_active_gen_workers(),
'queued': proompters_in_queue,
'total': get_total_proompts() if opts.show_num_prompts else None,
},
'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None, 'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
'average_generation_elapsed_sec': average_generation_time, 'average_generation_elapsed_sec': average_generation_time,
'average_tps': average_tps, 'average_tps': average_tps,