reorganize stats, add 24 hr proompters, adjust logging when error
parent 26a0a13aa7
commit 839bb115c6
@@ -110,3 +110,14 @@ def sum_column(table_name, column_name):
     result = cursor.fetchone()
     conn.close()
     return result[0] if result[0] else 0
+
+
+def get_distinct_ips_24h():
+    # Get the current time and subtract 24 hours (in seconds)
+    past_24_hours = int(time.time()) - 24 * 60 * 60
+    conn = sqlite3.connect(opts.database_path)
+    cur = conn.cursor()
+    cur.execute("SELECT COUNT(DISTINCT ip) FROM prompts WHERE timestamp >= ?", (past_24_hours,))
+    result = cur.fetchone()
+    conn.close()
+    return result[0] if result else 0
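The new helper presumes the `prompts` table keeps one row per request with an `ip` column and a Unix-epoch `timestamp` column; the schema itself isn't part of this diff. A minimal sketch exercising the same query under that assumption:

```python
import sqlite3
import time

# Hypothetical minimal schema -- the real prompts table in llm_server
# surely carries more columns (token, prompt, parameters, ...).
conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE prompts (ip TEXT, timestamp INTEGER)')
now = int(time.time())
conn.execute('INSERT INTO prompts VALUES (?, ?)', ('203.0.113.7', now))
conn.execute('INSERT INTO prompts VALUES (?, ?)', ('203.0.113.7', now - 60))
conn.execute('INSERT INTO prompts VALUES (?, ?)', ('198.51.100.2', now - 2 * 86400))

# The same query get_distinct_ips_24h() runs: distinct IPs seen in the last 24 hours.
past_24_hours = now - 24 * 60 * 60
cur = conn.execute('SELECT COUNT(DISTINCT ip) FROM prompts WHERE timestamp >= ?', (past_24_hours,))
print(cur.fetchone()[0])  # 1 -- the repeat visitor counts once, the two-day-old row not at all
conn.close()
```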
@@ -75,7 +75,7 @@ def generate():
         else:
             raise Exception
 
-        log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response if response else 0)
+        log_prompt(client_ip, token, request_json_body['prompt'], backend_response, None, parameters, dict(request.headers), response if response else 0)
         return jsonify({
             'code': 500,
             'error': 'failed to reach backend',
@@ -90,13 +90,13 @@ def generate():
         if opts.mode == 'oobabooga':
             backend_err = True
             backend_response = format_sillytavern_err(
-                f'Backend (oobabooga) returned an empty string. This is usually due to an error on the backend during inference. Make sure your context size is no greater than {opts.context_size}. Please try again.',
+                f'Backend (oobabooga) returned an empty string. This is usually due to an error on the backend during inference. Please check your parameters and try again.',
                 'error')
             response_json_body['results'][0]['text'] = backend_response
         else:
             raise Exception
 
-    log_prompt(client_ip, token, request_json_body['prompt'], backend_response if not backend_err else '', elapsed_time, parameters, dict(request.headers), response.status_code)
+    log_prompt(client_ip, token, request_json_body['prompt'], backend_response if not backend_err else '', elapsed_time if not backend_err else None, parameters, dict(request.headers), response.status_code)
     return jsonify({
         **response_json_body
     }), 200
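Both `log_prompt` changes serve the same goal: when the backend errors out, the elapsed time is recorded as `None` rather than a value that doesn't correspond to a real generation. Assuming the value lands in a nullable numeric column (`generation_time` is an illustrative name, not taken from this diff), an average over that column then skips error rows automatically, since SQL's `AVG()` ignores NULL:

```python
import sqlite3

def average_generation_time(database_path):
    # SQLite's AVG() ignores NULL, so rows logged with elapsed_time=None
    # (errored requests) no longer drag the average around.
    conn = sqlite3.connect(database_path)
    cur = conn.cursor()
    # 'generation_time' is a hypothetical column name for illustration.
    cur.execute('SELECT AVG(generation_time) FROM prompts')
    result = cur.fetchone()
    conn.close()
    return result[0] if result[0] is not None else 0
```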
@@ -2,7 +2,7 @@ import time
 from datetime import datetime
 
 from llm_server import opts
-from llm_server.database import sum_column
+from llm_server.database import get_distinct_ips_24h, sum_column
 from llm_server.helpers import deep_sort
 from llm_server.llm.info import get_running_model
 from llm_server.netdata import get_power_states
@@ -28,7 +28,7 @@ def generate_stats():
     # waits = [elapsed for end, elapsed in t]
     # estimated_wait = int(sum(waits) / len(waits))
 
-    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
+    proompters_in_queue = len(priority_queue)
     average_tps = float(redis.get('average_tps'))
 
     if opts.average_generation_time_mode == 'database':
@@ -48,9 +48,15 @@ def generate_stats():
 
     output = {
         'stats': {
-            'proompts_in_queue': proompters_in_queue,
-            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
-            'proompts': get_total_proompts() if opts.show_num_prompts else None,
+            'proompters': {
+                '1_min': SemaphoreCheckerThread.proompters_1_min,
+                '24_hrs': get_distinct_ips_24h(),
+            },
+            'proompts': {
+                'processing': get_active_gen_workers(),
+                'queued': proompters_in_queue,
+                'total': get_total_proompts() if opts.show_num_prompts else None,
+            },
             'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
             'average_generation_elapsed_sec': average_generation_time,
             'average_tps': average_tps,
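Taken together with the queue change above (queued and processing are now reported separately instead of summed), the reorganized `stats` block comes out shaped roughly like this; the values are made up and keys outside the excerpt are unchanged:

```python
example_output = {
    'stats': {
        'proompters': {
            '1_min': 3,       # SemaphoreCheckerThread.proompters_1_min
            '24_hrs': 41,     # get_distinct_ips_24h(), new in this commit
        },
        'proompts': {
            'processing': 1,  # get_active_gen_workers()
            'queued': 2,      # len(priority_queue), no longer includes processing
            'total': 18923,   # get_total_proompts(), or None if hidden
        },
        'uptime': 86400,
        'average_generation_elapsed_sec': 12,
        'average_tps': 4.2,
    },
}
```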