reorganize stats, add 24 hr proompters, adjust logging on error
parent 26a0a13aa7
commit 839bb115c6
@@ -110,3 +110,14 @@ def sum_column(table_name, column_name):
     result = cursor.fetchone()
     conn.close()
     return result[0] if result[0] else 0
+
+
+def get_distinct_ips_24h():
+    # Get the current time and subtract 24 hours (in seconds)
+    past_24_hours = int(time.time()) - 24 * 60 * 60
+    conn = sqlite3.connect(opts.database_path)
+    cur = conn.cursor()
+    cur.execute("SELECT COUNT(DISTINCT ip) FROM prompts WHERE timestamp >= ?", (past_24_hours,))
+    result = cur.fetchone()
+    conn.close()
+    return result[0] if result else 0
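The new get_distinct_ips_24h() counts unique client IPs that submitted prompts in the last 24 hours. A minimal smoke test, assuming the prompts table has (at least) ip and timestamp columns with timestamps in epoch seconds, as the query implies; the test database path is hypothetical:

import sqlite3
import time

from llm_server import opts
from llm_server.database import get_distinct_ips_24h

opts.database_path = '/tmp/llm_server_test.sqlite'  # hypothetical throwaway DB
conn = sqlite3.connect(opts.database_path)
conn.execute('CREATE TABLE IF NOT EXISTS prompts (ip TEXT, timestamp INTEGER)')
now = int(time.time())
conn.executemany('INSERT INTO prompts (ip, timestamp) VALUES (?, ?)', [
    ('1.2.3.4', now - 60),            # inside the 24 h window
    ('1.2.3.4', now - 3600),          # same IP again: still one distinct IP
    ('5.6.7.8', now - 25 * 60 * 60),  # outside the window: excluded
])
conn.commit()
conn.close()

assert get_distinct_ips_24h() == 1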
@@ -75,7 +75,7 @@ def generate():
         else:
             raise Exception
 
-        log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response if response else 0)
+        log_prompt(client_ip, token, request_json_body['prompt'], backend_response, None, parameters, dict(request.headers), response if response else 0)
         return jsonify({
             'code': 500,
             'error': 'failed to reach backend',
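When the backend is unreachable there is no meaningful generation time, so the call above now logs None in place of elapsed_time. A sketch of why that matters when opts.average_generation_time_mode == 'database': None is stored as NULL, and SQL's AVG() skips NULLs, so failed requests no longer drag the average down. The helper and column name below are assumptions for illustration, written in the style of sum_column():

import sqlite3

from llm_server import opts

def average_generation_sec():
    # Hypothetical helper, not part of this commit. Rows whose elapsed time
    # was logged as None (NULL) are ignored by AVG() automatically.
    # The column name 'generation_time' is an assumption for illustration.
    conn = sqlite3.connect(opts.database_path)
    result = conn.execute('SELECT AVG(generation_time) FROM prompts').fetchone()
    conn.close()
    return result[0] if result[0] else 0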
@@ -90,13 +90,13 @@ def generate():
         if opts.mode == 'oobabooga':
             backend_err = True
             backend_response = format_sillytavern_err(
-                f'Backend (oobabooga) returned an empty string. This is usually due to an error on the backend during inference. Make sure your context size is no greater than {opts.context_size}. Please try again.',
+                f'Backend (oobabooga) returned an empty string. This is usually due to an error on the backend during inference. Please check your parameters and try again.',
                 'error')
             response_json_body['results'][0]['text'] = backend_response
         else:
             raise Exception
 
-    log_prompt(client_ip, token, request_json_body['prompt'], backend_response if not backend_err else '', elapsed_time, parameters, dict(request.headers), response.status_code)
+    log_prompt(client_ip, token, request_json_body['prompt'], backend_response if not backend_err else '', elapsed_time if not backend_err else None, parameters, dict(request.headers), response.status_code)
     return jsonify({
         **response_json_body
     }), 200
@@ -2,7 +2,7 @@ import time
 from datetime import datetime
 
 from llm_server import opts
-from llm_server.database import sum_column
+from llm_server.database import get_distinct_ips_24h, sum_column
 from llm_server.helpers import deep_sort
 from llm_server.llm.info import get_running_model
 from llm_server.netdata import get_power_states
@@ -28,7 +28,7 @@ def generate_stats():
     # waits = [elapsed for end, elapsed in t]
     # estimated_wait = int(sum(waits) / len(waits))
 
-    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
+    proompters_in_queue = len(priority_queue)
     average_tps = float(redis.get('average_tps'))
 
     if opts.average_generation_time_mode == 'database':
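Since requests being processed are now reported separately (see proompts.processing in the next hunk), the queue counter stops double-counting: len(priority_queue) covers only requests still waiting, while get_active_gen_workers() moves into its own field.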
@@ -48,9 +48,15 @@ def generate_stats():
 
     output = {
         'stats': {
-            'proompts_in_queue': proompters_in_queue,
-            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
-            'proompts': get_total_proompts() if opts.show_num_prompts else None,
+            'proompters': {
+                '1_min': SemaphoreCheckerThread.proompters_1_min,
+                '24_hrs': get_distinct_ips_24h(),
+            },
+            'proompts': {
+                'processing': get_active_gen_workers(),
+                'queued': proompters_in_queue,
+                'total': get_total_proompts() if opts.show_num_prompts else None,
+            },
             'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
             'average_generation_elapsed_sec': average_generation_time,
             'average_tps': average_tps,
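For reference, a sketch of the reorganized stats payload this produces (values invented; keys outside this hunk are omitted since the hunk is truncated):

{
    "stats": {
        "proompters": {"1_min": 3, "24_hrs": 41},
        "proompts": {"processing": 2, "queued": 5, "total": 18230},
        "uptime": 86400,
        "average_generation_elapsed_sec": 12.4,
        "average_tps": 30.1
    }
}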