Log generation time to the DB; also prune generation_elapsed so it only keeps the last 3 minutes of data

This commit is contained in:
Cyberes 2023-08-23 22:20:39 -06:00
parent 3317bd5f1a
commit e52acb03a4
3 changed files with 27 additions and 17 deletions

View File

@ -23,6 +23,7 @@ def init_db():
response TEXT,
response_tokens INTEGER,
response_status INTEGER,
generation_time INTEGER,
parameters TEXT CHECK (parameters IS NULL OR json_valid(parameters)),
headers TEXT CHECK (headers IS NULL OR json_valid(headers)),
timestamp INTEGER
@ -43,7 +44,7 @@ def init_db():
conn.close()
def log_prompt(ip, token, prompt, response, parameters, headers, backend_response_code):
def log_prompt(ip, token, prompt, response, gen_time, parameters, headers, backend_response_code):
prompt_tokens = len(tokenizer.encode(prompt))
response_tokens = len(tokenizer.encode(response))
@ -53,8 +54,8 @@ def log_prompt(ip, token, prompt, response, parameters, headers, backend_respons
timestamp = int(time.time())
conn = sqlite3.connect(opts.database_path)
c = conn.cursor()
c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
(ip, token, prompt, prompt_tokens, response, response_tokens, backend_response_code, json.dumps(parameters), json.dumps(headers), timestamp))
c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
(ip, token, prompt, prompt_tokens, response, response_tokens, backend_response_code, gen_time, json.dumps(parameters), json.dumps(headers), timestamp))
conn.commit()
conn.close()

View File

@ -10,21 +10,22 @@ server_start_time = datetime.now()
# TODO: have a background thread put the averages in a variable so we don't end up with massive arrays
wait_in_queue_elapsed = []
wait_in_queue_elapsed_lock = Lock()
# wait_in_queue_elapsed = []
# wait_in_queue_elapsed_lock = Lock()
generation_elapsed = []
generation_elapsed_lock = Lock()
def elapsed_times_cleanup():
global wait_in_queue_elapsed
while True:
current_time = time.time()
with wait_in_queue_elapsed_lock:
global wait_in_queue_elapsed
wait_in_queue_elapsed = [(end_time, elapsed_time) for end_time, elapsed_time in wait_in_queue_elapsed if current_time - end_time <= 60]
time.sleep(1)
# TODO: is this cleanup loop still needed now that wait_in_queue_elapsed is commented out?
# def elapsed_times_cleanup():
# global wait_in_queue_elapsed
# while True:
# current_time = time.time()
# with wait_in_queue_elapsed_lock:
# global wait_in_queue_elapsed
# wait_in_queue_elapsed = [(end_time, elapsed_time) for end_time, elapsed_time in wait_in_queue_elapsed if current_time - end_time <= 60]
# time.sleep(1)
def calculate_avg_gen_time():
@ -37,10 +38,18 @@ def calculate_avg_gen_time():
def process_avg_gen_time():
global generation_elapsed
while True:
with generation_elapsed_lock:
# Get the current time
current_time = time.time()
# Remove data older than 3 minutes
three_minutes_ago = current_time - 180
generation_elapsed[:] = [(end, elapsed) for end, elapsed in generation_elapsed if end >= three_minutes_ago]
# Get the data from the last minute
one_minute_ago = time.time() - 60
one_minute_ago = current_time - 60
recent_data = [elapsed for end, elapsed in generation_elapsed if end >= one_minute_ago]
# Calculate the average

View File

@ -76,7 +76,7 @@ def generate():
else:
raise Exception
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response.status_code)
return jsonify({
'code': 500,
'error': 'failed to reach backend',
@ -95,7 +95,7 @@ def generate():
else:
raise Exception
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response.status_code)
return jsonify({
**response_json_body
}), 200
@ -111,7 +111,7 @@ def generate():
}
else:
raise Exception
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response.status_code)
return jsonify({
'code': 500,
'error': 'the backend did not return valid JSON',