Log generation time to the DB; also prune generation_elapsed entries older than 3 minutes
This commit is contained in:
parent
3317bd5f1a
commit
e52acb03a4
|
@ -23,6 +23,7 @@ def init_db():
|
||||||
response TEXT,
|
response TEXT,
|
||||||
response_tokens INTEGER,
|
response_tokens INTEGER,
|
||||||
response_status INTEGER,
|
response_status INTEGER,
|
||||||
|
generation_time INTEGER,
|
||||||
parameters TEXT CHECK (parameters IS NULL OR json_valid(parameters)),
|
parameters TEXT CHECK (parameters IS NULL OR json_valid(parameters)),
|
||||||
headers TEXT CHECK (headers IS NULL OR json_valid(headers)),
|
headers TEXT CHECK (headers IS NULL OR json_valid(headers)),
|
||||||
timestamp INTEGER
|
timestamp INTEGER
|
||||||
|
@ -43,7 +44,7 @@ def init_db():
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
def log_prompt(ip, token, prompt, response, parameters, headers, backend_response_code):
|
def log_prompt(ip, token, prompt, response, gen_time, parameters, headers, backend_response_code):
|
||||||
prompt_tokens = len(tokenizer.encode(prompt))
|
prompt_tokens = len(tokenizer.encode(prompt))
|
||||||
response_tokens = len(tokenizer.encode(response))
|
response_tokens = len(tokenizer.encode(response))
|
||||||
|
|
||||||
|
@ -53,8 +54,8 @@ def log_prompt(ip, token, prompt, response, parameters, headers, backend_respons
|
||||||
timestamp = int(time.time())
|
timestamp = int(time.time())
|
||||||
conn = sqlite3.connect(opts.database_path)
|
conn = sqlite3.connect(opts.database_path)
|
||||||
c = conn.cursor()
|
c = conn.cursor()
|
||||||
c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||||
(ip, token, prompt, prompt_tokens, response, response_tokens, backend_response_code, json.dumps(parameters), json.dumps(headers), timestamp))
|
(ip, token, prompt, prompt_tokens, response, response_tokens, backend_response_code, gen_time, json.dumps(parameters), json.dumps(headers), timestamp))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
|
@ -10,21 +10,22 @@ server_start_time = datetime.now()
|
||||||
|
|
||||||
# TODO: have a background thread put the averages in a variable so we don't end up with massive arrays
|
# TODO: have a background thread put the averages in a variable so we don't end up with massive arrays
|
||||||
|
|
||||||
wait_in_queue_elapsed = []
|
# wait_in_queue_elapsed = []
|
||||||
wait_in_queue_elapsed_lock = Lock()
|
# wait_in_queue_elapsed_lock = Lock()
|
||||||
|
|
||||||
generation_elapsed = []
|
generation_elapsed = []
|
||||||
generation_elapsed_lock = Lock()
|
generation_elapsed_lock = Lock()
|
||||||
|
|
||||||
|
|
||||||
def elapsed_times_cleanup():
|
# TODO: is elapsed_times_cleanup() (commented out below) still needed?
|
||||||
global wait_in_queue_elapsed
|
# def elapsed_times_cleanup():
|
||||||
while True:
|
# global wait_in_queue_elapsed
|
||||||
current_time = time.time()
|
# while True:
|
||||||
with wait_in_queue_elapsed_lock:
|
# current_time = time.time()
|
||||||
global wait_in_queue_elapsed
|
# with wait_in_queue_elapsed_lock:
|
||||||
wait_in_queue_elapsed = [(end_time, elapsed_time) for end_time, elapsed_time in wait_in_queue_elapsed if current_time - end_time <= 60]
|
# global wait_in_queue_elapsed
|
||||||
time.sleep(1)
|
# wait_in_queue_elapsed = [(end_time, elapsed_time) for end_time, elapsed_time in wait_in_queue_elapsed if current_time - end_time <= 60]
|
||||||
|
# time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
def calculate_avg_gen_time():
|
def calculate_avg_gen_time():
|
||||||
|
@ -37,10 +38,18 @@ def calculate_avg_gen_time():
|
||||||
|
|
||||||
|
|
||||||
def process_avg_gen_time():
|
def process_avg_gen_time():
|
||||||
|
global generation_elapsed
|
||||||
while True:
|
while True:
|
||||||
with generation_elapsed_lock:
|
with generation_elapsed_lock:
|
||||||
|
# Get the current time
|
||||||
|
current_time = time.time()
|
||||||
|
|
||||||
|
# Remove data older than 3 minutes
|
||||||
|
three_minutes_ago = current_time - 180
|
||||||
|
generation_elapsed[:] = [(end, elapsed) for end, elapsed in generation_elapsed if end >= three_minutes_ago]
|
||||||
|
|
||||||
# Get the data from the last minute
|
# Get the data from the last minute
|
||||||
one_minute_ago = time.time() - 60
|
one_minute_ago = current_time - 60
|
||||||
recent_data = [elapsed for end, elapsed in generation_elapsed if end >= one_minute_ago]
|
recent_data = [elapsed for end, elapsed in generation_elapsed if end >= one_minute_ago]
|
||||||
|
|
||||||
# Calculate the average
|
# Calculate the average
|
||||||
|
|
|
@ -76,7 +76,7 @@ def generate():
|
||||||
else:
|
else:
|
||||||
raise Exception
|
raise Exception
|
||||||
|
|
||||||
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
|
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response.status_code)
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'code': 500,
|
'code': 500,
|
||||||
'error': 'failed to reach backend',
|
'error': 'failed to reach backend',
|
||||||
|
@ -95,7 +95,7 @@ def generate():
|
||||||
else:
|
else:
|
||||||
raise Exception
|
raise Exception
|
||||||
|
|
||||||
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
|
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response.status_code)
|
||||||
return jsonify({
|
return jsonify({
|
||||||
**response_json_body
|
**response_json_body
|
||||||
}), 200
|
}), 200
|
||||||
|
@ -111,7 +111,7 @@ def generate():
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
raise Exception
|
raise Exception
|
||||||
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
|
log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response.status_code)
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'code': 500,
|
'code': 500,
|
||||||
'error': 'the backend did not return valid JSON',
|
'error': 'the backend did not return valid JSON',
|
||||||
|
|
Reference in New Issue