log gen time to DB, also keep generation_elapsed under 3 min
parent 3317bd5f1a
commit e52acb03a4
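In short: the prompts table gains a generation_time column, log_prompt() takes a new gen_time argument, and generate() passes the measured elapsed_time through. A minimal sketch of the call pattern on the write side, assuming a plain HTTP backend call (backend_url, requests.post and the surrounding variables are illustrative, not the project's actual code):

    import time
    import requests  # assumption: the proxy reaches the backend over HTTP

    start_time = time.time()
    # hypothetical backend request; the real call is not shown in this diff
    response = requests.post(backend_url, json=request_json_body)
    backend_response = response.text
    elapsed_time = time.time() - start_time  # seconds spent generating

    # gen_time slots in between the response text and the parameters
    log_prompt(client_ip, token, request_json_body['prompt'], backend_response,
               elapsed_time, parameters, dict(request.headers), response.status_code)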
@@ -23,6 +23,7 @@ def init_db():
         response TEXT,
         response_tokens INTEGER,
         response_status INTEGER,
+        generation_time INTEGER,
         parameters TEXT CHECK (parameters IS NULL OR json_valid(parameters)),
         headers TEXT CHECK (headers IS NULL OR json_valid(headers)),
         timestamp INTEGER
@@ -43,7 +44,7 @@ def init_db():
     conn.close()
 
 
-def log_prompt(ip, token, prompt, response, parameters, headers, backend_response_code):
+def log_prompt(ip, token, prompt, response, gen_time, parameters, headers, backend_response_code):
     prompt_tokens = len(tokenizer.encode(prompt))
     response_tokens = len(tokenizer.encode(response))
 
@@ -53,8 +54,8 @@ def log_prompt(ip, token, prompt, response, parameters, headers, backend_respons
     timestamp = int(time.time())
     conn = sqlite3.connect(opts.database_path)
     c = conn.cursor()
-    c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
-              (ip, token, prompt, prompt_tokens, response, response_tokens, backend_response_code, json.dumps(parameters), json.dumps(headers), timestamp))
+    c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+              (ip, token, prompt, prompt_tokens, response, response_tokens, backend_response_code, gen_time, json.dumps(parameters), json.dumps(headers), timestamp))
     conn.commit()
     conn.close()

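With the extra column in place, logged generation times can be aggregated straight from SQLite. A rough read-side example (the one-hour window and the avg_gen_time name are assumptions; opts.database_path and the prompts table come from the code above):

    import sqlite3
    import time

    conn = sqlite3.connect(opts.database_path)
    c = conn.cursor()
    # average generation time over the last hour (window chosen for illustration)
    c.execute("SELECT AVG(generation_time) FROM prompts WHERE timestamp >= ?",
              (int(time.time()) - 3600,))
    avg_gen_time = c.fetchone()[0]
    conn.close()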
@@ -10,21 +10,22 @@ server_start_time = datetime.now()
 
 # TODO: have a background thread put the averages in a variable so we don't end up with massive arrays
 
-wait_in_queue_elapsed = []
-wait_in_queue_elapsed_lock = Lock()
+# wait_in_queue_elapsed = []
+# wait_in_queue_elapsed_lock = Lock()
 
 generation_elapsed = []
 generation_elapsed_lock = Lock()
 
 
-def elapsed_times_cleanup():
-    global wait_in_queue_elapsed
-    while True:
-        current_time = time.time()
-        with wait_in_queue_elapsed_lock:
-            global wait_in_queue_elapsed
-            wait_in_queue_elapsed = [(end_time, elapsed_time) for end_time, elapsed_time in wait_in_queue_elapsed if current_time - end_time <= 60]
-        time.sleep(1)
+# TODO: do I need this?
+# def elapsed_times_cleanup():
+#     global wait_in_queue_elapsed
+#     while True:
+#         current_time = time.time()
+#         with wait_in_queue_elapsed_lock:
+#             global wait_in_queue_elapsed
+#             wait_in_queue_elapsed = [(end_time, elapsed_time) for end_time, elapsed_time in wait_in_queue_elapsed if current_time - end_time <= 60]
+#         time.sleep(1)
 
 
 def calculate_avg_gen_time():
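The (end_time, elapsed) tuples that the pruning below relies on are presumably appended wherever a generation finishes, along these lines (the append site itself is not part of this diff):

    # assumed producer side: record when a generation ended and how long it took
    with generation_elapsed_lock:
        generation_elapsed.append((time.time(), elapsed_time))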
@@ -37,10 +38,18 @@ def calculate_avg_gen_time():
 
 
 def process_avg_gen_time():
     global generation_elapsed
     while True:
         with generation_elapsed_lock:
+            # Get the current time
+            current_time = time.time()
+
+            # Remove data older than 3 minutes
+            three_minutes_ago = current_time - 180
+            generation_elapsed[:] = [(end, elapsed) for end, elapsed in generation_elapsed if end >= three_minutes_ago]
+
             # Get the data from the last minute
-            one_minute_ago = time.time() - 60
+            one_minute_ago = current_time - 60
             recent_data = [elapsed for end, elapsed in generation_elapsed if end >= one_minute_ago]
 
             # Calculate the average
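The hunk cuts off before the averaging step; a sketch of how the loop presumably continues (names and the sleep interval are illustrative, not from the repo):

    # assumed continuation (still inside the with-block above): average the last minute of samples
    average = sum(recent_data) / len(recent_data) if recent_data else 0
    # then sleep outside the lock before the next pruning pass, e.g. time.sleep(1)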
@@ -76,7 +76,7 @@ def generate():
     else:
         raise Exception
 
-    log_prompt(client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
+    log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response.status_code)
     return jsonify({
         'code': 500,
         'error': 'failed to reach backend',
@@ -95,7 +95,7 @@ def generate():
     else:
         raise Exception
 
-    log_prompt(client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
+    log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response.status_code)
     return jsonify({
         **response_json_body
     }), 200
@@ -111,7 +111,7 @@ def generate():
         }
     else:
         raise Exception
-    log_prompt(client_ip, token, request_json_body['prompt'], backend_response, parameters, dict(request.headers), response.status_code)
+    log_prompt(client_ip, token, request_json_body['prompt'], backend_response, elapsed_time, parameters, dict(request.headers), response.status_code)
     return jsonify({
         'code': 500,
         'error': 'the backend did not return valid JSON',