Update weighted_average_column_for_model to account for rows where an error was reported; insert null for response tokens on error; correctly parse the first address from x-forwarded-for; correctly convert the model name reported by hf-textgen

This commit is contained in:
Cyberes 2023-08-29 15:46:56 -06:00
parent da77a24eaa
commit f9b9051bad
5 changed files with 19 additions and 20 deletions

View File

@ -48,9 +48,11 @@ def init_db():
def log_prompt(ip, token, prompt, response, gen_time, parameters, headers, backend_response_code, response_tokens: int = None, is_error: bool = False):
prompt_tokens = len(tokenizer.encode(prompt))
# TODO: insert None for response tokens when error
if not response_tokens:
response_tokens = len(tokenizer.encode(response))
if not is_error:
if not response_tokens:
response_tokens = len(tokenizer.encode(response))
else:
response_tokens = None
# Sometimes we may want to insert null into the DB, but
# usually we want to insert a float.
@ -127,7 +129,7 @@ def average_column_for_model(table_name, column_name, model_name):
return result[0]
def weighted_average_column_for_model(table_name, column_name, model_name):
def weighted_average_column_for_model(table_name, column_name, model_name, exclude_zeros: bool = False):
conn = sqlite3.connect(opts.database_path)
cursor = conn.cursor()
cursor.execute(f"SELECT DISTINCT model FROM {table_name}")
@ -144,7 +146,7 @@ def weighted_average_column_for_model(table_name, column_name, model_name):
total_weight = 0
weighted_sum = 0
for i, (value, rowid) in enumerate(results):
if value is None:
if value is None or (exclude_zeros and value == 0):
continue
weight = i + 1
total_weight += weight

View File

@ -9,20 +9,15 @@ def get_running_model():
backend_response = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
except Exception as e:
return False, e
try:
r_json = backend_response.json()
return r_json['result'], None
except Exception as e:
return False, e
elif opts.mode == 'hf-textgen':
try:
backend_response = requests.get(f'{opts.backend_url}/info', verify=opts.verify_ssl)
except Exception as e:
return False, e
try:
r_json = backend_response.json()
return r_json['model_id'].replace('/', '_'), None
except Exception as e:
return False, e
else:
raise Exception
try:
r_json = backend_response.json()
return r_json['model_id'].replace('/', '_'), None
except Exception as e:
return False, e

View File

@ -28,7 +28,7 @@ def generate():
if request.headers.get('cf-connecting-ip'):
client_ip = request.headers.get('cf-connecting-ip')
elif request.headers.get('x-forwarded-for'):
client_ip = request.headers.get('x-forwarded-for')
client_ip = request.headers.get('x-forwarded-for').split(',')[0]
else:
client_ip = request.remote_addr

View File

@ -34,7 +34,7 @@ def generate_stats():
if opts.average_generation_time_mode == 'database':
average_generation_time = float(redis.get('average_generation_elapsed_sec'))
average_output_tokens = float(redis.get('average_output_tokens'))
# average_output_tokens = float(redis.get('average_output_tokens'))
# average_generation_time_from_tps = (average_output_tokens / average_tps)
# What to use in our math that calculates the wait time.

View File

@ -38,7 +38,7 @@ class MainBackgroundThread(Thread):
try:
r = requests.get(f'{opts.backend_url}/info', timeout=3, verify=opts.verify_ssl)
j = r.json()
opts.running_model = j['model_id']
opts.running_model = j['model_id'].replace('/', '_')
redis.set('backend_online', 1)
redis.set_dict('backend_info', j)
except Exception as e:
@ -48,13 +48,15 @@ class MainBackgroundThread(Thread):
else:
raise Exception
average_generation_elapsed_sec = weighted_average_column_for_model('prompts', 'generation_time', opts.running_model) or 0
# exclude_zeros=True filters out rows where an error message was returned. Previously, if there was an error, 0
# was entered into the column. The new code enters null instead but we need to be backwards compatible for now
average_generation_elapsed_sec = weighted_average_column_for_model('prompts', 'generation_time', opts.running_model, exclude_zeros=True) or 0
redis.set('average_generation_elapsed_sec', average_generation_elapsed_sec)
# overall = average_column_for_model('prompts', 'generation_time', opts.running_model)
# print(f'Weighted: {average_generation_elapsed_sec}, overall: {overall}')
average_output_tokens = weighted_average_column_for_model('prompts', 'response_tokens', opts.running_model) or 0
average_output_tokens = weighted_average_column_for_model('prompts', 'response_tokens', opts.running_model, exclude_zeros=True) or 0
redis.set('average_output_tokens', average_output_tokens)
# overall = average_column_for_model('prompts', 'response_tokens', opts.running_model)