update weighted_average_column_for_model to account for when there was an error reported, insert null for response tokens when error, correctly parse x-forwarded-for, correctly convert model reported by hf-textgen
This commit is contained in:
parent da77a24eaa
commit f9b9051bad
@@ -48,9 +48,11 @@ def init_db():
 def log_prompt(ip, token, prompt, response, gen_time, parameters, headers, backend_response_code, response_tokens: int = None, is_error: bool = False):
     prompt_tokens = len(tokenizer.encode(prompt))
 
-    # TODO: insert None for response tokens when error
-    if not response_tokens:
-        response_tokens = len(tokenizer.encode(response))
+    if not is_error:
+        if not response_tokens:
+            response_tokens = len(tokenizer.encode(response))
+    else:
+        response_tokens = None
 
     # Sometimes we may want to insert null into the DB, but
     # usually we want to insert a float.
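For context on why an error should insert NULL rather than a token count: sqlite3 maps a Python None parameter to SQL NULL, which the averaging code can skip, whereas a fake 0 would drag the averages down. A minimal standalone sketch (a single-column stand-in for the real prompts table, not the project's actual schema):

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE prompts (response_tokens INTEGER)')
conn.execute('INSERT INTO prompts VALUES (?)', (None,))  # error case -> stored as NULL
conn.execute('INSERT INTO prompts VALUES (?)', (42,))    # normal case
print(conn.execute('SELECT response_tokens FROM prompts').fetchall())
# [(None,), (42,)] -- NULL rows come back as None and can be filtered out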
@@ -127,7 +129,7 @@ def average_column_for_model(table_name, column_name, model_name):
     return result[0]
 
 
-def weighted_average_column_for_model(table_name, column_name, model_name):
+def weighted_average_column_for_model(table_name, column_name, model_name, exclude_zeros: bool = False):
     conn = sqlite3.connect(opts.database_path)
     cursor = conn.cursor()
     cursor.execute(f"SELECT DISTINCT model FROM {table_name}")
@@ -144,7 +146,7 @@ def weighted_average_column_for_model(table_name, column_name, model_name):
         total_weight = 0
         weighted_sum = 0
         for i, (value, rowid) in enumerate(results):
-            if value is None:
+            if value is None or (exclude_zeros and value == 0):
                 continue
             weight = i + 1
             total_weight += weight
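The loop above is a position-weighted average: row i gets weight i + 1, so (assuming the query returns rows oldest-first) recent rows count for more, and exclude_zeros=True skips legacy error rows that were stored as 0 as well as the new NULL rows. A standalone sketch of the same math, separate from the project's SQLite plumbing:

def weighted_average(values, exclude_zeros=False):
    total_weight = 0
    weighted_sum = 0
    for i, value in enumerate(values):
        if value is None or (exclude_zeros and value == 0):
            continue  # skip NULLs and, optionally, zeroed error rows
        weight = i + 1
        total_weight += weight
        weighted_sum += value * weight
    return weighted_sum / total_weight if total_weight else None

print(weighted_average([10, 0, 20, None, 30], exclude_zeros=True))  # 220 / 9 ≈ 24.44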
@@ -9,20 +9,15 @@ def get_running_model():
             backend_response = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
         except Exception as e:
             return False, e
-        try:
-            r_json = backend_response.json()
-            return r_json['result'], None
-        except Exception as e:
-            return False, e
     elif opts.mode == 'hf-textgen':
         try:
             backend_response = requests.get(f'{opts.backend_url}/info', verify=opts.verify_ssl)
         except Exception as e:
             return False, e
-        try:
-            r_json = backend_response.json()
-            return r_json['model_id'].replace('/', '_'), None
-        except Exception as e:
-            return False, e
     else:
         raise Exception
+    try:
+        r_json = backend_response.json()
+        return r_json['model_id'].replace('/', '_'), None
+    except Exception as e:
+        return False, e
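Hugging Face model IDs reported by the hf-textgen backend come in namespace/name form; the .replace('/', '_') normalization (also applied in the background thread below) flattens them, presumably so they match the model values recorded in the prompts table. Illustrative example with a made-up model ID:

model_id = 'bigscience/bloom-560m'  # hypothetical value of r_json['model_id']
print(model_id.replace('/', '_'))   # bigscience_bloom-560m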
@@ -28,7 +28,7 @@ def generate():
     if request.headers.get('cf-connecting-ip'):
         client_ip = request.headers.get('cf-connecting-ip')
     elif request.headers.get('x-forwarded-for'):
-        client_ip = request.headers.get('x-forwarded-for')
+        client_ip = request.headers.get('x-forwarded-for').split(',')[0]
     else:
         client_ip = request.remote_addr
 
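X-Forwarded-For can carry a comma-separated chain of addresses, one appended by each proxy; the left-most entry is the original client, which is what the split(',')[0] above keeps. For example:

xff = '203.0.113.7, 70.41.3.18, 150.172.238.178'  # sample header value
client_ip = xff.split(',')[0]  # '203.0.113.7' (adding .strip() would also drop stray whitespace)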
@@ -34,7 +34,7 @@ def generate_stats():
 
     if opts.average_generation_time_mode == 'database':
         average_generation_time = float(redis.get('average_generation_elapsed_sec'))
-        average_output_tokens = float(redis.get('average_output_tokens'))
+        # average_output_tokens = float(redis.get('average_output_tokens'))
         # average_generation_time_from_tps = (average_output_tokens / average_tps)
 
         # What to use in our math that calculates the wait time.
@@ -38,7 +38,7 @@ class MainBackgroundThread(Thread):
                 try:
                     r = requests.get(f'{opts.backend_url}/info', timeout=3, verify=opts.verify_ssl)
                     j = r.json()
-                    opts.running_model = j['model_id']
+                    opts.running_model = j['model_id'].replace('/', '_')
                     redis.set('backend_online', 1)
                     redis.set_dict('backend_info', j)
                 except Exception as e:
@@ -48,13 +48,15 @@ class MainBackgroundThread(Thread):
             else:
                 raise Exception
 
-            average_generation_elapsed_sec = weighted_average_column_for_model('prompts', 'generation_time', opts.running_model) or 0
+            # exclude_zeros=True filters out rows where an error message was returned. Previously, if there was an error, 0
+            # was entered into the column. The new code enters null instead but we need to be backwards compatible for now
+            average_generation_elapsed_sec = weighted_average_column_for_model('prompts', 'generation_time', opts.running_model, exclude_zeros=True) or 0
             redis.set('average_generation_elapsed_sec', average_generation_elapsed_sec)
 
             # overall = average_column_for_model('prompts', 'generation_time', opts.running_model)
             # print(f'Weighted: {average_generation_elapsed_sec}, overall: {overall}')
 
-            average_output_tokens = weighted_average_column_for_model('prompts', 'response_tokens', opts.running_model) or 0
+            average_output_tokens = weighted_average_column_for_model('prompts', 'response_tokens', opts.running_model, exclude_zeros=True) or 0
             redis.set('average_output_tokens', average_output_tokens)
 
             # overall = average_column_for_model('prompts', 'response_tokens', opts.running_model)