diff --git a/llm_server/config.py b/llm_server/config.py
index 3df9651..775e132 100644
--- a/llm_server/config.py
+++ b/llm_server/config.py
@@ -26,6 +26,7 @@ config_default_vars = {
     'admin_token': None,
     'openai_epose_our_model': False,
     'openai_force_no_hashes': True,
+    'include_system_tokens_in_stats': True
 }
 
 config_required_vars = ['token_limit', 'concurrent_gens', 'mode', 'llm_middleware_name']
diff --git a/llm_server/database/database.py b/llm_server/database/database.py
index 2ae0835..c913fa5 100644
--- a/llm_server/database/database.py
+++ b/llm_server/database/database.py
@@ -97,12 +97,17 @@ def average_column_for_model(table_name, column_name, model_name):
         cursor.close()
 
 
-def weighted_average_column_for_model(table_name, column_name, model_name, backend_name, backend_url, exclude_zeros: bool = False):
+def weighted_average_column_for_model(table_name, column_name, model_name, backend_name, backend_url, exclude_zeros: bool = False, include_system_tokens: bool = True):
+    if include_system_tokens:
+        sql = f"SELECT {column_name}, id FROM {table_name} WHERE model = %s AND backend_mode = %s AND backend_url = %s ORDER BY id DESC"
+    else:
+        sql = f"SELECT {column_name}, id FROM {table_name} WHERE model = %s AND backend_mode = %s AND backend_url = %s AND (token NOT LIKE 'SYSTEM__%%' OR token IS NULL) ORDER BY id DESC"
+
     conn = db_pool.connection()
     cursor = conn.cursor()
     try:
         try:
-            cursor.execute(f"SELECT {column_name}, id FROM {table_name} WHERE model = %s AND backend_mode = %s AND backend_url = %s AND (token NOT LIKE 'SYSTEM__%%' OR token IS NULL) ORDER BY id DESC", (model_name, backend_name, backend_url,))
+            cursor.execute(sql, (model_name, backend_name, backend_url,))
             results = cursor.fetchall()
         except Exception:
             traceback.print_exc()
diff --git a/llm_server/opts.py b/llm_server/opts.py
index 6893f6f..fee8a0e 100644
--- a/llm_server/opts.py
+++ b/llm_server/opts.py
@@ -31,3 +31,4 @@ backend_generate_request_timeout = 95
 admin_token = None
 openai_expose_our_model = False
 openai_force_no_hashes = True
+include_system_tokens_in_stats = True
diff --git a/llm_server/threads.py b/llm_server/threads.py
index 8f796dc..be72077 100644
--- a/llm_server/threads.py
+++ b/llm_server/threads.py
@@ -46,14 +46,14 @@ class MainBackgroundThread(Thread):
 
         # exclude_zeros=True filters out rows where an error message was returned. Previously, if there was an error, 0
         # was entered into the column. The new code enters null instead but we need to be backwards compatible for now.
-        average_generation_elapsed_sec = weighted_average_column_for_model('prompts', 'generation_time', opts.running_model, opts.mode, opts.backend_url, exclude_zeros=True) or 0
+        average_generation_elapsed_sec = weighted_average_column_for_model('prompts', 'generation_time', opts.running_model, opts.mode, opts.backend_url, exclude_zeros=True, include_system_tokens=opts.include_system_tokens_in_stats) or 0
         if average_generation_elapsed_sec:  # returns None on exception
             redis.set('average_generation_elapsed_sec', average_generation_elapsed_sec)
 
         # overall = average_column_for_model('prompts', 'generation_time', opts.running_model)
         # print(f'Weighted: {average_generation_elapsed_sec}, overall: {overall}')
 
-        average_output_tokens = weighted_average_column_for_model('prompts', 'response_tokens', opts.running_model, opts.mode, opts.backend_url, exclude_zeros=True) or 0
+        average_output_tokens = weighted_average_column_for_model('prompts', 'response_tokens', opts.running_model, opts.mode, opts.backend_url, exclude_zeros=True, include_system_tokens=opts.include_system_tokens_in_stats) or 0
         if average_generation_elapsed_sec:
             redis.set('average_output_tokens', average_output_tokens)
 
diff --git a/server.py b/server.py
index c1e2e51..b3eca0a 100644
--- a/server.py
+++ b/server.py
@@ -102,6 +102,7 @@ openai.api_key = opts.openai_api_key
 opts.admin_token = config['admin_token']
 opts.openai_expose_our_model = config['openai_epose_our_model']
 opts.openai_force_no_hashes = config['openai_force_no_hashes']
+opts.include_system_tokens_in_stats = config['include_system_tokens_in_stats']
 
 if opts.openai_expose_our_model and not opts.openai_api_key:
     print('If you set openai_epose_our_model to false, you must set your OpenAI key in openai_api_key.')