diff --git a/llm_server/netdata.py b/llm_server/netdata.py
index e73c2a0..44c68ea 100644
--- a/llm_server/netdata.py
+++ b/llm_server/netdata.py
@@ -1,4 +1,5 @@
 import json
+from datetime import datetime, timedelta
 
 import requests
 
@@ -26,4 +27,26 @@ def get_power_states():
             print('Failed to fetch Netdata metrics:', e)
             return output
         gpu_num += 1
-    return {'power_states': output}
+    return output
+
+
+def get_gpu_wh(gpu_id: int):
+    chart_name = f"nvidia_smi.gpu{gpu_id}_power"
+    now = datetime.now()
+    one_hour_ago = now - timedelta(hours=1)
+    num_seconds = int((now - one_hour_ago).total_seconds())
+    params = {
+        "chart": chart_name,
+        "after": int(one_hour_ago.timestamp()),
+        "before": int(now.timestamp()),
+        "points": num_seconds,
+        "group": "second",
+        "format": "json",
+        "options": "absolute|jsonwrap"
+    }
+    response = requests.get(f'{opts.netdata_root}/api/v1/data', params=params)
+    data = json.loads(response.text)
+    total_power_usage_watts = sum(point[1] for point in data['result']['data'])
+    # total_power_usage_watt_hours = round(total_power_usage_watts / 3600, 1)
+    total_power_usage_kwh = round(total_power_usage_watts / 1000 / 3600, 3)
+    return total_power_usage_kwh
diff --git a/llm_server/opts.py b/llm_server/opts.py
index 476545b..6c82553 100644
--- a/llm_server/opts.py
+++ b/llm_server/opts.py
@@ -18,4 +18,4 @@ show_num_prompts = True
 show_uptime = True
 average_generation_time_mode = 'database'
 show_total_output_tokens = True
-netdata_root = None
\ No newline at end of file
+netdata_root = None
diff --git a/llm_server/routes/v1/generate_stats.py b/llm_server/routes/v1/generate_stats.py
index 616cb83..ad70a8d 100644
--- a/llm_server/routes/v1/generate_stats.py
+++ b/llm_server/routes/v1/generate_stats.py
@@ -5,7 +5,7 @@ from llm_server import opts
 from llm_server.database import get_distinct_ips_24h, sum_column
 from llm_server.helpers import deep_sort
 from llm_server.llm.info import get_running_model
-from llm_server.netdata import get_power_states
+from llm_server.netdata import get_gpu_wh, get_power_states
 from llm_server.routes.cache import redis
 from llm_server.routes.queue import priority_queue
 from llm_server.routes.stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers, get_total_proompts, server_start_time
@@ -42,7 +42,14 @@ def generate_stats():
         raise Exception
 
     if opts.netdata_root:
-        netdata_stats = get_power_states()
+        netdata_stats = {}
+        power_states = get_power_states()
+        for gpu, power_state in power_states.items():
+            netdata_stats[gpu] = {
+                'power_state': power_state,
+                # 'wh_wasted_1_hr': get_gpu_wh(int(gpu.strip('gpu')))
+            }
+
     else:
         netdata_stats = {}
 