2023-08-24 21:36:00 -06:00
|
|
|
import json
|
2023-08-25 15:02:40 -06:00
|
|
|
from datetime import datetime, timedelta
|
2023-08-24 21:36:00 -06:00
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
from llm_server import opts
|
|
|
|
|
|
|
|
|
2023-08-24 21:56:15 -06:00
|
|
|
def get_power_states():
    """Query Netdata for the current power state of every GPU.

    GPUs are probed in order (``gpu0``, ``gpu1``, ...) through the
    ``nvidia_smi.gpu<N>_power_state`` chart. Probing stops at the first
    response whose HTTP status is not 200.

    Returns:
        dict: Maps ``'gpu<N>'`` to the integer left after lower-casing the
        active column's label and stripping ``p`` characters (a label such
        as ``P8`` would yield ``8``). A GPU whose newest row has no column
        equal to 1 is omitted. If a request or the parsing of a response
        fails, the error is printed and the results gathered so far are
        returned.
    """
    gpu_num = 0
    output = {}
    while True:
        url = f"{opts.netdata_root}/api/v1/data?chart=nvidia_smi.gpu{gpu_num}_power_state"
        try:
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                break
            data = json.loads(response.text)
            power_state_data = data['data'][0]
            # Column 0 is skipped (presumably the timestamp -- confirm against
            # the Netdata response); the first remaining column whose value
            # is 1 marks the active state.
            power_state = next(
                (
                    label
                    for label, value in zip(data['labels'][1:], power_state_data[1:])
                    if value == 1
                ),
                None,
            )
            if power_state is None:
                # Previously this case crashed on ``None.lower()``, was
                # reported as a fetch failure and aborted the whole scan.
                # Skip just this GPU and keep probing the remaining ones.
                print(f'No active power state reported for gpu{gpu_num}')
            else:
                output[f'gpu{gpu_num}'] = int(power_state.lower().strip('p'))
        except Exception as e:
            # Best-effort boundary: any network/parsing problem ends the scan
            # but still hands back whatever was collected.
            print('Failed to fetch Netdata metrics:', e)
            return output
        gpu_num += 1
    return output
|
|
|
|
|
|
|
|
|
|
|
|
def get_gpu_wh(gpu_id: int) -> float:
    """Return the energy reported for one GPU over the past hour, in kWh.

    Requests the last hour of the ``nvidia_smi.gpu<gpu_id>_power`` chart
    from Netdata, sums the second element of every returned row and divides
    by 1000 and by 3600.

    NOTE: despite the ``_wh`` suffix, the result is kilowatt-hours rounded
    to three decimals. The conversion assumes one sample per second, each
    expressed in watts -- TODO confirm against the Netdata chart units.

    Args:
        gpu_id: Index of the GPU, used to build the chart name.

    Returns:
        float: Estimated kWh for the last hour; ``0.0`` when no usable
        samples are returned.
    """
    window = timedelta(hours=1)
    now = datetime.now()
    one_hour_ago = now - window
    params = {
        "chart": f"nvidia_smi.gpu{gpu_id}_power",
        "after": int(one_hour_ago.timestamp()),
        "before": int(now.timestamp()),
        # One requested point per second of the window.
        "points": int(window.total_seconds()),
        "group": "second",
        "format": "json",
        "options": "absolute|jsonwrap",
    }
    response = requests.get(f'{opts.netdata_root}/api/v1/data', params=params, timeout=10)
    data = json.loads(response.text)
    # Netdata reports gaps in collection as null; skip them so a single
    # missing sample does not make sum() raise TypeError.
    total_power_usage_watts = sum(
        point[1] for point in data['result']['data'] if point[1] is not None
    )
    return round(total_power_usage_watts / 1000 / 3600, 3)
|