reorganize nvidia stats
This commit is contained in:
parent
c6edeb2b70
commit
d64152587c
|
@ -1,4 +1,5 @@
|
|||
import json
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import requests
|
||||
|
||||
|
@ -26,4 +27,26 @@ def get_power_states():
|
|||
print('Failed to fetch Netdata metrics:', e)
|
||||
return output
|
||||
gpu_num += 1
|
||||
return {'power_states': output}
|
||||
return output
|
||||
|
||||
|
||||
def get_gpu_wh(gpu_id: int):
    """Return the energy reported for one GPU over the past hour, in kWh.

    Requests the ``nvidia_smi.gpu{gpu_id}_power`` chart from the Netdata
    ``/api/v1/data`` endpoint under ``opts.netdata_root``, asking for one
    point per second across the last hour, then sums the second column of
    every returned point.

    NOTE: despite the ``wh`` in the function name, the returned value is
    divided by 1000 and by 3600, i.e. it is expressed in kilowatt-hours.
    NOTE(review): the conversion assumes each point is a wattage that holds
    for exactly one second; confirm Netdata really returns 1-second points
    for this chart.

    Args:
        gpu_id: Index of the GPU, used to build the Netdata chart name.

    Returns:
        The summed power converted to kWh, rounded to 3 decimal places.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            does not complete within the timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
        KeyError: If the response lacks the ``result``/``data`` keys.
    """
    chart_name = f"nvidia_smi.gpu{gpu_id}_power"
    now = datetime.now()
    one_hour_ago = now - timedelta(hours=1)
    num_seconds = int((now - one_hour_ago).total_seconds())
    params = {
        "chart": chart_name,
        "after": int(one_hour_ago.timestamp()),
        "before": int(now.timestamp()),
        "points": num_seconds,
        "group": "second",
        "format": "json",
        "options": "absolute|jsonwrap",
    }
    # Without a timeout, requests waits forever on a stalled server and
    # would hang whatever is generating the stats.
    response = requests.get(
        f'{opts.netdata_root}/api/v1/data',
        params=params,
        timeout=10,
    )
    data = json.loads(response.text)
    # A JSON null sample decodes to None and would make sum() raise
    # TypeError, so samples without a value are skipped.
    total_power_usage_watts = sum(
        point[1]
        for point in data['result']['data']
        if point[1] is not None
    )
    # watt-seconds -> kWh: /1000 for kilo, /3600 for seconds per hour.
    total_power_usage_kwh = round(total_power_usage_watts / 1000 / 3600, 3)
    return total_power_usage_kwh
|
||||
|
|
|
@ -18,4 +18,4 @@ show_num_prompts = True
|
|||
show_uptime = True
|
||||
average_generation_time_mode = 'database'
|
||||
show_total_output_tokens = True
|
||||
netdata_root = None
|
||||
netdata_root = None
|
||||
|
|
|
@ -5,7 +5,7 @@ from llm_server import opts
|
|||
from llm_server.database import get_distinct_ips_24h, sum_column
|
||||
from llm_server.helpers import deep_sort
|
||||
from llm_server.llm.info import get_running_model
|
||||
from llm_server.netdata import get_power_states
|
||||
from llm_server.netdata import get_gpu_wh, get_power_states
|
||||
from llm_server.routes.cache import redis
|
||||
from llm_server.routes.queue import priority_queue
|
||||
from llm_server.routes.stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers, get_total_proompts, server_start_time
|
||||
|
@ -42,7 +42,14 @@ def generate_stats():
|
|||
raise Exception
|
||||
|
||||
if opts.netdata_root:
|
||||
netdata_stats = get_power_states()
|
||||
netdata_stats = {}
|
||||
power_states = get_power_states()
|
||||
for gpu, power_state in power_states.items():
|
||||
netdata_stats[gpu] = {
|
||||
'power_state': power_state,
|
||||
# 'wh_wasted_1_hr': get_gpu_wh(int(gpu.strip('gpu')))
|
||||
}
|
||||
|
||||
else:
|
||||
netdata_stats = {}
|
||||
|
||||
|
|
Reference in New Issue