Track Nvidia power states through Netdata
This commit is contained in:
parent
01b8442b95
commit
16b986c206
|
@ -0,0 +1,22 @@
|
||||||
|
import json
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from llm_server import opts
|
||||||
|
|
||||||
|
|
||||||
|
def get_power_state():
    """Return the GPU power-state label reported by Netdata (e.g. ``"P0"``).

    Queries the configured Netdata chart (``opts.netdata_root`` +
    ``opts.netdata_metric``) and returns the label of the first dimension
    whose sample value equals 1 in the first returned data row.

    Returns:
        str | None: the matching label, or ``None`` when no dimension is
        active or the request/parse fails (best-effort: errors are printed,
        never raised to the caller).
    """
    url = f"{opts.netdata_root}/api/v1/data?chart={opts.netdata_metric}"
    try:
        response = requests.get(url, timeout=3)
        # Fail fast on HTTP errors instead of trying to JSON-decode an
        # error page; the exception is caught below like any other failure.
        response.raise_for_status()
        data = response.json()
        # First row of the chart data; presumably the most recent sample —
        # TODO confirm against the Netdata API point ordering.
        latest_sample = data['data'][0]
        # Column 0 is skipped (timestamp); remaining columns line up with
        # data['labels'] at the same index.
        for i, value in enumerate(latest_sample[1:], start=1):
            if value == 1:
                return data['labels'][i]
        return None
    except Exception as e:
        # Best-effort metric collection: a Netdata outage must never
        # crash the caller, so swallow, log, and report "unknown".
        print('Failed to fetch Netdata metrics:', e)
        return None
|
|
@ -18,3 +18,5 @@ show_num_prompts = True
|
||||||
show_uptime = True
|
show_uptime = True
|
||||||
average_generation_time_mode = 'database'
|
average_generation_time_mode = 'database'
|
||||||
show_total_output_tokens = True
|
show_total_output_tokens = True
|
||||||
|
netdata_root = None
|
||||||
|
netdata_metric = None
|
||||||
|
|
|
@ -5,6 +5,7 @@ from llm_server import opts
|
||||||
from llm_server.database import sum_column
|
from llm_server.database import sum_column
|
||||||
from llm_server.helpers import deep_sort
|
from llm_server.helpers import deep_sort
|
||||||
from llm_server.llm.info import get_running_model
|
from llm_server.llm.info import get_running_model
|
||||||
|
from llm_server.netdata import get_power_state
|
||||||
from llm_server.routes.cache import redis
|
from llm_server.routes.cache import redis
|
||||||
from llm_server.routes.queue import priority_queue
|
from llm_server.routes.queue import priority_queue
|
||||||
from llm_server.routes.stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers, get_total_proompts, server_start_time
|
from llm_server.routes.stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers, get_total_proompts, server_start_time
|
||||||
|
@ -40,6 +41,15 @@ def generate_stats():
|
||||||
else:
|
else:
|
||||||
raise Exception
|
raise Exception
|
||||||
|
|
||||||
|
if opts.netdata_root:
|
||||||
|
netdata_stats = {
|
||||||
|
'gpu0': {
|
||||||
|
'power_state': int(get_power_state().lower().strip('p'))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
netdata_stats = {}
|
||||||
|
|
||||||
output = {
|
output = {
|
||||||
'stats': {
|
'stats': {
|
||||||
'proompts_in_queue': proompters_in_queue,
|
'proompts_in_queue': proompters_in_queue,
|
||||||
|
@ -49,6 +59,7 @@ def generate_stats():
|
||||||
'average_generation_elapsed_sec': average_generation_time,
|
'average_generation_elapsed_sec': average_generation_time,
|
||||||
'average_tps': average_tps,
|
'average_tps': average_tps,
|
||||||
'tokens_generated': sum_column('prompts', 'response_tokens') if opts.show_total_output_tokens else None,
|
'tokens_generated': sum_column('prompts', 'response_tokens') if opts.show_total_output_tokens else None,
|
||||||
|
'nvidia': netdata_stats
|
||||||
},
|
},
|
||||||
'online': online,
|
'online': online,
|
||||||
'endpoints': {
|
'endpoints': {
|
||||||
|
|
|
@ -52,6 +52,8 @@ opts.show_num_prompts = config['show_num_prompts']
|
||||||
opts.show_uptime = config['show_uptime']
|
opts.show_uptime = config['show_uptime']
|
||||||
opts.backend_url = config['backend_url'].strip('/')
|
opts.backend_url = config['backend_url'].strip('/')
|
||||||
opts.show_total_output_tokens = config['show_total_output_tokens']
|
opts.show_total_output_tokens = config['show_total_output_tokens']
|
||||||
|
opts.netdata_root = config['netdata_root']
|
||||||
|
opts.netdata_metric = config['netdata_metric']
|
||||||
|
|
||||||
opts.verify_ssl = config['verify_ssl']
|
opts.verify_ssl = config['verify_ssl']
|
||||||
if not opts.verify_ssl:
|
if not opts.verify_ssl:
|
||||||
|
|
Reference in New Issue