# icinga2-checks/check_idrac_temps.py
#
# 90 lines
# 3.7 KiB
# Python
# Executable File

import argparse
import sys
import traceback
from checker import nagios
from checker.result import quit_check
from checker.snmp import get_snmp_value
from checker.units import c_to_f
# TODO: support iDRAC 8
# https://github.com/ilovepancakes95/idrac_snmp-grafana/blob/master/idrac-input.conf
# All four probes live under one shared OID prefix and differ only in the
# final index. NOTE(review): presumably this is the temperature-probe reading
# column of Dell's iDRAC MIB -- confirm against the MIB before adding probes.
_TEMP_PROBE_OID_PREFIX = '.1.3.6.1.4.1.674.10892.5.4.700.20.1.6.1'

INLET_TEMP_OID = _TEMP_PROBE_OID_PREFIX + '.1'
EXHAUST_TEMP_OID = _TEMP_PROBE_OID_PREFIX + '.2'
CPU1_TEMP_OID = _TEMP_PROBE_OID_PREFIX + '.3'
CPU2_TEMP_OID = _TEMP_PROBE_OID_PREFIX + '.4'
def get_snmp_temp(oid, ip, community):
    """Query one temperature probe over SNMP and return it converted by ``c_to_f``.

    The raw SNMP value is treated as tenths of a degree Celsius, which is the
    scaling the previous character-slicing implementation already assumed
    (``235`` -> ``23.5``). Dividing the numeric value by ten yields the same
    result for three-digit readings, and additionally handles readings below
    10.0 C (two digits, previously an ``IndexError``), readings of 100.0 C or
    more (previously truncated, e.g. ``1005`` -> ``10.0``) and negative
    readings.

    Args:
        oid: SNMP OID of the probe to read.
        ip: Address of the device to query.
        community: SNMP community string.

    Returns:
        The result of ``c_to_f`` applied to the reading in degrees Celsius.

    Raises:
        ValueError: If the SNMP value cannot be parsed as a number.
    """
    raw = str(get_snmp_value(oid, ip, community))
    # Scale numerically rather than by character position so the digit count
    # and sign of the reading no longer matter.
    return c_to_f(float(raw) / 10)
def main(args):
    """Read the iDRAC temperatures, grade them against thresholds and report.

    Inlet, exhaust and CPU1 temperatures are always fetched; CPU2 is fetched
    only when ``args.cpu_num`` is greater than 1. Each reading is compared
    with its warning/critical threshold (the hottest CPU is used for the CPU
    check), and the worst resulting state is handed to ``quit_check`` together
    with a text summary and per-probe performance data.

    Args:
        args: Parsed command-line namespace providing ``ip``, ``community``,
            ``cpu_num`` and the ``*_warn`` / ``*_crit`` thresholds.
    """
    host, community = args.ip, args.community

    # Query order is kept: inlet, exhaust, CPU1, then CPU2 if requested.
    inlet = get_snmp_temp(INLET_TEMP_OID, host, community)
    exhaust = get_snmp_temp(EXHAUST_TEMP_OID, host, community)
    cpus = [get_snmp_temp(CPU1_TEMP_OID, host, community)]
    if args.cpu_num > 1:
        cpus.append(get_snmp_temp(CPU2_TEMP_OID, host, community))
    has_second_cpu = len(cpus) > 1

    # (reading, warning threshold, critical threshold) for each graded probe.
    graded = (
        (inlet, args.inlet_warn, args.inlet_crit),
        (exhaust, args.exhaust_warn, args.exhaust_crit),
        (max(cpus), args.cpu_warn, args.cpu_crit),
    )
    status = nagios.STATE_OK
    for reading, warn_at, crit_at in graded:
        if reading >= crit_at:
            status = max(nagios.STATE_CRIT, status)
        elif reading >= warn_at:
            status = max(nagios.STATE_WARN, status)

    cpu2_text = f', CPU2: {cpus[1]}' if has_second_cpu else ''
    summary = f'CPU1: {cpus[0]}' + cpu2_text + f', Inlet: {inlet}, Exhaust: {exhaust}'

    def metric(value, warn, crit):
        # One performance-data entry; every probe is reported with unit 'F'.
        return {'value': value, 'warn': warn, 'crit': crit, 'unit': 'F'}

    metrics = {
        'cpu1': metric(cpus[0], args.cpu_warn, args.cpu_crit),
        'inlet': metric(inlet, args.inlet_warn, args.inlet_crit),
        'exhaust': metric(exhaust, args.exhaust_warn, args.exhaust_crit),
    }
    if has_second_cpu:
        metrics['cpu2'] = metric(cpus[1], args.cpu_warn, args.cpu_crit)

    quit_check(summary, status, perfdata=metrics)
if __name__ == "__main__":
    # Command-line options as (flag, add_argument keyword arguments), listed
    # in the order they should be registered and shown in --help.
    option_table = (
        ('--ip', dict(required=True, help='The iDRAC IP to query.')),
        ('--community', dict(default='public', help='Your SNMP community. Default: public')),
        ('--cpu-num', dict(type=int, default=1, help='Number of CPU nodes. Default: 1')),
        ('--inlet-warn', dict(type=int, default=108, help='System Board Inlet Temp warning level in F. Default: 108')),
        ('--inlet-crit', dict(type=int, default=116, help='System Board Inlet Temp critical level in F. Default: 116')),
        ('--exhaust-warn', dict(type=int, default=158, help='System Board Exhaust Temp warning level in F. Default: 158')),
        ('--exhaust-crit', dict(type=int, default=167, help='System Board Exhaust Temp critical level in F. Default: 167')),
        ('--cpu-warn', dict(type=int, default=186, help='CPU temp warning level in F. Default: 186')),
        ('--cpu-crit', dict(type=int, default=195, help='CPU temp critical level in F. Default: 195')),
    )

    cli = argparse.ArgumentParser()
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    cli_args = cli.parse_args()

    # Top-level boundary: any unexpected failure is reported with its
    # traceback and the process exits with the UNKNOWN state.
    try:
        main(cli_args)
    except Exception as err:
        print(f"UNKNOWN: exception\n{err}")
        print(traceback.format_exc())
        sys.exit(nagios.STATE_UNKNOWN)