fix NaNs on pve guest metrics, add opnsense bandwidth levels
This commit is contained in:
parent
afc0dcf781
commit
f4184c2c43
|
@ -9,7 +9,7 @@ import checker.nagios as nagios
|
||||||
from checker.markdown import list_to_markdown_table
|
from checker.markdown import list_to_markdown_table
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='Check network interface bandwidth utilization.')
|
parser = argparse.ArgumentParser(description='Check network interface bandwidth utilization.')
|
||||||
parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps.')
|
parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.')
|
||||||
parser.add_argument('--critical', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
|
parser.add_argument('--critical', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
|
||||||
parser.add_argument('--warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
|
parser.add_argument('--warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
|
||||||
parser.add_argument('--max', type=int, default=None, help='Set the max value the bandwidth can be. Useful for graphs and whatever.')
|
parser.add_argument('--max', type=int, default=None, help='Set the max value the bandwidth can be. Useful for graphs and whatever.')
|
||||||
|
|
|
@ -18,12 +18,14 @@ def main():
|
||||||
parser.add_argument('--opnsense', required=True, help='OPNsense hostname or IP address.')
|
parser.add_argument('--opnsense', required=True, help='OPNsense hostname or IP address.')
|
||||||
parser.add_argument('--key', required=True, help='OPNsense API key.')
|
parser.add_argument('--key', required=True, help='OPNsense API key.')
|
||||||
parser.add_argument('--secret', required=True, help='OPNsense API secret.')
|
parser.add_argument('--secret', required=True, help='OPNsense API secret.')
|
||||||
parser.add_argument('--interface', required=True,
|
parser.add_argument('--interface', required=True, help='Interface to check (e.g., lan). Can be something like "lan,wan"')
|
||||||
help='Interface to check (e.g., lan). Can be something like "lan,wan"')
|
|
||||||
parser.add_argument('--host', required=True, help='Address of the host to check.')
|
parser.add_argument('--host', required=True, help='Address of the host to check.')
|
||||||
parser.add_argument('--duration', default=10, type=int, help='How many seconds to gather statistics.')
|
parser.add_argument('--duration', default=10, type=int, help='How many seconds to gather statistics.')
|
||||||
parser.add_argument('--fail-empty', action='store_true',
|
parser.add_argument('--fail-empty', action='store_true', help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.')
|
||||||
help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.')
|
parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.')
|
||||||
|
parser.add_argument('--critical', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
|
||||||
|
parser.add_argument('--warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
|
||||||
|
parser.add_argument('--max', type=int, default=None, help='Set the max value the bandwidth can be. Useful for graphs and whatever.')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
check_result = {}
|
check_result = {}
|
||||||
|
@ -71,6 +73,7 @@ def main():
|
||||||
print(traffic_data)
|
print(traffic_data)
|
||||||
sys.exit(nagios.UNKNOWN)
|
sys.exit(nagios.UNKNOWN)
|
||||||
elif not len(traffic_data):
|
elif not len(traffic_data):
|
||||||
|
# There was no traffic.
|
||||||
check_result[name] = {
|
check_result[name] = {
|
||||||
'rate_in': 0,
|
'rate_in': 0,
|
||||||
'rate_out': 0,
|
'rate_out': 0,
|
||||||
|
@ -95,10 +98,8 @@ def main():
|
||||||
print(traffic_data)
|
print(traffic_data)
|
||||||
sys.exit(nagios.UNKNOWN)
|
sys.exit(nagios.UNKNOWN)
|
||||||
|
|
||||||
# TODO: figure out status
|
warn_value = (args.bandwidth * args.warn / 100)
|
||||||
print('OK: no metrics defined.')
|
crit_value = (args.bandwidth * args.critical / 100)
|
||||||
warn_value = 0
|
|
||||||
crit_value = 0
|
|
||||||
|
|
||||||
exit_code = nagios.OK
|
exit_code = nagios.OK
|
||||||
critical = []
|
critical = []
|
||||||
|
|
|
@ -64,10 +64,12 @@ def main():
|
||||||
# requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
|
# requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
|
||||||
try:
|
try:
|
||||||
pve_auth_ticket = requests.post(f'https://{pve_node_address}:8006/api2/json/access/ticket', data={"username": args.user, "password": args.password}).json()['data']['ticket']
|
pve_auth_ticket = requests.post(f'https://{pve_node_address}:8006/api2/json/access/ticket', data={"username": args.user, "password": args.password}).json()['data']['ticket']
|
||||||
response = requests.get(f'https://{pve_node_address}:8006/api2/json/nodes/{pve_node}/{args.type}/{args.host}/rrddata?timeframe=hour',
|
response = requests.get(
|
||||||
# headers={"Authorization": f'PVEAPIToken={args.user}={args.token}'},
|
f'https://{pve_node_address}:8006/api2/json/nodes/{pve_node}/{args.type}/{args.host}/rrddata?timeframe=hour',
|
||||||
cookies={'PVEAuthCookie': pve_auth_ticket},
|
# headers={"Authorization": f'PVEAPIToken={args.user}={args.token}'},
|
||||||
verify=args.verify)
|
cookies={'PVEAuthCookie': pve_auth_ticket},
|
||||||
|
verify=args.verify
|
||||||
|
)
|
||||||
except requests.exceptions.SSLError as e:
|
except requests.exceptions.SSLError as e:
|
||||||
print('UNSKNOWN: SSL error ', e)
|
print('UNSKNOWN: SSL error ', e)
|
||||||
print('Using cert:', args.verify)
|
print('Using cert:', args.verify)
|
||||||
|
@ -111,7 +113,14 @@ def main():
|
||||||
for metric, value in metrics_data.items():
|
for metric, value in metrics_data.items():
|
||||||
check_data[metric] = {}
|
check_data[metric] = {}
|
||||||
# Average the data. Expects the interval to be 1 minute
|
# Average the data. Expects the interval to be 1 minute
|
||||||
avg = np.round(np.average(value[-5:-1]), 2)
|
if len(value) > 0:
|
||||||
|
avg = np.round(np.average(value[-5:-1]), 2) # TODO: why [-5:-1]
|
||||||
|
check_data[metric]['nan'] = False
|
||||||
|
else:
|
||||||
|
# Prevent NaN errors
|
||||||
|
check_data[metric]['nan'] = True
|
||||||
|
check_data[metric]['value_str'] = 'NaN'
|
||||||
|
continue
|
||||||
check_data[metric]['value'] = avg
|
check_data[metric]['value'] = avg
|
||||||
|
|
||||||
if metrics_levels[metric]['type'] == 'filesize':
|
if metrics_levels[metric]['type'] == 'filesize':
|
||||||
|
@ -124,7 +133,7 @@ def main():
|
||||||
check_data[metric]['status'] = nagios.CRITICAL
|
check_data[metric]['status'] = nagios.CRITICAL
|
||||||
check_data[metric]['status_str'] = '[CRITICAL]'
|
check_data[metric]['status_str'] = '[CRITICAL]'
|
||||||
elif avg >= metrics_levels[metric]['warn']:
|
elif avg >= metrics_levels[metric]['warn']:
|
||||||
check_data[metric]['status'] = nagios.WARN
|
check_data[metric]['status'] = nagios.WARNING
|
||||||
check_data[metric]['status_str'] = '[WARNING]'
|
check_data[metric]['status_str'] = '[WARNING]'
|
||||||
else:
|
else:
|
||||||
check_data[metric]['status'] = nagios.OK
|
check_data[metric]['status'] = nagios.OK
|
||||||
|
@ -139,15 +148,27 @@ def main():
|
||||||
output_str = 'WARNING: '
|
output_str = 'WARNING: '
|
||||||
elif exit_code == nagios.CRITICAL:
|
elif exit_code == nagios.CRITICAL:
|
||||||
output_str = 'CRITICAL: '
|
output_str = 'CRITICAL: '
|
||||||
|
else:
|
||||||
|
output_str = 'UNKNOWN: '
|
||||||
|
|
||||||
|
# Check for NaNs
|
||||||
|
for metric, data in check_data.items():
|
||||||
|
if check_data[metric]['nan']:
|
||||||
|
output_str = 'UNKNOWN: '
|
||||||
|
exit_code = nagios.UNKNOWN
|
||||||
|
|
||||||
perf_data = []
|
perf_data = []
|
||||||
for metric, data in check_data.items():
|
for metric, data in check_data.items():
|
||||||
output_str = output_str + f"{metric} {data['value_str']}, "
|
output_str = output_str + f"{metric} {data['value_str']}, "
|
||||||
perf_data.append(f"'{metric}'={data['value']};{metrics_levels[metric]['warn']};{metrics_levels[metric]['crit']};;")
|
if not check_data[metric]['nan']:
|
||||||
|
perf_data.append(f"'{metric}'={data['value']};{metrics_levels[metric]['warn']};{metrics_levels[metric]['crit']};;")
|
||||||
|
|
||||||
print(output_str.strip(', ').strip(), end=('\n' if args.table else ''))
|
print(output_str.strip(', ').strip(), end=('\n' if args.table else ''))
|
||||||
|
|
||||||
perf_data_str = f'| {" ".join(perf_data)}'
|
if len(perf_data):
|
||||||
|
perf_data_str = f'| {" ".join(perf_data)}'
|
||||||
|
else:
|
||||||
|
perf_data_str = ''
|
||||||
|
|
||||||
if args.table:
|
if args.table:
|
||||||
output_table = [('Metric', 'Value', 'Status')]
|
output_table = [('Metric', 'Value', 'Status')]
|
||||||
|
|
Loading…
Reference in New Issue