fix NaNs on pve guest metrics, add opnsense bandwidth levels

This commit is contained in:
Cyberes 2023-05-06 14:51:50 -06:00
parent afc0dcf781
commit f4184c2c43
3 changed files with 39 additions and 17 deletions

View File

@ -9,7 +9,7 @@ import checker.nagios as nagios
from checker.markdown import list_to_markdown_table
parser = argparse.ArgumentParser(description='Check network interface bandwidth utilization.')
parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps.')
parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.')
parser.add_argument('--critical', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
parser.add_argument('--warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
parser.add_argument('--max', type=int, default=None, help='Set the max value the bandwidth can be. Useful for graphs and whatever.')

View File

@ -18,12 +18,14 @@ def main():
parser.add_argument('--opnsense', required=True, help='OPNsense hostname or IP address.')
parser.add_argument('--key', required=True, help='OPNsense API key.')
parser.add_argument('--secret', required=True, help='OPNsense API secret.')
parser.add_argument('--interface', required=True,
help='Interface to check (e.g., lan). Can be something like "lan,wan"')
parser.add_argument('--interface', required=True, help='Interface to check (e.g., lan). Can be something like "lan,wan"')
parser.add_argument('--host', required=True, help='Address of the host to check.')
parser.add_argument('--duration', default=10, type=int, help='How many seconds to gather statistics.')
parser.add_argument('--fail-empty', action='store_true',
help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.')
parser.add_argument('--fail-empty', action='store_true', help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.')
parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.')
parser.add_argument('--critical', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
parser.add_argument('--warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
parser.add_argument('--max', type=int, default=None, help='Set the max value the bandwidth can be. Useful for graphs and whatever.')
args = parser.parse_args()
check_result = {}
@ -71,6 +73,7 @@ def main():
print(traffic_data)
sys.exit(nagios.UNKNOWN)
elif not len(traffic_data):
# There was no traffic.
check_result[name] = {
'rate_in': 0,
'rate_out': 0,
@ -95,10 +98,8 @@ def main():
print(traffic_data)
sys.exit(nagios.UNKNOWN)
# TODO: figure out status
print('OK: no metrics defined.')
warn_value = 0
crit_value = 0
warn_value = (args.bandwidth * args.warn / 100)
crit_value = (args.bandwidth * args.critical / 100)
exit_code = nagios.OK
critical = []

View File

@ -64,10 +64,12 @@ def main():
# requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
try:
pve_auth_ticket = requests.post(f'https://{pve_node_address}:8006/api2/json/access/ticket', data={"username": args.user, "password": args.password}).json()['data']['ticket']
response = requests.get(f'https://{pve_node_address}:8006/api2/json/nodes/{pve_node}/{args.type}/{args.host}/rrddata?timeframe=hour',
# headers={"Authorization": f'PVEAPIToken={args.user}={args.token}'},
cookies={'PVEAuthCookie': pve_auth_ticket},
verify=args.verify)
response = requests.get(
f'https://{pve_node_address}:8006/api2/json/nodes/{pve_node}/{args.type}/{args.host}/rrddata?timeframe=hour',
# headers={"Authorization": f'PVEAPIToken={args.user}={args.token}'},
cookies={'PVEAuthCookie': pve_auth_ticket},
verify=args.verify
)
except requests.exceptions.SSLError as e:
print('UNSKNOWN: SSL error ', e)
print('Using cert:', args.verify)
@ -111,7 +113,14 @@ def main():
for metric, value in metrics_data.items():
check_data[metric] = {}
# Average the data. Expects the interval to be 1 minute
avg = np.round(np.average(value[-5:-1]), 2)
if len(value) > 0:
avg = np.round(np.average(value[-5:-1]), 2) # TODO: why [-5:-1]
check_data[metric]['nan'] = False
else:
# Prevent NaN errors
check_data[metric]['nan'] = True
check_data[metric]['value_str'] = 'NaN'
continue
check_data[metric]['value'] = avg
if metrics_levels[metric]['type'] == 'filesize':
@ -124,7 +133,7 @@ def main():
check_data[metric]['status'] = nagios.CRITICAL
check_data[metric]['status_str'] = '[CRITICAL]'
elif avg >= metrics_levels[metric]['warn']:
check_data[metric]['status'] = nagios.WARN
check_data[metric]['status'] = nagios.WARNING
check_data[metric]['status_str'] = '[WARNING]'
else:
check_data[metric]['status'] = nagios.OK
@ -139,15 +148,27 @@ def main():
output_str = 'WARNING: '
elif exit_code == nagios.CRITICAL:
output_str = 'CRITICAL: '
else:
output_str = 'UNKNOWN: '
# Check for NaNs
for metric, data in check_data.items():
if check_data[metric]['nan']:
output_str = 'UNKNOWN: '
exit_code = nagios.UNKNOWN
perf_data = []
for metric, data in check_data.items():
output_str = output_str + f"{metric} {data['value_str']}, "
perf_data.append(f"'{metric}'={data['value']};{metrics_levels[metric]['warn']};{metrics_levels[metric]['crit']};;")
if not check_data[metric]['nan']:
perf_data.append(f"'{metric}'={data['value']};{metrics_levels[metric]['warn']};{metrics_levels[metric]['crit']};;")
print(output_str.strip(', ').strip(), end=('\n' if args.table else ''))
perf_data_str = f'| {" ".join(perf_data)}'
if len(perf_data):
perf_data_str = f'| {" ".join(perf_data)}'
else:
perf_data_str = ''
if args.table:
output_table = [('Metric', 'Value', 'Status')]