fix NaNs on pve guest metrics, add opnsense bandwidth levels

2023-05-06 14:51:50 -06:00 · 2023-05-06 14:51:50 -06:00 · f4184c2c43
parent afc0dcf781
commit f4184c2c43
3 changed files with 39 additions and 17 deletions
--- a/check_bandwidth.py
+++ b/check_bandwidth.py
@@ -9,7 +9,7 @@ import checker.nagios as nagios
 from checker.markdown import list_to_markdown_table
 parser = argparse.ArgumentParser(description='Check network interface bandwidth utilization.')
-parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps.')
+parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.')
 parser.add_argument('--critical', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
 parser.add_argument('--warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
 parser.add_argument('--max', type=int, default=None, help='Set the max value the bandwidth can be. Useful for graphs and whatever.')
--- a/check_opnsense_traffic_for_host.py
+++ b/check_opnsense_traffic_for_host.py
@@ -18,12 +18,14 @@ def main():
    parser.add_argument('--opnsense', required=True, help='OPNsense hostname or IP address.')
    parser.add_argument('--key', required=True, help='OPNsense API key.')
    parser.add_argument('--secret', required=True, help='OPNsense API secret.')
-    parser.add_argument('--interface', required=True,
+    parser.add_argument('--interface', required=True, help='Interface to check (e.g., lan). Can be something like "lan,wan"')
                        help='Interface to check (e.g., lan). Can be something like "lan,wan"')
    parser.add_argument('--host', required=True, help='Address of the host to check.')
    parser.add_argument('--duration', default=10, type=int, help='How many seconds to gather statistics.')
-    parser.add_argument('--fail-empty', action='store_true',
+    parser.add_argument('--fail-empty', action='store_true', help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.')
-                        help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.')
+    parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.')
    parser.add_argument('--critical', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
    parser.add_argument('--warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
    parser.add_argument('--max', type=int, default=None, help='Set the max value the bandwidth can be. Useful for graphs and whatever.')
    args = parser.parse_args()
    check_result = {}
@@ -71,6 +73,7 @@ def main():
            print(traffic_data)
            sys.exit(nagios.UNKNOWN)
        elif not len(traffic_data):
            # There was no traffic.
            check_result[name] = {
                'rate_in': 0,
                'rate_out': 0,
@@ -95,10 +98,8 @@ def main():
                print(traffic_data)
                sys.exit(nagios.UNKNOWN)
-    # TODO: figure out status
+    warn_value = (args.bandwidth * args.warn / 100)
-    print('OK: no metrics defined.')
+    crit_value = (args.bandwidth * args.critical / 100)
    warn_value = 0
    crit_value = 0
    exit_code = nagios.OK
    critical = []
--- a/check_pve_guest_metrics.py
+++ b/check_pve_guest_metrics.py
@@ -64,10 +64,12 @@ def main():
    # requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
    try:
        pve_auth_ticket = requests.post(f'https://{pve_node_address}:8006/api2/json/access/ticket', data={"username": args.user, "password": args.password}).json()['data']['ticket']
-        response = requests.get(f'https://{pve_node_address}:8006/api2/json/nodes/{pve_node}/{args.type}/{args.host}/rrddata?timeframe=hour',
+        response = requests.get(
-                                # headers={"Authorization": f'PVEAPIToken={args.user}={args.token}'},
+            f'https://{pve_node_address}:8006/api2/json/nodes/{pve_node}/{args.type}/{args.host}/rrddata?timeframe=hour',
-                                cookies={'PVEAuthCookie': pve_auth_ticket},
+            # headers={"Authorization": f'PVEAPIToken={args.user}={args.token}'},
-                                verify=args.verify)
+            cookies={'PVEAuthCookie': pve_auth_ticket},
            verify=args.verify
        )
    except requests.exceptions.SSLError as e:
        print('UNSKNOWN: SSL error  ', e)
        print('Using cert:', args.verify)
@@ -111,7 +113,14 @@ def main():
    for metric, value in metrics_data.items():
        check_data[metric] = {}
        # Average the data. Expects the interval to be 1 minute
-        avg = np.round(np.average(value[-5:-1]), 2)
+        if len(value) > 0:
            avg = np.round(np.average(value[-5:-1]), 2)  # TODO: why [-5:-1]
            check_data[metric]['nan'] = False
        else:
            # Prevent NaN errors
            check_data[metric]['nan'] = True
            check_data[metric]['value_str'] = 'NaN'
            continue
        check_data[metric]['value'] = avg
        if metrics_levels[metric]['type'] == 'filesize':
@@ -124,7 +133,7 @@ def main():
            check_data[metric]['status'] = nagios.CRITICAL
            check_data[metric]['status_str'] = '[CRITICAL]'
        elif avg >= metrics_levels[metric]['warn']:
-            check_data[metric]['status'] = nagios.WARN
+            check_data[metric]['status'] = nagios.WARNING
            check_data[metric]['status_str'] = '[WARNING]'
        else:
            check_data[metric]['status'] = nagios.OK
@@ -139,15 +148,27 @@ def main():
        output_str = 'WARNING: '
    elif exit_code == nagios.CRITICAL:
        output_str = 'CRITICAL: '
    else:
        output_str = 'UNKNOWN: '
    # Check for NaNs
    for metric, data in check_data.items():
        if check_data[metric]['nan']:
            output_str = 'UNKNOWN: '
            exit_code = nagios.UNKNOWN
    perf_data = []
    for metric, data in check_data.items():
        output_str = output_str + f"{metric} {data['value_str']}, "
-        perf_data.append(f"'{metric}'={data['value']};{metrics_levels[metric]['warn']};{metrics_levels[metric]['crit']};;")
+        if not check_data[metric]['nan']:
            perf_data.append(f"'{metric}'={data['value']};{metrics_levels[metric]['warn']};{metrics_levels[metric]['crit']};;")
    print(output_str.strip(', ').strip(), end=('\n' if args.table else ''))
-    perf_data_str = f'| {" ".join(perf_data)}'
+    if len(perf_data):
        perf_data_str = f'| {" ".join(perf_data)}'
    else:
        perf_data_str = ''
    if args.table:
        output_table = [('Metric', 'Value', 'Status')]