diff --git a/check_bandwidth.py b/check_bandwidth.py index 5dee941..d84519f 100755 --- a/check_bandwidth.py +++ b/check_bandwidth.py @@ -95,7 +95,7 @@ def main(): for i in range(len(data)): interface = data[i][0] bandwidth_utilization = data[i][3] - state_code = get_state(bandwidth_utilization, warn_value, crit_value, 'lt') + state_code = get_state(bandwidth_utilization, warn_value, crit_value, 'ge') if state_code == nagios.STATE_CRIT: critical.append(interface) @@ -116,7 +116,7 @@ def main(): 'warn': warn_value, 'crit': crit_value, 'min': 0 if args.max else None, - 'unit': 'Mbps' + 'unit': 'Mb' } }) diff --git a/check_opnsense_traffic_for_host.py b/check_opnsense_traffic_for_host.py index d22d960..8e37802 100755 --- a/check_opnsense_traffic_for_host.py +++ b/check_opnsense_traffic_for_host.py @@ -26,9 +26,9 @@ def is_internet_traffic(ip): def get_traffic_top(args, interface): - response = requests.get(f'https://{args.opnsense}/api/diagnostics/traffic/top/{interface}', - headers={'Accept': 'application/json'}, auth=(args.key, args.secret), verify=False, - timeout=10) + response = get_with_retry(f'https://{args.opnsense}/api/diagnostics/traffic/top/{interface}', + headers={'Accept': 'application/json'}, auth=(args.key, args.secret), verify=False, + timeout=args.timeout) if response.status_code != 200: print(f'UNKNOWN: unable to query OPNsense API for {interface}: {response.status_code}\n{response.text}') sys.exit(nagios.UNKNOWN) @@ -45,9 +45,9 @@ def main(): parser.add_argument('--duration', default=10, type=int, help='How many seconds to gather statistics.') parser.add_argument('--fail-empty', action='store_true', help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.') parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.') - parser.add_argument('--bandwidth-critical', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.') + parser.add_argument('--bandwidth-crit', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.') parser.add_argument('--bandwidth-warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.') - parser.add_argument('--conn-critical', type=int, default=-1, help='Set critical level for number of connections. Default: -1 (disabled).') + parser.add_argument('--conn-crit', type=int, default=-1, help='Set critical level for number of connections. Default: -1 (disabled).') parser.add_argument('--conn-warn', type=int, default=-1, help='Set warning level for number of connections. Default: -1 (disabled).') parser.add_argument('--timeout', type=int, default=10, help='Timeout in seconds for the HTTP requests to OPNsense. Default: 10.') args = parser.parse_args() @@ -81,7 +81,7 @@ def main(): traffic_data = [] for _ in range(args.duration): # start_time = time.time() - response = get_with_retry('https://{args.opnsense}/api/diagnostics/traffic/top/{interface}', + response = get_with_retry(f'https://{args.opnsense}/api/diagnostics/traffic/top/{interface}', headers={'Accept': 'application/json'}, auth=(args.key, args.secret), verify=False, timeout=args.timeout) # end_time = time.time() @@ -124,56 +124,73 @@ def main(): sys.exit(nagios.UNKNOWN) warn_b_value = (args.bandwidth * args.bandwidth_warn / 100) * 1e+6 - crit_b_value = (args.bandwidth * args.bandwidth_critical / 100) * 1e+6 + crit_b_value = (args.bandwidth * args.bandwidth_crit / 100) * 1e+6 + conn_warn = args.conn_warn if args.conn_warn > -1 else None + conn_crit = args.conn_crit if args.conn_crit > -1 else None exit_code = nagios.OK critical = [] warn = [] - perf_data = [] + ok = [] + perf_data = {} output_table = [ - ('Host', 'Interface', 'Rate In', 'Rate Out', 'Cumulative In', 'Cumulative Out', 'Connections', 'Status') + ('Interface', 'Rate In', 'Rate Out', 'Cumulative In', 'Cumulative Out', 'Connections', 'Status') ] - def check_b(name, state, value): + def check(name, state, value, do_filesize=True): + # TODO: improve this its kinda messy + def do_value(v): + # TODO: make filesize() handle rate + return filesize(v) + 'ps' if do_filesize else v + nonlocal exit_code if state == nagios.STATE_CRIT: - critical.append((name, filesize(value))) - exit_code = max(nagios.CRITICAL, exit_code) - return '[CRITICAL]', exit_code + critical.append((name, do_value(value))) + exit_code = max(nagios.STATE_CRIT, exit_code) + return nagios.STATE_CRIT elif state == nagios.STATE_WARN: - warn.append((name, filesize(value))) + warn.append((name, do_value(value))) exit_code = max(nagios.STATE_WARN, exit_code) - return '[WARNING]', exit_code + return nagios.STATE_WARN + elif state == nagios.STATE_OK: + exit_code = max(nagios.STATE_OK, exit_code) + ok.append((name, do_value(value))) + return nagios.STATE_OK for name, data in check_result.items(): - status = '[OK]' - in_state = get_state(data['rate_in'], warn_b_value, crit_b_value, 'ge') - in_status, exit_code = check_b(name, in_state, data['rate_in']) + in_exit_code = check(f'{name}_rate_in', in_state, data['rate_in']) out_state = get_state(data['rate_out'], warn_b_value, crit_b_value, 'ge') - in_status, exit_code = check_b(name, out_state, data['rate_out']) + out_exit_code = check(f'{name}_rate_out', out_state, data['rate_out']) - conn_state = get_state(data['connections'], args.conn_warn, args.conn_critical, 'ge') - conn_status, exit_code = check_b(name, conn_state, data['connections']) + conn_state = get_state(data['connections'], conn_warn, conn_crit, 'ge') + conn_exit_code = check(f'{name}_connections', conn_state, data['connections'], do_filesize=False) - perf_data[f'{name}_rate_in'] = {'value': int(data["rate_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'unit': 'B'} - perf_data[f'{name}_rate_out'] = {'value': int(data["rate_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'unit': 'B'} - perf_data[f'{name}_cumulative_in'] = {'value': int(data["cumulative_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'unit': 'B'} - perf_data[f'{name}_cumulative_out'] = {'value': int(data["cumulative_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'unit': 'B'} - perf_data[f'{name}_connections'] = {'value': int(data["connections"]), 'warn': args.conn_warn, 'crit': args.conn_critical, 'unit': 'B'} + interface_status_code = max(in_exit_code, out_exit_code, conn_exit_code) + if interface_status_code == nagios.STATE_OK: + interface_status = '[OK]' + elif interface_status_code == nagios.STATE_WARN: + interface_status = '[WARNING]' + elif interface_status_code == nagios.STATE_CRIT: + interface_status = '[CRITICAL]' + else: + interface_status = '[UNKNOWN]' - output_table.append((args.host, name, filesize(data['rate_in']), filesize(data['rate_out']), + perf_data[f'{name}_rate_in'] = {'value': int(data["rate_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'} + perf_data[f'{name}_rate_out'] = {'value': int(data["rate_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'} + perf_data[f'{name}_cumulative_in'] = {'value': int(data["cumulative_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'} + perf_data[f'{name}_cumulative_out'] = {'value': int(data["cumulative_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'} + perf_data[f'{name}_connections'] = {'value': int(data["connections"]), 'warn': conn_warn, 'crit': conn_crit, 'min': 0} + + output_table.append((name, filesize(data['rate_in']), filesize(data['rate_out']), filesize(data['cumulative_in']), filesize(data['cumulative_out']), data['connections'], - status)) + interface_status)) - if exit_code == nagios.STATE_OK: - text_result = f'bandwidth is below {args.bandwidth} Mbps.' - else: - text_result = ', '.join([*critical, *warn]) + text_result = ', '.join(f'{name}: {rate}' for name, rate in [*critical, *warn, *ok]) if len(check_result) > 1: - text_result += list_to_markdown_table(output_table, align='left', seperator='!', borders=False) + text_result = text_result + '\n' + list_to_markdown_table(output_table, align='left', seperator='!', borders=False) print_icinga2_check_status(text_result, exit_code, perf_data) sys.exit(exit_code) diff --git a/checker/http.py b/checker/http.py index be11596..7bf3b58 100644 --- a/checker/http.py +++ b/checker/http.py @@ -25,6 +25,6 @@ def get_with_retry(url, retries=3, delay=1, **kwargs): except requests.exceptions.RequestException as e: if i == retries - 1: # raise e - print_icinga2_check_status(f'HTTP request failed after {i} retries: {url}\n{e}', nagios.STATE_UNKNOWN) + print_icinga2_check_status(f'HTTP request failed after {i + 1} retries: {url}\n{e}', nagios.STATE_UNKNOWN) sys.exit(nagios.STATE_UNKNOWN) sleep(delay)