more refactoring
This commit is contained in:
parent
3df7c5d1ef
commit
2ca897f0a0
|
@ -95,7 +95,7 @@ def main():
|
||||||
for i in range(len(data)):
|
for i in range(len(data)):
|
||||||
interface = data[i][0]
|
interface = data[i][0]
|
||||||
bandwidth_utilization = data[i][3]
|
bandwidth_utilization = data[i][3]
|
||||||
state_code = get_state(bandwidth_utilization, warn_value, crit_value, 'lt')
|
state_code = get_state(bandwidth_utilization, warn_value, crit_value, 'ge')
|
||||||
|
|
||||||
if state_code == nagios.STATE_CRIT:
|
if state_code == nagios.STATE_CRIT:
|
||||||
critical.append(interface)
|
critical.append(interface)
|
||||||
|
@ -116,7 +116,7 @@ def main():
|
||||||
'warn': warn_value,
|
'warn': warn_value,
|
||||||
'crit': crit_value,
|
'crit': crit_value,
|
||||||
'min': 0 if args.max else None,
|
'min': 0 if args.max else None,
|
||||||
'unit': 'Mbps'
|
'unit': 'Mb'
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
@ -26,9 +26,9 @@ def is_internet_traffic(ip):
|
||||||
|
|
||||||
|
|
||||||
def get_traffic_top(args, interface):
|
def get_traffic_top(args, interface):
|
||||||
response = requests.get(f'https://{args.opnsense}/api/diagnostics/traffic/top/{interface}',
|
response = get_with_retry(f'https://{args.opnsense}/api/diagnostics/traffic/top/{interface}',
|
||||||
headers={'Accept': 'application/json'}, auth=(args.key, args.secret), verify=False,
|
headers={'Accept': 'application/json'}, auth=(args.key, args.secret), verify=False,
|
||||||
timeout=10)
|
timeout=args.timeout)
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
print(f'UNKNOWN: unable to query OPNsense API for {interface}: {response.status_code}\n{response.text}')
|
print(f'UNKNOWN: unable to query OPNsense API for {interface}: {response.status_code}\n{response.text}')
|
||||||
sys.exit(nagios.UNKNOWN)
|
sys.exit(nagios.UNKNOWN)
|
||||||
|
@ -45,9 +45,9 @@ def main():
|
||||||
parser.add_argument('--duration', default=10, type=int, help='How many seconds to gather statistics.')
|
parser.add_argument('--duration', default=10, type=int, help='How many seconds to gather statistics.')
|
||||||
parser.add_argument('--fail-empty', action='store_true', help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.')
|
parser.add_argument('--fail-empty', action='store_true', help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.')
|
||||||
parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.')
|
parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.')
|
||||||
parser.add_argument('--bandwidth-critical', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
|
parser.add_argument('--bandwidth-crit', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
|
||||||
parser.add_argument('--bandwidth-warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
|
parser.add_argument('--bandwidth-warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
|
||||||
parser.add_argument('--conn-critical', type=int, default=-1, help='Set critical level for number of connections. Default: -1 (disabled).')
|
parser.add_argument('--conn-crit', type=int, default=-1, help='Set critical level for number of connections. Default: -1 (disabled).')
|
||||||
parser.add_argument('--conn-warn', type=int, default=-1, help='Set warning level for number of connections. Default: -1 (disabled).')
|
parser.add_argument('--conn-warn', type=int, default=-1, help='Set warning level for number of connections. Default: -1 (disabled).')
|
||||||
parser.add_argument('--timeout', type=int, default=10, help='Timeout in seconds for the HTTP requests to OPNsense. Default: 10.')
|
parser.add_argument('--timeout', type=int, default=10, help='Timeout in seconds for the HTTP requests to OPNsense. Default: 10.')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
@ -81,7 +81,7 @@ def main():
|
||||||
traffic_data = []
|
traffic_data = []
|
||||||
for _ in range(args.duration):
|
for _ in range(args.duration):
|
||||||
# start_time = time.time()
|
# start_time = time.time()
|
||||||
response = get_with_retry('https://{args.opnsense}/api/diagnostics/traffic/top/{interface}',
|
response = get_with_retry(f'https://{args.opnsense}/api/diagnostics/traffic/top/{interface}',
|
||||||
headers={'Accept': 'application/json'}, auth=(args.key, args.secret), verify=False,
|
headers={'Accept': 'application/json'}, auth=(args.key, args.secret), verify=False,
|
||||||
timeout=args.timeout)
|
timeout=args.timeout)
|
||||||
# end_time = time.time()
|
# end_time = time.time()
|
||||||
|
@ -124,56 +124,73 @@ def main():
|
||||||
sys.exit(nagios.UNKNOWN)
|
sys.exit(nagios.UNKNOWN)
|
||||||
|
|
||||||
warn_b_value = (args.bandwidth * args.bandwidth_warn / 100) * 1e+6
|
warn_b_value = (args.bandwidth * args.bandwidth_warn / 100) * 1e+6
|
||||||
crit_b_value = (args.bandwidth * args.bandwidth_critical / 100) * 1e+6
|
crit_b_value = (args.bandwidth * args.bandwidth_crit / 100) * 1e+6
|
||||||
|
conn_warn = args.conn_warn if args.conn_warn > -1 else None
|
||||||
|
conn_crit = args.conn_crit if args.conn_crit > -1 else None
|
||||||
|
|
||||||
exit_code = nagios.OK
|
exit_code = nagios.OK
|
||||||
critical = []
|
critical = []
|
||||||
warn = []
|
warn = []
|
||||||
perf_data = []
|
ok = []
|
||||||
|
perf_data = {}
|
||||||
|
|
||||||
output_table = [
|
output_table = [
|
||||||
('Host', 'Interface', 'Rate In', 'Rate Out', 'Cumulative In', 'Cumulative Out', 'Connections', 'Status')
|
('Interface', 'Rate In', 'Rate Out', 'Cumulative In', 'Cumulative Out', 'Connections', 'Status')
|
||||||
]
|
]
|
||||||
|
|
||||||
def check_b(name, state, value):
|
def check(name, state, value, do_filesize=True):
|
||||||
|
# TODO: improve this; it's kinda messy
|
||||||
|
def do_value(v):
|
||||||
|
# TODO: make filesize() handle rate
|
||||||
|
return filesize(v) + 'ps' if do_filesize else v
|
||||||
|
|
||||||
nonlocal exit_code
|
nonlocal exit_code
|
||||||
if state == nagios.STATE_CRIT:
|
if state == nagios.STATE_CRIT:
|
||||||
critical.append((name, filesize(value)))
|
critical.append((name, do_value(value)))
|
||||||
exit_code = max(nagios.CRITICAL, exit_code)
|
exit_code = max(nagios.STATE_CRIT, exit_code)
|
||||||
return '[CRITICAL]', exit_code
|
return nagios.STATE_CRIT
|
||||||
elif state == nagios.STATE_WARN:
|
elif state == nagios.STATE_WARN:
|
||||||
warn.append((name, filesize(value)))
|
warn.append((name, do_value(value)))
|
||||||
exit_code = max(nagios.STATE_WARN, exit_code)
|
exit_code = max(nagios.STATE_WARN, exit_code)
|
||||||
return '[WARNING]', exit_code
|
return nagios.STATE_WARN
|
||||||
|
elif state == nagios.STATE_OK:
|
||||||
|
exit_code = max(nagios.STATE_OK, exit_code)
|
||||||
|
ok.append((name, do_value(value)))
|
||||||
|
return nagios.STATE_OK
|
||||||
|
|
||||||
for name, data in check_result.items():
|
for name, data in check_result.items():
|
||||||
status = '[OK]'
|
|
||||||
|
|
||||||
in_state = get_state(data['rate_in'], warn_b_value, crit_b_value, 'ge')
|
in_state = get_state(data['rate_in'], warn_b_value, crit_b_value, 'ge')
|
||||||
in_status, exit_code = check_b(name, in_state, data['rate_in'])
|
in_exit_code = check(f'{name}_rate_in', in_state, data['rate_in'])
|
||||||
|
|
||||||
out_state = get_state(data['rate_out'], warn_b_value, crit_b_value, 'ge')
|
out_state = get_state(data['rate_out'], warn_b_value, crit_b_value, 'ge')
|
||||||
in_status, exit_code = check_b(name, out_state, data['rate_out'])
|
out_exit_code = check(f'{name}_rate_out', out_state, data['rate_out'])
|
||||||
|
|
||||||
conn_state = get_state(data['connections'], args.conn_warn, args.conn_critical, 'ge')
|
conn_state = get_state(data['connections'], conn_warn, conn_crit, 'ge')
|
||||||
conn_status, exit_code = check_b(name, conn_state, data['connections'])
|
conn_exit_code = check(f'{name}_connections', conn_state, data['connections'], do_filesize=False)
|
||||||
|
|
||||||
perf_data[f'{name}_rate_in'] = {'value': int(data["rate_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'unit': 'B'}
|
interface_status_code = max(in_exit_code, out_exit_code, conn_exit_code)
|
||||||
perf_data[f'{name}_rate_out'] = {'value': int(data["rate_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'unit': 'B'}
|
if interface_status_code == nagios.STATE_OK:
|
||||||
perf_data[f'{name}_cumulative_in'] = {'value': int(data["cumulative_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'unit': 'B'}
|
interface_status = '[OK]'
|
||||||
perf_data[f'{name}_cumulative_out'] = {'value': int(data["cumulative_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'unit': 'B'}
|
elif interface_status_code == nagios.STATE_WARN:
|
||||||
perf_data[f'{name}_connections'] = {'value': int(data["connections"]), 'warn': args.conn_warn, 'crit': args.conn_critical, 'unit': 'B'}
|
interface_status = '[WARNING]'
|
||||||
|
elif interface_status_code == nagios.STATE_CRIT:
|
||||||
output_table.append((args.host, name, filesize(data['rate_in']), filesize(data['rate_out']),
|
interface_status = '[CRITICAL]'
|
||||||
filesize(data['cumulative_in']), filesize(data['cumulative_out']), data['connections'],
|
|
||||||
status))
|
|
||||||
|
|
||||||
if exit_code == nagios.STATE_OK:
|
|
||||||
text_result = f'bandwidth is below {args.bandwidth} Mbps.'
|
|
||||||
else:
|
else:
|
||||||
text_result = ', '.join([*critical, *warn])
|
interface_status = '[UNKNOWN]'
|
||||||
|
|
||||||
|
perf_data[f'{name}_rate_in'] = {'value': int(data["rate_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'}
|
||||||
|
perf_data[f'{name}_rate_out'] = {'value': int(data["rate_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'}
|
||||||
|
perf_data[f'{name}_cumulative_in'] = {'value': int(data["cumulative_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'}
|
||||||
|
perf_data[f'{name}_cumulative_out'] = {'value': int(data["cumulative_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'}
|
||||||
|
perf_data[f'{name}_connections'] = {'value': int(data["connections"]), 'warn': conn_warn, 'crit': conn_crit, 'min': 0}
|
||||||
|
|
||||||
|
output_table.append((name, filesize(data['rate_in']), filesize(data['rate_out']),
|
||||||
|
filesize(data['cumulative_in']), filesize(data['cumulative_out']), data['connections'],
|
||||||
|
interface_status))
|
||||||
|
|
||||||
|
text_result = ', '.join(f'{name}: {rate}' for name, rate in [*critical, *warn, *ok])
|
||||||
if len(check_result) > 1:
|
if len(check_result) > 1:
|
||||||
text_result += list_to_markdown_table(output_table, align='left', seperator='!', borders=False)
|
text_result = text_result + '\n' + list_to_markdown_table(output_table, align='left', seperator='!', borders=False)
|
||||||
|
|
||||||
print_icinga2_check_status(text_result, exit_code, perf_data)
|
print_icinga2_check_status(text_result, exit_code, perf_data)
|
||||||
sys.exit(exit_code)
|
sys.exit(exit_code)
|
||||||
|
|
|
@ -25,6 +25,6 @@ def get_with_retry(url, retries=3, delay=1, **kwargs):
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
if i == retries - 1:
|
if i == retries - 1:
|
||||||
# raise e
|
# raise e
|
||||||
print_icinga2_check_status(f'HTTP request failed after {i} retries: {url}\n{e}', nagios.STATE_UNKNOWN)
|
print_icinga2_check_status(f'HTTP request failed after {i + 1} retries: {url}\n{e}', nagios.STATE_UNKNOWN)
|
||||||
sys.exit(nagios.STATE_UNKNOWN)
|
sys.exit(nagios.STATE_UNKNOWN)
|
||||||
sleep(delay)
|
sleep(delay)
|
||||||
|
|
Loading…
Reference in New Issue