icinga2-checks/check_opnsense_traffic_for_...

206 lines
10 KiB
Python
Raw Normal View History

2023-04-21 23:54:20 -06:00
#!/usr/bin/env python3
import argparse
import sys
2023-11-14 14:59:28 -07:00
import time
2023-04-21 23:54:20 -06:00
import traceback
2023-06-15 11:00:41 -06:00
from ipaddress import ip_network
2023-04-21 23:54:20 -06:00
import numpy as np
import requests
from urllib3.exceptions import InsecureRequestWarning
import checker.nagios as nagios
2023-06-15 11:00:41 -06:00
from checker import print_icinga2_check_status
from checker.http import fetch_with_retry
2023-06-15 11:00:41 -06:00
from checker.linuxfabric.base import get_state
2023-04-21 23:54:20 -06:00
from checker.markdown import list_to_markdown_table
2023-04-21 23:54:20 -06:00
from checker.units import filesize
2023-04-21 23:54:20 -06:00
def is_internet_traffic(ip):
    """Return True when *ip* is outside the RFC 1918 private IPv4 ranges.

    Args:
        ip: The address to classify, either as a string (e.g. ``"8.8.8.8"``)
            or as an ``ipaddress.IPv4Address`` / ``IPv6Address`` object.

    Returns:
        False if the address falls inside 10.0.0.0/8, 172.16.0.0/12 or
        192.168.0.0/16; True otherwise. IPv6 addresses never match these
        IPv4 networks, so they are always reported as True.

    Raises:
        ValueError: if *ip* cannot be parsed as an IP address.
    """
    # Imported locally because the file-level import only brings in ip_network.
    from ipaddress import ip_address

    private_networks = (
        ip_network("10.0.0.0/8"),
        ip_network("172.16.0.0/12"),
        ip_network("192.168.0.0/16"),
    )
    # Network membership tests need an address object; a bare string would
    # raise AttributeError, so normalise the input first.
    address = ip_address(ip)
    return not any(address in network for network in private_networks)
def get_traffic_top(args, interface):
    """Query the OPNsense ``traffic/top`` diagnostics endpoint for one interface.

    Args:
        args: Parsed command-line namespace; ``opnsense``, ``key``, ``secret``
            and ``timeout`` are read from it.
        interface: Interface identifier appended to the endpoint URL.

    Returns:
        The decoded JSON body of the response.

    Exits the process with ``nagios.UNKNOWN`` (after printing the status code
    and response body) when the API does not answer with HTTP 200.
    """
    url = f'https://{args.opnsense}/api/diagnostics/traffic/top/{interface}'
    credentials = (args.key, args.secret)
    reply = fetch_with_retry(url, headers={'Accept': 'application/json'}, auth=credentials, verify=False,
                             timeout=args.timeout)

    # Happy path first; anything else is reported as UNKNOWN below.
    if reply.status_code == 200:
        return reply.json()

    print(f'UNKNOWN: unable to query OPNsense API for {interface}: {reply.status_code}\n{reply.text}')
    sys.exit(nagios.UNKNOWN)
2023-04-21 23:54:20 -06:00
def main():
    """Sample a host's traffic on OPNsense interfaces and report an Icinga2 status.

    Steps:
      1. Parse the command line.
      2. Ask OPNsense for its interface list and map each requested interface
         name to the internal name used by the ``traffic/top`` endpoint.
      3. For each matched interface, poll ``traffic/top`` ``--duration`` times
         (about once per second), keeping the records whose address equals
         ``--host``.
      4. Average the samples, compare rates against the bandwidth thresholds
         and the connection count against the connection thresholds.
      5. Print the status line, optional table and perfdata, then exit with
         the resulting state.

    The function never returns normally; it always ends in ``sys.exit``.
    """
    parser = argparse.ArgumentParser(description='Check OPNsense network traffic for a host.')
    parser.add_argument('--opnsense', required=True, help='OPNsense hostname or IP address.')
    parser.add_argument('--key', required=True, help='OPNsense API key.')
    parser.add_argument('--secret', required=True, help='OPNsense API secret.')
    parser.add_argument('--interface', required=True, help='Interface to check (e.g., lan). Can be something like "lan,wan"')
    parser.add_argument('--host', required=True, help='Address of the host to check.')
    parser.add_argument('--duration', default=10, type=int, help='How many seconds to gather statistics.')
    parser.add_argument('--fail-empty', action='store_true', help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.')
    parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.')
    parser.add_argument('--bandwidth-crit', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
    parser.add_argument('--bandwidth-warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
    parser.add_argument('--conn-crit', type=int, default=-1, help='Set critical level for number of connections. Default: -1 (disabled).')
    parser.add_argument('--conn-warn', type=int, default=-1, help='Set warning level for number of connections. Default: -1 (disabled).')
    parser.add_argument('--timeout', type=int, default=10, help='Timeout in seconds for the HTTP requests to OPNsense. Default: 10.')
    args = parser.parse_args()

    check_result = {}
    interface_names = {}

    # Requests are made with verify=False, so silence the resulting warning.
    requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

    # Map interface names to their internal names
    interfaces_mapping = requests.get(f'https://{args.opnsense}/api/diagnostics/traffic/interface',
                                      headers={'Accept': 'application/json'}, auth=(args.key, args.secret),
                                      verify=False, timeout=args.timeout)
    if interfaces_mapping.status_code != 200:
        print(
            f'UNKNOWN: unable to query OPNsense API for interface mappings: {interfaces_mapping.status_code}\n{interfaces_mapping.text}')
        sys.exit(nagios.UNKNOWN)
    interfaces_mapping = interfaces_mapping.json()['interfaces']

    interfaces_to_check = set(args.interface.split(','))
    for name, interface in interfaces_mapping.items():
        if interface['name'] in interfaces_to_check:
            # Removing the name means each requested interface is matched at
            # most once, even if the API lists the same name twice.
            interfaces_to_check.remove(interface['name'])
            interface_names[interface['name']] = name

    if not interface_names:
        print_icinga2_check_status('did not find any valid interface names! Double-check the name.', nagios.STATE_UNKNOWN)
        sys.exit(nagios.UNKNOWN)

    # Here `name` is the user-facing interface name and `interface` is the
    # internal name the API expects in the URL and uses as the response key.
    for name, interface in interface_names.items():
        # Fetch the data
        traffic_data = []
        for _ in range(args.duration):
            start_time = time.time()
            response = fetch_with_retry(f'https://{args.opnsense}/api/diagnostics/traffic/top/{interface}',
                                        headers={'Accept': 'application/json'}, auth=(args.key, args.secret), verify=False,
                                        timeout=args.timeout)
            api_request_time = time.time() - start_time

            # Decode the body once and reuse it.
            payload = response.json()
            if isinstance(payload, list):
                print_icinga2_check_status(f'OPNsense returned wrong datatype:\n{response.text}', nagios.STATE_UNKNOWN)
                sys.exit(nagios.STATE_UNKNOWN)

            # The interface entry or its 'records' list may be missing or
            # null; treat that as "no records" so the sample is simply empty
            # instead of failing while iterating a non-iterable value.
            records = (payload.get(interface) or {}).get('records') or []
            for item in records:
                if item['address'] == args.host:
                    traffic_data.append(item)

            # Aim for roughly one sample per second, discounting the time the
            # request itself took.
            adjusted_sleep_duration = max(1 - api_request_time, 0)
            time.sleep(adjusted_sleep_duration)

        if not traffic_data and args.fail_empty:
            print_icinga2_check_status(f'interface or host not found in OPNsense API response. Raw response:\n{traffic_data}', nagios.STATE_UNKNOWN)
            sys.exit(nagios.UNKNOWN)
        elif not traffic_data:
            # There was no traffic.
            check_result[name] = {
                'rate_in': 0,
                'rate_out': 0,
                'cumulative_in': 0,
                'cumulative_out': 0,
                'connections': 0
            }
        else:
            try:
                # Every metric is the mean over all collected samples.
                check_result[name] = {
                    'rate_in': np.average([x['rate_bits_in'] for x in traffic_data]),
                    'rate_out': np.average([x['rate_bits_out'] for x in traffic_data]),
                    'cumulative_in': np.average([x['cumulative_bytes_in'] for x in traffic_data]),
                    'cumulative_out': np.average([x['cumulative_bytes_out'] for x in traffic_data]),
                    'connections': int(np.average([len(x['details']) for x in traffic_data]))
                }
            except Exception as e:
                print_icinga2_check_status(f'failed to parse traffic data: {e}\n{traceback.format_exc()}\n{traffic_data}', nagios.STATE_UNKNOWN)
                sys.exit(nagios.UNKNOWN)

    # Convert the percentage thresholds of the Mbps bandwidth into absolute
    # bits-per-second values.
    warn_b_value = int((args.bandwidth * args.bandwidth_warn / 100) * 1e+6)
    crit_b_value = int((args.bandwidth * args.bandwidth_crit / 100) * 1e+6)

    # -1 (the default) disables a connection threshold.
    conn_warn = args.conn_warn if args.conn_warn > -1 else None
    conn_crit = args.conn_crit if args.conn_crit > -1 else None

    exit_code = nagios.OK
    critical = []
    warn = []
    ok = []
    perf_data = {}

    output_table = [
        ('Interface', 'Rate In', 'Rate Out', 'Cumulative In', 'Cumulative Out', 'Connections', 'Status')
    ]

    def check(name, state, value, do_filesize=True):
        """Record one metric under its state bucket and raise the overall exit code.

        Returns the state that was recorded. States other than OK/WARN/CRIT
        are not handled and yield None.
        NOTE(review): assumes get_state only returns OK/WARN/CRIT - confirm.
        """
        # TODO: improve this its kinda messy
        def do_value(v):
            # TODO: make filesize() handle rate
            return filesize(v) + 'ps' if do_filesize else v

        nonlocal exit_code
        if state == nagios.STATE_CRIT:
            critical.append((name, do_value(value)))
            exit_code = max(nagios.STATE_CRIT, exit_code)
            return nagios.STATE_CRIT
        elif state == nagios.STATE_WARN:
            warn.append((name, do_value(value)))
            exit_code = max(nagios.STATE_WARN, exit_code)
            return nagios.STATE_WARN
        elif state == nagios.STATE_OK:
            exit_code = max(nagios.STATE_OK, exit_code)
            ok.append((name, do_value(value)))
            return nagios.STATE_OK

    for name, data in check_result.items():
        in_state = get_state(data['rate_in'], warn_b_value, crit_b_value, 'ge')
        in_exit_code = check(f'{name}_rate_in', in_state, data['rate_in'])

        out_state = get_state(data['rate_out'], warn_b_value, crit_b_value, 'ge')
        out_exit_code = check(f'{name}_rate_out', out_state, data['rate_out'])

        conn_state = get_state(data['connections'], conn_warn, conn_crit, 'ge')
        conn_exit_code = check(f'{name}_connections', conn_state, data['connections'], do_filesize=False)

        # The interface's status is the worst of its three metric states.
        interface_status_code = max(in_exit_code, out_exit_code, conn_exit_code)
        if interface_status_code == nagios.STATE_OK:
            interface_status = '[OK]'
        elif interface_status_code == nagios.STATE_WARN:
            interface_status = '[WARNING]'
        elif interface_status_code == nagios.STATE_CRIT:
            interface_status = '[CRITICAL]'
        else:
            interface_status = '[UNKNOWN]'

        # NOTE(review): the cumulative counters reuse the rate thresholds and
        # the rate values are labelled with unit 'B' - confirm both are intended.
        perf_data[f'{name}_rate_in'] = {'value': int(data["rate_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'}
        perf_data[f'{name}_rate_out'] = {'value': int(data["rate_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'}
        perf_data[f'{name}_cumulative_in'] = {'value': int(data["cumulative_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'}
        perf_data[f'{name}_cumulative_out'] = {'value': int(data["cumulative_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'}
        perf_data[f'{name}_connections'] = {'value': int(data["connections"]), 'warn': conn_warn, 'crit': conn_crit, 'min': 0}

        output_table.append((name, filesize(data['rate_in']), filesize(data['rate_out']),
                             filesize(data['cumulative_in']), filesize(data['cumulative_out']), data['connections'],
                             interface_status))

    # Worst findings first: critical, then warning, then ok.
    text_result = ', '.join(f'{name}: {rate}' for name, rate in [*critical, *warn, *ok])
    # The per-interface table is only added when more than one interface was checked.
    if len(check_result) > 1:
        text_result = text_result + '\n' + list_to_markdown_table(output_table, align='left', seperator='!', borders=False)

    print_icinga2_check_status(text_result, exit_code, perf_data)
    sys.exit(exit_code)
if __name__ == "__main__":
    # Script entry point: any exception escaping main() is turned into an
    # UNKNOWN check result that includes the traceback. sys.exit() calls made
    # inside main() raise SystemExit, which is not an Exception subclass and
    # therefore passes straight through.
    try:
        main()
    except Exception as exc:
        trace_text = traceback.format_exc()
        print_icinga2_check_status(f'exception "{exc}"\n{trace_text}', nagios.STATE_UNKNOWN)
        sys.exit(nagios.UNKNOWN)