icinga2-checks/check_opnsense_traffic_for_...

138 lines
6.3 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import sys
import traceback
from ipaddress import ip_network
import requests
from urllib3.exceptions import InsecureRequestWarning
import checker.nagios as nagios
from checker import print_icinga2_check_status
from checker.http import fetch_with_retry
from checker.linuxfabric.base import get_state
from checker.units import filesize
def is_internet_traffic(ip):
    """Return True when *ip* lies outside the RFC 1918 private IPv4 ranges.

    Args:
        ip: An ``ipaddress`` address object, or a string such as
            ``"192.168.1.10"`` which is parsed with ``ipaddress.ip_address``.

    Returns:
        ``False`` if the address is inside 10.0.0.0/8, 172.16.0.0/12 or
        192.168.0.0/16, otherwise ``True``. Only these three networks are
        consulted, so every other address (including IPv6 ones, which never
        match an IPv4 network) is reported as internet traffic.

    Raises:
        ValueError: If *ip* is a string that is not a valid IP address.
    """
    # Imported locally because the module header only pulls in ip_network.
    from ipaddress import ip_address

    if isinstance(ip, str):
        # Network containment needs an address object; raw text would fail
        # inside the ``in`` test with an AttributeError.
        ip = ip_address(ip)

    private_networks = (
        ip_network("10.0.0.0/8"),
        ip_network("172.16.0.0/12"),
        ip_network("192.168.0.0/16"),
    )
    return not any(ip in network for network in private_networks)
def main():
    """Check one host's traffic as reported by the OPNsense traffic endpoint.

    Parses the command line, fetches ``<opnsense>/traffic/<host>?seconds=<duration>``
    as JSON, compares the peak in/out rates and the connection count against
    the configured thresholds, then reports the result through
    ``print_icinga2_check_status`` and terminates the process with the
    resulting state as exit code.

    Exits with ``nagios.STATE_UNKNOWN`` when the response is empty and
    ``--fail-empty`` is set, or when the response lacks the expected fields.

    Raises:
        ValueError: If ``get_state`` yields a state other than OK/WARN/CRIT.
    """
    parser = argparse.ArgumentParser(description='Check OPNsense network traffic for a host.')
    parser.add_argument('--opnsense', required=True, help='OPNsense hostname or IP address.')
    parser.add_argument('--host', required=True, help='Address of the host to check.')
    parser.add_argument('--duration', default=60, type=int, help='How many seconds to gather statistics.')
    parser.add_argument('--fail-empty', action='store_true', help='If the API did not return any data, fail with UNKNOWN. Otherwise, assume that there was no traffic.')
    parser.add_argument('--bandwidth', type=float, required=True, help='Bandwidth speed in Mbps. Used to calculate percentage.')
    parser.add_argument('--bandwidth-crit', type=int, default=75, help='Critical if percent of bandwidth usage is greater than or equal to this.')
    parser.add_argument('--bandwidth-warn', type=int, default=50, help='Warning if percent of bandwidth usage is greater than or equal to this.')
    parser.add_argument('--conn-crit', type=int, default=-1, help='Set critical level for number of connections. Default: -1 (disabled).')
    parser.add_argument('--conn-warn', type=int, default=-1, help='Set warning level for number of connections. Default: -1 (disabled).')
    parser.add_argument('--timeout', type=int, default=10, help='Timeout in seconds for the HTTP requests to OPNsense. Default: 10.')
    args = parser.parse_args()

    # The request below is made with verify=False, so silence the warning
    # urllib3 would otherwise emit into the check output.
    requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
    # NOTE(review): --opnsense is used verbatim as the URL prefix, so it
    # presumably has to include the scheme (and port) - confirm with callers.
    response = fetch_with_retry(f'{args.opnsense}/traffic/{args.host}?seconds={args.duration}',
                                headers={'Accept': 'application/json'}, verify=False,
                                timeout=args.timeout)
    traffic_data = response.json()['data']

    # Truthiness instead of len(): a JSON null ("data": null) is handled like
    # an empty container rather than crashing with a TypeError.
    if not traffic_data:
        if args.fail_empty:
            print_icinga2_check_status(f'interface or host not found in OPNsense API response. Raw response:\n{traffic_data}', nagios.STATE_UNKNOWN)
            # Exit with the same state constant that was just reported.
            sys.exit(nagios.STATE_UNKNOWN)
        # There was no traffic.
        check_result = {
            'rate_in': 0,
            'rate_out': 0,
            'cumulative_in': 0,
            'cumulative_out': 0,
            'connections': 0,
        }
    else:
        try:
            check_result = {
                'rate_in': traffic_data['max_rate_in'],
                'rate_out': traffic_data['max_rate_out'],
                'cumulative_in': traffic_data['bytes_in'],
                'cumulative_out': traffic_data['bytes_out'],
                'connections': traffic_data['connections'],
            }
        except (LookupError, TypeError) as e:
            # Missing key, or "data" is not a mapping at all.
            print_icinga2_check_status(f'failed to parse traffic data: {e}\n{traceback.format_exc()}\n{traffic_data}', nagios.STATE_UNKNOWN)
            sys.exit(nagios.STATE_UNKNOWN)

    # Thresholds: the configured percentage of --bandwidth, scaled by 1e6.
    # NOTE(review): --bandwidth is documented as Mbps while the perf data below
    # labels the rates with unit 'B'; confirm the API rates and these
    # thresholds really share a unit (bits vs. bytes).
    warn_b_value = int((args.bandwidth * args.bandwidth_warn / 100) * 1e+6)
    crit_b_value = int((args.bandwidth * args.bandwidth_crit / 100) * 1e+6)
    # Negative values (the -1 default) disable the connection thresholds.
    conn_warn = args.conn_warn if args.conn_warn > -1 else None
    conn_crit = args.conn_crit if args.conn_crit > -1 else None

    exit_code = nagios.STATE_OK
    critical = []
    warn = []
    ok = []
    perf_data = {}
    buckets = {
        nagios.STATE_CRIT: critical,
        nagios.STATE_WARN: warn,
        nagios.STATE_OK: ok,
    }

    def check(name, state, value, do_filesize=True):
        """File *value* under the list matching *state* and raise exit_code."""
        nonlocal exit_code
        bucket = buckets.get(state)
        if bucket is None:
            # Fail loudly instead of returning None and breaking later on.
            raise ValueError(f'unexpected state {state!r} for metric {name}')
        # TODO: make filesize() handle rate
        shown = (filesize(value) + 'ps') if do_filesize else value
        bucket.append((name, shown))
        exit_code = max(state, exit_code)
        return state

    check('max_rate_in', get_state(check_result['rate_in'], warn_b_value, crit_b_value, 'ge'), check_result['rate_in'])
    check('max_rate_out', get_state(check_result['rate_out'], warn_b_value, crit_b_value, 'ge'), check_result['rate_out'])
    check('connections', get_state(check_result['connections'], conn_warn, conn_crit, 'ge'), check_result['connections'], do_filesize=False)

    perf_data['max_rate_in'] = {'value': int(check_result["rate_in"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'}
    perf_data['max_rate_out'] = {'value': int(check_result["rate_out"]), 'warn': warn_b_value, 'crit': crit_b_value, 'min': 0, 'unit': 'B'}
    # TODO: add warn/crit values for these metrics
    perf_data['cumulative_in'] = {'value': int(check_result["cumulative_in"]), 'warn': None, 'crit': None, 'min': 0, 'unit': 'B'}
    perf_data['cumulative_out'] = {'value': int(check_result["cumulative_out"]), 'warn': None, 'crit': None, 'min': 0, 'unit': 'B'}
    perf_data['connections'] = {'value': int(check_result["connections"]), 'warn': conn_warn, 'crit': conn_crit, 'min': 0}

    summary = [
        *critical,
        *warn,
        *ok,
        ('cumulative_in', filesize(check_result["cumulative_in"])),
        ('cumulative_out', filesize(check_result["cumulative_out"])),
    ]
    # Metric names are unique, so ordering by name alone gives the same result
    # as sorting the tuples while never comparing the mixed-type values.
    summary.sort(key=lambda item: item[0])
    text_result = ', '.join(f'{name}: {rate}' for name, rate in summary)

    print_icinga2_check_status(text_result, exit_code, perf_data)
    sys.exit(exit_code)
if __name__ == "__main__":
    # Top-level boundary: any uncaught exception is reported as UNKNOWN with
    # its traceback instead of surfacing as a bare Python stack trace.
    # sys.exit() inside main() raises SystemExit, which is not an Exception
    # subclass and therefore passes through untouched.
    try:
        main()
    except Exception as e:
        print_icinga2_check_status(f'exception "{e}"\n{traceback.format_exc()}', nagios.STATE_UNKNOWN)
        # Exit with the same state constant that was just reported.
        sys.exit(nagios.STATE_UNKNOWN)