icinga2-checks/check_bandwidth.py

94 lines
3.4 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import sys
import time
import traceback

import psutil

import checker.nagios as nagios
from checker.markdown import list_to_markdown_table
# Command-line interface. Thresholds are percentages of --bandwidth.
parser = argparse.ArgumentParser(
    description='Check network interface bandwidth utilization.',
)
parser.add_argument(
    '--bandwidth',
    type=float,
    required=True,
    help='Bandwidth speed in Mbps. Used to calculate percentage.',
)
parser.add_argument(
    '--critical',
    type=int,
    default=75,
    help='Critical if percent of bandwidth usage is greater than or equal to this.',
)
parser.add_argument(
    '--warn',
    type=int,
    default=50,
    help='Warning if percent of bandwidth usage is greater than or equal to this.',
)
parser.add_argument(
    '--max',
    type=int,
    default=None,
    help='Set the max value the bandwidth can be. Useful for graphs and whatever.',
)
parser.add_argument(
    '--ignore',
    default=None,
    help='Interface names to ignore.',
)
args = parser.parse_args()

# --ignore is a comma-separated list of interface names; absent/empty means
# no interface is skipped.
if args.ignore:
    ignore_interfaces = args.ignore.split(',')
else:
    ignore_interfaces = []
def main():
    """Measure per-interface throughput and report it as a Nagios-style check.

    Samples the psutil per-NIC byte counters twice, about one second apart,
    converts each interface's combined sent + received byte delta into
    megabits per second, and compares it against the warning and critical
    thresholds (``args.warn`` / ``args.critical`` percent of
    ``args.bandwidth``).

    Prints one summary line per non-empty state group, a table of all
    measured interfaces, and a perfdata line, then terminates the process via
    ``sys.exit`` with the most severe state seen (``nagios.OK``,
    ``nagios.WARNING`` or ``nagios.CRITICAL``).

    Reads the module-level ``args`` and ``ignore_interfaces``.
    """
    warn_value = args.bandwidth * args.warn / 100
    crit_value = args.bandwidth * args.critical / 100

    # Take exactly one before/after snapshot pair for ALL interfaces.
    # Sampling the baseline once and then sleeping inside the per-interface
    # loop would make the k-th interface's delta span k seconds (inflating
    # its rate) and make the check run one second per interface.
    counters_before = psutil.net_io_counters(pernic=True)
    started = time.monotonic()
    time.sleep(1)
    counters_after = psutil.net_io_counters(pernic=True)
    # Divide by the measured interval rather than assuming exactly 1 second.
    elapsed = time.monotonic() - started

    data = []
    for interface, before in counters_before.items():
        if interface in ignore_interfaces:
            continue

        after = counters_after.get(interface)
        if after is None:
            # The interface disappeared between the two samples (e.g. a
            # short-lived virtual NIC); there is nothing to measure.
            continue

        delta_bytes = (
            (after.bytes_sent - before.bytes_sent)
            + (after.bytes_recv - before.bytes_recv)
        )
        # A counter reset between samples would give a negative delta;
        # report that as zero traffic instead of a negative rate.
        delta_bytes = max(delta_bytes, 0)

        # Bytes -> bits, per second, scaled to megabits (Mbps).
        bandwidth_utilization = (8 * delta_bytes) / (elapsed * 1000 * 1000)
        data.append([interface, bandwidth_utilization, 'none'])

    exit_code = nagios.OK
    critical = []
    warn = []
    ok = []
    perf_data = []

    # Optional "min;max;" perfdata fields, emitted only when --max is set
    # to a non-zero value.
    max_fields = f'0;{args.max};' if args.max else ''

    for row in data:
        interface, bandwidth_utilization = row[0], row[1]
        if bandwidth_utilization >= crit_value:
            critical.append(interface)
            state = 'critical'
            exit_code = nagios.CRITICAL
        elif bandwidth_utilization >= warn_value:
            warn.append(interface)
            state = 'warning'
            # Never downgrade an already-critical result.
            if exit_code < nagios.WARNING:
                exit_code = nagios.WARNING
        else:
            ok.append(interface)
            state = 'ok'
        row[2] = f'[{state.upper()}]'
        # NOTE(review): the perfdata unit label is 'MB' although the value is
        # in Mbps; it is left unchanged so the emitted perfdata stays the same.
        perf_data.append(
            f'{interface}={round(bandwidth_utilization, 2)}MB;'
            f'{warn_value};{crit_value};{max_fields} '
        )

    if ok:
        print(f'OK: {", ".join(ok)}')
    if warn:
        print(f'WARNING: {", ".join(warn)}')
    if critical:
        print(f'CRITICAL: {", ".join(critical)}')

    table = [(name, f'{round(mbps, 2)} Mbps', state) for name, mbps, state in data]
    table.insert(0, ('Interface', 'Bandwidth', 'State'))
    print(list_to_markdown_table(table, align='left', seperator='!', borders=False))
    print(f' |{"".join(perf_data)}')
    sys.exit(exit_code)
if __name__ == '__main__':
    # Script entry point: any unexpected failure inside the check is reported
    # as UNKNOWN together with its traceback. sys.exit() called by main()
    # raises SystemExit, which is not an Exception subclass and therefore
    # passes through this handler untouched.
    try:
        main()
    except Exception as e:
        trace_text = traceback.format_exc()
        print(f'UNKNOWN: exception "{e}"')
        print(trace_text)
        sys.exit(nagios.UNKNOWN)