icinga2-checks/check_bandwidth.py

142 lines
4.8 KiB
Python
Raw Normal View History

2023-04-21 23:54:18 -06:00
#!/usr/bin/env python3
import argparse
import re
2023-04-21 23:54:18 -06:00
import sys
import time
2023-04-21 23:54:18 -06:00
import traceback
2023-04-21 23:54:18 -06:00
import psutil
2023-04-21 23:54:18 -06:00
import checker.nagios as nagios
2023-04-21 23:54:18 -06:00
from checker.markdown import list_to_markdown_table
# Command-line interface. Parsing happens at module level because main()
# reads the resulting module-level ``args`` object.
parser = argparse.ArgumentParser(
    description='Check network interface bandwidth utilization.',
)
parser.add_argument(
    '--bandwidth',
    type=float,
    required=True,
    help='Bandwidth speed in Mbps. Used to calculate percentage.',
)
# Thresholds are percentages of --bandwidth.
parser.add_argument(
    '--critical',
    type=int,
    default=75,
    help='Critical if percent of bandwidth usage is greater than or equal to this.',
)
parser.add_argument(
    '--warn',
    type=int,
    default=50,
    help='Warning if percent of bandwidth usage is greater than or equal to this.',
)
parser.add_argument(
    '--max',
    type=int,
    default=None,
    help='Set the max value the bandwidth can be. Useful for graphs and whatever.',
)
# Two ways to exclude interfaces: an explicit name list and a regex.
parser.add_argument(
    '--ignore',
    nargs='*',
    default=[],
    help='Interface names to ignore, separated by a space.',
)
parser.add_argument(
    '--ignore-re',
    default=None,
    help='Regex matching interface names to ignore.',
)
args = parser.parse_args()
# Icinga2 will merge the args to one string, so a lone --ignore value is
# split on spaces back into individual interface names.
if len(args.ignore) == 1:
    (merged_names,) = args.ignore
    args.ignore = merged_names.split(' ')

# Compile the optional ignore pattern once up front; None means no regex
# filtering was requested.
ignore_re = re.compile(args.ignore_re) if args.ignore_re else None
def get_interface_data(interface: str, data: list):
    """Return the first row of *data* whose first element equals *interface*.

    Rows are indexable sequences with the interface name at index 0.
    ``None`` is returned when no row matches.
    """
    return next((row for row in data if row[0] == interface), None)
def get_network_traffic(interface):
    """Return the current psutil I/O counters for one network interface.

    Args:
        interface: Name of the interface to look up.

    Raises:
        ValueError: If *interface* is not among the per-NIC counters that
            psutil reports.
    """
    per_nic = psutil.net_io_counters(pernic=True)
    if interface not in per_nic:
        raise ValueError(f"Interface '{interface}' not found")
    return per_nic[interface]
def calculate_network_traffic(interface, interval=1):
    """Measure the send and receive rate of *interface* over a short window.

    The interface's byte counters are sampled twice, roughly *interval*
    seconds apart, and the difference is divided by the measured elapsed
    time.

    Args:
        interface: Name of the interface to sample.
        interval: Minimum length of the sampling window in seconds.

    Returns:
        A ``(sent, received)`` tuple of rates in megabits per second.

    Raises:
        ValueError: Propagated from ``get_network_traffic`` when the
            interface is not found.
    """
    initial_traffic = get_network_traffic(interface)
    start_time = time.perf_counter()

    # Sleep instead of spinning on perf_counter(): a busy-wait pins a CPU core
    # for the whole interval. Accuracy is unaffected because the rate is
    # divided by the elapsed time actually measured below, not by `interval`.
    if interval > 0:
        time.sleep(interval)

    final_traffic = get_network_traffic(interface)
    # Take the end timestamp right after the second sample so the window
    # matches the counters it is applied to.
    elapsed_time = time.perf_counter() - start_time
    if elapsed_time <= 0:
        # No measurable time passed (e.g. interval <= 0 on a coarse clock);
        # report no traffic rather than dividing by zero.
        return 0.0, 0.0

    sent_bytes = final_traffic.bytes_sent - initial_traffic.bytes_sent
    recv_bytes = final_traffic.bytes_recv - initial_traffic.bytes_recv
    sent_speed = sent_bytes / elapsed_time
    recv_speed = recv_bytes / elapsed_time

    # Convert bytes per second to megabits per second.
    # NOTE(review): the divisor is 1024 * 1024 (binary mega), while link
    # speeds are usually quoted in decimal mega (10**6) -- confirm which one
    # --bandwidth is meant to be compared against.
    sent_speed_mbps = sent_speed * 8 / (1024 * 1024)
    recv_speed_mbps = recv_speed * 8 / (1024 * 1024)
    return sent_speed_mbps, recv_speed_mbps
2023-04-21 23:54:18 -06:00
def main():
    """Sample every non-ignored interface, print the check report, and exit.

    Output, in order:
      1. a status line listing the interfaces that determined the status,
      2. a table of every sampled interface with its bandwidth and state,
      3. a ``|``-prefixed performance-data line.

    The process then exits via ``sys.exit`` with the worst state seen
    (``nagios.OK``, ``nagios.WARNING`` or ``nagios.CRITICAL``).
    """
    # Thresholds are percentages of the configured link bandwidth.
    warn_value = (args.bandwidth * args.warn / 100)
    crit_value = (args.bandwidth * args.critical / 100)
    # Optional "min;max;" tail of each perfdata entry; it is the same for
    # every interface, so build it once.
    max_suffix = f'0;{args.max};' if args.max else ''

    # Combined (sent + received) rate per interface, in discovery order.
    # NOTE: interfaces are sampled one after another, so the run time grows
    # with the number of interfaces that are not ignored.
    utilization = {}
    for interface in psutil.net_io_counters(pernic=True):
        if interface in args.ignore or (ignore_re and ignore_re.search(interface)):
            continue
        sent_speed, recv_speed = calculate_network_traffic(interface)
        utilization[interface] = sent_speed + recv_speed

    exit_code = nagios.OK
    critical = []
    warn = []
    ok = []
    perf_data = []
    table = [('Interface', 'Bandwidth', 'State')]
    for interface, bandwidth_utilization in utilization.items():
        if bandwidth_utilization >= crit_value:
            critical.append(interface)
            state = 'critical'
            exit_code = nagios.CRITICAL
        elif bandwidth_utilization >= warn_value:
            warn.append(interface)
            state = 'warning'
            # Never downgrade an exit code already raised by another interface.
            if exit_code < nagios.WARNING:
                exit_code = nagios.WARNING
        else:
            ok.append(interface)
            state = 'ok'

        rounded = round(bandwidth_utilization, 2)
        table.append((interface, f'{rounded} Mbps', f'[{state.upper()}]'))
        perf_data.append(f'{interface}={rounded}Mbps;{warn_value};{crit_value};{max_suffix} ')

    # The status line names only the interfaces relevant to the final status.
    if exit_code == nagios.CRITICAL:
        status = 'CRITICAL'
        listed_interfaces = [*critical, *warn]
    elif exit_code == nagios.WARNING:
        status = 'WARNING'
        listed_interfaces = warn
    else:
        status = 'OK'
        listed_interfaces = ok

    listed_glances = [
        f'{name}: {round(utilization[name], 2)}Mbps' for name in listed_interfaces
    ]
    print(f'{status} - {", ".join(listed_glances)}')

    print(list_to_markdown_table(table, align='left', seperator='!', borders=False))

    # Everything after the pipe is performance data.
    print(f'|{"".join(perf_data)}')

    sys.exit(exit_code)
if __name__ == "__main__":
    # main() finishes through sys.exit(); SystemExit is not an Exception
    # subclass, so regular exits are not intercepted by this handler.
    try:
        main()
    except Exception as err:
        # Any unexpected failure is reported as UNKNOWN, followed by the full
        # traceback for debugging.
        failure_trace = traceback.format_exc()
        print(f'UNKNOWN: exception "{err}"')
        print(failure_trace)
        sys.exit(nagios.UNKNOWN)