diff --git a/check_monitor_bot.py b/check_monitor_bot.py index 7b483e8..efe6d5d 100755 --- a/check_monitor_bot.py +++ b/check_monitor_bot.py @@ -1,20 +1,24 @@ #!/usr/bin/env python3 import argparse +import re import sys import requests +from bs4 import BeautifulSoup from checker import nagios parser = argparse.ArgumentParser(description='') -parser.add_argument('--metrics-endpoint', required=True, help='Target URL to scrape.') +parser.add_argument('--metrics-endpoint', required=True, help='Monitor bot URL to scrape.') parser.add_argument('--domain', required=True, help='Our domain.') parser.add_argument('--ignore', nargs='*', default=[], help='Ignore these hosts.') parser.add_argument('--timeout', type=float, default=90, help='Request timeout limit.') parser.add_argument('--warn', type=float, default=20, help='Manually set warn level for response time in seconds.') parser.add_argument('--crit', type=float, default=30, help='Manually set critical levelfor response time in seconds.') -parser.add_argument('--warn-percent', type=int, default=30, help='Manually set warn level for the percentage of hosts that must fail the checks.') -parser.add_argument('--crit-percent', type=int, default=50, help='Manually set crit level for the percentage of hosts that must fail the checks.') +parser.add_argument('--warn-percent', type=int, default=30, + help='Manually set warn level for the percentage of hosts that must fail the checks.') +parser.add_argument('--crit-percent', type=int, default=50, + help='Manually set crit level for the percentage of hosts that must fail the checks.') args = parser.parse_args() @@ -22,30 +26,30 @@ def make_percent(num: float): return int(num * 100) -def main(): - from bs4 import BeautifulSoup - import re +def get_sec(time_str): + """Get seconds from time.""" + h, m, s = time_str.split(':') + return int(h) * 3600 + int(m) * 60 + int(s) + +def ms_to_s(s): + min_m = re.match(r'^(\d+)m([\d.]+)s', s) + if min_m: + return get_sec(f'0:{min_m.group(1)}:{int(float(min_m.group(2)))}') + elif s.endswith('ms'): + return float('0.' + s.strip('ms')) + elif s.endswith('s'): + return float(s.strip('ms')) + + +def main(): # Split the values since icinga will quote the args if len(args.ignore) == 1: args.ignore = args.ignore[0].strip(' ').split(' ') - def get_sec(time_str): - """Get seconds from time.""" - h, m, s = time_str.split(':') - return int(h) * 3600 + int(m) * 60 + int(s) - - def ms_to_s(s): - min_m = re.match(r'^(\d+)m([\d.]+)s', s) - if min_m: - return get_sec(f'0:{min_m.group(1)}:{int(float(min_m.group(2)))}') - elif s.endswith('ms'): - return float('0.' + s.strip('ms')) - elif s.endswith('s'): - return float(s.strip('ms')) - r = requests.get(args.metrics_endpoint) if r.status_code != 200: + print(f'UNKNOWN: monitor bot endpoint returned a bad status code {r.status_code}') sys.exit(nagios.UNKNOWN) soup = BeautifulSoup(r.text, 'html.parser') tooltips = soup.find_all('span', {'class', 'tooltip'}) @@ -103,7 +107,8 @@ def main(): if len(crit_failed_hosts) / len(data.keys()) >= (args.crit_percent / 100): # CRIT takes precedence exit_code = nagios.CRITICAL - print(f'CRITICAL: {make_percent(len(crit_failed_hosts) / len(data.keys()))}% of hosts are marked as critical.') + print( + f'CRITICAL: {make_percent(len(crit_failed_hosts) / len(data.keys()))}% of hosts are marked as critical.') elif len(warn_failed_hosts) / len(data.keys()) >= (args.warn_percent / 100): exit_code = nagios.WARNING print(f'WARN: {make_percent(len(warn_failed_hosts) / len(data.keys()))}% of hosts are marked as warn.')