diff --git a/fail-overdue.py b/fail-overdue.py index 490862e..969d00f 100644 --- a/fail-overdue.py +++ b/fail-overdue.py @@ -19,44 +19,53 @@ def main(args): current_time = time.time() # Get all checks - response = requests.get(url + "/objects/services", auth=(args.username, args.password), verify=not args.insecure) - response.raise_for_status() + objects = ["hosts", "services"] + for object in objects: + response = requests.get(url + "/objects/" + object, auth=(args.username, args.password), verify=not args.insecure) + response.raise_for_status() - checks = response.json()["results"] + checks = response.json()["results"] - # Loop through all checks - for check in checks: - last_check_time = check["attrs"]["last_check"] - check_interval = check["attrs"]["check_interval"] - check_timeout = check["attrs"]["check_timeout"] or args.default_timeout + # Loop through all checks + for check in checks: + last_check_time = check["attrs"]["last_check"] + check_interval = check["attrs"]["check_interval"] + check_timeout = check["attrs"]["check_timeout"] or args.default_timeout - # If the check is overdue. - if current_time - last_check_time > check_interval + check_timeout and check['attrs']['state'] != 3: - headers = {'Accept': 'application/json', 'Content-Type': 'application/json'} - next_check_time = last_check_time + check_interval + check_timeout - check_filter = f'host.name=="{check["attrs"]["host_name"]}" && service.name=="{check["attrs"]["name"]}"' - # Set the check to unknown - data = { - "type": check['type'], - "filter": check_filter, - "exit_status": 3, - "plugin_output": f"", - } + # If the check is overdue. + if current_time - last_check_time > check_interval + check_timeout and check['attrs']['state'] != 3: + headers = {'Accept': 'application/json', 'Content-Type': 'application/json'} + next_check_time = last_check_time + check_interval + check_timeout - # Trigger a full failure. - for _ in range(4): - response = requests.post(url + "/actions/process-check-result", data=json.dumps(data), headers=headers, auth=(args.username, args.password), verify=not args.insecure) + if object == 'services': + check_filter = f'host.name=="{check["attrs"]["host_name"]}" && service.name=="{check["attrs"]["name"]}"' + elif object == 'hosts': + check_filter = f'host.name=="{check["name"]}"' + else: + raise Exception + + # Set the check to unknown + data = { + "type": check['type'], + "filter": check_filter, + "exit_status": 3 if check['type'] == 'Service' else 1, + "plugin_output": f"", + } + + # Trigger a full failure. + for _ in range(4): + response = requests.post(url + "/actions/process-check-result", data=json.dumps(data), headers=headers, auth=(args.username, args.password), verify=not args.insecure) + response.raise_for_status() + time.sleep(3) + + # Rerun the check + data = { + "type": check['type'], + "filter": check_filter, + } + response = requests.post(url + "/actions/reschedule-check", data=json.dumps(data), headers=headers, auth=(args.username, args.password), verify=not args.insecure) response.raise_for_status() - time.sleep(3) - - # Rerun the check - data = { - "type": check['type'], - "filter": check_filter, - } - response = requests.post(url + "/actions/reschedule-check", data=json.dumps(data), headers=headers, auth=(args.username, args.password), verify=not args.insecure) - response.raise_for_status() - logging.info(f'Failed {check["name"]} - {int(current_time - next_check_time)} seconds overdue.') + logging.info(f'Failed {check["type"].lower()} {check["name"]} - {int(current_time - next_check_time)} seconds overdue.') time.sleep(args.interval)