fail overdue services

This commit is contained in:
Cyberes 2023-11-22 23:35:33 -07:00
parent 71fb07c6e8
commit 2ff284e09a
3 changed files with 86 additions and 0 deletions

View File

@ -46,6 +46,7 @@ def main():
timeout=args.timeout) timeout=args.timeout)
traffic_data = response.json()['data'] traffic_data = response.json()['data']
if not len(traffic_data) and args.fail_empty: if not len(traffic_data) and args.fail_empty:
print_icinga2_check_status(f'interface or host not found in OPNsense API response. Raw response:\n{traffic_data}', nagios.STATE_UNKNOWN) print_icinga2_check_status(f'interface or host not found in OPNsense API response. Raw response:\n{traffic_data}', nagios.STATE_UNKNOWN)
sys.exit(nagios.UNKNOWN) sys.exit(nagios.UNKNOWN)
@ -71,6 +72,7 @@ def main():
print_icinga2_check_status(f'failed to parse traffic data: {e}\n{traceback.format_exc()}\n{traffic_data}', nagios.STATE_UNKNOWN) print_icinga2_check_status(f'failed to parse traffic data: {e}\n{traceback.format_exc()}\n{traffic_data}', nagios.STATE_UNKNOWN)
sys.exit(nagios.UNKNOWN) sys.exit(nagios.UNKNOWN)
warn_b_value = int((args.bandwidth * args.bandwidth_warn / 100) * 1e+6) warn_b_value = int((args.bandwidth * args.bandwidth_warn / 100) * 1e+6)
crit_b_value = int((args.bandwidth * args.bandwidth_crit / 100) * 1e+6) crit_b_value = int((args.bandwidth * args.bandwidth_crit / 100) * 1e+6)
conn_warn = args.conn_warn if args.conn_warn > -1 else None conn_warn = args.conn_warn if args.conn_warn > -1 else None

72
fail-overdue.py Normal file
View File

@ -0,0 +1,72 @@
import argparse
import json
import logging
import time
import requests
from urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
def main(args):
    """Poll the Icinga2 API forever and force overdue services to UNKNOWN.

    Every ``args.interval`` seconds all service objects are fetched. A service
    is treated as overdue when more than ``check_interval + check_timeout``
    seconds have elapsed since its ``last_check``. For each overdue service an
    UNKNOWN (exit status 3) check result is submitted four times, and the
    check is then rescheduled.

    Args:
        args: Parsed command-line namespace. Reads ``api``, ``username``,
            ``password``, ``insecure``, ``default_timeout`` and ``interval``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection failures or when a request
            exceeds the per-request timeout.

    This function never returns normally; any exception propagates to the
    caller and ends the process.
    """
    # Icinga2 API URL
    url = f"{args.api}/v1"
    logging.basicConfig(level=logging.INFO)

    # Without a timeout `requests` waits indefinitely, and a single stalled
    # connection would freeze this daemon for good. Failing instead lets the
    # process exit so that a supervisor can restart it.
    request_timeout = 30  # seconds, applied to every API call
    request_kwargs = {
        "auth": (args.username, args.password),
        "verify": not args.insecure,
        "timeout": request_timeout,
    }
    # Loop-invariant, so built once instead of once per overdue service.
    headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}

    while True:
        current_time = time.time()

        # Get all checks
        response = requests.get(url + "/objects/services", **request_kwargs)
        response.raise_for_status()
        checks = response.json()["results"]

        # Loop through all checks
        for check in checks:
            attrs = check["attrs"]
            last_check_time = attrs["last_check"]
            check_interval = attrs["check_interval"]
            # A missing/zero timeout falls back to the configured default.
            check_timeout = attrs["check_timeout"] or args.default_timeout

            # Skip anything that is not overdue.
            if not current_time - last_check_time > check_interval + check_timeout:
                continue

            # Moment at which the check became overdue.
            next_check_time = last_check_time + check_interval + check_timeout
            overdue_seconds = int(current_time - next_check_time)
            check_filter = f'host.name=="{attrs["host_name"]}" && service.name=="{attrs["name"]}"'

            # Set the check to unknown
            data = {
                "type": check['type'],
                "filter": check_filter,
                "exit_status": 3,
                "plugin_output": f"<Check is overdue for {overdue_seconds} seconds.>",
            }
            # Trigger a full failure.
            # NOTE(review): presumably four results are sent so the service
            # exhausts its check attempts and reaches a hard state - confirm.
            for _ in range(4):
                response = requests.post(url + "/actions/process-check-result", data=json.dumps(data), headers=headers, **request_kwargs)
                response.raise_for_status()
                time.sleep(3)

            # Rerun the check
            data = {
                "type": check['type'],
                "filter": check_filter,
            }
            response = requests.post(url + "/actions/reschedule-check", data=json.dumps(data), headers=headers, **request_kwargs)
            response.raise_for_status()
            logging.info('Failed %s - %s seconds overdue.', check["name"], overdue_seconds)

        time.sleep(args.interval)
if __name__ == "__main__":
    # Command-line entry point: parse the options and start the scan loop.
    parser = argparse.ArgumentParser()
    # Deliberately not `required=True`: a required option never falls back to
    # its `default`, so the default URL below would otherwise be dead code and
    # every invocation without --api would exit with a usage error.
    parser.add_argument('--api', default='http://localhost:5665', help='Base Icinga2 API.')
    parser.add_argument('--insecure', action='store_true', help='Disable SSL verification.')
    parser.add_argument('--username', default='icingaweb2', help='API username.')
    parser.add_argument('--password', required=True, help='API password.')
    parser.add_argument('--default-timeout', default=600, type=int, help='If a check does not have a timeout set, use this many seconds as the default. Default: 600 (10 minutes).')
    parser.add_argument('--interval', default=900, type=int, help='Interval between service scans. Default: 900 (15 minutes).')
    args = parser.parse_args()
    main(args)

View File

@ -0,0 +1,12 @@
# systemd unit that keeps fail-overdue.py running as a long-lived service.
[Unit]
Description=Icinga2 fail overdue services.
After=network.target

[Service]
User=flask
# --api is passed explicitly because the script declares it as a required
# option; without it the process exits immediately with a usage error and
# systemd restarts it in a loop.
# NOTE(review): the URL assumes the Icinga2 API listens on localhost:5665 over
# TLS - confirm for the target host. Replace XXXXX with the real API password.
ExecStart=/opt/venvs/icinga2/icinga2_checks/bin/python3 /opt/icinga2-checks/fail-overdue.py --api https://localhost:5665 --password XXXXX --insecure
# The script exits on any API error, so always restart it.
Restart=always

[Install]
WantedBy=multi-user.target