fail overdue services
This commit is contained in:
parent
71fb07c6e8
commit
2ff284e09a
|
@ -46,6 +46,7 @@ def main():
|
||||||
timeout=args.timeout)
|
timeout=args.timeout)
|
||||||
traffic_data = response.json()['data']
|
traffic_data = response.json()['data']
|
||||||
|
|
||||||
|
|
||||||
if not len(traffic_data) and args.fail_empty:
|
if not len(traffic_data) and args.fail_empty:
|
||||||
print_icinga2_check_status(f'interface or host not found in OPNsense API response. Raw response:\n{traffic_data}', nagios.STATE_UNKNOWN)
|
print_icinga2_check_status(f'interface or host not found in OPNsense API response. Raw response:\n{traffic_data}', nagios.STATE_UNKNOWN)
|
||||||
sys.exit(nagios.UNKNOWN)
|
sys.exit(nagios.UNKNOWN)
|
||||||
|
@ -71,6 +72,7 @@ def main():
|
||||||
print_icinga2_check_status(f'failed to parse traffic data: {e}\n{traceback.format_exc()}\n{traffic_data}', nagios.STATE_UNKNOWN)
|
print_icinga2_check_status(f'failed to parse traffic data: {e}\n{traceback.format_exc()}\n{traffic_data}', nagios.STATE_UNKNOWN)
|
||||||
sys.exit(nagios.UNKNOWN)
|
sys.exit(nagios.UNKNOWN)
|
||||||
|
|
||||||
|
|
||||||
warn_b_value = int((args.bandwidth * args.bandwidth_warn / 100) * 1e+6)
|
warn_b_value = int((args.bandwidth * args.bandwidth_warn / 100) * 1e+6)
|
||||||
crit_b_value = int((args.bandwidth * args.bandwidth_crit / 100) * 1e+6)
|
crit_b_value = int((args.bandwidth * args.bandwidth_crit / 100) * 1e+6)
|
||||||
conn_warn = args.conn_warn if args.conn_warn > -1 else None
|
conn_warn = args.conn_warn if args.conn_warn > -1 else None
|
||||||
|
|
|
@ -0,0 +1,72 @@
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from urllib3.exceptions import InsecureRequestWarning
|
||||||
|
|
||||||
|
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Poll the Icinga2 API forever and force overdue services to UNKNOWN.

    On every pass all service objects are fetched. A service counts as overdue
    when more than ``check_interval + check_timeout`` seconds have elapsed
    since its ``last_check``. Each overdue service gets an UNKNOWN check
    result submitted four times and is then rescheduled.

    Args:
        args: Parsed command-line namespace. Reads ``api``, ``username``,
            ``password``, ``insecure``, ``default_timeout`` and ``interval``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    # Icinga2 API URL
    url = f"{args.api}/v1"

    logging.basicConfig(level=logging.INFO)

    # Loop-invariant request settings.
    auth = (args.username, args.password)
    verify = not args.insecure
    headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
    # requests waits indefinitely unless a timeout is given; without one a
    # stalled API connection would silently hang this daemon forever.
    request_timeout = 30

    while True:
        current_time = time.time()

        # Get all checks
        response = requests.get(url + "/objects/services", auth=auth, verify=verify, timeout=request_timeout)
        response.raise_for_status()

        checks = response.json()["results"]

        # Loop through all checks
        for check in checks:
            attrs = check["attrs"]
            last_check_time = attrs["last_check"]
            check_interval = attrs["check_interval"]
            # Fall back to the configured default when the service has no timeout.
            check_timeout = attrs["check_timeout"] or args.default_timeout

            # Skip services that are still within their allowed window.
            if current_time - last_check_time <= check_interval + check_timeout:
                continue

            next_check_time = last_check_time + check_interval + check_timeout
            overdue_seconds = int(current_time - next_check_time)
            check_filter = f'host.name=="{attrs["host_name"]}" && service.name=="{attrs["name"]}"'

            # Set the check to unknown
            data = {
                "type": check['type'],
                "filter": check_filter,
                "exit_status": 3,
                "plugin_output": f"<Check is overdue for {overdue_seconds} seconds.>",
            }

            # Trigger a full failure.
            # NOTE(review): presumably repeated so the service exhausts its
            # check attempts and reaches a hard state - confirm.
            for _ in range(4):
                response = requests.post(url + "/actions/process-check-result", data=json.dumps(data), headers=headers, auth=auth, verify=verify, timeout=request_timeout)
                response.raise_for_status()
                time.sleep(3)

            # Rerun the check
            data = {
                "type": check['type'],
                "filter": check_filter,
            }
            response = requests.post(url + "/actions/reschedule-check", data=json.dumps(data), headers=headers, auth=auth, verify=verify, timeout=request_timeout)
            response.raise_for_status()
            logging.info('Failed %s - %d seconds overdue.', check["name"], overdue_seconds)

        time.sleep(args.interval)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and start the polling loop.
    parser = argparse.ArgumentParser()
    # --api must not be marked required: argparse ignores the default of a
    # required option, so invocations that omit --api would exit with an error
    # instead of falling back to the local API endpoint.
    parser.add_argument('--api', default='http://localhost:5665', help='Base Icinga2 API. Default: http://localhost:5665')
    parser.add_argument('--insecure', action='store_true', help='Disable SSL verification.')
    parser.add_argument('--username', default='icingaweb2', help='API username.')
    parser.add_argument('--password', required=True, help='API password.')
    parser.add_argument('--default-timeout', default=600, type=int, help='If a check does not have a timeout set, use this many seconds as the default. Default: 600 (10 minutes).')
    parser.add_argument('--interval', default=900, type=int, help='Interval between service scans. Default: 900 (15 minutes).')
    args = parser.parse_args()
    main(args)
|
|
@ -0,0 +1,12 @@
|
||||||
|
[Unit]
Description=Icinga2 fail overdue services.
After=network.target

[Service]
User=flask
ExecStart=/opt/venvs/icinga2/icinga2_checks/bin/python3 /opt/icinga2-checks/fail-overdue.py --password XXXXX --insecure
Restart=always
# Wait between restarts. With the 100 ms default, a short API outage makes the
# script exit repeatedly, trips systemd's start-rate limit, and leaves the
# unit in a failed state that is no longer restarted.
RestartSec=30

[Install]
WantedBy=multi-user.target
|
||||||
|
|
Loading…
Reference in New Issue