icinga2-checks/check_systemd_timer.py

130 lines
5.4 KiB
Python
Raw Normal View History

2024-02-28 09:54:57 -07:00
#!/usr/bin/env python3
import argparse
2024-03-07 15:18:46 -07:00
import re
import subprocess
2024-02-28 09:54:57 -07:00
import sys
import traceback
2024-03-07 15:18:46 -07:00
from datetime import datetime
2024-02-28 09:54:57 -07:00
2024-03-07 15:18:46 -07:00
import humanfriendly
from dateutil import tz
2024-02-28 09:54:57 -07:00
from checker import nagios
from checker.humanfriendly import parse_systemctl_time_delta
2024-02-28 09:54:57 -07:00
from checker.result import quit_check
sys.path.insert(0, "/usr/lib/python3/dist-packages")
import dbus
# Pattern applied by get_next_elapse() to a single row of
# `systemctl list-timers --all` output. The row is expected to consist of, in
# order: a timestamp or "n/a", a "... left" span or "n/a", a second timestamp
# or "n/a", a "... ago" span or "n/a", a `*.timer` unit and optionally a
# `*.service` unit.
#
# Capture groups that get_next_elapse() reads:
#    2 -> first timestamp (weekday, YYYY-MM-DD, HH:MM:SS, upper-case zone);
#         unset when the column is "n/a"
#    4 -> the "<n><unit> ... left" span; unset when the column is "n/a"
#    7 -> second timestamp, same shape as group 2; unset when "n/a"
#    9 -> the "... ago" span; unset when the column is "n/a"
#   10 -> the timer unit name
#   12 -> the service unit name; unset when no service is listed
#
# NOTE(review): the "." before "timer" / "service" is unescaped, so it matches
# any character, and the unit-name class [A-Za-z\-_] excludes digits and "@" --
# rows for units with such names will not match this pattern.
SYSTEMCTL_TIMERS_RE = re.compile(
r'^(([A-Za-z]*\s[0-9]{4}-[0-9]{2}-[0-9]{2}\s*[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]*)|n\/a)\s*((([0-9]*[a-z]*\s)*left)|n\/a)\s*(([A-Za-z]*\s[0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]*)|n\/a)\s*(([0-9A-Za-z\s]*\sago)|n\/a)\s*([A-Za-z\-_]*.timer)\s*(([A-Za-z\-_]*.service)|\s*)')
2024-03-07 15:18:46 -07:00
def get_next_elapse(timer_name) -> tuple[None, Exception] | tuple[dict, None]:
    """Look up one timer in the output of ``systemctl list-timers --all``.

    Args:
        timer_name: Unit name of the timer, with or without the ``.timer``
            suffix.

    Returns:
        ``(timer_info, None)`` on success. ``timer_info`` has the keys
        ``next`` (``datetime`` or ``None``), ``left`` and ``passed`` (the value
        returned by ``parse_systemctl_time_delta`` -- presumably seconds -- or
        the string ``'n/a'``), ``last`` (raw timestamp text or ``None``),
        ``unit`` and ``activates`` (unit names; ``activates`` may be ``None``).

        ``(None, error)`` when ``systemctl`` could not be run, a matching row
        could not be parsed, a timestamp or time span was malformed, or no row
        for the timer exists.
    """
    # Only the subprocess call can raise the errors handled here, so keep the
    # try body down to that one statement.
    try:
        output = subprocess.check_output(
            ["systemctl", "list-timers", "--all"], universal_newlines=True
        )
    except (subprocess.CalledProcessError, OSError) as e:
        return None, e

    wanted_unit = timer_name if timer_name.endswith('.timer') else f'{timer_name}.timer'
    unparsed_line = None

    for line in output.split('\n'):
        # Cheap pre-filter; the exact comparison happens on the parsed unit.
        if timer_name not in line:
            continue

        parts = SYSTEMCTL_TIMERS_RE.search(line)
        if parts is None:
            # Remember the row so a parse failure is reported instead of
            # crashing on ``None.group`` or claiming the timer is missing.
            unparsed_line = line
            continue

        # A substring hit is not enough: "backup.timer" also occurs inside
        # "db-backup.timer". Require the parsed unit name to match exactly.
        if parts.group(10) != wanted_unit:
            continue

        datetime_object = None
        if parts.group(2):
            try:
                datetime_object = datetime.strptime(parts.group(2), '%a %Y-%m-%d %H:%M:%S %Z')
            except ValueError as e:
                return None, e

        time_left = 'n/a'
        if parts.group(4):
            time_left = parse_systemctl_time_delta(parts.group(4))
            if isinstance(time_left, humanfriendly.InvalidTimespan):
                # Hand back the exception instance so its message survives.
                return None, time_left

        time_passed = 'n/a'
        if parts.group(9):
            time_passed = parse_systemctl_time_delta(parts.group(9))
            if isinstance(time_passed, humanfriendly.InvalidTimespan):
                return None, time_passed

        timer_info = {
            'next': datetime_object,
            'left': time_left,
            'last': parts.group(7),
            'passed': time_passed,
            'unit': parts.group(10),
            'activates': parts.group(12),
        }
        return timer_info, None

    if unparsed_line is not None:
        return None, ValueError(f'Could not parse systemctl output line: {unparsed_line.strip()!r}')
    return None, ValueError('Timer not found')
2024-02-28 09:54:57 -07:00
def check_timer(timer_name):
    """Report the state of a systemd timer through ``quit_check``.

    The timer's ``ActiveState`` is read over the system D-Bus. An active timer
    is reported as OK together with its next trigger time, the remaining time
    and the time since the last trigger (also emitted as perfdata); an
    inactive timer is reported as CRITICAL. A D-Bus error, a failure to read
    the timer's schedule, or a negative time value is reported as UNKNOWN.

    Args:
        timer_name: Name of the timer unit. ``.timer`` is appended when the
            name does not already end with it.
    """
    if not timer_name.endswith('.timer'):
        timer_name = timer_name + '.timer'

    try:
        system_bus = dbus.SystemBus()
        systemd1 = system_bus.get_object('org.freedesktop.systemd1', '/org/freedesktop/systemd1')
        manager = dbus.Interface(systemd1, 'org.freedesktop.systemd1.Manager')
        timer_unit_path = manager.GetUnit(timer_name)
        timer_unit = system_bus.get_object('org.freedesktop.systemd1', timer_unit_path)
        timer_properties = dbus.Interface(timer_unit, 'org.freedesktop.DBus.Properties')
        active_state = timer_properties.Get('org.freedesktop.systemd1.Unit', 'ActiveState')

        if active_state == 'active':
            next_elapse, err = get_next_elapse(timer_name)
            if err is not None:
                quit_check(f'{err}', nagios.STATE_UNKNOWN)

            left = next_elapse['left']
            passed = next_elapse['passed']

            # Check each value on its own: a negative value must be reported
            # even when the other column is 'n/a'.
            left_is_negative = left != 'n/a' and left < 0
            passed_is_negative = passed != 'n/a' and passed < 0
            if left_is_negative or passed_is_negative:
                quit_check(f'Timer is negative??? Left: {left}. Passed: {passed}', nagios.STATE_UNKNOWN)

            if next_elapse['next']:
                # The parsed timestamp is labelled with the local zone purely
                # for display.
                local_tz = tz.tzlocal()
                next_elapse_str = next_elapse['next'].replace(tzinfo=local_tz).strftime('%a %Y-%m-%d %H:%M %Z')
            else:
                next_elapse_str = 'n/a'

            if left != 'n/a':
                remaining_time_human = humanfriendly.format_timespan(left)
            else:
                remaining_time_human = 'n/a'

            if passed != 'n/a':
                passed_time_human = humanfriendly.format_timespan(passed)
            else:
                passed_time_human = 'n/a'

            # -1 marks "not available" in the perfdata.
            perfdata_dict = {
                'remaining_time': {
                    'value': int(left) if left != 'n/a' else -1,
                    'unit': 's',
                    'min': 0
                },
                'passed_time': {
                    'value': int(passed) if passed != 'n/a' else -1,
                    'unit': 's',
                    'min': 0
                }
            }
            quit_check(f'{timer_name} is active. Trigger time: {next_elapse_str}. Remaining time: {remaining_time_human}. Time since last trigger: {passed_time_human}.', nagios.STATE_OK, perfdata_dict)
        else:
            quit_check(f'{timer_name} is not active.', nagios.STATE_CRIT)
    except dbus.exceptions.DBusException:
        # NOTE(review): every D-Bus failure is reported as "not found",
        # including ones unrelated to the unit lookup -- confirm that is wanted.
        quit_check(f'{timer_name} could not be found.', nagios.STATE_UNKNOWN)
if __name__ == '__main__':
    # Command-line entry point: parse the options, then run the check.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-t', '--timer',
        required=True,
        help='The name of the timer to check.',
    )
    # NOTE(review): this option is parsed but not read anywhere in this block.
    arg_parser.add_argument(
        '-l', '--last-ran-delta',
        help='The associated service should have been triggered at least this many seconds ago.',
    )
    cli_args = arg_parser.parse_args()

    try:
        check_timer(cli_args.timer)
    except Exception as exc:
        # Turn any unexpected failure into an UNKNOWN result; the traceback
        # goes out via traceback.print_exc() for debugging.
        print(f'UNKNOWN - exception "{exc}"')
        traceback.print_exc()
        sys.exit(nagios.STATE_UNKNOWN)