icinga2-checks/check_systemd_timer.py

116 lines
4.6 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import sys
import traceback
from dateutil import tz
from checker import nagios
from checker.humanfriendly import parse_systemctl_time_delta
from checker.result import quit_check
sys.path.insert(0, "/usr/lib/python3/dist-packages")
import dbus
import re
import subprocess
from datetime import datetime
import humanfriendly
# Matches one data row of `systemctl list-timers --all` output.
# Capture groups (get_next_elapse relies on this numbering):
#   1: NEXT timestamp, e.g. "Sun 2024-01-07 03:10:00 UTC"
#   2: LEFT column, e.g. "5h 12min left"
#   3: inner repetition group of (2); holds its last time component
#   4: LAST timestamp
#   5: PASSED column, e.g. "6 days ago"
#   6: timer unit name (UNIT column)
#   7: activated service unit name (ACTIVATES column)
# Unit names may contain letters, digits and ":", "_", ".", "@", "\", "-";
# the dot in front of the unit-type suffix is matched literally.
SYSTEMCTL_TIMERS_RE = re.compile(
    r'^([A-Za-z]*\s[0-9]{4}-[0-9]{2}-[0-9]{2}\s*[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]*)'
    r'\s*(([0-9]*[a-z]*\s)*left)'
    r'\s*([A-Za-z]*\s[0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]*)'
    r'\s*([0-9A-Za-z\s]*\sago)'
    r'\s*([A-Za-z0-9:_.@\\-]*\.timer)'
    r'\s*([A-Za-z0-9:_.@\\-]*\.service)'
)
def get_next_elapse(timer_name):
    """Find *timer_name* in ``systemctl list-timers --all`` and parse its row.

    Args:
        timer_name: Name of the timer unit. A missing ``.timer`` suffix is
            appended before matching against the UNIT column.

    Returns:
        A ``(timer_info, error)`` tuple. On success ``error`` is ``None`` and
        ``timer_info`` is a dict with the keys ``next`` (naive ``datetime``
        parsed from the NEXT column), ``left`` and ``passed`` (results of
        ``parse_systemctl_time_delta`` for the LEFT and PASSED columns),
        ``last`` (raw LAST column text), ``unit`` and ``activates``.
        On failure ``timer_info`` is ``None`` and ``error`` is an exception
        instance describing what went wrong.
    """
    unit_name = timer_name if timer_name.endswith('.timer') else timer_name + '.timer'
    try:
        output = subprocess.check_output(
            ["systemctl", "list-timers", "--all"], universal_newlines=True
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable systemctl binary.
        return None, e

    unparsed_line = None
    for line in output.splitlines():
        if unit_name not in line:
            continue
        parts = SYSTEMCTL_TIMERS_RE.search(line)
        if parts is None:
            # The row mentions the timer but does not have the expected
            # layout; remember it so the caller gets a meaningful error
            # instead of an AttributeError.
            unparsed_line = line.strip()
            continue
        if parts.group(6) != unit_name:
            # Substring hit on a different unit (e.g. "foo.timer" inside
            # "barfoo.timer"); keep looking for the exact unit.
            continue
        try:
            datetime_object = datetime.strptime(parts.group(1), '%a %Y-%m-%d %H:%M:%S %Z')
        except ValueError as e:
            return None, e
        # parse_systemctl_time_delta apparently hands back an InvalidTimespan
        # instance instead of raising; pass that instance on so its message
        # is preserved.
        time_left = parse_systemctl_time_delta(parts.group(2))
        if isinstance(time_left, humanfriendly.InvalidTimespan):
            return None, time_left
        time_passed = parse_systemctl_time_delta(parts.group(5))
        if isinstance(time_passed, humanfriendly.InvalidTimespan):
            return None, time_passed
        timer_info = {
            'next': datetime_object,
            'left': time_left,
            'last': parts.group(4),
            'passed': time_passed,
            'unit': parts.group(6),
            'activates': parts.group(7),
        }
        return timer_info, None

    if unparsed_line is not None:
        return None, ValueError(f'Could not parse timer line: {unparsed_line}')
    return None, ValueError('Timer not found')
def check_timer(timer_name):
    """Check a systemd timer and report the outcome through ``quit_check``.

    The unit's ``ActiveState`` is read over the system D-Bus. For an active
    timer the schedule is fetched with ``get_next_elapse`` and reported as
    OK together with ``remaining_time`` / ``passed_time`` perfdata; a timer
    in any other state is reported as CRITICAL. A D-Bus error, a schedule
    lookup error or a negative time value is reported as UNKNOWN.

    Args:
        timer_name: Timer unit name; ``.timer`` is appended when missing.
    """
    if not timer_name.endswith('.timer'):
        timer_name = f'{timer_name}.timer'
    try:
        bus = dbus.SystemBus()
        manager = dbus.Interface(
            bus.get_object('org.freedesktop.systemd1', '/org/freedesktop/systemd1'),
            'org.freedesktop.systemd1.Manager',
        )
        unit_object = bus.get_object('org.freedesktop.systemd1', manager.GetUnit(timer_name))
        unit_properties = dbus.Interface(unit_object, 'org.freedesktop.DBus.Properties')
        state = unit_properties.Get('org.freedesktop.systemd1.Unit', 'ActiveState')

        if state != 'active':
            quit_check(f'{timer_name} is not active.', nagios.STATE_CRIT)
            return

        timer_info, error = get_next_elapse(timer_name)
        if error:
            # NOTE(review): presumably quit_check terminates the process;
            # nothing here stops execution otherwise - confirm.
            quit_check(f'{error}', nagios.STATE_UNKNOWN)

        seconds_left = timer_info['left']
        seconds_passed = timer_info['passed']
        if seconds_left < 0 or seconds_passed < 0:
            quit_check(f'Timer is negative??? Left: {seconds_left}. Passed: {seconds_passed}', nagios.STATE_UNKNOWN)

        # The parsed timestamp is naive; label it with the local zone for display.
        trigger_time = timer_info['next'].replace(tzinfo=tz.tzlocal()).strftime('%a %Y-%m-%d %H:%M %Z')
        left_text = humanfriendly.format_timespan(seconds_left)
        passed_text = humanfriendly.format_timespan(seconds_passed)
        perfdata = {
            label: {'value': int(seconds), 'unit': 's', 'min': 0}
            for label, seconds in (
                ('remaining_time', seconds_left),
                ('passed_time', seconds_passed),
            )
        }
        quit_check(f'{timer_name} is active. Trigger time: {trigger_time}. Remaining time: {left_text}. Time since last trigger: {passed_text}.', nagios.STATE_OK, perfdata)
    except dbus.exceptions.DBusException:
        quit_check(f'{timer_name} could not be found.', nagios.STATE_UNKNOWN)
if __name__ == '__main__':
    # Command-line entry point: parse the options, then run the timer check.
    cli = argparse.ArgumentParser()
    cli.add_argument('-t', '--timer', required=True, help='The name of the timer to check.')
    # NOTE(review): --last-ran-delta is accepted but never read in the code
    # visible here - confirm whether it is still meant to be evaluated.
    cli.add_argument('-l', '--last-ran-delta', help='The associated service should have been triggered at least this many seconds ago.')
    options = cli.parse_args()
    try:
        check_timer(options.timer)
    except Exception as exc:
        # Any unexpected failure is reported as UNKNOWN, with the traceback
        # printed for debugging.
        print(f'UNKNOWN - exception "{exc}"')
        traceback.print_exc()
        sys.exit(nagios.STATE_UNKNOWN)