2024-02-28 09:54:57 -07:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
import argparse
|
2024-03-07 15:18:46 -07:00
|
|
|
import re
|
|
|
|
import subprocess
|
2024-02-28 09:54:57 -07:00
|
|
|
import sys
|
|
|
|
import traceback
|
2024-10-10 20:34:47 -06:00
|
|
|
from datetime import datetime, timedelta
|
|
|
|
from typing import Tuple, Union
|
2024-02-28 09:54:57 -07:00
|
|
|
|
2024-03-07 15:18:46 -07:00
|
|
|
import humanfriendly
|
2024-10-10 20:34:47 -06:00
|
|
|
from dateparser import parse
|
2024-03-07 14:09:42 -07:00
|
|
|
from dateutil import tz
|
2024-10-10 19:47:22 -06:00
|
|
|
from pydantic import BaseModel
|
2024-02-28 09:54:57 -07:00
|
|
|
|
|
|
|
from checker import nagios
|
|
|
|
from checker.result import quit_check
|
|
|
|
|
|
|
|
sys.path.insert(0, "/usr/lib/python3/dist-packages")
|
|
|
|
import dbus
|
|
|
|
|
2024-10-10 20:55:42 -06:00
|
|
|
# Pattern applied to the text printed by `systemctl status <unit>.timer`.
# Capture groups:
#   1 - the text between "<...>.timer; " and the next ";" on the "Loaded:" line
#       (compared against "enabled" by get_next_elapse).
#   2 - the text between "Active: " and " since" (e.g. a state word followed by
#       a parenthesised sub-state).
#   3 - the text after "Trigger: " up to and including the first ";", or the
#       literal "n/a".
# No re.DOTALL is used, so "." never crosses a line break; the "\s*" runs are
# what let the match step from one line to the next.
SYSTEMCTL_STATUS_RE = r'Loaded:\s.*\.timer;\s(.*?);.*?\)\s*Active:\s(.*?) since.*?\s*Trigger:\s(.*?;|n\/a)'
|
2024-03-07 14:09:42 -07:00
|
|
|
|
|
|
|
|
2024-10-10 19:47:22 -06:00
|
|
|
class TimerInfo(BaseModel):
    """State of one systemd timer, as assembled by ``get_next_elapse``."""
    # Next trigger time parsed from the "Trigger:" line of `systemctl status`;
    # None when that line reads "n/a".
    next: Union[datetime, None]
    # `next` minus the current time; None when `next` is None.
    time_left: Union[timedelta, None]
    # Last trigger time as returned by get_last_trigger(); may be None.
    last: Union[datetime, None]
    # Current time minus `last`; None when `last` is None.
    since_last: Union[timedelta, None]
    # The unit name that was queried.
    unit: str
    # True when the enablement field of the "Loaded:" line is "enabled"
    # (compared case-insensitively).
    enabled: bool
    # True when the first word of the "Active:" field is "active"
    # (compared case-insensitively).
    active: bool
    # Lower-cased last word of the "Active:" field with surrounding parentheses
    # removed; None when that word contains no "(".
    status: Union[str, None]
|
|
|
|
|
|
|
|
|
|
|
|
def get_last_trigger(timer_name: str) -> datetime | None:
    """Return the last trigger time of ``timer_name`` from ``systemctl list-timers``.

    The output of ``systemctl list-timers --all`` is scanned (skipping the
    header line) for the first row that contains ``timer_name`` as a
    whitespace-separated field.

    Args:
        timer_name: Full unit name to look for, e.g. ``"foo.timer"``.

    Returns:
        The result of ``dateparser.parse`` on fields 7-10 of the matching row,
        or ``None`` when no row matches or the matching row has fewer than 14
        fields (treated as "timer has not run yet").

    Raises:
        subprocess.CalledProcessError: if ``systemctl`` exits non-zero.
    """
    # Fixed command, so pass an argument list rather than going through a shell.
    output = subprocess.check_output(["systemctl", "list-timers", "--all"]).decode('utf-8')

    for line in output.strip().split("\n")[1:]:
        fields = line.split()
        if timer_name not in fields:
            continue

        if len(fields) < 14:
            # Timer has not been run yet.
            return None

        # NOTE(review): fields 7-10 are presumably the LAST column (weekday,
        # date, time, zone). This positional parsing depends on the exact
        # column layout printed by the installed systemd - confirm on target.
        # The length guard above guarantees these indices exist.
        return parse(' '.join(fields[7:11]))

    return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_next_elapse(timer_name: str) -> Tuple[TimerInfo | None, None | str]:
    """Collect timing and state information for a timer unit.

    Runs ``systemctl status <timer_name>`` and extracts the enablement state,
    the active state and the next trigger time with ``SYSTEMCTL_STATUS_RE``.
    The last trigger time comes from ``get_last_trigger``.

    Args:
        timer_name: Full unit name, e.g. ``"foo.timer"``.

    Returns:
        ``(TimerInfo, None)`` on success, or ``(None, message)`` when
        ``systemctl status`` fails or the first line of its output does not
        mention ``timer_name``.

    Any exception raised while parsing the output (including a failed regex
    match) prints the raw output and a traceback, then exits the process with
    ``nagios.STATE_UNKNOWN``.
    """
    now = datetime.now()

    try:
        output = subprocess.check_output(["systemctl", "status", timer_name], universal_newlines=True)
    except subprocess.CalledProcessError as e:
        return None, f'systemctl status failed: {e}'

    first_line = output.partition('\n')[0]
    if timer_name not in first_line:
        return None, 'Timer not found'

    try:
        match = re.search(SYSTEMCTL_STATUS_RE, output)

        # Group 3 is either a timestamp or the literal "n/a".
        trigger_text = match.group(3)
        next_trigger = None
        time_left = None
        if trigger_text.lower() != 'n/a':
            next_trigger = parse(trigger_text)
            # Give the naive "now" the parsed value's tzinfo so the two can be subtracted.
            time_left = next_trigger - now.replace(tzinfo=next_trigger.tzinfo)

        last_trigger = get_last_trigger(timer_name)
        since_last = None
        if last_trigger is not None:
            since_last = now.replace(tzinfo=last_trigger.tzinfo) - last_trigger

        enable_state = match.group(1)
        active_words = match.group(2).split(' ')
        trailing_word = active_words[-1]
        if '(' in trailing_word:
            sub_state = trailing_word.lower().strip('(').strip(')')
        else:
            sub_state = None

        info = TimerInfo(
            next=next_trigger,
            time_left=time_left,
            last=last_trigger,
            since_last=since_last,
            enabled=enable_state.lower() == 'enabled',
            active=active_words[0].lower() == 'active',
            status=sub_state,
            unit=timer_name,
        )
        return info, None
    except Exception:
        print(output)
        traceback.print_exc()
        sys.exit(nagios.STATE_UNKNOWN)
|
2024-03-07 14:09:42 -07:00
|
|
|
|
2024-02-28 09:54:57 -07:00
|
|
|
|
2024-10-10 19:47:22 -06:00
|
|
|
def _perfdata_seconds(delta: timedelta | None) -> int:
    """Convert an optional timedelta to whole seconds for perfdata (0 if missing)."""
    if not delta:
        return 0
    # total_seconds() includes the days component, which `.seconds` drops.
    # Clamp at 0 because the perfdata entries declare 'min': 0.
    return max(0, int(delta.total_seconds()))


def check_timer(timer_name: str, expected_interval: int | None = None):
    """Check a systemd timer and report the result through ``quit_check``.

    The unit's ``ActiveState`` is read over D-Bus. When it is ``active``, the
    next/last trigger times are gathered with ``get_next_elapse`` and reported
    together with perfdata. If ``expected_interval`` is given and both trigger
    times are known, the gap between the last and the next trigger is compared
    against it.

    Args:
        timer_name: Timer unit name; ``.timer`` is appended when missing.
        expected_interval: Maximum allowed number of seconds between the last
            and the next trigger, or ``None`` to skip that comparison.

    Results reported (via ``quit_check``, which presumably terminates the
    process - confirm in ``checker.result``):
        * UNKNOWN  - ``get_next_elapse`` returned an error message.
        * CRITICAL - the interval was exceeded, the unit is not active, or a
          D-Bus error occurred while looking the unit up.
        * OK       - otherwise.
    """
    if not timer_name.endswith('.timer'):
        timer_name = timer_name + '.timer'

    try:
        system_bus = dbus.SystemBus()
        systemd1 = system_bus.get_object('org.freedesktop.systemd1', '/org/freedesktop/systemd1')
        manager = dbus.Interface(systemd1, 'org.freedesktop.systemd1.Manager')
        timer_unit_path = manager.GetUnit(timer_name)
        timer_unit = system_bus.get_object('org.freedesktop.systemd1', timer_unit_path)
        timer_properties = dbus.Interface(timer_unit, 'org.freedesktop.DBus.Properties')
        active_state = timer_properties.Get('org.freedesktop.systemd1.Unit', 'ActiveState')

        if active_state == 'active':
            next_elapse, err = get_next_elapse(timer_name)
            if err:
                quit_check(f'{err}', nagios.STATE_UNKNOWN)

            next_elapse_human = next_elapse.next.replace(tzinfo=tz.tzlocal()).strftime('%a %Y-%m-%d %H:%M %Z') if next_elapse.next else 'N/A'
            remaining_time_human = humanfriendly.format_timespan(next_elapse.time_left) if next_elapse.time_left else 'N/A'
            since_last_human = humanfriendly.format_timespan(next_elapse.since_last) if next_elapse.since_last else 'N/A'

            perfdata_dict = {
                'remaining_time': {
                    'value': _perfdata_seconds(next_elapse.time_left),
                    'unit': 's',
                    'min': 0
                },
                'time_since_last': {
                    'value': _perfdata_seconds(next_elapse.since_last),
                    'unit': 's',
                    'min': 0
                }
            }

            timer_info = f'Next trigger time: {next_elapse_human}. Time until next trigger: {remaining_time_human}. Time since last trigger: {since_last_human}.'

            if expected_interval is not None and next_elapse.next and next_elapse.last:
                # The interval is measured from the last trigger to the next scheduled one.
                actual_interval = next_elapse.next - next_elapse.last
                if actual_interval.total_seconds() > expected_interval:
                    quit_check(f'{timer_name} is active but the last trigger was more than the expected interval ago. {timer_info}', nagios.STATE_CRIT, perfdata_dict)

            if next_elapse.active:
                # Only append the sub-state when there is one; concatenating a
                # None status would raise TypeError.
                status_suffix = f' ({next_elapse.status})' if next_elapse.status is not None else ''
                state_text = f' is active{status_suffix}.'
            else:
                state_text = ' -> '
            quit_check(f'{timer_name}{state_text} {timer_info}', nagios.STATE_OK, perfdata_dict)
        else:
            quit_check(f'{timer_name} is not enabled', nagios.STATE_CRIT)
    except dbus.exceptions.DBusException:
        quit_check(f'{timer_name} does not exist or is disabled', nagios.STATE_CRIT)
|
2024-02-28 09:54:57 -07:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: parse the options, run the check, and turn any
    # unexpected exception into an UNKNOWN result with a traceback.
    cli = argparse.ArgumentParser()
    cli.add_argument('-t', '--timer', required=True, help='The name of the timer to check.')
    cli.add_argument('-i', '--interval', type=int, help='The expected interval between timer triggers in seconds.')
    options = cli.parse_args()

    try:
        check_timer(timer_name=options.timer, expected_interval=options.interval)
    except Exception as exc:
        print(f'UNKNOWN - exception "{exc}"')
        traceback.print_exc()
        sys.exit(nagios.STATE_UNKNOWN)
|