From afacf15c50b8d6f39a9437d88233cdba50a287ee Mon Sep 17 00:00:00 2001 From: Cyberes Date: Thu, 10 Oct 2024 20:34:47 -0600 Subject: [PATCH] redo check_systemd_timer again --- check_systemd_timer.py | 148 ++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 84 deletions(-) diff --git a/check_systemd_timer.py b/check_systemd_timer.py index 590e749..a8b590a 100755 --- a/check_systemd_timer.py +++ b/check_systemd_timer.py @@ -4,93 +4,85 @@ import re import subprocess import sys import traceback -from datetime import datetime -from typing import Optional, Tuple, Union +from datetime import datetime, timedelta +from typing import Tuple, Union import humanfriendly +from dateparser import parse from dateutil import tz from pydantic import BaseModel from checker import nagios -from checker.humanfriendly import parse_systemctl_time_delta from checker.result import quit_check sys.path.insert(0, "/usr/lib/python3/dist-packages") import dbus -SYSTEMCTL_TIMERS_RE = re.compile( - r'^(([A-Za-z]*\s[0-9]{4}-[0-9]{2}-[0-9]{2}\s*[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]*)|n\/a|-)\s*((([0-9]*[a-z]*\s)*(?:left)?)|n\/a|-)\s*(([A-Za-z]*\s[0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]*)|n\/a|-)\s*(([0-9A-Za-z\s]*\sago)|n\/a|-)\s*(.*?\.timer)\s*((.*?\.service)|\s*)' -) +SYSTEMCTL_STATUS_RE = r'Loaded:\s.*\.timer;\s(.*?);.*?\)\s*Active:\s(.*?) since.*?\s*Trigger:\s(.*?);' class TimerInfo(BaseModel): - next: Optional[datetime] - left: Optional[Union[float, None]] - last: Optional[str] - passed: Optional[Union[float, None]] - unit: Optional[str] - activates: Optional[str] + next: datetime + time_left: timedelta + last: Union[datetime, None] + since_last: Union[timedelta, None] + unit: str + enabled: bool + active: bool + status: Union[str, None] -def is_timer_running(timer_name): +def get_last_trigger(timer_name: str): + output = subprocess.check_output("systemctl list-timers --all", shell=True).decode('utf-8') + lines = output.strip().split("\n") + for line in lines[1:]: + fields = line.split() + if timer_name in fields: + if len(fields) < 14: + # Timer has not been run yet. + return None + try: + return parse(fields[7] + ' ' + fields[8] + ' ' + fields[9] + ' ' + fields[10]) + except IndexError: + print(fields) + raise + + +def get_next_elapse(timer_name: str) -> Tuple[TimerInfo | None, None | str]: try: output = subprocess.check_output(["systemctl", "status", timer_name], universal_newlines=True) - if "Active: active (running)" in output: - return True - return False - except subprocess.CalledProcessError as e: - return False + if timer_name in output.split('\n')[0]: + try: + parts = re.search(SYSTEMCTL_STATUS_RE, output) + next_trigger = parse(parts.group(3)) + now = datetime.now(tz=next_trigger.tzinfo) + time_left = next_trigger - now + last_trigger = get_last_trigger(timer_name) + if last_trigger is not None: + since_last = last_trigger - now + else: + since_last = None -def get_next_elapse(timer_name) -> Tuple[TimerInfo | None, None | str]: - try: - output = subprocess.check_output(["systemctl", "list-timers", "--all"], universal_newlines=True) - lines = output.split('\n') - for line in lines: - if timer_name in line: - try: - parts = re.search(SYSTEMCTL_TIMERS_RE, line) + print(since_last) - datetime_object = None - if parts.group(2): - datetime_object = datetime.strptime(parts.group(2), '%a %Y-%m-%d %H:%M:%S %Z') - - time_left: float | None = None - if parts.group(4): - try: - time_left = parse_systemctl_time_delta(parts.group(4)) - except Exception as tl_err: - if isinstance(tl_err, humanfriendly.InvalidTimespan): - return None, f'Invalid Timespan: "{parts.group(4)}"' - else: - raise - - time_passed: float | None = None - if parts.group(9): - try: - time_passed = parse_systemctl_time_delta(parts.group(9)) - except Exception as tp_err: - if isinstance(tp_err, humanfriendly.InvalidTimespan): - return None, f'Invalid Timespan: "{parts.group(9)}"' - else: - raise - - timer_info = TimerInfo( - next=datetime_object, - left=time_left, - last=parts.group(7), - passed=time_passed, - unit=parts.group(10), - activates=parts.group(12) - ) - return timer_info, None - except Exception: - print(output) - traceback.print_exc() - sys.exit(nagios.STATE_UNKNOWN) + return TimerInfo( + next=next_trigger, + time_left=time_left, + last=last_trigger, + since_last=since_last, + enabled=parts.group(1).lower() == 'enabled', + active=parts.group(2).split(' ')[0].lower() == 'active', + status=parts.group(2).split(' ')[-1].lower().strip('(').strip(')') if '(' in parts.group(2).split(' ')[-1] else None, + unit=timer_name, + ), None + except Exception: + print(output) + traceback.print_exc() + sys.exit(nagios.STATE_UNKNOWN) return None, 'Timer not found' except subprocess.CalledProcessError as e: - return None, f'systemctl list-timers failed: {e}' + return None, f'systemctl status failed: {e}' def check_timer(timer_name: str, expected_interval: int = None): @@ -105,7 +97,6 @@ def check_timer(timer_name: str, expected_interval: int = None): timer_unit = system_bus.get_object('org.freedesktop.systemd1', timer_unit_path) timer_properties = dbus.Interface(timer_unit, 'org.freedesktop.DBus.Properties') active_state = timer_properties.Get('org.freedesktop.systemd1.Unit', 'ActiveState') - running_state = is_timer_running(timer_name) if active_state == 'active': next_elapse, err = get_next_elapse(timer_name) @@ -115,45 +106,34 @@ def check_timer(timer_name: str, expected_interval: int = None): # if (next_elapse.left is not None and next_elapse.passed is not None) and (next_elapse.left < 0 or next_elapse.passed < 0): # quit_check(f'Timer is negative??? Left: {next_elapse["left"]}. Passed: {next_elapse["passed"]}', nagios.STATE_UNKNOWN) - if next_elapse.next: - next_elapse_human = next_elapse.next.replace(tzinfo=tz.tzlocal()).strftime('%a %Y-%m-%d %H:%M %Z') - else: - next_elapse_human = 'N/A' - - if next_elapse.left is not None: - remaining_time_human = humanfriendly.format_timespan(next_elapse.left) - else: - remaining_time_human = 'N/A' - if next_elapse.passed is not None: - passed_time_human = humanfriendly.format_timespan(next_elapse.passed) - else: - passed_time_human = 'N/A' + next_elapse_human = next_elapse.next.replace(tzinfo=tz.tzlocal()).strftime('%a %Y-%m-%d %H:%M %Z') + remaining_time_human = humanfriendly.format_timespan(next_elapse.time_left) + since_last_human = humanfriendly.format_timespan(next_elapse.since_last) if next_elapse.since_last else 'N/A' perfdata_dict = { 'remaining_time': { - 'value': int(next_elapse.left) if next_elapse.left is not None else 0, + 'value': next_elapse.time_left.seconds, 'unit': 's', 'min': 0 }, 'time_since_last': { - 'value': int(next_elapse.passed) if next_elapse.passed is not None else 0, + 'value': next_elapse.since_last.seconds if next_elapse.since_last is not None else 0, 'unit': 's', 'min': 0 } } - timer_info = f'Next trigger time: {next_elapse_human}. Time until next trigger: {remaining_time_human}. Time since last trigger: {passed_time_human}.' + timer_info = f'Next trigger time: {next_elapse_human}. Time until next trigger: {remaining_time_human}. Time since last trigger: {since_last_human}.' if expected_interval is not None and next_elapse.next and next_elapse.last: next_trigger_time = next_elapse.next - last_trigger_time = datetime.strptime(next_elapse.last, '%a %Y-%m-%d %H:%M:%S %Z') - actual_interval = next_trigger_time - last_trigger_time + actual_interval = next_trigger_time - next_elapse.last actual_interval_seconds = actual_interval.total_seconds() if actual_interval_seconds > expected_interval: quit_check(f'{timer_name} is active but the last trigger was more than the expected interval ago. {timer_info}', nagios.STATE_CRIT, perfdata_dict) - quit_check(f'{timer_name} is {"active" if not running_state else "active (running)"}. {timer_info}', nagios.STATE_OK, perfdata_dict) + quit_check(f'{timer_name}{" is active" + (" (" + next_elapse.status + ")" if next_elapse else "") + "." if next_elapse.active else " -> "} {timer_info}', nagios.STATE_OK, perfdata_dict) else: quit_check(f'{timer_name} is not enabled', nagios.STATE_CRIT) except dbus.exceptions.DBusException: