redo check_systemd_timer again

This commit is contained in:
Cyberes 2024-10-10 20:34:47 -06:00
parent c2d36b06db
commit afacf15c50
1 changed file with 64 additions and 84 deletions

View File

@ -4,93 +4,85 @@ import re
import subprocess import subprocess
import sys import sys
import traceback import traceback
from datetime import datetime from datetime import datetime, timedelta
from typing import Optional, Tuple, Union from typing import Tuple, Union
import humanfriendly import humanfriendly
from dateparser import parse
from dateutil import tz from dateutil import tz
from pydantic import BaseModel from pydantic import BaseModel
from checker import nagios from checker import nagios
from checker.humanfriendly import parse_systemctl_time_delta
from checker.result import quit_check from checker.result import quit_check
sys.path.insert(0, "/usr/lib/python3/dist-packages") sys.path.insert(0, "/usr/lib/python3/dist-packages")
import dbus import dbus
# Pattern applied to the text printed by `systemctl status <name>.timer`.
# Capture groups:
#   1 - the word after the unit file path on the "Loaded:" line (e.g. "enabled")
#   2 - the text between "Active: " and " since" (e.g. "active (waiting)")
#   3 - the text between "Trigger: " and the following ";" (next trigger timestamp)
SYSTEMCTL_STATUS_RE = r'Loaded:\s.*\.timer;\s(.*?);.*?\)\s*Active:\s(.*?) since.*?\s*Trigger:\s(.*?);'
class TimerInfo(BaseModel):
    """Schedule and state of one systemd timer, as assembled by `get_next_elapse`."""
    # Next trigger time, parsed from the "Trigger:" line of `systemctl status`.
    next: datetime
    # `next` minus the current time.
    time_left: timedelta
    # Most recent trigger time, or None when no last trigger is available.
    last: Union[datetime, None]
    # Difference between the current time and `last`; None when `last` is None.
    since_last: Union[timedelta, None]
    # Name of the timer unit that was queried.
    unit: str
    # True when the "Loaded:" line reports the unit file as "enabled".
    enabled: bool
    # True when the first word of the "Active:" state is "active".
    active: bool
    # Parenthesised sub-state from the "Active:" line (e.g. "waiting"), or None if absent.
    status: Union[str, None]
def get_last_trigger(timer_name: str):
    """
    Look up when a timer last fired, using `systemctl list-timers --all`.

    :param timer_name: full unit name as it appears in the UNIT column.
    :return: the result of `parse()` on the LAST column timestamp, or None when
        the timer is not listed or has no last-trigger timestamp.
    :raises subprocess.CalledProcessError: if `systemctl` exits non-zero.
    """
    output = subprocess.check_output(["systemctl", "list-timers", "--all"], universal_newlines=True)
    # First line is the column header.
    for line in output.strip().split("\n")[1:]:
        fields = line.split()
        if timer_name not in fields:
            continue
        if 'ago' not in fields:
            # No PASSED column value: the timer has not been run yet.
            return None
        # The LEFT and PASSED columns hold a variable number of tokens
        # ("25min left" vs "1h 25min left"), so fixed field offsets are
        # unreliable. LAST is the final timestamp that precedes the "ago" token.
        before_ago = ' '.join(fields[:fields.index('ago')])
        stamps = re.findall(r'[A-Za-z]{3} \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \S+', before_ago)
        if not stamps:
            return None
        return parse(stamps[-1])
    return None
def get_next_elapse(timer_name: str) -> Tuple[TimerInfo | None, None | str]:
    """
    Read `systemctl status` for a timer and summarise its schedule.

    :param timer_name: full unit name of the timer.
    :return: `(TimerInfo, None)` on success; `(None, message)` when the unit
        name is not on the first line of the status output or when
        `systemctl status` exits non-zero.

    Any error while parsing the status output prints the raw output and a
    traceback, then exits the process with `nagios.STATE_UNKNOWN`.
    """
    try:
        output = subprocess.check_output(["systemctl", "status", timer_name], universal_newlines=True)
    except subprocess.CalledProcessError as e:
        return None, f'systemctl status failed: {e}'

    if timer_name not in output.split('\n')[0]:
        return None, 'Timer not found'

    try:
        parts = re.search(SYSTEMCTL_STATUS_RE, output)

        next_trigger = parse(parts.group(3))
        now = datetime.now(tz=next_trigger.tzinfo)
        time_left = next_trigger - now

        last_trigger = get_last_trigger(timer_name)
        # Elapsed time is "now minus last"; the reverse order gives a negative delta.
        # NOTE(review): assumes both parsed timestamps agree on tz-awareness - confirm.
        since_last = now - last_trigger if last_trigger is not None else None

        # e.g. "active (waiting)": first word is the state, the last word may
        # be a parenthesised sub-state.
        state_words = parts.group(2).split(' ')
        sub_state = state_words[-1]

        return TimerInfo(
            next=next_trigger,
            time_left=time_left,
            last=last_trigger,
            since_last=since_last,
            enabled=parts.group(1).lower() == 'enabled',
            active=state_words[0].lower() == 'active',
            status=sub_state.lower().strip('(').strip(')') if '(' in sub_state else None,
            unit=timer_name,
        ), None
    except Exception:
        # Dump what was being parsed so the failure can be diagnosed from the check output.
        print(output)
        traceback.print_exc()
        sys.exit(nagios.STATE_UNKNOWN)
def check_timer(timer_name: str, expected_interval: int = None): def check_timer(timer_name: str, expected_interval: int = None):
@ -105,7 +97,6 @@ def check_timer(timer_name: str, expected_interval: int = None):
timer_unit = system_bus.get_object('org.freedesktop.systemd1', timer_unit_path) timer_unit = system_bus.get_object('org.freedesktop.systemd1', timer_unit_path)
timer_properties = dbus.Interface(timer_unit, 'org.freedesktop.DBus.Properties') timer_properties = dbus.Interface(timer_unit, 'org.freedesktop.DBus.Properties')
active_state = timer_properties.Get('org.freedesktop.systemd1.Unit', 'ActiveState') active_state = timer_properties.Get('org.freedesktop.systemd1.Unit', 'ActiveState')
running_state = is_timer_running(timer_name)
if active_state == 'active': if active_state == 'active':
next_elapse, err = get_next_elapse(timer_name) next_elapse, err = get_next_elapse(timer_name)
@ -115,45 +106,34 @@ def check_timer(timer_name: str, expected_interval: int = None):
# if (next_elapse.left is not None and next_elapse.passed is not None) and (next_elapse.left < 0 or next_elapse.passed < 0): # if (next_elapse.left is not None and next_elapse.passed is not None) and (next_elapse.left < 0 or next_elapse.passed < 0):
# quit_check(f'Timer is negative??? Left: {next_elapse["left"]}. Passed: {next_elapse["passed"]}', nagios.STATE_UNKNOWN) # quit_check(f'Timer is negative??? Left: {next_elapse["left"]}. Passed: {next_elapse["passed"]}', nagios.STATE_UNKNOWN)
if next_elapse.next: next_elapse_human = next_elapse.next.replace(tzinfo=tz.tzlocal()).strftime('%a %Y-%m-%d %H:%M %Z')
next_elapse_human = next_elapse.next.replace(tzinfo=tz.tzlocal()).strftime('%a %Y-%m-%d %H:%M %Z') remaining_time_human = humanfriendly.format_timespan(next_elapse.time_left)
else: since_last_human = humanfriendly.format_timespan(next_elapse.since_last) if next_elapse.since_last else 'N/A'
next_elapse_human = 'N/A'
if next_elapse.left is not None:
remaining_time_human = humanfriendly.format_timespan(next_elapse.left)
else:
remaining_time_human = 'N/A'
if next_elapse.passed is not None:
passed_time_human = humanfriendly.format_timespan(next_elapse.passed)
else:
passed_time_human = 'N/A'
perfdata_dict = { perfdata_dict = {
'remaining_time': { 'remaining_time': {
'value': int(next_elapse.left) if next_elapse.left is not None else 0, 'value': next_elapse.time_left.seconds,
'unit': 's', 'unit': 's',
'min': 0 'min': 0
}, },
'time_since_last': { 'time_since_last': {
'value': int(next_elapse.passed) if next_elapse.passed is not None else 0, 'value': next_elapse.since_last.seconds if next_elapse.since_last is not None else 0,
'unit': 's', 'unit': 's',
'min': 0 'min': 0
} }
} }
timer_info = f'Next trigger time: {next_elapse_human}. Time until next trigger: {remaining_time_human}. Time since last trigger: {passed_time_human}.' timer_info = f'Next trigger time: {next_elapse_human}. Time until next trigger: {remaining_time_human}. Time since last trigger: {since_last_human}.'
if expected_interval is not None and next_elapse.next and next_elapse.last: if expected_interval is not None and next_elapse.next and next_elapse.last:
next_trigger_time = next_elapse.next next_trigger_time = next_elapse.next
last_trigger_time = datetime.strptime(next_elapse.last, '%a %Y-%m-%d %H:%M:%S %Z') actual_interval = next_trigger_time - next_elapse.last
actual_interval = next_trigger_time - last_trigger_time
actual_interval_seconds = actual_interval.total_seconds() actual_interval_seconds = actual_interval.total_seconds()
if actual_interval_seconds > expected_interval: if actual_interval_seconds > expected_interval:
quit_check(f'{timer_name} is active but the last trigger was more than the expected interval ago. {timer_info}', nagios.STATE_CRIT, perfdata_dict) quit_check(f'{timer_name} is active but the last trigger was more than the expected interval ago. {timer_info}', nagios.STATE_CRIT, perfdata_dict)
quit_check(f'{timer_name} is {"active" if not running_state else "active (running)"}. {timer_info}', nagios.STATE_OK, perfdata_dict) quit_check(f'{timer_name}{" is active" + (" (" + next_elapse.status + ")" if next_elapse else "") + "." if next_elapse.active else " -> "} {timer_info}', nagios.STATE_OK, perfdata_dict)
else: else:
quit_check(f'{timer_name} is not enabled', nagios.STATE_CRIT) quit_check(f'{timer_name} is not enabled', nagios.STATE_CRIT)
except dbus.exceptions.DBusException: except dbus.exceptions.DBusException: