145 lines
5.6 KiB
Python
Executable File
145 lines
5.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import argparse
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import traceback
|
|
from datetime import datetime, timedelta
|
|
from typing import Tuple, Union
|
|
|
|
import humanfriendly
|
|
from dateparser import parse
|
|
from dateutil import tz
|
|
from pydantic import BaseModel
|
|
|
|
from checker import nagios
|
|
from checker.result import quit_check
|
|
|
|
# Pattern applied with re.search() to the text printed by `systemctl status <timer>`.
# Capture groups, as consumed by get_next_elapse():
#   1 - the token following ".timer;" on the "Loaded:" line (compared against "enabled")
#   2 - the text between "Active:" and " since" (first word compared against "active";
#       a trailing parenthesised word is used as the status)
#   3 - the text after "Trigger:" up to and including the first ";", or the literal "n/a"
SYSTEMCTL_STATUS_RE = r'Loaded:\s.*\.timer;\s(.*?);.*?\)\s*Active:\s(.*?) since.*?\s*Trigger:\s(.*?;|n\/a)'
|
|
|
|
|
|
class TimerInfo(BaseModel):
    """State of a single systemd timer as assembled by ``get_next_elapse``."""

    # Next scheduled trigger; None when systemctl reports "n/a".
    next: datetime | None
    # Time remaining until ``next``; None whenever ``next`` is None.
    time_left: timedelta | None
    # Most recent trigger as returned by get_last_trigger(); None if unavailable.
    last: datetime | None
    # Time elapsed since ``last``; None whenever ``last`` is None.
    since_last: timedelta | None
    # Unit name that was queried.
    unit: str
    # True when the "Loaded:" line reports the unit as "enabled".
    enabled: bool
    # True when the first word of the "Active:" line is "active".
    active: bool
    # Parenthesised word from the "Active:" line, lower-cased; None if absent.
    status: str | None
|
|
|
|
|
|
def get_last_trigger(timer_name: str):
    """Return when ``timer_name`` last fired, read from ``systemctl list-timers --all``.

    The LAST timestamp is located relative to the ``ago`` token that closes the
    PASSED column, instead of at fixed field positions. Fixed positions only work
    when NEXT is a four-token timestamp and LEFT is exactly two tokens; they
    misreport a timer that has run but has no next trigger ("n/a") as never run.

    Args:
        timer_name: Full unit name (e.g. ``foo.timer``) as shown in the UNIT column.

    Returns:
        Whatever ``dateparser.parse`` yields for the LAST timestamp, or None when
        the timer is not listed or has not been triggered yet.

    Raises:
        subprocess.CalledProcessError: if ``systemctl list-timers`` exits non-zero.
    """
    # An argument list avoids spawning a shell for a fixed command.
    output = subprocess.check_output(['systemctl', 'list-timers', '--all']).decode('utf-8')

    # The first line is the column header.
    for line in output.strip().split('\n')[1:]:
        fields = line.split()
        if timer_name not in fields:
            continue

        # Everything left of the unit name belongs to NEXT, LEFT, LAST and PASSED.
        columns = fields[:fields.index(timer_name)]

        # PASSED is rendered as "<span> ago" once the timer has fired; anything
        # else (e.g. "n/a" or "-") means there is no last trigger to report.
        if not columns or columns[-1] != 'ago':
            return None

        # Walk backwards to the closest date token: that is the LAST timestamp
        # (NEXT, if present, sits further to the left).
        for i in range(len(columns) - 2, 0, -1):
            if re.fullmatch(r'\d{4}-\d{2}-\d{2}', columns[i]):
                # weekday, date, time, timezone - the same four tokens as before.
                return parse(' '.join(columns[i - 1:i + 3]))
        return None

    return None
|
|
|
|
|
|
def get_next_elapse(timer_name: str) -> Tuple[TimerInfo | None, None | str]:
    """Collect the state of ``timer_name`` from ``systemctl status``.

    Returns:
        ``(TimerInfo, None)`` on success, or ``(None, message)`` when the
        ``systemctl status`` call fails or its first output line does not
        mention the timer.

    Any exception raised while interpreting the output is not propagated:
    the raw output and a traceback are printed and the process exits with
    ``nagios.STATE_UNKNOWN``.
    """
    now = datetime.now()

    try:
        output = subprocess.check_output(["systemctl", "status", timer_name], universal_newlines=True)
    except subprocess.CalledProcessError as e:
        return None, f'systemctl status failed: {e}'

    first_line = output.split('\n')[0]
    if timer_name not in first_line:
        return None, 'Timer not found'

    try:
        match = re.search(SYSTEMCTL_STATUS_RE, output)

        # Group 3 is either a timestamp or the literal "n/a".
        trigger_text = match.group(3)
        if trigger_text.lower() == 'n/a':
            next_trigger = None
            time_left = None
        else:
            next_trigger = parse(trigger_text)
            # "now" is naive; give it the parsed timestamp's tzinfo so the
            # two values can be subtracted.
            time_left = next_trigger - now.replace(tzinfo=next_trigger.tzinfo)

        last_trigger = get_last_trigger(timer_name)
        if last_trigger is None:
            since_last = None
        else:
            since_last = now.replace(tzinfo=last_trigger.tzinfo) - last_trigger

        is_enabled = match.group(1).lower() == 'enabled'

        # e.g. "active (waiting)" -> ["active", "(waiting)"]
        active_words = match.group(2).split(' ')
        is_active = active_words[0].lower() == 'active'
        trailing_word = active_words[-1]
        if '(' in trailing_word:
            status = trailing_word.lower().strip('(').strip(')')
        else:
            status = None

        return TimerInfo(
            next=next_trigger,
            time_left=time_left,
            last=last_trigger,
            since_last=since_last,
            enabled=is_enabled,
            active=is_active,
            status=status,
            unit=timer_name,
        ), None
    except Exception:
        print(output)
        traceback.print_exc()
        sys.exit(nagios.STATE_UNKNOWN)
|
|
|
|
|
|
def check_timer(timer_name: str, expected_interval: int = None):
    """Check a systemd timer and report the result through ``quit_check``.

    Args:
        timer_name: Timer unit name; ``.timer`` is appended when missing.
        expected_interval: Optional maximum number of seconds allowed between
            the last trigger and the next scheduled trigger.

    Reports:
        UNKNOWN  - the timer state could not be retrieved.
        CRITICAL - the timer is not enabled, or the span between the last and
                   next trigger exceeds ``expected_interval``.
        OK       - otherwise.

    NOTE(review): this function relies on ``quit_check`` terminating the
    process; confirm against ``checker.result``.
    """
    if not timer_name.endswith('.timer'):
        timer_name = timer_name + '.timer'

    timer_info, timer_error = get_next_elapse(timer_name)
    if timer_error:
        quit_check(str(timer_error), nagios.STATE_UNKNOWN)

    if not timer_info.enabled:
        quit_check(f'{timer_name} is not enabled', nagios.STATE_CRIT)

    next_elapse_human = timer_info.next.replace(tzinfo=tz.tzlocal()).strftime('%a %Y-%m-%d %H:%M %Z') if timer_info.next else 'N/A'
    remaining_time_human = humanfriendly.format_timespan(timer_info.time_left) if timer_info.time_left else 'N/A'
    since_last_human = humanfriendly.format_timespan(timer_info.since_last) if timer_info.since_last else 'N/A'

    # timedelta.seconds is only the sub-day remainder (0..86399); use
    # total_seconds() so spans of a day or more are reported in full.
    perfdata_dict = {
        'remaining_time': {
            'value': int(timer_info.time_left.total_seconds()) if timer_info.time_left else 0,
            'unit': 's',
            'min': 0
        },
        'time_since_last': {
            'value': int(timer_info.since_last.total_seconds()) if timer_info.since_last else 0,
            'unit': 's',
            'min': 0
        }
    }

    timer_print_info = f'Next trigger time: {next_elapse_human}. Time until next trigger: {remaining_time_human}. Time since last trigger: {since_last_human}.'

    if expected_interval is not None and timer_info.next and timer_info.last:
        actual_interval_seconds = (timer_info.next - timer_info.last).total_seconds()
        if actual_interval_seconds > expected_interval:
            quit_check(f'{timer_name} is active but the last trigger was more than the expected interval ago. {timer_print_info}', nagios.STATE_CRIT, perfdata_dict)

    if timer_info.active:
        # status is None when the "Active:" line has no parenthesised word;
        # only append it when present instead of concatenating None.
        status_suffix = f' ({timer_info.status})' if timer_info.status else ''
        state_text = f' is active{status_suffix}.'
    else:
        state_text = ' -> '

    quit_check(f'{timer_name}{state_text} {timer_print_info}', nagios.STATE_OK, perfdata_dict)
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: parse the options, run the check, and turn any
    # unexpected exception into an UNKNOWN result with a traceback.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-t', '--timer',
        required=True,
        help='The name of the timer to check.',
    )
    cli.add_argument(
        '-i', '--interval',
        type=int,
        help='The expected interval between timer triggers in seconds.',
    )
    options = cli.parse_args()

    try:
        check_timer(timer_name=options.timer, expected_interval=options.interval)
    except Exception as exc:
        print(f'UNKNOWN - exception "{exc}"')
        traceback.print_exc()
        sys.exit(nagios.STATE_UNKNOWN)
|