redo check_systemd_timer again
This commit is contained in:
parent
c2d36b06db
commit
afacf15c50
|
@ -4,93 +4,85 @@ import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
from typing import Optional, Tuple, Union
|
from typing import Tuple, Union
|
||||||
|
|
||||||
import humanfriendly
|
import humanfriendly
|
||||||
|
from dateparser import parse
|
||||||
from dateutil import tz
|
from dateutil import tz
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from checker import nagios
|
from checker import nagios
|
||||||
from checker.humanfriendly import parse_systemctl_time_delta
|
|
||||||
from checker.result import quit_check
|
from checker.result import quit_check
|
||||||
|
|
||||||
sys.path.insert(0, "/usr/lib/python3/dist-packages")
|
sys.path.insert(0, "/usr/lib/python3/dist-packages")
|
||||||
import dbus
|
import dbus
|
||||||
|
|
||||||
SYSTEMCTL_TIMERS_RE = re.compile(
|
SYSTEMCTL_STATUS_RE = r'Loaded:\s.*\.timer;\s(.*?);.*?\)\s*Active:\s(.*?) since.*?\s*Trigger:\s(.*?);'
|
||||||
r'^(([A-Za-z]*\s[0-9]{4}-[0-9]{2}-[0-9]{2}\s*[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]*)|n\/a|-)\s*((([0-9]*[a-z]*\s)*(?:left)?)|n\/a|-)\s*(([A-Za-z]*\s[0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]*)|n\/a|-)\s*(([0-9A-Za-z\s]*\sago)|n\/a|-)\s*(.*?\.timer)\s*((.*?\.service)|\s*)'
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TimerInfo(BaseModel):
|
class TimerInfo(BaseModel):
|
||||||
next: Optional[datetime]
|
next: datetime
|
||||||
left: Optional[Union[float, None]]
|
time_left: timedelta
|
||||||
last: Optional[str]
|
last: Union[datetime, None]
|
||||||
passed: Optional[Union[float, None]]
|
since_last: Union[timedelta, None]
|
||||||
unit: Optional[str]
|
unit: str
|
||||||
activates: Optional[str]
|
enabled: bool
|
||||||
|
active: bool
|
||||||
|
status: Union[str, None]
|
||||||
|
|
||||||
|
|
||||||
def is_timer_running(timer_name):
|
def get_last_trigger(timer_name: str):
|
||||||
|
output = subprocess.check_output("systemctl list-timers --all", shell=True).decode('utf-8')
|
||||||
|
lines = output.strip().split("\n")
|
||||||
|
for line in lines[1:]:
|
||||||
|
fields = line.split()
|
||||||
|
if timer_name in fields:
|
||||||
|
if len(fields) < 14:
|
||||||
|
# Timer has not been run yet.
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return parse(fields[7] + ' ' + fields[8] + ' ' + fields[9] + ' ' + fields[10])
|
||||||
|
except IndexError:
|
||||||
|
print(fields)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def get_next_elapse(timer_name: str) -> Tuple[TimerInfo | None, None | str]:
|
||||||
try:
|
try:
|
||||||
output = subprocess.check_output(["systemctl", "status", timer_name], universal_newlines=True)
|
output = subprocess.check_output(["systemctl", "status", timer_name], universal_newlines=True)
|
||||||
if "Active: active (running)" in output:
|
if timer_name in output.split('\n')[0]:
|
||||||
return True
|
|
||||||
return False
|
|
||||||
except subprocess.CalledProcessError as e:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def get_next_elapse(timer_name) -> Tuple[TimerInfo | None, None | str]:
|
|
||||||
try:
|
try:
|
||||||
output = subprocess.check_output(["systemctl", "list-timers", "--all"], universal_newlines=True)
|
parts = re.search(SYSTEMCTL_STATUS_RE, output)
|
||||||
lines = output.split('\n')
|
|
||||||
for line in lines:
|
|
||||||
if timer_name in line:
|
|
||||||
try:
|
|
||||||
parts = re.search(SYSTEMCTL_TIMERS_RE, line)
|
|
||||||
|
|
||||||
datetime_object = None
|
next_trigger = parse(parts.group(3))
|
||||||
if parts.group(2):
|
now = datetime.now(tz=next_trigger.tzinfo)
|
||||||
datetime_object = datetime.strptime(parts.group(2), '%a %Y-%m-%d %H:%M:%S %Z')
|
time_left = next_trigger - now
|
||||||
|
last_trigger = get_last_trigger(timer_name)
|
||||||
time_left: float | None = None
|
if last_trigger is not None:
|
||||||
if parts.group(4):
|
since_last = last_trigger - now
|
||||||
try:
|
|
||||||
time_left = parse_systemctl_time_delta(parts.group(4))
|
|
||||||
except Exception as tl_err:
|
|
||||||
if isinstance(tl_err, humanfriendly.InvalidTimespan):
|
|
||||||
return None, f'Invalid Timespan: "{parts.group(4)}"'
|
|
||||||
else:
|
else:
|
||||||
raise
|
since_last = None
|
||||||
|
|
||||||
time_passed: float | None = None
|
print(since_last)
|
||||||
if parts.group(9):
|
|
||||||
try:
|
|
||||||
time_passed = parse_systemctl_time_delta(parts.group(9))
|
|
||||||
except Exception as tp_err:
|
|
||||||
if isinstance(tp_err, humanfriendly.InvalidTimespan):
|
|
||||||
return None, f'Invalid Timespan: "{parts.group(9)}"'
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
|
|
||||||
timer_info = TimerInfo(
|
return TimerInfo(
|
||||||
next=datetime_object,
|
next=next_trigger,
|
||||||
left=time_left,
|
time_left=time_left,
|
||||||
last=parts.group(7),
|
last=last_trigger,
|
||||||
passed=time_passed,
|
since_last=since_last,
|
||||||
unit=parts.group(10),
|
enabled=parts.group(1).lower() == 'enabled',
|
||||||
activates=parts.group(12)
|
active=parts.group(2).split(' ')[0].lower() == 'active',
|
||||||
)
|
status=parts.group(2).split(' ')[-1].lower().strip('(').strip(')') if '(' in parts.group(2).split(' ')[-1] else None,
|
||||||
return timer_info, None
|
unit=timer_name,
|
||||||
|
), None
|
||||||
except Exception:
|
except Exception:
|
||||||
print(output)
|
print(output)
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
sys.exit(nagios.STATE_UNKNOWN)
|
sys.exit(nagios.STATE_UNKNOWN)
|
||||||
return None, 'Timer not found'
|
return None, 'Timer not found'
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
return None, f'systemctl list-timers failed: {e}'
|
return None, f'systemctl status failed: {e}'
|
||||||
|
|
||||||
|
|
||||||
def check_timer(timer_name: str, expected_interval: int = None):
|
def check_timer(timer_name: str, expected_interval: int = None):
|
||||||
|
@ -105,7 +97,6 @@ def check_timer(timer_name: str, expected_interval: int = None):
|
||||||
timer_unit = system_bus.get_object('org.freedesktop.systemd1', timer_unit_path)
|
timer_unit = system_bus.get_object('org.freedesktop.systemd1', timer_unit_path)
|
||||||
timer_properties = dbus.Interface(timer_unit, 'org.freedesktop.DBus.Properties')
|
timer_properties = dbus.Interface(timer_unit, 'org.freedesktop.DBus.Properties')
|
||||||
active_state = timer_properties.Get('org.freedesktop.systemd1.Unit', 'ActiveState')
|
active_state = timer_properties.Get('org.freedesktop.systemd1.Unit', 'ActiveState')
|
||||||
running_state = is_timer_running(timer_name)
|
|
||||||
|
|
||||||
if active_state == 'active':
|
if active_state == 'active':
|
||||||
next_elapse, err = get_next_elapse(timer_name)
|
next_elapse, err = get_next_elapse(timer_name)
|
||||||
|
@ -115,45 +106,34 @@ def check_timer(timer_name: str, expected_interval: int = None):
|
||||||
# if (next_elapse.left is not None and next_elapse.passed is not None) and (next_elapse.left < 0 or next_elapse.passed < 0):
|
# if (next_elapse.left is not None and next_elapse.passed is not None) and (next_elapse.left < 0 or next_elapse.passed < 0):
|
||||||
# quit_check(f'Timer is negative??? Left: {next_elapse["left"]}. Passed: {next_elapse["passed"]}', nagios.STATE_UNKNOWN)
|
# quit_check(f'Timer is negative??? Left: {next_elapse["left"]}. Passed: {next_elapse["passed"]}', nagios.STATE_UNKNOWN)
|
||||||
|
|
||||||
if next_elapse.next:
|
|
||||||
next_elapse_human = next_elapse.next.replace(tzinfo=tz.tzlocal()).strftime('%a %Y-%m-%d %H:%M %Z')
|
next_elapse_human = next_elapse.next.replace(tzinfo=tz.tzlocal()).strftime('%a %Y-%m-%d %H:%M %Z')
|
||||||
else:
|
remaining_time_human = humanfriendly.format_timespan(next_elapse.time_left)
|
||||||
next_elapse_human = 'N/A'
|
since_last_human = humanfriendly.format_timespan(next_elapse.since_last) if next_elapse.since_last else 'N/A'
|
||||||
|
|
||||||
if next_elapse.left is not None:
|
|
||||||
remaining_time_human = humanfriendly.format_timespan(next_elapse.left)
|
|
||||||
else:
|
|
||||||
remaining_time_human = 'N/A'
|
|
||||||
if next_elapse.passed is not None:
|
|
||||||
passed_time_human = humanfriendly.format_timespan(next_elapse.passed)
|
|
||||||
else:
|
|
||||||
passed_time_human = 'N/A'
|
|
||||||
|
|
||||||
perfdata_dict = {
|
perfdata_dict = {
|
||||||
'remaining_time': {
|
'remaining_time': {
|
||||||
'value': int(next_elapse.left) if next_elapse.left is not None else 0,
|
'value': next_elapse.time_left.seconds,
|
||||||
'unit': 's',
|
'unit': 's',
|
||||||
'min': 0
|
'min': 0
|
||||||
},
|
},
|
||||||
'time_since_last': {
|
'time_since_last': {
|
||||||
'value': int(next_elapse.passed) if next_elapse.passed is not None else 0,
|
'value': next_elapse.since_last.seconds if next_elapse.since_last is not None else 0,
|
||||||
'unit': 's',
|
'unit': 's',
|
||||||
'min': 0
|
'min': 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
timer_info = f'Next trigger time: {next_elapse_human}. Time until next trigger: {remaining_time_human}. Time since last trigger: {passed_time_human}.'
|
timer_info = f'Next trigger time: {next_elapse_human}. Time until next trigger: {remaining_time_human}. Time since last trigger: {since_last_human}.'
|
||||||
|
|
||||||
if expected_interval is not None and next_elapse.next and next_elapse.last:
|
if expected_interval is not None and next_elapse.next and next_elapse.last:
|
||||||
next_trigger_time = next_elapse.next
|
next_trigger_time = next_elapse.next
|
||||||
last_trigger_time = datetime.strptime(next_elapse.last, '%a %Y-%m-%d %H:%M:%S %Z')
|
actual_interval = next_trigger_time - next_elapse.last
|
||||||
actual_interval = next_trigger_time - last_trigger_time
|
|
||||||
actual_interval_seconds = actual_interval.total_seconds()
|
actual_interval_seconds = actual_interval.total_seconds()
|
||||||
|
|
||||||
if actual_interval_seconds > expected_interval:
|
if actual_interval_seconds > expected_interval:
|
||||||
quit_check(f'{timer_name} is active but the last trigger was more than the expected interval ago. {timer_info}', nagios.STATE_CRIT, perfdata_dict)
|
quit_check(f'{timer_name} is active but the last trigger was more than the expected interval ago. {timer_info}', nagios.STATE_CRIT, perfdata_dict)
|
||||||
|
|
||||||
quit_check(f'{timer_name} is {"active" if not running_state else "active (running)"}. {timer_info}', nagios.STATE_OK, perfdata_dict)
|
quit_check(f'{timer_name}{" is active" + (" (" + next_elapse.status + ")" if next_elapse else "") + "." if next_elapse.active else " -> "} {timer_info}', nagios.STATE_OK, perfdata_dict)
|
||||||
else:
|
else:
|
||||||
quit_check(f'{timer_name} is not enabled', nagios.STATE_CRIT)
|
quit_check(f'{timer_name} is not enabled', nagios.STATE_CRIT)
|
||||||
except dbus.exceptions.DBusException:
|
except dbus.exceptions.DBusException:
|
||||||
|
|
Loading…
Reference in New Issue