2024-02-28 09:54:57 -07:00
#!/usr/bin/env python3
import argparse
2024-03-07 15:18:46 -07:00
import re
import subprocess
2024-02-28 09:54:57 -07:00
import sys
import traceback
2024-10-10 20:34:47 -06:00
from datetime import datetime , timedelta
from typing import Tuple , Union
2024-02-28 09:54:57 -07:00
2024-03-07 15:18:46 -07:00
import humanfriendly
2024-10-10 20:34:47 -06:00
from dateparser import parse
2024-03-07 14:09:42 -07:00
from dateutil import tz
2024-10-10 19:47:22 -06:00
from pydantic import BaseModel
2024-02-28 09:54:57 -07:00
from checker import nagios
from checker . result import quit_check
2024-10-10 20:55:42 -06:00
# Pattern applied to the text printed by `systemctl status <unit>.timer`.
# Capture groups:
#   1 - the field that follows the unit file path on the "Loaded:" line
#       (compared against "enabled" by the caller)
#   2 - the text between "Active: " and " since", e.g. "active (waiting)"
#   3 - the "Trigger:" value up to and including its first ";", or the
#       literal "n/a" when that line carries no ";"
# No re.DOTALL is needed: line breaks are consumed by the explicit `\s*` parts.
SYSTEMCTL_STATUS_RE = r'Loaded:\s.*\.timer;\s(.*?);.*?\)\s*Active:\s(.*?) since.*?\s*Trigger:\s(.*?;|n\/a)'
2024-03-07 14:09:42 -07:00
2024-10-10 19:47:22 -06:00
class TimerInfo(BaseModel):
    """State of one systemd timer as assembled by ``get_next_elapse``."""

    # Parsed "Trigger:" timestamp; None when systemctl reports "n/a".
    next: datetime | None
    # ``next`` minus the current time; None when ``next`` is None.
    time_left: timedelta | None
    # Value returned by ``get_last_trigger``; None when no last run was found.
    last: datetime | None
    # Current time minus ``last``; None when ``last`` is None.
    since_last: timedelta | None
    # Unit name that was queried (including the ".timer" suffix).
    unit: str
    # True when the "Loaded:" line reports the unit as "enabled".
    enabled: bool
    # True when the first word of the "Active:" value is "active".
    active: bool
    # Parenthesised part of the "Active:" value (e.g. "waiting"), or None.
    status: str | None
def get_last_trigger(timer_name: str) -> datetime | None:
    """Look up the last trigger time of ``timer_name`` in ``systemctl list-timers --all``.

    The first output line (the column header) is skipped. The first remaining
    row that contains ``timer_name`` as a whitespace-separated field decides
    the result.

    Args:
        timer_name: Unit name to search for, exactly as it appears in the listing.

    Returns:
        Whatever ``dateparser.parse`` yields for fields 7-10 of the matching
        row, or None when the row has fewer than 14 fields (the timer has not
        run yet) or when no row mentions the timer.

    Raises:
        subprocess.CalledProcessError: If ``systemctl`` exits with a non-zero status.
    """
    # A fixed command needs no shell; pass the argv list directly.
    output = subprocess.check_output(["systemctl", "list-timers", "--all"]).decode('utf-8')

    for line in output.strip().split("\n")[1:]:
        fields = line.split()
        if timer_name not in fields:
            continue
        if len(fields) < 14:
            # Timer has not been run yet.
            return None
        # NOTE(review): indices 7-10 assume the LAST column starts at the
        # eighth whitespace-separated token of the row -- confirm against the
        # output of the systemd version in use.
        # The length check above guarantees these indices exist.
        return parse(' '.join(fields[7:11]))

    return None
def get_next_elapse(timer_name: str) -> Tuple[TimerInfo | None, None | str]:
    """Collect schedule and state information for a timer unit.

    Runs ``systemctl status <timer_name>``, extracts the enabled/active state
    and next trigger time with ``SYSTEMCTL_STATUS_RE``, and combines that with
    the last trigger time from ``get_last_trigger``.

    Returns:
        ``(TimerInfo, None)`` on success, or ``(None, message)`` when
        ``systemctl status`` fails or the first output line does not contain
        ``timer_name``.

    Any exception while parsing prints the raw systemctl output and a
    traceback, then exits the process with ``nagios.STATE_UNKNOWN``.
    """
    current_time = datetime.now()

    # Only this call can raise CalledProcessError that reaches the caller-facing
    # error tuple; failures further down are handled by the parse guard below.
    try:
        status_text = subprocess.check_output(["systemctl", "status", timer_name], universal_newlines=True)
    except subprocess.CalledProcessError as e:
        return None, f'systemctl status failed: {e}'

    header_line = status_text.split('\n')[0]
    if timer_name not in header_line:
        return None, 'Timer not found'

    try:
        match = re.search(SYSTEMCTL_STATUS_RE, status_text)

        trigger_text = match.group(3)
        if trigger_text.lower() == 'n/a':
            upcoming = None
            remaining = None
        else:
            upcoming = parse(trigger_text)
            # Give "now" the same tzinfo as the parsed value so they can be subtracted.
            remaining = upcoming - current_time.replace(tzinfo=upcoming.tzinfo)

        previous = get_last_trigger(timer_name)
        if previous is None:
            elapsed = None
        else:
            elapsed = current_time.replace(tzinfo=previous.tzinfo) - previous

        # "Active:" value, e.g. "active (waiting)": first word is the state,
        # a parenthesised last word is the sub-state.
        active_words = match.group(2).split(' ')
        final_word = active_words[-1]
        if '(' in final_word:
            sub_state = final_word.lower().strip('(').strip(')')
        else:
            sub_state = None

        info = TimerInfo(
            next=upcoming,
            time_left=remaining,
            last=previous,
            since_last=elapsed,
            enabled=match.group(1).lower() == 'enabled',
            active=active_words[0].lower() == 'active',
            status=sub_state,
            unit=timer_name,
        )
        return info, None
    except Exception:
        print(status_text)
        traceback.print_exc()
        sys.exit(nagios.STATE_UNKNOWN)
2024-03-07 14:09:42 -07:00
2024-02-28 09:54:57 -07:00
2024-10-10 19:47:22 -06:00
def _elapsed_seconds(delta: timedelta | None) -> int:
    """Return ``delta`` as whole seconds for perfdata; 0 when missing or negative."""
    if delta is None:
        return 0
    # total_seconds() includes whole days, which the ``.seconds`` attribute drops.
    # Clamp at zero because the perfdata entries declare ``min: 0``.
    return max(0, int(delta.total_seconds()))


def check_timer(timer_name: str, expected_interval: int | None = None):
    """Check a systemd timer and report the result through ``quit_check``.

    Args:
        timer_name: Timer unit name; ".timer" is appended when missing.
        expected_interval: Optional maximum number of seconds allowed between
            the last trigger and the next trigger. When given and both times
            are known, a larger gap is reported as CRITICAL.

    Reported states:
        UNKNOWN  - ``get_next_elapse`` returned an error message.
        CRITICAL - the timer is not enabled, or the interval check failed.
        OK       - otherwise.

    NOTE(review): this function relies on ``quit_check`` (from
    ``checker.result``) ending the process -- confirm; it is not visible here.
    """
    if not timer_name.endswith('.timer'):
        timer_name = timer_name + '.timer'

    timer_info, timer_error = get_next_elapse(timer_name)
    if timer_error:
        quit_check(str(timer_error), nagios.STATE_UNKNOWN)
    if not timer_info.enabled:
        quit_check(f'{timer_name} is not enabled', nagios.STATE_CRIT)

    if timer_info.next:
        next_elapse_human = timer_info.next.replace(tzinfo=tz.tzlocal()).strftime('%a %Y-%m-%d %H:%M %Z')
    else:
        next_elapse_human = 'N/A'
    remaining_time_human = humanfriendly.format_timespan(timer_info.time_left) if timer_info.time_left else 'N/A'
    since_last_human = humanfriendly.format_timespan(timer_info.since_last) if timer_info.since_last else 'N/A'

    perfdata_dict = {
        'remaining_time': {
            'value': _elapsed_seconds(timer_info.time_left),
            'unit': 's',
            'min': 0,
        },
        'time_since_last': {
            'value': _elapsed_seconds(timer_info.since_last),
            'unit': 's',
            'min': 0,
        },
    }

    timer_print_info = (
        f'Next trigger time: {next_elapse_human}. '
        f'Time until next trigger: {remaining_time_human}. '
        f'Time since last trigger: {since_last_human}.'
    )

    if expected_interval is not None and timer_info.next and timer_info.last:
        actual_interval_seconds = (timer_info.next - timer_info.last).total_seconds()
        if actual_interval_seconds > expected_interval:
            quit_check(
                f'{timer_name} is active but the last trigger was more than the expected interval ago. {timer_print_info}',
                nagios.STATE_CRIT,
                perfdata_dict,
            )

    if timer_info.active:
        # ``status`` is None when the "Active:" value has no parenthesised
        # sub-state; only append it when present.
        status_suffix = f' ({timer_info.status})' if timer_info.status else ''
        state_text = f'is active{status_suffix}.'
    else:
        state_text = '->'
    quit_check(f'{timer_name} {state_text} {timer_print_info}', nagios.STATE_OK, perfdata_dict)
2024-02-28 09:54:57 -07:00
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the check.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-t', '--timer', required=True, help='The name of the timer to check.')
    arg_parser.add_argument('-i', '--interval', type=int, help='The expected interval between timer triggers in seconds.')
    cli_args = arg_parser.parse_args()

    try:
        check_timer(cli_args.timer, cli_args.interval)
    except Exception as exc:
        # Any unexpected failure is reported to the monitoring system as UNKNOWN.
        print(f'UNKNOWN - exception "{exc}"')
        traceback.print_exc()
        sys.exit(nagios.STATE_UNKNOWN)