catch errors, message formatting

This commit is contained in:
Cyberes 2023-04-21 23:54:16 -06:00
parent 74a4849cd8
commit 62b7cd6594
3 changed files with 216 additions and 185 deletions

View File

@ -20,9 +20,11 @@ parser.add_argument('--warn', type=float, help='Manually set warn level.')
parser.add_argument('--crit', type=float, help='Manually set critical level.')
args = parser.parse_args()
# TODO: add warn support
if args.type == 'gc-time':
def main():
if args.type == 'gc-time':
# in seconds
python_gc_time_sum_MAX = 0.002 if not args.crit else args.crit
try:
@ -36,7 +38,7 @@ if args.type == 'gc-time':
except Exception as e:
print(f'UNKNOWN: failed to check avg. GC time "{e}"')
sys.exit(nagios.UNKNOWN)
elif args.type == 'response-time':
elif args.type == 'response-time':
response_time_MAX = 1 if not args.crit else args.crit
timeout = 10
try:
@ -61,7 +63,7 @@ elif args.type == 'response-time':
except Exception as e:
print(f'UNKNOWN: failed to check response time "{e}"')
sys.exit(nagios.UNKNOWN)
elif args.type == 'outgoing-http-rate':
elif args.type == 'outgoing-http-rate':
# outgoing req/sec
outgoing_http_request_rate_MAX = 10 if not args.crit else args.crit
try:
@ -81,7 +83,7 @@ elif args.type == 'outgoing-http-rate':
except Exception as e:
print(f'UNKNOWN: failed to check outgoing HTTP request rate "{e}"')
sys.exit(nagios.UNKNOWN)
elif args.type == 'avg-send':
elif args.type == 'avg-send':
# Average send time in seconds
event_send_time_MAX = 1 if not args.crit else args.crit
try:
@ -95,7 +97,7 @@ elif args.type == 'avg-send':
except Exception as e:
print(f'UNKNOWN: failed to check average message send time "{e}"')
sys.exit(nagios.UNKNOWN)
elif args.type == 'db-lag':
elif args.type == 'db-lag':
# in seconds
db_lag_MAX = 0.01 if not args.crit else args.crit
try:
@ -109,6 +111,17 @@ elif args.type == 'db-lag':
except Exception as e:
print(f'UNKNOWN: failed to check DB lag "{e}"')
sys.exit(nagios.UNKNOWN)
else:
else:
print('Wrong type')
sys.exit(nagios.UNKNOWN)
if __name__ == "__main__":
    # Script entry point. A deliberate sys.exit() inside main() raises
    # SystemExit, which is not an Exception subclass, so it passes through
    # this handler untouched; only unexpected errors are reported here.
    try:
        main()
    except Exception as e:
        # Summary line first, then the full traceback for debugging.
        print(f'UNKNOWN: exception "{e}"')
        import traceback
        print(traceback.format_exc())
        sys.exit(nagios.UNKNOWN)

View File

@ -18,7 +18,9 @@ parser.add_argument('--warn', type=float, default=20, help='Manually set warn le
parser.add_argument('--crit', type=float, default=30, help='Manually set critical level.')
args = parser.parse_args()
if args.prometheus:
def main():
if args.prometheus:
from checker.prometheus import parse_metrics
r = requests.get(args.metrics_endpoint)
@ -48,7 +50,7 @@ if args.prometheus:
print('receiver latency is', receiver_avg)
print('sender latency is', sender_avg)
else:
else:
from bs4 import BeautifulSoup
import re
@ -56,13 +58,11 @@ else:
if len(args.ignore) == 1:
args.ignore = args.ignore[0].strip(' ').split(' ')
def get_sec(time_str: str) -> int:
    """Convert an ``H:M:S`` time string into a total number of seconds.

    Args:
        time_str: Three colon-separated integer fields (hours, minutes,
            seconds), e.g. ``"01:02:03"``.

    Returns:
        The total number of seconds as an ``int``.

    Raises:
        ValueError: If the string does not split into exactly three fields
            or a field is not an integer.
    """
    hours, minutes, seconds = (int(part) for part in time_str.split(':'))
    return hours * 3600 + minutes * 60 + seconds
def ms_to_s(s):
min_m = re.match(r'^(\d+)m([\d.]+)s', s)
if min_m:
@ -72,7 +72,6 @@ else:
elif s.endswith('s'):
return float(s.strip('ms'))
r = requests.get(args.metrics_endpoint)
if r.status_code != 200:
sys.exit(nagios.UNKNOWN)
@ -97,6 +96,7 @@ else:
for domain, values in data.items():
if domain not in args.ignore:
if 'send' in values.keys():
if values['send'] >= args.crit:
info_str.append(f'CRITICAL: {domain} send is {values["send"]}s.')
exit_code = nagios.CRITICAL
@ -106,7 +106,10 @@ else:
exit_code = nagios.WARNING
# else:
# print(f'OK: {domain} send is {values["send"]}s.')
else:
info_str.append(f'UNKNOWN: {domain} send is empty.')
if 'receive' in values.keys():
if values['receive'] >= args.crit:
info_str.append(f'CRITICAL: {domain} receive is {values["receive"]}s.')
exit_code = nagios.CRITICAL
@ -116,6 +119,10 @@ else:
exit_code = nagios.WARNING
# else:
# print(f'OK: {domain} receive is {values["receive"]}s.')
else:
info_str.append(f'UNKNOWN: {domain} receive is empty.')
if 'send' in values.keys() and 'receive' in values.keys():
data_str.append(
f"'{domain}-send'={values['send']}s;;; '{domain}-receive'={values['receive']}s;;;"
)
@ -127,3 +134,14 @@ else:
print(f'|{" ".join(data_str)}')
sys.exit(exit_code)
if __name__ == "__main__":
    # Script entry point. A deliberate sys.exit() inside main() raises
    # SystemExit, which is not an Exception subclass, so it passes through
    # this handler untouched; only unexpected errors are reported here.
    try:
        main()
    except Exception as e:
        # Summary line first, then the full traceback for debugging.
        print(f'UNKNOWN: exception "{e}"')
        import traceback
        print(traceback.format_exc())
        sys.exit(nagios.UNKNOWN)

View File

@ -66,7 +66,7 @@ def build_msg(host_name, host_display_name, state, date_str, output, service_nam
elif host_name:
icinga2_url = f'<br>[Quick Link]({icinga2_url}/icingadb/host?name={host_name.replace(" ", "+")})'
msg = f"""{icon}&nbsp;&nbsp;&nbsp;{item} is <font color="{choose_color(state)}">{state}</font> <br>
msg = f"""{icon}&nbsp;&nbsp;&nbsp;{item} is **<font color="{choose_color(state)}">{state}</font>** <br>
**When:** {date_str}. <br>
**Info:** {newline_to_formatted_html(output)}{address}{comment}{icinga2_url}"""
return msg