catch errors, message formatting
This commit is contained in:
parent
74a4849cd8
commit
62b7cd6594
|
@ -20,9 +20,11 @@ parser.add_argument('--warn', type=float, help='Manually set warn level.')
|
|||
parser.add_argument('--crit', type=float, help='Manually set critical level.')
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
# TODO: add warn support
|
||||
|
||||
if args.type == 'gc-time':
|
||||
def main():
|
||||
if args.type == 'gc-time':
|
||||
# in seconds
|
||||
python_gc_time_sum_MAX = 0.002 if not args.crit else args.crit
|
||||
try:
|
||||
|
@ -36,7 +38,7 @@ if args.type == 'gc-time':
|
|||
except Exception as e:
|
||||
print(f'UNKNOWN: failed to check avg. GC time "{e}"')
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
elif args.type == 'response-time':
|
||||
elif args.type == 'response-time':
|
||||
response_time_MAX = 1 if not args.crit else args.crit
|
||||
timeout = 10
|
||||
try:
|
||||
|
@ -61,7 +63,7 @@ elif args.type == 'response-time':
|
|||
except Exception as e:
|
||||
print(f'UNKNOWN: failed to check response time "{e}"')
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
elif args.type == 'outgoing-http-rate':
|
||||
elif args.type == 'outgoing-http-rate':
|
||||
# outgoing req/sec
|
||||
outgoing_http_request_rate_MAX = 10 if not args.crit else args.crit
|
||||
try:
|
||||
|
@ -81,7 +83,7 @@ elif args.type == 'outgoing-http-rate':
|
|||
except Exception as e:
|
||||
print(f'UNKNOWN: failed to check outgoing HTTP request rate "{e}"')
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
elif args.type == 'avg-send':
|
||||
elif args.type == 'avg-send':
|
||||
# Average send time in seconds
|
||||
event_send_time_MAX = 1 if not args.crit else args.crit
|
||||
try:
|
||||
|
@ -95,7 +97,7 @@ elif args.type == 'avg-send':
|
|||
except Exception as e:
|
||||
print(f'UNKNOWN: failed to check average message send time "{e}"')
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
elif args.type == 'db-lag':
|
||||
elif args.type == 'db-lag':
|
||||
# in seconds
|
||||
db_lag_MAX = 0.01 if not args.crit else args.crit
|
||||
try:
|
||||
|
@ -109,6 +111,17 @@ elif args.type == 'db-lag':
|
|||
except Exception as e:
|
||||
print(f'UNKNOWN: failed to check DB lag "{e}"')
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
else:
|
||||
else:
|
||||
print('Wrong type')
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
except Exception as e:
|
||||
print(f'UNKNOWN: exception "{e}"')
|
||||
import traceback
|
||||
|
||||
print(traceback.format_exc())
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
|
|
|
@ -18,7 +18,9 @@ parser.add_argument('--warn', type=float, default=20, help='Manually set warn le
|
|||
parser.add_argument('--crit', type=float, default=30, help='Manually set critical level.')
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.prometheus:
|
||||
|
||||
def main():
|
||||
if args.prometheus:
|
||||
from checker.prometheus import parse_metrics
|
||||
|
||||
r = requests.get(args.metrics_endpoint)
|
||||
|
@ -48,7 +50,7 @@ if args.prometheus:
|
|||
|
||||
print('receiver latency is', receiver_avg)
|
||||
print('sender latency is', sender_avg)
|
||||
else:
|
||||
else:
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
|
@ -56,13 +58,11 @@ else:
|
|||
if len(args.ignore) == 1:
|
||||
args.ignore = args.ignore[0].strip(' ').split(' ')
|
||||
|
||||
|
||||
def get_sec(time_str):
|
||||
"""Get seconds from time."""
|
||||
h, m, s = time_str.split(':')
|
||||
return int(h) * 3600 + int(m) * 60 + int(s)
|
||||
|
||||
|
||||
def ms_to_s(s):
|
||||
min_m = re.match(r'^(\d+)m([\d.]+)s', s)
|
||||
if min_m:
|
||||
|
@ -72,7 +72,6 @@ else:
|
|||
elif s.endswith('s'):
|
||||
return float(s.strip('ms'))
|
||||
|
||||
|
||||
r = requests.get(args.metrics_endpoint)
|
||||
if r.status_code != 200:
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
|
@ -97,6 +96,7 @@ else:
|
|||
|
||||
for domain, values in data.items():
|
||||
if domain not in args.ignore:
|
||||
if 'send' in values.keys():
|
||||
if values['send'] >= args.crit:
|
||||
info_str.append(f'CRITICAL: {domain} send is {values["send"]}s.')
|
||||
exit_code = nagios.CRITICAL
|
||||
|
@ -106,7 +106,10 @@ else:
|
|||
exit_code = nagios.WARNING
|
||||
# else:
|
||||
# print(f'OK: {domain} send is {values["send"]}s.')
|
||||
else:
|
||||
info_str.append(f'UNKNOWN: {domain} send is empty.')
|
||||
|
||||
if 'receive' in values.keys():
|
||||
if values['receive'] >= args.crit:
|
||||
info_str.append(f'CRITICAL: {domain} receive is {values["receive"]}s.')
|
||||
exit_code = nagios.CRITICAL
|
||||
|
@ -116,6 +119,10 @@ else:
|
|||
exit_code = nagios.WARNING
|
||||
# else:
|
||||
# print(f'OK: {domain} receive is {values["receive"]}s.')
|
||||
else:
|
||||
info_str.append(f'UNKNOWN: {domain} receive is empty.')
|
||||
|
||||
if 'send' in values.keys() and 'receive' in values.keys():
|
||||
data_str.append(
|
||||
f"'{domain}-send'={values['send']}s;;; '{domain}-receive'={values['receive']}s;;;"
|
||||
)
|
||||
|
@ -127,3 +134,14 @@ else:
|
|||
print(f'|{" ".join(data_str)}')
|
||||
|
||||
sys.exit(exit_code)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
except Exception as e:
|
||||
print(f'UNKNOWN: exception "{e}"')
|
||||
import traceback
|
||||
|
||||
print(traceback.format_exc())
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
|
|
|
@ -66,7 +66,7 @@ def build_msg(host_name, host_display_name, state, date_str, output, service_nam
|
|||
elif host_name:
|
||||
icinga2_url = f'<br>[Quick Link]({icinga2_url}/icingadb/host?name={host_name.replace(" ", "+")})'
|
||||
|
||||
msg = f"""{icon} {item} is <font color="{choose_color(state)}">{state}</font> <br>
|
||||
msg = f"""{icon} {item} is **<font color="{choose_color(state)}">{state}</font>** <br>
|
||||
**When:** {date_str}. <br>
|
||||
**Info:** {newline_to_formatted_html(output)}{address}{comment}{icinga2_url}"""
|
||||
return msg
|
||||
|
|
Loading…
Reference in New Issue