add check_graylog_index_size, fix other stuff
This commit is contained in:
parent
e78212cf65
commit
6d41b7a2c6
|
@ -42,8 +42,7 @@ def main():
|
||||||
parser.add_argument('--crit-notif', action='store_true', help='Return critical when there are notifications')
|
parser.add_argument('--crit-notif', action='store_true', help='Return critical when there are notifications')
|
||||||
parser.add_argument('--ignore-update-notif', action='store_true', help='Ignore any update notifications')
|
parser.add_argument('--ignore-update-notif', action='store_true', help='Ignore any update notifications')
|
||||||
parser.add_argument('--html', action='store_true', help='Print HTML')
|
parser.add_argument('--html', action='store_true', help='Print HTML')
|
||||||
parser.add_argument('--cluster-metrics', action='store_true',
|
parser.add_argument('--cluster-metrics', action='store_true', help='Also gather cluster metrics and check for notifications')
|
||||||
help='Also gather cluster metrics and check for notifications')
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
base_url = args.url.strip('/')
|
base_url = args.url.strip('/')
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from checker import nagios
|
||||||
|
from checker.result import quit_check
|
||||||
|
from checker.string import strip_non_numbers
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Check Elasticsearch disk usage and report it as a Nagios-style result.

    Requests the ``_cat/nodes`` listing from the Elasticsearch instance on
    localhost, takes the first node in the response, and compares its
    ``disk.used_percent`` value against the thresholds in ``args``. The
    outcome (message, exit code and perfdata) is handed to ``quit_check``.

    Args:
        args: Parsed command-line arguments providing ``size_warn_percent``
            and ``size_crit_percent``. Both are percentages on the same scale
            as ``disk.used_percent`` (e.g. ``50`` means 50%).
    """
    data = {}  # make pycharm happy
    try:
        r = requests.get('http://localhost:9200/_cat/nodes?v&h=n,ip,disk*&format=json')
        r.raise_for_status()
        data = r.json()[0]
    except Exception as e:
        # Covers connection errors, HTTP error statuses, invalid JSON and an
        # empty node list alike.
        quit_check(f'Failed to reach Elasticsearch: {e}', nagios.STATE_CRIT)

    perfdata = {
        'disk_used': {
            'value': data['disk.used'],
            'min': 0,
        },
        'disk_total': {
            'value': data['disk.total'],
            'min': 0,
        },
        'disk_avail': {
            'value': data['disk.avail'],
        },
        'disk_used_percent': {
            'value': data['disk.used_percent'],
            'unit': '%',
        },
    }

    used_percent_float = float(data['disk.used_percent'])

    # The thresholds are already percentages (defaults 50 and 65), so they are
    # compared directly. Scaling them by 100 would put them far above any
    # possible reading and the check could never leave the OK state.
    warn_percent = args.size_warn_percent
    crit_percent = args.size_crit_percent

    exit_str = f"{data['disk.used_percent']}% disk used ({data['disk.used'].upper()} / {data['disk.total'].upper()}). {data['disk.avail'].upper()} available."
    exit_code = nagios.STATE_OK

    if used_percent_float >= crit_percent:
        exit_code = nagios.STATE_CRIT
    elif used_percent_float >= warn_percent:
        # Between the two thresholds is a warning, not a critical.
        # NOTE(review): assumes checker.nagios exposes STATE_WARN alongside
        # STATE_OK / STATE_CRIT / STATE_UNKNOWN - confirm the constant name.
        exit_code = nagios.STATE_WARN
    quit_check(exit_str, exit_code, perfdata)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: read the two threshold options, run the check, and
    # turn any exception escaping main() into an UNKNOWN exit status.
    arg_parser = argparse.ArgumentParser(description='Check the Graylog index size.')
    for flag, default_value in (('--size-warn-percent', 50), ('--size-crit-percent', 65)):
        arg_parser.add_argument(flag, default=default_value, type=float)
    args = arg_parser.parse_args()

    try:
        main(args)
    except Exception as exc:
        # Print a one-line summary first, followed by the full traceback.
        print(f'UNKNOWN: exception "{exc}"')
        print(traceback.format_exc())
        sys.exit(nagios.STATE_UNKNOWN)
|
|
@ -60,15 +60,18 @@ def main(args):
|
||||||
exit_code = nagios.STATE_OK
|
exit_code = nagios.STATE_OK
|
||||||
exit_msg = []
|
exit_msg = []
|
||||||
|
|
||||||
if slave_sql_running != 'Yes':
|
|
||||||
exit_code = nagios.STATE_CRIT
|
|
||||||
exit_msg.append('Slave SQL is not running!')
|
|
||||||
if slave_io_running != 'Yes':
|
if slave_io_running != 'Yes':
|
||||||
exit_code = nagios.STATE_CRIT
|
exit_code = nagios.STATE_CRIT
|
||||||
exit_msg.append('Slave IO is not running!')
|
exit_msg.append('Slave IO is not running!')
|
||||||
if last_io_error:
|
if last_io_error:
|
||||||
exit_code = nagios.STATE_CRIT
|
exit_code = nagios.STATE_CRIT
|
||||||
exit_msg.append(f'Last IO Error: {last_io_error.strip(".")}.')
|
exit_msg.append(f'Last IO Error: {last_io_error.strip(".")}.')
|
||||||
|
if slave_sql_running != 'Yes':
|
||||||
|
exit_code = nagios.STATE_CRIT
|
||||||
|
exit_msg.append('Slave SQL is not running!')
|
||||||
|
if last_sql_error:
|
||||||
|
exit_code = nagios.STATE_CRIT
|
||||||
|
exit_msg.append(f'Last SQL Error: {last_sql_error.strip(".")}.')
|
||||||
|
|
||||||
if exit_code == nagios.STATE_OK:
|
if exit_code == nagios.STATE_OK:
|
||||||
# Only replication delay if everything else is healthy.
|
# Only replication delay if everything else is healthy.
|
||||||
|
|
|
@ -100,7 +100,10 @@ def main(args):
|
||||||
|
|
||||||
# ============================================
|
# ============================================
|
||||||
|
|
||||||
ap_data = cells[args.target_mac]
|
ap_data = cells.get(args.target_mac)
|
||||||
|
if not ap_data:
|
||||||
|
quit_check(f'AP address not found: {args.target_mac}', nagios.STATE_CRIT)
|
||||||
|
|
||||||
computed_quality = int(try_float(ap_data['quality'][0]) / try_float(ap_data['quality'][1]) * 100)
|
computed_quality = int(try_float(ap_data['quality'][0]) / try_float(ap_data['quality'][1]) * 100)
|
||||||
|
|
||||||
chan_width = ap_data['chan_width'].strip(' MHz')
|
chan_width = ap_data['chan_width'].strip(' MHz')
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
def strip_non_numbers(input_string):
    """Return only the decimal digit characters of *input_string*, in order.

    Every other character (letters, whitespace, signs, decimal points, ...)
    is dropped, so ``'1.5 GB'`` becomes ``'15'``. An empty string is returned
    when the input contains no decimal digits.

    ``str.isdecimal`` is used rather than ``str.isdigit`` because the latter
    also accepts digit-like characters such as superscripts (``'²'``), which
    ``int()`` cannot parse.
    """
    return ''.join(char for char in input_string if char.isdecimal())
|
Loading…
Reference in New Issue