add check_graylog_index_size, fix other stuff

This commit is contained in:
Cyberes 2024-04-09 21:23:07 -06:00
parent e78212cf65
commit 6d41b7a2c6
5 changed files with 76 additions and 6 deletions

View File

@ -42,8 +42,7 @@ def main():
parser.add_argument('--crit-notif', action='store_true', help='Return critical when there are notifications')
parser.add_argument('--ignore-update-notif', action='store_true', help='Ignore any update notifications')
parser.add_argument('--html', action='store_true', help='Print HTML')
parser.add_argument('--cluster-metrics', action='store_true',
help='Also gather cluster metrics and check for notifications')
parser.add_argument('--cluster-metrics', action='store_true', help='Also gather cluster metrics and check for notifications')
args = parser.parse_args()
base_url = args.url.strip('/')

63
check_graylog_index_size.py Executable file
View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
import argparse
import sys
import traceback
import requests
from checker import nagios
from checker.result import quit_check
from checker.string import strip_non_numbers
def _threshold_as_percent(value):
    """Normalise a CLI threshold onto the 0-100 scale used for the comparison.

    Values of 1 or less are treated as fractions (0.5 -> 50.0) so invocations
    written for the old ``* 100`` behaviour keep working; anything larger is
    taken to already be a percentage (50 -> 50).
    """
    return value * 100 if value <= 1 else value


def main(args):
    """Check Elasticsearch disk usage and report it as a Nagios-style result.

    Queries the local Elasticsearch ``_cat/nodes`` endpoint, takes the first
    node in the response and compares its ``disk.used_percent`` against the
    warning/critical thresholds.

    :param args: parsed CLI namespace providing ``size_warn_percent`` and
        ``size_crit_percent`` (percentages such as 50, or fractions such
        as 0.5).
    """
    data = {}  # make pycharm happy
    try:
        # A timeout keeps the check from hanging forever when Elasticsearch
        # accepts the connection but never answers.
        r = requests.get('http://localhost:9200/_cat/nodes?v&h=n,ip,disk*&format=json', timeout=10)
        r.raise_for_status()
        # Only the first node of the response is inspected.
        data = r.json()[0]
    except Exception as e:
        # NOTE(review): quit_check is presumed to terminate the process - confirm.
        quit_check(f'Failed to reach Elasticsearch: {e}', nagios.STATE_CRIT)

    perfdata = {
        'disk_used': {
            'value': data['disk.used'],
            'min': 0,
        },
        'disk_total': {
            'value': data['disk.total'],
            'min': 0,
        },
        'disk_avail': {
            'value': data['disk.avail'],
        },
        'disk_used_percent': {
            'value': data['disk.used_percent'],
            'unit': '%'
        }
    }

    used_percent_float = float(data['disk.used_percent'])

    # The CLI defaults are 50 and 65; multiplying those by 100 (as before)
    # produced thresholds of 5000% / 6500% that could never be reached.
    warn_percent = _threshold_as_percent(args.size_warn_percent)
    crit_percent = _threshold_as_percent(args.size_crit_percent)

    exit_str = f"{data['disk.used_percent']}% disk used ({data['disk.used'].upper()} / {data['disk.total'].upper()}). {data['disk.avail'].upper()} available."

    exit_code = nagios.STATE_OK
    if used_percent_float >= crit_percent:
        exit_code = nagios.STATE_CRIT
    elif used_percent_float >= warn_percent:
        # Previously this branch also reported CRIT, so WARN was unreachable.
        # NOTE(review): assumes checker.nagios exposes STATE_WARN - confirm.
        exit_code = nagios.STATE_WARN

    quit_check(exit_str, exit_code, perfdata)
if __name__ == "__main__":
    # Script entry point: parse the two threshold options, run the check and
    # turn any uncaught exception into an UNKNOWN result with a traceback.
    arg_parser = argparse.ArgumentParser(description='Check the Graylog index size.')
    for option, default_value in (('--size-warn-percent', 50), ('--size-crit-percent', 65)):
        arg_parser.add_argument(option, default=default_value, type=float)
    args = arg_parser.parse_args()

    try:
        main(args)
    except Exception as err:
        print(f'UNKNOWN: exception "{err}"')
        print(traceback.format_exc())
        sys.exit(nagios.STATE_UNKNOWN)

View File

@ -60,15 +60,18 @@ def main(args):
exit_code = nagios.STATE_OK
exit_msg = []
if slave_sql_running != 'Yes':
exit_code = nagios.STATE_CRIT
exit_msg.append('Slave SQL is not running!')
if slave_io_running != 'Yes':
exit_code = nagios.STATE_CRIT
exit_msg.append('Slave IO is not running!')
if last_io_error:
exit_code = nagios.STATE_CRIT
exit_msg.append(f'Last IO Error: {last_io_error.strip(".")}.')
if slave_sql_running != 'Yes':
exit_code = nagios.STATE_CRIT
exit_msg.append('Slave SQL is not running!')
if last_sql_error:
exit_code = nagios.STATE_CRIT
exit_msg.append(f'Last SQL Error: {last_sql_error.strip(".")}.')
if exit_code == nagios.STATE_OK:
# Only replication delay if everything else is healthy.

View File

@ -100,7 +100,10 @@ def main(args):
# ============================================
ap_data = cells[args.target_mac]
ap_data = cells.get(args.target_mac)
if not ap_data:
quit_check(f'AP address not found: {args.target_mac}', nagios.STATE_CRIT)
computed_quality = int(try_float(ap_data['quality'][0]) / try_float(ap_data['quality'][1]) * 100)
chan_width = ap_data['chan_width'].strip(' MHz')

2
checker/string.py Normal file
View File

@ -0,0 +1,2 @@
def strip_non_numbers(input_string):
    """Return the digit characters of ``input_string``, in their original order.

    Every character for which ``isdigit()`` is false (letters, whitespace,
    punctuation, including ``.`` and ``-``) is dropped.
    """
    digits = [ch for ch in input_string if ch.isdigit()]
    return ''.join(digits)