check_greylog: alert notifications, better metric names, better filesizes

This commit is contained in:
Cyberes 2023-06-27 14:32:10 -06:00
parent 96fc88737e
commit d431bd5d7a
2 changed files with 64 additions and 19 deletions

View File

@ -10,6 +10,7 @@ from urllib3.exceptions import InsecureRequestWarning
import checker.nagios
from checker import print_icinga2_check_status
from checker.linuxfabric.base import get_state
from checker.units import human_readable_size
def main():
@ -20,6 +21,7 @@ def main():
parser.add_argument('--warn-mem', type=int, default=75, help='Percentage of JVM memory used for warm')
parser.add_argument('--crit-mem', type=int, default=100, help='Percentage of JVM memory used for critical')
parser.add_argument('--insecure', action='store_false', help="Don't verify SSL")
parser.add_argument('--crit-notif', action='store_true', help='Return critical when there are notifications')
args = parser.parse_args()
if not args.insecure:
@ -78,27 +80,19 @@ def main():
'.').replace('-', '_').replace('.', '_')
value = None
if 'value' in metric['metric']:
# perfdata.append(f'{name}={metric["metric"]["value"]}')
value = metric["metric"]["value"]
elif 'count' in metric['metric']:
# perfdata.append(f'{name}={metric["metric"]["count"]}')
value = metric["metric"]["count"]
elif 'rate' in metric['metric']:
# perfdata.append(f'{name}_total={metric["metric"]["rate"]["total"]}')
# perfdata.append(f'{name}_mean={metric["metric"]["rate"]["mean"]}')
# perfdata.append(f'{name}_five_minute={metric["metric"]["rate"]["five_minute"]}')
# perfdata.append(f'{name}_fifteen_minute={metric["metric"]["rate"]["fifteen_minute"]}')
# perfdata.append(f'{name}_one_minute={metric["metric"]["rate"]["one_minute"]}')
value = metric["metric"]["rate"]["one_minute"]
name = f'{name}_one_minute'
# if isinstance(value, float):
# value = round(value, 1)
value = int(value)
metrics_data[name] = value
jvm_mem_usage_warn = int(metrics_data['jvm_memory_heap_max'] / int(100 / args.warn_mem))
jvm_mem_usage_crit = int(metrics_data['jvm_memory_heap_max'] / int(100 / args.crit_mem))
# Some metric names are changed for better readability
perfdata = {
'throughput_input_1_sec_rate': {
'value': int(metrics_data['throughput_input_1_sec_rate']),
@ -108,28 +102,34 @@ def main():
'value': int(metrics_data['throughput_output_1_sec_rate']),
'min': 0,
},
'incoming_messages_one_minute': {
'incoming_messages_rate_per_sec_one_minute': {
'value': metrics_data['incomingMessages_one_minute'],
'min': 0,
},
'open_connections': {
'connections': {
'value': metrics_data['open_connections'],
'min': 0,
},
'total_connections': {
'value': metrics_data['total_connections'],
'min': 0,
},
'written_bytes_1sec': {
'network_out_total_1sec': {
'value': metrics_data['written_bytes_1sec'],
'min': 0,
'unit': 'B',
},
'read_bytes_1sec': {
'network_out_total_total': {
'value': metrics_data['written_bytes_total'],
'min': 0,
'unit': 'B',
},
'network_in_1sec': {
'value': metrics_data['read_bytes_1sec'],
'min': 0,
'unit': 'B',
},
'network_in_total': {
'value': metrics_data['read_bytes_total'],
'min': 0,
'unit': 'B',
},
'entries_uncommitted': {
'value': metrics_data['journal_entries_uncommitted'],
'min': 0,
@ -173,9 +173,15 @@ def main():
if jvm_mem_usage_state != checker.nagios.STATE_OK:
text_result += f' JVM memory usage is high!'
text_result = text_result + f' JVM memory usage: {int((perfdata["jvm_memory_used"]["value"] / metrics_data["jvm_memory_heap_max"]) * 100)}%, incoming_messages_one_minute: {perfdata["incoming_messages_one_minute"]["value"]}, open_connections: {perfdata["open_connections"]["value"]}' + '\n' + notif_str
exit_code = max(checker.nagios.STATE_OK, jvm_mem_usage_state)
if notifications['total'] > 0:
text_result += f' There are notifications!'
if args.crit_notif:
exit_code = checker.nagios.STATE_CRIT
text_result = text_result + f' JVM memory usage: {int((perfdata["jvm_memory_used"]["value"] / metrics_data["jvm_memory_heap_max"]) * 100)}%, incoming rate (events/second for last minute): {perfdata["incoming_messages_rate_per_sec_one_minute"]["value"]}, connections: {perfdata["connections"]["value"]}, total network in: {human_readable_size(perfdata["network_in_total"]["value"], decimal_places=0)}' + '\n' + notif_str
print_icinga2_check_status(text_result, exit_code, perfdata)
sys.exit(exit_code)

View File

@ -1,3 +1,6 @@
from math import log2, log10
from typing import Union
from hurry.filesize import size
@ -18,3 +21,39 @@ def filesize(bytes: int, spaces: bool = True, formatter: bool = True):
return x
else:
return x.replace(' ', '')
def human_readable_size(size: Union[int, float], bits=False, decimal_places: int = 2, base: int = 10):
    """Format a byte count as a short human-readable string.

    Args:
        size: Quantity in bytes. May be zero, fractional or negative.
        bits: When truthy, the value is multiplied by 8 and labelled with
            bit units instead of byte units.
        decimal_places: Number of decimals to keep. ``0`` truncates the
            scaled value to an integer; any other value rounds to that many
            places.
        base: ``10`` for SI units (KB, MB, ... in steps of 1000) or ``2``
            for IEC units (KiB, MiB, ... in steps of 1024).

    Returns:
        A string of the form ``'<number> <unit>'``, e.g. ``'1.23 MB'``.

    Raises:
        ValueError: If ``base`` is neither 2 nor 10.
    """
    units = {
        False: {
            2: ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'],
            10: ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'],
        },
        True: {
            2: ['bits', 'Kib', 'Mib', 'Gib', 'Tib', 'Pib', 'Eib', 'Zib', 'Yib'],
            10: ['bits', 'Kb', 'Mb', 'Gb', 'Tb', 'Pb', 'Eb', 'Zb', 'Yb'],
        },
    }

    # Validate up front so a bad base is reported even when size is zero.
    if base not in (2, 10):
        raise ValueError("Invalid base. Use either 2 or 10.")
    labels = units[bool(bits)][base]

    # Convert bytes to bits if needed.
    if bits:
        size *= 8

    # log() is undefined for zero, so short-circuit it.
    if size == 0:
        return '0 ' + labels[0]

    # Work on the magnitude so negative values do not break the logarithm.
    magnitude = abs(size)
    if base == 2:
        step = 10  # one unit per 2**10
        exp = int(log2(magnitude)) // step
    else:
        step = 3  # one unit per 10**3
        exp = int(log10(magnitude)) // step

    # Clamp to the available units: values below 1 stay in the smallest
    # unit (instead of wrapping around via a negative index) and values
    # beyond the largest unit are expressed in the largest unit.
    exp = max(0, min(exp, len(labels) - 1))

    size /= base ** (exp * step)
    if decimal_places == 0:
        size = int(size)
    else:
        # round() returns a new value; it must be assigned to take effect.
        size = round(size, decimal_places)
    return f'{size} {labels[exp]}'