check_greylog: alert notifications, better metric names, better filesizes
This commit is contained in:
parent
96fc88737e
commit
d431bd5d7a
|
@ -10,6 +10,7 @@ from urllib3.exceptions import InsecureRequestWarning
|
||||||
import checker.nagios
|
import checker.nagios
|
||||||
from checker import print_icinga2_check_status
|
from checker import print_icinga2_check_status
|
||||||
from checker.linuxfabric.base import get_state
|
from checker.linuxfabric.base import get_state
|
||||||
|
from checker.units import human_readable_size
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -20,6 +21,7 @@ def main():
|
||||||
parser.add_argument('--warn-mem', type=int, default=75, help='Percentage of JVM memory used for warm')
|
parser.add_argument('--warn-mem', type=int, default=75, help='Percentage of JVM memory used for warm')
|
||||||
parser.add_argument('--crit-mem', type=int, default=100, help='Percentage of JVM memory used for critical')
|
parser.add_argument('--crit-mem', type=int, default=100, help='Percentage of JVM memory used for critical')
|
||||||
parser.add_argument('--insecure', action='store_false', help="Don't verify SSL")
|
parser.add_argument('--insecure', action='store_false', help="Don't verify SSL")
|
||||||
|
parser.add_argument('--crit-notif', action='store_true', help='Return critical when there are notifications')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if not args.insecure:
|
if not args.insecure:
|
||||||
|
@ -78,27 +80,19 @@ def main():
|
||||||
'.').replace('-', '_').replace('.', '_')
|
'.').replace('-', '_').replace('.', '_')
|
||||||
value = None
|
value = None
|
||||||
if 'value' in metric['metric']:
|
if 'value' in metric['metric']:
|
||||||
# perfdata.append(f'{name}={metric["metric"]["value"]}')
|
|
||||||
value = metric["metric"]["value"]
|
value = metric["metric"]["value"]
|
||||||
elif 'count' in metric['metric']:
|
elif 'count' in metric['metric']:
|
||||||
# perfdata.append(f'{name}={metric["metric"]["count"]}')
|
|
||||||
value = metric["metric"]["count"]
|
value = metric["metric"]["count"]
|
||||||
elif 'rate' in metric['metric']:
|
elif 'rate' in metric['metric']:
|
||||||
# perfdata.append(f'{name}_total={metric["metric"]["rate"]["total"]}')
|
|
||||||
# perfdata.append(f'{name}_mean={metric["metric"]["rate"]["mean"]}')
|
|
||||||
# perfdata.append(f'{name}_five_minute={metric["metric"]["rate"]["five_minute"]}')
|
|
||||||
# perfdata.append(f'{name}_fifteen_minute={metric["metric"]["rate"]["fifteen_minute"]}')
|
|
||||||
# perfdata.append(f'{name}_one_minute={metric["metric"]["rate"]["one_minute"]}')
|
|
||||||
value = metric["metric"]["rate"]["one_minute"]
|
value = metric["metric"]["rate"]["one_minute"]
|
||||||
name = f'{name}_one_minute'
|
name = f'{name}_one_minute'
|
||||||
# if isinstance(value, float):
|
|
||||||
# value = round(value, 1)
|
|
||||||
value = int(value)
|
value = int(value)
|
||||||
metrics_data[name] = value
|
metrics_data[name] = value
|
||||||
|
|
||||||
jvm_mem_usage_warn = int(metrics_data['jvm_memory_heap_max'] / int(100 / args.warn_mem))
|
jvm_mem_usage_warn = int(metrics_data['jvm_memory_heap_max'] / int(100 / args.warn_mem))
|
||||||
jvm_mem_usage_crit = int(metrics_data['jvm_memory_heap_max'] / int(100 / args.crit_mem))
|
jvm_mem_usage_crit = int(metrics_data['jvm_memory_heap_max'] / int(100 / args.crit_mem))
|
||||||
|
|
||||||
|
# Some metric names are changed for better readability
|
||||||
perfdata = {
|
perfdata = {
|
||||||
'throughput_input_1_sec_rate': {
|
'throughput_input_1_sec_rate': {
|
||||||
'value': int(metrics_data['throughput_input_1_sec_rate']),
|
'value': int(metrics_data['throughput_input_1_sec_rate']),
|
||||||
|
@ -108,28 +102,34 @@ def main():
|
||||||
'value': int(metrics_data['throughput_output_1_sec_rate']),
|
'value': int(metrics_data['throughput_output_1_sec_rate']),
|
||||||
'min': 0,
|
'min': 0,
|
||||||
},
|
},
|
||||||
'incoming_messages_one_minute': {
|
'incoming_messages_rate_per_sec_one_minute': {
|
||||||
'value': metrics_data['incomingMessages_one_minute'],
|
'value': metrics_data['incomingMessages_one_minute'],
|
||||||
'min': 0,
|
'min': 0,
|
||||||
},
|
},
|
||||||
'open_connections': {
|
'connections': {
|
||||||
'value': metrics_data['open_connections'],
|
'value': metrics_data['open_connections'],
|
||||||
'min': 0,
|
'min': 0,
|
||||||
},
|
},
|
||||||
'total_connections': {
|
'network_out_total_1sec': {
|
||||||
'value': metrics_data['total_connections'],
|
|
||||||
'min': 0,
|
|
||||||
},
|
|
||||||
'written_bytes_1sec': {
|
|
||||||
'value': metrics_data['written_bytes_1sec'],
|
'value': metrics_data['written_bytes_1sec'],
|
||||||
'min': 0,
|
'min': 0,
|
||||||
'unit': 'B',
|
'unit': 'B',
|
||||||
},
|
},
|
||||||
'read_bytes_1sec': {
|
'network_out_total_total': {
|
||||||
|
'value': metrics_data['written_bytes_total'],
|
||||||
|
'min': 0,
|
||||||
|
'unit': 'B',
|
||||||
|
},
|
||||||
|
'network_in_1sec': {
|
||||||
'value': metrics_data['read_bytes_1sec'],
|
'value': metrics_data['read_bytes_1sec'],
|
||||||
'min': 0,
|
'min': 0,
|
||||||
'unit': 'B',
|
'unit': 'B',
|
||||||
},
|
},
|
||||||
|
'network_in_total': {
|
||||||
|
'value': metrics_data['read_bytes_total'],
|
||||||
|
'min': 0,
|
||||||
|
'unit': 'B',
|
||||||
|
},
|
||||||
'entries_uncommitted': {
|
'entries_uncommitted': {
|
||||||
'value': metrics_data['journal_entries_uncommitted'],
|
'value': metrics_data['journal_entries_uncommitted'],
|
||||||
'min': 0,
|
'min': 0,
|
||||||
|
@ -173,9 +173,15 @@ def main():
|
||||||
if jvm_mem_usage_state != checker.nagios.STATE_OK:
|
if jvm_mem_usage_state != checker.nagios.STATE_OK:
|
||||||
text_result += f' JVM memory usage is high!'
|
text_result += f' JVM memory usage is high!'
|
||||||
|
|
||||||
text_result = text_result + f' JVM memory usage: {int((perfdata["jvm_memory_used"]["value"] / metrics_data["jvm_memory_heap_max"]) * 100)}%, incoming_messages_one_minute: {perfdata["incoming_messages_one_minute"]["value"]}, open_connections: {perfdata["open_connections"]["value"]}' + '\n' + notif_str
|
|
||||||
|
|
||||||
exit_code = max(checker.nagios.STATE_OK, jvm_mem_usage_state)
|
exit_code = max(checker.nagios.STATE_OK, jvm_mem_usage_state)
|
||||||
|
|
||||||
|
if notifications['total'] > 0:
|
||||||
|
text_result += f' There are notifications!'
|
||||||
|
if args.crit_notif:
|
||||||
|
exit_code = checker.nagios.STATE_CRIT
|
||||||
|
|
||||||
|
text_result = text_result + f' JVM memory usage: {int((perfdata["jvm_memory_used"]["value"] / metrics_data["jvm_memory_heap_max"]) * 100)}%, incoming rate (events/second for last minute): {perfdata["incoming_messages_rate_per_sec_one_minute"]["value"]}, connections: {perfdata["connections"]["value"]}, total network in: {human_readable_size(perfdata["network_in_total"]["value"], decimal_places=0)}' + '\n' + notif_str
|
||||||
|
|
||||||
print_icinga2_check_status(text_result, exit_code, perfdata)
|
print_icinga2_check_status(text_result, exit_code, perfdata)
|
||||||
sys.exit(exit_code)
|
sys.exit(exit_code)
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
from math import log2, log10
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
from hurry.filesize import size
|
from hurry.filesize import size
|
||||||
|
|
||||||
|
|
||||||
|
@ -18,3 +21,39 @@ def filesize(bytes: int, spaces: bool = True, formatter: bool = True):
|
||||||
return x
|
return x
|
||||||
else:
|
else:
|
||||||
return x.replace(' ', '')
|
return x.replace(' ', '')
|
||||||
|
|
||||||
|
|
||||||
|
def human_readable_size(size: Union[int, float], bits=False, decimal_places: int = 2, base: int = 10):
    """Format a byte count as a human-readable string such as ``'1.5 KB'``.

    Args:
        size: Quantity in bytes. Negative values keep their sign; the unit
            is chosen from the absolute value.
        bits: When true, ``size`` is multiplied by 8 and bit units
            (``'Kb'``, ``'Mib'``, ...) are used instead of byte units.
        decimal_places: Digits kept after the decimal point. ``0`` truncates
            the scaled value to an ``int``; any other value is passed to
            ``round()``.
        base: ``10`` for decimal units (steps of 1000: KB, MB, ...) or ``2``
            for binary units (steps of 1024: KiB, MiB, ...).

    Returns:
        ``'<number> <unit>'``. A zero size is returned as ``'0 bytes'`` /
        ``'0 bits'`` without any decimals.

    Raises:
        ValueError: If ``base`` is neither 2 nor 10.
    """
    # Validate the base first so a bad value is reported consistently,
    # including for size == 0 (which returns before any scaling happens).
    if base not in (2, 10):
        raise ValueError("Invalid base. Use either 2 or 10.")

    unit_table = {
        False: {
            2: ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'],
            10: ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'],
        },
        True: {
            2: ['bits', 'Kib', 'Mib', 'Gib', 'Tib', 'Pib', 'Eib', 'Zib', 'Yib'],
            10: ['bits', 'Kb', 'Mb', 'Gb', 'Tb', 'Pb', 'Eb', 'Zb', 'Yb'],
        },
    }
    names = unit_table[bool(bits)][base]

    # Convert bytes to bits if needed.
    if bits:
        size *= 8

    if size == 0:
        return '0 ' + names[0]

    # One unit step is 2**10 for binary units and 10**3 for decimal units.
    step = 1024 if base == 2 else 1000

    # Pick the largest unit that does not exceed the magnitude. Comparing
    # against exact powers (instead of truncating a floating-point logarithm)
    # keeps the exponent in 0..len(names)-1: values below 1 stay in the base
    # unit, negative values do not hit a math domain error, and values just
    # under a power boundary are not rounded up into the next unit.
    magnitude = abs(size)
    exp = 0
    while exp < len(names) - 1 and magnitude >= step ** (exp + 1):
        exp += 1

    scaled = size / step ** exp

    if decimal_places == 0:
        scaled = int(scaled)
    else:
        # round() returns a new value; it must be assigned to take effect.
        scaled = round(scaled, decimal_places)

    return f'{scaled} {names[exp]}'
|
||||||
|
|
Loading…
Reference in New Issue