check_greylog: alert notifications, better metric names, better filesizes
This commit is contained in:
parent
96fc88737e
commit
d431bd5d7a
|
@ -10,6 +10,7 @@ from urllib3.exceptions import InsecureRequestWarning
|
|||
import checker.nagios
|
||||
from checker import print_icinga2_check_status
|
||||
from checker.linuxfabric.base import get_state
|
||||
from checker.units import human_readable_size
|
||||
|
||||
|
||||
def main():
|
||||
|
@ -20,6 +21,7 @@ def main():
|
|||
parser.add_argument('--warn-mem', type=int, default=75, help='Percentage of JVM memory used for warm')
|
||||
parser.add_argument('--crit-mem', type=int, default=100, help='Percentage of JVM memory used for critical')
|
||||
parser.add_argument('--insecure', action='store_false', help="Don't verify SSL")
|
||||
parser.add_argument('--crit-notif', action='store_true', help='Return critical when there are notifications')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.insecure:
|
||||
|
@ -78,27 +80,19 @@ def main():
|
|||
'.').replace('-', '_').replace('.', '_')
|
||||
value = None
|
||||
if 'value' in metric['metric']:
|
||||
# perfdata.append(f'{name}={metric["metric"]["value"]}')
|
||||
value = metric["metric"]["value"]
|
||||
elif 'count' in metric['metric']:
|
||||
# perfdata.append(f'{name}={metric["metric"]["count"]}')
|
||||
value = metric["metric"]["count"]
|
||||
elif 'rate' in metric['metric']:
|
||||
# perfdata.append(f'{name}_total={metric["metric"]["rate"]["total"]}')
|
||||
# perfdata.append(f'{name}_mean={metric["metric"]["rate"]["mean"]}')
|
||||
# perfdata.append(f'{name}_five_minute={metric["metric"]["rate"]["five_minute"]}')
|
||||
# perfdata.append(f'{name}_fifteen_minute={metric["metric"]["rate"]["fifteen_minute"]}')
|
||||
# perfdata.append(f'{name}_one_minute={metric["metric"]["rate"]["one_minute"]}')
|
||||
value = metric["metric"]["rate"]["one_minute"]
|
||||
name = f'{name}_one_minute'
|
||||
# if isinstance(value, float):
|
||||
# value = round(value, 1)
|
||||
value = int(value)
|
||||
metrics_data[name] = value
|
||||
|
||||
jvm_mem_usage_warn = int(metrics_data['jvm_memory_heap_max'] / int(100 / args.warn_mem))
|
||||
jvm_mem_usage_crit = int(metrics_data['jvm_memory_heap_max'] / int(100 / args.crit_mem))
|
||||
|
||||
# Some metric names are changed for better readability
|
||||
perfdata = {
|
||||
'throughput_input_1_sec_rate': {
|
||||
'value': int(metrics_data['throughput_input_1_sec_rate']),
|
||||
|
@ -108,28 +102,34 @@ def main():
|
|||
'value': int(metrics_data['throughput_output_1_sec_rate']),
|
||||
'min': 0,
|
||||
},
|
||||
'incoming_messages_one_minute': {
|
||||
'incoming_messages_rate_per_sec_one_minute': {
|
||||
'value': metrics_data['incomingMessages_one_minute'],
|
||||
'min': 0,
|
||||
},
|
||||
'open_connections': {
|
||||
'connections': {
|
||||
'value': metrics_data['open_connections'],
|
||||
'min': 0,
|
||||
},
|
||||
'total_connections': {
|
||||
'value': metrics_data['total_connections'],
|
||||
'min': 0,
|
||||
},
|
||||
'written_bytes_1sec': {
|
||||
'network_out_total_1sec': {
|
||||
'value': metrics_data['written_bytes_1sec'],
|
||||
'min': 0,
|
||||
'unit': 'B',
|
||||
},
|
||||
'read_bytes_1sec': {
|
||||
'network_out_total_total': {
|
||||
'value': metrics_data['written_bytes_total'],
|
||||
'min': 0,
|
||||
'unit': 'B',
|
||||
},
|
||||
'network_in_1sec': {
|
||||
'value': metrics_data['read_bytes_1sec'],
|
||||
'min': 0,
|
||||
'unit': 'B',
|
||||
},
|
||||
'network_in_total': {
|
||||
'value': metrics_data['read_bytes_total'],
|
||||
'min': 0,
|
||||
'unit': 'B',
|
||||
},
|
||||
'entries_uncommitted': {
|
||||
'value': metrics_data['journal_entries_uncommitted'],
|
||||
'min': 0,
|
||||
|
@ -173,9 +173,15 @@ def main():
|
|||
if jvm_mem_usage_state != checker.nagios.STATE_OK:
|
||||
text_result += f' JVM memory usage is high!'
|
||||
|
||||
text_result = text_result + f' JVM memory usage: {int((perfdata["jvm_memory_used"]["value"] / metrics_data["jvm_memory_heap_max"]) * 100)}%, incoming_messages_one_minute: {perfdata["incoming_messages_one_minute"]["value"]}, open_connections: {perfdata["open_connections"]["value"]}' + '\n' + notif_str
|
||||
|
||||
exit_code = max(checker.nagios.STATE_OK, jvm_mem_usage_state)
|
||||
|
||||
if notifications['total'] > 0:
|
||||
text_result += f' There are notifications!'
|
||||
if args.crit_notif:
|
||||
exit_code = checker.nagios.STATE_CRIT
|
||||
|
||||
text_result = text_result + f' JVM memory usage: {int((perfdata["jvm_memory_used"]["value"] / metrics_data["jvm_memory_heap_max"]) * 100)}%, incoming rate (events/second for last minute): {perfdata["incoming_messages_rate_per_sec_one_minute"]["value"]}, connections: {perfdata["connections"]["value"]}, total network in: {human_readable_size(perfdata["network_in_total"]["value"], decimal_places=0)}' + '\n' + notif_str
|
||||
|
||||
print_icinga2_check_status(text_result, exit_code, perfdata)
|
||||
sys.exit(exit_code)
|
||||
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
from math import log2, log10
|
||||
from typing import Union
|
||||
|
||||
from hurry.filesize import size
|
||||
|
||||
|
||||
|
@ -18,3 +21,39 @@ def filesize(bytes: int, spaces: bool = True, formatter: bool = True):
|
|||
return x
|
||||
else:
|
||||
return x.replace(' ', '')
|
||||
|
||||
|
||||
def human_readable_size(size: Union[int, float], bits=False, decimal_places: int = 2, base: int = 10):
    """
    Format a byte count as a human-readable string such as ``'1.23 MB'``.

    :param size: Quantity in bytes. May be negative or fractional; the unit is
        chosen from the absolute value and the sign is kept in the output.
    :param bits: When true, ``size`` is multiplied by 8 and bit units
        (``Kb``, ``Mb``, ...) are used instead of byte units.
    :param decimal_places: Number of digits kept after the decimal point.
        ``0`` truncates the scaled value to an integer.
    :param base: ``10`` for SI units (steps of 1000: KB, MB, ...) or ``2`` for
        IEC units (steps of 1024: KiB, MiB, ...).
    :return: The scaled number and its unit separated by one space.
    :raises ValueError: If ``base`` is neither 2 nor 10.
    """
    # Validate up front so an invalid base is reported consistently, including
    # for size == 0 (which previously escaped as a KeyError from the unit table).
    if base not in (2, 10):
        raise ValueError("Invalid base. Use either 2 or 10.")

    # Normalise truthy/falsy values so the unit-table lookup cannot fail.
    bits = bool(bits)

    # Define the units
    units = {False: {2: ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'],
                     10: ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']},
             True: {2: ['bits', 'Kib', 'Mib', 'Gib', 'Tib', 'Pib', 'Eib', 'Zib', 'Yib'],
                    10: ['bits', 'Kb', 'Mb', 'Gb', 'Tb', 'Pb', 'Eb', 'Zb', 'Yb']}}
    names = units[bits][base]

    # Convert bytes to bits if needed
    if bits:
        size *= 8

    # Zero has no logarithm; report it in the smallest unit.
    if size == 0:
        return '0 ' + names[0]

    # Determine the unit from the magnitude so negative values do not hit a
    # math domain error in log().
    magnitude = abs(size)
    if base == 2:
        exp = int(log2(magnitude)) // 10
        digits_per_step = 10
    else:
        exp = int(log10(magnitude)) // 3
        digits_per_step = 3

    # Clamp into the unit table: values below 1 would otherwise give a negative
    # exponent (indexing the list from the end), and huge values would run past
    # the largest unit.
    exp = max(0, min(exp, len(names) - 1))

    size /= base ** (exp * digits_per_step)

    if decimal_places == 0:
        size = int(size)
    else:
        # round() returns a new value; it must be assigned to take effect.
        size = round(size, decimal_places)

    return f'{size} {names[exp]}'
|
||||
|
|
Loading…
Reference in New Issue