check_zfs_zpool: use capacity for perfdata

This commit is contained in:
Cyberes 2024-02-23 16:42:47 -07:00
parent c2f2b5f3db
commit 989d222392
1 changed file with 27 additions and 33 deletions

View File

@@ -10,7 +10,6 @@ from checker import nagios, dict_to_perfdata
from checker.markdown import list_to_markdown_table from checker.markdown import list_to_markdown_table
from checker.nagios import state_to_txt from checker.nagios import state_to_txt
from checker.units import filesize from checker.units import filesize
from checker.zfs import zfs_get_free
# TODO: add perfdata # TODO: add perfdata
@@ -138,24 +137,11 @@ def get_zpool_zfs_properties(pool_name: str):
return zfs_properties return zfs_properties
def main(): def main(args):
parser = argparse.ArgumentParser(description='Check ZFS pool status') warning_free_float = percent_to_float(f'{args.warning_free}%')
parser.add_argument('--pool-name', required=True, help='Name of the ZFS pool to check.') critical_free_float = percent_to_float(f'{args.critical_free}%')
parser.add_argument('--check-type', required=True, choices=['status', 'cache', 'log'], help='What to check.') warning_frag_float = percent_to_float(f'{args.warning_frag}%')
parser.add_argument('--warning-free', type=int, default=65, critical_frag_float = percent_to_float(f'{args.critical_frag}%')
help='Warning level for free space percentage (default: 65)')
parser.add_argument('--critical-free', type=int, default=80,
help='Critical level for free space percentage (default: 80)')
parser.add_argument('--warning-frag', type=int, default=50,
help='Warning level for fragmentation percentage (default: 50)')
parser.add_argument('--critical-frag', type=int, default=75,
help='Critical level for fragmentation percentage (default: 75)')
args = parser.parse_args()
args.warning_free = percent_to_float(f'{args.warning_free}%')
args.critical_free = percent_to_float(f'{args.critical_free}%')
args.warning_frag = percent_to_float(f'{args.warning_frag}%')
args.critical_frag = percent_to_float(f'{args.critical_frag}%')
if args.check_type == 'status': if args.check_type == 'status':
vdev_devices = [x for x in get_vdev_info(args.pool_name, 'pool') if not x['pool']] vdev_devices = [x for x in get_vdev_info(args.pool_name, 'pool') if not x['pool']]
@@ -171,10 +157,10 @@ def main():
pool_status['fragmentation'] = percent_to_float(f"{pool_status['fragmentation']}%") pool_status['fragmentation'] = percent_to_float(f"{pool_status['fragmentation']}%")
# Check for critical # Check for critical
if pool_status['capacity'] >= args.critical_free: if pool_status['capacity'] >= critical_free_float:
exit_code = nagios.CRITICAL exit_code = nagios.CRITICAL
issues.append('capacity') issues.append('capacity')
elif pool_status['fragmentation'] >= args.critical_frag: elif pool_status['fragmentation'] >= critical_frag_float:
exit_code = nagios.CRITICAL exit_code = nagios.CRITICAL
issues.append('fragmentation') issues.append('fragmentation')
elif pool_status['health'] != 'ONLINE': elif pool_status['health'] != 'ONLINE':
@@ -183,10 +169,10 @@ def main():
# Check for warnings # Check for warnings
if exit_code == nagios.OK: if exit_code == nagios.OK:
if pool_status['capacity'] >= args.warning_free: if pool_status['capacity'] >= warning_free_float:
exit_code = nagios.WARNING exit_code = nagios.WARNING
issues.append('capacity') issues.append('capacity')
elif pool_status['fragmentation'] >= args.warning_frag: elif pool_status['fragmentation'] >= warning_frag_float:
exit_code = nagios.WARNING exit_code = nagios.WARNING
issues.append('fragmentation') issues.append('fragmentation')
@@ -199,8 +185,8 @@ def main():
print('OK - pool', args.pool_name, 'is healthy') print('OK - pool', args.pool_name, 'is healthy')
# Build the table # Build the table
critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free, critical, warning, states = check_vdev_devices(vdev_devices, critical_free_float, warning_free_float,
args.critical_frag, args.warning_frag) critical_frag_float, warning_frag_float)
table_data = [ table_data = [
('Device', 'Size', 'Alloc', 'Free', 'Frag', 'Cap', 'Health', 'State'), ('Device', 'Size', 'Alloc', 'Free', 'Frag', 'Cap', 'Health', 'State'),
(args.pool_name, filesize(pool_status['size'], spaces=False, formatter=False), (args.pool_name, filesize(pool_status['size'], spaces=False, formatter=False),
@@ -218,16 +204,15 @@ def main():
table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'], table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'],
device['cap'], device['health'], states[device['device']])) device['cap'], device['health'], states[device['device']]))
zpool_size, zpool_free = zfs_get_free(args.pool_name)
perf_data = { perf_data = {
'free': { 'capacity': {
'value': zpool_size, 'warn': int(zpool_size * args.warning_free), 'crit': int(zpool_size * args.critical_free), 'min': 0, 'unit': 'GB' 'value': int(float(float_to_percent(pool_status['capacity']).strip('%'))), 'warn': args.warning_free, 'crit': args.critical_free, 'min': 0, 'unit': '%'
}, },
'size': { 'size': {
'value': zpool_free, 'warn': int(zpool_size * args.warning_free), 'crit': int(zpool_size * args.critical_free), 'min': 0, 'unit': 'GB' 'value': pool_status['size'], 'min': 0
}, },
'fragmentation': { 'fragmentation': {
'value': int(float(float_to_percent(pool_status['fragmentation']).strip('%'))), 'warn': float_to_percent(args.warning_frag), 'crit': float_to_percent(args.critical_frag), 'min': 0, 'unit': '%' 'value': int(float(float_to_percent(pool_status['fragmentation']).strip('%'))), 'warn': float_to_percent(warning_frag_float).strip('%'), 'crit': float_to_percent(critical_frag_float).strip('%'), 'min': 0, 'unit': '%'
} }
} }
perf_data_str = dict_to_perfdata(perf_data) perf_data_str = dict_to_perfdata(perf_data)
@@ -242,8 +227,8 @@ def main():
print('UNKNOWN - no devices found') print('UNKNOWN - no devices found')
sys.exit(nagios.UNKNOWN) sys.exit(nagios.UNKNOWN)
table_data = [('Device', 'Size', 'Alloc', 'Free', 'Frag', 'Cap', 'Health', 'State')] table_data = [('Device', 'Size', 'Alloc', 'Free', 'Frag', 'Cap', 'Health', 'State')]
critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free, critical, warning, states = check_vdev_devices(vdev_devices, critical_free_float, warning_free_float,
args.critical_frag, args.warning_frag) critical_frag_float, warning_frag_float)
for device in vdev_devices: for device in vdev_devices:
if device['frag'] != '-': if device['frag'] != '-':
@@ -312,8 +297,17 @@ def main():
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Check ZFS pool status')
parser.add_argument('--pool-name', required=True, help='Name of the ZFS pool to check.')
parser.add_argument('--check-type', required=True, choices=['status', 'cache', 'log'], help='What to check.')
parser.add_argument('--warning-free', type=int, default=65, help='Warning level for free space percentage (default: 65)')
parser.add_argument('--critical-free', type=int, default=80, help='Critical level for free space percentage (default: 80)')
parser.add_argument('--warning-frag', type=int, default=50, help='Warning level for fragmentation percentage (default: 50)')
parser.add_argument('--critical-frag', type=int, default=75, help='Critical level for fragmentation percentage (default: 75)')
args = parser.parse_args()
try: try:
main() main(args)
except Exception as e: except Exception as e:
print(f'UNKNOWN: exception "{e}"') print(f'UNKNOWN: exception "{e}"')
import traceback import traceback