check_zfs_zpool: fix str and float issues, fix zpool listing
check_iowait_proc: add missing package notifications; check_coturn: add
This commit is contained in:
parent
465b9ea3a9
commit
29c09666e8
|
@ -0,0 +1,79 @@
|
|||
#!/bin/bash
#
# Nagios-style check for a coturn TURN server.
#
# The script first asks the server (acting as a STUN server) for the caller's
# mapped public address, then runs turnutils_uclient against the server with
# that address as the peer, and finally reports packet loss, round trip delay
# and jitter parsed from the client's output as performance data.
#
# Required options: -s SERVER_ADDRESS, -S SECRET, -r REALM
# Optional options: -p PORT (default 3478), -P PROTOCOL, -t TIMEOUT
#
# Exit codes follow the Nagios plugin convention:
#   0 = OK, 2 = CRITICAL, 3 = UNKNOWN

STATE_OK=0
STATE_CRITICAL=2
STATE_UNKNOWN=3

SERVER_ADDRESS=""
SECRET=""
REALM=""
PORT=3478
# NOTE(review): PROTOCOL and TIMEOUT are accepted on the command line but are
# not passed to any command below.
PROTOCOL="udp"
TIMEOUT=10

# Print the usage line and exit UNKNOWN (bad invocation is not a service state).
usage() {
  echo "Usage: $0 -s SERVER_ADDRESS -S SECRET -r REALM [-p PORT] [-P PROTOCOL] [-t TIMEOUT]"
  exit "$STATE_UNKNOWN"
}

while getopts "s:S:r:p:P:t:" opt; do
  case $opt in
  s) SERVER_ADDRESS="$OPTARG" ;;
  S) SECRET="$OPTARG" ;;
  r) REALM="$OPTARG" ;;
  p) PORT="$OPTARG" ;;
  P) PROTOCOL="$OPTARG" ;;
  t) TIMEOUT="$OPTARG" ;;
  *) usage ;;
  esac
done

# Check if required options are set
if [[ -z "$SERVER_ADDRESS" || -z "$SECRET" || -z "$REALM" ]]; then
  usage
fi

# Both external tools are required; without them the check cannot run at all.
if ! command -v turnutils_uclient &>/dev/null; then
  echo "UNKNOWN - turnutils_uclient not found! Please install coturn"
  exit "$STATE_UNKNOWN"
fi

if ! command -v stun &>/dev/null; then
  echo "UNKNOWN - stun not found! Please install stun-client"
  exit "$STATE_UNKNOWN"
fi

# Fetch the user's public IP using the coturn server as a STUN server.
# Only the first "MappedAddress" line is used so the result is a single value.
PEER_ADDRESS=$(stun "$SERVER_ADDRESS" -p "$PORT" -v 1 2>&1 | grep -m1 "MappedAddress" | awk -F'[ =:]+' '{print $2}')

if [[ -z "$PEER_ADDRESS" ]]; then
  echo "UNKNOWN - Failed to fetch the user's public IP using the coturn server as a STUN server."
  exit "$STATE_UNKNOWN"
fi

# Run the TURN client test; the command's exit status decides OK vs CRITICAL.
if TURNUTILS_OUTPUT=$(turnutils_uclient -s -W "$SECRET" -r "$REALM" -p "$PORT" -e "$PEER_ADDRESS" -B -y "$SERVER_ADDRESS" 2>&1); then
  # TOT_SEND_BYTES=$(echo "$TURNUTILS_OUTPUT" | grep -m1 "start_mclient: tot_send_bytes" | awk -F'[~ ,]+' '{print $5}')
  # TOT_RECV_BYTES=$(echo "$TURNUTILS_OUTPUT" | grep -m1 "start_mclient: tot_send_bytes" | awk -F'[~ ,]+' '{print $7}')

  # The loss summary line carries both percentages; split on "(", "%" and ")"
  # and keep only the integer part of each value.
  LOSS_LINE=$(printf '%s\n' "$TURNUTILS_OUTPUT" | grep -m1 "Total lost packets")
  LOST_PACKETS=$(printf '%s\n' "$LOSS_LINE" | awk -F'[(%)]' '{print $2}' | cut -d. -f1)
  SEND_DROPPED=$(printf '%s\n' "$LOSS_LINE" | awk -F'[(%)]' '{print $5}' | cut -d. -f1)

  AVG_RTT=$(printf '%s\n' "$TURNUTILS_OUTPUT" | grep -m1 "Average round trip delay" | awk '{print $7}' | cut -d. -f1)
  AVG_JITTER=$(printf '%s\n' "$TURNUTILS_OUTPUT" | grep -m1 "Average jitter" | awk '{print $5}' | cut -d. -f1)

  echo "OK - connected to TURN server $SERVER_ADDRESS | lost_packets_percent=$LOST_PACKETS send_dropped_percent=$SEND_DROPPED avg_rtt=${AVG_RTT}ms avg_jitter=${AVG_JITTER}ms"
  exit "$STATE_OK"
else
  echo "CRITICAL - failed to connect to TURN server:"
  echo "$TURNUTILS_OUTPUT"
  exit "$STATE_CRITICAL"
fi
|
|
@ -20,15 +20,13 @@ while getopts "w:c:" opt; do
|
|||
done
|
||||
|
||||
# Both external tools are required; if either is missing the check cannot run,
# so report UNKNOWN (Nagios exit code 3) rather than a service state.
if ! command -v iostat &>/dev/null; then
  echo "UNKNOWN - iostat not found! Please install sysstat"
  exit 3
fi

if ! command -v iotop &>/dev/null; then
  echo "UNKNOWN - iotop not found! Please install iotop"
  exit 3
fi
|
||||
|
||||
# Get iowait value
|
||||
|
|
|
@ -26,8 +26,8 @@ def percent_to_float(percent_str: str):
|
|||
return percent / 100
|
||||
|
||||
|
||||
def float_to_percent(float_value) -> str:
    """Format a fraction as a percentage string.

    The value is coerced with ``float()`` first, so numeric strings such as
    ``"0.25"`` are accepted as well as numbers. The result is rounded to two
    decimal places and suffixed with ``%``.

    Raises:
        ValueError: if ``float_value`` cannot be converted to a float.
    """
    percent = round(float(float_value) * 100, 2)
    return f"{percent}%"
|
||||
|
||||
|
||||
|
@ -37,10 +37,26 @@ def clean_device_list(in_str: str):
|
|||
|
||||
def zpool_list(zpool: str, vdev_type: str, header: bool = False) -> str:
    """Return the part of ``zpool list -v`` output for one section of a pool.

    Args:
        zpool: Name of the pool passed to ``zpool list -v``.
        vdev_type: ``'pool'`` for the data devices, or ``'cache'`` / ``'log'``
            for the section introduced by a line matching that word.
        header: When true, also include the first line of the command output.
            Not supported for ``vdev_type='pool'``.

    Returns:
        The filtered command output decoded as UTF-8.

    Raises:
        NotImplementedError: for an unknown ``vdev_type``, or for
            ``vdev_type='pool'`` combined with ``header=True``.

    If the command fails, an UNKNOWN message is printed and the process exits
    with ``nagios.UNKNOWN``.
    """
    import shlex  # local import so this block does not depend on file-level imports

    if vdev_type == 'pool':
        if header:
            raise NotImplementedError('not implemented for pool')
        # Stop at the first line mentioning logs or cache; before that, print
        # every line that starts with whitespace followed by a non-space.
        # An earlier variant of this filter additionally matched
        # /^[[:space:]]{2,}ata-/.
        awk_program = "/logs/ || /cache/ {exit} /^[[:space:]]+[^[:space:]]/"
    elif vdev_type in ('cache', 'log'):
        # After a line matching the section name, print the following lines
        # for as long as they begin with a blank character.
        section = f"/{vdev_type}/ {{while(getline && substr($0, 1, 1) ~ /[[:blank:]]/) print}}"
        awk_program = f"NR==1 {{print}} {section}" if header else section
    else:
        raise NotImplementedError

    # The pool name ends up inside a shell command line, so quote it.
    command = f"zpool list -v {shlex.quote(zpool)} | awk '{awk_program}'"
    try:
        return subprocess.check_output(command, shell=True, stderr=subprocess.PIPE).decode('utf-8')
    except subprocess.CalledProcessError as e:
        print('UNKNOWN - failed to check pool:', e.stderr.decode(sys.getfilesystemencoding()))
        sys.exit(nagios.UNKNOWN)
|
||||
|
@ -76,6 +92,8 @@ def check_vdev_devices(vdev_devices: list, critical_free, warning_free, critical
|
|||
states[device['device']] = state
|
||||
return critical, warning, states
|
||||
|
||||
def is_dash(string: str) -> bool:
    """Return True when ``string`` is exactly a single ``-`` character."""
    return string == '-'
|
||||
|
||||
def get_vdev_info(zpool: str, vdev_type: str):
|
||||
output_zpool_logs = zpool_list(zpool, vdev_type)
|
||||
|
@ -83,6 +101,7 @@ def get_vdev_info(zpool: str, vdev_type: str):
|
|||
for line in list(filter(None, output_zpool_logs.split('\n'))):
|
||||
data = list(filter(None, clean_device_list(line).split(' ')))
|
||||
zpool_vdev_devices.append({
|
||||
'pool': not (is_dash(data[2]) and is_dash(data[3]) and is_dash(data[6]) and is_dash(data[7])),
|
||||
'device': data[0],
|
||||
'size': data[1],
|
||||
'alloc': data[2],
|
||||
|
@ -117,10 +136,14 @@ def main():
|
|||
parser = argparse.ArgumentParser(description='Check ZFS pool status')
|
||||
parser.add_argument('--pool-name', required=True, help='Name of the ZFS pool to check.')
|
||||
parser.add_argument('--check-type', required=True, choices=['status', 'cache', 'log'], help='What to check.')
|
||||
parser.add_argument('--warning-free', type=int, default=65, help='Warning level for free space percentage (default: 65)')
|
||||
parser.add_argument('--critical-free', type=int, default=80, help='Critical level for free space percentage (default: 80)')
|
||||
parser.add_argument('--warning-frag', type=int, default=50, help='Warning level for fragmentation percentage (default: 50)')
|
||||
parser.add_argument('--critical-frag', type=int, default=75, help='Critical level for fragmentation percentage (default: 75)')
|
||||
parser.add_argument('--warning-free', type=int, default=65,
|
||||
help='Warning level for free space percentage (default: 65)')
|
||||
parser.add_argument('--critical-free', type=int, default=80,
|
||||
help='Critical level for free space percentage (default: 80)')
|
||||
parser.add_argument('--warning-frag', type=int, default=50,
|
||||
help='Warning level for fragmentation percentage (default: 50)')
|
||||
parser.add_argument('--critical-frag', type=int, default=75,
|
||||
help='Critical level for fragmentation percentage (default: 75)')
|
||||
args = parser.parse_args()
|
||||
|
||||
args.warning_free = percent_to_float(f'{args.warning_free}%')
|
||||
|
@ -129,7 +152,7 @@ def main():
|
|||
args.critical_frag = percent_to_float(f'{args.critical_frag}%')
|
||||
|
||||
if args.check_type == 'status':
|
||||
vdev_devices = [x for x in get_vdev_info(args.pool_name, args.pool_name) if not x['device'].startswith('mirror-')]
|
||||
vdev_devices = [x for x in get_vdev_info(args.pool_name, 'pool') if not x['pool']]
|
||||
if not len(vdev_devices):
|
||||
print('UNKNOWN - no devices found')
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
|
@ -170,20 +193,24 @@ def main():
|
|||
print('OK - pool', args.pool_name, 'is healthy')
|
||||
|
||||
# Build the table
|
||||
critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free, args.critical_frag, args.warning_frag)
|
||||
critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free,
|
||||
args.critical_frag, args.warning_frag)
|
||||
table_data = [
|
||||
('Device', 'Size', 'Alloc', 'Free', 'Frag', 'Cap', 'Health', 'State'),
|
||||
(args.pool_name, filesize(pool_status['size'], spaces=False, formatter=False), filesize(pool_status['allocated'], spaces=False, formatter=False), filesize(pool_status['free'], spaces=False, formatter=False), float_to_percent(pool_status['fragmentation']),
|
||||
(args.pool_name, filesize(pool_status['size'], spaces=False, formatter=False),
|
||||
filesize(pool_status['allocated'], spaces=False, formatter=False),
|
||||
filesize(pool_status['free'], spaces=False, formatter=False),
|
||||
float_to_percent(pool_status['fragmentation']),
|
||||
float_to_percent(pool_status['capacity']),
|
||||
pool_status['health'], f"[{('ok' if exit_code == nagios.OK else 'critical').upper()}]")
|
||||
]
|
||||
for device in vdev_devices:
|
||||
for device in vdev_devices:
|
||||
if isinstance(device['frag'], float):
|
||||
device['frag'] = float_to_percent(device['frag'])
|
||||
if isinstance(device['cap'], float):
|
||||
device['cap'] = float_to_percent(device['cap'])
|
||||
table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'], device['cap'], device['health'], states[device['device']]))
|
||||
table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'],
|
||||
device['cap'], device['health'], states[device['device']]))
|
||||
|
||||
print(list_to_markdown_table(table_data, align='left', seperator='!', borders=False))
|
||||
sys.exit(exit_code)
|
||||
|
@ -195,10 +222,16 @@ def main():
|
|||
print('UNKNOWN - no devices found')
|
||||
sys.exit(nagios.UNKNOWN)
|
||||
table_data = [('Device', 'Size', 'Alloc', 'Free', 'Frag', 'Cap', 'Health', 'State')]
|
||||
critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free, args.critical_frag, args.warning_frag)
|
||||
critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free,
|
||||
args.critical_frag, args.warning_frag)
|
||||
|
||||
for device in vdev_devices:
|
||||
table_data.append((device['device'], device['size'], device['alloc'], device['free'], float_to_percent(device['frag']), float_to_percent(device['cap']), device['health'], states[device['device']]))
|
||||
if device['frag'] != '-':
|
||||
device['frag'] = float_to_percent(device['frag'])
|
||||
if device['cap'] != '-':
|
||||
device['cap'] = float_to_percent(device['cap'])
|
||||
table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'],
|
||||
device['cap'], device['health'], states[device['device']]))
|
||||
|
||||
exit_code = nagios.OK
|
||||
out_str = None
|
||||
|
@ -225,7 +258,9 @@ def main():
|
|||
info_str = "shit's fucked"
|
||||
crit_drives = crit_drives + critical['health']
|
||||
issues.add('health')
|
||||
out_str = ['CRITICAL', '-', info_str, f'for {"devices" if len(crit_drives) > 1 else "devices"} for {args.pool_name}:', ', '.join([*set(crit_drives)])]
|
||||
out_str = ['CRITICAL', '-', info_str,
|
||||
f'for {"devices" if len(crit_drives) > 1 else "devices"} for {args.pool_name}:',
|
||||
', '.join([*set(crit_drives)])]
|
||||
if len(warning['cap']) or len(warning['frag']) and not len(critical['health']):
|
||||
if exit_code < nagios.WARNING:
|
||||
exit_code = nagios.WARNING
|
||||
|
@ -244,7 +279,9 @@ def main():
|
|||
elif not len(warning['cap']) and len(warning['frag']):
|
||||
info_str = 'critical fragmentation'
|
||||
warn_drives = warning['frag']
|
||||
out_str = ['WARNING', '-', info_str, f'for {"devices" if len(warn_drives) > 1 else "devices"} for {args.pool_name}:', ', '.join([*set(warn_drives)])]
|
||||
out_str = ['WARNING', '-', info_str,
|
||||
f'for {"devices" if len(warn_drives) > 1 else "devices"} for {args.pool_name}:',
|
||||
', '.join([*set(warn_drives)])]
|
||||
|
||||
if not len(warn_drives) and not len(crit_drives):
|
||||
out_str = ['OK', '-', f'{len(vdev_devices)} {args.check_type} devices for {args.pool_name} are healthy']
|
||||
|
|
Loading…
Reference in New Issue