check_zfs_zpool: fix str and float issues, fix zpool listing

check_iowait_proc: add missing package notifications
check_coturn: add
This commit is contained in:
Cyberes 2023-05-30 12:38:25 -06:00
parent 465b9ea3a9
commit 29c09666e8
3 changed files with 142 additions and 28 deletions

79
check_coturn.sh Executable file
View File

@ -0,0 +1,79 @@
#!/bin/bash
#
# check_coturn.sh - Nagios-style check for a coturn TURN server.
#
# 1. Asks the server (acting as a STUN server) for this host's mapped
#    (public) address using the `stun` client.
# 2. Runs `turnutils_uclient` against the server with that address as the
#    peer (-e) and reports packet loss, round trip delay and jitter as
#    performance data.
#
# Options:
#   -s SERVER_ADDRESS  TURN/STUN server to check (required)
#   -S SECRET          secret passed to turnutils_uclient -W (required)
#   -r REALM           realm passed to turnutils_uclient -r (required)
#   -p PORT            server port (default: 3478)
#   -P PROTOCOL        "udp" (default) or "tcp" for the TURN test
#   -t TIMEOUT         per-command time limit in seconds (default: no limit)
#
# Exit codes follow the Nagios plugin convention:
#   0 = OK, 2 = CRITICAL, 3 = UNKNOWN.

STATE_OK=0
STATE_CRITICAL=2
STATE_UNKNOWN=3

SERVER_ADDRESS=""
SECRET=""
REALM=""
PORT=3478
PROTOCOL="udp"
# Empty means "no time limit". A limit is only enforced when -t is given
# explicitly, so invocations that never passed -t behave as they always did.
TIMEOUT=""

# Print the usage line and exit UNKNOWN; a usage error says nothing about
# the health of the TURN server, so it must not map to WARNING/CRITICAL.
usage() {
    echo "Usage: $0 -s SERVER_ADDRESS -S SECRET -r REALM [-p PORT] [-P PROTOCOL] [-t TIMEOUT]"
    exit "$STATE_UNKNOWN"
}

# Run a command, bounded by $TIMEOUT when one was requested and the
# coreutils `timeout` helper is installed; otherwise run it unbounded.
run_limited() {
    if [[ -n "$TIMEOUT" ]] && command -v timeout &>/dev/null; then
        timeout "$TIMEOUT" "$@"
    else
        "$@"
    fi
}

# Emit one perfdata item: perf LABEL VALUE [UOM].
# A value that could not be parsed is reported as "U" (undetermined)
# instead of producing a malformed "label=" token.
perf() {
    if [[ -n "$2" ]]; then
        printf '%s=%s%s' "$1" "$2" "$3"
    else
        printf '%s=U' "$1"
    fi
}

while getopts "s:S:r:p:P:t:" opt; do
    case $opt in
    s) SERVER_ADDRESS="$OPTARG" ;;
    S) SECRET="$OPTARG" ;;
    r) REALM="$OPTARG" ;;
    p) PORT="$OPTARG" ;;
    P) PROTOCOL="$OPTARG" ;;
    t) TIMEOUT="$OPTARG" ;;
    *) usage ;;
    esac
done

# Check if required options are set
if [[ -z "$SERVER_ADDRESS" || -z "$SECRET" || -z "$REALM" ]]; then
    usage
fi

if [[ -n "$TIMEOUT" && ! "$TIMEOUT" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "UNKNOWN - invalid timeout '$TIMEOUT' (expected a number of seconds)"
    exit "$STATE_UNKNOWN"
fi

if ! command -v turnutils_uclient &>/dev/null; then
    echo "UNKNOWN - turnutils_uclient not found! Please install coturn"
    exit "$STATE_UNKNOWN"
fi

if ! command -v stun &>/dev/null; then
    echo "UNKNOWN - stun not found! Please install stun-client"
    exit "$STATE_UNKNOWN"
fi

# Fetch the user's public IP using the coturn server as a STUN server.
# Only the first MappedAddress line is used so that repeated lines in the
# verbose output cannot yield a multi-line peer address.
PEER_ADDRESS=$(run_limited stun "$SERVER_ADDRESS" -p "$PORT" -v 1 2>&1 |
    awk -F'[ =:]+' '/MappedAddress/ && !seen {print $2; seen = 1}')

if [[ -z "$PEER_ADDRESS" ]]; then
    echo "UNKNOWN - Failed to fetch the user's public IP using the coturn server as a STUN server."
    exit "$STATE_UNKNOWN"
fi

TURN_ARGS=(-s -W "$SECRET" -r "$REALM" -p "$PORT" -e "$PEER_ADDRESS" -B -y)
# turnutils_uclient talks UDP unless -t is given; honour "-P tcp".
if [[ "${PROTOCOL,,}" == "tcp" ]]; then
    TURN_ARGS+=(-t)
fi

TURNUTILS_OUTPUT=$(run_limited turnutils_uclient "${TURN_ARGS[@]}" "$SERVER_ADDRESS" 2>&1)
TURN_STATUS=$?

if [[ $TURN_STATUS -ne 0 ]]; then
    # `timeout` exits with 124 when it had to kill the command.
    if [[ -n "$TIMEOUT" && $TURN_STATUS -eq 124 ]]; then
        echo "CRITICAL - TURN check against $SERVER_ADDRESS timed out after ${TIMEOUT}s"
    else
        echo "CRITICAL - failed to connect to TURN server:"
    fi
    echo "$TURNUTILS_OUTPUT"
    exit "$STATE_CRITICAL"
fi

# Pull the statistics out of the client's summary output. Values are
# truncated to their integer part (everything before the first '.').
LOST_LINE=$(grep -m1 "Total lost packets" <<<"$TURNUTILS_OUTPUT")
LOST_PACKETS=$(awk -F'[(%)]' '{print $2}' <<<"$LOST_LINE" | cut -d. -f1)
SEND_DROPPED=$(awk -F'[(%)]' '{print $5}' <<<"$LOST_LINE" | cut -d. -f1)
AVG_RTT=$(grep -m1 "Average round trip delay" <<<"$TURNUTILS_OUTPUT" | awk '{print $7}' | cut -d. -f1)
AVG_JITTER=$(grep -m1 "Average jitter" <<<"$TURNUTILS_OUTPUT" | awk '{print $5}' | cut -d. -f1)

echo "OK - connected to TURN server $SERVER_ADDRESS | $(perf lost_packets_percent "$LOST_PACKETS") $(perf send_dropped_percent "$SEND_DROPPED") $(perf avg_rtt "$AVG_RTT" ms) $(perf avg_jitter "$AVG_JITTER" ms)"
exit "$STATE_OK"

View File

@ -20,15 +20,13 @@ while getopts "w:c:" opt; do
done
if ! command -v iostat &>/dev/null; then
echo "iostat not found! Please install sysstat:"
echo "sudo apt install sysstat"
exit 1
echo "UNKNOWN - iostat not found! Please install sysstat"
exit -1
fi
if ! command -v iotop &>/dev/null; then
echo "iotop not found! Please install sysstat:"
echo "sudo apt install iotop"
exit 1
echo "UNKNWON - iotop not found! Please install iotop"
exit -1
fi
# Get iowait value

View File

@ -26,8 +26,8 @@ def percent_to_float(percent_str: str):
return percent / 100
def float_to_percent(float_value: float):
percent = round(float_value * 100, 2)
def float_to_percent(float_value):
percent = round(float(float_value) * 100, 2)
return f"{percent}%"
@ -37,10 +37,26 @@ def clean_device_list(in_str: str):
def zpool_list(zpool: str, vdev_type: str, header: bool = False):
try:
if vdev_type == 'pool':
if not header:
return subprocess.check_output(f"zpool list -v {zpool} | awk '/{vdev_type}/ {{while(getline && substr($0, 1, 1) ~ /[[:blank:]]/) print}}'", shell=True, stderr=subprocess.PIPE).decode('utf-8')
# GPT-4's original awk command was this:
# awk '/logs/ || /cache/ {{exit}} /^[[:space:]]+[^[:space:]]/ || /^[[:space:]]{2,}ata-/'
return subprocess.check_output(
f"zpool list -v {zpool} | awk '/logs/ || /cache/ {{exit}} /^[[:space:]]+[^[:space:]]/'", shell=True,
stderr=subprocess.PIPE).decode('utf-8')
else:
return subprocess.check_output(f"zpool list -v {zpool} | awk 'NR==1 {{print}} /{vdev_type}/ {{while(getline && substr($0, 1, 1) ~ /[[:blank:]]/) print}}'", shell=True, stderr=subprocess.PIPE).decode('utf-8')
raise NotImplementedError('not implemented for pool')
elif vdev_type in ['cache', 'log']:
if not header:
return subprocess.check_output(
f"zpool list -v {zpool} | awk '/{vdev_type}/ {{while(getline && substr($0, 1, 1) ~ /[[:blank:]]/) print}}'",
shell=True, stderr=subprocess.PIPE).decode('utf-8')
else:
return subprocess.check_output(
f"zpool list -v {zpool} | awk 'NR==1 {{print}} /{vdev_type}/ {{while(getline && substr($0, 1, 1) ~ /[[:blank:]]/) print}}'",
shell=True, stderr=subprocess.PIPE).decode('utf-8')
else:
raise NotImplementedError
except subprocess.CalledProcessError as e:
print('UNKNOWN - failed to check pool:', e.stderr.decode(sys.getfilesystemencoding()))
sys.exit(nagios.UNKNOWN)
@ -76,6 +92,8 @@ def check_vdev_devices(vdev_devices: list, critical_free, warning_free, critical
states[device['device']] = state
return critical, warning, states
# Return True only when `string` is exactly a single dash ('-'); any other
# value, including the empty string or a longer run of dashes, gives False.
def is_dash(string:str):
return string == '-'
def get_vdev_info(zpool: str, vdev_type: str):
output_zpool_logs = zpool_list(zpool, vdev_type)
@ -83,6 +101,7 @@ def get_vdev_info(zpool: str, vdev_type: str):
for line in list(filter(None, output_zpool_logs.split('\n'))):
data = list(filter(None, clean_device_list(line).split(' ')))
zpool_vdev_devices.append({
'pool': not (is_dash(data[2]) and is_dash(data[3]) and is_dash(data[6]) and is_dash(data[7])),
'device': data[0],
'size': data[1],
'alloc': data[2],
@ -117,10 +136,14 @@ def main():
parser = argparse.ArgumentParser(description='Check ZFS pool status')
parser.add_argument('--pool-name', required=True, help='Name of the ZFS pool to check.')
parser.add_argument('--check-type', required=True, choices=['status', 'cache', 'log'], help='What to check.')
parser.add_argument('--warning-free', type=int, default=65, help='Warning level for free space percentage (default: 65)')
parser.add_argument('--critical-free', type=int, default=80, help='Critical level for free space percentage (default: 80)')
parser.add_argument('--warning-frag', type=int, default=50, help='Warning level for fragmentation percentage (default: 50)')
parser.add_argument('--critical-frag', type=int, default=75, help='Critical level for fragmentation percentage (default: 75)')
parser.add_argument('--warning-free', type=int, default=65,
help='Warning level for free space percentage (default: 65)')
parser.add_argument('--critical-free', type=int, default=80,
help='Critical level for free space percentage (default: 80)')
parser.add_argument('--warning-frag', type=int, default=50,
help='Warning level for fragmentation percentage (default: 50)')
parser.add_argument('--critical-frag', type=int, default=75,
help='Critical level for fragmentation percentage (default: 75)')
args = parser.parse_args()
args.warning_free = percent_to_float(f'{args.warning_free}%')
@ -129,7 +152,7 @@ def main():
args.critical_frag = percent_to_float(f'{args.critical_frag}%')
if args.check_type == 'status':
vdev_devices = [x for x in get_vdev_info(args.pool_name, args.pool_name) if not x['device'].startswith('mirror-')]
vdev_devices = [x for x in get_vdev_info(args.pool_name, 'pool') if not x['pool']]
if not len(vdev_devices):
print('UNKNOWN - no devices found')
sys.exit(nagios.UNKNOWN)
@ -170,20 +193,24 @@ def main():
print('OK - pool', args.pool_name, 'is healthy')
# Build the table
critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free, args.critical_frag, args.warning_frag)
critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free,
args.critical_frag, args.warning_frag)
table_data = [
('Device', 'Size', 'Alloc', 'Free', 'Frag', 'Cap', 'Health', 'State'),
(args.pool_name, filesize(pool_status['size'], spaces=False, formatter=False), filesize(pool_status['allocated'], spaces=False, formatter=False), filesize(pool_status['free'], spaces=False, formatter=False), float_to_percent(pool_status['fragmentation']),
(args.pool_name, filesize(pool_status['size'], spaces=False, formatter=False),
filesize(pool_status['allocated'], spaces=False, formatter=False),
filesize(pool_status['free'], spaces=False, formatter=False),
float_to_percent(pool_status['fragmentation']),
float_to_percent(pool_status['capacity']),
pool_status['health'], f"[{('ok' if exit_code == nagios.OK else 'critical').upper()}]")
]
for device in vdev_devices:
for device in vdev_devices:
if isinstance(device['frag'], float):
device['frag'] = float_to_percent(device['frag'])
if isinstance(device['cap'], float):
device['cap'] = float_to_percent(device['cap'])
table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'], device['cap'], device['health'], states[device['device']]))
table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'],
device['cap'], device['health'], states[device['device']]))
print(list_to_markdown_table(table_data, align='left', seperator='!', borders=False))
sys.exit(exit_code)
@ -195,10 +222,16 @@ def main():
print('UNKNOWN - no devices found')
sys.exit(nagios.UNKNOWN)
table_data = [('Device', 'Size', 'Alloc', 'Free', 'Frag', 'Cap', 'Health', 'State')]
critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free, args.critical_frag, args.warning_frag)
critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free,
args.critical_frag, args.warning_frag)
for device in vdev_devices:
table_data.append((device['device'], device['size'], device['alloc'], device['free'], float_to_percent(device['frag']), float_to_percent(device['cap']), device['health'], states[device['device']]))
if device['frag'] != '-':
device['frag'] = float_to_percent(device['frag'])
if device['cap'] != '-':
device['cap'] = float_to_percent(device['cap'])
table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'],
device['cap'], device['health'], states[device['device']]))
exit_code = nagios.OK
out_str = None
@ -225,7 +258,9 @@ def main():
info_str = "shit's fucked"
crit_drives = crit_drives + critical['health']
issues.add('health')
out_str = ['CRITICAL', '-', info_str, f'for {"devices" if len(crit_drives) > 1 else "devices"} for {args.pool_name}:', ', '.join([*set(crit_drives)])]
out_str = ['CRITICAL', '-', info_str,
f'for {"devices" if len(crit_drives) > 1 else "devices"} for {args.pool_name}:',
', '.join([*set(crit_drives)])]
if len(warning['cap']) or len(warning['frag']) and not len(critical['health']):
if exit_code < nagios.WARNING:
exit_code = nagios.WARNING
@ -244,7 +279,9 @@ def main():
elif not len(warning['cap']) and len(warning['frag']):
info_str = 'critical fragmentation'
warn_drives = warning['frag']
out_str = ['WARNING', '-', info_str, f'for {"devices" if len(warn_drives) > 1 else "devices"} for {args.pool_name}:', ', '.join([*set(warn_drives)])]
out_str = ['WARNING', '-', info_str,
f'for {"devices" if len(warn_drives) > 1 else "devices"} for {args.pool_name}:',
', '.join([*set(warn_drives)])]
if not len(warn_drives) and not len(crit_drives):
out_str = ['OK', '-', f'{len(vdev_devices)} {args.check_type} devices for {args.pool_name} are healthy']