From 29c09666e8442051838b6946c042238119a35b74 Mon Sep 17 00:00:00 2001 From: Drake Panzer Date: Tue, 30 May 2023 12:38:25 -0600 Subject: [PATCH] check_zfs_zpool: fix str and float issues, fix zpool listing check_iowait_proc: add missing package notifications check_coturn: add --- check_coturn.sh | 79 ++++++++++++++++++++++++++++++++++++++++++ check_iowait_proc.sh | 10 +++--- check_zfs_zpool.py | 81 ++++++++++++++++++++++++++++++++------------ 3 files changed, 142 insertions(+), 28 deletions(-) create mode 100755 check_coturn.sh diff --git a/check_coturn.sh b/check_coturn.sh new file mode 100755 index 0000000..bc5d835 --- /dev/null +++ b/check_coturn.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +SERVER_ADDRESS="" +SECRET="" +REALM="" +PORT=3478 +PROTOCOL="udp" +TIMEOUT=10 + +while getopts "s:S:u:r:p:P:t:" opt; do + case $opt in + s) + SERVER_ADDRESS="$OPTARG" + ;; + S) + SECRET="$OPTARG" + ;; + r) + REALM="$OPTARG" + ;; + p) + PORT="$OPTARG" + ;; + P) + PROTOCOL="$OPTARG" + ;; + t) + TIMEOUT="$OPTARG" + ;; + *) + echo "Usage: $0 -s SERVER_ADDRESS -S SECRET -r REALM [-p PORT] [-P PROTOCOL] [-t TIMEOUT]" + exit 1 + ;; + esac +done + +# Check if required options are set +if [[ -z "$SERVER_ADDRESS" || -z "$SECRET" || -z "$REALM" ]]; then + echo "Usage: $0 -s SERVER_ADDRESS -S SECRET -r REALM [-p PORT] [-P PROTOCOL] [-t TIMEOUT]" + exit 1 +fi + +if ! command -v turnutils_uclient &>/dev/null; then + echo "UNKNOWN - turnutils_uclient not found! Please install coturn" + exit -1 +fi + +if ! command -v stun &>/dev/null; then + echo "UNKNOWN - stun not found! Please install stun-client" + exit -1 +fi + +# Fetch the user's public IP using the coturn server as a STUN server +PEER_ADDRESS=$(stun "$SERVER_ADDRESS" -p "$PORT" -v 1 2>&1 | grep "MappedAddress" | awk -F'[ =:]+' '{print $2}') + +if [[ -z "$PEER_ADDRESS" ]]; then + echo "UNKNOWN Failed to fetch the user's public IP using the coturn server as a STUN server." 
+ exit 1 +fi + +TURNUTILS_OUTPUT=$(turnutils_uclient -s -W "$SECRET" -r "$REALM" -p "$PORT" -e "$PEER_ADDRESS" -B -y "$SERVER_ADDRESS" 2>&1) + +if [ $? -eq 0 ]; then + # TOT_SEND_BYTES=$(echo "$TURNUTILS_OUTPUT" | grep -m1 "start_mclient: tot_send_bytes" | awk -F'[~ ,]+' '{print $5}') + # TOT_RECV_BYTES=$(echo "$TURNUTILS_OUTPUT" | grep -m1 "start_mclient: tot_send_bytes" | awk -F'[~ ,]+' '{print $7}') + + LOST_PACKETS=$(echo "$TURNUTILS_OUTPUT" | grep -m1 "Total lost packets" | awk -F'[(%)]' '{print $2"%"}' | tr -d '%' | cut -d. -f1) + SEND_DROPPED=$(echo "$TURNUTILS_OUTPUT" | grep -m1 "Total lost packets" | awk -F'[(%)]' '{print $5"%"}' | tr -d '%' | cut -d. -f1) + + AVG_RTT=$(echo "$TURNUTILS_OUTPUT" | grep -m1 "Average round trip delay" | awk '{print $7}' | cut -d. -f1) + AVG_JITTER=$(echo "$TURNUTILS_OUTPUT" | grep -m1 "Average jitter" | awk '{print $5}' | cut -d. -f1) + + echo "OK - connected to TURN server $SERVER_ADDRESS | lost_packets_percent=$LOST_PACKETS send_dropped_percent=$SEND_DROPPED avg_rtt=${AVG_RTT}ms avg_jitter=${AVG_JITTER}ms" + exit 0 +else + echo "CRITICAL - failed to connect to TURN server:" + echo "$TURNUTILS_OUTPUT" + exit 2 +fi diff --git a/check_iowait_proc.sh b/check_iowait_proc.sh index 79db965..135ae8a 100755 --- a/check_iowait_proc.sh +++ b/check_iowait_proc.sh @@ -20,15 +20,13 @@ while getopts "w:c:" opt; do done if ! command -v iostat &>/dev/null; then - echo "iostat not found! Please install sysstat:" - echo "sudo apt install sysstat" - exit 1 + echo "UNKNOWN - iostat not found! Please install sysstat" + exit -1 fi if ! command -v iotop &>/dev/null; then - echo "iotop not found! Please install sysstat:" - echo "sudo apt install iotop" - exit 1 + echo "UNKNOWN - iotop not found! 
Please install iotop" + exit -1 fi # Get iowait value diff --git a/check_zfs_zpool.py b/check_zfs_zpool.py index b7aaae6..9b8db89 100755 --- a/check_zfs_zpool.py +++ b/check_zfs_zpool.py @@ -26,8 +26,8 @@ def percent_to_float(percent_str: str): return percent / 100 -def float_to_percent(float_value: float): - percent = round(float_value * 100, 2) +def float_to_percent(float_value): + percent = round(float(float_value) * 100, 2) return f"{percent}%" @@ -37,10 +37,26 @@ def clean_device_list(in_str: str): def zpool_list(zpool: str, vdev_type: str, header: bool = False): try: - if not header: - return subprocess.check_output(f"zpool list -v {zpool} | awk '/{vdev_type}/ {{while(getline && substr($0, 1, 1) ~ /[[:blank:]]/) print}}'", shell=True, stderr=subprocess.PIPE).decode('utf-8') + if vdev_type == 'pool': + if not header: + # GPT-4's original awk command was this: + # awk '/logs/ || /cache/ {{exit}} /^[[:space:]]+[^[:space:]]/ || /^[[:space:]]{2,}ata-/' + return subprocess.check_output( + f"zpool list -v {zpool} | awk '/logs/ || /cache/ {{exit}} /^[[:space:]]+[^[:space:]]/'", shell=True, + stderr=subprocess.PIPE).decode('utf-8') + else: + raise NotImplementedError('not implemented for pool') + elif vdev_type in ['cache', 'log']: + if not header: + return subprocess.check_output( + f"zpool list -v {zpool} | awk '/{vdev_type}/ {{while(getline && substr($0, 1, 1) ~ /[[:blank:]]/) print}}'", + shell=True, stderr=subprocess.PIPE).decode('utf-8') + else: + return subprocess.check_output( + f"zpool list -v {zpool} | awk 'NR==1 {{print}} /{vdev_type}/ {{while(getline && substr($0, 1, 1) ~ /[[:blank:]]/) print}}'", + shell=True, stderr=subprocess.PIPE).decode('utf-8') else: - return subprocess.check_output(f"zpool list -v {zpool} | awk 'NR==1 {{print}} /{vdev_type}/ {{while(getline && substr($0, 1, 1) ~ /[[:blank:]]/) print}}'", shell=True, stderr=subprocess.PIPE).decode('utf-8') + raise NotImplementedError except subprocess.CalledProcessError as e: print('UNKNOWN - failed 
to check pool:', e.stderr.decode(sys.getfilesystemencoding())) sys.exit(nagios.UNKNOWN) @@ -76,6 +92,8 @@ def check_vdev_devices(vdev_devices: list, critical_free, warning_free, critical states[device['device']] = state return critical, warning, states +def is_dash(string:str): + return string == '-' def get_vdev_info(zpool: str, vdev_type: str): output_zpool_logs = zpool_list(zpool, vdev_type) @@ -83,6 +101,7 @@ def get_vdev_info(zpool: str, vdev_type: str): for line in list(filter(None, output_zpool_logs.split('\n'))): data = list(filter(None, clean_device_list(line).split(' '))) zpool_vdev_devices.append({ + 'pool': not (is_dash(data[2]) and is_dash(data[3]) and is_dash(data[6]) and is_dash(data[7])), 'device': data[0], 'size': data[1], 'alloc': data[2], @@ -117,10 +136,14 @@ def main(): parser = argparse.ArgumentParser(description='Check ZFS pool status') parser.add_argument('--pool-name', required=True, help='Name of the ZFS pool to check.') parser.add_argument('--check-type', required=True, choices=['status', 'cache', 'log'], help='What to check.') - parser.add_argument('--warning-free', type=int, default=65, help='Warning level for free space percentage (default: 65)') - parser.add_argument('--critical-free', type=int, default=80, help='Critical level for free space percentage (default: 80)') - parser.add_argument('--warning-frag', type=int, default=50, help='Warning level for fragmentation percentage (default: 50)') - parser.add_argument('--critical-frag', type=int, default=75, help='Critical level for fragmentation percentage (default: 75)') + parser.add_argument('--warning-free', type=int, default=65, + help='Warning level for free space percentage (default: 65)') + parser.add_argument('--critical-free', type=int, default=80, + help='Critical level for free space percentage (default: 80)') + parser.add_argument('--warning-frag', type=int, default=50, + help='Warning level for fragmentation percentage (default: 50)') + 
parser.add_argument('--critical-frag', type=int, default=75, + help='Critical level for fragmentation percentage (default: 75)') args = parser.parse_args() args.warning_free = percent_to_float(f'{args.warning_free}%') @@ -129,7 +152,7 @@ def main(): args.critical_frag = percent_to_float(f'{args.critical_frag}%') if args.check_type == 'status': - vdev_devices = [x for x in get_vdev_info(args.pool_name, args.pool_name) if not x['device'].startswith('mirror-')] + vdev_devices = [x for x in get_vdev_info(args.pool_name, 'pool') if not x['pool']] if not len(vdev_devices): print('UNKNOWN - no devices found') sys.exit(nagios.UNKNOWN) @@ -170,20 +193,24 @@ def main(): print('OK - pool', args.pool_name, 'is healthy') # Build the table - critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free, args.critical_frag, args.warning_frag) + critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free, + args.critical_frag, args.warning_frag) table_data = [ ('Device', 'Size', 'Alloc', 'Free', 'Frag', 'Cap', 'Health', 'State'), - (args.pool_name, filesize(pool_status['size'], spaces=False, formatter=False), filesize(pool_status['allocated'], spaces=False, formatter=False), filesize(pool_status['free'], spaces=False, formatter=False), float_to_percent(pool_status['fragmentation']), + (args.pool_name, filesize(pool_status['size'], spaces=False, formatter=False), + filesize(pool_status['allocated'], spaces=False, formatter=False), + filesize(pool_status['free'], spaces=False, formatter=False), + float_to_percent(pool_status['fragmentation']), float_to_percent(pool_status['capacity']), pool_status['health'], f"[{('ok' if exit_code == nagios.OK else 'critical').upper()}]") ] for device in vdev_devices: - for device in vdev_devices: - if isinstance(device['frag'], float): - device['frag'] = float_to_percent(device['frag']) - if isinstance(device['cap'], float): - device['cap'] = float_to_percent(device['cap']) 
- table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'], device['cap'], device['health'], states[device['device']])) + if isinstance(device['frag'], float): + device['frag'] = float_to_percent(device['frag']) + if isinstance(device['cap'], float): + device['cap'] = float_to_percent(device['cap']) + table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'], + device['cap'], device['health'], states[device['device']])) print(list_to_markdown_table(table_data, align='left', seperator='!', borders=False)) sys.exit(exit_code) @@ -195,10 +222,16 @@ def main(): print('UNKNOWN - no devices found') sys.exit(nagios.UNKNOWN) table_data = [('Device', 'Size', 'Alloc', 'Free', 'Frag', 'Cap', 'Health', 'State')] - critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free, args.critical_frag, args.warning_frag) + critical, warning, states = check_vdev_devices(vdev_devices, args.critical_free, args.warning_free, + args.critical_frag, args.warning_frag) for device in vdev_devices: - table_data.append((device['device'], device['size'], device['alloc'], device['free'], float_to_percent(device['frag']), float_to_percent(device['cap']), device['health'], states[device['device']])) + if device['frag'] != '-': + device['frag'] = float_to_percent(device['frag']) + if device['cap'] != '-': + device['cap'] = float_to_percent(device['cap']) + table_data.append((device['device'], device['size'], device['alloc'], device['free'], device['frag'], + device['cap'], device['health'], states[device['device']])) exit_code = nagios.OK out_str = None @@ -225,7 +258,9 @@ def main(): info_str = "shit's fucked" crit_drives = crit_drives + critical['health'] issues.add('health') - out_str = ['CRITICAL', '-', info_str, f'for {"devices" if len(crit_drives) > 1 else "devices"} for {args.pool_name}:', ', '.join([*set(crit_drives)])] + out_str = ['CRITICAL', '-', info_str, + f'for 
{"devices" if len(crit_drives) > 1 else "devices"} for {args.pool_name}:', + ', '.join([*set(crit_drives)])] if len(warning['cap']) or len(warning['frag']) and not len(critical['health']): if exit_code < nagios.WARNING: exit_code = nagios.WARNING @@ -244,7 +279,9 @@ def main(): elif not len(warning['cap']) and len(warning['frag']): info_str = 'critical fragmentation' warn_drives = warning['frag'] - out_str = ['WARNING', '-', info_str, f'for {"devices" if len(warn_drives) > 1 else "devices"} for {args.pool_name}:', ', '.join([*set(warn_drives)])] + out_str = ['WARNING', '-', info_str, + f'for {"devices" if len(warn_drives) > 1 else "devices"} for {args.pool_name}:', + ', '.join([*set(warn_drives)])] if not len(warn_drives) and not len(crit_drives): out_str = ['OK', '-', f'{len(vdev_devices)} {args.check_type} devices for {args.pool_name} are healthy']