check_iperf3: add retry if iperf3 server is busy

check_matrix_synapse: fix
This commit is contained in:
Cyberes 2023-09-16 13:33:45 -06:00
parent 9bd9ba93a7
commit 7f95ce919e
2 changed files with 40 additions and 26 deletions

View File

@ -34,7 +34,6 @@
# Warning and critical levels are based on your specific network speed.
# Default values
SERVER=""
WARNING_LEVEL=""
@ -42,8 +41,8 @@ CRITICAL_LEVEL=""
RSA_PUBLIC_KEY=""
USERNAME=""
PASSWORD=""
RETRY=3
# Parse named arguments
while [[ $# -gt 0 ]]; do
key="$1"
@ -78,6 +77,11 @@ while [[ $# -gt 0 ]]; do
shift
shift
;;
--retry)
RETRY="$2"
shift
shift
;;
*)
shift
;;
@ -99,17 +103,27 @@ fi
export IPERF3_PASSWORD="$PASSWORD"
# Run iperf3 command with optional arguments
if [[ -n "$RSA_PUBLIC_KEY" ]] && [[ -n "$USERNAME" ]]; then
OUTPUT=$(iperf3 -c "$SERVER" -i 1 -t 10 -f m --rsa-public-key-path "$RSA_PUBLIC_KEY" --username "$USERNAME" 2>&1)
else
OUTPUT=$(iperf3 -c "$SERVER" -i 1 -t 10 -f m 2>&1)
fi
for ((i = 1; i <= RETRY; i++)); do
if [[ -n "$RSA_PUBLIC_KEY" ]] && [[ -n "$USERNAME" ]]; then
OUTPUT=$(iperf3 -c "$SERVER" -i 1 -t 30 -f m --rsa-public-key-path "$RSA_PUBLIC_KEY" --username "$USERNAME" 2>&1)
else
OUTPUT=$(iperf3 -c "$SERVER" -i 1 -t 10 -f m 2>&1)
fi
# Check if iperf3 command failed
if [[ $? -ne 0 ]]; then
echo -e "UNKNOWN - iperf3 command failed: $OUTPUT\n"
exit -1
fi
# Check if iperf3 command failed
if [[ $? -ne 0 ]]; then
if [[ $OUTPUT == *"the server is busy running a test. try again later"* ]]; then
if [[ $i -lt $RETRY ]]; then
sleep 30
continue
fi
fi
echo -e "UNKNOWN - iperf3 command failed: $OUTPUT\n"
exit -1
else
break
fi
done
# Extract the receiver bitrate
RECEIVER_BITRATE=$(echo "$OUTPUT" | grep -Eo '[0-9]+(\.[0-9]+)? Mbits/sec' | tail -1 | awk '{print $1}')

View File

@ -23,7 +23,7 @@ def get_avg_python_gc_time(api_key, interval, data_range, endpoint):
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'expr': 'rate(python_gc_time_sum{instance="172.0.2.118:9000",job=~"synapse",index=~".*"}[2m])/rate(python_gc_time_count[2m])',
'expr': 'rate(python_gc_time_sum{instance="matrix.synapse",job=~".*",index=~".*"}[2m])/rate(python_gc_time_count[2m])',
'format': 'time_series',
'intervalFactor': 2,
'refId': 'A',
@ -69,7 +69,7 @@ def get_outgoing_http_request_rate(api_key, interval, data_range, endpoint):
'uid': 'DAMPdbiIz'
},
'editorMode': 'code',
'expr': 'rate(synapse_http_client_requests_total{job=~"synapse",index=~".*",instance="172.0.2.118:9000"}[2m])',
'expr': 'rate(synapse_http_client_requests_total{job=~".*",index=~".*",instance="matrix.synapse"}[2m])',
'range': True,
'refId': 'A',
'interval': '',
@ -84,7 +84,7 @@ def get_outgoing_http_request_rate(api_key, interval, data_range, endpoint):
'uid': 'DAMPdbiIz'
},
'editorMode': 'code',
'expr': 'rate(synapse_http_matrixfederationclient_requests_total{job=~"synapse",index=~".*",instance="172.0.2.118:9000"}[2m])',
'expr': 'rate(synapse_http_matrixfederationclient_requests_total{job=~".*",index=~".*",instance="matrix.synapse"}[2m])',
'range': True,
'refId': 'B',
'interval': '',
@ -114,7 +114,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'expr': 'histogram_quantile(0.99, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
'expr': 'histogram_quantile(0.99, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
'format': 'time_series',
'intervalFactor': 1,
'refId': 'D',
@ -134,7 +134,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'expr': 'histogram_quantile(0.9, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
'expr': 'histogram_quantile(0.9, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
'format': 'time_series',
'interval': '',
'intervalFactor': 1,
@ -151,7 +151,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'expr': 'histogram_quantile(0.75, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
'expr': 'histogram_quantile(0.75, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
'format': 'time_series',
'intervalFactor': 1,
'refId': 'C',
@ -168,7 +168,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'expr': 'histogram_quantile(0.5, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
'expr': 'histogram_quantile(0.5, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
'format': 'time_series',
'intervalFactor': 1,
'refId': 'B',
@ -185,7 +185,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'expr': 'histogram_quantile(0.25, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
'expr': 'histogram_quantile(0.25, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
'refId': 'F',
'interval': '',
'queryType': 'timeSeriesQuery',
@ -200,7 +200,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'expr': 'histogram_quantile(0.05, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
'expr': 'histogram_quantile(0.05, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
'refId': 'G',
'interval': '',
'queryType': 'timeSeriesQuery',
@ -215,7 +215,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'expr': 'sum(rate(synapse_http_server_response_time_seconds_sum{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) / sum(rate(synapse_http_server_response_time_seconds_count{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m]))',
'expr': 'sum(rate(synapse_http_server_response_time_seconds_sum{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) / sum(rate(synapse_http_server_response_time_seconds_count{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m]))',
'refId': 'H',
'interval': '',
'queryType': 'timeSeriesQuery',
@ -230,7 +230,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'expr': 'sum(rate(synapse_storage_events_persisted_events_total{instance="172.0.2.118:9000"}[2m]))',
'expr': 'sum(rate(synapse_storage_events_persisted_events_total{instance="matrix.synapse"}[2m]))',
'hide': False,
'instant': False,
'refId': 'E',
@ -261,7 +261,7 @@ def get_waiting_for_db(api_key, interval, data_range, endpoint):
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'expr': 'rate(synapse_storage_schedule_time_sum{instance="172.0.2.118:9000",job=~"synapse",index=~".*"}[30s])/rate(synapse_storage_schedule_time_count[30s])',
'expr': 'rate(synapse_storage_schedule_time_sum{instance="matrix.synapse",job=~".*",index=~".*"}[30s])/rate(synapse_storage_schedule_time_count[30s])',
'format': 'time_series',
'intervalFactor': 2,
'refId': 'A',
@ -305,7 +305,7 @@ def get_stateres_worst_case(api_key, interval, data_range, endpoint):
'uid': 'DAMPdbiIz',
},
'exemplar': False,
'expr': 'sum(rate(synapse_state_res_db_for_biggest_room_seconds_total{instance="172.0.2.118:9000"}[1m]))',
'expr': 'sum(rate(synapse_state_res_db_for_biggest_room_seconds_total{instance="matrix.synapse"}[1m]))',
'format': 'time_series',
'hide': False,
'instant': False,
@ -324,7 +324,7 @@ def get_stateres_worst_case(api_key, interval, data_range, endpoint):
'uid': 'DAMPdbiIz',
},
'exemplar': False,
'expr': 'sum(rate(synapse_state_res_cpu_for_biggest_room_seconds_total{instance="172.0.2.118:9000"}[1m]))',
'expr': 'sum(rate(synapse_state_res_cpu_for_biggest_room_seconds_total{instance="matrix.synapse"}[1m]))',
'format': 'time_series',
'hide': False,
'instant': False,