check_iperf3: add retry if iperf3 server is busy
check_matrix_synapse: fix instance and job label selectors in Prometheus queries
This commit is contained in:
parent
9bd9ba93a7
commit
7f95ce919e
|
@ -34,7 +34,6 @@
|
|||
|
||||
# Warning and critical levels are based on your specific network speed.
|
||||
|
||||
|
||||
# Default values
|
||||
SERVER=""
|
||||
WARNING_LEVEL=""
|
||||
|
@ -42,8 +41,8 @@ CRITICAL_LEVEL=""
|
|||
RSA_PUBLIC_KEY=""
|
||||
USERNAME=""
|
||||
PASSWORD=""
|
||||
RETRY=3
|
||||
|
||||
# Parse named arguments
|
||||
while [[ $# -gt 0 ]]; do
|
||||
key="$1"
|
||||
|
||||
|
@ -78,6 +77,11 @@ while [[ $# -gt 0 ]]; do
|
|||
shift
|
||||
shift
|
||||
;;
|
||||
--retry)
|
||||
RETRY="$2"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
shift
|
||||
;;
|
||||
|
@ -99,17 +103,27 @@ fi
|
|||
export IPERF3_PASSWORD="$PASSWORD"
|
||||
|
||||
# Run iperf3 command with optional arguments
|
||||
for ((i = 1; i <= RETRY; i++)); do
|
||||
if [[ -n "$RSA_PUBLIC_KEY" ]] && [[ -n "$USERNAME" ]]; then
|
||||
OUTPUT=$(iperf3 -c "$SERVER" -i 1 -t 10 -f m --rsa-public-key-path "$RSA_PUBLIC_KEY" --username "$USERNAME" 2>&1)
|
||||
OUTPUT=$(iperf3 -c "$SERVER" -i 1 -t 30 -f m --rsa-public-key-path "$RSA_PUBLIC_KEY" --username "$USERNAME" 2>&1)
|
||||
else
|
||||
OUTPUT=$(iperf3 -c "$SERVER" -i 1 -t 10 -f m 2>&1)
|
||||
fi
|
||||
|
||||
# Check if iperf3 command failed
|
||||
if [[ $? -ne 0 ]]; then
|
||||
if [[ $OUTPUT == *"the server is busy running a test. try again later"* ]]; then
|
||||
if [[ $i -lt $RETRY ]]; then
|
||||
sleep 30
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
echo -e "UNKNOWN - iperf3 command failed: $OUTPUT\n"
|
||||
exit -1
|
||||
else
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
# Extract the receiver bitrate
|
||||
RECEIVER_BITRATE=$(echo "$OUTPUT" | grep -Eo '[0-9]+(\.[0-9]+)? Mbits/sec' | tail -1 | awk '{print $1}')
|
||||
|
|
|
@ -23,7 +23,7 @@ def get_avg_python_gc_time(api_key, interval, data_range, endpoint):
|
|||
'type': 'prometheus',
|
||||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'expr': 'rate(python_gc_time_sum{instance="172.0.2.118:9000",job=~"synapse",index=~".*"}[2m])/rate(python_gc_time_count[2m])',
|
||||
'expr': 'rate(python_gc_time_sum{instance="matrix.synapse",job=~".*",index=~".*"}[2m])/rate(python_gc_time_count[2m])',
|
||||
'format': 'time_series',
|
||||
'intervalFactor': 2,
|
||||
'refId': 'A',
|
||||
|
@ -69,7 +69,7 @@ def get_outgoing_http_request_rate(api_key, interval, data_range, endpoint):
|
|||
'uid': 'DAMPdbiIz'
|
||||
},
|
||||
'editorMode': 'code',
|
||||
'expr': 'rate(synapse_http_client_requests_total{job=~"synapse",index=~".*",instance="172.0.2.118:9000"}[2m])',
|
||||
'expr': 'rate(synapse_http_client_requests_total{job=~".*",index=~".*",instance="matrix.synapse"}[2m])',
|
||||
'range': True,
|
||||
'refId': 'A',
|
||||
'interval': '',
|
||||
|
@ -84,7 +84,7 @@ def get_outgoing_http_request_rate(api_key, interval, data_range, endpoint):
|
|||
'uid': 'DAMPdbiIz'
|
||||
},
|
||||
'editorMode': 'code',
|
||||
'expr': 'rate(synapse_http_matrixfederationclient_requests_total{job=~"synapse",index=~".*",instance="172.0.2.118:9000"}[2m])',
|
||||
'expr': 'rate(synapse_http_matrixfederationclient_requests_total{job=~".*",index=~".*",instance="matrix.synapse"}[2m])',
|
||||
'range': True,
|
||||
'refId': 'B',
|
||||
'interval': '',
|
||||
|
@ -114,7 +114,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
|
|||
'type': 'prometheus',
|
||||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'expr': 'histogram_quantile(0.99, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
|
||||
'expr': 'histogram_quantile(0.99, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
|
||||
'format': 'time_series',
|
||||
'intervalFactor': 1,
|
||||
'refId': 'D',
|
||||
|
@ -134,7 +134,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
|
|||
'type': 'prometheus',
|
||||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'expr': 'histogram_quantile(0.9, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
|
||||
'expr': 'histogram_quantile(0.9, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
|
||||
'format': 'time_series',
|
||||
'interval': '',
|
||||
'intervalFactor': 1,
|
||||
|
@ -151,7 +151,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
|
|||
'type': 'prometheus',
|
||||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'expr': 'histogram_quantile(0.75, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
|
||||
'expr': 'histogram_quantile(0.75, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
|
||||
'format': 'time_series',
|
||||
'intervalFactor': 1,
|
||||
'refId': 'C',
|
||||
|
@ -168,7 +168,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
|
|||
'type': 'prometheus',
|
||||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'expr': 'histogram_quantile(0.5, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
|
||||
'expr': 'histogram_quantile(0.5, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
|
||||
'format': 'time_series',
|
||||
'intervalFactor': 1,
|
||||
'refId': 'B',
|
||||
|
@ -185,7 +185,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
|
|||
'type': 'prometheus',
|
||||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'expr': 'histogram_quantile(0.25, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
|
||||
'expr': 'histogram_quantile(0.25, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
|
||||
'refId': 'F',
|
||||
'interval': '',
|
||||
'queryType': 'timeSeriesQuery',
|
||||
|
@ -200,7 +200,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
|
|||
'type': 'prometheus',
|
||||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'expr': 'histogram_quantile(0.05, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) by (le))',
|
||||
'expr': 'histogram_quantile(0.05, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) by (le))',
|
||||
'refId': 'G',
|
||||
'interval': '',
|
||||
'queryType': 'timeSeriesQuery',
|
||||
|
@ -215,7 +215,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
|
|||
'type': 'prometheus',
|
||||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'expr': 'sum(rate(synapse_http_server_response_time_seconds_sum{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m])) / sum(rate(synapse_http_server_response_time_seconds_count{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="172.0.2.118:9000",code=~"2.."}[2m]))',
|
||||
'expr': 'sum(rate(synapse_http_server_response_time_seconds_sum{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m])) / sum(rate(synapse_http_server_response_time_seconds_count{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="matrix.synapse",code=~"2.."}[2m]))',
|
||||
'refId': 'H',
|
||||
'interval': '',
|
||||
'queryType': 'timeSeriesQuery',
|
||||
|
@ -230,7 +230,7 @@ def get_event_send_time(api_key, interval, data_range, endpoint):
|
|||
'type': 'prometheus',
|
||||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'expr': 'sum(rate(synapse_storage_events_persisted_events_total{instance="172.0.2.118:9000"}[2m]))',
|
||||
'expr': 'sum(rate(synapse_storage_events_persisted_events_total{instance="matrix.synapse"}[2m]))',
|
||||
'hide': False,
|
||||
'instant': False,
|
||||
'refId': 'E',
|
||||
|
@ -261,7 +261,7 @@ def get_waiting_for_db(api_key, interval, data_range, endpoint):
|
|||
'type': 'prometheus',
|
||||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'expr': 'rate(synapse_storage_schedule_time_sum{instance="172.0.2.118:9000",job=~"synapse",index=~".*"}[30s])/rate(synapse_storage_schedule_time_count[30s])',
|
||||
'expr': 'rate(synapse_storage_schedule_time_sum{instance="matrix.synapse",job=~".*",index=~".*"}[30s])/rate(synapse_storage_schedule_time_count[30s])',
|
||||
'format': 'time_series',
|
||||
'intervalFactor': 2,
|
||||
'refId': 'A',
|
||||
|
@ -305,7 +305,7 @@ def get_stateres_worst_case(api_key, interval, data_range, endpoint):
|
|||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'exemplar': False,
|
||||
'expr': 'sum(rate(synapse_state_res_db_for_biggest_room_seconds_total{instance="172.0.2.118:9000"}[1m]))',
|
||||
'expr': 'sum(rate(synapse_state_res_db_for_biggest_room_seconds_total{instance="matrix.synapse"}[1m]))',
|
||||
'format': 'time_series',
|
||||
'hide': False,
|
||||
'instant': False,
|
||||
|
@ -324,7 +324,7 @@ def get_stateres_worst_case(api_key, interval, data_range, endpoint):
|
|||
'uid': 'DAMPdbiIz',
|
||||
},
|
||||
'exemplar': False,
|
||||
'expr': 'sum(rate(synapse_state_res_cpu_for_biggest_room_seconds_total{instance="172.0.2.118:9000"}[1m]))',
|
||||
'expr': 'sum(rate(synapse_state_res_cpu_for_biggest_room_seconds_total{instance="matrix.synapse"}[1m]))',
|
||||
'format': 'time_series',
|
||||
'hide': False,
|
||||
'instant': False,
|
||||
|
|
Loading…
Reference in New Issue