# icinga2-checks/checker/synapse_grafana.py

import time
import numpy as np
import requests
from urllib3.exceptions import InsecureRequestWarning
# Every request in this module is made with verify=False (presumably the Grafana
# endpoint has an internal/self-signed certificate — confirm against deployment),
# so suppress urllib3's per-request InsecureRequestWarning noise up front.
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
def timestamp_minutes_ago(minutes):
    """Return the Unix timestamp, in milliseconds, for *minutes* minutes before now."""
    seconds_back = minutes * 60
    past = time.time() - seconds_back
    return int(past * 1000)
def get_avg_python_gc_time(api_key, interval, data_range, endpoint,
                           instance='172.0.2.118:9000'):
    """Return the average Python GC time per Synapse process over the window.

    Queries Grafana's datasource proxy for
    ``rate(python_gc_time_sum[2m]) / rate(python_gc_time_count[2m])`` and
    averages each returned series.

    :param api_key: Grafana API token (sent as a Bearer header).
    :param interval: query resolution in seconds (Grafana ``intervalMs``).
    :param data_range: look-back window in minutes (``now-{data_range}m``).
    :param endpoint: Grafana base URL, e.g. ``https://grafana.example``.
    :param instance: Prometheus instance label to filter on
        (default keeps the original hard-coded host for compatibility).
    :return: list of per-series averages rounded to 5 decimals; ``[]`` when
        Grafana returns no frames.
    """
    json_data = {
        'queries': [
            {
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'DAMPdbiIz',
                },
                'expr': f'rate(python_gc_time_sum{{instance="{instance}",job=~"synapse",index=~".*"}}[2m])/rate(python_gc_time_count[2m])',
                'format': 'time_series',
                'intervalFactor': 2,
                'refId': 'A',
                'step': 20,
                'target': '',
                'interval': '',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 8,
                'intervalMs': interval * 1000,
            },
        ],
        'from': f'now-{data_range}m',
        'to': 'now',
    }
    response = requests.post(
        f'{endpoint}/api/ds/query',
        headers={'Authorization': f'Bearer {api_key}'},
        json=json_data,
        verify=False,  # internal endpoint; cert not verified (see module header)
        timeout=30,    # don't hang the check forever on a stalled Grafana
    ).json()
    frames = response['results'].get('A', {}).get('frames')
    if not frames:
        return []
    # Values can contain nulls: drop them per frame, then drop frames that
    # were entirely null before averaging.
    series = [
        [v for v in frame['data']['values'][1] if v is not None]
        for frame in frames
    ]
    return [np.round(np.average(s), 5) for s in series if s]
def get_outgoing_http_request_rate(api_key, interval, data_range, endpoint,
                                   instance='172.0.2.118:9000'):
    """Return outgoing HTTP request rates keyed by metric label.

    Posts two Prometheus queries (client requests and Matrix federation
    client requests) and maps the last label value of each returned frame's
    schema name to its average rate over the window.

    :param api_key: Grafana API token (sent as a Bearer header).
    :param interval: query resolution in seconds (Grafana ``intervalMs``).
    :param data_range: look-back window in minutes (``now-{data_range}m``).
    :param endpoint: Grafana base URL.
    :param instance: Prometheus instance label to filter on
        (default keeps the original hard-coded host for compatibility).
    :return: dict of {series name: rounded average rate}; empty when no frames.
    """
    def _query(expr, ref_id):
        # Both queries differ only in expr/refId; build them from one template.
        return {
            'datasource': {
                'type': 'prometheus',
                'uid': 'DAMPdbiIz',
            },
            'editorMode': 'code',
            'expr': expr,
            'range': True,
            'refId': ref_id,
            'interval': '',
            'exemplar': False,
            'utcOffsetSec': 0,
            'legendFormat': '',
            'datasourceId': 8,
            'intervalMs': interval * 1000,
        }

    json_data = {
        'queries': [
            _query(f'rate(synapse_http_client_requests_total{{job=~"synapse",index=~".*",instance="{instance}"}}[2m])', 'A'),
            _query(f'rate(synapse_http_matrixfederationclient_requests_total{{job=~"synapse",index=~".*",instance="{instance}"}}[2m])', 'B'),
        ],
        'from': f'now-{data_range}m',
        'to': 'now',
    }
    response = requests.post(
        f'{endpoint}/api/ds/query',
        headers={'Authorization': f'Bearer {api_key}'},
        json=json_data,
        verify=False,  # internal endpoint; cert not verified (see module header)
        timeout=30,    # don't hang the check forever on a stalled Grafana
    ).json()
    output = {}
    for result in response['results'].values():
        frames = result.get('frames') or []
        if not frames:
            continue
        # Schema name looks like 'metric{label="value"}'; keep the last value.
        name = frames[0]['schema']['name'].split('=')[-1].strip('}').strip('"')
        output[name] = np.round(np.average(frames[0]['data']['values'][1]), 2)
    return output
def get_event_send_time(api_key, interval, data_range, endpoint,
                        instance='172.0.2.118:9000'):
    """Return the average rate of persisted Synapse events over the window.

    NOTE(review): the original dashboard export posted eight queries
    (send-time quantiles with refIds A-D and F-H, plus 'E'), but only the
    result of refId 'E' — persisted events/sec — was ever read. Only that
    query is sent now; returned behavior is unchanged.

    :param api_key: Grafana API token (sent as a Bearer header).
    :param interval: query resolution in seconds (Grafana ``intervalMs``).
    :param data_range: look-back window in minutes (``now-{data_range}m``).
    :param endpoint: Grafana base URL.
    :param instance: Prometheus instance label to filter on
        (default keeps the original hard-coded host for compatibility).
    :return: rounded average events-persisted rate, or ``None`` when Grafana
        returns no frames.
    """
    json_data = {
        'queries': [
            {
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'DAMPdbiIz',
                },
                'expr': f'sum(rate(synapse_storage_events_persisted_events_total{{instance="{instance}"}}[2m]))',
                'hide': False,
                'instant': False,
                'refId': 'E',
                'interval': '',
                'editorMode': 'code',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 8,
                'intervalMs': interval * 1000,
            },
        ],
        'from': f'now-{data_range}m',
        'to': 'now',
    }
    response = requests.post(
        f'{endpoint}/api/ds/query',
        headers={'Authorization': f'Bearer {api_key}'},
        json=json_data,
        verify=False,  # internal endpoint; cert not verified (see module header)
        timeout=30,    # don't hang the check forever on a stalled Grafana
    ).json()
    frames = response['results'].get('E', {}).get('frames')
    if not frames:
        return None
    return np.round(np.average(frames[0]['data']['values'][1]), 2)
def get_waiting_for_db(api_key, interval, data_range, endpoint,
                       instance='172.0.2.118:9000'):
    """Return average DB-scheduling wait time for Synapse over the window.

    Queries ``rate(synapse_storage_schedule_time_sum[30s]) /
    rate(synapse_storage_schedule_time_count[30s])`` via Grafana.

    :param api_key: Grafana API token (sent as a Bearer header).
    :param interval: query resolution in seconds (Grafana ``intervalMs``).
    :param data_range: look-back window in minutes (``now-{data_range}m``).
    :param endpoint: Grafana base URL.
    :param instance: Prometheus instance label to filter on
        (default keeps the original hard-coded host for compatibility).
    :return: tuple of (rounded average excluding nulls, whether any null
        samples were present, the raw value list as returned by Grafana).
    """
    json_data = {
        'queries': [
            {
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'DAMPdbiIz',
                },
                'expr': f'rate(synapse_storage_schedule_time_sum{{instance="{instance}",job=~"synapse",index=~".*"}}[30s])/rate(synapse_storage_schedule_time_count[30s])',
                'format': 'time_series',
                'intervalFactor': 2,
                'refId': 'A',
                'step': 20,
                'interval': '',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 8,
                'intervalMs': interval * 1000,
            },
        ],
        'from': f'now-{data_range}m',
        'to': 'now',
    }
    response = requests.post(
        f'{endpoint}/api/ds/query',
        headers={'Authorization': f'Bearer {api_key}'},
        json=json_data,
        verify=False,  # internal endpoint; cert not verified (see module header)
        timeout=30,    # don't hang the check forever on a stalled Grafana
    ).json()
    raw_data = response['results']['A']['frames'][0]['data']['values'][1]
    # Nulls are excluded from the average but reported via null_present so the
    # caller can distinguish "clean window" from "gappy data".
    data = [v for v in raw_data if v is not None]
    null_present = len(data) != len(raw_data)
    return np.round(np.average(data), 5), null_present, raw_data
def get_stateres_worst_case(api_key, interval, data_range, endpoint):
"""
CPU and DB time spent on most expensive state resolution in a room, summed over all workers.
This is a very rough proxy for "how fast is state res", but it doesn't accurately represent the system load (e.g. it completely ignores cheap state resolutions).
"""
json_data = {
'queries': [
{
'datasource': {
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'exemplar': False,
'expr': 'sum(rate(synapse_state_res_db_for_biggest_room_seconds_total{instance="172.0.2.118:9000"}[1m]))',
'format': 'time_series',
'hide': False,
'instant': False,
'interval': '',
'refId': 'B',
'queryType': 'timeSeriesQuery',
'utcOffsetSec': -25200,
'legendFormat': '',
'datasourceId': 8,
'intervalMs': 15000,
'maxDataPoints': 1863,
},
{
'datasource': {
'type': 'prometheus',
'uid': 'DAMPdbiIz',
},
'exemplar': False,
'expr': 'sum(rate(synapse_state_res_cpu_for_biggest_room_seconds_total{instance="172.0.2.118:9000"}[1m]))',
'format': 'time_series',
'hide': False,
'instant': False,
'interval': '',
'refId': 'C',
'queryType': 'timeSeriesQuery',
'utcOffsetSec': -25200,
'legendFormat': '',
'datasourceId': 8,
'intervalMs': 15000,
'maxDataPoints': 1863,
},
],
'range': {
'from': '2023-02-23T04:36:12.870Z',
'to': '2023-02-23T07:36:12.870Z',
'raw': {
'from': 'now-3h',
'to': 'now',
},
},
'from': f'now-{data_range}m',
'to': 'now',
}
response = requests.post(f'{endpoint}/api/ds/query', headers={'Authorization': f'Bearer {api_key}'}, json=json_data, verify=False).json()
# Average CPU time per block