add code
This commit is contained in:
parent
d6c165667e
commit
39042ba364
|
@ -0,0 +1,201 @@
|
|||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib
|
||||
from datetime import datetime
|
||||
from uuid import uuid4
|
||||
|
||||
from nio import AsyncClient, AsyncClientConfig, JoinError, JoinResponse, LoginResponse, RoomCreateError, RoomGetEventResponse, RoomSendError
|
||||
|
||||
import nagios
|
||||
|
||||
# CLI: two bot accounts on two (different) homeservers are used to test
# federation in both directions.
parser = argparse.ArgumentParser(description='Test federation between two homeservers.')
parser.add_argument('--bot1-user', required=True, help='User ID for bot 1.')
parser.add_argument('--bot1-pw', required=True, help='Password for bot 1.')
parser.add_argument('--bot1-hs', required=True, help='Homeserver for bot 1.')
parser.add_argument('--bot1-auth-file', help="File to cache the bot's login details to.")
parser.add_argument('--bot2-user', required=True, help='User ID for bot 2.')
parser.add_argument('--bot2-pw', required=True, help='Password for bot 2.')
parser.add_argument('--bot2-hs', required=True, help='Homeserver for bot 2.')
parser.add_argument('--bot2-auth-file', help="File to cache the bot's login details to.")
parser.add_argument('--timeout', type=float, default=90, help='Request timeout limit.')
parser.add_argument('--warn', type=float, default=2.0, help='Manually set warn level.')
parser.add_argument('--crit', type=float, default=2.5, help='Manually set critical level.')
args = parser.parse_args()

# Bare host names (netloc) of both homeservers, used in the status output lines.
bot1_hs_domain = urllib.parse.urlparse(args.bot1_hs).netloc
bot2_hs_domain = urllib.parse.urlparse(args.bot2_hs).netloc
|
||||
|
||||
|
||||
def write_details_to_disk(resp: LoginResponse, homeserver, config_file) -> None:
    """Persist login details so later runs can authenticate without a password.

    Arguments:
        resp {LoginResponse} -- the successful client login response.
        homeserver -- URL of homeserver, e.g. "https://matrix.example.org"
        config_file -- path of the JSON file the credentials are written to.
    """
    credentials = {
        "homeserver": homeserver,           # e.g. "https://matrix.example.org"
        "user_id": resp.user_id,            # e.g. "@user:example.org"
        "device_id": resp.device_id,        # device ID, 10 uppercase letters
        "access_token": resp.access_token,  # cryptographic access token
    }
    # Overwrite the cache file with the fresh credentials.
    with open(config_file, "w") as f:
        json.dump(credentials, f)
|
||||
|
||||
|
||||
async def test_one_direction(sender_client, receiver_client, receiver_user_id):
    """Time how long a message takes to federate from sender to receiver.

    Creates a throw-away room, invites the receiver, sends a message, and
    waits for the receiver to see it.

    Arguments:
        sender_client -- logged-in AsyncClient that creates the room and sends.
        receiver_client -- logged-in AsyncClient that joins and receives.
        receiver_user_id -- Matrix user ID of the receiver, e.g. "@bot2:example.org".

    Returns:
        (delta_seconds, True) on success, or (status_message, nagios_code)
        on failure.
    """

    async def cleanup(room_id):
        # Leave and forget the test room on both sides so rooms don't pile up.
        await sender_client.room_leave(room_id)
        await sender_client.room_forget(room_id)
        await receiver_client.room_leave(room_id)
        await receiver_client.room_forget(room_id)

    # The sender creates the room and invites the receiver
    test_room_name = str(uuid4())
    new_test_room = await sender_client.room_create(name=test_room_name, invite=[receiver_user_id])
    if isinstance(new_test_room, RoomCreateError):
        # Bail out instead of crashing on the missing .room_id below
        # (the original only printed the error and carried on).
        return f'UNKNOWN: failed to create room "{new_test_room}"', nagios.UNKNOWN
    new_test_room_id = new_test_room.room_id

    # Give the invite a moment to federate. asyncio.sleep() keeps the event
    # loop running; time.sleep() would block it.
    await asyncio.sleep(2)

    # The receiver joins via invite
    timeout_start = datetime.now()
    while True:
        resp = await receiver_client.join(new_test_room_id)
        if isinstance(resp, JoinResponse):
            break
        elif isinstance(resp, JoinError):
            return f'UNKNOWN: failed to join room "{resp}"', nagios.UNKNOWN
        if (datetime.now() - timeout_start).total_seconds() >= args.timeout:
            return 'UNKNOWN: failed to join room, timeout.', nagios.UNKNOWN

    await asyncio.sleep(2)

    # Sender sends the msg to room
    send_msg_time = datetime.now()
    msg = {'id': str(uuid4()), 'ts': send_msg_time.microsecond}
    resp = await sender_client.room_send(new_test_room_id, 'm.room.message', {'body': json.dumps(msg), 'msgtype': 'm.room.message'})
    if isinstance(resp, RoomSendError):
        # (Fixed: the original message string was missing its closing quote.)
        return f'UNKNOWN: failed to send message "{resp}"', nagios.UNKNOWN
    msg_event_id = resp.event_id

    # Receiver polls for the message until it shows up or we time out.
    start_check = datetime.now()
    while True:
        resp = await receiver_client.room_get_event(new_test_room_id, msg_event_id)
        if isinstance(resp, RoomGetEventResponse):
            recv_msg_time = datetime.now()
            recv_msg = json.loads(resp.event.source['content']['body'])
            break
        if (datetime.now() - start_check).total_seconds() >= args.timeout:
            await cleanup(new_test_room_id)
            return "CRITICAL: timeout - receiver did not receive the sender's message.", nagios.CRITICAL

    # Double check everything makes sense
    if msg != recv_msg:
        await cleanup(new_test_room_id)
        return "CRITICAL: sender's message did not match the receiver's.", nagios.CRITICAL

    # Time from send to receive, including sync overhead.
    bot1_msg_delta = (recv_msg_time - send_msg_time).total_seconds()

    # Clean up the rooms
    await cleanup(new_test_room_id)

    return bot1_msg_delta, True
|
||||
|
||||
|
||||
async def login(user_id, passwd, homeserver, config_file=None):
    """Return a logged-in AsyncClient for the given account.

    If config_file is given, credentials are cached there: the first run logs
    in with the password and saves the token; later runs restore the token.
    Without config_file a plain password login is performed every time.

    Exits with nagios.UNKNOWN if the password login fails.
    """
    client_config = AsyncClientConfig(request_timeout=args.timeout, max_timeout_retry_wait_time=10)
    client = AsyncClient(homeserver, user_id, config=client_config)
    if config_file:
        # If there are no previously-saved credentials, we'll use the password
        if not os.path.exists(config_file):
            resp = await client.login(passwd)

            # check that we logged in successfully
            if isinstance(resp, LoginResponse):
                write_details_to_disk(resp, homeserver, config_file)
            else:
                print(f'UNKNOWN: failed to log in "{resp}"')
                sys.exit(nagios.UNKNOWN)
        else:
            # Otherwise the config file exists, so we'll use the stored credentials
            with open(config_file, "r") as f:
                config = json.load(f)
            # Pass the same client config so the request timeout still applies
            # (the original built a bare AsyncClient here and dropped it).
            client = AsyncClient(config["homeserver"], config=client_config)
            client.access_token = config["access_token"]
            client.user_id = config["user_id"]
            client.device_id = config["device_id"]
    else:
        await client.login(passwd)
    return client
|
||||
|
||||
|
||||
def _report_latency(delta, label, current_code):
    """Print the status line for one federation direction and return the
    (possibly escalated) nagios code.

    delta -- measured latency in seconds; label -- e.g. "a.org -> b.org".
    """
    delta = round(delta, 2)
    if delta >= args.crit:
        print('CRITICAL:', label, 'is', delta, 'seconds.')
        return max(current_code, nagios.CRITICAL)
    elif delta >= args.warn:
        print('WARNING:', label, 'is', delta, 'seconds.')
        return max(current_code, nagios.WARNING)
    print('OK:', label, 'is', delta, 'seconds.')
    return current_code


async def main() -> None:
    """Test federation in both directions and exit with a nagios status code."""
    bot1 = await login(args.bot1_user, args.bot1_pw, args.bot1_hs, args.bot1_auth_file)
    bot2 = await login(args.bot2_user, args.bot2_pw, args.bot2_hs, args.bot2_auth_file)

    bot1_output_msg, bot1_output_code = await test_one_direction(bot1, bot2, args.bot2_user)
    bot2_output_msg, bot2_output_code = await test_one_direction(bot2, bot1, args.bot1_user)

    nagios_output = nagios.OK

    # On failure the second element is a nagios code (a truthy int); on
    # success it is True. The original tested `if not code:`, which is false
    # in BOTH cases, so failure messages were silently dropped and the exit
    # code never escalated.
    if bot1_output_code is not True:
        print(bot1_output_msg)
        nagios_output = max(nagios_output, bot1_output_code)
    if bot2_output_code is not True:
        print(bot2_output_msg)
        # max(): only escalate, never lower the severity.
        nagios_output = max(nagios_output, bot2_output_code)

    # bot1 -> bot2 (only on success is the first element a float latency)
    if isinstance(bot1_output_msg, float):
        nagios_output = _report_latency(bot1_output_msg, f'{bot1_hs_domain} -> {bot2_hs_domain}', nagios_output)

    # bot2 -> bot1
    if isinstance(bot2_output_msg, float):
        nagios_output = _report_latency(bot2_output_msg, f'{bot1_hs_domain} <- {bot2_hs_domain}', nagios_output)

    # Clean up
    await bot1.close()
    await bot2.close()

    sys.exit(nagios_output)
|
||||
|
||||
|
||||
# Entry point: report any unexpected exception as UNKNOWN so the monitoring
# system sees a proper plugin status instead of a raw traceback.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except Exception as e:
        print(f"UNKNOWN: exception '{e}'")
        sys.exit(nagios.UNKNOWN)
|
|
@ -0,0 +1,220 @@
|
|||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import urllib
|
||||
|
||||
import aiofiles.os
|
||||
import magic
|
||||
import numpy as np
|
||||
import requests
|
||||
from PIL import Image
|
||||
from nio import AsyncClient, AsyncClientConfig, LoginResponse, UploadResponse
|
||||
from urllib3.exceptions import InsecureRequestWarning
|
||||
|
||||
import nagios
|
||||
|
||||
# CLI for the media-CDN check: upload an image as the bot, then verify the
# media URL redirects to the expected CDN domain with the expected headers.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--user', required=True, help='User ID for the bot.')
parser.add_argument('--pw', required=True, help='Password for the bot.')
parser.add_argument('--hs', required=True, help='Homeserver of the bot.')
parser.add_argument('--admin-endpoint', required=True, help='Admin endpoint that will be called to purge media for this user.')
parser.add_argument('--room', required=True, help='The room the bot should send its test messages in.')
parser.add_argument('--media-cdn-domain', required=True, help='The domain to make sure it redirects to.')
parser.add_argument('--auth-file', help="File to cache the bot's login details to.")
parser.add_argument('--timeout', type=float, default=90, help='Request timeout limit.')
parser.add_argument('--warn', type=float, default=2.0, help='Manually set warn level.')
parser.add_argument('--crit', type=float, default=2.5, help='Manually set critical level.')
args = parser.parse_args()

# Path the bot's login details are cached at (None disables caching).
CONFIG_FILE = args.auth_file
|
||||
|
||||
|
||||
def verify_media_header(header: str, header_dict: dict, good_value: str = None, warn_value: str = None, critical_value: str = None):
    """Check one response header and map it to a nagios status.

    If none of good_value/warn_value/critical_value are given, the header is
    only checked for existence.

    Returns:
        (message, nagios_code) tuple.
    """
    raw_value = header_dict.get(header)
    # Check for presence BEFORE any str() conversion: the original converted
    # the missing-header None to the truthy string "None", so the missing
    # branch could never fire — and the str(None) defaults could even match
    # it spuriously.
    if raw_value is None:
        return f'CRITICAL: missing header "{header}"', nagios.CRITICAL
    # Convert everything to strings to prevent any comparison weirdness.
    header_value = str(raw_value)
    if good_value is not None and header_value == str(good_value):
        return f'OK: {header}: "{header_value}"', nagios.OK
    if warn_value is not None and header_value == str(warn_value):
        return f'WARN: {header}: "{header_value}"', nagios.WARNING
    if critical_value is not None and header_value == str(critical_value):
        return f'CRITICAL: {header}: "{header_value}"', nagios.CRITICAL
    return f'OK: {header} is present with value "{header_value}"', nagios.OK
|
||||
|
||||
|
||||
def write_details_to_disk(resp: LoginResponse, homeserver) -> None:
    """Cache login details in CONFIG_FILE so later runs can skip the password login.

    Arguments:
        resp {LoginResponse} -- the successful client login response.
        homeserver -- URL of homeserver, e.g. "https://matrix.example.org"
    """
    credentials = {
        "homeserver": homeserver,           # e.g. "https://matrix.example.org"
        "user_id": resp.user_id,            # e.g. "@user:example.org"
        "device_id": resp.device_id,        # device ID, 10 uppercase letters
        "access_token": resp.access_token,  # cryptographic access token
    }
    # Overwrite the cache file with the fresh credentials.
    with open(CONFIG_FILE, "w") as f:
        json.dump(credentials, f)
|
||||
|
||||
|
||||
async def send_image(client, room_id, image):
    """Upload an image file and send it to the room as an m.image event.

    Exits the whole process with nagios.UNKNOWN on any failure (bad mime
    type, failed upload, failed send).

    Arguments:
    ---------
    client : Client
    room_id : str
    image : str, file name of image

    This is a working example for a JPG image.
        "content": {
            "body": "someimage.jpg",
            "info": {
                "size": 5420,
                "mimetype": "image/jpeg",
                "thumbnail_info": {
                    "w": 100,
                    "h": 100,
                    "mimetype": "image/jpeg",
                    "size": 2106
                },
                "w": 100,
                "h": 100,
                "thumbnail_url": "mxc://example.com/SomeStrangeThumbnailUriKey"
            },
            "msgtype": "m.image",
            "url": "mxc://example.com/SomeStrangeUriKey"
        }
    """
    mime_type = magic.from_file(image, mime=True)  # e.g. "image/jpeg"
    # Refuse anything that isn't an image.
    if not mime_type.startswith("image/"):
        print(f'UNKNOWN: wrong mime type "{mime_type}"')
        sys.exit(nagios.UNKNOWN)

    im = Image.open(image)
    (width, height) = im.size  # im.size returns (width,height) tuple

    # first do an upload of image, then send URI of upload to room
    file_stat = await aiofiles.os.stat(image)
    async with aiofiles.open(image, "r+b") as f:
        resp, maybe_keys = await client.upload(f, content_type=mime_type,  # image/jpeg
                                               filename=os.path.basename(image), filesize=file_stat.st_size, )
    if not isinstance(resp, UploadResponse):
        print(f'UNKNOWN: failed to upload image "{resp}"')
        sys.exit(nagios.UNKNOWN)

    content = {"body": os.path.basename(image),  # descriptive title
               "info": {"size": file_stat.st_size, "mimetype": mime_type, "thumbnail_info": None,  # TODO
                        "w": width,  # width in pixel
                        "h": height,  # height in pixel
                        "thumbnail_url": None,  # TODO
                        }, "msgtype": "m.image", "url": resp.content_uri, }

    try:
        return await client.room_send(room_id, message_type="m.room.message", content=content)
    except Exception as e:
        print(f"Image send of file {image} failed.")
        print(f'UNKNOWN: failed to send image event "{e}"')
        sys.exit(nagios.UNKNOWN)
|
||||
|
||||
|
||||
async def main() -> None:
    """Upload a random image, verify the media-CDN redirect and its headers,
    purge the uploaded media, and exit with a nagios status code."""
    client = AsyncClient(args.hs, args.user, config=AsyncClientConfig(request_timeout=args.timeout, max_timeout_retry_wait_time=10))
    if args.auth_file:
        # If there are no previously-saved credentials, we'll use the password
        if not os.path.exists(CONFIG_FILE):
            resp = await client.login(args.pw)

            # check that we logged in successfully
            if isinstance(resp, LoginResponse):
                write_details_to_disk(resp, args.hs)
            else:
                print(f'UNKNOWN: failed to log in "{resp}"')
                sys.exit(nagios.UNKNOWN)
        else:
            # Otherwise the config file exists, so we'll use the stored credentials
            with open(CONFIG_FILE, "r") as f:
                config = json.load(f)
            client = AsyncClient(config["homeserver"])
            client.access_token = config["access_token"]
            client.user_id = config["user_id"]
            client.device_id = config["device_id"]
    else:
        await client.login(args.pw)

    await client.join(args.room)

    # Create a random 100x100 image to upload.
    imarray = np.random.rand(100, 100, 3) * 255
    im = Image.fromarray(imarray.astype('uint8')).convert('RGBA')
    _, test_image_path = tempfile.mkstemp()
    test_image_path = test_image_path + '.png'
    im.save(test_image_path)

    # Send the image and get the event ID
    image_event_id = (await send_image(client, args.room, test_image_path)).event_id

    # Fetch the event back and convert its mxc:// URI to an HTTP URL.
    image_event = (await client.room_get_event(args.room, image_event_id)).event
    target_file_url = await client.mxc_to_http(image_event.url)

    # Check the headers. Ignore the non-async call here, it doesn't matter in
    # this situation. NOTE(review): this assumes the media URL answers with a
    # redirect (a 'location' header) — if not, the KeyError below is caught by
    # the top-level handler and reported as UNKNOWN.
    headers = dict(requests.head(target_file_url).headers)

    exit_code = nagios.OK

    # Check that the redirect points at the expected media CDN domain.
    domain = urllib.parse.urlparse(headers['location']).netloc
    if domain != args.media_cdn_domain:
        exit_code = nagios.CRITICAL
        print(f'CRITICAL: media CDN domain is "{domain}"')
    else:
        print(f'OK: media CDN domain is "{domain}"')

    results = [verify_media_header('synapse-media-local-status', headers),
               verify_media_header('synapse-media-s3-status', headers, good_value='200'),
               verify_media_header('synapse-media-server', headers, good_value='s3'),
               verify_media_header('Server', headers, good_value='cloudflare')]
    for header_chk, code in results:
        if code != nagios.OK:
            # Escalate only: the original plain assignment could downgrade an
            # earlier CRITICAL to a later WARNING.
            exit_code = max(exit_code, code)
            print(header_chk)

    # Clean up
    await client.room_redact(args.room, image_event_id)
    os.remove(test_image_path)
    await client.close()

    requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
    try:
        r = requests.delete(f'{args.admin_endpoint}/_synapse/admin/v1/users/{args.user}/media', headers={'Authorization': f'Bearer {client.access_token}'}, verify=False)
        if r.status_code != 200:
            # Fixed: the original comparison was inverted and *lowered* a
            # CRITICAL exit code to WARNING.
            exit_code = max(exit_code, nagios.WARNING)
            print(f"WARN: failed to purge media for this user, request failed with '{r.text}'")
    except Exception as e:
        exit_code = max(exit_code, nagios.WARNING)
        print(f"WARN: failed to purge media for this user '{e}'")

    sys.exit(exit_code)
|
||||
|
||||
|
||||
# Entry point: report any unexpected exception as UNKNOWN so the monitoring
# system sees a proper plugin status instead of a raw traceback.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except Exception as e:
        print(f'UNKNOWN: exception "{e}"')
        sys.exit(nagios.UNKNOWN)
|
|
@ -0,0 +1,130 @@
|
|||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
import nagios
|
||||
|
||||
# CLI for the federation-latency scraper: either parse Prometheus metrics
# (--prometheus) or scrape an HTML status page for send/receive ping times.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--metrics-endpoint', required=True, help='Target URL to scrape.')
parser.add_argument('--domain', required=True, help='Our domain.')
parser.add_argument('--prometheus', action='store_true', help='Use Promethus instead of scraping the status page.')
parser.add_argument('--ignore', nargs='*', default=[], help='Ignore these hosts.')
parser.add_argument('--timeout', type=float, default=90, help='Request timeout limit.')
parser.add_argument('--warn', type=float, default=20, help='Manually set warn level.')
parser.add_argument('--crit', type=float, default=30, help='Manually set critical level.')
args = parser.parse_args()
|
||||
|
||||
if args.prometheus:
    from prometheus import parse_metrics

    r = requests.get(args.metrics_endpoint)
    if r.status_code != 200:
        sys.exit(nagios.UNKNOWN)

    # Build: receivingDomain -> {sourceDomain: delay_sum}
    metrics = {}
    for item in parse_metrics(r.text)['monbot_ping_receive_delay_seconds']['monbot_ping_receive_delay_seconds_sum']:
        if item.labels['receivingDomain'] not in metrics.keys():
            metrics[item.labels['receivingDomain']] = {}
        metrics[item.labels['receivingDomain']][item.labels['sourceDomain']] = item.value

    # 'receiver': delays for pings other servers sent to us;
    # 'sender': delays for pings we sent to other servers.
    pings = {'receiver': [], 'sender': [], }
    for receiving_domain, senders in metrics.items():
        if receiving_domain == args.domain:
            for k, v in senders.items():
                pings['receiver'].append(v)
        else:
            for k, v in senders.items():
                if k == args.domain:
                    pings['sender'].append(v)

    print(json.dumps(pings))

    # Average latency per direction, rounded for display.
    receiver_avg = np.round(np.average(pings['receiver']), 2)
    sender_avg = np.round(np.average(pings['sender']), 2)

    print('receiver latency is', receiver_avg)
    print('sender latency is', sender_avg)
|
||||
else:
    # HTML status-page scraping mode.
    from bs4 import BeautifulSoup
    import re

    # Split the values since icinga will quote the args
    if len(args.ignore) == 1:
        args.ignore = args.ignore[0].strip(' ').split(' ')
|
||||
|
||||
|
||||
def get_sec(time_str):
    """Convert an "H:M:S" string to the total number of seconds."""
    hours, minutes, seconds = (int(part) for part in time_str.split(':'))
    return hours * 3600 + minutes * 60 + seconds
|
||||
|
||||
|
||||
def ms_to_s(s):
    """Convert a latency string like "450ms", "2.5s" or "1m30.2s" to seconds.

    Returns None for a string in none of those formats (preserving the
    original fall-through behaviour).
    """
    min_m = re.match(r'^(\d+)m([\d.]+)s', s)
    if min_m:
        # "XmY.Zs" — whole minutes plus truncated seconds.
        return int(min_m.group(1)) * 60 + int(float(min_m.group(2)))
    elif s.endswith('ms'):
        # BUG FIX: the old code built float('0.' + digits), which is only
        # correct for exactly three digits — "75ms" became 0.75s instead of
        # 0.075s. Milliseconds are just a /1000 conversion.
        return float(s[:-2]) / 1000
    elif s.endswith('s'):
        return float(s.strip('ms'))
|
||||
|
||||
|
||||
r = requests.get(args.metrics_endpoint)
if r.status_code != 200:
    sys.exit(nagios.UNKNOWN)
soup = BeautifulSoup(r.text, 'html.parser')
# Fixed: the original passed the *set* {'class', 'tooltip'} here, almost
# certainly a typo for the attrs dict matching class="tooltip".
tooltips = soup.find_all('span', {'class': 'tooltip'})
data = {}
for item in tooltips:
    m = re.match(r'<span class="tooltip">\s*Send: (.*?)\s*<br\/>\s*Receive: (.*?)\s*<\/span>', str(item))
    # (Removed a leftover debug print(item) that polluted the plugin output.)
    if m:
        domain = item.parent.parent.find('span', {'class': 'domain'}).text
        data[domain] = {
            'send': ms_to_s(m.group(1)),
            'receive': ms_to_s(m.group(2)),
        }
exit_code = nagios.OK
info_str = []
data_str = []

if len(data.keys()) == 0:
    print('UNKNOWN: failed to find any servers.')
    sys.exit(nagios.UNKNOWN)

for domain, values in data.items():
    if domain not in args.ignore:
        if values['send'] >= args.crit:
            info_str.append(f'CRITICAL: {domain} send is {values["send"]}s.')
            exit_code = nagios.CRITICAL
        elif values['send'] >= args.warn:
            info_str.append(f'WARN: {domain} send is {values["send"]}s.')
            if exit_code < nagios.WARNING:
                exit_code = nagios.WARNING

        if values['receive'] >= args.crit:
            info_str.append(f'CRITICAL: {domain} receive is {values["receive"]}s.')
            exit_code = nagios.CRITICAL
        elif values['receive'] >= args.warn:
            info_str.append(f'WARN: {domain} receive is {values["receive"]}s.')
            if exit_code < nagios.WARNING:
                exit_code = nagios.WARNING
    # Perf data for every domain. NOTE(review): the original indentation was
    # lost in extraction — confirm whether ignored domains should be excluded.
    data_str.append(
        f"'{domain}-send'={values['send']}s;;; '{domain}-receive'={values['receive']}s;;;"
    )

# Every info_str entry starts with "CRITICAL:" or "WARN:", so the summary is
# OK exactly when there are no entries. (The old test searched for the
# substring "WARNING", which never occurs, so warn-only runs printed "OK".)
if not info_str:
    print('OK: ping time is good.', end=' ')
else:
    for i, line in enumerate(info_str):
        print(line, end=('\n' if i + 1 < len(info_str) else ''))
print(f'|{" ".join(data_str)}')

sys.exit(exit_code)
|
|
@ -0,0 +1,378 @@
|
|||
import json
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
from urllib3.exceptions import InsecureRequestWarning
|
||||
|
||||
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
|
||||
|
||||
|
||||
def get_avg_python_gc_time(api_key, interval, data_range, endpoint):
    """Query Grafana for the Python GC time rate and return the per-series
    averages (rounded to 5 places), skipping null samples and empty series.

    Arguments:
        api_key -- Grafana API token (sent as a Bearer header).
        interval -- query interval in seconds.
        data_range -- how many minutes back to query.
        endpoint -- base URL of the Grafana instance.
    """
    payload = {
        'queries': [
            {
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'expr': 'rate(python_gc_time_sum{instance="10.0.0.34:9000",job=~"(federation-receiver|federation-sender|initialsync|synapse|synchrotron)",index=~".*"}[30s])/rate(python_gc_time_count[30s])',
                'format': 'time_series',
                'intervalFactor': 2,
                'refId': 'A',
                'step': 20,
                'target': '',
                'interval': '',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
            },
        ],
        'from': f'now-{data_range}m',
        'to': 'now',
    }
    response = requests.post(f'{endpoint}/api/ds/query', headers={'Authorization': f'Bearer {api_key}'}, json=payload, verify=False).json()

    # Each frame's data values: index 1 holds the samples, which may contain
    # nulls. Strip the nulls per series, then drop series left empty.
    series = [
        [sample for sample in frame['data']['values'][1] if sample is not None]
        for frame in response['results']['A']['frames']
    ]
    populated = [samples for samples in series if samples]
    return [np.round(np.average(samples), 5) for samples in populated]
|
||||
|
||||
|
||||
def get_outgoing_http_request_rate(api_key, interval, data_range, endpoint):
    """Query Grafana for outgoing HTTP request rates (generic client and
    federation client) and return {series_name: rounded average rate}.

    Arguments:
        api_key -- Grafana API token (sent as a Bearer header).
        interval -- query interval in seconds.
        data_range -- how many minutes back to query.
        endpoint -- base URL of the Grafana instance.
    """
    json_data = {
        'queries': [
            {
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'editorMode': 'code',
                'expr': 'rate(synapse_http_client_requests_total{job=~"(federation-receiver|federation-sender|initialsync|synapse|synchrotron)",index=~".*",instance="10.0.0.34:9000"}[2m])',
                'range': True,
                'refId': 'A',
                'interval': '',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
            },
            {
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'editorMode': 'code',
                'expr': 'rate(synapse_http_matrixfederationclient_requests_total{job=~"(federation-receiver|federation-sender|initialsync|synapse|synchrotron)",index=~".*",instance="10.0.0.34:9000"}[2m])',
                'range': True,
                'refId': 'B',
                'interval': '',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
            },
        ],
        'from': f'now-{data_range}m',
        'to': 'now',
    }
    response = requests.post(f'{endpoint}/api/ds/query', headers={'Authorization': f'Bearer {api_key}'}, json=json_data, verify=False).json()
    output = {}
    # The refId keys themselves are unused; only the frames matter.
    for result in response['results'].values():
        frame = result['frames'][0]
        # Series name looks like '...{method="GET"}' — take the part after the
        # last '=' and strip the closing brace and quotes.
        name = frame['schema']['name'].split('=')[-1].strip('}').strip('"')
        output[name] = np.round(np.average(frame['data']['values'][1]), 2)
    return output
|
||||
|
||||
|
||||
def get_event_send_time(api_key, interval, data_range, endpoint):
    """Average event-send activity for the Synapse homeserver over the window.

    Replays a recorded Grafana dashboard request against ``/api/ds/query``.
    The payload carries eight Prometheus queries (p99/p90/p75/p50/p25/p05
    quantiles and the mean of RoomSendEventRestServlet response time, plus a
    persisted-events rate), but NOTE(review): only refId 'E' -- the
    persisted-events rate -- is consumed by the return statement below; the
    other seven queries look like leftovers from the captured browser request.
    Long captured-request fields ('requestId') have been dropped from the
    commented-out clutter for readability.

    api_key:    Grafana API token, sent as a Bearer header.
    interval:   data interval in seconds; forwarded as 'intervalMs'.
    data_range: look-back window in minutes ('from': now-{data_range}m).
    endpoint:   base URL of the Grafana server.

    Returns the mean of series 'E' over the window, rounded to 2 places.
    """
    json_data = {
        'queries': [
            {
                # p99 of successful (2xx) RoomSendEventRestServlet response time (refId D).
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'expr': 'histogram_quantile(0.99, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="10.0.0.34:9000",code=~"2.."}[2m])) by (le))',
                'format': 'time_series',
                'intervalFactor': 1,
                'refId': 'D',
                'interval': '',
                # 'key': 'Q-d8eb3572-9aea-4a73-92f2-e08b33c21ecb-0',
                'editorMode': 'builder',
                'range': True,
                'instant': True,
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
                # 'maxDataPoints': 1383,
            },
            {
                # p90 quantile (refId A).
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'expr': 'histogram_quantile(0.9, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="10.0.0.34:9000",code=~"2.."}[2m])) by (le))',
                'format': 'time_series',
                'interval': '',
                'intervalFactor': 1,
                'refId': 'A',
                # 'key': 'Q-a9222e59-18ff-4b3b-80ae-27bea8f149a9-1',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
                # 'maxDataPoints': 1383,
            },
            {
                # p75 quantile (refId C).
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'expr': 'histogram_quantile(0.75, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="10.0.0.34:9000",code=~"2.."}[2m])) by (le))',
                'format': 'time_series',
                'intervalFactor': 1,
                'refId': 'C',
                'interval': '',
                # 'key': 'Q-0378a458-1ade-410e-a4b3-ae4aaa91d709-2',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
                # 'maxDataPoints': 1383,
            },
            {
                # p50 (median) quantile (refId B).
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'expr': 'histogram_quantile(0.5, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="10.0.0.34:9000",code=~"2.."}[2m])) by (le))',
                'format': 'time_series',
                'intervalFactor': 1,
                'refId': 'B',
                'interval': '',
                # 'key': 'Q-da4c00b6-61c1-49f5-8a0a-9f19990acfb7-3',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
                # 'maxDataPoints': 1383,
            },
            {
                # p25 quantile (refId F).
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'expr': 'histogram_quantile(0.25, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="10.0.0.34:9000",code=~"2.."}[2m])) by (le))',
                'refId': 'F',
                'interval': '',
                # 'key': 'Q-21254889-3cf6-4d97-8dc5-ddf68360847e-4',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
                # 'maxDataPoints': 1383,
            },
            {
                # p05 quantile (refId G).
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'expr': 'histogram_quantile(0.05, sum(rate(synapse_http_server_response_time_seconds_bucket{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="10.0.0.34:9000",code=~"2.."}[2m])) by (le))',
                'refId': 'G',
                'interval': '',
                # 'key': 'Q-502b8ed5-4050-461c-befc-76f6796dce68-5',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
                # 'maxDataPoints': 1383,
            },
            {
                # Mean response time: sum-of-time / count (refId H).
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'expr': 'sum(rate(synapse_http_server_response_time_seconds_sum{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="10.0.0.34:9000",code=~"2.."}[2m])) / sum(rate(synapse_http_server_response_time_seconds_count{servlet=\'RoomSendEventRestServlet\',index=~".*",instance="10.0.0.34:9000",code=~"2.."}[2m]))',
                'refId': 'H',
                'interval': '',
                # 'key': 'Q-364dc896-c399-4e58-8930-cba2e3d1d579-6',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
                # 'maxDataPoints': 1383,
            },
            {
                # Rate of persisted events (refId E) -- the only series actually returned.
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'expr': 'sum(rate(synapse_storage_events_persisted_events_total{instance="10.0.0.34:9000"}[2m]))',
                'hide': False,
                'instant': False,
                'refId': 'E',
                'interval': '',
                # 'key': 'Q-9072e904-da8d-4b00-b454-dac45b7c38f0-7',
                'editorMode': 'code',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
                # 'maxDataPoints': 1383,
            },
        ],
        'from': f'now-{data_range}m',
        'to': 'now',
    }
    # NOTE(review): verify=False disables TLS verification -- presumably an
    # internal/self-signed Grafana endpoint; confirm.
    response = requests.post(f'{endpoint}/api/ds/query', headers={'Authorization': f'Bearer {api_key}'}, json=json_data, verify=False).json()
    # Frame data column 1 holds the metric values (column 0 is timestamps).
    return np.round(np.average(response['results']['E']['frames'][0]['data']['values'][1]), 2)
|
||||
|
||||
|
||||
def get_waiting_for_db(api_key, interval, data_range, endpoint):
    """Average time Synapse transactions spend waiting for a DB connection.

    Sends a single Prometheus query (refId 'A') through Grafana's
    ``/api/ds/query`` proxy:
    rate(synapse_storage_schedule_time_sum)/rate(synapse_storage_schedule_time_count).

    api_key:    Grafana API token, sent as a Bearer header.
    interval:   data interval in seconds; forwarded as 'intervalMs'.
    data_range: look-back window in minutes ('from': now-{data_range}m).
    endpoint:   base URL of the Grafana server.

    Returns the mean of the series over the window, rounded to 5 places.
    """
    json_data = {
        'queries': [
            {
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'expr': 'rate(synapse_storage_schedule_time_sum{instance="10.0.0.34:9000",job=~"(federation-receiver|federation-sender|initialsync|synapse|synchrotron)",index=~".*"}[30s])/rate(synapse_storage_schedule_time_count[30s])',
                'format': 'time_series',
                'intervalFactor': 2,
                'refId': 'A',
                'step': 20,
                'interval': '',
                # 'key': 'Q-459af7f4-0427-4832-9353-46086b3f5c27-0',
                'queryType': 'timeSeriesQuery',
                'exemplar': False,
                # 'requestId': 'Q-459af7f4-0427-4832-9353-46086b3f5c27-0A',
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': interval * 1000,
                # 'maxDataPoints': 1383,
            },
        ],
        'from': f'now-{data_range}m',
        'to': 'now',
    }
    # NOTE(review): verify=False disables TLS verification -- presumably an
    # internal/self-signed Grafana endpoint; confirm.
    response = requests.post(f'{endpoint}/api/ds/query', headers={'Authorization': f'Bearer {api_key}'}, json=json_data, verify=False).json()
    # Frame data column 1 holds the metric values (column 0 is timestamps).
    return np.round(np.average(response['results']['A']['frames'][0]['data']['values'][1]), 5)
|
||||
|
||||
|
||||
def get_stateres_worst_case(api_key, interval, data_range, endpoint):
    """
    CPU and DB time spent on most expensive state resolution in a room, summed over all workers.
    This is a very rough proxy for "how fast is state res", but it doesn't accurately represent the system load (e.g. it completely ignores cheap state resolutions).

    NOTE(review): this function performs the request but never inspects or
    returns the result -- it always falls off the end and returns None.
    It looks unfinished (see trailing comment below); confirm before use.
    """
    json_data = {
        'queries': [
            {
                # DB seconds spent on the biggest room's state resolution (refId B).
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'exemplar': False,
                'expr': 'sum(rate(synapse_state_res_db_for_biggest_room_seconds_total{instance="10.0.0.34:9000"}[1m]))',
                'format': 'time_series',
                'hide': False,
                'instant': False,
                'interval': '',
                'refId': 'B',
                'queryType': 'timeSeriesQuery',
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': 15000,
                'maxDataPoints': 1863,
            },
            {
                # CPU seconds spent on the biggest room's state resolution (refId C).
                'datasource': {
                    'type': 'prometheus',
                    'uid': 'AbuT5CJ4z',
                },
                'exemplar': False,
                'expr': 'sum(rate(synapse_state_res_cpu_for_biggest_room_seconds_total{instance="10.0.0.34:9000"}[1m]))',
                'format': 'time_series',
                'hide': False,
                'instant': False,
                'interval': '',
                'refId': 'C',
                'queryType': 'timeSeriesQuery',
                'utcOffsetSec': -25200,
                'legendFormat': '',
                'datasourceId': 1,
                'intervalMs': 15000,
                'maxDataPoints': 1863,
            },
        ],
        # NOTE(review): this hard-coded absolute 'range' was captured from a
        # browser session; the 'from'/'to' keys below presumably take
        # precedence server-side -- confirm against the Grafana data-source API.
        'range': {
            'from': '2023-02-23T04:36:12.870Z',
            'to': '2023-02-23T07:36:12.870Z',
            'raw': {
                'from': 'now-3h',
                'to': 'now',
            },
        },
        'from': f'now-{data_range}m',
        'to': 'now',
    }
    response = requests.post(f'{endpoint}/api/ds/query', headers={'Authorization': f'Bearer {api_key}'}, json=json_data, verify=False).json()


# Average CPU time per block
|
|
@ -0,0 +1,111 @@
|
|||
#!/usr/bin/env python3
"""Nagios check script for Matrix Synapse health metrics.

Pulls one of several metrics (Python GC time, HTTP response time, outgoing
HTTP request rate, average event-send time, or DB lag) -- mostly via the
Grafana helper module -- compares it against a critical threshold, prints a
Nagios-style status line and exits with the matching nagios.* exit code.
"""
import argparse
import sys
import time

import numpy as np
import requests

import nagios
from grafana import get_avg_python_gc_time, get_event_send_time, get_outgoing_http_request_rate, get_waiting_for_db

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--grafana-server', required=True, help='Grafana server.')
parser.add_argument('--synapse-server', required=True, help='Matrix Synapse server.')
parser.add_argument('--grafana-api-key', required=True)
parser.add_argument('--interval', default=15, type=int, help='Data interval in seconds.')
parser.add_argument('--range', default=2, type=int, help='Data range in minutes. Used for comparison and averaging.')
parser.add_argument('--type', required=True, choices=['gc-time', 'response-time', 'outgoing-http-rate', 'avg-send', 'db-lag'])
parser.add_argument('--warn', type=float, help='Manually set warn level.')
parser.add_argument('--crit', type=float, help='Manually set critical level.')
args = parser.parse_args()

# TODO: add warn support (--warn is parsed but never used; only --crit is honored)

if args.type == 'gc-time':
    # Threshold in seconds; default 0.002 unless --crit overrides it.
    python_gc_time_sum_MAX = 0.002 if not args.crit else args.crit
    try:
        python_gc_time_sum = np.round(np.average(get_avg_python_gc_time(args.grafana_api_key, args.interval, args.range, args.grafana_server)), 5)
        if python_gc_time_sum >= python_gc_time_sum_MAX:
            print(f'CRITICAL: average GC time per collection is {python_gc_time_sum} sec.')
            sys.exit(nagios.CRITICAL)
        else:
            print(f'OK: average GC time per collection is {python_gc_time_sum} sec.')
            sys.exit(nagios.OK)
    except Exception as e:
        print(f'UNKNOWN: failed to check avg. GC time "{e}"')
        sys.exit(nagios.UNKNOWN)
elif args.type == 'response-time':
    # Threshold in seconds; measured directly by POSTing to the homeserver.
    response_time_MAX = 1 if not args.crit else args.crit
    timeout = 10
    try:
        response_times = []
        # Ping the endpoint 10 times, 1s apart, and average the wall-clock times.
        for i in range(10):
            start = time.perf_counter()
            try:
                # NOTE(review): the response object is never inspected; only
                # the elapsed time matters here. verify=False skips TLS checks.
                response = requests.post(args.synapse_server, timeout=timeout, verify=False)
            except Exception as e:
                print(f'UNKNOWN: failed to ping endpoint "{e}"')
                sys.exit(nagios.UNKNOWN)
            request_time = time.perf_counter() - start
            response_times.append(np.round(request_time, 2))
            time.sleep(1)
        response_time = np.round(np.average(response_times), 2)
        if response_time > response_time_MAX:
            print(f'CRITICAL: response time is {response_time} sec.')
            sys.exit(nagios.CRITICAL)
        else:
            print(f'OK: response time is {response_time} sec.')
            sys.exit(nagios.OK)
    except Exception as e:
        print(f'UNKNOWN: failed to check response time "{e}"')
        sys.exit(nagios.UNKNOWN)
elif args.type == 'outgoing-http-rate':
    # Threshold in outgoing requests/sec, per destination returned by Grafana.
    outgoing_http_request_rate_MAX = 10 if not args.crit else args.crit
    # NOTE(review): the try/except guard is commented out here, unlike the
    # other branches -- an unexpected error surfaces as a raw traceback
    # (exit code 1) instead of UNKNOWN. Presumably left from debugging.
    # try:
    outgoing_http_request_rate = get_outgoing_http_request_rate(args.grafana_api_key, args.interval, args.range, args.grafana_server)
    failed = {}
    for k, v in outgoing_http_request_rate.items():
        if v > outgoing_http_request_rate_MAX:
            failed[k] = v
    if len(failed.keys()) > 0:
        print(f'CRITICAL: outgoing HTTP request rate for {failed} req/sec.')
        sys.exit(nagios.CRITICAL)
    print(f'OK: outgoing HTTP request rate is {outgoing_http_request_rate} req/sec.')
    sys.exit(nagios.OK)
    # except Exception as e:
    #     print(f'UNKNOWN: failed to check outgoing HTTP request rate "{e}"')
    #     sys.exit(nagios.UNKNOWN)
elif args.type == 'avg-send':
    # Average send time in seconds
    event_send_time_MAX = 1 if not args.crit else args.crit
    try:
        event_send_time = get_event_send_time(args.grafana_api_key, args.interval, args.range, args.grafana_server)
        if event_send_time > event_send_time_MAX:
            print(f'CRITICAL: average message send time is {event_send_time} sec.')
            sys.exit(nagios.CRITICAL)
        else:
            print(f'OK: average message send time is {event_send_time} sec.')
            sys.exit(nagios.OK)
    except Exception as e:
        print(f'UNKNOWN: failed to check average message send time "{e}"')
        sys.exit(nagios.UNKNOWN)
elif args.type == 'db-lag':
    # Threshold in seconds.
    db_lag_MAX = 0.01 if not args.crit else args.crit
    try:
        db_lag = get_waiting_for_db(args.grafana_api_key, args.interval, args.range, args.grafana_server)
        if db_lag > db_lag_MAX:
            print(f'CRITICAL: DB lag is {db_lag} sec.')
            sys.exit(nagios.CRITICAL)
        else:
            print(f'OK: DB lag is {db_lag} sec.')
            sys.exit(nagios.OK)
    except Exception as e:
        print(f'UNKNOWN: failed to check DB lag "{e}"')
        sys.exit(nagios.UNKNOWN)
else:
    # Unreachable in practice: argparse 'choices' rejects anything else.
    print('Wrong type')
    sys.exit(nagios.UNKNOWN)
|
|
@ -0,0 +1,4 @@
|
|||
# Standard Nagios plugin exit codes.
#
# BUG FIX: UNKNOWN was -1, but the Nagios plugin API defines UNKNOWN as 3;
# sys.exit(-1) produces process status 255, which monitoring treats as an
# out-of-range (effectively CRITICAL) result rather than UNKNOWN. The other
# plugins in this repo (redis check, status API) already use 3.
UNKNOWN = 3
OK = 0
WARNING = 1
CRITICAL = 2
|
|
@ -0,0 +1,12 @@
|
|||
from prometheus_client.parser import text_string_to_metric_families
|
||||
|
||||
|
||||
def parse_metrics(families):
    """Parse a Prometheus text-format exposition string into a nested dict.

    families: the raw text of a /metrics scrape (str).

    Returns ``{family_name: {sample_name: [Sample, ...]}}`` where each Sample
    is whatever ``prometheus_client`` yields for that family.
    """
    output = {}
    for family in text_string_to_metric_families(families):
        # setdefault replaces the manual "not in d.keys()" membership check
        # and empty-list initialization of the original.
        samples_by_name = {}
        for sample in family.samples:
            samples_by_name.setdefault(sample.name, []).append(sample)
        output[family.name] = samples_by_name
    return output
|
|
@ -0,0 +1,9 @@
|
|||
prometheus_client
|
||||
requests
|
||||
numpy
|
||||
nagiosplugin
|
||||
matrix-nio
|
||||
Pillow
|
||||
python-magic
|
||||
beautifulsoup4
|
|
@ -0,0 +1,110 @@
|
|||
import sys
|
||||
|
||||
import requests
|
||||
|
||||
import nagios
|
||||
|
||||
|
||||
def handle_err(func):
    """Decorator for helpers that return an ``(error, result)`` pair.

    On an exception, prints an UNKNOWN line and exits with nagios.UNKNOWN.
    If the wrapped call reports an error (truthy first element), prints a
    CRITICAL line and exits with nagios.CRITICAL. Otherwise the result
    (second element) is returned to the caller.
    """
    def wrapped(*args, **kwargs):
        try:
            error, result = func(*args, **kwargs)
        except Exception as exc:
            print(f"UNKNOWN: exception '{exc}'")
            sys.exit(nagios.UNKNOWN)
        if not error:
            return result
        print(f"CRITICAL: {error}")
        sys.exit(nagios.CRITICAL)

    return wrapped
|
||||
|
||||
|
||||
@handle_err
def login(user_id: str, passwd: str, homeserver: str):
    """Password-login against the homeserver; returns (error, response_json)."""
    payload = {'type': 'm.login.password', 'user': user_id, 'password': passwd}
    resp = requests.post(f'{homeserver}/_matrix/client/r0/login', json=payload)
    if resp.status_code == 200:
        return None, resp.json()
    return f'Bad status code on login for {user_id}: {resp.status_code}\nBody: {resp.text}', None
|
||||
|
||||
|
||||
@handle_err
def create_room(room_name, homeserver, auth_token):
    """Create an unencrypted private room; returns (error, response_json)."""
    payload = {
        "name": room_name,
        "preset": "private_chat",
        "visibility": "private",
        # "initial_state": [{"type": "m.room.guest_access", "state_key": "", "content": {"guest_access": "can_join"}}]
    }
    resp = requests.post(f'{homeserver}/_matrix/client/r0/createRoom?access_token={auth_token}', json=payload)
    if resp.status_code == 200:
        return None, resp.json()
    return Exception(f'Bad status code on create room for {room_name}: {resp.status_code}\nBody: {resp.text}'), None
|
||||
|
||||
|
||||
@handle_err
def send_invite(room_id, target_user_id, homeserver, auth_token):
    """Invite *target_user_id* into *room_id*; returns (error, response_json)."""
    url = f'{homeserver}/_matrix/client/r0/rooms/{room_id}/invite?access_token={auth_token}'
    resp = requests.post(url, json={'user_id': target_user_id})
    if resp.status_code == 200:
        return None, resp.json()
    return Exception(f'Bad status code on send invite for {room_id}: {resp.status_code}\nBody: {resp.text}'), None
|
||||
|
||||
|
||||
@handle_err
def join_room(room_id, homeserver, auth_token):
    """Join *room_id* directly (no invite); returns (error, response_json)."""
    url = f'{homeserver}/_matrix/client/r0/join/{room_id}?access_token={auth_token}'
    resp = requests.post(url, data='{}')
    if resp.status_code == 200:
        return None, resp.json()
    return Exception(f'Bad status code on join room for {room_id}: {resp.status_code}\nBody: {resp.text}'), None
|
||||
|
||||
|
||||
@handle_err
def join_room_invite(room_id, homeserver, auth_token):
    """Accept a pending invite to *room_id*; returns (error, response_json)."""
    url = f'{homeserver}/_matrix/client/r0/rooms/{room_id}/join?access_token={auth_token}'
    resp = requests.post(url, data='{}')
    if resp.status_code == 200:
        return None, resp.json()
    return Exception(f'Bad status code on join room via invite for {room_id}: {resp.status_code}\nBody: {resp.text}'), None
|
||||
|
||||
|
||||
@handle_err
def send_msg(message, room_id, homeserver, auth_token):
    """Send a plain m.text message to *room_id*; returns (error, response_json)."""
    url = f'{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message?access_token={auth_token}'
    resp = requests.post(url, json={'msgtype': 'm.text', 'body': message})
    if resp.status_code == 200:
        return None, resp.json()
    return Exception(f'Bad status code on send message for {room_id}: {resp.status_code}\nBody: {resp.text}'), None
|
||||
|
||||
|
||||
# errors will be handled in the other script
def get_event(event_id, room_id, homeserver, auth_token):
    """Fetch a single event; returns the raw requests.Response (unwrapped on purpose)."""
    url = f'{homeserver}/_matrix/client/v3/rooms/{room_id}/event/{event_id}?access_token={auth_token}'
    return requests.get(url)
|
||||
|
||||
|
||||
@handle_err
def get_state(homeserver, auth_token, since=None):
    """Run a /sync against the homeserver; returns (error, response_json).

    since: optional sync token; when given, only events after that token are
    returned.
    """
    if since:
        # BUG FIX: the query string previously read "since{since}" (missing
        # '='), so the homeserver silently ignored the token and every call
        # performed a full initial sync.
        url = f'{homeserver}/_matrix/client/r0/sync?since={since}&access_token={auth_token}'
    else:
        url = f'{homeserver}/_matrix/client/r0/sync?access_token={auth_token}'
    r = requests.get(url)
    if r.status_code != 200:
        return Exception(f'Bad status code on sync: {r.status_code}\nBody: {r.text}'), None
    return None, r.json()
|
||||
|
||||
|
||||
@handle_err
def forget_room(room_id, homeserver, auth_token):
    """Forget *room_id* (must already have left it); returns (error, response_json)."""
    url = f'{homeserver}/_matrix/client/r0/rooms/{room_id}/forget?access_token={auth_token}'
    resp = requests.post(url, data='{}')
    if resp.status_code == 200:
        return None, resp.json()
    return Exception(f'Bad status code on leave room for {room_id}: {resp.status_code}\nBody: {resp.text}'), None
|
||||
|
||||
|
||||
@handle_err
def leave_room(room_id, homeserver, auth_token, forget=False):
    """Leave *room_id*; optionally forget it afterwards.

    forget: when True, also calls forget_room() after a successful leave.
            (forget_room is itself wrapped by @handle_err, so a forget
            failure prints CRITICAL and exits rather than returning here.)

    Returns (error, response_json) for the leave request.
    """
    r = requests.post(f'{homeserver}/_matrix/client/r0/rooms/{room_id}/leave?access_token={auth_token}', data='{}')
    if r.status_code != 200:
        return Exception(f'Bad status code on leave room for {room_id}: {r.status_code}\nBody: {r.text}'), None
    if forget:
        # FIX: the result was previously bound to an unused local `f`;
        # the call is made purely for its side effect.
        forget_room(room_id, homeserver, auth_token)
    return None, r.json()
|
|
@ -0,0 +1,4 @@
|
|||
# Example invocation of the monitor-bot federation-delay check.
# --ignore takes a space-separated list of remote homeservers to skip.
python3 Matrix\ Synapse/check_monitor_bot.py \
    --metrics-endpoint "https://matrix.your-hs.com/matrix-monitor-bot/" \
    --domain your-hs.com \
    --ignore canarymod.net catgirl.cloud
|
|
@ -0,0 +1,211 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# pip install redis
|
||||
|
||||
import redis
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
# Nagios-style exit codes for this plugin.
EXIT_OK = 0
EXIT_WARNING = 1
EXIT_CRITICAL = 2
# NOTE(review): "UNKNONW" is a typo for "UNKNOWN"; the name is kept because
# MonitoringPluginRedis.check() references it by this spelling.
EXIT_UNKNONW = 3
# NOTE(review): never used in this file, and it shadows the unknown code (3)
# rather than having a distinct value -- confirm intent.
EXIT_INVALID_AUTH = 3
|
||||
|
||||
|
||||
class MonitoringPluginRedis(object):
    """Nagios/Icinga monitoring plugin for redis-server.

    Parses CLI arguments, connects to Redis, and either reports general
    server info (version, clients, keys, memory, uptime) or checks one
    stat (--key) against --warning/--critical thresholds.
    """

    def __init__(self):
        """Parse arguments and open the Redis connection.

        Exits CRITICAL (2) immediately if the connection or INFO call fails.
        """
        cli_args = self.parse_args()

        self.host = cli_args.host
        self.port = cli_args.port
        self.password = cli_args.password
        self.dbname = cli_args.dbname
        self.timeout = cli_args.timeout
        self.key = cli_args.key_value
        self.warning = cli_args.warning
        self.critical = cli_args.critical

        try:
            self.conn = redis.Redis(
                host=self.host,
                port=self.port,
                password=self.password,
                socket_timeout=self.timeout
            )
            # Fetch INFO once up front; all getters read from this snapshot.
            self.info_out = self.conn.info()
            self.conn.ping()
        except Exception as e:
            print(f"CRITICAL REDIS : {e}")
            sys.exit(2)

    def parse_args(self):
        """Build and evaluate the CLI argument parser."""
        parser = argparse.ArgumentParser(
            description="monitoring plugin for redis-server, version: 1.0"
        )
        parser.add_argument(
            "-H", "--host",
            dest="host",
            help="Redis server to connect to. (default is 127.0.0.1)",
            default="127.0.0.1"
        )
        parser.add_argument(
            "-p", "--port",
            dest="port",
            help="Redis port to connect to. (default is 6379)",
            type=int,
            default=6379
        )
        parser.add_argument(
            "-P", "--password",
            dest="password",
            help="Redis password to connect to.",
            default=''
        )
        parser.add_argument(
            "-d", "--dbname",
            dest="dbname",
            help="Redis database name (default is db0)",
            default='db0'
        )
        parser.add_argument(
            "-t", "--timeout",
            dest="timeout",
            help="Number of seconds to wait before timing out and considering redis down",
            type=int,
            default=2
        )
        parser.add_argument(
            "-w", "--warning",
            dest="warning",
            type=int,
            help="Warning threshold."
        )
        parser.add_argument(
            "-c", "--critical",
            dest="critical",
            type=int,
            help="Critical threshold."
        )
        parser.add_argument(
            "-k", "--key",
            dest="key_value",
            help="Stat to monitor (memory_mb, hit_ratio, or custom)",
            default=None
        )

        return parser.parse_args()

    def get_version(self):
        """Redis server version, formatted for the status line."""
        return f"version: {self.info_out.get('redis_version')}"

    def get_client_connection(self):
        """Number of connected clients, formatted for the status line."""
        return f"connected_clients: {self.info_out.get('connected_clients')}"

    def get_number_keys(self):
        """Key count of the selected db, formatted for the status line."""
        return f"{self.dbname}: {self.info_out.get(self.dbname).get('keys')}"

    def get_uptime(self):
        """Server uptime in days, formatted for the status line."""
        return f"uptime_in_days: {self.info_out.get('uptime_in_days')}"

    def get_used_memory(self):
        """Human-readable memory usage, formatted for the status line."""
        return f"used_memory_human: {self.info_out.get('used_memory_human')}"

    def check(self):
        """Evaluate the requested check, print the status line, and exit."""
        number_keys = ''
        version = self.get_version()
        client_connected = self.get_client_connection()
        reverse_check = False
        exit_string = "OK"

        if self.dbname in str(self.info_out):
            number_keys = self.get_number_keys()

        memory = self.get_used_memory()
        uptime = self.get_uptime()

        # print(self.info_out)

        if self.key:
            # FIX: compare against None instead of truthiness so that an
            # explicit threshold of 0 is not treated as "missing".
            if self.warning is None or self.critical is None:
                exit_string = "UNKNOWN"

                if self.warning is None:
                    status = "UNKNOWN: Warning level required"
                if self.critical is None:
                    # FIX: message previously read "UNKNONW".
                    status = "UNKNOWN: Critical level required"

                print(status)
                sys.exit(EXIT_UNKNONW)

            if self.key == "memory_mb":
                # Bigger is fine here; alert when memory drops BELOW the
                # thresholds (reverse comparison).
                reverse_check = True
                info_value = int(
                    self.info_out.get("used_memory_rss") or self.info_out.get("used_memory")
                ) / (1024 * 1024)
            elif self.key == "hit_ratio":
                reverse_check = False
                hit = int(self.info_out.get("keyspace_hits"))
                miss = int(self.info_out.get("keyspace_misses"))

                # BUG FIX: was `hit > 0 and miss > 0`, which reported a 0%
                # hit ratio for a server with hits but zero misses (a
                # perfect cache). Any activity at all allows the division.
                if hit + miss > 0:
                    info_value = int(100 * hit) / (hit + miss)
                else:
                    info_value = 0
            else:
                # Arbitrary numeric INFO field named by --key.
                info_value = int(self.info_out.get(self.key))

            if reverse_check:
                if int(info_value) < int(self.critical):
                    exit_string = "CRITICAL"
                elif int(info_value) < int(self.warning):
                    exit_string = "WARNING"
            else:
                if int(info_value) > int(self.critical):
                    exit_string = "CRITICAL"
                elif int(info_value) > int(self.warning):
                    exit_string = "WARNING"

            status = f"{exit_string}: Redis {self.key} is {info_value}"
            perfdata = f"{self.key}={info_value};{self.warning};{self.critical};0;{info_value}"

            print(f"{status} || {perfdata}")

        else:

            if number_keys == '':
                status = f"OK REDIS No keys, {version}, {memory}, {uptime}"
            else:
                status = f"OK REDIS {version}, {client_connected}, {number_keys}, {memory}, {uptime}"

            print(status)

        if exit_string == "OK":
            sys.exit(EXIT_OK)
        if exit_string == "WARNING":
            sys.exit(EXIT_WARNING)
        # FIX: was compared against the misspelled "UNKNONW", which the
        # string assigned above ("UNKNOWN") could never match, so an
        # unknown state fell through to CRITICAL. (The missing-threshold
        # path exits earlier, so this branch is defensive.)
        if exit_string == "UNKNOWN":
            sys.exit(EXIT_UNKNONW)
        else:
            sys.exit(EXIT_CRITICAL)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: constructing the plugin parses argv and connects to
    # Redis (exiting CRITICAL on failure); check() prints and exits.
    server = MonitoringPluginRedis()
    server.check()
|
|
@ -0,0 +1,9 @@
|
|||
# Example invocation of the two-bot federation check: bot1 lives on the
# local homeserver, bot2 on a remote one (matrix.org); cached login files
# avoid re-authenticating on every run.
python3 Matrix\ Synapse/check_federation.py \
    --bot1-user '@bot1:your-hs.com' \
    --bot1-pw password1234 \
    --bot1-hs https://matrix.your-hs.com \
    --bot1-auth-file /opt/custom-nagios-checks/auth-fed-test-bot1.json \
    --bot2-user '@bot2:matrix.org' \
    --bot2-pw password1234 \
    --bot2-hs https://matrix-federation.matrix.org \
    --bot2-auth-file /opt/custom-nagios-checks/auth-fed-test-bot2.json
|
|
@ -0,0 +1,8 @@
|
|||
# Example invocation of the media-CDN check: uploads via the homeserver and
# verifies the media is served from the CDN domain; --admin-endpoint points
# at the admin API host directly (by IP).
python3 Matrix\ Synapse/check_media_cdn.py \
    --user '@bot1:your-hs.com' \
    --pw password1234 \
    --hs https://matrix.your-hs.com \
    --room '!banana:your-hs.com' \
    --auth-file ./auth-cdn.json \
    --media-cdn-domain matrix-media-cdn.your-hs.com \
    --admin-endpoint https://172.0.2.118
|
|
@ -0,0 +1,85 @@
|
|||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from flask import Flask, Response, request
|
||||
from icinga2api.client import Client
|
||||
|
||||
# Icinga2 API client used by all routes.
# NOTE(review): hard-coded URL and credentials -- move to config/env vars.
client = Client('https://localhost:8080', 'icingaweb2', 'password1234')

# Standard Nagios/Icinga state codes.
OK = 0
WARNING = 1
CRITICAL = 2
UNKNOWN = 3

app = Flask(__name__)
|
||||
|
||||
|
||||
def return_json(json_dict, start_response, status_code=200):
    """Emit *json_dict* as a single-chunk WSGI JSON response.

    json_dict:      object to serialize with json.dumps.
    start_response: WSGI start_response callable; invoked with the status
                    code (stringified) and a JSON Content-Type header.
    status_code:    HTTP status to report (default 200).

    Returns an iterator over one UTF-8-encoded body chunk.
    """
    body = json.dumps(json_dict).encode('utf-8')
    start_response(str(status_code), [('Content-Type', 'application/json')])
    return iter([body])
|
||||
|
||||
|
||||
@app.route('/host')
@app.route('/host/')
@app.route("/host/<hostid>")
def get_host_state(hostid=None):
    """Report the Icinga2 state of *hostid* and its services as JSON.

    Query parameters:
        service: may be repeated; restricts the output to the named
            services (responds 400 if any requested service is unknown).
        kuma: when exactly the string "true", collects every non-OK
            host/service into ``failed_services`` and answers with HTTP
            410 so an uptime monitor (e.g. Uptime Kuma) sees a failure.

    Responds 406 when no host id is given and 404 when Icinga2 does not
    know the host; otherwise returns the result dict (Flask serializes
    it with status 200).
    """
    # NOTE(review): `path` is computed but never used -- confirm and remove?
    path = Path(request.base_url)
    args_service = request.args.getlist('service')
    # Only the literal query value "true" enables kuma mode.
    kuma_mode = True if request.args.get('kuma') == 'true' else False

    if not hostid:
        return Response(json.dumps({'error': 'must specify host'}), status=406, mimetype='application/json')

    result = {
        'host': {},
        'services': {},
        'failed_services': []
    }

    # Look the host up by name through the Icinga2 API.
    host_status = client.objects.list('Host', filters='match(hpattern, host.name)', filter_vars={'hpattern': hostid})
    if not len(host_status):
        return Response(json.dumps({'error': 'could not find host'}), status=404, mimetype='application/json')
    else:
        host_status = host_status[0]

    result['host'] = {
        'name': host_status['name'],
        # An acknowledged problem is masked as OK (0); the raw Icinga
        # state is still exposed in 'actual_state'.
        'state': 0 if (host_status['attrs']['acknowledgement'] or host_status['attrs']['acknowledgement_expiry']) else host_status['attrs']['state'],
        'actual_state': host_status['attrs']['state'],
        'attrs': {
            **host_status['attrs']
        }
    }

    # Service object names look like "<host>!<service>"; split off the
    # service part for the result key.
    services_status = client.objects.list('Service', filters='match(hpattern, host.name)', filter_vars={'hpattern': hostid})
    for attrs in services_status:
        name = attrs['name'].split('!')[1]
        result['services'][name] = {
            'state': 0 if (attrs['attrs']['acknowledgement'] or attrs['attrs']['acknowledgement_expiry']) else attrs['attrs']['state'],
            'actual_state': attrs['attrs']['state'],
            'attrs': {
                # NOTE(review): this spreads the whole service object
                # (including 'name' and the nested 'attrs'), unlike the
                # host block above which spreads host_status['attrs'] --
                # confirm whether **attrs['attrs'] was intended.
                **attrs
            }
        }

    # Optional filtering down to the explicitly requested services.
    if len(args_service):
        services = {}
        for service in args_service:
            if service in result['services'].keys():
                services[service] = result['services'][service]
            else:
                return Response(json.dumps({'error': 'service not found', 'service': service}), status=400, mimetype='application/json')
        result['services'] = services

    # kuma mode: flatten every non-OK (post-acknowledgement) state into
    # failed_services; the host itself is appended under its hostid.
    if kuma_mode:
        for name, service in result['services'].items():
            if service['state'] != OK:
                result['failed_services'].append({'name': name, 'state': service['state']})
        if result['host']['state'] != OK:
            result['failed_services'].append({'name': hostid, 'state': result['host']['state']})

    # 410 signals failure to the monitor; failed_services is only ever
    # populated in kuma mode, so non-kuma requests always take the else.
    if len(result['failed_services']):
        return Response(json.dumps(result), status=410, mimetype='application/json')
    else:
        return result
|
Loading…
Reference in New Issue