reorganize, fix bugs

This commit is contained in:
Cyberes 2023-04-21 23:54:16 -06:00
parent 39042ba364
commit 74a4849cd8
25 changed files with 735 additions and 269 deletions

3
.gitignore vendored
View File

@ -1,3 +1,5 @@
.idea
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
@ -137,4 +139,3 @@ dmypy.json
# Cython debug symbols
cython_debug/

View File

@ -1,9 +0,0 @@
prometheus_client
requests
numpy
nagiosplugin
matrix-nio
Pillow
python-magic
numpy
beautifulsoup4

View File

@ -1,110 +0,0 @@
import sys
import requests
import nagios
def handle_err(func):
    """Decorate a check step so that any failure terminates the plugin.

    The wrapped function must return a ``(crit, ret)`` pair:

    * if calling it raises, ``UNKNOWN: exception '...'`` is printed and the
      process exits with ``nagios.UNKNOWN``;
    * if ``crit`` is truthy, ``CRITICAL: <crit>`` is printed and the process
      exits with ``nagios.CRITICAL``;
    * otherwise ``ret`` is returned to the caller.
    """
    # Local import so this block does not depend on the file's import section.
    import functools

    @functools.wraps(func)  # keep the wrapped function's __name__/__doc__
    def wrapper(*args, **kwargs):
        try:
            crit, ret = func(*args, **kwargs)
        except Exception as e:
            print(f"UNKNOWN: exception '{e}'")
            sys.exit(nagios.UNKNOWN)
        if crit:
            print(f"CRITICAL: {crit}")
            sys.exit(nagios.CRITICAL)
        return ret

    return wrapper
@handle_err
def login(user_id: str, passwd: str, homeserver: str):
    """Log in with a password via the homeserver's r0 login endpoint.

    Produces the ``(error, result)`` pair consumed by ``handle_err``: an
    error string and ``None`` on a non-200 status, otherwise ``None`` and the
    decoded JSON body.
    """
    payload = {'type': 'm.login.password', 'user': user_id, 'password': passwd}
    response = requests.post(f'{homeserver}/_matrix/client/r0/login', json=payload)
    if response.status_code == 200:
        return None, response.json()
    return f'Bad status code on login for {user_id}: {response.status_code}\nBody: {response.text}', None
@handle_err
def create_room(room_name, homeserver, auth_token):
    """
    Creates an unencrypted room.

    Produces the ``(error, result)`` pair consumed by ``handle_err``: an
    ``Exception`` describing the failure and ``None`` on a non-200 status,
    otherwise ``None`` and the decoded JSON body.
    """
    # Guest access is not configured; the original kept this entry disabled:
    # "initial_state": [{"type": "m.room.guest_access", "state_key": "", "content": {"guest_access": "can_join"}}]
    payload = {"name": room_name, "preset": "private_chat", "visibility": "private"}
    endpoint = f'{homeserver}/_matrix/client/r0/createRoom?access_token={auth_token}'
    response = requests.post(endpoint, json=payload)
    if response.status_code == 200:
        return None, response.json()
    return Exception(f'Bad status code on create room for {room_name}: {response.status_code}\nBody: {response.text}'), None
@handle_err
def send_invite(room_id, target_user_id, homeserver, auth_token):
    """Invite ``target_user_id`` to ``room_id``.

    Produces the ``(error, result)`` pair consumed by ``handle_err``: an
    ``Exception`` and ``None`` on a non-200 status, otherwise ``None`` and
    the decoded JSON body.
    """
    endpoint = f'{homeserver}/_matrix/client/r0/rooms/{room_id}/invite?access_token={auth_token}'
    response = requests.post(endpoint, json={'user_id': target_user_id})
    if response.status_code == 200:
        return None, response.json()
    return Exception(f'Bad status code on send invite for {room_id}: {response.status_code}\nBody: {response.text}'), None
@handle_err
def join_room(room_id, homeserver, auth_token):
    """Join ``room_id`` through the ``/join/{room_id}`` endpoint.

    Produces the ``(error, result)`` pair consumed by ``handle_err``: an
    ``Exception`` and ``None`` on a non-200 status, otherwise ``None`` and
    the decoded JSON body.
    """
    endpoint = f'{homeserver}/_matrix/client/r0/join/{room_id}?access_token={auth_token}'
    response = requests.post(endpoint, data='{}')
    if response.status_code == 200:
        return None, response.json()
    return Exception(f'Bad status code on join room for {room_id}: {response.status_code}\nBody: {response.text}'), None
@handle_err
def join_room_invite(room_id, homeserver, auth_token):
    """Join ``room_id`` through the ``/rooms/{room_id}/join`` endpoint.

    Produces the ``(error, result)`` pair consumed by ``handle_err``: an
    ``Exception`` and ``None`` on a non-200 status, otherwise ``None`` and
    the decoded JSON body.
    """
    endpoint = f'{homeserver}/_matrix/client/r0/rooms/{room_id}/join?access_token={auth_token}'
    response = requests.post(endpoint, data='{}')
    if response.status_code == 200:
        return None, response.json()
    return Exception(f'Bad status code on join room via invite for {room_id}: {response.status_code}\nBody: {response.text}'), None
@handle_err
def send_msg(message, room_id, homeserver, auth_token):
    """Post ``message`` to ``room_id`` as an ``m.text`` message.

    Produces the ``(error, result)`` pair consumed by ``handle_err``: an
    ``Exception`` and ``None`` on a non-200 status, otherwise ``None`` and
    the decoded JSON body.
    """
    endpoint = f'{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message?access_token={auth_token}'
    payload = {'msgtype': 'm.text', 'body': message}
    response = requests.post(endpoint, json=payload)
    if response.status_code == 200:
        return None, response.json()
    return Exception(f'Bad status code on send message for {room_id}: {response.status_code}\nBody: {response.text}'), None
# Not wrapped in handle_err: errors will be handled in the other script.
def get_event(event_id, room_id, homeserver, auth_token):
    """Fetch one event from a room and return the raw ``requests`` response."""
    endpoint = f'{homeserver}/_matrix/client/v3/rooms/{room_id}/event/{event_id}?access_token={auth_token}'
    return requests.get(endpoint)
@handle_err
def get_state(homeserver, auth_token, since=None):
    """Call the homeserver's ``/sync`` endpoint.

    Arguments:
        homeserver -- base URL of the homeserver.
        auth_token -- access token appended to the query string.
        since -- optional sync token; when truthy it is sent as the ``since``
            query parameter.

    Produces the ``(error, result)`` pair consumed by ``handle_err``: an
    ``Exception`` and ``None`` on a non-200 status, otherwise ``None`` and
    the decoded JSON body.
    """
    if since:
        # The original built "?since{since}" (no "="), so the token was never
        # sent as the `since` parameter.
        url = f'{homeserver}/_matrix/client/r0/sync?since={since}&access_token={auth_token}'
    else:
        url = f'{homeserver}/_matrix/client/r0/sync?access_token={auth_token}'
    r = requests.get(url)
    if r.status_code != 200:
        return Exception(f'Bad status code on sync: {r.status_code}\nBody: {r.text}'), None
    return None, r.json()
@handle_err
def forget_room(room_id, homeserver, auth_token):
    """Ask the homeserver to forget ``room_id`` for the token's user.

    Produces the ``(error, result)`` pair consumed by ``handle_err``: an
    ``Exception`` and ``None`` on a non-200 status, otherwise ``None`` and
    the decoded JSON body.
    """
    r = requests.post(f'{homeserver}/_matrix/client/r0/rooms/{room_id}/forget?access_token={auth_token}', data='{}')
    if r.status_code != 200:
        # The message previously said "leave room", which mislabelled the failing call.
        return Exception(f'Bad status code on forget room for {room_id}: {r.status_code}\nBody: {r.text}'), None
    return None, r.json()
@handle_err
def leave_room(room_id, homeserver, auth_token, forget=False):
    """Leave ``room_id`` and optionally forget it afterwards.

    Arguments:
        room_id -- room to leave.
        homeserver -- base URL of the homeserver.
        auth_token -- access token appended to the query string.
        forget -- when true, ``forget_room`` is called after a successful leave.

    Produces the ``(error, result)`` pair consumed by ``handle_err``: an
    ``Exception`` and ``None`` on a non-200 status, otherwise ``None`` and
    the decoded JSON body of the leave request.
    """
    r = requests.post(f'{homeserver}/_matrix/client/r0/rooms/{room_id}/leave?access_token={auth_token}', data='{}')
    if r.status_code != 200:
        return Exception(f'Bad status code on leave room for {room_id}: {r.status_code}\nBody: {r.text}'), None
    if forget:
        # forget_room is wrapped by handle_err and exits the process itself on
        # failure, so its return value is not needed here.
        forget_room(room_id, homeserver, auth_token)
    return None, r.json()

View File

View File

@ -1,25 +1,23 @@
import argparse
import json
from pathlib import Path
from flask import Flask, Response, request
from icinga2api.client import Client
client = Client('https://localhost:8080', 'icingaweb2', 'password1234')
from checker import nagios
OK = 0
WARNING = 1
CRITICAL = 2
UNKNOWN = 3
parser = argparse.ArgumentParser(description='')
parser.add_argument('--endpoint', default='https://localhost:8080', help='Icinga2 URL for the API. Defaults to "https://localhost:8080"')
parser.add_argument('--user', default='icingaweb2', help='API username. Defaults to "icingaweb2"')
parser.add_argument('--pw', required=True, help='API password.')
args = parser.parse_args()
client = Client(args.endpoint, args.user, args.pw)
app = Flask(__name__)
def return_json(json_dict, start_response, status_code=200):
    """Start a JSON response and return its body as an iterator.

    ``start_response`` is called with the status code as a string and a
    single ``Content-Type: application/json`` header; the result is an
    iterator over one UTF-8 encoded JSON document.
    """
    start_response(str(status_code), [('Content-Type', 'application/json')])
    encoded = json.dumps(json_dict).encode('utf-8')
    return iter([encoded])
@app.route('/host')
@app.route('/host/')
@app.route("/host/<hostid>")
@ -74,9 +72,9 @@ def get_host_state(hostid=None):
if kuma_mode:
for name, service in result['services'].items():
if service['state'] != OK:
if service['state'] != nagios.OK:
result['failed_services'].append({'name': name, 'state': service['state']})
if result['host']['state'] != OK:
if result['host']['state'] != nagios.OK:
result['failed_services'].append({'name': hostid, 'state': result['host']['state']})
if len(result['failed_services']):

View File

@ -0,0 +1,158 @@
#!/bin/bash
# Based on original E-Mail Icinga2 notification
# Script name and local host name.
PROG="$(basename "$0")"
ICINGA2HOST="$(hostname)"
CURLBIN="curl"
# Transaction ID used in the Matrix send URL: epoch seconds plus a random suffix.
MX_TXN="$(date "+%s")$((RANDOM % 9999))"
# `command -v` is the POSIX way to test for a command; `which` is an external
# tool whose output and exit status vary between systems.
if ! command -v "$CURLBIN" >/dev/null 2>&1; then
  echo "$CURLBIN not found in \$PATH. Consider installing it."
  exit 1
fi
warn_ico="⚠"
error_ico="❌"
ok_ico="🆗"
question_ico="❓"
#Set the message icon based on service state
# For nagios, replace ICINGA_ with NAGIOS_ to get the environment variables from Nagios
## Function helpers
# Print the list of required and optional command-line parameters to stdout.
Usage() {
cat <<EOF
Required parameters:
-d LONGDATETIME (\$icinga.long_date_time\$)
-l HOSTNAME (\$host.name\$)
-n HOSTDISPLAYNAME (\$host.display_name\$)
-o HOSTOUTPUT (\$host.output\$)
-s HOSTSTATE (\$host.state\$)
-t NOTIFICATIONTYPE (\$notification.type\$)
-m MATRIXROOM (\$notification_matrix_room_id\$)
-x MATRIXSERVER (\$notification_matrix_server\$)
-y MATRIXTOKEN (\$notification_matrix_token\$)
Optional parameters:
-4 HOSTADDRESS (\$address\$)
-6 HOSTADDRESS6 (\$address6\$)
-b NOTIFICATIONAUTHORNAME (\$notification.author\$)
-c NOTIFICATIONCOMMENT (\$notification.comment\$)
-i ICINGAWEB2URL (\$notification_icingaweb2url\$, Default: unset)
EOF
}
# Handler for -h: print the usage text and exit with status 1.
Help() {
Usage
exit 1
}
# Print an optional error message ($1) followed by the usage text, then exit 1.
Error() {
  if [ -n "$1" ]; then
    # Quoted so the message is printed verbatim, without word splitting or globbing.
    echo "$1"
  fi
  Usage
  exit 1
}
## Main
# Parse the command-line options into the variables used below.
# The leading ":" puts getopts into silent mode: an unknown option yields "?"
# and a missing argument yields ":", both with the offending letter in $OPTARG.
# Without it the ":)" branch is never taken and $OPTARG is empty in "\?)".
# The stray second ":" after "6" in the original option string is removed.
while getopts :4:6:b:c:d:hi:l:n:o:s:t:m:x:y: opt; do
  case "$opt" in
  4) HOSTADDRESS=$OPTARG ;;
  6) HOSTADDRESS6=$OPTARG ;;
  b) NOTIFICATIONAUTHORNAME=$OPTARG ;;
  c) NOTIFICATIONCOMMENT=$OPTARG ;;
  d) LONGDATETIME=$OPTARG ;; # required
  h) Help ;;
  i) ICINGAWEB2URL=$OPTARG ;;
  l) HOSTNAME=$OPTARG ;; # required
  n) HOSTDISPLAYNAME=$OPTARG ;; # required
  o) HOSTOUTPUT=$OPTARG ;; # required
  s) HOSTSTATE=$OPTARG ;; # required
  t) NOTIFICATIONTYPE=$OPTARG ;; # required
  m) MATRIXROOM=$OPTARG ;; # required
  x) MATRIXSERVER=$OPTARG ;; # required
  y) MATRIXTOKEN=$OPTARG ;; # required
  \?)
    echo "ERROR: Invalid option -$OPTARG" >&2
    Error
    ;;
  :)
    echo "Missing option argument for -$OPTARG" >&2
    Error
    ;;
  *)
    echo "Unimplemented option: -$OPTARG" >&2
    Error
    ;;
  esac
done
# Drop the parsed options so that "$@" holds only positional arguments.
shift $((OPTIND - 1))
## Check required parameters (TODO: better error message)
# The Matrix room, server and token are listed as required in the usage text
# and are needed to build the request URL, so they are validated here too.
if [ -z "$LONGDATETIME" ] ||
  [ -z "$HOSTNAME" ] || [ -z "$HOSTDISPLAYNAME" ] ||
  [ -z "$HOSTOUTPUT" ] || [ -z "$HOSTSTATE" ] ||
  [ -z "$NOTIFICATIONTYPE" ] ||
  [ -z "$MATRIXROOM" ] || [ -z "$MATRIXSERVER" ] || [ -z "$MATRIXTOKEN" ]; then
  Error "Requirement parameters are missing."
fi
## Build the notification message
# Choose the icon from the host state given with -s. The state passed on the
# command line is authoritative; the ICINGA_SERVICESTATE environment variable
# is consulted only when the host state is not a recognised value. Previously
# that variable could override the icon of a valid UP/DOWN state.
case "$HOSTSTATE" in
UP) ICON=$ok_ico ;;
DOWN) ICON=$error_ico ;;
UNKNOWN) ICON=$question_ico ;;
*)
  case "$ICINGA_SERVICESTATE" in
  OK) ICON=$ok_ico ;;
  WARNING) ICON=$warn_ico ;;
  CRITICAL) ICON=$error_ico ;;
  # Never leave ICON empty for an unexpected state.
  *) ICON=$question_ico ;;
  esac
  ;;
esac
# Assemble the HTML notification text. The here-document holds the mandatory
# part; the optional details below are appended only when their variable is set.
NOTIFICATION_MESSAGE=$(
cat <<EOF
$ICON <b>HOST:</b> $HOSTDISPLAYNAME is <b>$HOSTSTATE!</b> <br/>
<b>When:</b> $LONGDATETIME<br/>
<b>Info:</b> $HOSTOUTPUT<br/>
EOF
)
## Check whether IPv4 was specified.
if [ -n "$HOSTADDRESS" ]; then
NOTIFICATION_MESSAGE="$NOTIFICATION_MESSAGE <b>IPv4:</b> $HOSTADDRESS <br/>"
fi
## Check whether IPv6 was specified.
if [ -n "$HOSTADDRESS6" ]; then
NOTIFICATION_MESSAGE="$NOTIFICATION_MESSAGE <b>IPv6:</b> $HOSTADDRESS6 <br/>"
fi
## Append the comment when one was given. Only the comment is tested; the
## author name is used as-is and may be empty.
if [ -n "$NOTIFICATIONCOMMENT" ]; then
NOTIFICATION_MESSAGE="$NOTIFICATION_MESSAGE Comment by <b>$NOTIFICATIONAUTHORNAME:</b> $NOTIFICATIONCOMMENT <br/>"
fi
## Check whether Icinga Web 2 URL was specified; if so append a link to the host page.
if [ -n "$ICINGAWEB2URL" ]; then
NOTIFICATION_MESSAGE="$NOTIFICATION_MESSAGE $ICINGAWEB2URL/monitoring/host/show?host=$HOSTNAME <br/>"
fi
## Turn the message into a JSON string and send it to the Matrix room.
# Every line is escaped (backslash, double quote, tab) before it is placed in
# the payload, so plugin output or comments containing those characters can no
# longer break or alter the JSON document. Lines are joined with the JSON "\n"
# escape. Other control characters are not handled.
# `IFS= read -r` keeps whitespace and backslashes intact; the here-string is
# quoted so the text is passed through unchanged.
message=""
while IFS= read -r line; do
  line=${line//\\/\\\\}
  line=${line//\"/\\\"}
  line=${line//$'\t'/\\t}
  message="${message}\n${line}"
done <<<"$NOTIFICATION_MESSAGE"
BODY="${message}"
# The original piped printf into curl, but curl never read stdin because -d is
# given a literal payload; the dead pipe is dropped.
# NOTE(review): -k disables TLS certificate verification and the access token
# travels in the query string — confirm both are intended.
$CURLBIN -k -X PUT --header 'Content-Type: application/json' --header 'Accept: application/json' -d "{
\"msgtype\": \"m.text\",
\"body\": \"$BODY\",
\"formatted_body\": \"$BODY\",
\"format\": \"org.matrix.custom.html\"
}" "$MATRIXSERVER/_matrix/client/r0/rooms/$MATRIXROOM/send/m.room.message/$MX_TXN?access_token=$MATRIXTOKEN"

View File

@ -0,0 +1,165 @@
#!/bin/bash
# Based on original E-Mail Icinga2 notification
# Script name and local host name.
PROG="$(basename "$0")"
ICINGA2HOST="$(hostname)"
CURLBIN="curl"
# Transaction ID used in the Matrix send URL: epoch seconds plus a random suffix.
MX_TXN="$(date "+%s")$((RANDOM % 9999))"
# `command -v` is the POSIX way to test for a command; `which` is an external
# tool whose output and exit status vary between systems.
if ! command -v "$CURLBIN" >/dev/null 2>&1; then
  echo "$CURLBIN not found in \$PATH. Consider installing it."
  exit 1
fi
warn_ico="⚠"
error_ico="❌"
ok_ico="🆗"
question_ico="❓"
#Set the message icon based on service state
## Function helpers
# Print the list of required and optional command-line parameters to stdout.
Usage() {
cat <<EOF
Required parameters:
-d LONGDATETIME (\$icinga.long_date_time\$)
-e SERVICENAME (\$service.name\$)
-l HOSTNAME (\$host.name\$)
-n HOSTDISPLAYNAME (\$host.display_name\$)
-o SERVICEOUTPUT (\$service.output\$)
-s SERVICESTATE (\$service.state\$)
-t NOTIFICATIONTYPE (\$notification.type\$)
-u SERVICEDISPLAYNAME (\$service.display_name\$)
-m MATRIXROOM (\$notification_matrix_room_id\$)
-x MATRIXSERVER (\$notification_matrix_server\$)
-y MATRIXTOKEN (\$notification_matrix_token\$)
Optional parameters:
-4 HOSTADDRESS (\$address\$)
-6 HOSTADDRESS6 (\$address6\$)
-b NOTIFICATIONAUTHORNAME (\$notification.author\$)
-c NOTIFICATIONCOMMENT (\$notification.comment\$)
-i ICINGAWEB2URL (\$notification_icingaweb2url\$, Default: unset)
EOF
}
# Print the usage text and exit with status 1.
Help() {
Usage
exit 1
}
# Print an optional error message ($1) followed by the usage text, then exit 1.
Error() {
  if [ -n "$1" ]; then
    # Quoted so the message is printed verbatim, without word splitting or globbing.
    echo "$1"
  fi
  Usage
  exit 1
}
## Main
# Parse the command-line options into the variables used below.
# The leading ":" puts getopts into silent mode: an unknown option yields "?"
# and a missing argument yields ":", both with the offending letter in $OPTARG.
# -h and the error branches now exit through Help/Error. Previously they only
# printed the usage text and the script carried on with incomplete input.
while getopts :4:6:b:c:d:e:hi:l:n:o:s:t:u:m:x:y: opt; do
  case "$opt" in
  4) HOSTADDRESS=$OPTARG ;;
  6) HOSTADDRESS6=$OPTARG ;;
  b) NOTIFICATIONAUTHORNAME=$OPTARG ;;
  c) NOTIFICATIONCOMMENT=$OPTARG ;;
  d) LONGDATETIME=$OPTARG ;; # required
  e) SERVICENAME=$OPTARG ;; # required
  h) Help ;;
  i) ICINGAWEB2URL=$OPTARG ;;
  l) HOSTNAME=$OPTARG ;; # required
  n) HOSTDISPLAYNAME=$OPTARG ;; # required
  o) SERVICEOUTPUT=$OPTARG ;; # required
  s) SERVICESTATE=$OPTARG ;; # required
  t) NOTIFICATIONTYPE=$OPTARG ;; # required
  u) SERVICEDISPLAYNAME=$OPTARG ;; # required
  m) MATRIXROOM=$OPTARG ;; # required
  x) MATRIXSERVER=$OPTARG ;; # required
  y) MATRIXTOKEN=$OPTARG ;; # required
  \?)
    echo "ERROR: Invalid option -$OPTARG" >&2
    Error
    ;;
  :)
    echo "Missing option argument for -$OPTARG" >&2
    Error
    ;;
  *)
    echo "Unimplemented option: -$OPTARG" >&2
    Error
    ;;
  esac
done
# Drop the parsed options so that "$@" holds only positional arguments.
shift $((OPTIND - 1))
echo "$LONGDATETIME $HOSTNAME $HOSTDISPLAYNAME $SERVICENAME $SERVICEDISPLAYNAME $SERVICEOUTPUT $SERVICESTATE $NOTIFICATIONTYPE"
## Check required parameters (TODO: better error message)
# The Matrix room, server and token are listed as required in the usage text
# and are needed to build the request URL, so they are validated here too.
if [ -z "$LONGDATETIME" ] ||
  [ -z "$HOSTNAME" ] || [ -z "$HOSTDISPLAYNAME" ] ||
  [ -z "$SERVICENAME" ] || [ -z "$SERVICEDISPLAYNAME" ] ||
  [ -z "$SERVICEOUTPUT" ] || [ -z "$SERVICESTATE" ] ||
  [ -z "$NOTIFICATIONTYPE" ] ||
  [ -z "$MATRIXROOM" ] || [ -z "$MATRIXSERVER" ] || [ -z "$MATRIXTOKEN" ]; then
  Error "Requirement parameters are missing."
fi
## Build the notification message
# Choose the icon from the service state given with -s.
# The original also tested $HOSTSTATE, which no option of this script sets, so
# that branch could only react to a stray environment variable; it is dropped.
case "$SERVICESTATE" in
OK) ICON=$ok_ico ;;
WARNING) ICON=$warn_ico ;;
CRITICAL) ICON=$error_ico ;;
# UNKNOWN and any unexpected value get the question mark, so ICON is never empty.
*) ICON=$question_ico ;;
esac
# Assemble the HTML notification text. The here-document holds the mandatory
# part; the optional details below are appended only when their variable is set.
NOTIFICATION_MESSAGE=$(
cat <<-EOF
$ICON <strong>Service:</strong> $SERVICEDISPLAYNAME on $HOSTDISPLAYNAME
is <strong>$SERVICESTATE.</strong> <br/>
<strong>When:</strong> $LONGDATETIME. <br/>
<strong>Info:</strong> $SERVICEOUTPUT <br/>
EOF
)
## Check whether IPv4 was specified.
if [ -n "$HOSTADDRESS" ]; then
NOTIFICATION_MESSAGE="$NOTIFICATION_MESSAGE <strong>IPv4:</strong> $HOSTADDRESS <br/>"
fi
## Check whether IPv6 was specified.
if [ -n "$HOSTADDRESS6" ]; then
NOTIFICATION_MESSAGE="$NOTIFICATION_MESSAGE <strong>IPv6:</strong> $HOSTADDRESS6 <br/>"
fi
## Append the comment when one was given. Only the comment is tested; the
## author name is used as-is and may be empty.
if [ -n "$NOTIFICATIONCOMMENT" ]; then
NOTIFICATION_MESSAGE="$NOTIFICATION_MESSAGE <strong>Comment by $NOTIFICATIONAUTHORNAME:</strong> $NOTIFICATIONCOMMENT <br/>"
fi
## Check whether Icinga Web 2 URL was specified; if so append a link to the service page.
if [ -n "$ICINGAWEB2URL" ]; then
# Percent-encode spaces in the service name so it can be used in the URL
# (only spaces are handled; other special characters are passed through).
SERVICENAME=${SERVICENAME// /%20}
NOTIFICATION_MESSAGE="$NOTIFICATION_MESSAGE $ICINGAWEB2URL/monitoring/service/show?host=$HOSTNAME&service=$SERVICENAME <br/>"
fi
## Turn the message into a JSON string and send it to the Matrix room.
# Every line is escaped (backslash, double quote, tab) before it is placed in
# the payload, so plugin output or comments containing those characters can no
# longer break or alter the JSON document. Lines are joined with the JSON "\n"
# escape. Other control characters are not handled.
# `IFS= read -r` keeps whitespace and backslashes intact; the here-string is
# quoted so the text is passed through unchanged.
message=""
while IFS= read -r line; do
  line=${line//\\/\\\\}
  line=${line//\"/\\\"}
  line=${line//$'\t'/\\t}
  message="${message}\n${line}"
done <<<"$NOTIFICATION_MESSAGE"
BODY="${message}"
# The original piped printf into curl, but curl never read stdin because -d is
# given a literal payload; the dead pipe is dropped.
# NOTE(review): -k disables TLS certificate verification and the access token
# travels in the query string — confirm both are intended.
$CURLBIN -k -X PUT --header 'Content-Type: application/json' --header 'Accept: application/json' -d "{
\"msgtype\": \"m.text\",
\"body\": \"$BODY\",
\"formatted_body\": \"$BODY\",
\"format\": \"org.matrix.custom.html\"
}" "$MATRIXSERVER/_matrix/client/r0/rooms/$MATRIXROOM/send/m.room.message/$MX_TXN?access_token=$MATRIXTOKEN"

View File

@ -11,7 +11,7 @@ from uuid import uuid4
from nio import AsyncClient, AsyncClientConfig, JoinError, JoinResponse, LoginResponse, RoomCreateError, RoomGetEventResponse, RoomSendError
import nagios
import checker.nagios as nagios
parser = argparse.ArgumentParser(description='Test federation between two homeservers.')
parser.add_argument('--bot1-user', required=True, help='User ID for bot 1.')
@ -53,7 +53,7 @@ async def test_one_direction(sender_client, receiver_client, receiver_user_id):
test_room_name = str(uuid4())
new_test_room = await sender_client.room_create(name=test_room_name, invite=[receiver_user_id])
if isinstance(new_test_room, RoomCreateError):
print(new_test_room)
return f'UNKNOWN: failed to create room "{new_test_room}"', nagios.UNKNOWN
new_test_room_id = new_test_room.room_id
time.sleep(2)
@ -76,6 +76,13 @@ async def test_one_direction(sender_client, receiver_client, receiver_user_id):
msg = {'id': str(uuid4()), 'ts': send_msg_time.microsecond}
resp = (await sender_client.room_send(new_test_room_id, 'm.room.message', {'body': json.dumps(msg), 'msgtype': 'm.room.message'}))
if isinstance(resp, RoomSendError):
await sender_client.room_leave(new_test_room_id)
time.sleep(1)
await sender_client.room_forget(new_test_room_id)
time.sleep(1)
await receiver_client.room_leave(new_test_room_id)
time.sleep(1)
await receiver_client.room_forget(new_test_room_id)
return f'UNKNOWN: failed to send message "{resp}', nagios.UNKNOWN
msg_event_id = resp.event_id
@ -89,16 +96,22 @@ async def test_one_direction(sender_client, receiver_client, receiver_user_id):
break
if (datetime.now() - start_check).total_seconds() >= args.timeout:
await sender_client.room_leave(new_test_room_id)
time.sleep(1)
await sender_client.room_forget(new_test_room_id)
time.sleep(1)
await receiver_client.room_leave(new_test_room_id)
time.sleep(1)
await receiver_client.room_forget(new_test_room_id)
return "CRITICAL: timeout - receiver did not recieve the sender's message.", nagios.CRITICAL
# Double check everything makes sense
if not msg == recv_msg:
await sender_client.room_leave(new_test_room_id)
time.sleep(1)
await sender_client.room_forget(new_test_room_id)
time.sleep(1)
await receiver_client.room_leave(new_test_room_id)
time.sleep(1)
await receiver_client.room_forget(new_test_room_id)
return "CRITICAL: sender's message did not match the receiver's.", nagios.CRITICAL
@ -107,11 +120,14 @@ async def test_one_direction(sender_client, receiver_client, receiver_user_id):
# Clean up the rooms
await sender_client.room_leave(new_test_room_id)
time.sleep(1)
await sender_client.room_forget(new_test_room_id)
time.sleep(1)
await receiver_client.room_leave(new_test_room_id)
time.sleep(1)
await receiver_client.room_forget(new_test_room_id)
return bot1_msg_delta, True
return bot1_msg_delta, nagios.OK
async def login(user_id, passwd, homeserver, config_file=None):
@ -149,10 +165,10 @@ async def main() -> None:
nagios_output = nagios.OK
if not bot1_output_code:
if bot1_output_code != nagios.OK:
print(bot1_output_msg)
nagios_output = bot1_output_code
if not bot2_output_code:
if bot2_output_code != nagios.OK:
print(bot2_output_msg)
if nagios_output < bot2_output_code:
# Only set the code if our code is more severe

View File

@ -6,8 +6,8 @@ import time
import numpy as np
import requests
import nagios
from grafana import get_avg_python_gc_time, get_event_send_time, get_outgoing_http_request_rate, get_waiting_for_db
from checker import nagios
from checker.synapse_grafana import get_avg_python_gc_time, get_event_send_time, get_outgoing_http_request_rate, get_waiting_for_db
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--grafana-server', required=True, help='Grafana server.')
@ -28,10 +28,10 @@ if args.type == 'gc-time':
try:
python_gc_time_sum = np.round(np.average(get_avg_python_gc_time(args.grafana_api_key, args.interval, args.range, args.grafana_server)), 5)
if python_gc_time_sum >= python_gc_time_sum_MAX:
print(f'CRITICAL: average GC time per collection is {python_gc_time_sum} sec.')
print(f"CRITICAL: average GC time per collection is {python_gc_time_sum} sec. |'garbage-collection'={python_gc_time_sum}s;;;")
sys.exit(nagios.CRITICAL)
else:
print(f'OK: average GC time per collection is {python_gc_time_sum} sec.')
print(f"OK: average GC time per collection is {python_gc_time_sum} sec. |'garbage-collection'={python_gc_time_sum}s;;;")
sys.exit(nagios.OK)
except Exception as e:
print(f'UNKNOWN: failed to check avg. GC time "{e}"')
@ -53,10 +53,10 @@ elif args.type == 'response-time':
time.sleep(1)
response_time = np.round(np.average(response_times), 2)
if response_time > response_time_MAX:
print(f'CRITICAL: response time is {response_time} sec.')
print(f"CRITICAL: response time is {response_time} sec. |'response-time'={response_time}s;;;")
sys.exit(nagios.CRITICAL)
else:
print(f'OK: response time is {response_time} sec.')
print(f"OK: response time is {response_time} sec. |'response-time'={response_time}s;;;")
sys.exit(nagios.OK)
except Exception as e:
print(f'UNKNOWN: failed to check response time "{e}"')
@ -64,30 +64,33 @@ elif args.type == 'response-time':
elif args.type == 'outgoing-http-rate':
# outgoing req/sec
outgoing_http_request_rate_MAX = 10 if not args.crit else args.crit
# try:
outgoing_http_request_rate = get_outgoing_http_request_rate(args.grafana_api_key, args.interval, args.range, args.grafana_server)
failed = {}
for k, v in outgoing_http_request_rate.items():
if v > outgoing_http_request_rate_MAX:
failed[k] = v
if len(failed.keys()) > 0:
print(f'CRITICAL: outgoing HTTP request rate for {failed} req/sec.')
sys.exit(nagios.CRITICAL)
print(f'OK: outgoing HTTP request rate is {outgoing_http_request_rate} req/sec.')
sys.exit(nagios.OK)
# except Exception as e:
# print(f'UNKNOWN: failed to check outgoing HTTP request rate "{e}"')
# sys.exit(nagios.UNKNOWN)
try:
outgoing_http_request_rate = get_outgoing_http_request_rate(args.grafana_api_key, args.interval, args.range, args.grafana_server)
failed = {}
perf_data = '|'
for k, v in outgoing_http_request_rate.items():
perf_data = perf_data + f"'{k}'={v}s;;; "
if v > outgoing_http_request_rate_MAX:
failed[k] = v
if len(failed.keys()) > 0:
print(f'CRITICAL: outgoing HTTP request rate for {failed} req/sec.', perf_data)
sys.exit(nagios.CRITICAL)
print(f'OK: outgoing HTTP request rate is {outgoing_http_request_rate} req/sec.', perf_data)
sys.exit(nagios.OK)
except Exception as e:
print(f'UNKNOWN: failed to check outgoing HTTP request rate "{e}"')
sys.exit(nagios.UNKNOWN)
elif args.type == 'avg-send':
# Average send time in seconds
event_send_time_MAX = 1 if not args.crit else args.crit
try:
event_send_time = get_event_send_time(args.grafana_api_key, args.interval, args.range, args.grafana_server)
if event_send_time > event_send_time_MAX:
print(f'CRITICAL: average message send time is {event_send_time} sec.')
print(f"CRITICAL: average message send time is {event_send_time} sec. |'avg-send-time'={event_send_time}s;;;")
sys.exit(nagios.CRITICAL)
else:
print(f'OK: average message send time is {event_send_time} sec.')
print(f"OK: average message send time is {event_send_time} sec. |'avg-send-time'={event_send_time}s;;;")
sys.exit(nagios.OK)
except Exception as e:
print(f'UNKNOWN: failed to check average message send time "{e}"')
@ -98,10 +101,10 @@ elif args.type == 'db-lag':
try:
db_lag = get_waiting_for_db(args.grafana_api_key, args.interval, args.range, args.grafana_server)
if db_lag > db_lag_MAX:
print(f'CRITICAL: DB lag is {db_lag} sec.')
print(f"CRITICAL: DB lag is {db_lag} sec. |'db-lag'={db_lag}s;;;")
sys.exit(nagios.CRITICAL)
else:
print(f'OK: DB lag is {db_lag} sec.')
print(f"OK: DB lag is {db_lag} sec. |'db-lag'={db_lag}s;;;")
sys.exit(nagios.OK)
except Exception as e:
print(f'UNKNOWN: failed to check DB lag "{e}"')

View File

@ -7,15 +7,14 @@ import sys
import tempfile
import urllib
import aiofiles.os
import magic
import numpy as np
import requests
from PIL import Image
from nio import AsyncClient, AsyncClientConfig, LoginResponse, UploadResponse
from nio import AsyncClient, AsyncClientConfig, LoginResponse, RoomSendError
from urllib3.exceptions import InsecureRequestWarning
import nagios
from checker import nagios
from checker.synapse_client import send_image, write_login_details_to_disk
parser = argparse.ArgumentParser(description='')
parser.add_argument('--user', required=True, help='User ID for the bot.')
@ -30,8 +29,6 @@ parser.add_argument('--warn', type=float, default=2.0, help='Manually set warn l
parser.add_argument('--crit', type=float, default=2.5, help='Manually set critical level.')
args = parser.parse_args()
CONFIG_FILE = args.auth_file
def verify_media_header(header: str, header_dict: dict, good_value: str = None, warn_value: str = None, critical_value: str = None):
"""
@ -53,98 +50,42 @@ def verify_media_header(header: str, header_dict: dict, good_value: str = None,
return f'OK: {header} is present with value "{header_value}"', nagios.OK
def write_details_to_disk(resp: LoginResponse, homeserver) -> None:
"""Writes the required login details to disk so we can log in later without
using a password.
Arguments:
resp {LoginResponse} -- the successful client login response.
homeserver -- URL of homeserver, e.g. "https://matrix.example.org"
"""
# open the config file in write-mode
with open(CONFIG_FILE, "w") as f:
# write the login details to disk
json.dump({"homeserver": homeserver, # e.g. "https://matrix.example.org"
"user_id": resp.user_id, # e.g. "@user:example.org"
"device_id": resp.device_id, # device ID, 10 uppercase letters
"access_token": resp.access_token, # cryptogr. access token
}, f, )
async def send_image(client, room_id, image):
"""Send image to room.
Arguments:
---------
client : Client
room_id : str
image : str, file name of image
This is a working example for a JPG image.
"content": {
"body": "someimage.jpg",
"info": {
"size": 5420,
"mimetype": "image/jpeg",
"thumbnail_info": {
"w": 100,
"h": 100,
"mimetype": "image/jpeg",
"size": 2106
},
"w": 100,
"h": 100,
"thumbnail_url": "mxc://example.com/SomeStrangeThumbnailUriKey"
},
"msgtype": "m.image",
"url": "mxc://example.com/SomeStrangeUriKey"
}
"""
mime_type = magic.from_file(image, mime=True) # e.g. "image/jpeg"
if not mime_type.startswith("image/"):
print(f'UNKNOWN: wrong mime type "{mime_type}"')
sys.exit(nagios.UNKNOWN)
im = Image.open(image)
(width, height) = im.size # im.size returns (width,height) tuple
# first do an upload of image, then send URI of upload to room
file_stat = await aiofiles.os.stat(image)
async with aiofiles.open(image, "r+b") as f:
resp, maybe_keys = await client.upload(f, content_type=mime_type, # image/jpeg
filename=os.path.basename(image), filesize=file_stat.st_size, )
if not isinstance(resp, UploadResponse):
print(f'UNKNOWN: failed to upload image "{resp}"')
sys.exit(nagios.UNKNOWN)
content = {"body": os.path.basename(image), # descriptive title
"info": {"size": file_stat.st_size, "mimetype": mime_type, "thumbnail_info": None, # TODO
"w": width, # width in pixel
"h": height, # height in pixel
"thumbnail_url": None, # TODO
}, "msgtype": "m.image", "url": resp.content_uri, }
try:
return await client.room_send(room_id, message_type="m.room.message", content=content)
except Exception as e:
print(f"Image send of file {image} failed.")
print(f'UNKNOWN: failed to send image event "{e}"')
sys.exit(nagios.UNKNOWN)
async def main() -> None:
async def cleanup(client, test_image_path, image_event_id=None):
global exit_code
# Clean up
if image_event_id:
await client.room_redact(args.room, image_event_id)
os.remove(test_image_path)
await client.close()
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
try:
r = requests.delete(f'{args.admin_endpoint}/_synapse/admin/v1/users/{args.user}/media', headers={'Authorization': f'Bearer {client.access_token}'}, verify=False)
if r.status_code != 200:
if nagios.WARNING < exit_code:
exit_code = nagios.WARNING
print(f"WARN: failed to purge media for this user, request failed with '{r.text}'")
except Exception as e:
if nagios.WARNING < exit_code:
exit_code = nagios.WARNING
print(f"WARN: failed to purge media for this user '{e}'")
client = AsyncClient(args.hs, args.user, config=AsyncClientConfig(request_timeout=args.timeout, max_timeout_retry_wait_time=10))
if args.auth_file:
# If there are no previously-saved credentials, we'll use the password
if not os.path.exists(CONFIG_FILE):
if not os.path.exists(args.auth_file):
resp = await client.login(args.pw)
# check that we logged in successfully
if isinstance(resp, LoginResponse):
write_details_to_disk(resp, args.hs)
write_login_details_to_disk(resp, args.hs, args.auth_file)
else:
print(f'UNKNOWN: failed to log in "{resp}"')
sys.exit(nagios.UNKNOWN)
else:
# Otherwise the config file exists, so we'll use the stored credentials
with open(CONFIG_FILE, "r") as f:
with open(args.auth_file, "r") as f:
config = json.load(f)
client = AsyncClient(config["homeserver"])
client.access_token = config["access_token"]
@ -163,7 +104,12 @@ async def main() -> None:
im.save(test_image_path)
# Send the image and get the event ID
image_event_id = (await send_image(client, args.room, test_image_path)).event_id
image_event_id = (await send_image(client, args.room, test_image_path))
if isinstance(image_event_id, RoomSendError):
await cleanup(client, test_image_path)
print(f'UNKNOWN: failed to send message "{image_event_id}"')
sys.exit(nagios.UNKNOWN)
image_event_id = image_event_id.event_id
# Get the event
image_event = (await client.room_get_event(args.room, image_event_id)).event
@ -186,29 +132,21 @@ async def main() -> None:
print(f'OK: media CDN domain is "{domain}"')
results = [verify_media_header('synapse-media-local-status', headers), verify_media_header('synapse-media-s3-status', headers, good_value='200'), verify_media_header('synapse-media-server', headers, good_value='s3'),
verify_media_header('Server', headers, good_value='cloudflare')]
verify_media_header('Server', headers, good_value='cloudflare')]
for header_chk, code in results:
if code != nagios.OK:
exit_code = code
print(header_chk)
# Clean up
await client.room_redact(args.room, image_event_id)
os.remove(test_image_path)
await client.close()
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
try:
r = requests.delete(f'{args.admin_endpoint}/_synapse/admin/v1/users/{args.user}/media', headers={'Authorization': f'Bearer {client.access_token}'}, verify=False)
if r.status_code != 200:
if nagios.WARNING < exit_code:
exit_code = nagios.WARNING
print(f"WARN: failed to purge media for this user, request failed with '{r.text}'")
except Exception as e:
if nagios.WARNING < exit_code:
exit_code = nagios.WARNING
print(f"WARN: failed to purge media for this user '{e}'")
# Make sure we aren't redirected if we're a Synapse server
test = requests.head(target_file_url, headers={'User-Agent': 'Synapse/1.77.3'}, allow_redirects=False)
if test.status_code != 200:
print('CRITICAL: Synapse user-agent redirected with status code', test.status_code)
exit_code = nagios.CRITICAL
else:
print(f'OK: Synapse user-agent not redirected.')
await cleanup(client, test_image_path, image_event_id=image_event_id)
sys.exit(exit_code)
@ -217,4 +155,7 @@ if __name__ == "__main__":
asyncio.run(main())
except Exception as e:
print(f'UNKNOWN: exception "{e}"')
import traceback
print(traceback.format_exc())
sys.exit(nagios.UNKNOWN)

View File

@ -6,7 +6,7 @@ import sys
import numpy as np
import requests
import nagios
from checker import nagios
parser = argparse.ArgumentParser(description='')
parser.add_argument('--metrics-endpoint', required=True, help='Target URL to scrape.')
@ -19,7 +19,7 @@ parser.add_argument('--crit', type=float, default=30, help='Manually set critica
args = parser.parse_args()
if args.prometheus:
from prometheus import parse_metrics
from checker.prometheus import parse_metrics
r = requests.get(args.metrics_endpoint)
if r.status_code != 200:
@ -81,7 +81,6 @@ else:
data = {}
for item in tooltips:
m = re.match(r'<span class="tooltip">\s*Send: (.*?)\s*<br\/>\s*Receive: (.*?)\s*<\/span>', str(item))
print(item)
if m:
domain = item.parent.parent.find('span', {'class': 'domain'}).text
data[domain] = {

1
checker/__init__.py Normal file
View File

@ -0,0 +1 @@

View File

@ -1,4 +1,4 @@
UNKNOWN = -1
OK = 0
WARNING = 1
CRITICAL = 2
CRITICAL = 2

72
checker/notify.py Normal file
View File

@ -0,0 +1,72 @@
# Icons prepended to notification messages: four state icons returned by
# choose_icon, then one icon each for host and service notifications.
# NOTE(review): the four state icons are empty strings here — possibly emoji
# that were lost in transit; confirm against the repository.
warn_ico = ""
error_ico = ""
ok_ico = ""
question_ico = ""
host_ico = '🖥️'
service_ico = '⚙️'
def choose_icon(state):
    """Return the icon for a host state (UP/DOWN) or a service state
    (OK/WARNING/CRITICAL/UNKNOWN).

    Raises ``Exception`` when ``state`` is none of those names.
    """
    if state in ('UP', 'OK'):
        return ok_ico
    if state in ('DOWN', 'CRITICAL'):
        return error_ico
    if state == 'UNKNOWN':
        return question_ico
    if state == 'WARNING':
        return warn_ico
    raise Exception('No state to icon matched.')
def choose_color(state):
    """Return the hex colour for a host state (UP/DOWN) or a service state
    (OK/WARNING/CRITICAL/UNKNOWN).

    Raises ``Exception`` when ``state`` is none of those names.
    """
    palette = (
        (('UP', 'OK'), '#44bb77'),
        (('DOWN', 'CRITICAL'), '#ff5566'),
        (('UNKNOWN',), '#aa44ff'),
        (('WARNING',), '#ffaa44'),
    )
    for names, color in palette:
        if state in names:
            return color
    raise Exception('No state to color matched.')
def newline_to_formatted_html(string):
    """Wrap multi-line text in ``<br><pre>...</pre>``; return single-line text unchanged."""
    return f'<br><pre>{string}</pre>' if '\n' in string else string
def build_msg(host_name, host_display_name, state, date_str, output, service_name=None, service_display_name='', address='', comment='', author='', icinga2_url=''):
    """Build the Markdown/HTML text of a host or service notification.

    Arguments:
        host_name -- internal host name, used for the quick link.
        host_display_name -- host name shown in the message.
        state -- state name understood by choose_icon()/choose_color().
        date_str -- timestamp text shown after "When:".
        output -- check output shown after "Info:".
        service_name -- internal service name; when set, the message is
            about a service instead of a host.
        service_display_name -- service name shown in the message.
        address -- optional host address, shown as "IP:".
        comment -- optional notification comment.
        author -- optional author of the comment.
        icinga2_url -- optional base URL of the web interface for the link.

    Returns:
        The message as a string mixing Markdown and inline HTML.

    Optional values may be empty or None; a missing value contributes
    nothing to the message.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    if service_name:
        item = f'**{service_display_name}** on **{host_display_name}**'
        kind_icon = service_ico
    else:
        item = f'**{host_display_name}**'
        kind_icon = host_ico
    icon = f'{choose_icon(state)}&nbsp;&nbsp;{kind_icon}'

    # Each optional section is built into its own variable that defaults to ''.
    # Interpolating the raw argument would print the text "None" whenever a
    # caller passes None (argparse's value for an omitted option).
    address_part = f'<br>**IP:** {address}' if address else ''

    if comment and author:
        comment_part = f'<br>**Comment by {author}:** {newline_to_formatted_html(comment)}'
    elif comment:
        # A comment without an author is still labelled and put on its own
        # line instead of being glued onto the check output.
        comment_part = f'<br>**Comment:** {newline_to_formatted_html(comment)}'
    else:
        comment_part = ''

    link_part = ''
    if icinga2_url:
        base_url = icinga2_url.strip("/")
        # Percent-encode the names completely (not only spaces) so characters
        # such as '&', '#' or ')' cannot break the query string or the
        # Markdown link. Spaces become %20 for hosts and services alike.
        if service_name:
            service_q = quote(service_name, safe='')
            host_q = quote(host_name, safe='')
            link_part = f'<br>[Quick Link]({base_url}/icingadb/service?name={service_q}&host.name={host_q})'
        elif host_name:
            host_q = quote(host_name, safe='')
            link_part = f'<br>[Quick Link]({base_url}/icingadb/host?name={host_q})'

    msg = f"""{icon}&nbsp;&nbsp;&nbsp;{item} is <font color="{choose_color(state)}">{state}</font> <br>
**When:** {date_str}. <br>
**Info:** {newline_to_formatted_html(output)}{address_part}{comment_part}{link_part}"""
    return msg

145
checker/synapse_client.py Normal file
View File

@ -0,0 +1,145 @@
import asyncio
import json
import os
import sys
import aiofiles.os
import magic
import markdown
from PIL import Image
from nio import AsyncClient, LoginResponse, RoomSendError, UploadResponse
from . import nagios
def handle_err(func):
    """Decorator turning a ``(crit, ret)`` result into plugin exit behaviour.

    The wrapped function must return a 2-tuple ``(crit, ret)``:

    * if the call raises, "UNKNOWN: ..." is printed and the process exits
      with ``nagios.UNKNOWN``;
    * if ``crit`` is truthy, "CRITICAL: ..." is printed and the process
      exits with ``nagios.CRITICAL``;
    * otherwise ``ret`` is returned to the caller.
    """
    # Local import keeps this function self-contained.
    from functools import wraps

    @wraps(func)  # keep the wrapped function's __name__/__doc__ intact
    def wrapper(*args, **kwargs):
        try:
            crit, ret = func(*args, **kwargs)
        except Exception as e:
            print(f"UNKNOWN: exception '{e}'")
            sys.exit(nagios.UNKNOWN)
        if crit:
            print(f"CRITICAL: {crit}")
            sys.exit(nagios.CRITICAL)
        return ret

    return wrapper
def write_login_details_to_disk(resp: LoginResponse, homeserver, config_file) -> None:
    """Store the login details as JSON so a later run can skip the password.

    Arguments:
        resp {LoginResponse} -- the successful client login response.
        homeserver -- URL of homeserver, e.g. "https://matrix.example.org"
        config_file -- path of the file to write; it is opened in write
            mode, so existing content is replaced.
    """
    with open(config_file, "w") as handle:
        credentials = {
            "homeserver": homeserver,  # e.g. "https://matrix.example.org"
            "user_id": resp.user_id,  # e.g. "@user:example.org"
            "device_id": resp.device_id,
            "access_token": resp.access_token,
        }
        json.dump(credentials, handle)
async def send_image(client, room_id, image):
    """Upload an image file and post it to a room as an ``m.image`` event.

    Arguments:
    ---------
    client : Client
    room_id : str
    image : str, file name of image

    Returns the result of ``client.room_send``. If the file is not an image,
    the upload fails, or sending raises, an "UNKNOWN: ..." line is printed and
    the process exits with ``nagios.UNKNOWN``.

    This is a working example of the event content for a JPG image.
        "content": {
            "body": "someimage.jpg",
            "info": {
                "size": 5420,
                "mimetype": "image/jpeg",
                "thumbnail_info": {
                    "w": 100,
                    "h": 100,
                    "mimetype": "image/jpeg",
                    "size": 2106
                },
                "w": 100,
                "h": 100,
                "thumbnail_url": "mxc://example.com/SomeStrangeThumbnailUriKey"
            },
            "msgtype": "m.image",
            "url": "mxc://example.com/SomeStrangeUriKey"
        }
    """
    mime_type = magic.from_file(image, mime=True)  # e.g. "image/jpeg"
    if not mime_type.startswith("image/"):
        print(f'UNKNOWN: wrong mime type "{mime_type}"')
        sys.exit(nagios.UNKNOWN)

    # Only the dimensions are needed. The context manager releases the file
    # handle that Image.open() would otherwise keep open.
    with Image.open(image) as im:
        width, height = im.size  # im.size is a (width, height) tuple

    # First upload the image, then send the URI of the upload to the room.
    file_stat = await aiofiles.os.stat(image)
    # The file is only read, so do not demand write permission ("r+b").
    async with aiofiles.open(image, "rb") as f:
        resp, _ = await client.upload(
            f,
            content_type=mime_type,  # image/jpeg
            filename=os.path.basename(image),
            filesize=file_stat.st_size,
        )
    if not isinstance(resp, UploadResponse):
        print(f'UNKNOWN: failed to upload image "{resp}"')
        sys.exit(nagios.UNKNOWN)

    content = {
        "body": os.path.basename(image),  # descriptive title
        "info": {
            "size": file_stat.st_size,
            "mimetype": mime_type,
            "thumbnail_info": None,  # TODO
            "w": width,  # width in pixel
            "h": height,  # height in pixel
            "thumbnail_url": None,  # TODO
        },
        "msgtype": "m.image",
        "url": resp.content_uri,
    }

    try:
        return await client.room_send(room_id, message_type="m.room.message", content=content)
    except Exception as e:
        # The status line goes first: monitoring systems treat the first
        # output line as the check summary.
        print(f'UNKNOWN: failed to send image event "{e}"')
        print(f"Image send of file {image} failed.")
        sys.exit(nagios.UNKNOWN)
def send_msg(client, room, msg):
    """Send ``msg`` to ``room`` as a text message, then close the client.

    The plain body is ``msg`` itself; the formatted body is ``msg`` rendered
    through ``markdown.markdown``. A ``RoomSendError`` response is printed.
    This is a blocking call: it runs its own event loop via ``asyncio.run``.
    """
    async def inner(client, room, msg):
        try:
            r = await client.room_send(
                room_id=room,
                message_type="m.room.message",
                content={
                    "msgtype": "m.text",
                    "body": msg,
                    "format": "org.matrix.custom.html",
                    "formatted_body": markdown.markdown(msg),
                },
            )
            if isinstance(r, RoomSendError):
                print(r)
        finally:
            # Close the client even when sending raised, so it is not leaked.
            await client.close()

    return asyncio.run(inner(client, room, msg))
def login(user, pw, hs, auth_file, room):
    """Obtain a logged-in Matrix client and join ``room``.

    Arguments:
        user -- user ID of the bot.
        pw -- password, used when no stored credentials are available.
        hs -- homeserver URL.
        auth_file -- optional path of a JSON credentials file. If it exists,
            its homeserver/user_id/device_id/access_token are used instead of
            a password login; if it does not exist, it is written after a
            successful password login.
        room -- room to join.

    Returns:
        ``(access_token, client)``. The client has already been closed when
        it is returned.

    If the password login fails, the response is printed and the process
    exits with ``nagios.UNKNOWN`` instead of carrying on without credentials.
    This is a blocking call: it runs its own event loop via ``asyncio.run``.
    """
    async def inner(user, pw, hs, auth_file, room):
        client = None
        try:
            if auth_file and os.path.exists(auth_file):
                # Stored credentials exist: reuse them, no password needed.
                with open(auth_file, "r") as f:
                    config = json.load(f)
                client = AsyncClient(config["homeserver"])
                client.access_token = config["access_token"]
                client.user_id = config["user_id"]
                client.device_id = config["device_id"]
            else:
                client = AsyncClient(hs, user)
                resp = await client.login(pw)
                # Check the login on every path, not only when caching.
                if not isinstance(resp, LoginResponse):
                    print(f'Failed to log in "{resp}"')
                    sys.exit(nagios.UNKNOWN)
                if auth_file:
                    write_login_details_to_disk(resp, hs, auth_file)
            await client.join(room)
            return client.access_token, client
        finally:
            # Runs on success, on errors and on sys.exit() alike.
            if client is not None:
                await client.close()

    return asyncio.run(inner(user, pw, hs, auth_file, room))

View File

@ -0,0 +1,40 @@
import argparse
import checker.synapse_client as synapse_client
from checker.notify import build_msg
# Command-line interface of the host notification script. The help texts name
# the Icinga 2 macro each option is expected to receive.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--user', required=True, help='User ID for the bot.')
parser.add_argument('--pw', required=True, help='Password for the bot.')
parser.add_argument('--hs', required=True, help='Homeserver of the bot.')
parser.add_argument('--room', required=True, help='The room the bot should send its messages in.')
parser.add_argument('--auth-file', help="File to cache the bot's login details to.")
parser.add_argument('--longdatetime', required=True, help='$icinga.long_date_time$')
parser.add_argument('--hostname', required=True, help='$host.name$')
parser.add_argument('--hostdisplayname', required=True, help='$host.display_name$')
parser.add_argument('--hoststate', required=True, help='$host.state$')
parser.add_argument('--hostoutput', required=True, help='$host.output$')
parser.add_argument('--notificationtype', required=True, help='$notification.type$')
# Optional values default to '' (matching build_msg's own defaults) rather
# than argparse's None, which would otherwise show up in the message as the
# literal text "None".
parser.add_argument('--hostaddress', required=False, default='', help='$address$')
parser.add_argument('--notificationauthor', required=False, default='', help='$notification.author$')
parser.add_argument('--notificationcomment', required=False, default='', help='$notification.comment$')
parser.add_argument('--icinga2weburl', required=False, default='', help='$notification.icingaweb2url$')
args = parser.parse_args()

if __name__ == '__main__':
    msg = build_msg(
        host_name=args.hostname,
        host_display_name=args.hostdisplayname,
        state=args.hoststate,
        date_str=args.longdatetime,
        output=args.hostoutput,
        address=args.hostaddress,
        comment=args.notificationcomment,
        author=args.notificationauthor,
        icinga2_url=args.icinga2weburl
    )
    # login() joins the room; send_msg() posts the message and closes the client.
    access_token, client = synapse_client.login(args.user, args.pw, args.hs, args.auth_file, args.room)
    synapse_client.send_msg(client, args.room, msg)

View File

@ -0,0 +1,33 @@
import argparse
import checker.synapse_client as synapse_client
from checker.notify import build_msg
# Command-line interface of the service notification script. The help texts
# name the Icinga 2 macro each option is expected to receive.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--user', required=True, help='User ID for the bot.')
parser.add_argument('--pw', required=True, help='Password for the bot.')
parser.add_argument('--hs', required=True, help='Homeserver of the bot.')
parser.add_argument('--room', required=True, help='The room the bot should send its messages in.')
parser.add_argument('--auth-file', help="File to cache the bot's login details to.")
parser.add_argument('--longdatetime', required=True, help='$icinga.long_date_time$')
parser.add_argument('--servicename', required=True, help='$service.name$')
# The help text previously repeated '$service.name$' for the display name.
parser.add_argument('--servicedisplayname', required=True, help='$service.display_name$')
parser.add_argument('--hostname', required=True, help='$host.name$')
parser.add_argument('--hostdisplayname', required=True, help='$host.display_name$')
parser.add_argument('--serviceoutput', required=True, help='$service.output$')
parser.add_argument('--servicestate', required=True, help='$service.state$')
parser.add_argument('--notificationtype', required=True, help='$notification.type$')
# Optional values default to '' (matching build_msg's own defaults) rather
# than argparse's None, which would otherwise show up in the message as the
# literal text "None".
parser.add_argument('--hostaddress', required=False, default='', help='$address$')
parser.add_argument('--notificationauthor', required=False, default='', help='$notification.author$')
parser.add_argument('--notificationcomment', required=False, default='', help='$notification.comment$')
parser.add_argument('--icinga2weburl', required=False, default='', help='$notification.icingaweb2url$')
args = parser.parse_args()

if __name__ == '__main__':
    # Keyword arguments keep the eleven values from being mixed up by position.
    msg = build_msg(
        host_name=args.hostname,
        host_display_name=args.hostdisplayname,
        state=args.servicestate,
        date_str=args.longdatetime,
        output=args.serviceoutput,
        service_name=args.servicename,
        service_display_name=args.servicedisplayname,
        address=args.hostaddress,
        comment=args.notificationcomment,
        author=args.notificationauthor,
        icinga2_url=args.icinga2weburl,
    )
    print(msg)
    # login() joins the room; send_msg() posts the message and closes the client.
    access_token, client = synapse_client.login(args.user, args.pw, args.hs, args.auth_file, args.room)
    synapse_client.send_msg(client, args.room, msg)

13
requirements.txt Normal file
View File

@ -0,0 +1,13 @@
prometheus_client
requests~=2.28.2
numpy~=1.24.2
matrix-nio
Pillow~=9.4.0
python-magic~=0.4.27
beautifulsoup4~=4.11.2
flask~=2.2.3
icinga2api~=0.6.1
urllib3~=1.26.14
aiofiles~=0.6.0
markdown