updates
This commit is contained in:
parent
23fb350116
commit
d1665ef9d1
|
@ -0,0 +1,240 @@
|
||||||
|
#!/bin/bash

# check_curl: Nagios-style plugin that times an HTTP request with curl
# and checks the response body / duration against thresholds.

# startup checks

if [ -z "$BASH" ]; then
    echo "Please use BASH."
    exit 3
fi

# Locate curl with the portable shell builtin instead of depending on a
# /usr/bin/which binary being installed.
curl=$(command -v curl)
if [ $? -ne 0 ]; then
    echo "Please install curl."
    exit 3
fi

# Default Values (each overridable via command-line options, see usage())
proxy=""            # -P: proxy address; empty = no proxy
method="GET"        # -M: HTTP method
body=""             # -B: request body; empty = no body sent
contains=""         # -C: CRITICAL if response body lacks this string
lacks=""            # -L: CRITICAL if response body contains this string
insecure=0          # -I: pass --insecure to curl
debug=0             # -D: only print the curl command, do not run it
warning=700         # -w: warning threshold in milliseconds
encodeurl=0         # -E: send body with --data-urlencode
critical=2000       # -c: critical threshold in milliseconds
url=""              # -U: target URL (required)
follow=0            # -F: follow redirects (curl -L)
header=""           # -H: extra request header
name="default"      # -N: display name (sanitized)
cookies=0           # -K: use a temporary cookie jar
|
||||||
|
|
||||||
|
# Usage Info
|
||||||
|
# Print the supported options for this plugin to stdout.
usage() {
    echo 'Usage: check_curl [OPTIONS]
[OPTIONS]:
-U URL Target URL
-M METHOD HTTP Method (default: GET)
-N NAME Display Name of scanned object (default: default)
-B BODY Request Body to be sent (default: not sent)
-E ENCODEURL Send body defined with url encoding (curl --data-urlencode) (default: off)
-I INSECURE Sets the curl flag --insecure
-C CONTAINS If not contained in response body, CRITICAL will be returned
-L LACKS If contained in response body, CRITICAL will be returned (-C has priority when both are set)
-w WARNING Warning threshold in milliseconds (default: 700)
-c CRITICAL Critical threshold in milliseconds (default: 2000)
-H HEADER Send Header (i.E. "AUTHORIZATION: Bearer 8*.UdUYwrl!nK")
-F FOLLOW Follow redirects (default: OFF)
-D DEBUG Only prints the curl command (default: OFF)
-P PROXY Set Proxy Address (default: No Proxy)
-K COOKIES Enables/Disables cookie handling in a temporary cookie jar'
}
|
||||||
|
|
||||||
|
|
||||||
|
# Check which threshold was reached
|
||||||
|
checkTime() {
|
||||||
|
if [ $1 -gt $critical ]; then
|
||||||
|
echo -n "CRITICAL: Slow "
|
||||||
|
elif [ $1 -gt $warning ]; then
|
||||||
|
echo -n "WARNING: Slow "
|
||||||
|
else
|
||||||
|
echo -n "OK"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Return code value
|
||||||
|
# Map an elapsed request time to a Nagios exit status.
# $1: elapsed time in milliseconds.
# Reads globals: warning, critical (threshold values in ms).
# Returns: 2 (CRITICAL), 1 (WARNING) or 0 (OK).
getStatus() {
    local elapsed=$1
    if [ "$elapsed" -gt "$critical" ]; then
        return 2
    elif [ "$elapsed" -gt "$warning" ]; then
        return 1
    else
        return 0
    fi
}
|
||||||
|
|
||||||
|
#main
#get options
# NOTE: the original option string was "P:M:B:C:w:c:U:H:IFN:O:EL:D:K" —
# 'D:' wrongly demanded an argument for the -D flag and 'O:' was declared
# but never handled; both are fixed here.
while getopts "P:M:B:C:w:c:U:H:IFN:EL:DK" opt; do
    case $opt in
        K)
            cookies=1
            ;;
        P)
            proxy=$OPTARG
            ;;
        M)
            method=$OPTARG
            ;;
        B)
            body=$OPTARG
            ;;
        C)
            contains=$OPTARG
            ;;
        w)
            warning=$OPTARG
            ;;
        c)
            critical=$OPTARG
            ;;
        U)
            url=$OPTARG
            ;;
        L)
            lacks=$OPTARG
            ;;
        I)
            insecure=1
            ;;
        N)
            # Sanitize the display name to [A-Za-z0-9._-].
            name=$( echo "$OPTARG" | sed -e 's/[^A-Za-z0-9._-]/_/g' )
            ;;
        E)
            encodeurl=1
            ;;
        H)
            header=$OPTARG
            ;;
        F)
            follow=1
            ;;
        D)
            debug=1
            ;;
        *)
            usage
            exit 3
            ;;
    esac
done

#hostname is required
if [ -z "$url" ] || [ $# -eq 0 ]; then
    echo "Error: URL is required"
    usage
    exit 3
fi

# Assemble the optional curl arguments from the parsed options.
proxyarg=""
if [ -n "$proxy" ] ; then
    proxyarg=" -x $proxy "
fi
headerarg=""
if [ -n "$header" ] ; then
    headerarg=' -H "'$header'" '
fi
followarg=""
if [ $follow -eq 1 ] ; then
    followarg=" -L "
fi
insecurearg=""
if [ $insecure -eq 1 ] ; then
    insecurearg=" --insecure "
fi
cookiesarg=""
if [ $cookies -eq 1 ] ; then
    # Temporary cookie jar, removed again after the request.
    COOKIE_JAR_TEMP_PATH=$(mktemp /tmp/check_curl_cookiejar.XXXXXX)
    cookiesarg=" -c ${COOKIE_JAR_TEMP_PATH} -b ${COOKIE_JAR_TEMP_PATH}"
fi
bodyarg=""
if [ -n "$body" ]; then
    # Escape double quotes; the command line is later passed through eval.
    body=$(echo $body| sed "s/\"/\\\\\"/g")
    bodyarg=" --data \""$body"\""
    if [ $encodeurl -eq 1 ]; then
        bodyarg=" --data-urlencode \""$body"\""
    fi
fi

if [ $debug -eq 1 ]; then
    # Debug mode: only show what would be executed.
    echo $curl --no-keepalive -s $insecurearg $proxyarg $followarg $bodyarg $headerarg -X $method $cookiesarg "$url"
    exit 0
else
    # Time the request in milliseconds.
    start=$(($(date +%s%N)/1000000))
    # NOTE(review): eval on user-supplied option values is an injection
    # risk; kept for compatibility with the original argument assembly.
    body=$(eval $curl --no-keepalive -s $insecurearg $proxyarg $followarg $bodyarg $headerarg -X $method $cookiesarg "$url")
    status=$?
fi

if [ $cookies -eq 1 ] ; then
    rm -f ${COOKIE_JAR_TEMP_PATH}
fi

end=$(($(date +%s%N)/1000000))

#decide output by return code
if [ $status -eq 0 ] ; then
    # Request succeeded: apply body checks, then the time thresholds.
    if [ -n "$contains" ]; then
        if [[ ! $body =~ $contains ]]; then
            echo "CRITICAL: body does not contain '${contains}'. Body: '$(echo $body | sed 's/\(.\{50\}\).*/\1.../')' |time=$((end - start))ms;${warning};${critical};0;"$critical"ms"
            exit 2
        fi
    fi
    if [ -n "$lacks" ]; then
        if [[ $body == *$lacks* ]]; then
            echo "CRITICAL: body contains '${lacks}'|time=$((end - start))ms;${warning};${critical};0;"$critical"ms"
            exit 2
        fi
    fi
    echo "$(checkTime $((end - start))) $((end - start))ms - ${url}|time=$((end - start))ms;${warning};${critical};0;"$critical"ms"
    getStatus $((end - start))
    exit $?
else
    # Map well-known curl exit codes to CRITICAL messages.
    case $status in
        1)
            echo "CRITICAL: Unsupported protocol"
            ;;
        3)
            echo "CRITICAL: Malformed URL"
            ;;
        5)
            echo "CRITICAL: Couldn't resolve proxy $proxy"
            ;;
        6)
            echo "CRITICAL: Couldn't resolve host"
            ;;
        7)
            echo "CRITICAL: Couldn't connect to proxy $proxy"
            ;;
        22)
            echo "CRITICAL: Server returned http code >= 400"
            ;;
        52)
            echo "CRITICAL: Server returned empty response (52)"
            ;;
        56)
            echo "CRITICAL: Failure receiving network data (56)"
            ;;
        60)
            echo "CRITICAL: SSL/TLS connection problem (60)"
            ;;
        *)
            echo "UNKNOWN: $status - ${url}"
            exit 3
            ;;
    esac
    exit 2
fi
|
||||||
|
|
|
@ -244,7 +244,7 @@ async def main() -> None:
|
||||||
|
|
||||||
for x in prints:
|
for x in prints:
|
||||||
print(f'\n{x}', end=' ')
|
print(f'\n{x}', end=' ')
|
||||||
print(f"|'{bot1_hs_domain}_outbound'={bot1_output_msg}s;;; '{bot1_hs_domain}_inbound'={bot2_output_msg}s;;;")
|
print(f"|'{bot1_hs_domain}_outbound'={bot1_output_msg}s;;; '{bot1_hs_domain}_inbound'={bot1_output_msg}s;;;")
|
||||||
|
|
||||||
sys.exit(nagios_output)
|
sys.exit(nagios_output)
|
||||||
|
|
||||||
|
|
|
@ -179,7 +179,7 @@ async def main() -> None:
|
||||||
exit_code = nagios.CRITICAL
|
exit_code = nagios.CRITICAL
|
||||||
prints.append(f"CRITICAL: recieved 301 to {urllib.parse.urlparse(headers['location']).netloc}")
|
prints.append(f"CRITICAL: recieved 301 to {urllib.parse.urlparse(headers['location']).netloc}")
|
||||||
else:
|
else:
|
||||||
prints.append(f'OK: is not redirected.')
|
prints.append(f'OK: was not redirected.')
|
||||||
|
|
||||||
if args.required_headers:
|
if args.required_headers:
|
||||||
# Icinga may pass the values as one string
|
# Icinga may pass the values as one string
|
||||||
|
@ -192,11 +192,11 @@ async def main() -> None:
|
||||||
if code > exit_code:
|
if code > exit_code:
|
||||||
exit_code = code
|
exit_code = code
|
||||||
|
|
||||||
results = [verify_media_header('synapse-media-local-status', headers), verify_media_header('synapse-media-s3-status', headers, good_value='200'), verify_media_header('synapse-media-server', headers, good_value='s3')]
|
# results = [verify_media_header('synapse-media-local-status', headers), verify_media_header('synapse-media-s3-status', headers, good_value='200'), verify_media_header('synapse-media-server', headers, good_value='s3')]
|
||||||
for header_chk, code in results:
|
# for header_chk, code in results:
|
||||||
prints.append(header_chk)
|
# prints.append(header_chk)
|
||||||
if code > exit_code:
|
# if code > exit_code:
|
||||||
exit_code = code
|
# exit_code = code
|
||||||
|
|
||||||
clean_msg = await cleanup(client, test_image_path, image_event_id=image_event_id)
|
clean_msg = await cleanup(client, test_image_path, image_event_id=image_event_id)
|
||||||
|
|
||||||
|
|
|
@ -54,9 +54,11 @@ def main():
|
||||||
m = re.match(r'<span class="tooltip">\s*Send: (.*?)\s*<br\/>\s*Receive: (.*?)\s*<\/span>', str(item))
|
m = re.match(r'<span class="tooltip">\s*Send: (.*?)\s*<br\/>\s*Receive: (.*?)\s*<\/span>', str(item))
|
||||||
if m:
|
if m:
|
||||||
domain = item.parent.parent.find('span', {'class': 'domain'}).text
|
domain = item.parent.parent.find('span', {'class': 'domain'}).text
|
||||||
|
s = ms_to_s(m.group(1))
|
||||||
|
r = ms_to_s(m.group(2))
|
||||||
data[domain] = {
|
data[domain] = {
|
||||||
'send': ms_to_s(m.group(1)),
|
'send': (s if s else -1),
|
||||||
'receive': ms_to_s(m.group(2)),
|
'receive': (r if r else -1),
|
||||||
}
|
}
|
||||||
exit_code = nagios.OK
|
exit_code = nagios.OK
|
||||||
info_str = []
|
info_str = []
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# check_nginx is a Nagios to monitor nginx status
|
||||||
|
# The version is 1.0.2
|
||||||
|
# fixed by Nikolay Kandalintsev (twitter: @nicloay)
|
||||||
|
# Based on yangzi2008@126.com from http://www.nginxs.com
|
||||||
|
# which available here http://exchange.nagios.org/directory/Plugins/Web-Servers/nginx/check_nginx/details
|
||||||
|
|
||||||
|
import getopt
|
||||||
|
import string
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
import urllib
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
|
|
||||||
|
def usage():
    """Print check_nginx help text to stdout and exit with status 3 (UNKNOWN)."""
    print("""check_nginx is a Nagios to monitor nginx status
Usage:

check_nginx [-h|--help][-U|--url][-P|--path][-u|--user][-p|--passwd][-w|--warning][-c|--critical]

Options:
--help|-h)
    print check_nginx help.
--url|-U)
    Sets nginx status url.
--path|-P)
    Sets nginx status url path. Default is: off
--user|-u)
    Sets nginx status BasicAuth user. Default is: off
--passwd|-p)
    Sets nginx status BasicAuth passwd. Default is: off
--warning|-w)
    Sets a warning level for nginx Active connections. Default is: off
--critical|-c)
    Sets a critical level for nginx Active connections. Default is: off
Example:
  The url is www.nginxs.com/status
  ./check_nginx -U www.nginxs.com -P /status -u eric -p nginx -w 1000 -c 2000
  if don't have password:
  ./check_nginx -U www.nginxs.com -P /status -w 1000 -c 2000
  if don't have path and password:
  ./check_nginx -U www.nginxs.com -w 1000 -c 2000""")

    # Nagios convention: 3 = UNKNOWN.
    sys.exit(3)
|
||||||
|
|
||||||
|
|
||||||
|
# Parse CLI options; any getopt error prints usage and exits UNKNOWN (3).
try:
    options, args = getopt.getopt(sys.argv[1:], "hU:P:u:p:w:c:", ["help", "url=", "path=", "user=", "passwd=", "warning=", "critical="])

except getopt.GetoptError:
    usage()
    sys.exit(3)

for name, value in options:
    if name in ("-h", "--help"):
        usage()
    if name in ("-U", "--url"):
        url = "http://" + value
    if name in ("-P", "--path"):
        path = value
    if name in ("-u", "--user"):
        user = value
    if name in ("-p", "--passwd"):
        passwd = value
    if name in ("-w", "--warning"):
        warning = value
    if name in ("-c", "--critical"):
        critical = value
try:
    # Python 3 fix: Request and the auth/opener helpers live in
    # urllib.request, not in the top-level urllib package (the original
    # urllib.Request etc. raised AttributeError under Python 3).
    if 'path' in dir():
        req = urllib.request.Request(url + path)
    else:
        req = urllib.request.Request(url)
    if 'user' in dir() and 'passwd' in dir():
        passman = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        passman.add_password(None, url + path, user, passwd)
        authhandler = urllib.request.HTTPBasicAuthHandler(passman)
        opener = urllib.request.build_opener(authhandler)
        urllib.request.install_opener(opener)
    response = urlopen(req)
    # The status page is read line by line; decode() because the response
    # body is bytes under Python 3 (int(b'…') would raise TypeError below).
    # First line: "Active connections: N"
    the_page = response.readline().decode()
    conn = the_page.split()
    ActiveConn = conn[2]
    the_page1 = response.readline()
    the_page2 = response.readline()
    # Fourth line: "Reading: R Writing: W Waiting: Q"
    the_page3 = response.readline().decode()
    response.close()
    b = the_page3.split()
    reading = b[1]
    writing = b[3]
    waiting = b[5]
    output = 'ActiveConn:%s,reading:%s,writing:%s,waiting:%s' % (ActiveConn, reading, writing, waiting)
    perfdata = 'ActiveConn=%s;reading=%s;writing=%s;waiting=%s' % (ActiveConn, reading, writing, waiting)

except Exception:
    print("NGINX STATUS unknown: Error while getting Connection")
    print(traceback.format_exc())
    sys.exit(3)
# Thresholds are optional; without them the plugin always reports OK.
if 'warning' in dir() and 'critical' in dir():
    if int(ActiveConn) >= int(critical):
        print('CRITICAL - %s|%s' % (output, perfdata))
        sys.exit(2)
    elif int(ActiveConn) >= int(warning):
        print('WARNING - %s|%s' % (output, perfdata))
        sys.exit(1)
    else:
        print('OK - %s|%s' % (output, perfdata))
        sys.exit(0)
else:
    print('OK - %s|%s' % (output, perfdata))
    sys.exit(0)
|
|
@ -0,0 +1,10 @@
|
||||||
|
FROM python:3

# Copy the plugin and its dependency manifest into the image.
# COPY is preferred over ADD for plain local files.
COPY check_pve.py /
COPY requirements.txt /

# python:3 already ships the interpreter and pip; the previous apt-get/apt
# installs of python3/python3-requests/python3-packaging were redundant and
# left the apt cache in the image. pip covers all requirements.
RUN pip3 install --no-cache-dir -r requirements.txt

# Keep the container alive; the plugin is invoked via 'docker exec'.
CMD ["tail", "-f", "/dev/null"]
|
|
@ -0,0 +1,339 @@
|
||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
Version 2, June 1991
|
||||||
|
|
||||||
|
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
Preamble
|
||||||
|
|
||||||
|
The licenses for most software are designed to take away your
|
||||||
|
freedom to share and change it. By contrast, the GNU General Public
|
||||||
|
License is intended to guarantee your freedom to share and change free
|
||||||
|
software--to make sure the software is free for all its users. This
|
||||||
|
General Public License applies to most of the Free Software
|
||||||
|
Foundation's software and to any other program whose authors commit to
|
||||||
|
using it. (Some other Free Software Foundation software is covered by
|
||||||
|
the GNU Lesser General Public License instead.) You can apply it to
|
||||||
|
your programs, too.
|
||||||
|
|
||||||
|
When we speak of free software, we are referring to freedom, not
|
||||||
|
price. Our General Public Licenses are designed to make sure that you
|
||||||
|
have the freedom to distribute copies of free software (and charge for
|
||||||
|
this service if you wish), that you receive source code or can get it
|
||||||
|
if you want it, that you can change the software or use pieces of it
|
||||||
|
in new free programs; and that you know you can do these things.
|
||||||
|
|
||||||
|
To protect your rights, we need to make restrictions that forbid
|
||||||
|
anyone to deny you these rights or to ask you to surrender the rights.
|
||||||
|
These restrictions translate to certain responsibilities for you if you
|
||||||
|
distribute copies of the software, or if you modify it.
|
||||||
|
|
||||||
|
For example, if you distribute copies of such a program, whether
|
||||||
|
gratis or for a fee, you must give the recipients all the rights that
|
||||||
|
you have. You must make sure that they, too, receive or can get the
|
||||||
|
source code. And you must show them these terms so they know their
|
||||||
|
rights.
|
||||||
|
|
||||||
|
We protect your rights with two steps: (1) copyright the software, and
|
||||||
|
(2) offer you this license which gives you legal permission to copy,
|
||||||
|
distribute and/or modify the software.
|
||||||
|
|
||||||
|
Also, for each author's protection and ours, we want to make certain
|
||||||
|
that everyone understands that there is no warranty for this free
|
||||||
|
software. If the software is modified by someone else and passed on, we
|
||||||
|
want its recipients to know that what they have is not the original, so
|
||||||
|
that any problems introduced by others will not reflect on the original
|
||||||
|
authors' reputations.
|
||||||
|
|
||||||
|
Finally, any free program is threatened constantly by software
|
||||||
|
patents. We wish to avoid the danger that redistributors of a free
|
||||||
|
program will individually obtain patent licenses, in effect making the
|
||||||
|
program proprietary. To prevent this, we have made it clear that any
|
||||||
|
patent must be licensed for everyone's free use or not licensed at all.
|
||||||
|
|
||||||
|
The precise terms and conditions for copying, distribution and
|
||||||
|
modification follow.
|
||||||
|
|
||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
0. This License applies to any program or other work which contains
|
||||||
|
a notice placed by the copyright holder saying it may be distributed
|
||||||
|
under the terms of this General Public License. The "Program", below,
|
||||||
|
refers to any such program or work, and a "work based on the Program"
|
||||||
|
means either the Program or any derivative work under copyright law:
|
||||||
|
that is to say, a work containing the Program or a portion of it,
|
||||||
|
either verbatim or with modifications and/or translated into another
|
||||||
|
language. (Hereinafter, translation is included without limitation in
|
||||||
|
the term "modification".) Each licensee is addressed as "you".
|
||||||
|
|
||||||
|
Activities other than copying, distribution and modification are not
|
||||||
|
covered by this License; they are outside its scope. The act of
|
||||||
|
running the Program is not restricted, and the output from the Program
|
||||||
|
is covered only if its contents constitute a work based on the
|
||||||
|
Program (independent of having been made by running the Program).
|
||||||
|
Whether that is true depends on what the Program does.
|
||||||
|
|
||||||
|
1. You may copy and distribute verbatim copies of the Program's
|
||||||
|
source code as you receive it, in any medium, provided that you
|
||||||
|
conspicuously and appropriately publish on each copy an appropriate
|
||||||
|
copyright notice and disclaimer of warranty; keep intact all the
|
||||||
|
notices that refer to this License and to the absence of any warranty;
|
||||||
|
and give any other recipients of the Program a copy of this License
|
||||||
|
along with the Program.
|
||||||
|
|
||||||
|
You may charge a fee for the physical act of transferring a copy, and
|
||||||
|
you may at your option offer warranty protection in exchange for a fee.
|
||||||
|
|
||||||
|
2. You may modify your copy or copies of the Program or any portion
|
||||||
|
of it, thus forming a work based on the Program, and copy and
|
||||||
|
distribute such modifications or work under the terms of Section 1
|
||||||
|
above, provided that you also meet all of these conditions:
|
||||||
|
|
||||||
|
a) You must cause the modified files to carry prominent notices
|
||||||
|
stating that you changed the files and the date of any change.
|
||||||
|
|
||||||
|
b) You must cause any work that you distribute or publish, that in
|
||||||
|
whole or in part contains or is derived from the Program or any
|
||||||
|
part thereof, to be licensed as a whole at no charge to all third
|
||||||
|
parties under the terms of this License.
|
||||||
|
|
||||||
|
c) If the modified program normally reads commands interactively
|
||||||
|
when run, you must cause it, when started running for such
|
||||||
|
interactive use in the most ordinary way, to print or display an
|
||||||
|
announcement including an appropriate copyright notice and a
|
||||||
|
notice that there is no warranty (or else, saying that you provide
|
||||||
|
a warranty) and that users may redistribute the program under
|
||||||
|
these conditions, and telling the user how to view a copy of this
|
||||||
|
License. (Exception: if the Program itself is interactive but
|
||||||
|
does not normally print such an announcement, your work based on
|
||||||
|
the Program is not required to print an announcement.)
|
||||||
|
|
||||||
|
These requirements apply to the modified work as a whole. If
|
||||||
|
identifiable sections of that work are not derived from the Program,
|
||||||
|
and can be reasonably considered independent and separate works in
|
||||||
|
themselves, then this License, and its terms, do not apply to those
|
||||||
|
sections when you distribute them as separate works. But when you
|
||||||
|
distribute the same sections as part of a whole which is a work based
|
||||||
|
on the Program, the distribution of the whole must be on the terms of
|
||||||
|
this License, whose permissions for other licensees extend to the
|
||||||
|
entire whole, and thus to each and every part regardless of who wrote it.
|
||||||
|
|
||||||
|
Thus, it is not the intent of this section to claim rights or contest
|
||||||
|
your rights to work written entirely by you; rather, the intent is to
|
||||||
|
exercise the right to control the distribution of derivative or
|
||||||
|
collective works based on the Program.
|
||||||
|
|
||||||
|
In addition, mere aggregation of another work not based on the Program
|
||||||
|
with the Program (or with a work based on the Program) on a volume of
|
||||||
|
a storage or distribution medium does not bring the other work under
|
||||||
|
the scope of this License.
|
||||||
|
|
||||||
|
3. You may copy and distribute the Program (or a work based on it,
|
||||||
|
under Section 2) in object code or executable form under the terms of
|
||||||
|
Sections 1 and 2 above provided that you also do one of the following:
|
||||||
|
|
||||||
|
a) Accompany it with the complete corresponding machine-readable
|
||||||
|
source code, which must be distributed under the terms of Sections
|
||||||
|
1 and 2 above on a medium customarily used for software interchange; or,
|
||||||
|
|
||||||
|
b) Accompany it with a written offer, valid for at least three
|
||||||
|
years, to give any third party, for a charge no more than your
|
||||||
|
cost of physically performing source distribution, a complete
|
||||||
|
machine-readable copy of the corresponding source code, to be
|
||||||
|
distributed under the terms of Sections 1 and 2 above on a medium
|
||||||
|
customarily used for software interchange; or,
|
||||||
|
|
||||||
|
c) Accompany it with the information you received as to the offer
|
||||||
|
to distribute corresponding source code. (This alternative is
|
||||||
|
allowed only for noncommercial distribution and only if you
|
||||||
|
received the program in object code or executable form with such
|
||||||
|
an offer, in accord with Subsection b above.)
|
||||||
|
|
||||||
|
The source code for a work means the preferred form of the work for
|
||||||
|
making modifications to it. For an executable work, complete source
|
||||||
|
code means all the source code for all modules it contains, plus any
|
||||||
|
associated interface definition files, plus the scripts used to
|
||||||
|
control compilation and installation of the executable. However, as a
|
||||||
|
special exception, the source code distributed need not include
|
||||||
|
anything that is normally distributed (in either source or binary
|
||||||
|
form) with the major components (compiler, kernel, and so on) of the
|
||||||
|
operating system on which the executable runs, unless that component
|
||||||
|
itself accompanies the executable.
|
||||||
|
|
||||||
|
If distribution of executable or object code is made by offering
|
||||||
|
access to copy from a designated place, then offering equivalent
|
||||||
|
access to copy the source code from the same place counts as
|
||||||
|
distribution of the source code, even though third parties are not
|
||||||
|
compelled to copy the source along with the object code.
|
||||||
|
|
||||||
|
4. You may not copy, modify, sublicense, or distribute the Program
|
||||||
|
except as expressly provided under this License. Any attempt
|
||||||
|
otherwise to copy, modify, sublicense or distribute the Program is
|
||||||
|
void, and will automatically terminate your rights under this License.
|
||||||
|
However, parties who have received copies, or rights, from you under
|
||||||
|
this License will not have their licenses terminated so long as such
|
||||||
|
parties remain in full compliance.
|
||||||
|
|
||||||
|
5. You are not required to accept this License, since you have not
|
||||||
|
signed it. However, nothing else grants you permission to modify or
|
||||||
|
distribute the Program or its derivative works. These actions are
|
||||||
|
prohibited by law if you do not accept this License. Therefore, by
|
||||||
|
modifying or distributing the Program (or any work based on the
|
||||||
|
Program), you indicate your acceptance of this License to do so, and
|
||||||
|
all its terms and conditions for copying, distributing or modifying
|
||||||
|
the Program or works based on it.
|
||||||
|
|
||||||
|
6. Each time you redistribute the Program (or any work based on the
|
||||||
|
Program), the recipient automatically receives a license from the
|
||||||
|
original licensor to copy, distribute or modify the Program subject to
|
||||||
|
these terms and conditions. You may not impose any further
|
||||||
|
restrictions on the recipients' exercise of the rights granted herein.
|
||||||
|
You are not responsible for enforcing compliance by third parties to
|
||||||
|
this License.
|
||||||
|
|
||||||
|
7. If, as a consequence of a court judgment or allegation of patent
|
||||||
|
infringement or for any other reason (not limited to patent issues),
|
||||||
|
conditions are imposed on you (whether by court order, agreement or
|
||||||
|
otherwise) that contradict the conditions of this License, they do not
|
||||||
|
excuse you from the conditions of this License. If you cannot
|
||||||
|
distribute so as to satisfy simultaneously your obligations under this
|
||||||
|
License and any other pertinent obligations, then as a consequence you
|
||||||
|
may not distribute the Program at all. For example, if a patent
|
||||||
|
license would not permit royalty-free redistribution of the Program by
|
||||||
|
all those who receive copies directly or indirectly through you, then
|
||||||
|
the only way you could satisfy both it and this License would be to
|
||||||
|
refrain entirely from distribution of the Program.
|
||||||
|
|
||||||
|
If any portion of this section is held invalid or unenforceable under
|
||||||
|
any particular circumstance, the balance of the section is intended to
|
||||||
|
apply and the section as a whole is intended to apply in other
|
||||||
|
circumstances.
|
||||||
|
|
||||||
|
It is not the purpose of this section to induce you to infringe any
|
||||||
|
patents or other property right claims or to contest validity of any
|
||||||
|
such claims; this section has the sole purpose of protecting the
|
||||||
|
integrity of the free software distribution system, which is
|
||||||
|
implemented by public license practices. Many people have made
|
||||||
|
generous contributions to the wide range of software distributed
|
||||||
|
through that system in reliance on consistent application of that
|
||||||
|
system; it is up to the author/donor to decide if he or she is willing
|
||||||
|
to distribute software through any other system and a licensee cannot
|
||||||
|
impose that choice.
|
||||||
|
|
||||||
|
This section is intended to make thoroughly clear what is believed to
|
||||||
|
be a consequence of the rest of this License.
|
||||||
|
|
||||||
|
8. If the distribution and/or use of the Program is restricted in
|
||||||
|
certain countries either by patents or by copyrighted interfaces, the
|
||||||
|
original copyright holder who places the Program under this License
|
||||||
|
may add an explicit geographical distribution limitation excluding
|
||||||
|
those countries, so that distribution is permitted only in or among
|
||||||
|
countries not thus excluded. In such case, this License incorporates
|
||||||
|
the limitation as if written in the body of this License.
|
||||||
|
|
||||||
|
9. The Free Software Foundation may publish revised and/or new versions
|
||||||
|
of the General Public License from time to time. Such new versions will
|
||||||
|
be similar in spirit to the present version, but may differ in detail to
|
||||||
|
address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the Program
|
||||||
|
specifies a version number of this License which applies to it and "any
|
||||||
|
later version", you have the option of following the terms and conditions
|
||||||
|
either of that version or of any later version published by the Free
|
||||||
|
Software Foundation. If the Program does not specify a version number of
|
||||||
|
this License, you may choose any version ever published by the Free Software
|
||||||
|
Foundation.
|
||||||
|
|
||||||
|
10. If you wish to incorporate parts of the Program into other free
|
||||||
|
programs whose distribution conditions are different, write to the author
|
||||||
|
to ask for permission. For software which is copyrighted by the Free
|
||||||
|
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||||
|
make exceptions for this. Our decision will be guided by the two goals
|
||||||
|
of preserving the free status of all derivatives of our free software and
|
||||||
|
of promoting the sharing and reuse of software generally.
|
||||||
|
|
||||||
|
NO WARRANTY
|
||||||
|
|
||||||
|
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||||
|
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||||
|
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||||
|
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||||
|
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||||
|
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||||
|
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||||
|
REPAIR OR CORRECTION.
|
||||||
|
|
||||||
|
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||||
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||||
|
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||||
|
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||||
|
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||||
|
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||||
|
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||||
|
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGES.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
How to Apply These Terms to Your New Programs
|
||||||
|
|
||||||
|
If you develop a new program, and you want it to be of the greatest
|
||||||
|
possible use to the public, the best way to achieve this is to make it
|
||||||
|
free software which everyone can redistribute and change under these terms.
|
||||||
|
|
||||||
|
To do so, attach the following notices to the program. It is safest
|
||||||
|
to attach them to the start of each source file to most effectively
|
||||||
|
convey the exclusion of warranty; and each file should have at least
|
||||||
|
the "copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
|
<one line to give the program's name and a brief idea of what it does.>
|
||||||
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
If the program is interactive, make it output a short notice like this
|
||||||
|
when it starts in an interactive mode:
|
||||||
|
|
||||||
|
Gnomovision version 69, Copyright (C) year name of author
|
||||||
|
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||||
|
This is free software, and you are welcome to redistribute it
|
||||||
|
under certain conditions; type `show c' for details.
|
||||||
|
|
||||||
|
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||||
|
parts of the General Public License. Of course, the commands you use may
|
||||||
|
be called something other than `show w' and `show c'; they could even be
|
||||||
|
mouse-clicks or menu items--whatever suits your program.
|
||||||
|
|
||||||
|
You should also get your employer (if you work as a programmer) or your
|
||||||
|
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||||
|
necessary. Here is a sample; alter the names:
|
||||||
|
|
||||||
|
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||||
|
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||||
|
|
||||||
|
<signature of Ty Coon>, 1 April 1989
|
||||||
|
Ty Coon, President of Vice
|
||||||
|
|
||||||
|
This General Public License does not permit incorporating your program into
|
||||||
|
proprietary programs. If your program is a subroutine library, you may
|
||||||
|
consider it more useful to permit linking proprietary applications with the
|
||||||
|
library. If this is what you want to do, use the GNU Lesser General
|
||||||
|
Public License instead of this License.
|
|
@ -0,0 +1,304 @@
|
||||||
|
# check_pve
|
||||||
|
Icinga check command for Proxmox VE via API
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
### Requirements
|
||||||
|
|
||||||
|
This check command depends on **Python 3** and the following modules:
|
||||||
|
* requests
|
||||||
|
* argparse
|
||||||
|
* packaging
|
||||||
|
|
||||||
|
**Installation on Debian / Ubuntu**
|
||||||
|
```
|
||||||
|
apt install python3 python3-requests python3-packaging
|
||||||
|
```
|
||||||
|
|
||||||
|
**Installation on Redhat 7 / CentOS 7**
|
||||||
|
```
|
||||||
|
yum install python36 python36-requests python36-packaging
|
||||||
|
```
|
||||||
|
|
||||||
|
**Installation on FreeBSD**
|
||||||
|
```
|
||||||
|
pkg install python3 py39-requests py39-packaging
|
||||||
|
```
|
||||||
|
|
||||||
|
**Installation from requirements file**
|
||||||
|
```
|
||||||
|
pip3 install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
**Installation as Docker container**
|
||||||
|
```
|
||||||
|
docker build -t check_pve .
|
||||||
|
```
|
||||||
|
After this, you can start the container like so:
|
||||||
|
```
|
||||||
|
docker run -d --name check_pve --rm check_pve
|
||||||
|
```
|
||||||
|
The container will keep running without having the need for any of the requirements listed above (for environments that do not support this).
|
||||||
|
Running a check is as simple as:
|
||||||
|
```
|
||||||
|
docker exec check_pve python check_pve.py ....rest of the default arguments listed below....
|
||||||
|
```
|
||||||
|
|
||||||
|
### Create a API user in Proxmox VE
|
||||||
|
|
||||||
|
Create a role named ``Monitoring`` and assign necessary privileges:
|
||||||
|
|
||||||
|
```
|
||||||
|
pveum roleadd Monitoring
|
||||||
|
pveum rolemod Monitoring --privs VM.Monitor,Sys.Audit,Datastore.Audit,VM.Audit
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a user named ``monitoring`` and set password:
|
||||||
|
|
||||||
|
```
|
||||||
|
pveum useradd monitoring@pve --comment "The ICINGA 2 monitoring user"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Use token based authorization (recommended)
|
||||||
|
|
||||||
|
Create an API token named `monitoring` for the user `monitoring`:
|
||||||
|
|
||||||
|
```
|
||||||
|
pveum user token add monitoring@pve monitoring
|
||||||
|
```
|
||||||
|
|
||||||
|
Please save the token secret as there isn't any way to fetch it at a later point.
|
||||||
|
|
||||||
|
Assign role `monitoring` to token `monitoring` and the user `monitoring@pve`:
|
||||||
|
|
||||||
|
```
|
||||||
|
pveum acl modify / --roles Monitoring --user 'monitoring@pve'
|
||||||
|
pveum acl modify / --roles Monitoring --tokens 'monitoring@pve!monitoring'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
#### Use password based authorization
|
||||||
|
|
||||||
|
Set password for the user `monitoring`:
|
||||||
|
|
||||||
|
```
|
||||||
|
pveum passwd monitoring@pve
|
||||||
|
```
|
||||||
|
|
||||||
|
Assign ``monitoring`` role to user ``monitoring``
|
||||||
|
|
||||||
|
```
|
||||||
|
pveum acl modify / --users monitoring@pve --roles Monitoring
|
||||||
|
```
|
||||||
|
|
||||||
|
For further information about the Proxmox VE privilege system have a look into the [documentation](https://pve.proxmox.com/pve-docs/pve-admin-guide.html#_strong_pveum_strong_proxmox_ve_user_manager).
|
||||||
|
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
The ``icinga2`` folder contains the command definition and service examples for use with Icinga2.
|
||||||
|
|
||||||
|
```
|
||||||
|
usage: check_pve.py [-h] -e API_ENDPOINT [--api-port API_PORT] -u API_USER (-p API_PASSWORD | -t API_TOKEN) [-k] -m
|
||||||
|
{cluster,version,cpu,memory,swap,storage,io_wait,updates,services,subscription,vm,vm_status,replication,disk-health,ceph-health,zfs-health,zfs-fragmentation} [-n NODE] [--name NAME] [--vmid VMID]
|
||||||
|
[--expected-vm-status {running,stopped,paused}] [--ignore-vm-status] [--ignore-service NAME] [--ignore-disk NAME] [-w THRESHOLD_WARNING] [-c THRESHOLD_CRITICAL] [-M] [-V MIN_VERSION] [--unit {GB,MB,KB,GiB,MiB,KiB,B}]
|
||||||
|
|
||||||
|
Check command for PVE hosts via API
|
||||||
|
|
||||||
|
options:
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
|
||||||
|
API Options:
|
||||||
|
-e API_ENDPOINT, --api-endpoint API_ENDPOINT
|
||||||
|
PVE api endpoint hostname
|
||||||
|
--api-port API_PORT PVE api endpoint port
|
||||||
|
-u API_USER, --username API_USER
|
||||||
|
PVE api user (e.g. icinga2@pve or icinga2@pam, depending on which backend you have chosen in proxmox)
|
||||||
|
-p API_PASSWORD, --password API_PASSWORD
|
||||||
|
PVE API user password
|
||||||
|
-t API_TOKEN, --api-token API_TOKEN
|
||||||
|
PVE API token (format: TOKEN_ID=TOKEN_SECRET)
|
||||||
|
-k, --insecure Don't verify HTTPS certificate
|
||||||
|
|
||||||
|
Check Options:
|
||||||
|
-m {cluster,version,cpu,memory,swap,storage,io_wait,updates,services,subscription,vm,vm_status,replication,disk-health,ceph-health,zfs-health,zfs-fragmentation}, --mode {cluster,version,cpu,memory,swap,storage,io_wait,updates,services,subscription,vm,vm_status,replication,disk-health,ceph-health,zfs-health,zfs-fragmentation}
|
||||||
|
Mode to use.
|
||||||
|
-n NODE, --node NODE Node to check (necessary for all modes except cluster and version)
|
||||||
|
--name NAME Name of storage, vm, or container
|
||||||
|
--vmid VMID ID of virtual machine or container
|
||||||
|
--expected-vm-status {running,stopped,paused}
|
||||||
|
Expected VM status
|
||||||
|
--ignore-vm-status Ignore VM status in checks
|
||||||
|
--ignore-service NAME
|
||||||
|
Ignore service NAME in checks
|
||||||
|
--ignore-disk NAME Ignore disk NAME in health check
|
||||||
|
-w THRESHOLD_WARNING, --warning THRESHOLD_WARNING
|
||||||
|
Warning threshold for check value. Multiple thresholds with name:value,name:value
|
||||||
|
-c THRESHOLD_CRITICAL, --critical THRESHOLD_CRITICAL
|
||||||
|
Critical threshold for check value. Multiple thresholds with name:value,name:value
|
||||||
|
-M Values are shown in the unit which is set with --unit (if available). Thresholds are also treated in this unit
|
||||||
|
-V MIN_VERSION, --min-version MIN_VERSION
|
||||||
|
The minimal pve version to check for. Any version lower than this will return CRITICAL.
|
||||||
|
--unit {GB,MB,KB,GiB,MiB,KiB,B}
|
||||||
|
Unit which is used for performance data and other values
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## Check examples
|
||||||
|
|
||||||
|
|
||||||
|
**Check cluster health**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -t <API_TOKEN> -e <API_ENDPOINT> -m cluster
|
||||||
|
OK - Cluster 'proxmox1' is healthy
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check PVE version**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m version -V 5.0.0
|
||||||
|
OK - Your pve instance version '5.2' (0fcd7879) is up to date
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check CPU load**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m cpu -n node1
|
||||||
|
OK - CPU usage is 2.4%|usage=2.4%;;
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check memory usage**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m memory -n node1
|
||||||
|
OK - Memory usage is 37.44%|usage=37.44%;; used=96544.72MB;;;257867.91
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check disk-health**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m disk-health -n node1
|
||||||
|
OK - All disks are healthy|wearout_sdb=96%;; wearout_sdc=96%;; wearout_sdd=96%;; wearout_sde=96%;;
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check storage usage**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m storage -n node1 --name local
|
||||||
|
OK - Storage usage is 54.23%|usage=54.23%;; used=128513.11MB;;;236980.36
|
||||||
|
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m storage -n node1 --name vms-disx
|
||||||
|
CRITICAL - Storage 'vms-disx' doesn't exist on node 'node01'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check subscription status**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m subscription -n node1 -w 50 -c 10
|
||||||
|
OK - Subscription of level 'Community' is valid until 2019-01-09
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check VM status**
|
||||||
|
|
||||||
|
Without specifying a node name:
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m vm --name test-vm
|
||||||
|
OK - VM 'test-vm' is running on 'node1'|cpu=1.85%;; memory=8.33%;;
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also pass a container name for the VM check:
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m vm --name test-lxc
|
||||||
|
OK - LXC 'test-lxc' on node 'node1' is running|cpu=0.11%;; memory=13.99%;;
|
||||||
|
```
|
||||||
|
|
||||||
|
With memory thresholds:
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m vm --name test-vm -w 50 -c 80
|
||||||
|
OK - VM 'test-vm' is running on 'node1'|cpu=1.85%;; memory=40.33%;50.0;80.0
|
||||||
|
```
|
||||||
|
|
||||||
|
With a specified node name, the check plugin verifies on which node the VM runs.
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m vm -n node1 --name test-vm
|
||||||
|
OK - VM 'test-vm' is running on node 'node1'|cpu=1.85%;; memory=8.33%;;
|
||||||
|
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m vm -n node1 --name test-vm
|
||||||
|
WARNING - VM 'test-vm' is running on node 'node2' instead of 'node1'|cpu=1.85%;; memory=8.33%;;
|
||||||
|
```
|
||||||
|
|
||||||
|
If you only want to gather metrics and don't care about the vm status add the ``--ignore-vm-status`` flag:
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m vm --name test-vm --ignore-vm-status
|
||||||
|
OK - VM 'test-vm' is not running
|
||||||
|
```
|
||||||
|
|
||||||
|
Specify the expected VM status:
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m vm --name test-vm --expected-vm-status stopped
|
||||||
|
OK - VM 'test-vm' is not running
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
For hostalive checks without gathering performance data use ``vm_status`` instead of ``vm``. The parameters are the same as with ``vm``.
|
||||||
|
|
||||||
|
**Check swap usage**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m swap -n pve
|
||||||
|
OK - Swap usage is 0.0 %|usage=0.0%;; used=0.0MB;;;8192.0
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check storage replication status**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m replication -n node1
|
||||||
|
OK - No failed replication jobs on node1
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check ceph cluster health**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m ceph-health
|
||||||
|
WARNING - Ceph Cluster is in warning state
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check ZFS pool health**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m zfs-health -n pve
|
||||||
|
OK - All ZFS pools are healthy
|
||||||
|
```
|
||||||
|
|
||||||
|
Check for specific pool:
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m zfs-health -n pve --name rpool
|
||||||
|
OK - ZFS pool 'rpool' is healthy
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check ZFS pool fragmentation**
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m zfs-fragmentation -n pve -w 40 -c 60
|
||||||
|
CRITICAL - 2 of 2 ZFS pools are above fragmentation thresholds:
|
||||||
|
|
||||||
|
- rpool (71 %) is CRITICAL
|
||||||
|
- diskpool (50 %) is WARNING
|
||||||
|
|fragmentation_diskpool=50%;40.0;60.0 fragmentation_rpool=71%;40.0;60.0
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Check for specific pool:
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m zfs-fragmentation -n pve --name diskpool -w 40 -c 60
|
||||||
|
WARNING - Fragmentation of ZFS pool 'diskpool' is above thresholds: 50 %|fragmentation=50%;40.0;60.0
|
||||||
|
```
|
||||||
|
|
||||||
|
## FAQ
|
||||||
|
|
||||||
|
### Individual thresholds per metric
|
||||||
|
|
||||||
|
You can either specify a threshold for warning or critical which is applied to all metrics or define individual thresholds like this (`name:value,name:value,...`):
|
||||||
|
|
||||||
|
```
|
||||||
|
./check_pve.py -u <API_USER> -p <API_PASSWORD> -e <API_ENDPOINT> -m vm --name test-vm -w memory:50 -c cpu:50,memory:80
|
||||||
|
OK - VM 'test-vm' is running on 'node1'|cpu=1.85%;50.0; memory=40.33%;50.0;80.0
|
||||||
|
```
|
||||||
|
|
||||||
|
### Could not connect to PVE API: Failed to resolve hostname
|
||||||
|
|
||||||
|
Verify that your DNS server is working and can resolve your hostname. If everything is fine check for proxyserver environment variables (HTTP_PROXY,HTTPS_PROXY), which maybe not allow communication to port 8006.
|
||||||
|
|
||||||
|
## Contributors
|
||||||
|
|
||||||
|
Thank you to everyone, who is contributing to `check_pve`: https://github.com/nbuchwitz/check_pve/graphs/contributors.
|
|
@ -0,0 +1,819 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# check_pve.py - A check plugin for Proxmox Virtual Environment (PVE).
|
||||||
|
# Copyright (C) 2018-2022 Nicolai Buchwitz <nb@tipi-net.de>
|
||||||
|
#
|
||||||
|
# Version: 1.2.2
|
||||||
|
#
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
|
try:
|
||||||
|
from enum import Enum
|
||||||
|
from datetime import datetime
|
||||||
|
from packaging import version
|
||||||
|
import argparse
|
||||||
|
import requests
|
||||||
|
|
||||||
|
except ImportError as e:
|
||||||
|
print("Missing python module: {}".format(str(e)))
|
||||||
|
sys.exit(255)
|
||||||
|
|
||||||
|
|
||||||
|
class CheckState(Enum):
    """Nagios/Icinga plugin exit states and their numeric exit codes."""

    OK = 0
    WARNING = 1
    CRITICAL = 2
    UNKNOWN = 3
|
||||||
|
|
||||||
|
|
||||||
|
class CheckThreshold:
    """A single numeric warning/critical threshold.

    Instances compare by their wrapped value, and check() decides whether a
    measured value violates the threshold (above by default, below when
    lower=True, e.g. for subscription days-left checks).
    """

    def __init__(self, value: float):
        self.value = value

    def __eq__(self, other):
        return self.value == other.value

    def __lt__(self, other):
        return self.value < other.value

    def __le__(self, other):
        return self.value <= other.value

    def __gt__(self, other):
        return self.value > other.value

    def __ge__(self, other):
        return self.value >= other.value

    def check(self, value: float, lower: bool = False):
        """Return True when 'value' violates this threshold.

        lower=False: violation means value is above the threshold.
        lower=True:  violation means value is below the threshold.
        """
        if lower:
            return value < self.value
        else:
            return value > self.value

    @staticmethod
    def threshold_type(arg: str):
        """argparse 'type' callable parsing '-w' / '-c' arguments.

        Accepts either a plain number (one threshold for every metric,
        stored under the key None) or a comma-separated list of
        'name:value' pairs (one threshold per metric name).

        Raises argparse.ArgumentTypeError on malformed input.
        """
        thresholds = {}

        # Fixed: was a bare 'except:' which also swallowed SystemExit /
        # KeyboardInterrupt; only a float-parse failure should fall through
        # to the per-metric syntax.
        try:
            thresholds[None] = CheckThreshold(float(arg))
        except ValueError:
            for t in arg.split(','):
                # Fixed: re.match accepted trailing garbage ('cpu:50x' parsed
                # as 50); fullmatch requires the whole token to be well-formed.
                m = re.fullmatch("([a-z_0-9]+):([0-9.]+)", t)

                if m:
                    thresholds[m.group(1)] = CheckThreshold(float(m.group(2)))
                else:
                    raise argparse.ArgumentTypeError(
                        "invalid threshold format: {}".format(t))

        return thresholds
|
||||||
|
|
||||||
|
|
||||||
|
class CheckPVE:
|
||||||
|
    # Plugin version (keep in sync with the file header comment).
    VERSION = '1.2.2'

    # Template for every API call; 'command' is the path below /api2/json/.
    API_URL = 'https://{hostname}:{port}/api2/json/{command}'

    # Divisors for converting a raw byte count into the unit selected
    # via --unit (decimal SI prefixes and binary IEC prefixes).
    UNIT_SCALE = {
        "GB": 10**9,
        "MB": 10**6,
        "KB": 10**3,
        "GiB": 2**30,
        "MiB": 2**20,
        "KiB": 2**10,
        "B": 1
    }
|
||||||
|
|
||||||
|
def check_output(self):
|
||||||
|
message = self.check_message
|
||||||
|
if self.perfdata:
|
||||||
|
message += self.get_perfdata()
|
||||||
|
|
||||||
|
self.output(self.check_result, message)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def output(rc, message):
|
||||||
|
prefix = rc.name
|
||||||
|
message = '{} - {}'.format(prefix, message)
|
||||||
|
|
||||||
|
print(message)
|
||||||
|
sys.exit(rc.value)
|
||||||
|
|
||||||
|
def get_url(self, command):
|
||||||
|
return self.API_URL.format(hostname=self.options.api_endpoint, command=command, port=self.options.api_port)
|
||||||
|
|
||||||
|
def request(self, url, method='get', **kwargs):
|
||||||
|
response = None
|
||||||
|
try:
|
||||||
|
if method == 'post':
|
||||||
|
response = requests.post(
|
||||||
|
url,
|
||||||
|
verify=not self.options.api_insecure,
|
||||||
|
data=kwargs.get('data', None),
|
||||||
|
timeout=5
|
||||||
|
)
|
||||||
|
elif method == 'get':
|
||||||
|
response = requests.get(
|
||||||
|
url,
|
||||||
|
verify=not self.options.api_insecure,
|
||||||
|
cookies=self.__cookies,
|
||||||
|
headers=self.__headers,
|
||||||
|
params=kwargs.get('params', None),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.output(CheckState.CRITICAL, "Unsupport request method: {}".format(method))
|
||||||
|
except requests.exceptions.ConnectTimeout:
|
||||||
|
self.output(CheckState.UNKNOWN, "Could not connect to PVE API: Connection timeout")
|
||||||
|
except requests.exceptions.SSLError:
|
||||||
|
self.output(CheckState.UNKNOWN, "Could not connect to PVE API: Certificate validation failed")
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
self.output(CheckState.UNKNOWN, "Could not connect to PVE API: Failed to resolve hostname")
|
||||||
|
|
||||||
|
if response.ok:
|
||||||
|
return response.json()['data']
|
||||||
|
else:
|
||||||
|
message = "Could not fetch data from API: "
|
||||||
|
|
||||||
|
if response.status_code == 401:
|
||||||
|
message += "Could not connection to PVE API: invalid username or password"
|
||||||
|
elif response.status_code == 403:
|
||||||
|
message += "Access denied. Please check if API user has sufficient permissions / the role has been " \
|
||||||
|
"assigned."
|
||||||
|
else:
|
||||||
|
message += "HTTP error code was {}".format(response.status_code)
|
||||||
|
|
||||||
|
self.output(CheckState.UNKNOWN, message)
|
||||||
|
|
||||||
|
def get_ticket(self):
|
||||||
|
url = self.get_url('access/ticket')
|
||||||
|
data = {"username": self.options.api_user, "password": self.options.api_password}
|
||||||
|
result = self.request(url, "post", data=data)
|
||||||
|
|
||||||
|
return result['ticket']
|
||||||
|
|
||||||
|
def check_api_value(self, url, message, **kwargs):
|
||||||
|
result = self.request(url)
|
||||||
|
used = None
|
||||||
|
|
||||||
|
if 'key' in kwargs:
|
||||||
|
result = result[kwargs.get('key')]
|
||||||
|
|
||||||
|
if isinstance(result, (dict,)):
|
||||||
|
used_percent = self.get_value(result['used'], result['total'])
|
||||||
|
used = self.get_value(result['used'])
|
||||||
|
total = self.get_value(result['total'])
|
||||||
|
|
||||||
|
self.add_perfdata(kwargs.get('perfkey', 'usage'), used_percent)
|
||||||
|
self.add_perfdata(kwargs.get('perfkey', 'used'), used, max=total, unit=self.options.unit)
|
||||||
|
else:
|
||||||
|
used_percent = round(float(result) * 100, 2)
|
||||||
|
self.add_perfdata(kwargs.get('perfkey', 'usage'), used_percent)
|
||||||
|
|
||||||
|
if self.options.values_mb:
|
||||||
|
message += ' {} {}'.format(used, self.options.unit)
|
||||||
|
value = used
|
||||||
|
else:
|
||||||
|
message += ' {} {}'.format(used_percent, '%')
|
||||||
|
value = used_percent
|
||||||
|
|
||||||
|
self.check_thresholds(value, message)
|
||||||
|
|
||||||
|
def check_vm_status(self, idx, **kwargs):
|
||||||
|
url = self.get_url('cluster/resources', )
|
||||||
|
data = self.request(url, params={'type': 'vm'})
|
||||||
|
|
||||||
|
expected_state = kwargs.get("expected_state", "running")
|
||||||
|
only_status = kwargs.get("only_status", False)
|
||||||
|
|
||||||
|
found = False
|
||||||
|
for vm in data:
|
||||||
|
if vm['name'] == idx or vm['vmid'] == idx:
|
||||||
|
# Check if VM (default) or LXC
|
||||||
|
vm_type = "VM"
|
||||||
|
if vm['type'] == 'lxc':
|
||||||
|
vm_type = "LXC"
|
||||||
|
|
||||||
|
if vm['status'] != expected_state:
|
||||||
|
self.check_message = "{} '{}' is {} (expected: {})".format(vm_type, vm['name'], vm['status'],
|
||||||
|
expected_state)
|
||||||
|
if not self.options.ignore_vm_status:
|
||||||
|
self.check_result = CheckState.CRITICAL
|
||||||
|
else:
|
||||||
|
if self.options.node and self.options.node != vm['node']:
|
||||||
|
self.check_message = "{} '{}' is {}, but located on node '{}' instead of '{}'" \
|
||||||
|
.format(vm_type, vm['name'], expected_state, vm['node'], self.options.node)
|
||||||
|
self.check_result = CheckState.WARNING
|
||||||
|
else:
|
||||||
|
self.check_message = "{} '{}' is {} on node '{}'" \
|
||||||
|
.format(vm_type, vm['name'], expected_state, vm['node'])
|
||||||
|
|
||||||
|
if vm['status'] == 'running' and not only_status:
|
||||||
|
cpu = round(vm['cpu'] * 100, 2)
|
||||||
|
self.add_perfdata("cpu", cpu)
|
||||||
|
|
||||||
|
if self.options.values_mb:
|
||||||
|
memory = self.scale_value(vm['mem'])
|
||||||
|
self.add_perfdata("memory", memory, unit=self.options.unit, max=self.scale_value(vm['maxmem']))
|
||||||
|
|
||||||
|
else:
|
||||||
|
memory = self.get_value(vm['mem'], vm['maxmem'])
|
||||||
|
self.add_perfdata("memory", memory)
|
||||||
|
|
||||||
|
self.check_thresholds({"cpu": cpu, "memory": memory}, message=self.check_message)
|
||||||
|
|
||||||
|
found = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not found:
|
||||||
|
self.check_message = "VM or LXC '{}' not found".format(idx)
|
||||||
|
self.check_result = CheckState.WARNING
|
||||||
|
|
||||||
|
def check_disks(self):
|
||||||
|
url = self.get_url('nodes/{}/disks'.format(self.options.node))
|
||||||
|
|
||||||
|
failed = []
|
||||||
|
unknown = []
|
||||||
|
disks = self.request(url + '/list')
|
||||||
|
for disk in disks:
|
||||||
|
name = disk['devpath'].replace('/dev/', '')
|
||||||
|
|
||||||
|
if name in self.options.ignore_disks:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if disk['health'] == 'UNKNOWN':
|
||||||
|
self.check_result = CheckState.WARNING
|
||||||
|
unknown.append({"serial": disk["serial"], "device": disk['devpath']})
|
||||||
|
|
||||||
|
elif disk['health'] not in ('PASSED', 'OK'):
|
||||||
|
self.check_result = CheckState.WARNING
|
||||||
|
failed.append({"serial": disk["serial"], "device": disk['devpath']})
|
||||||
|
|
||||||
|
if disk['wearout'] != 'N/A':
|
||||||
|
self.add_perfdata('wearout_{}'.format(name), disk['wearout'])
|
||||||
|
|
||||||
|
if failed:
|
||||||
|
self.check_message = "{} of {} disks failed the health test:\n".format(len(failed), len(disks))
|
||||||
|
for disk in failed:
|
||||||
|
self.check_message += "- {} with serial '{}'\n".format(disk['device'], disk['serial'])
|
||||||
|
|
||||||
|
if unknown:
|
||||||
|
self.check_message += "{} of {} disks have unknown health status:\n".format(len(unknown), len(disks))
|
||||||
|
for disk in unknown:
|
||||||
|
self.check_message += "- {} with serial '{}'\n".format(disk['device'], disk['serial'])
|
||||||
|
|
||||||
|
if not failed and not unknown:
|
||||||
|
self.check_message = "All disks are healthy"
|
||||||
|
|
||||||
|
def check_replication(self):
|
||||||
|
url = self.get_url('nodes/{}/replication'.format(self.options.node))
|
||||||
|
|
||||||
|
if self.options.vmid:
|
||||||
|
data = self.request(url, params={'guest': self.options.vmid})
|
||||||
|
else:
|
||||||
|
data = self.request(url)
|
||||||
|
|
||||||
|
failed_jobs = [] # format: [{guest: str, fail_count: int, error: str}]
|
||||||
|
performance_data = []
|
||||||
|
|
||||||
|
for job in data:
|
||||||
|
if job['fail_count'] > 0:
|
||||||
|
failed_jobs.append({'guest': job['guest'], 'fail_count': job['fail_count'], 'error': job['error']})
|
||||||
|
else:
|
||||||
|
performance_data.append({'id': job['id'], 'duration': job['duration']})
|
||||||
|
|
||||||
|
if len(failed_jobs) > 0:
|
||||||
|
message = "Failed replication jobs on {}: ".format(self.options.node)
|
||||||
|
for job in failed_jobs:
|
||||||
|
message = message + "GUEST: {j[guest]}, FAIL_COUNT: {j[fail_count]}, ERROR: {j[error]} ; ".format(j=job)
|
||||||
|
self.check_message = message
|
||||||
|
self.check_result = CheckState.WARNING
|
||||||
|
else:
|
||||||
|
self.check_message = "No failed replication jobs on {}".format(self.options.node)
|
||||||
|
self.check_result = CheckState.OK
|
||||||
|
|
||||||
|
if len(performance_data) > 0:
|
||||||
|
for metric in performance_data:
|
||||||
|
self.add_perfdata('duration_' + metric['id'], metric['duration'], unit='s')
|
||||||
|
|
||||||
|
def check_services(self):
|
||||||
|
url = self.get_url('nodes/{}/services'.format(self.options.node))
|
||||||
|
data = self.request(url)
|
||||||
|
|
||||||
|
failed = {}
|
||||||
|
for service in data:
|
||||||
|
if service['state'] != 'running' \
|
||||||
|
and service.get('active-state', 'active') == 'active' \
|
||||||
|
and service['name'] not in self.options.ignore_services:
|
||||||
|
failed[service['name']] = service['desc']
|
||||||
|
|
||||||
|
if failed:
|
||||||
|
self.check_result = CheckState.CRITICAL
|
||||||
|
message = "{} services are not running:\n\n".format(len(failed))
|
||||||
|
message += "\n".join(['- {} ({}) is not running'.format(failed[i], i) for i in failed])
|
||||||
|
self.check_message = message
|
||||||
|
else:
|
||||||
|
self.check_message = "All services are running"
|
||||||
|
|
||||||
|
def check_subscription(self):
|
||||||
|
url = self.get_url('nodes/{}/subscription'.format(self.options.node))
|
||||||
|
data = self.request(url)
|
||||||
|
|
||||||
|
if data['status'] == 'NotFound':
|
||||||
|
self.check_result = CheckState.WARNING
|
||||||
|
self.check_message = "No valid subscription found"
|
||||||
|
if data['status'] == 'Inactive':
|
||||||
|
self.check_result = CheckState.CRITICAL
|
||||||
|
self.check_message = "Subscription expired"
|
||||||
|
elif data['status'] == 'Active':
|
||||||
|
subscription_due_date = data['nextduedate']
|
||||||
|
subscription_product_name = data['productname']
|
||||||
|
|
||||||
|
date_expire = datetime.strptime(subscription_due_date, '%Y-%m-%d')
|
||||||
|
date_today = datetime.today()
|
||||||
|
delta = (date_expire - date_today).days
|
||||||
|
|
||||||
|
message = '{} is valid until {}'.format(
|
||||||
|
subscription_product_name,
|
||||||
|
subscription_due_date)
|
||||||
|
message_warning_critical = '{} will expire in {} days ({})'.format(
|
||||||
|
subscription_product_name,
|
||||||
|
delta,
|
||||||
|
subscription_due_date)
|
||||||
|
|
||||||
|
self.check_thresholds(delta, message, messageWarning=message_warning_critical,
|
||||||
|
messageCritical=message_warning_critical, lowerValue=True)
|
||||||
|
|
||||||
|
def check_updates(self):
    """Report the number of pending APT package updates for the node."""
    endpoint = self.get_url('nodes/{}/apt/update'.format(self.options.node))
    pending = len(self.request(endpoint))

    if not pending:
        self.check_message = "System up to date"
        return

    self.check_result = CheckState.WARNING
    # pluralize only when more than one update is pending
    template = "{} pending update" + ("s" if pending > 1 else "")
    self.check_message = template.format(pending)
def check_cluster_status(self):
    """Check quorum and node availability of the PVE cluster.

    CRITICAL when the cluster has no quorum, WARNING when quorate but
    some nodes are offline, otherwise OK. A plain message is set when
    no cluster configuration exists at all.
    """
    url = self.get_url('cluster/status')
    data = self.request(url)

    nodes = {}
    quorate = None
    cluster = ''
    for elem in data:
        if elem['type'] == 'cluster':
            quorate = elem['quorate']
            cluster = elem['name']
        elif elem['type'] == 'node':
            nodes[elem['name']] = elem['online']

    if quorate is None:
        # standalone node — no cluster entry in the status list
        self.check_message = 'No cluster configuration found'
    elif quorate:
        node_count = len(nodes)
        nodes_online_count = len({k: v for k, v in nodes.items() if v})

        if node_count > nodes_online_count:
            diff = node_count - nodes_online_count
            self.check_result = CheckState.WARNING
            # fixed: removed stray trailing apostrophe from the message
            self.check_message = "Cluster '{}' is healthy, but {} node(s) offline".format(cluster, diff)
        else:
            # fixed: removed stray trailing apostrophe from the message
            self.check_message = "Cluster '{}' is healthy".format(cluster)

        self.add_perfdata('nodes_total', node_count, unit='')
        self.add_perfdata('nodes_online', nodes_online_count, unit='')
    else:
        self.check_result = CheckState.CRITICAL
        self.check_message = 'Cluster is unhealthy - no quorum'
def check_zfs_fragmentation(self, name=None):
    """Check fragmentation of one ZFS pool (when ``name`` is given) or all pools.

    Emits perfdata per pool and compares each pool's 'frag' value against
    per-pool thresholds named 'fragmentation_<pool>'.
    """
    url = self.get_url('nodes/{}/disks/zfs'.format(self.options.node))
    data = self.request(url)

    warnings = []
    critical = []
    # With no explicit name, every pool counts as "found".
    found = name is None
    for pool in data:
        found = found or name == pool['name']
        if (name is not None and name == pool['name']) or name is None:
            # Perfdata label: plain 'fragmentation' in single-pool mode,
            # 'fragmentation_<pool>' when reporting all pools.
            key = "fragmentation"
            if name is None:
                key += '_{}'.format(pool['name'])
            self.add_perfdata(key, pool['frag'])

            # Thresholds are always looked up by per-pool name.
            threshold_name = "fragmentation_{}".format(pool['name'])
            threshold_warning = self.threshold_warning(threshold_name)
            threshold_critical = self.threshold_critical(threshold_name)

            # Critical takes precedence over warning for the same pool.
            if threshold_critical is not None and pool['frag'] > float(
                    threshold_critical.value):
                critical.append(pool)
            elif threshold_warning is not None and pool['frag'] > float(
                    threshold_warning.value):
                warnings.append(pool)

    if not found:
        # A pool name was requested but never seen in the API response.
        self.check_result = CheckState.UNKNOWN
        self.check_message = "Could not fetch fragmentation of ZFS pool '{}'".format(name)
    else:
        if warnings or critical:
            value = None
            if critical:
                self.check_result = CheckState.CRITICAL
                if name is not None:
                    # Single-pool mode: report that pool's value directly.
                    value = critical[0]['frag']
            else:
                self.check_result = CheckState.WARNING
                if name is not None:
                    value = warnings[0]['frag']

            if name is not None:
                self.check_message = "Fragmentation of ZFS pool '{}' is above thresholds: {} %".format(name, value)
            else:
                # Multi-pool mode: summarize all offending pools.
                message = "{} of {} ZFS pools are above fragmentation thresholds:\n\n".format(
                    len(warnings) + len(critical), len(data))
                message += "\n".join(
                    ['- {} ({} %) is CRITICAL\n'.format(pool['name'], pool['frag']) for pool in critical])
                message += "\n".join(
                    ['- {} ({} %) is WARNING\n'.format(pool['name'], pool['frag']) for pool in warnings])
                self.check_message = message
        else:
            self.check_result = CheckState.OK
            if name is not None:
                self.check_message = "Fragmentation of ZFS pool '{}' is OK".format(name)
            else:
                self.check_message = "Fragmentation of all ZFS pools is OK"
def check_zfs_health(self, name=None):
    """Check the health state of one ZFS pool (``name``) or of all pools."""
    data = self.request(self.get_url('nodes/{}/disks/zfs'.format(self.options.node)))

    healthy_conditions = ['online']
    # Without an explicit name every pool is in scope and counts as found.
    found = name is None
    unhealthy = []
    for pool in data:
        if name == pool['name']:
            found = True
        if name is None or name == pool['name']:
            if pool['health'].lower() not in healthy_conditions:
                unhealthy.append(pool)

    if not found:
        self.check_result = CheckState.UNKNOWN
        self.check_message = "Could not fetch health of ZFS pool '{}'".format(name)
    elif unhealthy:
        self.check_result = CheckState.CRITICAL
        lines = ['- {} ({}) is not healthy'.format(pool['name'], pool['health']) for pool in unhealthy]
        self.check_message = "{} ZFS pools are not healthy:\n\n".format(len(unhealthy)) + "\n".join(lines)
    else:
        self.check_result = CheckState.OK
        if name is None:
            self.check_message = "All ZFS pools are healthy"
        else:
            self.check_message = "ZFS pool '{}' is healthy".format(name)
def check_ceph_health(self):
    """Map the Ceph cluster health status onto a check state."""
    data = self.request(self.get_url('cluster/ceph/status'))
    ceph_health = data.get('health', {})

    if 'status' not in ceph_health:
        self.check_result = CheckState.UNKNOWN
        self.check_message = "Could not fetch Ceph status from API. " \
                             "Check the output of 'pvesh get cluster/ceph' on your node"
        return

    # Translate Ceph's own health strings into plugin states.
    outcomes = {
        'HEALTH_OK': (CheckState.OK, "Ceph Cluster is healthy"),
        'HEALTH_WARN': (CheckState.WARNING, "Ceph Cluster is in warning state"),
        'HEALTH_CRIT': (CheckState.CRITICAL, "Ceph Cluster is in critical state"),
    }
    fallback = (CheckState.UNKNOWN, "Ceph Cluster is in unknown state")
    self.check_result, self.check_message = outcomes.get(ceph_health['status'], fallback)
def check_storage(self, name):
    """Check usage of storage ``name`` on the configured node."""
    # Verify the storage exists on the node before querying its status.
    listing_url = self.get_url('nodes/{}/storage'.format(self.options.node))
    known_storages = [entry['storage'] for entry in self.request(listing_url)]

    if name not in known_storages:
        self.check_result = CheckState.CRITICAL
        self.check_message = "Storage '{}' doesn't exist on node '{}'".format(name, self.options.node)
        return

    status_url = self.get_url('nodes/{}/storage/{}/status'.format(self.options.node, name))
    self.check_api_value(status_url, "Usage of storage '{}' is".format(name))
def check_version(self):
    """Check the PVE version; CRITICAL when below --min-version."""
    data = self.request(self.get_url('version'))
    current = data['version']

    if not current:
        self.check_result = CheckState.UNKNOWN
        self.check_message = "Unable to determine pve version"
        return

    minimum = self.options.min_version
    if minimum and version.parse(minimum) > version.parse(current):
        self.check_result = CheckState.CRITICAL
        self.check_message = "Current pve version '{}' ({}) is lower than the min. required version '{}'".format(
            current, data['repoid'], minimum)
    else:
        self.check_message = "Your pve instance version '{}' ({}) is up to date".format(current,
                                                                                       data['repoid'])
def check_memory(self):
    """Check RAM usage of the configured node."""
    status_url = self.get_url('nodes/{}/status'.format(self.options.node))
    self.check_api_value(status_url, 'Memory usage is', key='memory')
def check_swap(self):
    """Check swap usage of the configured node."""
    status_url = self.get_url('nodes/{}/status'.format(self.options.node))
    self.check_api_value(status_url, 'Swap usage is', key='swap')
def check_cpu(self):
    """Check CPU usage of the configured node."""
    status_url = self.get_url('nodes/{}/status'.format(self.options.node))
    self.check_api_value(status_url, 'CPU usage is', key='cpu')
def check_io_wait(self):
    """Check IO-wait of the configured node."""
    status_url = self.get_url('nodes/{}/status'.format(self.options.node))
    self.check_api_value(status_url, 'IO wait is', key='wait', perfkey='wait')
def check_thresholds(self, value, message, **kwargs):
    """Compare value(s) against warning/critical thresholds and set state.

    ``value`` may be a scalar or a dict mapping metric name -> value.
    Optional kwargs: messageWarning, messageCritical, lowerValue.
    """
    metrics = value if isinstance(value, dict) else {None: value}
    lower = kwargs.get('lowerValue', False)

    warn_hit = False
    crit_hit = False
    for metric, metric_value in metrics.items():
        # Short-circuit: once a level has tripped, skip further checks for it.
        if not warn_hit:
            warn_threshold = self.threshold_warning(metric)
            if warn_threshold is not None:
                warn_hit = warn_threshold.check(metric_value, lower)

        if not crit_hit:
            crit_threshold = self.threshold_critical(metric)
            if crit_threshold is not None:
                crit_hit = crit_threshold.check(metric_value, lower)

    if crit_hit:
        self.check_result = CheckState.CRITICAL
        self.check_message = kwargs.get('messageCritical', message)
    elif warn_hit:
        self.check_result = CheckState.WARNING
        self.check_message = kwargs.get('messageWarning', message)
    else:
        self.check_message = message
def scale_value(self, value):
    """Scale a raw byte value into the unit selected with --unit.

    Raises ValueError for an unknown unit. The original code used
    ``assert('wrong unit')``, which can never fire (a non-empty string
    is truthy) and silently returned None instead of failing.
    """
    if self.options.unit in self.UNIT_SCALE:
        return value / self.UNIT_SCALE[self.options.unit]
    raise ValueError("unknown unit: {}".format(self.options.unit))
def threshold_warning(self, name: str):
    """Warning threshold for ``name``, falling back to the default (None) key."""
    thresholds = self.options.threshold_warning
    return thresholds.get(name, thresholds.get(None))
def threshold_critical(self, name: str):
    """Critical threshold for ``name``, falling back to the default (None) key."""
    thresholds = self.options.threshold_critical
    return thresholds.get(name, thresholds.get(None))
def get_value(self, value, total=None):
    """Return ``value`` scaled to the output unit, or as a percent of ``total``."""
    result = float(value)

    if total:
        # percentage of total
        result = result / (float(total) / 100)
    else:
        # absolute value in the configured unit
        result = self.scale_value(result)

    return round(result, 2)
def add_perfdata(self, name, value, **kwargs):
    """Append a Nagios perfdata entry: 'name=value[unit];warn;crit;min;max'."""
    unit = kwargs.get('unit', '%')

    warn = self.threshold_warning(name)
    crit = self.threshold_critical(name)

    # Build the semicolon-separated fields; empty string when unset.
    fields = ['{}={}{}'.format(name, value, unit)]
    fields.append(str(warn.value) if warn else '')
    fields.append(str(crit.value) if crit else '')
    fields.append('{}'.format(kwargs.get('min', 0)))
    fields.append('{}'.format(kwargs.get('max', '')))

    self.perfdata.append(';'.join(fields))
def get_perfdata(self):
    """Render collected perfdata as a '|'-prefixed output suffix, or ''."""
    if not self.perfdata:
        return ''
    return '|' + ' '.join(self.perfdata)
def check(self):
    """Dispatch to the check implementation selected by --mode and print the result."""
    # Start optimistic; the selected check overrides this on failure.
    self.check_result = CheckState.OK

    if self.options.mode == 'cluster':
        self.check_cluster_status()
    elif self.options.mode == 'version':
        self.check_version()
    elif self.options.mode == 'memory':
        self.check_memory()
    elif self.options.mode == 'swap':
        self.check_swap()
    elif self.options.mode == 'io_wait':
        self.check_io_wait()
    elif self.options.mode == 'disk-health':
        self.check_disks()
    elif self.options.mode == 'cpu':
        self.check_cpu()
    elif self.options.mode == 'services':
        self.check_services()
    elif self.options.mode == 'updates':
        self.check_updates()
    elif self.options.mode == 'subscription':
        self.check_subscription()
    elif self.options.mode == 'storage':
        self.check_storage(self.options.name)
    elif self.options.mode in ['vm', 'vm_status']:
        # 'vm_status' only verifies the state; 'vm' also checks resource usage.
        only_status = self.options.mode == 'vm_status'

        # The VM can be addressed by name or by numeric id (--name wins).
        if self.options.name:
            idx = self.options.name
        else:
            idx = self.options.vmid

        if self.options.expected_vm_status:
            self.check_vm_status(idx, expected_state=self.options.expected_vm_status, only_status=only_status)
        else:
            self.check_vm_status(idx, only_status=only_status)
    elif self.options.mode == 'replication':
        self.check_replication()
    elif self.options.mode == 'ceph-health':
        self.check_ceph_health()
    elif self.options.mode == 'zfs-health':
        self.check_zfs_health(self.options.name)
    elif self.options.mode == 'zfs-fragmentation':
        self.check_zfs_fragmentation(self.options.name)
    else:
        # Should be unreachable: argparse restricts the --mode choices.
        message = "Check mode '{}' not known".format(self.options.mode)
        self.output(CheckState.UNKNOWN, message)

    # Print result (state, message, perfdata) and exit accordingly.
    self.check_output()
def parse_args(self):
    """Parse command-line arguments into self.options.

    Performs cross-option validation (node/name/vmid requirements per
    mode, threshold ordering) and exits with UNKNOWN on invalid input.
    """
    p = argparse.ArgumentParser(description='Check command for PVE hosts via API')

    api_opts = p.add_argument_group('API Options')

    api_opts.add_argument("-e", "--api-endpoint", required=True, help="PVE api endpoint hostname")
    api_opts.add_argument("--api-port", required=False, help="PVE api endpoint port")

    api_opts.add_argument("-u", "--username", dest='api_user', required=True,
                          help="PVE api user (e.g. icinga2@pve or icinga2@pam, depending on which backend you "
                               "have chosen in proxmox)")

    # Exactly one of password or API token must be supplied.
    group = api_opts.add_mutually_exclusive_group(required=True)
    group.add_argument("-p", "--password", dest='api_password', help="PVE API user password")
    group.add_argument("-t", "--api-token", dest="api_token", help="PVE API token (format: TOKEN_ID=TOKEN_SECRET")

    api_opts.add_argument("-k", "--insecure", dest='api_insecure', action='store_true', default=False,
                          help="Don't verify HTTPS certificate")

    api_opts.set_defaults(api_port=8006)

    check_opts = p.add_argument_group('Check Options')

    check_opts.add_argument("-m", "--mode",
                            choices=(
                                'cluster', 'version', 'cpu', 'memory', 'swap', 'storage', 'io_wait', 'updates', 'services',
                                'subscription', 'vm', 'vm_status', 'replication', 'disk-health', 'ceph-health',
                                'zfs-health', 'zfs-fragmentation'),
                            required=True,
                            help="Mode to use.")

    check_opts.add_argument('-n', '--node', dest='node',
                            help='Node to check (necessary for all modes except cluster and version)')

    check_opts.add_argument('--name', dest='name',
                            help='Name of storage, vm, or container')

    check_opts.add_argument('--vmid', dest='vmid', type=int,
                            help='ID of virtual machine or container')

    check_opts.add_argument('--expected-vm-status', choices=('running', 'stopped', 'paused'),
                            help='Expected VM status')

    check_opts.add_argument('--ignore-vm-status', dest='ignore_vm_status', action='store_true',
                            help='Ignore VM status in checks',
                            default=False)

    check_opts.add_argument('--ignore-service', dest='ignore_services', action='append', metavar='NAME',
                            help='Ignore service NAME in checks', default=[])

    check_opts.add_argument('--ignore-disk', dest='ignore_disks', action='append', metavar='NAME',
                            help='Ignore disk NAME in health check', default=[])

    # Thresholds are parsed into dicts: {metric_name_or_None: CheckThreshold}.
    check_opts.add_argument('-w', '--warning', dest='threshold_warning', type=CheckThreshold.threshold_type,
                            default={}, help='Warning threshold for check value. Mutiple thresholds with name:value,name:value')
    check_opts.add_argument('-c', '--critical', dest='threshold_critical', type=CheckThreshold.threshold_type,
                            default={}, help='Critical threshold for check value. Mutiple thresholds with name:value,name:value')
    check_opts.add_argument('-M', dest='values_mb', action='store_true', default=False,
                            help='Values are shown in the unit which is set with --unit (if available). Thresholds are also treated in this unit')
    check_opts.add_argument('-V', '--min-version', dest='min_version', type=str,
                            help='The minimal pve version to check for. Any version lower than this will return '
                                 'CRITICAL.')

    check_opts.add_argument('--unit', choices=self.UNIT_SCALE.keys(), default='MiB', help='Unit which is used for performance data and other values')

    options = p.parse_args()

    # Most modes operate on a single node and therefore require --node.
    if not options.node and options.mode not in ['cluster', 'vm', 'vm_status', 'version', 'ceph-health']:
        p.print_usage()
        message = "{}: error: --mode {} requires node name (--node)".format(p.prog, options.mode)
        self.output(CheckState.UNKNOWN, message)

    # VM checks need a way to address the guest: either --name or --vmid.
    if not options.vmid and not options.name and options.mode in ('vm', 'vm_status'):
        p.print_usage()
        message = "{}: error: --mode {} requires either vm name (--name) or id (--vmid)".format(p.prog,
                                                                                                options.mode)
        self.output(CheckState.UNKNOWN, message)

    if not options.name and options.mode == 'storage':
        p.print_usage()
        message = "{}: error: --mode {} requires storage name (--name)".format(p.prog, options.mode)
        self.output(CheckState.UNKNOWN, message)

    def compare_thresholds(threshold_warning, threshold_critical, comparator):
        # True when every warning/critical pair satisfies `comparator`;
        # falls back to the default (None) entry when a named one is missing.
        ok = True
        keys = set(list(threshold_warning.keys()) + list(threshold_critical.keys()))
        for key in keys:
            if (key in threshold_warning and key in threshold_critical) or (None in threshold_warning and None in threshold_critical):
                ok = ok and comparator(threshold_warning[key], threshold_critical[key])
            elif key in threshold_warning and None in threshold_critical:
                ok = ok and comparator(threshold_warning[key], threshold_critical[None])
            elif key in threshold_critical and None in threshold_warning:
                ok = ok and comparator(threshold_warning[None], threshold_critical[key])

        return ok

    # Sanity-check threshold ordering. Subscription thresholds count days
    # remaining, so the comparison direction is inverted for that mode.
    if options.threshold_warning and options.threshold_critical:
        if options.mode != 'subscription' and not compare_thresholds(options.threshold_warning, options.threshold_critical, lambda w,c: w<=c):
            p.error("Critical value must be greater than warning value")
        elif options.mode == 'subscription' and not compare_thresholds(options.threshold_warning, options.threshold_critical, lambda w,c: w>=c):
            p.error("Critical value must be lower than warning value")

    self.options = options
def __init__(self):
    """Initialize check state, parse CLI options, and prepare API auth."""
    # Runtime state; populated by parse_args() and the individual checks.
    self.options = {}
    self.ticket = None
    self.perfdata = []
    self.check_result = CheckState.UNKNOWN
    self.check_message = ""

    # HTTP auth material used for the API requests.
    self.__headers = {}
    self.__cookies = {}

    self.parse_args()

    if self.options.api_insecure:
        # disable urllib3 warning about insecure requests
        requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)

    # Password auth acquires a ticket cookie; token auth sends an
    # Authorization header instead (mutually exclusive per argparse).
    if self.options.api_password is not None:
        self.__cookies['PVEAuthCookie'] = self.get_ticket()
    elif self.options.api_token is not None:
        self.__headers["Authorization"] = "PVEAPIToken={}!{}".format(self.options.api_user, self.options.api_token)
if __name__ == '__main__':
    # Only run the check when executed as a script, not when imported
    # (e.g. by tests); previously this ran unconditionally at import time.
    pve = CheckPVE()
    pve.check()
|
|
@ -0,0 +1,973 @@
|
||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"builtIn": 1,
|
||||||
|
"datasource": "-- Grafana --",
|
||||||
|
"enable": true,
|
||||||
|
"hide": true,
|
||||||
|
"iconColor": "rgba(0, 211, 255, 1)",
|
||||||
|
"name": "Annotations & Alerts",
|
||||||
|
"type": "dashboard"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"editable": true,
|
||||||
|
"gnetId": null,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"hideControls": false,
|
||||||
|
"id": 11,
|
||||||
|
"links": [],
|
||||||
|
"refresh": "30s",
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "icinga2",
|
||||||
|
"fill": 1,
|
||||||
|
"id": 1,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": true,
|
||||||
|
"avg": true,
|
||||||
|
"current": true,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": true,
|
||||||
|
"min": true,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false,
|
||||||
|
"values": true
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "CRITICAL",
|
||||||
|
"color": "#BF1B00",
|
||||||
|
"fill": 0,
|
||||||
|
"legend": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "WARNING",
|
||||||
|
"color": "#EAB839",
|
||||||
|
"fill": 0,
|
||||||
|
"legend": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "memory used",
|
||||||
|
"color": "#0A437C",
|
||||||
|
"yaxis": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "memory used",
|
||||||
|
"fill": 0
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 4,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"alias": "$service usage",
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"metric"
|
||||||
|
],
|
||||||
|
"type": "tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"none"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"measurement": "pve",
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"refId": "A",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"value"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "hostname",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$hostname$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "service",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$service$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "metric",
|
||||||
|
"operator": "=",
|
||||||
|
"value": "usage"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "WARNING",
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"metric"
|
||||||
|
],
|
||||||
|
"type": "tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"none"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"measurement": "pve",
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"query": "SELECT mean(\"value\") FROM \"pve\" WHERE (\"hostname\" =~ /^$hostname$/ AND \"service\" =~ /^$service$/ AND \"metric\" = 'used') AND $timeFilter GROUP BY time($__interval) fill(none)",
|
||||||
|
"rawQuery": false,
|
||||||
|
"refId": "C",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"warn"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "hostname",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$hostname$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "service",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$service$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "metric",
|
||||||
|
"operator": "=",
|
||||||
|
"value": "usage"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "CRITICAL",
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"metric"
|
||||||
|
],
|
||||||
|
"type": "tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"none"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"measurement": "pve",
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"query": "SELECT mean(\"value\") FROM \"pve\" WHERE (\"hostname\" =~ /^$hostname$/ AND \"service\" =~ /^$service$/ AND \"metric\" = 'used') AND $timeFilter GROUP BY time($__interval) fill(none)",
|
||||||
|
"rawQuery": false,
|
||||||
|
"refId": "B",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"crit"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "hostname",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$hostname$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "service",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$service$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "metric",
|
||||||
|
"operator": "=",
|
||||||
|
"value": "usage"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "$service usage",
|
||||||
|
"tooltip": {
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "percent",
|
||||||
|
"label": "% usage",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": "0",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"label": "used MB",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": "0",
|
||||||
|
"show": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "icinga2",
|
||||||
|
"fill": 1,
|
||||||
|
"id": 2,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": true,
|
||||||
|
"avg": true,
|
||||||
|
"current": true,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": true,
|
||||||
|
"min": true,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false,
|
||||||
|
"values": true
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "CRITICAL",
|
||||||
|
"color": "#BF1B00",
|
||||||
|
"fill": 0,
|
||||||
|
"legend": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "WARNING",
|
||||||
|
"color": "#EAB839",
|
||||||
|
"fill": 0,
|
||||||
|
"legend": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 4,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"alias": "$service used",
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"metric"
|
||||||
|
],
|
||||||
|
"type": "tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"none"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"measurement": "pve",
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"refId": "A",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"value"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "hostname",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$hostname$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "service",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$service$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "metric",
|
||||||
|
"operator": "=",
|
||||||
|
"value": "used"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "WARNING",
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"metric"
|
||||||
|
],
|
||||||
|
"type": "tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"none"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"measurement": "pve",
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"query": "SELECT mean(\"value\") FROM \"pve\" WHERE (\"hostname\" =~ /^$hostname$/ AND \"service\" =~ /^$service$/ AND \"metric\" = 'used') AND $timeFilter GROUP BY time($__interval) fill(none)",
|
||||||
|
"rawQuery": false,
|
||||||
|
"refId": "C",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"warn"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "hostname",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$hostname$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "service",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$service$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "metric",
|
||||||
|
"operator": "=",
|
||||||
|
"value": "used"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "CRITICAL",
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"metric"
|
||||||
|
],
|
||||||
|
"type": "tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"none"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"measurement": "pve",
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"query": "SELECT mean(\"value\") FROM \"pve\" WHERE (\"hostname\" =~ /^$hostname$/ AND \"service\" =~ /^$service$/ AND \"metric\" = 'used') AND $timeFilter GROUP BY time($__interval) fill(none)",
|
||||||
|
"rawQuery": false,
|
||||||
|
"refId": "B",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"crit"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "hostname",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$hostname$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "service",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$service$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "metric",
|
||||||
|
"operator": "=",
|
||||||
|
"value": "used"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "$service used",
|
||||||
|
"tooltip": {
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"label": "used",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": "0",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"label": "used MB",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": "0",
|
||||||
|
"show": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "icinga2",
|
||||||
|
"fill": 1,
|
||||||
|
"id": 3,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": true,
|
||||||
|
"avg": true,
|
||||||
|
"current": true,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": true,
|
||||||
|
"min": true,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false,
|
||||||
|
"values": true
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "CRITICAL",
|
||||||
|
"color": "#BF1B00",
|
||||||
|
"fill": 0,
|
||||||
|
"legend": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "WARNING",
|
||||||
|
"color": "#EAB839",
|
||||||
|
"fill": 0,
|
||||||
|
"legend": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "memory used",
|
||||||
|
"color": "#0A437C",
|
||||||
|
"yaxis": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "memory used",
|
||||||
|
"fill": 0
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 4,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"alias": "I/O wait",
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"metric"
|
||||||
|
],
|
||||||
|
"type": "tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"none"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"measurement": "pve",
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"refId": "A",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"value"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "hostname",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$hostname$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "service",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$service$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "metric",
|
||||||
|
"operator": "=",
|
||||||
|
"value": "wait"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "WARNING",
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"metric"
|
||||||
|
],
|
||||||
|
"type": "tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"none"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"measurement": "pve",
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"query": "SELECT mean(\"value\") FROM \"pve\" WHERE (\"hostname\" =~ /^$hostname$/ AND \"service\" =~ /^$service$/ AND \"metric\" = 'used') AND $timeFilter GROUP BY time($__interval) fill(none)",
|
||||||
|
"rawQuery": false,
|
||||||
|
"refId": "C",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"warn"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "hostname",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$hostname$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "service",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$service$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "metric",
|
||||||
|
"operator": "=",
|
||||||
|
"value": "wait"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "CRITICAL",
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"metric"
|
||||||
|
],
|
||||||
|
"type": "tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"none"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"measurement": "pve",
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"query": "SELECT mean(\"value\") FROM \"pve\" WHERE (\"hostname\" =~ /^$hostname$/ AND \"service\" =~ /^$service$/ AND \"metric\" = 'used') AND $timeFilter GROUP BY time($__interval) fill(none)",
|
||||||
|
"rawQuery": false,
|
||||||
|
"refId": "B",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"crit"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "hostname",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$hostname$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "service",
|
||||||
|
"operator": "=~",
|
||||||
|
"value": "/^$service$/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"condition": "AND",
|
||||||
|
"key": "metric",
|
||||||
|
"operator": "=",
|
||||||
|
"value": "wait"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "I/O wait",
|
||||||
|
"tooltip": {
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "percent",
|
||||||
|
"label": "% usage",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": "0",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"label": "used MB",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": "0",
|
||||||
|
"show": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"repeat": null,
|
||||||
|
"repeatIteration": null,
|
||||||
|
"repeatRowId": null,
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "icmp checks",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"allValue": null,
|
||||||
|
"current": {
|
||||||
|
"text": "pve01.willi-graf.local",
|
||||||
|
"value": "pve01.willi-graf.local"
|
||||||
|
},
|
||||||
|
"datasource": "icinga2",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": false,
|
||||||
|
"label": null,
|
||||||
|
"multi": false,
|
||||||
|
"name": "hostname",
|
||||||
|
"options": [],
|
||||||
|
"query": "SHOW TAG VALUES WITH KEY = \"hostname\"",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"sort": 1,
|
||||||
|
"tagValuesQuery": "",
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"allValue": null,
|
||||||
|
"current": {
|
||||||
|
"text": "io_wait",
|
||||||
|
"value": "io_wait"
|
||||||
|
},
|
||||||
|
"datasource": "icinga2",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": false,
|
||||||
|
"label": null,
|
||||||
|
"multi": false,
|
||||||
|
"name": "service",
|
||||||
|
"options": [],
|
||||||
|
"query": "SHOW TAG VALUES WITH KEY = \"service\" where hostname =~ /^$hostname$/",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"sort": 1,
|
||||||
|
"tagValuesQuery": "",
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-2m",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "icinga-pve-metrics",
|
||||||
|
"version": 23
|
||||||
|
}
|
|
@ -0,0 +1,74 @@
|
||||||
|
object CheckCommand "pve" {
|
||||||
|
import "plugin-check-command"
|
||||||
|
|
||||||
|
command = [ PluginDir + "/check_pve.py" ]
|
||||||
|
|
||||||
|
arguments = {
|
||||||
|
"-e" = {
|
||||||
|
value = "$pve_host$"
|
||||||
|
required = true
|
||||||
|
description = "Hostname for PVE API"
|
||||||
|
}
|
||||||
|
"-u" = {
|
||||||
|
value = "$pve_user$"
|
||||||
|
required = true
|
||||||
|
description = "API user (ex. monitoring@pve)"
|
||||||
|
}
|
||||||
|
"-p" = {
|
||||||
|
value = "$pve_password$"
|
||||||
|
required = true
|
||||||
|
description = "API user password"
|
||||||
|
}
|
||||||
|
"-k" = {
|
||||||
|
set_if = "$pve_insecure_connection$"
|
||||||
|
description = "Connect to this host instead of $pve_host$"
|
||||||
|
}
|
||||||
|
"-m" = {
|
||||||
|
value = "$pve_mode$"
|
||||||
|
required = true
|
||||||
|
description = "Check mode (cluster, version, updates, subscription, storage, cpu, memory, io_wait, vm, replication)"
|
||||||
|
}
|
||||||
|
"-n" = {
|
||||||
|
value = "$pve_node$"
|
||||||
|
description = "Node to check (necessary for all modes except cluster and version)"
|
||||||
|
}
|
||||||
|
"--name" = {
|
||||||
|
value = "$pve_resource_name$"
|
||||||
|
description = "Name of storage or vm to check"
|
||||||
|
}
|
||||||
|
"--expected-vm-status" = {
|
||||||
|
value = "$pve_expected_vm_status$"
|
||||||
|
description = "Expected status of the VM"
|
||||||
|
}
|
||||||
|
"--ignore-service" = {
|
||||||
|
repeat_key = true
|
||||||
|
value = "$pve_ignore_services$"
|
||||||
|
description = "Ignore services in check"
|
||||||
|
}
|
||||||
|
"--ignore-disk" = {
|
||||||
|
repeat_key = true
|
||||||
|
value = "$pve_ignore_disks$"
|
||||||
|
description = "Ignore disks in check"
|
||||||
|
}
|
||||||
|
"--ignore-vm-status" = {
|
||||||
|
set_if = "$pve_ignore_vm_status$"
|
||||||
|
description = "Ignore VM status in check"
|
||||||
|
}
|
||||||
|
"-w" = {
|
||||||
|
value = "$pve_warning$"
|
||||||
|
description = "Warning treshold"
|
||||||
|
}
|
||||||
|
"-c" = {
|
||||||
|
value = "$pve_critical$"
|
||||||
|
description = "Critical treshold"
|
||||||
|
}
|
||||||
|
"-M" = {
|
||||||
|
set_if = "$pve_tresholds_mb$"
|
||||||
|
description = "Unit of tresholds and values is MB"
|
||||||
|
}
|
||||||
|
"-V" = {
|
||||||
|
value = "$pve_min_version$"
|
||||||
|
description = "Minimal pve version. Everything lower than this will return CRITICAL."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,139 @@
|
||||||
|
template Host "proxmox-host" {
|
||||||
|
import "generic-host"
|
||||||
|
|
||||||
|
vars.pve_host = name
|
||||||
|
vars.pve_node = name.split(".")[0]
|
||||||
|
// ... or if not matching the fqdn (nodename.domain.example)
|
||||||
|
// vars.pve_node = "proxmox-host"
|
||||||
|
|
||||||
|
// if your icinga host don't trust your pve certificate, you'll have to uncomment this line
|
||||||
|
// vars.pve_insecure_connection = true
|
||||||
|
vars.pve_user = "monitor@pve"
|
||||||
|
vars.pve_password = "SuperSecretPassw0rd"
|
||||||
|
|
||||||
|
// change to false, if node is no member of a pve cluster
|
||||||
|
vars.pve_cluster = true
|
||||||
|
}
|
||||||
|
|
||||||
|
object Host "proxmox-host.domain.example" {
|
||||||
|
import "proxmox-host"
|
||||||
|
|
||||||
|
address = "192.168.42.42"
|
||||||
|
|
||||||
|
vars.pve_storage["flashpool"] = {
|
||||||
|
pve_warning = 80
|
||||||
|
pve_critical = 90
|
||||||
|
}
|
||||||
|
|
||||||
|
vars.pve_storage["diskpool"] = {
|
||||||
|
pve_warning = 80
|
||||||
|
pve_critical = 90
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ignore these disks in health check (USB sticks, SD cards, etc.)
|
||||||
|
vars.pve_ignore_disks = [ "sdn", "sdg" ]
|
||||||
|
|
||||||
|
vars.virtual_machines["vm-01"] = {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template Service "pve-service" {
|
||||||
|
import "generic-service"
|
||||||
|
|
||||||
|
check_command = "pve"
|
||||||
|
}
|
||||||
|
|
||||||
|
apply Service "cluster" {
|
||||||
|
import "pve-service"
|
||||||
|
|
||||||
|
vars.pve_mode = "cluster"
|
||||||
|
|
||||||
|
assign where host.vars.pve_host && host.vars.pve_cluster
|
||||||
|
}
|
||||||
|
|
||||||
|
apply Service "services" {
|
||||||
|
import "pve-service"
|
||||||
|
|
||||||
|
vars.pve_mode = "services"
|
||||||
|
|
||||||
|
// Ignore cluster status on single nodes
|
||||||
|
if (!host.vars.pve_cluster) {
|
||||||
|
vars.pve_ignore_services = host.vars.pve_ignore_services || []
|
||||||
|
vars.pve_ignore_services.add("corosync")
|
||||||
|
}
|
||||||
|
|
||||||
|
assign where host.vars.pve_host
|
||||||
|
}
|
||||||
|
|
||||||
|
apply Service "updates" {
|
||||||
|
import "pve-service"
|
||||||
|
|
||||||
|
check_interval = 12h
|
||||||
|
retry_interval = 2h
|
||||||
|
max_check_attempts = 3
|
||||||
|
|
||||||
|
vars.pve_mode = "updates"
|
||||||
|
|
||||||
|
assign where host.vars.pve_host
|
||||||
|
}
|
||||||
|
|
||||||
|
apply Service "disk-health" {
|
||||||
|
import "pve-service"
|
||||||
|
|
||||||
|
vars.pve_mode = "disk-health"
|
||||||
|
|
||||||
|
assign where host.vars.pve_host
|
||||||
|
}
|
||||||
|
|
||||||
|
apply Service "io_wait" {
|
||||||
|
import "pve-service"
|
||||||
|
|
||||||
|
vars.pve_mode = "io_wait"
|
||||||
|
|
||||||
|
vars.pve_warning = 10
|
||||||
|
vars.pve_critical = 30
|
||||||
|
|
||||||
|
assign where host.vars.pve_host
|
||||||
|
}
|
||||||
|
|
||||||
|
apply Service "cpu" {
|
||||||
|
import "pve-service"
|
||||||
|
|
||||||
|
vars.pve_mode = "cpu"
|
||||||
|
|
||||||
|
vars.pve_warning = 70
|
||||||
|
vars.pve_critical = 90
|
||||||
|
|
||||||
|
assign where host.vars.pve_host
|
||||||
|
}
|
||||||
|
|
||||||
|
apply Service "memory" {
|
||||||
|
import "pve-service"
|
||||||
|
|
||||||
|
vars.pve_mode = "memory"
|
||||||
|
|
||||||
|
vars.pve_warning = 80
|
||||||
|
vars.pve_critical = 90
|
||||||
|
|
||||||
|
assign where host.vars.pve_host
|
||||||
|
}
|
||||||
|
|
||||||
|
apply Service "storage " for (storage => config in host.vars.pve_storage) {
|
||||||
|
import "pve-service"
|
||||||
|
|
||||||
|
vars += config
|
||||||
|
|
||||||
|
vars.pve_mode = "storage"
|
||||||
|
vars.pve_resource_name = storage
|
||||||
|
}
|
||||||
|
|
||||||
|
apply Service "pve-vm " for (vm => config in host.vars.virtual_machines) {
|
||||||
|
import "pve-service"
|
||||||
|
|
||||||
|
vars += config
|
||||||
|
|
||||||
|
vars.pve_mode = "vm"
|
||||||
|
vars.pve_resource_name = vm
|
||||||
|
|
||||||
|
assign where host.vars.pve_host
|
||||||
|
}
|
|
@ -0,0 +1,3 @@
|
||||||
|
requests
|
||||||
|
argparse
|
||||||
|
packaging
|
|
@ -9,7 +9,7 @@ import aiofiles.os
|
||||||
import magic
|
import magic
|
||||||
import markdown
|
import markdown
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from nio import AsyncClient, LoginResponse, RoomSendError, UploadResponse, MatrixRoom, RoomLeaveResponse, RoomForgetResponse
|
from nio import AsyncClient, LoginResponse, MatrixRoom, RoomForgetResponse, RoomLeaveResponse, RoomSendError, UploadResponse
|
||||||
|
|
||||||
from . import nagios
|
from . import nagios
|
||||||
|
|
||||||
|
@ -166,7 +166,6 @@ async def leave_all_rooms_async(client, exclude_starting_with=None):
|
||||||
await client.sync()
|
await client.sync()
|
||||||
invited_rooms = copy.copy(client.invited_rooms) # RuntimeError: dictionary changed size during iteration
|
invited_rooms = copy.copy(client.invited_rooms) # RuntimeError: dictionary changed size during iteration
|
||||||
for name, room in invited_rooms.items():
|
for name, room in invited_rooms.items():
|
||||||
print(room.room_id)
|
|
||||||
# if exclude_starting_with and room.named_room_name() is not None and room.named_room_name().startswith(exclude_starting_with):
|
# if exclude_starting_with and room.named_room_name() is not None and room.named_room_name().startswith(exclude_starting_with):
|
||||||
# continue
|
# continue
|
||||||
s, l, f = await leave_room_async(room.room_id, client)
|
s, l, f = await leave_room_async(room.room_id, client)
|
||||||
|
|
Loading…
Reference in New Issue