# icinga2-checks/check_pve/check_pve.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# ------------------------------------------------------------------------------
# check_pve.py - A check plugin for Proxmox Virtual Environment (PVE).
# Copyright (C) 2018-2022  Nicolai Buchwitz <nb@tipi-net.de>
#
# Version: 1.2.2
#
# ------------------------------------------------------------------------------
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
# ------------------------------------------------------------------------------

import sys
import re

try:
    from enum import Enum
    from datetime import datetime
    from packaging import version
    import argparse
    import requests

except ImportError as e:
    print("Missing python module: {}".format(str(e)))
    sys.exit(255)


class CheckState(Enum):
    """Result states of a check; the member value is used as the process exit code."""
    # No problem detected.
    OK = 0
    # A warning condition was detected.
    WARNING = 1
    # A critical condition was detected.
    CRITICAL = 2
    # The state could not be determined (e.g. the API was unreachable).
    UNKNOWN = 3


class CheckThreshold:
    """A single numeric threshold that check values are compared against.

    Instances compare and order by their numeric ``value``. Comparing with an
    object that is not a ``CheckThreshold`` returns ``NotImplemented`` so that
    Python falls back to its default handling instead of raising
    ``AttributeError``.
    """

    def __init__(self, value: float):
        self.value = value

    def __repr__(self):
        return "{}({!r})".format(type(self).__name__, self.value)

    def __eq__(self, other):
        if not isinstance(other, CheckThreshold):
            return NotImplemented
        return self.value == other.value

    # Overriding __eq__ already makes instances unhashable; state it explicitly.
    __hash__ = None

    def __lt__(self, other):
        if not isinstance(other, CheckThreshold):
            return NotImplemented
        return self.value < other.value

    def __le__(self, other):
        if not isinstance(other, CheckThreshold):
            return NotImplemented
        return self.value <= other.value

    def __gt__(self, other):
        if not isinstance(other, CheckThreshold):
            return NotImplemented
        return self.value > other.value

    def __ge__(self, other):
        if not isinstance(other, CheckThreshold):
            return NotImplemented
        return self.value >= other.value

    def check(self, value: float, lower: bool = False):
        """Return True when ``value`` violates this threshold.

        By default a value strictly above the threshold violates it; with
        ``lower=True`` a value strictly below the threshold does.
        """
        if lower:
            return value < self.value
        return value > self.value

    @staticmethod
    def threshold_type(arg: str):
        """argparse ``type`` callback that parses a threshold argument.

        Accepts either a plain number (stored under the key ``None``) or a
        comma separated list of ``name:value`` pairs (stored under ``name``).

        Returns:
            dict mapping metric name (or ``None``) to ``CheckThreshold``.

        Raises:
            argparse.ArgumentTypeError: if an element is neither a number nor
                a well-formed ``name:value`` pair.
        """
        try:
            return {None: CheckThreshold(float(arg))}
        except (TypeError, ValueError):
            # Not a plain number: fall through to the "name:value" list syntax.
            pass

        thresholds = {}
        for t in arg.split(','):
            # fullmatch rejects trailing garbage such as "cpu:80x", which a
            # plain match() would silently accept as 80.
            m = re.fullmatch(r"([a-z_0-9]+):([0-9.]+)", t)
            if not m:
                raise argparse.ArgumentTypeError(
                    "invalid threshold format: {}".format(t))

            try:
                thresholds[m.group(1)] = CheckThreshold(float(m.group(2)))
            except ValueError:
                # e.g. "cpu:1.2.3" passes the pattern but is not a number
                raise argparse.ArgumentTypeError(
                    "invalid threshold format: {}".format(t))

        return thresholds


class CheckPVE:
    """Check plugin that queries the PVE API and reports a monitoring state."""
    # Version string of this plugin.
    VERSION = '1.2.2'
    # URL template for API calls; the placeholders are filled in by get_url().
    API_URL = 'https://{hostname}:{port}/api2/json/{command}'
    # Divisors applied by scale_value(); the keys are the choices of --unit.
    UNIT_SCALE = {
            "GB": 10**9,
            "MB": 10**6,
            "KB": 10**3,
            "GiB": 2**30,
            "MiB": 2**20,
            "KiB": 2**10,
            "B": 1
        }

    def check_output(self):
        """Print the collected check message (plus perfdata, if any) and exit."""
        perf_suffix = self.get_perfdata() if self.perfdata else ''
        self.output(self.check_result, self.check_message + perf_suffix)

    @staticmethod
    def output(rc, message):
        prefix = rc.name
        message = '{} - {}'.format(prefix, message)

        print(message)
        sys.exit(rc.value)

    def get_url(self, command):
        """Return the full API URL for ``command`` using the configured endpoint and port."""
        opts = self.options
        return self.API_URL.format(
            hostname=opts.api_endpoint,
            port=opts.api_port,
            command=command,
        )

    def request(self, url, method='get', **kwargs):
        """Send a request to the PVE API and return the ``data`` member of the reply.

        Args:
            url: Full API URL (see get_url()).
            method: ``'get'`` or ``'post'``.
            **kwargs: ``data`` (form data for POST) or ``params`` (query
                parameters for GET).

        Returns:
            The value stored under ``data`` in the JSON response.

        Any failure (unsupported method, connection problem, HTTP error,
        unparsable response) is reported through output(), which terminates
        the process, so this method only returns on success.
        """
        connect_timeout = 5
        response = None
        try:
            if method == 'post':
                response = requests.post(
                    url,
                    verify=not self.options.api_insecure,
                    data=kwargs.get('data', None),
                    timeout=connect_timeout
                )
            elif method == 'get':
                response = requests.get(
                    url,
                    verify=not self.options.api_insecure,
                    cookies=self.__cookies,
                    headers=self.__headers,
                    params=kwargs.get('params', None),
                    # Bound only the connect phase: without any timeout the
                    # ConnectTimeout handler below could never trigger for GET
                    # requests, while slow endpoints may legitimately take
                    # long to answer.
                    timeout=(connect_timeout, None)
                )
            else:
                self.output(CheckState.CRITICAL, "Unsupported request method: {}".format(method))
        except requests.exceptions.ConnectTimeout:
            self.output(CheckState.UNKNOWN, "Could not connect to PVE API: Connection timeout")
        except requests.exceptions.Timeout:
            # Read timeouts are not ConnectionErrors and would otherwise
            # escape as a traceback.
            self.output(CheckState.UNKNOWN, "Could not fetch data from API: Request timed out")
        except requests.exceptions.SSLError:
            self.output(CheckState.UNKNOWN, "Could not connect to PVE API: Certificate validation failed")
        except requests.exceptions.ConnectionError:
            self.output(CheckState.UNKNOWN, "Could not connect to PVE API: Failed to resolve hostname")

        if response.ok:
            try:
                return response.json()['data']
            except (ValueError, KeyError):
                # Body was not JSON or lacks the 'data' member.
                self.output(CheckState.UNKNOWN, "Could not fetch data from API: Invalid response received")
        else:
            message = "Could not fetch data from API: "

            if response.status_code == 401:
                message += "Could not connect to PVE API: invalid username or password"
            elif response.status_code == 403:
                message += "Access denied. Please check if API user has sufficient permissions / the role has been " \
                           "assigned."
            else:
                message += "HTTP error code was {}".format(response.status_code)

            self.output(CheckState.UNKNOWN, message)

    def get_ticket(self):
        """Log in with the configured username/password and return the issued ticket."""
        credentials = {
            "username": self.options.api_user,
            "password": self.options.api_password,
        }
        return self.request(self.get_url('access/ticket'), "post", data=credentials)['ticket']

    def check_api_value(self, url, message, **kwargs):
        """Fetch a usage value from ``url``, record perfdata and evaluate thresholds.

        Args:
            url: API URL to query.
            message: Message prefix; the measured value and its unit are appended.
            **kwargs:
                key: Optional member of the API result to evaluate.
                perfkey: Optional perfdata label overriding the defaults
                    (``usage`` / ``used``).

        The selected result is either a dict with ``used`` and ``total``
        members, or a scalar ratio that is multiplied by 100.
        """
        result = self.request(url)
        used = None

        if 'key' in kwargs:
            result = result[kwargs.get('key')]

        if isinstance(result, dict):
            used_percent = self.get_value(result['used'], result['total'])
            used = self.get_value(result['used'])
            total = self.get_value(result['total'])

            self.add_perfdata(kwargs.get('perfkey', 'usage'), used_percent)
            self.add_perfdata(kwargs.get('perfkey', 'used'), used, max=total, unit=self.options.unit)
        else:
            used_percent = round(float(result) * 100, 2)
            self.add_perfdata(kwargs.get('perfkey', 'usage'), used_percent)

        # Scalar results have no absolute value; fall back to the percentage
        # instead of reporting "None <unit>" and comparing None against the
        # thresholds.
        if self.options.values_mb and used is not None:
            message += ' {} {}'.format(used, self.options.unit)
            value = used
        else:
            message += ' {} {}'.format(used_percent, '%')
            value = used_percent

        self.check_thresholds(value, message)

    def check_vm_status(self, idx, **kwargs):
        """Check the state (and optionally the resource usage) of one VM or container.

        Args:
            idx: Name or numeric id of the guest to look up.
            **kwargs:
                expected_state: Status the guest should have (default ``running``).
                only_status: When true, skip the CPU/memory evaluation.
        """
        url = self.get_url('cluster/resources', )
        data = self.request(url, params={'type': 'vm'})

        expected_state = kwargs.get("expected_state", "running")
        only_status = kwargs.get("only_status", False)

        for vm in data:
            # Use .get(): an entry without a 'name' member must not abort the
            # lookup with a KeyError (it can still match by id).
            if vm.get('name') != idx and vm.get('vmid') != idx:
                continue

            vm_name = vm.get('name', vm.get('vmid'))
            # Check if VM (default) or LXC
            vm_type = "LXC" if vm['type'] == 'lxc' else "VM"

            if vm['status'] != expected_state:
                self.check_message = "{} '{}' is {} (expected: {})".format(vm_type, vm_name, vm['status'],
                                                                           expected_state)
                if not self.options.ignore_vm_status:
                    self.check_result = CheckState.CRITICAL
            elif self.options.node and self.options.node != vm['node']:
                self.check_message = "{} '{}' is {}, but located on node '{}' instead of '{}'" \
                    .format(vm_type, vm_name, expected_state, vm['node'], self.options.node)
                self.check_result = CheckState.WARNING
            else:
                self.check_message = "{} '{}' is {} on node '{}'" \
                    .format(vm_type, vm_name, expected_state, vm['node'])

            if vm['status'] == 'running' and not only_status:
                cpu = round(vm['cpu'] * 100, 2)
                self.add_perfdata("cpu", cpu)

                if self.options.values_mb:
                    memory = self.scale_value(vm['mem'])
                    self.add_perfdata("memory", memory, unit=self.options.unit, max=self.scale_value(vm['maxmem']))
                else:
                    memory = self.get_value(vm['mem'], vm['maxmem'])
                    self.add_perfdata("memory", memory)

                self.check_thresholds({"cpu": cpu, "memory": memory}, message=self.check_message)

            break
        else:
            # loop finished without finding a matching guest
            self.check_message = "VM or LXC '{}' not found".format(idx)
            self.check_result = CheckState.WARNING

    def check_disks(self):
        """Check the reported health of all disks on the node.

        Disks listed in ``ignore_disks`` are skipped. A disk whose health is
        ``UNKNOWN`` or anything other than ``PASSED``/``OK`` sets the result
        to WARNING. Wearout is recorded as perfdata unless it is ``N/A``.
        """
        base_url = self.get_url('nodes/{}/disks'.format(self.options.node))
        disks = self.request(base_url + '/list')

        failed = []
        unknown = []
        for disk in disks:
            name = disk['devpath'].replace('/dev/', '')
            if name in self.options.ignore_disks:
                continue

            health = disk['health']
            if health == 'UNKNOWN':
                bucket = unknown
            elif health not in ('PASSED', 'OK'):
                bucket = failed
            else:
                bucket = None

            if bucket is not None:
                self.check_result = CheckState.WARNING
                bucket.append({"serial": disk["serial"], "device": disk['devpath']})

            if disk['wearout'] != 'N/A':
                self.add_perfdata('wearout_{}'.format(name), disk['wearout'])

        total = len(disks)

        if failed:
            self.check_message = "{} of {} disks failed the health test:\n".format(len(failed), total) + "".join(
                "- {} with serial '{}'\n".format(entry['device'], entry['serial']) for entry in failed)

        if unknown:
            self.check_message += "{} of {} disks have unknown health status:\n".format(len(unknown), total) + "".join(
                "- {} with serial '{}'\n".format(entry['device'], entry['serial']) for entry in unknown)

        if not (failed or unknown):
            self.check_message = "All disks are healthy"

    def check_replication(self):
        """Report replication jobs on the node whose ``fail_count`` is above zero.

        When a vmid is configured only the jobs of that guest are requested.
        The duration of each job without failures is recorded as perfdata.
        """
        node = self.options.node
        url = self.get_url('nodes/{}/replication'.format(node))

        query = {'params': {'guest': self.options.vmid}} if self.options.vmid else {}
        jobs = self.request(url, **query)

        failed_jobs = []  # format: [{guest: str, fail_count: int, error: str}]
        durations = []  # format: [{id: str, duration: number}]
        for job in jobs:
            if job['fail_count'] > 0:
                failed_jobs.append({'guest': job['guest'], 'fail_count': job['fail_count'], 'error': job['error']})
            else:
                durations.append({'id': job['id'], 'duration': job['duration']})

        if failed_jobs:
            details = "".join(
                "GUEST: {j[guest]}, FAIL_COUNT: {j[fail_count]}, ERROR: {j[error]} ; ".format(j=entry)
                for entry in failed_jobs)
            self.check_message = "Failed replication jobs on {}: ".format(node) + details
            self.check_result = CheckState.WARNING
        else:
            self.check_message = "No failed replication jobs on {}".format(node)
            self.check_result = CheckState.OK

        for entry in durations:
            self.add_perfdata('duration_' + entry['id'], entry['duration'], unit='s')

    def check_services(self):
        """Report services on the node that are active but not running.

        Services named in ``ignore_services`` are skipped. A service without
        an ``active-state`` member is treated as active.
        """
        services = self.request(self.get_url('nodes/{}/services'.format(self.options.node)))

        # name -> description of every offending service
        failed = {
            service['name']: service['desc']
            for service in services
            if service['state'] != 'running'
            and service.get('active-state', 'active') == 'active'
            and service['name'] not in self.options.ignore_services
        }

        if not failed:
            self.check_message = "All services are running"
            return

        self.check_result = CheckState.CRITICAL
        lines = ['- {} ({}) is not running'.format(desc, name) for name, desc in failed.items()]
        self.check_message = "{} services are not running:\n\n".format(len(failed)) + "\n".join(lines)

    def check_subscription(self):
        """Check the subscription status of the node.

        ``NotFound`` gives WARNING and ``Inactive`` gives CRITICAL. For
        ``Active`` the number of days until ``nextduedate`` is evaluated
        against the thresholds (lower values are worse). Status values are
        matched case-insensitively, and any other status is reported as
        UNKNOWN instead of silently ending as OK with an empty message.
        """
        url = self.get_url('nodes/{}/subscription'.format(self.options.node))
        data = self.request(url)

        status = str(data['status']).lower()

        if status == 'notfound':
            self.check_result = CheckState.WARNING
            self.check_message = "No valid subscription found"
        elif status == 'inactive':
            self.check_result = CheckState.CRITICAL
            self.check_message = "Subscription expired"
        elif status == 'active':
            subscription_due_date = data['nextduedate']
            subscription_product_name = data['productname']

            date_expire = datetime.strptime(subscription_due_date, '%Y-%m-%d')
            date_today = datetime.today()
            delta = (date_expire - date_today).days

            message = '{} is valid until {}'.format(
                subscription_product_name,
                subscription_due_date)
            message_warning_critical = '{} will expire in {} days ({})'.format(
                subscription_product_name,
                delta,
                subscription_due_date)

            self.check_thresholds(delta, message, messageWarning=message_warning_critical,
                                  messageCritical=message_warning_critical, lowerValue=True)
        else:
            # NOTE(review): the full set of status values is not visible here;
            # anything unrecognised is surfaced rather than reported as OK.
            self.check_result = CheckState.UNKNOWN
            self.check_message = "Subscription status is '{}'".format(data['status'])

    def check_updates(self):
        """Warn when the node reports pending package updates."""
        pending = len(self.request(self.get_url('nodes/{}/apt/update'.format(self.options.node))))

        if not pending:
            self.check_message = "System up to date"
            return

        self.check_result = CheckState.WARNING
        # pluralise the message for more than one update
        template = "{} pending update" + ("s" if pending > 1 else "")
        self.check_message = template.format(pending)

    def check_cluster_status(self):
        """Check cluster quorum and the online state of the cluster nodes.

        No cluster entry in the API result leaves the result untouched with an
        informational message. A quorate cluster is OK, or WARNING when at
        least one node is offline; a cluster without quorum is CRITICAL. Node
        counts are recorded as perfdata for quorate clusters.
        """
        url = self.get_url('cluster/status')
        data = self.request(url)

        nodes = {}
        quorate = None
        cluster = ''
        for elem in data:
            if elem['type'] == 'cluster':
                quorate = elem['quorate']
                cluster = elem['name']
            elif elem['type'] == 'node':
                nodes[elem['name']] = elem['online']

        if quorate is None:
            self.check_message = 'No cluster configuration found'
        elif quorate:
            node_count = len(nodes)
            nodes_online_count = sum(1 for online in nodes.values() if online)

            if node_count > nodes_online_count:
                diff = node_count - nodes_online_count
                self.check_result = CheckState.WARNING
                # the messages previously ended with a stray apostrophe
                self.check_message = "Cluster '{}' is healthy, but {} node(s) offline".format(cluster, diff)
            else:
                self.check_message = "Cluster '{}' is healthy".format(cluster)

            self.add_perfdata('nodes_total', node_count, unit='')
            self.add_perfdata('nodes_online', nodes_online_count, unit='')
        else:
            self.check_result = CheckState.CRITICAL
            self.check_message = 'Cluster is unhealthy - no quorum'

    def check_zfs_fragmentation(self, name=None):
        """Compare ZFS pool fragmentation against the thresholds.

        Args:
            name: Pool to check; ``None`` checks every pool on the node.

        Thresholds are looked up as ``fragmentation_<pool>``. The result is
        UNKNOWN when a named pool is not present in the API result.
        """
        pools = self.request(self.get_url('nodes/{}/disks/zfs'.format(self.options.node)))
        check_all = name is None

        above_warning = []
        above_critical = []
        found = check_all
        for pool in pools:
            pool_name = pool['name']
            if not (check_all or name == pool_name):
                continue
            found = True

            # a single named pool gets the plain label, otherwise one label per pool
            perf_key = 'fragmentation_{}'.format(pool_name) if check_all else "fragmentation"
            self.add_perfdata(perf_key, pool['frag'])

            threshold_name = "fragmentation_{}".format(pool_name)
            limit_warning = self.threshold_warning(threshold_name)
            limit_critical = self.threshold_critical(threshold_name)

            if limit_critical is not None and pool['frag'] > float(limit_critical.value):
                above_critical.append(pool)
            elif limit_warning is not None and pool['frag'] > float(limit_warning.value):
                above_warning.append(pool)

        if not found:
            self.check_result = CheckState.UNKNOWN
            self.check_message = "Could not fetch fragmentation of ZFS pool '{}'".format(name)
            return

        if not (above_warning or above_critical):
            self.check_result = CheckState.OK
            if check_all:
                self.check_message = "Fragmentation of all ZFS pools is OK"
            else:
                self.check_message = "Fragmentation of ZFS pool '{}' is OK".format(name)
            return

        self.check_result = CheckState.CRITICAL if above_critical else CheckState.WARNING

        if not check_all:
            # exactly one pool was evaluated; report the value that tripped
            worst = (above_critical or above_warning)[0]['frag']
            self.check_message = "Fragmentation of ZFS pool '{}' is above thresholds: {} %".format(name, worst)
            return

        header = "{} of {} ZFS pools are above fragmentation thresholds:\n\n".format(
            len(above_warning) + len(above_critical), len(pools))
        critical_lines = "\n".join(
            '- {} ({} %) is CRITICAL\n'.format(entry['name'], entry['frag']) for entry in above_critical)
        warning_lines = "\n".join(
            '- {} ({} %) is WARNING\n'.format(entry['name'], entry['frag']) for entry in above_warning)
        self.check_message = header + critical_lines + warning_lines

    def check_zfs_health(self, name=None):
        """Check that ZFS pools report a healthy state (``online``, case-insensitive).

        Args:
            name: Pool to check; ``None`` checks every pool on the node.

        The result is UNKNOWN when a named pool is not present in the API
        result, CRITICAL when a checked pool is unhealthy, otherwise OK.
        """
        pools = self.request(self.get_url('nodes/{}/disks/zfs'.format(self.options.node)))
        healthy_conditions = ['online']
        check_all = name is None

        found = check_all
        unhealthy = []
        for pool in pools:
            if not (check_all or name == pool['name']):
                continue
            found = True
            if pool['health'].lower() not in healthy_conditions:
                unhealthy.append(pool)

        if not found:
            self.check_result = CheckState.UNKNOWN
            self.check_message = "Could not fetch health of ZFS pool '{}'".format(name)
        elif unhealthy:
            self.check_result = CheckState.CRITICAL
            lines = ['- {} ({}) is not healthy'.format(entry['name'], entry['health']) for entry in unhealthy]
            self.check_message = "{} ZFS pools are not healthy:\n\n".format(len(unhealthy)) + "\n".join(lines)
        else:
            self.check_result = CheckState.OK
            if check_all:
                self.check_message = "All ZFS pools are healthy"
            else:
                self.check_message = "ZFS pool '{}' is healthy".format(name)

    def check_ceph_health(self):
        """Map the Ceph cluster health status to a check state.

        ``HEALTH_OK`` is OK, ``HEALTH_WARN`` is WARNING and ``HEALTH_ERR`` is
        CRITICAL. The previously handled ``HEALTH_CRIT`` spelling is still
        accepted. A missing or unrecognised status is UNKNOWN.
        """
        url = self.get_url('cluster/ceph/status')
        data = self.request(url)
        ceph_health = data.get('health', {})

        if 'status' not in ceph_health:
            self.check_result = CheckState.UNKNOWN
            self.check_message = "Could not fetch Ceph status from API. " \
                                 "Check the output of 'pvesh get cluster/ceph' on your node"
            return

        critical = (CheckState.CRITICAL, "Ceph Cluster is in critical state")
        known_states = {
            'HEALTH_OK': (CheckState.OK, "Ceph Cluster is healthy"),
            'HEALTH_WARN': (CheckState.WARNING, "Ceph Cluster is in warning state"),
            # Ceph's error state is HEALTH_ERR; without this entry a broken
            # cluster was reported as UNKNOWN.
            'HEALTH_ERR': critical,
            'HEALTH_CRIT': critical,
        }
        unknown = (CheckState.UNKNOWN, "Ceph Cluster is in unknown state")

        self.check_result, self.check_message = known_states.get(ceph_health['status'], unknown)

    def check_storage(self, name):
        """Check the usage of storage ``name`` on the node; CRITICAL if it does not exist."""
        node = self.options.node
        storages = self.request(self.get_url('nodes/{}/storage'.format(node)))

        # check if storage exists
        for entry in storages:
            if entry['storage'] == name:
                break
        else:
            self.check_result = CheckState.CRITICAL
            self.check_message = "Storage '{}' doesn't exist on node '{}'".format(name, node)
            return

        status_url = self.get_url('nodes/{}/storage/{}/status'.format(node, name))
        self.check_api_value(status_url, "Usage of storage '{}' is".format(name))

    def check_version(self):
        """Compare the PVE version reported by the API with the optional minimum version.

        UNKNOWN when the API reports no version, CRITICAL when it is lower
        than ``min_version``; otherwise the result is left unchanged.
        """
        data = self.request(self.get_url('version'))
        current = data['version']

        if not current:
            self.check_result = CheckState.UNKNOWN
            self.check_message = "Unable to determine pve version"
            return

        minimum = self.options.min_version
        if minimum and version.parse(minimum) > version.parse(current):
            self.check_result = CheckState.CRITICAL
            self.check_message = "Current pve version '{}' ({}) is lower than the min. required version '{}'".format(
                current, data['repoid'], minimum)
            return

        self.check_message = "Your pve instance version '{}' ({}) is up to date".format(current, data['repoid'])

    def check_memory(self):
        """Evaluate the ``memory`` member of the node status."""
        self.check_api_value(
            self.get_url('nodes/{}/status'.format(self.options.node)),
            'Memory usage is',
            key='memory',
        )

    def check_swap(self):
        """Evaluate the ``swap`` member of the node status."""
        self.check_api_value(
            self.get_url('nodes/{}/status'.format(self.options.node)),
            'Swap usage is',
            key='swap',
        )

    def check_cpu(self):
        """Evaluate the ``cpu`` member of the node status."""
        self.check_api_value(
            self.get_url('nodes/{}/status'.format(self.options.node)),
            'CPU usage is',
            key='cpu',
        )

    def check_io_wait(self):
        """Evaluate the ``wait`` member of the node status (perfdata label ``wait``)."""
        self.check_api_value(
            self.get_url('nodes/{}/status'.format(self.options.node)),
            'IO wait is',
            key='wait',
            perfkey='wait',
        )

    def check_thresholds(self, value, message, **kwargs):
        """Evaluate one or more values against the thresholds and set result and message.

        Args:
            value: A single value (checked against the unnamed thresholds) or
                a dict mapping metric name to value.
            message: Message used when no threshold is violated, and as the
                fallback for the warning/critical messages.
            **kwargs:
                lowerValue: When true, values *below* a threshold violate it.
                messageWarning / messageCritical: Messages for those states.

        An already CRITICAL result is never downgraded to WARNING.
        """
        lower = kwargs.get('lowerValue', False)
        values = value if isinstance(value, dict) else {None: value}

        is_warning = False
        is_critical = False
        for metric, metric_value in values.items():
            limit_warning = self.threshold_warning(metric)
            if limit_warning is not None and not is_warning:
                is_warning = limit_warning.check(metric_value, lower)

            limit_critical = self.threshold_critical(metric)
            if limit_critical is not None and not is_critical:
                is_critical = limit_critical.check(metric_value, lower)

        if is_critical:
            self.check_result = CheckState.CRITICAL
            self.check_message = kwargs.get('messageCritical', message)
        elif is_warning:
            # Callers may already have set CRITICAL (e.g. unexpected VM state);
            # a warning threshold must not lower that severity.
            if self.check_result != CheckState.CRITICAL:
                self.check_result = CheckState.WARNING
            self.check_message = kwargs.get('messageWarning', message)
        else:
            self.check_message = message

    def scale_value(self, value):
        """Divide ``value`` by the factor of the configured unit.

        Raises:
            ValueError: if the configured unit is not a key of UNIT_SCALE.
                (The former ``assert('wrong unit')`` asserted a non-empty
                string, which is always true, so ``None`` was returned.)
        """
        unit = self.options.unit
        if unit not in self.UNIT_SCALE:
            raise ValueError("unsupported unit: {}".format(unit))

        return value / self.UNIT_SCALE[unit]

    def threshold_warning(self, name: str):
        """Return the warning threshold for ``name``, else the unnamed default, else ``None``."""
        configured = self.options.threshold_warning
        if name in configured:
            return configured[name]
        return configured.get(None)

    def threshold_critical(self, name: str):
        """Return the critical threshold for ``name``, else the unnamed default, else ``None``."""
        configured = self.options.threshold_critical
        if name in configured:
            return configured[name]
        return configured.get(None)

    def get_value(self, value, total=None):
        """Return ``value`` as a percentage of ``total`` or scaled to the configured unit.

        Args:
            value: Number (or numeric string) to convert.
            total: When given, the result is ``value`` as a percentage of it;
                when ``None``, ``value`` is scaled with scale_value().

        Returns:
            The converted value rounded to two decimals. A ``total`` of zero
            yields ``0.0``: previously it was treated like "no total" and an
            absolute value was returned where a percentage was expected.
        """
        value = float(value)

        if total is None:
            return round(self.scale_value(value), 2)

        total = float(total)
        if total == 0:
            # nothing available (e.g. no swap configured): report 0 % used
            return 0.0

        return round(value / (total / 100), 2)

    def add_perfdata(self, name, value, **kwargs):
        """Append one perfdata entry ``name=value<unit>;warn;crit;min;max``.

        Args:
            name: Label; also used to look up the thresholds.
            value: Measured value.
            **kwargs: ``unit`` (default ``%``), ``min`` (default ``0``) and
                ``max`` (default empty).
        """
        limit_warning = self.threshold_warning(name)
        limit_critical = self.threshold_critical(name)

        fields = [
            '{}={}{}'.format(name, value, kwargs.get('unit', '%')),
            str(limit_warning.value) if limit_warning else '',
            str(limit_critical.value) if limit_critical else '',
            '{}'.format(kwargs.get('min', 0)),
            '{}'.format(kwargs.get('max', '')),
        ]
        self.perfdata.append(';'.join(fields))

    def get_perfdata(self):
        """Return the perfdata entries as ``|a b c``, or an empty string when there are none."""
        if not self.perfdata:
            return ''
        return '|' + ' '.join(self.perfdata)

    def check(self):
        """Run the check selected by ``options.mode`` and print the result.

        The result starts as OK; the selected check may change it. An unknown
        mode is reported as UNKNOWN. Ends via check_output().
        """
        self.check_result = CheckState.OK
        mode = self.options.mode

        # checks that take no argument
        plain_checks = {
            'cluster': self.check_cluster_status,
            'version': self.check_version,
            'memory': self.check_memory,
            'swap': self.check_swap,
            'io_wait': self.check_io_wait,
            'disk-health': self.check_disks,
            'cpu': self.check_cpu,
            'services': self.check_services,
            'updates': self.check_updates,
            'subscription': self.check_subscription,
            'replication': self.check_replication,
            'ceph-health': self.check_ceph_health,
        }
        # checks that receive the value of --name
        named_checks = {
            'storage': self.check_storage,
            'zfs-health': self.check_zfs_health,
            'zfs-fragmentation': self.check_zfs_fragmentation,
        }

        if mode in plain_checks:
            plain_checks[mode]()
        elif mode in named_checks:
            named_checks[mode](self.options.name)
        elif mode in ['vm', 'vm_status']:
            # the guest is identified by name if given, otherwise by id
            idx = self.options.name if self.options.name else self.options.vmid

            vm_kwargs = {'only_status': mode == 'vm_status'}
            if self.options.expected_vm_status:
                vm_kwargs['expected_state'] = self.options.expected_vm_status
            self.check_vm_status(idx, **vm_kwargs)
        else:
            self.output(CheckState.UNKNOWN, "Check mode '{}' not known".format(mode))

        self.check_output()

    def parse_args(self):
        """Parse and validate the command line and store it in ``self.options``.

        Besides the checks argparse performs itself (it terminates the process
        on an invalid command line), this verifies that mode-specific options
        are present (reported as UNKNOWN through output()) and that warning
        and critical thresholds are ordered consistently: critical must not be
        below warning, or not above it for the ``subscription`` mode, where
        lower values are worse.
        """
        p = argparse.ArgumentParser(description='Check command for PVE hosts via API')

        api_opts = p.add_argument_group('API Options')

        api_opts.add_argument("-e", "--api-endpoint", required=True, help="PVE api endpoint hostname")
        # type=int rejects a non-numeric port at parse time instead of
        # producing a broken URL later on
        api_opts.add_argument("--api-port", required=False, type=int, help="PVE api endpoint port")

        api_opts.add_argument("-u", "--username", dest='api_user', required=True,
                              help="PVE api user (e.g. icinga2@pve or icinga2@pam, depending on which backend you "
                                   "have chosen in proxmox)")

        group = api_opts.add_mutually_exclusive_group(required=True)
        group.add_argument("-p", "--password", dest='api_password', help="PVE API user password")
        group.add_argument("-t", "--api-token", dest="api_token", help="PVE API token (format: TOKEN_ID=TOKEN_SECRET)")

        api_opts.add_argument("-k", "--insecure", dest='api_insecure', action='store_true', default=False,
                              help="Don't verify HTTPS certificate")

        api_opts.set_defaults(api_port=8006)

        check_opts = p.add_argument_group('Check Options')

        check_opts.add_argument("-m", "--mode",
                                choices=(
                                    'cluster', 'version', 'cpu', 'memory', 'swap', 'storage', 'io_wait', 'updates', 'services',
                                    'subscription', 'vm', 'vm_status', 'replication', 'disk-health', 'ceph-health',
                                    'zfs-health', 'zfs-fragmentation'),
                                required=True,
                                help="Mode to use.")

        check_opts.add_argument('-n', '--node', dest='node',
                                help='Node to check (necessary for all modes except cluster and version)')

        check_opts.add_argument('--name', dest='name',
                                help='Name of storage, vm, or container')

        check_opts.add_argument('--vmid', dest='vmid', type=int,
                                help='ID of virtual machine or container')

        check_opts.add_argument('--expected-vm-status', choices=('running', 'stopped', 'paused'),
                                help='Expected VM status')

        check_opts.add_argument('--ignore-vm-status', dest='ignore_vm_status', action='store_true',
                                help='Ignore VM status in checks',
                                default=False)

        check_opts.add_argument('--ignore-service', dest='ignore_services', action='append', metavar='NAME',
                                help='Ignore service NAME in checks', default=[])

        check_opts.add_argument('--ignore-disk', dest='ignore_disks', action='append', metavar='NAME',
                                help='Ignore disk NAME in health check', default=[])

        check_opts.add_argument('-w', '--warning', dest='threshold_warning', type=CheckThreshold.threshold_type,
                                default={}, help='Warning threshold for check value. Multiple thresholds with name:value,name:value')
        check_opts.add_argument('-c', '--critical', dest='threshold_critical', type=CheckThreshold.threshold_type,
                                default={}, help='Critical threshold for check value. Multiple thresholds with name:value,name:value')
        check_opts.add_argument('-M', dest='values_mb', action='store_true', default=False,
                                help='Values are shown in the unit which is set with --unit (if available). Thresholds are also treated in this unit')
        check_opts.add_argument('-V', '--min-version', dest='min_version', type=str,
                                help='The minimal pve version to check for. Any version lower than this will return '
                                     'CRITICAL.')

        check_opts.add_argument('--unit', choices=self.UNIT_SCALE.keys(), default='MiB', help='Unit which is used for performance data and other values')

        options = p.parse_args()

        if not options.node and options.mode not in ['cluster', 'vm', 'vm_status', 'version', 'ceph-health']:
            p.print_usage()
            message = "{}: error: --mode {} requires node name (--node)".format(p.prog, options.mode)
            self.output(CheckState.UNKNOWN, message)

        if not options.vmid and not options.name and options.mode in ('vm', 'vm_status'):
            p.print_usage()
            message = "{}: error: --mode {} requires either vm name (--name) or id (--vmid)".format(p.prog,
                                                                                                    options.mode)
            self.output(CheckState.UNKNOWN, message)

        if not options.name and options.mode == 'storage':
            p.print_usage()
            message = "{}: error: --mode {} requires storage name (--name)".format(p.prog, options.mode)
            self.output(CheckState.UNKNOWN, message)

        def compare_thresholds(threshold_warning, threshold_critical, comparator):
            """Return True if every comparable warning/critical pair satisfies ``comparator``.

            For each metric the named threshold is used, falling back to the
            unnamed default (key ``None``). Metrics lacking either side are
            skipped.
            """
            for key in set(threshold_warning) | set(threshold_critical):
                warning = threshold_warning.get(key, threshold_warning.get(None))
                critical = threshold_critical.get(key, threshold_critical.get(None))
                if warning is None or critical is None:
                    continue
                if not comparator(warning, critical):
                    return False

            return True

        if options.threshold_warning and options.threshold_critical:
            if options.mode != 'subscription' and not compare_thresholds(
                    options.threshold_warning, options.threshold_critical, lambda w, c: w <= c):
                p.error("Critical value must be greater than warning value")
            elif options.mode == 'subscription' and not compare_thresholds(
                    options.threshold_warning, options.threshold_critical, lambda w, c: w >= c):
                p.error("Critical value must be lower than warning value")

        self.options = options

    def __init__(self):
        """Parse the command line and prepare the API authentication.

        With a password a ticket is requested and sent as the
        ``PVEAuthCookie`` cookie; with an API token an ``Authorization``
        header is set instead.
        """
        self.options = {}
        self.ticket = None
        self.perfdata = []
        self.check_result = CheckState.UNKNOWN
        self.check_message = ""

        self.__headers = {}
        self.__cookies = {}

        self.parse_args()
        opts = self.options

        if opts.api_insecure:
            # disable urllib3 warning about insecure requests
            urllib3 = requests.packages.urllib3
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        if opts.api_password is not None:
            self.__cookies['PVEAuthCookie'] = self.get_ticket()
        elif opts.api_token is not None:
            self.__headers["Authorization"] = "PVEAPIToken={}!{}".format(opts.api_user, opts.api_token)

if __name__ == "__main__":
    # Run the check only when executed as a script, so that importing this
    # module does not parse the command line or contact the API.
    pve = CheckPVE()
    pve.check()