diff --git a/check_mysql_slave.py b/check_mysql_slave.py
new file mode 100755
index 0000000..5f4010d
--- /dev/null
+++ b/check_mysql_slave.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+"""
+Icinga2/Nagios check for MySQL slave replication health.
+
+## Usage example ##
+
+Target delay = 300 seconds
+Warning deviation = 10%
+Critical deviation = 15%
+
+This will set the warning levels to 330 and 270 seconds. If the delay rises above the upper level or drops below the lower level, the check returns WARNING.
+The critical levels will be 345 and 255 seconds.
+"""
+import argparse
+import sys
+import traceback
+
+import mysql.connector
+
+from checker import nagios, print_icinga2_check_status
+from checker.types import try_int
+
+
+def _deviation_bounds(target_delay, deviation_percent):
+    """Return the (lower, upper) delay bounds for a percentage deviation from the target."""
+    fraction = deviation_percent / 100
+    return target_delay * (1 - fraction), target_delay * (1 + fraction)
+
+
+def _check_target_delay(replication_delay, args):
+    """
+    Compare the measured delay against the configured target delay.
+
+    Returns a (state, message) tuple; message is None when the delay is within bounds.
+    The critical band is tested first so it is never reported as a mere warning.
+    """
+    bands = (
+        (nagios.STATE_CRIT, args.critical_deviation),
+        (nagios.STATE_WARN, args.warning_deviation),
+    )
+    for state, deviation in bands:
+        lower, upper = _deviation_bounds(args.target_delay, deviation)
+        if replication_delay >= upper:
+            # Further behind the master than the target delay allows.
+            return state, 'Replication is delayed!'
+        if replication_delay <= lower:
+            # Closer to the master than the target delay intends.
+            return state, 'Replication is ahead???'
+    return nagios.STATE_OK, None
+
+
+def main(args):
+    """
+    Run SHOW SLAVE STATUS against args.host, print the check result and exit with its state.
+
+    CRITICAL: a replication thread is not running, an IO/SQL error is recorded, the delay
+    is unknown, or the delay is beyond the critical deviation. WARNING: the delay is beyond
+    the warning deviation. UNKNOWN: the status could not be retrieved.
+    """
+    cursor = cnx = None
+    try:
+        cnx = mysql.connector.connect(user=args.username, password=args.password, host=args.host)
+        cursor = cnx.cursor(dictionary=True)
+        cursor.execute("SHOW SLAVE STATUS")
+        slave_status = cursor.fetchone()
+
+        if slave_status is None:
+            print("UNKNOWN - Could not retrieve slave status")
+            sys.exit(nagios.STATE_UNKNOWN)
+
+        slave_io_running = slave_status['Slave_IO_Running']
+        slave_sql_running = slave_status['Slave_SQL_Running']
+        # Seconds_Behind_Master is NULL while replication is stopped. Keep it as None so the
+        # checks below report CRITICAL instead of try_int() failing and the check going UNKNOWN.
+        raw_delay = slave_status['Seconds_Behind_Master']
+        replication_delay = None if raw_delay is None else try_int(raw_delay)
+        last_io_error = slave_status['Last_IO_Error']
+        last_sql_error = slave_status['Last_SQL_Error']
+
+        exit_code = nagios.STATE_OK
+        exit_msg = []
+
+        if slave_sql_running != 'Yes':
+            exit_code = nagios.STATE_CRIT
+            exit_msg.append('Slave SQL is not running!')
+        if slave_io_running != 'Yes':
+            exit_code = nagios.STATE_CRIT
+            exit_msg.append('Slave IO is not running!')
+        if last_sql_error:
+            exit_code = nagios.STATE_CRIT
+            exit_msg.append(f'Last SQL Error: {last_sql_error.rstrip(".")}.')
+        if last_io_error:
+            exit_code = nagios.STATE_CRIT
+            exit_msg.append(f'Last IO Error: {last_io_error.rstrip(".")}.')
+
+        if replication_delay is None:
+            exit_code = nagios.STATE_CRIT
+            exit_msg.append('Replication delay is unknown!')
+        elif args.target_delay:
+            delay_state, delay_msg = _check_target_delay(replication_delay, args)
+            # Only ever raise the severity: a delay WARNING must not mask an earlier CRITICAL.
+            if delay_state == nagios.STATE_CRIT or exit_code == nagios.STATE_OK:
+                exit_code = delay_state
+            if delay_msg:
+                exit_msg.append(delay_msg)
+
+        if exit_code == nagios.STATE_OK:
+            exit_msg.append('Slave is healthy! Slave SQL: running. Slave IO: running.')
+
+        perfdata = None
+        if replication_delay is not None:
+            exit_msg.append(f'Slave is {replication_delay} seconds behind master.')
+            perfdata = {
+                "replication_delay": {
+                    "value": replication_delay,
+                    "min": 0,
+                    "unit": "s",
+                },
+            }
+
+        text_result = ' '.join(exit_msg)
+        print_icinga2_check_status(text_result, exit_code, perfdata)
+        sys.exit(exit_code)
+
+    except mysql.connector.Error as e:
+        print("UNKNOWN - Could not connect to database!")
+        print(e)
+        sys.exit(nagios.STATE_UNKNOWN)
+    except Exception as e:
+        print(f"UNKNOWN - {e}")
+        traceback.print_exc()
+        sys.exit(nagios.STATE_UNKNOWN)
+
+    finally:
+        if cursor:
+            cursor.close()
+        if cnx:
+            cnx.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Check a MySQL slave.')
+    parser.add_argument('--host', required=True, help='The IP of the slave to connect to.')
+    parser.add_argument('--username', required=True, help='Username.')
+    parser.add_argument('--password', required=True, help='Password.')
+    parser.add_argument('--target-delay', default=None, type=int, help='The target delay in seconds.')
+    parser.add_argument('--warning-deviation', default=10, type=int, help='If the delay deviates more than this percentage from the target delay, return warning.')
+    parser.add_argument('--critical-deviation', default=15, type=int, help='If the delay deviates more than this percentage from the target delay, return critical.')
+    args = parser.parse_args()
+    main(args)
diff --git a/checker/result.py b/checker/result.py
index d02c724..e256e37 100644
--- a/checker/result.py
+++ b/checker/result.py
@@ -85,6 +85,7 @@ def print_icinga2_check_status(text_result: str, return_code: int, perfdata=None
             "crit": 100,
             "min": 0,
             "max": 200,
+            "unit": "s",
         },
     }
     print_icinga2_check_status(sample_text_result, sample_return_code, sample_perfdata)
diff --git a/checker/types.py b/checker/types.py
index db1a067..58a3b04 100644
--- a/checker/types.py
+++ b/checker/types.py
@@ -1,14 +1,20 @@
 from typing import Union
 
 
-def try_float(value: str) -> Union[int, float, str]:
+def try_float(value: str) -> float:
     try:
         return float(value)
     except:
         pass
     try:
-        return int(value)
+        return float(int(value))
     except:
         pass
     raise ValueError(f"Could not convert {value} to float or int")
-    # return value
+
+
+def try_int(value: str) -> int:
+    try:
+        return int(value)
+    except (TypeError, ValueError, OverflowError):
+        raise ValueError(f"Could not convert {value} to int")
diff --git a/requirements-difficult.txt b/requirements-difficult.txt
index 839d0f3..e3c6f57 100644
--- a/requirements-difficult.txt
+++ b/requirements-difficult.txt
@@ -1 +1,2 @@
 tiktoken==0.5.2
+mysql-connector-python==8.3.0
\ No newline at end of file