check_mysql_slave: make it work when the slave is killed
This commit is contained in:
parent
404d33fc81
commit
f76ec953b9
|
@ -35,18 +35,29 @@ def main(args):
|
|||
cnx = mysql.connector.connect(user=args.username, password=args.password, host=args.host)
|
||||
cursor = cnx.cursor(dictionary=True)
|
||||
cursor.execute("SHOW SLAVE STATUS")
|
||||
slave_status = cursor.fetchone()
|
||||
|
||||
slave_status = cursor.fetchone()
|
||||
if slave_status is None:
|
||||
print("UNKNOWN - Could not retrieve slave status")
|
||||
sys.exit(3)
|
||||
sys.exit(nagios.STATE_UNKNOWN)
|
||||
|
||||
slave_io_running = slave_status['Slave_IO_Running']
|
||||
slave_sql_running = slave_status['Slave_SQL_Running']
|
||||
replication_delay = try_int(slave_status['Seconds_Behind_Master'])
|
||||
last_io_error = slave_status['Last_IO_Error']
|
||||
last_sql_error = slave_status['Last_SQL_Error']
|
||||
if not slave_io_running:
|
||||
print("UNKNOWN - Could not retrieve Slave_IO_Running")
|
||||
sys.exit(nagios.STATE_UNKNOWN)
|
||||
|
||||
slave_sql_running = slave_status['Slave_SQL_Running']
|
||||
if not slave_sql_running:
|
||||
print("UNKNOWN - Could not retrieve Slave_SQL_Running")
|
||||
sys.exit(nagios.STATE_UNKNOWN)
|
||||
|
||||
last_io_error = slave_status['Last_IO_Error']
|
||||
if not last_io_error:
|
||||
print("UNKNOWN - Could not retrieve Last_IO_Error")
|
||||
sys.exit(nagios.STATE_UNKNOWN)
|
||||
|
||||
replication_delay = -1
|
||||
perfdata = {}
|
||||
exit_code = nagios.STATE_OK
|
||||
exit_msg = []
|
||||
|
||||
|
@ -56,43 +67,56 @@ def main(args):
|
|||
if slave_io_running != 'Yes':
|
||||
exit_code = nagios.STATE_CRIT
|
||||
exit_msg.append('Slave IO is not running!')
|
||||
if last_sql_error:
|
||||
exit_code = nagios.STATE_CRIT
|
||||
exit_msg.append(f'Last SQL Error: {last_sql_error.strip(".")}.')
|
||||
if last_io_error:
|
||||
exit_code = nagios.STATE_CRIT
|
||||
exit_msg.append(f'Last IO Error: {last_io_error.strip(".")}.')
|
||||
|
||||
if args.target_delay:
|
||||
warn_deviation_max = args.target_delay * (1 + (args.warning_deviation / 100))
|
||||
warn_deviation_min = args.target_delay * (1 - (args.warning_deviation / 100))
|
||||
crit_deviation_max = args.target_delay * (1 + (args.critical_deviation / 100))
|
||||
crit_deviation_min = args.target_delay * (1 - (args.critical_deviation / 100))
|
||||
if replication_delay <= crit_deviation_min:
|
||||
if exit_code == nagios.STATE_OK:
|
||||
# Only check these things if everything else is healthy.
|
||||
|
||||
last_sql_error = slave_status['Last_SQL_Error']
|
||||
if not last_sql_error:
|
||||
print("UNKNOWN - Could not retrieve Last_SQL_Error")
|
||||
sys.exit(nagios.STATE_UNKNOWN)
|
||||
if last_sql_error:
|
||||
exit_code = nagios.STATE_CRIT
|
||||
exit_msg.append('Replication is delayed!')
|
||||
if replication_delay >= crit_deviation_max:
|
||||
exit_code = nagios.STATE_CRIT
|
||||
exit_msg.append('Replication is ahead???')
|
||||
if replication_delay <= warn_deviation_min:
|
||||
exit_code = nagios.STATE_WARN
|
||||
exit_msg.append('Replication is delayed!')
|
||||
if replication_delay >= warn_deviation_max:
|
||||
exit_code = nagios.STATE_WARN
|
||||
exit_msg.append('Replication is ahead???')
|
||||
exit_msg.append(f'Last SQL Error: {last_sql_error.strip(".")}.')
|
||||
|
||||
if not slave_status['Seconds_Behind_Master']:
|
||||
print("UNKNOWN - Could not retrieve Seconds_Behind_Master")
|
||||
sys.exit(nagios.STATE_UNKNOWN)
|
||||
replication_delay = try_int(slave_status['Seconds_Behind_Master'])
|
||||
|
||||
if args.target_delay:
|
||||
warn_deviation_max = args.target_delay * (1 + (args.warning_deviation / 100))
|
||||
warn_deviation_min = args.target_delay * (1 - (args.warning_deviation / 100))
|
||||
crit_deviation_max = args.target_delay * (1 + (args.critical_deviation / 100))
|
||||
crit_deviation_min = args.target_delay * (1 - (args.critical_deviation / 100))
|
||||
if replication_delay <= crit_deviation_min:
|
||||
exit_code = nagios.STATE_CRIT
|
||||
exit_msg.append('Replication is delayed!')
|
||||
if replication_delay >= crit_deviation_max:
|
||||
exit_code = nagios.STATE_CRIT
|
||||
exit_msg.append('Replication is ahead???')
|
||||
if replication_delay <= warn_deviation_min:
|
||||
exit_code = nagios.STATE_WARN
|
||||
exit_msg.append('Replication is delayed!')
|
||||
if replication_delay >= warn_deviation_max:
|
||||
exit_code = nagios.STATE_WARN
|
||||
exit_msg.append('Replication is ahead???')
|
||||
|
||||
if exit_code == nagios.STATE_OK:
|
||||
exit_msg.append('Slave is healthy! Slave SQL: running. Slave IO: running.')
|
||||
|
||||
exit_msg.append(f'Slave is {replication_delay} seconds behind master.')
|
||||
|
||||
perfdata = {
|
||||
"replication_delay": {
|
||||
"value": replication_delay,
|
||||
"min": 0,
|
||||
"unit": "s",
|
||||
},
|
||||
}
|
||||
if replication_delay > -1:
|
||||
exit_msg.append(f'Slave is {replication_delay} seconds behind master.')
|
||||
perfdata = {
|
||||
"replication_delay": {
|
||||
"value": replication_delay,
|
||||
"min": 0,
|
||||
"unit": "s",
|
||||
},
|
||||
}
|
||||
|
||||
text_result = ' '.join(exit_msg)
|
||||
print_icinga2_check_status(text_result, exit_code, perfdata)
|
||||
|
|
Loading…
Reference in New Issue