CORE-POS/IS4C

View on GitHub
scripts/check_replica.php

Summary

Maintainability
A
0 mins
Test Coverage
<?php
/**
 * Description: This script checks the replication status on each configured
 *              host, printing "[<host>] BAD: <description>" or
 *              "[<host>] OK: <description>" for failure or success respectively.
 *              The possible failures could include:
 *              1) connection failure
 *              2) query failure (permissions, network, etc.)
 *              3) fetch failure (???)
 *              4) replica sql or io thread is not running
 *              5) Unknown primary state (seconds_behind_master is null)
 *              6) seconds_behind_master has exceeded the configured threshold
 *
 *              If none of these conditions occur, we assume success and return
 *              an "OK" response; otherwise we include the error we can find
 *              (the connection or query error message, or a problem
 *               description).  Every line starts with the host key in square
 *              brackets, so a monitoring system need only check for:
 *              /^\[[^\]]+\] BAD/ (alert) or /^\[[^\]]+\] OK/ (everybody happy)
 */
 
/* **************************
 * Change related value below
 * **************************
 */
    // Replica hosts to check, as label => address. The label is shown in the
    // script output and in the alert mail subject; the address is what the
    // script connects to. Uncomment or add entries as needed.
    $host = array(
        // 'cr-1' => '192.168.0.81',
        // 'cr-2' => '192.168.0.82',
        // 'cr-3' => '192.168.0.83',
        // 'cr-4' => '192.168.0.84',
        // 'cr-5' => '192.168.0.85',
        // 'jp-1' => '192.168.100.81',
        // 'jp-2' => '192.168.100.82',
        // 'jp-3' => '192.168.100.83',
        // 'jp-4' => '192.168.100.84',
        // 'jp-5' => '192.168.100.85',
        'cr-test' => '192.168.0.87',
    );

    // MySQL credentials; the same pair is used for every host above.
    $user = '';
    $pass = '';

    // Recipient and "From" address of the alert e-mail.
    $mailto = '';
    $mailfrom = '';
 
/* ******************************************
 * No need to change anything below this line
 * ******************************************
*/

error_reporting(E_ALL);
header("Content-Type: text/plain"); # Not HTML

// Make mysqli report failures through return values on every PHP version
// (newer PHP versions throw exceptions by default).
mysqli_report(MYSQLI_REPORT_OFF);

/**
 * Connect to a host, run SHOW SLAVE STATUS and return its single row.
 *
 * @param string $host_ip Address to connect to.
 * @param string $user    MySQL user.
 * @param string $pass    MySQL password.
 * @return array Two elements: the status row (array) and null on success,
 *               or null and an error description (string) on failure.
 */
function check_replica_fetch_status($host_ip, $user, $pass) {
    $link = mysqli_connect($host_ip, $user, $pass);
    if (!$link) {
        return array(null, "Connection Failed " . mysqli_connect_error());
    }

    $result = mysqli_query($link, "SHOW SLAVE STATUS");
    if (!$result) {
        // Read the error before closing the connection it belongs to.
        $error = "Query failed - " . mysqli_error($link);
        mysqli_close($link);
        return array(null, $error);
    }

    $status = mysqli_fetch_assoc($result);
    mysqli_free_result($result);
    mysqli_close($link);

    // No row at all means there is nothing to evaluate the checks against.
    if (!is_array($status)) {
        return array(null, "Fetch failed - no replica status row returned");
    }

    return array($status, null);
}

/**
 * Evaluate the replication checks against a SHOW SLAVE STATUS row.
 *
 * Only the two thread checks are active. The Last_Errno and
 * Seconds_Behind_Master (NULL / lag threshold) checks were commented out in
 * the previous version of this script and remain disabled.
 *
 * @param array $status Row returned by SHOW SLAVE STATUS.
 * @return array List of problem descriptions; empty when every check passes.
 */
function check_replica_failed_checks(array $status) {
    $must_be_yes = array(
        'Slave_IO_Running'  => 'Replica IO Thread is not running',
        'Slave_SQL_Running' => 'Replica SQL Thread is not running',
    );

    $failed = array();
    foreach ($must_be_yes as $field => $err_msg) {
        if (!isset($status[$field]) || $status[$field] !== 'Yes') {
            $failed[] = $err_msg;
        }
    }
    return $failed;
}

// When this file exists, the checks, reset attempt and alert mail are skipped.
$skip_file = 'skip_alerts';

foreach ($host as $key => $value) {

    $mailsubject = "[".$key."] SLAVE REPLICATION ALERT";
    $mailheaders = "From:" . $mailfrom;

    list($status, $error) = check_replica_fetch_status($value, $user, $pass);
    if ($error !== null) {
        print "[".$key."] BAD: ".$error."\n";
        // Keep going: one unreachable host must not hide the others.
        continue;
    }

    $epic_fail = false;
    if (!is_file($skip_file)) {
        $failures = check_replica_failed_checks($status);

        if (count($failures) > 0) {
            print "[".$key."] BAD: Replica failure detected.  Attempting RESET.\n";

            // Every argument is shell-escaped. STOP/START SLAVE is used because
            // the older "SLAVE STOP" / "SLAVE START" spelling is not accepted by
            // current MySQL servers. --password= (even when empty) keeps the
            // client from prompting interactively.
            $reset = "mysql --user=" . escapeshellarg($user)
                   . " --password=" . escapeshellarg($pass)
                   . " --host=" . escapeshellarg($value)
                   . " -e " . escapeshellarg("STOP SLAVE; RESET SLAVE; START SLAVE");
            $reset_output = array();
            exec($reset, $reset_output, $reset_status);

            if ($reset_status !== 0) {
                // $failures still holds the original problems, so the alert
                // below is sent and no "OK" line is printed.
                print "[".$key."] BAD: RESET FAILED :-( \n";
            } else {
                print "[".$key."] RESET Successful.\n";
                // Give the replica threads a moment to start, then look at a
                // fresh status row instead of the one fetched before the reset.
                sleep(3);
                list($status, $error) = check_replica_fetch_status($value, $user, $pass);
                $failures = ($error === null)
                    ? check_replica_failed_checks($status)
                    : array($error);
            }

            if (count($failures) > 0) {
                $mailmessage = "";
                foreach ($failures as $reason) {
                    $mailmessage .= "[".$key."] BAD: Replication failed. Reason: " . $reason . "\n";
                }
                // Without a recipient there is nobody to mail.
                if ($mailto !== "") {
                    mail($mailto, $mailsubject, $mailmessage, $mailheaders);
                }
                print $mailmessage . "\n";
                $epic_fail = true;
            }
        }
    }

    if (!$epic_fail) {
        print "[".$key."] OK: Checks all completed successfully.\n";
    }
}

/**
 * Restart replication on a host by running STOP SLAVE, RESET SLAVE and
 * START SLAVE through the `mysql` command-line client.
 *
 * @param string      $host_ip Address of the replica to reset.
 * @param string|null $user    MySQL user; when null, the script-level $user
 *                             setting is used.
 * @param string|null $pass    MySQL password; when null, the script-level
 *                             $pass setting is used.
 * @return bool true when the client exited with status 0, false otherwise.
 */
function reset_replica($host_ip, $user = null, $pass = null) {
    // The credentials live at script level and are not visible inside a
    // function unless fetched explicitly.
    if ($user === null) {
        $user = isset($GLOBALS['user']) ? $GLOBALS['user'] : '';
    }
    if ($pass === null) {
        $pass = isset($GLOBALS['pass']) ? $GLOBALS['pass'] : '';
    }

    // Every argument is shell-escaped. STOP/START SLAVE replaces the older
    // "SLAVE STOP" / "SLAVE START" spelling, which current MySQL servers reject.
    // --password= (even when empty) keeps the client from prompting.
    $reset = "mysql --user=" . escapeshellarg($user)
           . " --password=" . escapeshellarg($pass)
           . " --host=" . escapeshellarg($host_ip)
           . " -e " . escapeshellarg("STOP SLAVE; RESET SLAVE; START SLAVE");

    $output = array();
    exec($reset, $output, $result);

    return $result === 0;
}

/**
 * Legacy alias kept for older callers; forwards to reset_replica().
 *
 * Raises an E_USER_NOTICE on every call to point callers at the new name.
 *
 * @deprecated Use reset_replica() instead.
 * @param string $host_ip Passed through unchanged to reset_replica().
 * @return mixed Whatever reset_replica() returns.
 */
function reset_slave($host_ip) {
    $notice = "Deprecated function called; use 'reset_replica' instead.";
    trigger_error($notice, E_USER_NOTICE);

    $outcome = reset_replica($host_ip);
    return $outcome;
}