coyled/hdfshealth

View on GitHub
lib/hdfshealth/plugins/check_last_checkpoint_time.rb

Summary

Maintainability
A
0 mins
Test Coverage
#
# CheckLastCheckpointTime: work out how long ago the last checkpoint
# was taken and complain as it gets old.  a stale checkpoint could
# indicate a broken secondary namenode (if you have one) or a broken
# standby namenode in an HA pair (if you have one)
#
class CheckLastCheckpointTime < HDFSHealth::Plugin

    require_relative 'load_nn_jmx'

    # sets @status and @message from the age of the last checkpoint
    # found in the JMX data loaded for the given namenode.
    #
    # @status is 'OK' while the checkpoint is younger than 43200
    # seconds (12 hours), 'WARNING' while it is younger than 86400
    # seconds (24 hours) and 'CRITICAL' from then on.  the method's
    # value is the message string it assigns last.
    def run(namenode)
        fs_namesystem = LoadNNJMX.jmx(namenode)['FSNamesystem']

        # LastCheckpointTime arrives in milliseconds; integer division
        # truncates it to whole seconds so it lines up with Time.now.to_i
        checkpoint_epoch = fs_namesystem['LastCheckpointTime'].to_i / 1000
        age_in_seconds = Time.now.to_i - checkpoint_epoch

        # checked from the most severe threshold downwards
        @status =
            if age_in_seconds >= 86400
                'CRITICAL'
            elsif age_in_seconds >= 43200
                'WARNING'
            else
                'OK'
            end

        # integer division rounds down, hence "at least" in the message
        age_in_hours = age_in_seconds / 3600
        @message = "last checkpoint is at least #{age_in_hours} hours old"
    end

end