sensu-plugins/sensu-plugins-rabbitmq

View on GitHub
bin/check-rabbitmq-node-health.rb

Summary

Maintainability
B
5 hrs
Test Coverage
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# RabbitMQ check node health plugin
# ===
#
# DESCRIPTION:
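# This plugin checks whether a RabbitMQ server node is healthy: it compares
# memory, socket and file descriptor usage against warning/critical
# thresholds and, optionally, reports triggered memory or disk alarms.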
#
# The plugin is based on the RabbitMQ cluster node health plugin by Tim Smith
#
# PLATFORMS:
#   Linux, Windows, BSD, Solaris
#
# DEPENDENCIES:
#   RabbitMQ rabbitmq_management plugin
#   gem: sensu-plugin
#   gem: rest-client
#
# LICENSE:
# Copyright 2012 Abhijith G <abhi@runa.com> and Runa Inc.
# Copyright 2014 Tim Smith <tim@cozy.co> and Cozy Services Ltd.
# Copyright 2015 Edward McLain <ed@edmclain.com> and Daxko, LLC.
#
# Released under the same terms as Sensu (the MIT license); see LICENSE
# for details.

require 'sensu-plugin/check/cli'
require 'json'
require 'rest_client'
require 'inifile'

# Main plugin class.
#
# Queries the RabbitMQ management API (/api/nodes) and evaluates the first
# node in the response: memory, socket and file descriptor usage are compared
# against warning/critical percentages, and (optionally) a raised memory or
# disk alarm forces a critical status.
class CheckRabbitMQNodeHealth < Sensu::Plugin::Check::CLI
  option :host,
         description: 'RabbitMQ host',
         short: '-w',
         long: '--host HOST',
         default: 'localhost'

  option :username,
         description: 'RabbitMQ username',
         short: '-u',
         long: '--username USERNAME',
         default: 'guest'

  option :password,
         description: 'RabbitMQ password',
         short: '-p',
         long: '--password PASSWORD',
         default: 'guest'

  option :port,
         description: 'RabbitMQ API port',
         short: '-P',
         long: '--port PORT',
         default: '15672'

  option :ssl,
         description: 'Enable SSL for connection to the API',
         long: '--ssl',
         boolean: true,
         default: false

  option :verify_ssl_off,
         description: 'Do not check validity of SSL cert. Use for self-signed certs, etc (insecure)',
         long: '--verify_ssl_off',
         boolean: true,
         default: false

  option :memwarn,
         description: 'Warning % of mem usage vs high watermark',
         short: '-m',
         long: '--mwarn PERCENT',
         proc: proc(&:to_f),
         default: 80

  option :memcrit,
         description: 'Critical % of mem usage vs high watermark',
         short: '-c',
         long: '--mcrit PERCENT',
         proc: proc(&:to_f),
         default: 90

  option :fdwarn,
         description: 'Warning % of file descriptor usage vs high watermark',
         short: '-f',
         long: '--fwarn PERCENT',
         proc: proc(&:to_f),
         default: 80

  option :fdcrit,
         description: 'Critical % of file descriptor usage vs high watermark',
         short: '-F',
         long: '--fcrit PERCENT',
         proc: proc(&:to_f),
         default: 90

  option :socketwarn,
         description: 'Warning % of socket usage vs high watermark',
         short: '-s',
         long: '--swarn PERCENT',
         proc: proc(&:to_f),
         default: 80

  option :socketcrit,
         description: 'Critical % of socket usage vs high watermark',
         short: '-S',
         long: '--scrit PERCENT',
         proc: proc(&:to_f),
         default: 90

  # Compared against the literal string 'true'; any other value disables
  # the alarm checks.
  option :watchalarms,
         description: 'Sound critical if one or more alarms are triggered',
         short: '-a BOOLEAN',
         long: '--alarms BOOLEAN',
         default: 'true'

  # Optional ini file; when given, the username and password are read from
  # its [auth] section instead of the command line options.
  option :ini,
         description: 'Configuration ini file',
         short: '-i',
         long: '--ini VALUE'

  # Check entry point: maps the result hash from #node_healthy? onto the
  # matching Sensu exit method. Any unrecognised status is reported as unknown.
  def run
    res = node_healthy?

    case res['status']
    when 'ok'       then ok res['message']
    when 'warning'  then warning res['message']
    when 'critical' then critical res['message']
    else                 unknown res['message']
    end
  end

  # Evaluates the health of the first node reported by the management API.
  #
  # @return [Hash] with string keys 'status' ('ok', 'warning', 'critical' or
  #   'unknown') and 'message'. A refused connection is reported as
  #   'critical'; any other StandardError (including a failure to load the
  #   ini file) is reported as 'unknown' with the exception message.
  def node_healthy?
    nodeinfo = fetch_first_node
    result = evaluate_thresholds(usage_levels(nodeinfo))
    apply_alarms(result, nodeinfo)
  rescue Errno::ECONNREFUSED => e
    { 'status' => 'critical', 'message' => e.message }
  rescue StandardError => e
    { 'status' => 'unknown', 'message' => e.message }
  end

  private

  # Returns [username, password], taken from the [auth] section of the ini
  # file when --ini is given, otherwise from the command line options.
  def api_credentials
    return [config[:username], config[:password]] unless config[:ini]

    ini = IniFile.load(config[:ini])
    # Guard against a nil result (e.g. the path is not a readable file) so
    # the operator sees the real cause instead of a NoMethodError on nil.
    raise "Unable to load ini file: #{config[:ini]}" if ini.nil?

    section = ini['auth']
    [section['username'], section['password']]
  end

  # Fetches /api/nodes and returns the first node entry of the parsed JSON.
  # Raises when the response contains no node at all.
  def fetch_first_node
    username, password = api_credentials
    scheme = config[:ssl] ? 'https' : 'http'
    resource = RestClient::Resource.new(
      "#{scheme}://#{config[:host]}:#{config[:port]}/api/nodes",
      user: username,
      password: password,
      # The CLI flag is "verification off", so it is inverted here.
      verify_ssl: !config[:verify_ssl_off]
    )

    nodeinfo = JSON.parse(resource.get)[0]
    raise 'RabbitMQ API returned no node information' if nodeinfo.nil?

    nodeinfo
  end

  # Formats used/total as a percentage string with two decimals.
  def usage_percent(used, total)
    format('%.2f', used.fdiv(total) * 100)
  end

  # Builds the list of usage readings in the order they are evaluated
  # (memory, sockets, file descriptors), each paired with the config keys of
  # its warning and critical thresholds.
  def usage_levels(nodeinfo)
    # Non-numeric value fails silently to handle fd_used = 'unknown' on OSX;
    # the reading is then nil and compares as 0.0 against the thresholds.
    fd_percent =
      if nodeinfo['fd_used'].is_a?(Numeric)
        usage_percent(nodeinfo['fd_used'], nodeinfo['fd_total'])
      end

    [
      { label: 'Memory', warn: :memwarn, crit: :memcrit,
        percent: usage_percent(nodeinfo['mem_used'], nodeinfo['mem_limit']) },
      { label: 'Socket', warn: :socketwarn, crit: :socketcrit,
        percent: usage_percent(nodeinfo['sockets_used'], nodeinfo['sockets_total']) },
      { label: 'File Descriptor', warn: :fdwarn, crit: :fdcrit,
        percent: fd_percent }
    ]
  end

  # Compares every reading against its thresholds. All critical thresholds
  # are checked before any warning threshold, and only the first breach is
  # reported.
  def evaluate_thresholds(levels)
    breach = levels.find { |level| level[:percent].to_f >= config[level[:crit]] }
    if breach
      return { 'status' => 'critical',
               'message' => "#{breach[:label]} usage is critical: #{breach[:percent]}%" }
    end

    breach = levels.find { |level| level[:percent].to_f >= config[level[:warn]] }
    if breach
      return { 'status' => 'warning',
               'message' => "#{breach[:label]} usage is at warning: #{breach[:percent]}%" }
    end

    { 'status' => 'ok', 'message' => 'Server is healthy' }
  end

  # When alarm watching is enabled, escalates the result to critical and
  # appends a note for each alarm the node reports as raised.
  def apply_alarms(result, nodeinfo)
    return result unless config[:watchalarms] == 'true'

    notes = []
    # Only a JSON boolean true counts as a raised alarm.
    notes << ' Memory Alarm ON' if nodeinfo['mem_alarm'] == true
    notes << ' Disk Alarm ON' if nodeinfo['disk_free_alarm'] == true
    return result if notes.empty?

    { 'status' => 'critical', 'message' => result['message'] + notes.join }
  end
end