sensu-plugins/sensu-plugins-consul

View on GitHub
bin/check-consul-failures.rb

Summary

Maintainability
B
4 hrs
Test Coverage
#! /usr/bin/env ruby
# frozen_string_literal: true

#
# OUTPUT:
#   plain text
#
# PLATFORMS:
#   Linux
#
# DEPENDENCIES:
#   gem: sensu-plugin
#   gem: rest-client
#
# Consul reports each member's state as a numeric code. The codes are defined
# in https://github.com/hashicorp/serf/blob/master/serf/serf.go
#
# StatusNone MemberStatus = iota  (0, "none")
# StatusAlive                     (1, "alive")
# StatusLeaving                   (2, "leaving")
# StatusLeft                      (3, "left")
# StatusFailed                    (4, "failed")
#

require 'erb'
require 'json'

require 'rest-client'
require 'sensu-plugin/check/cli'

# Sensu check that looks for failed members known to a Consul agent.
#
# The check reads the agent's /v1/agent/members endpoint and picks out the
# members whose status code is "failed". Unless --keep-failures is given,
# each failed member is removed with a force-leave request and the check
# reports OK. With --keep-failures the failed members are left in place and
# reported as WARNING, or as CRITICAL when --critical is set.
class ConsulStatus < Sensu::Plugin::Check::CLI
  # Member status code that marks a node as failed (StatusFailed in serf.go).
  STATUS_FAILED = 4

  # Value handed to RestClient's `timeout` option for every agent request.
  REQUEST_TIMEOUT = 5

  option :server,
         description: 'consul server',
         short: '-s SERVER',
         long: '--server SERVER',
         default: '127.0.0.1'

  option :port,
         description: 'consul http port',
         short: '-p PORT',
         long: '--port PORT',
         default: '8500'

  option :scheme,
         description: 'consul listener scheme',
         short: '-S SCHEME',
         long: '--scheme SCHEME',
         default: 'http'

  option :keep_failures,
         description: 'do not remove failing nodes',
         short: '-k',
         long: '--keep-failures',
         boolean: true,
         default: false

  option :critical,
         description: 'set state to critical',
         short: '-c',
         long: '--critical',
         boolean: true,
         default: false

  option :token,
         description: 'ACL token',
         long: '--token ACL_TOKEN'

  # Entry point of the check.
  #
  # NOTE: the status helpers (ok / warning / critical / unknown) inherited from
  # Sensu::Plugin::Check::CLI are expected to end the process, so every call
  # below behaves like an early return. The original implementation relied on
  # the same behaviour.
  def run
    response = consul_resource('/v1/agent/members').get
    critical 'Consul is not responding' unless response.code == 200

    failed = failed_node_names(response)
    ok 'All nodes are alive' if failed.empty?

    unless config[:keep_failures]
      failed.each { |name| force_leave(name) }
      ok 'All clear'
    end

    # The array is interpolated as-is so the message keeps its inspect format.
    message = "Found failed nodes: #{failed}"
    critical message if config[:critical]
    warning message
  rescue Errno::ECONNREFUSED
    critical 'Consul is not responding'
  rescue RestClient::RequestTimeout
    critical 'Consul Connection timed out'
  rescue RestClient::Exception => e
    unknown "Consul returned: #{e}"
  end

  private

  # Builds a RestClient resource for an agent API path, using the configured
  # scheme, server, port and ACL token.
  #
  # @param path [String] absolute API path, e.g. '/v1/agent/members'
  # @return [RestClient::Resource]
  def consul_resource(path)
    RestClient::Resource.new(
      "#{config[:scheme]}://#{config[:server]}:#{config[:port]}#{path}",
      timeout: REQUEST_TIMEOUT,
      headers: { 'X-Consul-Token' => config[:token] }
    )
  end

  # Extracts the names of all members whose status is STATUS_FAILED.
  #
  # @param response [String] JSON body returned by /v1/agent/members
  # @return [Array] the 'Name' value of every failed member
  def failed_node_names(response)
    JSON.parse(response)
        .select { |member| member['Status'] == STATUS_FAILED }
        .map { |member| member['Name'] }
  end

  # Announces and issues a force-leave request for one failed node.
  #
  # The name is percent-encoded because it becomes a URL path segment; a raw
  # space, '/', '?' or '#' would otherwise corrupt the request URL.
  #
  # @param name [String] node name as reported by the members endpoint
  def force_leave(name)
    puts "Removing failed node: #{name}"
    consul_resource("/v1/agent/force-leave/#{ERB::Util.url_encode(name)}").put
  end
end