sensu-plugins/sensu-plugins-elasticsearch

View on GitHub
bin/check-es-query-ratio.rb

Summary

Maintainability
C
1 day
Test Coverage
#! /usr/bin/env ruby
#
#   check-es-query-ratio
#
# DESCRIPTION:
#   This plugin checks ratio between results of two Elasticsearch queries
#
# OUTPUT:
#   plain text
#
# PLATFORMS:
#   Linux
#
# DEPENDENCIES:
#   gem: sensu-plugin
#   gem: elasticsearch
#   gem: aws_es_transport
#
# USAGE:
#   This example checks the ratio between the counts of two different queries
#   (dividend and divisor) at the host elasticsearch.service.consul for the past 90 minutes.
#   It will warn if the ratio is lower than 10 and go critical if the ratio is lower than 5.
#   (The invert flag warns if results are _below_ the critical and warning values)
#   check-es-query-ratio.rb -h elasticsearch.service.consul -Q "orders:*"
#     -q "orders:OK" --invert --types special_type -d 'logging-%Y.%m.%d'
#     --minutes-previous 90 -p 9200 -c 5 -w 10
#
#
# NOTES:
#
# LICENSE:
#
#   Released under the same terms as Sensu (the MIT license); see LICENSE
#   for details.
#

require 'sensu-plugin/check/cli'
require 'elasticsearch'
require 'time'
require 'uri'
require 'aws_es_transport'
require 'sensu-plugins-elasticsearch'

#
# ES Query Ratio
#
# Sensu check that runs two Elasticsearch count queries (a dividend and a
# divisor) and compares dividend / divisor against the warning and critical
# thresholds. The value is a plain ratio; it is not multiplied by 100.
class ESQueryRatio < Sensu::Plugin::Check::CLI
  include ElasticsearchCommon

  option :index,
         description: 'Elasticsearch indices to query.
         Comma-separated list of index names to search.
         Use `_all` or empty string to perform the operation on all indices.
         Accepts wildcards',
         short: '-i INDEX',
         long: '--indices INDEX'

  option :transport,
         long: '--transport TRANSPORT',
         description: 'Transport to use to communicate with ES. Use "AWS" for signed AWS transports.'

  option :region,
         long: '--region REGION',
         description: 'Region (necessary for AWS Transport)'

  option :types,
         description: 'Elasticsearch types to limit searches to, comma separated list.',
         long: '--types TYPES'

  option :timestamp_field,
         description: 'Field to use instead of @timestamp for query.',
         long: '--timestamp-field FIELD_NAME',
         default: '@timestamp'

  option :offset,
         description: 'Seconds before offset to end @timestamp against query.',
         long: '--offset OFFSET',
         proc: proc(&:to_i),
         default: 0

  option :ignore_unavailable,
         description: 'Ignore unavailable indices.',
         long: '--ignore-unavailable',
         boolean: true,
         default: true

  option :minutes_previous,
         description: 'Minutes before offset to check @timestamp against query.',
         long: '--minutes-previous MINUTES_PREVIOUS',
         proc: proc(&:to_i),
         default: 0

  option :hours_previous,
         description: 'Hours before offset to check @timestamp against query.',
         long: '--hours-previous HOURS_PREVIOUS',
         proc: proc(&:to_i),
         default: 0

  option :days_previous,
         description: 'Days before offset to check @timestamp against query.',
         long: '--days-previous DAYS_PREVIOUS',
         proc: proc(&:to_i),
         default: 0

  option :weeks_previous,
         description: 'Weeks before offset to check @timestamp against query.',
         long: '--weeks-previous WEEKS_PREVIOUS',
         proc: proc(&:to_i),
         default: 0

  option :months_previous,
         description: 'Months before offset to check @timestamp against query.',
         long: '--months-previous MONTHS_PREVIOUS',
         proc: proc(&:to_i),
         default: 0

  option :date_index,
         description: 'Elasticsearch time based index.
            Accepts format from http://ruby-doc.org/core-2.2.0/Time.html#method-i-strftime',
         short: '-d DATE_INDEX',
         long: '--date-index DATE_INDEX'

  option :date_repeat_daily,
         description: 'Elasticsearch date based index repeats daily.',
         long: '--repeat-daily',
         boolean: true,
         default: true

  option :date_repeat_hourly,
         description: 'Elasticsearch date based index repeats hourly.',
         long: '--repeat-hourly',
         boolean: true,
         default: false

  option :search_field,
         description: 'The Elasticsearch document field to search for your query string.',
         short: '-f FIELD',
         long: '--field FIELD',
         required: false,
         default: 'message'

  option :dividend,
         description: 'Elasticsearch query where percentage is calculated for',
         short: '-Q QUERY',
         long: '--dividend QUERY',
         required: true

  option :divisor,
         description: 'Elasticsearch query where percentage is calculated from',
         short: '-q QUERY',
         long: '--divisor QUERY',
         required: true

  option :host,
         description: 'Elasticsearch host',
         short: '-h HOST',
         long: '--host HOST',
         default: 'localhost'

  option :port,
         description: 'Elasticsearch port',
         short: '-p PORT',
         long: '--port PORT',
         proc: proc(&:to_i),
         default: 9200

  option :scheme,
         description: 'Elasticsearch connection scheme, defaults to https for authenticated connections',
         short: '-s SCHEME',
         long: '--scheme SCHEME'

  option :password,
         description: 'Elasticsearch connection password',
         short: '-P PASSWORD',
         long: '--password PASSWORD'

  option :user,
         description: 'Elasticsearch connection user',
         short: '-u USER',
         long: '--user USER'

  option :headers,
         description: 'A comma separated list of headers to pass to elasticsearch http client',
         short: '-H headers',
         long: '--headers headers',
         default: 'Content-Type: application/json'

  option :timeout,
         description: 'Elasticsearch query timeout in seconds',
         short: '-t TIMEOUT',
         long: '--timeout TIMEOUT',
         proc: proc(&:to_i),
         default: 30

  option :warn,
         short: '-w N',
         long: '--warn N',
         description: 'Result count WARNING threshold',
         proc: proc(&:to_f),
         default: 0

  option :crit,
         short: '-c N',
         long: '--crit N',
         description: 'Result count CRITICAL threshold',
         proc: proc(&:to_f),
         default: 0

  option :invert,
         long: '--invert',
         description: 'Invert thresholds',
         boolean: true

  option :divisor_zero_ok,
         short: '-z',
         long: '--zero',
         description: 'Division by 0 returns OK',
         boolean: true,
         default: false

  option :kibana_url,
         long: '--kibana-url KIBANA_URL',
         description: 'Kibana URL query prefix that will be in critical / warning response output.'

  # Builds the "Kibana logs: ..." discover link appended to warning and
  # critical messages.
  #
  # The link covers the window from "now minus the configured *_previous
  # options" up to now (UTC) and carries whatever query is currently stored in
  # config[:query].
  #
  # @return [String, nil] the link text, or nil when --kibana-url is not set
  def kibana_info
    return if config[:kibana_url].nil?

    kibana_date_format = '%Y-%m-%dT%H:%M:%S.%LZ'
    end_time = Time.now.utc.to_i
    start_time = end_time - kibana_lookback_seconds
    from = Time.at(start_time).utc.strftime(kibana_date_format)
    to = Time.at(end_time).utc.strftime(kibana_date_format)

    "Kibana logs: #{config[:kibana_url]}/#/discover?_g=" \
    "(refreshInterval:(display:Off,section:0,value:0),time:(from:'" \
    "#{kibana_escape(from)}',mode:absolute,to:'" \
    "#{kibana_escape(to)}'))&_a=(columns:!(_source),index:" \
    "#{kibana_escape(kibana_index_pattern)},interval:auto,query:(query_string:(analyze_wildcard:!t,query:'" \
    "#{kibana_escape(config[:query])}')),sort:!('#{config[:timestamp_field]}',desc))&dummy"
  end

  # Runs the dividend and divisor count queries and reports the check status.
  #
  # A divisor count of zero is reported as OK when --zero is given and as
  # CRITICAL otherwise; in every other case dividend / divisor is compared with
  # the thresholds. When Elasticsearch answers NotFound, no counts are
  # available: with --invert the ratio is treated as 0.0 and evaluated against
  # the thresholds, without --invert the check reports OK.
  #
  # NOTE(review): ok / warning / critical come from the sensu-plugin base class
  # and are expected to terminate the check - confirm against sensu-plugin.
  def run
    dividend_query = config.delete(:dividend)
    divisor_query = config.delete(:divisor)

    # build_request_options (from the included module) is invoked once per
    # query; config[:query] is swapped before each call.
    config[:query] = dividend_query
    dividend = client.count(build_request_options)
    config[:query] = divisor_query
    divisor = client.count(build_request_options)

    if divisor['count'].zero?
      if config[:divisor_zero_ok]
        ok 'Divisor is 0, ratio check cannot be performed, failing safe with ok'
      else
        critical 'Divisor is 0, ratio check cannot be performed, raising an alert'
      end
    else
      report_ratio(dividend['count'].to_f / divisor['count'])
    end
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    if config[:invert]
      # No response exists at this point (the count call itself raised), so
      # evaluate the inverted thresholds against an empty result.
      report_ratio(0.0)
    else
      ok 'No results found, count was below thresholds'
    end
  end

  private

  # Compares +ratio+ with the thresholds and reports the matching status.
  # With --invert, values below a threshold alert; otherwise values above it do.
  def report_ratio(ratio)
    if config[:invert]
      if ratio < config[:crit]
        critical "Query count (#{ratio}) was below critical threshold. #{kibana_info}"
      elsif ratio < config[:warn]
        warning "Query count (#{ratio}) was below warning threshold. #{kibana_info}"
      else
        ok "Query count (#{ratio}) was ok"
      end
    elsif ratio > config[:crit]
      critical "Query count (#{ratio}) was above critical threshold. #{kibana_info}"
    elsif ratio > config[:warn]
      warning "Query count (#{ratio}) was above warning threshold. #{kibana_info}"
    else
      ok "Query count (#{ratio}) was ok"
    end
  end

  # Total look-back window, in seconds, described by the *_previous options.
  # A month is counted as 31 days.
  def kibana_lookback_seconds
    seconds_per_unit = {
      minutes_previous: 60,
      hours_previous: 60 * 60,
      days_previous: 60 * 60 * 24,
      weeks_previous: 60 * 60 * 24 * 7,
      months_previous: 60 * 60 * 24 * 31
    }
    seconds_per_unit.reduce(0) do |total, (option_name, seconds)|
      total + (config[option_name] * seconds)
    end
  end

  # Index expression placed in the Kibana link: the plain --indices value, or,
  # when --date-index is set, the literal prefix in brackets followed by the
  # strftime directives rewritten as Y->YYYY, y->YY, m->MM, d->DD, j->DDDD, H->hh.
  def kibana_index_pattern
    return config[:index] if config[:date_index].nil?

    prefix, *directives = config[:date_index].split('%')
    tokens = directives.join
                       .sub('Y', 'YYYY').sub('y', 'YY').sub('m', 'MM')
                       .sub('d', 'DD').sub('j', 'DDDD').sub('H', 'hh')
    "[#{prefix}]#{tokens}"
  end

  # Percent-encodes +value+ for the Kibana link. URI.escape no longer exists
  # on Ruby 3.0+, so the parser object that used to back it is called
  # directly; nil (for example no index configured) is encoded as ''.
  def kibana_escape(value)
    URI::DEFAULT_PARSER.escape(value.to_s)
  end
end