sensu-plugins/sensu-plugins-aws

View on GitHub
bin/check-ebs-burst-limit.rb

Summary

Maintainability
A
3 hrs
Test Coverage
#! /usr/bin/env ruby
#
# check-ebs-burst-limit
#
# DESCRIPTION:
#   Check EC2 Volumes for volumes with low burst balance
#   Optionally check only volumes attached to the current instance
#
# OUTPUT:
#   plain-text
#
# PLATFORMS:
#   Linux
#
# DEPENDENCIES:
#   gem: aws-sdk
#   gem: sensu-plugin
#
# USAGE:
#   ./check-ebs-burst-limit.rb -r ${your_region}
#   ./check-ebs-burst-limit.rb -r ${your_region} -c 50
#   ./check-ebs-burst-limit.rb -r ${your_region} -c 50 -t Name
#   ./check-ebs-burst-limit.rb -r ${your_region} -w 50 -c 10
#   ./check-ebs-burst-limit.rb -r ${your_region} -w 50 -c 10 -f "{name:tag-value,values:[infrastructure]}"
#   ./check-ebs-burst-limit.rb -r ${your_region} -w 50 -c 10 -f "{name:tag-value,values:[infrastructure]}" -t Name
#
# LICENSE:
#   Barry Martin <nyxcharon@gmail.com>
#   Released under the same terms as Sensu (the MIT license); see LICENSE
#   for details.
#

require 'sensu-plugin/check/cli'
require 'sensu-plugins-aws'
require 'sensu-plugins-aws/filter'
require 'aws-sdk'
require 'net/http'

# Sensu check that flags EBS volumes whose CloudWatch 'BurstBalance' metric
# (namespace 'AWS/EBS', 'Average' statistic) is below the configured
# warning/critical thresholds.
#
# Volumes are selected with the user-supplied describe-volumes filter (-f)
# plus one of:
#   * -s given:     volumes attached to the instance this plugin runs on
#   * -s not given: every volume whose attachment status is 'attached'
class CheckEbsBurstLimit < Sensu::Plugin::Check::CLI
  include CloudwatchCommon
  include Filter

  option :aws_region,
         short:       '-r R',
         long:        '--region REGION',
         description: 'AWS region, will be overridden by the -s option',
         default: 'us-east-1'

  option :tag,
         description: 'Add volume TAG value to warn/critical message.',
         short: '-t TAG',
         long: '--tag TAG'

  option :critical,
         description: 'Trigger a critical when ebs burst limit is under VALUE',
         short: '-c VALUE',
         long: '--critical VALUE',
         proc: proc(&:to_f),
         required: true

  option :warning,
         description: 'Trigger a warning when ebs burst limit is under VALUE',
         short: '-w VALUE',
         long: '--warning VALUE',
         proc: proc(&:to_f)

  option :check_self,
         short: '-s',
         long: '--check-self',
         description: 'Only check the instance on which this plugin is being run - this overrides the -r option and uses the region of the current instance',
         boolean: true,
         default: false

  option :filter,
         short: '-f FILTER',
         long: '--filter FILTER',
         description: 'String representation of the filter to apply',
         default: '{}'

  # Looks up the value of the tag named +tag_name+ on +volume+.
  #
  # @param volume [#tags] object whose tags respond to #key and #value
  # @param tag_name [String] tag key to search for
  # @return [String] the tag's value, or '' when no tag has that key
  def volume_tag(volume, tag_name)
    tag = volume.tags.find { |t| t.key == tag_name }
    tag.nil? ? '' : tag.value
  end

  # Entry point of the check.
  #
  # Describes the matching volumes, reads the latest BurstBalance average for
  # each one and exits through +critical+, +warning+ or +ok+. Volumes for
  # which CloudWatch returned no datapoints are skipped.
  def run
    errors = []
    crit = false
    should_warn = false

    volume_filters = Filter.parse(config[:filter])
    volume_filters.push(attachment_filter)

    # The client is created only after attachment_filter has had the chance
    # to override Aws.config[:region] for the -s case.
    ec2 = Aws::EC2::Client.new
    volumes = ec2.describe_volumes(filters: volume_filters)

    config[:metric_name] = 'BurstBalance'
    config[:namespace] = 'AWS/EBS'
    config[:statistics] = 'Average'
    config[:period] = 120

    volumes[:volumes].each do |volume|
      balance = latest_burst_balance(volume[:volume_id])
      next if balance.nil?

      if balance < config[:critical]
        crit = true
      elsif config[:warning] && balance < config[:warning]
        should_warn = true
      else
        next
      end

      errors << "#{volume[:volume_id]}#{tag_suffix(volume)} #{balance}"
    end

    # NOTE: errors holds both warning- and critical-level volumes, so a
    # critical result also lists volumes that only crossed the warning level.
    if crit
      critical "Volume(s) have exceeded critical threshold: #{errors}"
    elsif should_warn
      warning "Volume(s) have exceeded warning threshold: #{errors}"
    else
      ok 'No volume(s) exceed thresholds'
    end
  end

  private

  # Fetches one value from the EC2 instance metadata service.
  #
  # @param path [String] path below /latest/meta-data/
  # @return [String] response body
  def instance_metadata(path)
    Net::HTTP.get(URI.parse("http://169.254.169.254/latest/meta-data/#{path}"))
  end

  # Builds the describe-volumes filter that restricts the check to attached
  # volumes. With -s it also overrides the global AWS region with the region
  # of the current instance.
  #
  # @return [Hash] filter with :name and :values keys
  def attachment_filter
    unless config[:check_self]
      # The -s option was not specified, look at all volumes which are attached
      return { name: 'attachment.status', values: ['attached'] }
    end

    # Derive the region by dropping the last character of the availability
    # zone (e.g. "us-east-1a" -> "us-east-1"); this overrides the -r option.
    Aws.config[:region] = instance_metadata('placement/availability-zone').chop
    { name: 'attachment.instance-id', values: [instance_metadata('instance-id')] }
  end

  # Queries CloudWatch for the given volume and returns the average of the
  # most recent datapoint.
  #
  # CloudWatch does not guarantee that datapoints come back in chronological
  # order, so the newest one is selected by timestamp rather than position.
  # +client+ and +metrics_request+ are not defined in this file (presumably
  # provided by CloudwatchCommon).
  #
  # @param volume_id [String] EBS volume id used as the 'VolumeId' dimension
  # @return [Float, nil] latest average, or nil when there are no datapoints
  def latest_burst_balance(volume_id)
    config[:dimensions] = [{ name: 'VolumeId', value: volume_id }]
    resp = client.get_metric_statistics(metrics_request(config))
    latest = resp.datapoints.max_by { |datapoint| datapoint[:timestamp] }
    latest && latest[:average]
  end

  # Formats the optional " (tag value)" suffix appended to a volume id in the
  # result message; empty when no -t option was given.
  #
  # @param volume [#tags] volume being reported
  # @return [String]
  def tag_suffix(volume)
    config[:tag] ? " (#{volume_tag(volume, config[:tag])})" : ''
  end
end