bin/check-emr-cluster.rb
#! /usr/bin/env ruby
#
# check-emr-cluster
#
# DESCRIPTION:
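# This plugin checks that exactly one EMR cluster with the given name was
# created within the last 24 hours, that it did not terminate with errors,
# and that its age (in seconds) is within the configured thresholds.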
#
# OUTPUT:
# plain-text
#
# PLATFORMS:
# Linux
#
# DEPENDENCIES:
# gem: aws-sdk
# gem: sensu-plugin
#
# USAGE:
# ./check-emr-cluster.rb --cluster-name MyCluster --aws-region eu-west-1 --use-iam --warning-over 14400 --critical-over 21600
#
# NOTES:
#
# LICENSE:
# Copyright (c) 2015, Olivier Bazoud, olivier.bazoud@gmail.com
# Released under the same terms as Sensu (the MIT license); see LICENSE
# for details.
#
require 'sensu-plugin/check/cli'
require 'aws-sdk'
# Sensu check for a named EMR cluster.
#
# Looks up the clusters created during the last 24 hours, selects the ones
# whose name equals --cluster-name and reports:
#   * critical when the name matches zero or several clusters,
#   * critical when the cluster state is 'TERMINATED_WITH_ERRORS',
#   * critical/warning when the cluster age crosses an "over" threshold,
#   * critical/warning when a 'TERMINATED' cluster is younger than an
#     "under" threshold,
#   * ok otherwise.
#
# All four thresholds default to -1; a negative threshold is treated as
# "not configured" and is never evaluated.
class CheckEMRCluster < Sensu::Plugin::Check::CLI
  option :aws_access_key,
         short: '-a AWS_ACCESS_KEY',
         long: '--aws-access-key AWS_ACCESS_KEY',
         description: "AWS Access Key. Either set ENV['AWS_ACCESS_KEY'] or provide it as an option",
         default: ENV['AWS_ACCESS_KEY']

  option :aws_secret_access_key,
         short: '-k AWS_SECRET_KEY',
         long: '--aws-secret-access-key AWS_SECRET_KEY',
         description: "AWS Secret Access Key. Either set ENV['AWS_SECRET_KEY'] or provide it as an option",
         default: ENV['AWS_SECRET_KEY']

  option :aws_region,
         short: '-r AWS_REGION',
         long: '--aws-region REGION',
         description: 'AWS Region (defaults to us-east-1).',
         default: 'us-east-1'

  option :use_iam_role,
         short: '-u',
         long: '--use-iam',
         description: 'Use IAM role authentication. Instance must have IAM role assigned for this to work'

  option :cluster_name,
         short: '-b CLUSTER_NAME',
         long: '--cluster-name CLUSTER_NAME',
         description: 'The name of the EMR cluster',
         required: true

  option :warning_over,
         description: 'Warn if cluster\'s age is greater than provided age in seconds',
         short: '-w SECONDS',
         long: '--warning-over SECONDS',
         default: -1,
         proc: proc(&:to_i)

  option :critical_over,
         description: 'Critical if cluster\'s age is greater than provided age in seconds',
         short: '-c SECONDS',
         long: '--critical-over SECONDS',
         default: -1,
         proc: proc(&:to_i)

  # Uses -W (not -w) so that it does not collide with --warning-over,
  # mirroring the -c / -C pair of the critical thresholds.
  option :warning_under,
         description: 'Warn if cluster\'s age is lower than provided age in seconds',
         short: '-W SECONDS',
         long: '--warning-under SECONDS',
         default: -1,
         proc: proc(&:to_i)

  option :critical_under,
         description: 'Critical if cluster\'s age is lower than provided age in seconds',
         short: '-C SECONDS',
         long: '--critical-under SECONDS',
         default: -1,
         proc: proc(&:to_i)

  # Builds the AWS client options from explicit credentials.
  #
  # @return [Hash] :access_key_id, :secret_access_key and :region taken
  #   from the parsed command-line configuration
  def aws_config
    { access_key_id: config[:aws_access_key],
      secret_access_key: config[:aws_secret_access_key],
      region: config[:aws_region] }
  end

  # Renders a duration as "<d> days <h> hours <m> minutes <s> seconds".
  # Larger units are only emitted while something is left after the
  # smaller ones have been peeled off; the days component wraps at 1000.
  #
  # @param secs [Numeric] duration in seconds
  # @return [String] human readable duration; '0 seconds' for a zero
  #   duration, an empty string for a negative one
  def humanize(secs)
    # Without this guard a zero duration would render as an empty string.
    return '0 seconds' if secs.zero?

    [[60, :seconds], [60, :minutes], [24, :hours], [1000, :days]].map do |count, name|
      next unless secs > 0

      secs, n = secs.divmod(count)
      "#{n.to_i} #{name}"
    end.compact.reverse.join(' ')
  end

  # Entry point of the check: queries EMR and reports the result through
  # the ok / warning / critical helpers inherited from the Sensu CLI base
  # class (presumably each of them terminates the process - the code below
  # relies on that after the "appears N times" / "not found" reports).
  #
  # Any StandardError raised while querying or evaluating is reported as
  # critical with the exception message.
  def run
    emr = Aws::EMR::Client.new(client_config)
    begin
      name = config[:cluster_name]
      clusters = recent_clusters(emr).select { |c| c.name == name }
      critical "EMR cluster #{name} appears #{clusters.size} times" if clusters.size > 1
      critical "EMR cluster #{name} not found" if clusters.empty?

      evaluate(clusters.first)
    rescue StandardError => e
      critical "EMR cluster #{config[:cluster_name]} - #{e.message}"
    end
  end

  private

  # Client options: explicit credentials unless --use-iam was given, in
  # which case only the region is passed.
  def client_config
    return aws_config if config[:use_iam_role].nil?

    { region: config[:aws_region] }
  end

  # Clusters created during the last 24 hours.
  # NOTE(review): only the first response of list_clusters is inspected;
  # confirm whether further pages need to be fetched for busy accounts.
  def recent_clusters(emr)
    now = Time.now
    emr.list_clusters(created_after: now - 24 * 60 * 60, created_before: now).clusters
  end

  # Reports the state/age verdict for a single cluster.
  def evaluate(cluster)
    name = config[:cluster_name]
    state = cluster.status.state
    critical "EMR cluster #{name} state is '#{state}'" if state == 'TERMINATED_WITH_ERRORS'

    timeline = cluster.status.timeline
    # A cluster without an end date is aged up to the current time.
    finished_at = timeline.end_date_time || Time.now
    age = finished_at.to_i - timeline.creation_date_time.to_i
    terminated = state == 'TERMINATED'

    if over?(age, config[:critical_over])
      critical "EMR cluster #{name} - #{humanize(age)} vs. #{humanize(config[:critical_over])}"
    elsif over?(age, config[:warning_over])
      warning "EMR cluster #{name} - #{humanize(age)} vs. #{humanize(config[:warning_over])}"
    elsif terminated && under?(age, config[:critical_under])
      critical "EMR cluster #{name} - #{humanize(age)} vs. #{humanize(config[:critical_under])}"
    elsif terminated && under?(age, config[:warning_under])
      warning "EMR cluster #{name} - #{humanize(age)} vs. #{humanize(config[:warning_under])}"
    else
      ok "EMR cluster #{name} - #{humanize(age)}"
    end
  end

  # True when the threshold is configured (non-negative) and reached.
  # The negative guard matters: with the -1 default, `age >= threshold`
  # alone would be true for every cluster.
  def over?(age, threshold)
    threshold >= 0 && age >= threshold
  end

  # True when the threshold is configured (non-negative) and the age does
  # not exceed it.
  def under?(age, threshold)
    threshold >= 0 && age <= threshold
  end
end