bin/check-mesos-running-tasks.rb
#! /usr/bin/env ruby
# frozen_string_literal: false
#
# check-mesos-running-tasks
#
# DESCRIPTION:
# This plugin checks that there are running tasks on a mesos cluster
#
# OUTPUT:
# plain text
#
# PLATFORMS:
# Linux
#
# DEPENDENCIES:
# gem: sensu-plugin
# gem: rest-client
# gem: json
# gem: daybreak
#
# USAGE:
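# Critical when the number of running tasks is outside the 1 - 10 range:
# check-mesos-running-tasks.rb --server SERVER --port PORT --mode rg --low 1 --high 10
# Critical when the number of running tasks is lower than 1:
# check-mesos-running-tasks.rb --server SERVER --mode lt --value 1
# Add --delta to compare the change since the previously retrieved value.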
#
# NOTES:
#
# LICENSE:
# Copyright 2016, Oskar Flores (oskar.flores@gmail.com)
# Released under the same terms as Sensu (the MIT license); see LICENSE
# for details.
#
require 'sensu-plugin/check/cli'
require 'rest-client'
require 'json'
require 'daybreak'
# Sensu check on the number of running tasks reported by a Mesos master.
#
# The check asks the configured server for the leader URL (via /redirect),
# fetches the metrics endpoint from that leader, extracts the
# `master/tasks_running` value and compares it (or, with --delta, its change
# since the previously stored value) against the thresholds chosen by --mode.
class MesosRunningTaskCheck < Sensu::Plugin::Check::CLI
  check_name 'CheckMesosRunningTask'

  # Key of the running-tasks value inside the metrics JSON document.
  @metrics_name = 'master/tasks_running'.freeze

  class << self
    attr_reader :metrics_name
  end

  option :server,
         description: 'Mesos server',
         short: '-s SERVER',
         long: '--server SERVER',
         default: 'localhost'

  option :port,
         description: 'port (default 5050)',
         short: '-p PORT',
         long: '--port PORT',
         default: 5050,
         required: false

  option :uri,
         description: 'Endpoint URI',
         short: '-u URI',
         long: '--uri URI',
         default: '/metrics/snapshot'

  option :timeout,
         description: 'timeout in seconds',
         short: '-t TIMEOUT',
         long: '--timeout TIMEOUT',
         proc: proc(&:to_i),
         default: 5

  option :mode,
         description: 'eq ne lt gt or rg',
         short: '-m MODE',
         long: '--mode MODE',
         required: true

  # Lower bound used by the 'rg' mode.
  option :min,
         description: 'min value on range',
         short: '-l VALUE',
         long: '--low VALUE',
         required: false,
         proc: proc(&:to_i),
         default: 0

  # Upper bound used by the 'rg' mode.
  option :max,
         description: 'max value on range',
         short: '-h VALUE',
         long: '--high VALUE',
         required: false,
         proc: proc(&:to_i),
         default: 1

  # Threshold used by the 'eq', 'ne', 'lt' and 'gt' modes.
  option :value,
         description: 'value to check against',
         short: '-v VALUE',
         long: '--value VALUE',
         proc: proc(&:to_i),
         default: 0,
         required: false

  option :delta,
         short: '-d',
         long: '--delta',
         description: 'Use this flag to compare the metric with the previously retrieved value',
         boolean: true

  # Entry point of the check: resolves the leader, reads the metric and
  # evaluates it against the configured mode and thresholds.
  #
  # Connection failures and timeouts are reported through `unknown`; when no
  # threshold is violated the result is reported through `ok`.
  def run
    port = config[:port]
    uri = config[:uri]
    timeout = config[:timeout]
    mode = config[:mode]
    value = config[:value]
    server = config[:server]
    min = config[:min]
    max = config[:max]

    begin
      server = get_leader_url server, port, timeout
      # RestClient::Resource takes its settings as an options Hash, so the
      # timeout has to be passed by key for it to apply to the request.
      r = RestClient::Resource.new("#{server}#{uri}", timeout: timeout).get
      metric_value = check_tasks(r)
      check_mesos_tasks(metric_value, mode, value, min, max)
    rescue Errno::ECONNREFUSED, RestClient::ResourceNotFound, SocketError
      unknown "Mesos #{server} is not responding"
    rescue RestClient::RequestTimeout
      unknown "Mesos #{server} connection timed out"
    end

    ok "Found #{metric_value} tasks running"
  end

  # Redirects server call to discover the Leader
  # @param server [String] Server address
  # @param port [Number] api port
  # @param timeout [Number, nil] optional request timeout in seconds; when
  #   omitted the HTTP client's own default is left untouched
  # @return [Url] Url representing the Leader
  def get_leader_url(server, port, timeout = nil)
    options = timeout ? { timeout: timeout } : {}
    RestClient::Resource.new("http://#{server}:#{port}/redirect", options).get.request.url
  end

  # Parses JSON data as returned from Mesos API
  # @param data [String] Server response
  # @return [Numeric] Number of running tasks
  # @raise [RuntimeError] when the response is not valid JSON or does not
  #   contain the running-tasks metric
  def check_tasks(data)
    begin
      running_tasks = JSON.parse(data)[MesosRunningTaskCheck.metrics_name]
    rescue JSON::ParserError
      raise "Could not parse JSON response: #{data}"
    end
    raise "No tasks in server response: #{data}" if running_tasks.nil?

    running_tasks.round
  end

  # Compares the metric with the configured thresholds and reports
  # `critical` when the condition selected by +mode+ holds.
  #
  # With the --delta flag the comparison uses the difference between
  # +metric_value+ and the value stored by the previous run.
  #
  # @param metric_value [Numeric] current number of running tasks
  # @param mode [String] one of 'eq', 'ne', 'lt', 'gt' or 'rg'
  # @param value [Numeric] threshold for the eq/ne/lt/gt modes
  # @param min [Numeric, nil] lower bound for the rg mode
  # @param max [Numeric, nil] upper bound for the rg mode
  def check_mesos_tasks(metric_value, mode, value, min, max)
    metric_value = delta_since_previous_run(metric_value) if config[:delta]

    case mode
    when 'eq'
      # Value equality, not object identity (`equal?`).
      critical "The number of running tasks cluster is equal to #{value}!" if metric_value == value
    when 'ne'
      critical "The number of running tasks cluster is not equal to #{value}!" if metric_value != value
    when 'lt'
      critical "The number of running tasks cluster is lower than #{value}!" if metric_value < value
    when 'gt'
      critical "The number of running tasks cluster is greater than #{value}!" if metric_value > value
    when 'rg'
      unless (min.to_i..max.to_i).cover? metric_value
        critical "The number of running tasks in cluster is not in #{min} - #{max} value range!"
      end
    else
      # An unrecognised mode would otherwise skip every comparison and let
      # the check report success without having verified anything.
      unknown "Unknown mode '#{mode}', expected one of: eq ne lt gt rg"
    end
  end

  private

  # Stores +metric_value+ in the on-disk Daybreak database and returns its
  # difference from the previously stored value (0 when nothing was stored).
  #
  # @param metric_value [Numeric] current number of running tasks
  # @return [Numeric] change since the previously stored value
  def delta_since_previous_run(metric_value)
    db = Daybreak::DB.new '/tmp/mesos-metrics.db', default: 0
    begin
      prev_value = nil
      # Read and write under the same lock so the read-modify-write of the
      # stored value is not interleaved with another run of the check.
      db.lock do
        prev_value = db['task_running']
        db['task_running'] = metric_value
      end
      db.flush
      db.compact
    ensure
      # Always release the database file, even if the update failed.
      db.close
    end
    metric_value - prev_value
  end
end