sensu-plugins/sensu-plugins-mesos

View on GitHub
bin/check-mesos-running-tasks.rb

Summary

Maintainability
A
2 hrs
Test Coverage
#! /usr/bin/env ruby
# frozen_string_literal: false

#
#   check-mesos-running-tasks
#
# DESCRIPTION:
#   This plugin checks that there are running tasks on a mesos cluster
#
# OUTPUT:
#   plain text
#
# PLATFORMS:
#   Linux
#
# DEPENDENCIES:
#   gem: sensu-plugin
#   gem: rest-client
#   gem: json
#   gem: daybreak
#
# USAGE:
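#   Go critical when more than 100 tasks are running:
#     check-mesos-running-tasks.rb -s localhost -p 5050 -m gt -v 100
#   Go critical when the number of running tasks is outside 1..50:
#     check-mesos-running-tasks.rb -s localhost -m rg --low 1 --high 50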
#
# NOTES:
#
# LICENSE:
#   Copyright 2016, Oskar Flores (oskar.flores@gmail.com)
#   Released under the same terms as Sensu (the MIT license); see LICENSE
#   for details.
#

require 'sensu-plugin/check/cli'
require 'rest-client'
require 'json'
require 'daybreak'

# Sensu check that reads the `master/tasks_running` metric from the Mesos
# leader's metrics endpoint and compares it against a threshold (or range)
# according to the selected mode. The check goes critical when the selected
# comparison holds (or, for range mode, when the value is outside the range).
class MesosRunningTaskCheck < Sensu::Plugin::Check::CLI
  check_name 'CheckMesosRunningTask'
  # Key looked up in the JSON document returned by the metrics endpoint.
  @metrics_name = 'master/tasks_running'.freeze

  class << self
    attr_reader :metrics_name
  end

  option :server,
         description: 'Mesos server',
         short: '-s SERVER',
         long: '--server SERVER',
         default: 'localhost'

  option :port,
         description: 'port (default 5050)',
         short: '-p PORT',
         long: '--port PORT',
         default: 5050,
         required: false

  option :uri,
         description: 'Endpoint URI',
         short: '-u URI',
         long: '--uri URI',
         default: '/metrics/snapshot'

  option :timeout,
         description: 'timeout in seconds',
         short: '-t TIMEOUT',
         long: '--timeout TIMEOUT',
         proc: proc(&:to_i),
         default: 5

  option :mode,
         description: 'eq ne lt gt or rg',
         short: '-m MODE',
         long: '--mode MODE',
         required: true

  option :min,
         description: 'min value on range',
         short: '-l VALUE',
         long: '--low VALUE',
         required: false,
         proc: proc(&:to_i),
         default: 0

  option :max,
         description: 'max value on range',
         short: '-h VALUE',
         long: '--high VALUE',
         required: false,
         proc: proc(&:to_i),
         default: 1

  option :value,
         description: 'value to check against',
         short: '-v VALUE',
         long: '--value VALUE',
         proc: proc(&:to_i),
         default: 0,
         required: false

  option :delta,
         short: '-d',
         long: '--delta',
         description: 'Use this flag to compare the metric with the previously retrieved value',
         boolean: true

  # Entry point of the check: resolves the leader, fetches the metrics
  # document, extracts the running-task count and evaluates it.
  # Connection problems and timeouts are reported as UNKNOWN.
  def run
    port = config[:port]
    uri = config[:uri]
    timeout = config[:timeout]
    mode = config[:mode]
    value = config[:value]
    server = config[:server]
    min = config[:min]
    max = config[:max]

    begin
      server = get_leader_url(server, port, timeout)
      # The second argument of RestClient::Resource.new is an options hash;
      # a bare number would be taken as the basic-auth user name.
      r = RestClient::Resource.new("#{server}#{uri}", timeout: timeout).get
      metric_value = check_tasks(r)
      check_mesos_tasks(metric_value, mode, value, min, max)
    rescue Errno::ECONNREFUSED, RestClient::ResourceNotFound, SocketError
      unknown "Mesos #{server} is not responding"
    rescue RestClient::RequestTimeout
      unknown "Mesos #{server} connection timed out"
    end
    ok "Found #{metric_value} tasks running"
  end

  # Redirects server call to discover the Leader
  # @param server [String] Server address
  # @param port [Number] api port
  # @param timeout [Integer, nil] request timeout in seconds; when nil the
  #   HTTP client's own default is used
  # @return [Url] Url representing the Leader
  def get_leader_url(server, port, timeout = nil)
    # Only pass :timeout when one was given, so two-argument callers keep
    # the client's default behaviour.
    options = timeout ? { timeout: timeout } : {}
    RestClient::Resource.new("http://#{server}:#{port}/redirect", options).get.request.url
  end

  # Parses JSON data as returned from Mesos  API
  # @param data [String] Server response
  # @return [Numeric] Number of running tasks
  # @raise [RuntimeError] when the response is not valid JSON or does not
  #   contain the running-tasks metric
  def check_tasks(data)
    begin
      running_tasks = JSON.parse(data)[MesosRunningTaskCheck.metrics_name]
    rescue JSON::ParserError
      raise "Could not parse JSON response: #{data}"
    end

    raise "No tasks in server response: #{data}" if running_tasks.nil?

    running_tasks.round
  end

  # Evaluates the metric against the configured comparison and exits with
  # CRITICAL when the comparison holds (or, in range mode, when the value is
  # outside min..max). With the delta flag set, the value compared is the
  # difference from the previously stored reading.
  # @param metric_value [Integer] current number of running tasks
  # @param mode [String] one of eq, ne, lt, gt, rg
  # @param value [Integer] threshold for eq/ne/lt/gt
  # @param min [Integer, nil] lower bound for rg
  # @param max [Integer, nil] upper bound for rg
  def check_mesos_tasks(metric_value, mode, value, min, max)
    metric_value = delta_from_previous(metric_value) if config[:delta]

    case mode
    when 'eq'
      # Compare by value; `equal?` would test object identity.
      critical "The number of running tasks cluster is equal to #{value}!" if metric_value == value
    when 'ne'
      critical "The number of running tasks cluster is not equal to #{value}!" if metric_value != value
    when 'lt'
      critical "The number of running tasks cluster is lower than #{value}!" if metric_value < value
    when 'gt'
      critical "The number of running tasks cluster is greater than #{value}!" if metric_value > value
    when 'rg'
      unless (min.to_i..max.to_i).cover? metric_value
        critical "The number of running tasks in cluster is not in #{min} - #{max} value range!"
      end
    else
      # A mistyped mode would otherwise skip every comparison and report OK.
      unknown "Unsupported mode '#{mode}' (expected eq, ne, lt, gt or rg)"
    end
  end

  private

  # Stores the current reading in the on-disk Daybreak database and returns
  # the difference from the previously stored reading (0 when none exists).
  # @param metric_value [Integer] current number of running tasks
  # @return [Integer] current reading minus the previous one
  def delta_from_previous(metric_value)
    db = Daybreak::DB.new '/tmp/mesos-metrics.db', default: 0
    begin
      prev_value = db['task_running']
      db.lock do
        db['task_running'] = metric_value
      end
      db.flush
      db.compact
    ensure
      # Always release the database file, even if a step above raised.
      db.close
    end
    metric_value - prev_value
  end
end