sensu-plugins/sensu-plugins-kubernetes

View on GitHub
bin/check-kube-pods-runtime.rb

Summary

Maintainability
B
5 hrs
Test Coverage
#! /usr/bin/env ruby
# frozen_string_literal: false

#   check-kube-pods-runtime
#
# DESCRIPTION:
# => Check if pods are running longer than expected
#
# OUTPUT:
#   plain text
#
# PLATFORMS:
#   Linux
#
# DEPENDENCIES:
#   gem: sensu-plugin
#   gem: kube-client
#
# USAGE:
# -s, --api-server URL             URL to API server
# -v, --api-version VERSION        API version. Defaults to 'v1'
#     --in-cluster                 Use service account authentication
#     --ca-file CA-FILE            CA file to verify API server cert
#     --cert CERT-FILE             Client cert to present
#     --key KEY-FILE               Client key for the client cert
# -u, --user USER                  User with access to API
#     --password PASSWORD          If user is passed, also pass a password
#     --token TOKEN                Bearer token for authorization
#     --token-file TOKEN-FILE      File containing bearer token for authorization
# -c, --critical COUNT             Threshold in seconds of runtime for pods to be critical
# -f, --filter FILTER              Selector filter for pods to be checked
# -p, --pods PODS                  List of pods to check
# -w, --warn TIMEOUT               Threshold in seconds of runtime for pods to be warning
#
# LICENSE:
#   Barry Martin <nyxcharon@gmail.com>
#   Released under the same terms as Sensu (the MIT license); see LICENSE
#   for details.
#

require 'sensu-plugins-kubernetes/cli'

# Sensu check that flags pods in the Running phase whose runtime (seconds
# elapsed since status.startTime) exceeds the configured warning or critical
# threshold.
class PodRuntime < Sensu::Plugins::Kubernetes::CLI
  @options = Sensu::Plugins::Kubernetes::CLI.options.dup

  option :pod_list,
         description: 'List of pods to check',
         short: '-p PODS',
         long: '--pods',
         default: 'all'

  option :pod_filter,
         description: 'Selector filter for pods to be checked',
         short: '-f FILTER',
         long: '--filter'

  option :warn_timeout,
         description: 'Threshold in seconds of runtime for pods to be warning',
         short: '-w TIMEOUT',
         long: '--warn',
         proc: proc(&:to_i)

  option :critical_timeout,
         description: 'Threshold for Pods to be critical',
         short: '-c COUNT',
         long: '--critical',
         proc: proc(&:to_i)

  # Entry point of the check.
  #
  # Fetches pods (all of them, or only those matching the label selector given
  # with --filter), keeps the ones named in --pods (or every pod when the list
  # contains 'all'), and compares the runtime of each Running pod against the
  # thresholds. Reports critical if any pod exceeded the critical threshold,
  # otherwise warning if any exceeded the warning threshold, otherwise ok.
  # A KubeException raised while talking to the API is reported as critical.
  def run
    entries = []
    any_warning = false
    any_critical = false
    crit_limit = config[:critical_timeout]
    warn_limit = config[:warn_timeout]

    if config[:pod_filter].nil?
      pods_list = parse_list(config[:pod_list])
      pods = client.get_pods
    else
      pods = client.get_pods(label_selector: config[:pod_filter].to_s)
      # The selector already narrowed the result, so check every returned pod.
      pods_list = ['all']
    end
    check_all = pods_list.include?('all')

    pods.each do |pod|
      next if pod.nil?
      next unless check_all || pods_list.include?(pod.metadata.name)
      # Only pods in the Running phase are measured.
      next unless pod.status.phase == 'Running'

      runtime = runtime_seconds(pod)
      # No start time reported: nothing to measure, skip instead of crashing.
      next if runtime.nil?

      # A threshold that was not supplied on the command line is nil and is
      # simply not evaluated.
      if crit_limit && runtime > crit_limit
        entries << "#{pod.metadata.name} exceeds threshold #{crit_limit} "
        any_critical = true
      elsif warn_limit && runtime > warn_limit
        entries << "#{pod.metadata.name} exceeds threshold #{warn_limit} "
        any_warning = true
      end
    end

    message = entries.join
    if any_critical
      critical message
    elsif any_warning
      warning message
    else
      ok 'All pods within threshold'
    end
  rescue KubeException => e
    critical "API error: #{e.message}"
  end

  # Turns the --pods argument into an array of pod names.
  #
  # @param list [String, nil] comma-separated pod names, a single name, or nil
  # @return [Array<String>] the names split on ','; a one-element array for a
  #   value without commas; [''] when list is nil
  def parse_list(list)
    return list.split(',') if list&.include?(',')
    return [list] if list

    ['']
  end

  private

  # Whole seconds elapsed between the pod's status.startTime and now.
  #
  # @param pod [Object] pod object exposing status.startTime
  # @return [Integer, nil] elapsed seconds, or nil when startTime is missing
  def runtime_seconds(pod)
    started = pod.status.startTime
    return nil if started.nil?

    (Time.now.utc - Time.parse(started).utc).to_i
  end
end