sensu-plugins/sensu-plugins-disk-checks

View on GitHub
bin/check-smart.rb

Summary

Maintainability
A
55 mins
Test Coverage
#! /usr/bin/env ruby
# frozen_string_literal: false

#
#   check-smart
#
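# DESCRIPTION:
#   Checks the SMART health of every device that lsblk reports as type "disk",
#   using smartctl. Exits critical when a SMART-capable disk does not pass its
#   health assessment.
#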
# OUTPUT:
#   plain text
#
# PLATFORMS:
#   Linux
#
# DEPENDENCIES:
#   gem: sensu-plugin
#
# USAGE:
#
# NOTES:
#   This is a drop-in replacement for check-disk-health.sh.
#
#   smartctl requires root permissions.  When running this script as a non-root
#   user such as sensu, ensure it is run with sudo.
#
#   Create a file named /etc/sudoers.d/smartctl containing this line:
#   sensu ALL=(ALL) NOPASSWD: /usr/sbin/smartctl
#
#   Fedora has an additional restriction: if requiretty is set, sudo only runs
#   when the user is logged in to a real tty. In that case, add the following
#   line to the sudoers file (/etc/sudoers), below the line "Defaults requiretty":
#   Defaults sensu !requiretty
#
# LICENSE:
#   Copyright 2013 Mitsutoshi Aoe <maoe@foldr.in>
#   Released under the same terms as Sensu (the MIT license); see LICENSE
#   for details.
#

require 'sensu-plugin/check/cli'
require 'json'

#
# Disk
#
class Disk
  # capability_output: raw text printed by `smartctl -i`.
  # health_output:     raw text printed by `smartctl -H`, or nil when the
  #                    health check was skipped.
  # smart_healthy:     true/false after a health check, nil when it was skipped.
  attr_reader :capability_output, :health_output, :smart_healthy
  alias healthy? smart_healthy

  # Probe the device: SMART capability first, then health, but only when the
  # capability probe says SMART is both available and enabled.
  #
  # @param name [String] device name as listed by lsblk; prefixed with /dev/
  # @param override [String, nil] device argument handed to smartctl instead
  #   of /dev/<name> when not nil
  # @param binary [String] smartctl executable name or path
  def initialize(name, override, binary)
    @device_path = "/dev/#{name}"
    @override_path = override
    @smart_binary = binary
    @smart_available = false
    @smart_enabled = false
    @smart_healthy = nil
    @capability_output = nil
    @health_output = nil
    check_smart_capability!
    check_health! if smart_capable?
  end

  # Is the device SMART capable and enabled
  #
  def smart_capable?
    @smart_available && @smart_enabled
  end

  # Device argument passed to smartctl: the override when one was given,
  # otherwise /dev/<name>
  #
  def device_path
    @override_path.nil? ? @device_path : @override_path
  end

  # Check for SMART capability
  #
  # Runs `smartctl -i` through sudo and records whether SMART is available
  # and enabled. Returns the raw command output.
  def check_smart_capability!
    output = `sudo #{@smart_binary} -i #{device_path}`

    # Newer smartctl
    @smart_available = reported?(output, /SMART support is:\s+Available/)
    @smart_enabled = reported?(output, /SMART support is:\s+Enabled/)

    unless smart_capable?
      # Older smartctl
      @smart_available = reported?(output, /Device supports SMART/)
      @smart_enabled = reported?(output, /and is Enabled/)
    end

    @capability_output = output
  end

  # Check the SMART health
  #
  # Runs `smartctl -H` through sudo. The disk counts as healthy when the
  # output contains PASSED, or has a line ending in OK. Returns the raw
  # command output.
  def check_health!
    output = `sudo #{@smart_binary} -H #{device_path}`
    @smart_healthy = reported?(output, /PASSED|OK$/)
    @health_output = output
  end

  private

  # True when pattern matches anywhere in the smartctl output.
  def reported?(output, pattern)
    !(output =~ pattern).nil?
  end
end

#
# Check SMART
#
class CheckSMART < Sensu::Plugin::Check::CLI
  option :smart_incapable_disks,
         long: '--smart-incapable-disks EXIT_CODE',
         description: 'Exit code when SMART is unavailable/disabled on a disk',
         proc: proc(&:to_sym),
         default: :unknown,
         in: %i[unknown ok warn critical]

  option :no_smart_capable_disks,
         long: '--zero-smart-capable-disks EXIT_CODE',
         description: 'Exit code when there are no SMART capable disks',
         proc: proc(&:to_sym),
         default: :unknown,
         in: %i[unknown ok warn critical]

  option :binary,
         short: '-b path/to/smartctl',
         long: '--binary /usr/sbin/smartctl',
         description: 'smartctl binary to use, in case you hide yours',
         required: false,
         default: 'smartctl'

  option :json,
         short: '-j path/to/smart.json',
         long: '--json path/to/smart.json',
         description: 'Path to SMART attributes JSON file',
         required: false,
         default: File.dirname(__FILE__) + '/smart.json'

  # Setup variables
  #
  # Loads the per-device configuration from the JSON file, then probes every
  # disk so that #run only has to evaluate the results.
  def initialize
    super
    @devices = []
    @hardware = load_hardware(config[:json])

    scan_disks!
  end

  # Generate a list of all block devices
  #
  # Walks the `lsblk -nro NAME,TYPE` listing and builds a Disk for every
  # entry of type "disk". Only disks that report SMART as available and
  # enabled are kept in @devices.
  def scan_disks!
    `lsblk -nro NAME,TYPE`.each_line do |line|
      name, type = line.split
      next unless type == 'disk'

      # A configuration entry whose :path equals the lsblk name may supply an
      # :override that is handed to smartctl in place of /dev/<name>.
      jconfig = @hardware.find { |entry| entry[:path] == name }
      override = jconfig ? jconfig[:override] : nil

      device = Disk.new(name, override, config[:binary])
      @devices << device if device.smart_capable?
    end
  end

  # Main function
  #
  # Reports, in order: the configured status when no SMART capable disk was
  # found, critical when any disk is unhealthy, the configured status for
  # disks without usable SMART, and ok otherwise.
  def run
    if @devices.empty?
      exit_with(
        config[:no_smart_capable_disks],
        'No SMART capable devices found'
      )
    end

    unhealthy_disks = @devices.select { |disk| disk.smart_capable? && !disk.healthy? }
    # NOTE(review): scan_disks! only stores SMART-capable disks, so this list
    # is empty as the code stands and the --smart-incapable-disks branch below
    # is never taken. Confirm whether the filter in scan_disks! is intended.
    unknown_disks = @devices.reject(&:smart_capable?)

    unless unhealthy_disks.empty?
      output = unhealthy_disks.map(&:health_output)
      output.concat(unknown_disks.map(&:capability_output))
      critical output.join("\n")
    end

    unless unknown_disks.empty?
      exit_with(
        config[:smart_incapable_disks],
        unknown_disks.map(&:capability_output).join("\n")
      )
    end

    ok 'PASSED'
  end

  # Set exit status and message
  #
  # sym selects the status (:ok, :warn or :critical); any other value is
  # reported as unknown.
  def exit_with(sym, message)
    case sym
    when :ok
      ok message
    when :warn
      warn message
    when :critical
      critical message
    else
      unknown message
    end
  end

  private

  # Read the device list stored under hardware -> devices in the JSON file.
  #
  # Returns an empty list when the file is not readable or when that key
  # path is absent or not a list, so scan_disks! can always search the
  # result. Malformed JSON still raises.
  def load_hardware(path)
    return [] unless File.readable?(path)

    # File.read rather than IO.read: IO.read runs a path that starts with
    # "|" as a command.
    parsed = JSON.parse(File.read(path), symbolize_names: true)
    hardware = parsed.is_a?(Hash) ? parsed[:hardware] : nil
    devices = hardware.is_a?(Hash) ? hardware[:devices] : nil
    devices.is_a?(Array) ? devices : []
  end
end