sensu-plugins/sensu-plugins-java

View on GitHub
bin/metrics-jstat-java8.py

Summary

Maintainability
F
6 days
Test Coverage
#!/usr/bin/python -tt
#
# DESCRIPTION:
# Collect everything that can be collected out of jstat (shells out 5 times)
# and spits to STDOUT in a graphite ready format, thus meant to be used with a
# graphite metric tcp handler.
# Since it shells out to jps(1) you will need the user running the sensu client
# executing this script to be able to run jps as the same user running the JVM
# you are trying to get stats from.
# In addition it will also need to be able to run jstat(2) against the JVM
# This can be all achieved by allowing the script to be ran as the same user
# running the JVM, for instance by prepending "sudo -u <jvm_process_owner>"
# in the command check definition (with the proper sudoers config to allow this
# with no password being asked)
#
# The graphite node is composed of an optional root node (defaults to 'metrics')
# the specified FQDN "reversed" ('foo.bar.com' becomes 'com.bar.foo') and an
# optional scheme (defaults to 'jstat')
#
# (1) http://docs.oracle.com/javase/8/docs/technotes/tools/share/jps.html
# (2) http://docs.oracle.com/javase/8/docs/technotes/tools/share/jstat.html
#
# OUTPUT:
# Graphite plain-text format (name value timestamp\n)
#
# DEPENDENCIES:
# Python 2.7 (untested on python 3 but should work fine)
# Java 8
#
#
# Released under the same terms as Sensu (the MIT license); see LICENSE
# for details.

# #RED
import logging
import logging.handlers
import optparse
import sys
import time

"""
Python 2.6 support for check_output:
http://stackoverflow.com/questions/4814970/subprocess-check-output-doesnt-seem-to-exist-python-2-6-5
"""
try:
    from subprocess import STDOUT, check_output, CalledProcessError
except ImportError:  # pragma: no cover
    # python 2.6 doesn't include check_output
    # monkey patch it in!
    import subprocess
    STDOUT = subprocess.STDOUT

    def check_output(*popenargs, **kwargs):
        if 'stdout' in kwargs:  # pragma: no cover
            raise ValueError('stdout argument not allowed, '
                             'it will be overridden.')
        process = subprocess.Popen(stdout=subprocess.PIPE,
                                   *popenargs, **kwargs)
        output, _ = process.communicate()
        retcode = process.poll()
        if retcode:
            cmd = kwargs.get("args")
            if cmd is None:
                cmd = popenargs[0]
            raise subprocess.CalledProcessError(retcode, cmd,
                                                output=output)
        return output
    subprocess.check_output = check_output

    # overwrite CalledProcessError due to `output`
    # keyword not being available (in 2.6)
    class CalledProcessError(Exception):

        def __init__(self, returncode, cmd, output=None):
            self.returncode = returncode
            self.cmd = cmd
            self.output = output

        def __str__(self):
            return "Command '%s' returned non-zero exit status %d" % (
                self.cmd, self.returncode)
    subprocess.CalledProcessError = CalledProcessError

class JstatMetricsToGraphiteFormat(object):
  '''Prints jstat metrics to stdout in graphite format

  Shells out to run jstat using the JVM id found via jps (also shelled out) and
  passed argument to print to STDOUT (for use with sensu) the metrics value.
  Jstat column titles are replaced with more explanatory names. Requires to be
  ran as a user that can get the JVM id via jps and run jstat on that JVM'''

  def main(self):
    # Setting up logging to syslog
    try:
      logger = logging.getLogger(__name__)
      logger.setLevel(logging.DEBUG)

      formatter = logging.Formatter("%(pathname)s: %(message)s")

      handler = logging.handlers.SysLogHandler(address = '/dev/log')
      handler.setFormatter(formatter)
      logger.addHandler(handler)
    except Exception:
      # booting is more important than logging
      logging.critical("Failed to configure syslog handler")

    parser = optparse.OptionParser()

    parser.add_option('-g', '--graphite-base',
      default = 'metrics',
      dest    = 'graphite_base',
      help    = 'The base graphite node',
      metavar = 'NODE')

    parser.add_option('-D', '--debug',
      action  = 'store_true',
      default = False,
      dest    = 'debug',
      help    = 'Debug output (NOISY!)')

    parser.add_option('-H', '--host',
      default = None,
      dest    = 'hostname',
      help    = 'The name of the host to run jstat on',
      metavar = 'HOST')

    parser.add_option('-j', '--java-name',
      default = None,
      dest    = 'java_app_name',
      help    = 'The name of the Java app to call jstat on',
      metavar = 'JAVANAME')

    parser.add_option('-s', '--scheme',
      default = 'jstat',
      dest    = 'service',
      help    = 'Metric naming scheme, text to prepend to metric',
      metavar = 'SERVICE')

    (options, args) = parser.parse_args()

    if not options.java_app_name:
      parser.error('A Java app name is required')

    if not options.hostname:
      parser.error('A host name is required')

    # Replace jstat colums titles with more explicit ones
    # Stats coming from -gc
    metric_maps_gc = { "S0U": "survivor_space_0_utilization_KB",
                       "S1U": "survivor_space_1_utilization_KB",
                       "EC": "current_eden_space_capacity_KB",
                       "EU": "eden_space_utilization_KB",
                       "OC": "current_old_space_capacity_KB",
                       "OU": "old_space_utilization_KB",
                       "MC": "metaspace_capacity_KB",
                       "MU": "metacspace_utilization_KB",
                       "CCSC": "compressed_class_space_capacity_KB",
                       "CCSU": "compressed_class_space_used_KB",
                       "YGC": "number_of_young_generation_GC_events",
                       "YGCT": "young_generation_garbage_collection_time",
                       "FGC": "number_of_stop_the_world_events",
                       "FGCT": "full_garbage_collection_time",
                       "GCT": "total_garbage_collection_time"
                       }
    # Stats coming from -gccapacity
    metric_maps_gccapacity = { "NGCMN": "minimum_size_of_new_area",
                               "NGCMX": "maximum_size_of_new_area",
                               "NGC": "current_size_of_new_area",
                               "OGCMN": "minimum_size_of_old_area",
                               "OGCMX": "maximum_size_of_old_area",
                               "OGC": "current_size_of_old_area",
                               "MCMN": "minimum_metaspace_capacity",
                               "MCMX": "maximum_metaspace_capacity",
                               "MC": "metaspace_capacity",
                               "CCSMN": "compressed_class_space_minimum_capacity",
                               "CCSMX": "compressed_class_space_maximum_capacity",
                               "CCSC": "compressed_class_space_capacity"
                               }

    # Stats coming from -gcnew
    metric_maps_gcnew = { "TT" : "tenuring_threshold",
                          "MTT": "maximum_tenuring_threshold",
                          "DSS": "adequate_size_of_survivor"
                           }

    # Stats coming from -compiler
    metric_maps_compiler = {
        "Compiled": "compilation_tasks_performed",
        "Failed": "compilation_tasks_failed",
        "Invalid": "compilation_tasks_invalidated",
        "Time": "time_spent_on_compilation_tasks"
        }

    # Stats coming from -class
    ## Note that since "Bytes" appears twice in jstat -class output we need
    ## to differentiate them by colum number
    metric_maps_class = {
        "Loaded": "loaded_classes",
        "Bytes_column2": "loaded_KB",
        "Unloaded": "unloaded_classes",
        "Bytes_column4": "unloaded_KB",
        "Time": "time_spent_on_class_load_unload"
        }

    def get_jstat_metrics(jstat_option, lvmid, metric_maps):
      '''Runs jstat with provided option on provided host, returns mapped stats'''
      def is_number(s):
        '''returns true if string is a number'''
        try:
          float(s)
          return True
        except ValueError:
          pass
        try:
          import unicodedata
          unicodedata.numeric(s)
          return True
        except (TypeError, ValueError):
          pass
        return False

      # Get stats from jstat stdout
      try :
        jstat_gc_out = check_output(["jstat", jstat_option, lvmid])
      except Exception as e:
        if options.debug:
          print e
          sys.exit(1)
        logger.critical(e)
        sys.exit(1)

      values_all = jstat_gc_out.split("\n")[1].split()
      # Remove non number strings
      values = [ jstat_val for jstat_val in values_all if is_number(jstat_val) ]
      # Transform float strings to integers
      values = map(int, map(float, values))

      # Change stats titles to long names
      titles = jstat_gc_out.split("\n")[0].split()
      # Deal with -class special "double Bytes" output
      if jstat_option == "-class":
       titles[2] = "Bytes_column2"
       titles[4] = "Bytes_column4"
      return dict([(metric_maps[title], values[position]) for position, title in enumerate(titles) if title in metric_maps])

    # Get lvmid (JVM id)
    try :
      jps_out = check_output(["jps", "-v"])
    except Exception as e:
      if options.debug:
        print e
        sys.exit(1)
      logger.critical(e)
      sys.exit(1)

    lvmid = False
    for line in jps_out.split("\n"):
      if options.java_app_name in line:
        lvmid = line.split()[0]

    if not lvmid:
      if options.debug:
        print "Could not get an LVM id"
        sys.exit(1)
      logger.critical("Could not get an LVM id")
      sys.exit(1)

    # Get stats from -gc
    gc_stats = get_jstat_metrics("-gc", lvmid, metric_maps_gc)
    if options.debug:
      print gc_stats
    # Get stats from -gccapacity
    gccapacity_stats = get_jstat_metrics("-gccapacity",
        lvmid, metric_maps_gccapacity)
    if options.debug:
      print gccapacity_stats
    # Get stats from -gcnew
    gcnew_stats = get_jstat_metrics("-gcnew", lvmid, metric_maps_gcnew)
    if options.debug:
      print gccapacity_stats

    # Put all GC related stats to the same dict
    gc_stats.update(gccapacity_stats)
    gc_stats.update(gcnew_stats)

    # Get stats from -compiler
    compiler_stats = get_jstat_metrics("-compiler", lvmid, metric_maps_compiler)
    if options.debug:
      print compiler_stats

    # Get stats from -class
    class_stats = get_jstat_metrics("-class", lvmid, metric_maps_class)
    if options.debug:
      print class_stats

    # Print to stdout in graphite format
    now = time.time()
    graphite_base = '.'.join([options.graphite_base,
        '.'.join(reversed(options.hostname.split('.')))])

    for metric in gc_stats:
      print "%s.%s.jvm.gc.%s %s %d" % (graphite_base, options.service, metric,
          gc_stats[metric], now)

    for metric in compiler_stats:
      print "%s.%s.jvm.compiler.%s %s %d" % (graphite_base, options.service,
          metric, compiler_stats[metric], now)

    for metric in class_stats:
      print "%s.%s.jvm.class.%s %s %d" % (graphite_base, options.service,
          metric, class_stats[metric], now)

    sys.exit(0)

if '__main__' == __name__:
  JstatMetricsToGraphiteFormat().main()