thenetcircle/dino

View on GitHub
bin/statsd-agent.py

Summary

Maintainability
A
3 hrs
Test Coverage
import statsd
import time
import psutil
import multiprocessing
import socket
import os
import subprocess
import pyudev

STATSD_HOST='10.20.2.108'
PREFIX='%s.' % socket.gethostname()
GRANULARITY = 10  # seconds
PATHS = [('/', 'root'), ('/data', 'data')]


def smart():
    c = statsd.StatsClient(STATSD_HOST, 8125, prefix=PREFIX + 'system.smart')
    context = pyudev.Context()
    devices = list()

    for device in context.list_devices(MAJOR='8'):
        if device.device_type == 'disk':
            devices.append(device.device_node)

    while True:
        for device in devices:
            try:
                process = subprocess.Popen([
                    'sudo', 'smartctl', '--attributes',
                    '-d', 'sat+megaraid,0', device
                ], stdout=subprocess.PIPE)

                out, _ = process.communicate()
                out = str(out, 'utf-8').strip()
                out = out.split('\n')
                out = out[7:-2]

                for line in out:
                    try:
                        line = line.split()[0:10]
                        key = '{}.{}_{}'.format(device.split('/')[-1], line[1], line[0])
                        value = int(float(line[-1]))
                        c.gauge(key, value)
                    except Exception as inner_e:
                        print('could not parse line: {}\n{}'.format(str(inner_e), line))
            except Exception as e:
                print('error: %s' % str(e))
        time.sleep(GRANULARITY * 6)


def connections():
    c = statsd.StatsClient(STATSD_HOST, 8125, prefix=PREFIX + 'system.network')
    to_check = [
        ('conn_established', 'count_established_conn.sh'),
        ('pkts_collapsed', 'count_pkts_collapsed.sh'),
        ('pkts_pruned', 'count_pkts_pruned.sh'),
        ('pkts_pruned_overrun', 'count_pkts_pruned_overrun.sh'),
        ('syn_recv', 'count_syn_recv.sh'),
        ('tcp_in_errs', 'count_in_errs_snmp.sh'),
        ('rx_discards', 'count_rx_discards.sh'),
        ('pkts_recv_errs', 'count_pkts_recv_errors.sh'),
        ('pkts_recv_buff_errs', 'count_pkts_recv_buffer_errors.sh'),
    ]

    while True:
        for key, script in to_check:
            try:
                process = subprocess.Popen([script], stdout=subprocess.PIPE)
                out, _ = process.communicate()
                try:
                    the_count = int(float(str(out, 'utf-8').strip()))
                except:
                    the_count = 0
                c.gauge(key, the_count)
            except Exception as e:
                print('error: %s' % str(e))
        time.sleep(GRANULARITY)


def disk():
    c = statsd.StatsClient(STATSD_HOST, 8125, prefix=PREFIX + 'system.disk')
    while True:
        for path, label in PATHS:
            disk_usage = psutil.disk_usage(path)

            st = os.statvfs(path)
            total_inode = st.f_files
            free_inode = st.f_ffree
            inode_percentage = int(100*(float(total_inode - free_inode) / total_inode))

            c.gauge('%s.inodes.percent' % label, inode_percentage)
            c.gauge('%s.total' % label, disk_usage.total)
            c.gauge('%s.used' % label, disk_usage.used)
            c.gauge('%s.free' % label, disk_usage.free)
            c.gauge('%s.percent' % label, disk_usage.percent)
        time.sleep(GRANULARITY)


def network():
    c = statsd.StatsClient(STATSD_HOST, 8125, prefix=PREFIX + 'system.network')
    t0 = time.time()
    counters = psutil.net_io_counters(pernic=True)

    last_totals = dict()
    totals = dict()
    interfaces = set([key for key in counters.keys() if key != 'lo'])
    for interface in interfaces:
        totals[interface] = (counters[interface].bytes_sent, counters[interface].bytes_recv)
        last_totals[interface] = (counters[interface].bytes_sent, counters[interface].bytes_recv)

    while True:
        for interface in interfaces:
            counter = psutil.net_io_counters(pernic=True)[interface]
            t1 = time.time()
            totals[interface] = (counter.bytes_sent, counter.bytes_recv)

            ul, dl = [(now - last) / (t1 - t0) / 1000.0
                      for now, last in zip(totals[interface], last_totals[interface])]

            t0 = time.time()
            c.gauge('%s.upload.kbps' % interface, ul)
            c.gauge('%s.download.kbps' % interface, dl)
            last_totals[interface] = totals[interface]

        time.sleep(GRANULARITY)


def cpu_times():
    c = statsd.StatsClient(STATSD_HOST, 8125, prefix=PREFIX + 'system.cpu')
    while True:
        cpu_t = psutil.cpu_times()
        c.gauge('system_wide.times.user', cpu_t.user)
        c.gauge('system_wide.times.nice', cpu_t.nice)
        c.gauge('system_wide.times.system', cpu_t.system)
        c.gauge('system_wide.times.idle', cpu_t.idle)
        c.gauge('system_wide.times.iowait', cpu_t.iowait)
        c.gauge('system_wide.times.irq', cpu_t.irq)
        c.gauge('system_wide.times.softirq', cpu_t.softirq)
        c.gauge('system_wide.times.steal', cpu_t.steal)
        c.gauge('system_wide.times.guest', cpu_t.guest)
        c.gauge('system_wide.times.guest_nice', cpu_t.guest_nice)
        time.sleep(GRANULARITY)


def cpu_times_percent():
    c = statsd.StatsClient(STATSD_HOST, 8125, prefix=PREFIX + 'system.cpu')
    while True:
        value = psutil.cpu_percent(interval=1)
        c.gauge('system_wide.percent', value)

        cpu_t_percent = psutil.cpu_times_percent(interval=1)
        c.gauge('system_wide.times_percent.user', cpu_t_percent.user)
        c.gauge('system_wide.times_percent.nice', cpu_t_percent.nice)
        c.gauge('system_wide.times_percent.system', cpu_t_percent.system)
        c.gauge('system_wide.times_percent.idle', cpu_t_percent.idle)
        c.gauge('system_wide.times_percent.iowait', cpu_t_percent.iowait)
        c.gauge('system_wide.times_percent.irq', cpu_t_percent.irq)
        c.gauge('system_wide.times_percent.softirq', cpu_t_percent.softirq)
        c.gauge('system_wide.times_percent.steal', cpu_t_percent.steal)
        c.gauge('system_wide.times_percent.guest', cpu_t_percent.guest)
        c.gauge('system_wide.times_percent.guest_nice', cpu_t_percent.guest_nice)
        time.sleep(GRANULARITY)


def memory():
    c = statsd.StatsClient(STATSD_HOST, 8125, prefix=PREFIX + 'system.memory')
    while True:
        swap = psutil.swap_memory()
        c.gauge('swap.total', swap.total)
        c.gauge('swap.used', swap.used)
        c.gauge('swap.free', swap.free)
        c.gauge('swap.percent', swap.percent)

        virtual = psutil.virtual_memory()
        c.gauge('virtual.total', virtual.total)
        c.gauge('virtual.available', virtual.available)
        c.gauge('virtual.used', virtual.used)
        c.gauge('virtual.free', virtual.free)
        c.gauge('virtual.percent', virtual.percent)
        c.gauge('virtual.active', virtual.active)
        c.gauge('virtual.inactive', virtual.inactive)
        c.gauge('virtual.buffers', virtual.buffers)
        c.gauge('virtual.cached', virtual.cached)
        time.sleep(GRANULARITY)


if __name__ == '__main__':
    multiprocessing.Process(target=disk).start()
    multiprocessing.Process(target=cpu_times).start()
    multiprocessing.Process(target=cpu_times_percent).start()
    multiprocessing.Process(target=memory).start()
    multiprocessing.Process(target=smart).start()
    multiprocessing.Process(target=network).start()