dotcloud/docker

View on GitHub
daemon/metrics.go

Summary

Maintainability
A
35 mins
Test Coverage
package daemon // import "github.com/docker/docker/daemon"

import (
    "context"
    "sync"

    "github.com/containerd/log"
    "github.com/docker/docker/errdefs"
    "github.com/docker/docker/pkg/plugingetter"
    "github.com/docker/docker/pkg/plugins"
    metrics "github.com/docker/go-metrics"
    "github.com/pkg/errors"
    "github.com/prometheus/client_golang/prometheus"
)

// metricsPluginType is the plugin capability name used to look up metrics
// plugins in the plugin store, and the prefix of the plugin method names
// called by metricsPluginAdapter (e.g. metricsPluginType + ".StartMetrics").
const metricsPluginType = "MetricsCollector"

// Package-level metric collectors. Every collector below is created from
// metricsNS, which init registers with the metrics package.
var (
    // metricsNS is the namespace, created with the names "engine" and
    // "daemon", that all collectors in this block are attached to.
    metricsNS = metrics.NewNamespace("engine", "daemon", nil)

    // Labeled timers; the label is the action (or host-info function) name.
    containerActions  = metricsNS.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
    networkActions    = metricsNS.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
    hostInfoFunctions = metricsNS.NewLabeledTimer("host_info_functions", "The number of seconds it takes to call functions gathering info about the host", "function")

    // engineInfo is a labeled gauge whose labels describe the engine build
    // and the operating system it runs on.
    engineInfo = metricsNS.NewLabeledGauge("engine", "The information related to the engine and the OS it is running on", metrics.Unit("info"),
        "version",
        "commit",
        "architecture",
        "graphdriver",
        "kernel",
        "os",
        "os_type",
        "os_version",
        "daemon_id", // ID is a randomly generated unique identifier (e.g. UUID4)
    )
    // Unlabeled gauges for the host's CPU count and memory size in bytes.
    engineCpus   = metricsNS.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
    engineMemory = metricsNS.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)

    // Health-check counters and the timer for health-check preparation.
    healthChecksCounter       = metricsNS.NewCounter("health_checks", "The total number of health checks")
    healthChecksFailedCounter = metricsNS.NewCounter("health_checks_failed", "The total number of failed health checks")
    healthCheckStartDuration  = metricsNS.NewTimer("health_check_start_duration", "The number of seconds it takes to prepare to run health checks")

    // stateCtr is the custom collector (see stateCounter) that reports how
    // many containers are in each state; newStateCounter adds it to metricsNS.
    stateCtr = newStateCounter(metricsNS, metricsNS.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
)

// init updates the container_actions timer with a zero value for a fixed set
// of action labels and then registers the daemon metrics namespace.
func init() {
    // Presumably this makes each label show up in the exported metrics before
    // the corresponding action has ever run — confirm against go-metrics.
    knownActions := []string{"start", "changes", "commit", "create", "delete"}
    for i := range knownActions {
        containerActions.WithValues(knownActions[i]).Update(0)
    }

    metrics.Register(metricsNS)
}

// stateCounter tracks the current state label of each container and exposes
// the per-state totals through its Describe and Collect methods.
type stateCounter struct {
    mu     sync.RWMutex      // guards states
    states map[string]string // container ID -> state label passed to set
    desc   *prometheus.Desc  // descriptor sent by Describe and used for every metric in Collect
}

// newStateCounter builds a stateCounter for desc with an empty state map,
// adds it to ns, and returns it.
func newStateCounter(ns *metrics.Namespace, desc *prometheus.Desc) *stateCounter {
    counter := new(stateCounter)
    counter.states = map[string]string{}
    counter.desc = desc

    ns.Add(counter)
    return counter
}

// get returns how many tracked containers currently carry the "running",
// "paused" and "stopped" labels. Entries with any other label are not
// included in the returned counts. The state map is read under the read lock.
func (ctr *stateCounter) get() (running int, paused int, stopped int) {
    ctr.mu.RLock()
    defer ctr.mu.RUnlock()

    for _, label := range ctr.states {
        switch label {
        case "running":
            running++
        case "paused":
            paused++
        case "stopped":
            stopped++
        }
    }
    return running, paused, stopped
}

// set records label as the current state of container id, replacing any
// label stored for that id before. The write happens under the write lock.
func (ctr *stateCounter) set(id, label string) {
    ctr.mu.Lock()
    defer ctr.mu.Unlock()

    ctr.states[id] = label
}

// del forgets the state recorded for container id. Removing an id that is
// not tracked is a no-op. The removal happens under the write lock.
func (ctr *stateCounter) del(id string) {
    ctr.mu.Lock()
    defer ctr.mu.Unlock()

    delete(ctr.states, id)
}

// Describe sends the counter's single metric descriptor on ch.
func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
    ch <- ctr.desc
}

// Collect sends three gauge-valued constant metrics on ch, one per state
// label in the order running, paused, stopped, using the counts from get.
func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
    running, paused, stopped := ctr.get()

    samples := []struct {
        state string
        count int
    }{
        {state: "running", count: running},
        {state: "paused", count: paused},
        {state: "stopped", count: stopped},
    }
    for _, sample := range samples {
        ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(sample.count), sample.state)
    }
}

// cleanupMetricsPlugins asks every managed plugin with the metrics capability
// to stop collecting metrics, one goroutine per plugin, and waits for all of
// them to finish. Failures are logged rather than returned. Afterwards the
// daemon's metrics plugin listener is closed if one is set.
func (daemon *Daemon) cleanupMetricsPlugins() {
    managed := daemon.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)

    var wg sync.WaitGroup
    for _, plugin := range managed {
        wg.Add(1)
        // The plugin is passed as an argument so each goroutine works on its
        // own value.
        go func(p plugingetter.CompatPlugin) {
            defer wg.Done()

            adapter, err := makePluginAdapter(p)
            if err != nil {
                log.G(context.TODO()).WithError(err).WithField("plugin", p.Name()).Error("Error creating metrics plugin adapter")
                return
            }

            err = adapter.StopMetrics()
            if err != nil {
                log.G(context.TODO()).WithError(err).WithField("plugin", p.Name()).Error("Error stopping plugin metrics collection")
            }
        }(plugin)
    }
    wg.Wait()

    if listener := daemon.metricsPluginListener; listener != nil {
        listener.Close()
    }
}

// metricsPlugin is the interface makePluginAdapter returns for a metrics
// plugin; in this file it is implemented by metricsPluginAdapter.
type metricsPlugin interface {
    // StartMetrics asks the plugin to start collecting metrics.
    StartMetrics() error
    // StopMetrics asks the plugin to stop collecting metrics.
    StopMetrics() error
}

// makePluginAdapter wraps p in a metricsPlugin.
//
// If p exposes a V1 client, that client is reused. Otherwise p must expose an
// address speaking the HTTP V1 plugin protocol, and a new client with the
// plugin's timeout is created for it. An error is returned when p is neither
// kind of plugin, when its protocol is not supported, or when the client
// cannot be created.
func makePluginAdapter(p plugingetter.CompatPlugin) (metricsPlugin, error) {
    if v1, isV1 := p.(plugingetter.PluginWithV1Client); isV1 {
        return &metricsPluginAdapter{c: v1.Client(), name: p.Name()}, nil
    }

    addressed, hasAddr := p.(plugingetter.PluginAddr)
    if !hasAddr {
        return nil, errdefs.System(errors.Errorf("got unknown plugin type %T", p))
    }
    if addressed.Protocol() != plugins.ProtocolSchemeHTTPV1 {
        return nil, errors.Errorf("plugin protocol not supported: %s", addressed.Protocol())
    }

    // The client endpoint is "<network>://<address>".
    addr := addressed.Addr()
    endpoint := addr.Network() + "://" + addr.String()

    client, err := plugins.NewClientWithTimeout(endpoint, nil, addressed.Timeout())
    if err != nil {
        return nil, errors.Wrap(err, "error creating metrics plugin client")
    }
    return &metricsPluginAdapter{c: client, name: p.Name()}, nil
}

// metricsPluginAdapter implements metricsPlugin by calling the plugin's
// methods through a plugin client.
//
// NOTE: makePluginAdapter builds this struct with positional (unkeyed)
// literals, so the field order must not change.
type metricsPluginAdapter struct {
    c    *plugins.Client // client used for the StartMetrics/StopMetrics calls
    name string          // plugin name as returned by the plugin's Name method
}

// StartMetrics calls the plugin's StartMetrics method. It returns a wrapped
// error if the call itself fails, or an error built from the response's Err
// field when the plugin reports a non-empty one.
func (a *metricsPluginAdapter) StartMetrics() error {
    var reply struct {
        Err string
    }

    callErr := a.c.Call(metricsPluginType+".StartMetrics", nil, &reply)
    switch {
    case callErr != nil:
        return errors.Wrap(callErr, "could not start metrics plugin")
    case reply.Err != "":
        return errors.New(reply.Err)
    default:
        return nil
    }
}

// StopMetrics calls the plugin's StopMetrics method without a request or
// response body and returns a wrapped error if the call fails.
func (a *metricsPluginAdapter) StopMetrics() error {
    callErr := a.c.Call(metricsPluginType+".StopMetrics", nil, nil)
    if callErr == nil {
        return nil
    }
    return errors.Wrap(callErr, "error stopping metrics collector")
}