daemon/monitor.go

package daemon // import "github.com/docker/docker/daemon"

import (
    "context"
    "strconv"
    "time"

    "github.com/containerd/log"
    "github.com/docker/docker/api/types/backend"
    "github.com/docker/docker/api/types/events"
    "github.com/docker/docker/container"
    "github.com/docker/docker/daemon/config"
    "github.com/docker/docker/errdefs"
    libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
    "github.com/docker/docker/restartmanager"
    "github.com/pkg/errors"
)

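// setStateCounter records the container's current state in the daemon's state
// metrics, counting anything that is neither paused nor running as "stopped".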
func (daemon *Daemon) setStateCounter(c *container.Container) {
    switch c.StateString() {
    case "paused":
        stateCtr.set(c.ID, "paused")
    case "running":
        stateCtr.set(c.ID, "running")
    default:
        stateCtr.set(c.ID, "stopped")
    }
}

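// handleContainerExit handles the exit of a container's main task: it cleans
// up the containerd task, consults the restart policy, persists the updated
// state, and emits a "die" event. It may schedule an asynchronous restart or
// auto-removal of the container.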
func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
    var exitStatus container.ExitStatus
    c.Lock()

    cfg := daemon.config()

    // Health checks will be automatically restarted if/when the
    // container is started again.
    daemon.stopHealthchecks(c)

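    // Delete the task from containerd; on success, the status it reports
    // becomes the container's exit status.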
    tsk, ok := c.Task()
    if ok {
        ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
        es, err := tsk.Delete(ctx)
        cancel()
        if err != nil {
            log.G(ctx).WithFields(log.Fields{
                "error":     err,
                "container": c.ID,
            }).Warn("failed to delete container from containerd")
        } else {
            exitStatus = container.ExitStatus{
                ExitCode: int(es.ExitCode()),
                ExitedAt: es.ExitTime(),
            }
        }
    }

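    // Give the container's I/O streams up to two seconds to flush before they
    // are reset below.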
    ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
    c.StreamConfig.Wait(ctx)
    cancel()

    c.Reset(false)

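    // The exit status carried by the event, when present, takes precedence
    // over the one reported by the task delete above.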
    if e != nil {
        exitStatus.ExitCode = int(e.ExitCode)
        exitStatus.ExitedAt = e.ExitedAt
        if e.Error != nil {
            c.SetError(e.Error)
        }
    }

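    // Consult the restart manager: should this exit trigger a restart, and
    // after what delay?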
    daemonShutdown := daemon.IsShuttingDown()
    execDuration := time.Since(c.StartedAt)
    restart, wait, err := c.RestartManager().ShouldRestart(uint32(exitStatus.ExitCode), daemonShutdown || c.HasBeenManuallyStopped, execDuration)
    if err != nil {
        log.G(ctx).WithFields(log.Fields{
            "error":                  err,
            "container":              c.ID,
            "restartCount":           c.RestartCount,
            "exitStatus":             exitStatus,
            "daemonShuttingDown":     daemonShutdown,
            "hasBeenManuallyStopped": c.HasBeenManuallyStopped,
            "execDuration":           execDuration,
        }).Warn("ShouldRestart failed, container will not be restarted")
        restart = false
    }

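    // Attributes attached to the "die" event emitted below.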
    attributes := map[string]string{
        "exitCode":     strconv.Itoa(exitStatus.ExitCode),
        "execDuration": strconv.Itoa(int(execDuration.Seconds())),
    }
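    // Release resources the daemon holds on the container's behalf, such as
    // network endpoints and mounts.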
    daemon.Cleanup(context.TODO(), c)

    if restart {
        c.RestartCount++
        log.G(ctx).WithFields(log.Fields{
            "container":     c.ID,
            "restartCount":  c.RestartCount,
            "exitStatus":    exitStatus,
            "manualRestart": c.HasBeenManuallyRestarted,
        }).Debug("Restarting container")
        c.SetRestarting(&exitStatus)
    } else {
        c.SetStopped(&exitStatus)
        if !c.HasBeenManuallyRestarted {
            defer daemon.autoRemove(&cfg.Config, c)
        }
    }
    defer c.Unlock() // needs to be called before autoRemove

    daemon.setStateCounter(c)
    checkpointErr := c.CheckpointTo(context.TODO(), daemon.containersReplica)

    daemon.LogContainerEventWithAttributes(c, events.ActionDie, attributes)

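    // Restart asynchronously: wait out the backoff delay computed by the
    // restart manager before starting the container again.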
    if restart {
        go func() {
            err := <-wait
            if err == nil {
                // daemon.netController is initialized while the daemon is
                // restoring containers, but containerStart uses it. To avoid a
                // panic during the startup process, we must wait here until
                // the daemon restore is done.
                daemon.waitForStartupDone()
                cfg := daemon.config() // Apply the most up-to-date daemon config to the restarted container.

                // Update err if we fail to start the container, so that the
                // cleanup code below can update the container's status and
                // auto-remove it (if set).
                err = daemon.containerStart(context.Background(), cfg, c, "", "", false)
                if err != nil {
                    log.G(ctx).Debugf("failed to restart container: %+v", err)
                }
            }
            if err != nil {
                c.Lock()
                c.SetStopped(&exitStatus)
                daemon.setStateCounter(c)
                c.CheckpointTo(context.TODO(), daemon.containersReplica)
                c.Unlock()
                defer daemon.autoRemove(&cfg.Config, c)
                if err != restartmanager.ErrRestartCanceled {
                    log.G(ctx).Errorf("restartmanger wait error: %+v", err)
                }
            }
        }()
    }

    return checkpointErr
}

// ProcessEvent is called by libcontainerd whenever an event occurs
func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
    c, err := daemon.GetContainer(id)
    if err != nil {
        return errors.Wrapf(err, "could not find container %s", id)
    }

    switch e {
    case libcontainerdtypes.EventOOM:
        // StateOOM is Linux specific and should never be hit on Windows
        if isWindows {
            return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
        }

        c.Lock()
        defer c.Unlock()
        c.OOMKilled = true
        daemon.updateHealthMonitor(c)
        if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
            return err
        }

        daemon.LogContainerEvent(c, events.ActionOOM)
    case libcontainerdtypes.EventExit:
        if ei.ProcessID == ei.ContainerID {
            return daemon.handleContainerExit(c, &ei)
        }

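        // This exit is for an exec process rather than the container's init
        // task; fall back to exit code 127 if the exec is no longer tracked.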
        exitCode := 127
        if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
            ec := int(ei.ExitCode)
            execConfig.Lock()
            defer execConfig.Unlock()

            // Remove the exec command from the container's store only and not the
            // daemon's store so that the exec command can be inspected. Remove it
            // before mutating execConfig to maintain the invariant that
            // c.ExecCommands only contains execs that have not exited.
            c.ExecCommands.Delete(execConfig.ID)

            execConfig.ExitCode = &ec
            execConfig.Running = false

            ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
            execConfig.StreamConfig.Wait(ctx)
            cancel()

            if err := execConfig.CloseStreams(); err != nil {
                log.G(ctx).Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
            }

            exitCode = ec

            // If the exec failed at start in such a way that containerd
            // publishes an exit event for it, we will race processing the event
            // with daemon.ContainerExecStart() removing the exec from
            // c.ExecCommands. If we win the race, we will find that there is no
            // process to clean up. (And ContainerExecStart will clobber the
            // exit code we set.) Prevent a nil-dereference panic in that
            // situation to restore the status quo where this is merely a
            // logical race condition.
            if execConfig.Process != nil {
                go func() {
                    if _, err := execConfig.Process.Delete(context.Background()); err != nil {
                        log.G(ctx).WithFields(log.Fields{
                            "error":     err,
                            "container": ei.ContainerID,
                            "process":   ei.ProcessID,
                        }).Warn("failed to delete process")
                    }
                }()
            }
        }
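        // Emit an exec-die event regardless of whether the exec was still
        // being tracked.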
        daemon.LogContainerEventWithAttributes(c, events.ActionExecDie, map[string]string{
            "execID":   ei.ProcessID,
            "exitCode": strconv.Itoa(exitCode),
        })
    case libcontainerdtypes.EventStart:
        c.Lock()
        defer c.Unlock()

        // This handles starts that were not initiated by docker.
        if !c.Running {
            ctr, err := daemon.containerd.LoadContainer(context.Background(), c.ID)
            if err != nil {
                if errdefs.IsNotFound(err) {
                    // The container was started outside of docker and so could
                    // have been deleted outside of docker before we got around
                    // to loading it from containerd.
                    log.G(context.TODO()).WithFields(log.Fields{
                        "error":     err,
                        "container": c.ID,
                    }).Debug("could not load containerd container for start event")
                    return nil
                }
                return err
            }
            tsk, err := ctr.Task(context.Background())
            if err != nil {
                if errdefs.IsNotFound(err) {
                    log.G(context.TODO()).WithFields(log.Fields{
                        "error":     err,
                        "container": c.ID,
                    }).Debug("failed to load task for externally-started container")
                    return nil
                }
                return err
            }
            c.SetRunningExternal(ctr, tsk)
            c.HasBeenManuallyStopped = false
            c.HasBeenStartedBefore = true
            daemon.setStateCounter(c)

            daemon.initHealthMonitor(c)

            if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
                return err
            }
            daemon.LogContainerEvent(c, events.ActionStart)
        }

    case libcontainerdtypes.EventPaused:
        c.Lock()
        defer c.Unlock()

        if !c.Paused {
            c.Paused = true
            daemon.setStateCounter(c)
            daemon.updateHealthMonitor(c)
            if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
                return err
            }
            daemon.LogContainerEvent(c, events.ActionPause)
        }
    case libcontainerdtypes.EventResumed:
        c.Lock()
        defer c.Unlock()

        if c.Paused {
            c.Paused = false
            daemon.setStateCounter(c)
            daemon.updateHealthMonitor(c)

            if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
                return err
            }
            daemon.LogContainerEvent(c, events.ActionUnPause)
        }
    }
    return nil
}

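// autoRemove force-removes the container and its volumes when its host config
// has AutoRemove set. A removal error is logged only if the container still
// exists afterwards.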
func (daemon *Daemon) autoRemove(cfg *config.Config, c *container.Container) {
    c.Lock()
    ar := c.HostConfig.AutoRemove
    c.Unlock()
    if !ar {
        return
    }

    err := daemon.containerRm(cfg, c.ID, &backend.ContainerRmConfig{ForceRemove: true, RemoveVolume: true})
    if err == nil {
        return
    }
    if c := daemon.containers.Get(c.ID); c == nil {
        return
    }

    log.G(context.TODO()).WithFields(log.Fields{"error": err, "container": c.ID}).Error("error removing container")
}