daemon/monitor.go
package daemon // import "github.com/docker/docker/daemon"
import (
"context"
"strconv"
"time"
"github.com/containerd/log"
"github.com/docker/docker/api/types/backend"
"github.com/docker/docker/api/types/events"
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/errdefs"
libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
"github.com/docker/docker/restartmanager"
"github.com/pkg/errors"
)
func (daemon *Daemon) setStateCounter(c *container.Container) {
switch c.StateString() {
case "paused":
stateCtr.set(c.ID, "paused")
case "running":
stateCtr.set(c.ID, "running")
default:
stateCtr.set(c.ID, "stopped")
}
}
func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
var exitStatus container.ExitStatus
c.Lock()
cfg := daemon.config()
// Health checks will be automatically restarted if/when the
// container is started again.
daemon.stopHealthchecks(c)
tsk, ok := c.Task()
if ok {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
es, err := tsk.Delete(ctx)
cancel()
if err != nil {
log.G(ctx).WithFields(log.Fields{
"error": err,
"container": c.ID,
}).Warn("failed to delete container from containerd")
} else {
exitStatus = container.ExitStatus{
ExitCode: int(es.ExitCode()),
ExitedAt: es.ExitTime(),
}
}
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
c.StreamConfig.Wait(ctx)
cancel()
c.Reset(false)
if e != nil {
exitStatus.ExitCode = int(e.ExitCode)
exitStatus.ExitedAt = e.ExitedAt
if e.Error != nil {
c.SetError(e.Error)
}
}
daemonShutdown := daemon.IsShuttingDown()
execDuration := time.Since(c.StartedAt)
restart, wait, err := c.RestartManager().ShouldRestart(uint32(exitStatus.ExitCode), daemonShutdown || c.HasBeenManuallyStopped, execDuration)
if err != nil {
log.G(ctx).WithFields(log.Fields{
"error": err,
"container": c.ID,
"restartCount": c.RestartCount,
"exitStatus": exitStatus,
"daemonShuttingDown": daemonShutdown,
"hasBeenManuallyStopped": c.HasBeenManuallyStopped,
"execDuration": execDuration,
}).Warn("ShouldRestart failed, container will not be restarted")
restart = false
}
attributes := map[string]string{
"exitCode": strconv.Itoa(exitStatus.ExitCode),
"execDuration": strconv.Itoa(int(execDuration.Seconds())),
}
daemon.Cleanup(context.TODO(), c)
if restart {
c.RestartCount++
log.G(ctx).WithFields(log.Fields{
"container": c.ID,
"restartCount": c.RestartCount,
"exitStatus": exitStatus,
"manualRestart": c.HasBeenManuallyRestarted,
}).Debug("Restarting container")
c.SetRestarting(&exitStatus)
} else {
c.SetStopped(&exitStatus)
if !c.HasBeenManuallyRestarted {
defer daemon.autoRemove(&cfg.Config, c)
}
}
defer c.Unlock() // needs to be called before autoRemove
daemon.setStateCounter(c)
checkpointErr := c.CheckpointTo(context.TODO(), daemon.containersReplica)
daemon.LogContainerEventWithAttributes(c, events.ActionDie, attributes)
if restart {
go func() {
err := <-wait
if err == nil {
// daemon.netController is initialized when daemon is restoring containers.
// But containerStart will use daemon.netController segment.
// So to avoid panic at startup process, here must wait util daemon restore done.
daemon.waitForStartupDone()
cfg := daemon.config() // Apply the most up-to-date daemon config to the restarted container.
// update the error if we fail to start the container, so that the cleanup code
// below can handle updating the container's status, and auto-remove (if set).
err = daemon.containerStart(context.Background(), cfg, c, "", "", false)
if err != nil {
log.G(ctx).Debugf("failed to restart container: %+v", err)
}
}
if err != nil {
c.Lock()
c.SetStopped(&exitStatus)
daemon.setStateCounter(c)
c.CheckpointTo(context.TODO(), daemon.containersReplica)
c.Unlock()
defer daemon.autoRemove(&cfg.Config, c)
if err != restartmanager.ErrRestartCanceled {
log.G(ctx).Errorf("restartmanger wait error: %+v", err)
}
}
}()
}
return checkpointErr
}
// ProcessEvent is called by libcontainerd whenever an event occurs
func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
c, err := daemon.GetContainer(id)
if err != nil {
return errors.Wrapf(err, "could not find container %s", id)
}
switch e {
case libcontainerdtypes.EventOOM:
// StateOOM is Linux specific and should never be hit on Windows
if isWindows {
return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
}
c.Lock()
defer c.Unlock()
c.OOMKilled = true
daemon.updateHealthMonitor(c)
if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
return err
}
daemon.LogContainerEvent(c, events.ActionOOM)
case libcontainerdtypes.EventExit:
if ei.ProcessID == ei.ContainerID {
return daemon.handleContainerExit(c, &ei)
}
exitCode := 127
if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
ec := int(ei.ExitCode)
execConfig.Lock()
defer execConfig.Unlock()
// Remove the exec command from the container's store only and not the
// daemon's store so that the exec command can be inspected. Remove it
// before mutating execConfig to maintain the invariant that
// c.ExecCommands only contains execs that have not exited.
c.ExecCommands.Delete(execConfig.ID)
execConfig.ExitCode = &ec
execConfig.Running = false
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
execConfig.StreamConfig.Wait(ctx)
cancel()
if err := execConfig.CloseStreams(); err != nil {
log.G(ctx).Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
}
exitCode = ec
// If the exec failed at start in such a way that containerd
// publishes an exit event for it, we will race processing the event
// with daemon.ContainerExecStart() removing the exec from
// c.ExecCommands. If we win the race, we will find that there is no
// process to clean up. (And ContainerExecStart will clobber the
// exit code we set.) Prevent a nil-dereference panic in that
// situation to restore the status quo where this is merely a
// logical race condition.
if execConfig.Process != nil {
go func() {
if _, err := execConfig.Process.Delete(context.Background()); err != nil {
log.G(ctx).WithFields(log.Fields{
"error": err,
"container": ei.ContainerID,
"process": ei.ProcessID,
}).Warn("failed to delete process")
}
}()
}
}
daemon.LogContainerEventWithAttributes(c, events.ActionExecDie, map[string]string{
"execID": ei.ProcessID,
"exitCode": strconv.Itoa(exitCode),
})
case libcontainerdtypes.EventStart:
c.Lock()
defer c.Unlock()
// This is here to handle start not generated by docker
if !c.Running {
ctr, err := daemon.containerd.LoadContainer(context.Background(), c.ID)
if err != nil {
if errdefs.IsNotFound(err) {
// The container was started by not-docker and so could have been deleted by
// not-docker before we got around to loading it from containerd.
log.G(context.TODO()).WithFields(log.Fields{
"error": err,
"container": c.ID,
}).Debug("could not load containerd container for start event")
return nil
}
return err
}
tsk, err := ctr.Task(context.Background())
if err != nil {
if errdefs.IsNotFound(err) {
log.G(context.TODO()).WithFields(log.Fields{
"error": err,
"container": c.ID,
}).Debug("failed to load task for externally-started container")
return nil
}
return err
}
c.SetRunningExternal(ctr, tsk)
c.HasBeenManuallyStopped = false
c.HasBeenStartedBefore = true
daemon.setStateCounter(c)
daemon.initHealthMonitor(c)
if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
return err
}
daemon.LogContainerEvent(c, events.ActionStart)
}
case libcontainerdtypes.EventPaused:
c.Lock()
defer c.Unlock()
if !c.Paused {
c.Paused = true
daemon.setStateCounter(c)
daemon.updateHealthMonitor(c)
if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
return err
}
daemon.LogContainerEvent(c, events.ActionPause)
}
case libcontainerdtypes.EventResumed:
c.Lock()
defer c.Unlock()
if c.Paused {
c.Paused = false
daemon.setStateCounter(c)
daemon.updateHealthMonitor(c)
if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
return err
}
daemon.LogContainerEvent(c, events.ActionUnPause)
}
}
return nil
}
func (daemon *Daemon) autoRemove(cfg *config.Config, c *container.Container) {
c.Lock()
ar := c.HostConfig.AutoRemove
c.Unlock()
if !ar {
return
}
err := daemon.containerRm(cfg, c.ID, &backend.ContainerRmConfig{ForceRemove: true, RemoveVolume: true})
if err == nil {
return
}
if c := daemon.containers.Get(c.ID); c == nil {
return
}
log.G(context.TODO()).WithFields(log.Fields{"error": err, "container": c.ID}).Error("error removing container")
}