dotcloud/docker

View on GitHub
libnetwork/sandbox.go

Summary

Maintainability
D
1 day
Test Coverage
package libnetwork

import (
    "context"
    "encoding/json"
    "fmt"
    "net"
    "sort"
    "strings"
    "sync"

    "github.com/containerd/log"
    "github.com/docker/docker/libnetwork/etchosts"
    "github.com/docker/docker/libnetwork/osl"
    "github.com/docker/docker/libnetwork/types"
    "go.opentelemetry.io/otel"
    "go.opentelemetry.io/otel/attribute"
    "go.opentelemetry.io/otel/trace"
)

// SandboxOption is an option setter function type used to pass various options to
// NewNetContainer method. The various setter functions of type SandboxOption are
// provided by libnetwork, they look like ContainerOptionXXXX(...)
type SandboxOption func(sb *Sandbox)

func (sb *Sandbox) processOptions(options ...SandboxOption) {
    for _, opt := range options {
        if opt != nil {
            opt(sb)
        }
    }
}

// Sandbox provides the control over the network container entity.
// It is a one to one mapping with the container.
type Sandbox struct {
    id                 string
    containerID        string
    config             containerConfig
    extDNS             []extDNSEntry
    osSbox             *osl.Namespace
    controller         *Controller
    resolver           *Resolver
    resolverOnce       sync.Once
    endpoints          []*Endpoint
    epPriority         map[string]int
    populatedEndpoints map[string]struct{}
    joinLeaveDone      chan struct{}
    dbIndex            uint64
    dbExists           bool
    isStub             bool
    inDelete           bool
    ingress            bool
    ndotsSet           bool
    oslTypes           []osl.SandboxType // slice of properties of this sandbox
    loadBalancerNID    string            // NID that this SB is a load balancer for
    mu                 sync.Mutex
    // This mutex is used to serialize service related operation for an endpoint
    // The lock is here because the endpoint is saved into the store so is not unique
    service sync.Mutex
}

// These are the container configs used to customize container /etc/hosts file.
type hostsPathConfig struct {
    hostName        string
    domainName      string
    hostsPath       string
    originHostsPath string
    extraHosts      []extraHost
    parentUpdates   []parentUpdate
}

type parentUpdate struct {
    cid  string
    name string
    ip   string
}

type extraHost struct {
    name string
    IP   string
}

// These are the container configs used to customize container /etc/resolv.conf file.
type resolvConfPathConfig struct {
    resolvConfPath       string
    originResolvConfPath string
    resolvConfHashFile   string
    dnsList              []string
    dnsSearchList        []string
    dnsOptionsList       []string
}

type containerConfig struct {
    containerConfigOS //nolint:nolintlint,unused // only populated on windows
    hostsPathConfig
    resolvConfPathConfig
    generic           map[string]interface{}
    useDefaultSandBox bool
    useExternalKey    bool
    exposedPorts      []types.TransportPort
}

// ID returns the ID of the sandbox.
func (sb *Sandbox) ID() string {
    return sb.id
}

// ContainerID returns the container id associated to this sandbox.
func (sb *Sandbox) ContainerID() string {
    return sb.containerID
}

// Key returns the sandbox's key.
func (sb *Sandbox) Key() string {
    if sb.config.useDefaultSandBox {
        return osl.GenerateKey("default")
    }
    return osl.GenerateKey(sb.id)
}

// Labels returns the sandbox's labels.
func (sb *Sandbox) Labels() map[string]interface{} {
    sb.mu.Lock()
    defer sb.mu.Unlock()
    opts := make(map[string]interface{}, len(sb.config.generic))
    for k, v := range sb.config.generic {
        opts[k] = v
    }
    return opts
}

// Delete destroys this container after detaching it from all connected endpoints.
func (sb *Sandbox) Delete(ctx context.Context) error {
    return sb.delete(ctx, false)
}

func (sb *Sandbox) delete(ctx context.Context, force bool) error {
    sb.mu.Lock()
    if sb.inDelete {
        sb.mu.Unlock()
        return types.ForbiddenErrorf("another sandbox delete in progress")
    }
    // Set the inDelete flag. This will ensure that we don't
    // update the store until we have completed all the endpoint
    // leaves and deletes. And when endpoint leaves and deletes
    // are completed then we can finally delete the sandbox object
    // altogether from the data store. If the daemon exits
    // ungracefully in the middle of a sandbox delete this way we
    // will have all the references to the endpoints in the
    // sandbox so that we can clean them up when we restart
    sb.inDelete = true
    sb.mu.Unlock()

    c := sb.controller

    // Detach from all endpoints
    retain := false
    for _, ep := range sb.Endpoints() {
        // gw network endpoint detach and removal are automatic
        if ep.endpointInGWNetwork() && !force {
            continue
        }
        // Retain the sanbdox if we can't obtain the network from store.
        if _, err := c.getNetworkFromStore(ep.getNetwork().ID()); err != nil {
            if !c.isSwarmNode() {
                retain = true
            }
            log.G(ctx).Warnf("Failed getting network for ep %s during sandbox %s delete: %v", ep.ID(), sb.ID(), err)
            continue
        }

        if !force {
            if err := ep.Leave(context.WithoutCancel(ctx), sb); err != nil {
                log.G(ctx).Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err)
            }
        }

        if err := ep.Delete(context.WithoutCancel(ctx), force); err != nil {
            log.G(ctx).Warnf("Failed deleting endpoint %s: %v\n", ep.ID(), err)
        }
    }

    if retain {
        sb.mu.Lock()
        sb.inDelete = false
        sb.mu.Unlock()
        return fmt.Errorf("could not cleanup all the endpoints in container %s / sandbox %s", sb.containerID, sb.id)
    }
    // Container is going away. Path cache in etchosts is most
    // likely not required any more. Drop it.
    etchosts.Drop(sb.config.hostsPath)

    if sb.resolver != nil {
        sb.resolver.Stop()
    }

    if sb.osSbox != nil && !sb.config.useDefaultSandBox {
        if err := sb.osSbox.Destroy(); err != nil {
            log.G(ctx).WithError(err).Warn("error destroying network sandbox")
        }
    }

    if err := sb.storeDelete(); err != nil {
        log.G(ctx).Warnf("Failed to delete sandbox %s from store: %v", sb.ID(), err)
    }

    c.mu.Lock()
    if sb.ingress {
        c.ingressSandbox = nil
    }
    delete(c.sandboxes, sb.ID())
    c.mu.Unlock()

    return nil
}

// Rename changes the name of all attached Endpoints.
func (sb *Sandbox) Rename(name string) error {
    var err error

    for _, ep := range sb.Endpoints() {
        if ep.endpointInGWNetwork() {
            continue
        }

        oldName := ep.Name()
        lEp := ep
        if err = ep.rename(name); err != nil {
            break
        }

        defer func() {
            if err != nil {
                if err2 := lEp.rename(oldName); err2 != nil {
                    log.G(context.TODO()).WithField("old", oldName).WithField("origError", err).WithError(err2).Error("error renaming sandbox")
                }
            }
        }()
    }

    return err
}

// Refresh leaves all the endpoints, resets and re-applies the options,
// re-joins all the endpoints without destroying the osl sandbox
func (sb *Sandbox) Refresh(ctx context.Context, options ...SandboxOption) error {
    // Store connected endpoints
    epList := sb.Endpoints()

    // Detach from all endpoints
    for _, ep := range epList {
        if err := ep.Leave(context.WithoutCancel(ctx), sb); err != nil {
            log.G(ctx).Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err)
        }
    }

    // Re-apply options
    sb.config = containerConfig{}
    sb.processOptions(options...)

    // Setup discovery files
    if err := sb.setupResolutionFiles(ctx); err != nil {
        return err
    }

    // Re-connect to all endpoints
    for _, ep := range epList {
        if err := ep.Join(context.WithoutCancel(ctx), sb); err != nil {
            log.G(ctx).Warnf("Failed attach sandbox %s to endpoint %s: %v\n", sb.ID(), ep.ID(), err)
        }
    }

    return nil
}

func (sb *Sandbox) MarshalJSON() ([]byte, error) {
    sb.mu.Lock()
    defer sb.mu.Unlock()

    // We are just interested in the container ID. This can be expanded to include all of containerInfo if there is a need
    return json.Marshal(sb.id)
}

func (sb *Sandbox) UnmarshalJSON(b []byte) (err error) {
    sb.mu.Lock()
    defer sb.mu.Unlock()

    var id string
    if err := json.Unmarshal(b, &id); err != nil {
        return err
    }
    sb.id = id
    return nil
}

// Endpoints returns all the endpoints connected to the sandbox.
func (sb *Sandbox) Endpoints() []*Endpoint {
    sb.mu.Lock()
    defer sb.mu.Unlock()

    eps := make([]*Endpoint, len(sb.endpoints))
    copy(eps, sb.endpoints)

    return eps
}

func (sb *Sandbox) addEndpoint(ep *Endpoint) {
    sb.mu.Lock()
    defer sb.mu.Unlock()

    l := len(sb.endpoints)
    i := sort.Search(l, func(j int) bool {
        return ep.Less(sb.endpoints[j])
    })

    sb.endpoints = append(sb.endpoints, nil)
    copy(sb.endpoints[i+1:], sb.endpoints[i:])
    sb.endpoints[i] = ep
}

func (sb *Sandbox) removeEndpoint(ep *Endpoint) {
    sb.mu.Lock()
    defer sb.mu.Unlock()

    sb.removeEndpointRaw(ep)
}

func (sb *Sandbox) removeEndpointRaw(ep *Endpoint) {
    for i, e := range sb.endpoints {
        if e == ep {
            sb.endpoints = append(sb.endpoints[:i], sb.endpoints[i+1:]...)
            return
        }
    }
}

func (sb *Sandbox) GetEndpoint(id string) *Endpoint {
    sb.mu.Lock()
    defer sb.mu.Unlock()

    for _, ep := range sb.endpoints {
        if ep.id == id {
            return ep
        }
    }

    return nil
}

func (sb *Sandbox) HandleQueryResp(name string, ip net.IP) {
    for _, ep := range sb.Endpoints() {
        n := ep.getNetwork()
        n.HandleQueryResp(name, ip)
    }
}

func (sb *Sandbox) ResolveIP(ctx context.Context, ip string) string {
    var svc string
    log.G(ctx).Debugf("IP To resolve %v", ip)

    for _, ep := range sb.Endpoints() {
        n := ep.getNetwork()
        svc = n.ResolveIP(ctx, ip)
        if len(svc) != 0 {
            return svc
        }
    }

    return svc
}

// ResolveService returns all the backend details about the containers or hosts
// backing a service. Its purpose is to satisfy an SRV query.
func (sb *Sandbox) ResolveService(ctx context.Context, name string) ([]*net.SRV, []net.IP) {
    log.G(ctx).Debugf("Service name To resolve: %v", name)

    // There are DNS implementations that allow SRV queries for names not in
    // the format defined by RFC 2782. Hence specific validations checks are
    // not done
    if parts := strings.SplitN(name, ".", 3); len(parts) < 3 {
        return nil, nil
    }

    for _, ep := range sb.Endpoints() {
        n := ep.getNetwork()

        srv, ip := n.ResolveService(ctx, name)
        if len(srv) > 0 {
            return srv, ip
        }
    }
    return nil, nil
}

func (sb *Sandbox) ResolveName(ctx context.Context, name string, ipType int) ([]net.IP, bool) {
    // Embedded server owns the docker network domain. Resolution should work
    // for both container_name and container_name.network_name
    // We allow '.' in service name and network name. For a name a.b.c.d the
    // following have to tried;
    // {a.b.c.d in the networks container is connected to}
    // {a.b.c in network d},
    // {a.b in network c.d},
    // {a in network b.c.d},

    log.G(ctx).Debugf("Name To resolve: %v", name)
    name = strings.TrimSuffix(name, ".")
    reqName := []string{name}
    networkName := []string{""}

    if strings.Contains(name, ".") {
        var i int
        dup := name
        for {
            if i = strings.LastIndex(dup, "."); i == -1 {
                break
            }
            networkName = append(networkName, name[i+1:])
            reqName = append(reqName, name[:i])

            dup = dup[:i]
        }
    }

    epList := sb.Endpoints()

    // In swarm mode, services with exposed ports are connected to user overlay
    // network, ingress network and docker_gwbridge networks. Name resolution
    // should prioritize returning the VIP/IPs on user overlay network.
    //
    // Re-order the endpoints based on the network-type they're attached to;
    //
    //  1. dynamic networks (user overlay networks)
    //  2. ingress network(s)
    //  3. local networks ("docker_gwbridge")
    if sb.controller.isSwarmNode() {
        sort.Sort(ByNetworkType(epList))
    }

    for i := 0; i < len(reqName); i++ {
        // First check for local container alias
        ip, ipv6Miss := sb.resolveName(ctx, reqName[i], networkName[i], epList, true, ipType)
        if ip != nil {
            return ip, false
        }
        if ipv6Miss {
            return ip, ipv6Miss
        }

        // Resolve the actual container name
        ip, ipv6Miss = sb.resolveName(ctx, reqName[i], networkName[i], epList, false, ipType)
        if ip != nil {
            return ip, false
        }
        if ipv6Miss {
            return ip, ipv6Miss
        }
    }
    return nil, false
}

func (sb *Sandbox) resolveName(ctx context.Context, nameOrAlias string, networkName string, epList []*Endpoint, lookupAlias bool, ipType int) (_ []net.IP, ipv6Miss bool) {
    ctx, span := otel.Tracer("").Start(ctx, "Sandbox.resolveName", trace.WithAttributes(
        attribute.String("libnet.resolver.name-or-alias", nameOrAlias),
        attribute.String("libnet.network.name", networkName),
        attribute.Bool("libnet.resolver.alias-lookup", lookupAlias),
        attribute.Int("libnet.resolver.ip-family", ipType)))
    defer span.End()

    for _, ep := range epList {
        if lookupAlias && len(ep.aliases) == 0 {
            continue
        }

        nw := ep.getNetwork()
        if networkName != "" && networkName != nw.Name() {
            continue
        }

        name := nameOrAlias
        if lookupAlias {
            ep.mu.Lock()
            alias, ok := ep.aliases[nameOrAlias]
            ep.mu.Unlock()
            if !ok {
                continue
            }
            name = alias
        } else {
            // If it is a regular lookup and if the requested name is an alias
            // don't perform a svc lookup for this endpoint.
            ep.mu.Lock()
            _, ok := ep.aliases[nameOrAlias]
            ep.mu.Unlock()
            if ok {
                continue
            }
        }

        ip, miss := nw.ResolveName(ctx, name, ipType)
        if ip != nil {
            return ip, false
        }
        if miss {
            ipv6Miss = miss
        }
    }
    return nil, ipv6Miss
}

// hasExternalAccess returns true if any of sb's Endpoints appear to have external
// network access.
func (sb *Sandbox) hasExternalAccess() bool {
    for _, ep := range sb.Endpoints() {
        nw := ep.getNetwork()
        if nw.Internal() || nw.Type() == "null" || nw.Type() == "host" {
            continue
        }
        if ep.hasGatewayOrDefaultRoute() {
            return true
        }
    }
    return false
}

// EnableService makes a managed container's service available by adding the
// endpoint to the service load balancer and service discovery.
func (sb *Sandbox) EnableService() (err error) {
    log.G(context.TODO()).Debugf("EnableService %s START", sb.containerID)
    defer func() {
        if err != nil {
            if err2 := sb.DisableService(); err2 != nil {
                log.G(context.TODO()).WithError(err2).WithField("origError", err).Error("Error while disabling service after original error")
            }
        }
    }()
    for _, ep := range sb.Endpoints() {
        if !ep.isServiceEnabled() {
            if err := ep.addServiceInfoToCluster(sb); err != nil {
                return fmt.Errorf("could not update state for endpoint %s into cluster: %v", ep.Name(), err)
            }
            ep.enableService()
        }
    }
    log.G(context.TODO()).Debugf("EnableService %s DONE", sb.containerID)
    return nil
}

// DisableService removes a managed container's endpoints from the load balancer
// and service discovery.
func (sb *Sandbox) DisableService() (err error) {
    log.G(context.TODO()).Debugf("DisableService %s START", sb.containerID)
    failedEps := []string{}
    defer func() {
        if len(failedEps) > 0 {
            err = fmt.Errorf("failed to disable service on sandbox:%s, for endpoints %s", sb.ID(), strings.Join(failedEps, ","))
        }
    }()
    for _, ep := range sb.Endpoints() {
        if ep.isServiceEnabled() {
            if err := ep.deleteServiceInfoFromCluster(sb, false, "DisableService"); err != nil {
                failedEps = append(failedEps, ep.Name())
                log.G(context.TODO()).Warnf("failed update state for endpoint %s into cluster: %v", ep.Name(), err)
            }
            ep.disableService()
        }
    }
    log.G(context.TODO()).Debugf("DisableService %s DONE", sb.containerID)
    return nil
}

func (sb *Sandbox) clearNetworkResources(origEp *Endpoint) error {
    ep := sb.GetEndpoint(origEp.id)
    if ep == nil {
        return fmt.Errorf("could not find the sandbox endpoint data for endpoint %s",
            origEp.id)
    }

    sb.mu.Lock()
    osSbox := sb.osSbox
    inDelete := sb.inDelete
    sb.mu.Unlock()
    if osSbox != nil {
        releaseOSSboxResources(osSbox, ep)
    }

    sb.mu.Lock()
    delete(sb.populatedEndpoints, ep.ID())

    if len(sb.endpoints) == 0 {
        // sb.endpoints should never be empty and this is unexpected error condition
        // We log an error message to note this down for debugging purposes.
        log.G(context.TODO()).Errorf("No endpoints in sandbox while trying to remove endpoint %s", ep.Name())
        sb.mu.Unlock()
        return nil
    }

    var (
        gwepBefore, gwepAfter *Endpoint
        index                 = -1
    )
    for i, e := range sb.endpoints {
        if e == ep {
            index = i
        }
        if len(e.Gateway()) > 0 && gwepBefore == nil {
            gwepBefore = e
        }
        if index != -1 && gwepBefore != nil {
            break
        }
    }

    if index == -1 {
        log.G(context.TODO()).Warnf("Endpoint %s has already been deleted", ep.Name())
        sb.mu.Unlock()
        return nil
    }

    sb.removeEndpointRaw(ep)
    for _, e := range sb.endpoints {
        if len(e.Gateway()) > 0 {
            gwepAfter = e
            break
        }
    }
    delete(sb.epPriority, ep.ID())
    sb.mu.Unlock()

    if gwepAfter != nil && gwepBefore != gwepAfter {
        if err := sb.updateGateway(gwepAfter); err != nil {
            return err
        }
    }

    // Only update the store if we did not come here as part of
    // sandbox delete. If we came here as part of delete then do
    // not bother updating the store. The sandbox object will be
    // deleted anyway
    if !inDelete {
        return sb.storeUpdate(context.TODO())
    }

    return nil
}

// joinLeaveStart waits to ensure there are no joins or leaves in progress and
// marks this join/leave in progress without race
func (sb *Sandbox) joinLeaveStart() {
    sb.mu.Lock()
    defer sb.mu.Unlock()

    for sb.joinLeaveDone != nil {
        joinLeaveDone := sb.joinLeaveDone
        sb.mu.Unlock()

        <-joinLeaveDone

        sb.mu.Lock()
    }

    sb.joinLeaveDone = make(chan struct{})
}

// joinLeaveEnd marks the end of this join/leave operation and
// signals the same without race to other join and leave waiters
func (sb *Sandbox) joinLeaveEnd() {
    sb.mu.Lock()
    defer sb.mu.Unlock()

    if sb.joinLeaveDone != nil {
        close(sb.joinLeaveDone)
        sb.joinLeaveDone = nil
    }
}

// <=> Returns true if a < b, false if a > b and advances to next level if a == b
// epi.prio <=> epj.prio           # 2 < 1
// epi.gw <=> epj.gw               # non-gw < gw
// epi.internal <=> epj.internal   # non-internal < internal
// epi.joininfo <=> epj.joininfo   # ipv6 < ipv4
// epi.name <=> epj.name           # bar < foo
func (epi *Endpoint) Less(epj *Endpoint) bool {
    var prioi, prioj int

    sbi, _ := epi.getSandbox()
    sbj, _ := epj.getSandbox()

    // Prio defaults to 0
    if sbi != nil {
        prioi = sbi.epPriority[epi.ID()]
    }
    if sbj != nil {
        prioj = sbj.epPriority[epj.ID()]
    }

    if prioi != prioj {
        return prioi > prioj
    }

    gwi := epi.endpointInGWNetwork()
    gwj := epj.endpointInGWNetwork()
    if gwi != gwj {
        return gwj
    }

    inti := epi.getNetwork().Internal()
    intj := epj.getNetwork().Internal()
    if inti != intj {
        return intj
    }

    jii := 0
    if epi.joinInfo != nil {
        if epi.joinInfo.gw != nil {
            jii = jii + 1
        }
        if epi.joinInfo.gw6 != nil {
            jii = jii + 2
        }
    }

    jij := 0
    if epj.joinInfo != nil {
        if epj.joinInfo.gw != nil {
            jij = jij + 1
        }
        if epj.joinInfo.gw6 != nil {
            jij = jij + 2
        }
    }

    if jii != jij {
        return jii > jij
    }

    return epi.network.Name() < epj.network.Name()
}

func (sb *Sandbox) NdotsSet() bool {
    return sb.ndotsSet
}