dotcloud/docker

View on GitHub
pkg/tarsum/tarsum.go

Summary

Maintainability
C
1 day
Test Coverage
// Package tarsum provides algorithms to perform checksum calculation on
// filesystem layers.
//
// The transportation of filesystems, regarding Docker, is done with tar(1)
// archives. There are a variety of tar serialization formats [2], and a key
// concern here is ensuring a repeatable checksum given a set of inputs from a
// generic tar archive. Types of transportation include distribution to and from a
// registry endpoint, saving and loading through commands or Docker daemon APIs,
// transferring the build context from client to Docker daemon, and committing the
// filesystem of a container to become an image.
//
// As tar archives are used for transit, but not preserved in many situations, the
// focus of the algorithm is to ensure the integrity of the preserved filesystem,
// while maintaining a deterministic accountability. This includes neither
// constraining the ordering or manipulation of the files during the creation or
// unpacking of the archive, nor include additional metadata state about the file
// system attributes.
package tarsum // import "github.com/docker/docker/pkg/tarsum"

import (
    "archive/tar"
    "bytes"
    "compress/gzip"
    "crypto"
    "crypto/sha256"
    "encoding/hex"
    "errors"
    "fmt"
    "hash"
    "io"
    "path"
    "strings"
)

const (
    buf8K  = 8 * 1024
    buf16K = 16 * 1024
    buf32K = 32 * 1024
)

// NewTarSum creates a new interface for calculating a fixed time checksum of a
// tar archive.
//
// This is used for calculating checksums of layers of an image, in some cases
// including the byte payload of the image's json metadata as well, and for
// calculating the checksums for buildcache.
func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
    return NewTarSumHash(r, dc, v, DefaultTHash)
}

// NewTarSumHash creates a new TarSum, providing a THash to use rather than
// the DefaultTHash.
func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
    headerSelector, err := getTarHeaderSelector(v)
    if err != nil {
        return nil, err
    }
    ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}
    err = ts.initTarSum()
    return ts, err
}

// NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label.
func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) {
    versionName, hashName, ok := strings.Cut(label, "+")
    if !ok {
        return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}")
    }

    version, ok := tarSumVersionsByName[versionName]
    if !ok {
        return nil, fmt.Errorf("unknown TarSum version name: %q", versionName)
    }

    hashConfig, ok := standardHashConfigs[hashName]
    if !ok {
        return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName)
    }

    tHash := NewTHash(hashConfig.name, hashConfig.hash.New)

    return NewTarSumHash(r, disableCompression, version, tHash)
}

// TarSum is the generic interface for calculating fixed time
// checksums of a tar archive.
type TarSum interface {
    io.Reader
    GetSums() FileInfoSums
    Sum([]byte) string
    Version() Version
    Hash() THash
}

// tarSum struct is the structure for a Version0 checksum calculation.
type tarSum struct {
    io.Reader
    tarR               *tar.Reader
    tarW               *tar.Writer
    writer             writeCloseFlusher
    bufTar             *bytes.Buffer
    bufWriter          *bytes.Buffer
    bufData            []byte
    h                  hash.Hash
    tHash              THash
    sums               FileInfoSums
    fileCounter        int64
    currentFile        string
    finished           bool
    first              bool
    DisableCompression bool              // false by default. When false, the output gzip compressed.
    tarSumVersion      Version           // this field is not exported so it can not be mutated during use
    headerSelector     tarHeaderSelector // handles selecting and ordering headers for files in the archive
}

func (ts tarSum) Hash() THash {
    return ts.tHash
}

func (ts tarSum) Version() Version {
    return ts.tarSumVersion
}

// THash provides a hash.Hash type generator and its name.
type THash interface {
    Hash() hash.Hash
    Name() string
}

// NewTHash is a convenience method for creating a THash.
func NewTHash(name string, h func() hash.Hash) THash {
    return simpleTHash{n: name, h: h}
}

type tHashConfig struct {
    name string
    hash crypto.Hash
}

// NOTE: DO NOT include MD5 or SHA1, which are considered insecure.
var standardHashConfigs = map[string]tHashConfig{
    "sha256": {name: "sha256", hash: crypto.SHA256},
    "sha512": {name: "sha512", hash: crypto.SHA512},
}

// DefaultTHash is default TarSum hashing algorithm - "sha256".
var DefaultTHash = NewTHash("sha256", sha256.New)

type simpleTHash struct {
    n string
    h func() hash.Hash
}

func (sth simpleTHash) Name() string    { return sth.n }
func (sth simpleTHash) Hash() hash.Hash { return sth.h() }

func (ts *tarSum) encodeHeader(h *tar.Header) error {
    for _, elem := range ts.headerSelector.selectHeaders(h) {
        // Ignore these headers to be compatible with versions
        // before go 1.10
        if elem[0] == "gname" || elem[0] == "uname" {
            elem[1] = ""
        }
        if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
            return err
        }
    }
    return nil
}

func (ts *tarSum) initTarSum() error {
    ts.bufTar = bytes.NewBuffer([]byte{})
    ts.bufWriter = bytes.NewBuffer([]byte{})
    ts.tarR = tar.NewReader(ts.Reader)
    ts.tarW = tar.NewWriter(ts.bufTar)
    if !ts.DisableCompression {
        ts.writer = gzip.NewWriter(ts.bufWriter)
    } else {
        ts.writer = &nopCloseFlusher{Writer: ts.bufWriter}
    }
    if ts.tHash == nil {
        ts.tHash = DefaultTHash
    }
    ts.h = ts.tHash.Hash()
    ts.h.Reset()
    ts.first = true
    ts.sums = FileInfoSums{}
    return nil
}

func (ts *tarSum) Read(buf []byte) (int, error) {
    if ts.finished {
        return ts.bufWriter.Read(buf)
    }
    if len(ts.bufData) < len(buf) {
        switch {
        case len(buf) <= buf8K:
            ts.bufData = make([]byte, buf8K)
        case len(buf) <= buf16K:
            ts.bufData = make([]byte, buf16K)
        case len(buf) <= buf32K:
            ts.bufData = make([]byte, buf32K)
        default:
            ts.bufData = make([]byte, len(buf))
        }
    }
    buf2 := ts.bufData[:len(buf)]

    n, err := ts.tarR.Read(buf2)
    if err != nil {
        if err == io.EOF {
            if _, err := ts.h.Write(buf2[:n]); err != nil {
                return 0, err
            }
            if !ts.first {
                ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
                ts.fileCounter++
                ts.h.Reset()
            } else {
                ts.first = false
            }

            if _, err := ts.tarW.Write(buf2[:n]); err != nil {
                return 0, err
            }

            currentHeader, err := ts.tarR.Next()
            if err != nil {
                if err == io.EOF {
                    if err := ts.tarW.Close(); err != nil {
                        return 0, err
                    }
                    if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
                        return 0, err
                    }
                    if err := ts.writer.Close(); err != nil {
                        return 0, err
                    }
                    ts.finished = true
                    return ts.bufWriter.Read(buf)
                }
                return 0, err
            }

            // #nosec G305 -- The joined path is not passed to any filesystem APIs.
            ts.currentFile = path.Join(".", path.Join("/", currentHeader.Name))
            if err := ts.encodeHeader(currentHeader); err != nil {
                return 0, err
            }
            if err := ts.tarW.WriteHeader(currentHeader); err != nil {
                return 0, err
            }

            if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
                return 0, err
            }
            ts.writer.Flush()

            return ts.bufWriter.Read(buf)
        }
        return 0, err
    }

    // Filling the hash buffer
    if _, err = ts.h.Write(buf2[:n]); err != nil {
        return 0, err
    }

    // Filling the tar writer
    if _, err = ts.tarW.Write(buf2[:n]); err != nil {
        return 0, err
    }

    // Filling the output writer
    if _, err = io.Copy(ts.writer, ts.bufTar); err != nil {
        return 0, err
    }
    ts.writer.Flush()

    return ts.bufWriter.Read(buf)
}

func (ts *tarSum) Sum(extra []byte) string {
    ts.sums.SortBySums()
    h := ts.tHash.Hash()
    if extra != nil {
        h.Write(extra)
    }
    for _, fis := range ts.sums {
        h.Write([]byte(fis.Sum()))
    }
    checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil))
    return checksum
}

func (ts *tarSum) GetSums() FileInfoSums {
    return ts.sums
}