
View on GitHub


1 day
Test Coverage
// Package tarsum provides algorithms to perform checksum calculation on
// filesystem layers.
// The transportation of filesystems, regarding Docker, is done with tar(1)
// archives. There are a variety of tar serialization formats [2], and a key
// concern here is ensuring a repeatable checksum given a set of inputs from a
// generic tar archive. Types of transportation include distribution to and from a
// registry endpoint, saving and loading through commands or Docker daemon APIs,
// transferring the build context from client to Docker daemon, and committing the
// filesystem of a container to become an image.
// As tar archives are used for transit, but not preserved in many situations, the
// focus of the algorithm is to ensure the integrity of the preserved filesystem,
// while maintaining a deterministic accountability. This includes neither
// constraining the ordering or manipulation of the files during the creation or
// unpacking of the archive, nor including additional metadata state about the
// file system attributes.
package tarsum // import ""

import (

// Size tiers for the scratch buffer that (*tarSum).Read allocates: a request
// is rounded up to the smallest tier that fits it, and anything larger than
// buf32K gets a buffer of exactly the requested size.
const (
    buf8K  = 8 * 1024
    buf16K = 16 * 1024
    buf32K = 32 * 1024
)

// NewTarSum creates a new interface for calculating a fixed time checksum of a
// tar archive.
// This is used for calculating checksums of layers of an image, in some cases
// including the byte payload of the image's json metadata as well, and for
// calculating the checksums for buildcache.
func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
    return NewTarSumHash(r, dc, v, DefaultTHash)

// NewTarSumHash creates a new TarSum, providing a THash to use rather than
// the DefaultTHash.
func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
    headerSelector, err := getTarHeaderSelector(v)
    if err != nil {
        return nil, err
    ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}
    err = ts.initTarSum()
    return ts, err

// NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label.
func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) {
    versionName, hashName, ok := strings.Cut(label, "+")
    if !ok {
        return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}")

    version, ok := tarSumVersionsByName[versionName]
    if !ok {
        return nil, fmt.Errorf("unknown TarSum version name: %q", versionName)

    hashConfig, ok := standardHashConfigs[hashName]
    if !ok {
        return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName)

    tHash := NewTHash(, hashConfig.hash.New)

    return NewTarSumHash(r, disableCompression, version, tHash)

// TarSum is the generic interface for calculating fixed time
// checksums of a tar archive.
type TarSum interface {
    GetSums() FileInfoSums
    Sum([]byte) string
    Version() Version
    Hash() THash

// tarSum struct is the structure for a Version0 checksum calculation.
type tarSum struct {
    tarR               *tar.Reader
    tarW               *tar.Writer
    writer             writeCloseFlusher
    bufTar             *bytes.Buffer
    bufWriter          *bytes.Buffer
    bufData            []byte
    h                  hash.Hash
    tHash              THash
    sums               FileInfoSums
    fileCounter        int64
    currentFile        string
    finished           bool
    first              bool
    DisableCompression bool              // false by default. When false, the output gzip compressed.
    tarSumVersion      Version           // this field is not exported so it can not be mutated during use
    headerSelector     tarHeaderSelector // handles selecting and ordering headers for files in the archive

func (ts tarSum) Hash() THash {
    return ts.tHash

func (ts tarSum) Version() Version {
    return ts.tarSumVersion

// THash provides a hash.Hash type generator and its name.
type THash interface {
    // Hash returns a hash.Hash to write data into.
    Hash() hash.Hash
    // Name returns the algorithm name, which appears in the checksum label.
    Name() string
}

// NewTHash is a convenience method for creating a THash.
func NewTHash(name string, h func() hash.Hash) THash {
    return simpleTHash{n: name, h: h}

// tHashConfig pairs a hash name, as it appears in a TarSum label, with the
// crypto.Hash used to construct that hash.
type tHashConfig struct {
    name string
    hash crypto.Hash
}

// NOTE: DO NOT include MD5 or SHA1, which are considered insecure.
var standardHashConfigs = map[string]tHashConfig{
    "sha256": {name: "sha256", hash: crypto.SHA256},
    "sha512": {name: "sha512", hash: crypto.SHA512},

// DefaultTHash is the default TarSum hashing algorithm - "sha256".
// NewTarSum passes it to NewTarSumHash, and initTarSum falls back to it when a
// tarSum has no THash set.
var DefaultTHash = NewTHash("sha256", sha256.New)

// simpleTHash is the THash implementation returned by NewTHash: a name paired
// with a constructor function for the hash.
type simpleTHash struct {
    n string
    h func() hash.Hash
}

// Name returns the stored algorithm name.
func (sth simpleTHash) Name() string { return sth.n }

// Hash calls the stored constructor and returns its result.
func (sth simpleTHash) Hash() hash.Hash { return sth.h() }

func (ts *tarSum) encodeHeader(h *tar.Header) error {
    for _, elem := range ts.headerSelector.selectHeaders(h) {
        // Ignore these headers to be compatible with versions
        // before go 1.10
        if elem[0] == "gname" || elem[0] == "uname" {
            elem[1] = ""
        if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
            return err
    return nil

func (ts *tarSum) initTarSum() error {
    ts.bufTar = bytes.NewBuffer([]byte{})
    ts.bufWriter = bytes.NewBuffer([]byte{})
    ts.tarR = tar.NewReader(ts.Reader)
    ts.tarW = tar.NewWriter(ts.bufTar)
    if !ts.DisableCompression {
        ts.writer = gzip.NewWriter(ts.bufWriter)
    } else {
        ts.writer = &nopCloseFlusher{Writer: ts.bufWriter}
    if ts.tHash == nil {
        ts.tHash = DefaultTHash
    ts.h = ts.tHash.Hash()
    ts.first = true
    ts.sums = FileInfoSums{}
    return nil

func (ts *tarSum) Read(buf []byte) (int, error) {
    if ts.finished {
        return ts.bufWriter.Read(buf)
    if len(ts.bufData) < len(buf) {
        switch {
        case len(buf) <= buf8K:
            ts.bufData = make([]byte, buf8K)
        case len(buf) <= buf16K:
            ts.bufData = make([]byte, buf16K)
        case len(buf) <= buf32K:
            ts.bufData = make([]byte, buf32K)
            ts.bufData = make([]byte, len(buf))
    buf2 := ts.bufData[:len(buf)]

    n, err := ts.tarR.Read(buf2)
    if err != nil {
        if err == io.EOF {
            if _, err := ts.h.Write(buf2[:n]); err != nil {
                return 0, err
            if !ts.first {
                ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
            } else {
                ts.first = false

            if _, err := ts.tarW.Write(buf2[:n]); err != nil {
                return 0, err

            currentHeader, err := ts.tarR.Next()
            if err != nil {
                if err == io.EOF {
                    if err := ts.tarW.Close(); err != nil {
                        return 0, err
                    if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
                        return 0, err
                    if err := ts.writer.Close(); err != nil {
                        return 0, err
                    ts.finished = true
                    return ts.bufWriter.Read(buf)
                return 0, err

            // #nosec G305 -- The joined path is not passed to any filesystem APIs.
            ts.currentFile = path.Join(".", path.Join("/", currentHeader.Name))
            if err := ts.encodeHeader(currentHeader); err != nil {
                return 0, err
            if err := ts.tarW.WriteHeader(currentHeader); err != nil {
                return 0, err

            if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
                return 0, err

            return ts.bufWriter.Read(buf)
        return 0, err

    // Filling the hash buffer
    if _, err = ts.h.Write(buf2[:n]); err != nil {
        return 0, err

    // Filling the tar writer
    if _, err = ts.tarW.Write(buf2[:n]); err != nil {
        return 0, err

    // Filling the output writer
    if _, err = io.Copy(ts.writer, ts.bufTar); err != nil {
        return 0, err

    return ts.bufWriter.Read(buf)

// Sum returns the checksum string for the archive, formatted as
// "<version>+<hash name>:<hex digest>", where the digest comes from a fresh
// hash obtained from ts.tHash (not the running per-entry hash ts.h).
//
// NOTE(review): in this copy of the file the bodies of the `if extra != nil`
// and `for ... range ts.sums` statements, and all closing braces, are missing,
// so nothing is visibly written into h. Presumably extra and each collected
// sum are meant to be hashed here — restore from the upstream source before
// relying on this function.
func (ts *tarSum) Sum(extra []byte) string {
    h := ts.tHash.Hash()
    if extra != nil {
    for _, fis := range ts.sums {
    // Label layout: version string, "+", hash name, ":", hex-encoded digest.
    checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil))
    return checksum

func (ts *tarSum) GetSums() FileInfoSums {
    return ts.sums