pkg/archive/changes.go
package archive // import "github.com/docker/docker/pkg/archive"
import (
"archive/tar"
"bytes"
"context"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
"github.com/containerd/log"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/pools"
"github.com/docker/docker/pkg/system"
)
// ChangeType represents the change type.
type ChangeType int
const (
ChangeModify = 0 // ChangeModify represents the modify operation.
ChangeAdd = 1 // ChangeAdd represents the add operation.
ChangeDelete = 2 // ChangeDelete represents the delete operation.
)
func (c ChangeType) String() string {
switch c {
case ChangeModify:
return "C"
case ChangeAdd:
return "A"
case ChangeDelete:
return "D"
}
return ""
}
// Change represents a change, it wraps the change type and path.
// It describes changes of the files in the path respect to the
// parent layers. The change could be modify, add, delete.
// This is used for layer diff.
type Change struct {
Path string
Kind ChangeType
}
func (change *Change) String() string {
return fmt.Sprintf("%s %s", change.Kind, change.Path)
}
// for sort.Sort
type changesByPath []Change
func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path }
func (c changesByPath) Len() int { return len(c) }
func (c changesByPath) Swap(i, j int) { c[j], c[i] = c[i], c[j] }
// Gnu tar doesn't have sub-second mtime precision. The go tar
// writer (1.10+) does when using PAX format, but we round times to seconds
// to ensure archives have the same hashes for backwards compatibility.
// See https://github.com/moby/moby/pull/35739/commits/fb170206ba12752214630b269a40ac7be6115ed4.
//
// Non-sub-second is problematic when we apply changes via tar
// files. We handle this by comparing for exact times, *or* same
// second count and either a or b having exactly 0 nanoseconds
func sameFsTime(a, b time.Time) bool {
return a.Equal(b) ||
(a.Unix() == b.Unix() &&
(a.Nanosecond() == 0 || b.Nanosecond() == 0))
}
func sameFsTimeSpec(a, b syscall.Timespec) bool {
return a.Sec == b.Sec &&
(a.Nsec == b.Nsec || a.Nsec == 0 || b.Nsec == 0)
}
// Changes walks the path rw and determines changes for the files in the path,
// with respect to the parent layers
func Changes(layers []string, rw string) ([]Change, error) {
return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip)
}
func aufsMetadataSkip(path string) (skip bool, err error) {
skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path)
if err != nil {
skip = true
}
return
}
func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) {
f := filepath.Base(path)
// If there is a whiteout, then the file was removed
if strings.HasPrefix(f, WhiteoutPrefix) {
originalFile := f[len(WhiteoutPrefix):]
return filepath.Join(filepath.Dir(path), originalFile), nil
}
return "", nil
}
type (
skipChange func(string) (bool, error)
deleteChange func(string, string, os.FileInfo) (string, error)
)
func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) {
var (
changes []Change
changedDirs = make(map[string]struct{})
)
err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
if err != nil {
return err
}
// Rebase path
path, err = filepath.Rel(rw, path)
if err != nil {
return err
}
// As this runs on the daemon side, file paths are OS specific.
path = filepath.Join(string(os.PathSeparator), path)
// Skip root
if path == string(os.PathSeparator) {
return nil
}
if sc != nil {
if skip, err := sc(path); skip {
return err
}
}
change := Change{
Path: path,
}
deletedFile, err := dc(rw, path, f)
if err != nil {
return err
}
// Find out what kind of modification happened
if deletedFile != "" {
change.Path = deletedFile
change.Kind = ChangeDelete
} else {
// Otherwise, the file was added
change.Kind = ChangeAdd
// ...Unless it already existed in a top layer, in which case, it's a modification
for _, layer := range layers {
stat, err := os.Stat(filepath.Join(layer, path))
if err != nil && !os.IsNotExist(err) {
return err
}
if err == nil {
// The file existed in the top layer, so that's a modification
// However, if it's a directory, maybe it wasn't actually modified.
// If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
if stat.IsDir() && f.IsDir() {
if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) {
// Both directories are the same, don't record the change
return nil
}
}
change.Kind = ChangeModify
break
}
}
}
// If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files.
// This block is here to ensure the change is recorded even if the
// modify time, mode and size of the parent directory in the rw and ro layers are all equal.
// Check https://github.com/docker/docker/pull/13590 for details.
if f.IsDir() {
changedDirs[path] = struct{}{}
}
if change.Kind == ChangeAdd || change.Kind == ChangeDelete {
parent := filepath.Dir(path)
if _, ok := changedDirs[parent]; !ok && parent != "/" {
changes = append(changes, Change{Path: parent, Kind: ChangeModify})
changedDirs[parent] = struct{}{}
}
}
// Record change
changes = append(changes, change)
return nil
})
if err != nil && !os.IsNotExist(err) {
return nil, err
}
return changes, nil
}
// FileInfo describes the information of a file.
type FileInfo struct {
parent *FileInfo
name string
stat *system.StatT
children map[string]*FileInfo
capability []byte
added bool
}
// LookUp looks up the file information of a file.
func (info *FileInfo) LookUp(path string) *FileInfo {
// As this runs on the daemon side, file paths are OS specific.
parent := info
if path == string(os.PathSeparator) {
return info
}
pathElements := strings.Split(path, string(os.PathSeparator))
for _, elem := range pathElements {
if elem != "" {
child := parent.children[elem]
if child == nil {
return nil
}
parent = child
}
}
return parent
}
func (info *FileInfo) path() string {
if info.parent == nil {
// As this runs on the daemon side, file paths are OS specific.
return string(os.PathSeparator)
}
return filepath.Join(info.parent.path(), info.name)
}
func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) {
sizeAtEntry := len(*changes)
if oldInfo == nil {
// add
change := Change{
Path: info.path(),
Kind: ChangeAdd,
}
*changes = append(*changes, change)
info.added = true
}
// We make a copy so we can modify it to detect additions
// also, we only recurse on the old dir if the new info is a directory
// otherwise any previous delete/change is considered recursive
oldChildren := make(map[string]*FileInfo)
if oldInfo != nil && info.isDir() {
for k, v := range oldInfo.children {
oldChildren[k] = v
}
}
for name, newChild := range info.children {
oldChild := oldChildren[name]
if oldChild != nil {
// change?
oldStat := oldChild.stat
newStat := newChild.stat
// Note: We can't compare inode or ctime or blocksize here, because these change
// when copying a file into a container. However, that is not generally a problem
// because any content change will change mtime, and any status change should
// be visible when actually comparing the stat fields. The only time this
// breaks down is if some code intentionally hides a change by setting
// back mtime
if statDifferent(oldStat, newStat) ||
!bytes.Equal(oldChild.capability, newChild.capability) {
change := Change{
Path: newChild.path(),
Kind: ChangeModify,
}
*changes = append(*changes, change)
newChild.added = true
}
// Remove from copy so we can detect deletions
delete(oldChildren, name)
}
newChild.addChanges(oldChild, changes)
}
for _, oldChild := range oldChildren {
// delete
change := Change{
Path: oldChild.path(),
Kind: ChangeDelete,
}
*changes = append(*changes, change)
}
// If there were changes inside this directory, we need to add it, even if the directory
// itself wasn't changed. This is needed to properly save and restore filesystem permissions.
// As this runs on the daemon side, file paths are OS specific.
if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) {
change := Change{
Path: info.path(),
Kind: ChangeModify,
}
// Let's insert the directory entry before the recently added entries located inside this dir
*changes = append(*changes, change) // just to resize the slice, will be overwritten
copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:])
(*changes)[sizeAtEntry] = change
}
}
// Changes add changes to file information.
func (info *FileInfo) Changes(oldInfo *FileInfo) []Change {
var changes []Change
info.addChanges(oldInfo, &changes)
return changes
}
func newRootFileInfo() *FileInfo {
// As this runs on the daemon side, file paths are OS specific.
root := &FileInfo{
name: string(os.PathSeparator),
children: make(map[string]*FileInfo),
}
return root
}
// ChangesDirs compares two directories and generates an array of Change objects describing the changes.
// If oldDir is "", then all files in newDir will be Add-Changes.
func ChangesDirs(newDir, oldDir string) ([]Change, error) {
var oldRoot, newRoot *FileInfo
if oldDir == "" {
emptyDir, err := os.MkdirTemp("", "empty")
if err != nil {
return nil, err
}
defer os.Remove(emptyDir)
oldDir = emptyDir
}
oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir)
if err != nil {
return nil, err
}
return newRoot.Changes(oldRoot), nil
}
// ChangesSize calculates the size in bytes of the provided changes, based on newDir.
func ChangesSize(newDir string, changes []Change) int64 {
var (
size int64
sf = make(map[uint64]struct{})
)
for _, change := range changes {
if change.Kind == ChangeModify || change.Kind == ChangeAdd {
file := filepath.Join(newDir, change.Path)
fileInfo, err := os.Lstat(file)
if err != nil {
log.G(context.TODO()).Errorf("Can not stat %q: %s", file, err)
continue
}
if fileInfo != nil && !fileInfo.IsDir() {
if hasHardlinks(fileInfo) {
inode := getIno(fileInfo)
if _, ok := sf[inode]; !ok {
size += fileInfo.Size()
sf[inode] = struct{}{}
}
} else {
size += fileInfo.Size()
}
}
}
}
return size
}
// ExportChanges produces an Archive from the provided changes, relative to dir.
func ExportChanges(dir string, changes []Change, idMap idtools.IdentityMapping) (io.ReadCloser, error) {
reader, writer := io.Pipe()
go func() {
ta := newTarAppender(idMap, writer, nil)
// this buffer is needed for the duration of this piped stream
defer pools.BufioWriter32KPool.Put(ta.Buffer)
sort.Sort(changesByPath(changes))
// In general we log errors here but ignore them because
// during e.g. a diff operation the container can continue
// mutating the filesystem and we can see transient errors
// from this
for _, change := range changes {
if change.Kind == ChangeDelete {
whiteOutDir := filepath.Dir(change.Path)
whiteOutBase := filepath.Base(change.Path)
whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase)
timestamp := time.Now()
hdr := &tar.Header{
Name: whiteOut[1:],
Size: 0,
ModTime: timestamp,
AccessTime: timestamp,
ChangeTime: timestamp,
}
if err := ta.TarWriter.WriteHeader(hdr); err != nil {
log.G(context.TODO()).Debugf("Can't write whiteout header: %s", err)
}
} else {
path := filepath.Join(dir, change.Path)
if err := ta.addTarFile(path, change.Path[1:]); err != nil {
log.G(context.TODO()).Debugf("Can't add file %s to tar: %s", path, err)
}
}
}
// Make sure to check the error on Close.
if err := ta.TarWriter.Close(); err != nil {
log.G(context.TODO()).Debugf("Can't close layer: %s", err)
}
if err := writer.Close(); err != nil {
log.G(context.TODO()).Debugf("failed close Changes writer: %s", err)
}
}()
return reader, nil
}