docker/docker

View on GitHub
profiles/seccomp/default_linux.go

Summary

Maintainability
F
5 days
Test Coverage
package seccomp // import "github.com/docker/docker/profiles/seccomp"

import (
    "github.com/opencontainers/runtime-spec/specs-go"
    "golang.org/x/sys/unix"
)

// arches returns the architecture table used as the default profile's
// ArchMap. Each entry pairs a primary architecture with the sub-architectures
// listed alongside it; an entry built without sub-architectures carries a nil
// SubArches slice.
func arches() []Architecture {
    // family builds one table entry. Calling it with no subs leaves the
    // variadic parameter nil, which keeps SubArches nil for that entry.
    family := func(primary specs.Arch, subs ...specs.Arch) Architecture {
        return Architecture{Arch: primary, SubArches: subs}
    }

    return []Architecture{
        family(specs.ArchX86_64, specs.ArchX86, specs.ArchX32),
        family(specs.ArchAARCH64, specs.ArchARM),
        family(specs.ArchMIPS64, specs.ArchMIPS, specs.ArchMIPS64N32),
        family(specs.ArchMIPS64N32, specs.ArchMIPS, specs.ArchMIPS64),
        family(specs.ArchMIPSEL64, specs.ArchMIPSEL, specs.ArchMIPSEL64N32),
        family(specs.ArchMIPSEL64N32, specs.ArchMIPSEL, specs.ArchMIPSEL64),
        family(specs.ArchS390X, specs.ArchS390),
        family(specs.ArchRISCV64),
    }
}

// DefaultProfile defines the allowed syscalls for the default seccomp profile.
//
// The returned profile uses specs.ActErrno with EPERM as its default action,
// takes its architecture map from arches(), and carries an ordered list of
// rules:
//
//   - one rule allowing a large set of syscalls with no filter attached;
//   - rules that allow a syscall only for particular argument values
//     (socket, personality, clone);
//   - rules carrying an Includes and/or Excludes filter that names
//     architectures, capabilities, or a minimum kernel version.
//
// NOTE(review): how Includes/Excludes filters are evaluated is decided by the
// code that consumes the profile, which is not part of this function.
func DefaultProfile() *Seccomp {
    // Both errno values are referenced by pointer from the profile.
    enosys := uint(unix.ENOSYS)
    eperm := uint(unix.EPERM)

    // The clone rules compare the flags argument against these bits.
    const newNamespaceFlags = unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP

    // allow builds the spec part of a rule that allows the named syscalls.
    allow := func(names ...string) specs.LinuxSyscall {
        return specs.LinuxSyscall{Names: names, Action: specs.ActAllow}
    }

    // allowWhen allows a single syscall subject to one argument condition.
    allowWhen := func(name string, cond specs.LinuxSeccompArg) specs.LinuxSyscall {
        rule := allow(name)
        rule.Args = []specs.LinuxSeccompArg{cond}
        return rule
    }

    // allowPersonality allows personality only when its first argument
    // equals persona.
    allowPersonality := func(persona uint64) *Syscall {
        return &Syscall{
            LinuxSyscall: allowWhen("personality", specs.LinuxSeccompArg{
                Index: 0,
                Value: persona,
                Op:    specs.OpEqualTo,
            }),
        }
    }

    // cloneMasked allows clone subject to a masked-equal comparison of the
    // argument at position flagsArg against newNamespaceFlags, with an
    // expected value of 0.
    cloneMasked := func(flagsArg uint) specs.LinuxSyscall {
        return allowWhen("clone", specs.LinuxSeccompArg{
            Index:    flagsArg,
            Value:    newNamespaceFlags,
            ValueTwo: 0,
            Op:       specs.OpMaskedEqual,
        })
    }

    // onArches allows the named syscalls in a rule whose Includes filter
    // lists archNames.
    onArches := func(archNames []string, names ...string) *Syscall {
        return &Syscall{
            LinuxSyscall: allow(names...),
            Includes:     &Filter{Arches: archNames},
        }
    }

    // withCap allows the named syscalls in a rule whose Includes filter
    // lists the single capability.
    withCap := func(capability string, names ...string) *Syscall {
        return &Syscall{
            LinuxSyscall: allow(names...),
            Includes:     &Filter{Caps: []string{capability}},
        }
    }

    // Syscalls allowed by a rule that has no argument conditions and no
    // Includes/Excludes filter.
    unconditional := []string{
        "accept",
        "accept4",
        "access",
        "adjtimex",
        "alarm",
        "bind",
        "brk",
        "cachestat", // kernel v6.5, libseccomp v2.5.5
        "capget",
        "capset",
        "chdir",
        "chmod",
        "chown",
        "chown32",
        "clock_adjtime",
        "clock_adjtime64",
        "clock_getres",
        "clock_getres_time64",
        "clock_gettime",
        "clock_gettime64",
        "clock_nanosleep",
        "clock_nanosleep_time64",
        "close",
        "close_range",
        "connect",
        "copy_file_range",
        "creat",
        "dup",
        "dup2",
        "dup3",
        "epoll_create",
        "epoll_create1",
        "epoll_ctl",
        "epoll_ctl_old",
        "epoll_pwait",
        "epoll_pwait2",
        "epoll_wait",
        "epoll_wait_old",
        "eventfd",
        "eventfd2",
        "execve",
        "execveat",
        "exit",
        "exit_group",
        "faccessat",
        "faccessat2",
        "fadvise64",
        "fadvise64_64",
        "fallocate",
        "fanotify_mark",
        "fchdir",
        "fchmod",
        "fchmodat",
        "fchmodat2", // kernel v6.6, libseccomp v2.5.5
        "fchown",
        "fchown32",
        "fchownat",
        "fcntl",
        "fcntl64",
        "fdatasync",
        "fgetxattr",
        "flistxattr",
        "flock",
        "fork",
        "fremovexattr",
        "fsetxattr",
        "fstat",
        "fstat64",
        "fstatat64",
        "fstatfs",
        "fstatfs64",
        "fsync",
        "ftruncate",
        "ftruncate64",
        "futex",
        "futex_requeue", // kernel v6.7, libseccomp v2.5.5
        "futex_time64",
        "futex_wait", // kernel v6.7, libseccomp v2.5.5
        "futex_waitv",
        "futex_wake", // kernel v6.7, libseccomp v2.5.5
        "futimesat",
        "getcpu",
        "getcwd",
        "getdents",
        "getdents64",
        "getegid",
        "getegid32",
        "geteuid",
        "geteuid32",
        "getgid",
        "getgid32",
        "getgroups",
        "getgroups32",
        "getitimer",
        "getpeername",
        "getpgid",
        "getpgrp",
        "getpid",
        "getppid",
        "getpriority",
        "getrandom",
        "getresgid",
        "getresgid32",
        "getresuid",
        "getresuid32",
        "getrlimit",
        "get_robust_list",
        "getrusage",
        "getsid",
        "getsockname",
        "getsockopt",
        "get_thread_area",
        "gettid",
        "gettimeofday",
        "getuid",
        "getuid32",
        "getxattr",
        "inotify_add_watch",
        "inotify_init",
        "inotify_init1",
        "inotify_rm_watch",
        "io_cancel",
        "ioctl",
        "io_destroy",
        "io_getevents",
        "io_pgetevents",
        "io_pgetevents_time64",
        "ioprio_get",
        "ioprio_set",
        "io_setup",
        "io_submit",
        "ipc",
        "kill",
        "landlock_add_rule",
        "landlock_create_ruleset",
        "landlock_restrict_self",
        "lchown",
        "lchown32",
        "lgetxattr",
        "link",
        "linkat",
        "listen",
        "listxattr",
        "llistxattr",
        "_llseek",
        "lremovexattr",
        "lseek",
        "lsetxattr",
        "lstat",
        "lstat64",
        "madvise",
        "map_shadow_stack", // kernel v6.6, libseccomp v2.5.5
        "membarrier",
        "memfd_create",
        "memfd_secret",
        "mincore",
        "mkdir",
        "mkdirat",
        "mknod",
        "mknodat",
        "mlock",
        "mlock2",
        "mlockall",
        "mmap",
        "mmap2",
        "mprotect",
        "mq_getsetattr",
        "mq_notify",
        "mq_open",
        "mq_timedreceive",
        "mq_timedreceive_time64",
        "mq_timedsend",
        "mq_timedsend_time64",
        "mq_unlink",
        "mremap",
        "msgctl",
        "msgget",
        "msgrcv",
        "msgsnd",
        "msync",
        "munlock",
        "munlockall",
        "munmap",
        "name_to_handle_at",
        "nanosleep",
        "newfstatat",
        "_newselect",
        "open",
        "openat",
        "openat2",
        "pause",
        "pidfd_open",
        "pidfd_send_signal",
        "pipe",
        "pipe2",
        "pkey_alloc",
        "pkey_free",
        "pkey_mprotect",
        "poll",
        "ppoll",
        "ppoll_time64",
        "prctl",
        "pread64",
        "preadv",
        "preadv2",
        "prlimit64",
        "process_mrelease",
        "pselect6",
        "pselect6_time64",
        "pwrite64",
        "pwritev",
        "pwritev2",
        "read",
        "readahead",
        "readlink",
        "readlinkat",
        "readv",
        "recv",
        "recvfrom",
        "recvmmsg",
        "recvmmsg_time64",
        "recvmsg",
        "remap_file_pages",
        "removexattr",
        "rename",
        "renameat",
        "renameat2",
        "restart_syscall",
        "rmdir",
        "rseq",
        "rt_sigaction",
        "rt_sigpending",
        "rt_sigprocmask",
        "rt_sigqueueinfo",
        "rt_sigreturn",
        "rt_sigsuspend",
        "rt_sigtimedwait",
        "rt_sigtimedwait_time64",
        "rt_tgsigqueueinfo",
        "sched_getaffinity",
        "sched_getattr",
        "sched_getparam",
        "sched_get_priority_max",
        "sched_get_priority_min",
        "sched_getscheduler",
        "sched_rr_get_interval",
        "sched_rr_get_interval_time64",
        "sched_setaffinity",
        "sched_setattr",
        "sched_setparam",
        "sched_setscheduler",
        "sched_yield",
        "seccomp",
        "select",
        "semctl",
        "semget",
        "semop",
        "semtimedop",
        "semtimedop_time64",
        "send",
        "sendfile",
        "sendfile64",
        "sendmmsg",
        "sendmsg",
        "sendto",
        "setfsgid",
        "setfsgid32",
        "setfsuid",
        "setfsuid32",
        "setgid",
        "setgid32",
        "setgroups",
        "setgroups32",
        "setitimer",
        "setpgid",
        "setpriority",
        "setregid",
        "setregid32",
        "setresgid",
        "setresgid32",
        "setresuid",
        "setresuid32",
        "setreuid",
        "setreuid32",
        "setrlimit",
        "set_robust_list",
        "setsid",
        "setsockopt",
        "set_thread_area",
        "set_tid_address",
        "setuid",
        "setuid32",
        "setxattr",
        "shmat",
        "shmctl",
        "shmdt",
        "shmget",
        "shutdown",
        "sigaltstack",
        "signalfd",
        "signalfd4",
        "sigprocmask",
        "sigreturn",
        "socketcall",
        "socketpair",
        "splice",
        "stat",
        "stat64",
        "statfs",
        "statfs64",
        "statx",
        "symlink",
        "symlinkat",
        "sync",
        "sync_file_range",
        "syncfs",
        "sysinfo",
        "tee",
        "tgkill",
        "time",
        "timer_create",
        "timer_delete",
        "timer_getoverrun",
        "timer_gettime",
        "timer_gettime64",
        "timer_settime",
        "timer_settime64",
        "timerfd_create",
        "timerfd_gettime",
        "timerfd_gettime64",
        "timerfd_settime",
        "timerfd_settime64",
        "times",
        "tkill",
        "truncate",
        "truncate64",
        "ugetrlimit",
        "umask",
        "uname",
        "unlink",
        "unlinkat",
        "utime",
        "utimensat",
        "utimensat_time64",
        "utimes",
        "vfork",
        "vmsplice",
        "wait4",
        "waitid",
        "waitpid",
        "write",
        "writev",
    }

    // Rule order is kept exactly as listed; do not reorder entries.
    rules := []*Syscall{
        {LinuxSyscall: allow(unconditional...)},

        // Carries a MinKernel filter of KernelVersion{4, 8}.
        {
            LinuxSyscall: allow(
                "process_vm_readv",
                "process_vm_writev",
                "ptrace",
            ),
            Includes: &Filter{MinKernel: &KernelVersion{4, 8}},
        },

        // socket is allowed when its first argument is not AF_VSOCK.
        {
            LinuxSyscall: allowWhen("socket", specs.LinuxSeccompArg{
                Index: 0,
                Value: unix.AF_VSOCK,
                Op:    specs.OpNotEqual,
            }),
        },

        // personality is allowed only for these exact first-argument values.
        allowPersonality(0x0),
        allowPersonality(0x0008),
        allowPersonality(0x20000),
        allowPersonality(0x20008),
        allowPersonality(0xffffffff),

        // Architecture-specific syscalls.
        onArches([]string{"ppc64le"},
            "sync_file_range2",
            "swapcontext",
        ),
        onArches([]string{"arm", "arm64"},
            "arm_fadvise64_64",
            "arm_sync_file_range",
            "sync_file_range2",
            "breakpoint",
            "cacheflush",
            "set_tls",
        ),
        onArches([]string{"amd64", "x32"},
            "arch_prctl",
        ),
        onArches([]string{"amd64", "x32", "x86"},
            "modify_ldt",
        ),
        onArches([]string{"s390", "s390x"},
            "s390_pci_mmio_read",
            "s390_pci_mmio_write",
            "s390_runtime_instr",
        ),
        onArches([]string{"riscv64"},
            "riscv_flush_icache",
        ),

        // Capability-gated syscalls.
        withCap("CAP_DAC_READ_SEARCH",
            "open_by_handle_at",
        ),
        withCap("CAP_SYS_ADMIN",
            "bpf",
            "clone",
            "clone3",
            "fanotify_init",
            "fsconfig",
            "fsmount",
            "fsopen",
            "fspick",
            "lookup_dcookie",
            "mount",
            "mount_setattr",
            "move_mount",
            "open_tree",
            "perf_event_open",
            "quotactl",
            "quotactl_fd",
            "setdomainname",
            "sethostname",
            "setns",
            "syslog",
            "umount",
            "umount2",
            "unshare",
        ),

        // clone rules whose Excludes filter names CAP_SYS_ADMIN: argument 0
        // is checked in the rule that excludes s390/s390x, argument 1 in the
        // rule that includes only s390/s390x.
        {
            LinuxSyscall: cloneMasked(0),
            Excludes: &Filter{
                Caps:   []string{"CAP_SYS_ADMIN"},
                Arches: []string{"s390", "s390x"},
            },
        },
        {
            LinuxSyscall: cloneMasked(1),
            Comment:      "s390 parameter ordering for clone is different",
            Includes:     &Filter{Arches: []string{"s390", "s390x"}},
            Excludes:     &Filter{Caps: []string{"CAP_SYS_ADMIN"}},
        },

        // clone3 gets ENOSYS rather than the profile's default EPERM in the
        // rule that excludes CAP_SYS_ADMIN.
        {
            LinuxSyscall: specs.LinuxSyscall{
                Names:    []string{"clone3"},
                Action:   specs.ActErrno,
                ErrnoRet: &enosys,
            },
            Excludes: &Filter{Caps: []string{"CAP_SYS_ADMIN"}},
        },

        withCap("CAP_SYS_BOOT",
            "reboot",
        ),
        withCap("CAP_SYS_CHROOT",
            "chroot",
        ),
        withCap("CAP_SYS_MODULE",
            "delete_module",
            "init_module",
            "finit_module",
        ),
        withCap("CAP_SYS_PACCT",
            "acct",
        ),
        withCap("CAP_SYS_PTRACE",
            "kcmp",
            "pidfd_getfd",
            "process_madvise",
            "process_vm_readv",
            "process_vm_writev",
            "ptrace",
        ),
        withCap("CAP_SYS_RAWIO",
            "iopl",
            "ioperm",
        ),
        withCap("CAP_SYS_TIME",
            "settimeofday",
            "stime",
            "clock_settime",
            "clock_settime64",
        ),
        withCap("CAP_SYS_TTY_CONFIG",
            "vhangup",
        ),
        withCap("CAP_SYS_NICE",
            "get_mempolicy",
            "mbind",
            "set_mempolicy",
            "set_mempolicy_home_node", // kernel v5.17, libseccomp v2.5.4
        ),
        withCap("CAP_SYSLOG",
            "syslog",
        ),
        withCap("CAP_BPF",
            "bpf",
        ),
        withCap("CAP_PERFMON",
            "perf_event_open",
        ),
    }

    // Anything not matched by a rule above falls through to the default
    // action, which returns EPERM.
    defaults := specs.LinuxSeccomp{
        DefaultAction:   specs.ActErrno,
        DefaultErrnoRet: &eperm,
    }
    return &Seccomp{
        LinuxSeccomp: defaults,
        ArchMap:      arches(),
        Syscalls:     rules,
    }
}