const CLONE_VM … const CLONE_FS … const CLONE_FILES … const CLONE_SIGHAND … const CLONE_PIDFD … const CLONE_PTRACE … const CLONE_VFORK … const CLONE_PARENT … const CLONE_THREAD … const CLONE_NEWNS … const CLONE_SYSVSEM … const CLONE_SETTLS … const CLONE_PARENT_SETTID … const CLONE_CHILD_CLEARTID … const CLONE_DETACHED … const CLONE_UNTRACED … const CLONE_CHILD_SETTID … const CLONE_NEWCGROUP … const CLONE_NEWUTS … const CLONE_NEWIPC … const CLONE_NEWUSER … const CLONE_NEWPID … const CLONE_NEWNET … const CLONE_IO … const CLONE_CLEAR_SIGHAND … const CLONE_INTO_CGROUP … const CLONE_NEWTIME … type SysProcIDMap … type SysProcAttr … var none … var slash … var forceClone3 … // Implemented in runtime package. func runtime_BeforeFork() func runtime_AfterFork() func runtime_AfterForkInChild() // Fork, dup fd onto 0..len(fd), and exec(argv0, argv, envv) in child. // If a dup or exec fails, write the errno error to pipe. // (Pipe is close-on-exec so if exec succeeds, it will be closed.) // In the child, this function must not acquire any locks, because // they might have been locked at the time of the fork. This means // no rescheduling, no malloc calls, and no new stack segments. // For the same reason, the compiler does not race-instrument it. // The calls to RawSyscall are okay because they are assembly // functions that do not grow the stack. // //go:norace func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) { … } const _LINUX_CAPABILITY_VERSION_3 … type capHeader … type capData … type caps … // See CAP_TO_INDEX in linux/capability.h: func capToIndex(cap uintptr) uintptr { … } // See CAP_TO_MASK in linux/capability.h: func capToMask(cap uintptr) uint32 { … } type cloneArgs … // forkAndExecInChild1 implements the body of forkAndExecInChild up to // the parent's post-fork path. This is a separate function so we can // separate the child's and parent's stack frames if we're using // vfork.
// // This is go:noinline because the point is to keep the stack frames // of this and forkAndExecInChild separate. // //go:noinline //go:norace //go:nocheckptr func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid uintptr, pidfd int32, err1 Errno, mapPipe [2]int, locked bool) { … } func formatIDMappings(idMap []SysProcIDMap) []byte { … } // writeIDMappings writes the user namespace User ID or Group ID mappings to the specified path. func writeIDMappings(path string, idMap []SysProcIDMap) error { … } // writeSetgroups writes "deny" to /proc/PID/setgroups if enable is false // and "allow" if enable is true. // This is needed since kernel 3.19, because you can't write gid_map without // disabling the setgroups() system call. func writeSetgroups(pid int, enable bool) error { … } // writeUidGidMappings writes User ID and Group ID mappings for user namespaces // for a process. It is called from the parent process. func writeUidGidMappings(pid int, sys *SysProcAttr) error { … } // forkAndExecFailureCleanup cleans up after an exec failure. func forkAndExecFailureCleanup(attr *ProcAttr, sys *SysProcAttr) { … } // os_checkClonePidfd (linknamed as os.checkClonePidfd) verifies that // clone(CLONE_PIDFD) works by actually doing a clone. // //go:linkname os_checkClonePidfd os.checkClonePidfd func os_checkClonePidfd() error { … } // doCheckClonePidfd implements the actual clone call of os_checkClonePidfd and // child execution. This is a separate function so we can separate the child's // and parent's stack frames if we're using vfork. // // This is go:noinline because the point is to keep the stack frames of this // and os_checkClonePidfd separate. // //go:noinline func doCheckClonePidfd(pidfd *int32) (pid uintptr, errno Errno) { … }