//go:build !windows
// +build !windows
package utils
import (
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
_ "unsafe" // for go:linkname
"github.com/opencontainers/runc/libcontainer/system"
securejoin "github.com/cyphar/filepath-securejoin"
"golang.org/x/sys/unix"
)
// EnsureProcHandle returns whether or not the given file handle is on procfs.
func EnsureProcHandle(fh *os.File) error {
var buf unix.Statfs_t
if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
}
if buf.Type != unix.PROC_SUPER_MAGIC {
return fmt.Errorf("%s is not on procfs", fh.Name())
}
return nil
}
type fdFunc func(fd int)
// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
// the current process.
func fdRangeFrom(minFd int, fn fdFunc) error {
fdDir, err := os.Open("/proc/self/fd")
if err != nil {
return err
}
defer fdDir.Close()
if err := EnsureProcHandle(fdDir); err != nil {
return err
}
fdList, err := fdDir.Readdirnames(-1)
if err != nil {
return err
}
for _, fdStr := range fdList {
fd, err := strconv.Atoi(fdStr)
// Ignore non-numeric file names.
if err != nil {
continue
}
// Ignore descriptors lower than our specified minimum.
if fd < minFd {
continue
}
// Ignore the file descriptor we used for readdir, as it will be closed
// when we return.
if uintptr(fd) == fdDir.Fd() {
continue
}
// Run the closure.
fn(fd)
}
return nil
}
// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
// equal to minFd in the current process.
func CloseExecFrom(minFd int) error {
return fdRangeFrom(minFd, unix.CloseOnExec)
}
//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor
// In order to make sure we do not close the internal epoll descriptors the Go
// runtime uses, we need to ensure that we skip descriptors that match
// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing,
// unfortunately there's no other way to be sure we're only keeping the file
// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
// current process, except for those critical to Go's runtime (such as the
// netpoll management descriptors).
//
// NOTE: That this function is incredibly dangerous to use in most Go code, as
// closing file descriptors from underneath *os.File handles can lead to very
// bad behaviour (the closed file descriptor can be re-used and then any
// *os.File operations would apply to the wrong file). This function is only
// intended to be called from the last stage of runc init.
func UnsafeCloseFrom(minFd int) error {
// We must not close some file descriptors.
return fdRangeFrom(minFd, func(fd int) {
if runtime_IsPollDescriptor(uintptr(fd)) {
// These are the Go runtimes internal netpoll file descriptors.
// These file descriptors are operated on deep in the Go scheduler,
// and closing those files from underneath Go can result in panics.
// There is no issue with keeping them because they are not
// executable and are not useful to an attacker anyway. Also we
// don't have any choice.
return
}
// There's nothing we can do about errors from close(2), and the
// only likely error to be seen is EBADF which indicates the fd was
// already closed (in which case, we got what we wanted).
_ = unix.Close(fd)
})
}
// NewSockPair returns a new unix socket pair
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}
// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"),
// but properly handling the case where path or root are "/".
//
// NOTE: The return value only make sense if the path doesn't contain "..".
func IsLexicallyInRoot(root, path string) bool {
if root != "/" {
root += "/"
}
if path != "/" {
path += "/"
}
return strings.HasPrefix(path, root)
}
// MkdirAllInRootOpen attempts to make
//
// path, _ := securejoin.SecureJoin(root, unsafePath)
// os.MkdirAll(path, mode)
// os.Open(path)
//
// safer against attacks where components in the path are changed between
// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
// try to detect any symlink components in the path while we are doing the
// MkdirAll.
//
// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode
// (the suid/sgid/sticky bits are not the same as for os.FileMode).
//
// NOTE: If unsafePath is a subpath of root, we assume that you have already
// called SecureJoin and so we use the provided path verbatim without resolving
// any symlinks (this is done in a way that avoids symlink-exchange races).
// This means that the path also must not contain ".." elements, otherwise an
// error will occur.
//
// This is a somewhat less safe alternative to
// <https://github.com/cyphar/filepath-securejoin/pull/13>, but it should
// detect attempts to trick us into creating directories outside of the root.
// We should migrate to securejoin.MkdirAll once it is merged.
func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) {
// If the path is already "within" the root, use it verbatim.
fullPath := unsafePath
if !IsLexicallyInRoot(root, unsafePath) {
var err error
fullPath, err = securejoin.SecureJoin(root, unsafePath)
if err != nil {
return nil, err
}
}
subPath, err := filepath.Rel(root, fullPath)
if err != nil {
return nil, err
}
// Check for any silly mode bits.
if mode&^0o7777 != 0 {
return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
}
currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open root handle: %w", err)
}
defer func() {
if Err != nil {
currentDir.Close()
}
}()
for _, part := range strings.Split(subPath, string(filepath.Separator)) {
switch part {
case "", ".":
// Skip over no-op components.
continue
case "..":
return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath)
}
nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
switch {
case err == nil:
// Update the currentDir.
_ = currentDir.Close()
currentDir = nextDir
case errors.Is(err, unix.ENOTDIR):
// This might be a symlink or some other random file. Either way,
// error out.
return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR)
case errors.Is(err, os.ErrNotExist):
// Luckily, mkdirat will not follow trailing symlinks, so this is
// safe to do as-is.
if err := system.Mkdirat(currentDir, part, mode); err != nil {
return nil, err
}
// Open the new directory. There is a race here where an attacker
// could swap the directory with a different directory, but
// MkdirAll's fuzzy semantics mean we don't care about that.
nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open newly created directory: %w", err)
}
// Update the currentDir.
_ = currentDir.Close()
currentDir = nextDir
default:
return nil, err
}
}
return currentDir, nil
}
// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
// returned handle, for callers that don't need to use it.
func MkdirAllInRoot(root, unsafePath string, mode uint32) error {
f, err := MkdirAllInRootOpen(root, unsafePath, mode)
if err == nil {
_ = f.Close()
}
return err
}