kubernetes/vendor/github.com/vishvananda/netns/netns_linux.go

package netns

import (
	"fmt"
	"os"
	"path"
	"path/filepath"
	"strconv"
	"strings"

	"golang.org/x/sys/unix"
)

// Deprecated: use golang.org/x/sys/unix pkg instead.
const (
	CLONE_NEWUTS  = unix.CLONE_NEWUTS  /* New utsname group? */
	CLONE_NEWIPC  = unix.CLONE_NEWIPC  /* New ipcs */
	CLONE_NEWUSER = unix.CLONE_NEWUSER /* New user namespace */
	CLONE_NEWPID  = unix.CLONE_NEWPID  /* New pid namespace */
	CLONE_NEWNET  = unix.CLONE_NEWNET  /* New network namespace */
	CLONE_IO      = unix.CLONE_IO      /* Get io context */
)

const bindMountPath = "/run/netns" /* Bind mount path for named netns */

// Setns sets namespace using golang.org/x/sys/unix.Setns.
//
// Deprecated: Use golang.org/x/sys/unix.Setns instead.
func Setns(ns NsHandle, nstype int) (err error) {
	return unix.Setns(int(ns), nstype)
}

// Set sets the current network namespace to the namespace represented
// by NsHandle.
func Set(ns NsHandle) (err error) {
	return unix.Setns(int(ns), unix.CLONE_NEWNET)
}

// New creates a new network namespace, sets it as current and returns
// a handle to it.
func New() (ns NsHandle, err error) {
	if err := unix.Unshare(unix.CLONE_NEWNET); err != nil {
		return -1, err
	}
	return Get()
}

// NewNamed creates a new named network namespace, sets it as current,
// and returns a handle to it
func NewNamed(name string) (NsHandle, error) {
	if _, err := os.Stat(bindMountPath); os.IsNotExist(err) {
		err = os.MkdirAll(bindMountPath, 0755)
		if err != nil {
			return None(), err
		}
	}

	newNs, err := New()
	if err != nil {
		return None(), err
	}

	namedPath := path.Join(bindMountPath, name)

	f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444)
	if err != nil {
		newNs.Close()
		return None(), err
	}
	f.Close()

	nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
	err = unix.Mount(nsPath, namedPath, "bind", unix.MS_BIND, "")
	if err != nil {
		newNs.Close()
		return None(), err
	}

	return newNs, nil
}

// DeleteNamed deletes a named network namespace
func DeleteNamed(name string) error {
	namedPath := path.Join(bindMountPath, name)

	err := unix.Unmount(namedPath, unix.MNT_DETACH)
	if err != nil {
		return err
	}

	return os.Remove(namedPath)
}

// Get gets a handle to the current threads network namespace.
func Get() (NsHandle, error) {
	return GetFromThread(os.Getpid(), unix.Gettid())
}

// GetFromPath gets a handle to a network namespace
// identified by the path
func GetFromPath(path string) (NsHandle, error) {
	fd, err := unix.Open(path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
	if err != nil {
		return -1, err
	}
	return NsHandle(fd), nil
}

// GetFromName gets a handle to a named network namespace such as one
// created by `ip netns add`.
func GetFromName(name string) (NsHandle, error) {
	return GetFromPath(filepath.Join(bindMountPath, name))
}

// GetFromPid gets a handle to the network namespace of a given pid.
func GetFromPid(pid int) (NsHandle, error) {
	return GetFromPath(fmt.Sprintf("/proc/%d/ns/net", pid))
}

// GetFromThread gets a handle to the network namespace of a given pid and tid.
func GetFromThread(pid, tid int) (NsHandle, error) {
	return GetFromPath(fmt.Sprintf("/proc/%d/task/%d/ns/net", pid, tid))
}

// GetFromDocker gets a handle to the network namespace of a docker container.
// Id is prefixed matched against the running docker containers, so a short
// identifier can be used as long as it isn't ambiguous.
func GetFromDocker(id string) (NsHandle, error) {
	pid, err := getPidForContainer(id)
	if err != nil {
		return -1, err
	}
	return GetFromPid(pid)
}

// borrowed from docker/utils/utils.go
func findCgroupMountpoint(cgroupType string) (int, string, error) {
	output, err := os.ReadFile("/proc/mounts")
	if err != nil {
		return -1, "", err
	}

	// /proc/mounts has 6 fields per line, one mount per line, e.g.
	// cgroup /sys/fs/cgroup/devices cgroup rw,relatime,devices 0 0
	for _, line := range strings.Split(string(output), "\n") {
		parts := strings.Split(line, " ")
		if len(parts) == 6 {
			switch parts[2] {
			case "cgroup2":
				return 2, parts[1], nil
			case "cgroup":
				for _, opt := range strings.Split(parts[3], ",") {
					if opt == cgroupType {
						return 1, parts[1], nil
					}
				}
			}
		}
	}

	return -1, "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType)
}

// Returns the relative path to the cgroup docker is running in.
// borrowed from docker/utils/utils.go
// modified to get the docker pid instead of using /proc/self
func getDockerCgroup(cgroupVer int, cgroupType string) (string, error) {
	dockerpid, err := os.ReadFile("/var/run/docker.pid")
	if err != nil {
		return "", err
	}
	result := strings.Split(string(dockerpid), "\n")
	if len(result) == 0 || len(result[0]) == 0 {
		return "", fmt.Errorf("docker pid not found in /var/run/docker.pid")
	}
	pid, err := strconv.Atoi(result[0])
	if err != nil {
		return "", err
	}
	output, err := os.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid))
	if err != nil {
		return "", err
	}
	for _, line := range strings.Split(string(output), "\n") {
		parts := strings.Split(line, ":")
		// any type used by docker should work
		if (cgroupVer == 1 && parts[1] == cgroupType) ||
			(cgroupVer == 2 && parts[1] == "") {
			return parts[2], nil
		}
	}
	return "", fmt.Errorf("cgroup '%s' not found in /proc/%d/cgroup", cgroupType, pid)
}

// Returns the first pid in a container.
// borrowed from docker/utils/utils.go
// modified to only return the first pid
// modified to glob with id
// modified to search for newer docker containers
// modified to look for cgroups v2
func getPidForContainer(id string) (int, error) {
	pid := 0

	// memory is chosen randomly, any cgroup used by docker works
	cgroupType := "memory"

	cgroupVer, cgroupRoot, err := findCgroupMountpoint(cgroupType)
	if err != nil {
		return pid, err
	}

	cgroupDocker, err := getDockerCgroup(cgroupVer, cgroupType)
	if err != nil {
		return pid, err
	}

	id += "*"

	var pidFile string
	if cgroupVer == 1 {
		pidFile = "tasks"
	} else if cgroupVer == 2 {
		pidFile = "cgroup.procs"
	} else {
		return -1, fmt.Errorf("Invalid cgroup version '%d'", cgroupVer)
	}

	attempts := []string{
		filepath.Join(cgroupRoot, cgroupDocker, id, pidFile),
		// With more recent lxc versions use, cgroup will be in lxc/
		filepath.Join(cgroupRoot, cgroupDocker, "lxc", id, pidFile),
		// With more recent docker, cgroup will be in docker/
		filepath.Join(cgroupRoot, cgroupDocker, "docker", id, pidFile),
		// Even more recent docker versions under systemd use docker-<id>.scope/
		filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", pidFile),
		// Even more recent docker versions under cgroup/systemd/docker/<id>/
		filepath.Join(cgroupRoot, "..", "systemd", "docker", id, pidFile),
		// Kubernetes with docker and CNI is even more different. Works for BestEffort and Burstable QoS
		filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, pidFile),
		// Same as above but for Guaranteed QoS
		filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "pod*", id, pidFile),
		// Another flavor of containers location in recent kubernetes 1.11+. Works for BestEffort and Burstable QoS
		filepath.Join(cgroupRoot, cgroupDocker, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", pidFile),
		// Same as above but for Guaranteed QoS
		filepath.Join(cgroupRoot, cgroupDocker, "kubepods.slice", "*", "docker-"+id+".scope", pidFile),
		// When runs inside of a container with recent kubernetes 1.11+. Works for BestEffort and Burstable QoS
		filepath.Join(cgroupRoot, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", pidFile),
		// Same as above but for Guaranteed QoS
		filepath.Join(cgroupRoot, "kubepods.slice", "*", "docker-"+id+".scope", pidFile),
	}

	var filename string
	for _, attempt := range attempts {
		filenames, _ := filepath.Glob(attempt)
		if len(filenames) > 1 {
			return pid, fmt.Errorf("Ambiguous id supplied: %v", filenames)
		} else if len(filenames) == 1 {
			filename = filenames[0]
			break
		}
	}

	if filename == "" {
		return pid, fmt.Errorf("Unable to find container: %v", id[:len(id)-1])
	}

	output, err := os.ReadFile(filename)
	if err != nil {
		return pid, err
	}

	result := strings.Split(string(output), "\n")
	if len(result) == 0 || len(result[0]) == 0 {
		return pid, fmt.Errorf("No pid found for container")
	}

	pid, err = strconv.Atoi(result[0])
	if err != nil {
		return pid, fmt.Errorf("Invalid pid '%s': %s", result[0], err)
	}

	return pid, nil
}