//go:build linux
// +build linux

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kuberuntime

import (
	"context"
	"fmt"
	"math"
	"os"
	"reflect"
	"strconv"
	"testing"

	"k8s.io/kubernetes/pkg/kubelet/cm"
	"k8s.io/kubernetes/pkg/kubelet/types"

	"github.com/google/go-cmp/cmp"
	libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/stretchr/testify/assert"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/kubernetes/pkg/apis/scheduling"
	"k8s.io/kubernetes/pkg/features"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/utils/ptr"
)

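// makeExpectedConfig mirrors generateContainerConfig for the first start
// attempt (restartCount 0), assembling the ContainerConfig the production
// code is expected to produce so tests can compare the two structs directly.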
func makeExpectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerIndex int, enforceMemoryQoS bool) *runtimeapi.ContainerConfig {
	ctx := context.Background()
	container := &pod.Spec.Containers[containerIndex]
	podIP := ""
	restartCount := 0
	opts, _, _ := m.runtimeHelper.GenerateRunContainerOptions(ctx, pod, container, podIP, []string{podIP}, nil)
	containerLogsPath := buildContainerLogsPath(container.Name, restartCount)
	restartCountUint32 := uint32(restartCount)
	envs := make([]*runtimeapi.KeyValue, len(opts.Envs))

	l, _ := m.generateLinuxContainerConfig(container, pod, new(int64), "", nil, enforceMemoryQoS)

	expectedConfig := &runtimeapi.ContainerConfig{
		Metadata: &runtimeapi.ContainerMetadata{
			Name:    container.Name,
			Attempt: restartCountUint32,
		},
		Image:       &runtimeapi.ImageSpec{Image: container.Image, UserSpecifiedImage: container.Image},
		Command:     container.Command,
		Args:        []string(nil),
		WorkingDir:  container.WorkingDir,
		Labels:      newContainerLabels(container, pod),
		Annotations: newContainerAnnotations(container, pod, restartCount, opts),
		Devices:     makeDevices(opts),
		Mounts:      m.makeMounts(opts, container),
		LogPath:     containerLogsPath,
		Stdin:       container.Stdin,
		StdinOnce:   container.StdinOnce,
		Tty:         container.TTY,
		Linux:       l,
		Envs:        envs,
		CDIDevices:  makeCDIDevices(opts),
	}
	return expectedConfig
}

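// TestGenerateContainerConfig verifies that generateContainerConfig propagates
// RunAsUser/RunAsGroup into the CRI security context and rejects RunAsNonRoot
// pods that would run as UID 0 or as a non-numeric image-defined user.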
func TestGenerateContainerConfig(t *testing.T) {
	ctx := context.Background()
	_, imageService, m, err := createTestRuntimeManager()
	assert.NoError(t, err)

	runAsUser := int64(1000)
	runAsGroup := int64(2000)
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:       "12345678",
			Name:      "bar",
			Namespace: "new",
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:            "foo",
					Image:           "busybox",
					ImagePullPolicy: v1.PullIfNotPresent,
					Command:         []string{"testCommand"},
					WorkingDir:      "testWorkingDir",
					SecurityContext: &v1.SecurityContext{
						RunAsUser:  &runAsUser,
						RunAsGroup: &runAsGroup,
					},
				},
			},
		},
	}

	expectedConfig := makeExpectedConfig(m, pod, 0, false)
	containerConfig, _, err := m.generateContainerConfig(ctx, &pod.Spec.Containers[0], pod, 0, "", pod.Spec.Containers[0].Image, []string{}, nil, nil)
	assert.NoError(t, err)
	assert.Equal(t, expectedConfig, containerConfig, "generate container config for kubelet runtime v1.")
	assert.Equal(t, runAsUser, containerConfig.GetLinux().GetSecurityContext().GetRunAsUser().GetValue(), "RunAsUser should be set")
	assert.Equal(t, runAsGroup, containerConfig.GetLinux().GetSecurityContext().GetRunAsGroup().GetValue(), "RunAsGroup should be set")

	runAsRoot := int64(0)
	runAsNonRootTrue := true
	podWithContainerSecurityContext := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:       "12345678",
			Name:      "bar",
			Namespace: "new",
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:            "foo",
					Image:           "busybox",
					ImagePullPolicy: v1.PullIfNotPresent,
					Command:         []string{"testCommand"},
					WorkingDir:      "testWorkingDir",
					SecurityContext: &v1.SecurityContext{
						RunAsNonRoot: &runAsNonRootTrue,
						RunAsUser:    &runAsRoot,
					},
				},
			},
		},
	}

	_, _, err = m.generateContainerConfig(ctx, &podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image, []string{}, nil, nil)
	assert.Error(t, err)

	imageID, _ := imageService.PullImage(ctx, &runtimeapi.ImageSpec{Image: "busybox"}, nil, nil)
	resp, _ := imageService.ImageStatus(ctx, &runtimeapi.ImageSpec{Image: imageID}, false)

	resp.Image.Uid = nil
	resp.Image.Username = "test"

	podWithContainerSecurityContext.Spec.Containers[0].SecurityContext.RunAsUser = nil
	podWithContainerSecurityContext.Spec.Containers[0].SecurityContext.RunAsNonRoot = &runAsNonRootTrue

	_, _, err = m.generateContainerConfig(ctx, &podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image, []string{}, nil, nil)
	assert.Error(t, err, "RunAsNonRoot should fail for non-numeric username")
}

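// TestGenerateLinuxContainerConfigResources checks the CPU/memory translation
// with CFS quota enforcement enabled: with the default 100ms period,
// quota = cpuLimit * period (3 CPUs -> 300000us) and shares = cpuRequest * 1024
// (1 CPU -> 1024); an absent limit leaves quota and the memory limit at 0.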
func TestGenerateLinuxContainerConfigResources(t *testing.T) {
	_, _, m, err := createTestRuntimeManager()
	assert.NoError(t, err)
	m.cpuCFSQuota = true

	tests := []struct {
		name         string
		podResources v1.ResourceRequirements
		expected     *runtimeapi.LinuxContainerResources
	}{
		{
			name: "Request 128M/1C, Limit 256M/3C",
			podResources: v1.ResourceRequirements{
				Requests: v1.ResourceList{
					v1.ResourceMemory: resource.MustParse("128Mi"),
					v1.ResourceCPU:    resource.MustParse("1"),
				},
				Limits: v1.ResourceList{
					v1.ResourceMemory: resource.MustParse("256Mi"),
					v1.ResourceCPU:    resource.MustParse("3"),
				},
			},
			expected: &runtimeapi.LinuxContainerResources{
				CpuPeriod:          100000,
				CpuQuota:           300000,
				CpuShares:          1024,
				MemoryLimitInBytes: 256 * 1024 * 1024,
			},
		},
		{
			name: "Request 128M/2C, No Limit",
			podResources: v1.ResourceRequirements{
				Requests: v1.ResourceList{
					v1.ResourceMemory: resource.MustParse("128Mi"),
					v1.ResourceCPU:    resource.MustParse("2"),
				},
			},
			expected: &runtimeapi.LinuxContainerResources{
				CpuPeriod:          100000,
				CpuQuota:           0,
				CpuShares:          2048,
				MemoryLimitInBytes: 0,
			},
		},
	}

	for _, test := range tests {
		pod := &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				UID:       "12345678",
				Name:      "bar",
				Namespace: "new",
			},
			Spec: v1.PodSpec{
				Containers: []v1.Container{
					{
						Name:            "foo",
						Image:           "busybox",
						ImagePullPolicy: v1.PullIfNotPresent,
						Command:         []string{"testCommand"},
						WorkingDir:      "testWorkingDir",
						Resources:       test.podResources,
					},
				},
			},
		}

		linuxConfig, err := m.generateLinuxContainerConfig(&pod.Spec.Containers[0], pod, new(int64), "", nil, false)
		assert.NoError(t, err)
		assert.Equal(t, test.expected.CpuPeriod, linuxConfig.GetResources().CpuPeriod, test.name)
		assert.Equal(t, test.expected.CpuQuota, linuxConfig.GetResources().CpuQuota, test.name)
		assert.Equal(t, test.expected.CpuShares, linuxConfig.GetResources().CpuShares, test.name)
		assert.Equal(t, test.expected.MemoryLimitInBytes, linuxConfig.GetResources().MemoryLimitInBytes, test.name)
	}
}

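// TestCalculateLinuxResources exercises calculateLinuxResources on both cgroup
// versions. cgroup v2 is expected to add Unified["memory.oom.group"] = "1";
// a nil CPU request falls back to the limit when deriving shares, while an
// explicit zero request yields the kernel minimum of 2 shares.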
func TestCalculateLinuxResources(t *testing.T) {
	_, _, m, err := createTestRuntimeManager()
	assert.NoError(t, err)
	m.cpuCFSQuota = true

	generateResourceQuantity := func(str string) *resource.Quantity {
		quantity := resource.MustParse(str)
		return &quantity
	}

	tests := []struct {
		name          string
		cpuReq        *resource.Quantity
		cpuLim        *resource.Quantity
		memLim        *resource.Quantity
		expected      *runtimeapi.LinuxContainerResources
		cgroupVersion CgroupVersion
	}{
		{
			name:   "Request128MBLimit256MB",
			cpuReq: generateResourceQuantity("1"),
			cpuLim: generateResourceQuantity("2"),
			memLim: generateResourceQuantity("128Mi"),
			expected: &runtimeapi.LinuxContainerResources{
				CpuPeriod:          100000,
				CpuQuota:           200000,
				CpuShares:          1024,
				MemoryLimitInBytes: 134217728,
			},
			cgroupVersion: cgroupV1,
		},
		{
			name:   "RequestNoMemory",
			cpuReq: generateResourceQuantity("2"),
			cpuLim: generateResourceQuantity("8"),
			memLim: generateResourceQuantity("0"),
			expected: &runtimeapi.LinuxContainerResources{
				CpuPeriod:          100000,
				CpuQuota:           800000,
				CpuShares:          2048,
				MemoryLimitInBytes: 0,
			},
			cgroupVersion: cgroupV1,
		},
		{
			name:   "RequestNilCPU",
			cpuLim: generateResourceQuantity("2"),
			memLim: generateResourceQuantity("0"),
			expected: &runtimeapi.LinuxContainerResources{
				CpuPeriod:          100000,
				CpuQuota:           200000,
				CpuShares:          2048,
				MemoryLimitInBytes: 0,
			},
			cgroupVersion: cgroupV1,
		},
		{
			name:   "RequestZeroCPU",
			cpuReq: generateResourceQuantity("0"),
			cpuLim: generateResourceQuantity("2"),
			memLim: generateResourceQuantity("0"),
			expected: &runtimeapi.LinuxContainerResources{
				CpuPeriod:          100000,
				CpuQuota:           200000,
				CpuShares:          2,
				MemoryLimitInBytes: 0,
			},
			cgroupVersion: cgroupV1,
		},
		{
			name:   "Request128MBLimit256MB",
			cpuReq: generateResourceQuantity("1"),
			cpuLim: generateResourceQuantity("2"),
			memLim: generateResourceQuantity("128Mi"),
			expected: &runtimeapi.LinuxContainerResources{
				CpuPeriod:          100000,
				CpuQuota:           200000,
				CpuShares:          1024,
				MemoryLimitInBytes: 134217728,
				Unified:            map[string]string{"memory.oom.group": "1"},
			},
			cgroupVersion: cgroupV2,
		},
		{
			name:   "RequestNoMemory",
			cpuReq: generateResourceQuantity("2"),
			cpuLim: generateResourceQuantity("8"),
			memLim: generateResourceQuantity("0"),
			expected: &runtimeapi.LinuxContainerResources{
				CpuPeriod:          100000,
				CpuQuota:           800000,
				CpuShares:          2048,
				MemoryLimitInBytes: 0,
				Unified:            map[string]string{"memory.oom.group": "1"},
			},
			cgroupVersion: cgroupV2,
		},
		{
			name:   "RequestNilCPU",
			cpuLim: generateResourceQuantity("2"),
			memLim: generateResourceQuantity("0"),
			expected: &runtimeapi.LinuxContainerResources{
				CpuPeriod:          100000,
				CpuQuota:           200000,
				CpuShares:          2048,
				MemoryLimitInBytes: 0,
				Unified:            map[string]string{"memory.oom.group": "1"},
			},
			cgroupVersion: cgroupV2,
		},
		{
			name:   "RequestZeroCPU",
			cpuReq: generateResourceQuantity("0"),
			cpuLim: generateResourceQuantity("2"),
			memLim: generateResourceQuantity("0"),
			expected: &runtimeapi.LinuxContainerResources{
				CpuPeriod:          100000,
				CpuQuota:           200000,
				CpuShares:          2,
				MemoryLimitInBytes: 0,
				Unified:            map[string]string{"memory.oom.group": "1"},
			},
			cgroupVersion: cgroupV2,
		},
	}
	for _, test := range tests {
		setCgroupVersionDuringTest(test.cgroupVersion)
		linuxContainerResources := m.calculateLinuxResources(test.cpuReq, test.cpuLim, test.memLim)
		assert.Equal(t, test.expected, linuxContainerResources, test.name)
	}
}

func TestGenerateContainerConfigWithMemoryQoSEnforced(t *testing.T) {
	_, _, m, err := createTestRuntimeManager()
	assert.NoError(t, err)

	podRequestMemory := resource.MustParse("128Mi")
	pod1LimitMemory := resource.MustParse("256Mi")
	pod1 := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:       "12345678",
			Name:      "bar",
			Namespace: "new",
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:            "foo",
					Image:           "busybox",
					ImagePullPolicy: v1.PullIfNotPresent,
					Command:         []string{"testCommand"},
					WorkingDir:      "testWorkingDir",
					Resources: v1.ResourceRequirements{
						Requests: v1.ResourceList{
							v1.ResourceMemory: podRequestMemory,
						},
						Limits: v1.ResourceList{
							v1.ResourceMemory: pod1LimitMemory,
						},
					},
				},
			},
		},
	}

	pod2 := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:       "12345678",
			Name:      "bar",
			Namespace: "new",
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:            "foo",
					Image:           "busybox",
					ImagePullPolicy: v1.PullIfNotPresent,
					Command:         []string{"testCommand"},
					WorkingDir:      "testWorkingDir",
					Resources: v1.ResourceRequirements{
						Requests: v1.ResourceList{
							v1.ResourceMemory: podRequestMemory,
						},
					},
				},
			},
		},
	}
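	// With memory QoS enforced, memory.high is expected to be
	// request + (limit - request) * memoryThrottlingFactor, rounded down to a
	// page-size multiple; a pod without a memory limit falls back to node
	// allocatable memory as the upper bound.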
	pageSize := int64(os.Getpagesize())
	memoryNodeAllocatable := resource.MustParse(fakeNodeAllocatableMemory)
	pod1MemoryHigh := int64(math.Floor(
		float64(podRequestMemory.Value())+
			(float64(pod1LimitMemory.Value())-float64(podRequestMemory.Value()))*float64(m.memoryThrottlingFactor))/float64(pageSize)) * pageSize
	pod2MemoryHigh := int64(math.Floor(
		float64(podRequestMemory.Value())+
			(float64(memoryNodeAllocatable.Value())-float64(podRequestMemory.Value()))*float64(m.memoryThrottlingFactor))/float64(pageSize)) * pageSize

	type expectedResult struct {
		containerConfig *runtimeapi.LinuxContainerConfig
		memoryLow       int64
		memoryHigh      int64
	}
	l1, _ := m.generateLinuxContainerConfig(&pod1.Spec.Containers[0], pod1, new(int64), "", nil, true)
	l2, _ := m.generateLinuxContainerConfig(&pod2.Spec.Containers[0], pod2, new(int64), "", nil, true)
	tests := []struct {
		name     string
		pod      *v1.Pod
		expected *expectedResult
	}{
		{
			name: "Request128MBLimit256MB",
			pod:  pod1,
			expected: &expectedResult{
				l1,
				128 * 1024 * 1024,
				pod1MemoryHigh,
			},
		},
		{
			name: "Request128MBWithoutLimit",
			pod:  pod2,
			expected: &expectedResult{
				l2,
				128 * 1024 * 1024,
				pod2MemoryHigh,
			},
		},
	}

	for _, test := range tests {
		linuxConfig, err := m.generateLinuxContainerConfig(&test.pod.Spec.Containers[0], test.pod, new(int64), "", nil, true)
		assert.NoError(t, err)
		assert.Equal(t, test.expected.containerConfig, linuxConfig, test.name)
		assert.Equal(t, strconv.FormatInt(test.expected.memoryLow, 10), linuxConfig.GetResources().GetUnified()["memory.min"], test.name)
		assert.Equal(t, strconv.FormatInt(test.expected.memoryHigh, 10), linuxConfig.GetResources().GetUnified()["memory.high"], test.name)
	}
}

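// TestGetHugepageLimitsFromResources verifies that hugepage limits named with
// API quantity suffixes (hugepages-2Mi, hugepages-1Gi) are translated to the
// CRI page-size keys ("2MB", "1GB"), while malformed resource names such as
// hugepages-2MB are ignored and leave the corresponding limit at 0.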
func TestGetHugepageLimitsFromResources(t *testing.T) {
	var baseHugepage []*runtimeapi.HugepageLimit

	// For each page size, limit to 0.
	for _, pageSize := range libcontainercgroups.HugePageSizes() {
		baseHugepage = append(baseHugepage, &runtimeapi.HugepageLimit{
			PageSize: pageSize,
			Limit:    uint64(0),
		})
	}

	tests := []struct {
		name      string
		resources v1.ResourceRequirements
		expected  []*runtimeapi.HugepageLimit
	}{
		{
			name: "Success2MB",
			resources: v1.ResourceRequirements{
				Limits: v1.ResourceList{
					"hugepages-2Mi": resource.MustParse("2Mi"),
				},
			},
			expected: []*runtimeapi.HugepageLimit{
				{
					PageSize: "2MB",
					Limit:    2097152,
				},
			},
		},
		{
			name: "Success1GB",
			resources: v1.ResourceRequirements{
				Limits: v1.ResourceList{
					"hugepages-1Gi": resource.MustParse("2Gi"),
				},
			},
			expected: []*runtimeapi.HugepageLimit{
				{
					PageSize: "1GB",
					Limit:    2147483648,
				},
			},
		},
		{
			name: "Skip2MB",
			resources: v1.ResourceRequirements{
				Limits: v1.ResourceList{
					"hugepages-2MB": resource.MustParse("2Mi"),
				},
			},
			expected: []*runtimeapi.HugepageLimit{
				{
					PageSize: "2MB",
					Limit:    0,
				},
			},
		},
		{
			name: "Skip1GB",
			resources: v1.ResourceRequirements{
				Limits: v1.ResourceList{
					"hugepages-1GB": resource.MustParse("2Gi"),
				},
			},
			expected: []*runtimeapi.HugepageLimit{
				{
					PageSize: "1GB",
					Limit:    0,
				},
			},
		},
		{
			name: "Success2MBand1GB",
			resources: v1.ResourceRequirements{
				Limits: v1.ResourceList{
					v1.ResourceName(v1.ResourceCPU): resource.MustParse("0"),
					"hugepages-2Mi":                 resource.MustParse("2Mi"),
					"hugepages-1Gi":                 resource.MustParse("2Gi"),
				},
			},
			expected: []*runtimeapi.HugepageLimit{
				{
					PageSize: "2MB",
					Limit:    2097152,
				},
				{
					PageSize: "1GB",
					Limit:    2147483648,
				},
			},
		},
		{
			name: "Skip2MBand1GB",
			resources: v1.ResourceRequirements{
				Limits: v1.ResourceList{
					v1.ResourceName(v1.ResourceCPU): resource.MustParse("0"),
					"hugepages-2MB":                 resource.MustParse("2Mi"),
					"hugepages-1GB":                 resource.MustParse("2Gi"),
				},
			},
			expected: []*runtimeapi.HugepageLimit{
				{
					PageSize: "2MB",
					Limit:    0,
				},
				{
					PageSize: "1GB",
					Limit:    0,
				},
			},
		},
	}

	for _, test := range tests {
		// Validate whether the machine supports the hugepage sizes used in the test case.
		machineHugepageSupport := true
		for _, hugepageLimit := range test.expected {
			hugepageSupport := false
			for _, pageSize := range libcontainercgroups.HugePageSizes() {
				if pageSize == hugepageLimit.PageSize {
					hugepageSupport = true
					break
				}
			}

			if !hugepageSupport {
				machineHugepageSupport = false
				break
			}
		}

		// Skip the test case if the machine does not support the hugepage size.
		if !machineHugepageSupport {
			continue
		}

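		// Note: expectedHugepages aliases baseHugepage (a slice of pointers),
		// so the Limit mutations below write through to baseHugepage; the
		// reset loop at the end of the iteration undoes them.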
		expectedHugepages := baseHugepage
		for _, hugepage := range test.expected {
			for _, expectedHugepage := range expectedHugepages {
				if expectedHugepage.PageSize == hugepage.PageSize {
					expectedHugepage.Limit = hugepage.Limit
				}
			}
		}

		results := GetHugepageLimitsFromResources(test.resources)
		if !reflect.DeepEqual(expectedHugepages, results) {
			t.Errorf("%s test failed. Expected %v but got %v", test.name, expectedHugepages, results)
		}

		for _, hugepage := range baseHugepage {
			hugepage.Limit = uint64(0)
		}
	}
}

func TestGenerateLinuxContainerConfigNamespaces(t *testing.T) {
	_, _, m, err := createTestRuntimeManager()
	if err != nil {
		t.Fatalf("error creating test RuntimeManager: %v", err)
	}

	for _, tc := range []struct {
		name   string
		pod    *v1.Pod
		target *kubecontainer.ContainerID
		want   *runtimeapi.NamespaceOption
	}{
		{
			"Default namespaces",
			&v1.Pod{
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{Name: "test"},
					},
				},
			},
			nil,
			&runtimeapi.NamespaceOption{
				Pid: runtimeapi.NamespaceMode_CONTAINER,
			},
		},
		{
			"PID Namespace POD",
			&v1.Pod{
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{Name: "test"},
					},
					ShareProcessNamespace: &[]bool{true}[0],
				},
			},
			nil,
			&runtimeapi.NamespaceOption{
				Pid: runtimeapi.NamespaceMode_POD,
			},
		},
		{
			"PID Namespace TARGET",
			&v1.Pod{
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{Name: "test"},
					},
				},
			},
			&kubecontainer.ContainerID{Type: "docker", ID: "really-long-id-string"},
			&runtimeapi.NamespaceOption{
				Pid:      runtimeapi.NamespaceMode_TARGET,
				TargetId: "really-long-id-string",
			},
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			got, err := m.generateLinuxContainerConfig(&tc.pod.Spec.Containers[0], tc.pod, nil, "", tc.target, false)
			assert.NoError(t, err)
			if diff := cmp.Diff(tc.want, got.SecurityContext.NamespaceOptions); diff != "" {
				t.Errorf("%v: diff (-want +got):\n%v", t.Name(), diff)
			}
		})
	}
}

var (
	supplementalGroupsPolicyUnSupported = v1.SupplementalGroupsPolicy("UnSupported")
	supplementalGroupsPolicyMerge       = v1.SupplementalGroupsPolicyMerge
	supplementalGroupsPolicyStrict      = v1.SupplementalGroupsPolicyStrict
)

func TestGenerateLinuxConfigSupplementalGroupsPolicy(t *testing.T) {
	_, _, m, err := createTestRuntimeManager()
	if err != nil {
		t.Fatalf("error creating test RuntimeManager: %v", err)
	}

	containerName := "test"
	for _, tc := range []struct {
		name           string
		pod            *v1.Pod
		expected       runtimeapi.SupplementalGroupsPolicy
		expectErr      bool
		expectedErrMsg string
	}{{
		name: "Merge SupplementalGroupsPolicy should convert to Merge",
		pod: &v1.Pod{
			Spec: v1.PodSpec{
				SecurityContext: &v1.PodSecurityContext{
					SupplementalGroupsPolicy: &supplementalGroupsPolicyMerge,
				},
				Containers: []v1.Container{
					{Name: containerName},
				},
			},
		},
		expected: runtimeapi.SupplementalGroupsPolicy_Merge,
	}, {
		name: "Strict SupplementalGroupsPolicy should convert to Strict",
		pod: &v1.Pod{
			Spec: v1.PodSpec{
				SecurityContext: &v1.PodSecurityContext{
					SupplementalGroupsPolicy: &supplementalGroupsPolicyStrict,
				},
				Containers: []v1.Container{
					{Name: containerName},
				},
			},
		},
		expected: runtimeapi.SupplementalGroupsPolicy_Strict,
	}, {
		name: "nil SupplementalGroupsPolicy should convert to Merge",
		pod: &v1.Pod{
			Spec: v1.PodSpec{
				SecurityContext: &v1.PodSecurityContext{},
				Containers: []v1.Container{
					{Name: containerName},
				},
			},
		},
		expected: runtimeapi.SupplementalGroupsPolicy_Merge,
	}, {
		name: "unsupported SupplementalGroupsPolicy should raise an error",
		pod: &v1.Pod{
			Spec: v1.PodSpec{
				SecurityContext: &v1.PodSecurityContext{
					SupplementalGroupsPolicy: &supplementalGroupsPolicyUnSupported,
				},
				Containers: []v1.Container{
					{Name: containerName},
				},
			},
		},
		expectErr:      true,
		expectedErrMsg: "unsupported supplementalGroupsPolicy: UnSupported",
	},
	} {
		t.Run(tc.name, func(t *testing.T) {
			actual, err := m.generateLinuxContainerConfig(&tc.pod.Spec.Containers[0], tc.pod, nil, "", nil, false)
			if !tc.expectErr {
				assert.NoErrorf(t, err, "Unexpected error")
				assert.EqualValuesf(t, tc.expected, actual.SecurityContext.SupplementalGroupsPolicy, "SupplementalGroupPolicy for %s", tc.name)
			} else {
				assert.Errorf(t, err, "Unexpected success")
				assert.Empty(t, actual, "Unexpected non-empty value")
				assert.ErrorContainsf(t, err, tc.expectedErrMsg, "Error for %s", tc.name)
			}
		})
	}
}

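// TestGenerateLinuxContainerResources covers the per-QoS OOM score adjustment:
// -997 for guaranteed pods, 1000 for best-effort, and for burstable a value
// derived from the memory request relative to node capacity (here
// 1000 - (1000*500Mi)/~16GiB, truncated to 970).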
func TestGenerateLinuxContainerResources(t *testing.T) {
	_, _, m, err := createTestRuntimeManager()
	assert.NoError(t, err)
	m.machineInfo.MemoryCapacity = 17179860387 // ~16GiB

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:       "12345678",
			Name:      "foo",
			Namespace: "bar",
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:  "c1",
					Image: "busybox",
				},
			},
		},
	}

	for _, tc := range []struct {
		name          string
		limits        v1.ResourceList
		requests      v1.ResourceList
		expected      *runtimeapi.LinuxContainerResources
		cgroupVersion CgroupVersion
	}{
		{
			"requests & limits, cpu & memory, guaranteed qos",
			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
			cgroupV1,
		},
		{
			"requests & limits, cpu & memory, burstable qos",
			v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")},
			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970},
			cgroupV1,
		},
		{
			"best-effort qos",
			nil,
			nil,
			&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000},
			cgroupV1,
		},
		{
			"requests & limits, cpu & memory, guaranteed qos",
			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997, Unified: map[string]string{"memory.oom.group": "1"}},
			cgroupV2,
		},
		{
			"requests & limits, cpu & memory, burstable qos",
			v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")},
			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970, Unified: map[string]string{"memory.oom.group": "1"}},
			cgroupV2,
		},
		{
			"best-effort qos",
			nil,
			nil,
			&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000, Unified: map[string]string{"memory.oom.group": "1"}},
			cgroupV2,
		},
	} {
		t.Run(fmt.Sprintf("cgroup%s:%s", tc.cgroupVersion, tc.name), func(t *testing.T) {
			defer setSwapControllerAvailableDuringTest(false)()
			setCgroupVersionDuringTest(tc.cgroupVersion)

			pod.Spec.Containers[0].Resources = v1.ResourceRequirements{Limits: tc.limits, Requests: tc.requests}

			resources := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false)
			tc.expected.HugepageLimits = resources.HugepageLimits
			assert.Equal(t, tc.expected, resources)
		})
	}
}

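// TestGenerateLinuxContainerResourcesWithSwap walks the swap decision matrix:
// swap stays unconfigured when the node lacks a swap controller; no swap
// access is granted for critical pods, cgroup v1, a disabled NodeSwap feature
// gate, or NoSwap behavior; otherwise LimitedSwap apportions swap to burstable
// containers in proportion to their memory requests.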
func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
	_, _, m, err := createTestRuntimeManager()
	assert.NoError(t, err)
	m.machineInfo.MemoryCapacity = 42949672960 // 40GiB == 40 * 1024^3
	m.machineInfo.SwapCapacity = 5368709120    // 5GiB == 5 * 1024^3

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:       "12345678",
			Name:      "foo",
			Namespace: "bar",
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name: "c1",
				},
				{
					Name: "c2",
				},
			},
		},
		Status: v1.PodStatus{},
	}

	expectSwapDisabled := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
		const msg = "container is expected to not have swap configured"

		for _, r := range resources {
			switch cgroupVersion {
			case cgroupV1:
				assert.Equal(t, int64(0), r.MemorySwapLimitInBytes, msg)
			case cgroupV2:
				assert.NotContains(t, r.Unified, cm.Cgroup2MaxSwapFilename, msg)
			}
		}
	}

	expectNoSwap := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
		const msg = "container is expected to not have swap access"

		for _, r := range resources {
			switch cgroupVersion {
			case cgroupV1:
				assert.Equal(t, r.MemoryLimitInBytes, r.MemorySwapLimitInBytes, msg)
			case cgroupV2:
				assert.Equal(t, "0", r.Unified[cm.Cgroup2MaxSwapFilename], msg)
			}
		}
	}

	expectSwap := func(cgroupVersion CgroupVersion, swapBytesExpected int64, resources *runtimeapi.LinuxContainerResources) {
		msg := fmt.Sprintf("container swap is expected to be limited by %d bytes", swapBytesExpected)

		switch cgroupVersion {
		case cgroupV1:
			assert.Equal(t, resources.MemoryLimitInBytes+swapBytesExpected, resources.MemorySwapLimitInBytes, msg)
		case cgroupV2:
			assert.Equal(t, fmt.Sprintf("%d", swapBytesExpected), resources.Unified[cm.Cgroup2MaxSwapFilename], msg)
		}
	}

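	// Wrap the package-level calcSwapForBurstablePods helper in a closure of
	// the same name; inside the function literal the identifier still resolves
	// to the package-level function, since the shadowing variable only comes
	// into scope after the declaration ends.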
	calcSwapForBurstablePods := func(containerMemoryRequest int64) int64 {
		swapSize, err := calcSwapForBurstablePods(containerMemoryRequest, int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity))
		assert.NoError(t, err)

		return swapSize
	}

	for _, tc := range []struct {
		name                        string
		cgroupVersion               CgroupVersion
		qosClass                    v1.PodQOSClass
		swapDisabledOnNode          bool
		nodeSwapFeatureGateEnabled  bool
		swapBehavior                string
		addContainerWithoutRequests bool
		addGuaranteedContainer      bool
		isCriticalPod               bool
	}{
		// With cgroup v1
		{
			name:                       "cgroups v1, LimitedSwap, Burstable QoS",
			cgroupVersion:              cgroupV1,
			qosClass:                   v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               types.LimitedSwap,
		},
		{
			name:                       "cgroups v1, LimitedSwap, Best-effort QoS",
			cgroupVersion:              cgroupV1,
			qosClass:                   v1.PodQOSBestEffort,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               types.LimitedSwap,
		},

		// With feature gate turned off
		{
			name:                       "NodeSwap feature gate turned off, cgroups v2, LimitedSwap",
			cgroupVersion:              cgroupV2,
			qosClass:                   v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled: false,
			swapBehavior:               types.LimitedSwap,
		},

		// With no swapBehavior, NoSwap should be the default
		{
			name:                       "With no swapBehavior - NoSwap should be the default",
			cgroupVersion:              cgroupV2,
			qosClass:                   v1.PodQOSBestEffort,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               "",
		},

		// With Guaranteed and Best-effort QoS
		{
			name:                       "Best-effort QoS, cgroups v2, NoSwap",
			cgroupVersion:              cgroupV2,
			qosClass:                   v1.PodQOSBestEffort,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               "NoSwap",
		},
		{
			name:                       "Best-effort QoS, cgroups v2, LimitedSwap",
			cgroupVersion:              cgroupV2,
			qosClass:                   v1.PodQOSBestEffort,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               types.LimitedSwap,
		},
		{
			name:                       "Guaranteed QoS, cgroups v2, LimitedSwap",
			cgroupVersion:              cgroupV2,
			qosClass:                   v1.PodQOSGuaranteed,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               types.LimitedSwap,
		},

		// With a "guaranteed" container (when memory requests equal to limits)
		{
			name:                        "Burstable QoS, cgroups v2, LimitedSwap, with a guaranteed container",
			cgroupVersion:               cgroupV2,
			qosClass:                    v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled:  true,
			swapBehavior:                types.LimitedSwap,
			addContainerWithoutRequests: false,
			addGuaranteedContainer:      true,
		},

		// Swap is expected to be allocated
		{
			name:                        "Burstable QoS, cgroups v2, LimitedSwap",
			cgroupVersion:               cgroupV2,
			qosClass:                    v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled:  true,
			swapBehavior:                types.LimitedSwap,
			addContainerWithoutRequests: false,
			addGuaranteedContainer:      false,
		},
		{
			name:                        "Burstable QoS, cgroups v2, LimitedSwap, with a container with no requests",
			cgroupVersion:               cgroupV2,
			qosClass:                    v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled:  true,
			swapBehavior:                types.LimitedSwap,
			addContainerWithoutRequests: true,
			addGuaranteedContainer:      false,
		},
		// All the above examples with Swap disabled on node
		{
			name:                       "Swap disabled on node, cgroups v1, LimitedSwap, Burstable QoS",
			swapDisabledOnNode:         true,
			cgroupVersion:              cgroupV1,
			qosClass:                   v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               types.LimitedSwap,
		},
		{
			name:                       "Swap disabled on node, cgroups v1, LimitedSwap, Best-effort QoS",
			swapDisabledOnNode:         true,
			cgroupVersion:              cgroupV1,
			qosClass:                   v1.PodQOSBestEffort,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               types.LimitedSwap,
		},

		// With feature gate turned off
		{
			name:                       "Swap disabled on node, NodeSwap feature gate turned off, cgroups v2, LimitedSwap",
			swapDisabledOnNode:         true,
			cgroupVersion:              cgroupV2,
			qosClass:                   v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled: false,
			swapBehavior:               types.LimitedSwap,
		},

		// With no swapBehavior, NoSwap should be the default
		{
			name:                       "Swap disabled on node, With no swapBehavior - NoSwap should be the default",
			swapDisabledOnNode:         true,
			cgroupVersion:              cgroupV2,
			qosClass:                   v1.PodQOSBestEffort,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               "",
		},

		// With Guaranteed and Best-effort QoS
		{
			name:                       "Swap disabled on node, Best-effort QoS, cgroups v2, LimitedSwap",
			swapDisabledOnNode:         true,
			cgroupVersion:              cgroupV2,
			qosClass:                   v1.PodQOSBestEffort,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               types.LimitedSwap,
		},
		{
			name:                       "Swap disabled on node, Guaranteed QoS, cgroups v2, LimitedSwap",
			swapDisabledOnNode:         true,
			cgroupVersion:              cgroupV2,
			qosClass:                   v1.PodQOSGuaranteed,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               types.LimitedSwap,
		},

		// With a "guaranteed" container (when memory requests equal to limits)
		{
			name:                        "Swap disabled on node, Burstable QoS, cgroups v2, LimitedSwap, with a guaranteed container",
			swapDisabledOnNode:          true,
			cgroupVersion:               cgroupV2,
			qosClass:                    v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled:  true,
			swapBehavior:                types.LimitedSwap,
			addContainerWithoutRequests: false,
			addGuaranteedContainer:      true,
		},

		// Swap is expected to be allocated
		{
			name:                        "Swap disabled on node, Burstable QoS, cgroups v2, LimitedSwap",
			swapDisabledOnNode:          true,
			cgroupVersion:               cgroupV2,
			qosClass:                    v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled:  true,
			swapBehavior:                types.LimitedSwap,
			addContainerWithoutRequests: false,
			addGuaranteedContainer:      false,
		},
		{
			name:                        "Swap disabled on node, Burstable QoS, cgroups v2, LimitedSwap, with a container with no requests",
			swapDisabledOnNode:          true,
			cgroupVersion:               cgroupV2,
			qosClass:                    v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled:  true,
			swapBehavior:                types.LimitedSwap,
			addContainerWithoutRequests: true,
			addGuaranteedContainer:      false,
		},

		// When the pod is considered critical, disallow swap access
		{
			name:                       "Burstable QoS, cgroups v2, LimitedSwap, critical pod",
			cgroupVersion:              cgroupV2,
			qosClass:                   v1.PodQOSBurstable,
			nodeSwapFeatureGateEnabled: true,
			swapBehavior:               types.LimitedSwap,
			isCriticalPod:              true,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			setCgroupVersionDuringTest(tc.cgroupVersion)
			defer setSwapControllerAvailableDuringTest(!tc.swapDisabledOnNode)()
			featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tc.nodeSwapFeatureGateEnabled)
			m.memorySwapBehavior = tc.swapBehavior

			var resourceReqsC1, resourceReqsC2 v1.ResourceRequirements
			switch tc.qosClass {
			case v1.PodQOSBurstable:
				resourceReqsC1 = v1.ResourceRequirements{
					Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
				}

				if !tc.addContainerWithoutRequests {
					resourceReqsC2 = v1.ResourceRequirements{
						Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")},
					}

					if tc.addGuaranteedContainer {
						resourceReqsC2.Limits = v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")}
					}
				}
			case v1.PodQOSGuaranteed:
				resourceReqsC1 = v1.ResourceRequirements{
					Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi"), v1.ResourceCPU: resource.MustParse("1")},
					Limits:   v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi"), v1.ResourceCPU: resource.MustParse("1")},
				}
				resourceReqsC2 = v1.ResourceRequirements{
					Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi"), v1.ResourceCPU: resource.MustParse("1")},
					Limits:   v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi"), v1.ResourceCPU: resource.MustParse("1")},
				}
			}
			pod.Spec.Containers[0].Resources = resourceReqsC1
			pod.Spec.Containers[1].Resources = resourceReqsC2

			if tc.isCriticalPod {
				pod.Spec.Priority = ptr.To(scheduling.SystemCriticalPriority)
				assert.True(t, types.IsCriticalPod(pod), "pod is expected to be critical")
			}

			resourcesC1 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false)
			resourcesC2 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[1], false)

			if tc.swapDisabledOnNode {
				expectSwapDisabled(tc.cgroupVersion, resourcesC1, resourcesC2)
				return
			}

			if tc.isCriticalPod || !tc.nodeSwapFeatureGateEnabled || tc.cgroupVersion == cgroupV1 || (tc.swapBehavior == types.LimitedSwap && tc.qosClass != v1.PodQOSBurstable) {
				expectNoSwap(tc.cgroupVersion, resourcesC1, resourcesC2)
				return
			}

			if tc.swapBehavior == types.NoSwap || tc.swapBehavior == "" {
				expectNoSwap(tc.cgroupVersion, resourcesC1, resourcesC2)
				return
			}

			c1ExpectedSwap := calcSwapForBurstablePods(resourceReqsC1.Requests.Memory().Value())
			c2ExpectedSwap := int64(0)
			if !tc.addContainerWithoutRequests && !tc.addGuaranteedContainer {
				c2ExpectedSwap = calcSwapForBurstablePods(resourceReqsC2.Requests.Memory().Value())
			}

			expectSwap(tc.cgroupVersion, c1ExpectedSwap, resourcesC1)
			expectSwap(tc.cgroupVersion, c2ExpectedSwap, resourcesC2)
		})
	}
}

type CgroupVersion string

const (
	cgroupV1 CgroupVersion = "v1"
	cgroupV2 CgroupVersion = "v2"
)

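// setCgroupVersionDuringTest overrides the package-level isCgroup2UnifiedMode
// hook. It does not restore the previous value, so any test that depends on
// the cgroup version must set it explicitly.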
func setCgroupVersionDuringTest(version CgroupVersion) {
	isCgroup2UnifiedMode = func() bool {
		return version == cgroupV2
	}
}

func setSwapControllerAvailableDuringTest(available bool) func() {
	original := swapControllerAvailable
	swapControllerAvailable = func() bool {
		return available
	}

	return func() {
		swapControllerAvailable = original
	}
}