kubernetes/vendor/github.com/google/cadvisor/container/raw/watcher.go

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package container defines types for sub-container events and also
// defines an interface for container operation handlers.
package raw

import (
	"fmt"
	"os"
	"path"
	"strings"

	inotify "k8s.io/utils/inotify"

	"github.com/google/cadvisor/container"
	"github.com/google/cadvisor/container/common"
	"github.com/google/cadvisor/container/libcontainer"
	"github.com/google/cadvisor/watcher"

	"k8s.io/klog/v2"
)

type rawContainerWatcher struct {
	// Absolute path to the root of the cgroup hierarchies
	cgroupPaths map[string]string

	// Inotify event watcher.
	watcher *common.InotifyWatcher

	// Signal for watcher thread to stop.
	stopWatcher chan error
}

func NewRawContainerWatcher(includedMetrics container.MetricSet) (watcher.ContainerWatcher, error) {
	cgroupSubsystems, err := libcontainer.GetCgroupSubsystems(includedMetrics)
	if err != nil {
		return nil, fmt.Errorf("failed to get cgroup subsystems: %v", err)
	}
	if len(cgroupSubsystems) == 0 {
		return nil, fmt.Errorf("failed to find supported cgroup mounts for the raw factory")
	}

	watcher, err := common.NewInotifyWatcher()
	if err != nil {
		return nil, err
	}

	rawWatcher := &rawContainerWatcher{
		cgroupPaths: cgroupSubsystems,
		watcher:     watcher,
		stopWatcher: make(chan error),
	}

	return rawWatcher, nil
}

func (w *rawContainerWatcher) Start(events chan watcher.ContainerEvent) error {
	// Watch this container (all its cgroups) and all subdirectories.
	watched := make([]string, 0)
	for _, cgroupPath := range w.cgroupPaths {
		_, err := w.watchDirectory(events, cgroupPath, "/")
		if err != nil {
			for _, watchedCgroupPath := range watched {
				_, removeErr := w.watcher.RemoveWatch("/", watchedCgroupPath)
				if removeErr != nil {
					klog.Warningf("Failed to remove inotify watch for %q with error: %v", watchedCgroupPath, removeErr)
				}
			}
			return err
		}
		watched = append(watched, cgroupPath)
	}

	// Process the events received from the kernel.
	go func() {
		for {
			select {
			case event := <-w.watcher.Event():
				err := w.processEvent(event, events)
				if err != nil {
					klog.Warningf("Error while processing event (%+v): %v", event, err)
				}
			case err := <-w.watcher.Error():
				klog.Warningf("Error while watching %q: %v", "/", err)
			case <-w.stopWatcher:
				err := w.watcher.Close()
				if err == nil {
					w.stopWatcher <- err
					return
				}
			}
		}
	}()

	return nil
}

func (w *rawContainerWatcher) Stop() error {
	// Rendezvous with the watcher thread.
	w.stopWatcher <- nil
	return <-w.stopWatcher
}

// Watches the specified directory and all subdirectories. Returns whether the path was
// already being watched and an error (if any).
func (w *rawContainerWatcher) watchDirectory(events chan watcher.ContainerEvent, dir string, containerName string) (bool, error) {
	// Don't watch .mount cgroups because they never have containers as sub-cgroups.  A single container
	// can have many .mount cgroups associated with it which can quickly exhaust the inotify watches on a node.
	if strings.HasSuffix(containerName, ".mount") {
		return false, nil
	}
	alreadyWatching, err := w.watcher.AddWatch(containerName, dir)
	if err != nil {
		return alreadyWatching, err
	}

	// Remove the watch if further operations failed.
	cleanup := true
	defer func() {
		if cleanup {
			_, err := w.watcher.RemoveWatch(containerName, dir)
			if err != nil {
				klog.Warningf("Failed to remove inotify watch for %q: %v", dir, err)
			}
		}
	}()

	// TODO(vmarmol): We should re-do this once we're done to ensure directories were not added in the meantime.
	// Watch subdirectories as well.
	entries, err := os.ReadDir(dir)
	if err != nil {
		return alreadyWatching, err
	}
	for _, entry := range entries {
		if entry.IsDir() {
			entryPath := path.Join(dir, entry.Name())
			subcontainerName := path.Join(containerName, entry.Name())
			alreadyWatchingSubDir, err := w.watchDirectory(events, entryPath, subcontainerName)
			if err != nil {
				klog.Errorf("Failed to watch directory %q: %v", entryPath, err)
				if os.IsNotExist(err) {
					// The directory may have been removed before watching. Try to watch the other
					// subdirectories. (https://github.com/kubernetes/kubernetes/issues/28997)
					continue
				}
				return alreadyWatching, err
			}
			// since we already missed the creation event for this directory, publish an event here.
			if !alreadyWatchingSubDir {
				go func() {
					events <- watcher.ContainerEvent{
						EventType:   watcher.ContainerAdd,
						Name:        subcontainerName,
						WatchSource: watcher.Raw,
					}
				}()
			}
		}
	}

	cleanup = false
	return alreadyWatching, nil
}

func (w *rawContainerWatcher) processEvent(event *inotify.Event, events chan watcher.ContainerEvent) error {
	// Convert the inotify event type to a container create or delete.
	var eventType watcher.ContainerEventType
	switch {
	case (event.Mask & inotify.InCreate) > 0:
		eventType = watcher.ContainerAdd
	case (event.Mask & inotify.InDelete) > 0:
		eventType = watcher.ContainerDelete
	case (event.Mask & inotify.InMovedFrom) > 0:
		eventType = watcher.ContainerDelete
	case (event.Mask & inotify.InMovedTo) > 0:
		eventType = watcher.ContainerAdd
	default:
		// Ignore other events.
		return nil
	}

	// Derive the container name from the path name.
	var containerName string
	for _, mount := range w.cgroupPaths {
		mountLocation := path.Clean(mount) + "/"
		if strings.HasPrefix(event.Name, mountLocation) {
			containerName = event.Name[len(mountLocation)-1:]
			break
		}
	}
	if containerName == "" {
		return fmt.Errorf("unable to detect container from watch event on directory %q", event.Name)
	}

	// Maintain the watch for the new or deleted container.
	switch eventType {
	case watcher.ContainerAdd:
		// New container was created, watch it.
		alreadyWatched, err := w.watchDirectory(events, event.Name, containerName)
		if err != nil {
			return err
		}

		// Only report container creation once.
		if alreadyWatched {
			return nil
		}
	case watcher.ContainerDelete:
		// Container was deleted, stop watching for it.
		lastWatched, err := w.watcher.RemoveWatch(containerName, event.Name)
		if err != nil {
			return err
		}

		// Only report container deletion once.
		if !lastWatched {
			return nil
		}
	default:
		return fmt.Errorf("unknown event type %v", eventType)
	}

	// Deliver the event.
	events <- watcher.ContainerEvent{
		EventType:   eventType,
		Name:        containerName,
		WatchSource: watcher.Raw,
	}

	return nil
}