// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sysinfo
import (
"fmt"
"os"
"regexp"
"runtime"
"strconv"
"strings"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/utils/sysfs"
"k8s.io/klog/v2"
)
var (
// schedulerRegExp captures the text enclosed in square brackets; it is
// applied to the block device scheduler string in GetBlockDeviceInfo.
schedulerRegExp = regexp.MustCompile(`.*\[(.*)\].*`)
// nodeDirRegExp captures the digits following "node/node" in a node
// directory path; GetNodesInfo uses it to obtain the node id.
nodeDirRegExp = regexp.MustCompile(`node/node(\d*)`)
// cpuDirRegExp captures the digits following "/cpu" in a CPU directory
// path; getCoresInfo uses it to obtain the thread (logical CPU) id.
cpuDirRegExp = regexp.MustCompile(`/cpu(\d+)`)
// memoryCapacityRegexp captures the numeric kB value of the "MemTotal:"
// entry; getNodeMemInfo uses it on the per-node memory information.
memoryCapacityRegexp = regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`)
// cpusPath is the directory handed to GetCPUsPaths by getCPUTopology when
// no node directories are available.
cpusPath = "/sys/devices/system/cpu"
)
const (
// cacheLevel2 is the highest cache level that addCacheInfo considers for
// core level caches; caches above this level are recorded as node level
// or uncore caches.
cacheLevel2 = 2
// hugepagesDir is the name of the hugepages sub-directory that
// GetNodesInfo appends to each node directory.
hugepagesDir = "hugepages/"
)
// GetBlockDeviceInfo gathers information about the block devices present on
// the system. All low level OS lookups go through the passed in sysfs
// interface.
//
// The returned map is keyed by the "major:minor" device numbers. Devices whose
// name starts with "loop", "ram" or "sr", and devices reported as hidden, are
// left out. A failure to read or parse the device numbers or the size of any
// device aborts the call with an error, whereas a scheduler that cannot be
// read or matched simply leaves the scheduler set to "none".
func GetBlockDeviceInfo(sysfs sysfs.SysFs) (map[string]info.DiskInfo, error) {
	entries, err := sysfs.GetBlockDevices()
	if err != nil {
		return nil, err
	}

	result := make(map[string]info.DiskInfo)
	for _, entry := range entries {
		devName := entry.Name()

		// Skip non-disk devices.
		// TODO(rjnagal): Maybe just match hd, sd, and dm prefixes.
		switch {
		case strings.HasPrefix(devName, "loop"),
			strings.HasPrefix(devName, "ram"),
			strings.HasPrefix(devName, "sr"):
			continue
		}

		// Skip "hidden" devices (i.e. nvme path device sysfs entries).
		// These devices are in the form of /dev/nvme$Xc$Yn$Z and will
		// not have a device handle (i.e. "hidden")
		hidden, err := sysfs.IsBlockDeviceHidden(devName)
		if err != nil {
			return nil, err
		}
		if hidden {
			continue
		}

		disk := info.DiskInfo{Name: devName}

		rawNumbers, err := sysfs.GetBlockDeviceNumbers(devName)
		if err != nil {
			return nil, err
		}
		if parsed, scanErr := fmt.Sscanf(rawNumbers, "%d:%d", &disk.Major, &disk.Minor); scanErr != nil || parsed != 2 {
			return nil, fmt.Errorf("could not parse device numbers from %s for device %s", rawNumbers, devName)
		}

		rawSize, err := sysfs.GetBlockDeviceSize(devName)
		if err != nil {
			return nil, err
		}
		// Surrounding whitespace (the trailing newline) must go before conversion.
		blocks, err := strconv.ParseUint(strings.TrimSpace(rawSize), 10, 64)
		if err != nil {
			return nil, err
		}
		// The size is reported in blocks of 512 bytes.
		disk.Size = blocks * 512

		// The scheduler is optional: keep "none" unless a bracketed name is found.
		disk.Scheduler = "none"
		if rawSched, schedErr := sysfs.GetBlockDeviceScheduler(devName); schedErr == nil {
			if groups := schedulerRegExp.FindSubmatch([]byte(rawSched)); len(groups) >= 2 {
				disk.Scheduler = string(groups[1])
			}
		}

		result[fmt.Sprintf("%d:%d", disk.Major, disk.Minor)] = disk
	}
	return result, nil
}
// GetNetworkDevices gathers information about the network devices present on
// the system, reading every value through the passed in sysfs interface.
//
// Devices whose name starts with "lo", "veth", "docker" or "nerdctl" are left
// out. A failure to read the address or MTU, or to parse the MTU or speed,
// aborts the call with an error; a speed that cannot be read is left unset.
func GetNetworkDevices(sysfs sysfs.SysFs) ([]info.NetInfo, error) {
	entries, err := sysfs.GetNetworkDevices()
	if err != nil {
		return nil, err
	}

	// Name prefixes of the loopback, veth, docker and nerdctl devices to skip.
	skippedPrefixes := []string{"lo", "veth", "docker", "nerdctl"}

	devices := []info.NetInfo{}
nextDevice:
	for _, entry := range entries {
		ifName := entry.Name()
		for _, prefix := range skippedPrefixes {
			if strings.HasPrefix(ifName, prefix) {
				continue nextDevice
			}
		}

		rawAddress, err := sysfs.GetNetworkAddress(ifName)
		if err != nil {
			return nil, err
		}

		rawMtu, err := sysfs.GetNetworkMtu(ifName)
		if err != nil {
			return nil, err
		}
		var mtu int64
		if parsed, scanErr := fmt.Sscanf(rawMtu, "%d", &mtu); scanErr != nil || parsed != 1 {
			return nil, fmt.Errorf("could not parse mtu from %s for device %s", rawMtu, ifName)
		}

		device := info.NetInfo{
			Name:       ifName,
			MacAddress: strings.TrimSpace(rawAddress),
			Mtu:        mtu,
		}

		// Some devices don't set speed, so a read error is not fatal here.
		if rawSpeed, speedErr := sysfs.GetNetworkSpeed(ifName); speedErr == nil {
			var speed int64
			if parsed, scanErr := fmt.Sscanf(rawSpeed, "%d", &speed); scanErr != nil || parsed != 1 {
				return nil, fmt.Errorf("could not parse speed from %s for device %s", rawSpeed, ifName)
			}
			device.Speed = speed
		}

		devices = append(devices, device)
	}
	return devices, nil
}
// GetHugePagesInfo returns information about pre-allocated huge pages.
// hugepagesDirectory should be the top directory of hugepages,
// such as: /sys/kernel/mm/hugepages/
//
// Every entry of the directory is expected to be named "hugepages-<size>kB".
// For each entry the page size is taken from the name and the number of pages
// is read through sysFs.GetHugePagesNr.
//
// When the directory cannot be listed the function returns an empty result and
// no error. An entry whose name or page count cannot be parsed, or whose page
// count cannot be read, results in an error together with the entries
// collected so far.
func GetHugePagesInfo(sysFs sysfs.SysFs, hugepagesDirectory string) ([]info.HugePagesInfo, error) {
	var hugePagesInfo []info.HugePagesInfo
	files, err := sysFs.GetHugePagesInfo(hugepagesDirectory)
	if err != nil {
		// treat as non-fatal since kernels and machine can be
		// configured to disable hugepage support
		return hugePagesInfo, nil
	}

	for _, st := range files {
		nameArray := strings.Split(st.Name(), "-")
		// Guard against entries without a "-" separator; indexing the
		// second element unconditionally would panic on such names.
		if len(nameArray) < 2 {
			return hugePagesInfo, fmt.Errorf("unexpected hugepages entry name %q, expected format hugepages-<size>kB", st.Name())
		}
		pageSizeArray := strings.Split(nameArray[1], "kB")
		pageSize, err := strconv.ParseUint(string(pageSizeArray[0]), 10, 64)
		if err != nil {
			return hugePagesInfo, err
		}

		val, err := sysFs.GetHugePagesNr(hugepagesDirectory, st.Name())
		if err != nil {
			return hugePagesInfo, err
		}
		var numPages uint64
		// we use sscanf as the file has a new-line that trips up ParseUint
		// it returns the number of tokens successfully parsed, so if
		// n != 1, it means we were unable to parse a number from the file
		n, err := fmt.Sscanf(string(val), "%d", &numPages)
		if err != nil || n != 1 {
			return hugePagesInfo, fmt.Errorf("could not parse file nr_hugepage for %s, contents %q", st.Name(), string(val))
		}

		hugePagesInfo = append(hugePagesInfo, info.HugePagesInfo{
			NumPages: numPages,
			PageSize: pageSize,
		})
	}
	return hugePagesInfo, nil
}
// GetNodesInfo returns information about NUMA nodes and their topology,
// together with the total number of threads found across all cores of all
// nodes.
//
// When no node directories are reported, the result of getCPUTopology is
// returned instead. A node without any CPU directory is only logged, and so
// is a failure to collect cache information (on some Linux platforms, such as
// Arm64 guest kernels, cache info may not exist). Errors from collecting
// cores, memory, huge pages or distances abort the call.
func GetNodesInfo(sysFs sysfs.SysFs) ([]info.Node, int, error) {
	nodeDirs, err := sysFs.GetNodesPaths()
	if err != nil {
		return nil, 0, err
	}
	if len(nodeDirs) == 0 {
		klog.V(4).Info("Nodes topology is not available, providing CPU topology")
		return getCPUTopology(sysFs)
	}

	topology := []info.Node{}
	threadTotal := 0
	for _, dir := range nodeDirs {
		nodeID, err := getMatchedInt(nodeDirRegExp, dir)
		if err != nil {
			return nil, 0, err
		}
		current := info.Node{Id: nodeID}

		cpuDirs, cpuErr := sysFs.GetCPUsPaths(dir)
		if len(cpuDirs) > 0 {
			cores, err := getCoresInfo(sysFs, cpuDirs)
			if err != nil {
				return nil, 0, err
			}
			current.Cores = cores
			for i := range cores {
				threadTotal += len(cores[i].Threads)
			}
		} else {
			klog.Warningf("Found node without any CPU, nodeDir: %s, number of cpuDirs %d, err: %v", dir, len(cpuDirs), cpuErr)
		}

		// Missing cache information is tolerated, so the error is only logged.
		if cacheErr := addCacheInfo(sysFs, &current); cacheErr != nil {
			klog.V(1).Infof("Found node without cache information, nodeDir: %s", dir)
		}

		if current.Memory, err = getNodeMemInfo(sysFs, dir); err != nil {
			return nil, 0, err
		}

		if current.HugePages, err = GetHugePagesInfo(sysFs, fmt.Sprintf("%s/%s", dir, hugepagesDir)); err != nil {
			return nil, 0, err
		}

		if current.Distances, err = getDistances(sysFs, dir); err != nil {
			return nil, 0, err
		}

		topology = append(topology, current)
	}
	return topology, threadTotal, nil
}
// getCPUTopology builds the topology from the CPU directories found under
// cpusPath, creating one node per physical package id. It returns the nodes
// and the number of CPU directories found.
//
// An error is returned when no CPU directory exists. When no physical package
// id can be read for any CPU, nil nodes are returned together with the CPU
// count and no error. Nodes are appended in map iteration order, so their
// order is not fixed.
func getCPUTopology(sysFs sysfs.SysFs) ([]info.Node, int, error) {
	cpuPaths, err := sysFs.GetCPUsPaths(cpusPath)
	if err != nil {
		return nil, 0, err
	}

	total := len(cpuPaths)
	if total == 0 {
		return nil, 0, fmt.Errorf("Any CPU is not available, cpusPath: %s", cpusPath)
	}

	byPackage, err := getCpusByPhysicalPackageID(sysFs, cpuPaths)
	if err != nil {
		return nil, 0, err
	}
	if len(byPackage) == 0 {
		klog.Warningf("Cannot read any physical package id for any CPU")
		return nil, total, nil
	}

	topology := make([]info.Node, 0, len(byPackage))
	for pkgID, pkgCPUs := range byPackage {
		cores, err := getCoresInfo(sysFs, pkgCPUs)
		if err != nil {
			return nil, 0, err
		}
		pkgNode := info.Node{Id: pkgID, Cores: cores}

		// On some Linux platforms(such as Arm64 guest kernel), cache info may not exist.
		// The error is therefore logged and otherwise ignored.
		if cacheErr := addCacheInfo(sysFs, &pkgNode); cacheErr != nil {
			klog.V(1).Infof("Found cpu without cache information, cpuPath: %s", pkgCPUs)
		}
		topology = append(topology, pkgNode)
	}
	return topology, total, nil
}
// getCpusByPhysicalPackageID groups the given CPU paths by the physical
// package id read for each of them. CPUs whose physical package id does not
// exist are logged and skipped; any other read error, or an id that is not an
// integer, aborts the call.
func getCpusByPhysicalPackageID(sysFs sysfs.SysFs, cpusPaths []string) (map[int][]string, error) {
	grouped := make(map[int][]string)
	for _, path := range cpusPaths {
		rawID, err := sysFs.GetCPUPhysicalPackageID(path)
		switch {
		case os.IsNotExist(err):
			klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", path, err)
			continue
		case err != nil:
			return nil, err
		}

		pkgID, err := strconv.Atoi(rawID)
		if err != nil {
			return nil, err
		}

		// append copes with the missing (nil) entry of a package seen for the first time.
		grouped[pkgID] = append(grouped[pkgID], path)
	}
	return grouped, nil
}
// addCacheInfo adds information about caches to the given NUMA node.
//
// For every core the caches of its first thread are looked up. A cache above
// level 2 is stored once on the node when its CPU count equals the number of
// threads of the node (cores * threads of the current core), otherwise it is
// stored once on the core as an uncore cache. A cache of level 2 or below is
// stored on the core when its CPU count equals the number of threads of that
// core. All other caches are ignored. The first lookup error is returned.
func addCacheInfo(sysFs sysfs.SysFs, node *info.Node) error {
	// known reports whether an identical cache is already part of list.
	known := func(list []info.Cache, wanted info.Cache) bool {
		for _, existing := range list {
			if existing == wanted {
				return true
			}
		}
		return false
	}

	for idx := range node.Cores {
		threads := node.Cores[idx].Threads
		// Any thread of the core can be used for the lookup; take the first.
		found, err := GetCacheInfo(sysFs, threads[0])
		if err != nil {
			return err
		}

		threadsPerCore := len(threads)
		threadsPerNode := len(node.Cores) * threadsPerCore

		for _, cache := range found {
			entry := info.Cache{
				Id:    cache.Id,
				Size:  cache.Size,
				Level: cache.Level,
				Type:  cache.Type,
			}

			switch {
			case cache.Level > cacheLevel2 && cache.Cpus == threadsPerNode:
				// Node level cache.
				if !known(node.Caches, entry) {
					node.Caches = append(node.Caches, entry)
				}
			case cache.Level > cacheLevel2:
				// Uncore cache, for architecture in which l3 cache only shared among some cores.
				if !known(node.Cores[idx].UncoreCaches, entry) {
					node.Cores[idx].UncoreCaches = append(node.Cores[idx].UncoreCaches, entry)
				}
			case cache.Cpus == threadsPerCore:
				// Core level cache.
				node.Cores[idx].Caches = append(node.Cores[idx].Caches, entry)
			}
			// Unknown caches are ignored.
		}
	}
	return nil
}
// getNodeMemInfo returns the total memory of a NUMA node in bytes.
//
// When the memory information of the node cannot be read, a warning is logged
// and 0 is returned without an error. An error is returned when the MemTotal
// entry cannot be found or its value cannot be parsed.
func getNodeMemInfo(sysFs sysfs.SysFs, nodeDir string) (uint64, error) {
	meminfo, err := sysFs.GetMemInfo(nodeDir)
	if err != nil {
		// Per-node info may be unavailable; that is not treated as a failure.
		klog.Warningf("Found node without memory information, nodeDir: %s", nodeDir)
		return 0, nil
	}

	groups := memoryCapacityRegexp.FindStringSubmatch(meminfo)
	if len(groups) != 2 {
		return 0, fmt.Errorf("failed to match regexp in output: %q", meminfo)
	}

	kilobytes, err := strconv.ParseUint(groups[1], 10, 64)
	if err != nil {
		return 0, err
	}
	// The matched value is in kB; convert it to bytes.
	return kilobytes * 1024, nil
}
// getDistances returns the distances between the given NUMA node and the other
// nodes, parsed from a list of numbers separated by single spaces.
//
// When the distance information of the node cannot be read, a warning is
// logged and nil is returned without an error. A value that is not an unsigned
// integer results in an error.
func getDistances(sysFs sysfs.SysFs, nodeDir string) ([]uint64, error) {
	raw, err := sysFs.GetDistances(nodeDir)
	if err != nil {
		// Per-node info may be unavailable; that is not treated as a failure.
		klog.Warningf("Found node without distance information, nodeDir: %s", nodeDir)
		return nil, nil
	}

	fields := strings.Split(raw, " ")
	parsed := make([]uint64, 0, len(fields))
	for _, field := range fields {
		value, convErr := strconv.ParseUint(field, 10, 64)
		if convErr != nil {
			return nil, fmt.Errorf("cannot convert %s to int", field)
		}
		parsed = append(parsed, value)
	}
	return parsed, nil
}
// getCoresInfo returns information about the physical cores behind the given
// CPU directories.
//
// Each online CPU directory contributes its CPU number as a thread of the core
// identified by core id and physical package id (plus book id and drawer id on
// s390x). CPUs that are offline, or whose id files do not exist, are skipped;
// the latter are logged. Any other read error, an id that is not an integer,
// or a CPU directory of unexpected format aborts the call.
func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) {
	isS390x := runtime.GOARCH == "s390x"

	cores := make([]info.Core, 0, len(cpuDirs))
	for _, dir := range cpuDirs {
		threadID, err := getMatchedInt(cpuDirRegExp, dir)
		if err != nil {
			return nil, fmt.Errorf("unexpected format of CPU directory, cpuDirRegExp %s, cpuDir: %s", cpuDirRegExp, dir)
		}
		if !sysFs.IsCPUOnline(dir) {
			continue
		}

		rawCoreID, err := sysFs.GetCoreID(dir)
		switch {
		case os.IsNotExist(err):
			klog.Warningf("Cannot read core id for %s, core_id file does not exist, err: %s", dir, err)
			continue
		case err != nil:
			return nil, err
		}
		coreID, err := strconv.Atoi(rawCoreID)
		if err != nil {
			return nil, err
		}

		rawSocketID, err := sysFs.GetCPUPhysicalPackageID(dir)
		switch {
		case os.IsNotExist(err):
			klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", dir, err)
			continue
		case err != nil:
			return nil, err
		}
		socketID, err := strconv.Atoi(rawSocketID)
		if err != nil {
			return nil, err
		}

		// s390/s390x additional cpu topology levels
		var bookID, drawerID string
		if isS390x {
			bookID, err = sysFs.GetBookID(dir)
			switch {
			case os.IsNotExist(err):
				klog.Warningf("Cannot read book id for %s, book_id file does not exist, err: %s", dir, err)
				continue
			case err != nil:
				return nil, err
			}

			drawerID, err = sysFs.GetDrawerID(dir)
			switch {
			case os.IsNotExist(err):
				klog.Warningf("Cannot read drawer id for %s, drawer_id file does not exist, err: %s", dir, err)
				continue
			case err != nil:
				return nil, err
			}
		}

		// Look for a core already recorded under the same identifiers. Cores
		// are only appended when no match exists, so there is at most one.
		target := -1
		for i := range cores {
			candidate := &cores[i]
			if candidate.Id != coreID || candidate.SocketID != socketID {
				continue
			}
			// For s390x, the BookID and DrawerID need to match as well.
			if isS390x && (candidate.BookID != bookID || candidate.DrawerID != drawerID) {
				continue
			}
			target = i
			break
		}
		if target < 0 {
			cores = append(cores, info.Core{})
			target = len(cores) - 1
		}

		core := &cores[target]
		core.Id = coreID
		core.SocketID = socketID
		core.BookID = bookID
		core.DrawerID = drawerID
		core.Threads = append(core.Threads, threadID)
	}
	return cores, nil
}
// GetCacheInfo returns information about the caches accessible from the given
// cpu thread. Only cache entries whose name starts with "index" are taken into
// account. The first error reported by the sysfs interface aborts the call.
func GetCacheInfo(sysFs sysfs.SysFs, id int) ([]sysfs.CacheInfo, error) {
	entries, err := sysFs.GetCaches(id)
	if err != nil {
		return nil, err
	}

	result := []sysfs.CacheInfo{}
	for _, entry := range entries {
		name := entry.Name()
		if strings.HasPrefix(name, "index") {
			details, err := sysFs.GetCacheInfo(id, name)
			if err != nil {
				return nil, err
			}
			result = append(result, details)
		}
	}
	return result, nil
}
// getNetworkStats reads the rx/tx byte, packet, error and dropped counters of
// the named network interface through the given sysfs interface.
//
// The counters are read one after another; on the first failing read the
// statistics gathered so far are returned together with the error.
func getNetworkStats(name string, sysFs sysfs.SysFs) (info.InterfaceStats, error) {
	var err error
	stats := info.InterfaceStats{Name: name}

	if stats.RxBytes, err = sysFs.GetNetworkStatValue(name, "rx_bytes"); err != nil {
		return stats, err
	}
	if stats.RxPackets, err = sysFs.GetNetworkStatValue(name, "rx_packets"); err != nil {
		return stats, err
	}
	if stats.RxErrors, err = sysFs.GetNetworkStatValue(name, "rx_errors"); err != nil {
		return stats, err
	}
	if stats.RxDropped, err = sysFs.GetNetworkStatValue(name, "rx_dropped"); err != nil {
		return stats, err
	}
	if stats.TxBytes, err = sysFs.GetNetworkStatValue(name, "tx_bytes"); err != nil {
		return stats, err
	}
	if stats.TxPackets, err = sysFs.GetNetworkStatValue(name, "tx_packets"); err != nil {
		return stats, err
	}
	if stats.TxErrors, err = sysFs.GetNetworkStatValue(name, "tx_errors"); err != nil {
		return stats, err
	}
	if stats.TxDropped, err = sysFs.GetNetworkStatValue(name, "tx_dropped"); err != nil {
		return stats, err
	}
	return stats, nil
}
// GetSystemUUID returns the system UUID as reported by the passed in sysfs
// interface, along with any error that interface returns.
func GetSystemUUID(sysFs sysfs.SysFs) (string, error) {
return sysFs.GetSystemUUID()
}
// getMatchedInt applies rgx to str and converts the text captured by its single
// capture group to an int.
//
// An error is returned when the match does not consist of exactly the full
// match plus one group, or when the captured text is not a valid int. The
// returned value is 0 whenever an error is returned.
func getMatchedInt(rgx *regexp.Regexp, str string) (int, error) {
	switch groups := rgx.FindStringSubmatch(str); len(groups) {
	case 2:
		parsed, convErr := strconv.Atoi(groups[1])
		if convErr != nil {
			return 0, convErr
		}
		return parsed, nil
	default:
		return 0, fmt.Errorf("failed to match regexp, str: %s", str)
	}
}
// GetSocketFromCPU returns the socket ID of the core that the passed CPU
// (thread) belongs to. When no node of the topology contains the CPU, -1 is
// returned.
func GetSocketFromCPU(topology []info.Node, cpu int) int {
	for _, node := range topology {
		if found, coreIdx := node.FindCoreByThread(cpu); found {
			return node.Cores[coreIdx].SocketID
		}
	}
	return -1
}
// GetOnlineCPUs returns the thread IDs of every core of every node in the
// given topology, in the order in which nodes and cores are listed. The
// result is an empty, non-nil slice when the topology holds no threads.
func GetOnlineCPUs(topology []info.Node) []int {
	online := []int{}
	for i := range topology {
		for j := range topology[i].Cores {
			online = append(online, topology[i].Cores[j].Threads...)
		}
	}
	return online
}