//go:build libpfm && cgo
// +build libpfm,cgo
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Collector of perf events for a container.
package perf
// #cgo CFLAGS: -I/usr/include
// #cgo LDFLAGS: -lpfm
// #include <perfmon/pfmlib.h>
// #include <stdlib.h>
// #include <string.h>
import "C"
import (
"bytes"
"encoding/binary"
"fmt"
"os"
"sync"
"unsafe"
"golang.org/x/sys/unix"
"k8s.io/klog/v2"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
)
type collector struct {
cgroupPath string
events PerfEvents
cpuFiles map[int]group
cpuFilesLock sync.Mutex
onlineCPUs []int
eventToCustomEvent map[Event]*CustomEvent
uncore stats.Collector
// Handle for mocking purposes.
perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error)
ioctlSetInt func(fd int, req uint, value int) error
}
type group struct {
cpuFiles map[string]map[int]readerCloser
names []string
leaderName string
}
var (
isLibpfmInitialized = false
libpfmMutex = sync.Mutex{}
)
const (
groupLeaderFileDescriptor = -1
)
func init() {
libpfmMutex.Lock()
defer libpfmMutex.Unlock()
pErr := C.pfm_initialize()
if pErr != C.PFM_SUCCESS {
klog.Errorf("unable to initialize libpfm: %d", int(pErr))
return
}
isLibpfmInitialized = true
}
func newCollector(cgroupPath string, events PerfEvents, onlineCPUs []int, cpuToSocket map[int]int) *collector {
collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket), perfEventOpen: unix.PerfEventOpen, ioctlSetInt: unix.IoctlSetInt}
mapEventsToCustomEvents(collector)
return collector
}
func (c *collector) UpdateStats(stats *info.ContainerStats) error {
err := c.uncore.UpdateStats(stats)
if err != nil {
klog.Errorf("Failed to get uncore perf event stats: %v", err)
}
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
stats.PerfStats = []info.PerfStat{}
klog.V(5).Infof("Attempting to update perf_event stats from cgroup %q", c.cgroupPath)
for _, group := range c.cpuFiles {
for cpu, file := range group.cpuFiles[group.leaderName] {
stat, err := readGroupPerfStat(file, group, cpu, c.cgroupPath)
if err != nil {
klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", group.leaderName, cpu, c.cgroupPath, err.Error())
continue
}
stats.PerfStats = append(stats.PerfStats, stat...)
}
}
return nil
}
func readGroupPerfStat(file readerCloser, group group, cpu int, cgroupPath string) ([]info.PerfStat, error) {
values, err := getPerfValues(file, group)
if err != nil {
return nil, err
}
perfStats := make([]info.PerfStat, len(values))
for i, value := range values {
klog.V(5).Infof("Read metric for event %q for cpu %d from cgroup %q: %d", value.Name, cpu, cgroupPath, value.Value)
perfStats[i] = info.PerfStat{
PerfValue: value,
Cpu: cpu,
}
}
return perfStats, nil
}
func getPerfValues(file readerCloser, group group) ([]info.PerfValue, error) {
// 24 bytes of GroupReadFormat struct.
// 16 bytes of Values struct for each element in group.
// See https://man7.org/linux/man-pages/man2/perf_event_open.2.html section "Reading results" with PERF_FORMAT_GROUP specified.
buf := make([]byte, 24+16*len(group.names))
_, err := file.Read(buf)
if err != nil {
return []info.PerfValue{}, fmt.Errorf("unable to read perf event group ( leader = %s ): %w", group.leaderName, err)
}
perfData := &GroupReadFormat{}
reader := bytes.NewReader(buf[:24])
err = binary.Read(reader, binary.LittleEndian, perfData)
if err != nil {
return []info.PerfValue{}, fmt.Errorf("unable to decode perf event group ( leader = %s ): %w", group.leaderName, err)
}
values := make([]Values, perfData.Nr)
reader = bytes.NewReader(buf[24:])
err = binary.Read(reader, binary.LittleEndian, values)
if err != nil {
return []info.PerfValue{}, fmt.Errorf("unable to decode perf event group values ( leader = %s ): %w", group.leaderName, err)
}
scalingRatio := 1.0
if perfData.TimeRunning != 0 && perfData.TimeEnabled != 0 {
scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled)
}
perfValues := make([]info.PerfValue, perfData.Nr)
if scalingRatio != float64(0) {
for i, name := range group.names {
perfValues[i] = info.PerfValue{
ScalingRatio: scalingRatio,
Value: uint64(float64(values[i].Value) / scalingRatio),
Name: name,
}
}
} else {
for i, name := range group.names {
perfValues[i] = info.PerfValue{
ScalingRatio: scalingRatio,
Value: values[i].Value,
Name: name,
}
}
}
return perfValues, nil
}
func (c *collector) setup() error {
cgroup, err := os.Open(c.cgroupPath)
if err != nil {
return fmt.Errorf("unable to open cgroup directory %s: %s", c.cgroupPath, err)
}
defer cgroup.Close()
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
cgroupFd := int(cgroup.Fd())
groupIndex := 0
for _, group := range c.events.Core.Events {
// CPUs file descriptors of group leader needed for perf_event_open.
leaderFileDescriptors := make(map[int]int, len(c.onlineCPUs))
for _, cpu := range c.onlineCPUs {
leaderFileDescriptors[cpu] = groupLeaderFileDescriptor
}
leaderFileDescriptors, err := c.createLeaderFileDescriptors(group.events, cgroupFd, groupIndex, leaderFileDescriptors)
if err != nil {
klog.Errorf("Cannot count perf event group %v: %v", group.events, err)
c.deleteGroup(groupIndex)
continue
} else {
groupIndex++
}
// Group is prepared so we should reset and enable counting.
for _, fd := range leaderFileDescriptors {
err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0)
if err != nil {
return err
}
err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0)
if err != nil {
return err
}
}
}
return nil
}
func (c *collector) createLeaderFileDescriptors(events []Event, cgroupFd int, groupIndex int, leaderFileDescriptors map[int]int) (map[int]int, error) {
for j, event := range events {
// First element is group leader.
isGroupLeader := j == 0
customEvent, ok := c.eventToCustomEvent[event]
var err error
if ok {
config := c.createConfigFromRawEvent(customEvent)
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors)
if err != nil {
return nil, fmt.Errorf("cannot register perf event: %v", err)
}
} else {
config, err := c.createConfigFromEvent(event)
if err != nil {
return nil, fmt.Errorf("cannot create config from perf event: %v", err)
}
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors)
if err != nil {
return nil, fmt.Errorf("cannot register perf event: %v", err)
}
// Clean memory allocated by C code.
C.free(unsafe.Pointer(config))
}
}
return leaderFileDescriptors, nil
}
func readPerfEventAttr(name string, pfmGetOsEventEncoding func(string, unsafe.Pointer) error) (*unix.PerfEventAttr, error) {
perfEventAttrMemory := C.malloc(C.size_t(unsafe.Sizeof(unix.PerfEventAttr{})))
// Fill memory with 0 values.
C.memset(perfEventAttrMemory, 0, C.size_t(unsafe.Sizeof(unix.PerfEventAttr{})))
err := pfmGetOsEventEncoding(name, unsafe.Pointer(perfEventAttrMemory))
if err != nil {
return nil, err
}
return (*unix.PerfEventAttr)(perfEventAttrMemory), nil
}
func pfmGetOsEventEncoding(name string, perfEventAttrMemory unsafe.Pointer) error {
event := pfmPerfEncodeArgT{}
fstr := C.CString("")
defer C.free(unsafe.Pointer(fstr))
event.fstr = unsafe.Pointer(fstr)
event.attr = perfEventAttrMemory
event.size = C.size_t(unsafe.Sizeof(event))
cSafeName := C.CString(name)
defer C.free(unsafe.Pointer(cSafeName))
pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event))
if pErr != C.PFM_SUCCESS {
return fmt.Errorf("unable to transform event name %s to perf_event_attr: %d", name, int(pErr))
}
return nil
}
type eventInfo struct {
name string
config *unix.PerfEventAttr
pid int
groupIndex int
isGroupLeader bool
}
func (c *collector) registerEvent(event eventInfo, leaderFileDescriptors map[int]int) (map[int]int, error) {
newLeaderFileDescriptors := make(map[int]int, len(c.onlineCPUs))
var pid, flags int
if event.isGroupLeader {
pid = event.pid
flags = unix.PERF_FLAG_FD_CLOEXEC | unix.PERF_FLAG_PID_CGROUP
} else {
pid = -1
flags = unix.PERF_FLAG_FD_CLOEXEC
}
setAttributes(event.config, event.isGroupLeader)
for _, cpu := range c.onlineCPUs {
fd, err := c.perfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags)
if err != nil {
return leaderFileDescriptors, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err)
}
perfFile := os.NewFile(uintptr(fd), event.name)
if perfFile == nil {
return leaderFileDescriptors, fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
}
c.addEventFile(event.groupIndex, event.name, cpu, perfFile)
// If group leader, save fd for others.
if event.isGroupLeader {
newLeaderFileDescriptors[cpu] = fd
}
}
if event.isGroupLeader {
return newLeaderFileDescriptors, nil
}
return leaderFileDescriptors, nil
}
func (c *collector) addEventFile(index int, name string, cpu int, perfFile *os.File) {
_, ok := c.cpuFiles[index]
if !ok {
c.cpuFiles[index] = group{
leaderName: name,
cpuFiles: map[string]map[int]readerCloser{},
}
}
_, ok = c.cpuFiles[index].cpuFiles[name]
if !ok {
c.cpuFiles[index].cpuFiles[name] = map[int]readerCloser{}
}
c.cpuFiles[index].cpuFiles[name][cpu] = perfFile
// Check if name is already stored.
for _, have := range c.cpuFiles[index].names {
if name == have {
return
}
}
// Otherwise save it.
c.cpuFiles[index] = group{
cpuFiles: c.cpuFiles[index].cpuFiles,
names: append(c.cpuFiles[index].names, name),
leaderName: c.cpuFiles[index].leaderName,
}
}
func (c *collector) deleteGroup(index int) {
for name, files := range c.cpuFiles[index].cpuFiles {
for cpu, file := range files {
klog.V(5).Infof("Closing perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
}
}
}
delete(c.cpuFiles, index)
}
func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
length := len(event.Config)
config := &unix.PerfEventAttr{
Type: event.Type,
Config: event.Config[0],
}
if length >= 2 {
config.Ext1 = event.Config[1]
}
if length == 3 {
config.Ext2 = event.Config[2]
}
klog.V(5).Infof("perf_event_attr struct prepared: %#v", config)
return config
}
func setAttributes(config *unix.PerfEventAttr, leader bool) {
config.Sample_type = unix.PERF_SAMPLE_IDENTIFIER
config.Read_format = unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_GROUP | unix.PERF_FORMAT_ID
config.Bits = unix.PerfBitInherit
// Group leader should have this flag set to disable counting until all group would be prepared.
if leader {
config.Bits |= unix.PerfBitDisabled
}
config.Size = uint32(unsafe.Sizeof(unix.PerfEventAttr{}))
}
func (c *collector) Destroy() {
c.uncore.Destroy()
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
for i := range c.cpuFiles {
c.deleteGroup(i)
}
}
// Finalize terminates libpfm4 to free resources.
func Finalize() {
libpfmMutex.Lock()
defer libpfmMutex.Unlock()
klog.V(1).Info("Attempting to terminate libpfm4")
if !isLibpfmInitialized {
klog.V(1).Info("libpfm4 has not been initialized; not terminating.")
return
}
C.pfm_terminate()
isLibpfmInitialized = false
}
func mapEventsToCustomEvents(collector *collector) {
collector.eventToCustomEvent = map[Event]*CustomEvent{}
for key, event := range collector.events.Core.CustomEvents {
collector.eventToCustomEvent[event.Name] = &collector.events.Core.CustomEvents[key]
}
}
func (c *collector) createConfigFromRawEvent(event *CustomEvent) *unix.PerfEventAttr {
klog.V(5).Infof("Setting up raw perf event %#v", event)
config := createPerfEventAttr(*event)
klog.V(5).Infof("perf_event_attr: %#v", config)
return config
}
func (c *collector) createConfigFromEvent(event Event) (*unix.PerfEventAttr, error) {
klog.V(5).Infof("Setting up perf event %s", string(event))
config, err := readPerfEventAttr(string(event), pfmGetOsEventEncoding)
if err != nil {
C.free((unsafe.Pointer)(config))
return nil, err
}
klog.V(5).Infof("perf_event_attr: %#v", config)
return config, nil
}