kubernetes/vendor/github.com/google/pprof/profile/profile.go

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package profile provides a representation of profile.proto and
// methods to encode/decode profiles in this format.
package profile

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
	"math"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"sync"
	"time"
)

// Profile is an in-memory representation of profile.proto.
type Profile struct {
	SampleType        []*ValueType
	DefaultSampleType string
	Sample            []*Sample
	Mapping           []*Mapping
	Location          []*Location
	Function          []*Function
	Comments          []string
	DocURL            string

	DropFrames string
	KeepFrames string

	TimeNanos     int64
	DurationNanos int64
	PeriodType    *ValueType
	Period        int64

	// The following fields are modified during encoding and copying,
	// so are protected by a Mutex.
	encodeMu sync.Mutex

	commentX           []int64
	docURLX            int64
	dropFramesX        int64
	keepFramesX        int64
	stringTable        []string
	defaultSampleTypeX int64
}

// ValueType corresponds to Profile.ValueType
type ValueType struct {
	Type string // cpu, wall, inuse_space, etc
	Unit string // seconds, nanoseconds, bytes, etc

	typeX int64
	unitX int64
}

// Sample corresponds to Profile.Sample
type Sample struct {
	Location []*Location
	Value    []int64
	// Label is a per-label-key map to values for string labels.
	//
	// In general, having multiple values for the given label key is strongly
	// discouraged - see docs for the sample label field in profile.proto.  The
	// main reason this unlikely state is tracked here is to make the
	// decoding->encoding roundtrip not lossy. But we expect that the value
	// slices present in this map are always of length 1.
	Label map[string][]string
	// NumLabel is a per-label-key map to values for numeric labels. See a note
	// above on handling multiple values for a label.
	NumLabel map[string][]int64
	// NumUnit is a per-label-key map to the unit names of corresponding numeric
	// label values. The unit info may be missing even if the label is in
	// NumLabel, see the docs in profile.proto for details. When the value is
	// slice is present and not nil, its length must be equal to the length of
	// the corresponding value slice in NumLabel.
	NumUnit map[string][]string

	locationIDX []uint64
	labelX      []label
}

// label corresponds to Profile.Label
type label struct {
	keyX int64
	// Exactly one of the two following values must be set
	strX int64
	numX int64 // Integer value for this label
	// can be set if numX has value
	unitX int64
}

// Mapping corresponds to Profile.Mapping
type Mapping struct {
	ID              uint64
	Start           uint64
	Limit           uint64
	Offset          uint64
	File            string
	BuildID         string
	HasFunctions    bool
	HasFilenames    bool
	HasLineNumbers  bool
	HasInlineFrames bool

	fileX    int64
	buildIDX int64

	// Name of the kernel relocation symbol ("_text" or "_stext"), extracted from File.
	// For linux kernel mappings generated by some tools, correct symbolization depends
	// on knowing which of the two possible relocation symbols was used for `Start`.
	// This is given to us as a suffix in `File` (e.g. "[kernel.kallsyms]_stext").
	//
	// Note, this public field is not persisted in the proto. For the purposes of
	// copying / merging / hashing profiles, it is considered subsumed by `File`.
	KernelRelocationSymbol string
}

// Location corresponds to Profile.Location
type Location struct {
	ID       uint64
	Mapping  *Mapping
	Address  uint64
	Line     []Line
	IsFolded bool

	mappingIDX uint64
}

// Line corresponds to Profile.Line
type Line struct {
	Function *Function
	Line     int64
	Column   int64

	functionIDX uint64
}

// Function corresponds to Profile.Function
type Function struct {
	ID         uint64
	Name       string
	SystemName string
	Filename   string
	StartLine  int64

	nameX       int64
	systemNameX int64
	filenameX   int64
}

// Parse parses a profile and checks for its validity. The input
// may be a gzip-compressed encoded protobuf or one of many legacy
// profile formats which may be unsupported in the future.
func Parse(r io.Reader) (*Profile, error) {
	data, err := io.ReadAll(r)
	if err != nil {
		return nil, err
	}
	return ParseData(data)
}

// ParseData parses a profile from a buffer and checks for its
// validity.
func ParseData(data []byte) (*Profile, error) {
	var p *Profile
	var err error
	if len(data) >= 2 && data[0] == 0x1f && data[1] == 0x8b {
		gz, err := gzip.NewReader(bytes.NewBuffer(data))
		if err == nil {
			data, err = io.ReadAll(gz)
		}
		if err != nil {
			return nil, fmt.Errorf("decompressing profile: %v", err)
		}
	}
	if p, err = ParseUncompressed(data); err != nil && err != errNoData && err != errConcatProfile {
		p, err = parseLegacy(data)
	}

	if err != nil {
		return nil, fmt.Errorf("parsing profile: %v", err)
	}

	if err := p.CheckValid(); err != nil {
		return nil, fmt.Errorf("malformed profile: %v", err)
	}
	return p, nil
}

var errUnrecognized = fmt.Errorf("unrecognized profile format")
var errMalformed = fmt.Errorf("malformed profile format")
var errNoData = fmt.Errorf("empty input file")
var errConcatProfile = fmt.Errorf("concatenated profiles detected")

func parseLegacy(data []byte) (*Profile, error) {
	parsers := []func([]byte) (*Profile, error){
		parseCPU,
		parseHeap,
		parseGoCount, // goroutine, threadcreate
		parseThread,
		parseContention,
		parseJavaProfile,
	}

	for _, parser := range parsers {
		p, err := parser(data)
		if err == nil {
			p.addLegacyFrameInfo()
			return p, nil
		}
		if err != errUnrecognized {
			return nil, err
		}
	}
	return nil, errUnrecognized
}

// ParseUncompressed parses an uncompressed protobuf into a profile.
func ParseUncompressed(data []byte) (*Profile, error) {
	if len(data) == 0 {
		return nil, errNoData
	}
	p := &Profile{}
	if err := unmarshal(data, p); err != nil {
		return nil, err
	}

	if err := p.postDecode(); err != nil {
		return nil, err
	}

	return p, nil
}

var libRx = regexp.MustCompile(`([.]so$|[.]so[._][0-9]+)`)

// massageMappings applies heuristic-based changes to the profile
// mappings to account for quirks of some environments.
func (p *Profile) massageMappings() {
	// Merge adjacent regions with matching names, checking that the offsets match
	if len(p.Mapping) > 1 {
		mappings := []*Mapping{p.Mapping[0]}
		for _, m := range p.Mapping[1:] {
			lm := mappings[len(mappings)-1]
			if adjacent(lm, m) {
				lm.Limit = m.Limit
				if m.File != "" {
					lm.File = m.File
				}
				if m.BuildID != "" {
					lm.BuildID = m.BuildID
				}
				p.updateLocationMapping(m, lm)
				continue
			}
			mappings = append(mappings, m)
		}
		p.Mapping = mappings
	}

	// Use heuristics to identify main binary and move it to the top of the list of mappings
	for i, m := range p.Mapping {
		file := strings.TrimSpace(strings.Replace(m.File, "(deleted)", "", -1))
		if len(file) == 0 {
			continue
		}
		if len(libRx.FindStringSubmatch(file)) > 0 {
			continue
		}
		if file[0] == '[' {
			continue
		}
		// Swap what we guess is main to position 0.
		p.Mapping[0], p.Mapping[i] = p.Mapping[i], p.Mapping[0]
		break
	}

	// Keep the mapping IDs neatly sorted
	for i, m := range p.Mapping {
		m.ID = uint64(i + 1)
	}
}

// adjacent returns whether two mapping entries represent the same
// mapping that has been split into two. Check that their addresses are adjacent,
// and if the offsets match, if they are available.
func adjacent(m1, m2 *Mapping) bool {
	if m1.File != "" && m2.File != "" {
		if m1.File != m2.File {
			return false
		}
	}
	if m1.BuildID != "" && m2.BuildID != "" {
		if m1.BuildID != m2.BuildID {
			return false
		}
	}
	if m1.Limit != m2.Start {
		return false
	}
	if m1.Offset != 0 && m2.Offset != 0 {
		offset := m1.Offset + (m1.Limit - m1.Start)
		if offset != m2.Offset {
			return false
		}
	}
	return true
}

func (p *Profile) updateLocationMapping(from, to *Mapping) {
	for _, l := range p.Location {
		if l.Mapping == from {
			l.Mapping = to
		}
	}
}

func serialize(p *Profile) []byte {
	p.encodeMu.Lock()
	p.preEncode()
	b := marshal(p)
	p.encodeMu.Unlock()
	return b
}

// Write writes the profile as a gzip-compressed marshaled protobuf.
func (p *Profile) Write(w io.Writer) error {
	zw := gzip.NewWriter(w)
	defer zw.Close()
	_, err := zw.Write(serialize(p))
	return err
}

// WriteUncompressed writes the profile as a marshaled protobuf.
func (p *Profile) WriteUncompressed(w io.Writer) error {
	_, err := w.Write(serialize(p))
	return err
}

// CheckValid tests whether the profile is valid. Checks include, but are
// not limited to:
//   - len(Profile.Sample[n].value) == len(Profile.value_unit)
//   - Sample.id has a corresponding Profile.Location
func (p *Profile) CheckValid() error {
	// Check that sample values are consistent
	sampleLen := len(p.SampleType)
	if sampleLen == 0 && len(p.Sample) != 0 {
		return fmt.Errorf("missing sample type information")
	}
	for _, s := range p.Sample {
		if s == nil {
			return fmt.Errorf("profile has nil sample")
		}
		if len(s.Value) != sampleLen {
			return fmt.Errorf("mismatch: sample has %d values vs. %d types", len(s.Value), len(p.SampleType))
		}
		for _, l := range s.Location {
			if l == nil {
				return fmt.Errorf("sample has nil location")
			}
		}
	}

	// Check that all mappings/locations/functions are in the tables
	// Check that there are no duplicate ids
	mappings := make(map[uint64]*Mapping, len(p.Mapping))
	for _, m := range p.Mapping {
		if m == nil {
			return fmt.Errorf("profile has nil mapping")
		}
		if m.ID == 0 {
			return fmt.Errorf("found mapping with reserved ID=0")
		}
		if mappings[m.ID] != nil {
			return fmt.Errorf("multiple mappings with same id: %d", m.ID)
		}
		mappings[m.ID] = m
	}
	functions := make(map[uint64]*Function, len(p.Function))
	for _, f := range p.Function {
		if f == nil {
			return fmt.Errorf("profile has nil function")
		}
		if f.ID == 0 {
			return fmt.Errorf("found function with reserved ID=0")
		}
		if functions[f.ID] != nil {
			return fmt.Errorf("multiple functions with same id: %d", f.ID)
		}
		functions[f.ID] = f
	}
	locations := make(map[uint64]*Location, len(p.Location))
	for _, l := range p.Location {
		if l == nil {
			return fmt.Errorf("profile has nil location")
		}
		if l.ID == 0 {
			return fmt.Errorf("found location with reserved id=0")
		}
		if locations[l.ID] != nil {
			return fmt.Errorf("multiple locations with same id: %d", l.ID)
		}
		locations[l.ID] = l
		if m := l.Mapping; m != nil {
			if m.ID == 0 || mappings[m.ID] != m {
				return fmt.Errorf("inconsistent mapping %p: %d", m, m.ID)
			}
		}
		for _, ln := range l.Line {
			f := ln.Function
			if f == nil {
				return fmt.Errorf("location id: %d has a line with nil function", l.ID)
			}
			if f.ID == 0 || functions[f.ID] != f {
				return fmt.Errorf("inconsistent function %p: %d", f, f.ID)
			}
		}
	}
	return nil
}

// Aggregate merges the locations in the profile into equivalence
// classes preserving the request attributes. It also updates the
// samples to point to the merged locations.
func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, columnnumber, address bool) error {
	for _, m := range p.Mapping {
		m.HasInlineFrames = m.HasInlineFrames && inlineFrame
		m.HasFunctions = m.HasFunctions && function
		m.HasFilenames = m.HasFilenames && filename
		m.HasLineNumbers = m.HasLineNumbers && linenumber
	}

	// Aggregate functions
	if !function || !filename {
		for _, f := range p.Function {
			if !function {
				f.Name = ""
				f.SystemName = ""
			}
			if !filename {
				f.Filename = ""
			}
		}
	}

	// Aggregate locations
	if !inlineFrame || !address || !linenumber || !columnnumber {
		for _, l := range p.Location {
			if !inlineFrame && len(l.Line) > 1 {
				l.Line = l.Line[len(l.Line)-1:]
			}
			if !linenumber {
				for i := range l.Line {
					l.Line[i].Line = 0
					l.Line[i].Column = 0
				}
			}
			if !columnnumber {
				for i := range l.Line {
					l.Line[i].Column = 0
				}
			}
			if !address {
				l.Address = 0
			}
		}
	}

	return p.CheckValid()
}

// NumLabelUnits returns a map of numeric label keys to the units
// associated with those keys and a map of those keys to any units
// that were encountered but not used.
// Unit for a given key is the first encountered unit for that key. If multiple
// units are encountered for values paired with a particular key, then the first
// unit encountered is used and all other units are returned in sorted order
// in map of ignored units.
// If no units are encountered for a particular key, the unit is then inferred
// based on the key.
func (p *Profile) NumLabelUnits() (map[string]string, map[string][]string) {
	numLabelUnits := map[string]string{}
	ignoredUnits := map[string]map[string]bool{}
	encounteredKeys := map[string]bool{}

	// Determine units based on numeric tags for each sample.
	for _, s := range p.Sample {
		for k := range s.NumLabel {
			encounteredKeys[k] = true
			for _, unit := range s.NumUnit[k] {
				if unit == "" {
					continue
				}
				if wantUnit, ok := numLabelUnits[k]; !ok {
					numLabelUnits[k] = unit
				} else if wantUnit != unit {
					if v, ok := ignoredUnits[k]; ok {
						v[unit] = true
					} else {
						ignoredUnits[k] = map[string]bool{unit: true}
					}
				}
			}
		}
	}
	// Infer units for keys without any units associated with
	// numeric tag values.
	for key := range encounteredKeys {
		unit := numLabelUnits[key]
		if unit == "" {
			switch key {
			case "alignment", "request":
				numLabelUnits[key] = "bytes"
			default:
				numLabelUnits[key] = key
			}
		}
	}

	// Copy ignored units into more readable format
	unitsIgnored := make(map[string][]string, len(ignoredUnits))
	for key, values := range ignoredUnits {
		units := make([]string, len(values))
		i := 0
		for unit := range values {
			units[i] = unit
			i++
		}
		sort.Strings(units)
		unitsIgnored[key] = units
	}

	return numLabelUnits, unitsIgnored
}

// String dumps a text representation of a profile. Intended mainly
// for debugging purposes.
func (p *Profile) String() string {
	ss := make([]string, 0, len(p.Comments)+len(p.Sample)+len(p.Mapping)+len(p.Location))
	for _, c := range p.Comments {
		ss = append(ss, "Comment: "+c)
	}
	if url := p.DocURL; url != "" {
		ss = append(ss, fmt.Sprintf("Doc: %s", url))
	}
	if pt := p.PeriodType; pt != nil {
		ss = append(ss, fmt.Sprintf("PeriodType: %s %s", pt.Type, pt.Unit))
	}
	ss = append(ss, fmt.Sprintf("Period: %d", p.Period))
	if p.TimeNanos != 0 {
		ss = append(ss, fmt.Sprintf("Time: %v", time.Unix(0, p.TimeNanos)))
	}
	if p.DurationNanos != 0 {
		ss = append(ss, fmt.Sprintf("Duration: %.4v", time.Duration(p.DurationNanos)))
	}

	ss = append(ss, "Samples:")
	var sh1 string
	for _, s := range p.SampleType {
		dflt := ""
		if s.Type == p.DefaultSampleType {
			dflt = "[dflt]"
		}
		sh1 = sh1 + fmt.Sprintf("%s/%s%s ", s.Type, s.Unit, dflt)
	}
	ss = append(ss, strings.TrimSpace(sh1))
	for _, s := range p.Sample {
		ss = append(ss, s.string())
	}

	ss = append(ss, "Locations")
	for _, l := range p.Location {
		ss = append(ss, l.string())
	}

	ss = append(ss, "Mappings")
	for _, m := range p.Mapping {
		ss = append(ss, m.string())
	}

	return strings.Join(ss, "\n") + "\n"
}

// string dumps a text representation of a mapping. Intended mainly
// for debugging purposes.
func (m *Mapping) string() string {
	bits := ""
	if m.HasFunctions {
		bits = bits + "[FN]"
	}
	if m.HasFilenames {
		bits = bits + "[FL]"
	}
	if m.HasLineNumbers {
		bits = bits + "[LN]"
	}
	if m.HasInlineFrames {
		bits = bits + "[IN]"
	}
	return fmt.Sprintf("%d: %#x/%#x/%#x %s %s %s",
		m.ID,
		m.Start, m.Limit, m.Offset,
		m.File,
		m.BuildID,
		bits)
}

// string dumps a text representation of a location. Intended mainly
// for debugging purposes.
func (l *Location) string() string {
	ss := []string{}
	locStr := fmt.Sprintf("%6d: %#x ", l.ID, l.Address)
	if m := l.Mapping; m != nil {
		locStr = locStr + fmt.Sprintf("M=%d ", m.ID)
	}
	if l.IsFolded {
		locStr = locStr + "[F] "
	}
	if len(l.Line) == 0 {
		ss = append(ss, locStr)
	}
	for li := range l.Line {
		lnStr := "??"
		if fn := l.Line[li].Function; fn != nil {
			lnStr = fmt.Sprintf("%s %s:%d:%d s=%d",
				fn.Name,
				fn.Filename,
				l.Line[li].Line,
				l.Line[li].Column,
				fn.StartLine)
			if fn.Name != fn.SystemName {
				lnStr = lnStr + "(" + fn.SystemName + ")"
			}
		}
		ss = append(ss, locStr+lnStr)
		// Do not print location details past the first line
		locStr = "             "
	}
	return strings.Join(ss, "\n")
}

// string dumps a text representation of a sample. Intended mainly
// for debugging purposes.
func (s *Sample) string() string {
	ss := []string{}
	var sv string
	for _, v := range s.Value {
		sv = fmt.Sprintf("%s %10d", sv, v)
	}
	sv = sv + ": "
	for _, l := range s.Location {
		sv = sv + fmt.Sprintf("%d ", l.ID)
	}
	ss = append(ss, sv)
	const labelHeader = "                "
	if len(s.Label) > 0 {
		ss = append(ss, labelHeader+labelsToString(s.Label))
	}
	if len(s.NumLabel) > 0 {
		ss = append(ss, labelHeader+numLabelsToString(s.NumLabel, s.NumUnit))
	}
	return strings.Join(ss, "\n")
}

// labelsToString returns a string representation of a
// map representing labels.
func labelsToString(labels map[string][]string) string {
	ls := []string{}
	for k, v := range labels {
		ls = append(ls, fmt.Sprintf("%s:%v", k, v))
	}
	sort.Strings(ls)
	return strings.Join(ls, " ")
}

// numLabelsToString returns a string representation of a map
// representing numeric labels.
func numLabelsToString(numLabels map[string][]int64, numUnits map[string][]string) string {
	ls := []string{}
	for k, v := range numLabels {
		units := numUnits[k]
		var labelString string
		if len(units) == len(v) {
			values := make([]string, len(v))
			for i, vv := range v {
				values[i] = fmt.Sprintf("%d %s", vv, units[i])
			}
			labelString = fmt.Sprintf("%s:%v", k, values)
		} else {
			labelString = fmt.Sprintf("%s:%v", k, v)
		}
		ls = append(ls, labelString)
	}
	sort.Strings(ls)
	return strings.Join(ls, " ")
}

// SetLabel sets the specified key to the specified value for all samples in the
// profile.
func (p *Profile) SetLabel(key string, value []string) {
	for _, sample := range p.Sample {
		if sample.Label == nil {
			sample.Label = map[string][]string{key: value}
		} else {
			sample.Label[key] = value
		}
	}
}

// RemoveLabel removes all labels associated with the specified key for all
// samples in the profile.
func (p *Profile) RemoveLabel(key string) {
	for _, sample := range p.Sample {
		delete(sample.Label, key)
	}
}

// HasLabel returns true if a sample has a label with indicated key and value.
func (s *Sample) HasLabel(key, value string) bool {
	for _, v := range s.Label[key] {
		if v == value {
			return true
		}
	}
	return false
}

// SetNumLabel sets the specified key to the specified value for all samples in the
// profile. "unit" is a slice that describes the units that each corresponding member
// of "values" is measured in (e.g. bytes or seconds).  If there is no relevant
// unit for a given value, that member of "unit" should be the empty string.
// "unit" must either have the same length as "value", or be nil.
func (p *Profile) SetNumLabel(key string, value []int64, unit []string) {
	for _, sample := range p.Sample {
		if sample.NumLabel == nil {
			sample.NumLabel = map[string][]int64{key: value}
		} else {
			sample.NumLabel[key] = value
		}
		if sample.NumUnit == nil {
			sample.NumUnit = map[string][]string{key: unit}
		} else {
			sample.NumUnit[key] = unit
		}
	}
}

// RemoveNumLabel removes all numerical labels associated with the specified key for all
// samples in the profile.
func (p *Profile) RemoveNumLabel(key string) {
	for _, sample := range p.Sample {
		delete(sample.NumLabel, key)
		delete(sample.NumUnit, key)
	}
}

// DiffBaseSample returns true if a sample belongs to the diff base and false
// otherwise.
func (s *Sample) DiffBaseSample() bool {
	return s.HasLabel("pprof::base", "true")
}

// Scale multiplies all sample values in a profile by a constant and keeps
// only samples that have at least one non-zero value.
func (p *Profile) Scale(ratio float64) {
	if ratio == 1 {
		return
	}
	ratios := make([]float64, len(p.SampleType))
	for i := range p.SampleType {
		ratios[i] = ratio
	}
	p.ScaleN(ratios)
}

// ScaleN multiplies each sample values in a sample by a different amount
// and keeps only samples that have at least one non-zero value.
func (p *Profile) ScaleN(ratios []float64) error {
	if len(p.SampleType) != len(ratios) {
		return fmt.Errorf("mismatched scale ratios, got %d, want %d", len(ratios), len(p.SampleType))
	}
	allOnes := true
	for _, r := range ratios {
		if r != 1 {
			allOnes = false
			break
		}
	}
	if allOnes {
		return nil
	}
	fillIdx := 0
	for _, s := range p.Sample {
		keepSample := false
		for i, v := range s.Value {
			if ratios[i] != 1 {
				val := int64(math.Round(float64(v) * ratios[i]))
				s.Value[i] = val
				keepSample = keepSample || val != 0
			}
		}
		if keepSample {
			p.Sample[fillIdx] = s
			fillIdx++
		}
	}
	p.Sample = p.Sample[:fillIdx]
	return nil
}

// HasFunctions determines if all locations in this profile have
// symbolized function information.
func (p *Profile) HasFunctions() bool {
	for _, l := range p.Location {
		if l.Mapping != nil && !l.Mapping.HasFunctions {
			return false
		}
	}
	return true
}

// HasFileLines determines if all locations in this profile have
// symbolized file and line number information.
func (p *Profile) HasFileLines() bool {
	for _, l := range p.Location {
		if l.Mapping != nil && (!l.Mapping.HasFilenames || !l.Mapping.HasLineNumbers) {
			return false
		}
	}
	return true
}

// Unsymbolizable returns true if a mapping points to a binary for which
// locations can't be symbolized in principle, at least now. Examples are
// "[vdso]", "[vsyscall]" and some others, see the code.
func (m *Mapping) Unsymbolizable() bool {
	name := filepath.Base(m.File)
	return strings.HasPrefix(name, "[") || strings.HasPrefix(name, "linux-vdso") || strings.HasPrefix(m.File, "/dev/dri/") || m.File == "//anon"
}

// Copy makes a fully independent copy of a profile.
func (p *Profile) Copy() *Profile {
	pp := &Profile{}
	if err := unmarshal(serialize(p), pp); err != nil {
		panic(err)
	}
	if err := pp.postDecode(); err != nil {
		panic(err)
	}

	return pp
}