linux/arch/x86/events/amd/core.c

// SPDX-License-Identifier: GPL-2.0-only
#include <linux/perf_event.h>
#include <linux/jump_label.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/nmi.h>

#include "../perf_event.h"

static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
static unsigned long perf_nmi_window;

/* AMD Event 0xFFF: Merge.  Used with Large Increment per Cycle events */
#define AMD_MERGE_EVENT
#define AMD_MERGE_EVENT_ENABLE

/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */
static u64 amd_pmu_global_cntr_mask __read_mostly;

static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =;

static __initconst const u64 amd_hw_cache_event_ids_f17h
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =;

/*
 * AMD Performance Monitor K7 and later, up to and including Family 16h:
 */
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =;

/*
 * AMD Performance Monitor Family 17h and later:
 */
static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =;

static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =;

static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] =;

static u64 amd_pmu_event_map(int hw_event)
{}

/*
 * Previously calculated offsets
 */
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;

/*
 * Legacy CPUs:
 *   4 counters starting at 0xc0010000 each offset by 1
 *
 * CPUs with core performance counter extensions:
 *   6 counters starting at 0xc0010200 each offset by 2
 */
static inline int amd_pmu_addr_offset(int index, bool eventsel)
{}
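
/*
 * A minimal sketch (not the in-tree body, which is elided above) of the
 * offset calculation the comment describes: legacy counters are spaced one
 * MSR apart, while CPUs with the core extensions (X86_FEATURE_PERFCTR_CORE)
 * interleave event select and count registers, spacing them two apart.
 * Results are memoized in event_offsets[]/count_offsets[]. The _sketch
 * suffix marks this as a hypothetical illustration.
 */
static inline int amd_pmu_addr_offset_sketch(int index, bool eventsel)
{
	int offset;

	if (!index)
		return index;

	offset = eventsel ? event_offsets[index] : count_offsets[index];
	if (offset)
		return offset;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		offset = index;		/* legacy: consecutive MSRs */
	else
		offset = index << 1;	/* extended: eventsel/counter interleaved */

	if (eventsel)
		event_offsets[index] = offset;
	else
		count_offsets[index] = offset;

	return offset;
}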

/*
 * AMD64 events are detected based on their event codes.
 */
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
{}

static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
{}

DEFINE_STATIC_CALL_RET0();

static int amd_core_hw_config(struct perf_event *event)
{}

static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{}

static int amd_pmu_hw_config(struct perf_event *event)
{}

static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
					   struct perf_event *event)
{}

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12.
 *
 * NB events are events measuring L3 cache and HyperTransport
 * traffic. They are identified by an event code >= 0xe00.
 * They measure events on the NorthBridge, which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When an NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the same
 * counters to host NB events; this is why we use atomic ops. Some
 * multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling __amd_put_nb_event_constraints().
 *
 * Non-NB events are not impacted by this restriction.
 */
static struct event_constraint *
__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
			       struct event_constraint *c)
{}
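
/*
 * A minimal sketch of the allocation scheme described above, assuming the
 * struct amd_nb layout from ../perf_event.h (an owners[] pointer array plus
 * one event_constraint per shared counter). A slot is claimed atomically
 * with cmpxchg() so that no two cores program the same shared NB counter.
 * The helper name is hypothetical, and the reuse/cleanup handling of the
 * real function is omitted.
 */
static struct event_constraint *
__amd_claim_nb_counter_sketch(struct amd_nb *nb, struct perf_event *event,
			      int num_counters)
{
	struct perf_event *old;
	int idx;

	for (idx = 0; idx < num_counters; idx++) {
		/* atomically claim a free slot in the per-NB table */
		old = cmpxchg(nb->owners + idx, NULL, event);
		if (!old || old == event)
			return &nb->event_constraints[idx];
	}

	/* no shared counter available: scheduling will eventually fail */
	return &emptyconstraint;
}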

static struct amd_nb *amd_alloc_nb(int cpu)
{}

typedef void (amd_pmu_branch_reset_t)(void);
DEFINE_STATIC_CALL_NULL();

static void amd_pmu_cpu_reset(int cpu)
{}

static int amd_pmu_cpu_prepare(int cpu)
{}

static void amd_pmu_cpu_starting(int cpu)
{}

static void amd_pmu_cpu_dead(int cpu)
{}

static __always_inline void amd_pmu_set_global_ctl(u64 ctl)
{}

static inline u64 amd_pmu_get_global_status(void)
{}

static inline void amd_pmu_ack_global_status(u64 status)
{}

static bool amd_pmu_test_overflow_topbit(int idx)
{}

static bool amd_pmu_test_overflow_status(int idx)
{}

DEFINE_STATIC_CALL();

/*
 * When a PMC counter overflows, an NMI is used to process the event and
 * reset the counter. NMI latency can result in the counter being updated
 * before the NMI can run, which can result in what appear to be spurious
 * NMIs. This function is intended to wait for the NMI to run and reset
 * the counter to avoid possible unhandled NMI messages.
 */
#define OVERFLOW_WAIT_COUNT

static void amd_pmu_wait_on_overflow(int idx)
{}
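
/*
 * A sketch of the wait loop the comment describes: poll the raw counter
 * until the NMI handler has reloaded it (top bit set again), giving up
 * after a bounded number of iterations. The _sketch name, the literal 50
 * (a stand-in for OVERFLOW_WAIT_COUNT, whose value is elided above) and
 * the 1us poll interval are illustrative assumptions.
 */
static void amd_pmu_wait_on_overflow_sketch(int idx)
{
	unsigned int i;
	u64 counter;

	for (i = 0; i < 50; i++) {
		rdmsrl(x86_pmu_event_addr(idx), counter);
		if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
			break;	/* counter was reloaded to a negative value */

		/* may run in NMI/IRQ context, so busy-wait, don't sleep */
		udelay(1);
	}
}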

static void amd_pmu_check_overflow(void)
{}

static void amd_pmu_enable_event(struct perf_event *event)
{}

static void amd_pmu_enable_all(int added)
{}

static void amd_pmu_v2_enable_event(struct perf_event *event)
{}

static __always_inline void amd_pmu_core_enable_all(void)
{}

static void amd_pmu_v2_enable_all(int added)
{}

static void amd_pmu_disable_event(struct perf_event *event)
{}

static void amd_pmu_disable_all(void)
{}

static __always_inline void amd_pmu_core_disable_all(void)
{}

static void amd_pmu_v2_disable_all(void)
{}

DEFINE_STATIC_CALL_NULL();

static void amd_pmu_add_event(struct perf_event *event)
{}

DEFINE_STATIC_CALL_NULL();

static void amd_pmu_del_event(struct perf_event *event)
{}

/*
 * Because of NMI latency, if multiple PMC counters are active or other sources
 * of NMIs are received, the perf NMI handler can handle one or more overflowed
 * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
 * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
 * back-to-back NMI support won't be active. This PMC handler needs to take into
 * account that this can occur, otherwise this could result in unknown NMI
 * messages being issued. Examples of this are PMC overflow while in the NMI
 * handler when multiple PMCs are active, or PMC overflow while handling some
 * other source of NMI.
 *
 * Attempt to mitigate this by creating an NMI window during which any
 * un-handled NMI received will be claimed. Claiming an un-handled NMI does
 * not extend the window, so it cannot stretch past the point where latent
 * NMIs could still plausibly arrive. The per-CPU perf_nmi_tstamp is set to
 * the window end time whenever perf has handled a counter; an un-handled
 * NMI is then claimed only if it arrives within that window.
 */
static inline int amd_pmu_adjust_nmi_window(int handled)
{}
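
/*
 * A minimal sketch of the window logic described above, built on the
 * per-CPU perf_nmi_tstamp and perf_nmi_window declared at the top of this
 * file. NMI_HANDLED/NMI_DONE come from <asm/nmi.h>; the _sketch name is
 * hypothetical.
 */
static inline int amd_pmu_adjust_nmi_window_sketch(int handled)
{
	/*
	 * A counter was handled: (re)open the claim window by recording its
	 * end time on this CPU.
	 */
	if (handled) {
		this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);
		return handled;
	}

	/* Nothing handled: claim the NMI only if it falls inside the window. */
	if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
		return NMI_DONE;

	return NMI_HANDLED;
}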

static int amd_pmu_handle_irq(struct pt_regs *regs)
{}

/*
 * AMD-specific callback invoked through perf_snapshot_branch_stack static
 * call, defined in include/linux/perf_event.h. See its definition for API
 * details. It is up to the caller to provide enough space in *entries* to fit
 * all LBR records; otherwise the returned result will be truncated to *cnt*
 * entries.
 */
static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
{}
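
/*
 * A sketch of the snapshot sequence the comment above implies, assuming the
 * LBR helpers and fields from ../perf_event.h (amd_pmu_lbr_read(),
 * cpuc->lbr_entries, x86_pmu.lbr_nr): quiesce the core counters with
 * interrupts off, read the LBR stack, copy at most cnt records, then
 * re-enable. The _sketch name is hypothetical and LBR-specific freezing
 * details are omitted.
 */
static int amd_pmu_v2_snapshot_branch_stack_sketch(struct perf_branch_entry *entries,
						   unsigned int cnt)
{
	struct cpu_hw_events *cpuc;
	unsigned long flags;

	/* Stop the core counters so the LBR stack is not contaminated. */
	local_irq_save(flags);
	amd_pmu_core_disable_all();

	cpuc = this_cpu_ptr(&cpu_hw_events);

	/* Pull the hardware LBR records into cpuc->lbr_entries. */
	amd_pmu_lbr_read();

	/* Truncate to what the caller can hold, as documented above. */
	cnt = min(cnt, x86_pmu.lbr_nr);
	memcpy(entries, cpuc->lbr_entries, cnt * sizeof(*entries));

	amd_pmu_v2_enable_all(0);
	local_irq_restore(flags);

	return cnt;
}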

static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
{}

static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{}

PMU_FORMAT_ATTR();
PMU_FORMAT_ATTR();
PMU_FORMAT_ATTR();
PMU_FORMAT_ATTR();
PMU_FORMAT_ATTR();

static struct attribute *amd_format_attr[] =;

/* AMD Family 15h */

#define AMD_EVENT_TYPE_MASK

#define AMD_EVENT_FP
#define AMD_EVENT_LS
#define AMD_EVENT_DC
#define AMD_EVENT_CU
#define AMD_EVENT_IC_DE
#define AMD_EVENT_EX_LS
#define AMD_EVENT_DE
#define AMD_EVENT_NB

/*
 * AMD family 15h event code/PMC mappings:
 *
 * type = event_code & 0x0F0:
 *
 * 0x000	FP	PERF_CTL[5:3]
 * 0x010	FP	PERF_CTL[5:3]
 * 0x020	LS	PERF_CTL[5:0]
 * 0x030	LS	PERF_CTL[5:0]
 * 0x040	DC	PERF_CTL[5:0]
 * 0x050	DC	PERF_CTL[5:0]
 * 0x060	CU	PERF_CTL[2:0]
 * 0x070	CU	PERF_CTL[2:0]
 * 0x080	IC/DE	PERF_CTL[2:0]
 * 0x090	IC/DE	PERF_CTL[2:0]
 * 0x0A0	---
 * 0x0B0	---
 * 0x0C0	EX/LS	PERF_CTL[5:0]
 * 0x0D0	DE	PERF_CTL[2:0]
 * 0x0E0	NB	NB_PERF_CTL[3:0]
 * 0x0F0	NB	NB_PERF_CTL[3:0]
 *
 * Exceptions:
 *
 * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x003	FP	PERF_CTL[3]
 * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x00B	FP	PERF_CTL[3]
 * 0x00D	FP	PERF_CTL[3]
 * 0x023	DE	PERF_CTL[2:0]
 * 0x02D	LS	PERF_CTL[3]
 * 0x02E	LS	PERF_CTL[3,0]
 * 0x031	LS	PERF_CTL[2:0] (**)
 * 0x043	CU	PERF_CTL[2:0]
 * 0x045	CU	PERF_CTL[2:0]
 * 0x046	CU	PERF_CTL[2:0]
 * 0x054	CU	PERF_CTL[2:0]
 * 0x055	CU	PERF_CTL[2:0]
 * 0x08F	IC	PERF_CTL[0]
 * 0x187	DE	PERF_CTL[0]
 * 0x188	DE	PERF_CTL[0]
 * 0x0DB	EX	PERF_CTL[5:0]
 * 0x0DC	LS	PERF_CTL[5:0]
 * 0x0DD	LS	PERF_CTL[5:0]
 * 0x0DE	LS	PERF_CTL[5:0]
 * 0x0DF	LS	PERF_CTL[5:0]
 * 0x1C0	EX	PERF_CTL[5:3]
 * 0x1D6	EX	PERF_CTL[5:0]
 * 0x1D8	EX	PERF_CTL[5:0]
 *
 * (*)  depending on the umask all FPU counters may be used
 * (**) only one unitmask enabled at a time
 */

static struct event_constraint amd_f15_PMC0  =;
static struct event_constraint amd_f15_PMC20 =;
static struct event_constraint amd_f15_PMC3  =;
static struct event_constraint amd_f15_PMC30 =;
static struct event_constraint amd_f15_PMC50 =;
static struct event_constraint amd_f15_PMC53 =;

static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
			       struct perf_event *event)
{}
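
/*
 * A condensed sketch of the dispatch the table above describes: mask out
 * the type nibble of the event code and map each class onto one of the
 * counter constraints defined above (the AMD_EVENT_* values are elided in
 * this listing). Only representative cases are shown; the per-event
 * exceptions in the table are omitted and the _sketch name is hypothetical.
 */
static struct event_constraint *
amd_get_event_constraints_f15h_sketch(struct cpu_hw_events *cpuc, int idx,
				      struct perf_event *event)
{
	unsigned int event_code = amd_get_event_code(&event->hw);

	switch (event_code & AMD_EVENT_TYPE_MASK) {
	case AMD_EVENT_FP:
		return &amd_f15_PMC53;		/* FP: PERF_CTL[5:3] */
	case AMD_EVENT_LS:
	case AMD_EVENT_DC:
	case AMD_EVENT_EX_LS:
		return &amd_f15_PMC50;		/* PERF_CTL[5:0] */
	case AMD_EVENT_CU:
	case AMD_EVENT_IC_DE:
	case AMD_EVENT_DE:
		return &amd_f15_PMC20;		/* PERF_CTL[2:0] */
	case AMD_EVENT_NB:			/* NB events: not handled here */
	default:
		return &emptyconstraint;
	}
}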

static struct event_constraint pair_constraint;

static struct event_constraint *
amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx,
			       struct perf_event *event)
{}

static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
					   struct perf_event *event)
{}

/*
 * Because of the way BRS operates, with inactive and active phases and a link
 * to a single counter, it is not possible to have two events using BRS
 * scheduled at the same time. There would be an issue with enforcing the
 * period of each one, and given that BRS saturates, it would not be possible
 * to guarantee correlated content for all events. Therefore, in situations
 * where multiple events want to use BRS, the kernel enforces mutual exclusion.
 * Exclusion is enforced by choosing only one counter for events using BRS.
 * The event scheduling logic will then automatically multiplex the
 * events and ensure that at most one event is actively using BRS.
 *
 * The BRS counter could be any counter, but since there is no such constraint
 * on Fam19h, all counters are equal and we simply pick the first one: PMC0.
 */
static struct event_constraint amd_fam19h_brs_cntr0_constraint =;

static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =;

static struct event_constraint *
amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{}
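
/*
 * A minimal sketch of the selection the comment above describes, assuming
 * the amd_is_brs_event() helper from ../perf_event.h: an event using BRS is
 * funneled onto the single PMC0 constraint (the paired variant if it is
 * also a Large Increment per Cycle event); everything else follows the
 * Family 17h rules. The _sketch name is hypothetical.
 */
static struct event_constraint *
amd_get_event_constraints_f19h_sketch(struct cpu_hw_events *cpuc, int idx,
				      struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (amd_is_brs_event(event))
		return amd_is_pair_event_code(hwc) ?
		       &amd_fam19h_brs_pair_cntr0_constraint :
		       &amd_fam19h_brs_cntr0_constraint;

	return amd_get_event_constraints_f17h(cpuc, idx, event);
}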


static ssize_t amd_event_sysfs_show(char *page, u64 config)
{}

static void amd_pmu_limit_period(struct perf_event *event, s64 *left)
{}

static __initconst const struct x86_pmu amd_pmu =;

static ssize_t branches_show(struct device *cdev,
			      struct device_attribute *attr,
			      char *buf)
{}

static DEVICE_ATTR_RO(branches);

static struct attribute *amd_pmu_branches_attrs[] =;

static umode_t
amd_branches_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{}

static struct attribute_group group_caps_amd_branches =;

#ifdef CONFIG_PERF_EVENTS_AMD_BRS

EVENT_ATTR_STR(branch-brs, amd_branch_brs,
	       "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");

static struct attribute *amd_brs_events_attrs[] =;

static umode_t
amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{}

static struct attribute_group group_events_amd_brs =;

#endif	/* CONFIG_PERF_EVENTS_AMD_BRS */

static const struct attribute_group *amd_attr_update[] =;

static int __init amd_core_pmu_init(void)
{}

__init int amd_pmu_init(void)
{}

static inline void amd_pmu_reload_virt(void)
{}

void amd_pmu_enable_virt(void)
{}
EXPORT_SYMBOL_GPL();

void amd_pmu_disable_virt(void)
{}
EXPORT_SYMBOL_GPL();