// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel(R) Processor Trace PMU driver for perf
 * Copyright (c) 2013-2014, Intel Corporation.
 *
 * Intel PT is specified in the Intel Architecture Instruction Set Extensions
 * Programming Reference:
 * http://software.intel.com/en-us/intel-isa-extensions
 */

#undef DEBUG

#define pr_fmt(fmt) …

#include <linux/types.h>
#include <linux/bits.h>
#include <linux/limits.h>
#include <linux/slab.h>
#include <linux/device.h>

#include <asm/perf_event.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/intel_pt.h>
#include <asm/cpu_device_id.h>

#include "../perf_event.h"
#include "pt.h"

static DEFINE_PER_CPU(struct pt, pt_ctx);

static struct pt_pmu pt_pmu;

/*
 * Capabilities of Intel PT hardware, such as number of address bits or
 * supported output schemes, are cached and exported to userspace as the "caps"
 * attribute group of the pt pmu device
 * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
 * relevant bits together with intel_pt traces.
 *
 * These are necessary for both trace decoding (payloads_lip contains the
 * address width encoded in IP-related packets) and event configuration
 * (bitmasks with permitted values for certain bit fields).
 */
#define PT_CAP(_n, _l, _r, _m) …

static struct pt_cap_desc {
	…
} pt_caps[] = …;

u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability)
{
	…
}
EXPORT_SYMBOL_GPL(…);

u32 intel_pt_validate_hw_cap(enum pt_capabilities cap)
{
	…
}
EXPORT_SYMBOL_GPL(…);

static ssize_t pt_cap_show(struct device *cdev,
			   struct device_attribute *attr,
			   char *buf)
{
	…
}

static struct attribute_group pt_cap_group __ro_after_init = …;

PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);
PMU_FORMAT_ATTR(…);

static struct attribute *pt_formats_attr[] = …;

static struct attribute_group pt_format_group = …;

static ssize_t
pt_timing_attr_show(struct device *dev, struct device_attribute *attr,
		    char *page)
{
	…
}

PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0,
	       pt_timing_attr_show);
PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1,
	       pt_timing_attr_show);

static struct attribute *pt_timing_attr[] = …;

static struct attribute_group pt_timing_group = …;

static const struct attribute_group *pt_attr_groups[] = …;

static int __init pt_pmu_hw_init(void)
{
	…
}

#define RTIT_CTL_CYC_PSB …
#define RTIT_CTL_MTC …
#define RTIT_CTL_PTW …
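/*
 * Illustration only, not part of the driver: a minimal userspace sketch of
 * how a tool might read the "caps" attribute group exported above under
 * /sys/bus/event_source/devices/intel_pt/caps/, so the values can be saved
 * alongside the trace for the decoder. The helper name pt_read_sysfs_cap()
 * and the minimal error handling are assumptions of this sketch.
 */
#include <stdio.h>
#include <string.h>

static int pt_read_sysfs_cap(const char *name, char *val, size_t len)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/bus/event_source/devices/intel_pt/caps/%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(val, (int)len, f))
		val[0] = '\0';
	fclose(f);
	/* sysfs attributes end in a newline; strip it */
	val[strcspn(val, "\n")] = '\0';
	return 0;
}
/* e.g. pt_read_sysfs_cap("payloads_lip", buf, sizeof(buf)); */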
/*
 * Bit 0 (TraceEn) in the attr.config is meaningless as the
 * corresponding bit in the RTIT_CTL can only be controlled
 * by the driver; therefore, repurpose it to mean: pass
 * through the bit that was previously assumed to be always
 * on for PT, thereby allowing the user to *not* set it if
 * they so wish. See also pt_event_valid() and pt_config().
 */
#define RTIT_CTL_PASSTHROUGH …

#define PT_CONFIG_MASK …

static bool pt_event_valid(struct perf_event *event)
{
	…
}

/*
 * PT configuration helpers
 * These are all CPU-affine and operate on a local PT
 */

static void pt_config_start(struct perf_event *event)
{
	…
}

/* Address ranges and their corresponding msr configuration registers */
static const struct pt_address_range {
	…
} pt_address_ranges[] = …;

static u64 pt_config_filters(struct perf_event *event)
{
	…
}

static void pt_config(struct perf_event *event)
{
	…
}

static void pt_config_stop(struct perf_event *event)
{
	…
}

/**
 * struct topa - ToPA metadata
 * @list: linkage to struct pt_buffer's list of tables
 * @offset: offset of the first entry in this table in the buffer
 * @size: total size of all entries in this table
 * @last: index of the last initialized entry in this table
 * @z_count: how many times the first entry repeats
 */
struct topa {
	…
};

/*
 * Keep ToPA table-related metadata on the same page as the actual table,
 * taking up a few words from the top
 */

#define TENTS_PER_PAGE …

/**
 * struct topa_page - page-sized ToPA table with metadata at the top
 * @table: actual ToPA table entries, as understood by PT hardware
 * @topa: metadata
 */
struct topa_page {
	…
};

static inline struct topa_page *topa_to_page(struct topa *topa)
{
	…
}

static inline struct topa_page *topa_entry_to_page(struct topa_entry *te)
{
	…
}

static inline phys_addr_t topa_pfn(struct topa *topa)
{
	…
}

/* make -1 stand for the last table entry */
#define TOPA_ENTRY(t, i) …
#define TOPA_ENTRY_SIZE(t, i) …
#define TOPA_ENTRY_PAGES(t, i) …

static void pt_config_buffer(struct pt_buffer *buf)
{
	…
}

/**
 * topa_alloc() - allocate page-sized ToPA table
 * @cpu: CPU on which to allocate.
 * @gfp: Allocation flags.
 *
 * Return: On success, return the pointer to ToPA table page.
 */
static struct topa *topa_alloc(int cpu, gfp_t gfp)
{
	…
}

/**
 * topa_free() - free a page-sized ToPA table
 * @topa: Table to deallocate.
 */
static void topa_free(struct topa *topa)
{
	…
}

/**
 * topa_insert_table() - insert a ToPA table into a buffer
 * @buf: PT buffer that's being extended.
 * @topa: New topa table to be inserted.
 *
 * If it's the first table in this buffer, set up buffer's pointers
 * accordingly; otherwise, add an END=1 link entry pointing to @topa in the
 * current "last" table and adjust the last table pointer to @topa.
 */
static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
{
	…
}

/**
 * topa_table_full() - check if a ToPA table is filled up
 * @topa: ToPA table.
 */
static bool topa_table_full(struct topa *topa)
{
	…
}

/**
 * topa_insert_pages() - create a list of ToPA tables
 * @buf: PT buffer being initialized.
 * @cpu: CPU on which to allocate.
 * @gfp: Allocation flags.
 *
 * This initializes a list of ToPA tables with entries from
 * the data_pages provided by rb_alloc_aux().
 *
 * Return: 0 on success or error code.
 */
static int topa_insert_pages(struct pt_buffer *buf, int cpu, gfp_t gfp)
{
	…
}

/**
 * pt_topa_dump() - print ToPA tables and their entries
 * @buf: PT buffer.
 */
static void pt_topa_dump(struct pt_buffer *buf)
{
	…
}

/**
 * pt_buffer_advance() - advance to the next output region
 * @buf: PT buffer.
 *
 * Advance the current pointers in the buffer to the next ToPA entry.
 */
static void pt_buffer_advance(struct pt_buffer *buf)
{
	…
}
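/*
 * Illustration only, not the driver's actual (elided) definitions: a
 * self-contained sketch of the layout idea behind struct topa_page above,
 * where the hardware-visible entries and the struct topa bookkeeping share
 * one page, so converting between them is pointer arithmetic within that
 * page. All sketch_* names and field choices are hypothetical.
 */
#include <stddef.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096

struct sketch_topa_entry { uint64_t val; };	/* stand-in for a hw entry */
struct sketch_topa { size_t offset, size; int last; };

/* entries fill the page; the metadata takes the remainder */
#define SKETCH_TENTS_PER_PAGE \
	((SKETCH_PAGE_SIZE - sizeof(struct sketch_topa)) / \
	 sizeof(struct sketch_topa_entry))

struct sketch_topa_page {
	struct sketch_topa_entry table[SKETCH_TENTS_PER_PAGE];
	struct sketch_topa topa;
};

/* recover the enclosing page from the metadata pointer, container_of-style */
static inline struct sketch_topa_page *
sketch_topa_to_page(struct sketch_topa *topa)
{
	return (struct sketch_topa_page *)
		((char *)topa - offsetof(struct sketch_topa_page, topa));
}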
/**
 * pt_update_head() - calculate current offsets and sizes
 * @pt: Per-cpu pt context.
 *
 * Update buffer's current write pointer position and data size.
 */
static void pt_update_head(struct pt *pt)
{
	…
}

/**
 * pt_buffer_region() - obtain current output region's address
 * @buf: PT buffer.
 */
static void *pt_buffer_region(struct pt_buffer *buf)
{
	…
}

/**
 * pt_buffer_region_size() - obtain current output region's size
 * @buf: PT buffer.
 */
static size_t pt_buffer_region_size(struct pt_buffer *buf)
{
	…
}

/**
 * pt_handle_status() - take care of possible status conditions
 * @pt: Per-cpu pt context.
 */
static void pt_handle_status(struct pt *pt)
{
	…
}

/**
 * pt_read_offset() - translate registers into buffer pointers
 * @buf: PT buffer.
 *
 * Set buffer's output pointers from MSR values.
 */
static void pt_read_offset(struct pt_buffer *buf)
{
	…
}

static struct topa_entry *
pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg)
{
	…
}

static struct topa_entry *
pt_topa_prev_entry(struct pt_buffer *buf, struct topa_entry *te)
{
	…
}

/**
 * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
 * @buf: PT buffer.
 * @handle: Current output handle.
 *
 * Place INT and STOP marks to prevent overwriting old data that the consumer
 * hasn't yet collected, and to wake up the consumer after a certain fraction
 * of the buffer has filled up. Only needed and sensible for non-snapshot
 * counters.
 *
 * This obviously relies on buf::head to figure out buffer markers, so it has
 * to be called after pt_buffer_reset_offsets() and before the hardware tracing
 * is enabled.
 */
static int pt_buffer_reset_markers(struct pt_buffer *buf,
				   struct perf_output_handle *handle)
{
	…
}

/**
 * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
 * @buf: PT buffer.
 * @head: Write pointer (aux_head) from AUX buffer.
 *
 * Find the ToPA table and entry corresponding to given @head and set buffer's
 * "current" pointers accordingly. This is done after we have obtained the
 * current aux_head position from a successful call to perf_aux_output_begin()
 * to make sure the hardware is writing to the right place.
 *
 * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
 * into PT MSRs when the tracing is enabled and buf::head and buf::data_size,
 * which are used to determine INT and STOP markers' locations by a subsequent
 * call to pt_buffer_reset_markers().
 */
static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
{
	…
}

/**
 * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
 * @buf: PT buffer.
 */
static void pt_buffer_fini_topa(struct pt_buffer *buf)
{
	…
}

/**
 * pt_buffer_init_topa() - initialize ToPA table for pt buffer
 * @buf: PT buffer.
 * @cpu: CPU on which to allocate.
 * @nr_pages: No. of pages to allocate.
 * @gfp: Allocation flags.
 *
 * Return: 0 on success or error code.
 */
static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
			       unsigned long nr_pages, gfp_t gfp)
{
	…
}

static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
{
	…
}

/**
 * pt_buffer_setup_aux() - set up topa tables for a PT buffer
 * @event: Performance event
 * @pages: Array of pointers to buffer pages passed from perf core.
 * @nr_pages: Number of pages in the buffer.
 * @snapshot: If this is a snapshot/overwrite counter.
 *
 * This is a pmu::setup_aux callback that sets up ToPA tables and all the
 * bookkeeping for an AUX buffer.
 *
 * Return: Our private PT buffer structure.
 */
static void *
pt_buffer_setup_aux(struct perf_event *event, void **pages,
		    int nr_pages, bool snapshot)
{
	…
}
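/*
 * Illustration only, hypothetical names: pt_buffer_reset_offsets() above has
 * to map the aux_head byte offset back onto an output region plus an offset
 * within it. A minimal sketch of that arithmetic, assuming all output regions
 * share one power-of-2 size (the general driver path also handles mixed
 * region sizes via pt_topa_entry_for_page()):
 */
#include <stdint.h>

struct sketch_pos {
	uint64_t region;	/* which output region head falls in */
	uint64_t region_off;	/* byte offset into that region */
};

static struct sketch_pos sketch_decompose_head(uint64_t head,
					       uint64_t region_size)
{
	struct sketch_pos p;

	/* region_size is assumed to be a power of 2, as ToPA requires */
	p.region     = head >> __builtin_ctzll(region_size);
	p.region_off = head & (region_size - 1);
	return p;
}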
/**
 * pt_buffer_free_aux() - perf AUX deallocation path callback
 * @data: PT buffer.
 */
static void pt_buffer_free_aux(void *data)
{
	…
}

static int pt_addr_filters_init(struct perf_event *event)
{
	…
}

static void pt_addr_filters_fini(struct perf_event *event)
{
	…
}

#ifdef CONFIG_X86_64
/* Clamp to a canonical address greater-than-or-equal-to the address given */
static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits)
{
	…
}

/* Clamp to a canonical address less-than-or-equal-to the address given */
static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits)
{
	…
}
#else
#define clamp_to_ge_canonical_addr …
#define clamp_to_le_canonical_addr …
#endif

static int pt_event_addr_filters_validate(struct list_head *filters)
{
	…
}

static void pt_event_addr_filters_sync(struct perf_event *event)
{
	…
}

/**
 * intel_pt_interrupt() - PT PMI handler
 */
void intel_pt_interrupt(void)
{
	…
}

void intel_pt_handle_vmx(int on)
{
	…
}
EXPORT_SYMBOL_GPL(…);

/*
 * PMU callbacks
 */

static void pt_event_start(struct perf_event *event, int mode)
{
	…
}

static void pt_event_stop(struct perf_event *event, int mode)
{
	…
}

static long pt_event_snapshot_aux(struct perf_event *event,
				  struct perf_output_handle *handle,
				  unsigned long size)
{
	…
}

static void pt_event_del(struct perf_event *event, int mode)
{
	…
}

static int pt_event_add(struct perf_event *event, int mode)
{
	…
}

static void pt_event_read(struct perf_event *event)
{
	…
}

static void pt_event_destroy(struct perf_event *event)
{
	…
}

static int pt_event_init(struct perf_event *event)
{
	…
}

void cpu_emergency_stop_pt(void)
{
	…
}

int is_intel_pt_event(struct perf_event *event)
{
	…
}

static __init int pt_init(void)
{
	…
}
arch_initcall(pt_init);
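
/*
 * Illustration only, hypothetical names, not the driver's (elided)
 * clamp_to_{ge,le}_canonical_addr() implementations above: on x86_64 an
 * address is canonical when bits [63:n-1] are all copies of bit n-1, where
 * n is the number of implemented virtual address bits (48 or 57). Addresses
 * inside the non-canonical hole get rounded to the nearest canonical address
 * in the requested direction, which is how an address filter spanning the
 * hole can be snapped to hardware-acceptable bounds.
 */
#include <stdbool.h>
#include <stdint.h>

static bool sketch_is_canonical(uint64_t vaddr, uint8_t vaddr_bits)
{
	int shift = 64 - vaddr_bits;

	/* canonical <=> sign-extending from bit (vaddr_bits - 1) is a no-op */
	return (uint64_t)((int64_t)(vaddr << shift) >> shift) == vaddr;
}

/* smallest canonical address >= vaddr: start of the upper canonical half */
static uint64_t sketch_clamp_ge(uint64_t vaddr, uint8_t vaddr_bits)
{
	return sketch_is_canonical(vaddr, vaddr_bits) ?
	       vaddr : ~0ULL << (vaddr_bits - 1);
}

/* largest canonical address <= vaddr: end of the lower canonical half */
static uint64_t sketch_clamp_le(uint64_t vaddr, uint8_t vaddr_bits)
{
	return sketch_is_canonical(vaddr, vaddr_bits) ?
	       vaddr : (1ULL << (vaddr_bits - 1)) - 1;
}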