// SPDX-License-Identifier: GPL-2.0 /* * trace_hwlat.c - A simple Hardware Latency detector. * * Use this tracer to detect large system latencies induced by the behavior of * certain underlying system hardware or firmware, independent of Linux itself. * The code was developed originally to detect the presence of SMIs on Intel * and AMD systems, although there is no dependency upon x86 herein. * * The classical example usage of this tracer is in detecting the presence of * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a * somewhat special form of hardware interrupt spawned from earlier CPU debug * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge * LPC (or other device) to generate a special interrupt under certain * circumstances, for example, upon expiration of a special SMI timer device, * due to certain external thermal readings, on certain I/O address accesses, * and other situations. An SMI hits a special CPU pin, triggers a special * SMI mode (complete with special memory map), and the OS is unaware. * * Although certain hardware-induced latencies are necessary (for example, * a modern system often requires an SMI handler for correct thermal control * and remote management) they can wreak havoc upon any OS-level performance * guarantees toward low-latency, especially when the OS is not even made * aware of the presence of these interrupts. For this reason, we need a * somewhat brute force mechanism to detect these interrupts. In this case, * we do it by hogging all of the CPU(s) for configurable timer intervals, * sampling the built-in CPU timer, looking for discontiguous readings. * * WARNING: This implementation necessarily introduces latencies. Therefore, * you should NEVER use this tracer while running in a production * environment requiring any kind of low-latency performance * guarantee(s). * * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. 
<[email protected]> * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <[email protected]> * * Includes useful feedback from Clark Williams <[email protected]> * */ #include <linux/kthread.h> #include <linux/tracefs.h> #include <linux/uaccess.h> #include <linux/cpumask.h> #include <linux/delay.h> #include <linux/sched/clock.h> #include "trace.h" static struct trace_array *hwlat_trace; #define U64STR_SIZE … #define BANNER … #define DEFAULT_SAMPLE_WINDOW … #define DEFAULT_SAMPLE_WIDTH … #define DEFAULT_LAT_THRESHOLD … static struct dentry *hwlat_sample_width; /* sample width us */ static struct dentry *hwlat_sample_window; /* sample window us */ static struct dentry *hwlat_thread_mode; /* hwlat thread mode */ enum { … }; static char *thread_mode_str[] = …; /* Save the previous tracing_thresh value */ static unsigned long save_tracing_thresh; /* runtime kthread data */ struct hwlat_kthread_data { … }; static struct hwlat_kthread_data hwlat_single_cpu_data; static DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data); /* Tells NMIs to call back to the hwlat tracer to record timestamps */ bool trace_hwlat_callback_enabled; /* If the user changed threshold, remember it */ static u64 last_tracing_thresh = …; /* Individual latency samples are stored here when detected. */ struct hwlat_sample { … }; /* keep the global state somewhere. */ static struct hwlat_data { … } hwlat_data = …; static struct hwlat_kthread_data *get_cpu_data(void) { … } static bool hwlat_busy; static void trace_hwlat_sample(struct hwlat_sample *sample) { … } /* Macros to encapsulate the time capturing infrastructure */ #define time_type … #define time_get() … #define time_to_us(x) … #define time_sub(a, b) … #define init_time(a, b) … #define time_u64(a) … void trace_hwlat_callback(bool enter) { … } /* * hwlat_err - report a hwlat error. 
*/ #define hwlat_err(msg) … /** * get_sample - sample the CPU TSC and look for likely hardware latencies * * Used to repeatedly capture the CPU TSC (or similar), looking for potential * hardware-induced latency. Called with interrupts disabled and with * hwlat_data.lock held. */ static int get_sample(void) { … } static struct cpumask save_cpumask; static void move_to_next_cpu(void) { … } /* * kthread_fn - The CPU time sampling/hardware latency detection kernel thread * * Used to periodically sample the CPU TSC via a call to get_sample. We * disable interrupts, which does (intentionally) introduce latency since we * need to ensure nothing else might be running (and thus preempting). * Obviously this should never be used in production environments. * * Executes one loop iteration on each CPU in the tracing_cpumask sysfs file. */ static int kthread_fn(void *data) { … } /* * stop_single_kthread - Inform the hardware latency sampling/detector kthread to stop * * This kicks the running hardware latency sampling/detector kernel thread and * tells it to stop sampling now. Use this on unload and at system shutdown. */ static void stop_single_kthread(void) { … } /* * start_single_kthread - Kick off the hardware latency sampling/detector kthread * * This starts the kernel thread that will sit and sample the CPU timestamp * counter (TSC or similar) and look for potential hardware latencies. */ static int start_single_kthread(struct trace_array *tr) { … } /* * stop_cpu_kthread - Stop a hwlat cpu kthread */ static void stop_cpu_kthread(unsigned int cpu) { … } /* * stop_per_cpu_kthreads - Inform the hardware latency sampling/detector kthreads to stop * * This kicks the running hardware latency sampling/detector kernel threads and * tells them to stop sampling now. Use this on unload and at system shutdown. 
*/ static void stop_per_cpu_kthreads(void) { … } /* * start_cpu_kthread - Start a hwlat cpu kthread */ static int start_cpu_kthread(unsigned int cpu) { … } #ifdef CONFIG_HOTPLUG_CPU static void hwlat_hotplug_workfn(struct work_struct *dummy) { … } static DECLARE_WORK(hwlat_hotplug_work, hwlat_hotplug_workfn); /* * hwlat_cpu_init - CPU hotplug online callback function */ static int hwlat_cpu_init(unsigned int cpu) { … } /* * hwlat_cpu_die - CPU hotplug offline callback function */ static int hwlat_cpu_die(unsigned int cpu) { … } static void hwlat_init_hotplug_support(void) { … } #else /* CONFIG_HOTPLUG_CPU */ /* * hwlat_init_hotplug_support - empty stub used when CONFIG_HOTPLUG_CPU is not set */ static void hwlat_init_hotplug_support(void) { return; } #endif /* CONFIG_HOTPLUG_CPU */ /* * start_per_cpu_kthreads - Kick off the hardware latency sampling/detector kthreads * * This starts the kernel threads that will sit on potentially all cpus and * sample the CPU timestamp counter (TSC or similar) and look for potential * hardware latencies. */ static int start_per_cpu_kthreads(struct trace_array *tr) { … } static void *s_mode_start(struct seq_file *s, loff_t *pos) { … } static void *s_mode_next(struct seq_file *s, void *v, loff_t *pos) { … } static int s_mode_show(struct seq_file *s, void *v) { … } static void s_mode_stop(struct seq_file *s, void *v) { … } static const struct seq_operations thread_mode_seq_ops = …; /* * hwlat_mode_open - open callback that hands @file to seq_open() with * thread_mode_seq_ops; @inode is unused. Returns the seq_open() result. */ static int hwlat_mode_open(struct inode *inode, struct file *file) { return seq_open(file, &thread_mode_seq_ops); } static void hwlat_tracer_start(struct trace_array *tr); static void hwlat_tracer_stop(struct trace_array *tr); /** * hwlat_mode_write - Write function for "mode" entry * @filp: The active open file structure * @ubuf: The user buffer that contains the value to write * @cnt: The maximum number of bytes to write to "file" * @ppos: The current position in @file * * This function provides a write implementation for the "mode" interface * to the hardware latency detector. hwlatd has different operation modes. 
* The "none" mode sets the allowed cpumask for a single hwlatd thread at * startup and lets the scheduler handle the migration. The default mode is * the "round-robin" one, in which a single hwlatd thread runs, migrating * among the allowed CPUs in a round-robin fashion. The "per-cpu" mode * creates one hwlatd thread per allowed CPU. */ static ssize_t hwlat_mode_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { … } /* * The width parameter is read/write using the generic trace_min_max_param * method. The *val is protected by the hwlat_data lock and is upper * bounded by the window parameter. */ static struct trace_min_max_param hwlat_width = …; /* * The window parameter is read/write using the generic trace_min_max_param * method. The *val is protected by the hwlat_data lock and is lower * bounded by the width parameter. */ static struct trace_min_max_param hwlat_window = …; static const struct file_operations thread_mode_fops = …; /** * init_tracefs - A function to initialize the tracefs interface files * * This function creates entries in tracefs for "hwlat_detector". * It creates the hwlat_detector directory in the tracing directory, * and within that directory are the count, width and window files used to * change and view those values. */ static int init_tracefs(void) { … } static void hwlat_tracer_start(struct trace_array *tr) { … } static void hwlat_tracer_stop(struct trace_array *tr) { … } static int hwlat_tracer_init(struct trace_array *tr) { … } static void hwlat_tracer_reset(struct trace_array *tr) { … } static struct tracer hwlat_tracer __read_mostly = …; __init static int init_hwlat_tracer(void) { … } late_initcall(init_hwlat_tracer);