// SPDX-License-Identifier: GPL-2.0-or-later /* * Debugging module statistics. * * Copyright (C) 2023 Luis Chamberlain <[email protected]> */ #include <linux/module.h> #include <uapi/linux/module.h> #include <linux/string.h> #include <linux/printk.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/debugfs.h> #include <linux/rculist.h> #include <linux/math.h> #include "internal.h" /** * DOC: module debugging statistics overview * * Enabling CONFIG_MODULE_STATS enables module debugging statistics which * are useful to monitor and root cause memory pressure issues with module * loading. These statistics are useful to allow us to improve production * workloads. * * The current module debugging statistics supported help keep track of module * loading failures to enable improvements either for kernel module auto-loading * usage (request_module()) or interactions with userspace. Statistics are * provided to track all possible failures in the finit_module() path and memory * wasted in this process space. Each of the failure counters is associated * with a type of module loading failure which is known to incur a certain amount * of memory allocation loss. In the worst case loading a module will fail after * a 3 step memory allocation process: * * a) memory allocated with kernel_read_file_from_fd() * b) module decompression processes the file read from * kernel_read_file_from_fd(), and vmap() is used to map * the decompressed module to a new local buffer which represents * a copy of the decompressed module passed from userspace. The buffer * from kernel_read_file_from_fd() is freed right away. * c) layout_and_allocate() allocates space for the final resting * place where we would keep the module if it were to be processed * successfully. * * If a failure occurs after these three different allocations only one * counter will be incremented with the sum of the bytes allocated and then * freed during this failure. 
Likewise, if module loading failed only after * step b) a separate counter is used and incremented for the bytes freed and * not used during both of those allocations. * * Virtual memory space can be limited, for example on x86 virtual memory size * defaults to 128 MiB. We should strive to limit and avoid wasting virtual * memory allocations when possible. These module debugging statistics help * to evaluate how much memory is being wasted on bootup due to module loading * failures. * * All counters are designed to be incremental. Atomic counters are used so as to * remain simple and avoid delays and deadlocks. */ /** * DOC: dup_failed_modules - tracks duplicate failed modules * * Linked list of modules which failed to be loaded because an already existing * module with the same name was already being processed or already loaded. * The finit_module() system call incurs heavy virtual memory allocations. In * the worst case an finit_module() system call can end up allocating virtual * memory 3 times: * * 1) kernel_read_file_from_fd() call uses vmalloc() * 2) optional module decompression uses vmap() * 3) layout_and_allocate() can use vzalloc() or an arch specific variation of * vmalloc to deal with ELF sections requiring special permissions * * In practice, on a typical boot today most finit_module() calls fail due to * the module with the same name already being loaded or about to be processed. * All virtual memory allocated to these failed modules will be freed with * no functional use. * * To help with this, the dup_failed_modules list allows us to track modules which * failed to load due to the fact that a module was already loaded or being * processed. There are only two points at which we can fail such calls; * we list them below along with the number of virtual memory allocation * calls: * * a) FAIL_DUP_MOD_BECOMING: at the end of early_mod_check() before * layout_and_allocate(). 
* - with module decompression: 2 virtual memory allocation calls * - without module decompression: 1 virtual memory allocation call * b) FAIL_DUP_MOD_LOAD: after layout_and_allocate() on add_unformed_module() * - with module decompression: 3 virtual memory allocation calls * - without module decompression: 2 virtual memory allocation calls * * We should strive to get this list to be as small as possible. If this list * is not empty it reflects work or optimizations that are possible * either in-kernel or in userspace. */ static LIST_HEAD(dup_failed_modules); /** * DOC: module statistics debugfs counters * * The total amount of wasted virtual memory allocation space during module * loading can be computed by summing the following counters: * * * @invalid_kread_bytes + * @invalid_decompress_bytes + * @invalid_becoming_bytes + * @invalid_mod_bytes * * The following debugfs counters are available to inspect module loading * failures: * * * total_mod_size: total bytes ever used by all modules we've dealt with on * this system * * total_text_size: total bytes of the .text and .init.text ELF section * sizes we've dealt with on this system * * invalid_kread_bytes: bytes allocated and then freed on failures which * happen due to the initial kernel_read_file_from_fd(). kernel_read_file_from_fd() * uses vmalloc(). These should typically not happen unless your system is * under memory pressure. * * invalid_decompress_bytes: number of bytes allocated and freed due to * memory allocations in the module decompression path that use vmap(). * These typically should not happen unless your system is under memory * pressure. * * invalid_becoming_bytes: total number of bytes allocated and freed used * to read the kernel module userspace wants us to read before we * promote it to be processed to be added to our @modules linked list. 
These * failures can happen if a check fails in between a successful kernel_read_file_from_fd() * call and right before we allocate our private memory for the module * which would be kept if the module is successfully loaded. The most common * reason for this failure is when userspace is racing to load a module * which it does not yet see loaded. The first module to succeed in * add_unformed_module() will add a module to our &modules list and * subsequent loads of modules with the same name will error out at the * end of early_mod_check(). The check for module_patient_check_exists() * at the end of early_mod_check() prevents duplicate allocations * on layout_and_allocate() for modules already being processed. These * duplicate failed modules are non-fatal; however, they typically are * indicative of userspace not seeing a module in userspace loaded yet and * unnecessarily trying to load a module before the kernel even has a chance * to begin to process prior requests. Although duplicate failures can be * non-fatal, we should try to reduce vmalloc() pressure proactively, so * ideally after boot this will be as close to 0 as possible. If module * decompression was used we also add to this counter the cost of the * initial kernel_read_file_from_fd() of the compressed module. If module * decompression was not used the value represents the total allocated and * freed bytes in kernel_read_file_from_fd() calls for these types of * failures. 
These failures can occur because: * * * module_sig_check() - module signature checks * * elf_validity_cache_copy() - some ELF validation issue * * early_mod_check(): * * * blacklisting * * failed to rewrite section headers * * version magic * * live patch requirements didn't check out * * the module was detected as being already present * * * invalid_mod_bytes: this is the total number of bytes allocated and * freed due to failures after we did all the sanity checks of the module * which userspace passed to us and after our first check that the module * is unique. A module can still fail to load if we detect the module is * loaded after we allocate space for it with layout_and_allocate(); we do * this check right before processing the module as live and running its * initialization routines. Note that if you have a failure of this type it * also means the respective kernel_read_file_from_fd() memory space was * freed and not used, and so we increment this counter with twice * the size of the module. Additionally, if you used module decompression * the size of the compressed module is also added to this counter. * * * modcount: how many modules we've loaded in our kernel life time * * failed_kreads: how many modules failed due to failed kernel_read_file_from_fd() * * failed_decompress: how many failed module decompression attempts we've had. * These really should not happen unless your compression / decompression * is broken. * * failed_becoming: how many modules failed after we kernel_read_file_from_fd() * it and before we allocate memory for it with layout_and_allocate(). This * counter is never incremented if you manage to validate the module and * call layout_and_allocate() for it. * * failed_load_modules: how many modules failed once we've allocated our * private space for our module using layout_and_allocate(). These failures * should hopefully mostly be dealt with already. 
Races in theory could * still exist here, but it would just mean the kernel had started processing * two threads concurrently up to early_mod_check() and one thread won. * These failures are good signs the kernel or userspace is doing something * seriously stupid or that could be improved. We should strive to fix these, * but it is perhaps not easy to fix them. A recent example is the module * requests incurred for frequency modules: a separate module request was * being issued for each CPU on a system. */ atomic_long_t total_mod_size; atomic_long_t total_text_size; atomic_long_t invalid_kread_bytes; atomic_long_t invalid_decompress_bytes; static atomic_long_t invalid_becoming_bytes; static atomic_long_t invalid_mod_bytes; atomic_t modcount; atomic_t failed_kreads; atomic_t failed_decompress; static atomic_t failed_becoming; static atomic_t failed_load_modules; static const char *mod_fail_to_str(struct mod_fail_load *mod_fail) { … } void mod_stat_bump_invalid(struct load_info *info, int flags) { … } void mod_stat_bump_becoming(struct load_info *info, int flags) { … } int try_add_failed_module(const char *name, enum fail_dup_mod_reason reason) { … } /* * At 64 bytes per module and assuming a 1024-byte preamble we can fit the * 112 module prints within 8k. * * 1024 + (64*112) = 8k */ #define MAX_PREAMBLE … #define MAX_FAILED_MOD_PRINT … #define MAX_BYTES_PER_MOD … static ssize_t read_file_mod_stats(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { … } #undef MAX_PREAMBLE #undef MAX_FAILED_MOD_PRINT #undef MAX_BYTES_PER_MOD static const struct file_operations fops_mod_stats = …; #define mod_debug_add_ulong … #define mod_debug_add_atomic … static int __init module_stats_init(void) { … } #undef mod_debug_add_ulong #undef mod_debug_add_atomic module_init(…) …;