// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008 Steven Rostedt <[email protected]> * */ #include <linux/sched/task_stack.h> #include <linux/stacktrace.h> #include <linux/security.h> #include <linux/kallsyms.h> #include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/module.h> #include <linux/sysctl.h> #include <linux/init.h> #include <asm/setup.h> #include "trace.h" #define STACK_TRACE_ENTRIES … static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES]; static unsigned stack_trace_index[STACK_TRACE_ENTRIES]; static unsigned int stack_trace_nr_entries; static unsigned long stack_trace_max_size; static arch_spinlock_t stack_trace_max_lock = …; DEFINE_PER_CPU(int, disable_stack_tracer); static DEFINE_MUTEX(stack_sysctl_mutex); int stack_tracer_enabled; static void print_max_stack(void) { … } /* * The stack tracer looks for a maximum stack at each call from a function. It * registers a callback from ftrace, and in that callback it examines the stack * size. It determines the stack size from the variable passed in, which is the * address of a local variable in the stack_trace_call() callback function. * The stack size is calculated by the address of the local variable to the top * of the current stack. If that size is smaller than the currently saved max * stack size, nothing more is done. * * If the size of the stack is greater than the maximum recorded size, then the * following algorithm takes place. 
* * For architectures (like x86) that store the function's return address before * saving the function's local variables, the stack will look something like * this: * * [ top of stack ] * 0: sys call entry frame * 10: return addr to entry code * 11: start of sys_foo frame * 20: return addr to sys_foo * 21: start of kernel_func_bar frame * 30: return addr to kernel_func_bar * 31: [ do trace stack here ] * * save_stack_trace() is called, returning all the functions it finds in the * current stack, which would be (from the bottom of the stack to the top): * * return addr to kernel_func_bar * return addr to sys_foo * return addr to entry code * * Now, to figure out the size of each of these functions' local variables, * a search of the stack is made to find these values. When a match is made, it * is added to the stack_dump_trace[] array. The offset into the stack is saved * in the stack_trace_index[] array. The above example would show: * * stack_dump_trace[] | stack_trace_index[] * ------------------ + ------------------- * return addr to kernel_func_bar | 30 * return addr to sys_foo | 20 * return addr to entry | 10 * * The print_max_stack() function above uses these values to print the size of * each function's portion of the stack. * * for (i = 0; i < nr_entries; i++) { * size = i == nr_entries - 1 ? 
stack_trace_index[i] : * stack_trace_index[i] - stack_trace_index[i+1]; * print("%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i]); * } * * The above shows * * depth size location * ----- ---- -------- * 0 30 10 kernel_func_bar * 1 20 10 sys_foo * 2 10 10 entry code * * Now, for architectures that might save the return address after the function's * local variables (saving the link register before calling nested functions), * this will cause the stack to look a little different: * * [ top of stack ] * 0: sys call entry frame * 10: start of sys_foo_frame * 19: return addr to entry code << lr saved before calling kernel_func_bar * 20: start of kernel_func_bar frame * 29: return addr to sys_foo_frame << lr saved before calling next function * 30: [ do trace stack here ] * * Although the functions returned by save_stack_trace() may be the same, the * placement in the stack will be different. Using the same algorithm as above * would yield: * * stack_dump_trace[] | stack_trace_index[] * ------------------ + ------------------- * return addr to kernel_func_bar | 30 * return addr to sys_foo | 29 * return addr to entry | 19 * * Where the mapping is off by one: * * kernel_func_bar stack frame size is 29 - 19 not 30 - 29! * * To fix this, if the architecture sets ARCH_RET_ADDR_AFTER_LOCAL_VARS, the * values in stack_trace_index[] are shifted by one and the number of * stack trace entries is decremented by one. * * stack_dump_trace[] | stack_trace_index[] * ------------------ + ------------------- * return addr to kernel_func_bar | 29 * return addr to sys_foo | 19 * * Although the entry function is not displayed, the first function (sys_foo) * will still include the entry function's stack size. 
*/ static void check_stack(unsigned long ip, unsigned long *stack) { … } /* Some archs may not define MCOUNT_INSN_SIZE */ #ifndef MCOUNT_INSN_SIZE #define MCOUNT_INSN_SIZE … #endif static void stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { … } static struct ftrace_ops trace_ops __read_mostly = …; static ssize_t stack_max_size_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { … } static ssize_t stack_max_size_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *ppos) { … } static const struct file_operations stack_max_size_fops = …; static void * __next(struct seq_file *m, loff_t *pos) { … } static void * t_next(struct seq_file *m, void *v, loff_t *pos) { … } static void *t_start(struct seq_file *m, loff_t *pos) { … } static void t_stop(struct seq_file *m, void *p) { … } static void trace_lookup_stack(struct seq_file *m, long i) { … } static void print_disabled(struct seq_file *m) { … } static int t_show(struct seq_file *m, void *v) { … } static const struct seq_operations stack_trace_seq_ops = …; static int stack_trace_open(struct inode *inode, struct file *file) { … } static const struct file_operations stack_trace_fops = …; #ifdef CONFIG_DYNAMIC_FTRACE static int stack_trace_filter_open(struct inode *inode, struct file *file) { … } static const struct file_operations stack_trace_filter_fops = …; #endif /* CONFIG_DYNAMIC_FTRACE */ int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { … } static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata; static __init int enable_stacktrace(char *str) { … } __setup(…); static __init int stack_trace_init(void) { … } device_initcall(stack_trace_init);