linux/kernel/events/uprobes.c

// SPDX-License-Identifier: GPL-2.0+
/*
 * User-space Probes (UProbes)
 *
 * Copyright (C) IBM Corporation, 2008-2012
 * Authors:
 *	Srikar Dronamraju
 *	Jim Keniston
 * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
 */

#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>	/* read_mapping_page */
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/export.h>
#include <linux/rmap.h>		/* anon_vma_prepare */
#include <linux/mmu_notifier.h>
#include <linux/swap.h>		/* folio_free_swap */
#include <linux/ptrace.h>	/* user_enable_single_step */
#include <linux/kdebug.h>	/* notifier mechanism */
#include <linux/percpu-rwsem.h>
#include <linux/task_work.h>
#include <linux/shmem_fs.h>
#include <linux/khugepaged.h>

#include <linux/uprobes.h>

#define UINSNS_PER_PAGE
#define MAX_UPROBE_XOL_SLOTS

static struct rb_root uprobes_tree =;
/*
 * allows us to skip the uprobe_mmap if there are no uprobe events active
 * at this time.  Probably a fine grained per inode count is better?
 */
#define no_uprobe_events()

static DEFINE_RWLOCK(uprobes_treelock);	/* serialize rbtree access */
static seqcount_rwlock_t uprobes_seqcount =;

DEFINE_STATIC_SRCU();

#define UPROBES_HASH_SZ
/* serialize uprobe->pending_list */
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
#define uprobes_mmap_hash(v)

DEFINE_STATIC_PERCPU_RWSEM();

/* Have a copy of original instruction */
#define UPROBE_COPY_INSN

struct uprobe {};

struct delayed_uprobe {};

static DEFINE_MUTEX(delayed_uprobe_lock);
static LIST_HEAD(delayed_uprobe_list);

/*
 * Execute out of line area: anonymous executable mapping installed
 * by the probed task to execute the copy of the original instruction
 * mangled by set_swbp().
 *
 * On a breakpoint hit, thread contests for a slot.  It frees the
 * slot after singlestep. Currently a fixed number of slots are
 * allocated.
 */
struct xol_area {};

static void uprobe_warn(struct task_struct *t, const char *msg)
{}

/*
 * valid_vma: Verify if the specified vma is an executable vma
 * Relax restrictions while unregistering: vm_flags might have
 * changed after breakpoint was inserted.
 *	- is_register: indicates if we are in register context.
 *	- Return 1 if the specified virtual address is in an
 *	  executable vma.
 */
static bool valid_vma(struct vm_area_struct *vma, bool is_register)
{}

static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
{}

static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
{}

/**
 * __replace_page - replace page in vma by new page.
 * based on replace_page in mm/ksm.c
 *
 * @vma:      vma that holds the pte pointing to page
 * @addr:     address the old @page is mapped at
 * @old_page: the page we are replacing by new_page
 * @new_page: the modified page we replace page by
 *
 * If @new_page is NULL, only unmap @old_page.
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
				struct page *old_page, struct page *new_page)
{}

/**
 * is_swbp_insn - check if instruction is breakpoint instruction.
 * @insn: instruction to be checked.
 * Default implementation of is_swbp_insn
 * Returns true if @insn is a breakpoint instruction.
 */
bool __weak is_swbp_insn(uprobe_opcode_t *insn)
{}

/**
 * is_trap_insn - check if instruction is breakpoint instruction.
 * @insn: instruction to be checked.
 * Default implementation of is_trap_insn
 * Returns true if @insn is a breakpoint instruction.
 *
 * This function is needed for the case where an architecture has multiple
 * trap instructions (like powerpc).
 */
bool __weak is_trap_insn(uprobe_opcode_t *insn)
{}

static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len)
{}

static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len)
{}

static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode)
{}

static struct delayed_uprobe *
delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
{}

static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
{}

static void delayed_uprobe_delete(struct delayed_uprobe *du)
{}

static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
{}

static bool valid_ref_ctr_vma(struct uprobe *uprobe,
			      struct vm_area_struct *vma)
{}

static struct vm_area_struct *
find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm)
{}

static int
__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
{}

static void update_ref_ctr_warn(struct uprobe *uprobe,
				struct mm_struct *mm, short d)
{}

static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
			  short d)
{}

/*
 * NOTE:
 * Expect the breakpoint instruction to be the smallest size instruction for
 * the architecture. If an arch has variable length instruction and the
 * breakpoint instruction is not of the smallest length instruction
 * supported by that architecture then we need to modify is_trap_at_addr and
 * uprobe_write_opcode accordingly. This would never be a problem for archs
 * that have fixed length instructions.
 *
 * uprobe_write_opcode - write the opcode at a given virtual address.
 * @auprobe: arch specific probepoint information.
 * @mm: the probed process address space.
 * @vaddr: the virtual address to store the opcode.
 * @opcode: opcode to be written at @vaddr.
 *
 * Called with mm->mmap_lock held for read or write.
 * Return 0 (success) or a negative errno.
 */
int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
			unsigned long vaddr, uprobe_opcode_t opcode)
{}

/**
 * set_swbp - store breakpoint at a given address.
 * @auprobe: arch specific probepoint information.
 * @mm: the probed process address space.
 * @vaddr: the virtual address to insert the opcode.
 *
 * For mm @mm, store the breakpoint instruction at @vaddr.
 * Return 0 (success) or a negative errno.
 */
int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
{}

/**
 * set_orig_insn - Restore the original instruction.
 * @mm: the probed process address space.
 * @auprobe: arch specific probepoint information.
 * @vaddr: the virtual address to insert the opcode.
 *
 * For mm @mm, restore the original opcode (opcode) at @vaddr.
 * Return 0 (success) or a negative errno.
 */
int __weak
set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
{}

/* uprobe should have guaranteed positive refcount */
static struct uprobe *get_uprobe(struct uprobe *uprobe)
{}

/*
 * uprobe should have guaranteed lifetime, which can be either of:
 *   - caller already has refcount taken (and wants an extra one);
 *   - uprobe is RCU protected and won't be freed until after grace period;
 *   - we are holding uprobes_treelock (for read or write, doesn't matter).
 */
static struct uprobe *try_get_uprobe(struct uprobe *uprobe)
{}

static inline bool uprobe_is_active(struct uprobe *uprobe)
{}

static void uprobe_free_rcu(struct rcu_head *rcu)
{}

static void put_uprobe(struct uprobe *uprobe)
{}

static __always_inline
int uprobe_cmp(const struct inode *l_inode, const loff_t l_offset,
	       const struct uprobe *r)
{}

#define __node_2_uprobe(node)

struct __uprobe_key {};

static inline int __uprobe_cmp_key(const void *key, const struct rb_node *b)
{}

static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b)
{}

/*
 * Assumes being inside RCU protected region.
 * No refcount is taken on returned uprobe.
 */
static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
{}

/*
 * Attempt to insert a new uprobe into uprobes_tree.
 *
 * If uprobe already exists (for given inode+offset), we just increment
 * refcount of previously existing uprobe.
 *
 * If not, a provided new instance of uprobe is inserted into the tree (with
 * assumed initial refcount == 1).
 *
 * In any case, we return a uprobe instance that ends up being in uprobes_tree.
 * Caller has to clean up new uprobe instance, if it ended up not being
 * inserted into the tree.
 *
 * We assume that uprobes_treelock is held for writing.
 */
static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
{}

/*
 * Acquire uprobes_treelock and insert uprobe into uprobes_tree
 * (or reuse existing one, see __insert_uprobe() comments above).
 */
static struct uprobe *insert_uprobe(struct uprobe *uprobe)
{}

static void
ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe)
{}

static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
				   loff_t ref_ctr_offset)
{}

static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
{}

/*
 * For uprobe @uprobe, delete the consumer @uc.
 * Should never be called with consumer that's not part of @uprobe->consumers.
 */
static void consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
{}

static int __copy_insn(struct address_space *mapping, struct file *filp,
			void *insn, int nbytes, loff_t offset)
{}

static int copy_insn(struct uprobe *uprobe, struct file *filp)
{}

static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
				struct mm_struct *mm, unsigned long vaddr)
{}

static inline bool consumer_filter(struct uprobe_consumer *uc, struct mm_struct *mm)
{}

static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm)
{}

static int
install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
			struct vm_area_struct *vma, unsigned long vaddr)
{}

static int
remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
{}

struct map_info {};

static inline struct map_info *free_map_info(struct map_info *info)
{}

static struct map_info *
build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
{}

static int
register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
{}

/**
 * uprobe_unregister_nosync - unregister an already registered probe.
 * @uprobe: uprobe to remove
 * @uc: identify which probe if multiple probes are colocated.
 */
void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc)
{}
EXPORT_SYMBOL_GPL();

void uprobe_unregister_sync(void)
{}
EXPORT_SYMBOL_GPL();

/**
 * uprobe_register - register a probe
 * @inode: the file in which the probe has to be placed.
 * @offset: offset from the start of the file.
 * @ref_ctr_offset: offset of SDT marker / reference counter
 * @uc: information on howto handle the probe..
 *
 * Apart from the access refcount, uprobe_register() takes a creation
 * refcount (thro alloc_uprobe) if and only if this @uprobe is getting
 * inserted into the rbtree (i.e first consumer for a @inode:@offset
 * tuple).  Creation refcount stops uprobe_unregister from freeing the
 * @uprobe even before the register operation is complete. Creation
 * refcount is released when the last @uc for the @uprobe
 * unregisters. Caller of uprobe_register() is required to keep @inode
 * (and the containing mount) referenced.
 *
 * Return: pointer to the new uprobe on success or an ERR_PTR on failure.
 */
struct uprobe *uprobe_register(struct inode *inode,
				loff_t offset, loff_t ref_ctr_offset,
				struct uprobe_consumer *uc)
{}
EXPORT_SYMBOL_GPL();

/**
 * uprobe_apply - add or remove the breakpoints according to @uc->filter
 * @uprobe: uprobe which "owns" the breakpoint
 * @uc: consumer which wants to add more or remove some breakpoints
 * @add: add or remove the breakpoints
 * Return: 0 on success or negative error code.
 */
int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
{}

static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
{}

static struct rb_node *
find_node_in_range(struct inode *inode, loff_t min, loff_t max)
{}

/*
 * For a given range in vma, build a list of probes that need to be inserted.
 */
static void build_probe_list(struct inode *inode,
				struct vm_area_struct *vma,
				unsigned long start, unsigned long end,
				struct list_head *head)
{}

/* @vma contains reference counter, not the probed instruction. */
static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
{}

/*
 * Called from mmap_region/vma_merge with mm->mmap_lock acquired.
 *
 * Currently we ignore all errors and always return 0, the callers
 * can't handle the failure anyway.
 */
int uprobe_mmap(struct vm_area_struct *vma)
{}

static bool
vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{}

/*
 * Called in context of a munmap of a vma.
 */
void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{}

static vm_fault_t xol_fault(const struct vm_special_mapping *sm,
			    struct vm_area_struct *vma, struct vm_fault *vmf)
{}

static const struct vm_special_mapping xol_mapping =;

/* Slot allocation for XOL */
static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
{}

void * __weak arch_uprobe_trampoline(unsigned long *psize)
{}

static struct xol_area *__create_xol_area(unsigned long vaddr)
{}

/*
 * get_xol_area - Allocate process's xol_area if necessary.
 * This area will be used for storing instructions for execution out of line.
 *
 * Returns the allocated area or NULL.
 */
static struct xol_area *get_xol_area(void)
{}

/*
 * uprobe_clear_state - Free the area allocated for slots.
 */
void uprobe_clear_state(struct mm_struct *mm)
{}

void uprobe_start_dup_mmap(void)
{}

void uprobe_end_dup_mmap(void)
{}

void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{}

/*
 *  - search for a free slot.
 */
static unsigned long xol_take_insn_slot(struct xol_area *area)
{}

/*
 * xol_get_insn_slot - allocate a slot for xol.
 * Returns the allocated slot address or 0.
 */
static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
{}

/*
 * xol_free_insn_slot - If slot was earlier allocated by
 * @xol_get_insn_slot(), make the slot available for
 * subsequent requests.
 */
static void xol_free_insn_slot(struct task_struct *tsk)
{}

void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
				  void *src, unsigned long len)
{}

/**
 * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
 * @regs: Reflects the saved state of the task after it has hit a breakpoint
 * instruction.
 * Return the address of the breakpoint instruction.
 */
unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
{}

unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
{}

static struct return_instance *free_ret_instance(struct return_instance *ri)
{}

/*
 * Called with no locks held.
 * Called in context of an exiting or an exec-ing thread.
 */
void uprobe_free_utask(struct task_struct *t)
{}

/*
 * Allocate a uprobe_task object for the task if necessary.
 * Called when the thread hits a breakpoint.
 *
 * Returns:
 * - pointer to new uprobe_task on success
 * - NULL otherwise
 */
static struct uprobe_task *get_utask(void)
{}

static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
{}

static void dup_xol_work(struct callback_head *work)
{}

/*
 * Called in context of a new clone/fork from copy_process.
 */
void uprobe_copy_process(struct task_struct *t, unsigned long flags)
{}

/*
 * Current area->vaddr notion assume the trampoline address is always
 * equal area->vaddr.
 *
 * Returns -1 in case the xol_area is not allocated.
 */
unsigned long uprobe_get_trampoline_vaddr(void)
{}

static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
					struct pt_regs *regs)
{}

static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
{}

/* Prepare to single-step probed instruction out of line. */
static int
pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
{}

/*
 * If we are singlestepping, then ensure this thread is not connected to
 * non-fatal signals until completion of singlestep.  When xol insn itself
 * triggers the signal,  restart the original insn even if the task is
 * already SIGKILL'ed (since coredump should report the correct ip).  This
 * is even more important if the task has a handler for SIGSEGV/etc, The
 * _same_ instruction should be repeated again after return from the signal
 * handler, and SSTEP can never finish in this case.
 */
bool uprobe_deny_signal(void)
{}

static void mmf_recalc_uprobes(struct mm_struct *mm)
{}

static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
{}

/* assumes being inside RCU protected region */
static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swbp)
{}

static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
{}

static void
handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
{}

static struct return_instance *find_next_ret_chain(struct return_instance *ri)
{}

void uprobe_handle_trampoline(struct pt_regs *regs)
{}

bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
{}

bool __weak arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
					struct pt_regs *regs)
{}

/*
 * Run handler and ask thread to singlestep.
 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
 */
static void handle_swbp(struct pt_regs *regs)
{}

/*
 * Perform required fix-ups and disable singlestep.
 * Allow pending signals to take effect.
 */
static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
{}

/*
 * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and
 * allows the thread to return from interrupt. After that handle_swbp()
 * sets utask->active_uprobe.
 *
 * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag
 * and allows the thread to return from interrupt.
 *
 * While returning to userspace, thread notices the TIF_UPROBE flag and calls
 * uprobe_notify_resume().
 */
void uprobe_notify_resume(struct pt_regs *regs)
{}

/*
 * uprobe_pre_sstep_notifier gets called from interrupt context as part of
 * notifier mechanism. Set TIF_UPROBE flag and indicate breakpoint hit.
 */
int uprobe_pre_sstep_notifier(struct pt_regs *regs)
{}

/*
 * uprobe_post_sstep_notifier gets called in interrupt context as part of notifier
 * mechanism. Set TIF_UPROBE flag and indicate completion of singlestep.
 */
int uprobe_post_sstep_notifier(struct pt_regs *regs)
{}

static struct notifier_block uprobe_exception_nb =;

void __init uprobes_init(void)
{}