/* SPDX-License-Identifier: GPL-2.0+ */ /* * RCU expedited grace periods * * Copyright IBM Corporation, 2016 * * Authors: Paul E. McKenney <[email protected]> */ #include <linux/lockdep.h> static void rcu_exp_handler(void *unused); static int rcu_print_task_exp_stall(struct rcu_node *rnp); static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp); /* * Record the start of an expedited grace period. */ static void rcu_exp_gp_seq_start(void) { … } /* * Return the value that the expedited-grace-period counter will have * at the end of the current grace period. */ static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void) { … } /* * Record the end of an expedited grace period. */ static void rcu_exp_gp_seq_end(void) { … } /* * Take a snapshot of the expedited-grace-period counter, which is the * earliest value that will indicate that a full grace period has * elapsed since the current time. */ static unsigned long rcu_exp_gp_seq_snap(void) { … } /* * Given a counter snapshot from rcu_exp_gp_seq_snap(), return true * if a full expedited grace period has elapsed since that snapshot * was taken. */ static bool rcu_exp_gp_seq_done(unsigned long s) { … } /* * Reset the ->expmaskinit values in the rcu_node tree to reflect any * recent CPU-online activity. Note that these masks are not cleared * when CPUs go offline, so they reflect the union of all CPUs that have * ever been online. This means that this function normally takes its * no-work-to-do fastpath. */ static void sync_exp_reset_tree_hotplug(void) { … } /* * Reset the ->expmask values in the rcu_node tree in preparation for * a new expedited grace period. */ static void __maybe_unused sync_exp_reset_tree(void) { … } /* * Return non-zero if there is no RCU expedited grace period in progress * for the specified rcu_node structure, in other words, if all CPUs and * tasks covered by the specified rcu_node structure have done their bit * for the current expedited grace period. 
*/ static bool sync_rcu_exp_done(struct rcu_node *rnp) { … } /* * Like sync_rcu_exp_done(), but where the caller does not hold the * rcu_node's ->lock. */ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp) { … } /* * Report the exit from RCU read-side critical section for the last task * that queued itself during or before the current expedited preemptible-RCU * grace period. This event is reported either to the rcu_node structure on * which the task was queued or to one of that rcu_node structure's ancestors, * recursively up the tree. (Calm down, calm down, we do the recursion * iteratively!) */ static void __rcu_report_exp_rnp(struct rcu_node *rnp, bool wake, unsigned long flags) __releases(rnp->lock) { … } /* * Report expedited quiescent state for specified node. This is a * lock-acquisition wrapper function for __rcu_report_exp_rnp(). */ static void __maybe_unused rcu_report_exp_rnp(struct rcu_node *rnp, bool wake) { … } /* * Report expedited quiescent state for multiple CPUs, all covered by the * specified leaf rcu_node structure. */ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, unsigned long mask, bool wake) { … } /* * Report expedited quiescent state for specified rcu_data (CPU). */ static void rcu_report_exp_rdp(struct rcu_data *rdp) { … } /* Common code for work-done checking. */ static bool sync_exp_work_done(unsigned long s) { … } /* * Funnel-lock acquisition for expedited grace periods. Returns true * if some other task completed an expedited grace period that this task * can piggy-back on, and with no mutex held. Otherwise, returns false * with the mutex held, indicating that the caller must actually do the * expedited grace period. */ static bool exp_funnel_lock(unsigned long s) { … } /* * Select the CPUs within the specified rcu_node that the upcoming * expedited grace period needs to wait for. 
*/ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp) { … } static void rcu_exp_sel_wait_wake(unsigned long s); static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) { … } static inline bool rcu_exp_worker_started(void) { … } static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp) { … } static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) { … } static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp) { … } /* * Work-queue handler to drive an expedited grace period forward. */ static void wait_rcu_exp_gp(struct kthread_work *wp) { … } static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew) { … } /* * Select the nodes that the upcoming expedited grace period needs * to wait for. */ static void sync_rcu_exp_select_cpus(void) { … } /* * Wait for the expedited grace period to elapse, within time limit. * If the time limit is exceeded without the grace period elapsing, * return false, otherwise return true. */ static bool synchronize_rcu_expedited_wait_once(long tlimit) { … } /* * Wait for the expedited grace period to elapse, issuing any needed * RCU CPU stall warnings along the way. */ static void synchronize_rcu_expedited_wait(void) { … } /* * Wait for the current expedited grace period to complete, and then * wake up everyone who piggybacked on the just-completed expedited * grace period. Also update all the ->exp_seq_rq counters as needed * in order to avoid counter-wrap problems. */ static void rcu_exp_wait_wake(unsigned long s) { … } /* * Common code to drive an expedited grace period forward, used by * workqueues and mid-boot-time tasks. */ static void rcu_exp_sel_wait_wake(unsigned long s) { … } #ifdef CONFIG_PREEMPT_RCU /* * Remote handler for smp_call_function_single(). If there is an * RCU read-side critical section in effect, request that the * next rcu_read_unlock() record the quiescent state up the * ->expmask fields in the rcu_node tree. 
Otherwise, immediately * report the quiescent state. */
static void rcu_exp_handler(void *unused) { … }

/* PREEMPTION=y, so no PREEMPTION=n expedited grace period to clean up after. */
static void sync_sched_exp_online_cleanup(int cpu) { … }

/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, printing out the tid of each that is blocking the current
 * expedited grace period.
 */
static int rcu_print_task_exp_stall(struct rcu_node *rnp) { … }

/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, dumping the stack of each that is blocking the current
 * expedited grace period.
 */
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp) { … }

#else /* #ifdef CONFIG_PREEMPT_RCU */

/*
 * Request an expedited quiescent state from the current CPU.
 *
 * Sets this CPU's rcu_data.cpu_no_qs.b.exp flag, then sets its
 * rcu_data.rcu_urgent_qs flag, and finally marks the running task and
 * this CPU as needing a reschedule.
 *
 * All accesses are to the current CPU's per-CPU data.  In the callers
 * visible in this file, this runs either from rcu_exp_handler() or with
 * interrupts disabled via local_irq_save().
 */
static void rcu_exp_need_qs(void)
{
	/* Record that an expedited quiescent state has been requested. */
	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
	/* Store .exp before .rcu_urgent_qs. */
	smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
	/* Ask the scheduler for help: flag both the task and the CPU. */
	set_tsk_need_resched(current);
	set_preempt_need_resched();
}

/*
 * Invoked on each online non-idle CPU for expedited quiescent state.
 *
 * @unused: Ignored; this function is also passed as the callback to
 *	    smp_call_function_single(), which supplies a void * argument.
 *
 * Does nothing if this CPU's bit (->grpmask) is not set in its leaf
 * rcu_node's ->expmask, or if a quiescent state has already been
 * requested from this CPU (->cpu_no_qs.b.exp is set).  Otherwise either
 * reports the quiescent state immediately via rcu_report_exp_rdp() or
 * requests one via rcu_exp_need_qs().
 */
static void rcu_exp_handler(void *unused)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp = rdp->mynode;
	/* True when no PREEMPT_MASK or SOFTIRQ_MASK bits are set in preempt_count(). */
	bool preempt_bh_enabled = !(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));

	/* Quiescent state either not needed or already requested, leave. */
	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
	    __this_cpu_read(rcu_data.cpu_no_qs.b.exp))
		return;
	/*
	 * Report immediately if rcu_is_cpu_rrupt_from_idle() says so, or if
	 * the preempt_count() snapshot shows neither preemption nor softirq
	 * disabled.  The snapshot is consulted only when CONFIG_PREEMPT_COUNT
	 * is enabled.
	 * NOTE(review): presumably because preempt_count() is not a reliable
	 * indicator without CONFIG_PREEMPT_COUNT -- confirm.
	 */
	if (rcu_is_cpu_rrupt_from_idle() ||
	    (IS_ENABLED(CONFIG_PREEMPT_COUNT) && preempt_bh_enabled)) {
		rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
		return;
	}
	/* Cannot report now, so request a quiescent state from this CPU. */
	rcu_exp_need_qs();
}

/*
 * Send IPI for expedited cleanup if needed at end of CPU-hotplug operation.
 *
 * @cpu: CPU whose expedited quiescent-state status is to be checked.
 *
 * If @cpu's bit is still set in its leaf rcu_node's ->expmask and no
 * quiescent state has yet been requested from it, request one: directly
 * via rcu_exp_need_qs() when @cpu is the CPU running this function,
 * otherwise by having @cpu run rcu_exp_handler().
 *
 * Every return path pairs the get_cpu() below with a put_cpu().
 */
static void sync_sched_exp_online_cleanup(int cpu)
{
	unsigned long flags;
	int my_cpu;
	struct rcu_data *rdp;
	int ret;
	struct rcu_node *rnp;

	rdp = per_cpu_ptr(&rcu_data, cpu);
	rnp = rdp->mynode;
	/* Remember which CPU we are running on; released by put_cpu(). */
	my_cpu = get_cpu();
	/* Quiescent state either not needed or already requested, leave. */
	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
	    READ_ONCE(rdp->cpu_no_qs.b.exp)) {
		put_cpu();
		return;
	}
	/* Quiescent state needed on current CPU, so set it up locally. */
	if (my_cpu == cpu) {
		/* Interrupts are disabled around the per-CPU flag updates. */
		local_irq_save(flags);
		rcu_exp_need_qs();
		local_irq_restore(flags);
		put_cpu();
		return;
	}
	/* Quiescent state needed on some other CPU, send IPI. */
	/* Final argument 0: do not wait for the handler to complete. */
	ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
	put_cpu();
	/* A nonzero return means the cross-CPU call was not issued. */
	WARN_ON_ONCE(ret);
}

/*
 * Because preemptible RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections that are
 * blocking the current expedited grace period.
 *
 * @rnp: Unused in this configuration.
 *
 * Always returns 0.
 */
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
{
	return 0;
}

/*
 * Because preemptible RCU does not exist, we never have to print out
 * tasks blocked within RCU read-side critical sections that are blocking
 * the current expedited grace period.
 *
 * @rnp: Unused in this configuration.
 */
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
}

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/**
 * synchronize_rcu_expedited - Brute-force RCU grace period
 *
 * Wait for an RCU grace period, but expedite it.  The basic idea is to
 * IPI all non-idle non-nohz online CPUs.  The IPI handler checks whether
 * the CPU is in an RCU critical section, and if so, it sets a flag that
 * causes the outermost rcu_read_unlock() to report the quiescent state
 * for RCU-preempt or asks the scheduler for help for RCU-sched.  On the
 * other hand, if the CPU is not in an RCU read-side critical section,
 * the IPI handler reports the quiescent state immediately.
 *
 * Although this is a great improvement over previous expedited
 * implementations, it is still unfriendly to real-time workloads, so is
 * thus not recommended for any sort of common-case code.  In fact, if
 * you are using synchronize_rcu_expedited() in a loop, please restructure
 * your code to batch your updates, and then use a single synchronize_rcu()
 * instead.
 *
 * This has the same semantics as (but is more brutal than) synchronize_rcu().
*/ void synchronize_rcu_expedited(void) { … } EXPORT_SYMBOL_GPL(…); /* * Ensure that start_poll_synchronize_rcu_expedited() has the expedited * RCU grace periods that it needs. */ static void sync_rcu_do_polled_gp(struct work_struct *wp) { … } /** * start_poll_synchronize_rcu_expedited - Snapshot current RCU state and start expedited grace period * * Returns a cookie to pass to a call to cond_synchronize_rcu(), * cond_synchronize_rcu_expedited(), or poll_state_synchronize_rcu(), * allowing them to determine whether or not any sort of grace period has * elapsed in the meantime. If the needed expedited grace period is not * already slated to start, initiates that grace period. */ unsigned long start_poll_synchronize_rcu_expedited(void) { … } EXPORT_SYMBOL_GPL(…); /** * start_poll_synchronize_rcu_expedited_full - Take a full snapshot and start expedited grace period * @rgosp: Place to put snapshot of grace-period state * * Places the normal and expedited grace-period states in rgosp. This * state value can be passed to a later call to cond_synchronize_rcu_full() * or poll_state_synchronize_rcu_full() to determine whether or not a * grace period (whether normal or expedited) has elapsed in the meantime. * If the needed expedited grace period is not already slated to start, * initiates that grace period. */ void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) { … } EXPORT_SYMBOL_GPL(…); /** * cond_synchronize_rcu_expedited - Conditionally wait for an expedited RCU grace period * * @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited() * * If any type of full RCU grace period has elapsed since the earlier * call to get_state_synchronize_rcu(), start_poll_synchronize_rcu(), * or start_poll_synchronize_rcu_expedited(), just return. Otherwise, * invoke synchronize_rcu_expedited() to wait for a full grace period. * * Yes, this function does not take counter wrap into account. 
* But counter wrap is harmless. If the counter wraps, we have waited for * more than 2 billion grace periods (and way more on a 64-bit system!), * so waiting for a couple of additional grace periods should be just fine. * * This function provides the same memory-ordering guarantees that * would be provided by a synchronize_rcu() that was invoked at the call * to the function that provided @oldstate and that returned at the end * of this function. */ void cond_synchronize_rcu_expedited(unsigned long oldstate) { … } EXPORT_SYMBOL_GPL(…); /** * cond_synchronize_rcu_expedited_full - Conditionally wait for an expedited RCU grace period * @rgosp: value from get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), or start_poll_synchronize_rcu_expedited_full() * * If a full RCU grace period has elapsed since the call to * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), * or start_poll_synchronize_rcu_expedited_full() from which @rgosp was * obtained, just return. Otherwise, invoke synchronize_rcu_expedited() * to wait for a full grace period. * * Yes, this function does not take counter wrap into account. * But counter wrap is harmless. If the counter wraps, we have waited for * more than 2 billion grace periods (and way more on a 64-bit system!), * so waiting for a couple of additional grace periods should be just fine. * * This function provides the same memory-ordering guarantees that * would be provided by a synchronize_rcu() that was invoked at the call * to the function that provided @rgosp and that returned at the end of * this function. */ void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) { … } EXPORT_SYMBOL_GPL(…);