tree_exp.h | Explore in Territory

/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * RCU expedited grace periods
 *
 * Copyright IBM Corporation, 2016
 *
 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 */

#include <linux/lockdep.h>

static void rcu_exp_handler(void *unused);
static int rcu_print_task_exp_stall(struct rcu_node *rnp);
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp);

/*
 * Record the start of an expedited grace period.
 */
static void rcu_exp_gp_seq_start(void)
{ … }

/*
 * Return the value that the expedited-grace-period counter will have
 * at the end of the current grace period.
 */
static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void)
{ … }

/*
 * Record the end of an expedited grace period.
 */
static void rcu_exp_gp_seq_end(void)
{ … }

/*
 * Take a snapshot of the expedited-grace-period counter, which is the
 * earliest value that will indicate that a full grace period has
 * elapsed since the current time.
 */
static unsigned long rcu_exp_gp_seq_snap(void)
{ … }

/*
 * Given a counter snapshot from rcu_exp_gp_seq_snap(), return true
 * if a full expedited grace period has elapsed since that snapshot
 * was taken.
 */
static bool rcu_exp_gp_seq_done(unsigned long s)
{ … }

/*
 * Reset the ->expmaskinit values in the rcu_node tree to reflect any
 * recent CPU-online activity.  Note that these masks are not cleared
 * when CPUs go offline, so they reflect the union of all CPUs that have
 * ever been online.  This means that this function normally takes its
 * no-work-to-do fastpath.
 */
static void sync_exp_reset_tree_hotplug(void)
{ … }

/*
 * Reset the ->expmask values in the rcu_node tree in preparation for
 * a new expedited grace period.
 */
static void __maybe_unused sync_exp_reset_tree(void)
{ … }

/*
 * Return non-zero if there is no RCU expedited grace period in progress
 * for the specified rcu_node structure, in other words, if all CPUs and
 * tasks covered by the specified rcu_node structure have done their bit
 * for the current expedited grace period.
 */
static bool sync_rcu_exp_done(struct rcu_node *rnp)
{ … }

/*
 * Like sync_rcu_exp_done(), but where the caller does not hold the
 * rcu_node's ->lock.
 */
static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp)
{ … }

/*
 * Report the exit from RCU read-side critical section for the last task
 * that queued itself during or before the current expedited preemptible-RCU
 * grace period.  This event is reported either to the rcu_node structure on
 * which the task was queued or to one of that rcu_node structure's ancestors,
 * recursively up the tree.  (Calm down, calm down, we do the recursion
 * iteratively!)
 */
static void __rcu_report_exp_rnp(struct rcu_node *rnp,
				 bool wake, unsigned long flags)
	__releases(rnp->lock)
{ … }

/*
 * Report expedited quiescent state for specified node.  This is a
 * lock-acquisition wrapper function for __rcu_report_exp_rnp().
 */
static void __maybe_unused rcu_report_exp_rnp(struct rcu_node *rnp, bool wake)
{ … }

/*
 * Report expedited quiescent state for multiple CPUs, all covered by the
 * specified leaf rcu_node structure.
 */
static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
				    unsigned long mask, bool wake)
{ … }

/*
 * Report expedited quiescent state for specified rcu_data (CPU).
 */
static void rcu_report_exp_rdp(struct rcu_data *rdp)
{ … }

/* Common code for work-done checking. */
static bool sync_exp_work_done(unsigned long s)
{ … }

/*
 * Funnel-lock acquisition for expedited grace periods.  Returns true
 * if some other task completed an expedited grace period that this task
 * can piggy-back on, and with no mutex held.  Otherwise, returns false
 * with the mutex held, indicating that the caller must actually do the
 * expedited grace period.
 */
static bool exp_funnel_lock(unsigned long s)
{ … }

/*
 * Select the CPUs within the specified rcu_node that the upcoming
 * expedited grace period needs to wait for.
 */
static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
{ … }

static void rcu_exp_sel_wait_wake(unsigned long s);

static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
{ … }

static inline bool rcu_exp_worker_started(void)
{ … }

static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp)
{ … }

static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
{ … }

static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
{ … }

/*
 * Work-queue handler to drive an expedited grace period forward.
 */
static void wait_rcu_exp_gp(struct kthread_work *wp)
{ … }

static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
{ … }

/*
 * Select the nodes that the upcoming expedited grace period needs
 * to wait for.
 */
static void sync_rcu_exp_select_cpus(void)
{ … }

/*
 * Wait for the expedited grace period to elapse, within time limit.
 * If the time limit is exceeded without the grace period elapsing,
 * return false, otherwise return true.
 */
static bool synchronize_rcu_expedited_wait_once(long tlimit)
{ … }

/*
 * Wait for the expedited grace period to elapse, issuing any needed
 * RCU CPU stall warnings along the way.
 */
static void synchronize_rcu_expedited_wait(void)
{ … }

/*
 * Wait for the current expedited grace period to complete, and then
 * wake up everyone who piggybacked on the just-completed expedited
 * grace period.  Also update all the ->exp_seq_rq counters as needed
 * in order to avoid counter-wrap problems.
 */
static void rcu_exp_wait_wake(unsigned long s)
{ … }

/*
 * Common code to drive an expedited grace period forward, used by
 * workqueues and mid-boot-time tasks.
 */
static void rcu_exp_sel_wait_wake(unsigned long s)
{ … }

#ifdef CONFIG_PREEMPT_RCU

/*
 * Remote handler for smp_call_function_single().  If there is an
 * RCU read-side critical section in effect, request that the
 * next rcu_read_unlock() record the quiescent state up the
 * ->expmask fields in the rcu_node tree.  Otherwise, immediately
 * report the quiescent state.
 */
static void rcu_exp_handler(void *unused)
{ … }

/* PREEMPTION=y, so no PREEMPTION=n expedited grace period to clean up after. */
static void sync_sched_exp_online_cleanup(int cpu)
{ … }

/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, printing out the tid of each that is blocking the current
 * expedited grace period.
 */
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
{ … }

/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, dumping the stack of each that is blocking the current
 * expedited grace period.
 */
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
{ … }

#else /* #ifdef CONFIG_PREEMPT_RCU */

/*
 * Request an expedited quiescent state from the CPU this runs on.
 *
 * Sets this CPU's ->cpu_no_qs.b.exp, which is the flag that
 * rcu_exp_handler() and sync_sched_exp_online_cleanup() test in order to
 * skip a CPU from which a quiescent state has already been requested,
 * then sets this CPU's ->rcu_urgent_qs, and finally sets the need-resched
 * indications for the current task.
 *
 * Both callers in this file run it on the CPU being asked: one from the
 * smp_call_function_single() handler, the other between get_cpu() and
 * put_cpu() with local_irq_save() in effect.
 */
static void rcu_exp_need_qs(void)
{
	/* Mark the expedited quiescent state as requested on this CPU. */
	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
	/* Store .exp before .rcu_urgent_qs. */
	smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
	/* Flag both the running task and the preempt state for resched. */
	set_tsk_need_resched(current);
	set_preempt_need_resched();
}

/*
 * Expedited-quiescent-state handler, invoked on each online non-idle CPU.
 * If the CPU can be shown to be quiescent right now, report that at once;
 * otherwise ask the CPU to supply a quiescent state later.
 */
static void rcu_exp_handler(void *unused)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp = rdp->mynode;
	bool in_preempt_or_bh;

	/* The leaf rcu_node is not waiting on this CPU. */
	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
		return;

	/* A quiescent state has already been requested from this CPU. */
	if (__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
		return;

	/* Nonzero preempt-disable or softirq count in the interrupted context? */
	in_preempt_or_bh = preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK);

	/*
	 * The preempt count can only be trusted when CONFIG_PREEMPT_COUNT
	 * is enabled; without it, only the from-idle check can justify an
	 * immediate report.
	 */
	if (rcu_is_cpu_rrupt_from_idle() ||
	    (IS_ENABLED(CONFIG_PREEMPT_COUNT) && !in_preempt_or_bh))
		rcu_report_exp_rdp(rdp);
	else
		rcu_exp_need_qs();
}

/*
 * Expedited cleanup at the end of a CPU-hotplug operation: if the current
 * expedited grace period still needs a quiescent state from @cpu and none
 * has been requested yet, request one, either directly when running on
 * @cpu or by sending that CPU an IPI.
 */
static void sync_sched_exp_online_cleanup(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	struct rcu_node *rnp = rdp->mynode;
	unsigned long irqflags;
	int curcpu;
	int err = 0;

	curcpu = get_cpu();

	/* Quiescent state not needed from this CPU, leave. */
	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
		goto out;

	/* Quiescent state already requested, leave. */
	if (READ_ONCE(rdp->cpu_no_qs.b.exp))
		goto out;

	if (curcpu != cpu) {
		/* Quiescent state needed on some other CPU, send IPI. */
		err = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
		goto out;
	}

	/* Quiescent state needed on the current CPU, so set it up locally. */
	local_irq_save(irqflags);
	rcu_exp_need_qs();
	local_irq_restore(irqflags);

out:
	put_cpu();
	/* Only the IPI path can leave a nonzero value here. */
	WARN_ON_ONCE(err);
}

/*
 * Non-preemptible-RCU stub.  Without preemptible RCU there are no tasks
 * blocked within RCU read-side critical sections that could be holding
 * up the current expedited grace period, so there is nothing to print
 * and the return value is always zero.
 */
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
{
	return 0;	/* No blocked tasks to report. */
}

/*
 * Non-preemptible-RCU stub.  Without preemptible RCU there are no tasks
 * blocked within RCU read-side critical sections that could be blocking
 * the current expedited grace period, so no per-task detail is printed.
 */
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
	/* Intentionally empty. */
}

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/**
 * synchronize_rcu_expedited - Brute-force RCU grace period
 *
 * Wait for an RCU grace period, but expedite it.  The basic idea is to
 * IPI all non-idle non-nohz online CPUs.  The IPI handler checks whether
 * the CPU is in an RCU critical section, and if so, it sets a flag that
 * causes the outermost rcu_read_unlock() to report the quiescent state
 * for RCU-preempt or asks the scheduler for help for RCU-sched.  On the
 * other hand, if the CPU is not in an RCU read-side critical section,
 * the IPI handler reports the quiescent state immediately.
 *
 * Although this is a great improvement over previous expedited
 * implementations, it is still unfriendly to real-time workloads, so is
 * thus not recommended for any sort of common-case code.  In fact, if
 * you are using synchronize_rcu_expedited() in a loop, please restructure
 * your code to batch your updates, and then use a single synchronize_rcu()
 * instead.
 *
 * This has the same semantics as (but is more brutal than) synchronize_rcu().
 */
void synchronize_rcu_expedited(void)
{ … }
EXPORT_SYMBOL_GPL(…);

/*
 * Ensure that start_poll_synchronize_rcu_expedited() has the expedited
 * RCU grace periods that it needs.
 */
static void sync_rcu_do_polled_gp(struct work_struct *wp)
{ … }

/**
 * start_poll_synchronize_rcu_expedited - Snapshot current RCU state and start expedited grace period
 *
 * Returns a cookie to pass to a call to cond_synchronize_rcu(),
 * cond_synchronize_rcu_expedited(), or poll_state_synchronize_rcu(),
 * allowing them to determine whether or not any sort of grace period has
 * elapsed in the meantime.  If the needed expedited grace period is not
 * already slated to start, initiates that grace period.
 */
unsigned long start_poll_synchronize_rcu_expedited(void)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * start_poll_synchronize_rcu_expedited_full - Take a full snapshot and start expedited grace period
 * @rgosp: Place to put snapshot of grace-period state
 *
 * Places the normal and expedited grace-period states in rgosp.  This
 * state value can be passed to a later call to cond_synchronize_rcu_full()
 * or poll_state_synchronize_rcu_full() to determine whether or not a
 * grace period (whether normal or expedited) has elapsed in the meantime.
 * If the needed expedited grace period is not already slated to start,
 * initiates that grace period.
 */
void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * cond_synchronize_rcu_expedited - Conditionally wait for an expedited RCU grace period
 *
 * @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited()
 *
 * If any type of full RCU grace period has elapsed since the earlier
 * call to get_state_synchronize_rcu(), start_poll_synchronize_rcu(),
 * or start_poll_synchronize_rcu_expedited(), just return.  Otherwise,
 * invoke synchronize_rcu_expedited() to wait for a full grace period.
 *
 * Yes, this function does not take counter wrap into account.
 * But counter wrap is harmless.  If the counter wraps, we have waited for
 * more than 2 billion grace periods (and way more on a 64-bit system!),
 * so waiting for a couple of additional grace periods should be just fine.
 *
 * This function provides the same memory-ordering guarantees that
 * would be provided by a synchronize_rcu() that was invoked at the call
 * to the function that provided @oldstate and that returned at the end
 * of this function.
 */
void cond_synchronize_rcu_expedited(unsigned long oldstate)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * cond_synchronize_rcu_expedited_full - Conditionally wait for an expedited RCU grace period
 * @rgosp: value from get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), or start_poll_synchronize_rcu_expedited_full()
 *
 * If a full RCU grace period has elapsed since the call to
 * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(),
 * or start_poll_synchronize_rcu_expedited_full() from which @rgosp was
 * obtained, just return.  Otherwise, invoke synchronize_rcu_expedited()
 * to wait for a full grace period.
 *
 * Yes, this function does not take counter wrap into account.
 * But counter wrap is harmless.  If the counter wraps, we have waited for
 * more than 2 billion grace periods (and way more on a 64-bit system!),
 * so waiting for a couple of additional grace periods should be just fine.
 *
 * This function provides the same memory-ordering guarantees that
 * would be provided by a synchronize_rcu() that was invoked at the call
 * to the function that provided @rgosp and that returned at the end of
 * this function.
 */
void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp)
{ … }
EXPORT_SYMBOL_GPL(…);
linux/kernel/rcu/tree_exp.h