linux/kernel/time/timer_migration.h

/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _KERNEL_TIME_MIGRATION_H
#define _KERNEL_TIME_MIGRATION_H

/* Per group capacity. Must be a power of 2! */
#define TMIGR_CHILDREN_PER_GROUP 8

/**
 * struct tmigr_event - a timer event associated to a CPU
 * @nextevt:	The node to enqueue an event in the parent group queue
 * @cpu:	The CPU to which this event belongs
 * @ignore:	Hint whether the event could be ignored; it is set when
 *		CPU or group is active;
 */
struct tmigr_event {
	struct timerqueue_node	nextevt;
	unsigned int		cpu;
	bool			ignore;
};

/**
 * struct tmigr_group - timer migration hierarchy group
 * @lock:		Lock protecting the event information and group hierarchy
 *			information during setup
 * @parent:		Pointer to the parent group. Pointer is updated when a
 *			new hierarchy level is added because of a CPU coming
 *			online the first time. Once it is set, the pointer will
 *			not be removed or updated. When accessing parent pointer
 *			lock less to decide whether to abort a propagation or
 *			not, it is not a problem. The worst outcome is an
 *			unnecessary/early CPU wake up. But do not access parent
 *			pointer several times in the same 'action' (like
 *			activation, deactivation, check for remote expiry,...)
 *			without holding the lock as it is not ensured that value
 *			will not change.
 * @groupevt:		Next event of the group which is only used when the
 *			group is !active. The group event is then queued into
 *			the parent timer queue.
 *			Ignore bit of @groupevt is set when the group is active.
 * @next_expiry:	Base monotonic expiry time of the next event of the
 *			group; It is used for the racy lockless check whether a
 *			remote expiry is required; it is always reliable
 * @events:		Timer queue for child events queued in the group
 * @migr_state:		State of the group (see union tmigr_state)
 * @level:		Hierarchy level of the group; Required during setup
 * @numa_node:		Required for setup only to make sure CPU and low level
 *			group information is NUMA local. It is set to NUMA node
 *			as long as the group level is per NUMA node (level <
 *			tmigr_crossnode_level); otherwise it is set to
 *			NUMA_NO_NODE
 * @num_children:	Counter of group children to make sure the group is only
 *			filled with TMIGR_CHILDREN_PER_GROUP; Required for setup
 *			only
 * @groupmask:		mask of the group in the parent group; is set during
 *			setup and will never change; can be read lockless
 * @list:		List head that is added to the per level
 *			tmigr_level_list; is required during setup when a
 *			new group needs to be connected to the existing
 *			hierarchy groups
 */
struct tmigr_group {
	raw_spinlock_t		lock;
	struct tmigr_group	*parent;
	struct tmigr_event	groupevt;
	u64			next_expiry;
	struct timerqueue_head	events;
	atomic_t		migr_state;
	unsigned int		level;
	int			numa_node;
	unsigned int		num_children;
	u8			groupmask;
	struct list_head	list;
};

/**
 * struct tmigr_cpu - timer migration per CPU group
 * @lock:		Lock protecting the tmigr_cpu group information
 * @online:		Indicates whether the CPU is online; In deactivate path
 *			it is required to know whether the migrator in the top
 *			level group is to be set offline, while a timer is
 *			pending. Then another online CPU needs to be notified to
 *			take over the migrator role. Furthermore the information
 *			is required in CPU hotplug path as the CPU is able to go
 *			idle before the timer migration hierarchy hotplug AP is
 *			reached. During this phase, the CPU has to handle the
 *			global timers on its own and must not act as a migrator.
 * @idle:		Indicates whether the CPU is idle in the timer migration
 *			hierarchy
 * @remote:		Is set when timers of the CPU are expired remotely
 * @tmgroup:		Pointer to the parent group
 * @groupmask:		mask of tmigr_cpu in the parent group
 * @wakeup:		Stores the first timer when the timer migration
 *			hierarchy is completely idle and remote expiry was done;
 *			is returned to timer code in the idle path and is only
 *			used in idle path.
 * @cpuevt:		CPU event which could be enqueued into the parent group
 */
struct tmigr_cpu {
	raw_spinlock_t		lock;
	bool			online;
	bool			idle;
	bool			remote;
	struct tmigr_group	*tmgroup;
	u8			groupmask;
	u64			wakeup;
	struct tmigr_event	cpuevt;
};

/**
 * union tmigr_state - state of tmigr_group
 * @state:	Combined version of the state - only used for atomic
 *		read/cmpxchg function
 * @struct:	Split version of the state - only use the struct members to
 *		update information to stay independent of endianness
 */
union tmigr_state {
	u32 state;
	/**
	 * struct - split state of tmigr_group
	 * @active:	Contains each mask bit of the active children
	 * @migrator:	Contains mask of the child which is migrator
	 * @seq:	Sequence counter needs to be increased when an update
	 *		to the tmigr_state is done. It prevents a race when
	 *		updates in the child groups are propagated in changed
	 *		order. Detailed information about the scenario is
	 *		given in the documentation at the begin of
	 *		timer_migration.c.
	 */
	struct {
		u8	active;
		u8	migrator;
		u16	seq;
	} __packed;
};

#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void tmigr_handle_remote(void);
extern bool tmigr_requires_handle_remote(void);
extern void tmigr_cpu_activate(void);
extern u64 tmigr_cpu_deactivate(u64 nextevt);
extern u64 tmigr_cpu_new_timer(u64 nextevt);
extern u64 tmigr_quick_check(u64 nextevt);
#else
static inline void tmigr_handle_remote(void) { }
static inline bool tmigr_requires_handle_remote(void) { return false; }
static inline void tmigr_cpu_activate(void) { }
#endif

#endif