linux/include/linux/sched.h

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_H
#define _LINUX_SCHED_H

/*
 * Define 'struct task_struct' and provide the main scheduler
 * APIs (schedule(), wakeup variants, etc.)
 */

#include <uapi/linux/sched.h>

#include <asm/current.h>
#include <asm/processor.h>
#include <linux/thread_info.h>
#include <linux/preempt.h>
#include <linux/cpumask_types.h>

#include <linux/cache.h>
#include <linux/irqflags_types.h>
#include <linux/smp_types.h>
#include <linux/pid_types.h>
#include <linux/sem_types.h>
#include <linux/shm.h>
#include <linux/kmsan_types.h>
#include <linux/mutex_types.h>
#include <linux/plist_types.h>
#include <linux/hrtimer_types.h>
#include <linux/timer_types.h>
#include <linux/seccomp_types.h>
#include <linux/nodemask_types.h>
#include <linux/refcount_types.h>
#include <linux/resource.h>
#include <linux/latencytop.h>
#include <linux/sched/prio.h>
#include <linux/sched/types.h>
#include <linux/signal_types.h>
#include <linux/syscall_user_dispatch_types.h>
#include <linux/mm_types_task.h>
#include <linux/netdevice_xmit.h>
#include <linux/task_io_accounting.h>
#include <linux/posix-timers_types.h>
#include <linux/restart_block.h>
#include <uapi/linux/rseq.h>
#include <linux/seqlock_types.h>
#include <linux/kcsan.h>
#include <linux/rv.h>
#include <linux/livepatch_sched.h>
#include <linux/uidgid_types.h>
#include <asm/kmap_size.h>

/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
struct bpf_run_ctx;
struct bpf_net_context;
struct capture_control;
struct cfs_rq;
struct fs_struct;
struct futex_pi_state;
struct io_context;
struct io_uring_task;
struct mempolicy;
struct nameidata;
struct nsproxy;
struct perf_event_context;
struct pid_namespace;
struct pipe_inode_info;
struct rcu_node;
struct reclaim_state;
struct robust_list_head;
struct root_domain;
struct rq;
struct sched_attr;
struct sched_dl_entity;
struct seq_file;
struct sighand_struct;
struct signal_struct;
struct task_delay_info;
struct task_group;
struct task_struct;
struct user_event_mm;

#include <linux/sched/ext.h>

/*
 * Task state bitmask. NOTE! These bits are also
 * encoded in fs/proc/array.c: get_task_state().
 *
 * We have two separate sets of flags: task->__state
 * is about runnability, while task->exit_state is
 * about the task exiting. Confusing, but this way
 * modifying one set can't modify the other one by
 * mistake.
 */

/* Used in tsk->__state: */
#define TASK_RUNNING
#define TASK_INTERRUPTIBLE
#define TASK_UNINTERRUPTIBLE
#define __TASK_STOPPED
#define __TASK_TRACED
/* Used in tsk->exit_state: */
#define EXIT_DEAD
#define EXIT_ZOMBIE
#define EXIT_TRACE
/* Used in tsk->__state again: */
#define TASK_PARKED
#define TASK_DEAD
#define TASK_WAKEKILL
#define TASK_WAKING
#define TASK_NOLOAD
#define TASK_NEW
#define TASK_RTLOCK_WAIT
#define TASK_FREEZABLE
#define __TASK_FREEZABLE_UNSAFE
#define TASK_FROZEN
#define TASK_STATE_MAX

#define TASK_ANY

/*
 * DO NOT ADD ANY NEW USERS !
 */
#define TASK_FREEZABLE_UNSAFE

/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE
#define TASK_STOPPED
#define TASK_TRACED

#define TASK_IDLE

/* Convenience macros for the sake of wake_up(): */
#define TASK_NORMAL

/* get_task_state(): */
#define TASK_REPORT

#define task_is_running(task)

#define task_is_traced(task)
#define task_is_stopped(task)
#define task_is_stopped_or_traced(task)

/*
 * Special states are those that do not use the normal wait-loop pattern. See
 * the comment with set_special_state().
 */
#define is_special_task_state(state)

#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
#define debug_normal_state_change(state_value)

#define debug_special_state_change(state_value)

#define debug_rtlock_wait_set_state()

#define debug_rtlock_wait_restore_state()

#else
#define debug_normal_state_change(state_value)
#define debug_special_state_change(state_value)
#define debug_rtlock_wait_set_state()
#define debug_rtlock_wait_restore_state()
#endif

/*
 * set_current_state() includes a barrier so that the write of current->__state
 * is correctly serialised wrt the caller's subsequent test of whether to
 * actually sleep:
 *
 *   for (;;) {
 *	set_current_state(TASK_UNINTERRUPTIBLE);
 *	if (CONDITION)
 *	   break;
 *
 *	schedule();
 *   }
 *   __set_current_state(TASK_RUNNING);
 *
 * If the caller does not need such serialisation (because, for instance, the
 * CONDITION test and condition change and wakeup are under the same lock) then
 * use __set_current_state().
 *
 * The above is typically ordered against the wakeup, which does:
 *
 *   CONDITION = 1;
 *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
 *
 * where wake_up_state()/try_to_wake_up() executes a full memory barrier before
 * accessing p->__state.
 *
 * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is,
 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
 * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
 *
 * However, with slightly different timing the wakeup TASK_RUNNING store can
 * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
 * a problem either because that will result in one extra go around the loop
 * and our @cond test will save the day.
 *
 * Also see the comments of try_to_wake_up().
 */
#define __set_current_state(state_value)

#define set_current_state(state_value)

/*
 * set_special_state() should be used for those states when the blocking task
 * cannot use the regular condition-based wait-loop. In that case we must
 * serialize against wakeups such that any possible in-flight TASK_RUNNING
 * stores will not collide with our state change.
 */
#define set_special_state(state_value)
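
/*
 * Illustrative sketch only ('lock' is a placeholder, not something defined in
 * this header): a special-state transition is typically made under a lock
 * that also serializes the corresponding wakeup, e.g.:
 *
 *	spin_lock_irq(&lock);
 *	set_special_state(TASK_STOPPED);
 *	spin_unlock_irq(&lock);
 *	schedule();
 */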

/*
 * PREEMPT_RT specific variants for "sleeping" spin/rwlocks
 *
 * RT's spin/rwlock substitutions are state preserving. The state of the
 * task when blocking on the lock is saved in task_struct::saved_state and
 * restored after the lock has been acquired.  These operations are
 * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT
 * lock related wakeups while the task is blocked on the lock are
 * redirected to operate on task_struct::saved_state to ensure that these
 * are not dropped. On restore task_struct::saved_state is set to
 * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail.
 *
 * The lock operation looks like this:
 *
 *	current_save_and_set_rtlock_wait_state();
 *	for (;;) {
 *		if (try_lock())
 *			break;
 *		raw_spin_unlock_irq(&lock->wait_lock);
 *		schedule_rtlock();
 *		raw_spin_lock_irq(&lock->wait_lock);
 *		set_current_state(TASK_RTLOCK_WAIT);
 *	}
 *	current_restore_rtlock_saved_state();
 */
#define current_save_and_set_rtlock_wait_state()

#define current_restore_rtlock_saved_state()

#define get_current_state()

/*
 * Define the task command name length as an enum so that it is visible to
 * BPF programs.
 */
enum {};

extern void sched_tick(void);

#define MAX_SCHEDULE_TIMEOUT

extern long schedule_timeout(long timeout);
extern long schedule_timeout_interruptible(long timeout);
extern long schedule_timeout_killable(long timeout);
extern long schedule_timeout_uninterruptible(long timeout);
extern long schedule_timeout_idle(long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);
asmlinkage void preempt_schedule_irq(void);
#ifdef CONFIG_PREEMPT_RT
extern void schedule_rtlock(void);
#endif

extern int __must_check io_schedule_prepare(void);
extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
extern void io_schedule(void);
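
/*
 * Minimal usage sketch for the timeout helpers above ('CONDITION' and
 * 'remaining' are placeholders): sleep for roughly 100ms unless woken
 * earlier:
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	if (!CONDITION)
 *		remaining = schedule_timeout(msecs_to_jiffies(100));
 *	__set_current_state(TASK_RUNNING);
 *
 * schedule_timeout_interruptible() and friends bundle the state change and
 * the schedule_timeout() call into a single helper.
 */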

/**
 * struct prev_cputime - snapshot of system and user cputime
 * @utime: time spent in user mode
 * @stime: time spent in system mode
 * @lock: protects the above two fields
 *
 * Stores previous user/system time values such that we can guarantee
 * monotonicity.
 */
struct prev_cputime {};

enum vtime_state {};

struct vtime {};

/*
 * Utilization clamp constraints.
 * @UCLAMP_MIN:	Minimum utilization
 * @UCLAMP_MAX:	Maximum utilization
 * @UCLAMP_CNT:	Utilization clamp constraints count
 */
enum uclamp_id {};

#ifdef CONFIG_SMP
extern struct root_domain def_root_domain;
extern struct mutex sched_domains_mutex;
#endif

struct sched_param {};

struct sched_info {};

/*
 * Integer metrics need fixed point arithmetic, e.g., sched/fair
 * has a few: load, load_avg, util_avg, freq, and capacity.
 *
 * We define a basic fixed point arithmetic range, and then formalize
 * all these metrics based on that basic range.
 */
#define SCHED_FIXEDPOINT_SHIFT
#define SCHED_FIXEDPOINT_SCALE

/* Increase resolution of cpu_capacity calculations */
#define SCHED_CAPACITY_SHIFT
#define SCHED_CAPACITY_SCALE
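
/*
 * Example of the fixed point convention (informal): with the shift value used
 * by current kernels (10), SCHED_FIXEDPOINT_SCALE (1024) represents 1.0, so a
 * CPU that is busy half of the time has a utilization of roughly 512, i.e.
 * SCHED_CAPACITY_SCALE / 2.
 */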

struct load_weight {};

/*
 * The load/runnable/util_avg accumulates an infinite geometric series
 * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
 *
 * [load_avg definition]
 *
 *   load_avg = runnable% * scale_load_down(load)
 *
 * [runnable_avg definition]
 *
 *   runnable_avg = runnable% * SCHED_CAPACITY_SCALE
 *
 * [util_avg definition]
 *
 *   util_avg = running% * SCHED_CAPACITY_SCALE
 *
 * where runnable% is the time ratio that a sched_entity is runnable and
 * running% the time ratio that a sched_entity is running.
 *
 * For cfs_rq, they are the aggregated values of all runnable and blocked
 * sched_entities.
 *
 * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU
 * capacity scaling. The scaling is done through the rq_clock_pelt that is used
 * for computing those signals (see update_rq_clock_pelt())
 *
 * N.B., the above ratios (runnable% and running%) themselves are in the
 * range of [0, 1]. To do fixed point arithmetic, we therefore scale them
 * to as large a range as necessary. This is for example reflected by
 * util_avg's SCHED_CAPACITY_SCALE.
 *
 * [Overflow issue]
 *
 * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
 * with the highest load (=88761), always runnable on a single cfs_rq,
 * and should not overflow as the number already hits PID_MAX_LIMIT.
 *
 * For all other cases (including 32-bit kernels), struct load_weight's
 * weight will overflow before load_avg does, because:
 *
 *    Max(load_avg) <= Max(load.weight)
 *
 * Then it is the load_weight's responsibility to consider overflow
 * issues.
 */
struct sched_avg {} ____cacheline_aligned;

/*
 * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
 * updates. When a task is dequeued, its util_est should not be updated if its
 * util_avg has not been updated in the meantime.
 * This information is mapped into the MSB bit of util_est at dequeue time.
 * Since the maximum value of util_est for a task is 1024 (the PELT util_avg
 * ceiling for a task), it is safe to use the MSB.
 */
#define UTIL_EST_WEIGHT_SHIFT
#define UTIL_AVG_UNCHANGED

struct sched_statistics {} ____cacheline_aligned;

struct sched_entity {};

struct sched_rt_entity {} __randomize_layout;

dl_server_has_tasks_f;
dl_server_pick_f;

struct sched_dl_entity {};

#ifdef CONFIG_UCLAMP_TASK
/* Number of utilization clamp buckets (shorter alias) */
#define UCLAMP_BUCKETS

/*
 * Utilization clamp for a scheduling entity
 * @value:		clamp value "assigned" to a se
 * @bucket_id:		bucket index corresponding to the "assigned" value
 * @active:		the se is currently refcounted in a rq's bucket
 * @user_defined:	the requested clamp value comes from user-space
 *
 * The bucket_id is the index of the clamp bucket matching the clamp value
 * which is pre-computed and stored to avoid expensive integer divisions in
 * the fast path.
 *
 * The active bit is set whenever a task has got an "effective" value assigned,
 * which can be different from the clamp value "requested" from user-space.
 * This makes it possible to know that a task is refcounted in the rq's bucket
 * corresponding to the "effective" bucket_id.
 *
 * The user_defined bit is set whenever a task has got a task-specific clamp
 * value requested from user-space, i.e. the system defaults act on this task
 * only as a restriction. This allows default clamps to be relaxed when a less
 * restrictive task-specific value has been requested, thus making it possible
 * to implement a "nice" semantic. For example, a task running with a 20%
 * default boost can still drop its own boosting to 0%.
 */
struct uclamp_se {};
#endif /* CONFIG_UCLAMP_TASK */

rcu_special;

enum perf_event_task_context {};

/*
 * Number of contexts where an event can trigger:
 *      task, softirq, hardirq, nmi.
 */
#define PERF_NR_CONTEXTS

struct wake_q_node {};

struct kmap_ctrl {};

struct task_struct {};

#define TASK_REPORT_IDLE
#define TASK_REPORT_MAX

static inline unsigned int __task_state_index(unsigned int tsk_state,
					      unsigned int tsk_exit_state)
{}

static inline unsigned int task_state_index(struct task_struct *tsk)
{}

static inline char task_index_to_char(unsigned int state)
{}

static inline char task_state_to_char(struct task_struct *tsk)
{}

extern struct pid *cad_pid;

/*
 * Per process flags
 */
#define PF_VCPU
#define PF_IDLE
#define PF_EXITING
#define PF_POSTCOREDUMP
#define PF_IO_WORKER
#define PF_WQ_WORKER
#define PF_FORKNOEXEC
#define PF_MCE_PROCESS
#define PF_SUPERPRIV
#define PF_DUMPCORE
#define PF_SIGNALED
#define PF_MEMALLOC
#define PF_NPROC_EXCEEDED
#define PF_USED_MATH
#define PF_USER_WORKER
#define PF_NOFREEZE
#define PF__HOLE__00010000
#define PF_KSWAPD
#define PF_MEMALLOC_NOFS
#define PF_MEMALLOC_NOIO
#define PF_LOCAL_THROTTLE
#define PF_KTHREAD
#define PF_RANDOMIZE
#define PF_MEMALLOC_NORECLAIM
#define PF_MEMALLOC_NOWARN
#define PF__HOLE__02000000
#define PF_NO_SETAFFINITY
#define PF_MCE_EARLY
#define PF_MEMALLOC_PIN
#define PF_BLOCK_TS
#define PF__HOLE__40000000
#define PF_SUSPEND_TASK

/*
 * Only the _current_ task can read/write to tsk->flags, but other
 * tasks can access tsk->flags in read-only mode, for example
 * via tsk_used_math() (as during threaded core dumping).
 * There is however an exception to this rule during ptrace
 * or during fork: the ptracer task is allowed to write to the
 * child->flags of its traced child (the same goes for fork: the
 * parent can write to child->flags), because we're guaranteed the
 * child is not running and in turn not changing child->flags
 * at the same time the parent does it.
 */
#define clear_stopped_child_used_math(child)
#define set_stopped_child_used_math(child)
#define clear_used_math()
#define set_used_math()

#define conditional_stopped_child_used_math(condition, child)

#define conditional_used_math(condition)

#define copy_to_stopped_child_used_math(child)

/* NOTE: this will return 0 or PF_USED_MATH; it will never return 1 */
#define tsk_used_math(p)
#define used_math()

static __always_inline bool is_percpu_thread(void)
{}

/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS
#define PFA_SPREAD_PAGE
#define PFA_SPREAD_SLAB
#define PFA_SPEC_SSB_DISABLE
#define PFA_SPEC_SSB_FORCE_DISABLE
#define PFA_SPEC_IB_DISABLE
#define PFA_SPEC_IB_FORCE_DISABLE
#define PFA_SPEC_SSB_NOEXEC

#define TASK_PFA_TEST(name, func)

#define TASK_PFA_SET(name, func)

#define TASK_PFA_CLEAR(name, func)

TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)

TASK_PFA_TEST(SPREAD_PAGE, spread_page)
TASK_PFA_SET(SPREAD_PAGE, spread_page)
TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)

TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)

TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)

TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec)
TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec)
TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec)

TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)

TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)

TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)

static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{}

extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern int task_can_attach(struct task_struct *p);
extern int dl_bw_alloc(int cpu, u64 dl_bw);
extern void dl_bw_free(int cpu, u64 dl_bw);
#ifdef CONFIG_SMP

/* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);

/**
 * set_cpus_allowed_ptr - set CPU affinity mask of a task
 * @p: the task
 * @new_mask: CPU affinity mask
 *
 * Return: zero if successful, or a negative error code
 */
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
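
/*
 * Typical usage sketch: pinning a task to a single CPU via the cpumask
 * helpers:
 *
 *	ret = set_cpus_allowed_ptr(p, cpumask_of(cpu));
 *
 * A zero return means the new mask was applied; the task is migrated if it
 * is currently running on a CPU outside the mask.
 */
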
extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
extern void release_user_cpus_ptr(struct task_struct *p);
extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask);
extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
#else
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
}
static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
	/* Opencoded cpumask_test_cpu(0, new_mask) to avoid dependency on cpumask.h */
	if ((*cpumask_bits(new_mask) & 1) == 0)
		return -EINVAL;
	return 0;
}
static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node)
{
	if (src->user_cpus_ptr)
		return -EINVAL;
	return 0;
}
static inline void release_user_cpus_ptr(struct task_struct *p)
{
	WARN_ON(p->user_cpus_ptr);
}

static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
{
	return 0;
}
#endif

extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);

/**
 * task_nice - return the nice value of a given task.
 * @p: the task in question.
 *
 * Return: The nice value [ -20 ... 0 ... 19 ].
 */
static inline int task_nice(const struct task_struct *p)
{}

extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
extern int available_idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern void sched_set_fifo(struct task_struct *p);
extern void sched_set_fifo_low(struct task_struct *p);
extern void sched_set_normal(struct task_struct *p, int nice);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
extern struct task_struct *idle_task(int cpu);
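
/*
 * Sketch: kernel threads that need realtime priority are expected to use the
 * sched_set_fifo*() helpers above rather than picking a priority themselves,
 * e.g. a latency-sensitive kthread may simply do:
 *
 *	sched_set_fifo(current);
 */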

/**
 * is_idle_task - is the specified task an idle task?
 * @p: the task in question.
 *
 * Return: 1 if @p is an idle task. 0 otherwise.
 */
static __always_inline bool is_idle_task(const struct task_struct *p)
{}

extern struct task_struct *curr_task(int cpu);
extern void ia64_set_curr_task(int cpu, struct task_struct *p);

void yield(void);

thread_union;

#ifndef CONFIG_THREAD_INFO_IN_TASK
extern struct thread_info init_thread_info;
#endif

extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];

#ifdef CONFIG_THREAD_INFO_IN_TASK
#define task_thread_info(task)
#elif !defined(__HAVE_THREAD_FUNCTIONS)
#define task_thread_info(task)
#endif

/*
 * find a task by one of its numerical ids
 *
 * find_task_by_pid_ns():
 *      finds a task by its pid in the specified namespace
 * find_task_by_vpid():
 *      finds a task by its virtual pid
 *
 * see also find_vpid() etc in include/linux/pid.h
 */

extern struct task_struct *find_task_by_vpid(pid_t nr);
extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns);

/*
 * find a task by its virtual pid and get the task struct
 */
extern struct task_struct *find_get_task_by_vpid(pid_t nr);
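
/*
 * Lookup sketch: find_task_by_vpid() returns a pointer that is only
 * guaranteed to stay valid inside an RCU read-side critical section, so a
 * caller that wants to use the task afterwards takes a reference:
 *
 *	rcu_read_lock();
 *	p = find_task_by_vpid(nr);
 *	if (p)
 *		get_task_struct(p);
 *	rcu_read_unlock();
 *
 * which is essentially what find_get_task_by_vpid() packages up.
 */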

extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
extern void wake_up_new_task(struct task_struct *tsk);
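
/*
 * Waker-side sketch matching the wait-loop documented above for
 * set_current_state() ('CONDITION' is a placeholder):
 *
 *	CONDITION = 1;
 *	wake_up_process(p);
 *
 * wake_up_process() wakes TASK_NORMAL sleepers, while wake_up_state() takes
 * an explicit state mask to match against.
 */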

#ifdef CONFIG_SMP
extern void kick_process(struct task_struct *tsk);
#else
static inline void kick_process(struct task_struct *tsk) { }
#endif

extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);

static inline void set_task_comm(struct task_struct *tsk, const char *from)
{}

extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
#define get_task_comm(buf, tsk)
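
/*
 * Usage sketch for get_task_comm(): the destination is expected to be a
 * buffer of TASK_COMM_LEN bytes:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	get_task_comm(comm, tsk);
 */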

#ifdef CONFIG_SMP
static __always_inline void scheduler_ipi(void)
{}
#else
static inline void scheduler_ipi(void) { }
#endif

extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);

/*
 * Set thread flags in another task's structures.
 * See asm/thread_info.h for the available TIF_xxxx flags.
 */
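
/*
 * For example (sketch), signal delivery marks a target task as having a
 * pending signal with:
 *
 *	set_tsk_thread_flag(t, TIF_SIGPENDING);
 */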
static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
{}

static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{}

static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag,
					  bool value)
{}

static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{}

static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{}

static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
{}

static inline void set_tsk_need_resched(struct task_struct *tsk)
{}

static inline void clear_tsk_need_resched(struct task_struct *tsk)
{}

static inline int test_tsk_need_resched(struct task_struct *tsk)
{}

/*
 * cond_resched() and cond_resched_lock(): latency reduction via
 * explicit rescheduling in places that are safe. The return
 * value indicates whether a reschedule was done in fact.
 * cond_resched_lock() will drop the spinlock before scheduling and
 * reacquire it afterwards.
 */
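
/*
 * Typical usage sketch (process_item() is a made-up helper): long-running
 * loops in process context sprinkle cond_resched() so that, on
 * non-preemptible kernels, other tasks still get a chance to run:
 *
 *	list_for_each_entry(item, &list, node) {
 *		process_item(item);
 *		cond_resched();
 *	}
 */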
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
extern int __cond_resched(void);

#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)

void sched_dynamic_klp_enable(void);
void sched_dynamic_klp_disable(void);

DECLARE_STATIC_CALL(cond_resched, __cond_resched);

static __always_inline int _cond_resched(void)
{}

#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)

extern int dynamic_cond_resched(void);

static __always_inline int _cond_resched(void)
{
	return dynamic_cond_resched();
}

#else /* !CONFIG_PREEMPTION */

static inline int _cond_resched(void)
{
	klp_sched_try_switch();
	return __cond_resched();
}

#endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */

#else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */

static inline int _cond_resched(void)
{
	klp_sched_try_switch();
	return 0;
}

#endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */

#define cond_resched()

extern int __cond_resched_lock(spinlock_t *lock);
extern int __cond_resched_rwlock_read(rwlock_t *lock);
extern int __cond_resched_rwlock_write(rwlock_t *lock);

#define MIGHT_RESCHED_RCU_SHIFT
#define MIGHT_RESCHED_PREEMPT_MASK

#ifndef CONFIG_PREEMPT_RT
/*
 * Non-RT kernels have an elevated preempt count due to the held lock,
 * but are not allowed to be inside an RCU read-side critical section.
 */
#define PREEMPT_LOCK_RESCHED_OFFSETS
#else
/*
 * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
 * cond_resched*lock() has to take that into account because it checks for
 * preempt_count() and rcu_preempt_depth().
 */
#define PREEMPT_LOCK_RESCHED_OFFSETS
#endif

#define cond_resched_lock(lock)

#define cond_resched_rwlock_read(lock)

#define cond_resched_rwlock_write(lock)

static __always_inline bool need_resched(void)
{}

/*
 * Wrappers for p->thread_info->cpu access. No-op on UP.
 */
#ifdef CONFIG_SMP

static inline unsigned int task_cpu(const struct task_struct *p)
{}

extern void set_task_cpu(struct task_struct *p, unsigned int cpu);

#else

static inline unsigned int task_cpu(const struct task_struct *p)
{
	return 0;
}

static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
{
}

#endif /* CONFIG_SMP */

extern bool sched_task_on_rq(struct task_struct *p);
extern unsigned long get_wchan(struct task_struct *p);
extern struct task_struct *cpu_curr_snapshot(int cpu);

#include <linux/spinlock.h>

/*
 * In order to reduce various lock holder preemption latencies, provide an
 * interface to see if a vCPU is currently running or not.
 *
 * This allows us to terminate optimistic spin loops and block, analogous to
 * the native optimistic spin heuristic of testing if the lock owner task is
 * running or not.
 */
#ifndef vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
{
	return false;
}
#endif
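
/*
 * Sketch of the intended use ('lock->owner' is a made-up field of a
 * hypothetical lock type): spin only while the owner task is actually
 * running on its CPU:
 *
 *	while (READ_ONCE(lock->owner) == owner) {
 *		if (need_resched() || vcpu_is_preempted(task_cpu(owner)))
 *			break;
 *		cpu_relax();
 *	}
 */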

extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);

#ifndef TASK_SIZE_OF
#define TASK_SIZE_OF
#endif

#ifdef CONFIG_SMP
static inline bool owner_on_cpu(struct task_struct *owner)
{}

/* Returns effective CPU energy utilization, as seen by the scheduler */
unsigned long sched_cpu_util(int cpu);
#endif /* CONFIG_SMP */

#ifdef CONFIG_SCHED_CORE
extern void sched_core_free(struct task_struct *tsk);
extern void sched_core_fork(struct task_struct *p);
extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
				unsigned long uaddr);
extern int sched_core_idle_cpu(int cpu);
#else
static inline void sched_core_free(struct task_struct *tsk) { }
static inline void sched_core_fork(struct task_struct *p) { }
static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
#endif

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

#ifdef CONFIG_MEM_ALLOC_PROFILING
static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
{
	swap(current->alloc_tag, tag);
	return tag;
}

static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
{
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
	WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
#endif
	current->alloc_tag = old;
}
#else
#define alloc_tag_save(_tag)
#define alloc_tag_restore(_tag, _old)
#endif
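
/*
 * Pairing sketch for the helpers above (do_allocations() is a made-up
 * placeholder): code that allocates on behalf of another allocation site
 * temporarily installs that site's tag and restores the previous one when
 * done:
 *
 *	struct alloc_tag *old = alloc_tag_save(tag);
 *
 *	do_allocations();
 *	alloc_tag_restore(tag, old);
 */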

#endif