// SPDX-License-Identifier: GPL-2.0
/* kernel/rwsem.c: R/W semaphores, public implementation
 *
 * Written by David Howells ([email protected]).
 * Derived from asm-i386/semaphore.h
 *
 * Writer lock-stealing by Alex Shi <[email protected]>
 * and Michel Lespinasse <[email protected]>
 *
 * Optimistic spinning by Tim Chen <[email protected]>
 * and Davidlohr Bueso <[email protected]>. Based on mutexes.
 *
 * Rwsem count bit fields re-definition and rwsem rearchitecture by
 * Waiman Long <[email protected]> and
 * Peter Zijlstra <[email protected]>.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>

#include <trace/events/lock.h>

#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"

/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 *  - Bit 0: RWSEM_READER_OWNED - rwsem may be owned by readers (just a hint)
 *  - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
 *
 * When the rwsem is reader-owned and a spinning writer has timed out,
 * the nonspinnable bit will be set to disable optimistic spinning.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct
 * pointer into the owner field with the RWSEM_READER_OWNED bit set.
 * On unlock, the owner field will largely be left untouched. So
 * for a free or reader-owned rwsem, the owner value may contain
 * information about the last reader that acquired the rwsem.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem, especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 *
 * Fast-path reader optimistic lock stealing is supported when the rwsem
 * was previously owned by a writer and the following conditions are met:
 *  - the rwsem is not currently writer owned
 *  - the handoff bit isn't set.
 */
#define RWSEM_READER_OWNED	…
#define RWSEM_NONSPINNABLE	…
#define RWSEM_OWNER_FLAGS_MASK	…

#ifdef CONFIG_DEBUG_RWSEMS
#define DEBUG_RWSEMS_WARN_ON(c, sem)	…
#else
#define DEBUG_RWSEMS_WARN_ON	…
#endif

/*
 * On 64-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-62 - 55-bit reader count
 * Bit  63   - read fail bit
 *
 * On 32-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-30 - 23-bit reader count
 * Bit  31   - read fail bit
 *
 * It is not likely that the most significant bit (read fail bit) will ever
 * be set. This guard bit is still checked anyway in the down_read() fastpath
 * just in case we need to use up more of the reader bits for other purposes
 * in the future.
 *
 * atomic_long_fetch_add() is used to obtain the reader lock, whereas
 * atomic_long_cmpxchg() is used to obtain the writer lock.
 */
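/*
 * Illustrative sketch only: the helpers below decode a count word laid out
 * as described above (bit 0 writer locked, bit 1 waiters, bit 2 handoff,
 * reader count starting at bit 8, read-fail guard in the top bit). The
 * EXAMPLE_* and example_* names are hypothetical stand-ins; the real
 * RWSEM_* definitions follow further below with their values elided.
 */
#define EXAMPLE_WRITER_LOCKED	(1UL << 0)
#define EXAMPLE_FLAG_WAITERS	(1UL << 1)
#define EXAMPLE_FLAG_HANDOFF	(1UL << 2)
#define EXAMPLE_READER_SHIFT	8
#define EXAMPLE_READ_FAIL	(1UL << (BITS_PER_LONG - 1))

/* True if a writer currently holds the lock. */
static inline bool example_write_locked(long count)
{
	return count & EXAMPLE_WRITER_LOCKED;
}

/* Number of readers recorded in the count (guard bit masked off). */
static inline long example_reader_count(long count)
{
	return (count & ~EXAMPLE_READ_FAIL) >> EXAMPLE_READER_SHIFT;
}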
/*
 * There are three places where the lock handoff bit may be set or cleared.
 * 1) rwsem_mark_wake() for readers		-- set, clear
 * 2) rwsem_try_write_lock() for writers	-- set, clear
 * 3) rwsem_del_waiter()			-- clear
 *
 * For all the above cases, wait_lock will be held. A writer must also
 * be the first one in the wait_list to be eligible for setting the handoff
 * bit. So concurrent setting/clearing of the handoff bit is not possible.
 */
#define RWSEM_WRITER_LOCKED	…
#define RWSEM_FLAG_WAITERS	…
#define RWSEM_FLAG_HANDOFF	…
#define RWSEM_FLAG_READFAIL	…

#define RWSEM_READER_SHIFT	…
#define RWSEM_READER_BIAS	…
#define RWSEM_READER_MASK	…
#define RWSEM_WRITER_MASK	…
#define RWSEM_LOCK_MASK		…
#define RWSEM_READ_FAILED_MASK	…

/*
 * All writes to owner are protected by WRITE_ONCE() to make sure that
 * store tearing can't happen as optimistic spinners may read and use
 * the owner value concurrently without taking the lock. Reads from owner,
 * however, may not need READ_ONCE() as long as the pointer value is only
 * used for comparison and isn't being dereferenced.
 *
 * Both rwsem_{set,clear}_owner() functions should be in the same
 * preempt disable section as the atomic op that changes sem->count.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem) { … }

static inline void rwsem_clear_owner(struct rw_semaphore *sem) { … }

/*
 * Test the flags in the owner field.
 */
static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags) { … }

/*
 * The task_struct pointer of the last owning reader will be left in
 * the owner field.
 *
 * Note that the owner value just indicates that the task has owned the rwsem
 * previously; it may not be the real owner or one of the real owners
 * anymore when that field is examined, so take it with a grain of salt.
 *
 * The reader non-spinnable bit is preserved.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner) { … }

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) { … }

/*
 * Return true if the rwsem is owned by a reader.
 */
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) { … }

#ifdef CONFIG_DEBUG_RWSEMS
/*
 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
 * is a task pointer in the owner field of a reader-owned rwsem, it is the
 * real owner or one of the real owners. The only exception is when the
 * unlock is done by up_read_non_owner().
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { … }
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

/*
 * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
 * remains set. Otherwise, the operation will be aborted.
 */
static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem) { … }

static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp) { … }

static inline bool rwsem_write_trylock(struct rw_semaphore *sem) { … }

/*
 * Return just the real task structure pointer of the owner
 */
static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) { … }

/*
 * Return the real task structure pointer of the owner and the embedded
 * flags in the owner. pflags must be non-NULL.
 */
static inline struct task_struct *
rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags) { … }
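/*
 * Illustrative sketch only: given the two low owner bits documented at the
 * top of this file (bit 0 = reader-owned hint, bit 1 = nonspinnable),
 * splitting the owner word into a task pointer plus flags, and composing a
 * reader-owned word that preserves the nonspinnable hint, look roughly like
 * this. The example_* and EXAMPLE_* names are hypothetical; the real
 * (elided) helpers are rwsem_owner()/rwsem_owner_flags() and
 * __rwsem_set_reader_owned() above.
 */
#define EXAMPLE_OWNER_FLAGS	(3UL)	/* bits 0-1 of the owner word */

static inline struct task_struct *
example_split_owner(unsigned long owner, unsigned long *pflags)
{
	*pflags = owner & EXAMPLE_OWNER_FLAGS;
	return (struct task_struct *)(owner & ~EXAMPLE_OWNER_FLAGS);
}

static inline unsigned long
example_reader_owner_word(struct task_struct *tsk, unsigned long old_owner)
{
	/* Tag as reader-owned (bit 0) and keep the nonspinnable hint (bit 1). */
	return (unsigned long)tsk | (1UL << 0) | (old_owner & (1UL << 1));
}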
/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED bit isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has the RWSEM_READER_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a reader-owned
 * lock, as the readers may be in the process of backing out from the count
 * and a writer has just released the lock. So another writer may steal
 * the lock immediately after that.
 */

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key) { … }
EXPORT_SYMBOL(…);

enum rwsem_waiter_type { … };

struct rwsem_waiter { … };

#define rwsem_first_waiter(sem)	…

enum rwsem_wake_type { … };

/*
 * The typical HZ value is either 250 or 1000. So set the minimum waiting
 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
 * queue before initiating the handoff protocol.
 */
#define RWSEM_WAIT_TIMEOUT	…

/*
 * Magic number to batch-wakeup waiting readers, even when writers are
 * also present in the queue. This both limits the amount of work the
 * waking thread must do and also prevents any potential counter overflow,
 * however unlikely.
 */
#define MAX_READERS_WAKEUP	…

static inline void rwsem_add_waiter(struct rw_semaphore *sem,
				    struct rwsem_waiter *waiter) { … }

/*
 * Remove a waiter from the wait_list and clear flags.
 *
 * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
 * this function. Modify with care.
 *
 * Return: true if wait_list isn't empty and false otherwise
 */
static inline bool rwsem_del_waiter(struct rw_semaphore *sem,
				    struct rwsem_waiter *waiter) { … }

/*
 * Handle lock release when processes blocked on it can now run.
 * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
 *   have been set.
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup; the caller must later invoke wake_up_q()
 *   to actually wake up the blocked task(s) and drop the reference count,
 *   preferably when the wait_lock is released
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only marked woken if downgrading is false
 *
 * Implies rwsem_del_waiter() for all woken readers.
 */
static void rwsem_mark_wake(struct rw_semaphore *sem,
			    enum rwsem_wake_type wake_type,
			    struct wake_q_head *wake_q) { … }

/*
 * Remove a waiter and try to wake up other waiters in the wait queue.
 * This function is called from the out_nolock path of both the reader and
 * writer slowpaths with wait_lock held. It releases the wait_lock and
 * optionally wakes up waiters before it returns.
 */
static inline void
rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
		      struct wake_q_head *wake_q)
		      __releases(&sem->wait_lock) { … }

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 *
 * Implies rwsem_del_waiter() on success.
 */
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
					struct rwsem_waiter *waiter) { … }

/*
 * The rwsem_spin_on_owner() function returns the following four values
 * depending on the lock owner state.
 *   OWNER_NULL  : owner is currently NULL
 *   OWNER_WRITER: when owner changes and is a writer
 *   OWNER_READER: when owner changes and the new owner may be a reader.
 *   OWNER_NONSPINNABLE:
 *		   when optimistic spinning has to stop because either the
 *		   owner stops running, is unknown, or its timeslice has
 *		   been used up.
 */
enum owner_state { … };
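/*
 * Illustrative sketch only: one classification consistent with the owner
 * state descriptions above, driven by the low owner flag bits.
 * example_owner_state() is a hypothetical name; the kernel's own (elided)
 * rwsem_owner_state() below serves this purpose.
 */
static inline enum owner_state
example_owner_state(struct task_struct *owner, unsigned long flags)
{
	if (flags & RWSEM_NONSPINNABLE)	/* spinning has been disabled */
		return OWNER_NONSPINNABLE;
	if (flags & RWSEM_READER_OWNED)	/* owner word is only a reader hint */
		return OWNER_READER;
	return owner ? OWNER_WRITER : OWNER_NULL;
}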
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire the write lock before the writer has been put on the
 * wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) { … }

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { … }

#define OWNER_SPINNABLE		…

static inline enum owner_state
rwsem_owner_state(struct task_struct *owner, unsigned long flags) { … }

static noinline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem) { … }

/*
 * Calculate the reader-owned rwsem spinning threshold for a writer.
 *
 * The more readers own the rwsem, the longer it will take for them to
 * wind down and free the rwsem. So the empirical formula used to
 * determine the actual spinning time limit here is:
 *
 *   Spinning threshold = (10 + nr_readers/2)us
 *
 * The limit is capped to a maximum of 25us (30 readers). This is just
 * a heuristic and is subject to change in the future. (An illustrative
 * version of this formula is sketched after the #endif below.)
 */
static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem) { … }

static bool rwsem_optimistic_spin(struct rw_semaphore *sem) { … }

/*
 * Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
 * only be called when the reader count reaches 0.
 */
static inline void clear_nonspinnable(struct rw_semaphore *sem) { … }

#else
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline void clear_nonspinnable(struct rw_semaphore *sem) { }

static inline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	return OWNER_NONSPINNABLE;
}
#endif
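/*
 * Illustrative sketch only: the spinning-threshold formula documented above
 * ((10 + nr_readers/2)us, capped at 25us for 30 or more readers) expressed
 * in nanoseconds. example_rspin_threshold_ns() is a hypothetical name and
 * takes the reader count directly rather than a semaphore pointer.
 */
static inline u64 example_rspin_threshold_ns(long nr_readers)
{
	long readers = nr_readers;

	if (readers > 30)		/* cap: 30 readers == 25us */
		readers = 30;

	/* (20 + readers) * 0.5us == (10 + readers/2)us, in nanoseconds */
	return (20 + readers) * (NSEC_PER_USEC / 2);
}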
/*
 * Prepare to wake up waiter(s) in the wait queue by putting them into the
 * given wake_q if the rwsem lock owner isn't a writer. If the rwsem is
 * likely reader-owned, wake up the read lock waiters at the front of the
 * queue, otherwise wake up any front waiter.
 *
 * This is called from both the reader and writer slow paths.
 */
static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
					  struct wake_q_head *wake_q) { … }

/*
 * Wait for the read lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_read_slowpath(struct rw_semaphore *sem, long count,
			 unsigned int state) { … }

/*
 * Wait until we successfully acquire the write lock
 */
static struct rw_semaphore __sched *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) { … }

/*
 * Handle waking up a waiter on the semaphore.
 * - up_read/up_write has decremented the active part of count if we come here
 */
static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { … }

/*
 * Downgrade a write lock into a read lock.
 * - the caller incremented the waiting part of count and discovered it still
 *   negative
 * - just wake up any readers at the front of the queue
 */
static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { … }

/*
 * lock for reading
 */
static __always_inline int __down_read_common(struct rw_semaphore *sem,
					      int state) { … }

static __always_inline void __down_read(struct rw_semaphore *sem) { … }

static __always_inline int __down_read_interruptible(struct rw_semaphore *sem) { … }

static __always_inline int __down_read_killable(struct rw_semaphore *sem) { … }

static inline int __down_read_trylock(struct rw_semaphore *sem) { … }

/*
 * lock for writing
 */
static __always_inline int __down_write_common(struct rw_semaphore *sem,
					       int state) { … }

static __always_inline void __down_write(struct rw_semaphore *sem) { … }

static __always_inline int __down_write_killable(struct rw_semaphore *sem) { … }

static inline int __down_write_trylock(struct rw_semaphore *sem) { … }

/*
 * unlock after reading
 */
static inline void __up_read(struct rw_semaphore *sem) { … }

/*
 * unlock after writing
 */
static inline void __up_write(struct rw_semaphore *sem) { … }

/*
 * downgrade write lock to read lock
 */
static inline void __downgrade_write(struct rw_semaphore *sem) { … }

#else /* !CONFIG_PREEMPT_RT */

#define RT_MUTEX_BUILD_MUTEX
#include "rtmutex.c"

#define rwbase_set_and_save_current_state	…
#define rwbase_restore_current_state		…
#define rwbase_rtmutex_lock_state		…
#define rwbase_rtmutex_slowlock_locked		…
#define rwbase_rtmutex_unlock			…
#define rwbase_rtmutex_trylock			…
#define rwbase_signal_pending_state		…
#define rwbase_pre_schedule			…
#define rwbase_schedule				…
#define rwbase_post_schedule			…

#include "rwbase_rt.c"

void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
	init_rwbase_rt(&(sem)->rwbase);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
}
EXPORT_SYMBOL(__init_rwsem);

static inline void __down_read(struct rw_semaphore *sem)
{
	rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
}

static inline int __down_read_interruptible(struct rw_semaphore *sem)
{
	return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE);
}

static inline int __down_read_killable(struct rw_semaphore *sem)
{
	return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE);
}

static inline int __down_read_trylock(struct rw_semaphore *sem)
{
	return rwbase_read_trylock(&sem->rwbase);
}

static inline void __up_read(struct rw_semaphore *sem)
{
	rwbase_read_unlock(&sem->rwbase, TASK_NORMAL);
}

static inline void __sched __down_write(struct rw_semaphore *sem)
{
	rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
}
static inline int __sched __down_write_killable(struct rw_semaphore *sem)
{
	return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE);
}

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
	return rwbase_write_trylock(&sem->rwbase);
}

static inline void __up_write(struct rw_semaphore *sem)
{
	rwbase_write_unlock(&sem->rwbase);
}

static inline void __downgrade_write(struct rw_semaphore *sem)
{
	rwbase_write_downgrade(&sem->rwbase);
}

/* Debug stubs for the common API */
#define DEBUG_RWSEMS_WARN_ON	…

static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
}

static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
	int count = atomic_read(&sem->rwbase.readers);

	return count < 0 && count != READER_BIAS;
}

#endif /* CONFIG_PREEMPT_RT */

/*
 * lock for reading
 */
void __sched down_read(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

int __sched down_read_interruptible(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

int __sched down_read_killable(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

/*
 * trylock for reading -- returns 1 if successful, 0 if contention
 */
int down_read_trylock(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

/*
 * lock for writing
 */
void __sched down_write(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

/*
 * lock for writing
 */
int __sched down_write_killable(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

/*
 * trylock for writing -- returns 1 if successful, 0 if contention
 */
int down_write_trylock(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

/*
 * release a read lock
 */
void up_read(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

/*
 * release a write lock
 */
void up_write(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

/*
 * downgrade write lock to read lock
 */
void downgrade_write(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

#ifdef CONFIG_DEBUG_LOCK_ALLOC

void down_read_nested(struct rw_semaphore *sem, int subclass) { … }
EXPORT_SYMBOL(…);

int down_read_killable_nested(struct rw_semaphore *sem, int subclass) { … }
EXPORT_SYMBOL(…);

void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) { … }
EXPORT_SYMBOL(…);

void down_read_non_owner(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

void down_write_nested(struct rw_semaphore *sem, int subclass) { … }
EXPORT_SYMBOL(…);

int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) { … }
EXPORT_SYMBOL(…);

void up_read_non_owner(struct rw_semaphore *sem) { … }
EXPORT_SYMBOL(…);

#endif
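/*
 * Illustrative usage sketch (not part of the rwsem implementation above):
 * a typical pattern for protecting shared data with the public rwsem API
 * declared in this file. The example_* names and the example data are
 * hypothetical.
 */
static DECLARE_RWSEM(example_rwsem);
static int example_shared_value;

static inline int example_read_value(void)
{
	int val;

	down_read(&example_rwsem);	/* shared: many readers may enter */
	val = example_shared_value;
	up_read(&example_rwsem);

	return val;
}

static inline void example_write_value(int val)
{
	down_write(&example_rwsem);	/* exclusive: excludes readers and writers */
	example_shared_value = val;

	/* Let readers back in while still holding the lock for reading. */
	downgrade_write(&example_rwsem);
	/* ... read-side work ... */
	up_read(&example_rwsem);
}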