ring_buffer.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0
/*
 * Generic ring buffer
 *
 * Copyright (C) 2008 Steven Rostedt <[email protected]>
 */
#include <linux/trace_recursion.h>
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/sched/clock.h>
#include <linux/cacheflush.h>
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/security.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>	/* for self test */
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/mm.h>

#include <asm/local64.h>
#include <asm/local.h>

/*
 * The "absolute" timestamp in the buffer is only 59 bits.
 * If a clock has the 5 MSBs set, it needs to be saved and
 * reinserted.
 */
#define TS_MSB …
#define ABS_TS_MASK …

static void update_pages_handler(struct work_struct *work);

/*
 * The ring buffer header is special. We must manually up keep it.
 */
int ring_buffer_print_entry_header(struct trace_seq *s)
{ … }

/*
 * The ring buffer is made up of a list of pages. A separate list of pages is
 * allocated for each CPU. A writer may only write to a buffer that is
 * associated with the CPU it is currently executing on.  A reader may read
 * from any per cpu buffer.
 *
 * The reader is special. For each per cpu buffer, the reader has its own
 * reader page. When a reader has read the entire reader page, this reader
 * page is swapped with another page in the ring buffer.
 *
 * Now, as long as the writer is off the reader page, the reader can do what
 * ever it wants with that page. The writer will never write to that page
 * again (as long as it is out of the ring buffer).
 *
 * Here's some silly ASCII art.
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |
 *   +------+        +---+   +---+   +---+
 *                   |   |-->|   |-->|   |
 *                   +---+   +---+   +---+
 *                     ^               |
 *                     |               |
 *                     +---------------+
 *
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *                   |   |-->|   |-->|   |
 *                   +---+   +---+   +---+
 *                     ^               |
 *                     |               |
 *                     +---------------+
 *
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *      ^            |   |-->|   |-->|   |
 *      |            +---+   +---+   +---+
 *      |                              |
 *      |                              |
 *      +------------------------------+
 *
 *
 *   +------+
 *   |buffer|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *      ^            |   |   |   |-->|   |
 *      |   New      +---+   +---+   +---+
 *      |  Reader------^               |
 *      |   page                       |
 *      +------------------------------+
 *
 *
 * After we make this swap, the reader can hand this page off to the splice
 * code and be done with it. It can even allocate a new page if it needs to
 * and swap that into the ring buffer.
 *
 * We will be using cmpxchg soon to make all this lockless.
 *
 */

/* Used for individual buffers (after the counter) */
#define RB_BUFFER_OFF …

#define BUF_PAGE_HDR_SIZE …

#define RB_EVNT_HDR_SIZE …
#define RB_ALIGNMENT …
#define RB_MAX_SMALL_DATA …
#define RB_EVNT_MIN_SIZE …

#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
#define RB_FORCE_8BYTE_ALIGNMENT …
#define RB_ARCH_ALIGNMENT …
#else
#define RB_FORCE_8BYTE_ALIGNMENT …
#define RB_ARCH_ALIGNMENT …
#endif

#define RB_ALIGN_DATA …

/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA …

enum { … };

#define skip_time_extend(event) …

#define extended_time(event) …

static inline bool rb_null_event(struct ring_buffer_event *event)
{ … }

static void rb_event_set_padding(struct ring_buffer_event *event)
{ … }

static unsigned
rb_event_data_length(struct ring_buffer_event *event)
{ … }

/*
 * Return the length of the given event. Will return
 * the length of the time extend if the event is a
 * time extend.
 */
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{ … }

/*
 * Return total length of time extend and data,
 *   or just the event length for all other events.
 */
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{ … }

/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{ … }
EXPORT_SYMBOL_GPL(…);

/* inline for ring buffer fast paths */
static __always_inline void *
rb_event_data(struct ring_buffer_event *event)
{ … }

/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */
void *ring_buffer_event_data(struct ring_buffer_event *event)
{ … }
EXPORT_SYMBOL_GPL(…);

#define for_each_buffer_cpu(buffer, cpu) …

#define for_each_online_buffer_cpu(buffer, cpu) …

#define TS_SHIFT …
#define TS_MASK …
#define TS_DELTA_TEST …

static u64 rb_event_time_stamp(struct ring_buffer_event *event)
{ … }

/* Flag when events were overwritten */
#define RB_MISSED_EVENTS …
/* Missed count stored at end */
#define RB_MISSED_STORED …

#define RB_MISSED_MASK …

struct buffer_data_page { … };

struct buffer_data_read_page { … };

/*
 * Note, the buffer_page list must be first. The buffer pages
 * are allocated in cache lines, which means that each buffer
 * page will be at the beginning of a cache line, and thus
 * the least significant bits will be zero. We use this to
 * add flags in the list struct pointers, to make the ring buffer
 * lockless.
 */
struct buffer_page { … };

/*
 * The buffer page counters, write and entries, must be reset
 * atomically when crossing page boundaries. To synchronize this
 * update, two counters are inserted into the number. One is
 * the actual counter for the write position or count on the page.
 *
 * The other is a counter of updaters. Before an update happens
 * the update partition of the counter is incremented. This will
 * allow the updater to update the counter atomically.
 *
 * The counter is 20 bits, and the state data is 12.
 */
#define RB_WRITE_MASK …
#define RB_WRITE_INTCNT …

static void rb_init_page(struct buffer_data_page *bpage)
{ … }

static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
{ … }

static void free_buffer_page(struct buffer_page *bpage)
{ … }

/*
 * We need to fit the time_stamp delta into 27 bits.
 */
static inline bool test_time_stamp(u64 delta)
{ … }

struct rb_irq_work { … };

/*
 * Structure to hold event state and handle nested events.
 */
struct rb_event_info { … };

/*
 * Used for the add_timestamp
 *  NONE
 *  EXTEND - wants a time extend
 *  ABSOLUTE - the buffer requests all events to have absolute time stamps
 *  FORCE - force a full time stamp.
 */
enum { … };
/*
 * Used for which event context the event is in.
 *  TRANSITION = 0
 *  NMI     = 1
 *  IRQ     = 2
 *  SOFTIRQ = 3
 *  NORMAL  = 4
 *
 * See trace_recursive_lock() comment below for more details.
 */
enum { … };

struct rb_time_struct { … };
rb_time_t;

#define MAX_NEST …

/*
 * head_page == tail_page && head == tail then buffer is empty.
 */
struct ring_buffer_per_cpu { … };

struct trace_buffer { … };

struct ring_buffer_iter { … };

int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s)
{ … }

static inline void rb_time_read(rb_time_t *t, u64 *ret)
{ … }
static void rb_time_set(rb_time_t *t, u64 val)
{ … }

/*
 * Enable this to make sure that the event passed to
 * ring_buffer_event_time_stamp() is not committed and also
 * is on the buffer that it passed in.
 */
//#define RB_VERIFY_EVENT
#ifdef RB_VERIFY_EVENT
static struct list_head *rb_list_head(struct list_head *list);
static void verify_event(struct ring_buffer_per_cpu *cpu_buffer,
			 void *event)
{
	struct buffer_page *page = cpu_buffer->commit_page;
	struct buffer_page *tail_page = READ_ONCE(cpu_buffer->tail_page);
	struct list_head *next;
	long commit, write;
	unsigned long addr = (unsigned long)event;
	bool done = false;
	int stop = 0;

	/* Make sure the event exists and is not committed yet */
	do {
		if (page == tail_page || WARN_ON_ONCE(stop++ > 100))
			done = true;
		commit = local_read(&page->page->commit);
		write = local_read(&page->write);
		if (addr >= (unsigned long)&page->page->data[commit] &&
		    addr < (unsigned long)&page->page->data[write])
			return;

		next = rb_list_head(page->list.next);
		page = list_entry(next, struct buffer_page, list);
	} while (!done);
	WARN_ON_ONCE(1);
}
#else
static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer,
			 void *event)
{ … }
#endif

/*
 * The absolute time stamp drops the 5 MSBs and some clocks may
 * require them. The rb_fix_abs_ts() will take a previous full
 * time stamp, and add the 5 MSB of that time stamp on to the
 * saved absolute time stamp. Then they are compared in case of
 * the unlikely event that the latest time stamp incremented
 * the 5 MSB.
 */
static inline u64 rb_fix_abs_ts(u64 abs, u64 save_ts)
{ … }

static inline u64 rb_time_stamp(struct trace_buffer *buffer);

/**
 * ring_buffer_event_time_stamp - return the event's current time stamp
 * @buffer: The buffer that the event is on
 * @event: the event to get the time stamp of
 *
 * Note, this must be called after @event is reserved, and before it is
 * committed to the ring buffer. And must be called from the same
 * context where the event was reserved (normal, softirq, irq, etc).
 *
 * Returns the time stamp associated with the current event.
 * If the event has an extended time stamp, then that is used as
 * the time stamp to return.
 * In the highly unlikely case that the event was nested more than
 * the max nesting, then the write_stamp of the buffer is returned,
 * otherwise  current time is returned, but that really neither of
 * the last two cases should ever happen.
 */
u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer,
				 struct ring_buffer_event *event)
{ … }

/**
 * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer
 * @buffer: The ring_buffer to get the number of pages from
 * @cpu: The cpu of the ring_buffer to get the number of pages from
 *
 * Returns the number of pages that have content in the ring buffer.
 */
size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
{ … }

static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full)
{ … }

/*
 * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
 *
 * Schedules a delayed work to wake up any task that is blocked on the
 * ring buffer waiters queue.
 */
static void rb_wake_up_waiters(struct irq_work *work)
{ … }

/**
 * ring_buffer_wake_waiters - wake up any waiters on this ring buffer
 * @buffer: The ring buffer to wake waiters on
 * @cpu: The CPU buffer to wake waiters on
 *
 * In the case of a file that represents a ring buffer is closing,
 * it is prudent to wake up any waiters that are on this.
 */
void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
{ … }

static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
{ … }

static inline bool
rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer,
	     int cpu, int full, ring_buffer_cond_fn cond, void *data)
{ … }

struct rb_wait_data { … };

/*
 * The default wait condition for ring_buffer_wait() is to just to exit the
 * wait loop the first time it is woken up.
 */
static bool rb_wait_once(void *data)
{ … }

/**
 * ring_buffer_wait - wait for input to the ring buffer
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
 * @cond: condition function to break out of wait (NULL to run once)
 * @data: the data to pass to @cond.
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 */
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
		     ring_buffer_cond_fn cond, void *data)
{ … }

/**
 * ring_buffer_poll_wait - poll on buffer input
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @filp: the file descriptor
 * @poll_table: The poll descriptor
 * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 *
 * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers,
 * zero otherwise.
 */
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
			  struct file *filp, poll_table *poll_table, int full)
{ … }

/* buffer may be either ring_buffer or ring_buffer_per_cpu */
#define RB_WARN_ON(b, cond) …

/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT …

static inline u64 rb_time_stamp(struct trace_buffer *buffer)
{ … }

u64 ring_buffer_time_stamp(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer,
				      int cpu, u64 *ts)
{ … }
EXPORT_SYMBOL_GPL(…);

/*
 * Making the ring buffer lockless makes things tricky.
 * Although writes only happen on the CPU that they are on,
 * and they only need to worry about interrupts. Reads can
 * happen on any CPU.
 *
 * The reader page is always off the ring buffer, but when the
 * reader finishes with a page, it needs to swap its page with
 * a new one from the buffer. The reader needs to take from
 * the head (writes go to the tail). But if a writer is in overwrite
 * mode and wraps, it must push the head page forward.
 *
 * Here lies the problem.
 *
 * The reader must be careful to replace only the head page, and
 * not another one. As described at the top of the file in the
 * ASCII art, the reader sets its old page to point to the next
 * page after head. It then sets the page after head to point to
 * the old reader page. But if the writer moves the head page
 * during this operation, the reader could end up with the tail.
 *
 * We use cmpxchg to help prevent this race. We also do something
 * special with the page before head. We set the LSB to 1.
 *
 * When the writer must push the page forward, it will clear the
 * bit that points to the head page, move the head, and then set
 * the bit that points to the new head page.
 *
 * We also don't want an interrupt coming in and moving the head
 * page on another writer. Thus we use the second LSB to catch
 * that too. Thus:
 *
 * head->list->prev->next        bit 1          bit 0
 *                              -------        -------
 * Normal page                     0              0
 * Points to head page             0              1
 * New head page                   1              0
 *
 * Note we can not trust the prev pointer of the head page, because:
 *
 * +----+       +-----+        +-----+
 * |    |------>|  T  |---X--->|  N  |
 * |    |<------|     |        |     |
 * +----+       +-----+        +-----+
 *   ^                           ^ |
 *   |          +-----+          | |
 *   +----------|  R  |----------+ |
 *              |     |<-----------+
 *              +-----+
 *
 * Key:  ---X-->  HEAD flag set in pointer
 *         T      Tail page
 *         R      Reader page
 *         N      Next page
 *
 * (see __rb_reserve_next() to see where this happens)
 *
 *  What the above shows is that the reader just swapped out
 *  the reader page with a page in the buffer, but before it
 *  could make the new header point back to the new page added
 *  it was preempted by a writer. The writer moved forward onto
 *  the new page added by the reader and is about to move forward
 *  again.
 *
 *  You can see, it is legitimate for the previous pointer of
 *  the head (or any page) not to point back to itself. But only
 *  temporarily.
 */

#define RB_PAGE_NORMAL …
#define RB_PAGE_HEAD …
#define RB_PAGE_UPDATE …


#define RB_FLAG_MASK …

/* PAGE_MOVED is not part of the mask */
#define RB_PAGE_MOVED …

/*
 * rb_list_head - remove any bit
 */
static struct list_head *rb_list_head(struct list_head *list)
{ … }

/*
 * rb_is_head_page - test if the given page is the head page
 *
 * Because the reader may move the head_page pointer, we can
 * not trust what the head page is (it may be pointing to
 * the reader page). But if the next page is a header page,
 * its flags will be non zero.
 */
static inline int
rb_is_head_page(struct buffer_page *page, struct list_head *list)
{ … }

/*
 * rb_is_reader_page
 *
 * The unique thing about the reader page, is that, if the
 * writer is ever on it, the previous pointer never points
 * back to the reader page.
 */
static bool rb_is_reader_page(struct buffer_page *page)
{ … }

/*
 * rb_set_list_to_head - set a list_head to be pointing to head.
 */
static void rb_set_list_to_head(struct list_head *list)
{ … }

/*
 * rb_head_page_activate - sets up head page
 */
static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static void rb_list_head_clear(struct list_head *list)
{ … }

/*
 * rb_head_page_deactivate - clears head page ptr (for free list)
 */
static void
rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
			    struct buffer_page *head,
			    struct buffer_page *prev,
			    int old_flag, int new_flag)
{ … }

static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
				   struct buffer_page *head,
				   struct buffer_page *prev,
				   int old_flag)
{ … }

static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
				 struct buffer_page *head,
				 struct buffer_page *prev,
				 int old_flag)
{ … }

static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
				   struct buffer_page *head,
				   struct buffer_page *prev,
				   int old_flag)
{ … }

static inline void rb_inc_page(struct buffer_page **bpage)
{ … }

static struct buffer_page *
rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static bool rb_head_page_replace(struct buffer_page *old,
				struct buffer_page *new)
{ … }

/*
 * rb_tail_page_update - move the tail page forward
 */
static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
			       struct buffer_page *tail_page,
			       struct buffer_page *next_page)
{ … }

static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
			  struct buffer_page *bpage)
{ … }

/**
 * rb_check_pages - integrity check of buffer pages
 * @cpu_buffer: CPU buffer with pages to test
 *
 * As a safety measure we check to make sure the data pages have not
 * been corrupted.
 *
 * Callers of this function need to guarantee that the list of pages doesn't get
 * modified during the check. In particular, if it's possible that the function
 * is invoked with concurrent readers which can swap in a new reader page then
 * the caller should take cpu_buffer->reader_lock.
 */
static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
		long nr_pages, struct list_head *pages)
{ … }

static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
			     unsigned long nr_pages)
{ … }

static struct ring_buffer_per_cpu *
rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
{ … }

static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

/**
 * __ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes per cpu that is needed.
 * @flags: attributes to set for the ring buffer.
 * @key: ring buffer reader_lock_key.
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 */
struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
					struct lock_class_key *key)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_free - free a ring buffer.
 * @buffer: the buffer to free.
 */
void
ring_buffer_free(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

void ring_buffer_set_clock(struct trace_buffer *buffer,
			   u64 (*clock)(void))
{ … }

void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs)
{ … }

bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer)
{ … }

static inline unsigned long rb_page_entries(struct buffer_page *bpage)
{ … }

static inline unsigned long rb_page_write(struct buffer_page *bpage)
{ … }

static bool
rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
{ … }

static bool
rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static void update_pages_handler(struct work_struct *work)
{ … }

/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size.
 * @cpu_id: the cpu buffer to resize
 *
 * Minimum size is 2 * buffer->subbuf_size.
 *
 * Returns 0 on success and < 0 on failure.
 */
int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
			int cpu_id)
{ … }
EXPORT_SYMBOL_GPL(…);

void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val)
{ … }
EXPORT_SYMBOL_GPL(…);

static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
{ … }

static __always_inline struct ring_buffer_event *
rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{ … }

/* Size is determined by what has been committed */
static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
{ … }

static __always_inline unsigned
rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static __always_inline unsigned
rb_event_index(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event)
{ … }

static void rb_inc_iter(struct ring_buffer_iter *iter)
{ … }

/*
 * rb_handle_head_page - writer hit the head page
 *
 * Returns: +1 to retry page
 *           0 to continue
 *          -1 on error
 */
static int
rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
		    struct buffer_page *tail_page,
		    struct buffer_page *next_page)
{ … }

static inline void
rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
	      unsigned long tail, struct rb_event_info *info)
{ … }

static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);

/*
 * This is the slow path, force gcc not to inline it.
 */
static noinline struct ring_buffer_event *
rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
	     unsigned long tail, struct rb_event_info *info)
{ … }

/* Slow path */
static struct ring_buffer_event *
rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
		  struct ring_buffer_event *event, u64 delta, bool abs)
{ … }

#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
static inline bool sched_clock_stable(void)
{
	return true;
}
#endif

static void
rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
		   struct rb_event_info *info)
{ … }

static void rb_add_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
				      struct ring_buffer_event **event,
				      struct rb_event_info *info,
				      u64 *delta,
				      unsigned int *length)
{ … }

/**
 * rb_update_event - update event type and data
 * @cpu_buffer: The per cpu buffer of the @event
 * @event: the event to update
 * @info: The info to update the @event with (contains length and delta)
 *
 * Update the type and data fields of the @event. The length
 * is the actual size that is written to the ring buffer,
 * and with this, we can determine what to place into the
 * data field.
 */
static void
rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
		struct ring_buffer_event *event,
		struct rb_event_info *info)
{ … }

static unsigned rb_calculate_event_length(unsigned length)
{ … }

static inline bool
rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
		  struct ring_buffer_event *event)
{ … }

static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static __always_inline void
rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static inline void rb_event_discard(struct ring_buffer_event *event)
{ … }

static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static __always_inline void
rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{ … }

#ifdef CONFIG_RING_BUFFER_RECORD_RECURSION
#define do_ring_buffer_record_recursion() …
#else
#define do_ring_buffer_record_recursion …
#endif

/*
 * The lock and unlock are done within a preempt disable section.
 * The current_context per_cpu variable can only be modified
 * by the current task between lock and unlock. But it can
 * be modified more than once via an interrupt. To pass this
 * information from the lock to the unlock without having to
 * access the 'in_interrupt()' functions again (which do show
 * a bit of overhead in something as critical as function tracing,
 * we use a bitmask trick.
 *
 *  bit 1 =  NMI context
 *  bit 2 =  IRQ context
 *  bit 3 =  SoftIRQ context
 *  bit 4 =  normal context.
 *
 * This works because this is the order of contexts that can
 * preempt other contexts. A SoftIRQ never preempts an IRQ
 * context.
 *
 * When the context is determined, the corresponding bit is
 * checked and set (if it was set, then a recursion of that context
 * happened).
 *
 * On unlock, we need to clear this bit. To do so, just subtract
 * 1 from the current_context and AND it to itself.
 *
 * (binary)
 *  101 - 1 = 100
 *  101 & 100 = 100 (clearing bit zero)
 *
 *  1010 - 1 = 1001
 *  1010 & 1001 = 1000 (clearing bit 1)
 *
 * The least significant bit can be cleared this way, and it
 * just so happens that it is the same bit corresponding to
 * the current context.
 *
 * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
 * is set when a recursion is detected at the current context, and if
 * the TRANSITION bit is already set, it will fail the recursion.
 * This is needed because there's a lag between the changing of
 * interrupt context and updating the preempt count. In this case,
 * a false positive will be found. To handle this, one extra recursion
 * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
 * bit is already set, then it is considered a recursion and the function
 * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
 *
 * On the trace_recursive_unlock(), the TRANSITION bit will be the first
 * to be cleared. Even if it wasn't the context that set it. That is,
 * if an interrupt comes in while NORMAL bit is set and the ring buffer
 * is called before preempt_count() is updated, since the check will
 * be on the NORMAL bit, the TRANSITION bit will then be set. If an
 * NMI then comes in, it will set the NMI bit, but when the NMI code
 * does the trace_recursive_unlock() it will clear the TRANSITION bit
 * and leave the NMI bit set. But this is fine, because the interrupt
 * code that set the TRANSITION bit will then clear the NMI bit when it
 * calls trace_recursive_unlock(). If another NMI comes in, it will
 * set the TRANSITION bit and continue.
 *
 * Note: The TRANSITION bit only handles a single transition between context.
 */

static __always_inline bool
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static __always_inline void
trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

/* The recursive locking above uses 5 bits */
#define NESTED_BITS …

/**
 * ring_buffer_nest_start - Allow to trace while nested
 * @buffer: The ring buffer to modify
 *
 * The ring buffer has a safety mechanism to prevent recursion.
 * But there may be a case where a trace needs to be done while
 * tracing something else. In this case, calling this function
 * will allow this function to nest within a currently active
 * ring_buffer_lock_reserve().
 *
 * Call this function before calling another ring_buffer_lock_reserve() and
 * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit().
 */
void ring_buffer_nest_start(struct trace_buffer *buffer)
{ … }

/**
 * ring_buffer_nest_end - Allow to trace while nested
 * @buffer: The ring buffer to modify
 *
 * Must be called after ring_buffer_nest_start() and after the
 * ring_buffer_unlock_commit().
 */
void ring_buffer_nest_end(struct trace_buffer *buffer)
{ … }

/**
 * ring_buffer_unlock_commit - commit a reserved
 * @buffer: The buffer to commit to
 *
 * This commits the data to the ring buffer, and releases any locks held.
 *
 * Must be paired with ring_buffer_lock_reserve.
 */
int ring_buffer_unlock_commit(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

/* Special value to validate all deltas on a page. */
#define CHECK_FULL_PAGE …

#ifdef CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS

static const char *show_irq_str(int bits)
{ … }

/* Assume this is an trace event */
static const char *show_flags(struct ring_buffer_event *event)
{ … }

static const char *show_irq(struct ring_buffer_event *event)
{ … }

static const char *show_interrupt_level(void)
{ … }

static void dump_buffer_page(struct buffer_data_page *bpage,
			     struct rb_event_info *info,
			     unsigned long tail)
{ … }

static DEFINE_PER_CPU(atomic_t, checking);
static atomic_t ts_dump;

#define buffer_warn_return(fmt, ...) …

/*
 * Check if the current event time stamp matches the deltas on
 * the buffer page.
 */
static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
			 struct rb_event_info *info,
			 unsigned long tail)
{ … }
#else
static inline void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
			 struct rb_event_info *info,
			 unsigned long tail)
{
}
#endif /* CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS */

static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
		  struct rb_event_info *info)
{ … }

static __always_inline struct ring_buffer_event *
rb_reserve_next_event(struct trace_buffer *buffer,
		      struct ring_buffer_per_cpu *cpu_buffer,
		      unsigned long length)
{ … }

/**
 * ring_buffer_lock_reserve - reserve a part of the buffer
 * @buffer: the ring buffer to reserve from
 * @length: the length of the data to reserve (excluding event header)
 *
 * Returns a reserved event on the ring buffer to copy directly to.
 * The user of this interface will need to get the body to write into
 * and can use the ring_buffer_event_data() interface.
 *
 * The length is the length of the data needed, not the event length
 * which also includes the event header.
 *
 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
 * If NULL is returned, then nothing has been allocated or locked.
 */
struct ring_buffer_event *
ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
{ … }
EXPORT_SYMBOL_GPL(…);

/*
 * Decrement the entries to the page that an event is on.
 * The event does not even need to exist, only the pointer
 * to the page it is on. This may only be called before the commit
 * takes place.
 */
static inline void
rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
		   struct ring_buffer_event *event)
{ … }

/**
 * ring_buffer_discard_commit - discard an event that has not been committed
 * @buffer: the ring buffer
 * @event: non committed event to discard
 *
 * Sometimes an event that is in the ring buffer needs to be ignored.
 * This function lets the user discard an event in the ring buffer
 * and then that event will not be read later.
 *
 * This function only works if it is called before the item has been
 * committed. It will try to free the event from the ring buffer
 * if another event has not been added behind it.
 *
 * If another event has been added behind it, it will set the event
 * up as discarded, and perform the commit.
 *
 * If this function is called, do not call ring_buffer_unlock_commit on
 * the event.
 */
void ring_buffer_discard_commit(struct trace_buffer *buffer,
				struct ring_buffer_event *event)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_write - write data to the buffer without reserving
 * @buffer: The ring buffer to write to.
 * @length: The length of the data being written (excluding the event header)
 * @data: The data to write to the buffer.
 *
 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
 * one function. If you already have the data to write to the buffer, it
 * may be easier to simply call this function.
 *
 * Note, like ring_buffer_lock_reserve, the length is the length of the data
 * and not the length of the event which would hold the header.
 */
int ring_buffer_write(struct trace_buffer *buffer,
		      unsigned long length,
		      void *data)
{ … }
EXPORT_SYMBOL_GPL(…);

static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

/**
 * ring_buffer_record_disable - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * The caller should call synchronize_rcu() after this.
 */
void ring_buffer_record_disable(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_record_enable - enable writes to the buffer
 * @buffer: The ring buffer to enable writes
 *
 * Note, multiple disables will need the same number of enables
 * to truly enable the writing (much like preempt_disable).
 */
void ring_buffer_record_enable(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_record_off - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * This is different than ring_buffer_record_disable() as
 * it works like an on/off switch, where as the disable() version
 * must be paired with a enable().
 */
void ring_buffer_record_off(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_record_on - restart writes into the buffer
 * @buffer: The ring buffer to start writes to.
 *
 * This enables all writes to the buffer that was disabled by
 * ring_buffer_record_off().
 *
 * This is different than ring_buffer_record_enable() as
 * it works like an on/off switch, where as the enable() version
 * must be paired with a disable().
 */
void ring_buffer_record_on(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_record_is_on - return true if the ring buffer can write
 * @buffer: The ring buffer to see if write is enabled
 *
 * Returns true if the ring buffer is in a state that it accepts writes.
 */
bool ring_buffer_record_is_on(struct trace_buffer *buffer)
{ … }

/**
 * ring_buffer_record_is_set_on - return true if the ring buffer is set writable
 * @buffer: The ring buffer to see if write is set enabled
 *
 * Returns true if the ring buffer is set writable by ring_buffer_record_on().
 * Note that this does NOT mean it is in a writable state.
 *
 * It may return true when the ring buffer has been disabled by
 * ring_buffer_record_disable(), as that is a temporary disabling of
 * the ring buffer.
 */
bool ring_buffer_record_is_set_on(struct trace_buffer *buffer)
{ … }

/**
 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
 * @buffer: The ring buffer to stop writes to.
 * @cpu: The CPU buffer to stop
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * The caller should call synchronize_rcu() after this.
 */
void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_record_enable_cpu - enable writes to the buffer
 * @buffer: The ring buffer to enable writes
 * @cpu: The CPU to enable.
 *
 * Note, multiple disables will need the same number of enables
 * to truly enable the writing (much like preempt_disable).
 */
void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/*
 * The total entries in the ring buffer is the running counter
 * of entries entered into the ring buffer, minus the sum of
 * the entries read from the ring buffer and the number of
 * entries that were overwritten.
 */
static inline unsigned long
rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

/**
 * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to read from.
 */
u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to read from.
 */
unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the entries from.
 */
unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_overrun_cpu - get the number of overruns caused by the ring
 * buffer wrapping around (only if RB_FL_OVERWRITE is on).
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */
unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by
 * commits failing due to the buffer wrapping around while there are uncommitted
 * events, such as during an interrupt storm.
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */
unsigned long
ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_dropped_events_cpu - get the number of dropped events caused by
 * the ring buffer filling up (only if RB_FL_OVERWRITE is off).
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */
unsigned long
ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_read_events_cpu - get the number of events successfully read
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of events read
 */
unsigned long
ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_entries - get the number of entries in a buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of entries in the ring buffer
 * (all CPU entries)
 */
unsigned long ring_buffer_entries(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_overruns - get the number of overruns in buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of overruns in the ring buffer
 * (all CPU entries)
 */
unsigned long ring_buffer_overruns(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

static void rb_iter_reset(struct ring_buffer_iter *iter)
{ … }

/**
 * ring_buffer_iter_reset - reset an iterator
 * @iter: The iterator to reset
 *
 * Resets the iterator, so that it will start from the beginning
 * again.
 */
void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_iter_empty - check if an iterator has no more to read
 * @iter: The iterator to check
 */
int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
{ … }
EXPORT_SYMBOL_GPL(…);

static void
rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
		     struct ring_buffer_event *event)
{ … }

static void
rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
			  struct ring_buffer_event *event)
{ … }

static struct buffer_page *
rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static void rb_advance_iter(struct ring_buffer_iter *iter)
{ … }

static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static struct ring_buffer_event *
rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
	       unsigned long *lost_events)
{ … }
EXPORT_SYMBOL_GPL(…);

static struct ring_buffer_event *
rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
{ … }
EXPORT_SYMBOL_GPL(…);

static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static inline void
rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
{ … }

/**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: The ring buffer to read
 * @cpu: The cpu to peak at
 * @ts: The timestamp counter of this event.
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * This will return the event that will be read next, but does
 * not consume the data.
 */
struct ring_buffer_event *
ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts,
		 unsigned long *lost_events)
{ … }

/** ring_buffer_iter_dropped - report if there are dropped events
 * @iter: The ring buffer iterator
 *
 * Returns true if there was dropped events since the last peek.
 */
bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_iter_peek - peek at the next event to be read
 * @iter: The ring buffer iterator
 * @ts: The timestamp counter of this event.
 *
 * This will return the event that will be read next, but does
 * not increment the iterator.
 */
struct ring_buffer_event *
ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
{ … }

/**
 * ring_buffer_consume - return an event and consume it
 * @buffer: The ring buffer to get the next event from
 * @cpu: the cpu to read the buffer from
 * @ts: a variable to store the timestamp (may be NULL)
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * Returns the next event in the ring buffer, and that event is consumed.
 * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower.
 */
struct ring_buffer_event *
ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts,
		    unsigned long *lost_events)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
 * @buffer: The ring buffer to read from
 * @cpu: The cpu buffer to iterate over
 * @flags: gfp flags to use for memory allocation
 *
 * This performs the initial preparations necessary to iterate
 * through the buffer.  Memory is allocated, buffer resizing
 * is disabled, and the iterator pointer is returned to the caller.
 *
 * After a sequence of ring_buffer_read_prepare calls, the user is
 * expected to make at least one call to ring_buffer_read_prepare_sync.
 * Afterwards, ring_buffer_read_start is invoked to get things going
 * for real.
 *
 * This overall must be paired with ring_buffer_read_finish.
 */
struct ring_buffer_iter *
ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
 *
 * All previously invoked ring_buffer_read_prepare calls to prepare
 * iterators will be synchronized.  Afterwards, read_buffer_read_start
 * calls on those iterators are allowed.
 */
void
ring_buffer_read_prepare_sync(void)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_read_start - start a non consuming read of the buffer
 * @iter: The iterator returned by ring_buffer_read_prepare
 *
 * This finalizes the startup of an iteration through the buffer.
 * The iterator comes from a call to ring_buffer_read_prepare and
 * an intervening ring_buffer_read_prepare_sync must have been
 * performed.
 *
 * Must be paired with ring_buffer_read_finish.
 */
void
ring_buffer_read_start(struct ring_buffer_iter *iter)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_read_finish - finish reading the iterator of the buffer
 * @iter: The iterator retrieved by ring_buffer_start
 *
 * This re-enables resizing of the buffer, and frees the iterator.
 */
void
ring_buffer_read_finish(struct ring_buffer_iter *iter)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_iter_advance - advance the iterator to the next location
 * @iter: The ring buffer iterator
 *
 * Move the location of the iterator such that the next read will
 * be the next location of the iterator.
 */
void ring_buffer_iter_advance(struct ring_buffer_iter *iter)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_size - return the size of the ring buffer (in bytes)
 * @buffer: The ring buffer.
 * @cpu: The CPU to get ring buffer size from.
 */
unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_max_event_size - return the max data size of an event
 * @buffer: The ring buffer.
 *
 * Returns the maximum size an event can be.
 */
unsigned long ring_buffer_max_event_size(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

static void rb_clear_buffer_page(struct buffer_page *page)
{ … }

static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static void
rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

/* Must have disabled the cpu buffer then done a synchronize_rcu */
static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

/**
 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
 * @buffer: The ring buffer to reset a per cpu buffer of
 * @cpu: The CPU buffer to be reset
 */
void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/* Flag to ensure proper resetting of atomic variables */
#define RESET_BIT …

/**
 * ring_buffer_reset_online_cpus - reset a ring buffer per CPU buffer
 * @buffer: The ring buffer to reset a per cpu buffer of
 */
void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
{ … }

/**
 * ring_buffer_reset - reset a ring buffer
 * @buffer: The ring buffer to reset all cpu buffers
 */
void ring_buffer_reset(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_empty - is the ring buffer empty?
 * @buffer: The ring buffer to test
 */
bool ring_buffer_empty(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
 * @buffer: The ring buffer
 * @cpu: The CPU buffer to test
 */
bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
/**
 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
 * @buffer_a: One buffer to swap with
 * @buffer_b: The other buffer to swap with
 * @cpu: the CPU of the buffers to swap
 *
 * This function is useful for tracers that want to take a "snapshot"
 * of a CPU buffer and has another back up buffer lying around.
 * it is expected that the tracer handles the cpu buffer not being
 * used at the moment.
 */
int ring_buffer_swap_cpu(struct trace_buffer *buffer_a,
			 struct trace_buffer *buffer_b, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);
#endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */

/**
 * ring_buffer_alloc_read_page - allocate a page to read from buffer
 * @buffer: the buffer to allocate for.
 * @cpu: the cpu buffer to allocate.
 *
 * This function is used in conjunction with ring_buffer_read_page.
 * When reading a full page from the ring buffer, these functions
 * can be used to speed up the process. The calling function should
 * allocate a few pages first with this function. Then when it
 * needs to get pages from the ring buffer, it passes the result
 * of this function into ring_buffer_read_page, which will swap
 * the page that was allocated, with the read page of the buffer.
 *
 * Returns:
 *  The page allocated, or ERR_PTR
 */
struct buffer_data_read_page *
ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_free_read_page - free an allocated read page
 * @buffer: the buffer the page was allocate for
 * @cpu: the cpu buffer the page came from
 * @data_page: the page to free
 *
 * Free a page allocated from ring_buffer_alloc_read_page.
 */
void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu,
				struct buffer_data_read_page *data_page)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_read_page - extract a page from the ring buffer
 * @buffer: buffer to extract from
 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
 * @len: amount to extract
 * @cpu: the cpu of the buffer to extract
 * @full: should the extraction only happen when the page is full.
 *
 * This function will pull out a page from the ring buffer and consume it.
 * @data_page must be the address of the variable that was returned
 * from ring_buffer_alloc_read_page. This is because the page might be used
 * to swap with a page in the ring buffer.
 *
 * for example:
 *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
 *	if (IS_ERR(rpage))
 *		return PTR_ERR(rpage);
 *	ret = ring_buffer_read_page(buffer, rpage, len, cpu, 0);
 *	if (ret >= 0)
 *		process_page(ring_buffer_read_page_data(rpage), ret);
 *	ring_buffer_free_read_page(buffer, cpu, rpage);
 *
 * When @full is set, the function will not return true unless
 * the writer is off the reader page.
 *
 * Note: it is up to the calling functions to handle sleeps and wakeups.
 *  The ring buffer can be used anywhere in the kernel and can not
 *  blindly call wake_up. The layer that uses the ring buffer must be
 *  responsible for that.
 *
 * Returns:
 *  >=0 if data has been transferred, returns the offset of consumed data.
 *  <0 if no data has been transferred.
 */
int ring_buffer_read_page(struct trace_buffer *buffer,
			  struct buffer_data_read_page *data_page,
			  size_t len, int cpu, int full)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_read_page_data - get pointer to the data in the page.
 * @page:  the page to get the data from
 *
 * Returns pointer to the actual data in this page.
 */
void *ring_buffer_read_page_data(struct buffer_data_read_page *page)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_subbuf_size_get - get size of the sub buffer.
 * @buffer: the buffer to get the sub buffer size from
 *
 * Returns size of the sub buffer, in bytes.
 */
int ring_buffer_subbuf_size_get(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_subbuf_order_get - get order of system sub pages in one buffer page.
 * @buffer: The ring_buffer to get the system sub page order from
 *
 * By default, one ring buffer sub page equals to one system page. This parameter
 * is configurable, per ring buffer. The size of the ring buffer sub page can be
 * extended, but must be an order of system page size.
 *
 * Returns the order of buffer sub page size, in system pages:
 * 0 means the sub buffer size is 1 system page and so forth.
 * In case of an error < 0 is returned.
 */
int ring_buffer_subbuf_order_get(struct trace_buffer *buffer)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * ring_buffer_subbuf_order_set - set the size of ring buffer sub page.
 * @buffer: The ring_buffer to set the new page size.
 * @order: Order of the system pages in one sub buffer page
 *
 * By default, one ring buffer pages equals to one system page. This API can be
 * used to set new size of the ring buffer page. The size must be order of
 * system page size, that's why the input parameter @order is the order of
 * system pages that are allocated for one ring buffer page:
 *  0 - 1 system page
 *  1 - 2 system pages
 *  3 - 4 system pages
 *  ...
 *
 * Returns 0 on success or < 0 in case of an error.
 */
int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
{ … }
EXPORT_SYMBOL_GPL(…);

static int rb_alloc_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static void rb_free_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
				   unsigned long *subbuf_ids)
{ … }

static struct ring_buffer_per_cpu *
rb_get_mapped_buffer(struct trace_buffer *buffer, int cpu)
{ … }

static void rb_put_mapped_buffer(struct ring_buffer_per_cpu *cpu_buffer)
{ … }

/*
 * Fast-path for rb_buffer_(un)map(). Called whenever the meta-page doesn't need
 * to be set-up or torn-down.
 */
static int __rb_inc_dec_mapped(struct ring_buffer_per_cpu *cpu_buffer,
			       bool inc)
{ … }

/*
 *   +--------------+  pgoff == 0
 *   |   meta page  |
 *   +--------------+  pgoff == 1
 *   | subbuffer 0  |
 *   |              |
 *   +--------------+  pgoff == (1 + (1 << subbuf_order))
 *   | subbuffer 1  |
 *   |              |
 *         ...
 */
#ifdef CONFIG_MMU
static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
			struct vm_area_struct *vma)
{ … }
#else
static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
			struct vm_area_struct *vma)
{
	return -EOPNOTSUPP;
}
#endif

int ring_buffer_map(struct trace_buffer *buffer, int cpu,
		    struct vm_area_struct *vma)
{ … }

int ring_buffer_unmap(struct trace_buffer *buffer, int cpu)
{ … }

int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu)
{ … }

/*
 * We only allocate new buffers, never free them if the CPU goes down.
 * If we were to free the buffer, then the user would lose any trace that was in
 * the buffer.
 */
int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
{ … }

#ifdef CONFIG_RING_BUFFER_STARTUP_TEST
/*
 * This is a basic integrity check of the ring buffer.
 * Late in the boot cycle this test will run when configured in.
 * It will kick off a thread per CPU that will go into a loop
 * writing to the per cpu ring buffer various sizes of data.
 * Some of the data will be large items, some small.
 *
 * Another thread is created that goes into a spin, sending out
 * IPIs to the other CPUs to also write into the ring buffer.
 * this is to test the nesting ability of the buffer.
 *
 * Basic stats are recorded and reported. If something in the
 * ring buffer should happen that's not expected, a big warning
 * is displayed and all ring buffers are disabled.
 */
static struct task_struct *rb_threads[NR_CPUS] __initdata;

struct rb_test_data { … };

static struct rb_test_data rb_data[NR_CPUS] __initdata;

/* 1 meg per cpu */
#define RB_TEST_BUFFER_SIZE …

static char rb_string[] __initdata = …;

static bool rb_test_started __initdata;

struct rb_item { … };

static __init int rb_write_something(struct rb_test_data *data, bool nested)
{ … }

static __init int rb_test(void *arg)
{ … }

static __init void rb_ipi(void *ignore)
{ … }

static __init int rb_hammer_test(void *arg)
{ … }

static __init int test_ringbuffer(void)
{ … }

late_initcall(test_ringbuffer);
#endif /* CONFIG_RING_BUFFER_STARTUP_TEST */
linux/kernel/trace/ring_buffer.c