// SPDX-License-Identifier: GPL-2.0 /* * Generic ring buffer * * Copyright (C) 2008 Steven Rostedt <[email protected]> */ #include <linux/trace_recursion.h> #include <linux/trace_events.h> #include <linux/ring_buffer.h> #include <linux/trace_clock.h> #include <linux/sched/clock.h> #include <linux/cacheflush.h> #include <linux/trace_seq.h> #include <linux/spinlock.h> #include <linux/irq_work.h> #include <linux/security.h> #include <linux/uaccess.h> #include <linux/hardirq.h> #include <linux/kthread.h> /* for self test */ #include <linux/module.h> #include <linux/percpu.h> #include <linux/mutex.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/list.h> #include <linux/cpu.h> #include <linux/oom.h> #include <linux/mm.h> #include <asm/local64.h> #include <asm/local.h> #include "trace.h" /* * The "absolute" timestamp in the buffer is only 59 bits. * If a clock has the 5 MSBs set, it needs to be saved and * reinserted. */ #define TS_MSB … #define ABS_TS_MASK … static void update_pages_handler(struct work_struct *work); #define RING_BUFFER_META_MAGIC … struct ring_buffer_meta { … }; /* * The ring buffer header is special. We must manually up keep it. */ int ring_buffer_print_entry_header(struct trace_seq *s) { … } /* * The ring buffer is made up of a list of pages. A separate list of pages is * allocated for each CPU. A writer may only write to a buffer that is * associated with the CPU it is currently executing on. A reader may read * from any per cpu buffer. * * The reader is special. For each per cpu buffer, the reader has its own * reader page. When a reader has read the entire reader page, this reader * page is swapped with another page in the ring buffer. * * Now, as long as the writer is off the reader page, the reader can do what * ever it wants with that page. The writer will never write to that page * again (as long as it is out of the ring buffer). * * Here's some silly ASCII art. * * +------+ * |reader| RING BUFFER * |page | * +------+ +---+ +---+ +---+ * | |-->| |-->| | * +---+ +---+ +---+ * ^ | * | | * +---------------+ * * * +------+ * |reader| RING BUFFER * |page |------------------v * +------+ +---+ +---+ +---+ * | |-->| |-->| | * +---+ +---+ +---+ * ^ | * | | * +---------------+ * * * +------+ * |reader| RING BUFFER * |page |------------------v * +------+ +---+ +---+ +---+ * ^ | |-->| |-->| | * | +---+ +---+ +---+ * | | * | | * +------------------------------+ * * * +------+ * |buffer| RING BUFFER * |page |------------------v * +------+ +---+ +---+ +---+ * ^ | | | |-->| | * | New +---+ +---+ +---+ * | Reader------^ | * | page | * +------------------------------+ * * * After we make this swap, the reader can hand this page off to the splice * code and be done with it. It can even allocate a new page if it needs to * and swap that into the ring buffer. * * We will be using cmpxchg soon to make all this lockless. * */ /* Used for individual buffers (after the counter) */ #define RB_BUFFER_OFF … #define BUF_PAGE_HDR_SIZE … #define RB_EVNT_HDR_SIZE … #define RB_ALIGNMENT … #define RB_MAX_SMALL_DATA … #define RB_EVNT_MIN_SIZE … #ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS #define RB_FORCE_8BYTE_ALIGNMENT … #define RB_ARCH_ALIGNMENT … #else #define RB_FORCE_8BYTE_ALIGNMENT … #define RB_ARCH_ALIGNMENT … #endif #define RB_ALIGN_DATA … /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ #define RINGBUF_TYPE_DATA … enum { … }; #define skip_time_extend(event) … #define extended_time(event) … static inline bool rb_null_event(struct ring_buffer_event *event) { … } static void rb_event_set_padding(struct ring_buffer_event *event) { … } static unsigned rb_event_data_length(struct ring_buffer_event *event) { … } /* * Return the length of the given event. Will return * the length of the time extend if the event is a * time extend. */ static inline unsigned rb_event_length(struct ring_buffer_event *event) { … } /* * Return total length of time extend and data, * or just the event length for all other events. */ static inline unsigned rb_event_ts_length(struct ring_buffer_event *event) { … } /** * ring_buffer_event_length - return the length of the event * @event: the event to get the length of * * Returns the size of the data load of a data event. * If the event is something other than a data event, it * returns the size of the event itself. With the exception * of a TIME EXTEND, where it still returns the size of the * data load of the data event after it. */ unsigned ring_buffer_event_length(struct ring_buffer_event *event) { … } EXPORT_SYMBOL_GPL(…); /* inline for ring buffer fast paths */ static __always_inline void * rb_event_data(struct ring_buffer_event *event) { … } /** * ring_buffer_event_data - return the data of the event * @event: the event to get the data from */ void *ring_buffer_event_data(struct ring_buffer_event *event) { … } EXPORT_SYMBOL_GPL(…); #define for_each_buffer_cpu(buffer, cpu) … #define for_each_online_buffer_cpu(buffer, cpu) … #define TS_SHIFT … #define TS_MASK … #define TS_DELTA_TEST … static u64 rb_event_time_stamp(struct ring_buffer_event *event) { … } /* Flag when events were overwritten */ #define RB_MISSED_EVENTS … /* Missed count stored at end */ #define RB_MISSED_STORED … #define RB_MISSED_MASK … struct buffer_data_page { … }; struct buffer_data_read_page { … }; /* * Note, the buffer_page list must be first. The buffer pages * are allocated in cache lines, which means that each buffer * page will be at the beginning of a cache line, and thus * the least significant bits will be zero. We use this to * add flags in the list struct pointers, to make the ring buffer * lockless. */ struct buffer_page { … }; /* * The buffer page counters, write and entries, must be reset * atomically when crossing page boundaries. To synchronize this * update, two counters are inserted into the number. One is * the actual counter for the write position or count on the page. * * The other is a counter of updaters. Before an update happens * the update partition of the counter is incremented. This will * allow the updater to update the counter atomically. * * The counter is 20 bits, and the state data is 12. */ #define RB_WRITE_MASK … #define RB_WRITE_INTCNT … static void rb_init_page(struct buffer_data_page *bpage) { … } static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) { … } static void free_buffer_page(struct buffer_page *bpage) { … } /* * We need to fit the time_stamp delta into 27 bits. */ static inline bool test_time_stamp(u64 delta) { … } struct rb_irq_work { … }; /* * Structure to hold event state and handle nested events. */ struct rb_event_info { … }; /* * Used for the add_timestamp * NONE * EXTEND - wants a time extend * ABSOLUTE - the buffer requests all events to have absolute time stamps * FORCE - force a full time stamp. */ enum { … }; /* * Used for which event context the event is in. * TRANSITION = 0 * NMI = 1 * IRQ = 2 * SOFTIRQ = 3 * NORMAL = 4 * * See trace_recursive_lock() comment below for more details. */ enum { … }; struct rb_time_struct { … }; rb_time_t; #define MAX_NEST … /* * head_page == tail_page && head == tail then buffer is empty. */ struct ring_buffer_per_cpu { … }; struct trace_buffer { … }; struct ring_buffer_iter { … }; int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s) { … } static inline void rb_time_read(rb_time_t *t, u64 *ret) { … } static void rb_time_set(rb_time_t *t, u64 val) { … } /* * Enable this to make sure that the event passed to * ring_buffer_event_time_stamp() is not committed and also * is on the buffer that it passed in. */ //#define RB_VERIFY_EVENT #ifdef RB_VERIFY_EVENT static struct list_head *rb_list_head(struct list_head *list); static void verify_event(struct ring_buffer_per_cpu *cpu_buffer, void *event) { struct buffer_page *page = cpu_buffer->commit_page; struct buffer_page *tail_page = READ_ONCE(cpu_buffer->tail_page); struct list_head *next; long commit, write; unsigned long addr = (unsigned long)event; bool done = false; int stop = 0; /* Make sure the event exists and is not committed yet */ do { if (page == tail_page || WARN_ON_ONCE(stop++ > 100)) done = true; commit = local_read(&page->page->commit); write = local_read(&page->write); if (addr >= (unsigned long)&page->page->data[commit] && addr < (unsigned long)&page->page->data[write]) return; next = rb_list_head(page->list.next); page = list_entry(next, struct buffer_page, list); } while (!done); WARN_ON_ONCE(1); } #else static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer, void *event) { … } #endif /* * The absolute time stamp drops the 5 MSBs and some clocks may * require them. The rb_fix_abs_ts() will take a previous full * time stamp, and add the 5 MSB of that time stamp on to the * saved absolute time stamp. Then they are compared in case of * the unlikely event that the latest time stamp incremented * the 5 MSB. */ static inline u64 rb_fix_abs_ts(u64 abs, u64 save_ts) { … } static inline u64 rb_time_stamp(struct trace_buffer *buffer); /** * ring_buffer_event_time_stamp - return the event's current time stamp * @buffer: The buffer that the event is on * @event: the event to get the time stamp of * * Note, this must be called after @event is reserved, and before it is * committed to the ring buffer. And must be called from the same * context where the event was reserved (normal, softirq, irq, etc). * * Returns the time stamp associated with the current event. * If the event has an extended time stamp, then that is used as * the time stamp to return. * In the highly unlikely case that the event was nested more than * the max nesting, then the write_stamp of the buffer is returned, * otherwise current time is returned, but that really neither of * the last two cases should ever happen. */ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *event) { … } /** * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer * @buffer: The ring_buffer to get the number of pages from * @cpu: The cpu of the ring_buffer to get the number of pages from * * Returns the number of pages that have content in the ring buffer. */ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) { … } static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full) { … } /* * rb_wake_up_waiters - wake up tasks waiting for ring buffer input * * Schedules a delayed work to wake up any task that is blocked on the * ring buffer waiters queue. */ static void rb_wake_up_waiters(struct irq_work *work) { … } /** * ring_buffer_wake_waiters - wake up any waiters on this ring buffer * @buffer: The ring buffer to wake waiters on * @cpu: The CPU buffer to wake waiters on * * In the case of a file that represents a ring buffer is closing, * it is prudent to wake up any waiters that are on this. */ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) { … } static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) { … } static inline bool rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer, int cpu, int full, ring_buffer_cond_fn cond, void *data) { … } struct rb_wait_data { … }; /* * The default wait condition for ring_buffer_wait() is to just to exit the * wait loop the first time it is woken up. */ static bool rb_wait_once(void *data) { … } /** * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on * @cpu: the cpu buffer to wait on * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS * @cond: condition function to break out of wait (NULL to run once) * @data: the data to pass to @cond. * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full, ring_buffer_cond_fn cond, void *data) { … } /** * ring_buffer_poll_wait - poll on buffer input * @buffer: buffer to wait on * @cpu: the cpu buffer to wait on * @filp: the file descriptor * @poll_table: The poll descriptor * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. * * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers, * zero otherwise. */ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full) { … } /* buffer may be either ring_buffer or ring_buffer_per_cpu */ #define RB_WARN_ON(b, cond) … /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT … static inline u64 rb_time_stamp(struct trace_buffer *buffer) { … } u64 ring_buffer_time_stamp(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, int cpu, u64 *ts) { … } EXPORT_SYMBOL_GPL(…); /* * Making the ring buffer lockless makes things tricky. * Although writes only happen on the CPU that they are on, * and they only need to worry about interrupts. Reads can * happen on any CPU. * * The reader page is always off the ring buffer, but when the * reader finishes with a page, it needs to swap its page with * a new one from the buffer. The reader needs to take from * the head (writes go to the tail). But if a writer is in overwrite * mode and wraps, it must push the head page forward. * * Here lies the problem. * * The reader must be careful to replace only the head page, and * not another one. As described at the top of the file in the * ASCII art, the reader sets its old page to point to the next * page after head. It then sets the page after head to point to * the old reader page. But if the writer moves the head page * during this operation, the reader could end up with the tail. * * We use cmpxchg to help prevent this race. We also do something * special with the page before head. We set the LSB to 1. * * When the writer must push the page forward, it will clear the * bit that points to the head page, move the head, and then set * the bit that points to the new head page. * * We also don't want an interrupt coming in and moving the head * page on another writer. Thus we use the second LSB to catch * that too. Thus: * * head->list->prev->next bit 1 bit 0 * ------- ------- * Normal page 0 0 * Points to head page 0 1 * New head page 1 0 * * Note we can not trust the prev pointer of the head page, because: * * +----+ +-----+ +-----+ * | |------>| T |---X--->| N | * | |<------| | | | * +----+ +-----+ +-----+ * ^ ^ | * | +-----+ | | * +----------| R |----------+ | * | |<-----------+ * +-----+ * * Key: ---X--> HEAD flag set in pointer * T Tail page * R Reader page * N Next page * * (see __rb_reserve_next() to see where this happens) * * What the above shows is that the reader just swapped out * the reader page with a page in the buffer, but before it * could make the new header point back to the new page added * it was preempted by a writer. The writer moved forward onto * the new page added by the reader and is about to move forward * again. * * You can see, it is legitimate for the previous pointer of * the head (or any page) not to point back to itself. But only * temporarily. */ #define RB_PAGE_NORMAL … #define RB_PAGE_HEAD … #define RB_PAGE_UPDATE … #define RB_FLAG_MASK … /* PAGE_MOVED is not part of the mask */ #define RB_PAGE_MOVED … /* * rb_list_head - remove any bit */ static struct list_head *rb_list_head(struct list_head *list) { … } /* * rb_is_head_page - test if the given page is the head page * * Because the reader may move the head_page pointer, we can * not trust what the head page is (it may be pointing to * the reader page). But if the next page is a header page, * its flags will be non zero. */ static inline int rb_is_head_page(struct buffer_page *page, struct list_head *list) { … } /* * rb_is_reader_page * * The unique thing about the reader page, is that, if the * writer is ever on it, the previous pointer never points * back to the reader page. */ static bool rb_is_reader_page(struct buffer_page *page) { … } /* * rb_set_list_to_head - set a list_head to be pointing to head. */ static void rb_set_list_to_head(struct list_head *list) { … } /* * rb_head_page_activate - sets up head page */ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) { … } static void rb_list_head_clear(struct list_head *list) { … } /* * rb_head_page_deactivate - clears head page ptr (for free list) */ static void rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer) { … } static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag, int new_flag) { … } static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag) { … } static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag) { … } static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag) { … } static inline void rb_inc_page(struct buffer_page **bpage) { … } static struct buffer_page * rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) { … } static bool rb_head_page_replace(struct buffer_page *old, struct buffer_page *new) { … } /* * rb_tail_page_update - move the tail page forward */ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *tail_page, struct buffer_page *next_page) { … } static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *bpage) { … } /** * rb_check_pages - integrity check of buffer pages * @cpu_buffer: CPU buffer with pages to test * * As a safety measure we check to make sure the data pages have not * been corrupted. * * Callers of this function need to guarantee that the list of pages doesn't get * modified during the check. In particular, if it's possible that the function * is invoked with concurrent readers which can swap in a new reader page then * the caller should take cpu_buffer->reader_lock. */ static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) { … } /* * Take an address, add the meta data size as well as the array of * array subbuffer indexes, then align it to a subbuffer size. * * This is used to help find the next per cpu subbuffer within a mapped range. */ static unsigned long rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs) { … } /* * Return the ring_buffer_meta for a given @cpu. */ static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu) { … } /* Return the start of subbufs given the meta pointer */ static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta) { … } /* * Return a specific sub-buffer for a given @cpu defined by @idx. */ static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx) { … } /* * See if the existing memory contains valid ring buffer data. * As the previous kernel must be the same as this kernel, all * the calculations (size of buffers and number of buffers) * must be the same. */ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, struct trace_buffer *buffer, int nr_pages) { … } static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf); static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu, unsigned long long *timestamp, u64 *delta_ptr) { … } static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu) { … } /* If the meta data has been validated, now validate the events */ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) { … } /* Used to calculate data delta */ static char rb_data_ptr[] = …; #define THIS_TEXT_PTR … #define THIS_DATA_PTR … static void rb_meta_init_text_addr(struct ring_buffer_meta *meta) { … } static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) { … } static void *rbm_start(struct seq_file *m, loff_t *pos) { … } static void *rbm_next(struct seq_file *m, void *v, loff_t *pos) { … } static int rbm_show(struct seq_file *m, void *v) { … } static void rbm_stop(struct seq_file *m, void *p) { … } static const struct seq_operations rb_meta_seq_ops = …; int ring_buffer_meta_seq_init(struct file *file, struct trace_buffer *buffer, int cpu) { … } /* Map the buffer_pages to the previous head and commit pages */ static void rb_meta_buffer_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *bpage) { … } static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, long nr_pages, struct list_head *pages) { … } static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) { … } static struct ring_buffer_per_cpu * rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) { … } static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) { … } static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long end, struct lock_class_key *key) { … } /** * __ring_buffer_alloc - allocate a new ring_buffer * @size: the size in bytes per cpu that is needed. * @flags: attributes to set for the ring buffer. * @key: ring buffer reader_lock_key. * * Currently the only flag that is available is the RB_FL_OVERWRITE * flag. This flag means that the buffer will overwrite old data * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key) { … } EXPORT_SYMBOL_GPL(…); /** * __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory * @size: the size in bytes per cpu that is needed. * @flags: attributes to set for the ring buffer. * @start: start of allocated range * @range_size: size of allocated range * @order: sub-buffer order * @key: ring buffer reader_lock_key. * * Currently the only flag that is available is the RB_FL_OVERWRITE * flag. This flag means that the buffer will overwrite old data * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long range_size, struct lock_class_key *key) { … } /** * ring_buffer_last_boot_delta - return the delta offset from last boot * @buffer: The buffer to return the delta from * @text: Return text delta * @data: Return data delta * * Returns: The true if the delta is non zero */ bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, long *data) { … } /** * ring_buffer_free - free a ring buffer. * @buffer: the buffer to free. */ void ring_buffer_free(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); void ring_buffer_set_clock(struct trace_buffer *buffer, u64 (*clock)(void)) { … } void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs) { … } bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer) { … } static inline unsigned long rb_page_entries(struct buffer_page *bpage) { … } static inline unsigned long rb_page_write(struct buffer_page *bpage) { … } static bool rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) { … } static bool rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) { … } static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer) { … } static void update_pages_handler(struct work_struct *work) { … } /** * ring_buffer_resize - resize the ring buffer * @buffer: the buffer to resize. * @size: the new size. * @cpu_id: the cpu buffer to resize * * Minimum size is 2 * buffer->subbuf_size. * * Returns 0 on success and < 0 on failure. */ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, int cpu_id) { … } EXPORT_SYMBOL_GPL(…); void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val) { … } EXPORT_SYMBOL_GPL(…); static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) { … } static __always_inline struct ring_buffer_event * rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) { … } static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { … } /* Size is determined by what has been committed */ static __always_inline unsigned rb_page_size(struct buffer_page *bpage) { … } static __always_inline unsigned rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) { … } static __always_inline unsigned rb_event_index(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { … } static void rb_inc_iter(struct ring_buffer_iter *iter) { … } /* Return the index into the sub-buffers for a given sub-buffer */ static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf) { … } static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *next_page) { … } static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *reader) { … } /* * rb_handle_head_page - writer hit the head page * * Returns: +1 to retry page * 0 to continue * -1 on error */ static int rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *tail_page, struct buffer_page *next_page) { … } static inline void rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long tail, struct rb_event_info *info) { … } static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer); /* * This is the slow path, force gcc not to inline it. */ static noinline struct ring_buffer_event * rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long tail, struct rb_event_info *info) { … } /* Slow path */ static struct ring_buffer_event * rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event, u64 delta, bool abs) { … } #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK static inline bool sched_clock_stable(void) { return true; } #endif static void rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer, struct rb_event_info *info) { … } static void rb_add_timestamp(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event **event, struct rb_event_info *info, u64 *delta, unsigned int *length) { … } /** * rb_update_event - update event type and data * @cpu_buffer: The per cpu buffer of the @event * @event: the event to update * @info: The info to update the @event with (contains length and delta) * * Update the type and data fields of the @event. The length * is the actual size that is written to the ring buffer, * and with this, we can determine what to place into the * data field. */ static void rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event, struct rb_event_info *info) { … } static unsigned rb_calculate_event_length(unsigned length) { … } static inline bool rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { … } static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) { … } static __always_inline void rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) { … } static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) { … } static inline void rb_event_discard(struct ring_buffer_event *event) { … } static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer) { … } static __always_inline void rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { … } #ifdef CONFIG_RING_BUFFER_RECORD_RECURSION #define do_ring_buffer_record_recursion() … #else #define do_ring_buffer_record_recursion … #endif /* * The lock and unlock are done within a preempt disable section. * The current_context per_cpu variable can only be modified * by the current task between lock and unlock. But it can * be modified more than once via an interrupt. To pass this * information from the lock to the unlock without having to * access the 'in_interrupt()' functions again (which do show * a bit of overhead in something as critical as function tracing, * we use a bitmask trick. * * bit 1 = NMI context * bit 2 = IRQ context * bit 3 = SoftIRQ context * bit 4 = normal context. * * This works because this is the order of contexts that can * preempt other contexts. A SoftIRQ never preempts an IRQ * context. * * When the context is determined, the corresponding bit is * checked and set (if it was set, then a recursion of that context * happened). * * On unlock, we need to clear this bit. To do so, just subtract * 1 from the current_context and AND it to itself. * * (binary) * 101 - 1 = 100 * 101 & 100 = 100 (clearing bit zero) * * 1010 - 1 = 1001 * 1010 & 1001 = 1000 (clearing bit 1) * * The least significant bit can be cleared this way, and it * just so happens that it is the same bit corresponding to * the current context. * * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit * is set when a recursion is detected at the current context, and if * the TRANSITION bit is already set, it will fail the recursion. * This is needed because there's a lag between the changing of * interrupt context and updating the preempt count. In this case, * a false positive will be found. To handle this, one extra recursion * is allowed, and this is done by the TRANSITION bit. If the TRANSITION * bit is already set, then it is considered a recursion and the function * ends. Otherwise, the TRANSITION bit is set, and that bit is returned. * * On the trace_recursive_unlock(), the TRANSITION bit will be the first * to be cleared. Even if it wasn't the context that set it. That is, * if an interrupt comes in while NORMAL bit is set and the ring buffer * is called before preempt_count() is updated, since the check will * be on the NORMAL bit, the TRANSITION bit will then be set. If an * NMI then comes in, it will set the NMI bit, but when the NMI code * does the trace_recursive_unlock() it will clear the TRANSITION bit * and leave the NMI bit set. But this is fine, because the interrupt * code that set the TRANSITION bit will then clear the NMI bit when it * calls trace_recursive_unlock(). If another NMI comes in, it will * set the TRANSITION bit and continue. * * Note: The TRANSITION bit only handles a single transition between context. */ static __always_inline bool trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { … } static __always_inline void trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) { … } /* The recursive locking above uses 5 bits */ #define NESTED_BITS … /** * ring_buffer_nest_start - Allow to trace while nested * @buffer: The ring buffer to modify * * The ring buffer has a safety mechanism to prevent recursion. * But there may be a case where a trace needs to be done while * tracing something else. In this case, calling this function * will allow this function to nest within a currently active * ring_buffer_lock_reserve(). * * Call this function before calling another ring_buffer_lock_reserve() and * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit(). */ void ring_buffer_nest_start(struct trace_buffer *buffer) { … } /** * ring_buffer_nest_end - Allow to trace while nested * @buffer: The ring buffer to modify * * Must be called after ring_buffer_nest_start() and after the * ring_buffer_unlock_commit(). */ void ring_buffer_nest_end(struct trace_buffer *buffer) { … } /** * ring_buffer_unlock_commit - commit a reserved * @buffer: The buffer to commit to * * This commits the data to the ring buffer, and releases any locks held. * * Must be paired with ring_buffer_lock_reserve. */ int ring_buffer_unlock_commit(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); /* Special value to validate all deltas on a page. */ #define CHECK_FULL_PAGE … #ifdef CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS static const char *show_irq_str(int bits) { … } /* Assume this is an trace event */ static const char *show_flags(struct ring_buffer_event *event) { … } static const char *show_irq(struct ring_buffer_event *event) { … } static const char *show_interrupt_level(void) { … } static void dump_buffer_page(struct buffer_data_page *bpage, struct rb_event_info *info, unsigned long tail) { … } static DEFINE_PER_CPU(atomic_t, checking); static atomic_t ts_dump; #define buffer_warn_return(fmt, ...) … /* * Check if the current event time stamp matches the deltas on * the buffer page. */ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, struct rb_event_info *info, unsigned long tail) { … } #else static inline void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, struct rb_event_info *info, unsigned long tail) { } #endif /* CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS */ static struct ring_buffer_event * __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, struct rb_event_info *info) { … } static __always_inline struct ring_buffer_event * rb_reserve_next_event(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer, unsigned long length) { … } /** * ring_buffer_lock_reserve - reserve a part of the buffer * @buffer: the ring buffer to reserve from * @length: the length of the data to reserve (excluding event header) * * Returns a reserved event on the ring buffer to copy directly to. * The user of this interface will need to get the body to write into * and can use the ring_buffer_event_data() interface. * * The length is the length of the data needed, not the event length * which also includes the event header. * * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. * If NULL is returned, then nothing has been allocated or locked. */ struct ring_buffer_event * ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length) { … } EXPORT_SYMBOL_GPL(…); /* * Decrement the entries to the page that an event is on. * The event does not even need to exist, only the pointer * to the page it is on. This may only be called before the commit * takes place. */ static inline void rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { … } /** * ring_buffer_discard_commit - discard an event that has not been committed * @buffer: the ring buffer * @event: non committed event to discard * * Sometimes an event that is in the ring buffer needs to be ignored. * This function lets the user discard an event in the ring buffer * and then that event will not be read later. * * This function only works if it is called before the item has been * committed. It will try to free the event from the ring buffer * if another event has not been added behind it. * * If another event has been added behind it, it will set the event * up as discarded, and perform the commit. * * If this function is called, do not call ring_buffer_unlock_commit on * the event. */ void ring_buffer_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_write - write data to the buffer without reserving * @buffer: The ring buffer to write to. * @length: The length of the data being written (excluding the event header) * @data: The data to write to the buffer. * * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as * one function. If you already have the data to write to the buffer, it * may be easier to simply call this function. * * Note, like ring_buffer_lock_reserve, the length is the length of the data * and not the length of the event which would hold the header. */ int ring_buffer_write(struct trace_buffer *buffer, unsigned long length, void *data) { … } EXPORT_SYMBOL_GPL(…); static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) { … } /** * ring_buffer_record_disable - stop all writes into the buffer * @buffer: The ring buffer to stop writes to. * * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_record_enable - enable writes to the buffer * @buffer: The ring buffer to enable writes * * Note, multiple disables will need the same number of enables * to truly enable the writing (much like preempt_disable). */ void ring_buffer_record_enable(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_record_off - stop all writes into the buffer * @buffer: The ring buffer to stop writes to. * * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * * This is different than ring_buffer_record_disable() as * it works like an on/off switch, where as the disable() version * must be paired with a enable(). */ void ring_buffer_record_off(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_record_on - restart writes into the buffer * @buffer: The ring buffer to start writes to. * * This enables all writes to the buffer that was disabled by * ring_buffer_record_off(). * * This is different than ring_buffer_record_enable() as * it works like an on/off switch, where as the enable() version * must be paired with a disable(). */ void ring_buffer_record_on(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_record_is_on - return true if the ring buffer can write * @buffer: The ring buffer to see if write is enabled * * Returns true if the ring buffer is in a state that it accepts writes. */ bool ring_buffer_record_is_on(struct trace_buffer *buffer) { … } /** * ring_buffer_record_is_set_on - return true if the ring buffer is set writable * @buffer: The ring buffer to see if write is set enabled * * Returns true if the ring buffer is set writable by ring_buffer_record_on(). * Note that this does NOT mean it is in a writable state. * * It may return true when the ring buffer has been disabled by * ring_buffer_record_disable(), as that is a temporary disabling of * the ring buffer. */ bool ring_buffer_record_is_set_on(struct trace_buffer *buffer) { … } /** * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer * @buffer: The ring buffer to stop writes to. * @cpu: The CPU buffer to stop * * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_record_enable_cpu - enable writes to the buffer * @buffer: The ring buffer to enable writes * @cpu: The CPU to enable. * * Note, multiple disables will need the same number of enables * to truly enable the writing (much like preempt_disable). */ void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /* * The total entries in the ring buffer is the running counter * of entries entered into the ring buffer, minus the sum of * the entries read from the ring buffer and the number of * entries that were overwritten. */ static inline unsigned long rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) { … } /** * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_entries_cpu - get the number of entries in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to get the entries from. */ unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_overrun_cpu - get the number of overruns caused by the ring * buffer wrapping around (only if RB_FL_OVERWRITE is on). * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_commit_overrun_cpu - get the number of overruns caused by * commits failing due to the buffer wrapping around while there are uncommitted * events, such as during an interrupt storm. * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_dropped_events_cpu - get the number of dropped events caused by * the ring buffer filling up (only if RB_FL_OVERWRITE is off). * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_read_events_cpu - get the number of events successfully read * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of events read */ unsigned long ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_entries - get the number of entries in a buffer * @buffer: The ring buffer * * Returns the total number of entries in the ring buffer * (all CPU entries) */ unsigned long ring_buffer_entries(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_overruns - get the number of overruns in buffer * @buffer: The ring buffer * * Returns the total number of overruns in the ring buffer * (all CPU entries) */ unsigned long ring_buffer_overruns(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); static void rb_iter_reset(struct ring_buffer_iter *iter) { … } /** * ring_buffer_iter_reset - reset an iterator * @iter: The iterator to reset * * Resets the iterator, so that it will start from the beginning * again. */ void ring_buffer_iter_reset(struct ring_buffer_iter *iter) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_iter_empty - check if an iterator has no more to read * @iter: The iterator to check */ int ring_buffer_iter_empty(struct ring_buffer_iter *iter) { … } EXPORT_SYMBOL_GPL(…); static void rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { … } static void rb_update_iter_read_stamp(struct ring_buffer_iter *iter, struct ring_buffer_event *event) { … } static struct buffer_page * rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { … } static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) { … } static void rb_advance_iter(struct ring_buffer_iter *iter) { … } static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) { … } static struct ring_buffer_event * rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, unsigned long *lost_events) { … } EXPORT_SYMBOL_GPL(…); static struct ring_buffer_event * rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) { … } EXPORT_SYMBOL_GPL(…); static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer) { … } static inline void rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked) { … } /** * ring_buffer_peek - peek at the next event to be read * @buffer: The ring buffer to read * @cpu: The cpu to peak at * @ts: The timestamp counter of this event. * @lost_events: a variable to store if events were lost (may be NULL) * * This will return the event that will be read next, but does * not consume the data. */ struct ring_buffer_event * ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events) { … } /** ring_buffer_iter_dropped - report if there are dropped events * @iter: The ring buffer iterator * * Returns true if there was dropped events since the last peek. */ bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_iter_peek - peek at the next event to be read * @iter: The ring buffer iterator * @ts: The timestamp counter of this event. * * This will return the event that will be read next, but does * not increment the iterator. */ struct ring_buffer_event * ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) { … } /** * ring_buffer_consume - return an event and consume it * @buffer: The ring buffer to get the next event from * @cpu: the cpu to read the buffer from * @ts: a variable to store the timestamp (may be NULL) * @lost_events: a variable to store if events were lost (may be NULL) * * Returns the next event in the ring buffer, and that event is consumed. * Meaning, that sequential reads will keep returning a different event, * and eventually empty the ring buffer if the producer is slower. */ struct ring_buffer_event * ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer * @buffer: The ring buffer to read from * @cpu: The cpu buffer to iterate over * @flags: gfp flags to use for memory allocation * * This performs the initial preparations necessary to iterate * through the buffer. Memory is allocated, buffer resizing * is disabled, and the iterator pointer is returned to the caller. * * After a sequence of ring_buffer_read_prepare calls, the user is * expected to make at least one call to ring_buffer_read_prepare_sync. * Afterwards, ring_buffer_read_start is invoked to get things going * for real. * * This overall must be paired with ring_buffer_read_finish. */ struct ring_buffer_iter * ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls * * All previously invoked ring_buffer_read_prepare calls to prepare * iterators will be synchronized. Afterwards, read_buffer_read_start * calls on those iterators are allowed. */ void ring_buffer_read_prepare_sync(void) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_read_start - start a non consuming read of the buffer * @iter: The iterator returned by ring_buffer_read_prepare * * This finalizes the startup of an iteration through the buffer. * The iterator comes from a call to ring_buffer_read_prepare and * an intervening ring_buffer_read_prepare_sync must have been * performed. * * Must be paired with ring_buffer_read_finish. */ void ring_buffer_read_start(struct ring_buffer_iter *iter) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_read_finish - finish reading the iterator of the buffer * @iter: The iterator retrieved by ring_buffer_start * * This re-enables resizing of the buffer, and frees the iterator. */ void ring_buffer_read_finish(struct ring_buffer_iter *iter) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_iter_advance - advance the iterator to the next location * @iter: The ring buffer iterator * * Move the location of the iterator such that the next read will * be the next location of the iterator. */ void ring_buffer_iter_advance(struct ring_buffer_iter *iter) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_size - return the size of the ring buffer (in bytes) * @buffer: The ring buffer. * @cpu: The CPU to get ring buffer size from. */ unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_max_event_size - return the max data size of an event * @buffer: The ring buffer. * * Returns the maximum size an event can be. */ unsigned long ring_buffer_max_event_size(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); static void rb_clear_buffer_page(struct buffer_page *page) { … } static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer) { … } static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) { … } /* Must have disabled the cpu buffer then done a synchronize_rcu */ static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) { … } /** * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer * @buffer: The ring buffer to reset a per cpu buffer of * @cpu: The CPU buffer to be reset */ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /* Flag to ensure proper resetting of atomic variables */ #define RESET_BIT … /** * ring_buffer_reset_online_cpus - reset a ring buffer per CPU buffer * @buffer: The ring buffer to reset a per cpu buffer of */ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer) { … } /** * ring_buffer_reset - reset a ring buffer * @buffer: The ring buffer to reset all cpu buffers */ void ring_buffer_reset(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_empty - is the ring buffer empty? * @buffer: The ring buffer to test */ bool ring_buffer_empty(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty? * @buffer: The ring buffer * @cpu: The CPU buffer to test */ bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP /** * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers * @buffer_a: One buffer to swap with * @buffer_b: The other buffer to swap with * @cpu: the CPU of the buffers to swap * * This function is useful for tracers that want to take a "snapshot" * of a CPU buffer and has another back up buffer lying around. * it is expected that the tracer handles the cpu buffer not being * used at the moment. */ int ring_buffer_swap_cpu(struct trace_buffer *buffer_a, struct trace_buffer *buffer_b, int cpu) { … } EXPORT_SYMBOL_GPL(…); #endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */ /** * ring_buffer_alloc_read_page - allocate a page to read from buffer * @buffer: the buffer to allocate for. * @cpu: the cpu buffer to allocate. * * This function is used in conjunction with ring_buffer_read_page. * When reading a full page from the ring buffer, these functions * can be used to speed up the process. The calling function should * allocate a few pages first with this function. Then when it * needs to get pages from the ring buffer, it passes the result * of this function into ring_buffer_read_page, which will swap * the page that was allocated, with the read page of the buffer. * * Returns: * The page allocated, or ERR_PTR */ struct buffer_data_read_page * ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_free_read_page - free an allocated read page * @buffer: the buffer the page was allocate for * @cpu: the cpu buffer the page came from * @data_page: the page to free * * Free a page allocated from ring_buffer_alloc_read_page. */ void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, struct buffer_data_read_page *data_page) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_read_page - extract a page from the ring buffer * @buffer: buffer to extract from * @data_page: the page to use allocated from ring_buffer_alloc_read_page * @len: amount to extract * @cpu: the cpu of the buffer to extract * @full: should the extraction only happen when the page is full. * * This function will pull out a page from the ring buffer and consume it. * @data_page must be the address of the variable that was returned * from ring_buffer_alloc_read_page. This is because the page might be used * to swap with a page in the ring buffer. * * for example: * rpage = ring_buffer_alloc_read_page(buffer, cpu); * if (IS_ERR(rpage)) * return PTR_ERR(rpage); * ret = ring_buffer_read_page(buffer, rpage, len, cpu, 0); * if (ret >= 0) * process_page(ring_buffer_read_page_data(rpage), ret); * ring_buffer_free_read_page(buffer, cpu, rpage); * * When @full is set, the function will not return true unless * the writer is off the reader page. * * Note: it is up to the calling functions to handle sleeps and wakeups. * The ring buffer can be used anywhere in the kernel and can not * blindly call wake_up. The layer that uses the ring buffer must be * responsible for that. * * Returns: * >=0 if data has been transferred, returns the offset of consumed data. * <0 if no data has been transferred. */ int ring_buffer_read_page(struct trace_buffer *buffer, struct buffer_data_read_page *data_page, size_t len, int cpu, int full) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_read_page_data - get pointer to the data in the page. * @page: the page to get the data from * * Returns pointer to the actual data in this page. */ void *ring_buffer_read_page_data(struct buffer_data_read_page *page) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_subbuf_size_get - get size of the sub buffer. * @buffer: the buffer to get the sub buffer size from * * Returns size of the sub buffer, in bytes. */ int ring_buffer_subbuf_size_get(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_subbuf_order_get - get order of system sub pages in one buffer page. * @buffer: The ring_buffer to get the system sub page order from * * By default, one ring buffer sub page equals to one system page. This parameter * is configurable, per ring buffer. The size of the ring buffer sub page can be * extended, but must be an order of system page size. * * Returns the order of buffer sub page size, in system pages: * 0 means the sub buffer size is 1 system page and so forth. * In case of an error < 0 is returned. */ int ring_buffer_subbuf_order_get(struct trace_buffer *buffer) { … } EXPORT_SYMBOL_GPL(…); /** * ring_buffer_subbuf_order_set - set the size of ring buffer sub page. * @buffer: The ring_buffer to set the new page size. * @order: Order of the system pages in one sub buffer page * * By default, one ring buffer pages equals to one system page. This API can be * used to set new size of the ring buffer page. The size must be order of * system page size, that's why the input parameter @order is the order of * system pages that are allocated for one ring buffer page: * 0 - 1 system page * 1 - 2 system pages * 3 - 4 system pages * ... * * Returns 0 on success or < 0 in case of an error. */ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) { … } EXPORT_SYMBOL_GPL(…); static int rb_alloc_meta_page(struct ring_buffer_per_cpu *cpu_buffer) { … } static void rb_free_meta_page(struct ring_buffer_per_cpu *cpu_buffer) { … } static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer, unsigned long *subbuf_ids) { … } static struct ring_buffer_per_cpu * rb_get_mapped_buffer(struct trace_buffer *buffer, int cpu) { … } static void rb_put_mapped_buffer(struct ring_buffer_per_cpu *cpu_buffer) { … } /* * Fast-path for rb_buffer_(un)map(). Called whenever the meta-page doesn't need * to be set-up or torn-down. */ static int __rb_inc_dec_mapped(struct ring_buffer_per_cpu *cpu_buffer, bool inc) { … } /* * +--------------+ pgoff == 0 * | meta page | * +--------------+ pgoff == 1 * | subbuffer 0 | * | | * +--------------+ pgoff == (1 + (1 << subbuf_order)) * | subbuffer 1 | * | | * ... */ #ifdef CONFIG_MMU static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer, struct vm_area_struct *vma) { … } #else static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer, struct vm_area_struct *vma) { return -EOPNOTSUPP; } #endif int ring_buffer_map(struct trace_buffer *buffer, int cpu, struct vm_area_struct *vma) { … } int ring_buffer_unmap(struct trace_buffer *buffer, int cpu) { … } int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu) { … } /* * We only allocate new buffers, never free them if the CPU goes down. * If we were to free the buffer, then the user would lose any trace that was in * the buffer. */ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) { … } #ifdef CONFIG_RING_BUFFER_STARTUP_TEST /* * This is a basic integrity check of the ring buffer. * Late in the boot cycle this test will run when configured in. * It will kick off a thread per CPU that will go into a loop * writing to the per cpu ring buffer various sizes of data. * Some of the data will be large items, some small. * * Another thread is created that goes into a spin, sending out * IPIs to the other CPUs to also write into the ring buffer. * this is to test the nesting ability of the buffer. * * Basic stats are recorded and reported. If something in the * ring buffer should happen that's not expected, a big warning * is displayed and all ring buffers are disabled. */ static struct task_struct *rb_threads[NR_CPUS] __initdata; struct rb_test_data { … }; static struct rb_test_data rb_data[NR_CPUS] __initdata; /* 1 meg per cpu */ #define RB_TEST_BUFFER_SIZE … static char rb_string[] __initdata = …; static bool rb_test_started __initdata; struct rb_item { … }; static __init int rb_write_something(struct rb_test_data *data, bool nested) { … } static __init int rb_test(void *arg) { … } static __init void rb_ipi(void *ignore) { … } static __init int rb_hammer_test(void *arg) { … } static __init int test_ringbuffer(void) { … } late_initcall(test_ringbuffer); #endif /* CONFIG_RING_BUFFER_STARTUP_TEST */