journal.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0+
/*
 * linux/fs/jbd2/journal.c
 *
 * Written by Stephen C. Tweedie <[email protected]>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * Generic filesystem journal-writing code; part of the ext2fs
 * journaling system.
 *
 * This file manages journals: areas of disk reserved for logging
 * transactional updates.  This includes the kernel journaling thread
 * which is responsible for scheduling updates to the log.
 *
 * We do not actually manage the physical storage of the journal in this
 * file: that is left to a per-journal policy function, which allows us
 * to store the journal within a filesystem-specified area for ext2
 * journaling (ext2 can use a reserved inode for storing the log).
 */

#include <linux/module.h>
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/freezer.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/poison.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/math64.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/vmalloc.h>
#include <linux/backing-dev.h>
#include <linux/bitops.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>

#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>

#include <linux/uaccess.h>
#include <asm/page.h>

#ifdef CONFIG_JBD2_DEBUG
static ushort jbd2_journal_enable_debug __read_mostly;

module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
MODULE_PARM_DESC(…) …;
#endif

EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);

EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);
EXPORT_SYMBOL(…);

static int jbd2_journal_create_slab(size_t slab_size);

#ifdef CONFIG_JBD2_DEBUG
void __jbd2_debug(int level, const char *file, const char *func,
		  unsigned int line, const char *fmt, ...)
{ … }
#endif

/* Checksumming functions */
static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
{ … }

/*
 * Helper function used to manage commit timeouts
 */

static void commit_timeout(struct timer_list *t)
{ … }

/*
 * kjournald2: The main thread function used to manage a logging device
 * journal.
 *
 * This kernel thread is responsible for two things:
 *
 * 1) COMMIT:  Every so often we need to commit the current state of the
 *    filesystem to disk.  The journal thread is responsible for writing
 *    all of the metadata buffers to disk. If a fast commit is ongoing
 *    journal thread waits until it's done and then continues from
 *    there on.
 *
 * 2) CHECKPOINT: We cannot reuse a used section of the log file until all
 *    of the data in that part of the log has been rewritten elsewhere on
 *    the disk.  Flushing these old buffers to reclaim space in the log is
 *    known as checkpointing, and this thread is responsible for that job.
 */

static int kjournald2(void *arg)
{ … }

static int jbd2_journal_start_thread(journal_t *journal)
{ … }

static void journal_kill_thread(journal_t *journal)
{ … }

/*
 * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
 *
 * Writes a metadata buffer to a given disk block.  The actual IO is not
 * performed but a new buffer_head is constructed which labels the data
 * to be written with the correct destination disk block.
 *
 * Any magic-number escaping which needs to be done will cause a
 * copy-out here.  If the buffer happens to start with the
 * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the
 * magic number is only written to the log for descripter blocks.  In
 * this case, we copy the data and replace the first word with 0, and we
 * return a result code which indicates that this buffer needs to be
 * marked as an escaped buffer in the corresponding log descriptor
 * block.  The missing word can then be restored when the block is read
 * during recovery.
 *
 * If the source buffer has already been modified by a new transaction
 * since we took the last commit snapshot, we use the frozen copy of
 * that data for IO. If we end up using the existing buffer_head's data
 * for the write, then we have to make sure nobody modifies it while the
 * IO is in progress. do_get_write_access() handles this.
 *
 * The function returns a pointer to the buffer_head to be used for IO.
 *
 *
 * Return value:
 *  <0: Error
 *  =0: Finished OK without escape
 *  =1: Finished OK with escape
 */

int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
				  struct journal_head  *jh_in,
				  struct buffer_head **bh_out,
				  sector_t blocknr)
{ … }

/*
 * Allocation code for the journal file.  Manage the space left in the
 * journal, so that we can begin checkpointing when appropriate.
 */

/*
 * Called with j_state_lock locked for writing.
 * Returns true if a transaction commit was started.
 */
static int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{ … }

int jbd2_log_start_commit(journal_t *journal, tid_t tid)
{ … }

/*
 * Force and wait any uncommitted transactions.  We can only force the running
 * transaction if we don't have an active handle, otherwise, we will deadlock.
 * Returns: <0 in case of error,
 *           0 if nothing to commit,
 *           1 if transaction was successfully committed.
 */
static int __jbd2_journal_force_commit(journal_t *journal)
{ … }

/**
 * jbd2_journal_force_commit_nested - Force and wait upon a commit if the
 * calling process is not within transaction.
 *
 * @journal: journal to force
 * Returns true if progress was made.
 *
 * This is used for forcing out undo-protected data which contains
 * bitmaps, when the fs is running out of space.
 */
int jbd2_journal_force_commit_nested(journal_t *journal)
{ … }

/**
 * jbd2_journal_force_commit() - force any uncommitted transactions
 * @journal: journal to force
 *
 * Caller want unconditional commit. We can only force the running transaction
 * if we don't have an active handle, otherwise, we will deadlock.
 */
int jbd2_journal_force_commit(journal_t *journal)
{ … }

/*
 * Start a commit of the current running transaction (if any).  Returns true
 * if a transaction is going to be committed (or is currently already
 * committing), and fills its tid in at *ptid
 */
int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
{ … }

/*
 * Return 1 if a given transaction has not yet sent barrier request
 * connected with a transaction commit. If 0 is returned, transaction
 * may or may not have sent the barrier. Used to avoid sending barrier
 * twice in common cases.
 */
int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
{ … }
EXPORT_SYMBOL(…);

/*
 * Wait for a specified commit to complete.
 * The caller may not hold the journal lock.
 */
int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
{ … }

/*
 * Start a fast commit. If there's an ongoing fast or full commit wait for
 * it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY
 * if a fast commit is not needed, either because there's an already a commit
 * going on or this tid has already been committed. Returns -EINVAL if no jbd2
 * commit has yet been performed.
 */
int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
{ … }
EXPORT_SYMBOL(…);

/*
 * Stop a fast commit. If fallback is set, this function starts commit of
 * TID tid before any other fast commit can start.
 */
static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
{ … }

int jbd2_fc_end_commit(journal_t *journal)
{ … }
EXPORT_SYMBOL(…);

int jbd2_fc_end_commit_fallback(journal_t *journal)
{ … }
EXPORT_SYMBOL(…);

/* Return 1 when transaction with given tid has already committed. */
int jbd2_transaction_committed(journal_t *journal, tid_t tid)
{ … }
EXPORT_SYMBOL(…);

/*
 * When this function returns the transaction corresponding to tid
 * will be completed.  If the transaction has currently running, start
 * committing that transaction before waiting for it to complete.  If
 * the transaction id is stale, it is by definition already completed,
 * so just return SUCCESS.
 */
int jbd2_complete_transaction(journal_t *journal, tid_t tid)
{ … }
EXPORT_SYMBOL(…);

/*
 * Log buffer allocation routines:
 */

int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
{ … }

/* Map one fast commit buffer for use by the file system */
int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
{ … }
EXPORT_SYMBOL(…);

/*
 * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
 * for completion.
 */
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
{ … }
EXPORT_SYMBOL(…);

int jbd2_fc_release_bufs(journal_t *journal)
{ … }
EXPORT_SYMBOL(…);

/*
 * Conversion of logical to physical block numbers for the journal
 *
 * On external journals the journal blocks are identity-mapped, so
 * this is a no-op.  If needed, we can use j_blk_offset - everything is
 * ready.
 */
int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
		 unsigned long long *retp)
{ … }

/*
 * We play buffer_head aliasing tricks to write data/metadata blocks to
 * the journal without copying their contents, but for journal
 * descriptor blocks we do need to generate bona fide buffers.
 *
 * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying
 * the buffer's contents they really should run flush_dcache_page(bh->b_page).
 * But we don't bother doing that, so there will be coherency problems with
 * mmaps of blockdevs which hold live JBD-controlled filesystems.
 */
struct buffer_head *
jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
{ … }

void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh)
{ … }

/*
 * Return tid of the oldest transaction in the journal and block in the journal
 * where the transaction starts.
 *
 * If the journal is now empty, return which will be the next transaction ID
 * we will write and where will that transaction start.
 *
 * The return value is 0 if journal tail cannot be pushed any further, 1 if
 * it can.
 */
int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
			      unsigned long *block)
{ … }

/*
 * Update information in journal structure and in on disk journal superblock
 * about log tail. This function does not check whether information passed in
 * really pushes log tail further. It's responsibility of the caller to make
 * sure provided log tail information is valid (e.g. by holding
 * j_checkpoint_mutex all the time between computing log tail and calling this
 * function as is the case with jbd2_cleanup_journal_tail()).
 *
 * Requires j_checkpoint_mutex
 */
int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
{ … }

/*
 * This is a variation of __jbd2_update_log_tail which checks for validity of
 * provided log tail and locks j_checkpoint_mutex. So it is safe against races
 * with other threads updating log tail.
 */
void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
{ … }

struct jbd2_stats_proc_session { … };

static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
{ … }

static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
{ … }

static int jbd2_seq_info_show(struct seq_file *seq, void *v)
{ … }

static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
{ … }

static const struct seq_operations jbd2_seq_info_ops = …;

static int jbd2_seq_info_open(struct inode *inode, struct file *file)
{ … }

static int jbd2_seq_info_release(struct inode *inode, struct file *file)
{ … }

static const struct proc_ops jbd2_info_proc_ops = …;

static struct proc_dir_entry *proc_jbd2_stats;

static void jbd2_stats_proc_init(journal_t *journal)
{ … }

static void jbd2_stats_proc_exit(journal_t *journal)
{ … }

/* Minimum size of descriptor tag */
static int jbd2_min_tag_size(void)
{ … }

/**
 * jbd2_journal_shrink_scan()
 * @shrink: shrinker to work on
 * @sc: reclaim request to process
 *
 * Scan the checkpointed buffer on the checkpoint list and release the
 * journal_head.
 */
static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
					      struct shrink_control *sc)
{ … }

/**
 * jbd2_journal_shrink_count()
 * @shrink: shrinker to work on
 * @sc: reclaim request to process
 *
 * Count the number of checkpoint buffers on the checkpoint list.
 */
static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink,
					       struct shrink_control *sc)
{ … }

/*
 * If the journal init or create aborts, we need to mark the journal
 * superblock as being NULL to prevent the journal destroy from writing
 * back a bogus superblock.
 */
static void journal_fail_superblock(journal_t *journal)
{ … }

/*
 * Check the superblock for a given journal, performing initial
 * validation of the format.
 */
static int journal_check_superblock(journal_t *journal)
{ … }

static int journal_revoke_records_per_block(journal_t *journal)
{ … }

static int jbd2_journal_get_max_txn_bufs(journal_t *journal)
{ … }

/*
 * Base amount of descriptor blocks we reserve for each transaction.
 */
static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
{ … }

/*
 * Initialize number of blocks each transaction reserves for its bookkeeping
 * and maximum number of blocks a transaction can use. This needs to be called
 * after the journal size and the fastcommit area size are initialized.
 */
static void jbd2_journal_init_transaction_limits(journal_t *journal)
{ … }

/*
 * Load the on-disk journal superblock and read the key fields into the
 * journal_t.
 */
static int journal_load_superblock(journal_t *journal)
{ … }


/*
 * Management for journal control blocks: functions to create and
 * destroy journal_t structures, and to initialise and read existing
 * journal blocks from disk.  */

/* First: create and setup a journal_t object in memory.  We initialise
 * very few fields yet: that has to wait until we have created the
 * journal structures from from scratch, or loaded them from disk. */

static journal_t *journal_init_common(struct block_device *bdev,
			struct block_device *fs_dev,
			unsigned long long start, int len, int blocksize)
{ … }

/* jbd2_journal_init_dev and jbd2_journal_init_inode:
 *
 * Create a journal structure assigned some fixed set of disk blocks to
 * the journal.  We don't actually touch those disk blocks yet, but we
 * need to set up all of the mapping information to tell the journaling
 * system where the journal blocks are.
 *
 */

/**
 *  journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
 *  @bdev: Block device on which to create the journal
 *  @fs_dev: Device which hold journalled filesystem for this journal.
 *  @start: Block nr Start of journal.
 *  @len:  Length of the journal in blocks.
 *  @blocksize: blocksize of journalling device
 *
 *  Returns: a newly created journal_t *
 *
 *  jbd2_journal_init_dev creates a journal which maps a fixed contiguous
 *  range of blocks on an arbitrary block device.
 *
 */
journal_t *jbd2_journal_init_dev(struct block_device *bdev,
			struct block_device *fs_dev,
			unsigned long long start, int len, int blocksize)
{ … }

/**
 *  journal_t * jbd2_journal_init_inode () - creates a journal which maps to a inode.
 *  @inode: An inode to create the journal in
 *
 * jbd2_journal_init_inode creates a journal which maps an on-disk inode as
 * the journal.  The inode must exist already, must support bmap() and
 * must have all data blocks preallocated.
 */
journal_t *jbd2_journal_init_inode(struct inode *inode)
{ … }

/*
 * Given a journal_t structure, initialise the various fields for
 * startup of a new journaling session.  We use this both when creating
 * a journal, and after recovering an old journal to reset it for
 * subsequent use.
 */

static int journal_reset(journal_t *journal)
{ … }

/*
 * This function expects that the caller will have locked the journal
 * buffer head, and will return with it unlocked
 */
static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags)
{ … }

/**
 * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
 * @journal: The journal to update.
 * @tail_tid: TID of the new transaction at the tail of the log
 * @tail_block: The first block of the transaction at the tail of the log
 * @write_flags: Flags for the journal sb write operation
 *
 * Update a journal's superblock information about log tail and write it to
 * disk, waiting for the IO to complete.
 */
int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
				    unsigned long tail_block,
				    blk_opf_t write_flags)
{ … }

/**
 * jbd2_mark_journal_empty() - Mark on disk journal as empty.
 * @journal: The journal to update.
 * @write_flags: Flags for the journal sb write operation
 *
 * Update a journal's dynamic superblock fields to show that journal is empty.
 * Write updated superblock to disk waiting for IO to complete.
 */
static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags)
{ … }

/**
 * __jbd2_journal_erase() - Discard or zeroout journal blocks (excluding superblock)
 * @journal: The journal to erase.
 * @flags: A discard/zeroout request is sent for each physically contigous
 *	region of the journal. Either JBD2_JOURNAL_FLUSH_DISCARD or
 *	JBD2_JOURNAL_FLUSH_ZEROOUT must be set to determine which operation
 *	to perform.
 *
 * Note: JBD2_JOURNAL_FLUSH_ZEROOUT attempts to use hardware offload. Zeroes
 * will be explicitly written if no hardware offload is available, see
 * blkdev_issue_zeroout for more details.
 */
static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
{ … }

/**
 * jbd2_journal_update_sb_errno() - Update error in the journal.
 * @journal: The journal to update.
 *
 * Update a journal's errno.  Write updated superblock to disk waiting for IO
 * to complete.
 */
void jbd2_journal_update_sb_errno(journal_t *journal)
{ … }
EXPORT_SYMBOL(…);

/**
 * jbd2_journal_load() - Read journal from disk.
 * @journal: Journal to act on.
 *
 * Given a journal_t structure which tells us which disk blocks contain
 * a journal, read the journal from disk to initialise the in-memory
 * structures.
 */
int jbd2_journal_load(journal_t *journal)
{ … }

/**
 * jbd2_journal_destroy() - Release a journal_t structure.
 * @journal: Journal to act on.
 *
 * Release a journal_t structure once it is no longer in use by the
 * journaled object.
 * Return <0 if we couldn't clean up the journal.
 */
int jbd2_journal_destroy(journal_t *journal)
{ … }


/**
 * jbd2_journal_check_used_features() - Check if features specified are used.
 * @journal: Journal to check.
 * @compat: bitmask of compatible features
 * @ro: bitmask of features that force read-only mount
 * @incompat: bitmask of incompatible features
 *
 * Check whether the journal uses all of a given set of
 * features.  Return true (non-zero) if it does.
 **/

int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
				 unsigned long ro, unsigned long incompat)
{ … }

/**
 * jbd2_journal_check_available_features() - Check feature set in journalling layer
 * @journal: Journal to check.
 * @compat: bitmask of compatible features
 * @ro: bitmask of features that force read-only mount
 * @incompat: bitmask of incompatible features
 *
 * Check whether the journaling code supports the use of
 * all of a given set of features on this journal.  Return true
 * (non-zero) if it can. */

int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat,
				      unsigned long ro, unsigned long incompat)
{ … }

static int
jbd2_journal_initialize_fast_commit(journal_t *journal)
{ … }

/**
 * jbd2_journal_set_features() - Mark a given journal feature in the superblock
 * @journal: Journal to act on.
 * @compat: bitmask of compatible features
 * @ro: bitmask of features that force read-only mount
 * @incompat: bitmask of incompatible features
 *
 * Mark a given journal feature as present on the
 * superblock.  Returns true if the requested features could be set.
 *
 */

int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
			  unsigned long ro, unsigned long incompat)
{ … }

/*
 * jbd2_journal_clear_features() - Clear a given journal feature in the
 * 				    superblock
 * @journal: Journal to act on.
 * @compat: bitmask of compatible features
 * @ro: bitmask of features that force read-only mount
 * @incompat: bitmask of incompatible features
 *
 * Clear a given journal feature as present on the
 * superblock.
 */
void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
				unsigned long ro, unsigned long incompat)
{ … }
EXPORT_SYMBOL(…);

/**
 * jbd2_journal_flush() - Flush journal
 * @journal: Journal to act on.
 * @flags: optional operation on the journal blocks after the flush (see below)
 *
 * Flush all data for a given journal to disk and empty the journal.
 * Filesystems can use this when remounting readonly to ensure that
 * recovery does not need to happen on remount. Optionally, a discard or zeroout
 * can be issued on the journal blocks after flushing.
 *
 * flags:
 *	JBD2_JOURNAL_FLUSH_DISCARD: issues discards for the journal blocks
 *	JBD2_JOURNAL_FLUSH_ZEROOUT: issues zeroouts for the journal blocks
 */
int jbd2_journal_flush(journal_t *journal, unsigned int flags)
{ … }

/**
 * jbd2_journal_wipe() - Wipe journal contents
 * @journal: Journal to act on.
 * @write: flag (see below)
 *
 * Wipe out all of the contents of a journal, safely.  This will produce
 * a warning if the journal contains any valid recovery information.
 * Must be called between journal_init_*() and jbd2_journal_load().
 *
 * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
 * we merely suppress recovery.
 */

int jbd2_journal_wipe(journal_t *journal, int write)
{ … }

/**
 * jbd2_journal_abort () - Shutdown the journal immediately.
 * @journal: the journal to shutdown.
 * @errno:   an error number to record in the journal indicating
 *           the reason for the shutdown.
 *
 * Perform a complete, immediate shutdown of the ENTIRE
 * journal (not of a single transaction).  This operation cannot be
 * undone without closing and reopening the journal.
 *
 * The jbd2_journal_abort function is intended to support higher level error
 * recovery mechanisms such as the ext2/ext3 remount-readonly error
 * mode.
 *
 * Journal abort has very specific semantics.  Any existing dirty,
 * unjournaled buffers in the main filesystem will still be written to
 * disk by bdflush, but the journaling mechanism will be suspended
 * immediately and no further transaction commits will be honoured.
 *
 * Any dirty, journaled buffers will be written back to disk without
 * hitting the journal.  Atomicity cannot be guaranteed on an aborted
 * filesystem, but we _do_ attempt to leave as much data as possible
 * behind for fsck to use for cleanup.
 *
 * Any attempt to get a new transaction handle on a journal which is in
 * ABORT state will just result in an -EROFS error return.  A
 * jbd2_journal_stop on an existing handle will return -EIO if we have
 * entered abort state during the update.
 *
 * Recursive transactions are not disturbed by journal abort until the
 * final jbd2_journal_stop, which will receive the -EIO error.
 *
 * Finally, the jbd2_journal_abort call allows the caller to supply an errno
 * which will be recorded (if possible) in the journal superblock.  This
 * allows a client to record failure conditions in the middle of a
 * transaction without having to complete the transaction to record the
 * failure to disk.  ext3_error, for example, now uses this
 * functionality.
 *
 */

void jbd2_journal_abort(journal_t *journal, int errno)
{ … }

/**
 * jbd2_journal_errno() - returns the journal's error state.
 * @journal: journal to examine.
 *
 * This is the errno number set with jbd2_journal_abort(), the last
 * time the journal was mounted - if the journal was stopped
 * without calling abort this will be 0.
 *
 * If the journal has been aborted on this mount time -EROFS will
 * be returned.
 */
int jbd2_journal_errno(journal_t *journal)
{ … }

/**
 * jbd2_journal_clear_err() - clears the journal's error state
 * @journal: journal to act on.
 *
 * An error must be cleared or acked to take a FS out of readonly
 * mode.
 */
int jbd2_journal_clear_err(journal_t *journal)
{ … }

/**
 * jbd2_journal_ack_err() - Ack journal err.
 * @journal: journal to act on.
 *
 * An error must be cleared or acked to take a FS out of readonly
 * mode.
 */
void jbd2_journal_ack_err(journal_t *journal)
{ … }

int jbd2_journal_blocks_per_page(struct inode *inode)
{ … }

/*
 * helper functions to deal with 32 or 64bit block numbers.
 */
size_t journal_tag_bytes(journal_t *journal)
{ … }

/*
 * JBD memory management
 *
 * These functions are used to allocate block-sized chunks of memory
 * used for making copies of buffer_head data.  Very often it will be
 * page-sized chunks of data, but sometimes it will be in
 * sub-page-size chunks.  (For example, 16k pages on Power systems
 * with a 4k block file system.)  For blocks smaller than a page, we
 * use a SLAB allocator.  There are slab caches for each block size,
 * which are allocated at mount time, if necessary, and we only free
 * (all of) the slab caches when/if the jbd2 module is unloaded.  For
 * this reason we don't need to a mutex to protect access to
 * jbd2_slab[] allocating or releasing memory; only in
 * jbd2_journal_create_slab().
 */
#define JBD2_MAX_SLABS …
static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];

static const char *jbd2_slab_names[JBD2_MAX_SLABS] = …;


static void jbd2_journal_destroy_slabs(void)
{ … }

static int jbd2_journal_create_slab(size_t size)
{ … }

static struct kmem_cache *get_slab(size_t size)
{ … }

void *jbd2_alloc(size_t size, gfp_t flags)
{ … }

void jbd2_free(void *ptr, size_t size)
{
	if (size < PAGE_SIZE)
		kmem_cache_free(get_slab(size), ptr);
	else
		free_pages((unsigned long)ptr, get_order(size));
};

/*
 * Journal_head storage management
 */
static struct kmem_cache *jbd2_journal_head_cache;
#ifdef CONFIG_JBD2_DEBUG
static atomic_t nr_journal_heads = …;
#endif

static int __init jbd2_journal_init_journal_head_cache(void)
{ … }

static void jbd2_journal_destroy_journal_head_cache(void)
{ … }

/*
 * journal_head splicing and dicing
 */
static struct journal_head *journal_alloc_journal_head(void)
{ … }

static void journal_free_journal_head(struct journal_head *jh)
{ … }

/*
 * A journal_head is attached to a buffer_head whenever JBD has an
 * interest in the buffer.
 *
 * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
 * is set.  This bit is tested in core kernel code where we need to take
 * JBD-specific actions.  Testing the zeroness of ->b_private is not reliable
 * there.
 *
 * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
 *
 * When a buffer has its BH_JBD bit set it is immune from being released by
 * core kernel code, mainly via ->b_count.
 *
 * A journal_head is detached from its buffer_head when the journal_head's
 * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
 * transaction (b_cp_transaction) hold their references to b_jcount.
 *
 * Various places in the kernel want to attach a journal_head to a buffer_head
 * _before_ attaching the journal_head to a transaction.  To protect the
 * journal_head in this situation, jbd2_journal_add_journal_head elevates the
 * journal_head's b_jcount refcount by one.  The caller must call
 * jbd2_journal_put_journal_head() to undo this.
 *
 * So the typical usage would be:
 *
 *	(Attach a journal_head if needed.  Increments b_jcount)
 *	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 *	...
 *      (Get another reference for transaction)
 *	jbd2_journal_grab_journal_head(bh);
 *	jh->b_transaction = xxx;
 *	(Put original reference)
 *	jbd2_journal_put_journal_head(jh);
 */

/*
 * Give a buffer_head a journal_head.
 *
 * May sleep.
 */
struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
{ … }

/*
 * Grab a ref against this buffer_head's journal_head.  If it ended up not
 * having a journal_head, return NULL
 */
struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh)
{ … }
EXPORT_SYMBOL(…);

static void __journal_remove_journal_head(struct buffer_head *bh)
{ … }

static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
{ … }

/*
 * Drop a reference on the passed journal_head.  If it fell to zero then
 * release the journal_head from the buffer_head.
 */
void jbd2_journal_put_journal_head(struct journal_head *jh)
{ … }
EXPORT_SYMBOL(…);

/*
 * Initialize jbd inode head
 */
void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
{ … }

/*
 * Function to be called before we start removing inode from memory (i.e.,
 * clear_inode() is a fine place to be called from). It removes inode from
 * transaction's lists.
 */
void jbd2_journal_release_jbd_inode(journal_t *journal,
				    struct jbd2_inode *jinode)
{ … }


#ifdef CONFIG_PROC_FS

#define JBD2_STATS_PROC_NAME …

static void __init jbd2_create_jbd_stats_proc_entry(void)
{ … }

static void __exit jbd2_remove_jbd_stats_proc_entry(void)
{ … }

#else

#define jbd2_create_jbd_stats_proc_entry …
#define jbd2_remove_jbd_stats_proc_entry …

#endif

struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;

static int __init jbd2_journal_init_inode_cache(void)
{ … }

static int __init jbd2_journal_init_handle_cache(void)
{ … }

static void jbd2_journal_destroy_inode_cache(void)
{ … }

static void jbd2_journal_destroy_handle_cache(void)
{ … }

/*
 * Module startup and shutdown
 */

static int __init journal_init_caches(void)
{ … }

static void jbd2_journal_destroy_caches(void)
{ … }

static int __init journal_init(void)
{ … }

static void __exit journal_exit(void)
{ … }

MODULE_DESCRIPTION(…) …;
MODULE_LICENSE(…) …;
module_init(…) …;
module_exit(journal_exit);