// SPDX-License-Identifier: GPL-2.0
/*
 * Write ahead logging implementation copyright Chris Mason 2000
 *
 * The background commits make this code very interrelated, and
 * overly complex.  I need to rethink things a bit....  The major players:
 *
 * journal_begin -- call with the number of blocks you expect to log.
 *                  If the current transaction is too old, it will block
 *                  until the current transaction is finished, and then
 *                  start a new one.  Usually, your transaction will get
 *                  joined in with previous ones for speed.
 *
 * journal_join -- same as journal_begin, but won't block on the current
 *                 transaction regardless of age.  Don't ever call this.
 *                 Ever.  There are only two places it should be called
 *                 from, and they are both inside this file.
 *
 * journal_mark_dirty -- adds blocks into this transaction.  Clears any flags
 *                       that might make them get sent to disk and then
 *                       marks them BH_JDirty.  Puts the buffer head into
 *                       the current transaction hash.
 *
 * journal_end -- if the current transaction is batchable, it does nothing.
 *                Otherwise, it could do an async/synchronous commit, or a
 *                full flush of all log and real blocks in the transaction.
 *
 * flush_old_commits -- if the current transaction is too old, it is ended
 *                      and commit blocks are sent to disk.  Forces commit
 *                      blocks to disk for all backgrounded commits that
 *                      have been around too long.
 *                      -- Note, if you call this as an immediate flush from
 *                         within kupdate, it will ignore the immediate flag.
 */

#include <linux/time.h>
#include <linux/semaphore.h>
#include <linux/vmalloc.h>
#include "reiserfs.h"
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/uaccess.h>
#include <linux/slab.h>

/* gets a struct reiserfs_journal_list * from a list head */
#define JOURNAL_LIST_ENTRY(h) …

/* must be correct to keep the desc and commit structs at 4k */
#define JOURNAL_TRANS_HALF …
#define BUFNR …
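/*
 * Illustrative sketch (not part of the original source): how a typical
 * metadata update elsewhere in reiserfs is expected to use the entry points
 * described in the header comment above.  'sb', 'bh', the nblocks value and
 * the error handling are placeholders.
 *
 *	struct reiserfs_transaction_handle th;
 *	int err;
 *
 *	err = journal_begin(&th, sb, 1);
 *	if (err)
 *		return err;
 *	reiserfs_prepare_for_journal(sb, bh, 1);
 *	... modify bh->b_data ...
 *	journal_mark_dirty(&th, bh);
 *	err = journal_end(&th);
 */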
/* cnode stat bits.  Move these into reiserfs_fs.h */

/* this block was freed, and can't be written. */
#define BLOCK_FREED …
/* this block was freed during this transaction, and can't be written */
#define BLOCK_FREED_HOLDER …

/* used in flush_journal_list */
#define BLOCK_NEEDS_FLUSH …
#define BLOCK_DIRTIED …

/* journal list state bits */
#define LIST_TOUCHED …
#define LIST_DIRTY …
#define LIST_COMMIT_PENDING …

/* flags for do_journal_end */
#define FLUSH_ALL …
#define COMMIT_NOW …
#define WAIT …

static int do_journal_end(struct reiserfs_transaction_handle *, int flags);
static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall);
static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall);
static int can_dirty(struct reiserfs_journal_cnode *cn);
static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *sb);
static void release_journal_dev(struct reiserfs_journal *journal);
static void dirty_one_transaction(struct super_block *s, struct reiserfs_journal_list *jl);
static void flush_async_commits(struct work_struct *work);
static void queue_log_writer(struct super_block *s);

/* values for join in do_journal_begin_r */
enum { … };

static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct super_block *sb, unsigned long nblocks, int join);

static void init_journal_hash(struct super_block *sb) { … }

/*
 * clears BH_Dirty and sticks the buffer on the clean list.  Called because
 * I can't allow refile_buffer to make schedule happen after I've freed a
 * block.  Look at remove_from_transaction and journal_mark_freed for
 * more details.
 */
static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) { … }

static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block *sb) { … }

static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb) { … }

static inline void free_bitmap_node(struct super_block *sb, struct reiserfs_bitmap_node *bn) { … }

static void allocate_bitmap_nodes(struct super_block *sb) { … }

static int set_bit_in_list_bitmap(struct super_block *sb, b_blocknr_t block, struct reiserfs_list_bitmap *jb) { … }

static void cleanup_bitmap_list(struct super_block *sb, struct reiserfs_list_bitmap *jb) { … }

/*
 * only call this on FS unmount.
 */
static int free_list_bitmaps(struct super_block *sb, struct reiserfs_list_bitmap *jb_array) { … }

static int free_bitmap_nodes(struct super_block *sb) { … }

/*
 * get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
 * jb_array is the array to be filled in.
 */
int reiserfs_allocate_list_bitmaps(struct super_block *sb, struct reiserfs_list_bitmap *jb_array, unsigned int bmap_nr) { … }

/*
 * find an available list bitmap.  If you can't find one, flush a commit list
 * and try again
 */
static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, struct reiserfs_journal_list *jl) { … }
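/*
 * Illustrative sketch (not part of the original source): how a mount or
 * resize path might be expected to size the per-journal-list bitmaps with
 * reiserfs_allocate_list_bitmaps() above.  'journal->j_list_bitmap', the
 * reiserfs_bmap_count() helper and the error handling are assumptions made
 * for this sketch.
 *
 *	int err;
 *
 *	err = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
 *					     reiserfs_bmap_count(sb));
 *	if (err)
 *		return err;
 */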
/*
 * allocates a new chunk of X nodes, and links them all together as a list.
 * Uses the cnode->next and cnode->prev pointers
 * returns NULL on failure
 */
static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) { … }

/* pulls a cnode off the free list, or returns NULL on failure */
static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) { … }

/*
 * returns a cnode to the free list
 */
static void free_cnode(struct super_block *sb, struct reiserfs_journal_cnode *cn) { … }

static void clear_prepared_bits(struct buffer_head *bh) { … }

/*
 * return a cnode with same dev, block number and size in table,
 * or null if not found
 */
static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct super_block *sb, struct reiserfs_journal_cnode **table, long bl) { … }

/*
 * this actually means 'can this block be reallocated yet?'.  If you set
 * search_all, a block can only be allocated if it is not in the current
 * transaction, was not freed by the current transaction, and has no chance
 * of ever being overwritten by a replay after crashing.
 *
 * If you don't set search_all, a block can only be allocated if it is not
 * in the current transaction.  Since deleting a block removes it from the
 * current transaction, this case should never happen.  If you don't set
 * search_all, make sure you never write the block without logging it.
 *
 * next_zero_bit is a suggestion about the next block to try for find_forward.
 * when bl is rejected because it is set in a journal list bitmap, we search
 * for the next zero bit in the bitmap that rejected bl.  Then, we return
 * that through next_zero_bit for find_forward to try.
 *
 * Just because we return something in next_zero_bit does not mean we won't
 * reject it on the next call to reiserfs_in_journal
 */
int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr, int bit_nr, int search_all, b_blocknr_t * next_zero_bit) { … }

/* insert cn into table */
static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_cnode *cn) { … }

/* lock the current transaction */
static inline void lock_journal(struct super_block *sb) { … }

/* unlock the current transaction */
static inline void unlock_journal(struct super_block *sb) { … }

static inline void get_journal_list(struct reiserfs_journal_list *jl) { … }

static inline void put_journal_list(struct super_block *s, struct reiserfs_journal_list *jl) { … }

/*
 * this used to be much more involved, and I'm keeping it just in case
 * things get ugly again.  it gets called by flush_commit_list, and
 * cleans up any data stored about blocks freed during a transaction.
 */
static void cleanup_freed_for_journal_list(struct super_block *sb, struct reiserfs_journal_list *jl) { … }

static int journal_list_still_alive(struct super_block *s, unsigned int trans_id) { … }
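/*
 * Illustrative sketch (not part of the original source): how a block
 * allocator might consult reiserfs_in_journal() (above) before handing out
 * bit 'bit_nr' from on-disk bitmap 'bmap_nr'.  The variable names and the
 * 'retry' label, which stands in for the caller's scan loop, are assumptions.
 *
 *	b_blocknr_t next = 0;
 *
 *	if (reiserfs_in_journal(sb, bmap_nr, bit_nr, 1, &next)) {
 *		bit_nr = next;
 *		goto retry;
 *	}
 */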
/*
 * If page->mapping was null, we failed to truncate this page for
 * some reason.  Most likely because it was truncated after being
 * logged via data=journal.
 *
 * This does a check to see if the buffer belongs to one of these
 * lost pages before doing the final put_bh.  If page->mapping was
 * null, it tries to free buffers on the page, which should make the
 * final put_page drop the page from the lru.
 */
static void release_buffer_page(struct buffer_head *bh) { … }

static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) { … }

static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) { … }

static void submit_logged_buffer(struct buffer_head *bh) { … }

static void submit_ordered_buffer(struct buffer_head *bh) { … }

#define CHUNK_SIZE …
struct buffer_chunk { … };

static void write_chunk(struct buffer_chunk *chunk) { … }

static void write_ordered_chunk(struct buffer_chunk *chunk) { … }

static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, spinlock_t * lock, void (fn) (struct buffer_chunk *)) { … }

static atomic_t nr_reiserfs_jh = …;

static struct reiserfs_jh *alloc_jh(void) { … }

/*
 * we want to free the jh when the buffer has been written
 * and waited on
 */
void reiserfs_free_jh(struct buffer_head *bh) { … }

static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, int tail) { … }

int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) { … }

int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) { … }

#define JH_ENTRY(l) …

static int write_ordered_buffers(spinlock_t * lock, struct reiserfs_journal *j, struct reiserfs_journal_list *jl, struct list_head *list) { … }

static int flush_older_commits(struct super_block *s, struct reiserfs_journal_list *jl) { … }

static int reiserfs_async_progress_wait(struct super_block *s) { … }

/*
 * if this journal list still has commit blocks unflushed, send them to disk.
 *
 * log areas must be flushed in order (transaction 2 can't commit before
 * transaction 1).  Before the commit block can be written, every other log
 * block must be safely on disk.
 */
static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) { … }
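/*
 * Illustrative sketch (not part of the original source) of the ordering rule
 * flush_commit_list() must respect: every log block of a transaction is on
 * disk before its commit block is written.  The real code batches the
 * submissions and overlaps the waits; 'log_bh[]', 'nr_log_blocks' and
 * 'commit_bh' are placeholders.
 *
 *	int i;
 *
 *	for (i = 0; i < nr_log_blocks; i++)
 *		sync_dirty_buffer(log_bh[i]);
 *	sync_dirty_buffer(commit_bh);
 */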
/*
 * flush_journal_list frequently needs to find a newer transaction for a
 * given block.  This does that, or returns NULL if it can't find anything
 */
static struct reiserfs_journal_list *find_newer_jl_for_cn(struct reiserfs_journal_cnode *cn) { … }

static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **, struct reiserfs_journal_list *, unsigned long, int);

/*
 * once all the real blocks have been flushed, it is safe to remove them
 * from the journal list for this transaction.  Aside from freeing the
 * cnode, this also allows the block to be reallocated for data blocks
 * if it had been deleted.
 */
static void remove_all_from_journal_list(struct super_block *sb, struct reiserfs_journal_list *jl, int debug) { … }

/*
 * if this timestamp is greater than the timestamp we wrote last to the
 * header block, write it to the header block.  once this is done, I can
 * safely say the log area for this transaction won't ever be replayed,
 * and I can start releasing blocks in this transaction for reuse as data
 * blocks.  called by flush_journal_list, before it calls
 * remove_all_from_journal_list
 */
static int _update_journal_header_block(struct super_block *sb, unsigned long offset, unsigned int trans_id) { … }

static int update_journal_header_block(struct super_block *sb, unsigned long offset, unsigned int trans_id) { … }

/*
 * flush any and all journal lists older than you are
 * can only be called from flush_journal_list
 */
static int flush_older_journal_lists(struct super_block *sb, struct reiserfs_journal_list *jl) { … }

static void del_from_work_list(struct super_block *s, struct reiserfs_journal_list *jl) { … }

/*
 * flush a journal list, both commit and real blocks
 *
 * always set flushall to 1, unless you are calling from inside
 * flush_journal_list
 *
 * IMPORTANT.  This can only be called while there are no journal writers,
 * and the journal is locked.  That means it can only be called from
 * do_journal_end, or by journal_release
 */
static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) { … }

static int write_one_transaction(struct super_block *s, struct reiserfs_journal_list *jl, struct buffer_chunk *chunk) { … }

/* used by flush_commit_list */
static void dirty_one_transaction(struct super_block *s, struct reiserfs_journal_list *jl) { … }

static int kupdate_transactions(struct super_block *s, struct reiserfs_journal_list *jl, struct reiserfs_journal_list **next_jl, unsigned int *next_trans_id, int num_blocks, int num_trans) { … }

/*
 * for o_sync and fsync heavy applications, they tend to use
 * all the journal list slots with tiny transactions.  These
 * trigger lots and lots of calls to update the header block, which
 * adds seeks and slows things down.
 *
 * This function tries to clear out a large chunk of the journal lists
 * at once, which makes everything faster since only the newest journal
 * list updates the header block
 */
static int flush_used_journal_lists(struct super_block *s, struct reiserfs_journal_list *jl) { … }

/*
 * removes any nodes in table with name block and dev as bh.
 * only touches the hnext and hprev pointers.
 */
static void remove_journal_hash(struct super_block *sb, struct reiserfs_journal_cnode **table, struct reiserfs_journal_list *jl, unsigned long block, int remove_freed) { … }

static void free_journal_ram(struct super_block *sb) { … }

/*
 * call on unmount.  Only set error to 1 if you haven't made your way out
 * of read_super() yet.  Any other caller must keep error at 0.
 */
static int do_journal_release(struct reiserfs_transaction_handle *th, struct super_block *sb, int error) { … }

/*
 * call on unmount.  flush all journal trans, release all alloc'd ram
 */
int journal_release(struct reiserfs_transaction_handle *th, struct super_block *sb) { … }

/* only call from an error condition inside reiserfs_read_super! */
int journal_release_error(struct reiserfs_transaction_handle *th, struct super_block *sb) { … }
/*
 * compares description block with commit block.
 * returns 1 if they differ, 0 if they are the same
 */
static int journal_compare_desc_commit(struct super_block *sb, struct reiserfs_journal_desc *desc, struct reiserfs_journal_commit *commit) { … }

/*
 * returns 0 if it did not find a description block
 * returns -1 if it found a corrupt commit block
 * returns 1 if both desc and commit were valid
 * NOTE: only called during fs mount
 */
static int journal_transaction_is_valid(struct super_block *sb, struct buffer_head *d_bh, unsigned int *oldest_invalid_trans_id, unsigned long *newest_mount_id) { … }

static void brelse_array(struct buffer_head **heads, int num) { … }

/*
 * given the start, and values for the oldest acceptable transactions,
 * this either reads in and replays a transaction, or returns because the
 * transaction is invalid, or too old.
 * NOTE: only called during fs mount
 */
static int journal_read_transaction(struct super_block *sb, unsigned long cur_dblock, unsigned long oldest_start, unsigned int oldest_trans_id, unsigned long newest_mount_id) { … }

/*
 * This function reads blocks starting from block and to max_block of bufsize
 * size (but no more than BUFNR blocks at a time).  This proved to improve
 * mounting speed on self-rebuilding raid5 arrays at least.
 * Right now it is only used from journal code.  But later we might use it
 * from other places.
 * Note: Do not use journal_getblk/sb_getblk functions here!
 */
static struct buffer_head *reiserfs_breada(struct block_device *dev, b_blocknr_t block, int bufsize, b_blocknr_t max_block) { … }

/*
 * read and replay the log
 * on a clean unmount, the journal header's next unflushed pointer will be
 * to an invalid transaction.  This tests that before finding all the
 * transactions in the log, which makes normal mount times fast.
 *
 * After a crash, this starts with the next unflushed transaction, and
 * replays until it finds one too old, or invalid.
 *
 * On exit, it sets things up so the first transaction will work correctly.
 * NOTE: only called during fs mount
 */
static int journal_read(struct super_block *sb) { … }

static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) { … }

static void journal_list_init(struct super_block *sb) { … }

static void release_journal_dev(struct reiserfs_journal *journal) { … }

static int journal_init_dev(struct super_block *super, struct reiserfs_journal *journal, const char *jdev_name) { … }

/*
 * When creating/tuning a file system, the user can assign some
 * journal params within boundaries which depend on the ratio
 * blocksize/standard_blocksize.
 *
 * For blocks >= standard_blocksize, the transaction size should
 * be not less than JOURNAL_TRANS_MIN_DEFAULT, and not more
 * than JOURNAL_TRANS_MAX_DEFAULT.
 *
 * For blocks < standard_blocksize these boundaries should be
 * decreased proportionally.
 */
#define REISERFS_STANDARD_BLKSIZE …

static int check_advise_trans_params(struct super_block *sb, struct reiserfs_journal *journal) { … }

/* must be called once on fs mount.  calls journal_read for you */
int journal_init(struct super_block *sb, const char *j_dev_name, int old_format, unsigned int commit_max_age) { … }
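/*
 * Illustrative sketch (not part of the original source) of the proportional
 * scaling described above check_advise_trans_params(): for block sizes below
 * REISERFS_STANDARD_BLKSIZE the allowed transaction size shrinks by the same
 * factor.  How the bounds are actually derived inside the function is an
 * assumption here; 'journal->j_trans_max' is assumed to hold the
 * user-supplied value, and returning -1 stands in for whatever error
 * convention the real check uses.
 *
 *	int ratio = 1;
 *
 *	if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
 *		ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
 *
 *	if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
 *	    journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio)
 *		return -1;
 */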
/*
 * test for a polite end of the current transaction.  Used by file_write,
 * and should be used by delete to make sure they don't write more than
 * can fit inside a single transaction
 */
int journal_transaction_should_end(struct reiserfs_transaction_handle *th, int new_alloc) { … }

/* this must be called inside a transaction */
void reiserfs_block_writes(struct reiserfs_transaction_handle *th) { … }

/* this must be called without a transaction started */
void reiserfs_allow_writes(struct super_block *s) { … }

/* this must be called without a transaction started */
void reiserfs_wait_on_write_block(struct super_block *s) { … }

static void queue_log_writer(struct super_block *s) { … }

static void wake_queued_writers(struct super_block *s) { … }

static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) { … }

/*
 * join == true if you must join an existing transaction.
 * join == false if you can deal with waiting for others to finish
 *
 * this will block until the transaction is joinable.  send the number of
 * blocks you expect to use in nblocks.
 */
static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct super_block *sb, unsigned long nblocks, int join) { … }

struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct super_block *s, int nblocks) { … }

int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) { … }

static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *sb) { … }

int journal_join_abort(struct reiserfs_transaction_handle *th, struct super_block *sb) { … }

int journal_begin(struct reiserfs_transaction_handle *th, struct super_block *sb, unsigned long nblocks) { … }

/*
 * puts bh into the current transaction.  If it was already there, it
 * reorders the hash: removes the old pointers and puts new ones in (to
 * make sure replay happens in the right order).
 *
 * If it was dirty, cleans and files onto the clean list.  I can't let it
 * be dirty again until the transaction is committed.
 *
 * If j_len is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
 */
int journal_mark_dirty(struct reiserfs_transaction_handle *th, struct buffer_head *bh) { … }

int journal_end(struct reiserfs_transaction_handle *th) { … }

/*
 * removes from the current transaction, releasing and decrementing any
 * counters.  also files the removed buffer directly onto the clean list
 *
 * called by journal_mark_freed when a block has been deleted
 *
 * returns 1 if it cleaned and released the buffer.  0 otherwise
 */
static int remove_from_transaction(struct super_block *sb, b_blocknr_t blocknr, int already_cleaned) { … }
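/*
 * Illustrative sketch (not part of the original source): the restart pattern
 * journal_transaction_should_end() above is meant for.  A long-running
 * operation checks whether the running transaction has grown too old or too
 * full and, if so, politely ends it and begins a fresh one.  'blocks_needed'
 * and the error handling are placeholders.
 *
 *	if (journal_transaction_should_end(th, blocks_needed)) {
 *		err = journal_end(th);
 *		if (err)
 *			return err;
 *		err = journal_begin(th, sb, blocks_needed);
 *		if (err)
 *			return err;
 *	}
 */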
/*
 * for any cnode in a journal list, it can only be dirtied if all the
 * transactions that include it are committed to disk.
 * this checks through each transaction, and returns 1 if you are allowed
 * to dirty, and 0 if you aren't
 *
 * it is called by dirty_journal_list, which is called after
 * flush_commit_list has gotten all the log blocks for a given
 * transaction on disk
 */
static int can_dirty(struct reiserfs_journal_cnode *cn) { … }

/*
 * syncs the commit blocks, but does not force the real buffers to disk
 * will wait until the current transaction is done/committed before returning
 */
int journal_end_sync(struct reiserfs_transaction_handle *th) { … }

/* writeback the pending async commits to disk */
static void flush_async_commits(struct work_struct *work) { … }

/*
 * flushes any old transactions to disk
 * ends the current transaction if it is too old
 */
void reiserfs_flush_old_commits(struct super_block *sb) { … }

/*
 * returns 0 if do_journal_end should return right away, returns 1 if
 * do_journal_end should finish the commit
 *
 * if the current transaction is too old, but still has writers, this will
 * wait on j_join_wait until all the writers are done.  By the time it
 * wakes up, the transaction it was called for has already ended, so it just
 * flushes the commit list and returns 0.
 *
 * Won't batch when flush or commit_now is set.  Also won't batch when
 * others are waiting on j_join_wait.
 *
 * Note, we can't allow the journal_end to proceed while there are still
 * writers in the log.
 */
static int check_journal_end(struct reiserfs_transaction_handle *th, int flags) { … }

/*
 * Does all the work that makes deleting blocks safe.
 * When deleting a block marked BH_JNew, just remove it from the current
 * transaction, clean its buffer_head and move on.
 *
 * otherwise:
 * set a bit for the block in the journal bitmap.  That will prevent it from
 * being allocated for unformatted nodes before this transaction has finished.
 *
 * mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.
 * That will prevent any old transactions with this block from trying to flush
 * to the real location.  Since we aren't removing the cnode from the
 * journal_list_hash, the block can't be reallocated yet.
 *
 * Then remove it from the current transaction, decrementing any counters and
 * filing it on the clean list.
 */
int journal_mark_freed(struct reiserfs_transaction_handle *th, struct super_block *sb, b_blocknr_t blocknr) { … }

void reiserfs_update_inode_transaction(struct inode *inode) { … }

/*
 * returns -1 on error, 0 if no commits/barriers were done and 1
 * if a transaction was actually committed and the barrier was done
 */
static int __commit_trans_jl(struct inode *inode, unsigned long id, struct reiserfs_journal_list *jl) { … }

int reiserfs_commit_for_inode(struct inode *inode) { … }

void reiserfs_restore_prepared_buffer(struct super_block *sb, struct buffer_head *bh) { … }

extern struct tree_balance *cur_tb;

/*
 * before we can change a metadata block, we have to make sure it won't
 * be written to disk while we are altering it.  So, we must:
 * clean it
 * wait on it.
 */
int reiserfs_prepare_for_journal(struct super_block *sb, struct buffer_head *bh, int wait) { … }
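/*
 * Illustrative sketch (not part of the original source): the usual pairing
 * of reiserfs_prepare_for_journal() above with either a real modification
 * (logged via journal_mark_dirty()) or a change of mind (handed back with
 * reiserfs_restore_prepared_buffer()).  'do_change' is a placeholder flag.
 *
 *	reiserfs_prepare_for_journal(sb, bh, 1);
 *	if (do_change) {
 *		... modify bh->b_data ...
 *		journal_mark_dirty(th, bh);
 *	} else {
 *		reiserfs_restore_prepared_buffer(sb, bh);
 *	}
 */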
/*
 * long and ugly.  If flush, will not return until all commit
 * blocks and all real buffers in the trans are on disk.
 * If no_async, won't return until all commit blocks are on disk.
 *
 * keep reading, there are comments as you go along
 *
 * If the journal is aborted, we just clean up.  Things like flushing
 * journal lists, etc just won't happen.
 */
static int do_journal_end(struct reiserfs_transaction_handle *th, int flags) { … }

/* Set the file system read only and refuse new transactions */
void reiserfs_abort_journal(struct super_block *sb, int errno) { … }
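/*
 * Illustrative sketch (not part of the original source): how an error path
 * elsewhere in reiserfs might be expected to take the journal down once a
 * fatal inconsistency is found.  After the abort, the fs is read only, new
 * transactions are refused, and commits just clean up (see do_journal_end
 * above).  'fatal_error' and the errno value passed are placeholders.
 *
 *	if (fatal_error) {
 *		reiserfs_abort_journal(sb, -EIO);
 *		return -EIO;
 *	}
 */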