journal.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0-only
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём)
 *          Adrian Hunter
 */

/*
 * This file implements UBIFS journal.
 *
 * The journal consists of 2 parts - the log and bud LEBs. The log has fixed
 * length and position, while a bud logical eraseblock is any LEB in the main
 * area. Buds contain file system data - data nodes, inode nodes, etc. The log
 * contains only references to buds and some other stuff like commit
 * start node. The idea is that when we commit the journal, we do
 * not copy the data, the buds just become indexed. Since after the commit the
 * nodes in bud eraseblocks become leaf nodes of the file system index tree, we
 * use term "bud". Analogy is obvious, bud eraseblocks contain nodes which will
 * become leafs in the future.
 *
 * The journal is multi-headed because we want to write data to the journal as
 * optimally as possible. It is nice to have nodes belonging to the same inode
 * in one LEB, so we may write data owned by different inodes to different
 * journal heads, although at present only one data head is used.
 *
 * For recovery reasons, the base head contains all inode nodes, all directory
 * entry nodes and all truncate nodes. This means that the other heads contain
 * only data nodes.
 *
 * Bud LEBs may be half-indexed. For example, if the bud was not full at the
 * time of commit, the bud is retained to continue to be used in the journal,
 * even though the "front" of the LEB is now indexed. In that case, the log
 * reference contains the offset where the bud starts for the purposes of the
 * journal.
 *
 * The journal size has to be limited, because the larger is the journal, the
 * longer it takes to mount UBIFS (scanning the journal) and the more memory it
 * takes (indexing in the TNC).
 *
 * All the journal write operations like 'ubifs_jnl_update()' here, which write
 * multiple UBIFS nodes to the journal at one go, are atomic with respect to
 * unclean reboots. Should the unclean reboot happen, the recovery code drops
 * all the nodes.
 */

#include "ubifs.h"

/**
 * zero_ino_node_unused - zero out unused fields of an on-flash inode node.
 * @ino: the inode to zero out
 */
static inline void zero_ino_node_unused(struct ubifs_ino_node *ino)
{ … }

/**
 * zero_dent_node_unused - zero out unused fields of an on-flash directory
 *                         entry node.
 * @dent: the directory entry to zero out
 */
static inline void zero_dent_node_unused(struct ubifs_dent_node *dent)
{ … }

/**
 * zero_trun_node_unused - zero out unused fields of an on-flash truncation
 *                         node.
 * @trun: the truncation node to zero out
 */
static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
{ … }

static void ubifs_add_auth_dirt(struct ubifs_info *c, int lnum)
{ … }

/**
 * reserve_space - reserve space in the journal.
 * @c: UBIFS file-system description object
 * @jhead: journal head number
 * @len: node length
 *
 * This function reserves space in journal head @head. If the reservation
 * succeeded, the journal head stays locked and later has to be unlocked using
 * 'release_head()'. Returns zero in case of success, %-EAGAIN if commit has to
 * be done, and other negative error codes in case of other failures.
 */
static int reserve_space(struct ubifs_info *c, int jhead, int len)
{ … }

static int ubifs_hash_nodes(struct ubifs_info *c, void *node,
			     int len, struct shash_desc *hash)
{ … }

/**
 * write_head - write data to a journal head.
 * @c: UBIFS file-system description object
 * @jhead: journal head
 * @buf: buffer to write
 * @len: length to write
 * @lnum: LEB number written is returned here
 * @offs: offset written is returned here
 * @sync: non-zero if the write-buffer has to by synchronized
 *
 * This function writes data to the reserved space of journal head @jhead.
 * Returns zero in case of success and a negative error code in case of
 * failure.
 */
static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
		      int *lnum, int *offs, int sync)
{ … }

/**
 * __queue_and_wait - queue a task and wait until the task is waked up.
 * @c: UBIFS file-system description object
 *
 * This function adds current task in queue and waits until the task is waked
 * up. This function should be called with @c->reserve_space_wq locked.
 */
static void __queue_and_wait(struct ubifs_info *c)
{ … }

/**
 * wait_for_reservation - try queuing current task to wait until waked up.
 * @c: UBIFS file-system description object
 *
 * This function queues current task to wait until waked up, if queuing is
 * started(@c->need_wait_space is not %0). Returns %true if current task is
 * added in queue, otherwise %false is returned.
 */
static bool wait_for_reservation(struct ubifs_info *c)
{ … }

/**
 * wake_up_reservation - wake up first task in queue or stop queuing.
 * @c: UBIFS file-system description object
 *
 * This function wakes up the first task in queue if it exists, or stops
 * queuing if no tasks in queue.
 */
static void wake_up_reservation(struct ubifs_info *c)
{ … }

/**
 * add_or_start_queue - add current task in queue or start queuing.
 * @c: UBIFS file-system description object
 *
 * This function starts queuing if queuing is not started, otherwise adds
 * current task in queue.
 */
static void add_or_start_queue(struct ubifs_info *c)
{ … }

/**
 * make_reservation - reserve journal space.
 * @c: UBIFS file-system description object
 * @jhead: journal head
 * @len: how many bytes to reserve
 *
 * This function makes space reservation in journal head @jhead. The function
 * takes the commit lock and locks the journal head, and the caller has to
 * unlock the head and finish the reservation with 'finish_reservation()'.
 * Returns zero in case of success and a negative error code in case of
 * failure.
 *
 * Note, the journal head may be unlocked as soon as the data is written, while
 * the commit lock has to be released after the data has been added to the
 * TNC.
 */
static int make_reservation(struct ubifs_info *c, int jhead, int len)
{ … }

/**
 * release_head - release a journal head.
 * @c: UBIFS file-system description object
 * @jhead: journal head
 *
 * This function releases journal head @jhead which was locked by
 * the 'make_reservation()' function. It has to be called after each successful
 * 'make_reservation()' invocation.
 */
static inline void release_head(struct ubifs_info *c, int jhead)
{ … }

/**
 * finish_reservation - finish a reservation.
 * @c: UBIFS file-system description object
 *
 * This function finishes journal space reservation. It must be called after
 * 'make_reservation()'.
 */
static void finish_reservation(struct ubifs_info *c)
{ … }

/**
 * get_dent_type - translate VFS inode mode to UBIFS directory entry type.
 * @mode: inode mode
 */
static int get_dent_type(int mode)
{ … }

/**
 * pack_inode - pack an inode node.
 * @c: UBIFS file-system description object
 * @ino: buffer in which to pack inode node
 * @inode: inode to pack
 * @last: indicates the last node of the group
 */
static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
		       const struct inode *inode, int last)
{ … }

/**
 * mark_inode_clean - mark UBIFS inode as clean.
 * @c: UBIFS file-system description object
 * @ui: UBIFS inode to mark as clean
 *
 * This helper function marks UBIFS inode @ui as clean by cleaning the
 * @ui->dirty flag and releasing its budget. Note, VFS may still treat the
 * inode as dirty and try to write it back, but 'ubifs_write_inode()' would
 * just do nothing.
 */
static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui)
{ … }

static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent)
{ … }

/**
 * ubifs_jnl_update - update inode.
 * @c: UBIFS file-system description object
 * @dir: parent inode or host inode in case of extended attributes
 * @nm: directory entry name
 * @inode: inode to update
 * @deletion: indicates a directory entry deletion i.e unlink or rmdir
 * @xent: non-zero if the directory entry is an extended attribute entry
 * @in_orphan: indicates whether the @inode is in orphan list
 *
 * This function updates an inode by writing a directory entry (or extended
 * attribute entry), the inode itself, and the parent directory inode (or the
 * host inode) to the journal.
 *
 * The function writes the host inode @dir last, which is important in case of
 * extended attributes. Indeed, then we guarantee that if the host inode gets
 * synchronized (with 'fsync()'), and the write-buffer it sits in gets flushed,
 * the extended attribute inode gets flushed too. And this is exactly what the
 * user expects - synchronizing the host inode synchronizes its extended
 * attributes. Similarly, this guarantees that if @dir is synchronized, its
 * directory entry corresponding to @nm gets synchronized too.
 *
 * If the inode (@inode) or the parent directory (@dir) are synchronous, this
 * function synchronizes the write-buffer.
 *
 * This function marks the @dir and @inode inodes as clean and returns zero on
 * success. In case of failure, a negative error code is returned.
 */
int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
		     const struct fscrypt_name *nm, const struct inode *inode,
		     int deletion, int xent, int in_orphan)
{ … }

/**
 * ubifs_jnl_write_data - write a data node to the journal.
 * @c: UBIFS file-system description object
 * @inode: inode the data node belongs to
 * @key: node key
 * @buf: buffer to write
 * @len: data length (must not exceed %UBIFS_BLOCK_SIZE)
 *
 * This function writes a data node to the journal. Returns %0 if the data node
 * was successfully written, and a negative error code in case of failure.
 */
int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
			 const union ubifs_key *key, const void *buf, int len)
{ … }

/**
 * ubifs_jnl_write_inode - flush inode to the journal.
 * @c: UBIFS file-system description object
 * @inode: inode to flush
 *
 * This function writes inode @inode to the journal. If the inode is
 * synchronous, it also synchronizes the write-buffer. Returns zero in case of
 * success and a negative error code in case of failure.
 */
int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
{ … }

/**
 * ubifs_jnl_delete_inode - delete an inode.
 * @c: UBIFS file-system description object
 * @inode: inode to delete
 *
 * This function deletes inode @inode which includes removing it from orphans,
 * deleting it from TNC and, in some cases, writing a deletion inode to the
 * journal.
 *
 * When regular file inodes are unlinked or a directory inode is removed, the
 * 'ubifs_jnl_update()' function writes a corresponding deletion inode and
 * direntry to the media, and adds the inode to orphans. After this, when the
 * last reference to this inode has been dropped, this function is called. In
 * general, it has to write one more deletion inode to the media, because if
 * a commit happened between 'ubifs_jnl_update()' and
 * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal
 * anymore, and in fact it might not be on the flash anymore, because it might
 * have been garbage-collected already. And for optimization reasons UBIFS does
 * not read the orphan area if it has been unmounted cleanly, so it would have
 * no indication in the journal that there is a deleted inode which has to be
 * removed from TNC.
 *
 * However, if there was no commit between 'ubifs_jnl_update()' and
 * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion
 * inode to the media for the second time. And this is quite a typical case.
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure.
 */
int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode)
{ … }

/**
 * ubifs_jnl_xrename - cross rename two directory entries.
 * @c: UBIFS file-system description object
 * @fst_dir: parent inode of 1st directory entry to exchange
 * @fst_inode: 1st inode to exchange
 * @fst_nm: name of 1st inode to exchange
 * @snd_dir: parent inode of 2nd directory entry to exchange
 * @snd_inode: 2nd inode to exchange
 * @snd_nm: name of 2nd inode to exchange
 * @sync: non-zero if the write-buffer has to be synchronized
 *
 * This function implements the cross rename operation which may involve
 * writing 2 inodes and 2 directory entries. It marks the written inodes as clean
 * and returns zero on success. In case of failure, a negative error code is
 * returned.
 */
int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
		      const struct inode *fst_inode,
		      const struct fscrypt_name *fst_nm,
		      const struct inode *snd_dir,
		      const struct inode *snd_inode,
		      const struct fscrypt_name *snd_nm, int sync)
{ … }

/**
 * ubifs_jnl_rename - rename a directory entry.
 * @c: UBIFS file-system description object
 * @old_dir: parent inode of directory entry to rename
 * @old_inode: directory entry's inode to rename
 * @old_nm: name of the old directory entry to rename
 * @new_dir: parent inode of directory entry to rename
 * @new_inode: new directory entry's inode (or directory entry's inode to
 *		replace)
 * @new_nm: new name of the new directory entry
 * @whiteout: whiteout inode
 * @sync: non-zero if the write-buffer has to be synchronized
 * @delete_orphan: indicates an orphan entry deletion for @whiteout
 *
 * This function implements the re-name operation which may involve writing up
 * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes)
 * and 2 directory entries. It marks the written inodes as clean and returns
 * zero on success. In case of failure, a negative error code is returned.
 */
int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
		     const struct inode *old_inode,
		     const struct fscrypt_name *old_nm,
		     const struct inode *new_dir,
		     const struct inode *new_inode,
		     const struct fscrypt_name *new_nm,
		     const struct inode *whiteout, int sync, int delete_orphan)
{ … }

/**
 * truncate_data_node - re-compress/encrypt a truncated data node.
 * @c: UBIFS file-system description object
 * @inode: inode which refers to the data node
 * @block: data block number
 * @dn: data node to re-compress
 * @new_len: new length
 * @dn_size: size of the data node @dn in memory
 *
 * This function is used when an inode is truncated and the last data node of
 * the inode has to be re-compressed/encrypted and re-written.
 */
static int truncate_data_node(const struct ubifs_info *c, const struct inode *inode,
			      unsigned int block, struct ubifs_data_node *dn,
			      int *new_len, int dn_size)
{ … }

/**
 * ubifs_jnl_truncate - update the journal for a truncation.
 * @c: UBIFS file-system description object
 * @inode: inode to truncate
 * @old_size: old size
 * @new_size: new size
 *
 * When the size of a file decreases due to truncation, a truncation node is
 * written, the journal tree is updated, and the last data block is re-written
 * if it has been affected. The inode is also updated in order to synchronize
 * the new inode size.
 *
 * This function marks the inode as clean and returns zero on success. In case
 * of failure, a negative error code is returned.
 */
int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
		       loff_t old_size, loff_t new_size)
{ … }


/**
 * ubifs_jnl_delete_xattr - delete an extended attribute.
 * @c: UBIFS file-system description object
 * @host: host inode
 * @inode: extended attribute inode
 * @nm: extended attribute entry name
 *
 * This function delete an extended attribute which is very similar to
 * un-linking regular files - it writes a deletion xentry, a deletion inode and
 * updates the target inode. Returns zero in case of success and a negative
 * error code in case of failure.
 */
int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
			   const struct inode *inode,
			   const struct fscrypt_name *nm)
{ … }

/**
 * ubifs_jnl_change_xattr - change an extended attribute.
 * @c: UBIFS file-system description object
 * @inode: extended attribute inode
 * @host: host inode
 *
 * This function writes the updated version of an extended attribute inode and
 * the host inode to the journal (to the base head). The host inode is written
 * after the extended attribute inode in order to guarantee that the extended
 * attribute will be flushed when the inode is synchronized by 'fsync()' and
 * consequently, the write-buffer is synchronized. This function returns zero
 * in case of success and a negative error code in case of failure.
 */
int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
			   const struct inode *host)
{ … }
linux/fs/ubifs/journal.c