bmap.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>
#include <linux/ktime.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "log.h"
#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
#include "aops.h"
#include "trace_gfs2.h"

/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath { … };

static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);

/**
 * gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @folio: The folio.
 *
 * Returns: errno
 */
static int gfs2_unstuffer_folio(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct folio *folio)
{ … }

static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio)
{ … }

/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip)
{ … }

/**
 * find_metapath - Find path through the metadata tree
 * @sdp: The superblock
 * @block: The disk block to look up
 * @mp: The metapath to return the result in
 * @height: The pre-calculated height of the metadata tree
 *
 *   This routine returns a struct metapath structure that defines a path
 *   through the metadata of inode "ip" to get to block "block".
 *
 *   Example:
 *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 *   filesystem with a blocksize of 4096.
 *
 *   find_metapath() would return a struct metapath structure set to:
 *   mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 *
 *   That means that in order to get to the block containing the byte at
 *   offset 101342453, we would load the indirect block pointed to by pointer
 *   0 in the dinode.  We would then load the indirect block pointed to by
 *   pointer 48 in that indirect block.  We would then load the data block
 *   pointed to by pointer 165 in that indirect block.
 *
 *             ----------------------------------------
 *             | Dinode |                             |
 *             |        |                            4|
 *             |        |0 1 2 3 4 5                 9|
 *             |        |                            6|
 *             ----------------------------------------
 *                       |
 *                       |
 *                       V
 *             ----------------------------------------
 *             | Indirect Block                       |
 *             |                                     5|
 *             |            4 4 4 4 4 5 5            1|
 *             |0           5 6 7 8 9 0 1            2|
 *             ----------------------------------------
 *                                |
 *                                |
 *                                V
 *             ----------------------------------------
 *             | Indirect Block                       |
 *             |                         1 1 1 1 1   5|
 *             |                         6 6 6 6 6   1|
 *             |0                        3 4 5 6 7   2|
 *             ----------------------------------------
 *                                           |
 *                                           |
 *                                           V
 *             ----------------------------------------
 *             | Data block containing offset         |
 *             |            101342453                 |
 *             |                                      |
 *             |                                      |
 *             ----------------------------------------
 *
 */

static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
			  struct metapath *mp, unsigned int height)
{ … }

static inline unsigned int metapath_branch_start(const struct metapath *mp)
{ … }

/**
 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 */
static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
{ … }

/**
 * metapointer - Return pointer to start of metadata in a buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 *
 * Return a pointer to the block number of the next height of the metadata
 * tree given a buffer containing the pointer to the current height of the
 * metadata tree.
 */

static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
{ … }

static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{ … }

static void clone_metapath(struct metapath *clone, struct metapath *mp)
{ … }

static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{ … }

static inline struct buffer_head *
metapath_dibh(struct metapath *mp)
{ … }

static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{ … }

/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{ … }

/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{ … }

static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
{ … }

static void release_metapath(struct metapath *mp)
{ … }

/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @bh: The metadata block
 * @ptr: Current position in @bh
 * @eob: Set to 1 if we hit "end of block"
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, int *eob)
{ … }

enum walker_status { … };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
gfs2_metadata_walker;

/*
 * gfs2_walk_metadata - walk a tree of indirect blocks
 * @inode: The inode
 * @mp: Starting point of walk
 * @max_len: Maximum number of blocks to walk
 * @walker: Called during the walk
 *
 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 * past the end of metadata, and a negative error code otherwise.
 */

static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
		u64 max_len, gfs2_metadata_walker walker)
{ … }

static enum walker_status gfs2_hole_walker(struct metapath *mp,
					   unsigned int ptrs)
{ … }

/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function modifies @mp.
 *
 * Returns: errno on error
 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
			  struct metapath *mp, struct iomap *iomap)
{ … }

static inline void gfs2_indirect_init(struct metapath *mp,
				      struct gfs2_glock *gl, unsigned int i,
				      unsigned offset, u64 bn)
{ … }

enum alloc_state { … };

/**
 * __gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after __gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call) and uses the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M.  If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * Returns: errno on error
 */

static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			      struct metapath *mp)
{ … }

#define IOMAP_F_GFS2_BOUNDARY …

/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{ … }

/**
 * __gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * Returns: errno
 */
static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct metapath *mp)
{ … }

static struct folio *
gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
{ … }

static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
				 unsigned copied, struct folio *folio)
{ … }

static const struct iomap_folio_ops gfs2_iomap_folio_ops = …;

static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{ … }

static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct iomap *srcmap)
{ … }

static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned flags, struct iomap *iomap)
{ … }

const struct iomap_ops gfs2_iomap_ops = …;

/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if its ok to alloc blocks to satify the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped.  Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{ … }

int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock,
		    unsigned int *extlen)
{ … }

int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
		      unsigned int *extlen, bool *new)
{ … }

/*
 * NOTE: Never call gfs2_block_zero_range with an open transaction because it
 * uses iomap write to perform its actions, which begin their own transactions
 * (iomap_begin, get_folio, etc.)
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{ … }

#define GFS2_JTRUNC_REVOKES …

/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{ … }

static int trunc_start(struct inode *inode, u64 newsize)
{ … }

int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
		   struct iomap *iomap)
{ … }

int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length,
		     struct iomap *iomap)
{ … }

/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{ … }

static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
{ … }

/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @sdp: The superblock
 * @mp: starting metapath
 * @h: desired height to search
 * @end_list: See punch_hole().
 * @end_aligned: See punch_hole().
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{ … }

enum dealloc_states { … };

static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{ … }

static inline bool walk_done(struct gfs2_sbd *sdp,
			     struct metapath *mp, int height,
			     __u16 *end_list, unsigned int end_aligned)
{ … }

/**
 * punch_hole - deallocate blocks in a file
 * @ip: inode to truncate
 * @offset: the start of the hole
 * @length: the size of the hole (or 0 for truncate)
 *
 * Punch a hole into a file or truncate a file at a given position.  This
 * function operates in whole blocks (@offset and @length are rounded
 * accordingly); partially filled blocks must be cleared otherwise.
 *
 * This function works from the bottom up, and from the right to the left. In
 * other words, it strips off the highest layer (data) before stripping any of
 * the metadata. Doing it this way is best in case the operation is interrupted
 * by power failure, etc.  The dinode is rewritten in every transaction to
 * guarantee integrity.
 */
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
{ … }

static int trunc_end(struct gfs2_inode *ip)
{ … }

/**
 * do_shrink - make a file smaller
 * @inode: the inode
 * @newsize: the size to make the file
 *
 * Called with an exclusive lock on @inode. The @size must
 * be equal to or smaller than the current inode size.
 *
 * Returns: errno
 */

static int do_shrink(struct inode *inode, u64 newsize)
{ … }

/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode. This function
 * must not be called with @size any smaller than the current
 * inode size.
 *
 * Although it is not strictly required to unstuff files here,
 * earlier versions of GFS2 have a bug in the stuffed file reading
 * code which will result in a buffer overrun if the size is larger
 * than the max stuffed file size. In order to prevent this from
 * occurring, such files are unstuffed, but in other cases we can
 * just update the inode size directly.
 *
 * Returns: 0 on success, or -ve on error
 */

static int do_grow(struct inode *inode, u64 size)
{ … }

/**
 * gfs2_setattr_size - make a file a given size
 * @inode: the inode
 * @newsize: the size to make the file
 *
 * The file size can grow, shrink, or stay the same size. This
 * is called holding i_rwsem and an exclusive glock on the inode
 * in question.
 *
 * Returns: errno
 */

int gfs2_setattr_size(struct inode *inode, u64 newsize)
{ … }

int gfs2_truncatei_resume(struct gfs2_inode *ip)
{ … }

int gfs2_file_dealloc(struct gfs2_inode *ip)
{ … }

/**
 * gfs2_free_journal_extents - Free cached journal bmap info
 * @jd: The journal
 *
 */

void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
{ … }

/**
 * gfs2_add_jextent - Add or merge a new extent to extent cache
 * @jd: The journal descriptor
 * @lblock: The logical block at start of new extent
 * @dblock: The physical block at start of new extent
 * @blocks: Size of extent in fs blocks
 *
 * Returns: 0 on success or -ENOMEM
 */

static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
{ … }

/**
 * gfs2_map_journal_extents - Cache journal bmap info
 * @sdp: The super block
 * @jd: The journal to map
 *
 * Create a reusable "extent" mapping from all logical
 * blocks to all physical blocks for the given journal.  This will save
 * us time when writing journal blocks.  Most journals will have only one
 * extent that maps all their logical blocks.  That's because gfs2.mkfs
 * arranges the journal blocks sequentially to maximize performance.
 * So the extent would map the first block for the entire file length.
 * However, gfs2_jadd can happen while file activity is happening, so
 * those journals may not be sequential.  Less likely is the case where
 * the users created their own journals by mounting the metafs and
 * laying it out.  But it's still possible.  These journals might have
 * several extents.
 *
 * Returns: 0 on success, or error on failure
 */

int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
{ … }

/**
 * gfs2_write_alloc_required - figure out if a write will require an allocation
 * @ip: the file being written to
 * @offset: the offset to write to
 * @len: the number of bytes being written
 *
 * Returns: 1 if an alloc is required, 0 otherwise
 */

int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
			      unsigned int len)
{ … }

static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
{ … }

static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
					 loff_t length)
{ … }

int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
{ … }

static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
		loff_t offset, unsigned int len)
{ … }

const struct iomap_writeback_ops gfs2_writeback_ops = …;
linux/fs/gfs2/bmap.c