file.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/falloc.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
#include <linux/uio.h>
#include <linux/iversion.h>
#include <linux/fsverity.h>
#include "ctree.h"
#include "direct-io.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "tree-log.h"
#include "locking.h"
#include "qgroup.h"
#include "compression.h"
#include "delalloc-space.h"
#include "reflink.h"
#include "subpage.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
#include "file-item.h"
#include "ioctl.h"
#include "file.h"
#include "super.h"

/* simple helper to fault in pages and copy.  This should go away
 * and be replaced with calls into generic code.
 */
static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
					 struct page **prepared_pages,
					 struct iov_iter *i)
{ … }

/*
 * unlocks pages after btrfs_file_write is done with them
 */
static void btrfs_drop_pages(struct btrfs_fs_info *fs_info,
			     struct page **pages, size_t num_pages,
			     u64 pos, u64 copied)
{ … }

/*
 * After btrfs_copy_from_user(), update the following things for delalloc:
 * - Mark newly dirtied pages as DELALLOC in the io tree.
 *   Used to advise which range is to be written back.
 * - Mark modified pages as Uptodate/Dirty and not needing COW fixup
 * - Update inode size for past EOF write
 */
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
		      size_t num_pages, loff_t pos, size_t write_bytes,
		      struct extent_state **cached, bool noreserve)
{ … }

/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_block is filled in with a block number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 *
 * Note: the VFS' inode number of bytes is not updated, it's up to the caller
 * to deal with that. We set the field 'bytes_found' of the arguments structure
 * with the number of allocated bytes found in the target range, so that the
 * caller can update the inode's number of bytes in an atomic way when
 * replacing extents in a range to avoid races with stat(2).
 */
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_inode *inode,
		       struct btrfs_drop_extents_args *args)
{ … }

static int extent_mergeable(struct extent_buffer *leaf, int slot,
			    u64 objectid, u64 bytenr, u64 orig_offset,
			    u64 *start, u64 *end)
{ … }

/*
 * Mark extent in the range start - end as written.
 *
 * This changes extent type from 'pre-allocated' to 'regular'. If only
 * part of extent is marked as written, the extent will be split into
 * two or three.
 */
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct btrfs_inode *inode, u64 start, u64 end)
{ … }

/*
 * on error we return an unlocked page and the error value
 * on success we return a locked page and 0
 */
static int prepare_uptodate_page(struct inode *inode,
				 struct page *page, u64 pos,
				 bool force_uptodate)
{ … }

static fgf_t get_prepare_fgp_flags(bool nowait)
{ … }

static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
{ … }

/*
 * this just gets pages into the page cache and locks them down.
 */
static noinline int prepare_pages(struct inode *inode, struct page **pages,
				  size_t num_pages, loff_t pos,
				  size_t write_bytes, bool force_uptodate,
				  bool nowait)
{ … }

/*
 * This function locks the extent and properly waits for data=ordered extents
 * to finish before allowing the pages to be modified if need.
 *
 * The return value:
 * 1 - the extent is locked
 * 0 - the extent is not locked, and everything is OK
 * -EAGAIN - need re-prepare the pages
 * the other < 0 number - Something wrong happens
 */
static noinline int
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
				size_t num_pages, loff_t pos,
				size_t write_bytes,
				u64 *lockstart, u64 *lockend, bool nowait,
				struct extent_state **cached_state)
{ … }

/*
 * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
 *
 * @pos:         File offset.
 * @write_bytes: The length to write, will be updated to the nocow writeable
 *               range.
 *
 * This function will flush ordered extents in the range to ensure proper
 * nocow checks.
 *
 * Return:
 * > 0          If we can nocow, and updates @write_bytes.
 *  0           If we can't do a nocow write.
 * -EAGAIN      If we can't do a nocow write because snapshoting of the inode's
 *              root is in progress.
 * < 0          If an error happened.
 *
 * NOTE: Callers need to call btrfs_check_nocow_unlock() if we return > 0.
 */
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
			   size_t *write_bytes, bool nowait)
{ … }

void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
{ … }

static void update_time_for_write(struct inode *inode)
{ … }

int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count)
{ … }

ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
{ … }

static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
			const struct btrfs_ioctl_encoded_io_args *encoded)
{ … }

ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
			    const struct btrfs_ioctl_encoded_io_args *encoded)
{ … }

static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{ … }

int btrfs_release_file(struct inode *inode, struct file *filp)
{ … }

static int start_ordered_ops(struct btrfs_inode *inode, loff_t start, loff_t end)
{ … }

static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
{ … }

/*
 * fsync call for both files and directories.  This logs the inode into
 * the tree log instead of forcing full commits whenever possible.
 *
 * It needs to call filemap_fdatawait so that all ordered extent updates are
 * in the metadata btree are up to date for copying to the log.
 *
 * It drops the inode mutex before doing the tree log commit.  This is an
 * important optimization for directories because holding the mutex prevents
 * new operations on the dir while we write to disk.
 */
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{ … }

/*
 * btrfs_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied. Hence we must
 * be careful to check for EOF conditions here. We set the page up correctly
 * for a written page which means we get ENOSPC checking when writing into
 * holes and correct delalloc and unwritten extent mapping on filesystems that
 * support these features.
 *
 * We are not allowed to take the i_mutex here so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.  Because
 * truncate_setsize() writes the inode size before removing pages, once we have
 * the page lock we can determine safely if the page is beyond EOF. If it is not
 * beyond EOF, then the page is guaranteed safe against truncation until we
 * unlock the page.
 */
static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
{ … }

static const struct vm_operations_struct btrfs_file_vm_ops = …;

static int btrfs_file_mmap(struct file	*filp, struct vm_area_struct *vma)
{ … }

static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf,
			  int slot, u64 start, u64 end)
{ … }

static int fill_holes(struct btrfs_trans_handle *trans,
		struct btrfs_inode *inode,
		struct btrfs_path *path, u64 offset, u64 end)
{ … }

/*
 * Find a hole extent on given inode and change start/len to the end of hole
 * extent.(hole/vacuum extent whose em->start <= start &&
 *	   em->start + em->len > start)
 * When a hole extent is found, return 1 and modify start/len.
 */
static int find_first_non_hole(struct btrfs_inode *inode, u64 *start, u64 *len)
{ … }

static void btrfs_punch_hole_lock_range(struct inode *inode,
					const u64 lockstart,
					const u64 lockend,
					struct extent_state **cached_state)
{ … }

static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_inode *inode,
				     struct btrfs_path *path,
				     struct btrfs_replace_extent_info *extent_info,
				     const u64 replace_len,
				     const u64 bytes_to_drop)
{ … }

/*
 * The respective range must have been previously locked, as well as the inode.
 * The end offset is inclusive (last byte of the range).
 * @extent_info is NULL for fallocate's hole punching and non-NULL when replacing
 * the file range with an extent.
 * When not punching a hole, we don't want to end up in a state where we dropped
 * extents without inserting a new one, so we must abort the transaction to avoid
 * a corruption.
 */
int btrfs_replace_file_extents(struct btrfs_inode *inode,
			       struct btrfs_path *path, const u64 start,
			       const u64 end,
			       struct btrfs_replace_extent_info *extent_info,
			       struct btrfs_trans_handle **trans_out)
{ … }

static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
{ … }

/* Helper structure to record which range is already reserved */
struct falloc_range { … };

/*
 * Helper function to add falloc range
 *
 * Caller should have locked the larger range of extent containing
 * [start, len)
 */
static int add_falloc_range(struct list_head *head, u64 start, u64 len)
{ … }

static int btrfs_fallocate_update_isize(struct inode *inode,
					const u64 end,
					const int mode)
{ … }

enum { … };

static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode,
						 u64 offset)
{ … }

static int btrfs_zero_range(struct inode *inode,
			    loff_t offset,
			    loff_t len,
			    const int mode)
{ … }

static long btrfs_fallocate(struct file *file, int mode,
			    loff_t offset, loff_t len)
{ … }

/*
 * Helper for btrfs_find_delalloc_in_range(). Find a subrange in a given range
 * that has unflushed and/or flushing delalloc. There might be other adjacent
 * subranges after the one it found, so btrfs_find_delalloc_in_range() keeps
 * looping while it gets adjacent subranges, and merging them together.
 */
static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end,
				   struct extent_state **cached_state,
				   bool *search_io_tree,
				   u64 *delalloc_start_ret, u64 *delalloc_end_ret)
{ … }

/*
 * Check if there's delalloc in a given range.
 *
 * @inode:               The inode.
 * @start:               The start offset of the range. It does not need to be
 *                       sector size aligned.
 * @end:                 The end offset (inclusive value) of the search range.
 *                       It does not need to be sector size aligned.
 * @cached_state:        Extent state record used for speeding up delalloc
 *                       searches in the inode's io_tree. Can be NULL.
 * @delalloc_start_ret:  Output argument, set to the start offset of the
 *                       subrange found with delalloc (may not be sector size
 *                       aligned).
 * @delalloc_end_ret:    Output argument, set to he end offset (inclusive value)
 *                       of the subrange found with delalloc.
 *
 * Returns true if a subrange with delalloc is found within the given range, and
 * if so it sets @delalloc_start_ret and @delalloc_end_ret with the start and
 * end offsets of the subrange.
 */
bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
				  struct extent_state **cached_state,
				  u64 *delalloc_start_ret, u64 *delalloc_end_ret)
{ … }

/*
 * Check if there's a hole or delalloc range in a range representing a hole (or
 * prealloc extent) found in the inode's subvolume btree.
 *
 * @inode:      The inode.
 * @whence:     Seek mode (SEEK_DATA or SEEK_HOLE).
 * @start:      Start offset of the hole region. It does not need to be sector
 *              size aligned.
 * @end:        End offset (inclusive value) of the hole region. It does not
 *              need to be sector size aligned.
 * @start_ret:  Return parameter, used to set the start of the subrange in the
 *              hole that matches the search criteria (seek mode), if such
 *              subrange is found (return value of the function is true).
 *              The value returned here may not be sector size aligned.
 *
 * Returns true if a subrange matching the given seek mode is found, and if one
 * is found, it updates @start_ret with the start of the subrange.
 */
static bool find_desired_extent_in_hole(struct btrfs_inode *inode, int whence,
					struct extent_state **cached_state,
					u64 start, u64 end, u64 *start_ret)
{ … }

static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
{ … }

static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
{ … }

static int btrfs_file_open(struct inode *inode, struct file *filp)
{ … }

static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{ … }

const struct file_operations btrfs_file_operations = …;

int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end)
{ … }
linux/fs/btrfs/file.c