// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. */ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/time.h> #include <linux/init.h> #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/falloc.h> #include <linux/writeback.h> #include <linux/compat.h> #include <linux/slab.h> #include <linux/btrfs.h> #include <linux/uio.h> #include <linux/iversion.h> #include <linux/fsverity.h> #include "ctree.h" #include "direct-io.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" #include "tree-log.h" #include "locking.h" #include "qgroup.h" #include "compression.h" #include "delalloc-space.h" #include "reflink.h" #include "subpage.h" #include "fs.h" #include "accessors.h" #include "extent-tree.h" #include "file-item.h" #include "ioctl.h" #include "file.h" #include "super.h" /* simple helper to fault in pages and copy. This should go away * and be replaced with calls into generic code. */ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes, struct page **prepared_pages, struct iov_iter *i) { … } /* * unlocks pages after btrfs_file_write is done with them */ static void btrfs_drop_pages(struct btrfs_fs_info *fs_info, struct page **pages, size_t num_pages, u64 pos, u64 copied) { … } /* * After btrfs_copy_from_user(), update the following things for delalloc: * - Mark newly dirtied pages as DELALLOC in the io tree. * Used to advise which range is to be written back. * - Mark modified pages as Uptodate/Dirty and not needing COW fixup * - Update inode size for past EOF write */ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, struct extent_state **cached, bool noreserve) { … } /* * this is very complex, but the basic idea is to drop all extents * in the range start - end. hint_block is filled in with a block number * that would be a good hint to the block allocator for this file. 
* * If an extent intersects the range but is not entirely inside the range * it is either truncated or split. Anything entirely inside the range * is deleted from the tree. * * Note: the VFS' inode number of bytes is not updated, it's up to the caller * to deal with that. We set the field 'bytes_found' of the arguments structure * with the number of allocated bytes found in the target range, so that the * caller can update the inode's number of bytes in an atomic way when * replacing extents in a range to avoid races with stat(2). */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_drop_extents_args *args) { … } static int extent_mergeable(struct extent_buffer *leaf, int slot, u64 objectid, u64 bytenr, u64 orig_offset, u64 *start, u64 *end) { … } /* * Mark extent in the range start - end as written. * * This changes extent type from 'pre-allocated' to 'regular'. If only * part of extent is marked as written, the extent will be split into * two or three. */ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 start, u64 end) { … } /* * on error we return an unlocked page and the error value * on success we return a locked page and 0 */ static int prepare_uptodate_page(struct inode *inode, struct page *page, u64 pos, bool force_uptodate) { … } static fgf_t get_prepare_fgp_flags(bool nowait) { … } static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait) { … } /* * this just gets pages into the page cache and locks them down. */ static noinline int prepare_pages(struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, bool force_uptodate, bool nowait) { … } /* * This function locks the extent and properly waits for data=ordered extents * to finish before allowing the pages to be modified if needed. 
* * The return value: * 1 - the extent is locked * 0 - the extent is not locked, and everything is OK * -EAGAIN - need to re-prepare the pages * any other < 0 value - something went wrong */ static noinline int lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, u64 *lockstart, u64 *lockend, bool nowait, struct extent_state **cached_state) { … } /* * Check if we can do nocow write into the range [@pos, @pos + @write_bytes) * * @pos: File offset. * @write_bytes: The length to write, will be updated to the nocow writeable * range. * * This function will flush ordered extents in the range to ensure proper * nocow checks. * * Return: * > 0 If we can nocow, and updates @write_bytes. * 0 If we can't do a nocow write. * -EAGAIN If we can't do a nocow write because snapshotting of the inode's * root is in progress. * < 0 If an error happened. * * NOTE: Callers need to call btrfs_check_nocow_unlock() if we return > 0. */ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, size_t *write_bytes, bool nowait) { … } void btrfs_check_nocow_unlock(struct btrfs_inode *inode) { … } static void update_time_for_write(struct inode *inode) { … } int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count) { … } ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i) { … } static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from, const struct btrfs_ioctl_encoded_io_args *encoded) { … } ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from, const struct btrfs_ioctl_encoded_io_args *encoded) { … } static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { … } int btrfs_release_file(struct inode *inode, struct file *filp) { … } static int start_ordered_ops(struct btrfs_inode *inode, loff_t start, loff_t end) { … } static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) { … } /* * fsync call for 
both files and directories. This logs the inode into * the tree log instead of forcing full commits whenever possible. * * It needs to call filemap_fdatawait so that all ordered extent updates * in the metadata btree are up to date for copying to the log. * * It drops the inode mutex before doing the tree log commit. This is an * important optimization for directories because holding the mutex prevents * new operations on the dir while we write to disk. */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { … } /* * btrfs_page_mkwrite() is not allowed to change the file size as it gets * called from a page fault handler when a page is first dirtied. Hence we must * be careful to check for EOF conditions here. We set the page up correctly * for a written page which means we get ENOSPC checking when writing into * holes and correct delalloc and unwritten extent mapping on filesystems that * support these features. * * We are not allowed to take the i_mutex here so we have to play games to * protect against truncate races as the page could now be beyond EOF. Because * truncate_setsize() writes the inode size before removing pages, once we have * the page lock we can determine safely if the page is beyond EOF. If it is not * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. 
*/ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) { … } static const struct vm_operations_struct btrfs_file_vm_ops = …; static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) { … } static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf, int slot, u64 start, u64 end) { … } static int fill_holes(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *path, u64 offset, u64 end) { … } /* * Find a hole extent on given inode and change start/len to the end of hole * extent. (hole/vacuum extent whose em->start <= start && * em->start + em->len > start) * When a hole extent is found, return 1 and modify start/len. */ static int find_first_non_hole(struct btrfs_inode *inode, u64 *start, u64 *len) { … } static void btrfs_punch_hole_lock_range(struct inode *inode, const u64 lockstart, const u64 lockend, struct extent_state **cached_state) { … } static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_replace_extent_info *extent_info, const u64 replace_len, const u64 bytes_to_drop) { … } /* * The respective range must have been previously locked, as well as the inode. * The end offset is inclusive (last byte of the range). * @extent_info is NULL for fallocate's hole punching and non-NULL when replacing * the file range with an extent. * When not punching a hole, we don't want to end up in a state where we dropped * extents without inserting a new one, so we must abort the transaction to avoid * a corruption. 
*/ int btrfs_replace_file_extents(struct btrfs_inode *inode, struct btrfs_path *path, const u64 start, const u64 end, struct btrfs_replace_extent_info *extent_info, struct btrfs_trans_handle **trans_out) { … } static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len) { … } /* Helper structure to record which range is already reserved */ struct falloc_range { … }; /* * Helper function to add falloc range * * Caller should have locked the larger range of extent containing * [start, len) */ static int add_falloc_range(struct list_head *head, u64 start, u64 len) { … } static int btrfs_fallocate_update_isize(struct inode *inode, const u64 end, const int mode) { … } enum { … }; static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode, u64 offset) { … } static int btrfs_zero_range(struct inode *inode, loff_t offset, loff_t len, const int mode) { … } static long btrfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { … } /* * Helper for btrfs_find_delalloc_in_range(). Find a subrange in a given range * that has unflushed and/or flushing delalloc. There might be other adjacent * subranges after the one it found, so btrfs_find_delalloc_in_range() keeps * looping while it gets adjacent subranges, and merging them together. */ static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end, struct extent_state **cached_state, bool *search_io_tree, u64 *delalloc_start_ret, u64 *delalloc_end_ret) { … } /* * Check if there's delalloc in a given range. * * @inode: The inode. * @start: The start offset of the range. It does not need to be * sector size aligned. * @end: The end offset (inclusive value) of the search range. * It does not need to be sector size aligned. * @cached_state: Extent state record used for speeding up delalloc * searches in the inode's io_tree. Can be NULL. 
* @delalloc_start_ret: Output argument, set to the start offset of the * subrange found with delalloc (may not be sector size * aligned). * @delalloc_end_ret: Output argument, set to the end offset (inclusive value) * of the subrange found with delalloc. * * Returns true if a subrange with delalloc is found within the given range, and * if so it sets @delalloc_start_ret and @delalloc_end_ret with the start and * end offsets of the subrange. */ bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, struct extent_state **cached_state, u64 *delalloc_start_ret, u64 *delalloc_end_ret) { … } /* * Check if there's a hole or delalloc range in a range representing a hole (or * prealloc extent) found in the inode's subvolume btree. * * @inode: The inode. * @whence: Seek mode (SEEK_DATA or SEEK_HOLE). * @start: Start offset of the hole region. It does not need to be sector * size aligned. * @end: End offset (inclusive value) of the hole region. It does not * need to be sector size aligned. * @start_ret: Return parameter, used to set the start of the subrange in the * hole that matches the search criteria (seek mode), if such * subrange is found (return value of the function is true). * The value returned here may not be sector size aligned. * * Returns true if a subrange matching the given seek mode is found, and if one * is found, it updates @start_ret with the start of the subrange. 
*/ static bool find_desired_extent_in_hole(struct btrfs_inode *inode, int whence, struct extent_state **cached_state, u64 start, u64 end, u64 *start_ret) { … } static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) { … } static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) { … } static int btrfs_file_open(struct inode *inode, struct file *filp) { … } static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { … } const struct file_operations btrfs_file_operations = …; int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end) { … }