// SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/rwsem.h> #include <linux/xattr.h> #include <linux/security.h> #include <linux/posix_acl_xattr.h> #include <linux/iversion.h> #include <linux/fsverity.h> #include <linux/sched/mm.h> #include "messages.h" #include "ctree.h" #include "btrfs_inode.h" #include "transaction.h" #include "locking.h" #include "fs.h" #include "accessors.h" #include "ioctl.h" #include "verity.h" #include "orphan.h" /* * Implementation of the interface defined in struct fsverity_operations. * * The main question is how and where to store the verity descriptor and the * Merkle tree. We store both in dedicated btree items in the filesystem tree, * together with the rest of the inode metadata. This means we'll need to do * extra work to encrypt them once encryption is supported in btrfs, but btrfs * has a lot of careful code around i_size and it seems better to make a new key * type than try and adjust all of our expectations for i_size. * * Note that this differs from the implementation in ext4 and f2fs, where * this data is stored as if it were in the file, but past EOF. However, btrfs * does not have a widespread mechanism for caching opaque metadata pages, so we * do pretend that the Merkle tree pages themselves are past EOF for the * purposes of caching them (as opposed to creating a virtual inode). * * fs verity items are stored under two different key types on disk. * The descriptor items: * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ] * * At offset 0, we store a btrfs_verity_descriptor_item which tracks the * size of the descriptor item and some extra data for encryption. * Starting at offset 1, these hold the generic fs verity descriptor. * The latter are opaque to btrfs, we just read and write them as a blob for * the higher level verity code. The most common descriptor size is 256 bytes. 
* * The merkle tree items: * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ] * * These also start at offset 0, and correspond to the merkle tree bytes. * So when fsverity asks for page 0 of the merkle tree, we pull up one page * starting at offset 0 for this key type. These are also opaque to btrfs, * we're blindly storing whatever fsverity sends down. * * Another important consideration is the fact that the Merkle tree data scales * linearly with the size of the file (with 4K pages/blocks and SHA-256, it's * ~1/127th the size) so for large files, writing the tree can be a lengthy * operation. For that reason, we guard the whole enable verity operation * (between begin_enable_verity and end_enable_verity) with an orphan item. * Again, because the data can be pretty large, it's quite possible that we * could run out of space writing it, so we try our best to handle errors by * stopping and rolling back rather than aborting the victim transaction. */ #define MERKLE_START_ALIGN … /* * Compute the logical file offset where we cache the Merkle tree. * * @inode: inode of the verity file * * For the purposes of caching the Merkle tree pages, as required by * fs-verity, it is convenient to do size computations in terms of a file * offset, rather than in terms of page indices. * * Use 64K to be sure it's past the last page in the file, even with 64K pages. * That rounding operation itself can overflow loff_t, so we do it in u64 and * check. * * Returns the file offset on success, negative error code on failure. */ static loff_t merkle_file_pos(const struct inode *inode) { … } /* * Drop all the items for this inode with this key_type. * * @inode: inode to drop items for * @key_type: type of items to drop (BTRFS_VERITY_DESC_ITEM_KEY or * BTRFS_VERITY_MERKLE_ITEM_KEY) * * Before doing a verity enable we clean up any existing verity items. * This is also used to clean up if a verity enable failed halfway through. 
* * Returns number of dropped items on success, negative error code on failure. */ static int drop_verity_items(struct btrfs_inode *inode, u8 key_type) { … } /* * Drop all verity items * * @inode: inode to drop verity items for * * In most contexts where we are dropping verity items, we want to do it for all * the types of verity items, not a particular one. * * Returns 0 on success, negative error code on failure. */ int btrfs_drop_verity_items(struct btrfs_inode *inode) { … } /* * Insert and write inode items with a given key type and offset. * * @inode: inode to insert for * @key_type: key type to insert * @offset: item offset to insert at * @src: source data to write * @len: length of source data to write * * Write len bytes from src into items of up to 2K length. * The inserted items will have key (ino, key_type, offset + off) where off is * consecutively increasing from 0 up to the last item ending at offset + len. * * Returns 0 on success and a negative error code on failure. */ static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset, const char *src, u64 len) { … } /* * Read inode items of the given key type and offset from the btree. * * @inode: inode to read items of * @key_type: key type to read * @offset: item offset to read from * @dest: Buffer to read into. This parameter has slightly tricky * semantics. If it is NULL, the function will not do any copying * and will just return the size of all the items up to len bytes. * If dest_folio is passed, then the function will kmap_local the * folio and ignore dest, but it must still be non-NULL to avoid the * counting-only behavior. * @len: length in bytes to read * @dest_folio: copy into this folio instead of the dest buffer * * Helper function to read items from the btree. This returns the number of * bytes read or < 0 for errors. We can return short reads if the items don't * exist on disk or aren't big enough to fill the desired length. 
Supports * reading into a provided buffer (dest) or into the page cache (dest_folio). * * Returns number of bytes read or a negative error code on failure. */ static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset, char *dest, u64 len, struct folio *dest_folio) { … } /* * Delete an fsverity orphan * * @trans: transaction to do the delete in * @inode: inode whose orphan item is deleted * * Capture verity orphan specific logic that is repeated in the couple of places * we delete verity orphans. Specifically, handling ENOENT and ignoring inodes * with 0 links. * * Returns zero on success or a negative error code on failure. */ static int del_orphan(struct btrfs_trans_handle *trans, struct btrfs_inode *inode) { … } /* * Roll back in-progress verity if we encounter an error. * * @inode: inode verity had an error for * * We try to handle recoverable errors while enabling verity by rolling it back * and just failing the operation, rather than having an fs level error no * matter what. However, any error in rollback is unrecoverable. * * Returns 0 on success, negative error code on failure. */ static int rollback_verity(struct btrfs_inode *inode) { … } /* * Finalize making the file a valid verity file * * @inode: inode to be marked as verity * @desc: contents of the verity descriptor to write (not NULL) * @desc_size: size of the verity descriptor * * Do the actual work of finalizing verity after successfully writing the Merkle * tree: * * - write out the descriptor items * - mark the inode with the verity flag * - delete the orphan item * - mark the ro compat bit * - clear the in progress bit * * Returns 0 on success, negative error code on failure. */ static int finish_verity(struct btrfs_inode *inode, const void *desc, size_t desc_size) { … } /* * fsverity op that begins enabling verity. * * @filp: file to enable verity on * * Begin enabling fsverity for the file. We drop any existing verity items, add * an orphan and set the in progress bit. 
* * Returns 0 on success, negative error code on failure. */ static int btrfs_begin_enable_verity(struct file *filp) { … } /* * fsverity op that ends enabling verity. * * @filp: file we are finishing enabling verity on * @desc: verity descriptor to write out (NULL in error conditions) * @desc_size: size of the verity descriptor (variable with signatures) * @merkle_tree_size: size of the merkle tree in bytes * * If desc is null, then VFS is signaling an error occurred during verity * enable, and we should try to rollback. Otherwise, attempt to finish verity. * * Returns 0 on success, negative error code on error. */ static int btrfs_end_enable_verity(struct file *filp, const void *desc, size_t desc_size, u64 merkle_tree_size) { … } /* * fsverity op that gets the struct fsverity_descriptor. * * @inode: inode to get the descriptor of * @buf: output buffer for the descriptor contents * @buf_size: size of the output buffer. 0 to query the size * * fsverity does a two pass setup for reading the descriptor, in the first pass * it calls with buf_size = 0 to query the size of the descriptor, and then in * the second pass it actually reads the descriptor off disk. * * Returns the size on success or a negative error code on failure. */ int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size) { … } /* * fsverity op that reads and caches a merkle tree page. * * @inode: inode to read a merkle tree page for * @index: page index relative to the start of the merkle tree * @num_ra_pages: number of pages to readahead. Optional, we ignore it * * The Merkle tree is stored in the filesystem btree, but its pages are cached * with a logical position past EOF in the inode's mapping. * * Returns the page we read, or an ERR_PTR on error. */ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { … } /* * fsverity op that writes a Merkle tree block into the btree. 
* * @inode: inode to write a Merkle tree block for * @buf: Merkle tree block to write * @pos: the position of the block in the Merkle tree (in bytes) * @size: the Merkle tree block size (in bytes) * * Returns 0 on success or a negative error code on failure. */ static int btrfs_write_merkle_tree_block(struct inode *inode, const void *buf, u64 pos, unsigned int size) { … } const struct fsverity_operations btrfs_verityops = …;