linux/fs/btrfs/verity.c

// SPDX-License-Identifier: GPL-2.0

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/posix_acl_xattr.h>
#include <linux/iversion.h>
#include <linux/fsverity.h>
#include <linux/sched/mm.h>
#include "messages.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "transaction.h"
#include "locking.h"
#include "fs.h"
#include "accessors.h"
#include "ioctl.h"
#include "verity.h"
#include "orphan.h"

/*
 * Implementation of the interface defined in struct fsverity_operations.
 *
 * The main question is how and where to store the verity descriptor and the
 * Merkle tree. We store both in dedicated btree items in the filesystem tree,
 * together with the rest of the inode metadata. This means we'll need to do
 * extra work to encrypt them once encryption is supported in btrfs, but btrfs
 * has a lot of careful code around i_size and it seems better to make a new key
 * type than try and adjust all of our expectations for i_size.
 *
 * Note that this differs from the implementation in ext4 and f2fs, where
 * this data is stored as if it were in the file, but past EOF. However, btrfs
 * does not have a widespread mechanism for caching opaque metadata pages, so we
 * do pretend that the Merkle tree pages themselves are past EOF for the
 * purposes of caching them (as opposed to creating a virtual inode).
 *
 * fs verity items are stored under two different key types on disk.
 * The descriptor items:
 * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
 *
 * At offset 0, we store a btrfs_verity_descriptor_item which tracks the
 * size of the descriptor item and some extra data for encryption.
 * Starting at offset 1, these hold the generic fs verity descriptor.
 * The latter are opaque to btrfs, we just read and write them as a blob for
 * the higher level verity code.  The most common descriptor size is 256 bytes.
 *
 * The merkle tree items:
 * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
 *
 * These also start at offset 0, and correspond to the merkle tree bytes.
 * So when fsverity asks for page 0 of the merkle tree, we pull up one page
 * starting at offset 0 for this key type.  These are also opaque to btrfs,
 * we're blindly storing whatever fsverity sends down.
 *
 * Another important consideration is the fact that the Merkle tree data scales
 * linearly with the size of the file (with 4K pages/blocks and SHA-256, it's
 * ~1/127th the size) so for large files, writing the tree can be a lengthy
 * operation. For that reason, we guard the whole enable verity operation
 * (between begin_enable_verity and end_enable_verity) with an orphan item.
 * Again, because the data can be pretty large, it's quite possible that we
 * could run out of space writing it, so we try our best to handle errors by
 * stopping and rolling back rather than aborting the victim transaction.
 */

#define MERKLE_START_ALIGN

/*
 * Compute the logical file offset where we cache the Merkle tree.
 *
 * @inode:  inode of the verity file
 *
 * For the purposes of caching the Merkle tree pages, as required by
 * fs-verity, it is convenient to do size computations in terms of a file
 * offset, rather than in terms of page indices.
 *
 * Use 64K to be sure it's past the last page in the file, even with 64K pages.
 * That rounding operation itself can overflow loff_t, so we do it in u64 and
 * check.
 *
 * Returns the file offset on success, negative error code on failure.
 */
static loff_t merkle_file_pos(const struct inode *inode)
{}

/*
 * Drop all the items for this inode with this key_type.
 *
 * @inode:     inode to drop items for
 * @key_type:  type of items to drop (BTRFS_VERITY_DESC_ITEM_KEY or
 *             BTRFS_VERITY_MERKLE_ITEM_KEY)
 *
 * Before doing a verity enable we clean up any existing verity items.
 * This is also used to clean up if a verity enable failed halfway through.
 *
 * Returns number of dropped items on success, negative error code on failure.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a value.  The contract above is the intended
 * behavior and is not demonstrated by the code shown here; confirm against
 * the full implementation.
 */
static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
{}

/*
 * Drop all verity items
 *
 * @inode:  inode to drop verity items for
 *
 * In most contexts where we are dropping verity items, we want to do it for all
 * the types of verity items, not a particular one.
 *
 * Returns: 0 on success, negative error code on failure.
 */
int btrfs_drop_verity_items(struct btrfs_inode *inode)
{}

/*
 * Insert and write inode items with a given key type and offset.
 *
 * @inode:     inode to insert for
 * @key_type:  key type to insert
 * @offset:    key offset of the first item to insert
 * @src:       source data to write
 * @len:       length of source data to write
 *
 * Write len bytes from src into items of up to 2K length.
 * The inserted items will have key (ino, key_type, offset + off) where off is
 * consecutively increasing from 0 up to the last item ending at offset + len.
 *
 * Returns 0 on success and a negative error code on failure.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a value.  The contract above is the intended
 * behavior and is not demonstrated by the code shown here; confirm against
 * the full implementation.
 */
static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
			   const char *src, u64 len)
{}

/*
 * Read inode items of the given key type and offset from the btree.
 *
 * @inode:      inode to read items of
 * @key_type:   key type to read
 * @offset:     item offset to read from
 * @dest:       Buffer to read into. This parameter has slightly tricky
 *              semantics.  If it is NULL, the function will not do any copying
 *              and will just return the size of all the items up to len bytes.
 *              If dest_folio is passed, then the function will map the folio
 *              and ignore dest, but dest must still be non-NULL to avoid the
 *              counting-only behavior.
 * @len:        length in bytes to read
 * @dest_folio: copy into this folio instead of the dest buffer
 *
 * Helper function to read items from the btree.  This returns the number of
 * bytes read or < 0 for errors.  We can return short reads if the items don't
 * exist on disk or aren't big enough to fill the desired length.  Supports
 * reading into a provided buffer (dest) or into the page cache.
 *
 * Returns number of bytes read or a negative error code on failure.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a value.  The contract above is the intended
 * behavior and is not demonstrated by the code shown here; confirm against
 * the full implementation.
 */
static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
			  char *dest, u64 len, struct folio *dest_folio)
{}

/*
 * Delete an fsverity orphan
 *
 * @trans:  transaction to do the delete in
 * @inode:  inode whose orphan item is to be deleted
 *
 * Capture verity orphan specific logic that is repeated in the couple places
 * we delete verity orphans. Specifically, handling ENOENT and ignoring inodes
 * with 0 links.
 *
 * Returns zero on success or a negative error code on failure.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a value.  The ENOENT and zero-link handling
 * described above is not visible here; confirm against the full
 * implementation.
 */
static int del_orphan(struct btrfs_trans_handle *trans, struct btrfs_inode *inode)
{}

/*
 * Rollback in-progress verity if we encounter an error.
 *
 * @inode:  inode verity had an error for
 *
 * We try to handle recoverable errors while enabling verity by rolling it back
 * and just failing the operation, rather than having an fs level error no
 * matter what. However, any error in rollback is unrecoverable.
 *
 * Returns 0 on success, negative error code on failure.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a value.  The rollback steps are not visible
 * here; confirm against the full implementation.
 */
static int rollback_verity(struct btrfs_inode *inode)
{}

/*
 * Finalize making the file a valid verity file
 *
 * @inode:      inode to be marked as verity
 * @desc:       contents of the verity descriptor to write (not NULL)
 * @desc_size:  size of the verity descriptor
 *
 * Do the actual work of finalizing verity after successfully writing the Merkle
 * tree:
 *
 * - write out the descriptor items
 * - mark the inode with the verity flag
 * - delete the orphan item
 * - mark the ro compat bit
 * - clear the in progress bit
 *
 * Returns 0 on success, negative error code on failure.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a value.  None of the steps listed above are
 * visible here, nor is their actual order; confirm against the full
 * implementation.
 */
static int finish_verity(struct btrfs_inode *inode, const void *desc,
			 size_t desc_size)
{}

/*
 * fsverity op that begins enabling verity.
 *
 * @filp:  file to enable verity on
 *
 * Begin enabling fsverity for the file. We drop any existing verity items, add
 * an orphan and set the in progress bit.
 *
 * Returns 0 on success, negative error code on failure.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a value.  The steps described above are not
 * visible here; confirm against the full implementation.
 */
static int btrfs_begin_enable_verity(struct file *filp)
{}

/*
 * fsverity op that ends enabling verity.
 *
 * @filp:              file we are finishing enabling verity on
 * @desc:              verity descriptor to write out (NULL in error conditions)
 * @desc_size:         size of the verity descriptor (variable with signatures)
 * @merkle_tree_size:  size of the merkle tree in bytes
 *
 * If desc is NULL, then VFS is signaling an error occurred during verity
 * enable, and we should try to rollback. Otherwise, attempt to finish verity.
 *
 * Returns 0 on success, negative error code on error.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a value.  The rollback/finish dispatch
 * described above is not visible here; confirm against the full
 * implementation.
 */
static int btrfs_end_enable_verity(struct file *filp, const void *desc,
				   size_t desc_size, u64 merkle_tree_size)
{}

/*
 * fsverity op that gets the struct fsverity_descriptor.
 *
 * @inode:     inode to get the descriptor of
 * @buf:       output buffer for the descriptor contents
 * @buf_size:  size of the output buffer. 0 to query the size
 *
 * fsverity does a two pass setup for reading the descriptor, in the first pass
 * it calls with buf_size = 0 to query the size of the descriptor, and then in
 * the second pass it actually reads the descriptor off disk.
 *
 * Returns the size on success or a negative error code on failure.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a value.  The two pass behavior described
 * above is not visible here; confirm against the full implementation.
 */
int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size)
{}

/*
 * fsverity op that reads and caches a merkle tree page.
 *
 * @inode:         inode to read a merkle tree page for
 * @index:         page index relative to the start of the merkle tree
 * @num_ra_pages:  number of pages to readahead. Optional, we ignore it
 *
 * The Merkle tree is stored in the filesystem btree, but its pages are cached
 * with a logical position past EOF in the inode's mapping.
 *
 * Returns the page we read, or an ERR_PTR on error.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a pointer.  The caching behavior described
 * above is not visible here; confirm against the full implementation.
 */
static struct page *btrfs_read_merkle_tree_page(struct inode *inode,
						pgoff_t index,
						unsigned long num_ra_pages)
{}

/*
 * fsverity op that writes a Merkle tree block into the btree.
 *
 * @inode:  inode to write a Merkle tree block for
 * @buf:    Merkle tree block to write
 * @pos:    the position of the block in the Merkle tree (in bytes)
 * @size:   the Merkle tree block size (in bytes)
 *
 * Returns 0 on success or negative error code on failure.
 *
 * NOTE(review): the body below is empty in this view, so the function falls
 * off the end without returning a value.  Presumably the block is stored
 * under the Merkle item key at offset pos, but that is not visible here;
 * confirm against the full implementation.
 */
static int btrfs_write_merkle_tree_block(struct inode *inode, const void *buf,
					 u64 pos, unsigned int size)
{}

const struct fsverity_operations btrfs_verityops =;