linux/fs/bcachefs/extents_format.h

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_EXTENTS_FORMAT_H
#define _BCACHEFS_EXTENTS_FORMAT_H

/*
 * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
 * preceded by checksum/compression information (bch_extent_crc32 or
 * bch_extent_crc64).
 *
 * One major determining factor in the format of extents is how we handle and
 * represent extents that have been partially overwritten and thus trimmed:
 *
 * If an extent is not checksummed or compressed, when the extent is trimmed we
 * don't have to remember the extent we originally allocated and wrote: we can
 * merely adjust ptr->offset to point to the start of the data that is currently
 * live. The size field in struct bkey records the current (live) size of the
 * extent, and is also used to mean "size of region on disk that we point to" in
 * this case.
 *
 * Thus an extent that is not checksummed or compressed will consist only of a
 * list of bch_extent_ptrs, with none of the fields in
 * bch_extent_crc32/bch_extent_crc64.
 *
 * When an extent is checksummed or compressed, it's not possible to read only
 * the data that is currently live: we have to read the entire extent that was
 * originally written, and then return only the part of the extent that is
 * currently live.
 *
 * Thus, in addition to the current size of the extent in struct bkey, we need
 * to store the size of the originally allocated space - this is the
 * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
 * when the extent is trimmed, instead of modifying the offset field of the
 * pointer, we keep a second smaller offset field - "offset into the original
 * extent of the currently live region".
 *
 * The other major determining factor is replication and data migration:
 *
 * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
 * write, we will initially write all the replicas in the same format, with the
 * same checksum type and compression format - however, when copygc runs later (or
 * tiering/cache promotion, anything that moves data), it is not in general
 * going to rewrite all the pointers at once - one of the replicas may be in a
 * bucket on one device that has very little fragmentation while another lives
 * in a bucket that has become heavily fragmented, and thus is being rewritten
 * sooner than the rest.
 *
 * Thus it will only move a subset of the pointers (or in the case of
 * tiering/cache promotion perhaps add a single pointer without dropping any
 * current pointers), and if the extent has been partially overwritten it must
 * write only the currently live portion (or copygc would not be able to reduce
 * fragmentation!) - which necessitates a different bch_extent_crc format for
 * the new pointer.
 *
 * But in the interests of space efficiency, we don't want to store one
 * bch_extent_crc for each pointer if we don't have to.
 *
 * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
 * bch_extent_ptrs appended arbitrarily one after the other. We determine the
 * type of a given entry with a scheme similar to utf8 (except we're encoding a
 * type, not a size), encoding the type in the position of the first set bit:
 *
 * bch_extent_crc32	- 0b1
 * bch_extent_ptr	- 0b10
 * bch_extent_crc64	- 0b100
 *
 * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
 * bch_extent_crc64 is the least constrained).
 *
 * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
 * until the next bch_extent_crc32/64.
 *
 * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
 * is neither checksummed nor compressed.
 */

#define BCH_EXTENT_ENTRY_TYPES()
#define BCH_EXTENT_ENTRY_MAX

enum bch_extent_entry_type {};

/* Compressed/uncompressed size are stored biased by 1: */
struct bch_extent_crc32 {} __packed __aligned();

#define CRC32_SIZE_MAX
#define CRC32_NONCE_MAX

struct bch_extent_crc64 {} __packed __aligned();

#define CRC64_SIZE_MAX
#define CRC64_NONCE_MAX

struct bch_extent_crc128 {} __packed __aligned();

#define CRC128_SIZE_MAX
#define CRC128_NONCE_MAX

/*
 * @reservation - pointer hasn't been written to, just reserved
 */
struct bch_extent_ptr {} __packed __aligned();

struct bch_extent_stripe_ptr {};

struct bch_extent_rebalance {};

bch_extent_entry;

struct bch_btree_ptr {} __packed __aligned();

struct bch_btree_ptr_v2 {} __packed __aligned();

LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,	struct bch_btree_ptr_v2, flags, 0, 1);

struct bch_extent {} __packed __aligned();

/* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX

/* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX

/* * Maximum possible size of an entire extent, key + value: */
#define BKEY_EXTENT_U64s_MAX

/* Btree pointers don't carry around checksums: */
#define BKEY_BTREE_PTR_VAL_U64s_MAX
#define BKEY_BTREE_PTR_U64s_MAX

struct bch_reservation {} __packed __aligned();

struct bch_inline_data {};

#endif /* _BCACHEFS_EXTENTS_FORMAT_H */