/* linux/drivers/md/dm-vdo/indexer/delta-index.c */

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */
#include "delta-index.h"

#include <linux/bitops.h>
#include <linux/bits.h>
#include <linux/compiler.h>
#include <linux/limits.h>
#include <linux/log2.h>

#include "cpu.h"
#include "errors.h"
#include "logger.h"
#include "memory-alloc.h"
#include "numeric.h"
#include "permassert.h"
#include "string-utils.h"
#include "time-utils.h"

#include "config.h"
#include "indexer.h"

/*
 * The entries in a delta index could be stored in a single delta list, but to reduce search times
 * and update costs it uses multiple delta lists. These lists are stored in a single chunk of
 * memory managed by the delta_zone structure. The delta_zone can move the data around within its
 * memory, so the location of each delta list is recorded as a bit offset into the memory. Because
 * the volume index can contain over a million delta lists, we want to be efficient with the size
 * of the delta list header information. This information is encoded into 16 bytes per list. The
 * volume index delta list memory can easily exceed 4 gigabits, so a 64 bit value is needed to
 * address the memory. The volume index delta lists average around 6 kilobits, so 16 bits are
 * sufficient to store the size of a delta list.
 *
 * Each delta list is stored as a bit stream. Within the delta list encoding, bits and bytes are
 * numbered in little endian order. Within a byte, bit 0 is the least significant bit (0x1), and
 * bit 7 is the most significant bit (0x80). Within a bit stream, bit 7 is the most significant bit
 * of byte 0, and bit 8 is the least significant bit of byte 1. Within a byte array, a byte's
 * number corresponds to its index in the array.
 *
 * A standard delta list entry is stored as a fixed length payload (the value) followed by a
 * variable length key (the delta). A collision entry is used when two block names have the same
 * delta list address. A collision entry always follows a standard entry for the hash with which it
 * collides, and is encoded with DELTA == 0 with an additional 256 bits field at the end,
 * containing the full block name. An entry with a delta of 0 at the beginning of a delta list
 * indicates a normal entry.
 *
 * The delta in each entry is encoded with a variable-length Huffman code to minimize the memory
 * used by small deltas. The Huffman code is specified by three parameters, which can be computed
 * from the desired mean delta when the index is full. (See compute_coding_constants() for
 * details.)
 *
 * The bit field utilities used to read and write delta entries assume that it is possible to read
 * some bytes beyond the end of the bit field, so a delta_zone memory allocation is guarded by two
 * invalid delta lists to prevent reading outside the delta_zone memory. The valid delta lists are
 * numbered 1 to N, and the guard lists are numbered 0 and N+1. The function to decode the bit
 * stream include a step that skips over bits set to 0 until the first 1 bit is found. A corrupted
 * delta list could cause this step to run off the end of the delta_zone memory, so as extra
 * protection against this happening, the tail guard list is set to all ones.
 *
 * The delta_index supports two different forms. The mutable form is created by
 * uds_initialize_delta_index(), and is used for the volume index and for open chapter indexes. The
 * immutable form is created by uds_initialize_delta_index_page(), and is used for closed (and
 * cached) chapter index pages. The immutable form does not allocate delta list headers or
 * temporary offsets, and thus is somewhat more memory efficient.
 */

/*
 * This is the largest field size supported by get_field() and set_field(). Any field that is
 * larger is not guaranteed to fit in a single byte-aligned u32. (A field may start on any bit
 * within a byte, so up to 7 leading pad bits share the first byte with the field.)
 */
#define MAX_FIELD_BITS ((sizeof(u32) - 1) * BITS_PER_BYTE + 1)

/*
 * This is the largest field size supported by get_big_field() and set_big_field(). Any field that
 * is larger is not guaranteed to fit in a single byte-aligned u64.
 */
#define MAX_BIG_FIELD_BITS ((sizeof(u64) - 1) * BITS_PER_BYTE + 1)

/*
 * This is the number of guard bytes needed at the end of the memory byte array when using the bit
 * utilities. These utilities call get_big_field() and set_big_field(), which can access up to 7
 * bytes beyond the end of the desired field. The definition is written to make it clear how this
 * value is derived.
 */
#define POST_FIELD_GUARD_BYTES (sizeof(u64) - 1)

/* The number of guard bits that are needed in the tail guard list */
#define GUARD_BITS (POST_FIELD_GUARD_BYTES * BITS_PER_BYTE)

/*
 * The maximum size of a single delta list in bytes. We count guard bytes in this value because a
 * buffer of this size can be used with move_bits(). (A delta list size is stored in 16 bits, so
 * the largest possible list is U16_MAX bits, rounded up to whole bytes.)
 */
#define DELTA_LIST_MAX_BYTE_COUNT \
	((U16_MAX + BITS_PER_BYTE) / BITS_PER_BYTE + POST_FIELD_GUARD_BYTES)

/*
 * The number of extra bytes and bits needed to store a collision entry: the full 256-bit block
 * name is appended to the encoded entry.
 */
#define COLLISION_BYTES UDS_RECORD_NAME_SIZE
#define COLLISION_BITS (COLLISION_BYTES * BITS_PER_BYTE)

/*
 * Immutable delta lists are packed into pages containing a header that encodes the delta list
 * information into 19 bits per list (64KB bit offset).
 */
#define IMMUTABLE_HEADER_SIZE 19

/*
 * Constants and structures for the saved delta index. "DI" is for delta_index, and -##### is a
 * number to increment when the format of the data changes. The magic occupies MAGIC_SIZE bytes
 * (without the trailing NUL) at the start of each saved zone header.
 */
#define MAGIC_SIZE 8

static const char DELTA_INDEX_MAGIC[] = "DI-00002";

/*
 * On-disk header written for each saved delta index zone.
 * NOTE(review): the member list has been stripped from this copy of the file. It presumably
 * carries the magic (see DELTA_INDEX_MAGIC), the zone numbering/list range, and entry statistics
 * needed by the save/restore functions below — restore the fields from the on-disk format
 * definition before using this file.
 */
struct delta_index_header {};

/*
 * Header data used for immutable delta index pages. This data is followed by the delta list offset
 * table. It is __packed because it is part of the persistent page format.
 * NOTE(review): the member list has been stripped from this copy of the file. Judging by the
 * parameters of uds_pack_delta_index_page() it presumably records a nonce, the virtual chapter
 * number, and the first-list/list-count range — confirm against the on-disk format definition.
 */
struct delta_page_header {} __packed;

/*
 * Get the byte offset into zone memory at which this delta list's data begins. (List locations
 * are recorded as bit offsets into the zone memory; see the header comment above.)
 */
static inline u64 get_delta_list_byte_start(const struct delta_list *delta_list)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Get the number of bytes spanned by this delta list's bit stream. A u16 suffices because list
 * sizes are stored in 16 bits (see the header comment above).
 */
static inline u16 get_delta_list_byte_size(const struct delta_list *delta_list)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * NOTE(review): presumably redistributes the delta lists in the range [first, last] within the
 * zone's memory to even out free space — confirm once the body is restored.
 */
static void rebalance_delta_zone(const struct delta_zone *delta_zone, u32 first,
				 u32 last)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * NOTE(review): presumably computes the per-zone share of memory_size for zone_count zones —
 * confirm once the body is restored.
 */
static inline size_t get_zone_memory_size(unsigned int zone_count, size_t memory_size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

void uds_reset_delta_index(const struct delta_index *delta_index)
{}

/*
 * Compute the Huffman coding parameters for the given mean delta. The Huffman code is specified
 * by three parameters:
 *
 *  MINBITS   The number of bits in the smallest code
 *  BASE      The number of values coded using a code of length MINBITS
 *  INCR      The number of values coded by using one additional bit
 *
 * These parameters are related by this equation:
 *
 *	BASE + INCR == 1 << MINBITS
 *
 * The math for the Huffman code of an exponential distribution says that
 *
 *	INCR = log(2) * MEAN_DELTA
 *
 * Then use the smallest MINBITS value so that
 *
 *	(1 << MINBITS) > INCR
 *
 * And then
 *
 *	BASE = (1 << MINBITS) - INCR
 *
 * Now the index can generate a code such that
 * - The first BASE values code using MINBITS bits.
 * - The next INCR values code using MINBITS+1 bits.
 * - The next INCR values code using MINBITS+2 bits.
 * - (and so on).
 *
 * Outputs: *min_bits = MINBITS, *min_keys = BASE, *incr_keys = INCR.
 */
static void compute_coding_constants(u32 mean_delta, u16 *min_bits, u32 *min_keys, u32 *incr_keys)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

void uds_uninitialize_delta_index(struct delta_index *delta_index)
{}

/*
 * Set up one delta zone covering list_count lists starting at first_list, with the Huffman
 * coding parameters derived from mean_delta and payload_bits, tagged for saved-state
 * identification. Returns a UDS status code.
 */
static int initialize_delta_zone(struct delta_zone *delta_zone, size_t size,
				 u32 first_list, u32 list_count, u32 mean_delta,
				 u32 payload_bits, u8 tag)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Create the mutable form of a delta index (used for the volume index and open chapter indexes;
 * see the header comment above), dividing list_count lists and memory_size bytes across
 * zone_count zones. Returns a UDS status code.
 */
int uds_initialize_delta_index(struct delta_index *delta_index, unsigned int zone_count,
			       u32 list_count, u32 mean_delta, u32 payload_bits,
			       size_t memory_size, u8 tag)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Read a bit field from an arbitrary bit boundary. The field must be no larger than
 * MAX_FIELD_BITS; larger fields are not guaranteed to fit in a byte-aligned u32.
 */
static inline u32 get_field(const u8 *memory, u64 offset, u8 size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Write a bit field to an arbitrary bit boundary. The field must be no larger than
 * MAX_FIELD_BITS.
 */
static inline void set_field(u32 value, u8 *memory, u64 offset, u8 size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Get the bit offset to the immutable delta list header for list_number. (Immutable pages encode
 * each list's location in IMMUTABLE_HEADER_SIZE bits after the page header.)
 */
static inline u32 get_immutable_header_offset(u32 list_number)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/* Get the bit offset to the start of the immutable delta list bit stream. */
static inline u32 get_immutable_start(const u8 *memory, u32 list_number)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/* Set the bit offset to the start of the immutable delta list bit stream. */
static inline void set_immutable_start(u8 *memory, u32 list_number, u32 start)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Validate an immutable delta index page before use. NOTE(review): presumably checks the page
 * nonce against expected_nonce and sanity-checks the list offsets against memory_size —
 * confirm once the body is restored. Returns true if the page is usable.
 */
static bool verify_delta_index_page(u64 nonce, u16 list_count, u64 expected_nonce,
				    u8 *memory, size_t memory_size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Initialize a delta index page to refer to a supplied page. This creates the immutable form of
 * the index (see the header comment above); no delta list headers or temporary offsets are
 * allocated. Returns a UDS status code.
 */
int uds_initialize_delta_index_page(struct delta_index_page *delta_index_page,
				    u64 expected_nonce, u32 mean_delta, u32 payload_bits,
				    u8 *memory, size_t memory_size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Read a large bit field from an arbitrary bit boundary. The field must be no larger than
 * MAX_BIG_FIELD_BITS; the read may touch up to POST_FIELD_GUARD_BYTES past the field.
 */
static inline u64 get_big_field(const u8 *memory, u64 offset, u8 size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Write a large bit field to an arbitrary bit boundary. The field must be no larger than
 * MAX_BIG_FIELD_BITS; the write may touch up to POST_FIELD_GUARD_BYTES past the field.
 */
static inline void set_big_field(u64 value, u8 *memory, u64 offset, u8 size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/* Set a sequence of bits to all zeros. The offset and size are measured in bits. */
static inline void set_zero(u8 *memory, u64 offset, u32 size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Move several bits from a higher to a lower address, moving the lower addressed bits first. The
 * size and memory offsets are measured in bits. (The lower-first ordering is what makes this
 * safe when the source and destination overlap with to_offset < from_offset.)
 */
static void move_bits_down(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Move several bits from a lower to a higher address, moving the higher addressed bits first. The
 * size and memory offsets are measured in bits. (The higher-first ordering is what makes this
 * safe when the source and destination overlap with to_offset > from_offset.)
 */
static void move_bits_up(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Move bits from one field to another. When the fields overlap, behave as if we first move all the
 * bits from the source to a temporary value, and then move all the bits from the temporary value
 * to the destination. The size and memory offsets are measured in bits. (Presumably dispatches to
 * move_bits_down()/move_bits_up() based on the direction of the move.)
 */
static void move_bits(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Pack delta lists from a mutable delta index into an immutable delta index page. A range of delta
 * lists (starting with a specified list index) is copied from the mutable delta index into a
 * memory page used in the immutable index. The number of lists copied onto the page is returned in
 * list_count. header_nonce and virtual_chapter_number are recorded in the page header. Returns a
 * UDS status code.
 */
int uds_pack_delta_index_page(const struct delta_index *delta_index, u64 header_nonce,
			      u8 *memory, size_t memory_size, u64 virtual_chapter_number,
			      u32 first_list, u32 *list_count)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Compute the new offsets of the delta lists. Used when the list at growing_index needs
 * growing_size more bytes; used_space is the total space already in use.
 */
static void compute_new_list_offsets(struct delta_zone *delta_zone, u32 growing_index,
				     size_t growing_size, size_t used_space)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

static void rebalance_lists(struct delta_zone *delta_zone)
{}

/* Start restoring a delta index from multiple input streams. */
int uds_start_restoring_delta_index(struct delta_index *delta_index,
				    struct buffered_reader **buffered_readers,
				    unsigned int reader_count)
{}

/*
 * Copy one saved delta list (described by save_info, with its bit stream in data) back into the
 * given zone. Returns a UDS status code.
 */
static int restore_delta_list_to_zone(struct delta_zone *delta_zone,
				      const struct delta_list_save_info *save_info,
				      const u8 *data)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Read the next saved delta list from buffered_reader (belonging to saved zone load_zone) into
 * data and install it in the index. Returns a UDS status code.
 */
static int restore_delta_list_data(struct delta_index *delta_index, unsigned int load_zone,
				   struct buffered_reader *buffered_reader, u8 *data)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/* Restore delta lists from saved data. */
int uds_finish_restoring_delta_index(struct delta_index *delta_index,
				     struct buffered_reader **buffered_readers,
				     unsigned int reader_count)
{}

int uds_check_guard_delta_lists(struct buffered_reader **buffered_readers,
				unsigned int reader_count)
{}

/*
 * Write the delta list at flush_index from the zone to its output stream during a save.
 * Returns a UDS status code.
 */
static int flush_delta_list(struct delta_zone *zone, u32 flush_index)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/* Start saving a delta index zone to a buffered output stream. */
int uds_start_saving_delta_index(const struct delta_index *delta_index,
				 unsigned int zone_number,
				 struct buffered_writer *buffered_writer)
{}

int uds_finish_saving_delta_index(const struct delta_index *delta_index,
				  unsigned int zone_number)
{}

int uds_write_guard_delta_list(struct buffered_writer *buffered_writer)
{}

/*
 * Compute an upper bound on the bytes needed to save a delta index with list_count lists and
 * memory_size bytes of list memory.
 */
size_t uds_compute_delta_index_save_bytes(u32 list_count, size_t memory_size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

static int assert_not_at_end(const struct delta_index_entry *delta_entry)
{}

/*
 * Prepare to search for an entry in the specified delta list.
 *
 * This is always the first function to be called when dealing with delta index entries. It is
 * always followed by calls to uds_next_delta_index_entry() to iterate through a delta list. The
 * fields of the delta_index_entry argument will be set up for iteration, but will not contain an
 * entry from the list. Returns a UDS status code.
 */
int uds_start_delta_index_search(const struct delta_index *delta_index, u32 list_number,
				 u32 key, struct delta_index_entry *delta_entry)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/* Get the absolute bit offset of this entry within the zone memory. */
static inline u64 get_delta_entry_offset(const struct delta_index_entry *delta_entry)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Decode a delta index entry delta value. The delta_index_entry basically describes the previous
 * list entry, and has had its offset field changed to point to the subsequent entry. We decode the
 * bit stream and update the delta_list_entry to describe the entry.
 */
static inline void decode_delta(struct delta_index_entry *delta_entry)
{}

/*
 * Advance the iterator to the next entry in the delta list (noinline: hot path kept out of
 * callers). Must follow uds_start_delta_index_search(). Returns a UDS status code.
 */
noinline int uds_next_delta_index_entry(struct delta_index_entry *delta_entry)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

int uds_remember_delta_index_offset(const struct delta_index_entry *delta_entry)
{}

/*
 * Set the entry's delta value and update the derived fields (such as the entry's encoded key
 * length) accordingly.
 */
static void set_delta(struct delta_index_entry *delta_entry, u32 delta)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Copy the 256-bit block name stored in a collision entry (see the header comment above) into
 * name.
 */
static void get_collision_name(const struct delta_index_entry *entry, u8 *name)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/* Store the 256-bit block name from name into the collision entry's trailing field. */
static void set_collision_name(const struct delta_index_entry *entry, const u8 *name)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Find the entry with the given key in the given delta list, resolving collisions with the full
 * block name when name is supplied. The result (or insertion point) is described in delta_entry.
 * Returns a UDS status code.
 */
int uds_get_delta_index_entry(const struct delta_index *delta_index, u32 list_number,
			      u32 key, const u8 *name,
			      struct delta_index_entry *delta_entry)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Copy the full block name out of a collision entry into name. Returns a UDS status code
 * (an error if the entry is not a collision).
 */
int uds_get_delta_entry_collision(const struct delta_index_entry *delta_entry, u8 *name)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/* Read the fixed-length payload (the value) of the current entry. */
u32 uds_get_delta_entry_value(const struct delta_index_entry *delta_entry)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

static int assert_mutable_entry(const struct delta_index_entry *delta_entry)
{}

/*
 * Overwrite the fixed-length payload (the value) of the current entry. Only valid on a mutable
 * index. Returns a UDS status code.
 */
int uds_set_delta_entry_value(const struct delta_index_entry *delta_entry, u32 value)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Extend the memory used by the delta lists by adding growing_size bytes before the list indicated
 * by growing_index, then rebalancing the lists in the new chunk. Returns a UDS status code.
 */
static int extend_delta_zone(struct delta_zone *delta_zone, u32 growing_index,
			     size_t growing_size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Open a gap of size bits at the entry's position in its delta list, growing the zone memory if
 * needed. Returns a UDS status code.
 */
static int insert_bits(struct delta_index_entry *delta_entry, u16 size)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

static void encode_delta(const struct delta_index_entry *delta_entry)
{}

/*
 * Write a complete entry into the bit stream: the fixed-length value, the Huffman-coded delta,
 * and — when name is non-NULL — the trailing 256-bit collision name.
 */
static void encode_entry(const struct delta_index_entry *delta_entry, u32 value,
			 const u8 *name)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Create a new entry in the delta index. If the entry is a collision, the full 256 bit name must
 * be provided. The delta_entry must describe the insertion point (as produced by
 * uds_get_delta_index_entry()). Returns a UDS status code.
 */
int uds_put_delta_index_entry(struct delta_index_entry *delta_entry, u32 key, u32 value,
			      const u8 *name)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

static void delete_bits(const struct delta_index_entry *delta_entry, int size)
{}

int uds_remove_delta_index_entry(struct delta_index_entry *delta_entry)
{}

void uds_get_delta_index_stats(const struct delta_index *delta_index,
			       struct delta_index_stats *stats)
{}

/*
 * Estimate the memory needed for a delta index holding entry_count entries with the given mean
 * delta and payload size.
 */
size_t uds_compute_delta_index_size(u32 entry_count, u32 mean_delta, u32 payload_bits)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

/*
 * Compute the number of immutable pages of bytes_per_page bytes needed to hold entry_count
 * entries spread over list_count delta lists.
 */
u32 uds_get_delta_index_page_count(u32 entry_count, u32 list_count, u32 mean_delta,
				   u32 payload_bits, size_t bytes_per_page)
{
	/* NOTE(review): implementation stripped from this copy — body must be restored. */
}

void uds_log_delta_index_entry(struct delta_index_entry *delta_entry)
{}