// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */
#include "delta-index.h"

#include <linux/bitops.h>
#include <linux/bits.h>
#include <linux/compiler.h>
#include <linux/limits.h>
#include <linux/log2.h>

#include "cpu.h"
#include "errors.h"
#include "logger.h"
#include "memory-alloc.h"
#include "numeric.h"
#include "permassert.h"
#include "string-utils.h"
#include "time-utils.h"

#include "config.h"
#include "indexer.h"

/*
 * The entries in a delta index could be stored in a single delta list, but to reduce search times
 * and update costs it uses multiple delta lists. These lists are stored in a single chunk of
 * memory managed by the delta_zone structure. The delta_zone can move the data around within its
 * memory, so the location of each delta list is recorded as a bit offset into the memory. Because
 * the volume index can contain over a million delta lists, we want to be efficient with the size
 * of the delta list header information. This information is encoded into 16 bytes per list. The
 * volume index delta list memory can easily exceed 4 gigabits, so a 64 bit value is needed to
 * address the memory. The volume index delta lists average around 6 kilobits, so 16 bits are
 * sufficient to store the size of a delta list.
 *
 * Each delta list is stored as a bit stream. Within the delta list encoding, bits and bytes are
 * numbered in little endian order. Within a byte, bit 0 is the least significant bit (0x1), and
 * bit 7 is the most significant bit (0x80). Within a bit stream, bit 7 is the most significant
 * bit of byte 0, and bit 8 is the least significant bit of byte 1. Within a byte array, a byte's
 * number corresponds to its index in the array.
 *
 * A standard delta list entry is stored as a fixed length payload (the value) followed by a
 * variable length key (the delta). A collision entry is used when two block names have the same
 * delta list address. A collision entry always follows a standard entry for the hash with which
 * it collides, and is encoded with DELTA == 0 with an additional 256 bit field at the end,
 * containing the full block name. An entry with a delta of 0 at the beginning of a delta list
 * indicates a normal entry.
 *
 * The delta in each entry is encoded with a variable-length Huffman code to minimize the memory
 * used by small deltas. The Huffman code is specified by three parameters, which can be computed
 * from the desired mean delta when the index is full. (See compute_coding_constants() for
 * details.)
 *
 * The bit field utilities used to read and write delta entries assume that it is possible to
 * read some bytes beyond the end of the bit field, so a delta_zone memory allocation is guarded
 * by two invalid delta lists to prevent reading outside the delta_zone memory. The valid delta
 * lists are numbered 1 to N, and the guard lists are numbered 0 and N+1. The functions that
 * decode the bit stream include a step that skips over bits set to 0 until the first 1 bit is
 * found. A corrupted delta list could cause this step to run off the end of the delta_zone
 * memory, so as extra protection against this happening, the tail guard list is set to all ones.
 *
 * The delta_index supports two different forms. The mutable form is created by
 * uds_initialize_delta_index(), and is used for the volume index and for open chapter indexes.
 * The immutable form is created by uds_initialize_delta_index_page(), and is used for closed
 * (and cached) chapter index pages. The immutable form does not allocate delta list headers or
 * temporary offsets, and thus is somewhat more memory efficient.
 */
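/*
 * Illustrative sketch, not part of the original implementation: under the little-endian
 * numbering described above, bit N of a bit stream lives in byte N / 8 at position N % 8 within
 * that byte. For example, in the two-byte array { 0x80, 0x01 }, exactly bits 7 and 8 are set.
 */
static inline bool __maybe_unused example_is_bit_set(const u8 *memory, u64 bit)
{
	return (memory[bit / BITS_PER_BYTE] >> (bit % BITS_PER_BYTE)) & 0x1;
}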
/*
 * This is the largest field size supported by get_field() and set_field(). Any field that is
 * larger is not guaranteed to fit in a single byte-aligned u32.
 */
#define MAX_FIELD_BITS …

/*
 * This is the largest field size supported by get_big_field() and set_big_field(). Any field
 * that is larger is not guaranteed to fit in a single byte-aligned u64.
 */
#define MAX_BIG_FIELD_BITS …

/*
 * This is the number of guard bytes needed at the end of the memory byte array when using the
 * bit utilities. These utilities call get_big_field() and set_big_field(), which can access up
 * to 7 bytes beyond the end of the desired field. The definition is written to make it clear how
 * this value is derived.
 */
#define POST_FIELD_GUARD_BYTES …

/* The number of guard bits that are needed in the tail guard list */
#define GUARD_BITS …

/*
 * The maximum size of a single delta list in bytes. We count guard bytes in this value because a
 * buffer of this size can be used with move_bits().
 */
#define DELTA_LIST_MAX_BYTE_COUNT …

/* The number of extra bytes and bits needed to store a collision entry */
#define COLLISION_BYTES …
#define COLLISION_BITS …

/*
 * Immutable delta lists are packed into pages containing a header that encodes the delta list
 * information into 19 bits per list (64KB bit offset).
 */
#define IMMUTABLE_HEADER_SIZE …

/*
 * Constants and structures for the saved delta index. "DI" is for delta_index, and -##### is a
 * number to increment when the format of the data changes.
 */
#define MAGIC_SIZE …

static const char DELTA_INDEX_MAGIC[] = …;

struct delta_index_header {
	…
};

/*
 * Header data used for immutable delta index pages. This data is followed by the delta list
 * offset table.
 */
struct delta_page_header {
	…
} __packed;

static inline u64 get_delta_list_byte_start(const struct delta_list *delta_list)
{
	…
}

static inline u16 get_delta_list_byte_size(const struct delta_list *delta_list)
{
	…
}

static void rebalance_delta_zone(const struct delta_zone *delta_zone, u32 first, u32 last)
{
	…
}

static inline size_t get_zone_memory_size(unsigned int zone_count, size_t memory_size)
{
	…
}

void uds_reset_delta_index(const struct delta_index *delta_index)
{
	…
}

/*
 * Compute the Huffman coding parameters for the given mean delta. The Huffman code is specified
 * by three parameters:
 *
 *	MINBITS	The number of bits in the smallest code
 *	BASE	The number of values coded using a code of length MINBITS
 *	INCR	The number of values coded by using one additional bit
 *
 * These parameters are related by this equation:
 *
 *	BASE + INCR == 1 << MINBITS
 *
 * The math for the Huffman code of an exponential distribution says that
 *
 *	INCR = log(2) * MEAN_DELTA
 *
 * Then use the smallest MINBITS value so that
 *
 *	(1 << MINBITS) > INCR
 *
 * And then
 *
 *	BASE = (1 << MINBITS) - INCR
 *
 * Now the index can generate a code such that
 * - The first BASE values code using MINBITS bits.
 * - The next INCR values code using MINBITS+1 bits.
 * - The next INCR values code using MINBITS+2 bits.
 * - (and so on).
 */
static void compute_coding_constants(u32 mean_delta, u16 *min_bits, u32 *min_keys, u32 *incr_keys)
{
	…
}
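/*
 * Worked example of the coding math above (a sketch, not the original implementation; the
 * fixed-point approximation of log(2) as 726817 / 2^20 is this sketch's own choice). For a mean
 * delta of 4096: INCR = log(2) * 4096 ~= 2839, the smallest MINBITS with (1 << MINBITS) > 2839
 * is 12, and BASE = 4096 - 2839 = 1257. So the first 1257 deltas code in 12 bits, the next 2839
 * in 13 bits, and so on.
 */
static void __maybe_unused example_coding_constants(u32 mean_delta, u16 *min_bits,
						    u32 *min_keys, u32 *incr_keys)
{
	/* INCR = log(2) * MEAN_DELTA, rounded, using integer arithmetic */
	*incr_keys = ((u64) mean_delta * 726817 + (1 << 19)) >> 20;
	/* The smallest MINBITS with (1 << MINBITS) > INCR */
	*min_bits = fls(*incr_keys);
	/* BASE = (1 << MINBITS) - INCR */
	*min_keys = (1U << *min_bits) - *incr_keys;
}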
void uds_uninitialize_delta_index(struct delta_index *delta_index)
{
	…
}

static int initialize_delta_zone(struct delta_zone *delta_zone, size_t size, u32 first_list,
				 u32 list_count, u32 mean_delta, u32 payload_bits, u8 tag)
{
	…
}

int uds_initialize_delta_index(struct delta_index *delta_index, unsigned int zone_count,
			       u32 list_count, u32 mean_delta, u32 payload_bits,
			       size_t memory_size, u8 tag)
{
	…
}

/* Read a bit field from an arbitrary bit boundary. */
static inline u32 get_field(const u8 *memory, u64 offset, u8 size)
{
	…
}

/* Write a bit field to an arbitrary bit boundary. */
static inline void set_field(u32 value, u8 *memory, u64 offset, u8 size)
{
	…
}
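/*
 * A minimal sketch of one way to implement get_field(), not necessarily the original code:
 * because a field is at most MAX_FIELD_BITS wide, it always fits in a u32 loaded from the byte
 * containing its first bit. The unaligned little-endian load is what makes the guard bytes
 * described above necessary. Assumes get_unaligned_le32() from <asm/unaligned.h>.
 */
static inline u32 __maybe_unused example_get_field(const u8 *memory, u64 offset, u8 size)
{
	const void *addr = memory + offset / BITS_PER_BYTE;

	return (get_unaligned_le32(addr) >> (offset % BITS_PER_BYTE)) & ((1 << size) - 1);
}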
/* Get the bit offset to the immutable delta list header. */
static inline u32 get_immutable_header_offset(u32 list_number)
{
	…
}

/* Get the bit offset to the start of the immutable delta list bit stream. */
static inline u32 get_immutable_start(const u8 *memory, u32 list_number)
{
	…
}

/* Set the bit offset to the start of the immutable delta list bit stream. */
static inline void set_immutable_start(u8 *memory, u32 list_number, u32 start)
{
	…
}

static bool verify_delta_index_page(u64 nonce, u16 list_count, u64 expected_nonce, u8 *memory,
				    size_t memory_size)
{
	…
}

/* Initialize a delta index page to refer to a supplied page. */
int uds_initialize_delta_index_page(struct delta_index_page *delta_index_page, u64 expected_nonce,
				    u32 mean_delta, u32 payload_bits, u8 *memory,
				    size_t memory_size)
{
	…
}

/* Read a large bit field from an arbitrary bit boundary. */
static inline u64 get_big_field(const u8 *memory, u64 offset, u8 size)
{
	…
}

/* Write a large bit field to an arbitrary bit boundary. */
static inline void set_big_field(u64 value, u8 *memory, u64 offset, u8 size)
{
	…
}

/* Set a sequence of bits to all zeros. */
static inline void set_zero(u8 *memory, u64 offset, u32 size)
{
	…
}

/*
 * Move several bits from a higher to a lower address, moving the lower addressed bits first. The
 * size and memory offsets are measured in bits.
 */
static void move_bits_down(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
{
	…
}

/*
 * Move several bits from a lower to a higher address, moving the higher addressed bits first.
 * The size and memory offsets are measured in bits.
 */
static void move_bits_up(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
{
	…
}

/*
 * Move bits from one field to another. When the fields overlap, behave as if we first move all
 * the bits from the source to a temporary value, and then move all the bits from the temporary
 * value to the destination. The size and memory offsets are measured in bits.
 */
static void move_bits(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
{
	…
}

/*
 * Pack delta lists from a mutable delta index into an immutable delta index page. A range of
 * delta lists (starting with a specified list index) is copied from the mutable delta index into
 * a memory page used in the immutable index. The number of lists copied onto the page is
 * returned in list_count.
 */
int uds_pack_delta_index_page(const struct delta_index *delta_index, u64 header_nonce, u8 *memory,
			      size_t memory_size, u64 virtual_chapter_number, u32 first_list,
			      u32 *list_count)
{
	…
}

/* Compute the new offsets of the delta lists. */
static void compute_new_list_offsets(struct delta_zone *delta_zone, u32 growing_index,
				     size_t growing_size, size_t used_space)
{
	…
}

static void rebalance_lists(struct delta_zone *delta_zone)
{
	…
}

/* Start restoring a delta index from multiple input streams. */
int uds_start_restoring_delta_index(struct delta_index *delta_index,
				    struct buffered_reader **buffered_readers,
				    unsigned int reader_count)
{
	…
}

static int restore_delta_list_to_zone(struct delta_zone *delta_zone,
				      const struct delta_list_save_info *save_info,
				      const u8 *data)
{
	…
}

static int restore_delta_list_data(struct delta_index *delta_index, unsigned int load_zone,
				   struct buffered_reader *buffered_reader, u8 *data)
{
	…
}

/* Restore delta lists from saved data. */
int uds_finish_restoring_delta_index(struct delta_index *delta_index,
				     struct buffered_reader **buffered_readers,
				     unsigned int reader_count)
{
	…
}

int uds_check_guard_delta_lists(struct buffered_reader **buffered_readers,
				unsigned int reader_count)
{
	…
}

static int flush_delta_list(struct delta_zone *zone, u32 flush_index)
{
	…
}

/* Start saving a delta index zone to a buffered output stream. */
int uds_start_saving_delta_index(const struct delta_index *delta_index,
				 unsigned int zone_number,
				 struct buffered_writer *buffered_writer)
{
	…
}

int uds_finish_saving_delta_index(const struct delta_index *delta_index,
				  unsigned int zone_number)
{
	…
}

int uds_write_guard_delta_list(struct buffered_writer *buffered_writer)
{
	…
}

size_t uds_compute_delta_index_save_bytes(u32 list_count, size_t memory_size)
{
	…
}

static int assert_not_at_end(const struct delta_index_entry *delta_entry)
{
	…
}

/*
 * Prepare to search for an entry in the specified delta list.
 *
 * This is always the first function to be called when dealing with delta index entries. It is
 * always followed by calls to uds_next_delta_index_entry() to iterate through a delta list. The
 * fields of the delta_index_entry argument will be set up for iteration, but will not contain an
 * entry from the list.
 */
int uds_start_delta_index_search(const struct delta_index *delta_index, u32 list_number, u32 key,
				 struct delta_index_entry *delta_entry)
{
	…
}
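/*
 * Illustrative usage sketch, not part of the original code: walking a delta list as described
 * above. uds_start_delta_index_search() only primes the iterator, so
 * uds_next_delta_index_entry() must be called to reach the first real entry. This assumes the
 * at_end and key fields declared in delta-index.h.
 */
static int __maybe_unused example_find_entry(const struct delta_index *delta_index,
					     u32 list_number, u32 key)
{
	struct delta_index_entry entry;
	int result;

	result = uds_start_delta_index_search(delta_index, list_number, key, &entry);
	if (result != UDS_SUCCESS)
		return result;

	/* Advance until the list ends or the current key reaches the target. */
	do {
		result = uds_next_delta_index_entry(&entry);
		if (result != UDS_SUCCESS)
			return result;
	} while (!entry.at_end && (entry.key < key));

	return UDS_SUCCESS;
}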
static inline u64 get_delta_entry_offset(const struct delta_index_entry *delta_entry)
{
	…
}

/*
 * Decode a delta index entry delta value. The delta_index_entry basically describes the previous
 * list entry, and has had its offset field changed to point to the subsequent entry. We decode
 * the bit stream and update the delta_index_entry to describe the entry.
 */
static inline void decode_delta(struct delta_index_entry *delta_entry)
{
	…
}

noinline int uds_next_delta_index_entry(struct delta_index_entry *delta_entry)
{
	…
}

int uds_remember_delta_index_offset(const struct delta_index_entry *delta_entry)
{
	…
}

static void set_delta(struct delta_index_entry *delta_entry, u32 delta)
{
	…
}

static void get_collision_name(const struct delta_index_entry *entry, u8 *name)
{
	…
}

static void set_collision_name(const struct delta_index_entry *entry, const u8 *name)
{
	…
}

int uds_get_delta_index_entry(const struct delta_index *delta_index, u32 list_number, u32 key,
			      const u8 *name, struct delta_index_entry *delta_entry)
{
	…
}

int uds_get_delta_entry_collision(const struct delta_index_entry *delta_entry, u8 *name)
{
	…
}

u32 uds_get_delta_entry_value(const struct delta_index_entry *delta_entry)
{
	…
}

static int assert_mutable_entry(const struct delta_index_entry *delta_entry)
{
	…
}

int uds_set_delta_entry_value(const struct delta_index_entry *delta_entry, u32 value)
{
	…
}

/*
 * Extend the memory used by the delta lists by adding growing_size bytes before the list
 * indicated by growing_index, then rebalancing the lists in the new chunk.
 */
static int extend_delta_zone(struct delta_zone *delta_zone, u32 growing_index,
			     size_t growing_size)
{
	…
}

static int insert_bits(struct delta_index_entry *delta_entry, u16 size)
{
	…
}

static void encode_delta(const struct delta_index_entry *delta_entry)
{
	…
}

static void encode_entry(const struct delta_index_entry *delta_entry, u32 value, const u8 *name)
{
	…
}

/*
 * Create a new entry in the delta index. If the entry is a collision, the full 256 bit name must
 * be provided.
 */
int uds_put_delta_index_entry(struct delta_index_entry *delta_entry, u32 key, u32 value,
			      const u8 *name)
{
	…
}

static void delete_bits(const struct delta_index_entry *delta_entry, int size)
{
	…
}

int uds_remove_delta_index_entry(struct delta_index_entry *delta_entry)
{
	…
}

void uds_get_delta_index_stats(const struct delta_index *delta_index,
			       struct delta_index_stats *stats)
{
	…
}

size_t uds_compute_delta_index_size(u32 entry_count, u32 mean_delta, u32 payload_bits)
{
	…
}

u32 uds_get_delta_index_page_count(u32 entry_count, u32 list_count, u32 mean_delta,
				   u32 payload_bits, size_t bytes_per_page)
{
	…
}

void uds_log_delta_index_entry(struct delta_index_entry *delta_entry)
{
	…
}
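/*
 * Illustrative usage sketch, not part of the original code: inserting a record using the
 * functions above. uds_get_delta_index_entry() positions the entry at or just past the requested
 * key, and uds_put_delta_index_entry() inserts before that position. The full name is passed
 * through only when the key is already present, making the new entry a collision entry. This
 * assumes the at_end and key fields declared in delta-index.h.
 */
static int __maybe_unused example_put_record(const struct delta_index *delta_index,
					     u32 list_number, u32 key, u32 value, const u8 *name)
{
	struct delta_index_entry entry;
	bool collision;
	int result;

	result = uds_get_delta_index_entry(delta_index, list_number, key, name, &entry);
	if (result != UDS_SUCCESS)
		return result;

	/* Supply the name only if this key already has an entry (a collision). */
	collision = !entry.at_end && (entry.key == key);
	return uds_put_delta_index_entry(&entry, key, value, collision ? name : NULL);
}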