// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/psi.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/log2.h>
#include <linux/shrinker.h>
#include <crypto/hash.h>
#include "misc.h"
#include "ctree.h"
#include "fs.h"
#include "btrfs_inode.h"
#include "bio.h"
#include "ordered-data.h"
#include "compression.h"
#include "extent_io.h"
#include "extent_map.h"
#include "subpage.h"
#include "messages.h"
#include "super.h"

static struct bio_set btrfs_compressed_bioset;

static const char* const btrfs_compress_types[] = …;

const char* btrfs_compress_type2str(enum btrfs_compression_type type)
{ … }

static inline struct compressed_bio *to_compressed_bio(struct btrfs_bio *bbio)
{ … }

static struct compressed_bio *alloc_compressed_bio(struct btrfs_inode *inode,
						   u64 start, blk_opf_t op,
						   btrfs_bio_end_io_t end_io)
{ … }

bool btrfs_compress_is_valid_type(const char *str, size_t len)
{ … }

static int compression_compress_pages(int type, struct list_head *ws,
				      struct address_space *mapping, u64 start,
				      struct folio **folios,
				      unsigned long *out_folios,
				      unsigned long *total_in,
				      unsigned long *total_out)
{ … }

static int compression_decompress_bio(struct list_head *ws,
				      struct compressed_bio *cb)
{ … }

static int compression_decompress(int type, struct list_head *ws,
				  const u8 *data_in, struct folio *dest_folio,
				  unsigned long dest_pgoff, size_t srclen,
				  size_t destlen)
{ … }

static void btrfs_free_compressed_folios(struct compressed_bio *cb)
{ … }

static int btrfs_decompress_bio(struct compressed_bio *cb);

/*
 * Global cache of last unused pages for compression/decompression.
 */
static struct btrfs_compr_pool { … } compr_pool;

static unsigned long btrfs_compr_pool_count(struct shrinker *sh, struct shrink_control *sc)
{ … }

static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_control *sc)
{ … }

/*
 * Common wrappers for folio allocation, used by the compression code.
 */
struct folio *btrfs_alloc_compr_folio(void)
{ … }

void btrfs_free_compr_folio(struct folio *folio)
{ … }

static void end_bbio_compressed_read(struct btrfs_bio *bbio)
{ … }

/*
 * Clear the writeback bits on all of the file pages for a compressed write.
 */
static noinline void end_compressed_writeback(const struct compressed_bio *cb)
{ … }

static void btrfs_finish_compressed_write_work(struct work_struct *work)
{ … }

/*
 * Do the cleanup once all the compressed pages hit the disk. This will clear
 * writeback on the file pages and free the compressed pages.
 *
 * This also calls the writeback end hooks for the file pages so that metadata
 * and checksums can be updated in the file.
 */
static void end_bbio_compressed_write(struct btrfs_bio *bbio)
{ … }

static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
{ … }
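/*
 * Illustrative sketch of the "global cache of last unused pages" idea above:
 * a small freelist of buffers plus shrinker-style count/scan callbacks.
 * This is a hedged userspace approximation, not the in-kernel code: the
 * names (example_pool_*, EXAMPLE_POOL_TARGET) and constants are made up for
 * illustration, and the real pool works on folios, uses a spinlock and
 * registers its callbacks with the kernel shrinker API.
 */
#include <pthread.h>
#include <stdlib.h>

#define EXAMPLE_POOL_TARGET	8		/* keep at most this many cached buffers */
#define EXAMPLE_BUF_SIZE	(64 * 1024)

struct example_buf {
	struct example_buf *next;
	unsigned char data[EXAMPLE_BUF_SIZE];
};

static struct {
	pthread_mutex_t lock;
	struct example_buf *head;
	unsigned long count;
} example_pool = { .lock = PTHREAD_MUTEX_INITIALIZER };

/* Shrinker-style "count" callback: how many objects could be freed. */
static unsigned long example_pool_count(void)
{
	return example_pool.count;
}

/* Shrinker-style "scan" callback: free up to @nr_to_scan cached buffers. */
static unsigned long example_pool_scan(unsigned long nr_to_scan)
{
	unsigned long freed = 0;

	pthread_mutex_lock(&example_pool.lock);
	while (example_pool.head && freed < nr_to_scan) {
		struct example_buf *buf = example_pool.head;

		example_pool.head = buf->next;
		example_pool.count--;
		free(buf);
		freed++;
	}
	pthread_mutex_unlock(&example_pool.lock);
	return freed;
}

/* Allocation prefers a cached buffer and falls back to malloc(). */
static struct example_buf *example_alloc_buf(void)
{
	struct example_buf *buf = NULL;

	pthread_mutex_lock(&example_pool.lock);
	if (example_pool.head) {
		buf = example_pool.head;
		example_pool.head = buf->next;
		example_pool.count--;
	}
	pthread_mutex_unlock(&example_pool.lock);
	return buf ? buf : malloc(sizeof(*buf));
}

/* Freeing returns the buffer to the pool unless the pool is already full. */
static void example_free_buf(struct example_buf *buf)
{
	pthread_mutex_lock(&example_pool.lock);
	if (example_pool.count < EXAMPLE_POOL_TARGET) {
		buf->next = example_pool.head;
		example_pool.head = buf;
		example_pool.count++;
		buf = NULL;
	}
	pthread_mutex_unlock(&example_pool.lock);
	free(buf);	/* free(NULL) is a no-op */
}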
/*
 * Worker function to build and submit bios for previously compressed pages.
 * The corresponding pages in the inode should be marked for writeback
 * and the compressed pages should have a reference on them for dropping
 * when the IO is complete.
 *
 * This also checksums the file bytes and gets things ready for
 * the end io hooks.
 */
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
				   struct folio **compressed_folios,
				   unsigned int nr_folios,
				   blk_opf_t write_flags,
				   bool writeback)
{ … }

/*
 * Add extra pages in the same compressed file extent so that we don't need to
 * re-read the same extent again and again.
 *
 * NOTE: this won't work well for subpage, as for subpage reads we lock the
 * full page and then submit a bio for each compressed/regular extent.
 *
 * This means that if several sectors in the same page point to the same
 * on-disk compressed data, we will re-read the same extent many times and
 * this function can only help for the next page.
 */
static noinline int add_ra_bio_pages(struct inode *inode,
				     u64 compressed_end,
				     struct compressed_bio *cb,
				     int *memstall, unsigned long *pflags)
{ … }

/*
 * For a compressed read, the bio we get passed has all the inode pages
 * in it. We don't actually do IO on those pages but allocate new ones
 * to hold the compressed pages on disk.
 *
 * bio->bi_iter.bi_sector points to the compressed extent on disk
 * bio->bi_io_vec points to all of the inode pages
 *
 * After the compressed pages are read, we copy the bytes into the
 * bio we were passed and then call the bio end_io handlers.
 */
void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
{ … }

/*
 * The heuristic uses systematic sampling to collect data from the input data
 * range; the logic can be tuned by the following constants:
 *
 * @SAMPLING_READ_SIZE - how many bytes will be copied for each sample
 * @SAMPLING_INTERVAL  - range from which the sampled data can be collected
 */
#define SAMPLING_READ_SIZE …
#define SAMPLING_INTERVAL …

/*
 * For statistical analysis of the input data we consider bytes that form a
 * Galois Field of 256 objects. Each object has an attribute count, i.e. how
 * many times the object appeared in the sample.
 */
#define BUCKET_SIZE …

/*
 * The size of the sample is based on a statistical sampling rule of thumb.
 * The common way is to perform sampling tests as long as the number of
 * elements in each cell is at least 5.
 *
 * Instead of 5, we choose 32 to obtain more accurate results.
 * If the data contain the maximum number of symbols, which is 256, we obtain a
 * sample size bound by 8192.
 *
 * For a sample of at most 8KB of data per data range: 16 consecutive bytes
 * from up to 512 locations.
 */
#define MAX_SAMPLE_SIZE …

struct bucket_item { … };

struct heuristic_ws { … };

static struct workspace_manager heuristic_wsm;

static void free_heuristic_ws(struct list_head *ws)
{ … }

static struct list_head *alloc_heuristic_ws(unsigned int level)
{ … }

const struct btrfs_compress_op btrfs_heuristic_compress = …;

static const struct btrfs_compress_op * const btrfs_compress_op[] = …;

static struct list_head *alloc_workspace(int type, unsigned int level)
{ … }

static void free_workspace(int type, struct list_head *ws)
{ … }

static void btrfs_init_workspace_manager(int type)
{ … }

static void btrfs_cleanup_workspace_manager(int type)
{ … }
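/*
 * Hedged userspace sketch of the systematic sampling described above: copy a
 * fixed number of consecutive bytes at regular intervals and count byte
 * occurrences in 256 buckets. The constants and names here (EX_SAMPLE_READ,
 * EX_SAMPLE_INTERVAL, EX_MAX_SAMPLE, ex_*) are illustrative placeholders and
 * are not the values of the elided SAMPLING_* / MAX_SAMPLE_SIZE macros.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define EX_SAMPLE_READ		16	/* bytes copied per sampling point (illustrative) */
#define EX_SAMPLE_INTERVAL	256	/* distance between sampling points (illustrative) */
#define EX_MAX_SAMPLE		8192	/* upper bound on the collected sample size */

struct ex_sample {
	uint8_t data[EX_MAX_SAMPLE];
	uint32_t size;
	uint32_t bucket[256];		/* occurrence count per byte value */
};

/* Collect a sample from @buf and fill the per-byte histogram. */
static void ex_collect_sample(const uint8_t *buf, size_t len, struct ex_sample *ws)
{
	size_t off = 0;

	ws->size = 0;
	memset(ws->bucket, 0, sizeof(ws->bucket));

	while (off + EX_SAMPLE_READ <= len &&
	       ws->size + EX_SAMPLE_READ <= EX_MAX_SAMPLE) {
		memcpy(ws->data + ws->size, buf + off, EX_SAMPLE_READ);
		ws->size += EX_SAMPLE_READ;
		off += EX_SAMPLE_INTERVAL;
	}

	for (uint32_t i = 0; i < ws->size; i++)
		ws->bucket[ws->data[i]]++;
}

/* Byte set size: how many distinct byte values appear in the sample. */
static uint32_t ex_byte_set_size(const struct ex_sample *ws)
{
	uint32_t count = 0;

	for (int i = 0; i < 256; i++)
		if (ws->bucket[i])
			count++;
	return count;
}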
/*
 * This finds an available workspace or allocates a new one.
 * If it's not possible to allocate a new one, it waits until one is freed.
 * Preallocation makes a forward progress guarantee and we do not return
 * errors.
 */
struct list_head *btrfs_get_workspace(int type, unsigned int level)
{ … }

static struct list_head *get_workspace(int type, int level)
{ … }

/*
 * Put a workspace struct back on the list or free it if we have enough
 * idle ones sitting around.
 */
void btrfs_put_workspace(int type, struct list_head *ws)
{ … }

static void put_workspace(int type, struct list_head *ws)
{ … }

/*
 * Adjust @level according to the limits of the compression algorithm or
 * fall back to the default.
 */
static unsigned int btrfs_compress_set_level(int type, unsigned level)
{ … }

/* Wrapper around find_get_page(), with extra error message. */
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
				     struct folio **in_folio_ret)
{ … }

/*
 * Given an address space and start and length, compress the bytes into @folios
 * that are allocated on demand.
 *
 * @type_level is encoded algorithm and level, where level 0 means whatever
 * default the algorithm chooses and is opaque here;
 * - the compression algorithm is stored in bits 0-3
 * - the level is stored in bits 4-7
 *
 * @out_folios is an in/out parameter, holds the maximum number of folios to
 * allocate and returns the number of actually allocated folios
 *
 * @total_in is used to return the number of bytes actually read. It
 * may be smaller than the input length if we had to exit early because we
 * ran out of room in the folios array or because we crossed the
 * max_out threshold.
 *
 * @total_out is an in/out parameter, must be set to the input length and will
 * be also used to return the total number of compressed bytes
 */
int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
			  u64 start, struct folio **folios,
			  unsigned long *out_folios, unsigned long *total_in,
			  unsigned long *total_out)
{ … }

static int btrfs_decompress_bio(struct compressed_bio *cb)
{ … }

/*
 * A less complex decompression routine. Our compressed data fits in a
 * single page, and we want to read a single page out of it.
 * start_byte tells us the offset into the compressed data we're interested in.
 */
int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
		     unsigned long dest_pgoff, size_t srclen, size_t destlen)
{ … }

int __init btrfs_init_compress(void)
{ … }

void __cold btrfs_exit_compress(void)
{ … }

/*
 * Copy decompressed data from working buffer to pages.
 *
 * @buf:		The decompressed data buffer
 * @buf_len:		The decompressed data length
 * @decompressed:	Number of bytes that are already decompressed inside the
 *			compressed extent
 * @cb:			The compressed extent descriptor
 * @orig_bio:		The original bio that the caller wants to read for
 *
 * An easier to understand graph is like below:
 *
 *		|<- orig_bio ->|     |<- orig_bio->|
 *	|<-------      full decompressed extent      ----->|
 *	|<-----------    @cb range   ---->|
 *	|			|<-- @buf_len -->|
 *	|<--- @decompressed --->|
 *
 * Note that @cb can be a subpage of the full decompressed extent, but
 * @cb->start always matches the start file offset of the full decompressed
 * extent.
 *
 * When reading a compressed extent, we have to read the full compressed
 * extent, while @orig_bio may only want part of the range.
 * Thus this function will ensure only data covered by @orig_bio will be
 * copied to it.
 *
 * Return 0 if we have copied all needed contents for @orig_bio.
 * Return >0 if we need to continue decompressing.
 */
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
			      struct compressed_bio *cb, u32 decompressed)
{ … }
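/*
 * Hedged sketch of the range math behind the copy described above: given the
 * reader's requested range inside the decompressed extent and a working
 * buffer that starts @decompressed bytes into that extent, compute how much
 * of the buffer should be copied and from which offset. The name
 * ex_copy_range() and its parameters are illustrative; the real function
 * additionally walks the destination folios and handles per-sector offsets.
 */
#include <stdint.h>

/*
 * @req_start/@req_len: range the reader wants, as offsets into the full
 *                      decompressed extent
 * @decompressed:       bytes of the extent already produced before @buf
 * @buf_len:            bytes available in the working buffer
 * @copy_off/@copy_len: where in the buffer to copy from and how much
 *
 * Returns 0 if the request is satisfied up to the end of this buffer,
 * >0 if more decompressed data is still needed.
 */
static int ex_copy_range(uint64_t req_start, uint32_t req_len,
			 uint32_t decompressed, uint32_t buf_len,
			 uint32_t *copy_off, uint32_t *copy_len)
{
	uint64_t buf_start = decompressed;
	uint64_t buf_end = (uint64_t)decompressed + buf_len;
	uint64_t req_end = req_start + req_len;
	uint64_t start, end;

	*copy_off = 0;
	*copy_len = 0;

	/* The buffer ends before the requested range: keep decompressing. */
	if (buf_end <= req_start)
		return 1;

	/* The buffer starts after the requested range: nothing left to copy. */
	if (buf_start >= req_end)
		return 0;

	/* Intersection of [buf_start, buf_end) and [req_start, req_end). */
	start = buf_start > req_start ? buf_start : req_start;
	end = buf_end < req_end ? buf_end : req_end;

	*copy_off = (uint32_t)(start - buf_start);
	*copy_len = (uint32_t)(end - start);

	/* More data is needed if the request extends past this buffer. */
	return req_end > buf_end ? 1 : 0;
}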
/*
 * Shannon Entropy calculation.
 *
 * Pure byte distribution analysis fails to determine compressibility of data.
 * Try calculating entropy to estimate the average minimum number of bits
 * needed to encode the sampled data.
 *
 * For convenience, return the percentage of needed bits, instead of the amount
 * of bits directly.
 *
 * @ENTROPY_LVL_ACEPTABLE - below that threshold, the sample has low byte
 *			    entropy and is compressible with high probability
 *
 * @ENTROPY_LVL_HIGH - data are not compressible with high probability
 *
 * Use of ilog2() decreases precision, we lower the LVL to 5 to compensate.
 */
#define ENTROPY_LVL_ACEPTABLE …
#define ENTROPY_LVL_HIGH …

/*
 * For increased precision in the shannon_entropy calculation,
 * let's do pow(n, M) to save more digits after the decimal point:
 *
 * - maximum int bit length is 64
 * - ilog2(MAX_SAMPLE_SIZE) -> 13
 * - 13 * 4 = 52 < 64 -> M = 4
 *
 * So use pow(n, 4).
 */
static inline u32 ilog2_w(u64 n)
{ … }

static u32 shannon_entropy(struct heuristic_ws *ws)
{ … }

#define RADIX_BASE …
#define COUNTERS_SIZE …

static u8 get4bits(u64 num, int shift)
{ … }

/*
 * Use 4 bits as the radix base.
 * Use 16 u32 counters for calculating the new position in the buf array.
 *
 * @array     - array that will be sorted
 * @array_buf - buffer array to store sorting results,
 *              must be equal in size to @array
 * @num       - array size
 */
static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
		       int num)
{ … }

/*
 * Size of the core byte set - how many bytes cover 90% of the sample.
 *
 * There are several types of structured binary data that use nearly all byte
 * values. The distribution can be uniform and counts in all buckets will be
 * nearly the same (eg. encrypted data). Unlikely to be compressible.
 *
 * Another possibility is a normal (Gaussian) distribution, where the data
 * could be potentially compressible, but we have to take a few more steps to
 * decide how much.
 *
 * @BYTE_CORE_SET_LOW  - the main part of the byte values is repeated
 *                       frequently, compression algorithms can handle that
 *                       easily
 * @BYTE_CORE_SET_HIGH - the data have a uniform distribution and with high
 *                       probability are not compressible
 */
#define BYTE_CORE_SET_LOW …
#define BYTE_CORE_SET_HIGH …

static int byte_core_set_size(struct heuristic_ws *ws)
{ … }

/*
 * Count byte values in buckets.
 * This heuristic can detect textual data (configs, xml, json, html, etc),
 * because in most text-like data the byte set is restricted to a limited
 * number of possible characters, and that restriction in most cases makes
 * the data easy to compress.
 *
 * @BYTE_SET_THRESHOLD - consider all data within this byte set size:
 *	less - compressible
 *	more - need additional analysis
 */
#define BYTE_SET_THRESHOLD …

static u32 byte_set_size(const struct heuristic_ws *ws)
{ … }

static bool sample_repeated_patterns(struct heuristic_ws *ws)
{ … }

static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
				     struct heuristic_ws *ws)
{ … }

/*
 * Compression heuristic.
 *
 * The following types of analysis can be performed:
 * - detect mostly zero data
 * - detect data with low "byte set" size (text, etc)
 * - detect data with low/high "core byte" set
 *
 * Return non-zero if the compression should be done, 0 otherwise.
 */
int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end)
{ … }

/*
 * Convert the compression suffix (eg. after "zlib" starting with ":") to the
 * level; an unrecognized string will set the default level.
 */
unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
{ … }
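/*
 * Hedged floating-point sketch of the entropy part of the heuristic described
 * above: compute the Shannon entropy of the sampled byte histogram and return
 * it as a percentage of the 8 bits/byte maximum. The in-kernel
 * shannon_entropy() uses integer ilog2()-based math (hence the pow(n, 4)
 * trick above); this illustrative version uses log2() from <math.h> for
 * clarity (link with -lm). A caller would compare the result against the
 * elided ENTROPY_LVL_* thresholds. The name ex_shannon_entropy_percent() is
 * an assumption, not a kernel symbol.
 */
#include <math.h>
#include <stdint.h>

/*
 * @bucket: occurrence count for each of the 256 byte values
 * @total:  number of sampled bytes (sum of all buckets)
 *
 * Returns the entropy of the sample as a percentage: 0 means a single
 * repeated byte value, 100 means a perfectly uniform distribution.
 */
static uint32_t ex_shannon_entropy_percent(const uint32_t bucket[256], uint32_t total)
{
	double entropy = 0.0;

	if (!total)
		return 0;

	for (int i = 0; i < 256; i++) {
		if (!bucket[i])
			continue;
		double p = (double)bucket[i] / total;

		entropy -= p * log2(p);
	}

	/* 8 bits per byte is the maximum possible entropy. */
	return (uint32_t)(entropy * 100.0 / 8.0);
}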