// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/psi.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/log2.h>
#include <linux/shrinker.h>
#include <crypto/hash.h>
#include "misc.h"
#include "ctree.h"
#include "fs.h"
#include "btrfs_inode.h"
#include "bio.h"
#include "ordered-data.h"
#include "compression.h"
#include "extent_io.h"
#include "extent_map.h"
#include "subpage.h"
#include "messages.h"
#include "super.h"

static struct bio_set btrfs_compressed_bioset;

static const char* const btrfs_compress_types[] = …;

const char* btrfs_compress_type2str(enum btrfs_compression_type type)
{ … }

static inline struct compressed_bio *to_compressed_bio(struct btrfs_bio *bbio)
{ … }

static struct compressed_bio *alloc_compressed_bio(struct btrfs_inode *inode,
						   u64 start, blk_opf_t op,
						   btrfs_bio_end_io_t end_io)
{ … }

bool btrfs_compress_is_valid_type(const char *str, size_t len)
{ … }

static int compression_compress_pages(int type, struct list_head *ws,
				      struct address_space *mapping, u64 start,
				      struct folio **folios,
				      unsigned long *out_folios,
				      unsigned long *total_in,
				      unsigned long *total_out)
{ … }

static int compression_decompress_bio(struct list_head *ws,
				      struct compressed_bio *cb)
{ … }

static int compression_decompress(int type, struct list_head *ws,
				  const u8 *data_in, struct folio *dest_folio,
				  unsigned long dest_pgoff, size_t srclen,
				  size_t destlen)
{ … }

static void btrfs_free_compressed_folios(struct compressed_bio *cb)
{ … }

static int btrfs_decompress_bio(struct compressed_bio *cb);

/*
 * Global cache of last unused pages for compression/decompression.
 */
static struct btrfs_compr_pool { … } compr_pool;

static unsigned long btrfs_compr_pool_count(struct shrinker *sh, struct shrink_control *sc)
{ … }

static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_control *sc)
{ … }

/*
 * Common wrappers for folio allocation, used by the compression code.
 */
struct folio *btrfs_alloc_compr_folio(void)
{ … }

void btrfs_free_compr_folio(struct folio *folio)
{ … }

static void end_bbio_compressed_read(struct btrfs_bio *bbio)
{ … }

/*
 * Clear the writeback bits on all of the file pages for a compressed write.
 */
static noinline void end_compressed_writeback(const struct compressed_bio *cb)
{ … }

static void btrfs_finish_compressed_write_work(struct work_struct *work)
{ … }

/*
 * Do the cleanup once all the compressed pages hit the disk. This will clear
 * writeback on the file pages and free the compressed pages.
 *
 * This also calls the writeback end hooks for the file pages so that metadata
 * and checksums can be updated in the file.
 */
static void end_bbio_compressed_write(struct btrfs_bio *bbio)
{ … }

static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
{ … }
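/*
 * Illustrative sketch of the "global cache of last unused pages" idea above:
 * a small freelist of buffers plus shrinker-style count/scan callbacks.
 * This is a hedged userspace approximation, not the in-kernel code: the
 * names (example_pool_*, EXAMPLE_POOL_TARGET) and constants are made up for
 * illustration, and the real pool works on folios, uses a spinlock and
 * registers its callbacks with the kernel shrinker API.
 */
#include <pthread.h>
#include <stdlib.h>

#define EXAMPLE_POOL_TARGET	8		/* keep at most this many cached buffers */
#define EXAMPLE_BUF_SIZE	(64 * 1024)

struct example_buf {
	struct example_buf *next;
	unsigned char data[EXAMPLE_BUF_SIZE];
};

static struct {
	pthread_mutex_t lock;
	struct example_buf *head;
	unsigned long count;
} example_pool = { .lock = PTHREAD_MUTEX_INITIALIZER };

/* Shrinker-style "count" callback: how many objects could be freed. */
static unsigned long example_pool_count(void)
{
	return example_pool.count;
}

/* Shrinker-style "scan" callback: free up to @nr_to_scan cached buffers. */
static unsigned long example_pool_scan(unsigned long nr_to_scan)
{
	unsigned long freed = 0;

	pthread_mutex_lock(&example_pool.lock);
	while (example_pool.head && freed < nr_to_scan) {
		struct example_buf *buf = example_pool.head;

		example_pool.head = buf->next;
		example_pool.count--;
		free(buf);
		freed++;
	}
	pthread_mutex_unlock(&example_pool.lock);
	return freed;
}

/* Allocation prefers a cached buffer and falls back to malloc(). */
static struct example_buf *example_alloc_buf(void)
{
	struct example_buf *buf = NULL;

	pthread_mutex_lock(&example_pool.lock);
	if (example_pool.head) {
		buf = example_pool.head;
		example_pool.head = buf->next;
		example_pool.count--;
	}
	pthread_mutex_unlock(&example_pool.lock);
	return buf ? buf : malloc(sizeof(*buf));
}

/* Freeing returns the buffer to the pool unless the pool is already full. */
static void example_free_buf(struct example_buf *buf)
{
	pthread_mutex_lock(&example_pool.lock);
	if (example_pool.count < EXAMPLE_POOL_TARGET) {
		buf->next = example_pool.head;
		example_pool.head = buf;
		example_pool.count++;
		buf = NULL;
	}
	pthread_mutex_unlock(&example_pool.lock);
	free(buf);	/* free(NULL) is a no-op */
}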
/*
 * Worker function to build and submit bios for previously compressed pages.
 * The corresponding pages in the inode should be marked for writeback
 * and the compressed pages should have a reference on them for dropping
 * when the IO is complete.
 *
 * This also checksums the file bytes and gets things ready for
 * the end io hooks.
 */
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
				   struct folio **compressed_folios,
				   unsigned int nr_folios,
				   blk_opf_t write_flags,
				   bool writeback)
{ … }

/*
 * Add extra pages in the same compressed file extent so that we don't need to
 * re-read the same extent again and again.
 *
 * NOTE: this won't work well for subpage, as for subpage reads we lock the
 * full page and then submit a bio for each compressed/regular extent.
 *
 * This means that if several sectors in the same page point to the same
 * on-disk compressed data, we will re-read the same extent many times and
 * this function can only help for the next page.
 */
static noinline int add_ra_bio_pages(struct inode *inode,
				     u64 compressed_end,
				     struct compressed_bio *cb,
				     int *memstall, unsigned long *pflags)
{ … }

/*
 * For a compressed read, the bio we get passed has all the inode pages
 * in it. We don't actually do IO on those pages but allocate new ones
 * to hold the compressed pages on disk.
 *
 * bio->bi_iter.bi_sector points to the compressed extent on disk
 * bio->bi_io_vec points to all of the inode pages
 *
 * After the compressed pages are read, we copy the bytes into the
 * bio we were passed and then call the bio end_io handlers.
 */
void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
{ … }

/*
 * The heuristic uses systematic sampling to collect data from the input data
 * range; the logic can be tuned by the following constants:
 *
 * @SAMPLING_READ_SIZE - how many bytes will be copied for each sample
 * @SAMPLING_INTERVAL  - range from which the sampled data can be collected
 */
#define SAMPLING_READ_SIZE …
#define SAMPLING_INTERVAL …

/*
 * For statistical analysis of the input data we consider bytes that form a
 * Galois Field of 256 objects. Each object has an attribute count, i.e. how
 * many times the object appeared in the sample.
 */
#define BUCKET_SIZE …

/*
 * The size of the sample is based on a statistical sampling rule of thumb.
 * The common way is to perform sampling tests as long as the number of
 * elements in each cell is at least 5.
 *
 * Instead of 5, we choose 32 to obtain more accurate results.
 * If the data contain the maximum number of symbols, which is 256, we obtain a
 * sample size bound by 8192.
 *
 * For a sample of at most 8KB of data per data range: 16 consecutive bytes
 * from up to 512 locations.
 */
#define MAX_SAMPLE_SIZE …

struct bucket_item { … };

struct heuristic_ws { … };

static struct workspace_manager heuristic_wsm;

static void free_heuristic_ws(struct list_head *ws)
{ … }

static struct list_head *alloc_heuristic_ws(unsigned int level)
{ … }

const struct btrfs_compress_op btrfs_heuristic_compress = …;

static const struct btrfs_compress_op * const btrfs_compress_op[] = …;

static struct list_head *alloc_workspace(int type, unsigned int level)
{ … }

static void free_workspace(int type, struct list_head *ws)
{ … }

static void btrfs_init_workspace_manager(int type)
{ … }

static void btrfs_cleanup_workspace_manager(int type)
{ … }
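/*
 * Hedged userspace sketch of the systematic sampling described above: copy a
 * fixed number of consecutive bytes at regular intervals and count byte
 * occurrences in 256 buckets. The constants and names here (EX_SAMPLE_READ,
 * EX_SAMPLE_INTERVAL, EX_MAX_SAMPLE, ex_*) are illustrative placeholders and
 * are not the values of the elided SAMPLING_* / MAX_SAMPLE_SIZE macros.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define EX_SAMPLE_READ		16	/* bytes copied per sampling point (illustrative) */
#define EX_SAMPLE_INTERVAL	256	/* distance between sampling points (illustrative) */
#define EX_MAX_SAMPLE		8192	/* upper bound on the collected sample size */

struct ex_sample {
	uint8_t data[EX_MAX_SAMPLE];
	uint32_t size;
	uint32_t bucket[256];		/* occurrence count per byte value */
};

/* Collect a sample from @buf and fill the per-byte histogram. */
static void ex_collect_sample(const uint8_t *buf, size_t len, struct ex_sample *ws)
{
	size_t off = 0;

	ws->size = 0;
	memset(ws->bucket, 0, sizeof(ws->bucket));

	while (off + EX_SAMPLE_READ <= len &&
	       ws->size + EX_SAMPLE_READ <= EX_MAX_SAMPLE) {
		memcpy(ws->data + ws->size, buf + off, EX_SAMPLE_READ);
		ws->size += EX_SAMPLE_READ;
		off += EX_SAMPLE_INTERVAL;
	}

	for (uint32_t i = 0; i < ws->size; i++)
		ws->bucket[ws->data[i]]++;
}

/* Byte set size: how many distinct byte values appear in the sample. */
static uint32_t ex_byte_set_size(const struct ex_sample *ws)
{
	uint32_t count = 0;

	for (int i = 0; i < 256; i++)
		if (ws->bucket[i])
			count++;
	return count;
}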
/*
 * This finds an available workspace or allocates a new one.
 * If it's not possible to allocate a new one, it waits until one is freed.
 * Preallocation makes a forward progress guarantee and we do not return
 * errors.
 */
struct list_head *btrfs_get_workspace(int type, unsigned int level)
{ … }

static struct list_head *get_workspace(int type, int level)
{ … }

/*
 * Put a workspace struct back on the list or free it if we have enough
 * idle ones sitting around.
 */
void btrfs_put_workspace(int type, struct list_head *ws)
{ … }

static void put_workspace(int type, struct list_head *ws)
{ … }

/*
 * Adjust @level according to the limits of the compression algorithm or
 * fall back to the default.
 */
static unsigned int btrfs_compress_set_level(int type, unsigned level)
{ … }

/* Wrapper around find_get_page(), with extra error message. */
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
				     struct folio **in_folio_ret)
{ … }

/*
 * Given an address space and start and length, compress the bytes into @folios
 * that are allocated on demand.
 *
 * @type_level is encoded algorithm and level, where level 0 means whatever
 * default the algorithm chooses and is opaque here;
 * - the compression algorithm is stored in bits 0-3
 * - the level is stored in bits 4-7
 *
 * @out_folios is an in/out parameter, holds the maximum number of folios to
 * allocate and returns the number of actually allocated folios
 *
 * @total_in is used to return the number of bytes actually read. It
 * may be smaller than the input length if we had to exit early because we
 * ran out of room in the folios array or because we crossed the
 * max_out threshold.
 *
 * @total_out is an in/out parameter, must be set to the input length and will
 * be also used to return the total number of compressed bytes
 */
int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
			  u64 start, struct folio **folios,
			  unsigned long *out_folios, unsigned long *total_in,
			  unsigned long *total_out)
{ … }

static int btrfs_decompress_bio(struct compressed_bio *cb)
{ … }

/*
 * A less complex decompression routine. Our compressed data fits in a
 * single page, and we want to read a single page out of it.
 * start_byte tells us the offset into the compressed data we're interested in.
 */
int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
		     unsigned long dest_pgoff, size_t srclen, size_t destlen)
{ … }

int __init btrfs_init_compress(void)
{ … }

void __cold btrfs_exit_compress(void)
{ … }

/*
 * Copy decompressed data from working buffer to pages.
 *
 * @buf:		The decompressed data buffer
 * @buf_len:		The decompressed data length
 * @decompressed:	Number of bytes that are already decompressed inside the
 *			compressed extent
 * @cb:			The compressed extent descriptor
 * @orig_bio:		The original bio that the caller wants to read for
 *
 * An easier to understand graph is like below:
 *
 *		|<- orig_bio ->|     |<- orig_bio->|
 *	|<-------      full decompressed extent      ----->|
 *	|<-----------    @cb range   ---->|
 *	|			|<-- @buf_len -->|
 *	|<--- @decompressed --->|
 *
 * Note that @cb can be a subpage of the full decompressed extent, but
 * @cb->start always matches the start file offset of the full decompressed
 * extent.
 *
 * When reading a compressed extent, we have to read the full compressed
 * extent, while @orig_bio may only want part of the range.
 * Thus this function will ensure only data covered by @orig_bio will be
 * copied to it.
 *
 * Return 0 if we have copied all needed contents for @orig_bio.
 * Return >0 if we need to continue decompressing.
 */
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
			      struct compressed_bio *cb, u32 decompressed)
{ … }
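/*
 * Hedged sketch of the range math behind the copy described above: given the
 * reader's requested range inside the decompressed extent and a working
 * buffer that starts @decompressed bytes into that extent, compute how much
 * of the buffer should be copied and from which offset. The name
 * ex_copy_range() and its parameters are illustrative; the real function
 * additionally walks the destination folios and handles per-sector offsets.
 */
#include <stdint.h>

/*
 * @req_start/@req_len: range the reader wants, as offsets into the full
 *                      decompressed extent
 * @decompressed:       bytes of the extent already produced before @buf
 * @buf_len:            bytes available in the working buffer
 * @copy_off/@copy_len: where in the buffer to copy from and how much
 *
 * Returns 0 if the request is satisfied up to the end of this buffer,
 * >0 if more decompressed data is still needed.
 */
static int ex_copy_range(uint64_t req_start, uint32_t req_len,
			 uint32_t decompressed, uint32_t buf_len,
			 uint32_t *copy_off, uint32_t *copy_len)
{
	uint64_t buf_start = decompressed;
	uint64_t buf_end = (uint64_t)decompressed + buf_len;
	uint64_t req_end = req_start + req_len;
	uint64_t start, end;

	*copy_off = 0;
	*copy_len = 0;

	/* The buffer ends before the requested range: keep decompressing. */
	if (buf_end <= req_start)
		return 1;

	/* The buffer starts after the requested range: nothing left to copy. */
	if (buf_start >= req_end)
		return 0;

	/* Intersection of [buf_start, buf_end) and [req_start, req_end). */
	start = buf_start > req_start ? buf_start : req_start;
	end = buf_end < req_end ? buf_end : req_end;

	*copy_off = (uint32_t)(start - buf_start);
	*copy_len = (uint32_t)(end - start);

	/* More data is needed if the request extends past this buffer. */
	return req_end > buf_end ? 1 : 0;
}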
/*
 * Shannon Entropy calculation.
 *
 * Pure byte distribution analysis fails to determine compressibility of data.
 * Try calculating entropy to estimate the average minimum number of bits
 * needed to encode the sampled data.
 *
 * For convenience, return the percentage of needed bits, instead of the amount
 * of bits directly.
 *
 * @ENTROPY_LVL_ACEPTABLE - below that threshold, the sample has low byte
 *			    entropy and is compressible with high probability
 *
 * @ENTROPY_LVL_HIGH - data are not compressible with high probability
 *
 * Use of ilog2() decreases precision, we lower the LVL to 5 to compensate.
 */
#define ENTROPY_LVL_ACEPTABLE …
#define ENTROPY_LVL_HIGH …

/*
 * For increased precision in the shannon_entropy calculation,
 * let's do pow(n, M) to save more digits after the decimal point:
 *
 * - maximum int bit length is 64
 * - ilog2(MAX_SAMPLE_SIZE) -> 13
 * - 13 * 4 = 52 < 64 -> M = 4
 *
 * So use pow(n, 4).
 */
static inline u32 ilog2_w(u64 n)
{ … }

static u32 shannon_entropy(struct heuristic_ws *ws)
{ … }

#define RADIX_BASE …
#define COUNTERS_SIZE …

static u8 get4bits(u64 num, int shift)
{ … }

/*
 * Use 4 bits as the radix base.
 * Use 16 u32 counters for calculating the new position in the buf array.
 *
 * @array     - array that will be sorted
 * @array_buf - buffer array to store sorting results,
 *              must be equal in size to @array
 * @num       - array size
 */
static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
		       int num)
{ … }

/*
 * Size of the core byte set - how many bytes cover 90% of the sample.
 *
 * There are several types of structured binary data that use nearly all byte
 * values. The distribution can be uniform and counts in all buckets will be
 * nearly the same (eg. encrypted data). Unlikely to be compressible.
 *
 * Another possibility is a normal (Gaussian) distribution, where the data
 * could be potentially compressible, but we have to take a few more steps to
 * decide how much.
 *
 * @BYTE_CORE_SET_LOW  - the main part of the byte values is repeated
 *                       frequently, compression algorithms can handle that
 *                       easily
 * @BYTE_CORE_SET_HIGH - the data have a uniform distribution and with high
 *                       probability are not compressible
 */
#define BYTE_CORE_SET_LOW …
#define BYTE_CORE_SET_HIGH …

static int byte_core_set_size(struct heuristic_ws *ws)
{ … }

/*
 * Count byte values in buckets.
 * This heuristic can detect textual data (configs, xml, json, html, etc),
 * because in most text-like data the byte set is restricted to a limited
 * number of possible characters, and that restriction in most cases makes
 * the data easy to compress.
 *
 * @BYTE_SET_THRESHOLD - consider all data within this byte set size:
 *	less - compressible
 *	more - need additional analysis
 */
#define BYTE_SET_THRESHOLD …

static u32 byte_set_size(const struct heuristic_ws *ws)
{ … }

static bool sample_repeated_patterns(struct heuristic_ws *ws)
{ … }

static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
				     struct heuristic_ws *ws)
{ … }

/*
 * Compression heuristic.
 *
 * The following types of analysis can be performed:
 * - detect mostly zero data
 * - detect data with low "byte set" size (text, etc)
 * - detect data with low/high "core byte" set
 *
 * Return non-zero if the compression should be done, 0 otherwise.
 */
int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end)
{ … }

/*
 * Convert the compression suffix (eg. after "zlib" starting with ":") to the
 * level; an unrecognized string will set the default level.
 */
unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
{ … }
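/*
 * Hedged floating-point sketch of the entropy part of the heuristic described
 * above: compute the Shannon entropy of the sampled byte histogram and return
 * it as a percentage of the 8 bits/byte maximum. The in-kernel
 * shannon_entropy() uses integer ilog2()-based math (hence the pow(n, 4)
 * trick above); this illustrative version uses log2() from <math.h> for
 * clarity (link with -lm). A caller would compare the result against the
 * elided ENTROPY_LVL_* thresholds. The name ex_shannon_entropy_percent() is
 * an assumption, not a kernel symbol.
 */
#include <math.h>
#include <stdint.h>

/*
 * @bucket: occurrence count for each of the 256 byte values
 * @total:  number of sampled bytes (sum of all buckets)
 *
 * Returns the entropy of the sample as a percentage: 0 means a single
 * repeated byte value, 100 means a perfectly uniform distribution.
 */
static uint32_t ex_shannon_entropy_percent(const uint32_t bucket[256], uint32_t total)
{
	double entropy = 0.0;

	if (!total)
		return 0;

	for (int i = 0; i < 256; i++) {
		if (!bucket[i])
			continue;
		double p = (double)bucket[i] / total;

		entropy -= p * log2(p);
	}

	/* 8 bits per byte is the maximum possible entropy. */
	return (uint32_t)(entropy * 100.0 / 8.0);
}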