linux/mm/zswap.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * zswap.c - zswap driver file
 *
 * zswap is a cache that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <[email protected]>
*/

#define pr_fmt(fmt)

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/mempolicy.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>
#include <linux/zswap.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/workqueue.h>
#include <linux/list_lru.h>

#include "swap.h"
#include "internal.h"

/*********************************
* statistics
**********************************/
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages =;

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be a 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
*/

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Store failed due to compression algorithm failure */
static u64 zswap_reject_compress_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

#define ZSWAP_PARAM_UNSET

static int zswap_setup(void);

/* Enable/disable zswap */
static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled);
static bool zswap_enabled = IS_ENABLED();
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops =;
module_param_cb();

/* Crypto compressor to use */
static char *zswap_compressor =;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops =;
module_param_cb();

/* Compressed storage zpool to use */
static char *zswap_zpool_type =;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops =;
module_param_cb();

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent =;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent =; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);

/* Enable/disable memory pressure-based shrinker. */
static bool zswap_shrinker_enabled = IS_ENABLED();
module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);

bool zswap_is_enabled(void)
{}

bool zswap_never_enabled(void)
{}

/*********************************
* data structures
**********************************/

struct crypto_acomp_ctx {};

/*
 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
 * The only case where lru_lock is not acquired while holding tree.lock is
 * when a zswap_entry is taken off the lru for writeback, in that case it
 * needs to be verified that it's still valid in the tree.
 */
struct zswap_pool {};

/* Global LRU lists shared by all zswap pools. */
static struct list_lru zswap_list_lru;

/* The lock protects zswap_next_shrink updates. */
static DEFINE_SPINLOCK(zswap_shrink_lock);
static struct mem_cgroup *zswap_next_shrink;
static struct work_struct zswap_shrink_work;
static struct shrinker *zswap_shrinker;

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * swpentry - associated swap entry, the offset indexes into the red-black tree
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression.
 * referenced - true if the entry recently entered the zswap pool. Unset by the
 *              writeback logic. The entry is only reclaimed by the writeback
 *              logic if referenced is unset. See comments in the shrinker
 *              section for context.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * objcg - the obj_cgroup that the compressed memory is charged to
 * lru - handle to the pool's lru used to evict pages.
 */
struct zswap_entry {};

static struct xarray *zswap_trees[MAX_SWAPFILES];
static unsigned int nr_zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count =;

enum zswap_init_type {};

static enum zswap_init_type zswap_init_state;

/* used to ensure the integrity of initialization */
static DEFINE_MUTEX(zswap_init_lock);

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

static inline struct xarray *swap_zswap_tree(swp_entry_t swp)
{}

#define zswap_pool_debug(msg, p)

/*********************************
* pool functions
**********************************/
static void __zswap_pool_empty(struct percpu_ref *ref);

static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{}

static struct zswap_pool *__zswap_pool_create_fallback(void)
{}

static void zswap_pool_destroy(struct zswap_pool *pool)
{}

static void __zswap_pool_release(struct work_struct *work)
{}

static struct zswap_pool *zswap_pool_current(void);

static void __zswap_pool_empty(struct percpu_ref *ref)
{}

static int __must_check zswap_pool_get(struct zswap_pool *pool)
{}

static void zswap_pool_put(struct zswap_pool *pool)
{}

static struct zswap_pool *__zswap_pool_current(void)
{}

static struct zswap_pool *zswap_pool_current(void)
{}

static struct zswap_pool *zswap_pool_current_get(void)
{}

/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{}

static unsigned long zswap_max_pages(void)
{}

static unsigned long zswap_accept_thr_pages(void)
{}

unsigned long zswap_total_pages(void)
{}

static bool zswap_check_limits(void)
{}

/*********************************
* param callbacks
**********************************/

static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
{}

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const struct kernel_param *kp,
			     char *type, char *compressor)
{}

static int zswap_compressor_param_set(const char *val,
				      const struct kernel_param *kp)
{}

static int zswap_zpool_param_set(const char *val,
				 const struct kernel_param *kp)
{}

static int zswap_enabled_param_set(const char *val,
				   const struct kernel_param *kp)
{}

/*********************************
* lru functions
**********************************/

/* should be called under RCU */
#ifdef CONFIG_MEMCG
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
{}
#else
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
{
	return NULL;
}
#endif

static inline int entry_to_nid(struct zswap_entry *entry)
{}

static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
{}

static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry)
{}

void zswap_lruvec_state_init(struct lruvec *lruvec)
{}

void zswap_folio_swapin(struct folio *folio)
{}

/*
 * This function should be called when a memcg is being offlined.
 *
 * Since the global shrinker shrink_worker() may hold a reference
 * of the memcg, we must check and release the reference in
 * zswap_next_shrink.
 *
 * shrink_worker() must handle the case where this function releases
 * the reference of memcg being shrunk.
 */
void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
{}

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
{}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{}

/*
 * Carries out the common pattern of freeing and entry's zpool allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_entry_free(struct zswap_entry *entry)
{}

/*********************************
* compressed storage functions
**********************************/
static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{}

static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{}

static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
{}

static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
{}

/*********************************
* writeback code
**********************************/
/*
 * Attempts to free an entry by adding a folio to the swap cache,
 * decompressing the entry data into the folio, and issuing a
 * bio write to write the folio back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the folio
 * to the swap device.  We are basically resuming the same swap
 * writeback path that was intercepted with the zswap_store()
 * in the first place.  After the folio has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zswap_entry *entry,
				 swp_entry_t swpentry)
{}

/*********************************
* shrinker functions
**********************************/
/*
 * The dynamic shrinker is modulated by the following factors:
 *
 * 1. Each zswap entry has a referenced bit, which the shrinker unsets (giving
 *    the entry a second chance) before rotating it in the LRU list. If the
 *    entry is considered again by the shrinker, with its referenced bit unset,
 *    it is written back. The writeback rate as a result is dynamically
 *    adjusted by the pool activities - if the pool is dominated by new entries
 *    (i.e lots of recent zswapouts), these entries will be protected and
 *    the writeback rate will slow down. On the other hand, if the pool has a
 *    lot of stagnant entries, these entries will be reclaimed immediately,
 *    effectively increasing the writeback rate.
 *
 * 2. Swapins counter: If we observe swapins, it is a sign that we are
 *    overshrinking and should slow down. We maintain a swapins counter, which
 *    is consumed and subtract from the number of eligible objects on the LRU
 *    in zswap_shrinker_count().
 *
 * 3. Compression ratio. The better the workload compresses, the less gains we
 *    can expect from writeback. We scale down the number of objects available
 *    for reclaim by this ratio.
 */
static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
				       spinlock_t *lock, void *arg)
{}

static unsigned long zswap_shrinker_scan(struct shrinker *shrinker,
		struct shrink_control *sc)
{}

static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
		struct shrink_control *sc)
{}

static struct shrinker *zswap_alloc_shrinker(void)
{}

static int shrink_memcg(struct mem_cgroup *memcg)
{}

static void shrink_worker(struct work_struct *w)
{}

/*********************************
* main API
**********************************/
bool zswap_store(struct folio *folio)
{}

bool zswap_load(struct folio *folio)
{}

void zswap_invalidate(swp_entry_t swp)
{}

int zswap_swapon(int type, unsigned long nr_pages)
{}

void zswap_swapoff(int type)
{}

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int debugfs_get_total_size(void *data, u64 *val)
{}
DEFINE_DEBUGFS_ATTRIBUTE();

static int zswap_debugfs_init(void)
{}
#else
static int zswap_debugfs_init(void)
{
	return 0;
}
#endif

/*********************************
* module init and exit
**********************************/
static int zswap_setup(void)
{}

static int __init zswap_init(void)
{}
/* must be late so crypto has time to come up */
late_initcall(zswap_init);

MODULE_AUTHOR();
MODULE_DESCRIPTION();