/*
 * linux/mm/zsmalloc.c
 *
 * zsmalloc memory allocator
 *
 * Copyright (C) 2011  Nitin Gupta
 * Copyright (C) 2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the license that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */

/*
 * The following describes how we use various fields and flags of the
 * underlying struct page(s) to form a zspage.
 *
 * Usage of struct page fields:
 *	page->private: points to zspage
 *	page->index: links together all component pages of a zspage
 *		For huge pages, this is always 0, so we use this field
 *		to store the handle.
 *	page->page_type: PG_zsmalloc, lower 16 bits locate the first object
 *		offset in a subpage of a zspage
 *
 * Usage of struct page flags:
 *	PG_private: identifies the first component page
 *	PG_owner_priv_1: identifies the huge component page
 *
 */

#define pr_fmt(fmt)

/*
 * lock ordering:
 *	page_lock
 *	pool->migrate_lock
 *	class->lock
 *	zspage->lock
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/pgtable.h>
#include <asm/tlbflush.h>
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/vmalloc.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>
#include <linux/shrinker.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/zsmalloc.h>
#include <linux/zpool.h>
#include <linux/migrate.h>
#include <linux/wait.h>
#include <linux/pagemap.h>
#include <linux/fs.h>
#include <linux/local_lock.h>

#define ZSPAGE_MAGIC

/*
 * This must be a power of 2 and greater than or equal to sizeof(link_free).
 * These two conditions ensure that any 'struct link_free' itself doesn't
 * span more than one page, which avoids the complex case of mapping two
 * pages simply to restore link_free pointer values.
 */
#define ZS_ALIGN

#define ZS_HANDLE_SIZE

/*
 * Object location (<PFN>, <obj_idx>) is encoded as
 * a single (unsigned long) handle value.
 *
 * Note that object index <obj_idx> starts from 0.
 *
 * This is made more complicated by various memory models and PAE.
 */

#ifndef MAX_POSSIBLE_PHYSMEM_BITS
#ifdef MAX_PHYSMEM_BITS
#define MAX_POSSIBLE_PHYSMEM_BITS
#else
/*
 * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
 * be PAGE_SHIFT
 */
#define MAX_POSSIBLE_PHYSMEM_BITS
#endif
#endif

#define _PFN_BITS

/*
 * The head of an allocated object stores OBJ_ALLOCATED_TAG so that we can
 * identify whether the object is allocated or not.
 * It's okay to keep this status in the least significant bit because the
 * header holds a handle, which is a 4-byte-aligned address, so we have
 * room for at least two bits.
 */
#define OBJ_ALLOCATED_TAG

#define OBJ_TAG_BITS
#define OBJ_TAG_MASK

#define OBJ_INDEX_BITS
#define OBJ_INDEX_MASK
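
/*
 * Illustrative sketch of the (<PFN>, <obj_idx>) packing described above:
 * the page frame number goes in the high bits and the object index in the
 * low OBJ_INDEX_BITS. This is a simplified example only; the real
 * encoder/decoder also has to account for the tag bits above, a detail
 * omitted here.
 */
static inline unsigned long example_encode_obj(unsigned long pfn,
					       unsigned int obj_idx)
{
	return (pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK);
}

static inline void example_decode_obj(unsigned long obj, unsigned long *pfn,
				      unsigned int *obj_idx)
{
	*pfn = obj >> OBJ_INDEX_BITS;
	*obj_idx = obj & OBJ_INDEX_MASK;
}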

#define HUGE_BITS
#define FULLNESS_BITS
#define CLASS_BITS
#define MAGIC_VAL_BITS

#define ZS_MAX_PAGES_PER_ZSPAGE

/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE
/* each chunk includes extra space to keep handle */
#define ZS_MAX_ALLOC_SIZE

/*
 * On systems with 4K page size, this gives 255 size classes! There is a
 * trade-off here:
 *  - A large number of size classes is potentially wasteful as free pages are
 *    spread across these classes
 *  - A small number of size classes causes large internal fragmentation
 *  - It's probably better to use specific size classes (empirically
 *    determined). NOTE: all those class sizes must be set as multiples of
 *    ZS_ALIGN to make sure link_free itself never has to span 2 pages.
 *
 *  ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiples of ZS_ALIGN
 *  (reason above)
 */
#define ZS_SIZE_CLASS_DELTA
#define ZS_SIZE_CLASSES
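
/*
 * Worked example for the note above, assuming 4K pages, a 16-byte
 * ZS_SIZE_CLASS_DELTA and a 32-byte ZS_MIN_ALLOC_SIZE (the actual macro
 * values are elided here): (4096 - 32) / 16 + 1 = 255 size classes,
 * matching the figure quoted above.
 */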

/*
 * Pages are distinguished by the ratio of used memory (that is the ratio
 * of ->inuse objects to all objects that page can store). For example,
 * INUSE_RATIO_10 means that the ratio of used objects is > 0% and <= 10%.
 *
 * The number of fullness groups is not random. It allows us to keep
 * difference between the least busy page in the group (minimum permitted
 * number of ->inuse objects) and the most busy page (maximum permitted
 * number of ->inuse objects) at a reasonable value.
 */
enum fullness_group {};

enum class_stat_type {};

struct zs_size_stat {};

#ifdef CONFIG_ZSMALLOC_STAT
static struct dentry *zs_stat_root;
#endif

static size_t huge_class_size;

struct size_class {};

/*
 * Placed within free objects to form a singly linked list.
 * For every zspage, zspage->freeobj gives the head of this list.
 *
 * This must be a power of 2 and less than or equal to ZS_ALIGN.
 */
struct link_free {};
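
/*
 * Illustrative compile-time checks for the constraint stated above (and the
 * matching ZS_ALIGN requirement near the top of the file). This is a sketch
 * only; the kernel does not necessarily carry these exact assertions.
 */
static_assert((sizeof(struct link_free) & (sizeof(struct link_free) - 1)) == 0,
	      "struct link_free size must be a power of 2");
static_assert(sizeof(struct link_free) <= ZS_ALIGN,
	      "struct link_free must not exceed ZS_ALIGN");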

struct zs_pool {};

struct zspage {};

struct mapping_area {};

/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
static void SetZsHugePage(struct zspage *zspage)
{}

static bool ZsHugePage(struct zspage *zspage)
{}

static void migrate_lock_init(struct zspage *zspage);
static void migrate_read_lock(struct zspage *zspage);
static void migrate_read_unlock(struct zspage *zspage);
static void migrate_write_lock(struct zspage *zspage);
static void migrate_write_unlock(struct zspage *zspage);

#ifdef CONFIG_COMPACTION
static void kick_deferred_free(struct zs_pool *pool);
static void init_deferred_free(struct zs_pool *pool);
static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
#else
static void kick_deferred_free(struct zs_pool *pool) {}
static void init_deferred_free(struct zs_pool *pool) {}
static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
#endif

static int create_cache(struct zs_pool *pool)
{}

static void destroy_cache(struct zs_pool *pool)
{}

static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
{}

static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
{}

static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
{}

static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
{}

/* class->lock (which owns the handle) synchronizes races */
static void record_obj(unsigned long handle, unsigned long obj)
{}

/* zpool driver */

#ifdef CONFIG_ZPOOL

static void *zs_zpool_create(const char *name, gfp_t gfp)
{}

static void zs_zpool_destroy(void *pool)
{}

static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{}
static void zs_zpool_free(void *pool, unsigned long handle)
{}

static void *zs_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{}
static void zs_zpool_unmap(void *pool, unsigned long handle)
{}

static u64 zs_zpool_total_pages(void *pool)
{}

static struct zpool_driver zs_zpool_driver =;

MODULE_ALIAS();
#endif /* CONFIG_ZPOOL */

/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area) =;

static __maybe_unused int is_first_page(struct page *page)
{}

/* Protected by class->lock */
static inline int get_zspage_inuse(struct zspage *zspage)
{}


static inline void mod_zspage_inuse(struct zspage *zspage, int val)
{}

static inline struct page *get_first_page(struct zspage *zspage)
{}

#define FIRST_OBJ_PAGE_TYPE_MASK

static inline void reset_first_obj_offset(struct page *page)
{}

static inline unsigned int get_first_obj_offset(struct page *page)
{}

static inline void set_first_obj_offset(struct page *page, unsigned int offset)
{}
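
/*
 * Illustrative sketch of how the struct page field usage documented at the
 * top of this file can be read back. The 16-bit mask mirrors the "lower
 * 16 bits" note there and is an assumption, since the real mask
 * (FIRST_OBJ_PAGE_TYPE_MASK) is elided above.
 */
static inline struct zspage *example_page_to_zspage(struct page *page)
{
	/* page->private points to the owning zspage */
	return (struct zspage *)page_private(page);
}

static inline unsigned int example_first_obj_offset(struct page *page)
{
	/* lower 16 bits of page->page_type locate the first object offset */
	return page->page_type & 0xffff;
}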

static inline unsigned int get_freeobj(struct zspage *zspage)
{}

static inline void set_freeobj(struct zspage *zspage, unsigned int obj)
{}

static struct size_class *zspage_class(struct zs_pool *pool,
				       struct zspage *zspage)
{}

/*
 * zsmalloc divides the pool into various size classes where each
 * class maintains a list of zspages where each zspage is divided
 * into equal sized chunks. Each allocation falls into one of these
 * classes depending on its size. This function returns the index of the
 * size class whose chunk size is big enough to hold the given size.
 */
static int get_size_class_index(int size)
{}
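
/*
 * Minimal sketch of the size -> class index mapping described above, assuming
 * classes are spaced ZS_SIZE_CLASS_DELTA bytes apart starting at
 * ZS_MIN_ALLOC_SIZE (see the macros near the top of the file).
 */
static inline int example_size_class_index(int size)
{
	int idx = 0;

	if (likely(size > ZS_MIN_ALLOC_SIZE))
		idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
				   ZS_SIZE_CLASS_DELTA);

	return min_t(int, ZS_SIZE_CLASSES - 1, idx);
}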

static inline void class_stat_add(struct size_class *class, int type,
				  unsigned long cnt)
{}

static inline void class_stat_sub(struct size_class *class, int type,
				  unsigned long cnt)
{}

static inline unsigned long class_stat_read(struct size_class *class, int type)
{}

#ifdef CONFIG_ZSMALLOC_STAT

static void __init zs_stat_init(void)
{}

static void __exit zs_stat_exit(void)
{}

static unsigned long zs_can_compact(struct size_class *class);

static int zs_stats_size_show(struct seq_file *s, void *v)
{}
DEFINE_SHOW_ATTRIBUTE();

static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
{}

static void zs_pool_stat_destroy(struct zs_pool *pool)
{}

#else /* CONFIG_ZSMALLOC_STAT */
static void __init zs_stat_init(void)
{
}

static void __exit zs_stat_exit(void)
{
}

static inline void zs_pool_stat_create(struct zs_pool *pool, const char *name)
{
}

static inline void zs_pool_stat_destroy(struct zs_pool *pool)
{
}
#endif


/*
 * For each size class, zspages are divided into different groups
 * depending on their usage ratio. This function returns the fullness
 * status of the given zspage.
 */
static int get_fullness_group(struct size_class *class, struct zspage *zspage)
{}
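
/*
 * Sketch of one plausible bucketing that matches the description above and
 * the fullness comment near the top of the file. It assumes the (elided)
 * enum lays out ZS_INUSE_RATIO_0, then ZS_INUSE_RATIO_10 through
 * ZS_INUSE_RATIO_99 consecutively, then ZS_INUSE_RATIO_100; only
 * ZS_INUSE_RATIO_0 is referenced elsewhere in this file, the other names
 * are assumptions for illustration.
 */
static inline int example_fullness_group(int inuse, int objs_per_zspage)
{
	if (inuse == 0)
		return ZS_INUSE_RATIO_0;
	if (inuse == objs_per_zspage)
		return ZS_INUSE_RATIO_100;

	/* roughly the first 10% of usage lands in the first partial bucket */
	return ZS_INUSE_RATIO_10 + (100 * inuse / objs_per_zspage) / 10;
}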

/*
 * Each size class maintains various freelists and zspages are assigned
 * to one of these freelists based on the number of live objects they
 * have. This function inserts the given zspage into the freelist
 * identified by <class, fullness_group>.
 */
static void insert_zspage(struct size_class *class,
				struct zspage *zspage,
				int fullness)
{}

/*
 * This function removes the given zspage from the freelist identified
 * by <class, fullness_group>.
 */
static void remove_zspage(struct size_class *class, struct zspage *zspage)
{}

/*
 * Each size class maintains zspages in different fullness groups depending
 * on the number of live objects they contain. When allocating or freeing
 * objects, the fullness status of the page can change, for instance, from
 * INUSE_RATIO_80 to INUSE_RATIO_70 when freeing an object. This function
 * checks if such a status change has occurred for the given page and
 * accordingly moves the page from the list of the old fullness group to that
 * of the new fullness group.
 */
static int fix_fullness_group(struct size_class *class, struct zspage *zspage)
{}
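
/*
 * Sketch of the regrouping step described above. The current group is
 * normally cached in the (elided) struct zspage; for illustration the old
 * group is simply passed in by the caller here.
 */
static inline int example_fix_fullness(struct size_class *class,
				       struct zspage *zspage, int old_fullness)
{
	int new_fullness = get_fullness_group(class, zspage);

	if (new_fullness != old_fullness) {
		remove_zspage(class, zspage);
		insert_zspage(class, zspage, new_fullness);
	}

	return new_fullness;
}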

static struct zspage *get_zspage(struct page *page)
{}

static struct page *get_next_page(struct page *page)
{}

/**
 * obj_to_location - get (<page>, <obj_idx>) from encoded object value
 * @obj: the encoded object value
 * @page: page in which the object resides
 * @obj_idx: object index
 */
static void obj_to_location(unsigned long obj, struct page **page,
				unsigned int *obj_idx)
{}

static void obj_to_page(unsigned long obj, struct page **page)
{}

/**
 * location_to_obj - get obj value encoded from (<page>, <obj_idx>)
 * @page: page in which the object resides
 * @obj_idx: object index
 */
static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
{}

static unsigned long handle_to_obj(unsigned long handle)
{}

static inline bool obj_allocated(struct page *page, void *obj,
				 unsigned long *phandle)
{}

static void reset_page(struct page *page)
{}

static int trylock_zspage(struct zspage *zspage)
{}

static void __free_zspage(struct zs_pool *pool, struct size_class *class,
				struct zspage *zspage)
{}

static void free_zspage(struct zs_pool *pool, struct size_class *class,
				struct zspage *zspage)
{}

/* Initialize a newly allocated zspage */
static void init_zspage(struct size_class *class, struct zspage *zspage)
{}

static void create_page_chain(struct size_class *class, struct zspage *zspage,
				struct page *pages[])
{}

/*
 * Allocate a zspage for the given size class
 */
static struct zspage *alloc_zspage(struct zs_pool *pool,
					struct size_class *class,
					gfp_t gfp)
{}

static struct zspage *find_get_zspage(struct size_class *class)
{}

static inline int __zs_cpu_up(struct mapping_area *area)
{}

static inline void __zs_cpu_down(struct mapping_area *area)
{}

static void *__zs_map_object(struct mapping_area *area,
			struct page *pages[2], int off, int size)
{}

static void __zs_unmap_object(struct mapping_area *area,
			struct page *pages[2], int off, int size)
{}

static int zs_cpu_prepare(unsigned int cpu)
{}

static int zs_cpu_dead(unsigned int cpu)
{}

static bool can_merge(struct size_class *prev, int pages_per_zspage,
					int objs_per_zspage)
{}

static bool zspage_full(struct size_class *class, struct zspage *zspage)
{}

static bool zspage_empty(struct zspage *zspage)
{}

/**
 * zs_lookup_class_index() - Returns the index of the zsmalloc &size_class
 * that holds objects of the provided size.
 * @pool: zsmalloc pool to use
 * @size: object size
 *
 * Context: Any context.
 *
 * Return: the index of the zsmalloc &size_class that holds objects of the
 * provided size.
 */
unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size)
{}
EXPORT_SYMBOL_GPL();

unsigned long zs_get_total_pages(struct zs_pool *pool)
{}
EXPORT_SYMBOL_GPL();

/**
 * zs_map_object - get address of allocated object from handle.
 * @pool: pool from which the object was allocated
 * @handle: handle returned from zs_malloc
 * @mm: mapping mode to use
 *
 * Before using an object allocated from zs_malloc, it must be mapped using
 * this function. When done with the object, it must be unmapped using
 * zs_unmap_object.
 *
 * Only one object can be mapped per cpu at a time. There is no protection
 * against nested mappings.
 *
 * This function returns with preemption and page faults disabled.
 */
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
			enum zs_mapmode mm)
{}
EXPORT_SYMBOL_GPL();

void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
{}
EXPORT_SYMBOL_GPL();
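
/*
 * Usage sketch for the map/unmap pair documented above: copy a buffer into a
 * previously allocated object. Pool, handle and buffer are caller-provided
 * placeholders; ZS_MM_WO is the write-only mode from enum zs_mapmode in
 * <linux/zsmalloc.h>.
 */
static inline void example_store_object(struct zs_pool *pool,
					unsigned long handle,
					const void *buf, size_t len)
{
	void *dst = zs_map_object(pool, handle, ZS_MM_WO);

	memcpy(dst, buf, len);
	/* unmapping re-enables preemption and page faults */
	zs_unmap_object(pool, handle);
}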

/**
 * zs_huge_class_size() - Returns the size (in bytes) of the first huge
 *                        zsmalloc &size_class.
 * @pool: zsmalloc pool to use
 *
 * The function returns the size of the first huge class - any object of equal
 * or bigger size will be stored in a zspage consisting of a single physical
 * page.
 *
 * Context: Any context.
 *
 * Return: the size (in bytes) of the first huge zsmalloc &size_class.
 */
size_t zs_huge_class_size(struct zs_pool *pool)
{}
EXPORT_SYMBOL_GPL();
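
/*
 * Usage sketch for zs_huge_class_size() above: callers such as zram use the
 * returned threshold to spot buffers that would consume a whole page anyway.
 * "comp_len" is a placeholder for a compressed-buffer length.
 */
static inline bool example_is_huge_obj(struct zs_pool *pool, size_t comp_len)
{
	return comp_len >= zs_huge_class_size(pool);
}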

static unsigned long obj_malloc(struct zs_pool *pool,
				struct zspage *zspage, unsigned long handle)
{}


/**
 * zs_malloc - Allocate block of given size from pool.
 * @pool: pool to allocate from
 * @size: size of block to allocate
 * @gfp: gfp flags when allocating object
 *
 * On success, a handle to the allocated object is returned,
 * otherwise an ERR_PTR()-encoded value.
 * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
 */
unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
{}
EXPORT_SYMBOL_GPL();

static void obj_free(int class_size, unsigned long obj)
{}

void zs_free(struct zs_pool *pool, unsigned long handle)
{}
EXPORT_SYMBOL_GPL();
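
/*
 * Allocation lifecycle sketch for zs_malloc()/zs_free() above, following the
 * error convention stated in the zs_malloc() kernel-doc (an ERR_PTR()-encoded
 * value on failure). GFP_KERNEL is just an example flag.
 */
static inline int example_alloc_and_free(struct zs_pool *pool, size_t size)
{
	unsigned long handle = zs_malloc(pool, size, GFP_KERNEL);

	if (IS_ERR_VALUE(handle))
		return PTR_ERR((void *)handle);

	/* ... access the object via zs_map_object()/zs_unmap_object() ... */

	zs_free(pool, handle);
	return 0;
}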

static void zs_object_copy(struct size_class *class, unsigned long dst,
				unsigned long src)
{}

/*
 * Find an allocated object in the zspage, starting the search from the
 * given object index, and return its handle.
 */
static unsigned long find_alloced_obj(struct size_class *class,
				      struct page *page, int *obj_idx)
{}

static void migrate_zspage(struct zs_pool *pool, struct zspage *src_zspage,
			   struct zspage *dst_zspage)
{}

static struct zspage *isolate_src_zspage(struct size_class *class)
{}

static struct zspage *isolate_dst_zspage(struct size_class *class)
{}

/*
 * putback_zspage - add @zspage into the right fullness list of @class
 * @class: destination class
 * @zspage: target page
 *
 * Return @zspage's fullness status
 */
static int putback_zspage(struct size_class *class, struct zspage *zspage)
{}

#ifdef CONFIG_COMPACTION
/*
 * To prevent the zspage from being destroyed during migration, zspage
 * freeing should hold the locks of all pages in the zspage.
 */
static void lock_zspage(struct zspage *zspage)
{}
#endif /* CONFIG_COMPACTION */

static void migrate_lock_init(struct zspage *zspage)
{}

static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
{}

static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
{}

static void migrate_write_lock(struct zspage *zspage)
{}

static void migrate_write_unlock(struct zspage *zspage)
{}

#ifdef CONFIG_COMPACTION

static const struct movable_operations zsmalloc_mops;

static void replace_sub_page(struct size_class *class, struct zspage *zspage,
				struct page *newpage, struct page *oldpage)
{}

static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
{}

static int zs_page_migrate(struct page *newpage, struct page *page,
		enum migrate_mode mode)
{}

static void zs_page_putback(struct page *page)
{}

static const struct movable_operations zsmalloc_mops =;

/*
 * Caller should hold page_lock of all pages in the zspage
 * In here, we cannot use zspage meta data.
 */
static void async_free_zspage(struct work_struct *work)
{
	int i;
	struct size_class *class;
	struct zspage *zspage, *tmp;
	LIST_HEAD(free_pages);
	struct zs_pool *pool = container_of(work, struct zs_pool,
					free_work);

	/* Detach all empty (ZS_INUSE_RATIO_0) zspages from their classes. */
	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
		class = pool->size_class[i];
		if (class->index != i)
			continue;

		spin_lock(&class->lock);
		list_splice_init(&class->fullness_list[ZS_INUSE_RATIO_0],
				 &free_pages);
		spin_unlock(&class->lock);
	}

	/* Free the detached zspages, locking all of their pages first. */
	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
		list_del(&zspage->list);
		lock_zspage(zspage);

		class = zspage_class(pool, zspage);
		spin_lock(&class->lock);
		class_stat_sub(class, ZS_INUSE_RATIO_0, 1);
		__free_zspage(pool, class, zspage);
		spin_unlock(&class->lock);
	}
}

static void kick_deferred_free(struct zs_pool *pool)
{}

static void zs_flush_migration(struct zs_pool *pool)
{}

static void init_deferred_free(struct zs_pool *pool)
{}

static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage)
{}
#else
static inline void zs_flush_migration(struct zs_pool *pool) { }
#endif

/*
 * Based on the number of unused allocated objects, calculate
 * and return the number of pages that we can free.
 */
static unsigned long zs_can_compact(struct size_class *class)
{}
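
/*
 * Sketch of the calculation described above; obj_allocated and obj_used
 * stand in for the (elided) per-class statistics counters.
 */
static inline unsigned long example_can_compact(unsigned long obj_allocated,
						unsigned long obj_used,
						int objs_per_zspage,
						int pages_per_zspage)
{
	unsigned long obj_wasted;

	if (obj_allocated <= obj_used)
		return 0;

	/* whole zspages' worth of allocated-but-unused objects ... */
	obj_wasted = (obj_allocated - obj_used) / objs_per_zspage;

	/* ... can be compacted away, freeing this many pages */
	return obj_wasted * pages_per_zspage;
}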

static unsigned long __zs_compact(struct zs_pool *pool,
				  struct size_class *class)
{}

unsigned long zs_compact(struct zs_pool *pool)
{}
EXPORT_SYMBOL_GPL();

void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats)
{}
EXPORT_SYMBOL_GPL();

static unsigned long zs_shrinker_scan(struct shrinker *shrinker,
		struct shrink_control *sc)
{}

static unsigned long zs_shrinker_count(struct shrinker *shrinker,
		struct shrink_control *sc)
{}

static void zs_unregister_shrinker(struct zs_pool *pool)
{}

static int zs_register_shrinker(struct zs_pool *pool)
{}

static int calculate_zspage_chain_size(int class_size)
{}

/**
 * zs_create_pool - Creates an allocation pool to work from.
 * @name: pool name to be created
 *
 * This function must be called before anything else when using
 * the zsmalloc allocator.
 *
 * On success, a pointer to the newly created pool is returned,
 * otherwise NULL.
 */
struct zs_pool *zs_create_pool(const char *name)
{}
EXPORT_SYMBOL_GPL();

void zs_destroy_pool(struct zs_pool *pool)
{}
EXPORT_SYMBOL_GPL();
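
/*
 * Pool lifecycle sketch for the create/destroy pair above; the pool name
 * "example" is a placeholder.
 */
static inline int example_pool_lifecycle(void)
{
	struct zs_pool *pool = zs_create_pool("example");

	if (!pool)
		return -ENOMEM;

	/* ... zs_malloc()/zs_map_object()/zs_free() against the pool ... */

	zs_destroy_pool(pool);
	return 0;
}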

static int __init zs_init(void)
{}

static void __exit zs_exit(void)
{}

module_init();
module_exit(zs_exit);

MODULE_LICENSE();
MODULE_AUTHOR();
MODULE_DESCRIPTION();