vmw_balloon.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0
/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
 *
 * This is VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests.  The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) …

#include <linux/types.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/balloon_compaction.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR(…) …;
MODULE_DESCRIPTION(…) …;
MODULE_ALIAS(…) …;
MODULE_ALIAS(…) …;
MODULE_LICENSE(…) …;

static bool __read_mostly vmwballoon_shrinker_enable;
module_param(vmwballoon_shrinker_enable, bool, 0444);
MODULE_PARM_DESC(…) …;

/* Delay in seconds after shrink before inflation. */
#define VMBALLOON_SHRINK_DELAY …

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED …

/* Magic number for the balloon mount-point */
#define BALLOON_VMW_MAGIC …

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT …
#define VMW_BALLOON_HV_MAGIC …
#define VMW_BALLOON_GUEST_ID …

enum vmwballoon_capabilities { … };

#define VMW_BALLOON_CAPABILITIES_COMMON …

#define VMW_BALLOON_2M_ORDER …

/*
 * 64-bit targets are only supported in 64-bit
 */
#ifdef CONFIG_64BIT
#define VMW_BALLOON_CAPABILITIES …
#else
#define VMW_BALLOON_CAPABILITIES …
#endif

enum vmballoon_page_size_type { … };

#define VMW_BALLOON_NUM_PAGE_SIZES …

static const char * const vmballoon_page_size_names[] = …;

enum vmballoon_op { … };

enum vmballoon_op_stat_type { … };

#define VMW_BALLOON_OP_STAT_TYPES …

/**
 * enum vmballoon_cmd_type - backdoor commands.
 *
 * Availability of the commands is as followed:
 *
 * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and
 * %VMW_BALLOON_CMD_GUEST_ID are always available.
 *
 * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then
 * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available.
 *
 * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then
 * %VMW_BALLOON_CMD_BATCHED_LOCK and VMW_BALLOON_CMD_BATCHED_UNLOCK commands
 * are available.
 *
 * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then
 * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK
 * are supported.
 *
 * If the host reports  VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then
 * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported.
 *
 * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor.
 * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size.
 * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page.
 * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about
 *			    to be deflated from the balloon.
 * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that
 *			      runs in the VM.
 * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of
 *				  ballooned pages (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of
 *				  pages that are about to be deflated from the
 *				  balloon (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK
 *				     for 2MB pages.
 * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to
 *				       @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB
 *				       pages.
 * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification
 *				       that would be invoked when the balloon
 *				       size changes.
 * @VMW_BALLOON_CMD_LAST: Value of the last command.
 */
enum vmballoon_cmd_type { … };

#define VMW_BALLOON_CMD_NUM …

enum vmballoon_error_codes { … };

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES …

#define VMW_BALLOON_CMD_WITH_TARGET_MASK …

static const char * const vmballoon_cmd_names[] = …;

enum vmballoon_stat_page { … };

#define VMW_BALLOON_PAGE_STAT_NUM …

enum vmballoon_stat_general { … };

#define VMW_BALLOON_STAT_NUM …

static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);

struct vmballoon_ctl { … };

/**
 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
 *
 * @status: the status of the operation, which is written by the hypervisor.
 * @reserved: reserved for future use. Must be set to zero.
 * @pfn: the physical frame number of the page to be locked or unlocked.
 */
struct vmballoon_batch_entry { … } __packed;

struct vmballoon { … };

static struct vmballoon balloon;

struct vmballoon_stats { … };

static inline bool is_vmballoon_stats_on(void)
{ … }

static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op,
					  enum vmballoon_op_stat_type type)
{ … }

static inline void vmballoon_stats_gen_inc(struct vmballoon *b,
					   enum vmballoon_stat_general stat)
{ … }

static inline void vmballoon_stats_gen_add(struct vmballoon *b,
					   enum vmballoon_stat_general stat,
					   unsigned int val)
{ … }

static inline void vmballoon_stats_page_inc(struct vmballoon *b,
					    enum vmballoon_stat_page stat,
					    enum vmballoon_page_size_type size)
{ … }

static inline void vmballoon_stats_page_add(struct vmballoon *b,
					    enum vmballoon_stat_page stat,
					    enum vmballoon_page_size_type size,
					    unsigned int val)
{ … }

static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
		unsigned long arg2, unsigned long *result)
{ … }

static __always_inline unsigned long
vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
	      unsigned long arg2)
{ … }

/*
 * Send "start" command to the host, communicating supported version
 * of the protocol.
 */
static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{ … }

/**
 * vmballoon_send_guest_id - communicate guest type to the host.
 *
 * @b: pointer to the balloon.
 *
 * Communicate guest type to the host so that it can adjust ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending "start" command and is part of
 * standard reset sequence.
 *
 * Return: zero on success or appropriate error code.
 */
static int vmballoon_send_guest_id(struct vmballoon *b)
{ … }

/**
 * vmballoon_page_order() - return the order of the page
 * @page_size: the size of the page.
 *
 * Return: the allocation order.
 */
static inline
unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size)
{ … }

/**
 * vmballoon_page_in_frames() - returns the number of frames in a page.
 * @page_size: the size of the page.
 *
 * Return: the number of 4k frames.
 */
static inline unsigned int
vmballoon_page_in_frames(enum vmballoon_page_size_type page_size)
{ … }

/**
 * vmballoon_mark_page_offline() - mark a page as offline
 * @page: pointer for the page.
 * @page_size: the size of the page.
 */
static void
vmballoon_mark_page_offline(struct page *page,
			    enum vmballoon_page_size_type page_size)
{ … }

/**
 * vmballoon_mark_page_online() - mark a page as online
 * @page: pointer for the page.
 * @page_size: the size of the page.
 */
static void
vmballoon_mark_page_online(struct page *page,
			   enum vmballoon_page_size_type page_size)
{ … }

/**
 * vmballoon_send_get_target() - Retrieve desired balloon size from the host.
 *
 * @b: pointer to the balloon.
 *
 * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required
 * by the host-guest protocol and EIO if an error occurred in communicating with
 * the host.
 */
static int vmballoon_send_get_target(struct vmballoon *b)
{ … }

/**
 * vmballoon_alloc_page_list - allocates a list of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 * @req_n_pages: the number of requested pages.
 *
 * Tries to allocate @req_n_pages. Add them to the list of balloon pages in
 * @ctl.pages and updates @ctl.n_pages to reflect the number of pages.
 *
 * Return: zero on success or error code otherwise.
 */
static int vmballoon_alloc_page_list(struct vmballoon *b,
				     struct vmballoon_ctl *ctl,
				     unsigned int req_n_pages)
{ … }

/**
 * vmballoon_handle_one_result - Handle lock/unlock result for a single page.
 *
 * @b: pointer for %struct vmballoon.
 * @page: pointer for the page whose result should be handled.
 * @page_size: size of the page.
 * @status: status of the operation as provided by the hypervisor.
 */
static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page,
				       enum vmballoon_page_size_type page_size,
				       unsigned long status)
{ … }

/**
 * vmballoon_status_page - returns the status of (un)lock operation
 *
 * @b: pointer to the balloon.
 * @idx: index for the page for which the operation is performed.
 * @p: pointer to where the page struct is returned.
 *
 * Following a lock or unlock operation, returns the status of the operation for
 * an individual page. Provides the page that the operation was performed on on
 * the @page argument.
 *
 * Returns: The status of a lock or unlock operation for an individual page.
 */
static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
					   struct page **p)
{ … }

/**
 * vmballoon_lock_op - notifies the host about inflated/deflated pages.
 * @b: pointer to the balloon.
 * @num_pages: number of inflated/deflated pages.
 * @page_size: size of the page.
 * @op: the type of operation (lock or unlock).
 *
 * Notify the host about page(s) that were ballooned (or removed from the
 * balloon) so that host can use it without fear that guest will need it (or
 * stop using them since the VM does). Host may reject some pages, we need to
 * check the return value and maybe submit a different page. The pages that are
 * inflated/deflated are pointed by @b->page.
 *
 * Return: result as provided by the hypervisor.
 */
static unsigned long vmballoon_lock_op(struct vmballoon *b,
				       unsigned int num_pages,
				       enum vmballoon_page_size_type page_size,
				       enum vmballoon_op op)
{ … }

/**
 * vmballoon_add_page - adds a page towards lock/unlock operation.
 *
 * @b: pointer to the balloon.
 * @idx: index of the page to be ballooned in this batch.
 * @p: pointer to the page that is about to be ballooned.
 *
 * Adds the page to be ballooned. Must be called while holding @comm_lock.
 */
static void vmballoon_add_page(struct vmballoon *b, unsigned int idx,
			       struct page *p)
{ … }

/**
 * vmballoon_lock - lock or unlock a batch of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 *
 * Notifies the host of about ballooned pages (after inflation or deflation,
 * according to @ctl). If the host rejects the page put it on the
 * @ctl refuse list. These refused page are then released when moving to the
 * next size of pages.
 *
 * Note that we neither free any @page here nor put them back on the ballooned
 * pages list. Instead we queue it for later processing. We do that for several
 * reasons. First, we do not want to free the page under the lock. Second, it
 * allows us to unify the handling of lock and unlock. In the inflate case, the
 * caller will check if there are too many refused pages and release them.
 * Although it is not identical to the past behavior, it should not affect
 * performance.
 */
static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl)
{ … }

/**
 * vmballoon_release_page_list() - Releases a page list
 *
 * @page_list: list of pages to release.
 * @n_pages: pointer to the number of pages.
 * @page_size: whether the pages in the list are 2MB (or else 4KB).
 *
 * Releases the list of pages and zeros the number of pages.
 */
static void vmballoon_release_page_list(struct list_head *page_list,
				       int *n_pages,
				       enum vmballoon_page_size_type page_size)
{ … }


/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
					    struct vmballoon_ctl *ctl)
{ … }

/**
 * vmballoon_change - retrieve the required balloon change
 *
 * @b: pointer for the balloon.
 *
 * Return: the required change for the balloon size. A positive number
 * indicates inflation, a negative number indicates a deflation.
 */
static int64_t vmballoon_change(struct vmballoon *b)
{ … }

/**
 * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation.
 *
 * @b: pointer to balloon.
 * @pages: list of pages to enqueue.
 * @n_pages: pointer to number of pages in list. The value is zeroed.
 * @page_size: whether the pages are 2MB or 4KB pages.
 *
 * Enqueues the provides list of pages in the ballooned page list, clears the
 * list and zeroes the number of pages that was provided.
 */
static void vmballoon_enqueue_page_list(struct vmballoon *b,
					struct list_head *pages,
					unsigned int *n_pages,
					enum vmballoon_page_size_type page_size)
{ … }

/**
 * vmballoon_dequeue_page_list() - Dequeues page lists for deflation.
 *
 * @b: pointer to balloon.
 * @pages: list of pages to enqueue.
 * @n_pages: pointer to number of pages in list. The value is zeroed.
 * @page_size: whether the pages are 2MB or 4KB pages.
 * @n_req_pages: the number of requested pages.
 *
 * Dequeues the number of requested pages from the balloon for deflation. The
 * number of dequeued pages may be lower, if not enough pages in the requested
 * size are available.
 */
static void vmballoon_dequeue_page_list(struct vmballoon *b,
					struct list_head *pages,
					unsigned int *n_pages,
					enum vmballoon_page_size_type page_size,
					unsigned int n_req_pages)
{ … }

/**
 * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k.
 *
 * If inflation of 2MB pages was denied by the hypervisor, it is likely to be
 * due to one or few 4KB pages. These 2MB pages may keep being allocated and
 * then being refused. To prevent this case, this function splits the refused
 * pages into 4KB pages and adds them into @prealloc_pages list.
 *
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 */
static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl)
{ … }

/**
 * vmballoon_inflate() - Inflate the balloon towards its target size.
 *
 * @b: pointer to the balloon.
 */
static void vmballoon_inflate(struct vmballoon *b)
{ … }

/**
 * vmballoon_deflate() - Decrease the size of the balloon.
 *
 * @b: pointer to the balloon
 * @n_frames: the number of frames to deflate. If zero, automatically
 * calculated according to the target size.
 * @coordinated: whether to coordinate with the host
 *
 * Decrease the size of the balloon allowing guest to use more memory.
 *
 * Return: The number of deflated frames (i.e., basic page size units)
 */
static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames,
				       bool coordinated)
{ … }

/**
 * vmballoon_deinit_batching - disables batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Disables batching, by deallocating the page for communication with the
 * hypervisor and disabling the static key to indicate that batching is off.
 */
static void vmballoon_deinit_batching(struct vmballoon *b)
{ … }

/**
 * vmballoon_init_batching - enable batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Enables batching, by allocating a page for communication with the hypervisor
 * and enabling the static_key to use batching.
 *
 * Return: zero on success or an appropriate error-code.
 */
static int vmballoon_init_batching(struct vmballoon *b)
{ … }

/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{ … }

/*
 * Clean up vmci doorbell
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{ … }

/**
 * vmballoon_vmci_init - Initialize vmci doorbell.
 *
 * @b: pointer to the balloon.
 *
 * Return: zero on success or when wakeup command not supported. Error-code
 * otherwise.
 *
 * Initialize vmci doorbell, to get notified as soon as balloon changes.
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{ … }

/**
 * vmballoon_pop - Quickly release all pages allocate for the balloon.
 *
 * @b: pointer to the balloon.
 *
 * This function is called when host decides to "reset" balloon for one reason
 * or another. Unlike normal "deflate" we do not (shall not) notify host of the
 * pages being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{ … }

/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not  empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{ … }

/**
 * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
 *
 * @work: pointer to the &work_struct which is provided by the workqueue.
 *
 * Resets the protocol if needed, gets the new size and adjusts balloon as
 * needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{ … }

/**
 * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure.
 * @shrinker: pointer to the balloon shrinker.
 * @sc: page reclaim information.
 *
 * Returns: number of pages that were freed during deflation.
 */
static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker,
					     struct shrink_control *sc)
{ … }

/**
 * vmballoon_shrinker_count() - return the number of ballooned pages.
 * @shrinker: pointer to the balloon shrinker.
 * @sc: page reclaim information.
 *
 * Returns: number of 4k pages that are allocated for the balloon and can
 *	    therefore be reclaimed under pressure.
 */
static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker,
					      struct shrink_control *sc)
{ … }

static void vmballoon_unregister_shrinker(struct vmballoon *b)
{ … }

static int vmballoon_register_shrinker(struct vmballoon *b)
{ … }

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static const char * const vmballoon_stat_page_names[] = …;

static const char * const vmballoon_stat_names[] = …;

static int vmballoon_enable_stats(struct vmballoon *b)
{ … }

/**
 * vmballoon_debug_show - shows statistics of balloon operations.
 * @f: pointer to the &struct seq_file.
 * @offset: ignored.
 *
 * Provides the statistics that can be accessed in vmmemctl in the debugfs.
 * To avoid the overhead - mainly that of memory - of collecting the statistics,
 * we only collect statistics after the first time the counters are read.
 *
 * Return: zero on success or an error code.
 */
static int vmballoon_debug_show(struct seq_file *f, void *offset)
{ … }

DEFINE_SHOW_ATTRIBUTE(…);

static void __init vmballoon_debugfs_init(struct vmballoon *b)
{ … }

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{ … }

#else

static inline void vmballoon_debugfs_init(struct vmballoon *b)
{
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */


#ifdef CONFIG_BALLOON_COMPACTION
/**
 * vmballoon_migratepage() - migrates a balloon page.
 * @b_dev_info: balloon device information descriptor.
 * @newpage: the page to which @page should be migrated.
 * @page: a ballooned page that should be migrated.
 * @mode: migration mode, ignored.
 *
 * This function is really open-coded, but that is according to the interface
 * that balloon_compaction provides.
 *
 * Return: zero on success, -EAGAIN when migration cannot be performed
 *	   momentarily, and -EBUSY if migration failed and should be retried
 *	   with that specific page.
 */
static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
				 struct page *newpage, struct page *page,
				 enum migrate_mode mode)
{ … }

/**
 * vmballoon_compaction_init() - initialized compaction for the balloon.
 *
 * @b: pointer to the balloon.
 *
 * If during the initialization a failure occurred, this function does not
 * perform cleanup. The caller must call vmballoon_compaction_deinit() in this
 * case.
 *
 * Return: zero on success or error code on failure.
 */
static __init void vmballoon_compaction_init(struct vmballoon *b)
{ … }

#else /* CONFIG_BALLOON_COMPACTION */
static inline void vmballoon_compaction_init(struct vmballoon *b)
{
}
#endif /* CONFIG_BALLOON_COMPACTION */

static int __init vmballoon_init(void)
{ … }

/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);

static void __exit vmballoon_exit(void)
{ … }
module_exit(vmballoon_exit);
linux/drivers/misc/vmw_balloon.c