hv_balloon.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2012, Microsoft Corporation.
 *
 * Author:
 *   K. Y. Srinivasan <[email protected]>
 */

#define pr_fmt(fmt) …

#include <linux/cleanup.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mman.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/count_zeros.h>
#include <linux/memory_hotplug.h>
#include <linux/memory.h>
#include <linux/notifier.h>
#include <linux/percpu_counter.h>
#include <linux/page_reporting.h>
#include <linux/sizes.h>

#include <linux/hyperv.h>
#include <asm/hyperv-tlfs.h>

#include <asm/mshyperv.h>

#define CREATE_TRACE_POINTS
#include "hv_trace_balloon.h"

/*
 * We begin with definitions supporting the Dynamic Memory protocol
 * with the host.
 *
 * Begin protocol definitions.
 */

/*
 * Protocol versions. The low word is the minor version, the high word the major
 * version.
 *
 * History:
 * Initial version 1.0
 * Changed to 0.1 on 2009/03/25
 * Changed to 0.2 on 2009/05/14
 * Changed to 0.3 on 2009/12/03
 * Changed to 1.0 on 2011/04/05
 */

#define DYNMEM_MAKE_VERSION(Major, Minor) …
#define DYNMEM_MAJOR_VERSION(Version) …
#define DYNMEM_MINOR_VERSION(Version) …

enum { … };

/*
 * Message Types
 */

enum dm_message_type { … };

/*
 * Structures defining the dynamic memory management
 * protocol.
 */

dm_version __packed;

dm_caps __packed;

dm_mem_page_range __packed;

/*
 * The header for all dynamic memory messages:
 *
 * type: Type of the message.
 * size: Size of the message in bytes; including the header.
 * trans_id: The guest is responsible for manufacturing this ID.
 */

struct dm_header { … } __packed;

/*
 * A generic message format for dynamic memory.
 * Specific message formats are defined later in the file.
 */

struct dm_message { … } __packed;

/*
 * Specific message types supporting the dynamic memory protocol.
 */

/*
 * Version negotiation message. Sent from the guest to the host.
 * The guest is free to try different versions until the host
 * accepts the version.
 *
 * dm_version: The protocol version requested.
 * is_last_attempt: If TRUE, this is the last version the guest will request.
 * reservedz: Reserved field, set to zero.
 */

struct dm_version_request { … } __packed;

/*
 * Version response message; Host to Guest and indicates
 * if the host has accepted the version sent by the guest.
 *
 * is_accepted: If TRUE, host has accepted the version and the guest
 * should proceed to the next stage of the protocol. FALSE indicates that
 * guest should re-try with a different version.
 *
 * reservedz: Reserved field, set to zero.
 */

struct dm_version_response { … } __packed;

/*
 * Message reporting capabilities. This is sent from the guest to the
 * host.
 */

struct dm_capabilities { … } __packed;

/*
 * Response to the capabilities message. This is sent from the host to the
 * guest. This message notifies if the host has accepted the guest's
 * capabilities. If the host has not accepted, the guest must shut down
 * the service.
 *
 * is_accepted: Indicates if the host has accepted guest's capabilities.
 * reservedz: Must be 0.
 */

struct dm_capabilities_resp_msg { … } __packed;

/*
 * This message is used to report memory pressure from the guest.
 * This message is not part of any transaction and there is no
 * response to this message.
 *
 * num_avail: Available memory in pages.
 * num_committed: Committed memory in pages.
 * page_file_size: The accumulated size of all page files
 *		   in the system in pages.
 * zero_free: The number of zero and free pages.
 * page_file_writes: The writes to the page file in pages.
 * io_diff: An indicator of file cache efficiency or page file activity,
 *	    calculated as File Cache Page Fault Count - Page Read Count.
 *	    This value is in pages.
 *
 * Some of these metrics are Windows specific; fortunately,
 * the algorithm on the host side that computes the guest memory
 * pressure only uses the num_committed value.
 */

struct dm_status { … } __packed;

/*
 * Message to ask the guest to allocate memory - balloon up message.
 * This message is sent from the host to the guest. The guest may not be
 * able to allocate as much memory as requested.
 *
 * num_pages: number of pages to allocate.
 */

struct dm_balloon { … } __packed;

/*
 * Balloon response message; this message is sent from the guest
 * to the host in response to the balloon message.
 *
 * reservedz: Reserved; must be set to zero.
 * more_pages: If FALSE, this is the last message of the transaction.
 * If TRUE, there will be at least one more message from the guest.
 *
 * range_count: The number of ranges in the range array.
 *
 * range_array: An array of page ranges returned to the host.
 *
 */

struct dm_balloon_response { … } __packed;

/*
 * Un-balloon message; this message is sent from the host
 * to the guest to give guest more memory.
 *
 * more_pages: If FALSE, this is the last message of the transaction.
 * If TRUE, there will be at least one more message from the host.
 *
 * reservedz: Reserved; must be set to zero.
 *
 * range_count: The number of ranges in the range array.
 *
 * range_array: An array of page ranges returned to the guest.
 *
 */

struct dm_unballoon_request { … } __packed;

/*
 * Un-balloon response message; this message is sent from the guest
 * to the host in response to an unballoon request.
 *
 */

struct dm_unballoon_response { … } __packed;

/*
 * Hot add request message. Message sent from the host to the guest.
 *
 * mem_range: Memory range to hot add.
 *
 */

struct dm_hot_add { … } __packed;

/*
 * Hot add response message.
 * This message is sent by the guest to report the status of a hot add request.
 * If page_count is less than the requested page count, then the host should
 * assume all further hot add requests will fail, since this indicates that
 * the guest has hit an upper physical memory barrier.
 *
 * Hot adds may also fail due to low resources; in this case, the guest must
 * not complete this message until the hot add can succeed, and the host must
 * not send a new hot add request until the response is sent.
 * If the VSC fails to hot add memory
 * DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS times, it fails the request.
 *
 *
 * page_count: number of pages that were successfully hot added.
 *
 * result: result of the operation 1: success, 0: failure.
 *
 */

struct dm_hot_add_response { … } __packed;

/*
 * Types of information sent from host to the guest.
 */

enum dm_info_type { … };

/*
 * Header for the information message.
 */

struct dm_info_header { … } __packed;

/*
 * This message is sent from the host to the guest to pass
 * some relevant information (win8 addition).
 *
 * reserved: not used.
 * info_size: size of the information blob.
 * info: information blob.
 */

struct dm_info_msg { … };

/*
 * End protocol definitions.
 */

/*
 * State to manage hot adding memory into the guest.
 * The range start_pfn : end_pfn specifies the range
 * that the host has asked us to hot add. The range
 * start_pfn : ha_end_pfn specifies the range that we have
 * currently hot added. We hot add in chunks equal to the
 * memory block size; it is possible that we may not be able
 * to bring online all the pages in the region. The range
 * covered_start_pfn:covered_end_pfn defines the pages that can
 * be brought online.
 */

struct hv_hotadd_state { … };

struct hv_hotadd_gap { … };

struct balloon_state { … };

struct hot_add_wrk { … };

static bool allow_hibernation;
static bool hot_add = …;
static bool do_hot_add;
/*
 * Delay reporting memory pressure by
 * the specified number of seconds.
 */
static uint pressure_report_delay = …;
extern unsigned int page_reporting_order;
#define HV_MAX_FAILURES …

/*
 * The last time we posted a pressure report to host.
 */
static unsigned long last_post_time;

static int hv_hypercall_multi_failure;

module_param(hot_add, bool, 0644);
MODULE_PARM_DESC(…) …;

module_param(pressure_report_delay, uint, 0644);
MODULE_PARM_DESC(…) …;
static atomic_t trans_id = …;

static int dm_ring_size = …;

/*
 * Driver specific state.
 */

enum hv_dm_state { … };

static __u8 recv_buffer[HV_HYP_PAGE_SIZE];
static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE];

static unsigned long ha_pages_in_chunk;
#define HA_BYTES_IN_CHUNK …

#define PAGES_IN_2M …

struct hv_dynmem_device { … };

static struct hv_dynmem_device dm_device;

static void post_status(struct hv_dynmem_device *dm);

static void enable_page_reporting(void);

static void disable_page_reporting(void);

#ifdef CONFIG_MEMORY_HOTPLUG
static inline bool has_pfn_is_backed(struct hv_hotadd_state *has,
				     unsigned long pfn)
{ … }

static unsigned long hv_page_offline_check(unsigned long start_pfn,
					   unsigned long nr_pages)
{ … }

static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
			      void *v)
{ … }

static struct notifier_block hv_memory_nb = …;

/* Check if the particular page is backed and can be onlined and online it. */
static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
{ … }

static void hv_bring_pgs_online(struct hv_hotadd_state *has,
				unsigned long start_pfn, unsigned long size)
{ … }

static void hv_mem_hot_add(unsigned long start, unsigned long size,
				unsigned long pfn_count,
				struct hv_hotadd_state *has)
{ … }

static void hv_online_page(struct page *pg, unsigned int order)
{ … }

static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
{ … }

static unsigned long handle_pg_range(unsigned long pg_start,
				     unsigned long pg_count)
{ … }

static unsigned long process_hot_add(unsigned long pg_start,
					unsigned long pfn_cnt,
					unsigned long rg_start,
					unsigned long rg_size)
{ … }

#endif

static void hot_add_req(struct work_struct *dummy)
{ … }

static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
{ … }

static unsigned long compute_balloon_floor(void)
{ … }

/*
 * Compute total committed memory pages
 */

static unsigned long get_pages_committed(struct hv_dynmem_device *dm)
{ … }

/*
 * Post our status as it relates to memory pressure to the
 * host. The host expects guests to post this status
 * periodically at 1 second intervals.
 *
 * The metrics specified in this protocol are very Windows
 * specific and so we cook up numbers here to convey our memory
 * pressure.
 */

static void post_status(struct hv_dynmem_device *dm)
{ … }

static void free_balloon_pages(struct hv_dynmem_device *dm,
			       union dm_mem_page_range *range_array)
{ … }

static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
					unsigned int num_pages,
					struct dm_balloon_response *bl_resp,
					int alloc_unit)
{ … }

static void balloon_up(struct work_struct *dummy)
{ … }

static void balloon_down(struct hv_dynmem_device *dm,
			 struct dm_unballoon_request *req)
{ … }

static void balloon_onchannelcallback(void *context);

static int dm_thread_func(void *dm_dev)
{ … }

static void version_resp(struct hv_dynmem_device *dm,
			 struct dm_version_response *vresp)
{ … }

static void cap_resp(struct hv_dynmem_device *dm,
		     struct dm_capabilities_resp_msg *cap_resp)
{ … }

static void balloon_onchannelcallback(void *context)
{ … }

#define HV_LARGE_REPORTING_ORDER …
#define HV_LARGE_REPORTING_LEN …
static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
			       struct scatterlist *sgl, unsigned int nents)
{ … }

static void enable_page_reporting(void)
{ … }

static void disable_page_reporting(void)
{ … }

static int ballooning_enabled(void)
{ … }

static int hot_add_enabled(void)
{ … }

static int balloon_connect_vsp(struct hv_device *dev)
{ … }

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

/**
 * hv_balloon_debug_show - shows statistics of balloon operations.
 * @f: pointer to the &struct seq_file.
 * @offset: ignored.
 *
 * Provides the statistics that can be accessed in hv-balloon in the debugfs.
 *
 * Return: zero on success or an error code.
 */
static int hv_balloon_debug_show(struct seq_file *f, void *offset)
{ … }

DEFINE_SHOW_ATTRIBUTE(…);

static void  hv_balloon_debugfs_init(struct hv_dynmem_device *b)
{ … }

static void  hv_balloon_debugfs_exit(struct hv_dynmem_device *b)
{ … }

#else

/* No-op stand-in for builds without CONFIG_DEBUG_FS. */
static inline void hv_balloon_debugfs_init(struct hv_dynmem_device *b) { }

/* No-op stand-in for builds without CONFIG_DEBUG_FS. */
static inline void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) { }

#endif	/* CONFIG_DEBUG_FS */

static int balloon_probe(struct hv_device *dev,
			 const struct hv_vmbus_device_id *dev_id)
{ … }

static void balloon_remove(struct hv_device *dev)
{ … }

static int balloon_suspend(struct hv_device *hv_dev)
{ … }

static int balloon_resume(struct hv_device *dev)
{ … }

static const struct hv_vmbus_device_id id_table[] = …;

MODULE_DEVICE_TABLE(vmbus, id_table);

static  struct hv_driver balloon_drv = …;

static int __init init_balloon_drv(void)
{ … }

module_init(…) …;

MODULE_DESCRIPTION(…) …;
MODULE_LICENSE(…) …;
linux/drivers/hv/hv_balloon.c