// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012, Microsoft Corporation. * * Author: * K. Y. Srinivasan <[email protected]> */ #define pr_fmt(fmt) … #include <linux/cleanup.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/mman.h> #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/kthread.h> #include <linux/completion.h> #include <linux/count_zeros.h> #include <linux/memory_hotplug.h> #include <linux/memory.h> #include <linux/notifier.h> #include <linux/percpu_counter.h> #include <linux/page_reporting.h> #include <linux/sizes.h> #include <linux/hyperv.h> #include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> #define CREATE_TRACE_POINTS #include "hv_trace_balloon.h" /* * We begin with definitions supporting the Dynamic Memory protocol * with the host. * * Begin protocol definitions. */ /* * Protocol versions. The low word is the minor version, the high word the major * version. * * History: * Initial version 1.0 * Changed to 0.1 on 2009/03/25 * Changed to 0.2 on 2009/05/14 * Changed to 0.3 on 2009/12/03 * Changed to 1.0 on 2011/04/05 */ #define DYNMEM_MAKE_VERSION(Major, Minor) … #define DYNMEM_MAJOR_VERSION(Version) … #define DYNMEM_MINOR_VERSION(Version) … enum { … }; /* * Message Types */ enum dm_message_type { … }; /* * Structures defining the dynamic memory management * protocol. */ dm_version __packed; dm_caps __packed; dm_mem_page_range __packed; /* * The header for all dynamic memory messages: * * type: Type of the message. * size: Size of the message in bytes, including the header. * trans_id: The guest is responsible for manufacturing this ID. */ struct dm_header { … } __packed; /* * A generic message format for dynamic memory. * Specific message formats are defined later in the file. */ struct dm_message { … } __packed; /* * Specific message types supporting the dynamic memory protocol. */ /* * Version negotiation message. 
Sent from the guest to the host. * The guest is free to try different versions until the host * accepts the version. * * dm_version: The protocol version requested. * is_last_attempt: If TRUE, this is the last version the guest will request. * reservedz: Reserved field, set to zero. */ struct dm_version_request { … } __packed; /* * Version response message; sent from the host to the guest to indicate * if the host has accepted the version sent by the guest. * * is_accepted: If TRUE, the host has accepted the version and the guest * should proceed to the next stage of the protocol. FALSE indicates that * the guest should retry with a different version. * * reservedz: Reserved field, set to zero. */ struct dm_version_response { … } __packed; /* * Message reporting capabilities. This is sent from the guest to the * host. */ struct dm_capabilities { … } __packed; /* * Response to the capabilities message. This is sent from the host to the * guest. This message indicates whether the host has accepted the guest's * capabilities. If the host has not accepted, the guest must shut down * the service. * * is_accepted: Indicates if the host has accepted the guest's capabilities. * reservedz: Must be 0. */ struct dm_capabilities_resp_msg { … } __packed; /* * This message is used to report memory pressure from the guest. * This message is not part of any transaction and there is no * response to this message. * * num_avail: Available memory in pages. * num_committed: Committed memory in pages. * page_file_size: The accumulated size of all page files * in the system in pages. * zero_free: The number of zero and free pages. * page_file_writes: The writes to the page file in pages. * io_diff: An indicator of file cache efficiency or page file activity, * calculated as File Cache Page Fault Count - Page Read Count. * This value is in pages. * * Some of these metrics are Windows-specific; fortunately, * the algorithm on the host side that computes the guest memory * pressure uses only the num_committed value. 
*/ struct dm_status { … } __packed; /* * Message to ask the guest to allocate memory - balloon up message. * This message is sent from the host to the guest. The guest may not be * able to allocate as much memory as requested. * * num_pages: number of pages to allocate. */ struct dm_balloon { … } __packed; /* * Balloon response message; this message is sent from the guest * to the host in response to the balloon message. * * reservedz: Reserved; must be set to zero. * more_pages: If FALSE, this is the last message of the transaction. * If TRUE, there will be at least one more message from the guest. * * range_count: The number of ranges in the range array. * * range_array: An array of page ranges returned to the host. * */ struct dm_balloon_response { … } __packed; /* * Un-balloon message; this message is sent from the host * to the guest to give the guest more memory. * * more_pages: If FALSE, this is the last message of the transaction. * If TRUE, there will be at least one more message from the host. * * reservedz: Reserved; must be set to zero. * * range_count: The number of ranges in the range array. * * range_array: An array of page ranges returned to the guest. * */ struct dm_unballoon_request { … } __packed; /* * Un-balloon response message; this message is sent from the guest * to the host in response to an unballoon request. * */ struct dm_unballoon_response { … } __packed; /* * Hot add request message. Message sent from the host to the guest. * * mem_range: Memory range to hot add. * */ struct dm_hot_add { … } __packed; /* * Hot add response message. * This message is sent by the guest to report the status of a hot add request. * If page_count is less than the requested page count, then the host should * assume all further hot add requests will fail, since this indicates that * the guest has hit an upper physical memory barrier. 
* * Hot adds may also fail due to low resources; in this case, the guest must * not complete this message until the hot add can succeed, and the host must * not send a new hot add request until the response is sent. * If the VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS * times, it fails the request. * * * page_count: number of pages that were successfully hot added. * * result: result of the operation; 1: success, 0: failure. * */ struct dm_hot_add_response { … } __packed; /* * Types of information sent from the host to the guest. */ enum dm_info_type { … }; /* * Header for the information message. */ struct dm_info_header { … } __packed; /* * This message is sent from the host to the guest to pass * some relevant information (win8 addition). * * reserved: not used. * info_size: size of the information blob. * info: information blob. */ struct dm_info_msg { … }; /* * End protocol definitions. */ /* * State to manage hot adding memory into the guest. * The range start_pfn : end_pfn specifies the range * that the host has asked us to hot add. The range * start_pfn : ha_end_pfn specifies the range that we have * currently hot added. We hot add in chunks equal to the * memory block size; it is possible that we may not be able * to bring online all the pages in the region. The range * covered_start_pfn:covered_end_pfn defines the pages that can * be brought online. */ struct hv_hotadd_state { … }; struct hv_hotadd_gap { … }; struct balloon_state { … }; struct hot_add_wrk { … }; static bool allow_hibernation; static bool hot_add = …; static bool do_hot_add; /* * Delay reporting memory pressure by * the specified number of seconds. */ static uint pressure_report_delay = …; extern unsigned int page_reporting_order; #define HV_MAX_FAILURES … /* * The last time we posted a pressure report to the host. 
*/ static unsigned long last_post_time; static int hv_hypercall_multi_failure; module_param(hot_add, bool, 0644); MODULE_PARM_DESC(…) …; module_param(pressure_report_delay, uint, 0644); MODULE_PARM_DESC(…) …; static atomic_t trans_id = …; static int dm_ring_size = …; /* * Driver specific state. */ enum hv_dm_state { … }; static __u8 recv_buffer[HV_HYP_PAGE_SIZE]; static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE]; static unsigned long ha_pages_in_chunk; #define HA_BYTES_IN_CHUNK … #define PAGES_IN_2M … struct hv_dynmem_device { … }; static struct hv_dynmem_device dm_device; static void post_status(struct hv_dynmem_device *dm); static void enable_page_reporting(void); static void disable_page_reporting(void); #ifdef CONFIG_MEMORY_HOTPLUG static inline bool has_pfn_is_backed(struct hv_hotadd_state *has, unsigned long pfn) { … } static unsigned long hv_page_offline_check(unsigned long start_pfn, unsigned long nr_pages) { … } static int hv_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { … } static struct notifier_block hv_memory_nb = …; /* Check if the particular page is backed and can be onlined and online it. 
*/ static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg) { … } static void hv_bring_pgs_online(struct hv_hotadd_state *has, unsigned long start_pfn, unsigned long size) { … } static void hv_mem_hot_add(unsigned long start, unsigned long size, unsigned long pfn_count, struct hv_hotadd_state *has) { … } static void hv_online_page(struct page *pg, unsigned int order) { … } static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) { … } static unsigned long handle_pg_range(unsigned long pg_start, unsigned long pg_count) { … } static unsigned long process_hot_add(unsigned long pg_start, unsigned long pfn_cnt, unsigned long rg_start, unsigned long rg_size) { … } #endif static void hot_add_req(struct work_struct *dummy) { … } static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg) { … } static unsigned long compute_balloon_floor(void) { … } /* * Compute the total number of committed memory pages. */ static unsigned long get_pages_committed(struct hv_dynmem_device *dm) { … } /* * Post our status as it relates to memory pressure to the * host. The host expects guests to post this status * periodically at 1-second intervals. * * The metrics specified in this protocol are very Windows * specific and so we cook up numbers here to convey our memory * pressure. 
*/ static void post_status(struct hv_dynmem_device *dm) { … } static void free_balloon_pages(struct hv_dynmem_device *dm, union dm_mem_page_range *range_array) { … } static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, unsigned int num_pages, struct dm_balloon_response *bl_resp, int alloc_unit) { … } static void balloon_up(struct work_struct *dummy) { … } static void balloon_down(struct hv_dynmem_device *dm, struct dm_unballoon_request *req) { … } static void balloon_onchannelcallback(void *context); static int dm_thread_func(void *dm_dev) { … } static void version_resp(struct hv_dynmem_device *dm, struct dm_version_response *vresp) { … } static void cap_resp(struct hv_dynmem_device *dm, struct dm_capabilities_resp_msg *cap_resp) { … } static void balloon_onchannelcallback(void *context) { … } #define HV_LARGE_REPORTING_ORDER … #define HV_LARGE_REPORTING_LEN … static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, struct scatterlist *sgl, unsigned int nents) { … } static void enable_page_reporting(void) { … } static void disable_page_reporting(void) { … } static int ballooning_enabled(void) { … } static int hot_add_enabled(void) { … } static int balloon_connect_vsp(struct hv_device *dev) { … } /* * DEBUGFS Interface */ #ifdef CONFIG_DEBUG_FS /** * hv_balloon_debug_show - shows statistics of balloon operations. * @f: pointer to the &struct seq_file. * @offset: ignored. * * Provides the statistics that can be accessed in hv-balloon in the debugfs. * * Return: zero on success or an error code. 
*/ static int hv_balloon_debug_show(struct seq_file *f, void *offset) { … } DEFINE_SHOW_ATTRIBUTE(…); static void hv_balloon_debugfs_init(struct hv_dynmem_device *b) { … } static void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) { … } #else static inline void hv_balloon_debugfs_init(struct hv_dynmem_device *b) { } static inline void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) { } #endif /* CONFIG_DEBUG_FS */ static int balloon_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { … } static void balloon_remove(struct hv_device *dev) { … } static int balloon_suspend(struct hv_device *hv_dev) { … } static int balloon_resume(struct hv_device *dev) { … } static const struct hv_vmbus_device_id id_table[] = …; MODULE_DEVICE_TABLE(vmbus, id_table); static struct hv_driver balloon_drv = …; static int __init init_balloon_drv(void) { … } module_init(…) …; MODULE_DESCRIPTION(…) …; MODULE_LICENSE(…) …;