linux/mm/memory_hotplug.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/memory_hotplug.c
 *
 *  Copyright (C)
 */

#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/writeback.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/memory.h>
#include <linux/memremap.h>
#include <linux/memory_hotplug.h>
#include <linux/vmalloc.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/mm_inline.h>
#include <linux/firmware-map.h>
#include <linux/stop_machine.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/compaction.h>
#include <linux/rmap.h>
#include <linux/module.h>

#include <asm/tlbflush.h>

#include "internal.h"
#include "shuffle.h"

enum {};

static int memmap_mode __read_mostly =;

static inline unsigned long memory_block_memmap_size(void)
{}

static inline unsigned long memory_block_memmap_on_memory_pages(void)
{}

#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
/*
 * memory_hotplug.memmap_on_memory parameter
 */
static int set_memmap_mode(const char *val, const struct kernel_param *kp)
{}

static int get_memmap_mode(char *buffer, const struct kernel_param *kp)
{}

static const struct kernel_param_ops memmap_mode_ops =;
module_param_cb();
MODULE_PARM_DESC();

static inline bool mhp_memmap_on_memory(void)
{}
#else
/*
 * Variant used when CONFIG_MHP_MEMMAP_ON_MEMORY is not set: always
 * reports false.
 */
static inline bool mhp_memmap_on_memory(void)
{
	return false;
}
#endif

enum {};

static const char * const online_policy_to_str[] =;

static int set_online_policy(const char *val, const struct kernel_param *kp)
{}

static int get_online_policy(char *buffer, const struct kernel_param *kp)
{}

/*
 * memory_hotplug.online_policy: configure online behavior when onlining without
 * specifying a zone (MMOP_ONLINE)
 *
 * "contig-zones": keep zone contiguous
 * "auto-movable": online memory to ZONE_MOVABLE if the configuration
 *                 (auto_movable_ratio, auto_movable_numa_aware) allows for it
 */
static int online_policy __read_mostly =;
static const struct kernel_param_ops online_policy_ops =;
module_param_cb();
MODULE_PARM_DESC();

/*
 * memory_hotplug.auto_movable_ratio: specify maximum MOVABLE:KERNEL ratio
 *
 * The ratio represents an upper limit and the kernel might decide to not
 * online some memory to ZONE_MOVABLE -- e.g., because hotplugged KERNEL memory
 * doesn't allow for more MOVABLE memory.
 */
static unsigned int auto_movable_ratio __read_mostly =;
module_param(auto_movable_ratio, uint, 0644);
MODULE_PARM_DESC();

/*
 * memory_hotplug.auto_movable_numa_aware: consider numa node stats
 */
#ifdef CONFIG_NUMA
static bool auto_movable_numa_aware __read_mostly =;
module_param(auto_movable_numa_aware, bool, 0644);
MODULE_PARM_DESC();
#endif /* CONFIG_NUMA */

/*
 * online_page_callback contains pointer to current page onlining function.
 * Initially it is generic_online_page(). If it is required it could be
 * changed by calling set_online_page_callback() for callback registration
 * and restore_online_page_callback() for generic callback restore.
 */

static online_page_callback_t online_page_callback =;
static DEFINE_MUTEX(online_page_callback_lock);

DEFINE_STATIC_PERCPU_RWSEM();

void get_online_mems(void)
{}

void put_online_mems(void)
{}

bool movable_node_enabled =;

#ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
int mhp_default_online_type = MMOP_OFFLINE;
#else
int mhp_default_online_type =;
#endif

static int __init setup_memhp_default_state(char *str)
{}
__setup();

void mem_hotplug_begin(void)
{}

void mem_hotplug_done(void)
{}

u64 max_mem_size =;

/* add this memory to iomem resource */
static struct resource *register_memory_resource(u64 start, u64 size,
						 const char *resource_name)
{}

static void release_memory_resource(struct resource *res)
{}

static int check_pfn_span(unsigned long pfn, unsigned long nr_pages)
{}

/*
 * Return page for the valid pfn only if the page is online. All pfn
 * walkers which rely on the fully initialized page->flags and others
 * should use this rather than pfn_valid && pfn_to_page
 */
struct page *pfn_to_online_page(unsigned long pfn)
{}
EXPORT_SYMBOL_GPL();

int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
		struct mhp_params *params)
{}

/* find the smallest valid pfn in the range [start_pfn, end_pfn) */
static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
				     unsigned long start_pfn,
				     unsigned long end_pfn)
{}

/* find the biggest valid pfn in the range [start_pfn, end_pfn). */
static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
				    unsigned long start_pfn,
				    unsigned long end_pfn)
{}

static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
			     unsigned long end_pfn)
{}

static void update_pgdat_span(struct pglist_data *pgdat)
{}

void __ref remove_pfn_range_from_zone(struct zone *zone,
				      unsigned long start_pfn,
				      unsigned long nr_pages)
{}

/**
 * __remove_pages() - remove sections of pages
 * @pfn: starting pageframe (must be aligned to start of a section)
 * @nr_pages: number of pages to remove (must be multiple of section size)
 * @altmap: alternative device page map or %NULL if default memmap is used
 *
 * Generic helper function to remove section mappings and sysfs entries
 * for the section of the memory we are removing. Caller needs to make
 * sure that pages are marked reserved and zones are adjusted properly by
 * calling offline_pages().
 */
void __remove_pages(unsigned long pfn, unsigned long nr_pages,
		    struct vmem_altmap *altmap)
{}

int set_online_page_callback(online_page_callback_t callback)
{}
EXPORT_SYMBOL_GPL();

int restore_online_page_callback(online_page_callback_t callback)
{}
EXPORT_SYMBOL_GPL();

/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
void __ref generic_online_page(struct page *page, unsigned int order)
{}
EXPORT_SYMBOL_GPL();

static void online_pages_range(unsigned long start_pfn, unsigned long nr_pages)
{}

/* check which state of node_states will be changed when online memory */
static void node_states_check_changes_online(unsigned long nr_pages,
	struct zone *zone, struct memory_notify *arg)
{}

static void node_states_set_node(int node, struct memory_notify *arg)
{}

static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
		unsigned long nr_pages)
{}

static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
                                     unsigned long nr_pages)
{}

#ifdef CONFIG_ZONE_DEVICE
static void section_taint_zone_device(unsigned long pfn)
{}
#else
/* Variant used when CONFIG_ZONE_DEVICE is not set: intentionally a no-op. */
static inline void section_taint_zone_device(unsigned long pfn)
{
}
#endif

/*
 * Associate the pfn range with the given zone, initializing the memmaps
 * and resizing the pgdat/zone data to span the added pages. After this
 * call, all affected pages are PageOffline().
 *
 * All aligned pageblocks are initialized to the specified migratetype
 * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
 * zone stats (e.g., nr_isolate_pageblock) are touched.
 */
void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
				  unsigned long nr_pages,
				  struct vmem_altmap *altmap, int migratetype)
{}

struct auto_movable_stats {};

static void auto_movable_stats_account_zone(struct auto_movable_stats *stats,
					    struct zone *zone)
{}
struct auto_movable_group_stats {};

static int auto_movable_stats_account_group(struct memory_group *group,
					   void *arg)
{}

static bool auto_movable_can_online_movable(int nid, struct memory_group *group,
					    unsigned long nr_pages)
{}

/*
 * Returns a default kernel memory zone for the given pfn range.
 * If no kernel zone covers this pfn range it will automatically go
 * to the ZONE_NORMAL.
 */
static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn,
		unsigned long nr_pages)
{}

/*
 * Determine to which zone to online memory dynamically based on user
 * configuration and system stats. We care about the following ratio:
 *
 *   MOVABLE : KERNEL
 *
 * Whereby MOVABLE is memory in ZONE_MOVABLE and KERNEL is memory in
 * one of the kernel zones. CMA pages inside one of the kernel zones really
 * behave like ZONE_MOVABLE, so we treat them accordingly.
 *
 * We don't allow for hotplugged memory in a KERNEL zone to increase the
 * amount of MOVABLE memory we can have, so we end up with:
 *
 *   MOVABLE : KERNEL_EARLY
 *
 * Whereby KERNEL_EARLY is memory in one of the kernel zones, available since
 * boot. We base our calculation on KERNEL_EARLY internally, because:
 *
 * a) Hotplugged memory in one of the kernel zones can sometimes still get
 *    hotunplugged, especially when hot(un)plugging individual memory blocks.
 *    There is no coordination across memory devices, therefore "automatic"
 *    hotunplugging, as implemented in hypervisors, could result in zone
 *    imbalances.
 * b) Early/boot memory in one of the kernel zones can usually not get
 *    hotunplugged again (e.g., no firmware interface to unplug, fragmented
 *    with unmovable allocations). While there are corner cases where it might
 *    still work, it is barely relevant in practice.
 *
 * Exceptions are dynamic memory groups, which allow for more MOVABLE
 * memory within the same memory group -- because in that case, there is
 * coordination within the single memory device managed by a single driver.
 *
 * We rely on "present pages" instead of "managed pages", as the latter is
 * highly unreliable and dynamic in virtualized environments, and does not
 * consider boot time allocations. For example, memory ballooning adjusts the
 * managed pages when inflating/deflating the balloon, and balloon compaction
 * can even migrate inflated pages between zones.
 *
 * Using "present pages" is better but some things to keep in mind are:
 *
 * a) Some memblock allocations, such as for the crashkernel area, are
 *    effectively unused by the kernel, yet they account to "present pages".
 *    Fortunately, these allocations are comparatively small in relevant setups
 *    (e.g., fraction of system memory).
 * b) Some hotplugged memory blocks in virtualized environments, especially
 *    hotplugged by virtio-mem, look like they are completely present, however,
 *    only parts of the memory block are actually currently usable.
 *    "present pages" is an upper limit that can get reached at runtime. As
 *    we base our calculations on KERNEL_EARLY, this is not an issue.
 */
static struct zone *auto_movable_zone_for_pfn(int nid,
					      struct memory_group *group,
					      unsigned long pfn,
					      unsigned long nr_pages)
{}

static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
		unsigned long nr_pages)
{}

struct zone *zone_for_pfn_range(int online_type, int nid,
		struct memory_group *group, unsigned long start_pfn,
		unsigned long nr_pages)
{}

/*
 * This function should only be called by memory_block_{online,offline},
 * and {online,offline}_pages.
 */
void adjust_present_page_count(struct page *page, struct memory_group *group,
			       long nr_pages)
{}

int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
			      struct zone *zone, bool mhp_off_inaccessible)
{}

void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
{}

/*
 * Must be called with mem_hotplug_lock in write mode.
 */
int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
		       struct zone *zone, struct memory_group *group)
{}

/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
static pg_data_t __ref *hotadd_init_pgdat(int nid)
{}

/*
 * __try_online_node - online a node if offlined
 * @nid: the node ID
 * @set_node_online: Whether we want to online the node
 * called by cpu_up() to online a node without onlined memory.
 *
 * Returns:
 * 1 -> a new node has been allocated
 * 0 -> the node is already online
 * -ENOMEM -> the node could not be allocated
 */
static int __try_online_node(int nid, bool set_node_online)
{}

/*
 * Users of this function always want to online/register the node
 */
int try_online_node(int nid)
{}

static int check_hotplug_memory_range(u64 start, u64 size)
{}

static int online_memory_block(struct memory_block *mem, void *arg)
{}

#ifndef arch_supports_memmap_on_memory
static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
{}
#endif

bool mhp_supports_memmap_on_memory(void)
{}
EXPORT_SYMBOL_GPL();

static void __ref remove_memory_blocks_and_altmaps(u64 start, u64 size)
{}

static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
					    u64 start, u64 size, mhp_t mhp_flags)
{}

/*
 * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
 * and online/offline operations (triggered e.g. by sysfs).
 *
 * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
 */
int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
{}

/* requires device_hotplug_lock, see add_memory_resource() */
int __ref __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags)
{}

int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags)
{}
EXPORT_SYMBOL_GPL();

/*
 * Add special, driver-managed memory to the system as system RAM. Such
 * memory is not exposed via the raw firmware-provided memmap as system
 * RAM, instead, it is detected and added by a driver - during cold boot,
 * after a reboot, and after kexec.
 *
 * Reasons why this memory should not be used for the initial memmap of a
 * kexec kernel or for placing kexec images:
 * - The booting kernel is in charge of determining how this memory will be
 *   used (e.g., use persistent memory as system RAM)
 * - Coordination with a hypervisor is required before this memory
 *   can be used (e.g., inaccessible parts).
 *
 * For this memory, no entries in /sys/firmware/memmap ("raw firmware-provided
 * memory map") are created. Also, the created memory resource is flagged
 * with IORESOURCE_SYSRAM_DRIVER_MANAGED, so in-kernel users can special-case
 * this memory as well (esp., not place kexec images onto it).
 *
 * The resource_name (visible via /proc/iomem) has to have the format
 * "System RAM ($DRIVER)".
 */
int add_memory_driver_managed(int nid, u64 start, u64 size,
			      const char *resource_name, mhp_t mhp_flags)
{}
EXPORT_SYMBOL_GPL();

/*
 * Platforms should define arch_get_mappable_range() that provides
 * maximum possible addressable physical memory range for which the
 * linear mapping could be created. The platform returned address
 * range must adhere to the following semantics.
 *
 * - range.start <= range.end
 * - Range includes both end points [range.start..range.end]
 *
 * There is also a fallback definition provided here, allowing the
 * entire possible physical address range in case any platform does
 * not define arch_get_mappable_range().
 */
struct range __weak arch_get_mappable_range(void)
{}

struct range mhp_get_pluggable_range(bool need_mapping)
{}
EXPORT_SYMBOL_GPL();

bool mhp_range_allowed(u64 start, u64 size, bool need_mapping)
{}

#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
 * non-lru movable pages and hugepages). Will skip over most unmovable
 * pages (esp., pages that can be skipped when offlining), but bail out on
 * definitely unmovable pages.
 *
 * Returns:
 *	0 in case a movable page is found and movable_pfn was updated.
 *	-ENOENT in case no movable page was found.
 *	-EBUSY in case a definitely unmovable page was found.
 */
static int scan_movable_pages(unsigned long start, unsigned long end,
			      unsigned long *movable_pfn)
{}

static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{}

static int __init cmdline_parse_movable_node(char *p)
{}
early_param();

/* check which state of node_states will be changed when offline memory */
static void node_states_check_changes_offline(unsigned long nr_pages,
		struct zone *zone, struct memory_notify *arg)
{}

static void node_states_clear_node(int node, struct memory_notify *arg)
{}

static int count_system_ram_pages_cb(unsigned long start_pfn,
				     unsigned long nr_pages, void *data)
{}

/*
 * Must be called with mem_hotplug_lock in write mode.
 */
int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
			struct zone *zone, struct memory_group *group)
{}

static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
{}

static int count_memory_range_altmaps_cb(struct memory_block *mem, void *arg)
{}

static int check_cpu_on_node(int nid)
{}

static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg)
{}

/**
 * try_offline_node
 * @nid: the node ID
 *
 * Offline a node if all memory sections and cpus of the node are removed.
 *
 * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
 * and online/offline operations before this call.
 */
void try_offline_node(int nid)
{}
EXPORT_SYMBOL();

static int memory_blocks_have_altmaps(u64 start, u64 size)
{}

static int __ref try_remove_memory(u64 start, u64 size)
{}

/**
 * __remove_memory - Remove memory if every memory block is offline
 * @start: physical address of the region to remove
 * @size: size of the region to remove
 *
 * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
 * and online/offline operations before this call, as required by
 * try_offline_node().
 */
void __remove_memory(u64 start, u64 size)
{}

/*
 * Remove memory if every memory block is offline, otherwise return -EBUSY if
 * some memory is not offline
 */
int remove_memory(u64 start, u64 size)
{}
EXPORT_SYMBOL_GPL();

static int try_offline_memory_block(struct memory_block *mem, void *arg)
{}

static int try_reonline_memory_block(struct memory_block *mem, void *arg)
{}

/*
 * Try to offline and remove memory. Might take a long time to finish in case
 * memory is still in use. Primarily useful for memory devices that logically
 * unplugged all memory (so it's no longer in use) and want to offline + remove
 * that memory.
 */
int offline_and_remove_memory(u64 start, u64 size)
{}
EXPORT_SYMBOL_GPL();
#endif /* CONFIG_MEMORY_HOTREMOVE */