// SPDX-License-Identifier: GPL-2.0 /* * Memory Migration functionality - linux/mm/migrate.c * * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter * * Page migration was first developed in the context of the memory hotplug * project. The main authors of the migration code are: * * IWAMOTO Toshihiro <[email protected]> * Hirokazu Takahashi <[email protected]> * Dave Hansen <[email protected]> * Christoph Lameter */ #include <linux/migrate.h> #include <linux/export.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/mm_inline.h> #include <linux/nsproxy.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/topology.h> #include <linux/cpu.h> #include <linux/cpuset.h> #include <linux/writeback.h> #include <linux/mempolicy.h> #include <linux/vmalloc.h> #include <linux/security.h> #include <linux/backing-dev.h> #include <linux/compaction.h> #include <linux/syscalls.h> #include <linux/compat.h> #include <linux/hugetlb.h> #include <linux/hugetlb_cgroup.h> #include <linux/gfp.h> #include <linux/pfn_t.h> #include <linux/memremap.h> #include <linux/userfaultfd_k.h> #include <linux/balloon_compaction.h> #include <linux/page_idle.h> #include <linux/page_owner.h> #include <linux/sched/mm.h> #include <linux/ptrace.h> #include <linux/oom.h> #include <linux/memory.h> #include <linux/random.h> #include <linux/sched/sysctl.h> #include <linux/memory-tiers.h> #include <asm/tlbflush.h> #include <trace/events/migrate.h> #include "internal.h" bool isolate_movable_page(struct page *page, isolate_mode_t mode) { … } static void putback_movable_folio(struct folio *folio) { … } /* * Put previously isolated pages back onto the appropriate lists * from where they were once taken off for compaction/migration. * * This function shall be used whenever the isolated pageset has been * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range() * and isolate_hugetlb(). 
*/ void putback_movable_pages(struct list_head *l) { … } /* * Restore a potential migration pte to a working pte entry */ static bool remove_migration_pte(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, void *old) { … } /* * Get rid of all migration entries and replace them by * references to the indicated page. */ void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked) { … } /* * Something used the pte of a page under migration. We need to * get to the page and wait until migration is finished. * When we return from this function the fault will be retried. */ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { … } #ifdef CONFIG_HUGETLB_PAGE /* * The vma read lock must be held upon entry. Holding that lock prevents either * the pte or the ptl from being freed. * * This function will release the vma lock before returning. */ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { … } #endif #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) { … } #endif static int folio_expected_refs(struct address_space *mapping, struct folio *folio) { … } /* * Replace the folio in the mapping. * * The number of remaining references must be: * 1 for anonymous folios without a mapping * 2 for folios with a mapping * 3 for folios with a mapping and PagePrivate/PagePrivate2 set. */ static int __folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int expected_count) { … } int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count) { … } EXPORT_SYMBOL(…); /* * The expected number of remaining references is the same as that * of folio_migrate_mapping(). 
*/ int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src) { … } /* * Copy the flags and some other ancillary information */ void folio_migrate_flags(struct folio *newfolio, struct folio *folio) { … } EXPORT_SYMBOL(…); /************************************************************ * Migration functions ***********************************************************/ static int __migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, void *src_private, enum migrate_mode mode) { … } /** * migrate_folio() - Simple folio migration. * @mapping: The address_space containing the folio. * @dst: The folio to migrate the data to. * @src: The folio containing the current data. * @mode: How to migrate the page. * * Common logic to directly migrate a single LRU folio suitable for * folios that do not use PagePrivate/PagePrivate2. * * Folios are locked upon entry and exit. */ int migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) { … } EXPORT_SYMBOL(…); #ifdef CONFIG_BUFFER_HEAD /* Returns true if all buffers are successfully locked */ static bool buffer_migrate_lock_buffers(struct buffer_head *head, enum migrate_mode mode) { … } static int __buffer_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode, bool check_refs) { … } /** * buffer_migrate_folio() - Migration function for folios with buffers. * @mapping: The address space containing @src. * @dst: The folio to migrate to. * @src: The folio to migrate from. * @mode: How to migrate the folio. * * This function can only be used if the underlying filesystem guarantees * that no other references to @src exist. For example attached buffer * heads are accessed only under the folio lock. If your filesystem cannot * provide this guarantee, buffer_migrate_folio_norefs() may be more * appropriate. * * Return: 0 on success or a negative errno on failure. 
*/ int buffer_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) { … } EXPORT_SYMBOL(…); /** * buffer_migrate_folio_norefs() - Migration function for folios with buffers. * @mapping: The address space containing @src. * @dst: The folio to migrate to. * @src: The folio to migrate from. * @mode: How to migrate the folio. * * Like buffer_migrate_folio() except that this variant is more careful * and checks that there are also no buffer head references. This function * is the right one for mappings where buffer heads are directly looked * up and referenced (such as block device mappings). * * Return: 0 on success or a negative errno on failure. */ int buffer_migrate_folio_norefs(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) { … } EXPORT_SYMBOL_GPL(…); #endif /* CONFIG_BUFFER_HEAD */ int filemap_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) { … } EXPORT_SYMBOL_GPL(…); /* * Writeback a folio to clean the dirty state */ static int writeout(struct address_space *mapping, struct folio *folio) { … } /* * Default handling if a filesystem does not provide a migration function. */ static int fallback_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) { … } /* * Move a page to a newly allocated page * The page is locked and all ptes have been successfully removed. * * The new page will have replaced the old page if this function * is successful. * * Return value: * < 0 - error code * MIGRATEPAGE_SUCCESS - success */ static int move_to_new_folio(struct folio *dst, struct folio *src, enum migrate_mode mode) { … } /* * To record some information during migration, we use unused private * field of struct folio of the newly allocated destination folio. * This is safe because nobody is using it except us. 
*/ enum { … }; static void __migrate_folio_record(struct folio *dst, int old_page_state, struct anon_vma *anon_vma) { … } static void __migrate_folio_extract(struct folio *dst, int *old_page_state, struct anon_vma **anon_vmap) { … } /* Restore the source folio to the original state upon failure */ static void migrate_folio_undo_src(struct folio *src, int page_was_mapped, struct anon_vma *anon_vma, bool locked, struct list_head *ret) { … } /* Restore the destination folio to the original state upon failure */ static void migrate_folio_undo_dst(struct folio *dst, bool locked, free_folio_t put_new_folio, unsigned long private) { … } /* Cleanup src folio upon migration success */ static void migrate_folio_done(struct folio *src, enum migrate_reason reason) { … } /* Obtain the lock on page, remove all ptes. */ static int migrate_folio_unmap(new_folio_t get_new_folio, free_folio_t put_new_folio, unsigned long private, struct folio *src, struct folio **dstp, enum migrate_mode mode, enum migrate_reason reason, struct list_head *ret) { … } /* Migrate the folio to the newly allocated folio in dst. */ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private, struct folio *src, struct folio *dst, enum migrate_mode mode, enum migrate_reason reason, struct list_head *ret) { … } /* * Counterpart of unmap_and_move_page() for hugepage migration. * * This function doesn't wait for the completion of hugepage I/O * because there is no race between I/O and migration for hugepage. * Note that currently hugepage I/O occurs only in direct I/O * where no lock is held and PG_writeback is irrelevant, * and writeback status of all subpages is counted in the reference * count of the head page (i.e. if all subpages of a 2MB hugepage are * under direct I/O, the reference of the head page is 512 and a bit more.) 
* This means that when we try to migrate hugepage whose subpages are * doing direct I/O, some references remain after try_to_unmap() and * hugepage migration fails without data corruption. * * There is also no race when direct I/O is issued on the page under migration, * because then pte is replaced with migration swap entry and direct I/O code * will wait in the page fault for migration to complete. */ static int unmap_and_move_huge_page(new_folio_t get_new_folio, free_folio_t put_new_folio, unsigned long private, struct folio *src, int force, enum migrate_mode mode, int reason, struct list_head *ret) { … } static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) { … } #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define NR_MAX_BATCHED_MIGRATION … #else #define NR_MAX_BATCHED_MIGRATION … #endif #define NR_MAX_MIGRATE_PAGES_RETRY … #define NR_MAX_MIGRATE_ASYNC_RETRY … #define NR_MAX_MIGRATE_SYNC_RETRY … struct migrate_pages_stats { … }; /* * Returns the number of hugetlb folios that were not migrated, or an error code * after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no hugetlb folios are movable * any more because the list has become empty or no retryable hugetlb folios * exist any more. It is the caller's responsibility to call putback_movable_pages() * only if ret != 0. */ static int migrate_hugetlbs(struct list_head *from, new_folio_t get_new_folio, free_folio_t put_new_folio, unsigned long private, enum migrate_mode mode, int reason, struct migrate_pages_stats *stats, struct list_head *ret_folios) { … } /* * migrate_pages_batch() first unmaps as many folios in the from list as * possible, then moves the unmapped folios. * * We only batch migration if mode == MIGRATE_ASYNC to avoid waiting on a * lock or bit when we have locked more than one folio, which may cause * deadlock (e.g., for loop device). So, if mode != MIGRATE_ASYNC, the * length of the from list must be <= 1. 
*/ static int migrate_pages_batch(struct list_head *from, new_folio_t get_new_folio, free_folio_t put_new_folio, unsigned long private, enum migrate_mode mode, int reason, struct list_head *ret_folios, struct list_head *split_folios, struct migrate_pages_stats *stats, int nr_pass) { … } static int migrate_pages_sync(struct list_head *from, new_folio_t get_new_folio, free_folio_t put_new_folio, unsigned long private, enum migrate_mode mode, int reason, struct list_head *ret_folios, struct list_head *split_folios, struct migrate_pages_stats *stats) { … } /* * migrate_pages - migrate the folios specified in a list, to the free folios * supplied as the target for the page migration * * @from: The list of folios to be migrated. * @get_new_folio: The function used to allocate free folios to be used * as the target of the folio migration. * @put_new_folio: The function used to free target folios if migration * fails, or NULL if no special handling is necessary. * @private: Private data to be passed on to get_new_folio() * @mode: The migration mode that specifies the constraints for * folio migration, if any. * @reason: The reason for folio migration. * @ret_succeeded: Set to the number of folios migrated successfully if * the caller passes a non-NULL pointer. * * The function returns after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no folios * are movable any more because the list has become empty or no retryable folios * exist any more. It is the caller's responsibility to call putback_movable_pages() * only if ret != 0. * * Returns the number of {normal folio, large folio, hugetlb} that were not * migrated, or an error code. The number of large folio splits will be * considered as the number of non-migrated large folios, no matter how many * split folios of the large folio are migrated successfully. 
*/ int migrate_pages(struct list_head *from, new_folio_t get_new_folio, free_folio_t put_new_folio, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded) { … } struct folio *alloc_migration_target(struct folio *src, unsigned long private) { … } #ifdef CONFIG_NUMA static int store_status(int __user *status, int start, int value, int nr) { … } static int do_move_pages_to_node(struct list_head *pagelist, int node) { … } /* * Resolves the given address to a struct page, isolates it from the LRU and * puts it on the given pagelist. * Returns: * errno - if the page cannot be found/isolated * 0 - when it doesn't have to be migrated because it is already on the * target node * 1 - when it has been queued */ static int add_page_for_migration(struct mm_struct *mm, const void __user *p, int node, struct list_head *pagelist, bool migrate_all) { … } static int move_pages_and_store_status(int node, struct list_head *pagelist, int __user *status, int start, int i, unsigned long nr_pages) { … } /* * Migrate an array of page addresses onto an array of nodes and fill * the corresponding array of status. */ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, unsigned long nr_pages, const void __user * __user *pages, const int __user *nodes, int __user *status, int flags) { … } /* * Determine the nodes of an array of pages and store them in an array of status. */ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, const void __user **pages, int *status) { … } static int get_compat_pages_array(const void __user *chunk_pages[], const void __user * __user *pages, unsigned long chunk_nr) { … } /* * Determine the nodes of a user array of pages and store them in * a user array of status. 
*/ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, const void __user * __user *pages, int __user *status) { … } static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes) { … } /* * Move a list of pages in the address space of the currently executing * process. */ static int kernel_move_pages(pid_t pid, unsigned long nr_pages, const void __user * __user *pages, const int __user *nodes, int __user *status, int flags) { … } SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, const void __user * __user *, pages, const int __user *, nodes, int __user *, status, int, flags) { … } #ifdef CONFIG_NUMA_BALANCING /* * Returns true if this is a safe migration target node for misplaced NUMA * pages. Currently it only checks the watermarks which is crude. */ static bool migrate_balanced_pgdat(struct pglist_data *pgdat, unsigned long nr_migrate_pages) { … } static struct folio *alloc_misplaced_dst_folio(struct folio *src, unsigned long data) { … } /* * Prepare for calling migrate_misplaced_folio() by isolating the folio if * permitted. Must be called with the PTL still held. */ int migrate_misplaced_folio_prepare(struct folio *folio, struct vm_area_struct *vma, int node) { … } /* * Attempt to migrate a misplaced folio to the specified destination * node. Caller is expected to have isolated the folio by calling * migrate_misplaced_folio_prepare(), which will result in an * elevated reference count on the folio. This function will un-isolate the * folio, dereferencing the folio before returning. */ int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node) { … } #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_NUMA */