linux/mm/khugepaged.c

// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/mmu_notifier.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/mm_inline.h>
#include <linux/kthread.h>
#include <linux/khugepaged.h>
#include <linux/freezer.h>
#include <linux/mman.h>
#include <linux/hashtable.h>
#include <linux/userfaultfd_k.h>
#include <linux/page_idle.h>
#include <linux/page_table_check.h>
#include <linux/rcupdate_wait.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/ksm.h>

#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"
#include "mm_slot.h"

enum scan_result {};

#define CREATE_TRACE_POINTS
#include <trace/events/huge_memory.h>

static struct task_struct *khugepaged_thread __read_mostly;
static DEFINE_MUTEX(khugepaged_mutex);

/* default scan 8*512 ptes (or vmas) every 10 seconds */
static unsigned int khugepaged_pages_to_scan __read_mostly;
static unsigned int khugepaged_pages_collapsed;
static unsigned int khugepaged_full_scans;
static unsigned int khugepaged_scan_sleep_millisecs __read_mostly = 10000;
/* during fragmentation poll the hugepage allocator once every minute */
static unsigned int khugepaged_alloc_sleep_millisecs __read_mostly = 60000;
static unsigned long khugepaged_sleep_expire;
static DEFINE_SPINLOCK(khugepaged_mm_lock);
static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
/*
 * By default, collapse a hugepage if at least one pte is mapped, i.e.
 * behave as if the whole range had faulted in when the vma was large
 * enough at page-fault time.
 *
 * Note that these limits are only respected if collapse was initiated
 * by khugepaged; see the accounting sketch below the declarations.
 */
unsigned int khugepaged_max_ptes_none __read_mostly;
static unsigned int khugepaged_max_ptes_swap __read_mostly;
static unsigned int khugepaged_max_ptes_shared __read_mostly;
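
/*
 * Illustrative sketch (hypothetical helper, not the kernel's code
 * path): how a scan can charge empty ptes against
 * khugepaged_max_ptes_none.  The real accounting is inlined in
 * __collapse_huge_page_isolate() and hpage_collapse_scan_pmd(), and
 * the limit only applies when collapse was initiated by khugepaged.
 */
static bool sketch_none_ptes_within_limit(pte_t *pte,
					  struct collapse_control *cc)
{
	int i, none_or_zero = 0;

	for (i = 0; i < HPAGE_PMD_NR; i++) {
		if (!pte_none(ptep_get(&pte[i])))
			continue;
		if (cc->is_khugepaged &&
		    ++none_or_zero > khugepaged_max_ptes_none)
			return false;	/* too many empty ptes to collapse */
	}
	return true;
}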

#define MM_SLOTS_HASH_BITS 10
static DEFINE_READ_MOSTLY_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);

static struct kmem_cache *mm_slot_cache __ro_after_init;

struct collapse_control {
	bool is_khugepaged;

	/* Num pages scanned per node */
	u32 node_load[MAX_NUMNODES];

	/* nodemask for allocation fallback */
	nodemask_t alloc_nmask;
};

/**
 * struct khugepaged_mm_slot - khugepaged information per mm that is being scanned
 * @slot: hash lookup from mm to mm_slot
 */
struct khugepaged_mm_slot {
	struct mm_slot slot;
};

/**
 * struct khugepaged_scan - cursor for scanning
 * @mm_head: the head of the mm list to scan
 * @mm_slot: the current mm_slot we are scanning
 * @address: the next address inside that mm to be scanned
 *
 * There is only the one khugepaged_scan instance of this cursor structure.
 */
struct khugepaged_scan {
	struct list_head mm_head;
	struct khugepaged_mm_slot *mm_slot;
	unsigned long address;
};

static struct khugepaged_scan khugepaged_scan = {
	.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
};

#ifdef CONFIG_SYSFS
static ssize_t scan_sleep_millisecs_show(struct kobject *kobj,
					 struct kobj_attribute *attr,
					 char *buf)
{}

static ssize_t scan_sleep_millisecs_store(struct kobject *kobj,
					  struct kobj_attribute *attr,
					  const char *buf, size_t count)
{}
static struct kobj_attribute scan_sleep_millisecs_attr =
	__ATTR_RW(scan_sleep_millisecs);

static ssize_t alloc_sleep_millisecs_show(struct kobject *kobj,
					  struct kobj_attribute *attr,
					  char *buf)
{}

static ssize_t alloc_sleep_millisecs_store(struct kobject *kobj,
					   struct kobj_attribute *attr,
					   const char *buf, size_t count)
{}
static struct kobj_attribute alloc_sleep_millisecs_attr =
	__ATTR_RW(alloc_sleep_millisecs);

static ssize_t pages_to_scan_show(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  char *buf)
{}
static ssize_t pages_to_scan_store(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   const char *buf, size_t count)
{}
static struct kobj_attribute pages_to_scan_attr =
	__ATTR_RW(pages_to_scan);

static ssize_t pages_collapsed_show(struct kobject *kobj,
				    struct kobj_attribute *attr,
				    char *buf)
{}
static struct kobj_attribute pages_collapsed_attr =
	__ATTR_RO(pages_collapsed);

static ssize_t full_scans_show(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       char *buf)
{}
static struct kobj_attribute full_scans_attr =
	__ATTR_RO(full_scans);

static ssize_t defrag_show(struct kobject *kobj,
			   struct kobj_attribute *attr, char *buf)
{}
static ssize_t defrag_store(struct kobject *kobj,
			    struct kobj_attribute *attr,
			    const char *buf, size_t count)
{}
static struct kobj_attribute khugepaged_defrag_attr =
	__ATTR_RW(defrag);

/*
 * max_ptes_none controls whether khugepaged should collapse hugepages
 * over any unmapped ptes, in turn potentially increasing the memory
 * footprint of the vmas it scans. When max_ptes_none is 0, khugepaged
 * will not reduce the available free memory in the system as it runs.
 * Increasing max_ptes_none will instead potentially reduce free memory
 * during the khugepaged scan.
 */
static ssize_t max_ptes_none_show(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  char *buf)
{}
static ssize_t max_ptes_none_store(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   const char *buf, size_t count)
{}
static struct kobj_attribute khugepaged_max_ptes_none_attr =
	__ATTR_RW(max_ptes_none);
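
/*
 * A minimal sketch of the parse-and-validate shape these store
 * handlers share, assuming the usual kstrtouint() pattern; the helper
 * and its @max parameter are illustrative, not the kernel's API.
 * max_ptes_none itself is bounded by HPAGE_PMD_NR - 1.
 */
static ssize_t sketch_uint_store(const char *buf, size_t count,
				 unsigned int *val, unsigned int max)
{
	unsigned int tmp;
	int err = kstrtouint(buf, 10, &tmp);

	if (err || tmp > max)
		return -EINVAL;

	*val = tmp;
	return count;
}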

static ssize_t max_ptes_swap_show(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  char *buf)
{}

static ssize_t max_ptes_swap_store(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   const char *buf, size_t count)
{}

static struct kobj_attribute khugepaged_max_ptes_swap_attr =
	__ATTR_RW(max_ptes_swap);

static ssize_t max_ptes_shared_show(struct kobject *kobj,
				    struct kobj_attribute *attr,
				    char *buf)
{}

static ssize_t max_ptes_shared_store(struct kobject *kobj,
				     struct kobj_attribute *attr,
				     const char *buf, size_t count)
{}

static struct kobj_attribute khugepaged_max_ptes_shared_attr =
	__ATTR_RW(max_ptes_shared);

static struct attribute *khugepaged_attr[] = {
	&khugepaged_defrag_attr.attr,
	&khugepaged_max_ptes_none_attr.attr,
	&khugepaged_max_ptes_swap_attr.attr,
	&khugepaged_max_ptes_shared_attr.attr,
	&pages_to_scan_attr.attr,
	&pages_collapsed_attr.attr,
	&full_scans_attr.attr,
	&scan_sleep_millisecs_attr.attr,
	&alloc_sleep_millisecs_attr.attr,
	NULL,
};

struct attribute_group khugepaged_attr_group = {
	.attrs = khugepaged_attr,
};
#endif /* CONFIG_SYSFS */

int hugepage_madvise(struct vm_area_struct *vma,
		     unsigned long *vm_flags, int advice)
{}

int __init khugepaged_init(void)
{}

void __init khugepaged_destroy(void)
{}

static inline int hpage_collapse_test_exit(struct mm_struct *mm)
{}

static inline int hpage_collapse_test_exit_or_disable(struct mm_struct *mm)
{}

static bool hugepage_pmd_enabled(void)
{}

void __khugepaged_enter(struct mm_struct *mm)
{}

void khugepaged_enter_vma(struct vm_area_struct *vma,
			  unsigned long vm_flags)
{}

void __khugepaged_exit(struct mm_struct *mm)
{}

static void release_pte_folio(struct folio *folio)
{}

static void release_pte_pages(pte_t *pte, pte_t *_pte,
		struct list_head *compound_pagelist)
{}

static bool is_refcount_suitable(struct folio *folio)
{}

static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
					unsigned long address,
					pte_t *pte,
					struct collapse_control *cc,
					struct list_head *compound_pagelist)
{}

static void __collapse_huge_page_copy_succeeded(pte_t *pte,
						struct vm_area_struct *vma,
						unsigned long address,
						spinlock_t *ptl,
						struct list_head *compound_pagelist)
{}

static void __collapse_huge_page_copy_failed(pte_t *pte,
					     pmd_t *pmd,
					     pmd_t orig_pmd,
					     struct vm_area_struct *vma,
					     struct list_head *compound_pagelist)
{}

/*
 * __collapse_huge_page_copy - attempts to copy memory contents from raw
 * pages to a hugepage. Cleans up the raw pages if copying succeeds;
 * otherwise restores the original page table and releases isolated raw pages.
 * Returns SCAN_SUCCEED if copying succeeds, otherwise returns SCAN_COPY_MC.
 *
 * @pte: start of the PTEs to copy from
 * @folio: the new hugepage to copy contents to
 * @pmd: pointer to the new hugepage's PMD
 * @orig_pmd: the original raw pages' PMD
 * @vma: the original raw pages' virtual memory area
 * @address: starting address to copy
 * @ptl: lock on raw pages' PTEs
 * @compound_pagelist: list that stores compound pages
 */
static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
		pmd_t *pmd, pmd_t orig_pmd, struct vm_area_struct *vma,
		unsigned long address, spinlock_t *ptl,
		struct list_head *compound_pagelist)
{}
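
/*
 * Sketch of the per-page copy step, assuming copy_mc_user_highpage()
 * as the machine-check-safe primitive: the collapse must survive
 * poisoned source pages instead of crashing the kernel.  The helper
 * form is illustrative.
 */
static int sketch_copy_one_page(struct page *dst, struct page *src,
				unsigned long addr,
				struct vm_area_struct *vma)
{
	/* a non-zero return means a machine check fired while reading @src */
	if (copy_mc_user_highpage(dst, src, addr, vma))
		return SCAN_COPY_MC;
	return SCAN_SUCCEED;
}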

static void khugepaged_alloc_sleep(void)
{}

struct collapse_control khugepaged_collapse_control = {
	.is_khugepaged = true,
};

static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc)
{}
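
/*
 * Sketch of the NUMA heuristic above: once pages from a distant second
 * node appear (node_distance() beyond RECLAIM_DISTANCE), scanning this
 * range is abandoned rather than risking a remote THP allocation.
 * Hypothetical helper; the real function also honours node_reclaim_mode.
 */
static bool sketch_scan_abort(int nid, struct collapse_control *cc)
{
	int i;

	if (cc->node_load[nid])
		return false;	/* this node already has scanned pages */

	for (i = 0; i < MAX_NUMNODES; i++) {
		if (cc->node_load[i] &&
		    node_distance(nid, i) > RECLAIM_DISTANCE)
			return true;
	}
	return false;
}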

#define khugepaged_defrag()					\
	(transparent_hugepage_flags &				\
	 (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG))

/* Defrag for khugepaged will enter direct reclaim/compaction if necessary */
static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void)
{}
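
/*
 * A minimal sketch of the selection described above, assuming it
 * mirrors the upstream one-liner: defrag mode opts into direct
 * reclaim/compaction via GFP_TRANSHUGE, otherwise the _LIGHT variant
 * avoids stalling.
 */
static inline gfp_t sketch_khugepaged_gfpmask(void)
{
	return khugepaged_defrag() ? GFP_TRANSHUGE : GFP_TRANSHUGE_LIGHT;
}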

#ifdef CONFIG_NUMA
static int hpage_collapse_find_target_node(struct collapse_control *cc)
{}
#else
static int hpage_collapse_find_target_node(struct collapse_control *cc)
{
	return 0;
}
#endif

/*
 * If the mmap_lock was temporarily dropped, revalidate the vma after
 * re-taking it, before the collapse proceeds.
 * Returns an enum scan_result value.
 */
static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
				   bool expect_anon,
				   struct vm_area_struct **vmap,
				   struct collapse_control *cc)
{}
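
/*
 * Sketch of the revalidation idea with only the basic checks shown:
 * the vma must still exist and still cover the whole aligned range.
 * The real function additionally re-checks THP eligibility and, when
 * expected, that the vma is anonymous.  Hypothetical helper.
 */
static int sketch_revalidate(struct mm_struct *mm, unsigned long address,
			     struct vm_area_struct **vmap)
{
	unsigned long haddr = address & HPAGE_PMD_MASK;
	struct vm_area_struct *vma = find_vma(mm, haddr);

	if (!vma)
		return SCAN_VMA_NULL;
	if (vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
		return SCAN_ADDRESS_RANGE;

	*vmap = vma;
	return SCAN_SUCCEED;
}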

static int find_pmd_or_thp_or_none(struct mm_struct *mm,
				   unsigned long address,
				   pmd_t **pmd)
{}

static int check_pmd_still_valid(struct mm_struct *mm,
				 unsigned long address,
				 pmd_t *pmd)
{}

/*
 * Bring missing pages in from swap, to complete THP collapse.
 * Only done if hpage_collapse_scan_pmd believes it is worthwhile.
 *
 * Called and returns without pte mapped or spinlocks held.
 * Returns result: if not SCAN_SUCCEED, mmap_lock has been released.
 */
static int __collapse_huge_page_swapin(struct mm_struct *mm,
				       struct vm_area_struct *vma,
				       unsigned long haddr, pmd_t *pmd,
				       int referenced)
{}
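
/*
 * Sketch of the related accounting on the scan side: swap ptes are
 * tolerated only up to khugepaged_max_ptes_swap before bringing the
 * pages back in is judged too expensive.  Hypothetical helper; the
 * real count lives in hpage_collapse_scan_pmd().
 */
static bool sketch_too_many_swap_ptes(pte_t *pte)
{
	int i, unmapped = 0;

	for (i = 0; i < HPAGE_PMD_NR; i++) {
		if (is_swap_pte(ptep_get(&pte[i])) &&
		    ++unmapped > khugepaged_max_ptes_swap)
			return true;
	}
	return false;
}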

static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
			      struct collapse_control *cc)
{}

static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
			      int referenced, int unmapped,
			      struct collapse_control *cc)
{}

static int hpage_collapse_scan_pmd(struct mm_struct *mm,
				   struct vm_area_struct *vma,
				   unsigned long address, bool *mmap_locked,
				   struct collapse_control *cc)
{}

static void collect_mm_slot(struct khugepaged_mm_slot *mm_slot)
{}

#ifdef CONFIG_SHMEM
/* hpage must be locked, and mmap_lock must be held */
static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr,
			pmd_t *pmdp, struct page *hpage)
{}
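
/*
 * Sketch of what installing the huge pmd can look like, assuming
 * do_set_pmd() (the fault path's pmd-mapping helper) can be reused
 * here; refcount and statistics handling are abridged.
 */
static int sketch_install_pmd(struct vm_area_struct *vma,
			      unsigned long addr, pmd_t *pmdp,
			      struct page *hpage)
{
	struct vm_fault vmf = {
		.vma = vma,
		.address = addr,
		.pmd = pmdp,
	};

	mmap_assert_locked(vma->vm_mm);
	if (do_set_pmd(&vmf, hpage))
		return SCAN_FAIL;

	get_page(hpage);
	return SCAN_SUCCEED;
}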

/**
 * collapse_pte_mapped_thp - Try to collapse a pte-mapped THP for mm at
 * address @addr.
 *
 * @mm: process address space where collapse happens
 * @addr: THP collapse address
 * @install_pmd: If a huge PMD should be installed
 *
 * This function checks whether all the PTEs in the PMD are pointing to the
 * right THP. If so, retract the page table so the THP can refault in as
 * pmd-mapped. Possibly install a huge PMD mapping the THP.
 */
int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
			    bool install_pmd)
{}
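
/*
 * Sketch of the core check documented above: every pte must map the
 * expected subpage of a single THP.  Locking, refcounting and uffd
 * handling are omitted; the helper form is illustrative.
 */
static bool sketch_ptes_map_one_thp(struct vm_area_struct *vma,
				    unsigned long haddr, pte_t *pte,
				    struct page *hpage)
{
	int i;

	for (i = 0; i < HPAGE_PMD_NR; i++) {
		pte_t entry = ptep_get(&pte[i]);
		struct page *page;

		if (!pte_present(entry))
			return false;
		page = vm_normal_page(vma, haddr + i * PAGE_SIZE, entry);
		if (page != hpage + i)	/* must be the i-th subpage */
			return false;
	}
	return true;
}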

static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
{}

/**
 * collapse_file - collapse filemap/tmpfs/shmem pages into a huge page.
 *
 * @mm: process address space where collapse happens
 * @addr: virtual collapse start address
 * @file: file that collapse is performed on
 * @start: page cache offset where collapse starts
 * @cc: collapse context and scratchpad
 *
 * The basic scheme is simple; the details are more complex:
 *  - allocate and lock a new huge page;
 *  - scan the page cache, locking old pages
 *    + swap/gup in pages if necessary;
 *  - copy data to the new page;
 *  - handle shmem holes
 *    + re-validate that holes weren't filled by someone else
 *    + check for userfaultfd;
 *  - finalize updates to the page cache;
 *  - if replacing succeeds:
 *    + unlock the huge page;
 *    + free old pages;
 *  - if replacing fails:
 *    + unlock old pages;
 *    + unlock and free the huge page.
 *
 * A sketch of the final page-cache replacement step follows this
 * function.
 */
static int collapse_file(struct mm_struct *mm, unsigned long addr,
			 struct file *file, pgoff_t start,
			 struct collapse_control *cc)
{}
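
/*
 * Sketch of the final page-cache replacement step from the scheme
 * above, assuming the XArray multi-index API: one store of the new
 * folio covers all HPAGE_PMD_NR slots at once.  Error paths, the old
 * page locks and statistics are all omitted.
 */
static void sketch_replace_in_page_cache(struct address_space *mapping,
					 pgoff_t start, struct folio *new)
{
	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);

	xas_lock_irq(&xas);
	xas_store(&xas, new);	/* multi-index entry replaces the old pages */
	xas_unlock_irq(&xas);
}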

static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
				    struct file *file, pgoff_t start,
				    struct collapse_control *cc)
{}
#else
static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
				    struct file *file, pgoff_t start,
				    struct collapse_control *cc)
{
	BUILD_BUG();
}
#endif

static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
					    struct collapse_control *cc)
	__releases(&khugepaged_mm_lock)
	__acquires(&khugepaged_mm_lock)
{}

static int khugepaged_has_work(void)
{}

static int khugepaged_wait_event(void)
{}

static void khugepaged_do_scan(struct collapse_control *cc)
{}

static bool khugepaged_should_wakeup(void)
{}

static void khugepaged_wait_work(void)
{}
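
/*
 * Sketch of the sleep policy, assuming this shape: with work pending,
 * sleep out the scan interval but remain wakeable through
 * khugepaged_wait; with nothing to do, block until an mm shows up.
 * Freezer-aware waits keep suspend from hanging on this kthread.
 */
static void sketch_wait_work(void)
{
	if (khugepaged_has_work()) {
		unsigned long t =
			msecs_to_jiffies(khugepaged_scan_sleep_millisecs);

		if (t)
			wait_event_freezable_timeout(khugepaged_wait,
						     khugepaged_should_wakeup(),
						     t);
		return;
	}

	wait_event_freezable(khugepaged_wait, khugepaged_wait_event());
}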

static int khugepaged(void *none)
{}

static void set_recommended_min_free_kbytes(void)
{}

int start_stop_khugepaged(void)
{}

void khugepaged_min_free_kbytes_update(void)
{}

bool current_is_khugepaged(void)
{}

static int madvise_collapse_errno(enum scan_result r)
{}
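
/*
 * An abridged sketch of how scan results can map to errnos for
 * madvise(2): allocation failure becomes -ENOMEM, transient page
 * states -EAGAIN, anything else -EINVAL.  The real case list is
 * longer than shown here.
 */
static int sketch_collapse_errno(enum scan_result r)
{
	switch (r) {
	case SCAN_ALLOC_HUGE_PAGE_FAIL:
		return -ENOMEM;
	case SCAN_PAGE_LOCK:
	case SCAN_PAGE_COUNT:
		return -EAGAIN;	/* transient; a retry may succeed */
	default:
		return -EINVAL;
	}
}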

int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
		     unsigned long start, unsigned long end)
{}