linux/arch/x86/mm/pat/set_memory.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */
#include <linux/highmem.h>
#include <linux/memblock.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <linux/pfn.h>
#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/libnvdimm.h>
#include <linux/vmstat.h>
#include <linux/kernel.h>
#include <linux/cc_platform.h>
#include <linux/set_memory.h>
#include <linux/memregion.h>

#include <asm/e820/api.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/memtype.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>

#include "../mm_internal.h"

/*
 * The current flushing context - we pass it instead of 5 arguments:
 */
struct cpa_data {};
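
/*
 * Illustrative sketch, not the original definition: a context of this kind
 * typically bundles the target virtual address(es), the protection bits to
 * set and clear, the page count and a progress cursor, the physical pfn and
 * the CPA_* flags defined below.  The type and field names here are
 * hypothetical and only make the "5 arguments" remark above concrete.
 */
struct cpa_data_sketch {
	unsigned long	*vaddr;		/* virtual address(es) being changed */
	pgd_t		*pgd;		/* alternate PGD to operate on, if any */
	pgprot_t	mask_set;	/* protection bits to set */
	pgprot_t	mask_clr;	/* protection bits to clear */
	unsigned long	numpages;	/* number of 4k pages left to process */
	unsigned long	curpage;	/* progress cursor into *vaddr */
	unsigned long	pfn;		/* physical frame being mapped */
	unsigned int	flags;		/* CPA_* flags */
	struct page	**pages;	/* for the pages-array interfaces */
};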

enum cpa_warn {
	CPA_CONFLICT,
	CPA_PROTECT,
	CPA_DETECT,
};

static const int cpa_warn_level = CPA_PROTECT;

/*
 * Serialize cpa() (for !DEBUG_PAGEALLOC, which uses large identity mappings)
 * using cpa_lock, so that we don't allow any other CPU, with stale large TLB
 * entries, to change the page attributes in parallel while another CPU is
 * splitting a large page entry and changing the attributes with it.
 */
static DEFINE_SPINLOCK(cpa_lock);
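
/*
 * Illustrative sketch, not the original code: the pattern described above is
 * to take cpa_lock around the actual page table update unless DEBUG_PAGEALLOC
 * is enabled, in which case large identity mappings are not used anyway.
 * The helper name is hypothetical.
 */
static inline void cpa_lock_pattern_sketch(void)
{
	if (!debug_pagealloc_enabled())
		spin_lock(&cpa_lock);

	/* ... change or split the page table entry here ... */

	if (!debug_pagealloc_enabled())
		spin_unlock(&cpa_lock);
}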

#define CPA_FLUSHTLB 1
#define CPA_ARRAY 2
#define CPA_PAGES_ARRAY 4
#define CPA_NO_CHECK_ALIAS 8	/* Do not search for aliases */

static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
{}

#ifdef CONFIG_PROC_FS
static unsigned long direct_pages_count[PG_LEVEL_NUM];

void update_page_count(int level, unsigned long pages)
{}

static void split_page_count(int level)
{}

void arch_report_meminfo(struct seq_file *m)
{}
#else
static inline void split_page_count(int level) { }
#endif

#ifdef CONFIG_X86_CPA_STATISTICS

static unsigned long cpa_1g_checked;
static unsigned long cpa_1g_sameprot;
static unsigned long cpa_1g_preserved;
static unsigned long cpa_2m_checked;
static unsigned long cpa_2m_sameprot;
static unsigned long cpa_2m_preserved;
static unsigned long cpa_4k_install;

static inline void cpa_inc_1g_checked(void)
{}

static inline void cpa_inc_2m_checked(void)
{}

static inline void cpa_inc_4k_install(void)
{}

static inline void cpa_inc_lp_sameprot(int level)
{}

static inline void cpa_inc_lp_preserved(int level)
{}

static int cpastats_show(struct seq_file *m, void *p)
{}

static int cpastats_open(struct inode *inode, struct file *file)
{}

static const struct file_operations cpastats_fops = {
	.open		= cpastats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init cpa_stats_init(void)
{}
late_initcall(cpa_stats_init);
#else
static inline void cpa_inc_1g_checked(void) { }
static inline void cpa_inc_2m_checked(void) { }
static inline void cpa_inc_4k_install(void) { }
static inline void cpa_inc_lp_sameprot(int level) { }
static inline void cpa_inc_lp_preserved(int level) { }
#endif


static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{}

static inline int
within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
{}

#ifdef CONFIG_X86_64

/*
 * The kernel image is mapped into two places in the virtual address space
 * (addresses without KASLR, of course):
 *
 * 1. The kernel direct map (0xffff888000000000)
 * 2. The "high kernel map" (0xffffffff81000000)
 *
 * We actually execute out of #2. If we get the address of a kernel symbol, it
 * points to #2, but almost all physical-to-virtual translations point to #1.
 *
 * This is so that we can have both a directmap of all physical memory *and*
 * take full advantage of the limited (s32) immediate addressing range (2G)
 * of x86_64.
 *
 * See Documentation/arch/x86/x86_64/mm.rst for more detail.
 */

static inline unsigned long highmap_start_pfn(void)
{}

static inline unsigned long highmap_end_pfn(void)
{}

static bool __cpa_pfn_in_highmap(unsigned long pfn)
{}
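
/*
 * Illustrative sketch, not the original bodies: the "high kernel map" covers
 * roughly the pfn range of the kernel image, which can be derived from the
 * linker symbols; __cpa_pfn_in_highmap() is then a simple range check.  The
 * helper names are hypothetical.
 */
static inline unsigned long highmap_start_pfn_sketch(void)
{
	return __pa_symbol(_text) >> PAGE_SHIFT;
}

static inline unsigned long highmap_end_pfn_sketch(void)
{
	/* Stay within the kernel image; round the brk end up to 2M. */
	return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
}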

#else

static bool __cpa_pfn_in_highmap(unsigned long pfn)
{
	/* There is no highmap on 32-bit */
	return false;
}

#endif

/*
 * See set_mce_nospec().
 *
 * Machine check recovery code needs to change cache mode of poisoned pages to
 * UC to avoid speculative access logging another error. But passing the
 * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
 * speculative access. So we cheat and flip the top bit of the address. This
 * works fine for the code that updates the page tables. But at the end of the
 * process we need to flush the TLB and cache and the non-canonical address
 * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
 *
 * But in the common case we already have a canonical address. This code
 * will fix the top bit if needed and is a no-op otherwise.
 */
static inline unsigned long fix_addr(unsigned long addr)
{}
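
/*
 * Illustrative sketch, not the original body: on 64-bit, "fixing the top
 * bit" is a sign extension of the address, which a shift-left/arithmetic
 * shift-right pair performs in one go and which is a no-op for addresses
 * that are already canonical.  The helper name is hypothetical.
 */
static inline unsigned long fix_addr_sketch(unsigned long addr)
{
#ifdef CONFIG_X86_64
	return (long)(addr << 1) >> 1;	/* copy bit 62 back into bit 63 */
#else
	return addr;
#endif
}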

static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
{}

/*
 * Flushing functions
 */

static void clflush_cache_range_opt(void *vaddr, unsigned int size)
{}

/**
 * clflush_cache_range - flush a cache range with clflush
 * @vaddr:	virtual start address
 * @size:	number of bytes to flush
 *
 * CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or
 * SFENCE to avoid ordering issues.
 */
void clflush_cache_range(void *vaddr, unsigned int size)
{}
EXPORT_SYMBOL_GPL(clflush_cache_range);
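
/*
 * Illustrative sketch, not the original bodies: the flush walks the buffer
 * in cache-line-sized steps and issues CLFLUSHOPT for each line, bracketed
 * by full memory barriers because CLFLUSHOPT itself is unordered.  The
 * helper name is hypothetical.
 */
static inline void clflush_cache_range_sketch(void *vaddr, unsigned int size)
{
	const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
	void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
	void *vend = vaddr + size;

	mb();	/* order against earlier stores to the range */
	for (; p < vend; p += clflush_size)
		clflushopt(p);
	mb();	/* ensure the flushes complete before returning */
}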

#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_invalidate_pmem(void *addr, size_t size)
{}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
#endif

#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
bool cpu_cache_has_invalidate_memregion(void)
{}
EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, DEVMEM);

int cpu_cache_invalidate_memregion(int res_desc)
{}
EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, DEVMEM);
#endif

static void __cpa_flush_all(void *arg)
{}

static void cpa_flush_all(unsigned long cache)
{}

static void __cpa_flush_tlb(void *data)
{}

static void cpa_flush(struct cpa_data *data, int cache)
{}

static bool overlaps(unsigned long r1_start, unsigned long r1_end,
		     unsigned long r2_start, unsigned long r2_end)
{}

#ifdef CONFIG_PCI_BIOS
/*
 * The BIOS area between 640k and 1Mb needs to be executable for PCI BIOS
 * based config access (CONFIG_PCI_GOBIOS) support.
 */
#define BIOS_PFN	PFN_DOWN(BIOS_BEGIN)
#define BIOS_PFN_END	PFN_DOWN(BIOS_END)

static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
{
	if (pcibios_enabled && overlaps(spfn, epfn, BIOS_PFN, BIOS_PFN_END))
		return _PAGE_NX;
	return 0;
}
#else
static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
{}
#endif

/*
 * The .rodata section needs to be read-only. Using the pfn catches all
 * aliases.  This also includes __ro_after_init, so do not enforce until
 * kernel_set_to_readonly is true.
 */
static pgprotval_t protect_rodata(unsigned long spfn, unsigned long epfn)
{}

/*
 * Protect kernel text against becoming non executable by forbidding
 * _PAGE_NX.  This protects only the high kernel mapping (_text -> _etext)
 * out of which the kernel actually executes.  Do not protect the low
 * mapping.
 *
 * This does not cover __inittext since that is gone after boot.
 */
static pgprotval_t protect_kernel_text(unsigned long start, unsigned long end)
{}
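
/*
 * Illustrative sketch, not the original body: "forbidding _PAGE_NX" means
 * returning it as part of the forbidden mask whenever the requested range
 * overlaps the high kernel text mapping.  The helper name is hypothetical.
 */
static inline pgprotval_t protect_kernel_text_sketch(unsigned long start,
						     unsigned long end)
{
	unsigned long t_start = (unsigned long)_text;
	unsigned long t_end = (unsigned long)_etext - 1;

	if (overlaps(start, end, t_start, t_end))
		return _PAGE_NX;	/* NX must never be set on kernel text */

	return 0;
}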

#if defined(CONFIG_X86_64)
/*
 * Once the kernel maps its text as RO (kernel_set_to_readonly is set), the
 * kernel text mappings for the large-page-aligned text and rodata sections
 * will always be read-only.  The kernel identity mappings covering the
 * holes caused by this alignment can be anything the user asks for.
 *
 * This preserves the large page mappings for kernel text/data at no
 * extra cost.
 */
static pgprotval_t protect_kernel_text_ro(unsigned long start,
					  unsigned long end)
{}
#else
static pgprotval_t protect_kernel_text_ro(unsigned long start,
					  unsigned long end)
{
	return 0;
}
#endif

static inline bool conflicts(pgprot_t prot, pgprotval_t val)
{}

static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
				  unsigned long start, unsigned long end,
				  unsigned long pfn, const char *txt)
{}

/*
 * Certain areas of memory on x86 require very specific protection flags,
 * for example the BIOS area or kernel text. Callers don't always get this
 * right (for example, ioremap() on BIOS memory is not uncommon), so this function
 * checks and fixes these known static required protection bits.
 */
static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
					  unsigned long pfn, unsigned long npg,
					  unsigned long lpsize, int warnlvl)
{}
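
/*
 * Illustrative sketch, not the original body: each protect_*() helper above
 * returns a mask of protection bits that must not be set for its region;
 * static_protections() essentially ORs those masks together and strips the
 * forbidden bits from the caller's pgprot.  Conflict reporting via
 * check_conflict() is omitted here, and the helper name and reduced
 * parameter list are hypothetical.
 */
static inline pgprot_t static_protections_sketch(pgprot_t prot,
						 unsigned long start,
						 unsigned long pfn,
						 unsigned long npg)
{
	unsigned long end = start + npg * PAGE_SIZE - 1;
	pgprotval_t forbidden;

	/* Checks based on the virtual address: */
	forbidden  = protect_kernel_text(start, end);
	forbidden |= protect_kernel_text_ro(start, end);

	/* Checks based on the physical pfn, which catches aliases: */
	forbidden |= protect_pci_bios(pfn, pfn + npg - 1);
	forbidden |= protect_rodata(pfn, pfn + npg - 1);

	return __pgprot(pgprot_val(prot) & ~forbidden);
}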

/*
 * Validate strict W^X semantics.
 */
static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
				  unsigned long pfn, unsigned long npg,
				  bool nx, bool rw)
{}
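
/*
 * Illustrative sketch, not the original body: the core W^X rule is that a
 * present mapping may be writable or executable but not both, i.e. _PAGE_RW
 * set with _PAGE_NX clear is a violation.  The helper name is hypothetical.
 */
static inline bool is_wx_violation_sketch(pgprotval_t val)
{
	return (val & _PAGE_PRESENT) &&
	       (val & (_PAGE_RW | _PAGE_NX)) == _PAGE_RW;
}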

/*
 * Lookup the page table entry for a virtual address in a specific pgd.
 * Return a pointer to the entry (or NULL if the entry does not exist),
 * the level of the entry, and the effective NX and RW bits of all
 * page table levels.
 */
pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
				  unsigned int *level, bool *nx, bool *rw)
{}

/*
 * Lookup the page table entry for a virtual address in a specific pgd.
 * Return a pointer to the entry and the level of the mapping.
 */
pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
			     unsigned int *level)
{}

/*
 * Lookup the page table entry for a virtual address. Return a pointer
 * to the entry and the level of the mapping.
 *
 * Note: the function returns the p4d, pud or pmd entry itself (cast to a
 * pte_t pointer) when that entry is either a leaf (large) mapping or is not
 * marked present; it only returns NULL when an entry is completely empty.
 * Otherwise it descends to the PTE level.
 */
pte_t *lookup_address(unsigned long address, unsigned int *level)
{}
EXPORT_SYMBOL_GPL(lookup_address);
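
/*
 * Hypothetical usage example (not from the original file): callers typically
 * combine the returned pointer with the reported level, remembering that a
 * non-NULL return does not by itself imply a present mapping.
 */
static inline bool addr_mapped_2m_sketch(unsigned long addr)
{
	unsigned int level;
	pte_t *kpte = lookup_address(addr, &level);

	return kpte && pte_present(*kpte) && level == PG_LEVEL_2M;
}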

static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
				  unsigned int *level, bool *nx, bool *rw)
{}

/*
 * Lookup the PMD entry for a virtual address. Return a pointer to the entry
 * or NULL if not present.
 */
pmd_t *lookup_pmd_address(unsigned long address)
{}

/*
 * This is necessary because __pa() does not work on some
 * kinds of memory, like vmalloc() or the alloc_remap()
 * areas on 32-bit NUMA systems.  The percpu areas can
 * end up in this kind of memory, for instance.
 *
 * Note that as long as the PTEs are well-formed with correct PFNs, this
 * works without checking the PRESENT bit in the leaf PTE.  This is unlike
 * the similar vmalloc_to_page() and derivatives.  Callers may depend on
 * this behavior.
 *
 * This could be optimized, but it is only used in paths that are not perf
 * sensitive, and keeping it unoptimized should increase the testing coverage
 * for the more obscure platforms.
 */
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{}
EXPORT_SYMBOL_GPL(slow_virt_to_phys);
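
/*
 * Illustrative sketch, not the original body: the translation walks the
 * kernel page tables via lookup_address() and combines the pfn of the
 * (possibly large) leaf entry with the offset within that mapping size.
 * Only the 4k and 2M cases are shown; the helper name is hypothetical.
 */
static inline phys_addr_t slow_virt_to_phys_sketch(void *virt)
{
	unsigned long vaddr = (unsigned long)virt;
	unsigned int level;
	pte_t *pte = lookup_address(vaddr, &level);

	if (!pte)
		return 0;

	if (level == PG_LEVEL_2M)
		return ((phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT) |
		       (vaddr & ~PMD_MASK);

	return ((phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT) |
	       (vaddr & ~PAGE_MASK);
}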

/*
 * Set the new pmd in all the pgds we know about:
 */
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{}

static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
{}

static int __should_split_large_page(pte_t *kpte, unsigned long address,
				     struct cpa_data *cpa)
{}

static int should_split_large_page(pte_t *kpte, unsigned long address,
				   struct cpa_data *cpa)
{}

static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
			  pgprot_t ref_prot, unsigned long address,
			  unsigned long size)
{}

static int
__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
		   struct page *base)
{}

static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
			    unsigned long address)
{}

static bool try_to_free_pte_page(pte_t *pte)
{}

static bool try_to_free_pmd_page(pmd_t *pmd)
{}

static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
{}

static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
			      unsigned long start, unsigned long end)
{}

static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
{}

static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
{}

static int alloc_pte_page(pmd_t *pmd)
{}

static int alloc_pmd_page(pud_t *pud)
{}

static void populate_pte(struct cpa_data *cpa,
			 unsigned long start, unsigned long end,
			 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
{}

static long populate_pmd(struct cpa_data *cpa,
			 unsigned long start, unsigned long end,
			 unsigned num_pages, pud_t *pud, pgprot_t pgprot)
{}

static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
			pgprot_t pgprot)
{}

/*
 * Restrictions for the kernel page table do not necessarily apply when mapping in
 * an alternate PGD.
 */
static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
{}

static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
			       int primary)
{}

static int __change_page_attr(struct cpa_data *cpa, int primary)
{}

static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary);

/*
 * Check the directmap and "high kernel map" 'aliases'.
 */
static int cpa_process_alias(struct cpa_data *cpa)
{}

static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary)
{}

static int change_page_attr_set_clr(unsigned long *addr, int numpages,
				    pgprot_t mask_set, pgprot_t mask_clr,
				    int force_split, int in_flag,
				    struct page **pages)
{}

static inline int change_page_attr_set(unsigned long *addr, int numpages,
				       pgprot_t mask, int array)
{}

static inline int change_page_attr_clear(unsigned long *addr, int numpages,
					 pgprot_t mask, int array)
{}

static inline int cpa_set_pages_array(struct page **pages, int numpages,
				       pgprot_t mask)
{}

static inline int cpa_clear_pages_array(struct page **pages, int numpages,
					 pgprot_t mask)
{}

/*
 * __set_memory_prot is an internal helper for callers that have been passed
 * a pgprot_t value from upper layers and for which a reservation has already
 * been taken.  If you want to set a specific page protection, use the
 * set_memory_xx() functions instead.
 */
int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot)
{}
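
/*
 * Hypothetical usage example (not from the original file): the set_memory_xx()
 * helpers take a page-aligned kernel virtual address and a page count, e.g.
 * write-protecting a buffer and making it writable again later.
 */
static inline void set_memory_usage_sketch(void *buf, unsigned long size)
{
	unsigned long addr = (unsigned long)buf;
	int numpages = DIV_ROUND_UP(size, PAGE_SIZE);

	set_memory_ro(addr, numpages);
	/* ... the buffer is now read-only in the kernel mapping ... */
	set_memory_rw(addr, numpages);
}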

int _set_memory_uc(unsigned long addr, int numpages)
{}

int set_memory_uc(unsigned long addr, int numpages)
{}
EXPORT_SYMBOL(set_memory_uc);

int _set_memory_wc(unsigned long addr, int numpages)
{}

int set_memory_wc(unsigned long addr, int numpages)
{}
EXPORT_SYMBOL(set_memory_wc);

int _set_memory_wt(unsigned long addr, int numpages)
{}

int _set_memory_wb(unsigned long addr, int numpages)
{}

int set_memory_wb(unsigned long addr, int numpages)
{}
EXPORT_SYMBOL(set_memory_wb);

/* Prevent speculative access to a page by marking it not-present */
#ifdef CONFIG_X86_64
int set_mce_nospec(unsigned long pfn)
{}

/* Restore full speculative operation to the pfn. */
int clear_mce_nospec(unsigned long pfn)
{}
EXPORT_SYMBOL_GPL(clear_mce_nospec);
#endif /* CONFIG_X86_64 */

int set_memory_x(unsigned long addr, int numpages)
{}

int set_memory_nx(unsigned long addr, int numpages)
{}

int set_memory_ro(unsigned long addr, int numpages)
{}

int set_memory_rox(unsigned long addr, int numpages)
{}

int set_memory_rw(unsigned long addr, int numpages)
{}

int set_memory_np(unsigned long addr, int numpages)
{}

int set_memory_np_noalias(unsigned long addr, int numpages)
{}

int set_memory_p(unsigned long addr, int numpages)
{}

int set_memory_4k(unsigned long addr, int numpages)
{}

int set_memory_nonglobal(unsigned long addr, int numpages)
{}

int set_memory_global(unsigned long addr, int numpages)
{}

/*
 * __set_memory_enc_pgtable() is used for the hypervisors that get
 * informed about "encryption" status via page tables.
 */
static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
{}

/*
 * The lock serializes conversions between private and shared memory.
 *
 * It is taken for read on conversion. A write lock guarantees that no
 * concurrent conversions are in progress.
 */
static DECLARE_RWSEM(mem_enc_lock);

/*
 * Stop new private<->shared conversions.
 *
 * Taking the exclusive mem_enc_lock waits for in-flight conversions to complete.
 * The lock is not released to prevent new conversions from being started.
 */
bool set_memory_enc_stop_conversion(void)
{}

static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{}
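
/*
 * Illustrative sketch, not the original body: a conversion takes mem_enc_lock
 * for read around the page table update, so it fails fast once a writer has
 * frozen conversions (set_memory_enc_stop_conversion() takes the lock for
 * write and never releases it).  The helper name is hypothetical and the
 * CC platform check is omitted.
 */
static inline int mem_enc_convert_sketch(unsigned long addr, int numpages,
					 bool enc)
{
	int ret;

	if (!down_read_trylock(&mem_enc_lock))
		return -EBUSY;		/* conversions have been stopped */

	ret = __set_memory_enc_pgtable(addr, numpages, enc);

	up_read(&mem_enc_lock);

	return ret;
}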

int set_memory_encrypted(unsigned long addr, int numpages)
{}
EXPORT_SYMBOL_GPL(set_memory_encrypted);

int set_memory_decrypted(unsigned long addr, int numpages)
{}
EXPORT_SYMBOL_GPL(set_memory_decrypted);

int set_pages_uc(struct page *page, int numpages)
{}
EXPORT_SYMBOL(set_pages_uc);

static int _set_pages_array(struct page **pages, int numpages,
		enum page_cache_mode new_type)
{}

int set_pages_array_uc(struct page **pages, int numpages)
{}
EXPORT_SYMBOL(set_pages_array_uc);

int set_pages_array_wc(struct page **pages, int numpages)
{}
EXPORT_SYMBOL(set_pages_array_wc);

int set_pages_wb(struct page *page, int numpages)
{}
EXPORT_SYMBOL(set_pages_wb);

int set_pages_array_wb(struct page **pages, int numpages)
{}
EXPORT_SYMBOL(set_pages_array_wb);

int set_pages_ro(struct page *page, int numpages)
{}

int set_pages_rw(struct page *page, int numpages)
{}

static int __set_pages_p(struct page *page, int numpages)
{}

static int __set_pages_np(struct page *page, int numpages)
{}

int set_direct_map_invalid_noflush(struct page *page)
{}

int set_direct_map_default_noflush(struct page *page)
{}

#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{}
#endif /* CONFIG_DEBUG_PAGEALLOC */

bool kernel_page_present(struct page *page)
{}

int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
				   unsigned numpages, unsigned long page_flags)
{}

/*
 * __flush_tlb_all() flushes mappings only on current CPU and hence this
 * function shouldn't be used in an SMP environment. Presently, it's used only
 * during boot (way before smp_init()) by EFI subsystem and hence is ok.
 */
int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
				     unsigned long numpages)
{}

/*
 * The testcases use internal knowledge of the implementation that shouldn't
 * be exposed to the rest of the kernel. Include these directly here.
 */
#ifdef CONFIG_CPA_DEBUG
#include "cpa-test.c"
#endif