// SPDX-License-Identifier: GPL-2.0-only /* * Page Attribute Table (PAT) support: handle memory caching attributes in page tables. * * Authors: Venkatesh Pallipadi <[email protected]> * Suresh B Siddha <[email protected]> * * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. * * Basic principles: * * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and * the kernel to set one of a handful of 'caching type' attributes for physical * memory ranges: uncached, write-combining, write-through, write-protected, * and the most commonly used and default attribute: write-back caching. * * PAT support supersedes and augments MTRR support in a compatible fashion: MTRR is * a hardware interface to enumerate a limited number of physical memory ranges * and set their caching attributes explicitly, programmed into the CPU via MSRs. * Even modern CPUs have MTRRs enabled - but these are typically not touched * by the kernel or by user-space (such as the X server), we rely on PAT for any * additional cache attribute logic. * * PAT doesn't work via explicit memory ranges, but uses page table entries to add * cache attribute information to the mapped memory range: there's 3 bits used, * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT). * * ( There's a metric ton of finer details, such as compatibility with CPU quirks * that only support 4 types of PAT entries, and interaction with MTRRs, see * below for details. ) */ #include <linux/seq_file.h> #include <linux/memblock.h> #include <linux/debugfs.h> #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/pfn_t.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/highmem.h> #include <linux/fs.h> #include <linux/rbtree.h> #include <asm/cacheflush.h> #include <asm/cacheinfo.h> #include <asm/processor.h> #include <asm/tlbflush.h> #include <asm/x86_init.h> #include <asm/fcntl.h> #include <asm/e820/api.h> #include <asm/mtrr.h> #include <asm/page.h> #include <asm/msr.h> #include <asm/memtype.h> #include <asm/io.h> #include "memtype.h" #include "../mm_internal.h" #undef pr_fmt #define pr_fmt(fmt) … static bool __read_mostly pat_disabled = !IS_ENABLED(…); static u64 __ro_after_init pat_msr_val; /* * PAT support is enabled by default, but can be disabled for * various user-requested or hardware-forced reasons: */ static void __init pat_disable(const char *msg_reason) { … } static int __init nopat(char *str) { … } early_param(…); bool pat_enabled(void) { … } EXPORT_SYMBOL_GPL(…); int pat_debug_enable; static int __init pat_debug_setup(char *str) { … } __setup(…); #ifdef CONFIG_X86_PAT /* * X86 PAT uses page flags arch_1 and uncached together to keep track of * memory type of pages that have backing page struct. * * X86 PAT supports 4 different memory types: * - _PAGE_CACHE_MODE_WB * - _PAGE_CACHE_MODE_WC * - _PAGE_CACHE_MODE_UC_MINUS * - _PAGE_CACHE_MODE_WT * * _PAGE_CACHE_MODE_WB is the default type. */ #define _PGMT_WB … #define _PGMT_WC … #define _PGMT_UC_MINUS … #define _PGMT_WT … #define _PGMT_MASK … #define _PGMT_CLEAR_MASK … static inline enum page_cache_mode get_page_memtype(struct page *pg) { … } static inline void set_page_memtype(struct page *pg, enum page_cache_mode memtype) { … } #else static inline enum page_cache_mode get_page_memtype(struct page *pg) { return -1; } static inline void set_page_memtype(struct page *pg, enum page_cache_mode memtype) { } #endif enum { … }; #define CM … static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val, char *msg) { … } #undef CM /* * Update the cache mode to pgprot translation tables according to PAT * configuration. * Using lower indices is preferred, so we start with highest index. */ static void __init init_cache_modes(u64 pat) { … } void pat_cpu_init(void) { … } /** * pat_bp_init - Initialize the PAT MSR value and PAT table * * This function initializes PAT MSR value and PAT table with an OS-defined * value to enable additional cache attributes, WC, WT and WP. * * This function prepares the calls of pat_cpu_init() via cache_cpu_init() * on all CPUs. */ void __init pat_bp_init(void) { … } static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ /* * Does intersection of PAT memory type and MTRR memory type and returns * the resulting memory type as PAT understands it. * (Type in pat and mtrr will not have same value) * The intersection is based on "Effective Memory Type" tables in IA-32 * SDM vol 3a */ static unsigned long pat_x_mtrr_type(u64 start, u64 end, enum page_cache_mode req_type) { … } struct pagerange_state { … }; static int pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg) { … } static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) { … } /* * For RAM pages, we use page flags to mark the pages with appropriate type. * The page flags are limited to four types, WB (default), WC, WT and UC-. * WP request fails with -EINVAL, and UC gets redirected to UC-. Setting * a new memory type is only allowed for a page mapped with the default WB * type. * * Here we do two passes: * - Find the memtype of all the pages in the range, look for any conflicts. * - In case of no conflicts, set the new memtype for pages in the range. */ static int reserve_ram_pages_type(u64 start, u64 end, enum page_cache_mode req_type, enum page_cache_mode *new_type) { … } static int free_ram_pages_type(u64 start, u64 end) { … } static u64 sanitize_phys(u64 address) { … } /* * req_type typically has one of the: * - _PAGE_CACHE_MODE_WB * - _PAGE_CACHE_MODE_WC * - _PAGE_CACHE_MODE_UC_MINUS * - _PAGE_CACHE_MODE_UC * - _PAGE_CACHE_MODE_WT * * If new_type is NULL, function will return an error if it cannot reserve the * region with req_type. If new_type is non-NULL, function will return * available type in new_type in case of no error. In case of any error * it will return a negative return value. */ int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type, enum page_cache_mode *new_type) { … } int memtype_free(u64 start, u64 end) { … } /** * lookup_memtype - Looks up the memory type for a physical address * @paddr: physical address of which memory type needs to be looked up * * Only to be called when PAT is enabled * * Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS * or _PAGE_CACHE_MODE_WT. */ static enum page_cache_mode lookup_memtype(u64 paddr) { … } /** * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type * of @pfn cannot be overridden by UC MTRR memory type. * * Only to be called when PAT is enabled. * * Returns true, if the PAT memory type of @pfn is UC, UC-, or WC. * Returns false in other cases. */ bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn) { … } EXPORT_SYMBOL_GPL(…); /** * memtype_reserve_io - Request a memory type mapping for a region of memory * @start: start (physical address) of the region * @end: end (physical address) of the region * @type: A pointer to memtype, with requested type. On success, requested * or any other compatible type that was available for the region is returned * * On success, returns 0 * On failure, returns non-zero */ int memtype_reserve_io(resource_size_t start, resource_size_t end, enum page_cache_mode *type) { … } /** * memtype_free_io - Release a memory type mapping for a region of memory * @start: start (physical address) of the region * @end: end (physical address) of the region */ void memtype_free_io(resource_size_t start, resource_size_t end) { … } #ifdef CONFIG_X86_PAT int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) { … } EXPORT_SYMBOL(…); void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) { … } EXPORT_SYMBOL(…); #endif pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { … } #ifdef CONFIG_STRICT_DEVMEM /* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */ static inline int range_is_allowed(unsigned long pfn, unsigned long size) { … } #else /* This check is needed to avoid cache aliasing when PAT is enabled */ static inline int range_is_allowed(unsigned long pfn, unsigned long size) { u64 from = ((u64)pfn) << PAGE_SHIFT; u64 to = from + size; u64 cursor = from; if (!pat_enabled()) return 1; while (cursor < to) { if (!devmem_is_allowed(pfn)) return 0; cursor += PAGE_SIZE; pfn++; } return 1; } #endif /* CONFIG_STRICT_DEVMEM */ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { … } /* * Change the memory type for the physical address range in kernel identity * mapping space if that range is a part of identity map. */ int memtype_kernel_map_sync(u64 base, unsigned long size, enum page_cache_mode pcm) { … } /* * Internal interface to reserve a range of physical memory with prot. * Reserved non RAM regions only and after successful memtype_reserve, * this func also keeps identity mapping (if any) in sync with this new prot. */ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, int strict_prot) { … } /* * Internal interface to free a range of physical memory. * Frees non RAM regions only. */ static void free_pfn_range(u64 paddr, unsigned long size) { … } static int follow_phys(struct vm_area_struct *vma, unsigned long *prot, resource_size_t *phys) { … } static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, pgprot_t *pgprot) { … } /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). * * If the vma has a linear pfn mapping for the entire range, we get the prot * from pte and reserve the entire vma range with single reserve_pfn_range call. */ int track_pfn_copy(struct vm_area_struct *vma) { … } /* * prot is passed in as a parameter for the new mapping. If the vma has * a linear pfn mapping for the entire range, or no vma is provided, * reserve the entire pfn + size range with single reserve_pfn_range * call. */ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long addr, unsigned long size) { … } void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn) { … } /* * untrack_pfn is called while unmapping a pfnmap for a region. * untrack can be called for a specific region indicated by pfn and size or * can be for the entire vma (in which case pfn, size are zero). */ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked) { … } /* * untrack_pfn_clear is called if the following situation fits: * * 1) while mremapping a pfnmap for a new region, with the old vma after * its pfnmap page table has been removed. The new vma has a new pfnmap * to the same pfn & cache type with VM_PAT set. * 2) while duplicating vm area, the new vma fails to copy the pgtable from * old vma. */ void untrack_pfn_clear(struct vm_area_struct *vma) { … } pgprot_t pgprot_writecombine(pgprot_t prot) { … } EXPORT_SYMBOL_GPL(…); pgprot_t pgprot_writethrough(pgprot_t prot) { … } EXPORT_SYMBOL_GPL(…); #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) /* * We are allocating a temporary printout-entry to be passed * between seq_start()/next() and seq_show(): */ static struct memtype *memtype_get_idx(loff_t pos) { … } static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) { … } static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { … } static void memtype_seq_stop(struct seq_file *seq, void *v) { … } static int memtype_seq_show(struct seq_file *seq, void *v) { … } static const struct seq_operations memtype_seq_ops = …; static int memtype_seq_open(struct inode *inode, struct file *file) { … } static const struct file_operations memtype_fops = …; static int __init pat_memtype_list_init(void) { … } late_initcall(pat_memtype_list_init); #endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */