#include <linux/gfp.h> #include <linux/initrd.h> #include <linux/ioport.h> #include <linux/swap.h> #include <linux/memblock.h> #include <linux/swapfile.h> #include <linux/swapops.h> #include <linux/kmemleak.h> #include <linux/sched/task.h> #include <linux/execmem.h> #include <asm/set_memory.h> #include <asm/cpu_device_id.h> #include <asm/e820/api.h> #include <asm/init.h> #include <asm/page.h> #include <asm/page_types.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include <asm/proto.h> #include <asm/dma.h> /* for MAX_DMA_PFN */ #include <asm/kaslr.h> #include <asm/hypervisor.h> #include <asm/cpufeature.h> #include <asm/pti.h> #include <asm/text-patching.h> #include <asm/memtype.h> #include <asm/paravirt.h> /* * We need to define the tracepoints somewhere, and tlb.c * is only compiled when SMP=y. */ #include <trace/events/tlb.h> #include "mm_internal.h" /* * Tables translating between page_cache_type_t and pte encoding. * * The default values are defined statically as minimal supported mode; * WC and WT fall back to UC-. pat_init() updates these values to support * more cache modes, WC and WT, when it is safe to do so. See pat_init() * for the details. Note, __early_ioremap() used during early boot-time * takes pgprot_t (pte encoding) and does not use these tables. * * Index into __cachemode2pte_tbl[] is the cachemode. * * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. */ static uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = …; unsigned long cachemode2protval(enum page_cache_mode pcm) { … } EXPORT_SYMBOL(…); static uint8_t __pte2cachemode_tbl[8] = …; /* * Check that the write-protect PAT entry is set for write-protect. * To do this without making assumptions how PAT has been set up (Xen has * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache * mode via the __cachemode2pte_tbl[] into protection bits (those protection * bits will select a cache mode of WP or better), and then translate the * protection bits back into the cache mode using __pte2cm_idx() and the * __pte2cachemode_tbl[] array. This will return the really used cache mode. */ bool x86_has_pat_wp(void) { … } enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) { … } static unsigned long __initdata pgt_buf_start; static unsigned long __initdata pgt_buf_end; static unsigned long __initdata pgt_buf_top; static unsigned long min_pfn_mapped; static bool __initdata can_use_brk_pgt = …; /* * Pages returned are already directly mapped. * * Changing that is likely to break Xen, see commit: * * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve * * for detailed information. */ __ref void *alloc_low_pages(unsigned int num) { … } /* * By default need to be able to allocate page tables below PGD firstly for * the 0-ISA_END_ADDRESS range and secondly for the initial PMD_SIZE mapping. * With KASLR memory randomization, depending on the machine e820 memory and the * PUD alignment, twice that many pages may be needed when KASLR memory * randomization is enabled. */ #ifndef CONFIG_X86_5LEVEL #define INIT_PGD_PAGE_TABLES … #else #define INIT_PGD_PAGE_TABLES … #endif #ifndef CONFIG_RANDOMIZE_MEMORY #define INIT_PGD_PAGE_COUNT … #else #define INIT_PGD_PAGE_COUNT … #endif #define INIT_PGT_BUF_SIZE … RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); void __init early_alloc_pgt_buf(void) { … } int after_bootmem; early_param_on_off(…); struct map_range { … }; static int page_size_mask; /* * Save some of cr4 feature set we're using (e.g. Pentium 4MB * enable and PPro Global page enable), so that any CPU's that boot * up after us can get the correct flags. Invoked on the boot CPU. */ static inline void cr4_set_bits_and_update_boot(unsigned long mask) { … } static void __init probe_page_size_mask(void) { … } /* * INVLPG may not properly flush Global entries * on these CPUs when PCIDs are enabled. */ static const struct x86_cpu_id invlpg_miss_ids[] = …; static void setup_pcid(void) { … } #ifdef CONFIG_X86_32 #define NR_RANGE_MR … #else /* CONFIG_X86_64 */ #define NR_RANGE_MR … #endif static int __meminit save_mr(struct map_range *mr, int nr_range, unsigned long start_pfn, unsigned long end_pfn, unsigned long page_size_mask) { … } /* * adjust the page_size_mask for small range to go with * big page size instead small one if nearby are ram too. */ static void __ref adjust_range_page_size_mask(struct map_range *mr, int nr_range) { … } static const char *page_size_string(struct map_range *mr) { … } static int __meminit split_mem_range(struct map_range *mr, int nr_range, unsigned long start, unsigned long end) { … } struct range pfn_mapped[E820_MAX_ENTRIES]; int nr_pfn_mapped; static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) { … } bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) { … } /* * Setup the direct mapping of the physical memory at PAGE_OFFSET. * This runs before bootmem is initialized and gets pages directly from * the physical memory. To access them they are temporarily mapped. */ unsigned long __ref init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot) { … } /* * We need to iterate through the E820 memory map and create direct mappings * for only E820_TYPE_RAM and E820_KERN_RESERVED regions. We cannot simply * create direct mappings for all pfns from [0 to max_low_pfn) and * [4GB to max_pfn) because of possible memory holes in high addresses * that cannot be marked as UC by fixed/variable range MTRRs. * Depending on the alignment of E820 ranges, this may possibly result * in using smaller size (i.e. 4K instead of 2M or 1G) page tables. * * init_mem_mapping() calls init_range_memory_mapping() with big range. * That range would have hole in the middle or ends, and only ram parts * will be mapped in init_range_memory_mapping(). */ static unsigned long __init init_range_memory_mapping( unsigned long r_start, unsigned long r_end) { … } static unsigned long __init get_new_step_size(unsigned long step_size) { … } /** * memory_map_top_down - Map [map_start, map_end) top down * @map_start: start address of the target memory range * @map_end: end address of the target memory range * * This function will setup direct mapping for memory range * [map_start, map_end) in top-down. That said, the page tables * will be allocated at the end of the memory, and we map the * memory in top-down. */ static void __init memory_map_top_down(unsigned long map_start, unsigned long map_end) { … } /** * memory_map_bottom_up - Map [map_start, map_end) bottom up * @map_start: start address of the target memory range * @map_end: end address of the target memory range * * This function will setup direct mapping for memory range * [map_start, map_end) in bottom-up. Since we have limited the * bottom-up allocation above the kernel, the page tables will * be allocated just above the kernel and we map the memory * in [map_start, map_end) in bottom-up. */ static void __init memory_map_bottom_up(unsigned long map_start, unsigned long map_end) { … } /* * The real mode trampoline, which is required for bootstrapping CPUs * occupies only a small area under the low 1MB. See reserve_real_mode() * for details. * * If KASLR is disabled the first PGD entry of the direct mapping is copied * to map the real mode trampoline. * * If KASLR is enabled, copy only the PUD which covers the low 1MB * area. This limits the randomization granularity to 1GB for both 4-level * and 5-level paging. */ static void __init init_trampoline(void) { … } void __init init_mem_mapping(void) { … } /* * Initialize an mm_struct to be used during poking and a pointer to be used * during patching. */ void __init poking_init(void) { … } /* * devmem_is_allowed() checks to see if /dev/mem access to a certain address * is valid. The argument is a physical page number. * * On x86, access has to be given to the first megabyte of RAM because that * area traditionally contains BIOS code and data regions used by X, dosemu, * and similar apps. Since they map the entire memory range, the whole range * must be allowed (for mapping), but any areas that would otherwise be * disallowed are flagged as being "zero filled" instead of rejected. * Access has to be given to non-kernel-ram areas as well, these contain the * PCI mmio resources as well as potential bios/acpi data regions. */ int devmem_is_allowed(unsigned long pagenr) { … } void free_init_pages(const char *what, unsigned long begin, unsigned long end) { … } /* * begin/end can be in the direct map or the "high kernel mapping" * used for the kernel image only. free_init_pages() will do the * right thing for either kind of address. */ void free_kernel_image_pages(const char *what, void *begin, void *end) { … } void __ref free_initmem(void) { … } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { … } #endif void __init zone_sizes_init(void) { … } __visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = …; #ifdef CONFIG_ADDRESS_MASKING DEFINE_PER_CPU(u64, tlbstate_untag_mask); EXPORT_PER_CPU_SYMBOL(…); #endif void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { … } #ifdef CONFIG_SWAP unsigned long arch_max_swapfile_size(void) { … } #endif #ifdef CONFIG_EXECMEM static struct execmem_info execmem_info __ro_after_init; struct execmem_info __init *execmem_arch_setup(void) { … } #endif /* CONFIG_EXECMEM */