init.c | Explore in Territory

#include <linux/gfp.h>
#include <linux/initrd.h>
#include <linux/ioport.h>
#include <linux/swap.h>
#include <linux/memblock.h>
#include <linux/swapfile.h>
#include <linux/swapops.h>
#include <linux/kmemleak.h>
#include <linux/sched/task.h>
#include <linux/execmem.h>

#include <asm/set_memory.h>
#include <asm/cpu_device_id.h>
#include <asm/e820/api.h>
#include <asm/init.h>
#include <asm/page.h>
#include <asm/page_types.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/proto.h>
#include <asm/dma.h>		/* for MAX_DMA_PFN */
#include <asm/kaslr.h>
#include <asm/hypervisor.h>
#include <asm/cpufeature.h>
#include <asm/pti.h>
#include <asm/text-patching.h>
#include <asm/memtype.h>
#include <asm/paravirt.h>

/*
 * We need to define the tracepoints somewhere, and tlb.c
 * is only compiled when SMP=y.
 */
#include <trace/events/tlb.h>

#include "mm_internal.h"

/*
 * Tables translating between page_cache_type_t and pte encoding.
 *
 * The default values are defined statically as minimal supported mode;
 * WC and WT fall back to UC-.  pat_init() updates these values to support
 * more cache modes, WC and WT, when it is safe to do so.  See pat_init()
 * for the details.  Note, __early_ioremap() used during early boot-time
 * takes pgprot_t (pte encoding) and does not use these tables.
 *
 *   Index into __cachemode2pte_tbl[] is the cachemode.
 *
 *   Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
 *   (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
 */
static uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = …;

unsigned long cachemode2protval(enum page_cache_mode pcm)
{ … }
EXPORT_SYMBOL(…);

static uint8_t __pte2cachemode_tbl[8] = …;

/*
 * Check that the write-protect PAT entry is set for write-protect.
 * To do this without making assumptions how PAT has been set up (Xen has
 * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache
 * mode via the __cachemode2pte_tbl[] into protection bits (those protection
 * bits will select a cache mode of WP or better), and then translate the
 * protection bits back into the cache mode using __pte2cm_idx() and the
 * __pte2cachemode_tbl[] array. This will return the really used cache mode.
 */
bool x86_has_pat_wp(void)
{ … }

enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
{ … }

static unsigned long __initdata pgt_buf_start;
static unsigned long __initdata pgt_buf_end;
static unsigned long __initdata pgt_buf_top;

static unsigned long min_pfn_mapped;

static bool __initdata can_use_brk_pgt = …;

/*
 * Pages returned are already directly mapped.
 *
 * Changing that is likely to break Xen, see commit:
 *
 *    279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve
 *
 * for detailed information.
 */
__ref void *alloc_low_pages(unsigned int num)
{ … }

/*
 * By default need to be able to allocate page tables below PGD firstly for
 * the 0-ISA_END_ADDRESS range and secondly for the initial PMD_SIZE mapping.
 * With KASLR memory randomization, depending on the machine e820 memory and the
 * PUD alignment, twice that many pages may be needed when KASLR memory
 * randomization is enabled.
 */

#ifndef CONFIG_X86_5LEVEL
#define INIT_PGD_PAGE_TABLES …
#else
#define INIT_PGD_PAGE_TABLES …
#endif

#ifndef CONFIG_RANDOMIZE_MEMORY
#define INIT_PGD_PAGE_COUNT …
#else
#define INIT_PGD_PAGE_COUNT …
#endif

#define INIT_PGT_BUF_SIZE …
RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
void  __init early_alloc_pgt_buf(void)
{ … }

int after_bootmem;

early_param_on_off(…);

struct map_range { … };

static int page_size_mask;

/*
 * Save some of cr4 feature set we're using (e.g.  Pentium 4MB
 * enable and PPro Global page enable), so that any CPU's that boot
 * up after us can get the correct flags. Invoked on the boot CPU.
 */
static inline void cr4_set_bits_and_update_boot(unsigned long mask)
{ … }

static void __init probe_page_size_mask(void)
{ … }

/*
 * INVLPG may not properly flush Global entries
 * on these CPUs when PCIDs are enabled.
 */
static const struct x86_cpu_id invlpg_miss_ids[] = …;

static void setup_pcid(void)
{ … }

#ifdef CONFIG_X86_32
#define NR_RANGE_MR …
#else /* CONFIG_X86_64 */
#define NR_RANGE_MR …
#endif

static int __meminit save_mr(struct map_range *mr, int nr_range,
			     unsigned long start_pfn, unsigned long end_pfn,
			     unsigned long page_size_mask)
{ … }

/*
 * adjust the page_size_mask for small range to go with
 *	big page size instead small one if nearby are ram too.
 */
static void __ref adjust_range_page_size_mask(struct map_range *mr,
							 int nr_range)
{ … }

static const char *page_size_string(struct map_range *mr)
{ … }

static int __meminit split_mem_range(struct map_range *mr, int nr_range,
				     unsigned long start,
				     unsigned long end)
{ … }

struct range pfn_mapped[E820_MAX_ENTRIES];
int nr_pfn_mapped;

static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
{ … }

bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
{ … }

/*
 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
 * This runs before bootmem is initialized and gets pages directly from
 * the physical memory. To access them they are temporarily mapped.
 */
unsigned long __ref init_memory_mapping(unsigned long start,
					unsigned long end, pgprot_t prot)
{ … }

/*
 * We need to iterate through the E820 memory map and create direct mappings
 * for only E820_TYPE_RAM and E820_KERN_RESERVED regions. We cannot simply
 * create direct mappings for all pfns from [0 to max_low_pfn) and
 * [4GB to max_pfn) because of possible memory holes in high addresses
 * that cannot be marked as UC by fixed/variable range MTRRs.
 * Depending on the alignment of E820 ranges, this may possibly result
 * in using smaller size (i.e. 4K instead of 2M or 1G) page tables.
 *
 * init_mem_mapping() calls init_range_memory_mapping() with big range.
 * That range would have hole in the middle or ends, and only ram parts
 * will be mapped in init_range_memory_mapping().
 */
static unsigned long __init init_range_memory_mapping(
					   unsigned long r_start,
					   unsigned long r_end)
{ … }

static unsigned long __init get_new_step_size(unsigned long step_size)
{ … }

/**
 * memory_map_top_down - Map [map_start, map_end) top down
 * @map_start: start address of the target memory range
 * @map_end: end address of the target memory range
 *
 * This function will setup direct mapping for memory range
 * [map_start, map_end) in top-down. That said, the page tables
 * will be allocated at the end of the memory, and we map the
 * memory in top-down.
 */
static void __init memory_map_top_down(unsigned long map_start,
				       unsigned long map_end)
{ … }

/**
 * memory_map_bottom_up - Map [map_start, map_end) bottom up
 * @map_start: start address of the target memory range
 * @map_end: end address of the target memory range
 *
 * This function will setup direct mapping for memory range
 * [map_start, map_end) in bottom-up. Since we have limited the
 * bottom-up allocation above the kernel, the page tables will
 * be allocated just above the kernel and we map the memory
 * in [map_start, map_end) in bottom-up.
 */
static void __init memory_map_bottom_up(unsigned long map_start,
					unsigned long map_end)
{ … }

/*
 * The real mode trampoline, which is required for bootstrapping CPUs
 * occupies only a small area under the low 1MB.  See reserve_real_mode()
 * for details.
 *
 * If KASLR is disabled the first PGD entry of the direct mapping is copied
 * to map the real mode trampoline.
 *
 * If KASLR is enabled, copy only the PUD which covers the low 1MB
 * area. This limits the randomization granularity to 1GB for both 4-level
 * and 5-level paging.
 */
static void __init init_trampoline(void)
{ … }

void __init init_mem_mapping(void)
{ … }

/*
 * Initialize an mm_struct to be used during poking and a pointer to be used
 * during patching.
 */
void __init poking_init(void)
{ … }

/*
 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
 * is valid. The argument is a physical page number.
 *
 * On x86, access has to be given to the first megabyte of RAM because that
 * area traditionally contains BIOS code and data regions used by X, dosemu,
 * and similar apps. Since they map the entire memory range, the whole range
 * must be allowed (for mapping), but any areas that would otherwise be
 * disallowed are flagged as being "zero filled" instead of rejected.
 * Access has to be given to non-kernel-ram areas as well, these contain the
 * PCI mmio resources as well as potential bios/acpi data regions.
 */
int devmem_is_allowed(unsigned long pagenr)
{ … }

void free_init_pages(const char *what, unsigned long begin, unsigned long end)
{ … }

/*
 * begin/end can be in the direct map or the "high kernel mapping"
 * used for the kernel image only.  free_init_pages() will do the
 * right thing for either kind of address.
 */
void free_kernel_image_pages(const char *what, void *begin, void *end)
{ … }

void __ref free_initmem(void)
{ … }

#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{ … }
#endif

void __init zone_sizes_init(void)
{ … }

__visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = …;

#ifdef CONFIG_ADDRESS_MASKING
DEFINE_PER_CPU(u64, tlbstate_untag_mask);
EXPORT_PER_CPU_SYMBOL(…);
#endif

void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{ … }

#ifdef CONFIG_SWAP
unsigned long arch_max_swapfile_size(void)
{ … }
#endif

#ifdef CONFIG_EXECMEM
static struct execmem_info execmem_info __ro_after_init;

struct execmem_info __init *execmem_arch_setup(void)
{ … }
#endif /* CONFIG_EXECMEM */
linux/arch/x86/mm/init.c