// SPDX-License-Identifier: GPL-2.0 /* * Xen leaves the responsibility for maintaining p2m mappings to the * guests themselves, but it must also access and update the p2m array * during suspend/resume when all the pages are reallocated. * * The logical flat p2m table is mapped to a linear kernel memory area. * For accesses by Xen a three-level tree linked via mfns only is set up to * allow the address space to be sparse. * * Xen * | * p2m_top_mfn * / \ * p2m_mid_mfn p2m_mid_mfn * / / * p2m p2m p2m ... * * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. * * The p2m_top_mfn level is limited to 1 page, so the maximum representable * pseudo-physical address space is: * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages * * P2M_PER_PAGE depends on the architecture, as an mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to * 512 and 1024 entries respectively. * * In short, these structures contain the Machine Frame Number (MFN) of the PFN. * * However, not all entries are filled with MFNs. Specifically, for all other * leaf entries, or for the top root, or middle one, for which there is a void * entry, we assume it is "missing". So (for example) * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. * We have a dedicated page p2m_missing with all entries being * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns. * * We also have the possibility of setting 1-1 mappings on certain regions, so * that: * pfn_to_mfn(0xc0000)=0xc0000 * * The benefit of this is that for non-RAM regions (think * PCI BARs, or ACPI spaces) we can create mappings easily because we * get the PFN value to match the MFN. * * For this to work efficiently we have one new page p2m_identity. All entries * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only * recognizes that and MFNs, no other fancy value). 
* * On lookup we spot that the entry points to p2m_identity and return the * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. * If the entry points to an allocated page, we just proceed as before and * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in * appropriate functions (pfn_to_mfn). * * The reason for having the IDENTITY_FRAME_BIT instead of just returning the * PFN is that we could find ourselves in a situation where pfn_to_mfn(pfn)==pfn for a * non-identity pfn. To protect ourselves against that, we elect to set (and get) the * IDENTITY_FRAME_BIT on all identity mapped PFNs. */ #include <linux/init.h> #include <linux/export.h> #include <linux/list.h> #include <linux/hash.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/memblock.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/acpi.h> #include <asm/cache.h> #include <asm/setup.h> #include <linux/uaccess.h> #include <asm/xen/page.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> #include <xen/balloon.h> #include <xen/grant_table.h> #include <xen/hvc-console.h> #include "xen-ops.h" #define P2M_MID_PER_PAGE … #define P2M_TOP_PER_PAGE … #define MAX_P2M_PFN … #define PMDS_PER_MID_PAGE … unsigned long *xen_p2m_addr __read_mostly; EXPORT_SYMBOL_GPL(…); unsigned long xen_p2m_size __read_mostly; EXPORT_SYMBOL_GPL(…); unsigned long xen_max_p2m_pfn __read_mostly; EXPORT_SYMBOL_GPL(…); #ifdef CONFIG_XEN_MEMORY_HOTPLUG_LIMIT #define P2M_LIMIT … #else #define P2M_LIMIT … #endif static DEFINE_SPINLOCK(p2m_update_lock); static unsigned long *p2m_mid_missing_mfn; static unsigned long *p2m_top_mfn; static unsigned long **p2m_top_mfn_p; static unsigned long *p2m_missing; static unsigned long *p2m_identity; static pte_t *p2m_missing_pte; static pte_t *p2m_identity_pte; /* * Hint at last populated PFN. 
* * Used to set HYPERVISOR_shared_info->arch.max_pfn so the toolstack * can avoid scanning the whole P2M (which may be sized to account for * hotplugged memory). */ static unsigned long xen_p2m_last_pfn; static inline unsigned p2m_top_index(unsigned long pfn) { … } static inline unsigned p2m_mid_index(unsigned long pfn) { … } static void p2m_top_mfn_init(unsigned long *top) { … } static void p2m_top_mfn_p_init(unsigned long **top) { … } static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) { … } static void p2m_init(unsigned long *p2m) { … } static void p2m_init_identity(unsigned long *p2m, unsigned long pfn) { … } static void * __ref alloc_p2m_page(void) { … } static void __ref free_p2m_page(void *p) { … } /* * Build the parallel p2m_top_mfn and p2m_mid_mfn structures * * This is called both at boot time, and after resuming from suspend: * - At boot time we're called rather early, and must use alloc_bootmem*() * to allocate memory. * * - After resume we're called from within stop_machine, but the mfn * tree should already be completely allocated. */ void __ref xen_build_mfn_list_list(void) { … } void xen_setup_mfn_list_list(void) { … } /* Set up p2m_top to point to the domain-builder provided p2m pages */ void __init xen_build_dynamic_phys_to_machine(void) { … } #define P2M_TYPE_IDENTITY … #define P2M_TYPE_MISSING … #define P2M_TYPE_PFN … #define P2M_TYPE_UNKNOWN … static int xen_p2m_elem_type(unsigned long pfn) { … } static void __init xen_rebuild_p2m_list(unsigned long *p2m) { … } void __init xen_vmalloc_p2m_tree(void) { … } unsigned long get_phys_to_machine(unsigned long pfn) { … } EXPORT_SYMBOL_GPL(…); /* * Allocate new pmd(s). It is checked whether the old pmd is still in place. * If not, nothing is changed. This is okay as the only reason for allocating * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an individual * pmd. 
*/ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) { … } /* * Fully allocate the p2m structure for a given pfn. We need to check * that both the top and mid levels are allocated, and make sure the * parallel mfn tree is kept in sync. We may race with other cpus, so * the new pages are installed with cmpxchg; if we lose the race then * simply free the page we allocated and use the one that's there. */ int xen_alloc_p2m_entry(unsigned long pfn) { … } EXPORT_SYMBOL(…); unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e) { … } bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) { … } bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) { … } int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) { … } int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count) { … } /* Remapped non-RAM areas */ #define NR_NONRAM_REMAP … static struct nonram_remap { … } xen_nonram_remap[NR_NONRAM_REMAP] __ro_after_init; static unsigned int nr_nonram_remap __ro_after_init; /* * Do the real remapping of non-RAM regions as specified in the * xen_nonram_remap[] array. * In case of an error just crash the system. */ void __init xen_do_remap_nonram(void) { … } #ifdef CONFIG_ACPI /* * Xen variant of acpi_os_ioremap() taking potentially remapped non-RAM * regions into account. * Any attempt to map an area crossing a remap boundary will produce a * WARN() splat. * phys is related to remap->maddr on input and will be rebased to remap->paddr. */ static void __iomem *xen_acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { … } #endif /* CONFIG_ACPI */ /* * Add a new non-RAM remap entry. * In case of no free entry found, just crash the system. 
*/ void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr, unsigned long size) { … } #ifdef CONFIG_XEN_DEBUG_FS #include <linux/debugfs.h> static int p2m_dump_show(struct seq_file *m, void *v) { … } DEFINE_SHOW_ATTRIBUTE(…); static struct dentry *d_mmu_debug; static int __init xen_p2m_debugfs(void) { … } fs_initcall(xen_p2m_debugfs); #endif /* CONFIG_XEN_DEBUG_FS */