// SPDX-License-Identifier: GPL-2.0

/*
 * Xen leaves the responsibility for maintaining p2m mappings to the
 * guests themselves, but it must also access and update the p2m array
 * during suspend/resume when all the pages are reallocated.
 *
 * The logical flat p2m table is mapped to a linear kernel memory area.
 * For accesses by Xen a three-level tree linked via mfns only is set up to
 * allow the address space to be sparse.
 *
 *               Xen
 *                |
 *          p2m_top_mfn
 *              /    \
 *   p2m_mid_mfn    p2m_mid_mfn
 *           /            /
 *    p2m p2m p2m ...
 *
 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
 *
 * The p2m_top_mfn level is limited to 1 page, so the maximum representable
 * pseudo-physical address space is:
 *	P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
 *
 * P2M_PER_PAGE depends on the architecture, as an mfn is always
 * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to
 * 512 and 1024 entries respectively.
 *
 * In short, these structures contain the Machine Frame Number (MFN) of the
 * PFN.
 *
 * However, not all entries are filled with MFNs. Any leaf, middle, or
 * top-level entry that is void is assumed to be "missing". So (for example)
 *	pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
 * We have a dedicated page p2m_missing with all entries being
 * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m
 * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns.
 *
 * We also have the possibility of setting 1-1 mappings on certain regions, so
 * that:
 *	pfn_to_mfn(0xc0000)=0xc0000
 *
 * The benefit of this is that for non-RAM regions (think PCI BARs, or ACPI
 * spaces) we can create the mappings easily, because the PFN value matches
 * the MFN.
 *
 * For this to work efficiently we have one new page, p2m_identity. All
 * entries in p2m_identity are set to INVALID_P2M_ENTRY type (the Xen
 * toolstack only recognizes that and MFNs, no other fancy value).
 *
 * On lookup we spot that the entry points to p2m_identity and return the
 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
 * If the entry points to an allocated page, we just proceed as before and
 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
 * the appropriate functions (pfn_to_mfn).
 *
 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
 * PFN is that we could find ourselves in a situation where pfn_to_mfn(pfn) ==
 * pfn for a non-identity pfn. To protect ourselves against such bugs we elect
 * to set (and get) the IDENTITY_FRAME_BIT on all identity mapped PFNs.
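 *
 * As a worked illustration of the tree walk (a sketch only, not the literal
 * kernel code; the concrete numbers assume x86-64, i.e. 4 KiB pages and
 * 8-byte unsigned long, so P2M_PER_PAGE = P2M_MID_PER_PAGE = 512):
 *
 *	top_idx = pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); // pfn / 262144
 *	mid_idx = (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; // slot in mid page
 *	idx     = pfn % P2M_PER_PAGE;                      // slot in leaf page
 *	mfn     = p2m[top_idx][mid_idx][idx];              // conceptually
 *
 * A walk that ends in p2m_missing yields INVALID_P2M_ENTRY; one that ends
 * in p2m_identity yields IDENTITY_FRAME(pfn), i.e. the pfn with
 * IDENTITY_FRAME_BIT set, as described above.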
 */

#include <linux/init.h>
#include <linux/export.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <asm/cache.h>
#include <asm/setup.h>
#include <linux/uaccess.h>

#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <xen/balloon.h>
#include <xen/grant_table.h>

#include "xen-ops.h"

#define P2M_MID_PER_PAGE	…
#define P2M_TOP_PER_PAGE	…
#define MAX_P2M_PFN		…

#define PMDS_PER_MID_PAGE	…

unsigned long *xen_p2m_addr __read_mostly;
EXPORT_SYMBOL_GPL(…);
unsigned long xen_p2m_size __read_mostly;
EXPORT_SYMBOL_GPL(…);
unsigned long xen_max_p2m_pfn __read_mostly;
EXPORT_SYMBOL_GPL(…);

#ifdef CONFIG_XEN_MEMORY_HOTPLUG_LIMIT
#define P2M_LIMIT …
#else
#define P2M_LIMIT …
#endif

static DEFINE_SPINLOCK(p2m_update_lock);

static unsigned long *p2m_mid_missing_mfn;
static unsigned long *p2m_top_mfn;
static unsigned long **p2m_top_mfn_p;
static unsigned long *p2m_missing;
static unsigned long *p2m_identity;
static pte_t *p2m_missing_pte;
static pte_t *p2m_identity_pte;

/*
 * Hint at last populated PFN.
 *
 * Used to set HYPERVISOR_shared_info->arch.max_pfn so the toolstack
 * can avoid scanning the whole P2M (which may be sized to account for
 * hotplugged memory).
 */
static unsigned long xen_p2m_last_pfn;

static inline unsigned p2m_top_index(unsigned long pfn)
{
	…
}

static inline unsigned p2m_mid_index(unsigned long pfn)
{
	…
}

static void p2m_top_mfn_init(unsigned long *top)
{
	…
}

static void p2m_top_mfn_p_init(unsigned long **top)
{
	…
}

static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
{
	…
}

static void p2m_init(unsigned long *p2m)
{
	…
}

static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
{
	…
}

static void * __ref alloc_p2m_page(void)
{
	…
}

static void __ref free_p2m_page(void *p)
{
	…
}

/*
 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures.
 *
 * This is called both at boot time, and after resuming from suspend:
 * - At boot time we're called rather early, and must use alloc_bootmem*()
 *   to allocate memory.
 *
 * - After resume we're called from within stop_machine, but the mfn
 *   tree should already be completely allocated.
 */
void __ref xen_build_mfn_list_list(void)
{
	…
}

void xen_setup_mfn_list_list(void)
{
	…
}

/* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
	…
}

#define P2M_TYPE_IDENTITY	…
#define P2M_TYPE_MISSING	…
#define P2M_TYPE_PFN		…
#define P2M_TYPE_UNKNOWN	…

static int xen_p2m_elem_type(unsigned long pfn)
{
	…
}

static void __init xen_rebuild_p2m_list(unsigned long *p2m)
{
	…
}

void __init xen_vmalloc_p2m_tree(void)
{
	…
}

unsigned long get_phys_to_machine(unsigned long pfn)
{
	…
}
EXPORT_SYMBOL_GPL(…);

/*
 * Allocate new pmd(s). It is checked whether the old pmd is still in place.
 * If not, nothing is changed. This is okay as the only reason for allocating
 * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an
 * individual pmd.
 */
static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
{
	…
}

/*
 * Fully allocate the p2m structure for a given pfn. We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync. We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
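 *
 * As a rough sketch of that install pattern (simplified, hypothetical
 * names such as top/top_idx/mid_missing; not the literal function body):
 *
 *	unsigned long **mid = alloc_p2m_page();
 *
 *	for (i = 0; i < P2M_MID_PER_PAGE; i++)
 *		mid[i] = p2m_missing;	// every slot starts out "missing"
 *	if (cmpxchg(&top[top_idx], mid_missing, mid) != mid_missing)
 *		free_p2m_page(mid);	// lost the race: use the winner's page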
 */
int xen_alloc_p2m_entry(unsigned long pfn)
{
	…
}
EXPORT_SYMBOL(…);

unsigned long __init set_phys_range_identity(unsigned long pfn_s,
					     unsigned long pfn_e)
{
	…
}

bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	…
}

bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	…
}

int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
			    struct gnttab_map_grant_ref *kmap_ops,
			    struct page **pages, unsigned int count)
{
	…
}

int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
			      struct gnttab_unmap_grant_ref *kunmap_ops,
			      struct page **pages, unsigned int count)
{
	…
}

#ifdef CONFIG_XEN_DEBUG_FS
#include <linux/debugfs.h>

static int p2m_dump_show(struct seq_file *m, void *v)
{
	…
}
DEFINE_SHOW_ATTRIBUTE(…);

static struct dentry *d_mmu_debug;

static int __init xen_p2m_debugfs(void)
{
	…
}
fs_initcall(xen_p2m_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */
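
/*
 * Worked example of the identity scheme described in the header comment
 * (illustrative only; the pfn values are arbitrary):
 *
 *	set_phys_range_identity(0xc0000, 0xd0000);
 *
 * After this, the affected entries carry IDENTITY_FRAME_BIT, so
 * get_phys_to_machine(0xc0000) yields IDENTITY_FRAME(0xc0000), while
 * pfn_to_mfn(0xc0000) unmasks the bit and returns plain 0xc0000, matching
 * the 1-1 mapping described above.
 */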