// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */

#include <linux/lockdep.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/suspend.h>
#include <linux/sched/mm.h>
#include <asm/sgx.h>
#include "encl.h"
#include "encls.h"
#include "sgx.h"

static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
				   struct sgx_backing *backing);

#define PCMDS_PER_PAGE …
/*
 * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
 * determine the page index associated with the first PCMD entry
 * within a PCMD page.
 */
#define PCMD_FIRST_MASK …

/**
 * reclaimer_writing_to_pcmd() - Query if any enclave page associated with
 *                               a PCMD page is in the process of being reclaimed.
 * @encl:        Enclave to which PCMD page belongs
 * @start_addr:  Address of enclave page using first entry within the PCMD page
 *
 * When an enclave page is reclaimed, some Paging Crypto MetaData (PCMD) is
 * stored. The PCMD data of a reclaimed enclave page contains enough
 * information for the processor to verify the page at the time
 * it is loaded back into the Enclave Page Cache (EPC).
 *
 * The backing storage to which enclave pages are reclaimed is laid out as
 * follows:
 * Encrypted enclave pages:SECS page:PCMD pages
 *
 * Each PCMD page contains the PCMD metadata of
 * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages.
 *
 * A PCMD page can only be truncated if it is (a) empty, and (b) not in the
 * process of getting data (and thus soon being non-empty). (b) is tested by
 * checking whether any enclave page sharing the PCMD page is in the process
 * of being reclaimed.
 *
 * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
 * intends to reclaim an enclave page - it means that the PCMD page
 * associated with that enclave page is about to get some data, and thus
 * even if the PCMD page is empty, it should not be truncated.
 *
 * Context: Enclave mutex (&sgx_encl->lock) must be held.
 * Return: 1 if the reclaimer is about to write to the PCMD page
 *         0 if the reclaimer has no intention to write to the PCMD page
 */
static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
				     unsigned long start_addr)
{
	…
}

/*
 * Calculate the byte offset of a PCMD struct associated with an enclave page.
 * PCMDs follow right after the EPC data in the backing storage. In addition to
 * the visible enclave pages, there's one extra page slot for the SECS, before
 * the PCMD structs.
 */
static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
							    unsigned long page_index)
{
	…
}
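/*
 * Illustrative sketch (editor's addition, not part of this file): one way
 * to derive the backing-store byte offset of a PCMD entry from the layout
 * described above - EPC page data first, then one page slot for the SECS,
 * then the PCMD structs. The standalone helper name and the @encl_size
 * parameter are hypothetical; the in-tree logic lives in the elided
 * function above.
 */
static inline pgoff_t pcmd_offset_sketch(unsigned long encl_size,
					 unsigned long page_index)
{
	/* Backing-store bytes occupied by the EPC data plus the SECS slot. */
	pgoff_t epc_end_off = encl_size + PAGE_SIZE;

	/* PCMD entries are packed right after that region. */
	return epc_end_off + page_index * sizeof(struct sgx_pcmd);
}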
/*
 * Free a page from the backing storage at the given page index.
 */
static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl,
						  unsigned long page_index)
{
	…
}

/*
 * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
 * Pages" in the SDM.
 */
static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
			   struct sgx_epc_page *epc_page,
			   struct sgx_epc_page *secs_page)
{
	…
}

static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
					  struct sgx_epc_page *secs_page)
{
	…
}

/*
 * Ensure the SECS page is not swapped out. Must be called with encl->lock
 * held to protect the enclave state, including the SECS, and to ensure the
 * SECS page is not swapped out again while being used.
 */
static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
{
	…
}

static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
						  struct sgx_encl_page *entry)
{
	…
}

static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
						       unsigned long addr,
						       unsigned long vm_flags)
{
	…
}

struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl, unsigned long addr)
{
	…
}

/**
 * sgx_encl_eaug_page() - Dynamically add page to initialized enclave
 * @vma:	VMA obtained from fault info from where page is accessed
 * @encl:	enclave accessing the page
 * @addr:	address that triggered the page fault
 *
 * When an initialized enclave accesses a page with no backing EPC page
 * on an SGX2 system, the EPC page can be added dynamically via the SGX2
 * ENCLS[EAUG] instruction.
 *
 * Returns: Appropriate vm_fault_t: VM_FAULT_NOPAGE when the PTE was installed
 * successfully, VM_FAULT_SIGBUS or VM_FAULT_OOM on error.
 */
static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
				     struct sgx_encl *encl, unsigned long addr)
{
	…
}

static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
{
	…
}

static void sgx_vma_open(struct vm_area_struct *vma)
{
	…
}

/**
 * sgx_encl_may_map() - Check if a requested VMA mapping is allowed
 * @encl:	an enclave pointer
 * @start:	lower bound of the address range, inclusive
 * @end:	upper bound of the address range, exclusive
 * @vm_flags:	VMA flags
 *
 * Iterate through the enclave pages contained within [@start, @end) to verify
 * that the requested permissions (a subset of {VM_READ, VM_WRITE, VM_EXEC})
 * do not contain any permissions that are not contained in the build time
 * permissions of any of the enclave pages within the given address range.
 *
 * An enclave creator must declare the strongest permissions that will be
 * needed for each enclave page. This check ensures that mappings have
 * permissions identical to or weaker than the declared permissions.
 *
 * Return: 0 on success, -EACCES otherwise
 */
int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
		     unsigned long end, unsigned long vm_flags)
{
	…
}
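/*
 * Illustrative sketch (editor's addition): the core of the per-page check
 * that sgx_encl_may_map() documents - a request is allowed only if the
 * requested subset of {VM_READ, VM_WRITE, VM_EXEC} is contained in the
 * page's declared build-time permissions. The helper name and its
 * @max_prot_bits parameter are hypothetical.
 */
static inline bool sgx_prot_allowed_sketch(unsigned long vm_flags,
					   unsigned long max_prot_bits)
{
	unsigned long req = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);

	/* Any requested bit missing from the declared bits denies the map. */
	return !(req & ~max_prot_bits);
}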
static int sgx_vma_mprotect(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end, unsigned long newflags)
{
	…
}

static int sgx_encl_debug_read(struct sgx_encl *encl, struct sgx_encl_page *page,
			       unsigned long addr, void *data)
{
	…
}

static int sgx_encl_debug_write(struct sgx_encl *encl, struct sgx_encl_page *page,
				unsigned long addr, void *data)
{
	…
}

/*
 * Load an enclave page to EPC if required, and take encl->lock.
 */
static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
						   unsigned long addr,
						   unsigned long vm_flags)
{
	…
}

static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
			  void *buf, int len, int write)
{
	…
}

const struct vm_operations_struct sgx_vm_ops = …;

/**
 * sgx_encl_release - Destroy an enclave instance
 * @ref:	address of a kref inside &sgx_encl
 *
 * Used together with kref_put(). Frees all the resources associated with the
 * enclave and the instance itself.
 */
void sgx_encl_release(struct kref *ref)
{
	…
}

/*
 * 'mm' is exiting and no longer needs mmu notifications.
 */
static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
				     struct mm_struct *mm)
{
	…
}

static void sgx_mmu_notifier_free(struct mmu_notifier *mn)
{
	…
}

static const struct mmu_notifier_ops sgx_mmu_notifier_ops = …;

static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl,
					    struct mm_struct *mm)
{
	…
}

int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
{
	…
}

/**
 * sgx_encl_cpumask() - Query which CPUs might be accessing the enclave
 * @encl: the enclave
 *
 * Some SGX functions require that no cached linear-to-physical address
 * mappings are present before they can succeed. For example, ENCLS[EWB]
 * copies a page from the enclave page cache to regular main memory, but
 * it fails if it cannot ensure that there are no cached
 * linear-to-physical address mappings referring to the page.
 *
 * SGX hardware flushes all cached linear-to-physical mappings on a CPU
 * when an enclave is exited via ENCLU[EEXIT] or an Asynchronous Enclave
 * Exit (AEX). Exiting an enclave will thus ensure cached linear-to-physical
 * address mappings are cleared, but coordination with the tracking done within
 * the SGX hardware is needed to support the SGX functions that depend on this
 * cache clearing.
 *
 * When the ENCLS[ETRACK] function is issued on an enclave, the hardware
 * tracks threads operating inside the enclave at that time. The SGX
 * hardware tracking requires that all the identified threads have exited
 * the enclave, and thus flushed their mappings, before a function such as
 * ENCLS[EWB] will be permitted.
 *
 * The following flow is used to support SGX functions that require that
 * no cached linear-to-physical address mappings are present:
 * 1) Execute ENCLS[ETRACK] to initiate hardware tracking.
 * 2) Use this function (sgx_encl_cpumask()) to query which CPUs might be
 *    accessing the enclave.
 * 3) Send IPI to identified CPUs, kicking them out of the enclave and
 *    thus flushing all locally cached linear-to-physical address mappings.
 * 4) Execute SGX function.
 *
 * Context: It is required to call this function after ENCLS[ETRACK].
 *          This will ensure that if any new mm appears (racing with
 *          sgx_encl_mm_add()) then the new mm will enter into the
 *          enclave with fresh linear-to-physical address mappings.
 *
 *          It is required that all IPIs are completed before a new
 *          ENCLS[ETRACK] is issued, so be sure to protect steps 1 to 3
 *          of the above flow with the enclave's mutex.
 *
 * Return: cpumask of CPUs that might be accessing @encl
 */
const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl)
{
	…
}
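/*
 * Illustrative sketch (editor's addition) of the four-step flow described
 * above. The caller and the empty IPI callback are hypothetical; the IPI
 * itself forces an AEX on the target CPUs, which is what flushes their
 * cached linear-to-physical mappings. Steps 1 to 3 run under the enclave's
 * mutex so that all IPIs complete before a new ENCLS[ETRACK] is issued.
 */
static void etrack_flush_sketch_ipi_cb(void *info)
{
	/* Nothing to do; entering the IPI handler already caused an AEX. */
}

static void etrack_flush_sketch(struct sgx_encl *encl)
{
	mutex_lock(&encl->lock);

	/* 1) Issue ENCLS[ETRACK] on the enclave to start hardware tracking. */

	/* 2) + 3) Kick CPUs that might be executing inside the enclave. */
	on_each_cpu_mask(sgx_encl_cpumask(encl), etrack_flush_sketch_ipi_cb,
			 NULL, 1);

	/* 4) A function such as ENCLS[EWB] may now be executed. */

	mutex_unlock(&encl->lock);
}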
static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, pgoff_t index)
{
	…
}

/**
 * __sgx_encl_get_backing() - Pin the backing storage
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Pin the backing storage pages for storing the encrypted contents and Paging
 * Crypto MetaData (PCMD) of an enclave page.
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
static int __sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
				  struct sgx_backing *backing)
{
	…
}

/*
 * When called from ksgxd, returns the mem_cgroup of a struct mm stored
 * in the enclave's mm_list. When not called from ksgxd, just returns
 * the mem_cgroup of the current task.
 */
static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
{
	…
}

/**
 * sgx_encl_alloc_backing() - create a new backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Create a backing page for loading data back into an EPC page with ELDU.
 * When called from ksgxd, this function sets the active memcg from one of
 * the mms in the enclave's mm_list prior to any backing page allocation,
 * in order to ensure that shmem page allocations are charged to the
 * enclave. It takes a reference on a new backing page which must be
 * dropped with a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
			   struct sgx_backing *backing)
{
	…
}

/**
 * sgx_encl_lookup_backing() - retrieve an existing backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Retrieve a backing page for loading data back into an EPC page with ELDU.
 * It is the caller's responsibility to ensure that it is appropriate to use
 * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is
 * not used correctly, this will cause an allocation which is not accounted for.
 * This function takes a reference on an existing backing page which must be
 * dropped with a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
				   struct sgx_backing *backing)
{
	…
}

/**
 * sgx_encl_put_backing() - Unpin the backing storage
 * @backing:	data for accessing backing storage for the page
 */
void sgx_encl_put_backing(struct sgx_backing *backing)
{
	…
}
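/*
 * Illustrative sketch (editor's addition) of the pairing rule stated above:
 * both sgx_encl_alloc_backing() and sgx_encl_lookup_backing() take a
 * reference on the backing pages that must be dropped with a corresponding
 * sgx_encl_put_backing(). The caller below is hypothetical.
 */
static int backing_pairing_sketch(struct sgx_encl *encl,
				  unsigned long page_index)
{
	struct sgx_backing b;
	int ret;

	ret = sgx_encl_alloc_backing(encl, page_index, &b);
	if (ret)
		return ret;

	/* ... use b.contents and b.pcmd while the pages stay pinned ... */

	sgx_encl_put_backing(&b);
	return 0;
}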
static int sgx_encl_test_and_clear_young_cb(pte_t *ptep, unsigned long addr,
					    void *data)
{
	…
}

/**
 * sgx_encl_test_and_clear_young() - Test and reset the accessed bit
 * @mm:		mm_struct that is checked
 * @page:	enclave page to be tested for recent access
 *
 * Checks the Accessed (A) bit from the PTE corresponding to the enclave page
 * and clears it.
 *
 * Return: 1 if the page has been recently accessed and 0 if not.
 */
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
				  struct sgx_encl_page *page)
{
	…
}

struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
					  unsigned long offset,
					  u64 secinfo_flags)
{
	…
}

/**
 * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
 * @encl: the enclave
 * @addr: page aligned pointer to single page for which PTEs will be removed
 *
 * Multiple VMAs may have an enclave page mapped. Remove the PTE mapping
 * @addr from each VMA. Ensure that the page fault handler is ready to handle
 * new mappings of @addr before calling this function.
 */
void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
{
	…
}

/**
 * sgx_alloc_va_page() - Allocate a Version Array (VA) page
 * @reclaim: Reclaim EPC pages directly if none available. The enclave
 *           mutex should not be held if this is set.
 *
 * Allocate a free EPC page and convert it to a Version Array (VA) page.
 *
 * Return:
 *   a VA page,
 *   -errno otherwise
 */
struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
{
	…
}

/**
 * sgx_alloc_va_slot - allocate a VA slot
 * @va_page:	a &struct sgx_va_page instance
 *
 * Allocates a slot from a &struct sgx_va_page instance.
 *
 * Return: offset of the slot inside the VA page
 */
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page)
{
	…
}

/**
 * sgx_free_va_slot - free a VA slot
 * @va_page:	a &struct sgx_va_page instance
 * @offset:	offset of the slot inside the VA page
 *
 * Frees a slot from a &struct sgx_va_page instance.
 */
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset)
{
	…
}

/**
 * sgx_va_page_full - is the VA page full?
 * @va_page:	a &struct sgx_va_page instance
 *
 * Return: true if all slots have been taken
 */
bool sgx_va_page_full(struct sgx_va_page *va_page)
{
	…
}

/**
 * sgx_encl_free_epc_page - free an EPC page assigned to an enclave
 * @page:	EPC page to be freed
 *
 * Free an EPC page assigned to an enclave. It does EREMOVE for the page, and
 * only upon success, it puts the page back to the free page list. Otherwise, it
 * gives a WARNING to indicate the page is leaked.
 */
void sgx_encl_free_epc_page(struct sgx_epc_page *page)
{
	…
}
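/*
 * Illustrative sketch (editor's addition) of the contract documented above
 * for sgx_encl_free_epc_page(): EREMOVE first, return the page to the free
 * list only on success, and WARN (leaking the page) otherwise. This mirrors
 * the documented behavior, not the elided body.
 */
static void free_epc_page_sketch(struct sgx_epc_page *page)
{
	/* EREMOVE fails if, e.g., the page is still in use by the enclave. */
	if (WARN_ON_ONCE(__eremove(sgx_get_epc_virt_addr(page))))
		return;	/* leaked on purpose rather than reused unsafely */

	sgx_free_epc_page(page);
}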