linux/drivers/vfio/vfio_iommu_type1.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO: IOMMU DMA mapping support for Type1 IOMMU
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <[email protected]>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, [email protected]
 *
 * We arbitrarily define a Type1 IOMMU as one matching the below code.
 * It could be called the x86 IOMMU as it's designed for AMD-Vi & Intel
 * VT-d, but that makes it harder to re-use as theoretically anyone
 * implementing a similar IOMMU could make use of this.  We expect the
 * IOMMU to support the IOMMU API and have few to no restrictions around
 * the IOVA range that can be mapped.  The Type1 IOMMU is currently
 * optimized for relatively static mappings of a userspace process with
 * userspace pages pinned into memory.  We also assume devices and IOMMU
 * domains are PCI based as the IOMMU API is still centered around a
 * device/bus interface rather than a group interface.
 */

#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/kthread.h>
#include <linux/rbtree.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include "vfio.h"

#define DRIVER_VERSION	"0.2"
#define DRIVER_AUTHOR	"Alex Williamson <[email protected]>"
#define DRIVER_DESC	"Type1 IOMMU driver for VFIO"

static bool allow_unsafe_interrupts;
module_param_named(allow_unsafe_interrupts,
		   allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(allow_unsafe_interrupts,
		 "Enable VFIO IOMMU support on platforms without interrupt remapping support.");

static bool disable_hugepages;
module_param_named(disable_hugepages,
		   disable_hugepages, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable_hugepages,
		 "Disable VFIO IOMMU support for IOMMU hugepages.");

static unsigned int dma_entry_limit __read_mostly = U16_MAX;
module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644);
MODULE_PARM_DESC(dma_entry_limit,
		 "Maximum number of user DMA mappings per container (65535).");

struct vfio_iommu {};

struct vfio_domain {};

struct vfio_dma {};

struct vfio_batch {};

struct vfio_iommu_group {};

struct vfio_iova {};

/*
 * Guest RAM pinning working set or DMA target
 */
struct vfio_pfn {};

struct vfio_regions {};

#define DIRTY_BITMAP_BYTES(n)	(ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)

/*
 * The number-of-bits argument to bitmap_set() is an unsigned int, which is
 * further cast to a signed int by the unaligned multi-bit helper,
 * __bitmap_set().
 * The maximum bitmap size supported is therefore 2^31 bits, i.e. 2^31 / 2^3
 * bits-per-byte = 2^28 bytes (256 MB) of bitmap, which covers 2^31 pages *
 * 2^12 bytes/page = 2^43 bytes (8 TB) on a 4K page system.
 */
#define DIRTY_BITMAP_PAGES_MAX	((u64)INT_MAX)
#define DIRTY_BITMAP_SIZE_MAX	DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
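
/*
 * Illustrative compile-time check of the arithmetic above (a sketch; it
 * assumes the DIRTY_BITMAP_BYTES() definition given above): tracking 2^31
 * pages at one bit per page needs 2^28 bytes (256 MB) of bitmap, and 2^31
 * pages of 2^12 bytes each cover 2^43 bytes (8 TB).
 */
static_assert(DIRTY_BITMAP_BYTES(1ULL << 31) == (1ULL << 28));
static_assert((1ULL << 31) * (1ULL << 12) == (1ULL << 43));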

static int put_pfn(unsigned long pfn, int prot);

static struct vfio_iommu_group*
vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
			    struct iommu_group *iommu_group);

/*
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
				      dma_addr_t start, size_t size)
{}
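
/*
 * A sketch of the interval lookup vfio_find_dma() performs (hypothetical
 * helper; it assumes struct vfio_dma carries ->iova, ->size and an rb_node
 * named ->node, and that struct vfio_iommu keeps mappings in a ->dma_list
 * rb_root sorted by iova).  Any vfio_dma overlapping [start, start + size)
 * is returned.
 */
static struct vfio_dma *vfio_find_dma_sketch(struct vfio_iommu *iommu,
					     dma_addr_t start, size_t size)
{
	struct rb_node *node = iommu->dma_list.rb_node;

	while (node) {
		struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);

		if (start + size <= dma->iova)
			node = node->rb_left;	/* entirely below this entry */
		else if (start >= dma->iova + dma->size)
			node = node->rb_right;	/* entirely above this entry */
		else
			return dma;		/* overlap found */
	}

	return NULL;
}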

static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
						dma_addr_t start, u64 size)
{}

static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
{}

static void vfio_unlink_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
{}


static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize)
{}

static void vfio_dma_bitmap_free(struct vfio_dma *dma)
{}

static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize)
{}

static void vfio_iommu_populate_bitmap_full(struct vfio_iommu *iommu)
{}

static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize)
{}

static void vfio_dma_bitmap_free_all(struct vfio_iommu *iommu)
{}

/*
 * Helper Functions for host iova-pfn list
 */
static struct vfio_pfn *vfio_find_vpfn(struct vfio_dma *dma, dma_addr_t iova)
{}

static void vfio_link_pfn(struct vfio_dma *dma,
			  struct vfio_pfn *new)
{}

static void vfio_unlink_pfn(struct vfio_dma *dma, struct vfio_pfn *old)
{}

static int vfio_add_to_pfn_list(struct vfio_dma *dma, dma_addr_t iova,
				unsigned long pfn)
{}

static void vfio_remove_from_pfn_list(struct vfio_dma *dma,
				      struct vfio_pfn *vpfn)
{}

static struct vfio_pfn *vfio_iova_get_vfio_pfn(struct vfio_dma *dma,
					       unsigned long iova)
{}

static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn)
{}

static int mm_lock_acct(struct task_struct *task, struct mm_struct *mm,
			bool lock_cap, long npage)
{}

static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
{}

/*
 * Some mappings aren't backed by a struct page, for example an mmap'd
 * MMIO range for our own or another device.  These use a different
 * pfn conversion and shouldn't be tracked as locked pages.
 * For compound pages, any driver that sets the reserved bit in the head
 * page needs to set the reserved bit in all subpages to be safe.
 */
static bool is_invalid_reserved_pfn(unsigned long pfn)
{}
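
/*
 * A minimal sketch of the check described above (hypothetical helper,
 * modeled on the mainline implementation): pfns without a struct page, and
 * pages with the Reserved flag set, are treated as invalid/reserved and are
 * not accounted as locked memory.
 */
static bool is_invalid_reserved_pfn_sketch(unsigned long pfn)
{
	if (pfn_valid(pfn))
		return PageReserved(pfn_to_page(pfn));

	return true;
}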

static int put_pfn(unsigned long pfn, int prot)
{}

#define VFIO_BATCH_MAX_CAPACITY	(PAGE_SIZE / sizeof(struct page *))

static void vfio_batch_init(struct vfio_batch *batch)
{}

static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
{}

static void vfio_batch_fini(struct vfio_batch *batch)
{}

static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
			    unsigned long vaddr, unsigned long *pfn,
			    bool write_fault)
{}

/*
 * Returns the positive number of pfns successfully obtained or a negative
 * error code.
 */
static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
			  long npages, int prot, unsigned long *pfn,
			  struct page **pages)
{}

/*
 * Attempt to pin pages.  We really don't want to track all the pfns and
 * the iommu can only map chunks of consecutive pfns anyway, so get the
 * first page and all consecutive pages with the same locking.
 */
static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
				  long npage, unsigned long *pfn_base,
				  unsigned long limit, struct vfio_batch *batch)
{}
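
/*
 * A minimal helper sketch for the strategy described above (hypothetical,
 * not part of the driver): given an array of pinned pfns, count how many
 * leading entries are physically contiguous with the first one, so the
 * whole run can be handed to the IOMMU as a single mapping.
 */
static long vfio_contiguous_pfn_run(const unsigned long *pfns, long npage)
{
	long i;

	if (npage <= 0)
		return 0;

	for (i = 1; i < npage; i++)
		if (pfns[i] != pfns[0] + i)
			break;

	return i;
}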

static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
				    unsigned long pfn, long npage,
				    bool do_accounting)
{}

static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
				  unsigned long *pfn_base, bool do_accounting)
{}

static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
				    bool do_accounting)
{}

static int vfio_iommu_type1_pin_pages(void *iommu_data,
				      struct iommu_group *iommu_group,
				      dma_addr_t user_iova,
				      int npage, int prot,
				      struct page **pages)
{}

static void vfio_iommu_type1_unpin_pages(void *iommu_data,
					 dma_addr_t user_iova, int npage)
{}

static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
			    struct list_head *regions,
			    struct iommu_iotlb_gather *iotlb_gather)
{}

/*
 * Generally, VFIO needs to unpin remote pages after each IOTLB flush.
 * Therefore, when using the IOTLB flush sync interface, VFIO needs to keep
 * track of these regions (currently using a list).
 *
 * This value specifies maximum number of regions for each IOTLB flush sync.
 */
#define VFIO_IOMMU_TLB_SYNC_MAX		512

static size_t unmap_unpin_fast(struct vfio_domain *domain,
			       struct vfio_dma *dma, dma_addr_t *iova,
			       size_t len, phys_addr_t phys, long *unlocked,
			       struct list_head *unmapped_list,
			       int *unmapped_cnt,
			       struct iommu_iotlb_gather *iotlb_gather)
{}

static size_t unmap_unpin_slow(struct vfio_domain *domain,
			       struct vfio_dma *dma, dma_addr_t *iova,
			       size_t len, phys_addr_t phys,
			       long *unlocked)
{}

static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
			     bool do_accounting)
{}

static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
{}

static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu)
{}

static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
			      struct vfio_dma *dma, dma_addr_t base_iova,
			      size_t pgsize)
{}

static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
				  dma_addr_t iova, size_t size, size_t pgsize)
{}

static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
{}

/*
 * Notify VFIO drivers using vfio_register_emulated_iommu_dev() to invalidate
 * and unmap iovas within the range we're about to unmap. Drivers MUST unpin
 * pages in response to an invalidation.
 */
static void vfio_notify_dma_unmap(struct vfio_iommu *iommu,
				  struct vfio_dma *dma)
{}
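
/*
 * A sketch of the notification walk above (heavily hedged: it assumes struct
 * vfio_iommu tracks emulated devices on ->device_list, linked through
 * vfio_device->iommu_entry and protected by ->device_list_lock, that struct
 * vfio_dma carries ->iova/->size, and that the vfio_device_ops ->dma_unmap()
 * callback exists as in mainline).  iommu->lock is dropped across the
 * callbacks because the drivers' unpin path will take it again.
 */
static void vfio_notify_dma_unmap_sketch(struct vfio_iommu *iommu,
					 struct vfio_dma *dma)
{
	struct vfio_device *device;

	if (list_empty(&iommu->device_list))
		return;

	mutex_lock(&iommu->device_list_lock);
	mutex_unlock(&iommu->lock);

	list_for_each_entry(device, &iommu->device_list, iommu_entry)
		device->ops->dma_unmap(device, dma->iova, dma->size);

	mutex_unlock(&iommu->device_list_lock);
	mutex_lock(&iommu->lock);
}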

static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
			     struct vfio_iommu_type1_dma_unmap *unmap,
			     struct vfio_bitmap *bitmap)
{}

static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
			  unsigned long pfn, long npage, int prot)
{}

static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
			    size_t map_size)
{}

/*
 * Check that a dma map request falls within a valid iova range
 */
static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
				      dma_addr_t start, dma_addr_t end)
{}
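
/*
 * A sketch of the range check above (hypothetical helper; it assumes struct
 * vfio_iommu keeps its valid ranges sorted on ->iova_list and that struct
 * vfio_iova carries ->start/->end/->list).  The request must fall entirely
 * inside one valid range; an empty list (e.g. an mdev-only container)
 * accepts everything.
 */
static bool vfio_iova_range_valid_sketch(struct vfio_iommu *iommu,
					 dma_addr_t start, dma_addr_t end)
{
	struct vfio_iova *node;

	list_for_each_entry(node, &iommu->iova_list, list) {
		if (start >= node->start && end <= node->end)
			return true;
	}

	return list_empty(&iommu->iova_list);
}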

static int vfio_change_dma_owner(struct vfio_dma *dma)
{}

static int vfio_dma_do_map(struct vfio_iommu *iommu,
			   struct vfio_iommu_type1_dma_map *map)
{}

static int vfio_iommu_replay(struct vfio_iommu *iommu,
			     struct vfio_domain *domain)
{}

/*
 * We change our unmap behavior slightly depending on whether the IOMMU
 * supports fine-grained superpages.  IOMMUs like AMD-Vi will use a superpage
 * for practically any contiguous power-of-two mapping we give them.  This means
 * we don't need to look for contiguous chunks ourselves to make unmapping
 * more efficient.  On IOMMUs with coarse-grained superpages, like Intel VT-d
 * with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
 * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
 * hugetlbfs is in use.
 */
static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head *regions)
{}
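
/*
 * A simplified sketch of the probe above (assumptions: struct vfio_domain
 * carries the backing struct iommu_domain in ->domain and a bool ->fgsp
 * flag, struct vfio_iova carries ->start/->end/->list, and iommu_map()
 * takes a gfp_t as on recent kernels; protection flags are simplified).
 * Map two contiguous pages, then unmap just one: a fine-grained-superpage
 * IOMMU merged the pair on its own and tears the whole mapping down, while
 * a coarse-grained one unmaps exactly PAGE_SIZE.
 */
static void vfio_probe_fgsp_sketch(struct vfio_domain *domain,
				   struct list_head *regions)
{
	struct vfio_iova *region;
	struct page *pages;
	dma_addr_t start;

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, 1);	/* order 1: two pages */
	if (!pages)
		return;

	list_for_each_entry(region, regions, list) {
		start = ALIGN(region->start, PAGE_SIZE * 2);
		if (start >= region->end || region->end - start < PAGE_SIZE * 2)
			continue;

		if (!iommu_map(domain->domain, start, page_to_phys(pages),
			       PAGE_SIZE * 2, IOMMU_READ | IOMMU_WRITE,
			       GFP_KERNEL)) {
			size_t unmapped = iommu_unmap(domain->domain, start,
						      PAGE_SIZE);

			if (unmapped == PAGE_SIZE)
				iommu_unmap(domain->domain, start + PAGE_SIZE,
					    PAGE_SIZE);
			else
				domain->fgsp = true;	/* whole chunk went away */
		}
		break;
	}

	__free_pages(pages, 1);
}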

static struct vfio_iommu_group *find_iommu_group(struct vfio_domain *domain,
						 struct iommu_group *iommu_group)
{}

static struct vfio_iommu_group*
vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
			    struct iommu_group *iommu_group)
{}

static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
				  phys_addr_t *base)
{}

/*
 * This is a helper function to insert an address range into the iova list.
 * The list is initially created with a single entry corresponding to
 * the IOMMU domain geometry to which the device group is attached.
 * The list aperture gets modified when a new domain is added to the
 * container if the new aperture doesn't conflict with the current one
 * or with any existing dma mappings. The list is also modified to
 * exclude any reserved regions associated with the device group.
 */
static int vfio_iommu_iova_insert(struct list_head *head,
				  dma_addr_t start, dma_addr_t end)
{}
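
/*
 * A sketch of the insert helper above (modeled on the mainline version; the
 * ->start/->end/->list layout of struct vfio_iova is an assumption since the
 * struct body is not shown).  Adding at the "tail" of @head places the new
 * node immediately before @head, which keeps a sorted list sorted when @head
 * is the node the new range must precede.
 */
static int vfio_iommu_iova_insert_sketch(struct list_head *head,
					 dma_addr_t start, dma_addr_t end)
{
	struct vfio_iova *region;

	region = kmalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	INIT_LIST_HEAD(&region->list);
	region->start = start;
	region->end = end;

	list_add_tail(&region->list, head);
	return 0;
}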

/*
 * Check whether the new iommu aperture conflicts with the existing aperture
 * or with any existing dma mappings.
 */
static bool vfio_iommu_aper_conflict(struct vfio_iommu *iommu,
				     dma_addr_t start, dma_addr_t end)
{}
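
/*
 * A sketch of the conflict check above (hypothetical helper; it assumes the
 * sorted ->iova_list and the vfio_iova fields used in the other sketches).
 * A new aperture conflicts if it is disjoint from the current one, or if
 * shrinking to it would cut off an existing dma mapping.
 */
static bool vfio_iommu_aper_conflict_sketch(struct vfio_iommu *iommu,
					    dma_addr_t start, dma_addr_t end)
{
	struct vfio_iova *first, *last;
	struct list_head *iova = &iommu->iova_list;

	if (list_empty(iova))
		return false;

	first = list_first_entry(iova, struct vfio_iova, list);
	last = list_last_entry(iova, struct vfio_iova, list);

	/* Disjoint apertures can never be reconciled */
	if (start > last->end || end < first->start)
		return true;

	/* Would raising the bottom of the aperture cut off a mapping? */
	if (start > first->start &&
	    vfio_find_dma(iommu, first->start, start - first->start))
		return true;

	/* Would lowering the top of the aperture cut off a mapping? */
	if (end < last->end &&
	    vfio_find_dma(iommu, end + 1, last->end - end))
		return true;

	return false;
}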

/*
 * Resize iommu iova aperture window. This is called only if the new
 * aperture has no conflict with existing aperture and dma mappings.
 */
static int vfio_iommu_aper_resize(struct list_head *iova,
				  dma_addr_t start, dma_addr_t end)
{}

/*
 * Check whether any reserved regions conflict with existing dma mappings
 */
static bool vfio_iommu_resv_conflict(struct vfio_iommu *iommu,
				     struct list_head *resv_regions)
{}

/*
 * Check iova regions for overlap with reserved regions and
 * exclude the overlapping ranges from the iommu iova range
 */
static int vfio_iommu_resv_exclude(struct list_head *iova,
				   struct list_head *resv_regions)
{}
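
/*
 * A sketch of the hole-punching step above for a single reserved range
 * [start, end] (hypothetical helper; struct vfio_iova field names are
 * assumptions).  Any node overlapping the reserved range is split around it
 * using vfio_iommu_iova_insert(), then removed, keeping the list sorted.
 */
static int vfio_iova_punch_hole_sketch(struct list_head *iova,
				       dma_addr_t start, dma_addr_t end)
{
	struct vfio_iova *n, *next;
	int ret;

	list_for_each_entry_safe(n, next, iova, list) {
		if (start > n->end || end < n->start)
			continue;	/* no overlap with this node */

		/* keep the part below the reserved range, if any */
		if (start > n->start) {
			ret = vfio_iommu_iova_insert(&n->list, n->start,
						     start - 1);
			if (ret)
				return ret;
		}

		/* keep the part above the reserved range, if any */
		if (end < n->end) {
			ret = vfio_iommu_iova_insert(&n->list, end + 1, n->end);
			if (ret)
				return ret;
		}

		list_del(&n->list);
		kfree(n);
	}

	return 0;
}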

static void vfio_iommu_resv_free(struct list_head *resv_regions)
{}

static void vfio_iommu_iova_free(struct list_head *iova)
{}

static int vfio_iommu_iova_get_copy(struct vfio_iommu *iommu,
				    struct list_head *iova_copy)
{}

static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
					struct list_head *iova_copy)
{}

static int vfio_iommu_domain_alloc(struct device *dev, void *data)
{}

static int vfio_iommu_type1_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{}

static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
{}

static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
{}

/*
 * Called when a domain is removed in detach. It is possible that
 * the removed domain was the one that determined the iova aperture window.
 * Recompute the aperture as the smallest window among the remaining domains.
 */
static void vfio_iommu_aper_expand(struct vfio_iommu *iommu,
				   struct list_head *iova_copy)
{}
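
/*
 * A sketch of the recomputation above (assumptions: struct vfio_iommu keeps
 * its domains on ->domain_list linked via vfio_domain->next, and each
 * vfio_domain wraps a struct iommu_domain in ->domain).  The narrowest
 * aperture across the remaining domains wins.
 */
static void vfio_iommu_aper_expand_sketch(struct vfio_iommu *iommu,
					  struct list_head *iova_copy)
{
	struct vfio_domain *domain;
	dma_addr_t start = 0;
	dma_addr_t end = (dma_addr_t)~0;

	if (list_empty(&iommu->domain_list))
		return;

	list_for_each_entry(domain, &iommu->domain_list, next) {
		struct iommu_domain_geometry *geo = &domain->domain->geometry;

		if (geo->aperture_start > start)
			start = geo->aperture_start;
		if (geo->aperture_end < end)
			end = geo->aperture_end;
	}

	vfio_iommu_aper_resize(iova_copy, start, end);
}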

/*
 * Called when a group is detached. The reserved regions for that
 * group can become valid iova space again. But since reserved regions
 * may be duplicated among groups, rebuild the valid iova region
 * list from scratch.
 */
static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu,
				   struct list_head *iova_copy)
{}

static void vfio_iommu_type1_detach_group(void *iommu_data,
					  struct iommu_group *iommu_group)
{}

static void *vfio_iommu_type1_open(unsigned long arg)
{}

static void vfio_release_domain(struct vfio_domain *domain)
{}

static void vfio_iommu_type1_release(void *iommu_data)
{}

static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu)
{}

static bool vfio_iommu_has_emulated(struct vfio_iommu *iommu)
{}

static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
					    unsigned long arg)
{}

static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps,
		 struct vfio_iommu_type1_info_cap_iova_range *cap_iovas,
		 size_t size)
{}

static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
				      struct vfio_info_cap *caps)
{}

static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
					   struct vfio_info_cap *caps)
{}

static int vfio_iommu_dma_avail_build_caps(struct vfio_iommu *iommu,
					   struct vfio_info_cap *caps)
{}

static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
				     unsigned long arg)
{}

static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu,
				    unsigned long arg)
{}

static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
				      unsigned long arg)
{}

static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
					unsigned long arg)
{}

static long vfio_iommu_type1_ioctl(void *iommu_data,
				   unsigned int cmd, unsigned long arg)
{}

static void vfio_iommu_type1_register_device(void *iommu_data,
					     struct vfio_device *vdev)
{}

static void vfio_iommu_type1_unregister_device(void *iommu_data,
					       struct vfio_device *vdev)
{}

static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
					 dma_addr_t user_iova, void *data,
					 size_t count, bool write,
					 size_t *copied)
{}

static int vfio_iommu_type1_dma_rw(void *iommu_data, dma_addr_t user_iova,
				   void *data, size_t count, bool write)
{}

static struct iommu_domain *
vfio_iommu_type1_group_iommu_domain(void *iommu_data,
				    struct iommu_group *iommu_group)
{}

static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
	.name			= "vfio-iommu-type1",
	.owner			= THIS_MODULE,
	.open			= vfio_iommu_type1_open,
	.release		= vfio_iommu_type1_release,
	.ioctl			= vfio_iommu_type1_ioctl,
	.attach_group		= vfio_iommu_type1_attach_group,
	.detach_group		= vfio_iommu_type1_detach_group,
	.pin_pages		= vfio_iommu_type1_pin_pages,
	.unpin_pages		= vfio_iommu_type1_unpin_pages,
	.register_device	= vfio_iommu_type1_register_device,
	.unregister_device	= vfio_iommu_type1_unregister_device,
	.dma_rw			= vfio_iommu_type1_dma_rw,
	.group_iommu_domain	= vfio_iommu_type1_group_iommu_domain,
};

static int __init vfio_iommu_type1_init(void)
{}

static void __exit vfio_iommu_type1_cleanup(void)
{}

module_init(vfio_iommu_type1_init);
module_exit(vfio_iommu_type1_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);