linux/drivers/gpu/drm/xe/xe_pt.c

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pt.h"

#include "regs/xe_gtt_defs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_gt.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_res_cursor.h"
#include "xe_trace.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"

struct xe_pt_dir {};

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
#define xe_pt_set_addr(__xe_pt, __addr)
#define xe_pt_addr(__xe_pt)
#else
#define xe_pt_set_addr(__xe_pt, __addr)
#define xe_pt_addr(__xe_pt)
#endif

static const u64 xe_normal_pt_shifts[] =;
static const u64 xe_compact_pt_shifts[] =;

#define XE_PT_HIGHEST_LEVEL

static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
{}

static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
{}

static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
			     unsigned int level)
{}

static void xe_pt_free(struct xe_pt *pt)
{}

/**
 * xe_pt_create() - Create a page-table.
 * @vm: The vm to create for.
 * @tile: The tile to create for.
 * @level: The page-table level.
 *
 * Allocate and initialize a single struct xe_pt metadata structure. Also
 * create the corresponding page-table bo, but don't initialize it. If the
 * level is greater than zero, then it's assumed to be a directory page-
 * table and the directory structure is also allocated and initialized to
 * NULL pointers.
 *
 * Return: A valid struct xe_pt pointer on success, an error pointer on
 * error.
 */
struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
			   unsigned int level)
{}
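
/*
 * Conceptual sketch of the level handling described above, using simplified,
 * hypothetical types rather than the driver's struct xe_pt: a directory
 * (level > 0) additionally gets an array of child pointers, zero-initialized
 * so that all children start out absent. Illustration only, assuming 512
 * entries per directory.
 */
struct example_pt {
	unsigned int level;
	struct example_pt **children;	/* NULL for level-0 (leaf) tables */
};

static struct example_pt *example_pt_create(unsigned int level)
{
	struct example_pt *pt = kzalloc(sizeof(*pt), GFP_KERNEL);

	if (!pt)
		return ERR_PTR(-ENOMEM);

	pt->level = level;
	if (level > 0) {
		pt->children = kcalloc(512, sizeof(*pt->children), GFP_KERNEL);
		if (!pt->children) {
			kfree(pt);
			return ERR_PTR(-ENOMEM);
		}
	}

	return pt;
}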

/**
 * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
 * entries.
 * @tile: The tile whose scratch pagetable to use.
 * @vm: The vm we populate for.
 * @pt: The pagetable whose bo to initialize.
 *
 * Populate the page-table bo of @pt with entries pointing into the tile's
 * scratch page-table tree if any. Otherwise populate with zeros.
 */
void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
			  struct xe_pt *pt)
{}
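
/*
 * Minimal sketch of the behaviour described above (hypothetical helper, not
 * the driver's code): every entry of a freshly created table is filled with
 * the same "empty" value, which is either a PTE pointing into the scratch
 * tree or plain zero when scratch pages are disabled.
 */
static void example_populate_empty(u64 *table, unsigned int nents, u64 empty_pte)
{
	unsigned int i;

	for (i = 0; i < nents; i++)
		table[i] = empty_pte;	/* scratch PTE, or 0 without scratch */
}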

/**
 * xe_pt_shift() - Return the ilog2 value of the size of the address range of
 * a page-table at a certain level.
 * @level: The level.
 *
 * Return: The ilog2 value of the size of the address range of a page-table
 * at level @level.
 */
unsigned int xe_pt_shift(unsigned int level)
{}
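
/*
 * Worked example of the per-level arithmetic (a sketch assuming the usual
 * 4 KiB granule and 512 8-byte entries per table, so 9 address bits are
 * gained per level; base_shift and the helper name are hypothetical, not the
 * driver's constants): each level up multiplies the covered address range by
 * 512, i.e. the ilog2 of the range grows by ilog2(512) = 9 per level,
 * e.g. 4 KiB -> 2 MiB -> 1 GiB -> 512 GiB.
 */
static inline unsigned int example_pt_shift(unsigned int base_shift,
					    unsigned int level)
{
	return base_shift + 9 * level;
}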

/**
 * xe_pt_destroy() - Destroy a page-table tree.
 * @pt: The root of the page-table tree to destroy.
 * @flags: vm flags. Currently unused.
 * @deferred: List head of lockless list for deferred putting. NULL for
 *            immediate putting.
 *
 * Puts the page-table bo, recursively calls xe_pt_destroy on all children
 * and finally frees @pt. TODO: Can we remove the @flags argument?
 */
void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
{}

/**
 * DOC: Pagetable building
 *
 * Below we use the term "page-table" for both page-directories, containing
 * pointers to lower level page-directories or page-tables, and level 0
 * page-tables that contain only page-table-entries pointing to memory pages.
 *
 * When inserting an address range in an already existing page-table tree
 * there will typically be a set of page-tables that are shared with other
 * address ranges, and a set that are private to this address range.
 * The set of shared page-tables can be at most two per level,
 * and those can't be updated immediately because the entries of those
 * page-tables may still be in use by the gpu for other mappings. Therefore
 * when inserting entries into those, we instead stage those insertions by
 * adding insertion data into struct xe_vm_pgtable_update structures. This
 * data (subtrees for the cpu and page-table-entries for the gpu) is then
 * added in a separate commit step. CPU-data is committed while still under the
 * vm lock, the object lock and, for userptr, the notifier lock in read mode.
 * The GPU async data is committed either by the GPU or CPU after fulfilling
 * relevant dependencies.
 * For non-shared page-tables (and, in fact, for shared ones that aren't
 * existing at the time of staging), we add the data in-place without the
 * special update structures. This private part of the page-table tree will
 * remain disconnected from the vm page-table tree until data is committed to
 * the shared page tables of the vm tree in the commit phase.
 */
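
/*
 * Conceptual sketch of the stage/commit split described in the DOC block
 * above, using simplified hypothetical types (not the driver's
 * struct xe_vm_pgtable_update): updates aimed at shared, live page-tables are
 * only recorded during staging, and the actual writes happen later in the
 * commit step, once the relevant locks are held and dependencies have
 * signalled. Private (newly built) subtrees, by contrast, are written
 * immediately.
 */
struct example_staged_update {
	u64 *live_table;	/* shared page-table to patch at commit */
	unsigned int first;	/* first entry to write */
	unsigned int count;	/* number of staged entries */
	u64 values[8];		/* staged PTE values (example capacity) */
};

static void example_commit_staged(const struct example_staged_update *u)
{
	unsigned int i;

	/* Only now are the staged entries written into the live tree. */
	for (i = 0; i < u->count; i++)
		u->live_table[u->first + i] = u->values[i];
}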

struct xe_pt_update {};

struct xe_pt_stage_bind_walk {};

static int
xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
		 pgoff_t offset, bool alloc_entries)
{}

/*
 * NOTE: This is a very frequently called function so we allow ourselves
 * to annotate (using branch prediction hints) the fastpath of updating a
 * non-pre-existing pagetable with leaf ptes.
 */
static int
xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
		   pgoff_t offset, struct xe_pt *xe_child, u64 pte)
{}
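
/*
 * Hedged illustration of the branch-prediction annotation mentioned in the
 * NOTE above. The helper and its condition are made up for the example; only
 * likely()/unlikely() (from <linux/compiler.h>) are real kernel facilities.
 * The common case - writing a leaf PTE straight into a freshly allocated,
 * non-shared table - is hinted as the fastpath.
 */
static bool example_try_fastpath_store(u64 *table, unsigned int idx, u64 pte,
				       bool table_is_shared)
{
	if (unlikely(table_is_shared))
		return false;	/* slowpath: caller must stage the write */

	table[idx] = pte;	/* fastpath: write the leaf PTE in place */
	return true;
}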

static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
				   struct xe_pt_stage_bind_walk *xe_walk)
{}

/*
 * Scan the requested mapping to check whether it can be done entirely
 * with 64K PTEs.
 */
static bool
xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
{}

/*
 * For non-compact "normal" 4K level-0 pagetables, we want to try to group
 * addresses together in 64K-contiguous regions to add a 64K TLB hint for the
 * device to the PTE.
 * This function determines whether the address is part of such a
 * segment. For VRAM in normal pagetables, this is strictly necessary on
 * some devices.
 */
static bool
xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
{}
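
/*
 * Worked example of the 64K-segment test described above (hypothetical
 * helper, not the driver's implementation): a mapping can only carry the 64K
 * TLB hint for [addr, next) if the span stays inside a single 64K-aligned
 * segment that is also backed contiguously; the check below is the purely
 * address-based half of that condition.
 */
static bool example_within_one_64k_segment(u64 addr, u64 next)
{
	const u64 seg = SZ_64K;

	/* True iff [addr, next) does not cross a 64K boundary. */
	return (addr & ~(seg - 1)) == ((next - 1) & ~(seg - 1));
}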

static int
xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
		       unsigned int level, u64 addr, u64 next,
		       struct xe_ptw **child,
		       enum page_walk_action *action,
		       struct xe_pt_walk *walk)
{}

static const struct xe_pt_walk_ops xe_pt_stage_bind_ops =;

/**
 * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
 * range.
 * @tile: The tile we're building for.
 * @vma: The vma indicating the address range.
 * @entries: Storage for the update entries used for connecting the tree to
 * the main tree at commit time.
 * @num_entries: On output contains the number of @entries used.
 *
 * This function builds a disconnected page-table tree for a given address
 * range. The tree is connected to the main vm tree for the gpu using
 * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind().
 * The function builds xe_vm_pgtable_update structures for already existing
 * shared page-tables, and non-existing shared and non-shared page-tables
 * are built and populated directly.
 *
 * Return: 0 on success, negative error code on error.
 */
static int
xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
{}

/**
 * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a
 * shared pagetable.
 * @addr: The start address within the non-shared pagetable.
 * @end: The end address within the non-shared pagetable.
 * @level: The level of the non-shared pagetable.
 * @walk: Walk info. The function adjusts the walk action.
 * @action: next action to perform (see enum page_walk_action)
 * @offset: Ignored on input; first non-shared entry on output.
 * @end_offset: Ignored on input; last non-shared entry + 1 on output.
 *
 * A shared page-table has some entries that belong to the address range
 * and others that don't. This function determines the entries that belong
 * fully to the address range. Depending on level, some entries may
 * partially belong to the address range (that can't happen at level 0).
 * The function detects that and adjusts those offsets to not include those
 * partial entries. Iff it does detect partial entries, we know that there must
 * be shared page tables also at lower levels, so it adjusts the walk action
 * accordingly.
 *
 * Return: true if there were non-shared entries, false otherwise.
 */
static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
				    struct xe_pt_walk *walk,
				    enum page_walk_action *action,
				    pgoff_t *offset, pgoff_t *end_offset)
{}
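
/*
 * Worked example of the offset computation described above (a hypothetical,
 * simplified helper, not the driver's implementation): with entries that each
 * cover 1 << shift bytes, only entries whose whole span lies inside
 * [addr, end) are counted; partially covered edge entries are excluded, as
 * the kernel-doc above explains. Assumes 512 entries per table.
 */
static bool example_nonshared_offsets(u64 addr, u64 end, unsigned int shift,
				      pgoff_t *offset, pgoff_t *end_offset)
{
	u64 entry = 1ull << shift;
	u64 first_full = ALIGN(addr, entry);		/* round start up */
	u64 last_full = ALIGN_DOWN(end, entry);		/* round end down */

	if (first_full >= last_full)
		return false;	/* no entry is fully covered */

	*offset = (first_full >> shift) & (512 - 1);
	*end_offset = *offset + ((last_full - first_full) >> shift);
	return true;
}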

struct xe_pt_zap_ptes_walk {};

static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
				unsigned int level, u64 addr, u64 next,
				struct xe_ptw **child,
				enum page_walk_action *action,
				struct xe_pt_walk *walk)
{}

static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops =;

/**
 * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
 * @tile: The tile we're zapping for.
 * @vma: GPU VMA detailing address range.
 *
 * Eviction and Userptr invalidation need to be able to zap the
 * gpu ptes of a given address range in pagefaulting mode.
 * In order to be able to do that, this function needs access to the shared
 * page-table entries so it can either clear the leaf PTEs or
 * clear the pointers to lower-level page-tables. The caller is required
 * to hold the necessary locks to ensure neither the page-table connectivity
 * nor the page-table entries of the range are updated from under us.
 *
 * Return: Whether ptes were actually updated and a TLB invalidation is
 * required.
 */
bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
{}
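
/*
 * Minimal sketch of the "zap" operation described above (hypothetical helper;
 * the driver writes through the page-table BO mapping, a plain array stands
 * in for it here): affected PTEs are simply cleared, and the return value
 * tells the caller whether a TLB invalidation is now required.
 */
static bool example_zap_ptes(u64 *table, unsigned int first, unsigned int count)
{
	bool zapped = false;
	unsigned int i;

	for (i = first; i < first + count; i++) {
		if (table[i]) {
			table[i] = 0;
			zapped = true;
		}
	}

	return zapped;	/* true: the caller must invalidate TLBs */
}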

static void
xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile,
		       struct iosys_map *map, void *data,
		       u32 qword_ofs, u32 num_qwords,
		       const struct xe_vm_pgtable_update *update)
{}

static void xe_pt_abort_bind(struct xe_vma *vma,
			     struct xe_vm_pgtable_update *entries,
			     u32 num_entries)
{}

static void xe_pt_commit_locks_assert(struct xe_vma *vma)
{}

static void xe_pt_commit_bind(struct xe_vma *vma,
			      struct xe_vm_pgtable_update *entries,
			      u32 num_entries, bool rebind,
			      struct llist_head *deferred)
{}

static int
xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
		   struct xe_vm_pgtable_update *entries, u32 *num_entries)
{}

static void xe_vm_dbg_print_entries(struct xe_device *xe,
				    const struct xe_vm_pgtable_update *entries,
				    unsigned int num_entries)
#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
{}
#else
{}
#endif

#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT

static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
{}

#else

static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
{
	return false;
}

#endif

/**
 * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks
 * @base: Base we derive from.
 * @bind: Whether this is a bind or an unbind operation. A bind operation
 *        makes the pre-commit callback error with -EAGAIN if it detects a
 *        pending invalidation.
 * @locked: Whether the pre-commit callback locked the userptr notifier lock
 *          and it needs unlocking.
 */
struct xe_pt_migrate_pt_update {};

/*
 * This function adds the needed dependencies to a page-table update job
 * to make sure racing jobs for separate bind engines don't race writing
 * to the same page-table range, wreaking havoc. Initially use a single
 * fence for the entire VM. An optimization would use smaller granularity.
 */
static int xe_pt_vm_dependencies(struct xe_sched_job *job,
				 struct xe_range_fence_tree *rftree,
				 u64 start, u64 last)
{}
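
/*
 * Hedged sketch of the range-conflict test such dependency tracking is built
 * on (hypothetical helper, not the driver's range-fence code): a new update
 * to [a_start, a_last] must wait for any earlier, still-pending update whose
 * inclusive range overlaps it.
 */
static bool example_ranges_conflict(u64 a_start, u64 a_last,
				    u64 b_start, u64 b_last)
{
	return a_start <= b_last && b_start <= a_last;
}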

static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
{}

static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
{}

static const struct xe_migrate_pt_update_ops bind_ops =;

static const struct xe_migrate_pt_update_ops userptr_bind_ops =;

struct invalidation_fence {};

static const char *
invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
{}

static const char *
invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
{}

static const struct dma_fence_ops invalidation_fence_ops =;

static void invalidation_fence_cb(struct dma_fence *fence,
				  struct dma_fence_cb *cb)
{}

static void invalidation_fence_work_func(struct work_struct *w)
{}

static int invalidation_fence_init(struct xe_gt *gt,
				   struct invalidation_fence *ifence,
				   struct dma_fence *fence,
				   u64 start, u64 end, u32 asid)
{}

static void xe_pt_calc_rfence_interval(struct xe_vma *vma,
				       struct xe_pt_migrate_pt_update *update,
				       struct xe_vm_pgtable_update *entries,
				       u32 num_entries)
{}

/**
 * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
 * address range.
 * @tile: The tile to bind for.
 * @vma: The vma to bind.
 * @q: The exec_queue with which to do pipelined page-table updates.
 * @syncs: Entries to sync on before binding the built tree to the live vm tree.
 * @num_syncs: Number of @syncs entries.
 * @rebind: Whether we're rebinding this vma to the same address range without
 * an unbind in-between.
 *
 * This function builds a page-table tree (see xe_pt_stage_bind() for more
 * information on page-table building), and the xe_vm_pgtable_update entries
 * abstracting the operations needed to attach it to the main vm tree. It
 * then takes the relevant locks and updates the metadata side of the main
 * vm tree and submits the operations for pipelined attachment of the
 * gpu page-table to the vm main tree (which can be done either by the
 * cpu or the GPU).
 *
 * Return: A valid dma-fence representing the pipelined attachment operation
 * on success, an error pointer on error.
 */
struct dma_fence *
__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
		 struct xe_sync_entry *syncs, u32 num_syncs,
		 bool rebind)
{}

struct xe_pt_stage_unbind_walk {};

/*
 * Check whether this range is the only one populating this pagetable,
 * and in that case, update the walk range checks so that higher levels don't
 * view us as a shared pagetable.
 */
static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
			     const struct xe_pt *child,
			     enum page_walk_action *action,
			     struct xe_pt_walk *walk)
{}
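
/*
 * Worked example of the "only user" test implied above (hypothetical helper,
 * address-based part only): if the range being unbound spans every byte a
 * child page-table can map, that child is not shared with any other mapping
 * and can be removed ("killed") outright rather than treated as shared.
 */
static bool example_covers_whole_child(u64 addr, u64 next,
				       unsigned int child_shift)
{
	u64 span = 1ull << child_shift;	/* bytes mapped by the whole child */

	return IS_ALIGNED(addr, span) && next - addr >= span;
}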

static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
				    unsigned int level, u64 addr, u64 next,
				    struct xe_ptw **child,
				    enum page_walk_action *action,
				    struct xe_pt_walk *walk)
{}

static int
xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
				unsigned int level, u64 addr, u64 next,
				struct xe_ptw **child,
				enum page_walk_action *action,
				struct xe_pt_walk *walk)
{}

static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops =;

/**
 * xe_pt_stage_unbind() - Build page-table update structures for an unbind
 * operation
 * @tile: The tile we're unbinding for.
 * @vma: The vma we're unbinding.
 * @entries: Caller-provided storage for the update structures.
 *
 * Builds page-table update structures for an unbind operation. The function
 * will attempt to remove all page-tables that we're the only user
 * of, and for that to work, the unbind operation must be committed in the
 * same critical section that blocks racing binds to the same page-table tree.
 *
 * Return: The number of entries used.
 */
static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma,
				       struct xe_vm_pgtable_update *entries)
{}

static void
xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
				  struct xe_tile *tile, struct iosys_map *map,
				  void *ptr, u32 qword_ofs, u32 num_qwords,
				  const struct xe_vm_pgtable_update *update)
{}

static void
xe_pt_commit_unbind(struct xe_vma *vma,
		    struct xe_vm_pgtable_update *entries, u32 num_entries,
		    struct llist_head *deferred)
{}

static const struct xe_migrate_pt_update_ops unbind_ops =;

static const struct xe_migrate_pt_update_ops userptr_unbind_ops =;

/**
 * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
 * address range.
 * @tile: The tile to unbind for.
 * @vma: The vma to unbind.
 * @q: The exec_queue with which to do pipelined page-table updates.
 * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
 * @num_syncs: Number of @syncs entries.
 *
 * This function builds the xe_vm_pgtable_update entries abstracting the
 * operations needed to detach the page-table tree to be destroyed from the
 * main vm tree.
 * It then takes the relevant locks and submits the operations for
 * pipelined detachment of the gpu page-table from the vm main tree
 * (which can be done either by the cpu or the GPU). Finally, it frees the
 * detached page-table tree.
 *
 * Return: A valid dma-fence representing the pipelined detachment operation
 * on success, an error pointer on error.
 */
struct dma_fence *
__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
		   struct xe_sync_entry *syncs, u32 num_syncs)
{}