// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pt.h"

#include "regs/xe_gtt_defs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_gt.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_res_cursor.h"
#include "xe_trace.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"

struct xe_pt_dir {
	…
};

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
#define xe_pt_set_addr(__xe_pt, __addr) …
#define xe_pt_addr(__xe_pt) …
#else
#define xe_pt_set_addr …
#define xe_pt_addr …
#endif

static const u64 xe_normal_pt_shifts[] = …;
static const u64 xe_compact_pt_shifts[] = …;

#define XE_PT_HIGHEST_LEVEL …

static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
{
	…
}

static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
{
	…
}

static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
			     unsigned int level)
{
	…
}

static void xe_pt_free(struct xe_pt *pt)
{
	…
}

/**
 * xe_pt_create() - Create a page-table.
 * @vm: The vm to create for.
 * @tile: The tile to create for.
 * @level: The page-table level.
 *
 * Allocate and initialize a single struct xe_pt metadata structure. Also
 * create the corresponding page-table bo, but don't initialize it. If the
 * level is greater than zero, then it's assumed to be a directory page-
 * table and the directory structure is also allocated and initialized to
 * NULL pointers.
 *
 * Return: A valid struct xe_pt pointer on success, an error pointer on
 * error.
 */
struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
			   unsigned int level)
{
	…
}

/**
 * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
 * entries.
 * @tile: The tile whose scratch page-table to use.
 * @vm: The vm we populate for.
 * @pt: The page-table whose bo to initialize.
 *
 * Populate the page-table bo of @pt with entries pointing into the tile's
 * scratch page-table tree if any. Otherwise populate with zeros.
 */
void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
			  struct xe_pt *pt)
{
	…
}

/**
 * xe_pt_shift() - Return the ilog2 value of the size of the address range of
 * a page-table at a certain level.
 * @level: The level.
 *
 * Return: The ilog2 value of the size of the address range of a page-table
 * at level @level.
 */
unsigned int xe_pt_shift(unsigned int level)
{
	…
}

/**
 * xe_pt_destroy() - Destroy a page-table tree.
 * @pt: The root of the page-table tree to destroy.
 * @flags: vm flags. Currently unused.
 * @deferred: List head of lockless list for deferred putting. NULL for
 *            immediate putting.
 *
 * Puts the page-table bo, recursively calls xe_pt_destroy on all children
 * and finally frees @pt. TODO: Can we remove the @flags argument?
 */
void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
{
	…
}
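/*
 * Illustrative sketch only, not part of the driver: how the helpers above
 * are expected to compose when setting up a single page-table level. The
 * "example_" wrapper and its calling context are hypothetical; only the
 * xe_pt_* signatures come from this file.
 */
static inline struct xe_pt *example_pt_alloc(struct xe_vm *vm,
					     struct xe_tile *tile,
					     unsigned int level)
{
	struct xe_pt *pt;

	/* Allocate the metadata and the backing page-table bo. */
	pt = xe_pt_create(vm, tile, level);
	if (IS_ERR(pt))
		return pt;

	/* Fill the new bo with scratch or zero entries before first use. */
	xe_pt_populate_empty(tile, vm, pt);

	/*
	 * Each page-table at @level covers 1ull << xe_pt_shift(level) bytes
	 * of GPU virtual address space. Teardown is the reverse, e.g.
	 * xe_pt_destroy(pt, flags, NULL) for immediate putting.
	 */
	return pt;
}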
/**
 * DOC: Pagetable building
 *
 * Below we use the term "page-table" for both page-directories, containing
 * pointers to lower level page-directories or page-tables, and level 0
 * page-tables that contain only page-table-entries pointing to memory pages.
 *
 * When inserting an address range in an already existing page-table tree
 * there will typically be a set of page-tables that are shared with other
 * address ranges, and a set that are private to this address range.
 * The set of shared page-tables can be at most two per level,
 * and those can't be updated immediately because the entries of those
 * page-tables may still be in use by the gpu for other mappings. Therefore
 * when inserting entries into those, we instead stage those insertions by
 * adding insertion data into struct xe_vm_pgtable_update structures. This
 * data (subtrees for the cpu and page-table-entries for the gpu) is then
 * added in a separate commit step. CPU-data is committed while still under the
 * vm lock, the object lock and, for userptr, the notifier lock in read mode.
 * The GPU async data is committed either by the GPU or CPU after fulfilling
 * relevant dependencies.
 * For non-shared page-tables (and, in fact, for shared ones that aren't
 * existing at the time of staging), we add the data in-place without the
 * special update structures. This private part of the page-table tree will
 * remain disconnected from the vm page-table tree until data is committed to
 * the shared page tables of the vm tree in the commit phase.
 */

struct xe_pt_update {
	…
};

struct xe_pt_stage_bind_walk {
	…
};

static int
xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
		 pgoff_t offset, bool alloc_entries)
{
	…
}

/*
 * NOTE: This is a very frequently called function so we allow ourselves
 * to annotate (using branch prediction hints) the fastpath of updating a
 * non-pre-existing pagetable with leaf ptes.
 */
static int
xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
		   pgoff_t offset, struct xe_pt *xe_child, u64 pte)
{
	…
}

static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
				   struct xe_pt_stage_bind_walk *xe_walk)
{
	…
}

/*
 * Scan the requested mapping to check whether it can be done entirely
 * with 64K PTEs.
 */
static bool xe_pt_scan_64K(u64 addr, u64 next,
			   struct xe_pt_stage_bind_walk *xe_walk)
{
	…
}

/*
 * For non-compact "normal" 4K level-0 pagetables, we want to try to group
 * addresses together in 64K-contiguous regions to add a 64K TLB hint for the
 * device to the PTE.
 * This function determines whether the address is part of such a
 * segment. For VRAM in normal pagetables, this is strictly necessary on
 * some devices.
 */
static bool xe_pt_is_pte_ps64K(u64 addr, u64 next,
			       struct xe_pt_stage_bind_walk *xe_walk)
{
	…
}

static int
xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
		       unsigned int level, u64 addr, u64 next,
		       struct xe_ptw **child,
		       enum page_walk_action *action,
		       struct xe_pt_walk *walk)
{
	…
}

static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = …;

/**
 * xe_pt_stage_bind() - Build a disconnected page-table tree for a given
 * address range.
 * @tile: The tile we're building for.
 * @vma: The vma indicating the address range.
 * @entries: Storage for the update entries used for connecting the tree to
 * the main tree at commit time.
 * @num_entries: On output contains the number of @entries used.
 *
 * This function builds a disconnected page-table tree for a given address
 * range. The tree is connected to the main vm tree for the gpu using
 * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind().
 * The function builds xe_vm_pgtable_update structures for already existing
 * shared page-tables, and non-existing shared and non-shared page-tables
 * are built and populated directly.
 *
 * Return: 0 on success, negative error code on error.
 */
static int
xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
{
	…
}
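/*
 * Illustrative sketch only, not driver code: the staging half of the
 * stage/commit split described in the "Pagetable building" DOC section,
 * expressed in terms of xe_pt_stage_bind() above. The array size assumes
 * the XE_VM_MAX_LEVEL sizing convention (at most two shared page-tables
 * per level plus the root); treat it and the "example_" wrapper as
 * assumptions.
 */
static inline int example_stage_bind(struct xe_tile *tile, struct xe_vma *vma)
{
	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
	u32 num_entries = 0;
	int err;

	/*
	 * Build the disconnected subtree and record, in @entries, the
	 * updates needed to splice it into the live tree later.
	 */
	err = xe_pt_stage_bind(tile, vma, entries, &num_entries);
	if (err)
		return err;

	/*
	 * The staged @entries are only applied in a later commit step:
	 * by the CPU via xe_pt_commit_bind() and by the GPU via
	 * xe_migrate_update_pgtables(), as the DOC section explains.
	 */
	return 0;
}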
/**
 * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a
 * shared pagetable.
 * @addr: The start address within the non-shared pagetable.
 * @end: The end address within the non-shared pagetable.
 * @level: The level of the non-shared pagetable.
 * @walk: Walk info. The function adjusts the walk action.
 * @action: Next action to perform (see enum page_walk_action).
 * @offset: Ignored on input, first non-shared entry on output.
 * @end_offset: Ignored on input, last non-shared entry + 1 on output.
 *
 * A non-shared page-table has some entries that belong to the address range
 * and others that don't. This function determines the entries that belong
 * fully to the address range. Depending on level, some entries may
 * partially belong to the address range (that can't happen at level 0).
 * The function detects that and adjusts those offsets to not include those
 * partial entries. Iff it does detect partial entries, we know that there must
 * be shared page tables also at lower levels, so it adjusts the walk action
 * accordingly.
 *
 * Return: true if there were non-shared entries, false otherwise.
 */
static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
				    struct xe_pt_walk *walk,
				    enum page_walk_action *action,
				    pgoff_t *offset, pgoff_t *end_offset)
{
	…
}

struct xe_pt_zap_ptes_walk {
	…
};

static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
				unsigned int level, u64 addr, u64 next,
				struct xe_ptw **child,
				enum page_walk_action *action,
				struct xe_pt_walk *walk)
{
	…
}

static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = …;

/**
 * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
 * @tile: The tile we're zapping for.
 * @vma: GPU VMA detailing address range.
 *
 * Eviction and Userptr invalidation need to be able to zap the
 * gpu ptes of a given address range in pagefaulting mode.
 * In order to be able to do that, this function needs access to the shared
 * page-table entries so it can either clear the leaf PTEs or
 * clear the pointers to lower-level page-tables. The caller is required
 * to hold the necessary locks to ensure neither the page-table connectivity
 * nor the page-table entries of the range is updated from under us.
 *
 * Return: Whether ptes were actually updated and a TLB invalidation is
 * required.
 */
bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
{
	…
}

static void
xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update,
		       struct xe_tile *tile, struct iosys_map *map,
		       void *data, u32 qword_ofs, u32 num_qwords,
		       const struct xe_vm_pgtable_update *update)
{
	…
}

static void xe_pt_abort_bind(struct xe_vma *vma,
			     struct xe_vm_pgtable_update *entries,
			     u32 num_entries)
{
	…
}

static void xe_pt_commit_locks_assert(struct xe_vma *vma)
{
	…
}

static void xe_pt_commit_bind(struct xe_vma *vma,
			      struct xe_vm_pgtable_update *entries,
			      u32 num_entries, bool rebind,
			      struct llist_head *deferred)
{
	…
}

static int
xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
		   struct xe_vm_pgtable_update *entries, u32 *num_entries)
{
	…
}

static void xe_vm_dbg_print_entries(struct xe_device *xe,
				    const struct xe_vm_pgtable_update *entries,
				    unsigned int num_entries)
#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
{
	…
}
#else
{}
#endif

#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT

static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
{
	…
}

#else

static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
{
	return false;
}

#endif
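/*
 * Illustrative sketch only, not driver code: the CPU-side commit half of
 * the stage/commit split, using the static helpers above. @entries and
 * @num_entries are assumed to come from a prior xe_pt_prepare_bind() or
 * xe_pt_stage_bind() call; passing NULL for the deferred list requests
 * immediate putting of replaced page-tables, per the xe_pt_destroy()
 * documentation. The "example_" wrapper is hypothetical.
 */
static inline void example_commit_bind(struct xe_vma *vma,
				       struct xe_vm_pgtable_update *entries,
				       u32 num_entries, bool rebind)
{
	/*
	 * With the vm lock, the object lock and, for userptr, the notifier
	 * lock held in read mode (the real paths assert this via
	 * xe_pt_commit_locks_assert()), splice the staged subtree into the
	 * live page-table tree.
	 */
	xe_pt_commit_bind(vma, entries, num_entries, rebind, NULL);
}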
/**
 * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks
 * @base: Base we derive from.
 * @bind: Whether this is a bind or an unbind operation. A bind operation
 *        makes the pre-commit callback error with -EAGAIN if it detects a
 *        pending invalidation.
 * @locked: Whether the pre-commit callback locked the userptr notifier lock
 *          and it needs unlocking.
 */
struct xe_pt_migrate_pt_update {
	…
};

/*
 * This function adds the needed dependencies to a page-table update job
 * to make sure racing jobs for separate bind engines don't race writing
 * to the same page-table range, wreaking havoc. Initially use a single
 * fence for the entire VM. An optimization would use smaller granularity.
 */
static int xe_pt_vm_dependencies(struct xe_sched_job *job,
				 struct xe_range_fence_tree *rftree,
				 u64 start, u64 last)
{
	…
}

static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
{
	…
}

static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
{
	…
}

static const struct xe_migrate_pt_update_ops bind_ops = …;

static const struct xe_migrate_pt_update_ops userptr_bind_ops = …;

struct invalidation_fence {
	…
};

static const char *
invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
{
	…
}

static const char *
invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
{
	…
}

static const struct dma_fence_ops invalidation_fence_ops = …;

static void invalidation_fence_cb(struct dma_fence *fence,
				  struct dma_fence_cb *cb)
{
	…
}

static void invalidation_fence_work_func(struct work_struct *w)
{
	…
}

static int invalidation_fence_init(struct xe_gt *gt,
				   struct invalidation_fence *ifence,
				   struct dma_fence *fence,
				   u64 start, u64 end, u32 asid)
{
	…
}

static void xe_pt_calc_rfence_interval(struct xe_vma *vma,
				       struct xe_pt_migrate_pt_update *update,
				       struct xe_vm_pgtable_update *entries,
				       u32 num_entries)
{
	…
}

/**
 * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
 * address range.
 * @tile: The tile to bind for.
 * @vma: The vma to bind.
 * @q: The exec_queue with which to do pipelined page-table updates.
 * @syncs: Entries to sync on before binding the built tree to the live vm tree.
 * @num_syncs: Number of @syncs entries.
 * @rebind: Whether we're rebinding this vma to the same address range without
 * an unbind in-between.
 *
 * This function builds a page-table tree (see xe_pt_stage_bind() for more
 * information on page-table building), and the xe_vm_pgtable_update entries
 * abstracting the operations needed to attach it to the main vm tree. It
 * then takes the relevant locks and updates the metadata side of the main
 * vm tree and submits the operations for pipelined attachment of the
 * gpu page-table to the vm main tree (which can be done by either the
 * CPU or the GPU).
 *
 * Return: A valid dma-fence representing the pipelined attachment operation
 * on success, an error pointer on error.
 */
struct dma_fence *
__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma,
		 struct xe_exec_queue *q, struct xe_sync_entry *syncs,
		 u32 num_syncs, bool rebind)
{
	…
}
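/*
 * Illustrative sketch only, not driver code: consuming the dma-fence
 * returned by __xe_pt_bind_vma() above. A synchronous wait is shown for
 * simplicity; real callers typically install the fence as a dependency
 * instead. The "example_" wrapper and the empty sync array are assumptions.
 */
static inline int example_bind_sync(struct xe_tile *tile, struct xe_vma *vma,
				    struct xe_exec_queue *q)
{
	struct dma_fence *fence;

	/* No sync entries, initial bind (not a rebind). */
	fence = __xe_pt_bind_vma(tile, vma, q, NULL, 0, false);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Wait for the pipelined page-table attachment to complete. */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}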
struct xe_pt_stage_unbind_walk {
	…
};

/*
 * Check whether this range is the only one populating this pagetable,
 * and in that case, update the walk range checks so that higher levels don't
 * view us as a shared pagetable.
 */
static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
			     const struct xe_pt *child,
			     enum page_walk_action *action,
			     struct xe_pt_walk *walk)
{
	…
}

static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
				    unsigned int level, u64 addr, u64 next,
				    struct xe_ptw **child,
				    enum page_walk_action *action,
				    struct xe_pt_walk *walk)
{
	…
}

static int
xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
				unsigned int level, u64 addr, u64 next,
				struct xe_ptw **child,
				enum page_walk_action *action,
				struct xe_pt_walk *walk)
{
	…
}

static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = …;

/**
 * xe_pt_stage_unbind() - Build page-table update structures for an unbind
 * operation
 * @tile: The tile we're unbinding for.
 * @vma: The vma we're unbinding.
 * @entries: Caller-provided storage for the update structures.
 *
 * Builds page-table update structures for an unbind operation. The function
 * will attempt to remove all page-tables that we're the only user
 * of, and for that to work, the unbind operation must be committed in the
 * same critical section that blocks racing binds to the same page-table tree.
 *
 * Return: The number of entries used.
 */
static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
				       struct xe_vma *vma,
				       struct xe_vm_pgtable_update *entries)
{
	…
}

static void
xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
				  struct xe_tile *tile, struct iosys_map *map,
				  void *ptr, u32 qword_ofs, u32 num_qwords,
				  const struct xe_vm_pgtable_update *update)
{
	…
}

static void
xe_pt_commit_unbind(struct xe_vma *vma,
		    struct xe_vm_pgtable_update *entries, u32 num_entries,
		    struct llist_head *deferred)
{
	…
}

static const struct xe_migrate_pt_update_ops unbind_ops = …;

static const struct xe_migrate_pt_update_ops userptr_unbind_ops = …;

/**
 * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
 * address range.
 * @tile: The tile to unbind for.
 * @vma: The vma to unbind.
 * @q: The exec_queue with which to do pipelined page-table updates.
 * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
 * @num_syncs: Number of @syncs entries.
 *
 * This function builds the xe_vm_pgtable_update entries abstracting the
 * operations needed to detach the page-table tree to be destroyed from the
 * main vm tree.
 * It then takes the relevant locks and submits the operations for
 * pipelined detachment of the gpu page-table from the vm main tree
 * (which can be done by either the CPU or the GPU). Finally it frees the
 * detached page-table tree.
 *
 * Return: A valid dma-fence representing the pipelined detachment operation
 * on success, an error pointer on error.
 */
struct dma_fence *
__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma,
		   struct xe_exec_queue *q, struct xe_sync_entry *syncs,
		   u32 num_syncs)
{
	…
}
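/*
 * Illustrative sketch only, not driver code: tearing down a vma's GPU
 * mappings with __xe_pt_unbind_vma() above and handing the resulting fence
 * back to the caller, which then waits on it or installs it as a dependency
 * before freeing the vma. The "example_" wrapper and the empty sync array
 * are assumptions.
 */
static inline struct dma_fence *
example_unbind(struct xe_tile *tile, struct xe_vma *vma,
	       struct xe_exec_queue *q)
{
	struct dma_fence *fence;

	/* No sync entries for this simplified sketch. */
	fence = __xe_pt_unbind_vma(tile, vma, q, NULL, 0);
	if (IS_ERR(fence))
		return fence;

	/*
	 * The detached page-table tree is freed once the pipelined
	 * detachment completes; the returned fence tracks that completion.
	 */
	return fence;
}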