linux/drivers/gpu/drm/xe/xe_migrate.c

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "xe_migrate.h"

#include <linux/bitfield.h>
#include <linux/sizes.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>

#include <generated/xe_wa_oob.h>

#include "instructions/xe_gpu_commands.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_gtt_defs.h"
#include "tests/xe_test.h"
#include "xe_assert.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_exec_queue.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_vm.h"

/**
 * struct xe_migrate - migrate context.
 */
struct xe_migrate {};

#define MAX_PREEMPTDISABLE_TRANSFER
#define MAX_CCS_LIMITED_TRANSFER
#define NUM_KERNEL_PDE
#define NUM_PT_SLOTS
#define LEVEL0_PAGE_TABLE_ENCODE_SIZE
#define MAX_NUM_PTE
#define IDENTITY_OFFSET

/*
 * Although MI_STORE_DATA_IMM's "length" field is 10-bits, 0x3FE is the largest
 * legal value accepted.  Since that instruction field is always stored in
 * (val-2) format, this translates to 0x400 dwords for the true maximum length
 * of the instruction.  Subtracting the instruction header (1 dword) and
 * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values.
 */
#define MAX_PTE_PER_SDI
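
/*
 * Worked example of the budget described above (this only restates the
 * arithmetic from the comment, it is not an independent derivation):
 *
 *	largest legal encoded "length" value:	0x3FE
 *	true length ((val - 2) encoding):	0x3FE + 2 = 0x400 dwords
 *	minus instruction header:		0x400 - 1 = 0x3FF dwords
 *	minus 64-bit address:			0x3FF - 2 = 0x3FD dwords
 *	qword PTEs per MI_STORE_DATA_IMM:	0x3FD / 2 = 0x1FE
 */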

/**
 * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue.
 * @tile: The tile.
 *
 * Returns the default migrate exec queue of this tile.
 *
 * Return: The default migrate exec queue
 */
struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile)
{}

static void xe_migrate_fini(struct drm_device *dev, void *arg)
{}

static u64 xe_migrate_vm_addr(u64 slot, u32 level)
{}

static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr, bool is_comp_pte)
{}

static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo,
					u64 map_ofs, u64 vram_offset, u16 pat_index, u64 pt_2m_ofs)
{}

static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
				 struct xe_vm *vm)
{}

/*
 * Including the reserved copy engine is required to avoid deadlocks due to
 * migrate jobs servicing the faults getting stuck behind the job that
 * faulted.
 */
static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
{}

static bool xe_migrate_needs_ccs_emit(struct xe_device *xe)
{}

/**
 * xe_migrate_init() - Initialize a migrate context
 * @tile: Back-pointer to the tile we're initializing for.
 *
 * Return: Pointer to a migrate context on success. Error pointer on error.
 */
struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
{}
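
/*
 * Illustrative sketch only, not part of the driver: how a caller might
 * consume xe_migrate_init(). The wrapper name is hypothetical; the error
 * handling follows the kernel-doc above (an error pointer, never NULL,
 * signals failure). A real caller would keep the returned context around,
 * e.g. in its tile, rather than dropping it as done here.
 */
static int __maybe_unused xe_migrate_init_example(struct xe_tile *tile)
{
	struct xe_migrate *m = xe_migrate_init(tile);

	if (IS_ERR(m))
		return PTR_ERR(m);

	return 0;
}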

static u64 max_mem_transfer_per_pass(struct xe_device *xe)
{}

static u64 xe_migrate_res_sizes(struct xe_migrate *m, struct xe_res_cursor *cur)
{}

static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur)
{}

#define PTE_UPDATE_FLAG_IS_VRAM
#define PTE_UPDATE_FLAG_IS_COMP_PTE

static u32 pte_update_size(struct xe_migrate *m,
			   u32 flags,
			   struct ttm_resource *res,
			   struct xe_res_cursor *cur,
			   u64 *L0, u64 *L0_ofs, u32 *L0_pt,
			   u32 cmd_size, u32 pt_ofs, u32 avail_pts)
{}

static void emit_pte(struct xe_migrate *m,
		     struct xe_bb *bb, u32 at_pt,
		     bool is_vram, bool is_comp_pte,
		     struct xe_res_cursor *cur,
		     u32 size, struct ttm_resource *res)
{}

#define EMIT_COPY_CCS_DW
static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
			  u64 dst_ofs, bool dst_is_indirect,
			  u64 src_ofs, bool src_is_indirect,
			  u32 size)
{}

#define EMIT_COPY_DW
static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
		      u64 src_ofs, u64 dst_ofs, unsigned int size,
		      unsigned int pitch)
{}

static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
{}

static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
			       struct xe_bb *bb,
			       u64 src_ofs, bool src_is_indirect,
			       u64 dst_ofs, bool dst_is_indirect, u32 dst_size,
			       u64 ccs_ofs, bool copy_ccs)
{}

/**
 * xe_migrate_copy() - Copy content of TTM resources.
 * @m: The migration context.
 * @src_bo: The buffer object @src is currently bound to.
 * @dst_bo: If copying between resources created for the same bo, set this to
 * the same value as @src_bo. If copying between buffer objects, set it to
 * the buffer object @dst is currently bound to.
 * @src: The source TTM resource.
 * @dst: The dst TTM resource.
 * @copy_only_ccs: If true, copy only the CCS metadata
 *
 * Copies the contents of @src to @dst. On flat CCS devices, the CCS
 * metadata is copied as well if needed, or, if not present in @src, the
 * CCS metadata of @dst is cleared for security reasons.
 *
 * Return: Pointer to a dma_fence representing the last copy batch, or
 * an error pointer on failure. If there is a failure, any copy operation
 * started by the function call has been synced.
 */
struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
				  struct xe_bo *src_bo,
				  struct xe_bo *dst_bo,
				  struct ttm_resource *src,
				  struct ttm_resource *dst,
				  bool copy_only_ccs)
{}
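
/*
 * Illustrative sketch only, not part of the driver: consuming the fence
 * returned by xe_migrate_copy() for a full (data plus CCS where applicable)
 * copy between the resources the two buffer objects are currently bound to.
 * The wrapper name is hypothetical, error handling of the wait is elided,
 * and it assumes <linux/dma-fence.h> is reachable via the includes above.
 */
static int __maybe_unused xe_migrate_copy_example(struct xe_migrate *m,
						  struct xe_bo *src_bo,
						  struct xe_bo *dst_bo)
{
	struct dma_fence *fence;

	fence = xe_migrate_copy(m, src_bo, dst_bo, src_bo->ttm.resource,
				dst_bo->ttm.resource, false);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Wait for the last copy batch before touching the resources again. */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}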

static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
				 u32 size, u32 pitch)
{}

static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
				 u64 src_ofs, u32 size, u32 pitch, bool is_vram)
{}

static bool has_service_copy_support(struct xe_gt *gt)
{}

static u32 emit_clear_cmd_len(struct xe_gt *gt)
{}

static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
		       u32 size, u32 pitch, bool is_vram)
{}

/**
 * xe_migrate_clear() - Clear content of TTM resources.
 * @m: The migration context.
 * @bo: The buffer object @dst is currently bound to.
 * @dst: The dst TTM resource to be cleared.
 * @clear_flags: flags to specify which data to clear: CCS, BO, or both.
 *
 * Clear the contents of @dst to zero when XE_MIGRATE_CLEAR_FLAG_BO_DATA is set.
 * On flat CCS devices, the CCS metadata is cleared to zero with XE_MIGRATE_CLEAR_FLAG_CCS_DATA.
 * Set XE_MIGRATE_CLEAR_FLAG_FULL to clear the bo data as well as the CCS
 * metadata.
 * TODO: Eliminate the @bo argument.
 *
 * Return: Pointer to a dma_fence representing the last clear batch, or
 * an error pointer on failure. If there is a failure, any clear operation
 * started by the function call has been synced.
 */
struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
				   struct xe_bo *bo,
				   struct ttm_resource *dst,
				   u32 clear_flags)
{}
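
/*
 * Illustrative sketch only, not part of the driver: clearing both the BO
 * data and, on flat CCS devices, the CCS metadata of the resource @bo is
 * currently bound to, then waiting for the last clear batch. The wrapper
 * name is hypothetical.
 */
static int __maybe_unused xe_migrate_clear_example(struct xe_migrate *m,
						   struct xe_bo *bo)
{
	struct dma_fence *fence;

	fence = xe_migrate_clear(m, bo, bo->ttm.resource,
				 XE_MIGRATE_CLEAR_FLAG_FULL);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}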

static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
			  const struct xe_vm_pgtable_update_op *pt_op,
			  const struct xe_vm_pgtable_update *update,
			  struct xe_migrate_pt_update *pt_update)
{}

struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
{}

#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
struct migrate_test_params {};

#define to_migrate_test_params(_priv)
#endif

static struct dma_fence *
xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
			       struct xe_migrate_pt_update *pt_update)
{}

static struct dma_fence *
__xe_migrate_update_pgtables(struct xe_migrate *m,
			     struct xe_migrate_pt_update *pt_update,
			     struct xe_vm_pgtable_update_ops *pt_update_ops)
{}

/**
 * xe_migrate_update_pgtables() - Pipelined page-table update
 * @m: The migrate context.
 * @pt_update: PT update arguments
 *
 * Perform a pipelined page-table update. The update descriptors are typically
 * built under the same lock critical section as a call to this function. If
 * using the default engine for the updates, they will be performed in the
 * order they grab the job_mutex. If different engines are used, external
 * synchronization is needed for overlapping updates to maintain page-table
 * consistency. Note that the meaning of "overlapping" is that the updates
 * touch the same page-table, which might be a higher-level page-directory.
 * If no pipelining is needed, then updates may be performed by the CPU.
 *
 * Return: A dma_fence that, when signaled, indicates the update completion.
 */
struct dma_fence *
xe_migrate_update_pgtables(struct xe_migrate *m,
			   struct xe_migrate_pt_update *pt_update)
{}
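
/*
 * Illustrative sketch only, not part of the driver: consuming the fence
 * returned by a pipelined page-table update. Assembling @pt_update is
 * omitted; it is normally built by the PT code under the same lock
 * critical section, as the kernel-doc above describes. The wrapper name
 * is hypothetical, and it assumes an error pointer is returned on
 * failure, as with the other migrate entry points.
 */
static int __maybe_unused
xe_migrate_update_pgtables_example(struct xe_migrate *m,
				   struct xe_migrate_pt_update *pt_update)
{
	struct dma_fence *fence = xe_migrate_update_pgtables(m, pt_update);

	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Signaling of this fence indicates the update has completed. */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}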

/**
 * xe_migrate_wait() - Complete all operations using the xe_migrate context
 * @m: Migrate context to wait for.
 *
 * Waits until the GPU no longer uses the migrate context's default engine
 * or its page-table objects. FIXME: What about separate page-table update
 * engines?
 */
void xe_migrate_wait(struct xe_migrate *m)
{}

#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_migrate.c"
#endif