amdgpu_amdkfd_gfx_v9.c | Explore in Territory

/*
 * Copyright 2014-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "sdma0/sdma0_4_0_offset.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "sdma1/sdma1_4_0_offset.h"
#include "sdma1/sdma1_4_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "oss/osssys_4_0_offset.h"
#include "oss/osssys_4_0_sh_mask.h"
#include "soc15_common.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "gfx_v9_0.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include <uapi/linux/kfd_ioctl.h>

enum hqd_dequeue_request_type { … };

static void kgd_gfx_v9_lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid, uint32_t inst)
{ … }

static void kgd_gfx_v9_unlock_srbm(struct amdgpu_device *adev, uint32_t inst)
{ … }

void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
				uint32_t queue_id, uint32_t inst)
{ … }

uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{ … }

void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst)
{ … }

void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases, uint32_t inst)
{ … }

int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
					unsigned int vmid, uint32_t inst)
{ … }

/* TODO - RING0 form of field is obsolete, seems to date back to SI
 * but still works
 */

int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
				uint32_t inst)
{ … }

static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{ … }

static inline struct v9_mqd *get_mqd(void *mqd)
{ … }

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{ … }

int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t __user *wptr, uint32_t wptr_shift,
			uint32_t wptr_mask, struct mm_struct *mm,
			uint32_t inst)
{ … }

int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off, uint32_t inst)
{ … }

int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{ … }

static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{ … }

static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{ … }

bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
				uint64_t queue_address, uint32_t pipe_id,
				uint32_t queue_id, uint32_t inst)
{ … }

static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{ … }

int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id, uint32_t inst)
{ … }

static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{ … }

bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{ … }

int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
					uint32_t gfx_index_val,
					uint32_t sq_cmd, uint32_t inst)
{ … }

/*
 * GFX9 helper for wave launch stall requirements on debug trap setting.
 *
 * vmid:
 *   Target VMID to stall/unstall.
 *
 * stall:
 *   0-unstall wave launch (enable), 1-stall wave launch (disable).
 *   After wavefront launch has been stalled, allocated waves must drain from
 *   SPI in order for debug trap settings to take effect on those waves.
 *   This is roughly a ~96 clock cycle wait on SPI where a read on
 *   SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
 *   KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
 *
 *   NOTE: We can afford to clear the entire STALL_VMID field on unstall
 *   because GFX9.4.1 cannot support multi-process debugging due to trap
 *   configuration and masking being limited to global scope.  Always assume
 *   single process conditions.
 */
#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY …
void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
					uint32_t vmid,
					bool stall)
{ … }

/*
 * restore_dbg_registers is ignored here but is a general interface requirement
 * for devices that support GFXOFF and where the RLC save/restore list
 * does not support hw registers for debugging i.e. the driver has to manually
 * initialize the debug mode registers after it has disabled GFX off during the
 * debug session.
 */
uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
				bool restore_dbg_registers,
				uint32_t vmid)
{ … }

/*
 * keep_trap_enabled is ignored here but is a general interface requirement
 * for devices that support multi-process debugging where the performance
 * overhead from trap temporary setup needs to be bypassed when the debug
 * session has ended.
 */
uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
					bool keep_trap_enabled,
					uint32_t vmid)
{ … }

int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
					uint32_t trap_override,
					uint32_t *trap_mask_supported)
{ … }

uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
					     uint32_t vmid,
					     uint32_t trap_override,
					     uint32_t trap_mask_bits,
					     uint32_t trap_mask_request,
					     uint32_t *trap_mask_prev,
					     uint32_t kfd_dbg_cntl_prev)
{ … }

uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
					uint8_t wave_launch_mode,
					uint32_t vmid)
{ … }

#define TCP_WATCH_STRIDE …
uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
					uint64_t watch_address,
					uint32_t watch_address_mask,
					uint32_t watch_id,
					uint32_t watch_mode,
					uint32_t debug_vmid,
					uint32_t inst)
{ … }

uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
					uint32_t watch_id)
{ … }

/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
 * The values read are:
 *     ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
 *     atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
 *     wrm_offload_wait_time    -- Wait Count for WAIT_REG_MEM Offloads.
 *     gws_wait_time            -- Wait Count for Global Wave Syncs.
 *     que_sleep_wait_time      -- Wait Count for Dequeue Retry.
 *     sch_wave_wait_time       -- Wait Count for Scheduling Wave Message.
 *     sem_rearm_wait_time      -- Wait Count for Semaphore re-arm.
 *     deq_retry_wait_time      -- Wait Count for Global Wave Syncs.
 */
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
					uint32_t *wait_times,
					uint32_t inst)

{ … }

void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
			uint32_t vmid, uint64_t page_table_base)
{ … }

static void lock_spi_csq_mutexes(struct amdgpu_device *adev)
{ … }

static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
{ … }

/**
 * get_wave_count: Read device registers to get number of waves in flight for
 * a particular queue. The method also returns the VMID associated with the
 * queue.
 *
 * @adev: Handle of device whose registers are to be read
 * @queue_idx: Index of queue in the queue-map bit-field
 * @wave_cnt: Output parameter updated with number of waves in flight
 * @vmid: Output parameter updated with VMID of queue whose wave count
 *        is being collected
 * @inst: xcc's instance number on a multi-XCC setup
 */
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
		int *wave_cnt, int *vmid, uint32_t inst)
{ … }

/**
 * kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
 * shader engine and aggregates the number of waves that are in flight for the
 * process whose pasid is provided as a parameter. The process could have ZERO
 * or more queues running and submitting waves to compute units.
 *
 * @adev: Handle of device from which to get number of waves in flight
 * @pasid: Identifies the process for which this query call is invoked
 * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
 *                  belong to process with given pasid
 * @max_waves_per_cu: Output parameter updated with maximum number of waves
 *                    possible per Compute Unit
 * @inst: xcc's instance number on a multi-XCC setup
 *
 * Note: It's possible that the device has too many queues (oversubscription)
 * in which case a VMID could be remapped to a different PASID. This could lead
 * to an inaccurate wave count. Following is a high-level sequence:
 *    Time T1: vmid = getVmid(); vmid is associated with Pasid P1
 *    Time T2: passId = getPasId(vmid); vmid is associated with Pasid P2
 * In the sequence above wave count obtained from time T1 will be incorrectly
 * lost or added to total wave count.
 *
 * The registers that provide the waves in flight are:
 *
 *  SPI_CSQ_WF_ACTIVE_STATUS - bit-map of queues per pipe. The bit is ON if a
 *  queue is slotted, OFF if there is no queue. A process could have ZERO or
 *  more queues slotted and submitting waves to be run on compute units. Even
 *  when there is a queue it is possible there could be zero wave fronts, this
 *  can happen when queue is waiting on top-of-pipe events - e.g. waitRegMem
 *  command
 *
 *  For each bit that is ON from above:
 *
 *    Read (SPI_CSQ_WF_ACTIVE_COUNT_0 + queue_idx) register. It provides the
 *    number of waves that are in flight for the queue at specified index. The
 *    index ranges from 0 to 7.
 *
 *    If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
 *    of the wave(s).
 *
 *    Determine if VMID from above step maps to pasid provided as parameter. If
 *    it matches agrregate the wave count. That the VMID will not match pasid is
 *    a normal condition i.e. a device is expected to support multiple queues
 *    from multiple proceses.
 *
 *  Reading registers referenced above involves programming GRBM appropriately
 */
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
		int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst)
{ … }

void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
		uint32_t wait_times,
		uint32_t grace_period,
		uint32_t *reg_offset,
		uint32_t *reg_data)
{ … }

void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
{ … }

const struct kfd2kgd_calls gfx_v9_kfd2kgd = …;
linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c