/* * Copyright 2008 Advanced Micro Devices, Inc. * Copyright 2008 Red Hat Inc. * Copyright 2009 Jerome Glisse. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
* * Authors: Dave Airlie * Alex Deucher * Jerome Glisse */ #include <linux/power_supply.h> #include <linux/kthread.h> #include <linux/module.h> #include <linux/console.h> #include <linux/slab.h> #include <linux/iommu.h> #include <linux/pci.h> #include <linux/pci-p2pdma.h> #include <linux/apple-gmux.h> #include <drm/drm_aperture.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> #include <linux/device.h> #include <linux/vgaarb.h> #include <linux/vga_switcheroo.h> #include <linux/efi.h> #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_i2c.h" #include "atom.h" #include "amdgpu_atombios.h" #include "amdgpu_atomfirmware.h" #include "amd_pcie.h" #ifdef CONFIG_DRM_AMDGPU_SI #include "si.h" #endif #ifdef CONFIG_DRM_AMDGPU_CIK #include "cik.h" #endif #include "vi.h" #include "soc15.h" #include "nv.h" #include "bif/bif_4_1_d.h" #include <linux/firmware.h> #include "amdgpu_vf_error.h" #include "amdgpu_amdkfd.h" #include "amdgpu_pm.h" #include "amdgpu_xgmi.h" #include "amdgpu_ras.h" #include "amdgpu_pmu.h" #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" #include "amdgpu_virt.h" #include "amdgpu_dev_coredump.h" #include <linux/suspend.h> #include <drm/task_barrier.h> #include <linux/pm_runtime.h> #include <drm/drm_drv.h> #if IS_ENABLED(CONFIG_X86) #include <asm/intel-family.h> #endif MODULE_FIRMWARE(…) …; MODULE_FIRMWARE(…) …; MODULE_FIRMWARE(…) …; MODULE_FIRMWARE(…) …; MODULE_FIRMWARE(…) …; MODULE_FIRMWARE(…) …; MODULE_FIRMWARE(…) …; #define AMDGPU_RESUME_MS … #define AMDGPU_MAX_RETRY_LIMIT … #define AMDGPU_RETRY_SRIOV_RESET(r) … #define AMDGPU_PCIE_INDEX_FALLBACK … #define AMDGPU_PCIE_INDEX_HI_FALLBACK … #define AMDGPU_PCIE_DATA_FALLBACK … static const struct drm_driver amdgpu_kms_driver; const char *amdgpu_asic_name[] = …; static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); /** * DOC: pcie_replay_count * * The 
amdgpu driver provides a sysfs API for reporting the total number * of PCIe replays (NAKs) * The file pcie_replay_count is used for this and returns the total * number of replays as a sum of the NAKs generated and NAKs received */ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, struct device_attribute *attr, char *buf) { … } static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t ppos, size_t count) { … } BIN_ATTR(…); int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev) { … } void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev) { … } /** * DOC: board_info * * The amdgpu driver provides a sysfs API for giving board related information. * It provides the form factor information in the format * * type : form factor * * Possible form factor values * * - "cem" - PCIE CEM card * - "oam" - Open Compute Accelerator Module * - "unknown" - Not known * */ static ssize_t amdgpu_device_get_board_info(struct device *dev, struct device_attribute *attr, char *buf) { … } static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL); static struct attribute *amdgpu_board_attrs[] = …; static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj, struct attribute *attr, int n) { … } static const struct attribute_group amdgpu_board_attrs_group = …; static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); /** * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control * * @dev: drm_device pointer * * Returns true if the device is a dGPU with ATPX power control, * otherwise return false. */ bool amdgpu_device_supports_px(struct drm_device *dev) { … } /** * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources * * @dev: drm_device pointer * * Returns true if the device is a dGPU with ACPI power control, * otherwise return false. 
*/ bool amdgpu_device_supports_boco(struct drm_device *dev) { … } /** * amdgpu_device_supports_baco - Does the device support BACO * * @dev: drm_device pointer * * Return: * 1 if the device supports BACO; * 3 if the device supports MACO (only works if BACO is supported) * otherwise return 0. */ int amdgpu_device_supports_baco(struct drm_device *dev) { … } void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) { … } /** * amdgpu_device_supports_smart_shift - Is the device dGPU with * smart shift support * * @dev: drm_device pointer * * Returns true if the device is a dGPU with Smart Shift support, * otherwise returns false. */ bool amdgpu_device_supports_smart_shift(struct drm_device *dev) { … } /* * VRAM access helper functions */ /** * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram * @buf: virtual address of the buffer in system memory * @size: read/write size, sizeof(@buf) must > @size * @write: true - write to vram, otherwise - read from vram */ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, void *buf, size_t size, bool write) { … } /** * amdgpu_device_aper_access - access vram by vram aperture * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram * @buf: virtual address of the buffer in system memory * @size: read/write size, sizeof(@buf) must > @size * @write: true - write to vram, otherwise - read from vram * * The return value means how many bytes have been transferred. 
*/ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, void *buf, size_t size, bool write) { … } /** * amdgpu_device_vram_access - read/write a buffer in vram * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram * @buf: virtual address of the buffer in system memory * @size: read/write size, sizeof(@buf) must > @size * @write: true - write to vram, otherwise - read from vram */ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, void *buf, size_t size, bool write) { … } /* * register access helper functions. */ /* Check if hw access should be skipped because of hotplug or device error */ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) { … } /** * amdgpu_device_rreg - read a memory mapped IO or indirect register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset * @acc_flags: access flags which require special behavior * * Returns the 32 bit value from the offset specified. */ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags) { … } /* * MMIO register read with bytes helper functions * @offset:bytes offset from MMIO start */ /** * amdgpu_mm_rreg8 - read a memory mapped IO register * * @adev: amdgpu_device pointer * @offset: byte aligned register offset * * Returns the 8 bit value from the offset specified. */ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) { … } /** * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC * * @adev: amdgpu_device pointer * @reg: dword aligned register offset * @acc_flags: access flags which require special behavior * @xcc_id: xcc accelerated compute core id * * Returns the 32 bit value from the offset specified. 
*/ uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags, uint32_t xcc_id) { … } /* * MMIO register write with bytes helper functions * @offset:bytes offset from MMIO start * @value: the value to be written to the register */ /** * amdgpu_mm_wreg8 - write a memory mapped IO register * * @adev: amdgpu_device pointer * @offset: byte aligned register offset * @value: 8 bit value to write * * Writes the value specified to the offset specified. */ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) { … } /** * amdgpu_device_wreg - write to a memory mapped IO or indirect register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset * @v: 32 bit value to write to the register * @acc_flags: access flags which require special behavior * * Writes the value specified to the offset specified. */ void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { … } /** * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range * * @adev: amdgpu_device pointer * @reg: mmio/rlc register * @v: value to write * @xcc_id: xcc accelerated compute core id * * This function is invoked only for the debugfs register access. */ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id) { … } /** * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC * * @adev: amdgpu_device pointer * @reg: dword aligned register offset * @v: 32 bit value to write to the register * @acc_flags: access flags which require special behavior * @xcc_id: xcc accelerated compute core id * * Writes the value specified to the offset specified. 
*/ void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags, uint32_t xcc_id) { … } /** * amdgpu_device_indirect_rreg - read an indirect register * * @adev: amdgpu_device pointer * @reg_addr: indirect register address to read from * * Returns the value of indirect register @reg_addr */ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, u32 reg_addr) { … } u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr) { … } /** * amdgpu_device_indirect_rreg64 - read a 64bits indirect register * * @adev: amdgpu_device pointer * @reg_addr: indirect register address to read from * * Returns the value of indirect register @reg_addr */ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, u32 reg_addr) { … } u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev, u64 reg_addr) { … } /** * amdgpu_device_indirect_wreg - write an indirect register address * * @adev: amdgpu_device pointer * @reg_addr: indirect register offset * @reg_data: indirect register data * */ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, u32 reg_addr, u32 reg_data) { … } void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr, u32 reg_data) { … } /** * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address * * @adev: amdgpu_device pointer * @reg_addr: indirect register offset * @reg_data: indirect register data * */ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, u32 reg_addr, u64 reg_data) { … } void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev, u64 reg_addr, u64 reg_data) { … } /** * amdgpu_device_get_rev_id - query device rev_id * * @adev: amdgpu_device pointer * * Return device rev_id */ u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev) { … } /** * amdgpu_invalid_rreg - dummy reg read function * * @adev: amdgpu_device pointer * @reg: offset of register * * Dummy register read function. 
Used for register blocks * that certain asics don't have (all asics). * Returns the value in the register. */ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg) { … } static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg) { … } /** * amdgpu_invalid_wreg - dummy reg write function * * @adev: amdgpu_device pointer * @reg: offset of register * @v: value to write to the register * * Dummy register write function. Used for register blocks * that certain asics don't have (all asics). */ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { … } static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v) { … } /** * amdgpu_invalid_rreg64 - dummy 64 bit reg read function * * @adev: amdgpu_device pointer * @reg: offset of register * * Dummy register read function. Used for register blocks * that certain asics don't have (all asics). * Returns the value in the register. */ static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) { … } static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg) { … } /** * amdgpu_invalid_wreg64 - dummy 64 bit reg write function * * @adev: amdgpu_device pointer * @reg: offset of register * @v: value to write to the register * * Dummy register write function. Used for register blocks * that certain asics don't have (all asics). */ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) { … } static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v) { … } /** * amdgpu_block_invalid_rreg - dummy reg read function * * @adev: amdgpu_device pointer * @block: offset of instance * @reg: offset of register * * Dummy register read function. Used for register blocks * that certain asics don't have (all asics). * Returns the value in the register. 
*/ static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev, uint32_t block, uint32_t reg) { … } /** * amdgpu_block_invalid_wreg - dummy reg write function * * @adev: amdgpu_device pointer * @block: offset of instance * @reg: offset of register * @v: value to write to the register * * Dummy register write function. Used for register blocks * that certain asics don't have (all asics). */ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, uint32_t block, uint32_t reg, uint32_t v) { … } /** * amdgpu_device_asic_init - Wrapper for atom asic_init * * @adev: amdgpu_device pointer * * Does any asic specific work and then calls atom asic init. */ static int amdgpu_device_asic_init(struct amdgpu_device *adev) { … } /** * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page * * @adev: amdgpu_device pointer * * Allocates a scratch page of VRAM for use by various things in the * driver. */ static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev) { … } /** * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page * * @adev: amdgpu_device pointer * * Frees the VRAM scratch page. */ static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev) { … } /** * amdgpu_device_program_register_sequence - program an array of registers. * * @adev: amdgpu_device pointer * @registers: pointer to the register array * @array_size: size of the register array * * Programs an array of registers with AND/OR masks. * This is a helper for setting golden registers. */ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 *registers, const u32 array_size) { … } /** * amdgpu_device_pci_config_reset - reset the GPU * * @adev: amdgpu_device pointer * * Resets the GPU using the pci config reset sequence. * Only applicable to asics prior to vega10. 
*/ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) { … } /** * amdgpu_device_pci_reset - reset the GPU using generic PCI means * * @adev: amdgpu_device pointer * * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.). */ int amdgpu_device_pci_reset(struct amdgpu_device *adev) { … } /* * amdgpu_device_wb_*() * Writeback is the method by which the GPU updates special pages in memory * with the status of certain GPU events (fences, ring pointers,etc.). */ /** * amdgpu_device_wb_fini - Disable Writeback and free memory * * @adev: amdgpu_device pointer * * Disables Writeback and frees the Writeback memory (all asics). * Used at driver shutdown. */ static void amdgpu_device_wb_fini(struct amdgpu_device *adev) { … } /** * amdgpu_device_wb_init - Init Writeback driver info and allocate memory * * @adev: amdgpu_device pointer * * Initializes writeback and allocates writeback memory (all asics). * Used at driver startup. * Returns 0 on success or an -error on failure. */ static int amdgpu_device_wb_init(struct amdgpu_device *adev) { … } /** * amdgpu_device_wb_get - Allocate a wb entry * * @adev: amdgpu_device pointer * @wb: wb index * * Allocate a wb slot for use by the driver (all asics). * Returns 0 on success or -EINVAL on failure. */ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) { … } /** * amdgpu_device_wb_free - Free a wb entry * * @adev: amdgpu_device pointer * @wb: wb index * * Free a wb slot allocated for use by the driver (all asics) */ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) { … } /** * amdgpu_device_resize_fb_bar - try to resize FB BAR * * @adev: amdgpu_device pointer * * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not * to fail, but if any of the BARs is not accessible after the size we abort * driver loading by returning -ENODEV. 
*/ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) { … } static bool amdgpu_device_read_bios(struct amdgpu_device *adev) { … } /* * GPU helpers function. */ /** * amdgpu_device_need_post - check if the hw need post or not * * @adev: amdgpu_device pointer * * Check if the asic has been initialized (all asics) at driver startup * or post is needed if hw reset is performed. * Returns true if need or false if not. */ bool amdgpu_device_need_post(struct amdgpu_device *adev) { … } /* * Check whether seamless boot is supported. * * So far we only support seamless boot on DCE 3.0 or later. * If users report that it works on older ASICS as well, we may * loosen this. */ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev) { … } /* * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids * don't support dynamic speed switching. Until we have confirmation from Intel * that a specific host supports it, it's safer that we keep it disabled for all. * * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663 */ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev) { … } /** * amdgpu_device_should_use_aspm - check if the device should program ASPM * * @adev: amdgpu_device pointer * * Confirm whether the module parameter and pcie bridge agree that ASPM should * be set for this device. * * Returns true if it should be used or false if not. */ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) { … } /* if we get transitioned to only one device, take VGA back */ /** * amdgpu_device_vga_set_decode - enable/disable vga decode * * @pdev: PCI device pointer * @state: enable/disable vga decode * * Enable/disable vga decode (all asics). * Returns VGA resource flags. 
*/ static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev, bool state) { … } /** * amdgpu_device_check_block_size - validate the vm block size * * @adev: amdgpu_device pointer * * Validates the vm block size specified via module parameter. * The vm block size defines number of bits in page table versus page directory, * a page is 4KB so we have 12 bits offset, minimum 9 bits in the * page table and the remaining bits are in the page directory. */ static void amdgpu_device_check_block_size(struct amdgpu_device *adev) { … } /** * amdgpu_device_check_vm_size - validate the vm size * * @adev: amdgpu_device pointer * * Validates the vm size in GB specified via module parameter. * The VM size is the size of the GPU virtual memory space in GB. */ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) { … } static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) { … } static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) { … } /** * amdgpu_device_check_arguments - validate module params * * @adev: amdgpu_device pointer * * Validates certain module parameters and updates * the associated values used by the driver (all asics). */ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { … } /** * amdgpu_switcheroo_set_state - set switcheroo state * * @pdev: pci dev pointer * @state: vga_switcheroo state * * Callback for the switcheroo driver. Suspends or resumes * the asics before or after it is powered up using ACPI methods. */ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) { … } /** * amdgpu_switcheroo_can_switch - see if switcheroo state can change * * @pdev: pci dev pointer * * Callback for the switcheroo driver. Check of the switcheroo * state can be changed. * Returns true if the state can be changed, false if not. 
*/ static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev) { … } static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = …; /** * amdgpu_device_ip_set_clockgating_state - set the CG state * * @dev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) * @state: clockgating state (gate or ungate) * * Sets the requested clockgating state for all instances of * the hardware IP specified. * Returns the error code from the last instance. */ int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state) { … } /** * amdgpu_device_ip_set_powergating_state - set the PG state * * @dev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) * @state: powergating state (gate or ungate) * * Sets the requested powergating state for all instances of * the hardware IP specified. * Returns the error code from the last instance. */ int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state) { … } /** * amdgpu_device_ip_get_clockgating_state - get the CG state * * @adev: amdgpu_device pointer * @flags: clockgating feature flags * * Walks the list of IPs on the device and updates the clockgating * flags for each IP. * Updates @flags with the feature flags for each hardware IP where * clockgating is enabled. */ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, u64 *flags) { … } /** * amdgpu_device_ip_wait_for_idle - wait for idle * * @adev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) * * Waits for the request hardware IP to be idle. * Returns 0 for success or a negative error code on failure. 
*/ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, enum amd_ip_block_type block_type) { … } /** * amdgpu_device_ip_is_idle - is the hardware IP idle * * @adev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) * * Check if the hardware IP is idle or not. * Returns true if the IP is idle, false if not. */ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, enum amd_ip_block_type block_type) { … } /** * amdgpu_device_ip_get_ip_block - get a hw IP pointer * * @adev: amdgpu_device pointer * @type: Type of hardware IP (SMU, GFX, UVD, etc.) * * Returns a pointer to the hardware IP block structure * if it exists for the asic, otherwise NULL. */ struct amdgpu_ip_block * amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, enum amd_ip_block_type type) { … } /** * amdgpu_device_ip_block_version_cmp - compare an IP block version against major/minor * * @adev: amdgpu_device pointer * @type: enum amd_ip_block_type * @major: major version * @minor: minor version * * return 0 if equal or greater * return 1 if smaller or the ip_block doesn't exist */ int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, enum amd_ip_block_type type, u32 major, u32 minor) { … } /** * amdgpu_device_ip_block_add - add an IP block to the asic's collection of IPs * * @adev: amdgpu_device pointer * @ip_block_version: pointer to the IP to add * * Adds the IP block driver information to the collection of IPs * on the asic. */ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, const struct amdgpu_ip_block_version *ip_block_version) { … } /** * amdgpu_device_enable_virtual_display - enable virtual display feature * * @adev: amdgpu_device pointer * * Enables the virtual display feature if the user has enabled it via * the module parameter virtual_display. This feature provides a virtual * display hardware on headless boards or in virtualized environments. 
* This function parses and validates the configuration string specified by * the user and configures the virtual display configuration (number of * virtual connectors, crtcs, etc.) specified. */ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) { … } void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) { … } /** * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware * * @adev: amdgpu_device pointer * * Parses the asic configuration parameters specified in the gpu info * firmware and makes them available to the driver for use in configuring * the asic. * Returns 0 on success, -EINVAL on failure. */ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_early_init - run early init for hardware IPs * * @adev: amdgpu_device pointer * * Early initialization pass for hardware IPs. The hardware IPs that make * up each asic are discovered and each IP's early_init callback is run. This * is the first stage in initializing the asic. * Returns 0 on success, negative error code on failure. */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { … } static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) { … } static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) { … } static int amdgpu_device_fw_loading(struct amdgpu_device *adev) { … } static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_init - run init for hardware IPs * * @adev: amdgpu_device pointer * * Main initialization pass for hardware IPs. The list of all the hardware * IPs that make up the asic is walked and the sw_init and hw_init callbacks * are run. sw_init initializes the software state associated with each IP * and hw_init initializes the hardware associated with each IP. * Returns 0 on success, negative error code on failure. 
*/ static int amdgpu_device_ip_init(struct amdgpu_device *adev) { … } /** * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer * * @adev: amdgpu_device pointer * * Writes a reset magic value to the gart pointer in VRAM. The driver calls * this function before a GPU reset. If the value is retained after a * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents. */ static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) { … } /** * amdgpu_device_check_vram_lost - check if vram is valid * * @adev: amdgpu_device pointer * * Checks the reset magic value written to the gart pointer in VRAM. * The driver calls this after a GPU reset to see if the contents of * VRAM are lost or not. * Returns true if vram is lost, false if not. */ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) { … } /** * amdgpu_device_set_cg_state - set clockgating for amdgpu device * * @adev: amdgpu_device pointer * @state: clockgating state (gate or ungate) * * The list of all the hardware IPs that make up the asic is walked and the * set_clockgating_state callbacks are run. * Late initialization pass enabling clockgating for hardware IPs. * Fini or suspend, pass disabling clockgating for hardware IPs. * Returns 0 on success, negative error code on failure. */ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, enum amd_clockgating_state state) { … } int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state) { … } static int amdgpu_device_enable_mgpu_fan_boost(void) { … } /** * amdgpu_device_ip_late_init - run late init for hardware IPs * * @adev: amdgpu_device pointer * * Late initialization pass for hardware IPs. The list of all the hardware * IPs that make up the asic is walked and the late_init callbacks are run. * late_init covers any special initialization that an IP requires * after all of the IPs have been initialized or something that needs to happen * late in the init process. 
* Returns 0 on success, negative error code on failure. */ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) { … } /** * amdgpu_device_smu_fini_early - smu hw_fini wrapper * * @adev: amdgpu_device pointer * * For ASICs need to disable SMC first */ static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) { … } static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_fini - run fini for hardware IPs * * @adev: amdgpu_device pointer * * Main teardown pass for hardware IPs. The list of all the hardware * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks * are run. hw_fini tears down the hardware associated with each IP * and sw_fini tears down any software state associated with each IP. * Returns 0 on success, negative error code on failure. */ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { … } /** * amdgpu_device_delayed_init_work_handler - work handler for IB tests * * @work: work_struct. */ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) { … } static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) { … } /** * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) * * @adev: amdgpu_device pointer * * Main suspend function for hardware IPs. The list of all the hardware * IPs that make up the asic is walked, clockgating is disabled and the * suspend callbacks are run. suspend puts the hardware and software state * in each IP into a state suitable for suspend. * Returns 0 on success, negative error code on failure. */ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) * * @adev: amdgpu_device pointer * * Main suspend function for hardware IPs. The list of all the hardware * IPs that make up the asic is walked, clockgating is disabled and the * suspend callbacks are run. 
suspend puts the hardware and software state * in each IP into a state suitable for suspend. * Returns 0 on success, negative error code on failure. */ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_suspend - run suspend for hardware IPs * * @adev: amdgpu_device pointer * * Main suspend function for hardware IPs. The list of all the hardware * IPs that make up the asic is walked, clockgating is disabled and the * suspend callbacks are run. suspend puts the hardware and software state * in each IP into a state suitable for suspend. * Returns 0 on success, negative error code on failure. */ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) { … } static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) { … } static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs * * @adev: amdgpu_device pointer * * First resume function for hardware IPs. The list of all the hardware * IPs that make up the asic is walked and the resume callbacks are run for * COMMON, GMC, and IH. resume puts the hardware into a functional state * after a suspend and updates the software state as necessary. This * function is also used for restoring the GPU after a GPU reset. * Returns 0 on success, negative error code on failure. */ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs * * @adev: amdgpu_device pointer * * Second resume function for hardware IPs. The list of all the hardware * IPs that make up the asic is walked and the resume callbacks are run for * all blocks except COMMON, GMC, and IH. resume puts the hardware into a * functional state after a suspend and updates the software state as * necessary. This function is also used for restoring the GPU after a GPU * reset. * Returns 0 on success, negative error code on failure. 
*/ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_resume - run resume for hardware IPs * * @adev: amdgpu_device pointer * * Main resume function for hardware IPs. The hardware IPs * are split into two resume functions because they are * also used in recovering from a GPU reset and some additional * steps need to be taken between them. In this case (S3/S4) they are * run sequentially. * Returns 0 on success, negative error code on failure. */ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { … } /** * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV * * @adev: amdgpu_device pointer * * Query the VBIOS data tables to determine if the board supports SR-IOV. */ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) { … } /** * amdgpu_device_asic_has_dc_support - determine if DC supports the asic * * @asic_type: AMD asic type * * Check if there is DC (new modesetting infrastructure) support for an asic. * Returns true if DC has support, false if not.
*/ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) { … } /** * amdgpu_device_has_dc_support - check if dc is supported * * @adev: amdgpu_device pointer * * Returns true for supported, false for not supported */ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) { … } static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) { … } static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) { … } /** * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU * * @adev: amdgpu_device pointer * * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode */ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) { … } static const struct attribute *amdgpu_dev_attributes[] = …; static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) { … } /** * amdgpu_device_init - initialize the driver * * @adev: amdgpu_device pointer * @flags: driver flags * * Initializes the driver info and hw (all asics). * Returns 0 for success or an error on failure. * Called at driver startup. */ int amdgpu_device_init(struct amdgpu_device *adev, uint32_t flags) { … } static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) { … } /** * amdgpu_device_fini_hw - tear down the driver * * @adev: amdgpu_device pointer * * Tear down the driver info (all asics). * Called at driver shutdown. */ void amdgpu_device_fini_hw(struct amdgpu_device *adev) { … } void amdgpu_device_fini_sw(struct amdgpu_device *adev) { … } /** * amdgpu_device_evict_resources - evict device resources * @adev: amdgpu device object * * Evicts all ttm device resources(vram BOs, gart table) from the lru list * of the vram memory type. Mainly used for evicting device resources * at suspend time. * */ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) { … } /* * Suspend & resume. 
*/ /** * amdgpu_device_prepare - prepare for device suspend * * @dev: drm dev pointer * * Prepare to put the hw in the suspend state (all asics). * Returns 0 for success or an error on failure. * Called at driver suspend. */ int amdgpu_device_prepare(struct drm_device *dev) { … } /** * amdgpu_device_suspend - initiate device suspend * * @dev: drm dev pointer * @fbcon : notify the fbdev of suspend * * Puts the hw in the suspend state (all asics). * Returns 0 for success or an error on failure. * Called at driver suspend. */ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) { … } /** * amdgpu_device_resume - initiate device resume * * @dev: drm dev pointer * @fbcon : notify the fbdev of resume * * Bring the hw back to operating state (all asics). * Returns 0 for success or an error on failure. * Called at driver resume. */ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) { … } /** * amdgpu_device_ip_check_soft_reset - did soft reset succeed * * @adev: amdgpu_device pointer * * The list of all the hardware IPs that make up the asic is walked and * the check_soft_reset callbacks are run. check_soft_reset determines * if the asic is still hung or not. * Returns true if any of the IPs are still in a hung state, false if not. */ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_pre_soft_reset - prepare for soft reset * * @adev: amdgpu_device pointer * * The list of all the hardware IPs that make up the asic is walked and the * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset * handles any IP specific hardware or software state changes that are * necessary for a soft reset to succeed. * Returns 0 on success, negative error code on failure. */ static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed * * @adev: amdgpu_device pointer * * Some hardware IPs cannot be soft reset. 
If they are hung, a full gpu * reset is necessary to recover. * Returns true if a full asic reset is required, false if not. */ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_soft_reset - do a soft reset * * @adev: amdgpu_device pointer * * The list of all the hardware IPs that make up the asic is walked and the * soft_reset callbacks are run if the block is hung. soft_reset handles any * IP specific hardware or software state changes that are necessary to soft * reset the IP. * Returns 0 on success, negative error code on failure. */ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) { … } /** * amdgpu_device_ip_post_soft_reset - clean up from soft reset * * @adev: amdgpu_device pointer * * The list of all the hardware IPs that make up the asic is walked and the * post_soft_reset callbacks are run if the asic was hung. post_soft_reset * handles any IP specific hardware or software state changes that are * necessary after the IP has been soft reset. * Returns 0 on success, negative error code on failure. */ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) { … } /** * amdgpu_device_recover_vram - Recover some VRAM contents * * @adev: amdgpu_device pointer * * Restores the contents of VRAM buffers from the shadows in GTT. Used to * restore things like GPUVM page tables after a GPU reset where * the contents of VRAM might be lost. * * Returns: * 0 on success, negative error code on failure. 
*/ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) { … } /** * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * * @adev: amdgpu_device pointer * @reset_context: amdgpu reset context pointer * * Do a VF FLR and reinitialize the ASIC. * Returns 0 on success, otherwise failure. */ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { … } /** * amdgpu_device_has_job_running - check if there is any job in mirror list * * @adev: amdgpu_device pointer * * Check if there is any job in mirror list */ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) { … } /** * amdgpu_device_should_recover_gpu - check if we should try GPU recovery * * @adev: amdgpu_device pointer * * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover * a hung GPU. */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) { … } int amdgpu_device_mode1_reset(struct amdgpu_device *adev) { … } int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { … } static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) { … } int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) { … } static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) { … } static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev) { … } static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) { … } static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) { … } static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) { … } static int amdgpu_device_health_check(struct list_head *device_list_handle) { … } /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * * @adev: amdgpu_device pointer * @job: the job that triggered the hang * @reset_context: amdgpu reset context pointer * * Attempt to reset the GPU if it has hung (all asics).
* Attempt to do soft-reset or full-reset and reinitialize Asic * Returns 0 for success or an error on failure. */ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, struct amdgpu_reset_context *reset_context) { … } /** * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner * * @adev: amdgpu_device pointer * @speed: pointer to the speed of the link * @width: pointer to the width of the link * * Evaluate the hierarchy to find the speed and bandwidth capabilities of the * first physical partner to an AMD dGPU. * This will exclude any virtual switches and links. */ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, enum pci_bus_speed *speed, enum pcie_link_width *width) { … } /** * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot * * @adev: amdgpu_device pointer * * Fetches and stores in the driver the PCIE capabilities (gen speed * and lanes) of the slot the device is in. Handles APUs and * virtualized environments where PCIE config space may not be available. */ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { … } /** * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR * * @adev: amdgpu_device pointer * @peer_adev: amdgpu_device pointer for peer device trying to access @adev * * Return true if @peer_adev can access (DMA) @adev through the PCIe * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of * @peer_adev. */ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev) { … } int amdgpu_device_baco_enter(struct drm_device *dev) { … } int amdgpu_device_baco_exit(struct drm_device *dev) { … } /** * amdgpu_pci_error_detected - Called when a PCI error is detected. * @pdev: PCI device struct * @state: PCI channel state * * Description: Called when a PCI error is detected. * * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
*/ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { … } /** * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers * @pdev: pointer to PCI device */ pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev) { … } /** * amdgpu_pci_slot_reset - Called when PCI slot has been reset. * @pdev: PCI device struct * * Description: This routine is called by the pci error recovery * code after the PCI slot has been reset, just before we * should resume normal operations. */ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) { … } /** * amdgpu_pci_resume() - resume normal ops after PCI reset * @pdev: pointer to PCI device * * Called when the error recovery driver tells us that it's * OK to resume normal operation. */ void amdgpu_pci_resume(struct pci_dev *pdev) { … } bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) { … } bool amdgpu_device_load_pci_state(struct pci_dev *pdev) { … } void amdgpu_device_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { … } void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { … } int amdgpu_in_reset(struct amdgpu_device *adev) { … } /** * amdgpu_device_halt() - bring hardware to some kind of halt state * * @adev: amdgpu_device pointer * * Bring hardware to some kind of halt state so that no one can touch it * any more. It will help to maintain error context when an error occurs. * Compared to a simple hang, the system will stay stable at least for SSH * access. Then it should be trivial to inspect the hardware state and * see what's going on. Implemented as follows: * * 1. drm_dev_unplug() makes device inaccessible to user space (IOCTLs, etc.), * clears all CPU mappings to device, disallows remappings through page faults * 2. amdgpu_irq_disable_all() disables all interrupts * 3. amdgpu_fence_driver_hw_fini() signals all HW fences * 4. set adev->no_hw_access to avoid potential crashes after step 5 * 5.
amdgpu_device_unmap_mmio() clears all MMIO mappings * 6. pci_disable_device() and pci_wait_for_pending_transaction() * flush any in flight DMA operations */ void amdgpu_device_halt(struct amdgpu_device *adev) { … } u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, u32 reg) { … } void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { … } /** * amdgpu_device_get_gang - return a reference to the current gang * @adev: amdgpu_device pointer * * Returns: A new reference to the current gang leader. */ struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev) { … } /** * amdgpu_device_switch_gang - switch to a new gang * @adev: amdgpu_device pointer * @gang: the gang to switch to * * Try to switch to a new gang. * Returns: NULL if we switched to the new gang or a reference to the current * gang leader. */ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, struct dma_fence *gang) { … } bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) { … } uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, uint32_t inst, uint32_t reg_addr, char reg_name[], uint32_t expected_value, uint32_t mask) { … }