// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_device.h"

#include <linux/delay.h>
#include <linux/units.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_client.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/xe_drm.h>

#include "display/xe_display.h"
#include "instructions/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
#include "xe_devcoredump.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
#include "xe_exec.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gsc_proxy.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_observation.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_query.h"
#include "xe_sriov.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_wait_user_fence.h"

static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{ … }

static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{ … }

static const struct drm_ioctl_desc xe_ioctls[] = …;

static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{ … }

#ifdef CONFIG_COMPAT
static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{ … }
#else
/* similarly to drm_compat_ioctl, lets it be assigned to .compat_ioctl unconditionally */
#define xe_drm_compat_ioctl …
#endif

static const struct file_operations xe_driver_fops = …;

static struct drm_driver driver = …;

static void xe_device_destroy(struct drm_device *dev, void *dummy)
{ … }

struct xe_device *xe_device_create(struct pci_dev *pdev,
				   const struct pci_device_id *ent)
{ … }

/*
 * The driver-initiated FLR is the highest level of reset that we can trigger
 * from within the driver. It is different from the PCI FLR in that it doesn't
 * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 * it doesn't require a re-enumeration of the PCI BARs. However, the
 * driver-initiated FLR does still cause a reset of both GT and display and a
 * memory wipe of local and stolen memory, so recovery would require a full HW
 * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 * perform the FLR as the very last action before releasing access to the HW
 * during the driver release flow, we don't attempt recovery at all, because
 * if/when a new instance of the driver is bound to the device it will do a
 * full re-init anyway.
 */
static void xe_driver_flr(struct xe_device *xe)
{ … }
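/*
 * Illustrative sketch (not the elided implementation above): the comment
 * above describes the driver-initiated FLR, which is essentially a
 * register-level handshake -- arm a trigger bit, wait for the hardware to
 * clear it, then wait for and acknowledge a completion status. The
 * hypothetical helper below only shows roughly what such a trigger/poll
 * sequence could look like; the register and bit choices
 * (GU_CNTL/DRIVERFLR, GU_DEBUG/DRIVERFLR_STATUS), the 3s budget and the
 * error handling are assumptions made purely for illustration.
 */
static void __maybe_unused xe_driver_flr_sketch(struct xe_device *xe)
{
	const unsigned int flr_timeout = 3 * MICRO; /* assumed 3s budget, in us */
	struct xe_gt *gt = xe_root_mmio_gt(xe);

	/* Arm the driver-FLR trigger bit */
	xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);

	/* Wait for HW teardown: the trigger bit self-clears when done */
	if (xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false)) {
		drm_err(&xe->drm, "Driver-FLR teardown timed out\n");
		return;
	}

	/* Wait for HW/firmware re-init, then acknowledge the sticky status */
	if (xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
			   flr_timeout, NULL, false)) {
		drm_err(&xe->drm, "Driver-FLR re-init timed out\n");
		return;
	}

	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
}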
static void xe_driver_flr_fini(void *arg)
{ … }

static void xe_device_sanitize(void *arg)
{ … }

static int xe_set_dma_info(struct xe_device *xe)
{ … }

static bool verify_lmem_ready(struct xe_gt *gt)
{ … }

static int wait_for_lmem_ready(struct xe_device *xe)
{ … }

static void update_device_info(struct xe_device *xe)
{ … }

/**
 * xe_device_probe_early() - Device early probe
 * @xe: xe device instance
 *
 * Initialize MMIO resources that don't require any knowledge about tile
 * count. Also initialize pcode and check vram initialization on the root
 * tile.
 *
 * Return: 0 on success, error code on failure
 */
int xe_device_probe_early(struct xe_device *xe)
{ … }

static int xe_device_set_has_flat_ccs(struct xe_device *xe)
{ … }

int xe_device_probe(struct xe_device *xe)
{ … }

static void xe_device_remove_display(struct xe_device *xe)
{ … }

void xe_device_remove(struct xe_device *xe)
{ … }

void xe_device_shutdown(struct xe_device *xe)
{ … }

void xe_device_wmb(struct xe_device *xe)
{ … }

/**
 * xe_device_td_flush() - Flush transient L3 cache entries
 * @xe: The device
 *
 * Display engine has direct access to memory and is never coherent with L3/L4
 * caches (or CPU caches), however KMD is responsible for specifically flushing
 * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
 * can happen from such a surface without seeing corruption.
 *
 * Display surfaces can be tagged as transient by mapping them using one of
 * the various L3:XD PAT index modes on Xe2.
 *
 * Note: On non-discrete Xe2 platforms, like LNL, the entire L3 cache is
 * flushed at the end of each submission via PIPE_CONTROL for compute/render,
 * since SA Media is not coherent with L3 and we want to support
 * render-vs-media use cases. For other engines like copy/blt the HW
 * internally forces uncached behaviour, which is why we can skip the TDF on
 * such platforms.
 */
void xe_device_td_flush(struct xe_device *xe)
{ … }

u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{ … }

/**
 * xe_device_assert_mem_access - Inspect the current runtime_pm state.
 * @xe: xe device instance
 *
 * To be used before any kind of memory access. It splats a debug warning if
 * the device is currently sleeping, but it doesn't guarantee in any way that
 * the device will remain awake. Xe PM runtime get and put functions might be
 * added at the outer bound of the memory access, while this check is intended
 * for inner usage to splat a warning if the worst case has just happened.
 */
void xe_device_assert_mem_access(struct xe_device *xe)
{ … }

void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
{ … }

u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
{ … }

u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
{ … }

static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
{ … }

/**
 * xe_device_declare_wedged - Declare device wedged
 * @xe: xe device instance
 *
 * This is a final state that can only be cleared with a module re-probe
 * (unbind + bind).
 * In this state every IOCTL will be blocked so the GT cannot be used.
 * In general it will be called upon any critical error such as GT reset
 * failure or GuC loading failure.
 * If the xe.wedged module parameter is set to 2, this function will be called
 * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
 * snapshot capture. In this mode, GT reset won't be attempted so the state of
 * the issue is preserved for further debugging.
 */
void xe_device_declare_wedged(struct xe_device *xe)
{ … }
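/*
 * Illustrative sketch of a caller (hypothetical, not part of this file): per
 * the kernel-doc above, xe_device_declare_wedged() is meant to be invoked
 * from critical-error paths such as a failed GT reset or GuC load. The
 * helper name and flow below are assumptions for illustration only; the
 * real callers live in the GT/GuC code.
 */
static void __maybe_unused example_wedge_on_fatal_gt_error(struct xe_gt *gt, int err)
{
	/* Record the unrecoverable error against the affected GT... */
	xe_gt_err(gt, "unrecoverable error %d, wedging device\n", err);

	/* ...then block all further IOCTLs until the driver is re-bound */
	xe_device_declare_wedged(gt_to_xe(gt));
}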