linux/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021-2022 Intel Corporation
 */

#ifndef _INTEL_GUC_CAPTURE_FWIF_H
#define _INTEL_GUC_CAPTURE_FWIF_H

#include <linux/types.h>
#include "intel_guc_fwif.h"

/*
 * Forward declarations of types defined elsewhere.
 * NOTE(review): neither type is referenced in the visible part of this
 * header - confirm whether these declarations are still required.
 */
struct intel_guc;
struct file;

/*
 * struct __guc_capture_bufstate
 *
 * Book-keeping structure used to track read and write pointers
 * as we extract error capture data from the GuC-log-buffer's
 * error-capture region as a stream of dwords.
 */
struct __guc_capture_bufstate {
	/* NOTE(review): presumably the size of the region at @data - confirm units */
	u32 size;
	/* Start of the error-capture region being extracted from */
	void *data;
	/* Read pointer into the region (NOTE(review): byte offset vs dword index - confirm) */
	u32 rd;
	/* Write pointer into the region (same units as @rd) */
	u32 wr;
};

/*
 * struct __guc_capture_parsed_output - extracted error capture node
 *
 * A single unit of extracted error-capture output data grouped together
 * at an engine-instance level. We keep these nodes in a linked list.
 * See cachelist and outlist below.
 */
struct __guc_capture_parsed_output {
	/*
	 * A single set of 3 capture lists: a global-list,
	 * an engine-class-list and an engine-instance list.
	 * outlist in struct intel_guc_state_capture will keep
	 * a linked list of these nodes that will eventually
	 * be detached from outlist and attached to
	 * i915_gpu_coredump in response to a context reset.
	 */
	struct list_head link;
	/* NOTE(review): presumably set for GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL groups - confirm */
	bool is_partial;
	/*
	 * Identification of the captured engine and context.
	 * NOTE(review): presumably filled from struct guc_state_capture_header_t
	 * while parsing - confirm against the parser.
	 */
	u32 eng_class;
	u32 eng_inst;
	u32 guc_id;
	u32 lrca;
	/* One register list per capture list type (array bound is GUC_CAPTURE_LIST_TYPE_MAX) */
	struct gcap_reg_list_info {
		u32 vfid;
		/* NOTE(review): presumably the number of valid entries in @regs - confirm */
		u32 num_regs;
		struct guc_mmio_reg *regs;
	} reginfo[GUC_CAPTURE_LIST_TYPE_MAX];
/*
 * Note: despite the _INDEX_ naming, these expand to BIT() masks (one bit per
 * capture list type); they are not subscripts for reginfo[].
 */
#define GCAP_PARSED_REGLIST_INDEX_GLOBAL   BIT(GUC_CAPTURE_LIST_TYPE_GLOBAL)
#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS)
#define GCAP_PARSED_REGLIST_INDEX_ENGINST  BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE)
};

/*
 * struct guc_debug_capture_list_header / struct guc_debug_capture_list
 *
 * As part of ADS registration, these header structures (followed by
 * an array of 'struct guc_mmio_reg' entries) are used to register with
 * GuC microkernel the list of registers we want it to dump out prior
 * to an engine reset.
 */
struct guc_debug_capture_list_header {
	/*
	 * Packed header word; GUC_CAPTURELISTHDR_NUMDESCR selects bits 15:0.
	 * NOTE(review): presumably the number of register descriptors that
	 * follow this header - confirm.
	 */
	u32 info;
#define GUC_CAPTURELISTHDR_NUMDESCR GENMASK(15, 0)
} __packed;

/* A capture-list header immediately followed by its register entries. */
struct guc_debug_capture_list {
	struct guc_debug_capture_list_header header;
	/* Flexible array of register entries; its length is not stored in this type */
	struct guc_mmio_reg regs[];
} __packed;

/*
 * struct __guc_mmio_reg_descr / struct __guc_mmio_reg_descr_group
 *
 * intel_guc_capture module uses these structures to maintain static
 * tables (per unique platform) that consist of lists of registers
 * (offsets, names, flags,...) that are used at the ADS registration
 * time as well as during runtime processing and reporting of error-
 * capture states generated by GuC just prior to engine reset events.
 */
/* Description of a single register within a capture list. */
struct __guc_mmio_reg_descr {
	/* The register (offset) to capture */
	i915_reg_t reg;
	/* Per-register flags */
	u32 flags;
	/* NOTE(review): semantics of @mask are not visible in this header - confirm */
	u32 mask;
	/* Name of the register */
	const char *regname;
};

/* A list of register descriptors, tagged with its owner, type and engine class. */
struct __guc_mmio_reg_descr_group {
	/* Read-only array of register descriptors */
	const struct __guc_mmio_reg_descr *list;
	/* NOTE(review): presumably the number of entries in @list (and/or @extlist) - confirm */
	u32 num_regs;
	u32 owner; /* see enum guc_capture_owner */
	u32 type; /* see enum guc_capture_type */
	u32 engine; /* as per MAX_ENGINE_CLASS */
	struct __guc_mmio_reg_descr *extlist; /* only used for steered registers */
};

/*
 * struct guc_state_capture_header_t / struct guc_state_capture_t /
 * guc_state_capture_group_header_t / guc_state_capture_group_t
 *
 * Prior to resetting engines that have hung or faulted, GuC microkernel
 * reports the engine error-state (register values that were read) by
 * logging them into the shared GuC log buffer using this hierarchy
 * of structures.
 */
/* Header preceding the register entries of a single capture. */
struct guc_state_capture_header_t {
	/* Owner word; CAP_HDR_CAPTURE_VFID selects bits 7:0 */
	u32 owner;
#define CAP_HDR_CAPTURE_VFID GENMASK(7, 0)
	/*
	 * Packed word: capture type in bits 3:0, engine class in bits 7:4
	 * and engine instance in bits 11:8 (see the masks below).
	 */
	u32 info;
#define CAP_HDR_CAPTURE_TYPE GENMASK(3, 0) /* see enum guc_capture_type */
#define CAP_HDR_ENGINE_CLASS GENMASK(7, 4) /* see GUC_MAX_ENGINE_CLASSES */
#define CAP_HDR_ENGINE_INSTANCE GENMASK(11, 8)
	u32 lrca; /* if type-instance, LRCA (address) that hung, else set to ~0 */
	u32 guc_id; /* if type-instance, context index of hung context, else set to ~0 */
	/*
	 * CAP_HDR_NUM_MMIOS selects bits 9:0.
	 * NOTE(review): presumably the number of mmio_entries[] that follow
	 * in struct guc_state_capture_t - confirm.
	 */
	u32 num_mmios;
#define CAP_HDR_NUM_MMIOS GENMASK(9, 0)
} __packed;

/* One capture: a header immediately followed by its register entries. */
struct guc_state_capture_t {
	struct guc_state_capture_header_t header;
	/* Flexible array of register entries; the array itself carries no length */
	struct guc_mmio_reg mmio_entries[];
} __packed;

/*
 * Capture group types, carried in the CAP_GRP_HDR_CAPTURE_TYPE field of
 * struct guc_state_capture_group_header_t.
 */
enum guc_capture_group_types {
	GUC_STATE_CAPTURE_GROUP_TYPE_FULL,
	GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL,
	/* Number of group types; not a valid type itself */
	GUC_STATE_CAPTURE_GROUP_TYPE_MAX,
};

/* Header preceding the captures that make up one capture group. */
struct guc_state_capture_group_header_t {
	/* Owner word; CAP_GRP_HDR_CAPTURE_VFID selects bits 7:0 */
	u32 owner;
#define CAP_GRP_HDR_CAPTURE_VFID GENMASK(7, 0)
	/*
	 * Packed word: bits 7:0 selected by CAP_GRP_HDR_NUM_CAPTURES and
	 * bits 15:8 holding the group type (see the masks below).
	 */
	u32 info;
#define CAP_GRP_HDR_NUM_CAPTURES GENMASK(7, 0)
#define CAP_GRP_HDR_CAPTURE_TYPE GENMASK(15, 8) /* guc_capture_group_types */
} __packed;

/* this is the top level structure where an error-capture dump starts */
struct guc_state_capture_group_t {
	/* Group header, located at the very start of the dump */
	struct guc_state_capture_group_header_t grp_header;
	/*
	 * Captures belonging to this group.
	 * NOTE(review): each capture ends in its own flexible array, so the
	 * entries are presumably variable-sized and cannot be reached by plain
	 * array indexing - confirm against the parser.
	 */
	struct guc_state_capture_t capture_entries[];
} __packed;

/*
 * struct __guc_capture_ads_cache
 *
 * A structure to cache register lists that were populated and registered
 * with GuC at startup during ADS registration. This allows much quicker
 * GuC resets without re-parsing all the tables for the given gt.
 */
struct __guc_capture_ads_cache {
	/* NOTE(review): presumably true once this entry has been populated - confirm */
	bool is_valid;
	/* NOTE(review): presumably the cached, ADS-formatted register list - confirm */
	void *ptr;
	/* NOTE(review): presumably the size in bytes of the data at @ptr - confirm */
	size_t size;
	/* NOTE(review): presumably the result code recorded when the list was built - confirm */
	int status;
};

/**
 * struct intel_guc_state_capture - internal context of the intel_guc_capture
 * module.
 */
struct intel_guc_state_capture {
	/**
	 * @reglists: static table of register lists used for error-capture state.
	 */
	const struct __guc_mmio_reg_descr_group *reglists;

	/**
	 * @extlists: allocated table of steered register lists used for error-capture state.
	 *
	 * NOTE: steered registers have multiple instances depending on the HW configuration
	 * (slices or dual-sub-slices) and thus depend on HW fuses discovered at startup.
	 */
	struct __guc_mmio_reg_descr_group *extlists;

	/**
	 * @ads_cache: cached register lists that are ADS format ready
	 *
	 * One entry per (list index, list type, engine class) combination.
	 */
	struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
						[GUC_CAPTURE_LIST_TYPE_MAX]
						[GUC_MAX_ENGINE_CLASSES];

	/**
	 * @ads_null_cache: ADS null cache.
	 *
	 * NOTE(review): presumably a cached empty register list used where no
	 * real list is registered - confirm.
	 */
	void *ads_null_cache;

	/**
	 * @cachelist: Pool of pre-allocated nodes for error capture output
	 *
	 * We need this pool of pre-allocated nodes because we cannot
	 * dynamically allocate new nodes when receiving the G2H notification:
	 * the event handlers for all G2H event-processing are called
	 * by the ct processing worker queue and when that queue is being
	 * processed, there is no absolute guarantee that we are not in the
	 * midst of a GT reset operation (which doesn't allow allocations).
	 */
	struct list_head cachelist;
/*
 * Sizing constants for the pre-allocated pool: three nodes per possible
 * engine instance, and (NOTE(review): presumably) the default number of
 * registers each node can hold - confirm.
 */
#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
#define PREALLOC_NODES_DEFAULT_NUMREGS 64

	/**
	 * @max_mmio_per_node: Max MMIO per node.
	 */
	int max_mmio_per_node;

	/**
	 * @outlist: List of parsed error-capture output nodes awaiting reporting
	 *
	 * A linked list of parsed GuC error-capture output data before
	 * reporting with formatting via i915_gpu_coredump. Each node in this
	 * linked list shall contain a single engine-capture including global,
	 * engine-class and engine-instance register dumps as per
	 * struct __guc_capture_parsed_output.
	 */
	struct list_head outlist;
};

#endif /* _INTEL_GUC_CAPTURE_FWIF_H */