linux/drivers/cxl/core/trace.h

/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
/*
 * (Re)define TRACE_SYSTEM for the events declared in this header.
 * NOTE(review): the replacement list is empty in this view; the tracing
 * macros presumably expect a subsystem name here - confirm against the
 * complete file.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM

/*
 * Include guard that still lets the body be processed again whenever
 * TRACE_HEADER_MULTI_READ is defined, even if _CXL_EVENTS_H is already set.
 */
#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _CXL_EVENTS_H

#include <linux/tracepoint.h>
#include <linux/pci.h>
#include <linux/unaligned.h>

#include <cxl.h>
#include <cxlmem.h>
#include "core.h"

/*
 * Identifiers for the uncorrectable RAS error conditions.
 * NOTE(review): every replacement list is empty in this view - presumably
 * these are bit masks in the complete header; confirm before relying on them.
 */
#define CXL_RAS_UC_CACHE_DATA_PARITY
#define CXL_RAS_UC_CACHE_ADDR_PARITY
#define CXL_RAS_UC_CACHE_BE_PARITY
#define CXL_RAS_UC_CACHE_DATA_ECC
#define CXL_RAS_UC_MEM_DATA_PARITY
#define CXL_RAS_UC_MEM_ADDR_PARITY
#define CXL_RAS_UC_MEM_BE_PARITY
#define CXL_RAS_UC_MEM_DATA_ECC
#define CXL_RAS_UC_REINIT_THRESH
#define CXL_RAS_UC_RSVD_ENCODE
#define CXL_RAS_UC_POISON
#define CXL_RAS_UC_RECV_OVERFLOW
#define CXL_RAS_UC_INTERNAL_ERR
#define CXL_RAS_UC_IDE_TX_ERR
#define CXL_RAS_UC_IDE_RX_ERR

/*
 * Text rendering helper for an uncorrectable-error value.  The
 * cxl_aer_uncorrectable_error TP_printk passes both __entry->status and
 * __entry->first_error through it and prints each result with '%s'.
 */
#define show_uc_errs(status)

/*
 * cxl_aer_uncorrectable_error - trace record for an uncorrectable AER error
 *
 * @cxlmd:  memdev the error is attributed to; its device name, its parent's
 *          device name and cxlds->serial are captured
 * @status: stored verbatim in the 'status' field
 * @fe:     stored verbatim in the 'first_error' field
 * @hl:     header log; CXL_HEADERLOG_SIZE bytes are copied from it, so the
 *          caller must supply a buffer at least that large
 */
TRACE_EVENT(cxl_aer_uncorrectable_error,
	TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl),
	TP_ARGS(cxlmd, status, fe, hl),
	TP_STRUCT__entry(
		__string(memdev, dev_name(&cxlmd->dev))
		__string(host, dev_name(cxlmd->dev.parent))
		__field(u64, serial)
		__field(u32, status)
		__field(u32, first_error)
		__array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
	),
	TP_fast_assign(
		__assign_str(memdev);
		__assign_str(host);
		__entry->serial = cxlmd->cxlds->serial;
		__entry->status = status;
		__entry->first_error = fe;
		/*
		 * Embed the 512B headerlog data for user app retrieval and
		 * parsing, but no need to print this in the trace buffer.
		 */
		memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
	),
	/*
	 * NOTE(review): 'serial' is a u64 field but is printed with %lld, so a
	 * value with bit 63 set would render as negative - confirm whether
	 * any consumer depends on the current text before changing it.
	 */
	TP_printk("memdev=%s host=%s serial=%lld: status: '%s' first_error: '%s'",
		  __get_str(memdev), __get_str(host), __entry->serial,
		  show_uc_errs(__entry->status),
		  show_uc_errs(__entry->first_error)
	)
);

/*
 * Identifiers for the correctable RAS error conditions.
 * NOTE(review): every replacement list is empty in this view - presumably
 * these are bit masks in the complete header; confirm before relying on them.
 * NOTE(review): CLX_RAS_CE_RETRY_THRESH breaks the CXL_ prefix used by its
 * siblings and looks like a typo; renaming it requires updating every user,
 * none of which are visible here.
 */
#define CXL_RAS_CE_CACHE_DATA_ECC
#define CXL_RAS_CE_MEM_DATA_ECC
#define CXL_RAS_CE_CRC_THRESH
#define CLX_RAS_CE_RETRY_THRESH
#define CXL_RAS_CE_CACHE_POISON
#define CXL_RAS_CE_MEM_POISON
#define CXL_RAS_CE_PHYS_LAYER_ERR

/*
 * Text rendering helper for a correctable-error value; the
 * cxl_aer_correctable_error TP_printk prints its result with '%s'.
 */
#define show_ce_errs(status)

/*
 * cxl_aer_correctable_error - trace record for a correctable AER error
 *
 * @cxlmd:  memdev the error is attributed to; its device name, its parent's
 *          device name and cxlds->serial are captured
 * @status: stored verbatim in the 'status' field and rendered through
 *          show_ce_errs() when printed
 */
TRACE_EVENT(cxl_aer_correctable_error,
	TP_PROTO(const struct cxl_memdev *cxlmd, u32 status),
	TP_ARGS(cxlmd, status),
	TP_STRUCT__entry(
		__string(memdev, dev_name(&cxlmd->dev))
		__string(host, dev_name(cxlmd->dev.parent))
		__field(u64, serial)
		__field(u32, status)
	),
	TP_fast_assign(
		__assign_str(memdev);
		__assign_str(host);
		__entry->serial = cxlmd->cxlds->serial;
		__entry->status = status;
	),
	/*
	 * NOTE(review): 'serial' is a u64 field printed with %lld - values
	 * with bit 63 set would render as negative; confirm before changing.
	 */
	TP_printk("memdev=%s host=%s serial=%lld: status: '%s'",
		  __get_str(memdev), __get_str(host), __entry->serial,
		  show_ce_errs(__entry->status)
	)
);

/*
 * Text rendering helper for an event log type; the cxl_overflow TP_printk
 * prints its result with '%s'.
 * NOTE(review): the replacement list is empty in this view - confirm the
 * mapping against the complete header.
 */
#define cxl_event_log_type_str(type)

/*
 * cxl_overflow - trace record describing an event log overflow
 *
 * @cxlmd:   memdev the overflow is attributed to; its device name, its
 *           parent's device name and cxlds->serial are captured
 * @log:     event log type, stored as an int and rendered through
 *           cxl_event_log_type_str() when printed
 * @payload: source of the overflow count and first/last timestamps; all
 *           three are converted from little-endian before being stored
 */
TRACE_EVENT(cxl_overflow,

	TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
		 struct cxl_get_event_payload *payload),

	TP_ARGS(cxlmd, log, payload),

	TP_STRUCT__entry(
		__string(memdev, dev_name(&cxlmd->dev))
		__string(host, dev_name(cxlmd->dev.parent))
		__field(int, log)
		__field(u64, serial)
		__field(u64, first_ts)
		__field(u64, last_ts)
		__field(u16, count)
	),

	TP_fast_assign(
		__assign_str(memdev);
		__assign_str(host);
		__entry->serial = cxlmd->cxlds->serial;
		__entry->log = log;
		/* Payload fields are little-endian; store them in CPU order */
		__entry->count = le16_to_cpu(payload->overflow_err_count);
		__entry->first_ts = le64_to_cpu(payload->first_overflow_timestamp);
		__entry->last_ts = le64_to_cpu(payload->last_overflow_timestamp);
	),

	TP_printk("memdev=%s host=%s serial=%lld: log=%s : %u records from %llu to %llu",
		__get_str(memdev), __get_str(host), __entry->serial,
		cxl_event_log_type_str(__entry->log), __entry->count,
		__entry->first_ts, __entry->last_ts)

);

/*
 * Common Event Record Format
 * CXL 3.0 section 8.2.9.2.1; Table 8-42
 *
 * Flag identifiers for the common event record header, plus the helper that
 * renders a flags value as text.
 * NOTE(review): every replacement list is empty in this view - confirm the
 * values and the show_hdr_flags() mapping against the complete header.
 */
#define CXL_EVENT_RECORD_FLAG_PERMANENT
#define CXL_EVENT_RECORD_FLAG_MAINT_NEEDED
#define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED
#define CXL_EVENT_RECORD_FLAG_HW_REPLACE
#define show_hdr_flags(flags)

/*
 * Define macros for the common header of each CXL event.
 *
 * Tracepoints using these macros must do 3 things:
 *
 *	1) Add CXL_EVT_TP_entry to TP_STRUCT__entry
 *	2) Use CXL_EVT_TP_fast_assign within TP_fast_assign;
 *	   pass the dev, log, and CXL event header
 *	   NOTE: The uuid must be assigned by the specific trace event
 *	3) Use CXL_EVT_TP_printk() instead of TP_printk()
 *
 * See the cxl_generic_event tracepoint as an example.
 *
 * NOTE(review): the three replacement lists below are empty in this view;
 * the events in this file assign __entry->hdr_uuid themselves, so the entry
 * macro presumably declares that field - confirm against the complete header.
 */
#define CXL_EVT_TP_entry

#define CXL_EVT_TP_fast_assign(cxlmd, l, hdr)

#define CXL_EVT_TP_printk(fmt, ...)

/*
 * cxl_generic_event - trace an event record whose payload is not decoded
 *
 * @cxlmd:   memdev handed to CXL_EVT_TP_fast_assign() for the common header
 * @log:     event log type handed to CXL_EVT_TP_fast_assign()
 * @uuid:    record UUID, copied into the 'hdr_uuid' field
 * @gen_rec: raw record; its header feeds the common fields and
 *           CXL_EVENT_RECORD_DATA_LENGTH bytes of its data are copied as-is
 */
TRACE_EVENT(cxl_generic_event,

	TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
		 const uuid_t *uuid, struct cxl_event_generic *gen_rec),

	TP_ARGS(cxlmd, log, uuid, gen_rec),

	TP_STRUCT__entry(
		CXL_EVT_TP_entry
		__array(u8, data, CXL_EVENT_RECORD_DATA_LENGTH)
	),

	TP_fast_assign(
		CXL_EVT_TP_fast_assign(cxlmd, log, gen_rec->hdr);
		/* The uuid is the event's responsibility, not the common macro's */
		memcpy(&__entry->hdr_uuid, uuid, sizeof(uuid_t));
		memcpy(__entry->data, gen_rec->data, CXL_EVENT_RECORD_DATA_LENGTH);
	),

	/* The undecoded payload is printed as a hex dump */
	CXL_EVT_TP_printk("%s",
		__print_hex(__entry->data, CXL_EVENT_RECORD_DATA_LENGTH))
);

/*
 * Physical Address field masks
 *
 * General Media Event Record
 * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
 *
 * DRAM Event Record
 * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
 *
 * The cxl_general_media and cxl_dram events first AND the record's physical
 * address with CXL_DPA_FLAGS_MASK to obtain dpa_flags, then AND it with
 * CXL_DPA_MASK to keep the address itself.
 * NOTE(review): every replacement list is empty in this view - confirm the
 * mask and flag values against the complete header.
 */
#define CXL_DPA_FLAGS_MASK
#define CXL_DPA_MASK

#define CXL_DPA_VOLATILE
#define CXL_DPA_NOT_REPAIRABLE
#define show_dpa_flags(flags)

/*
 * General Media Event Record - GMER
 * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
 *
 * NOTE(review): every replacement list in this group is empty in this view -
 * confirm the values and the show_*() mappings against the complete header.
 */
/*
 * Event descriptor flags; show_event_desc_flags() is used by both the
 * cxl_general_media and cxl_dram events.
 * NOTE(review): "UNCORECTABLE" is spelled with a single 'R'; renaming would
 * require updating every user, none of which are visible here.
 */
#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT
#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT
#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW
#define show_event_desc_flags(flags)

/* Memory event type, rendered for the cxl_general_media 'type' field */
#define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR
#define CXL_GMER_MEM_EVT_TYPE_INV_ADDR
#define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR
#define show_gmer_mem_event_type(type)

/*
 * Transaction type; show_trans_type() is used by both the cxl_general_media
 * and cxl_dram events.
 */
#define CXL_GMER_TRANS_UNKNOWN
#define CXL_GMER_TRANS_HOST_READ
#define CXL_GMER_TRANS_HOST_WRITE
#define CXL_GMER_TRANS_HOST_SCAN_MEDIA
#define CXL_GMER_TRANS_HOST_INJECT_POISON
#define CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB
#define CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT
#define show_trans_type(type)

/* Validity flags, rendered for the cxl_general_media 'validity_flags' field */
#define CXL_GMER_VALID_CHANNEL
#define CXL_GMER_VALID_RANK
#define CXL_GMER_VALID_DEVICE
#define CXL_GMER_VALID_COMPONENT
#define show_valid_flags(flags)

/*
 * cxl_general_media - trace a General Media Event Record
 *
 * @cxlmd: memdev handed to CXL_EVT_TP_fast_assign() for the common header
 * @log:   event log type handed to CXL_EVT_TP_fast_assign()
 * @cxlr:  region associated with the event, or NULL; with NULL the record
 *         carries an empty region name and the null UUID
 * @hpa:   stored verbatim in the 'hpa' field
 * @rec:   raw record; multi-byte fields are converted from little-endian
 *         before being stored
 */
TRACE_EVENT(cxl_general_media,

	TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
		 struct cxl_region *cxlr, u64 hpa, struct cxl_event_gen_media *rec),

	TP_ARGS(cxlmd, log, cxlr, hpa, rec),

	TP_STRUCT__entry(
		CXL_EVT_TP_entry
		/* General Media */
		__field(u64, dpa)
		__field(u8, descriptor)
		__field(u8, type)
		__field(u8, transaction_type)
		__field(u8, channel)
		__field(u32, device)
		__array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
		/* Following are out of order to pack trace record */
		__field(u64, hpa)
		__field_struct(uuid_t, region_uuid)
		__field(u16, validity_flags)
		__field(u8, rank)
		__field(u8, dpa_flags)
		__string(region_name, cxlr ? dev_name(&cxlr->dev) : "")
	),

	TP_fast_assign(
		CXL_EVT_TP_fast_assign(cxlmd, log, rec->media_hdr.hdr);
		__entry->hdr_uuid = CXL_EVENT_GEN_MEDIA_UUID;

		/* General Media */
		__entry->dpa = le64_to_cpu(rec->media_hdr.phys_addr);
		__entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
		/* Mask after flags have been parsed */
		__entry->dpa &= CXL_DPA_MASK;
		__entry->descriptor = rec->media_hdr.descriptor;
		__entry->type = rec->media_hdr.type;
		__entry->transaction_type = rec->media_hdr.transaction_type;
		__entry->channel = rec->media_hdr.channel;
		__entry->rank = rec->media_hdr.rank;
		__entry->device = get_unaligned_le24(rec->device);
		memcpy(__entry->comp_id, &rec->component_id,
			CXL_EVENT_GEN_MED_COMP_ID_SIZE);
		__entry->validity_flags = get_unaligned_le16(&rec->media_hdr.validity_flags);
		__entry->hpa = hpa;
		/*
		 * The string copy does not depend on cxlr: TP_STRUCT__entry
		 * already selected "" as the source when there is no region.
		 */
		__assign_str(region_name);
		if (cxlr)
			uuid_copy(&__entry->region_uuid, &cxlr->params.uuid);
		else
			uuid_copy(&__entry->region_uuid, &uuid_null);
	),

	CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \
		"descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \
		"device=%x comp_id=%s validity_flags='%s' " \
		"hpa=%llx region=%s region_uuid=%pUb",
		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
		show_event_desc_flags(__entry->descriptor),
		show_gmer_mem_event_type(__entry->type),
		show_trans_type(__entry->transaction_type),
		__entry->channel, __entry->rank, __entry->device,
		__print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
		show_valid_flags(__entry->validity_flags),
		__entry->hpa, __get_str(region_name), &__entry->region_uuid
	)
);

/*
 * DRAM Event Record - DER
 *
 * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
 */
/*
 * DRAM Event Record defines many fields the same as the General Media Event
 * Record.  Reuse those definitions as appropriate.
 *
 * NOTE(review): every replacement list in this group is empty in this view -
 * confirm the values and the show_*() mappings against the complete header.
 */
/* Memory event type, rendered for the cxl_dram 'type' field */
#define CXL_DER_MEM_EVT_TYPE_ECC_ERROR
#define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR
#define CXL_DER_MEM_EVT_TYPE_INV_ADDR
#define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR
#define show_dram_mem_event_type(type)

/* Validity flags, rendered for the cxl_dram 'validity_flags' field */
#define CXL_DER_VALID_CHANNEL
#define CXL_DER_VALID_RANK
#define CXL_DER_VALID_NIBBLE
#define CXL_DER_VALID_BANK_GROUP
#define CXL_DER_VALID_BANK
#define CXL_DER_VALID_ROW
#define CXL_DER_VALID_COLUMN
#define CXL_DER_VALID_CORRECTION_MASK
#define show_dram_valid_flags(flags)

/*
 * cxl_dram - trace a DRAM Event Record
 *
 * @cxlmd: memdev handed to CXL_EVT_TP_fast_assign() for the common header
 * @log:   event log type handed to CXL_EVT_TP_fast_assign()
 * @cxlr:  region associated with the event, or NULL; with NULL the record
 *         carries an empty region name and the null UUID
 * @hpa:   stored verbatim in the 'hpa' field
 * @rec:   raw record; multi-byte fields are converted from little-endian
 *         before being stored
 */
TRACE_EVENT(cxl_dram,

	TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
		 struct cxl_region *cxlr, u64 hpa, struct cxl_event_dram *rec),

	TP_ARGS(cxlmd, log, cxlr, hpa, rec),

	TP_STRUCT__entry(
		CXL_EVT_TP_entry
		/* DRAM */
		__field(u64, dpa)
		__field(u8, descriptor)
		__field(u8, type)
		__field(u8, transaction_type)
		__field(u8, channel)
		__field(u16, validity_flags)
		__field(u16, column)	/* Out of order to pack trace record */
		__field(u32, nibble_mask)
		__field(u32, row)
		__array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE)
		__field(u64, hpa)
		__field_struct(uuid_t, region_uuid)
		__field(u8, rank)	/* Out of order to pack trace record */
		__field(u8, bank_group)	/* Out of order to pack trace record */
		__field(u8, bank)	/* Out of order to pack trace record */
		__field(u8, dpa_flags)	/* Out of order to pack trace record */
		__string(region_name, cxlr ? dev_name(&cxlr->dev) : "")
	),

	TP_fast_assign(
		CXL_EVT_TP_fast_assign(cxlmd, log, rec->media_hdr.hdr);
		__entry->hdr_uuid = CXL_EVENT_DRAM_UUID;

		/* DRAM */
		__entry->dpa = le64_to_cpu(rec->media_hdr.phys_addr);
		__entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
		/* Mask after flags have been parsed */
		__entry->dpa &= CXL_DPA_MASK;
		__entry->descriptor = rec->media_hdr.descriptor;
		__entry->type = rec->media_hdr.type;
		__entry->transaction_type = rec->media_hdr.transaction_type;
		__entry->validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
		__entry->channel = rec->media_hdr.channel;
		__entry->rank = rec->media_hdr.rank;
		__entry->nibble_mask = get_unaligned_le24(rec->nibble_mask);
		__entry->bank_group = rec->bank_group;
		__entry->bank = rec->bank;
		__entry->row = get_unaligned_le24(rec->row);
		__entry->column = get_unaligned_le16(rec->column);
		memcpy(__entry->cor_mask, &rec->correction_mask,
			CXL_EVENT_DER_CORRECTION_MASK_SIZE);
		__entry->hpa = hpa;
		/*
		 * The string copy does not depend on cxlr: TP_STRUCT__entry
		 * already selected "" as the source when there is no region.
		 */
		__assign_str(region_name);
		if (cxlr)
			uuid_copy(&__entry->region_uuid, &cxlr->params.uuid);
		else
			uuid_copy(&__entry->region_uuid, &uuid_null);
	),

	CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' " \
		"transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \
		"bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \
		"validity_flags='%s' " \
		"hpa=%llx region=%s region_uuid=%pUb",
		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
		show_event_desc_flags(__entry->descriptor),
		show_dram_mem_event_type(__entry->type),
		show_trans_type(__entry->transaction_type),
		__entry->channel, __entry->rank, __entry->nibble_mask,
		__entry->bank_group, __entry->bank,
		__entry->row, __entry->column,
		__print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE),
		show_dram_valid_flags(__entry->validity_flags),
		__entry->hpa, __get_str(region_name), &__entry->region_uuid
	)
);

/*
 * Memory Module Event Record - MMER
 *
 * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
 *
 * NOTE(review): every replacement list below is empty in this view - confirm
 * the values and the show_*() mappings against the complete header.
 */
#define CXL_MMER_HEALTH_STATUS_CHANGE
#define CXL_MMER_MEDIA_STATUS_CHANGE
#define CXL_MMER_LIFE_USED_CHANGE
#define CXL_MMER_TEMP_CHANGE
#define CXL_MMER_DATA_PATH_ERROR
#define CXL_MMER_LSA_ERROR
#define show_dev_evt_type(type)

/*
 * Device Health Information - DHI
 *
 * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
 */
/* Health status flags, rendered for the cxl_memory_module 'health_status' field */
#define CXL_DHI_HS_MAINTENANCE_NEEDED
#define CXL_DHI_HS_PERFORMANCE_DEGRADED
#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED
#define show_health_status_flags(flags)

/* Media status, rendered for the cxl_memory_module 'media_status' field */
#define CXL_DHI_MS_NORMAL
#define CXL_DHI_MS_NOT_READY
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOST
#define CXL_DHI_MS_ALL_DATA_LOST
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT
#define show_media_status(ms)

/*
 * Additional status levels and their renderers.  cxl_memory_module prints
 * the life-used and device-temperature sub-fields with show_two_bit_status()
 * and the two corrected-error-count sub-fields with show_one_bit_status().
 */
#define CXL_DHI_AS_NORMAL
#define CXL_DHI_AS_WARNING
#define CXL_DHI_AS_CRITICAL
#define show_two_bit_status(as)
#define show_one_bit_status(as)

/*
 * Accessors applied to the 'add_status' byte at print time, one per
 * sub-field.
 */
#define CXL_DHI_AS_LIFE_USED(as)
#define CXL_DHI_AS_DEV_TEMP(as)
#define CXL_DHI_AS_COR_VOL_ERR_CNT(as)
#define CXL_DHI_AS_COR_PER_ERR_CNT(as)

/*
 * cxl_memory_module - trace a Memory Module Event Record
 *
 * @cxlmd: memdev handed to CXL_EVT_TP_fast_assign() for the common header
 * @log:   event log type handed to CXL_EVT_TP_fast_assign()
 * @rec:   raw record; the event type and the embedded device health info
 *         are captured, with multi-byte counters and the temperature
 *         converted from little-endian
 */
TRACE_EVENT(cxl_memory_module,

	TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
		 struct cxl_event_mem_module *rec),

	TP_ARGS(cxlmd, log, rec),

	TP_STRUCT__entry(
		CXL_EVT_TP_entry

		/* Memory Module Event */
		__field(u8, event_type)

		/* Device Health Info */
		__field(u8, health_status)
		__field(u8, media_status)
		__field(u8, life_used)
		__field(u32, dirty_shutdown_cnt)
		__field(u32, cor_vol_err_cnt)
		__field(u32, cor_per_err_cnt)
		__field(s16, device_temp)
		__field(u8, add_status)
	),

	TP_fast_assign(
		CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
		__entry->hdr_uuid = CXL_EVENT_MEM_MODULE_UUID;

		/* Memory Module Event */
		__entry->event_type = rec->event_type;

		/* Device Health Info */
		__entry->health_status = rec->info.health_status;
		__entry->media_status = rec->info.media_status;
		__entry->life_used = rec->info.life_used;
		__entry->dirty_shutdown_cnt = get_unaligned_le32(rec->info.dirty_shutdown_cnt);
		__entry->cor_vol_err_cnt = get_unaligned_le32(rec->info.cor_vol_err_cnt);
		__entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt);
		/* Stored in an s16 field so it is printed as a signed value (%d) */
		__entry->device_temp = get_unaligned_le16(rec->info.device_temp);
		/* Kept raw; split into sub-fields only when printed */
		__entry->add_status = rec->info.add_status;
	),

	CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \
		"as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \
		"as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \
		"dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u",
		show_dev_evt_type(__entry->event_type),
		show_health_status_flags(__entry->health_status),
		show_media_status(__entry->media_status),
		show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)),
		show_two_bit_status(CXL_DHI_AS_DEV_TEMP(__entry->add_status)),
		show_one_bit_status(CXL_DHI_AS_COR_VOL_ERR_CNT(__entry->add_status)),
		show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)),
		__entry->life_used, __entry->device_temp,
		__entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt,
		__entry->cor_per_err_cnt
	)
);

/*
 * Helpers for the cxl_poison event.
 * NOTE(review): every replacement list below is empty in this view - confirm
 * the mappings and field extraction against the complete header.
 */
/* Text renderer for the 'trace_type' field */
#define show_poison_trace_type(type)

/* NOTE(review): presumably the table behind show_poison_source() - confirm */
#define __show_poison_source(source)

/* Text renderer for the 'source' field */
#define show_poison_source(source)

/* Text renderer for the 'flags' field */
#define show_poison_flags(flags)

/*
 * Accessors applied to a poison record in TP_fast_assign to obtain the
 * 'dpa', 'source' and 'dpa_length' fields; cxl_poison_overflow() produces
 * the 'overflow_ts' field from the caller's flags and overflow timestamp.
 */
#define __cxl_poison_addr(record)
#define cxl_poison_record_dpa(record)
#define cxl_poison_record_source(record)
#define cxl_poison_record_dpa_length(record)
#define cxl_poison_overflow(flags, time)

/*
 * cxl_poison - trace one poison record
 *
 * @cxlmd:       memdev the record belongs to; its device name, its parent's
 *               device name and cxlds->serial are captured
 * @cxlr:        region covering the poisoned address, or NULL; with NULL the
 *               region name is empty, the UUID is all zeroes and hpa is
 *               ULLONG_MAX
 * @record:      poison record supplying dpa, dpa_length and source
 * @flags:       stored verbatim in 'flags' and also fed to
 *               cxl_poison_overflow()
 * @overflow_ts: little-endian overflow timestamp fed to cxl_poison_overflow()
 * @trace_type:  stored in 'trace_type' and rendered through
 *               show_poison_trace_type() when printed
 */
TRACE_EVENT(cxl_poison,

	TP_PROTO(struct cxl_memdev *cxlmd, struct cxl_region *cxlr,
		 const struct cxl_poison_record *record, u8 flags,
		 __le64 overflow_ts, enum cxl_poison_trace_type trace_type),

	TP_ARGS(cxlmd, cxlr, record, flags, overflow_ts, trace_type),

	TP_STRUCT__entry(
		__string(memdev, dev_name(&cxlmd->dev))
		__string(host, dev_name(cxlmd->dev.parent))
		__field(u64, serial)
		__field(u8, trace_type)
		__string(region, cxlr ? dev_name(&cxlr->dev) : "")
		__field(u64, overflow_ts)
		__field(u64, hpa)
		__field(u64, dpa)
		__field(u32, dpa_length)
		__array(char, uuid, 16)
		__field(u8, source)
		__field(u8, flags)
	    ),

	TP_fast_assign(
		__assign_str(memdev);
		__assign_str(host);
		__entry->serial = cxlmd->cxlds->serial;
		__entry->overflow_ts = cxl_poison_overflow(flags, overflow_ts);
		__entry->dpa = cxl_poison_record_dpa(record);
		__entry->dpa_length = cxl_poison_record_dpa_length(record);
		__entry->source = cxl_poison_record_source(record);
		__entry->trace_type = trace_type;
		__entry->flags = flags;
		/*
		 * The string copy does not depend on cxlr: TP_STRUCT__entry
		 * already selected "" as the source when there is no region.
		 */
		__assign_str(region);
		if (cxlr) {
			memcpy(__entry->uuid, &cxlr->params.uuid, 16);
			/* Translation needs the region, so it uses the dpa set above */
			__entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd,
						      __entry->dpa);
		} else {
			memset(__entry->uuid, 0, 16);
			/* No region: no translation possible, store the all-ones value */
			__entry->hpa = ULLONG_MAX;
		}
	    ),

	TP_printk("memdev=%s host=%s serial=%lld trace_type=%s region=%s "  \
		"region_uuid=%pU hpa=0x%llx dpa=0x%llx dpa_length=0x%x "    \
		"source=%s flags=%s overflow_time=%llu",
		__get_str(memdev),
		__get_str(host),
		__entry->serial,
		show_poison_trace_type(__entry->trace_type),
		__get_str(region),
		__entry->uuid,
		__entry->hpa,
		__entry->dpa,
		__entry->dpa_length,
		show_poison_source(__entry->source),
		show_poison_flags(__entry->flags),
		__entry->overflow_ts
	)
);

#endif /* _CXL_EVENTS_H */

/*
 * Placed after the include guard has closed, so these two lines are
 * processed on every inclusion of this header.
 * NOTE(review): the TRACE_INCLUDE_FILE replacement list is empty in this
 * view; <trace/define_trace.h> presumably needs this header's base name
 * here - confirm against the complete file.
 */
#define TRACE_INCLUDE_FILE
#include <trace/define_trace.h>