/* * Generic EDAC defs * * Author: Dave Jiang <[email protected]> * * 2006-2008 (c) MontaVista Software, Inc. This file is licensed under * the terms of the GNU General Public License version 2. This program * is licensed "as is" without any warranty of any kind, whether express * or implied. * */ #ifndef _LINUX_EDAC_H_ #define _LINUX_EDAC_H_ #include <linux/atomic.h> #include <linux/device.h> #include <linux/completion.h> #include <linux/workqueue.h> #include <linux/debugfs.h> #include <linux/numa.h> #define EDAC_DEVICE_NAME_LEN … struct device; #define EDAC_OPSTATE_INVAL … #define EDAC_OPSTATE_POLL … #define EDAC_OPSTATE_NMI … #define EDAC_OPSTATE_INT … extern int edac_op_state; const struct bus_type *edac_get_sysfs_subsys(void); static inline void opstate_init(void) { … } /* Max length of a DIMM label */ #define EDAC_MC_LABEL_LEN … /* Maximum size of the location string */ #define LOCATION_SIZE … /* Defines the maximum number of labels that can be reported */ #define EDAC_MAX_LABELS … /* String used to join two or more labels */ #define OTHER_LABEL … /** * enum dev_type - describes the type of memory DRAM chips used on the stick * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detecting it * @DEV_X1: 1 bit for data * @DEV_X2: 2 bits for data * @DEV_X4: 4 bits for data * @DEV_X8: 8 bits for data * @DEV_X16: 16 bits for data * @DEV_X32: 32 bits for data * @DEV_X64: 64 bits for data * * Typical values are x4 and x8. 
*/ enum dev_type { … }; #define DEV_FLAG_UNKNOWN … #define DEV_FLAG_X1 … #define DEV_FLAG_X2 … #define DEV_FLAG_X4 … #define DEV_FLAG_X8 … #define DEV_FLAG_X16 … #define DEV_FLAG_X32 … #define DEV_FLAG_X64 … /** * enum hw_event_mc_err_type - type of the detected error * * @HW_EVENT_ERR_CORRECTED: Corrected Error - Indicates that an ECC * corrected error was detected * @HW_EVENT_ERR_UNCORRECTED: Uncorrected Error - Indicates an error that * can't be corrected by ECC, but it is not * fatal (maybe it is on an unused memory area, * or the memory controller could recover from * it, for example, by re-trying the operation). * @HW_EVENT_ERR_DEFERRED: Deferred Error - Indicates an uncorrectable * error whose handling is not urgent. This could * be due to hardware data poisoning where the * system can continue operation until the poisoned * data is consumed. Preemptive measures may also * be taken, e.g. offlining pages, etc. * @HW_EVENT_ERR_FATAL: Fatal Error - Uncorrected error that could not * be recovered. * @HW_EVENT_ERR_INFO: Informational - The CPER spec defines a fourth * type of error: informational logs. */ enum hw_event_mc_err_type { … }; static inline char *mc_event_error_type(const unsigned int err_type) { … } /** * enum mem_type - memory types. For a more detailed reference, please see * http://en.wikipedia.org/wiki/DRAM * * @MEM_EMPTY: Empty csrow * @MEM_RESERVED: Reserved csrow type * @MEM_UNKNOWN: Unknown csrow type * @MEM_FPM: FPM - Fast Page Mode, used on systems up to 1995. * @MEM_EDO: EDO - Extended data out, used on systems up to 1998. * @MEM_BEDO: BEDO - Burst Extended data out, an EDO variant. * @MEM_SDR: SDR - Single data rate SDRAM * http://en.wikipedia.org/wiki/Synchronous_dynamic_random-access_memory * They use 3 pins for chip select: Pins 0 and 2 are * for rank 0; pins 1 and 3 are for rank 1, if the memory * is dual-rank. 
* @MEM_RDR: Registered SDR SDRAM * @MEM_DDR: Double data rate SDRAM * http://en.wikipedia.org/wiki/DDR_SDRAM * @MEM_RDDR: Registered Double data rate SDRAM * This is a variant of the DDR memories. * A registered memory has a buffer inside it, hiding * part of the memory details from the memory controller. * @MEM_RMBS: Rambus DRAM, used on a few Pentium III/IV controllers. * @MEM_DDR2: DDR2 RAM, as described at JEDEC JESD79-2F. * Those memories are labeled as "PC2-" instead of "PC" to * differentiate from DDR. * @MEM_FB_DDR2: Fully-Buffered DDR2, as described at JEDEC Std No. 205 * and JESD206. * Those memories are accessed per DIMM slot, and not by * a chip select signal. * @MEM_RDDR2: Registered DDR2 RAM * This is a variant of the DDR2 memories. * @MEM_XDR: Rambus XDR * It is an evolution of the original RAMBUS memories, * created to compete with DDR2. It wasn't used on any * x86 arch, but the cell_edac PPC memory controller uses it. * @MEM_DDR3: DDR3 RAM * @MEM_RDDR3: Registered DDR3 RAM * This is a variant of the DDR3 memories. * @MEM_LRDDR3: Load-Reduced DDR3 memory. * @MEM_LPDDR3: Low-Power DDR3 memory. * @MEM_DDR4: Unbuffered DDR4 RAM * @MEM_RDDR4: Registered DDR4 RAM * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. * @MEM_LPDDR4: Low-Power DDR4 memory. * @MEM_DDR5: Unbuffered DDR5 RAM * @MEM_RDDR5: Registered DDR5 RAM * @MEM_LRDDR5: Load-Reduced DDR5 memory. * @MEM_NVDIMM: Non-volatile RAM * @MEM_WIO2: Wide I/O 2. * @MEM_HBM2: High Bandwidth Memory Gen 2. * @MEM_HBM3: High Bandwidth Memory Gen 3. 
*/ enum mem_type { … }; #define MEM_FLAG_EMPTY … #define MEM_FLAG_RESERVED … #define MEM_FLAG_UNKNOWN … #define MEM_FLAG_FPM … #define MEM_FLAG_EDO … #define MEM_FLAG_BEDO … #define MEM_FLAG_SDR … #define MEM_FLAG_RDR … #define MEM_FLAG_DDR … #define MEM_FLAG_RDDR … #define MEM_FLAG_RMBS … #define MEM_FLAG_DDR2 … #define MEM_FLAG_FB_DDR2 … #define MEM_FLAG_RDDR2 … #define MEM_FLAG_XDR … #define MEM_FLAG_DDR3 … #define MEM_FLAG_RDDR3 … #define MEM_FLAG_LPDDR3 … #define MEM_FLAG_DDR4 … #define MEM_FLAG_RDDR4 … #define MEM_FLAG_LRDDR4 … #define MEM_FLAG_LPDDR4 … #define MEM_FLAG_DDR5 … #define MEM_FLAG_RDDR5 … #define MEM_FLAG_LRDDR5 … #define MEM_FLAG_NVDIMM … #define MEM_FLAG_WIO2 … #define MEM_FLAG_HBM2 … #define MEM_FLAG_HBM3 … /** * enum edac_type - Error Detection and Correction capabilities and mode * @EDAC_UNKNOWN: Unknown if ECC is available * @EDAC_NONE: Doesn't support ECC * @EDAC_RESERVED: Reserved ECC type * @EDAC_PARITY: Detects parity errors * @EDAC_EC: Error Checking - no correction * @EDAC_SECDED: Single bit error correction, Double detection * @EDAC_S2ECD2ED: Chipkill x2 devices - do these exist? 
* @EDAC_S4ECD4ED: Chipkill x4 devices * @EDAC_S8ECD8ED: Chipkill x8 devices * @EDAC_S16ECD16ED: Chipkill x16 devices */ enum edac_type { … }; #define EDAC_FLAG_UNKNOWN … #define EDAC_FLAG_NONE … #define EDAC_FLAG_PARITY … #define EDAC_FLAG_EC … #define EDAC_FLAG_SECDED … #define EDAC_FLAG_S2ECD2ED … #define EDAC_FLAG_S4ECD4ED … #define EDAC_FLAG_S8ECD8ED … #define EDAC_FLAG_S16ECD16ED … /** * enum scrub_type - scrubbing capabilities * @SCRUB_UNKNOWN: Unknown if scrubber is available * @SCRUB_NONE: No scrubber * @SCRUB_SW_PROG: SW progressive (sequential) scrubbing * @SCRUB_SW_SRC: Software scrub only errors * @SCRUB_SW_PROG_SRC: Progressive software scrub from an error * @SCRUB_SW_TUNABLE: Software scrub frequency is tunable * @SCRUB_HW_PROG: HW progressive (sequential) scrubbing * @SCRUB_HW_SRC: Hardware scrub only errors * @SCRUB_HW_PROG_SRC: Progressive hardware scrub from an error * @SCRUB_HW_TUNABLE: Hardware scrub frequency is tunable */ enum scrub_type { … }; #define SCRUB_FLAG_SW_PROG … #define SCRUB_FLAG_SW_SRC … #define SCRUB_FLAG_SW_PROG_SRC … #define SCRUB_FLAG_SW_TUN … #define SCRUB_FLAG_HW_PROG … #define SCRUB_FLAG_HW_SRC … #define SCRUB_FLAG_HW_PROG_SRC … #define SCRUB_FLAG_HW_TUN … /* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */ /* EDAC internal operation states */ #define OP_ALLOC … #define OP_RUNNING_POLL … #define OP_RUNNING_INTERRUPT … #define OP_RUNNING_POLL_INTR … #define OP_OFFLINE … /** * enum edac_mc_layer_type - memory controller hierarchy layer * * @EDAC_MC_LAYER_BRANCH: memory layer is named "branch" * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" * @EDAC_MC_LAYER_SLOT: memory layer is named "slot" * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select" * @EDAC_MC_LAYER_ALL_MEM: memory layout is unknown. All memory is mapped * as a single memory area. This is used when * retrieving errors from a firmware driven driver. 
* * This enum is used by the drivers to tell edac_mc_sysfs what name should * be used when describing a memory stick location. */ enum edac_mc_layer_type { … }; /** * struct edac_mc_layer - describes the memory controller hierarchy * @type: layer type * @size: number of components per layer. For example, * if the channel layer has two channels, size = 2 * @is_virt_csrow: This layer is part of the "csrow" when old API * compatibility mode is enabled. Otherwise, it is * a channel */ struct edac_mc_layer { … }; /* * Maximum number of layers used by the memory controller to uniquely * identify a single memory stick. * NOTE: Changing this constant requires changing not only the constant * below, but also the existing code in the core, as some of the code * there is optimized for 3 layers. */ #define EDAC_MAX_LAYERS … struct dimm_info { … }; /** * struct rank_info - contains the information for one DIMM rank * * @chan_idx: channel number where the rank is (typically, 0 or 1) * @ce_count: number of correctable errors for this rank * @csrow: A pointer to the chip select row structure (the parent * structure). The location of the rank is given by * the (csrow->csrow_idx, chan_idx) vector. * @dimm: A pointer to the DIMM structure, where the DIMM label * information is stored. * * FIXME: Currently, the EDAC core model will assume one DIMM per rank. * This is a bad assumption, but it makes this patch easier. Later * patches in this series will fix this issue. 
*/ struct rank_info { … }; struct csrow_info { … }; /* * struct errcount_attribute_data - used to store the several error counts */ struct errcount_attribute_data { … }; /** * struct edac_raw_error_desc - Raw error report structure * @grain: minimum granularity for an error report, in bytes * @error_count: number of errors of the same type * @type: severity of the error (CE/UE/Fatal) * @top_layer: top layer of the error (layer[0]) * @mid_layer: middle layer of the error (layer[1]) * @low_layer: low layer of the error (layer[2]) * @page_frame_number: page where the error happened * @offset_in_page: page offset * @syndrome: syndrome of the error (or 0 if unknown or if * the syndrome is not applicable) * @msg: error message * @location: location of the error * @label: label of the affected DIMM(s) * @other_detail: other driver-specific detail about the error */ struct edac_raw_error_desc { … }; /* MEMORY controller information structure */ struct mem_ctl_info { … }; #define mci_for_each_dimm(mci, dimm) … /** * edac_get_dimm - Get DIMM info from a memory controller given by * [layer0,layer1,layer2] position * * @mci: MC descriptor struct mem_ctl_info * @layer0: layer0 position * @layer1: layer1 position. Unused if n_layers < 2 * @layer2: layer2 position. Unused if n_layers < 3 * * For 1 layer, this function returns "dimms[layer0]"; * * For 2 layers, this function is similar to allocating a two-dimensional * array and returning "dimms[layer0][layer1]"; * * For 3 layers, this function is similar to allocating a tri-dimensional * array and returning "dimms[layer0][layer1][layer2]"; */ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, int layer0, int layer1, int layer2) { … } #endif /* _LINUX_EDAC_H_ */