// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Firmware-Assisted Dump support on POWERVM platform.
*
* Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
* Copyright 2019, Hari Bathini, IBM Corporation.
*/
#define pr_fmt(fmt) "rtas fadump: " fmt
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <asm/page.h>
#include <asm/rtas.h>
#include <asm/setup.h>
#include <asm/fadump.h>
#include <asm/fadump-internal.h>
#include "rtas-fadump.h"
/*
* Dump memory structure built by this kernel in
* rtas_fadump_init_mem_struct() and handed to the f/w by the register and
* unregister calls.
*/
static struct rtas_fadump_mem_struct fdm;
/*
* Dump memory structure read from the "ibm,kernel-dump" device-tree property
* in rtas_fadump_dt_scan(). NULL when that property is absent, and reset to
* NULL once the dump has been invalidated.
*/
static const struct rtas_fadump_mem_struct *fdm_active;
/*
 * Refresh the derived part of @fadump_conf: the FADump header address is
 * the boot memory destination address advanced by the boot memory size.
 * @fdm is part of the signature but is not consulted here.
 */
static void rtas_fadump_update_config(struct fw_dump *fadump_conf,
				      const struct rtas_fadump_mem_struct *fdm)
{
	fadump_conf->fadumphdr_addr = fadump_conf->boot_memory_size +
				      fadump_conf->boot_mem_dest_addr;
}
/*
 * Runs in the capture kernel: walk every section of the dump memory
 * structure that the first kernel set up and passed to the f/w, and rebuild
 * the FADump configuration in @fadump_conf from it.
 *
 * Boot memory regions are recorded in the order they appear; the gaps
 * between consecutive regions are summed so that boot_mem_top ends up as
 * total boot memory size plus holes.
 */
static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf,
				   const struct rtas_fadump_mem_struct *fdm)
{
	unsigned long rgn_base, rgn_size;
	unsigned long prev_end = 0, holes = 0;
	int nr_sections = be16_to_cpu(fdm->header.dump_num_sections);

	fadump_conf->boot_memory_size = 0;
	fadump_conf->boot_mem_regs_cnt = 0;

	pr_debug("Boot memory regions:\n");
	for (int i = 0; i < nr_sections; i++) {
		const struct rtas_fadump_section *sec = &fdm->rgn[i];
		int type = be16_to_cpu(sec->source_data_type);

		if (type == RTAS_FADUMP_CPU_STATE_DATA) {
			u64 dest = be64_to_cpu(sec->destination_address);

			fadump_conf->cpu_state_dest_vaddr = (u64)__va(dest);
			/*
			 * Start address of reserve dump area (permanent
			 * reservation) for re-registering FADump after dump
			 * capture.
			 */
			fadump_conf->reserve_dump_area_start = dest;
		} else if (type == RTAS_FADUMP_HPTE_REGION) {
			/* Not processed currently. */
		} else if (type == RTAS_FADUMP_REAL_MODE_REGION) {
			rgn_base = be64_to_cpu(sec->source_address);
			rgn_size = be64_to_cpu(sec->source_len);
			pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, rgn_base, rgn_size);

			/* The region starting at address 0 carries the destination base. */
			if (!rgn_base)
				fadump_conf->boot_mem_dest_addr =
					be64_to_cpu(sec->destination_address);

			fadump_conf->boot_mem_addr[fadump_conf->boot_mem_regs_cnt] = rgn_base;
			fadump_conf->boot_mem_sz[fadump_conf->boot_mem_regs_cnt] = rgn_size;
			fadump_conf->boot_memory_size += rgn_size;

			holes += rgn_base - prev_end;
			prev_end = rgn_base + rgn_size;
			fadump_conf->boot_mem_regs_cnt++;
		} else if (type == RTAS_FADUMP_PARAM_AREA) {
			fadump_conf->param_area = be64_to_cpu(sec->destination_address);
		} else {
			pr_warn("Section type %d unsupported on this kernel. Ignoring!\n", type);
		}
	}

	fadump_conf->boot_mem_top = fadump_conf->boot_memory_size + holes;
	rtas_fadump_update_config(fadump_conf, fdm);
}
/* Populate one kernel dump section; fields not listed here are left as is. */
static void rtas_fadump_set_section(struct rtas_fadump_section *sec, u16 type,
				    u64 src, u64 len, u64 dest)
{
	sec->request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
	sec->source_data_type = cpu_to_be16(type);
	sec->source_address = cpu_to_be64(src);
	sec->source_len = cpu_to_be64(len);
	sec->destination_address = cpu_to_be64(dest);
}

/*
 * Build the dump memory structure (fdm) that is later registered with the
 * f/w. Sections are laid out back to back starting at the page-aligned
 * reserve dump area start: CPU state data, HPTE region, the boot memory
 * regions and, when configured, the parameter area.
 *
 * Returns the destination address just past the last boot memory region.
 */
static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf)
{
	u64 dest = fadump_conf->reserve_dump_area_start;
	u16 nr_sections = 0;

	memset(&fdm, 0, sizeof(fdm));
	dest &= PAGE_MASK;

	fdm.header.dump_format_version = cpu_to_be32(0x00000001);
	fdm.header.dump_status_flag = 0;
	fdm.header.offset_first_dump_section =
		cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct, rgn));

	/*
	 * Fields for disk dump option.
	 * We are not using disk dump option, hence set these fields to 0.
	 */
	fdm.header.dd_block_size = 0;
	fdm.header.dd_block_offset = 0;
	fdm.header.dd_num_blocks = 0;
	fdm.header.dd_offset_disk_path = 0;

	/* set 0 to disable an automatic dump-reboot. */
	fdm.header.max_time_auto = 0;

	/* Kernel dump sections */

	/* cpu state data section. */
	rtas_fadump_set_section(&fdm.rgn[nr_sections], RTAS_FADUMP_CPU_STATE_DATA,
				0, fadump_conf->cpu_state_data_size, dest);
	dest += fadump_conf->cpu_state_data_size;
	nr_sections++;

	/* hpte region section */
	rtas_fadump_set_section(&fdm.rgn[nr_sections], RTAS_FADUMP_HPTE_REGION,
				0, fadump_conf->hpte_region_size, dest);
	dest += fadump_conf->hpte_region_size;
	nr_sections++;

	/*
	 * Align boot memory area destination address to page boundary to
	 * be able to mmap read this area in the vmcore.
	 */
	dest = PAGE_ALIGN(dest);

	/* First boot memory region destination address */
	fadump_conf->boot_mem_dest_addr = dest;

	/* Boot memory regions */
	for (int i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) {
		rtas_fadump_set_section(&fdm.rgn[nr_sections],
					RTAS_FADUMP_REAL_MODE_REGION,
					fadump_conf->boot_mem_addr[i],
					fadump_conf->boot_mem_sz[i], dest);
		dest += fadump_conf->boot_mem_sz[i];
		nr_sections++;
	}

	/* Parameters area: source and destination are the same address. */
	if (fadump_conf->param_area) {
		rtas_fadump_set_section(&fdm.rgn[nr_sections], RTAS_FADUMP_PARAM_AREA,
					fadump_conf->param_area, COMMAND_LINE_SIZE,
					fadump_conf->param_area);
		nr_sections++;
	}

	fdm.header.dump_num_sections = cpu_to_be16(nr_sections);

	rtas_fadump_update_config(fadump_conf, &fdm);

	return dest;
}
/* Report the platform's minimum boot memory size, RTAS_FADUMP_MIN_BOOT_MEM. */
static u64 rtas_fadump_get_bootmem_min(void)
{
	const u64 bootmem_min = RTAS_FADUMP_MIN_BOOT_MEM;

	return bootmem_min;
}
/*
 * Register the dump memory structure (fdm) with the platform through the
 * ibm,configure-kernel-dump RTAS call, retrying for as long as the call
 * reports a busy delay.
 *
 * Returns 0 on success, -EINVAL on a parameter error, -EEXIST when the
 * platform reports an existing registration, and -EIO otherwise.
 */
static int rtas_fadump_register(struct fw_dump *fadump_conf)
{
	unsigned int busy_ms, fdm_size;
	int rc;

	/*
	 * Platform requires the exact size of the Dump Memory Structure.
	 * Avoid including any unused rgns in the calculation, as this
	 * could result in a parameter error (-3) from the platform.
	 */
	fdm_size = sizeof(struct rtas_fadump_section_header) +
		   be16_to_cpu(fdm.header.dump_num_sections) *
		   sizeof(struct rtas_fadump_section);

	/* TODO: Add upper time limit for the delay */
	for (;;) {
		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
			       NULL, FADUMP_REGISTER, &fdm, fdm_size);

		busy_ms = rtas_busy_delay_time(rc);
		if (!busy_ms)
			break;
		mdelay(busy_ms);
	}

	switch (rc) {
	case 0:
		pr_info("Registration is successful!\n");
		fadump_conf->dump_registered = 1;
		return 0;
	case -3:
		if (!is_fadump_reserved_mem_contiguous())
			pr_err("Can't have holes in reserved memory area.\n");

		pr_err("Failed to register. Parameter Error(%d).\n", rc);
		return -EINVAL;
	case -9:
		pr_err("Already registered!\n");
		fadump_conf->dump_registered = 1;
		return -EEXIST;
	case -1:
		pr_err("Failed to register. Hardware Error(%d).\n", rc);
		return -EIO;
	default:
		pr_err("Failed to register. Unknown Error(%d).\n", rc);
		return -EIO;
	}
}
/*
 * Ask the platform to drop the current FADump registration, retrying for
 * as long as the RTAS call reports a busy delay.
 *
 * Returns 0 and clears dump_registered on success, -EIO on any error.
 */
static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
{
	unsigned int busy_ms;
	int rc;

	/* TODO: Add upper time limit for the delay */
	for (;;) {
		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
			       NULL, FADUMP_UNREGISTER, &fdm,
			       sizeof(struct rtas_fadump_mem_struct));

		busy_ms = rtas_busy_delay_time(rc);
		if (!busy_ms)
			break;
		mdelay(busy_ms);
	}

	if (rc != 0) {
		pr_err("Failed to un-register - unexpected error(%d).\n", rc);
		return -EIO;
	}

	fadump_conf->dump_registered = 0;
	return 0;
}
/*
 * Tell the platform that the active dump (fdm_active) is no longer needed,
 * retrying for as long as the RTAS call reports a busy delay.
 *
 * On success clears dump_active, forgets fdm_active and returns 0;
 * returns -EIO on any error.
 */
static int rtas_fadump_invalidate(struct fw_dump *fadump_conf)
{
	unsigned int busy_ms;
	int rc;

	/* TODO: Add upper time limit for the delay */
	for (;;) {
		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
			       NULL, FADUMP_INVALIDATE, fdm_active,
			       sizeof(struct rtas_fadump_mem_struct));

		busy_ms = rtas_busy_delay_time(rc);
		if (!busy_ms)
			break;
		mdelay(busy_ms);
	}

	if (rc != 0) {
		pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
		return -EIO;
	}

	fadump_conf->dump_active = 0;
	fdm_active = NULL;
	return 0;
}
/* Selects the three leading identifier bytes ("GPR") of a register id. */
#define RTAS_FADUMP_GPR_MASK		0xffffff0000000000

/*
 * Decode the GPR number from a register identifier of the form "GPRnn".
 *
 * Returns the parsed number when @id is a GPR identifier and the number is
 * at most 31, -EINVAL when the two digit characters do not parse as a
 * decimal integer, and -1 when @id is not a GPR identifier or the number
 * exceeds 31.
 */
static inline int rtas_fadump_gpr_index(u64 id)
{
	char digits[3];
	int idx = -1;
	u64 tail;

	if ((id & RTAS_FADUMP_GPR_MASK) != fadump_str_to_u64("GPR"))
		return -1;

	/* get the digits at the end */
	tail = (id & ~RTAS_FADUMP_GPR_MASK) >> 24;
	digits[0] = (tail >> 8) & 0xff;
	digits[1] = tail & 0xff;
	digits[2] = '\0';

	if (kstrtoint(digits, 10, &idx))
		return -EINVAL;

	return idx > 31 ? -1 : idx;
}
/*
 * Store @reg_val into the pt_regs field named by @reg_id. GPR identifiers
 * are tried first, then the fixed set of special register names.
 * Identifiers that match nothing are silently ignored.
 */
static void __init rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
{
	unsigned long val = (unsigned long)reg_val;
	int gpr = rtas_fadump_gpr_index(reg_id);

	if (gpr >= 0) {
		regs->gpr[gpr] = val;
		return;
	}
	if (reg_id == fadump_str_to_u64("NIA")) {
		regs->nip = val;
		return;
	}
	if (reg_id == fadump_str_to_u64("MSR")) {
		regs->msr = val;
		return;
	}
	if (reg_id == fadump_str_to_u64("CTR")) {
		regs->ctr = val;
		return;
	}
	if (reg_id == fadump_str_to_u64("LR")) {
		regs->link = val;
		return;
	}
	if (reg_id == fadump_str_to_u64("XER")) {
		regs->xer = val;
		return;
	}
	if (reg_id == fadump_str_to_u64("CR")) {
		regs->ccr = val;
		return;
	}
	if (reg_id == fadump_str_to_u64("DAR")) {
		regs->dar = val;
		return;
	}
	if (reg_id == fadump_str_to_u64("DSISR"))
		regs->dsisr = val;
}
/*
 * Zero @regs, then copy register entries into it starting at @reg_entry
 * until the "CPUEND" entry is reached.
 *
 * Returns a pointer to the entry following "CPUEND".
 */
static struct rtas_fadump_reg_entry* __init
rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry,
		      struct pt_regs *regs)
{
	memset(regs, 0, sizeof(struct pt_regs));

	for (; be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND");
	     reg_entry++) {
		rtas_fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
				       be64_to_cpu(reg_entry->reg_value));
	}

	/* Step over the "CPUEND" entry itself. */
	return reg_entry + 1;
}
/*
 * Read CPU state dump data and convert it into ELF notes.
 * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
 * used to access the data to allow for additional fields to be added without
 * affecting compatibility. Each list of registers for a CPU starts with
 * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
 * 8 Byte ASCII identifier and 8 Byte register value. The register entry
 * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
 * of register value. For more details refer to PAPR document.
 *
 * Only for the crashing cpu we ignore the CPU dump data and get exact
 * state from fadump crash info structure populated by first kernel at the
 * time of crash.
 *
 * Returns 0 on success, -ENOENT when the register save area or a CPU
 * record does not carry the expected marker, or the error returned by
 * fadump_setup_cpu_notes_buf(). The notes buffer is freed again when a
 * malformed CPU record is hit after it was set up.
 */
static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
{
	struct rtas_fadump_reg_save_area_header *reg_header;
	struct fadump_crash_info_header *fdh = NULL;
	struct rtas_fadump_reg_entry *reg_entry;
	u32 num_cpus, *note_buf;
	int i, rc = 0, cpu = 0;
	struct pt_regs regs;
	void *vaddr;

	vaddr = (void *)fadump_conf->cpu_state_dest_vaddr;

	reg_header = vaddr;
	if (be64_to_cpu(reg_header->magic_number) !=
	    fadump_str_to_u64("REGSAVE")) {
		pr_err("Unable to read register save area.\n");
		return -ENOENT;
	}

	pr_debug("--------CPU State Data------------\n");
	pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
	pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));

	/* The CPU count is located via the offset stored in the header. */
	vaddr += be32_to_cpu(reg_header->num_cpu_offset);
	num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
	pr_debug("NumCpus     : %u\n", num_cpus);

	/* Register entries start right after the 32-bit CPU count. */
	vaddr += sizeof(u32);
	reg_entry = (struct rtas_fadump_reg_entry *)vaddr;

	rc = fadump_setup_cpu_notes_buf(num_cpus);
	if (rc != 0)
		return rc;

	note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;

	if (fadump_conf->fadumphdr_addr)
		fdh = __va(fadump_conf->fadumphdr_addr);

	for (i = 0; i < num_cpus; i++) {
		if (be64_to_cpu(reg_entry->reg_id) !=
		    fadump_str_to_u64("CPUSTRT")) {
			pr_err("Unable to read CPU state data\n");
			rc = -ENOENT;
			goto error_out;
		}

		/* Lower 4 bytes of reg_value contains logical cpu id */
		cpu = (be64_to_cpu(reg_entry->reg_value) &
		       RTAS_FADUMP_CPU_ID_MASK);

		/* Skip CPUs that are not set in the crash header's cpu mask. */
		if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_mask)) {
			RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
			continue;
		}

		pr_debug("Reading register data for cpu %d...\n", cpu);
		if (fdh && fdh->crashing_cpu == cpu) {
			/* Crashing CPU: use the registers saved in the crash header. */
			regs = fdh->regs;
			note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
			RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
		} else {
			/* Step past "CPUSTRT" and decode the f/w register list. */
			reg_entry++;
			reg_entry = rtas_fadump_read_regs(reg_entry, &regs);
			note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
		}
	}
	final_note(note_buf);

	pr_debug("Updating elfcore header (%llx) with cpu notes\n", fadump_conf->elfcorehdr_addr);
	fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr);
	return 0;

error_out:
	fadump_free_cpu_notes_buf();
	return rc;
}
/*
 * Validate and process the dump data stored by the firmware, and update
 * the CPU notes of elfcorehdr.
 *
 * Returns -EINVAL when there is no active dump, no FADump header address,
 * or when a CPU state / HPTE / real mode section is flagged as errored or
 * incomplete. Otherwise returns the result of building the CPU notes.
 */
static int __init rtas_fadump_process(struct fw_dump *fadump_conf)
{
	int nr_sections;

	if (!fdm_active || !fadump_conf->fadumphdr_addr)
		return -EINVAL;

	/* Check if the dump data is valid. */
	nr_sections = be16_to_cpu(fdm_active->header.dump_num_sections);
	for (int i = 0; i < nr_sections; i++) {
		const struct rtas_fadump_section *sec = &fdm_active->rgn[i];
		int type = be16_to_cpu(sec->source_data_type);
		bool incomplete = sec->bytes_dumped != sec->source_len;
		bool errored = sec->error_flags != 0;

		switch (type) {
		case RTAS_FADUMP_CPU_STATE_DATA:
		case RTAS_FADUMP_HPTE_REGION:
		case RTAS_FADUMP_REAL_MODE_REGION:
			if (errored)
				pr_err("Dump taken by platform is not valid (%d)\n", i);
			if (incomplete)
				pr_err("Dump taken by platform is incomplete (%d)\n", i);

			if (errored || incomplete) {
				pr_warn("Region type: %u src addr: 0x%llx dest addr: 0x%llx\n",
					be16_to_cpu(sec->source_data_type),
					be64_to_cpu(sec->source_address),
					be64_to_cpu(sec->destination_address));
				return -EINVAL;
			}
			break;
		case RTAS_FADUMP_PARAM_AREA:
			/* A bad parameter area is not fatal; just drop it. */
			if (incomplete || errored) {
				pr_warn("Failed to process additional parameters! Proceeding anyway..\n");
				fadump_conf->param_area = 0;
			}
			break;
		default:
			/*
			 * If the first/crashed kernel added a new region type that the
			 * second/fadump kernel doesn't recognize, skip it and process
			 * assuming backward compatibility.
			 */
			pr_warn("Unknown region found: type: %u src addr: 0x%llx dest addr: 0x%llx\n",
				be16_to_cpu(sec->source_data_type),
				be64_to_cpu(sec->source_address),
				be64_to_cpu(sec->destination_address));
			break;
		}
	}

	return rtas_fadump_build_cpu_notes(fadump_conf);
}
static void rtas_fadump_region_show(struct fw_dump *fadump_conf,
struct seq_file *m)
{
const struct rtas_fadump_mem_struct *fdm_ptr;
if (fdm_active)
fdm_ptr = fdm_active;
else
fdm_ptr = &fdm;
for (int i = 0; i < be16_to_cpu(fdm_ptr->header.dump_num_sections); i++) {
int type = be16_to_cpu(fdm_ptr->rgn[i].source_data_type);
switch (type) {
case RTAS_FADUMP_CPU_STATE_DATA:
seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
be64_to_cpu(fdm_ptr->rgn[i].destination_address),
be64_to_cpu(fdm_ptr->rgn[i].destination_address) +
be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1,
be64_to_cpu(fdm_ptr->rgn[i].source_len),
be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped));
break;
case RTAS_FADUMP_HPTE_REGION:
seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
be64_to_cpu(fdm_ptr->rgn[i].destination_address),
be64_to_cpu(fdm_ptr->rgn[i].destination_address) +
be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1,
be64_to_cpu(fdm_ptr->rgn[i].source_len),
be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped));
break;
case RTAS_FADUMP_REAL_MODE_REGION:
seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
be64_to_cpu(fdm_ptr->rgn[i].source_address),
be64_to_cpu(fdm_ptr->rgn[i].destination_address));
seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
be64_to_cpu(fdm_ptr->rgn[i].source_len),
be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped));
break;
case RTAS_FADUMP_PARAM_AREA:
seq_printf(m, "\n[%#016llx-%#016llx]: cmdline append: '%s'\n",
be64_to_cpu(fdm_ptr->rgn[i].destination_address),
be64_to_cpu(fdm_ptr->rgn[i].destination_address) +
be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1,
(char *)__va(be64_to_cpu(fdm_ptr->rgn[i].destination_address)));
break;
default:
seq_printf(m, "Unknown region type %d : Src: %#016llx, Dest: %#016llx, ",
type, be64_to_cpu(fdm_ptr->rgn[i].source_address),
be64_to_cpu(fdm_ptr->rgn[i].destination_address));
break;
}
}
/* Dump is active. Show preserved area start address. */
if (fdm_active) {
seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n",
fadump_conf->boot_mem_top);
}
}
/*
 * Trigger the firmware assisted dump. @fdh is part of the callback
 * signature but is not used on this platform; @msg is forwarded to the
 * f/w through rtas_os_term().
 */
static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh,
				const char *msg)
{
	char *os_term_msg = (char *)msg;

	/* Call ibm,os-term rtas call to trigger firmware assisted dump */
	rtas_os_term(os_term_msg);
}
/*
 * Maximum number of boot memory regions supported on this platform
 * (FADUMP_MAX_MEM_REGS or lower).
 */
static int rtas_fadump_max_boot_mem_rgns(void)
{
	/*
	 * Version 1 of Kernel Assisted Dump Memory Structure (PAPR) supports
	 * 10 sections. With one section each taken for CPU state data and
	 * HPTE, 8 sections can be used for boot memory regions.
	 *
	 * If new region types are defined, the maximum number of boot memory
	 * regions decreases accordingly.
	 */
	const int max_rgns = RTAS_FADUMP_MAX_BOOT_MEM_REGS;

	return max_rgns;
}
/*
* FADump callbacks implemented in this file on top of RTAS. The table is
* installed into fw_dump->ops by rtas_fadump_dt_scan() when the
* "ibm,configure-kernel-dump" property is found.
*/
static struct fadump_ops rtas_fadump_ops = {
.fadump_init_mem_struct = rtas_fadump_init_mem_struct,
.fadump_get_bootmem_min = rtas_fadump_get_bootmem_min,
.fadump_register = rtas_fadump_register,
.fadump_unregister = rtas_fadump_unregister,
.fadump_invalidate = rtas_fadump_invalidate,
.fadump_process = rtas_fadump_process,
.fadump_region_show = rtas_fadump_region_show,
.fadump_trigger = rtas_fadump_trigger,
.fadump_max_boot_mem_rgns = rtas_fadump_max_boot_mem_rgns,
};
/*
 * Scan the flat device-tree node @node for firmware-assisted dump support.
 *
 * When "ibm,configure-kernel-dump" is present, record its token, install
 * the RTAS FADump callbacks and mark FADump (and the parameter area) as
 * supported. When "ibm,kernel-dump" is present as well, a dump is pending:
 * mark it active and recover the configuration from it. Finally, read the
 * CPU state and HPTE section sizes from "ibm,configure-kernel-dump-sizes".
 */
void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
{
	int i, size, num_sections;
	const __be32 *sections;
	const __be32 *token;

	/*
	 * Check if Firmware Assisted dump is supported. if yes, check
	 * if dump has been initiated on last reboot.
	 */
	token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
	if (!token)
		return;

	fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token);
	fadump_conf->ops = &rtas_fadump_ops;
	fadump_conf->fadump_supported = 1;
	fadump_conf->param_area_supported = 1;

	/* Firmware supports 64-bit value for size, align it to pagesize. */
	fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE);

	/*
	 * The 'ibm,kernel-dump' rtas node is present only if there is
	 * dump data waiting for us.
	 */
	fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
	if (fdm_active) {
		pr_info("Firmware-assisted dump is active.\n");
		fadump_conf->dump_active = 1;
		rtas_fadump_get_config(fadump_conf, (void *)__pa(fdm_active));
	}

	/*
	 * Get the sizes required to store dump data for the firmware provided
	 * dump sections.
	 * For each dump section type supported, a 32bit cell which defines
	 * the ID of a supported section followed by two 32 bit cells which
	 * gives the size of the section in bytes.
	 */
	sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
				       &size);
	if (!sections)
		return;

	/* Each entry is three 32-bit cells: section id + 64-bit size. */
	num_sections = size / (3 * sizeof(u32));

	for (i = 0; i < num_sections; i++, sections += 3) {
		u32 type = (u32)of_read_number(sections, 1);

		switch (type) {
		case RTAS_FADUMP_CPU_STATE_DATA:
			fadump_conf->cpu_state_data_size =
					of_read_ulong(&sections[1], 2);
			break;
		case RTAS_FADUMP_HPTE_REGION:
			fadump_conf->hpte_region_size =
					of_read_ulong(&sections[1], 2);
			break;
		default:
			/* Sizes of other section types are not needed here. */
			break;
		}
	}
}