linux/drivers/net/ethernet/amd/pds_core/core.c

// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2023 Advanced Micro Devices, Inc */

#include <linux/pci.h>
#include <linux/vmalloc.h>

#include "core.h"

static BLOCKING_NOTIFIER_HEAD(pds_notify_chain);

int pdsc_register_notify(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&pds_notify_chain, nb);
}
EXPORT_SYMBOL_GPL(pdsc_register_notify);

void pdsc_unregister_notify(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&pds_notify_chain, nb);
}
EXPORT_SYMBOL_GPL(pdsc_unregister_notify);

void pdsc_notify(unsigned long event, void *data)
{
	blocking_notifier_call_chain(&pds_notify_chain, event, data);
}

void pdsc_intr_free(struct pdsc *pdsc, int index)
{
	struct pdsc_intr_info *intr_info;

	if (index >= pdsc->nintrs || index < 0) {
		WARN(true, "bad intr index %d\n", index);
		return;
	}

	intr_info = &pdsc->intr_info[index];
	if (!intr_info->vector)
		return;
	dev_dbg(pdsc->dev, "%s: idx %d vec %d name %s\n",
		__func__, index, intr_info->vector, intr_info->name);

	pds_core_intr_mask(&pdsc->intr_ctrl[index], PDS_CORE_INTR_MASK_SET);
	pds_core_intr_clean(&pdsc->intr_ctrl[index]);

	free_irq(intr_info->vector, intr_info->data);

	memset(intr_info, 0, sizeof(*intr_info));
}

int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
		    irq_handler_t handler, void *data)
{
	struct pdsc_intr_info *intr_info;
	unsigned int index;
	int err;

	/* Find the first available interrupt */
	for (index = 0; index < pdsc->nintrs; index++)
		if (!pdsc->intr_info[index].vector)
			break;
	if (index >= pdsc->nintrs) {
		dev_warn(pdsc->dev, "%s: no intr, index=%d nintrs=%d\n",
			 __func__, index, pdsc->nintrs);
		return -ENOSPC;
	}

	pds_core_intr_clean_flags(&pdsc->intr_ctrl[index],
				  PDS_CORE_INTR_CRED_RESET_COALESCE);

	intr_info = &pdsc->intr_info[index];

	intr_info->index = index;
	intr_info->data = data;
	strscpy(intr_info->name, name, sizeof(intr_info->name));

	/* Get the OS vector number for the interrupt */
	err = pci_irq_vector(pdsc->pdev, index);
	if (err < 0) {
		dev_err(pdsc->dev, "failed to get intr vector index %d: %pe\n",
			index, ERR_PTR(err));
		goto err_out_free_intr;
	}
	intr_info->vector = err;

	/* Init the device's intr mask */
	pds_core_intr_clean(&pdsc->intr_ctrl[index]);
	pds_core_intr_mask_assert(&pdsc->intr_ctrl[index], 1);
	pds_core_intr_mask(&pdsc->intr_ctrl[index], PDS_CORE_INTR_MASK_SET);

	/* Register the isr with a name */
	err = request_irq(intr_info->vector, handler, 0, intr_info->name, data);
	if (err) {
		dev_err(pdsc->dev, "failed to get intr irq vector %d: %pe\n",
			intr_info->vector, ERR_PTR(err));
		goto err_out_free_intr;
	}

	return index;

err_out_free_intr:
	pdsc_intr_free(pdsc, index);
	return err;
}

static void pdsc_qcq_intr_free(struct pdsc *pdsc, struct pdsc_qcq *qcq)
{
	if (!(qcq->flags & PDS_CORE_QCQ_F_INTR) ||
	    qcq->intx == PDS_CORE_INTR_INDEX_NOT_ASSIGNED)
		return;

	pdsc_intr_free(pdsc, qcq->intx);
	qcq->intx = PDS_CORE_INTR_INDEX_NOT_ASSIGNED;
}

static int pdsc_qcq_intr_alloc(struct pdsc *pdsc, struct pdsc_qcq *qcq)
{
	char name[PDSC_INTR_NAME_MAX_SZ];
	int index;

	if (!(qcq->flags & PDS_CORE_QCQ_F_INTR)) {
		qcq->intx = PDS_CORE_INTR_INDEX_NOT_ASSIGNED;
		return 0;
	}

	snprintf(name, sizeof(name), "%s-%d-%s",
		 PDS_CORE_DRV_NAME, pdsc->pdev->bus->number, qcq->q.name);
	index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, pdsc);
	if (index < 0)
		return index;
	qcq->intx = index;
	qcq->cq.bound_intr = &pdsc->intr_info[index];

	return 0;
}

void pdsc_qcq_free(struct pdsc *pdsc, struct pdsc_qcq *qcq)
{
	struct device *dev = pdsc->dev;

	if (!(qcq && qcq->pdsc))
		return;

	pdsc_debugfs_del_qcq(qcq);

	pdsc_qcq_intr_free(pdsc, qcq);

	if (qcq->q_base)
		dma_free_coherent(dev, qcq->q_size,
				  qcq->q_base, qcq->q_base_pa);

	if (qcq->cq_base)
		dma_free_coherent(dev, qcq->cq_size,
				  qcq->cq_base, qcq->cq_base_pa);

	vfree(qcq->cq.info);
	vfree(qcq->q.info);

	memset(qcq, 0, sizeof(*qcq));
}

static void pdsc_q_map(struct pdsc_queue *q, void *base, dma_addr_t base_pa)
{
	struct pdsc_q_info *cur;
	unsigned int i;

	q->base = base;
	q->base_pa = base_pa;

	for (i = 0, cur = q->info; i < q->num_descs; i++, cur++)
		cur->desc = base + (i * q->desc_size);
}

static void pdsc_cq_map(struct pdsc_cq *cq, void *base, dma_addr_t base_pa)
{
	struct pdsc_cq_info *cur;
	unsigned int i;

	cq->base = base;
	cq->base_pa = base_pa;

	for (i = 0, cur = cq->info; i < cq->num_descs; i++, cur++)
		cur->comp = base + (i * cq->desc_size);
}

int pdsc_qcq_alloc(struct pdsc *pdsc, unsigned int type, unsigned int index,
		   const char *name, unsigned int flags, unsigned int num_descs,
		   unsigned int desc_size, unsigned int cq_desc_size,
		   unsigned int pid, struct pdsc_qcq *qcq)
{
	struct device *dev = pdsc->dev;
	void *q_base, *cq_base;
	dma_addr_t cq_base_pa;
	dma_addr_t q_base_pa;
	int err;

	qcq->q.info = vcalloc(num_descs, sizeof(*qcq->q.info));
	if (!qcq->q.info) {
		err = -ENOMEM;
		goto err_out;
	}

	qcq->pdsc = pdsc;
	qcq->flags = flags;
	INIT_WORK(&qcq->work, pdsc_work_thread);

	qcq->q.type = type;
	qcq->q.index = index;
	qcq->q.num_descs = num_descs;
	qcq->q.desc_size = desc_size;
	qcq->q.tail_idx = 0;
	qcq->q.head_idx = 0;
	qcq->q.pid = pid;
	snprintf(qcq->q.name, sizeof(qcq->q.name), "%s%u", name, index);

	err = pdsc_qcq_intr_alloc(pdsc, qcq);
	if (err)
		goto err_out_free_q_info;

	qcq->cq.info = vcalloc(num_descs, sizeof(*qcq->cq.info));
	if (!qcq->cq.info) {
		err = -ENOMEM;
		goto err_out_free_irq;
	}

	qcq->cq.num_descs = num_descs;
	qcq->cq.desc_size = cq_desc_size;
	qcq->cq.tail_idx = 0;
	qcq->cq.done_color = 1;

	if (flags & PDS_CORE_QCQ_F_NOTIFYQ) {
		/* q & cq need to be contiguous in case of notifyq */
		qcq->q_size = PDS_PAGE_SIZE +
			      ALIGN(num_descs * desc_size, PDS_PAGE_SIZE) +
			      ALIGN(num_descs * cq_desc_size, PDS_PAGE_SIZE);
		qcq->q_base = dma_alloc_coherent(dev,
						 qcq->q_size + qcq->cq_size,
						 &qcq->q_base_pa,
						 GFP_KERNEL);
		if (!qcq->q_base) {
			err = -ENOMEM;
			goto err_out_free_cq_info;
		}
		q_base = PTR_ALIGN(qcq->q_base, PDS_PAGE_SIZE);
		q_base_pa = ALIGN(qcq->q_base_pa, PDS_PAGE_SIZE);
		pdsc_q_map(&qcq->q, q_base, q_base_pa);

		cq_base = PTR_ALIGN(q_base +
				    ALIGN(num_descs * desc_size, PDS_PAGE_SIZE),
				    PDS_PAGE_SIZE);
		cq_base_pa = ALIGN(qcq->q_base_pa +
				   ALIGN(num_descs * desc_size, PDS_PAGE_SIZE),
				   PDS_PAGE_SIZE);

	} else {
		/* q DMA descriptors */
		qcq->q_size = PDS_PAGE_SIZE + (num_descs * desc_size);
		qcq->q_base = dma_alloc_coherent(dev, qcq->q_size,
						 &qcq->q_base_pa,
						 GFP_KERNEL);
		if (!qcq->q_base) {
			err = -ENOMEM;
			goto err_out_free_cq_info;
		}
		q_base = PTR_ALIGN(qcq->q_base, PDS_PAGE_SIZE);
		q_base_pa = ALIGN(qcq->q_base_pa, PDS_PAGE_SIZE);
		pdsc_q_map(&qcq->q, q_base, q_base_pa);

		/* cq DMA descriptors */
		qcq->cq_size = PDS_PAGE_SIZE + (num_descs * cq_desc_size);
		qcq->cq_base = dma_alloc_coherent(dev, qcq->cq_size,
						  &qcq->cq_base_pa,
						  GFP_KERNEL);
		if (!qcq->cq_base) {
			err = -ENOMEM;
			goto err_out_free_q;
		}
		cq_base = PTR_ALIGN(qcq->cq_base, PDS_PAGE_SIZE);
		cq_base_pa = ALIGN(qcq->cq_base_pa, PDS_PAGE_SIZE);
	}

	pdsc_cq_map(&qcq->cq, cq_base, cq_base_pa);
	qcq->cq.bound_q = &qcq->q;

	pdsc_debugfs_add_qcq(pdsc, qcq);

	return 0;

err_out_free_q:
	dma_free_coherent(dev, qcq->q_size, qcq->q_base, qcq->q_base_pa);
err_out_free_cq_info:
	vfree(qcq->cq.info);
err_out_free_irq:
	pdsc_qcq_intr_free(pdsc, qcq);
err_out_free_q_info:
	vfree(qcq->q.info);
	memset(qcq, 0, sizeof(*qcq));
err_out:
	dev_err(dev, "qcq alloc of %s%d failed %d\n", name, index, err);
	return err;
}

static void pdsc_core_uninit(struct pdsc *pdsc)
{
	pdsc_qcq_free(pdsc, &pdsc->notifyqcq);
	pdsc_qcq_free(pdsc, &pdsc->adminqcq);

	if (pdsc->kern_dbpage) {
		iounmap(pdsc->kern_dbpage);
		pdsc->kern_dbpage = NULL;
	}
}

static int pdsc_core_init(struct pdsc *pdsc)
{
	union pds_core_dev_comp comp = {};
	union pds_core_dev_cmd cmd = {
		.init.opcode = PDS_CORE_CMD_INIT,
	};
	struct pds_core_dev_init_data_out cido;
	struct pds_core_dev_init_data_in cidi;
	u32 dbid_count;
	u32 dbpage_num;
	int numdescs;
	size_t sz;
	int err;

	/* Scale the descriptor ring length based on number of CPUs and VFs */
	numdescs = max_t(int, PDSC_ADMINQ_MIN_LENGTH, num_online_cpus());
	numdescs += 2 * pci_sriov_get_totalvfs(pdsc->pdev);
	numdescs = roundup_pow_of_two(numdescs);
	err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_ADMINQ, 0, "adminq",
			     PDS_CORE_QCQ_F_CORE | PDS_CORE_QCQ_F_INTR,
			     numdescs,
			     sizeof(union pds_core_adminq_cmd),
			     sizeof(union pds_core_adminq_comp),
			     0, &pdsc->adminqcq);
	if (err)
		return err;

	err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_NOTIFYQ, 0, "notifyq",
			     PDS_CORE_QCQ_F_NOTIFYQ,
			     PDSC_NOTIFYQ_LENGTH,
			     sizeof(struct pds_core_notifyq_cmd),
			     sizeof(union pds_core_notifyq_comp),
			     0, &pdsc->notifyqcq);
	if (err)
		goto err_out_uninit;

	cidi.adminq_q_base = cpu_to_le64(pdsc->adminqcq.q_base_pa);
	cidi.adminq_cq_base = cpu_to_le64(pdsc->adminqcq.cq_base_pa);
	cidi.notifyq_cq_base = cpu_to_le64(pdsc->notifyqcq.cq.base_pa);
	cidi.flags = cpu_to_le32(PDS_CORE_QINIT_F_IRQ | PDS_CORE_QINIT_F_ENA);
	cidi.intr_index = cpu_to_le16(pdsc->adminqcq.intx);
	cidi.adminq_ring_size = ilog2(pdsc->adminqcq.q.num_descs);
	cidi.notifyq_ring_size = ilog2(pdsc->notifyqcq.q.num_descs);

	mutex_lock(&pdsc->devcmd_lock);

	sz = min_t(size_t, sizeof(cidi), sizeof(pdsc->cmd_regs->data));
	memcpy_toio(&pdsc->cmd_regs->data, &cidi, sz);

	err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
	if (!err) {
		sz = min_t(size_t, sizeof(cido), sizeof(pdsc->cmd_regs->data));
		memcpy_fromio(&cido, &pdsc->cmd_regs->data, sz);
	}

	mutex_unlock(&pdsc->devcmd_lock);
	if (err) {
		dev_err(pdsc->dev, "Device init command failed: %pe\n",
			ERR_PTR(err));
		goto err_out_uninit;
	}

	pdsc->hw_index = le32_to_cpu(cido.core_hw_index);

	dbid_count = le32_to_cpu(pdsc->dev_ident.ndbpgs_per_lif);
	dbpage_num = pdsc->hw_index * dbid_count;
	pdsc->kern_dbpage = pdsc_map_dbpage(pdsc, dbpage_num);
	if (!pdsc->kern_dbpage) {
		dev_err(pdsc->dev, "Cannot map dbpage, aborting\n");
		err = -ENOMEM;
		goto err_out_uninit;
	}

	pdsc->adminqcq.q.hw_type = cido.adminq_hw_type;
	pdsc->adminqcq.q.hw_index = le32_to_cpu(cido.adminq_hw_index);
	pdsc->adminqcq.q.dbval = PDS_CORE_DBELL_QID(pdsc->adminqcq.q.hw_index);

	pdsc->notifyqcq.q.hw_type = cido.notifyq_hw_type;
	pdsc->notifyqcq.q.hw_index = le32_to_cpu(cido.notifyq_hw_index);
	pdsc->notifyqcq.q.dbval = PDS_CORE_DBELL_QID(pdsc->notifyqcq.q.hw_index);

	pdsc->last_eid = 0;

	return 0;

err_out_uninit:
	pdsc_core_uninit(pdsc);
	return err;
}

static struct pdsc_viftype pdsc_viftype_defaults[] = {
	[PDS_DEV_TYPE_VDPA] = { .name = PDS_DEV_TYPE_VDPA_STR,
				.vif_id = PDS_DEV_TYPE_VDPA,
				.dl_id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET },
	[PDS_DEV_TYPE_MAX] = {}
};

static int pdsc_viftypes_init(struct pdsc *pdsc)
{
	enum pds_core_vif_types vt;

	pdsc->viftype_status = kzalloc(sizeof(pdsc_viftype_defaults),
				       GFP_KERNEL);
	if (!pdsc->viftype_status)
		return -ENOMEM;

	for (vt = 0; vt < PDS_DEV_TYPE_MAX; vt++) {
		bool vt_support;

		if (!pdsc_viftype_defaults[vt].name)
			continue;

		/* Grab the defaults */
		pdsc->viftype_status[vt] = pdsc_viftype_defaults[vt];

		/* See what the Core device has for support */
		vt_support = !!le16_to_cpu(pdsc->dev_ident.vif_types[vt]);
		dev_dbg(pdsc->dev, "VIF %s is %ssupported\n",
			pdsc->viftype_status[vt].name,
			vt_support ? "" : "not ");

		pdsc->viftype_status[vt].supported = vt_support;
	}

	return 0;
}

int pdsc_setup(struct pdsc *pdsc, bool init)
{
	int err;

	err = pdsc_dev_init(pdsc);
	if (err)
		return err;

	/* Set up the Core with the AdminQ and NotifyQ info */
	err = pdsc_core_init(pdsc);
	if (err)
		goto err_out_teardown;

	/* Set up the VIFs */
	if (init) {
		err = pdsc_viftypes_init(pdsc);
		if (err)
			goto err_out_teardown;

		pdsc_debugfs_add_viftype(pdsc);
	}

	refcount_set(&pdsc->adminq_refcnt, 1);
	clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
	return 0;

err_out_teardown:
	pdsc_teardown(pdsc, init);
	return err;
}

void pdsc_teardown(struct pdsc *pdsc, bool removing)
{
	if (!pdsc->pdev->is_virtfn)
		pdsc_devcmd_reset(pdsc);
	if (pdsc->adminqcq.work.func)
		cancel_work_sync(&pdsc->adminqcq.work);

	pdsc_core_uninit(pdsc);

	if (removing) {
		kfree(pdsc->viftype_status);
		pdsc->viftype_status = NULL;
	}

	pdsc_dev_uninit(pdsc);

	set_bit(PDSC_S_FW_DEAD, &pdsc->state);
}

int pdsc_start(struct pdsc *pdsc)
{
	pds_core_intr_mask(&pdsc->intr_ctrl[pdsc->adminqcq.intx],
			   PDS_CORE_INTR_MASK_CLEAR);

	return 0;
}

void pdsc_stop(struct pdsc *pdsc)
{
	int i;

	if (!pdsc->intr_info)
		return;

	/* Mask interrupts that are in use */
	for (i = 0; i < pdsc->nintrs; i++)
		if (pdsc->intr_info[i].vector)
			pds_core_intr_mask(&pdsc->intr_ctrl[i],
					   PDS_CORE_INTR_MASK_SET);
}

static void pdsc_adminq_wait_and_dec_once_unused(struct pdsc *pdsc)
{
	/* The driver initializes the adminq_refcnt to 1 when the adminq is
	 * allocated and ready for use. Other users/requesters will increment
	 * the refcnt while in use. If the refcnt is down to 1 then the adminq
	 * is not in use and the refcnt can be cleared and adminq freed. Before
	 * calling this function the driver will set PDSC_S_FW_DEAD, which
	 * prevent subsequent attempts to use the adminq and increment the
	 * refcnt to fail. This guarantees that this function will eventually
	 * exit.
	 */
	while (!refcount_dec_if_one(&pdsc->adminq_refcnt)) {
		dev_dbg_ratelimited(pdsc->dev, "%s: adminq in use\n",
				    __func__);
		cpu_relax();
	}
}

void pdsc_fw_down(struct pdsc *pdsc)
{
	union pds_core_notifyq_comp reset_event = {
		.reset.ecode = cpu_to_le16(PDS_EVENT_RESET),
		.reset.state = 0,
	};

	if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
		dev_warn(pdsc->dev, "%s: already happening\n", __func__);
		return;
	}

	if (pdsc->pdev->is_virtfn)
		return;

	pdsc_adminq_wait_and_dec_once_unused(pdsc);

	/* Notify clients of fw_down */
	if (pdsc->fw_reporter)
		devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
	pdsc_notify(PDS_EVENT_RESET, &reset_event);

	pdsc_stop(pdsc);
	pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
}

void pdsc_fw_up(struct pdsc *pdsc)
{
	union pds_core_notifyq_comp reset_event = {
		.reset.ecode = cpu_to_le16(PDS_EVENT_RESET),
		.reset.state = 1,
	};
	int err;

	if (!test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
		dev_err(pdsc->dev, "%s: fw not dead\n", __func__);
		return;
	}

	if (pdsc->pdev->is_virtfn) {
		clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
		return;
	}

	err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY);
	if (err)
		goto err_out;

	err = pdsc_start(pdsc);
	if (err)
		goto err_out;

	/* Notify clients of fw_up */
	pdsc->fw_recoveries++;
	if (pdsc->fw_reporter)
		devlink_health_reporter_state_update(pdsc->fw_reporter,
						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
	pdsc_notify(PDS_EVENT_RESET, &reset_event);

	return;

err_out:
	pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
}

void pdsc_pci_reset_thread(struct work_struct *work)
{
	struct pdsc *pdsc = container_of(work, struct pdsc, pci_reset_work);
	struct pci_dev *pdev = pdsc->pdev;

	pci_dev_get(pdev);
	pci_reset_function(pdev);
	pci_dev_put(pdev);
}

static void pdsc_check_pci_health(struct pdsc *pdsc)
{
	u8 fw_status;

	/* some sort of teardown already in progress */
	if (!pdsc->info_regs)
		return;

	fw_status = ioread8(&pdsc->info_regs->fw_status);

	/* is PCI broken? */
	if (fw_status != PDS_RC_BAD_PCI)
		return;

	/* prevent deadlock between pdsc_reset_prepare and pdsc_health_thread */
	queue_work(pdsc->wq, &pdsc->pci_reset_work);
}

void pdsc_health_thread(struct work_struct *work)
{
	struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
	unsigned long mask;
	bool healthy;

	mutex_lock(&pdsc->config_lock);

	/* Don't do a check when in a transition state */
	mask = BIT_ULL(PDSC_S_INITING_DRIVER) |
	       BIT_ULL(PDSC_S_STOPPING_DRIVER);
	if (pdsc->state & mask)
		goto out_unlock;

	healthy = pdsc_is_fw_good(pdsc);
	dev_dbg(pdsc->dev, "%s: health %d fw_status %#02x fw_heartbeat %d\n",
		__func__, healthy, pdsc->fw_status, pdsc->last_hb);

	if (test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
		if (healthy)
			pdsc_fw_up(pdsc);
	} else {
		if (!healthy)
			pdsc_fw_down(pdsc);
	}

	pdsc_check_pci_health(pdsc);

	pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;

out_unlock:
	mutex_unlock(&pdsc->config_lock);
}