// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd */
#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>
#include "mlx5_core.h"
#include "dev.h"
#include "sf/vhca_event.h"
#include "sf/sf.h"
#include "sf/mlx5_ifc_vhca_event.h"
#include "ecpf.h"
#define CREATE_TRACE_POINTS
#include "diag/dev_tracepoint.h"
struct mlx5_sf_dev_table {
struct xarray devices;
phys_addr_t base_address;
u64 sf_bar_length;
struct notifier_block nb;
struct workqueue_struct *active_wq;
struct work_struct work;
u8 stop_active_wq:1;
struct mlx5_core_dev *dev;
};
struct mlx5_sf_dev_active_work_ctx {
struct work_struct work;
struct mlx5_vhca_state_event event;
struct mlx5_sf_dev_table *table;
int sf_index;
};
static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev)
{
return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev);
}
bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
{
struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
return table && !xa_empty(&table->devices);
}
static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev);
struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
return sysfs_emit(buf, "%u\n", sf_dev->sfnum);
}
static DEVICE_ATTR_RO(sfnum);
static struct attribute *sf_device_attrs[] = {
&dev_attr_sfnum.attr,
NULL,
};
static const struct attribute_group sf_attr_group = {
.attrs = sf_device_attrs,
};
static const struct attribute_group *sf_attr_groups[2] = {
&sf_attr_group,
NULL
};
static void mlx5_sf_dev_release(struct device *device)
{
struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev);
struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
mlx5_adev_idx_free(adev->id);
kfree(sf_dev);
}
static void mlx5_sf_dev_remove_aux(struct mlx5_core_dev *dev,
struct mlx5_sf_dev *sf_dev)
{
int id;
id = sf_dev->adev.id;
trace_mlx5_sf_dev_del(dev, sf_dev, id);
auxiliary_device_delete(&sf_dev->adev);
auxiliary_device_uninit(&sf_dev->adev);
}
static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id, u32 sfnum)
{
struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
struct mlx5_sf_dev *sf_dev;
struct pci_dev *pdev;
int err;
int id;
id = mlx5_adev_idx_alloc();
if (id < 0) {
err = id;
goto add_err;
}
sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL);
if (!sf_dev) {
mlx5_adev_idx_free(id);
err = -ENOMEM;
goto add_err;
}
pdev = dev->pdev;
sf_dev->adev.id = id;
sf_dev->adev.name = MLX5_SF_DEV_ID_NAME;
sf_dev->adev.dev.release = mlx5_sf_dev_release;
sf_dev->adev.dev.parent = &pdev->dev;
sf_dev->adev.dev.groups = sf_attr_groups;
sf_dev->sfnum = sfnum;
sf_dev->parent_mdev = dev;
sf_dev->fn_id = fn_id;
sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length);
trace_mlx5_sf_dev_add(dev, sf_dev, id);
err = auxiliary_device_init(&sf_dev->adev);
if (err) {
mlx5_adev_idx_free(id);
kfree(sf_dev);
goto add_err;
}
err = auxiliary_device_add(&sf_dev->adev);
if (err) {
auxiliary_device_uninit(&sf_dev->adev);
goto add_err;
}
err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL);
if (err)
goto xa_err;
return;
xa_err:
mlx5_sf_dev_remove_aux(dev, sf_dev);
add_err:
mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n",
sf_index, sfnum, err);
}
static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev, u16 sf_index)
{
struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
xa_erase(&table->devices, sf_index);
mlx5_sf_dev_remove_aux(dev, sf_dev);
}
static int
mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_code, void *data)
{
struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb);
const struct mlx5_vhca_state_event *event = data;
struct mlx5_sf_dev *sf_dev;
u16 max_functions;
u16 sf_index;
u16 base_id;
max_functions = mlx5_sf_max_functions(table->dev);
if (!max_functions)
return 0;
base_id = mlx5_sf_start_function_id(table->dev);
if (event->function_id < base_id || event->function_id >= (base_id + max_functions))
return 0;
sf_index = event->function_id - base_id;
sf_dev = xa_load(&table->devices, sf_index);
switch (event->new_vhca_state) {
case MLX5_VHCA_STATE_INVALID:
case MLX5_VHCA_STATE_ALLOCATED:
if (sf_dev)
mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
break;
case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
if (sf_dev)
mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
else
mlx5_core_err(table->dev,
"SF DEV: teardown state for invalid dev index=%d sfnum=0x%x\n",
sf_index, event->sw_function_id);
break;
case MLX5_VHCA_STATE_ACTIVE:
if (!sf_dev)
mlx5_sf_dev_add(table->dev, sf_index, event->function_id,
event->sw_function_id);
break;
default:
break;
}
return 0;
}
static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table)
{
struct mlx5_core_dev *dev = table->dev;
u16 max_functions;
u16 function_id;
int err = 0;
int i;
max_functions = mlx5_sf_max_functions(dev);
function_id = mlx5_sf_start_function_id(dev);
/* Arm the vhca context as the vhca event notifier */
for (i = 0; i < max_functions; i++) {
err = mlx5_vhca_event_arm(dev, function_id);
if (err)
return err;
function_id++;
}
return 0;
}
static void mlx5_sf_dev_add_active_work(struct work_struct *_work)
{
struct mlx5_sf_dev_active_work_ctx *work_ctx;
work_ctx = container_of(_work, struct mlx5_sf_dev_active_work_ctx, work);
if (work_ctx->table->stop_active_wq)
goto out;
/* Don't probe device which is already probe */
if (!xa_load(&work_ctx->table->devices, work_ctx->sf_index))
mlx5_sf_dev_add(work_ctx->table->dev, work_ctx->sf_index,
work_ctx->event.function_id, work_ctx->event.sw_function_id);
/* There is a race where SF got inactive after the query
* above. e.g.: the query returns that the state of the
* SF is active, and after that the eswitch manager set it to
* inactive.
* This case cannot be managed in SW, since the probing of the
* SF is on one system, and the inactivation is on a different
* system.
* If the inactive is done after the SF perform init_hca(),
* the SF will fully probe and then removed. If it was
* done before init_hca(), the SF probe will fail.
*/
out:
kfree(work_ctx);
}
/* In case SFs are generated externally, probe active SFs */
static void mlx5_sf_dev_queue_active_works(struct work_struct *_work)
{
struct mlx5_sf_dev_table *table = container_of(_work, struct mlx5_sf_dev_table, work);
u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
struct mlx5_sf_dev_active_work_ctx *work_ctx;
struct mlx5_core_dev *dev = table->dev;
u16 max_functions;
u16 function_id;
u16 sw_func_id;
int err = 0;
int wq_idx;
u8 state;
int i;
max_functions = mlx5_sf_max_functions(dev);
function_id = mlx5_sf_start_function_id(dev);
for (i = 0; i < max_functions; i++, function_id++) {
if (table->stop_active_wq)
return;
err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out));
if (err)
/* A failure of specific vhca doesn't mean others will
* fail as well.
*/
continue;
state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
if (state != MLX5_VHCA_STATE_ACTIVE)
continue;
sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id);
work_ctx = kzalloc(sizeof(*work_ctx), GFP_KERNEL);
if (!work_ctx)
return;
INIT_WORK(&work_ctx->work, &mlx5_sf_dev_add_active_work);
work_ctx->event.function_id = function_id;
work_ctx->event.sw_function_id = sw_func_id;
work_ctx->table = table;
work_ctx->sf_index = i;
wq_idx = work_ctx->event.function_id % MLX5_DEV_MAX_WQS;
mlx5_vhca_events_work_enqueue(dev, wq_idx, &work_ctx->work);
}
}
/* In case SFs are generated externally, probe active SFs */
static int mlx5_sf_dev_create_active_works(struct mlx5_sf_dev_table *table)
{
if (MLX5_CAP_GEN(table->dev, eswitch_manager))
return 0; /* the table is local */
/* Use a workqueue to probe active SFs, which are in large
* quantity and may take up to minutes to probe.
*/
table->active_wq = create_singlethread_workqueue("mlx5_active_sf");
if (!table->active_wq)
return -ENOMEM;
INIT_WORK(&table->work, &mlx5_sf_dev_queue_active_works);
queue_work(table->active_wq, &table->work);
return 0;
}
static void mlx5_sf_dev_destroy_active_works(struct mlx5_sf_dev_table *table)
{
if (table->active_wq) {
table->stop_active_wq = true;
destroy_workqueue(table->active_wq);
}
}
void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
{
struct mlx5_sf_dev_table *table;
int err;
if (!mlx5_sf_dev_supported(dev))
return;
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table) {
err = -ENOMEM;
goto table_err;
}
table->nb.notifier_call = mlx5_sf_dev_state_change_handler;
table->dev = dev;
table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12);
table->base_address = pci_resource_start(dev->pdev, 2);
xa_init(&table->devices);
dev->priv.sf_dev_table = table;
err = mlx5_vhca_event_notifier_register(dev, &table->nb);
if (err)
goto vhca_err;
err = mlx5_sf_dev_create_active_works(table);
if (err)
goto add_active_err;
err = mlx5_sf_dev_vhca_arm_all(table);
if (err)
goto arm_err;
return;
arm_err:
mlx5_sf_dev_destroy_active_works(table);
add_active_err:
mlx5_vhca_event_notifier_unregister(dev, &table->nb);
mlx5_vhca_event_work_queues_flush(dev);
vhca_err:
kfree(table);
dev->priv.sf_dev_table = NULL;
table_err:
mlx5_core_err(dev, "SF DEV table create err = %d\n", err);
}
static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table)
{
struct mlx5_sf_dev *sf_dev;
unsigned long index;
xa_for_each(&table->devices, index, sf_dev) {
xa_erase(&table->devices, index);
mlx5_sf_dev_remove_aux(table->dev, sf_dev);
}
}
void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
{
struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
if (!table)
return;
mlx5_sf_dev_destroy_active_works(table);
mlx5_vhca_event_notifier_unregister(dev, &table->nb);
mlx5_vhca_event_work_queues_flush(dev);
/* Now that event handler is not running, it is safe to destroy
* the sf device without race.
*/
mlx5_sf_dev_destroy_all(table);
WARN_ON(!xa_empty(&table->devices));
kfree(table);
dev->priv.sf_dev_table = NULL;
}