// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */
#include <linux/refcount.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/arp.h>
#include "neigh.h"
#include "tc.h"
#include "en_rep.h"
#include "fs_core.h"
#include "diag/en_rep_tracepoint.h"
static unsigned long mlx5e_rep_ipv6_interval(void)
{
if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
return ~0UL;
}
static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
{
unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
mlx5_fc_queue_stats_work(priv->mdev,
&neigh_update->neigh_stats_work,
neigh_update->min_interval);
}
static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
return refcount_inc_not_zero(&nhe->refcnt);
}
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
{
if (refcount_dec_and_test(&nhe->refcnt)) {
mlx5e_rep_neigh_entry_remove(nhe);
kfree_rcu(nhe, rcu);
}
}
static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_neigh_hash_entry *next = NULL;
rcu_read_lock();
for (next = nhe ?
list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
&nhe->neigh_list,
struct mlx5e_neigh_hash_entry,
neigh_list) :
list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
struct mlx5e_neigh_hash_entry,
neigh_list);
next;
next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
&next->neigh_list,
struct mlx5e_neigh_hash_entry,
neigh_list))
if (mlx5e_rep_neigh_entry_hold(next))
break;
rcu_read_unlock();
if (nhe)
mlx5e_rep_neigh_entry_release(nhe);
return next;
}
static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
neigh_update.neigh_stats_work.work);
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_neigh_hash_entry *nhe = NULL;
rtnl_lock();
if (!list_empty(&rpriv->neigh_update.neigh_list))
mlx5e_rep_queue_neigh_stats_work(priv);
while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
mlx5e_tc_update_neigh_used_value(nhe);
rtnl_unlock();
}
struct neigh_update_work {
struct work_struct work;
struct neighbour *n;
struct mlx5e_neigh_hash_entry *nhe;
};
static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
{
neigh_release(update_work->n);
mlx5e_rep_neigh_entry_release(update_work->nhe);
kfree(update_work);
}
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
work);
struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
struct neighbour *n = update_work->n;
struct mlx5e_encap_entry *e = NULL;
bool neigh_connected, same_dev;
unsigned char ha[ETH_ALEN];
u8 nud_state, dead;
rtnl_lock();
/* If these parameters are changed after we release the lock,
* we'll receive another event letting us know about it.
* We use this lock to avoid inconsistency between the neigh validity
* and it's hw address.
*/
read_lock_bh(&n->lock);
memcpy(ha, n->ha, ETH_ALEN);
nud_state = n->nud_state;
dead = n->dead;
same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
read_unlock_bh(&n->lock);
neigh_connected = (nud_state & NUD_VALID) && !dead;
trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
if (!same_dev)
goto out;
/* mlx5e_get_next_init_encap() releases previous encap before returning
* the next one.
*/
while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
out:
rtnl_unlock();
mlx5e_release_neigh_update_work(update_work);
}
static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
struct neighbour *n)
{
struct neigh_update_work *update_work;
struct mlx5e_neigh_hash_entry *nhe;
struct mlx5e_neigh m_neigh = {};
update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
if (WARN_ON(!update_work))
return NULL;
m_neigh.family = n->ops->family;
memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
/* Obtain reference to nhe as last step in order not to release it in
* atomic context.
*/
rcu_read_lock();
nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
rcu_read_unlock();
if (!nhe) {
kfree(update_work);
return NULL;
}
INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
neigh_hold(n);
update_work->n = n;
update_work->nhe = nhe;
return update_work;
}
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
neigh_update.netevent_nb);
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_neigh_hash_entry *nhe = NULL;
struct neigh_update_work *update_work;
struct neigh_parms *p;
struct neighbour *n;
bool found = false;
switch (event) {
case NETEVENT_NEIGH_UPDATE:
n = ptr;
#if IS_ENABLED(CONFIG_IPV6)
if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
if (n->tbl != &arp_tbl)
#endif
return NOTIFY_DONE;
update_work = mlx5e_alloc_neigh_update_work(priv, n);
if (!update_work)
return NOTIFY_DONE;
queue_work(priv->wq, &update_work->work);
break;
case NETEVENT_DELAY_PROBE_TIME_UPDATE:
p = ptr;
/* We check the device is present since we don't care about
* changes in the default table, we only care about changes
* done per device delay prob time parameter.
*/
#if IS_ENABLED(CONFIG_IPV6)
if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
if (!p->dev || p->tbl != &arp_tbl)
#endif
return NOTIFY_DONE;
rcu_read_lock();
list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
neigh_list) {
if (p->dev == READ_ONCE(nhe->neigh_dev)) {
found = true;
break;
}
}
rcu_read_unlock();
if (!found)
return NOTIFY_DONE;
neigh_update->min_interval = min_t(unsigned long,
NEIGH_VAR(p, DELAY_PROBE_TIME),
neigh_update->min_interval);
mlx5_fc_update_sampling_interval(priv->mdev,
neigh_update->min_interval);
break;
}
return NOTIFY_DONE;
}
static const struct rhashtable_params mlx5e_neigh_ht_params = {
.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
.key_len = sizeof(struct mlx5e_neigh),
.automatic_shrinking = true,
};
int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
{
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
int err;
err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
if (err)
goto out_err;
INIT_LIST_HEAD(&neigh_update->neigh_list);
mutex_init(&neigh_update->encap_lock);
INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
mlx5e_rep_neigh_stats_work);
mlx5e_rep_neigh_update_init_interval(rpriv);
neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event;
err = register_netevent_notifier(&neigh_update->netevent_nb);
if (err)
goto out_notifier;
return 0;
out_notifier:
neigh_update->netevent_nb.notifier_call = NULL;
rhashtable_destroy(&neigh_update->neigh_ht);
out_err:
netdev_warn(rpriv->netdev,
"Failed to initialize neighbours handling for vport %d\n",
rpriv->rep->vport);
return err;
}
void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
if (!rpriv->neigh_update.netevent_nb.notifier_call)
return;
unregister_netevent_notifier(&neigh_update->netevent_nb);
flush_workqueue(priv->wq); /* flush neigh update works */
cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
mutex_destroy(&neigh_update->encap_lock);
rhashtable_destroy(&neigh_update->neigh_ht);
}
static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
int err;
err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
&nhe->rhash_node,
mlx5e_neigh_ht_params);
if (err)
return err;
list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
return err;
}
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
mutex_lock(&rpriv->neigh_update.encap_lock);
list_del_rcu(&nhe->neigh_list);
rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
&nhe->rhash_node,
mlx5e_neigh_ht_params);
mutex_unlock(&rpriv->neigh_update.encap_lock);
}
/* This function must only be called under the representor's encap_lock or
* inside rcu read lock section.
*/
struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
struct mlx5e_neigh_hash_entry *nhe;
nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
mlx5e_neigh_ht_params);
return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
}
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh,
struct net_device *neigh_dev,
struct mlx5e_neigh_hash_entry **nhe)
{
int err;
*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
if (!*nhe)
return -ENOMEM;
(*nhe)->priv = priv;
memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
spin_lock_init(&(*nhe)->encap_list_lock);
INIT_LIST_HEAD(&(*nhe)->encap_list);
refcount_set(&(*nhe)->refcnt, 1);
WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);
err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
if (err)
goto out_free;
return 0;
out_free:
kfree(*nhe);
return err;
}