linux/net/smc/smc_llc.c

// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Link Layer Control (LLC)
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Klaus Wacker <[email protected]>
 *              Ursula Braun <[email protected]>
 */

#include <net/tcp.h>
#include <rdma/ib_verbs.h>

#include "smc.h"
#include "smc_core.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_pnet.h"

#define SMC_LLC_DATA_LEN		40

struct smc_llc_hdr {
	struct smc_wr_rx_hdr common;
	union {
		struct {
			u8 length;	/* 44 */
	#if defined(__BIG_ENDIAN_BITFIELD)
			u8 reserved:4,
			   add_link_rej_rsn:4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
			u8 add_link_rej_rsn:4,
			   reserved:4;
#endif
		};
		u16 length_v2;	/* 44 - 8192*/
	};
	u8 flags;
} __packed;		/* format defined in
			 * IBM Shared Memory Communications Version 2
			 * (https://www.ibm.com/support/pages/node/6326337)
			 */

#define SMC_LLC_FLAG_NO_RMBE_EYEC	0x03

struct smc_llc_msg_confirm_link {	/* type 0x01 */
	struct smc_llc_hdr hd;
	u8 sender_mac[ETH_ALEN];
	u8 sender_gid[SMC_GID_SIZE];
	u8 sender_qp_num[3];
	u8 link_num;
	u8 link_uid[SMC_LGR_ID_SIZE];
	u8 max_links;
	u8 max_conns;
	u8 reserved[8];
};

#define SMC_LLC_FLAG_ADD_LNK_REJ	0x40
#define SMC_LLC_REJ_RSN_NO_ALT_PATH	1

struct smc_llc_msg_add_link {		/* type 0x02 */
	struct smc_llc_hdr hd;
	u8 sender_mac[ETH_ALEN];
	u8 reserved2[2];
	u8 sender_gid[SMC_GID_SIZE];
	u8 sender_qp_num[3];
	u8 link_num;
#if defined(__BIG_ENDIAN_BITFIELD)
	u8 reserved3 : 4,
	   qp_mtu   : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
	u8 qp_mtu   : 4,
	   reserved3 : 4;
#endif
	u8 initial_psn[3];
	u8 reserved[8];
};

struct smc_llc_msg_add_link_cont_rt {
	__be32 rmb_key;
	__be32 rmb_key_new;
	__be64 rmb_vaddr_new;
};

struct smc_llc_msg_add_link_v2_ext {
#if defined(__BIG_ENDIAN_BITFIELD)
	u8 v2_direct : 1,
	   reserved  : 7;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
	u8 reserved  : 7,
	   v2_direct : 1;
#endif
	u8 reserved2;
	u8 client_target_gid[SMC_GID_SIZE];
	u8 reserved3[8];
	u16 num_rkeys;
	struct smc_llc_msg_add_link_cont_rt rt[];
} __packed;		/* format defined in
			 * IBM Shared Memory Communications Version 2
			 * (https://www.ibm.com/support/pages/node/6326337)
			 */

struct smc_llc_msg_req_add_link_v2 {
	struct smc_llc_hdr hd;
	u8 reserved[20];
	u8 gid_cnt;
	u8 reserved2[3];
	u8 gid[][SMC_GID_SIZE];
};

#define SMC_LLC_RKEYS_PER_CONT_MSG	2

struct smc_llc_msg_add_link_cont {	/* type 0x03 */
	struct smc_llc_hdr hd;
	u8 link_num;
	u8 num_rkeys;
	u8 reserved2[2];
	struct smc_llc_msg_add_link_cont_rt rt[SMC_LLC_RKEYS_PER_CONT_MSG];
	u8 reserved[4];
} __packed;			/* format defined in RFC7609 */

#define SMC_LLC_FLAG_DEL_LINK_ALL	0x40
#define SMC_LLC_FLAG_DEL_LINK_ORDERLY	0x20

struct smc_llc_msg_del_link {		/* type 0x04 */
	struct smc_llc_hdr hd;
	u8 link_num;
	__be32 reason;
	u8 reserved[35];
} __packed;			/* format defined in RFC7609 */

struct smc_llc_msg_test_link {		/* type 0x07 */
	struct smc_llc_hdr hd;
	u8 user_data[16];
	u8 reserved[24];
};

struct smc_rmb_rtoken {
	union {
		u8 num_rkeys;	/* first rtoken byte of CONFIRM LINK msg */
				/* is actually the num of rtokens, first */
				/* rtoken is always for the current link */
		u8 link_id;	/* link id of the rtoken */
	};
	__be32 rmb_key;
	__be64 rmb_vaddr;
} __packed;			/* format defined in RFC7609 */

#define SMC_LLC_RKEYS_PER_MSG		3
#define SMC_LLC_RKEYS_PER_MSG_V2	255

struct smc_llc_msg_confirm_rkey {	/* type 0x06 */
	struct smc_llc_hdr hd;
	struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
	u8 reserved;
};

#define SMC_LLC_DEL_RKEY_MAX	8
#define SMC_LLC_FLAG_RKEY_RETRY	0x10
#define SMC_LLC_FLAG_RKEY_NEG	0x20

struct smc_llc_msg_delete_rkey {	/* type 0x09 */
	struct smc_llc_hdr hd;
	u8 num_rkeys;
	u8 err_mask;
	u8 reserved[2];
	__be32 rkey[8];
	u8 reserved2[4];
};

struct smc_llc_msg_delete_rkey_v2 {	/* type 0x29 */
	struct smc_llc_hdr hd;
	u8 num_rkeys;
	u8 num_inval_rkeys;
	u8 reserved[2];
	__be32 rkey[];
};

union smc_llc_msg {
	struct smc_llc_msg_confirm_link confirm_link;
	struct smc_llc_msg_add_link add_link;
	struct smc_llc_msg_req_add_link_v2 req_add_link;
	struct smc_llc_msg_add_link_cont add_link_cont;
	struct smc_llc_msg_del_link delete_link;

	struct smc_llc_msg_confirm_rkey confirm_rkey;
	struct smc_llc_msg_delete_rkey delete_rkey;

	struct smc_llc_msg_test_link test_link;
	struct {
		struct smc_llc_hdr hdr;
		u8 data[SMC_LLC_DATA_LEN];
	} raw;
};

#define SMC_LLC_FLAG_RESP		0x80

struct smc_llc_qentry {
	struct list_head list;
	struct smc_link *link;
	union smc_llc_msg msg;
};

static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc);

struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow)
{
	struct smc_llc_qentry *qentry = flow->qentry;

	flow->qentry = NULL;
	return qentry;
}

void smc_llc_flow_qentry_del(struct smc_llc_flow *flow)
{
	struct smc_llc_qentry *qentry;

	if (flow->qentry) {
		qentry = flow->qentry;
		flow->qentry = NULL;
		kfree(qentry);
	}
}

static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
					   struct smc_llc_qentry *qentry)
{
	flow->qentry = qentry;
}

static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type,
				  struct smc_llc_qentry *qentry)
{
	u8 msg_type = qentry->msg.raw.hdr.common.llc_type;

	if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) &&
	    flow_type != msg_type && !lgr->delayed_event) {
		lgr->delayed_event = qentry;
		return;
	}
	/* drop parallel or already-in-progress llc requests */
	if (flow_type != msg_type)
		pr_warn_once("smc: SMC-R lg %*phN net %llu dropped parallel "
			     "LLC msg: msg %d flow %d role %d\n",
			     SMC_LGR_ID_SIZE, &lgr->id,
			     lgr->net->net_cookie,
			     qentry->msg.raw.hdr.common.type,
			     flow_type, lgr->role);
	kfree(qentry);
}

/* try to start a new llc flow, initiated by an incoming llc msg */
static bool smc_llc_flow_start(struct smc_llc_flow *flow,
			       struct smc_llc_qentry *qentry)
{
	struct smc_link_group *lgr = qentry->link->lgr;

	spin_lock_bh(&lgr->llc_flow_lock);
	if (flow->type) {
		/* a flow is already active */
		smc_llc_flow_parallel(lgr, flow->type, qentry);
		spin_unlock_bh(&lgr->llc_flow_lock);
		return false;
	}
	switch (qentry->msg.raw.hdr.common.llc_type) {
	case SMC_LLC_ADD_LINK:
		flow->type = SMC_LLC_FLOW_ADD_LINK;
		break;
	case SMC_LLC_DELETE_LINK:
		flow->type = SMC_LLC_FLOW_DEL_LINK;
		break;
	case SMC_LLC_CONFIRM_RKEY:
	case SMC_LLC_DELETE_RKEY:
		flow->type = SMC_LLC_FLOW_RKEY;
		break;
	default:
		flow->type = SMC_LLC_FLOW_NONE;
	}
	smc_llc_flow_qentry_set(flow, qentry);
	spin_unlock_bh(&lgr->llc_flow_lock);
	return true;
}

/* start a new local llc flow, wait till current flow finished */
int smc_llc_flow_initiate(struct smc_link_group *lgr,
			  enum smc_llc_flowtype type)
{
	enum smc_llc_flowtype allowed_remote = SMC_LLC_FLOW_NONE;
	int rc;

	/* all flows except confirm_rkey and delete_rkey are exclusive,
	 * confirm/delete rkey flows can run concurrently (local and remote)
	 */
	if (type == SMC_LLC_FLOW_RKEY)
		allowed_remote = SMC_LLC_FLOW_RKEY;
again:
	if (list_empty(&lgr->list))
		return -ENODEV;
	spin_lock_bh(&lgr->llc_flow_lock);
	if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
	    (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
	     lgr->llc_flow_rmt.type == allowed_remote)) {
		lgr->llc_flow_lcl.type = type;
		spin_unlock_bh(&lgr->llc_flow_lock);
		return 0;
	}
	spin_unlock_bh(&lgr->llc_flow_lock);
	rc = wait_event_timeout(lgr->llc_flow_waiter, (list_empty(&lgr->list) ||
				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
				 (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
				  lgr->llc_flow_rmt.type == allowed_remote))),
				SMC_LLC_WAIT_TIME * 10);
	if (!rc)
		return -ETIMEDOUT;
	goto again;
}

/* finish the current llc flow */
void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow)
{
	spin_lock_bh(&lgr->llc_flow_lock);
	memset(flow, 0, sizeof(*flow));
	flow->type = SMC_LLC_FLOW_NONE;
	spin_unlock_bh(&lgr->llc_flow_lock);
	if (!list_empty(&lgr->list) && lgr->delayed_event &&
	    flow == &lgr->llc_flow_lcl)
		schedule_work(&lgr->llc_event_work);
	else
		wake_up(&lgr->llc_flow_waiter);
}

/* lnk is optional and used for early wakeup when link goes down, useful in
 * cases where we wait for a response on the link after we sent a request
 */
struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
				    struct smc_link *lnk,
				    int time_out, u8 exp_msg)
{
	struct smc_llc_flow *flow = &lgr->llc_flow_lcl;
	u8 rcv_msg;

	wait_event_timeout(lgr->llc_msg_waiter,
			   (flow->qentry ||
			    (lnk && !smc_link_usable(lnk)) ||
			    list_empty(&lgr->list)),
			   time_out);
	if (!flow->qentry ||
	    (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) {
		smc_llc_flow_qentry_del(flow);
		goto out;
	}
	rcv_msg = flow->qentry->msg.raw.hdr.common.llc_type;
	if (exp_msg && rcv_msg != exp_msg) {
		if (exp_msg == SMC_LLC_ADD_LINK &&
		    rcv_msg == SMC_LLC_DELETE_LINK) {
			/* flow_start will delay the unexpected msg */
			smc_llc_flow_start(&lgr->llc_flow_lcl,
					   smc_llc_flow_qentry_clr(flow));
			return NULL;
		}
		pr_warn_once("smc: SMC-R lg %*phN net %llu dropped unexpected LLC msg: "
			     "msg %d exp %d flow %d role %d flags %x\n",
			     SMC_LGR_ID_SIZE, &lgr->id, lgr->net->net_cookie,
			     rcv_msg, exp_msg,
			     flow->type, lgr->role,
			     flow->qentry->msg.raw.hdr.flags);
		smc_llc_flow_qentry_del(flow);
	}
out:
	return flow->qentry;
}

/********************************** send *************************************/

struct smc_llc_tx_pend {
};

/* handler for send/transmission completion of an LLC msg */
static void smc_llc_tx_handler(struct smc_wr_tx_pend_priv *pend,
			       struct smc_link *link,
			       enum ib_wc_status wc_status)
{
	/* future work: handle wc_status error for recovery and failover */
}

/**
 * smc_llc_add_pending_send() - add LLC control message to pending WQE transmits
 * @link: Pointer to SMC link used for sending LLC control message.
 * @wr_buf: Out variable returning pointer to work request payload buffer.
 * @pend: Out variable returning pointer to private pending WR tracking.
 *	  It's the context the transmit complete handler will get.
 *
 * Reserves and pre-fills an entry for a pending work request send/tx.
 * Used by mid-level smc_llc_send_msg() to prepare for later actual send/tx.
 * Can sleep due to smc_get_ctrl_buf (if not in softirq context).
 *
 * Return: 0 on success, otherwise an error value.
 */
static int smc_llc_add_pending_send(struct smc_link *link,
				    struct smc_wr_buf **wr_buf,
				    struct smc_wr_tx_pend_priv **pend)
{
	int rc;

	rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL,
				     pend);
	if (rc < 0)
		return rc;
	BUILD_BUG_ON_MSG(
		sizeof(union smc_llc_msg) > SMC_WR_BUF_SIZE,
		"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_llc_msg)");
	BUILD_BUG_ON_MSG(
		sizeof(union smc_llc_msg) != SMC_WR_TX_SIZE,
		"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_llc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
	BUILD_BUG_ON_MSG(
		sizeof(struct smc_llc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
		"must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_llc_tx_pend)");
	return 0;
}

static int smc_llc_add_pending_send_v2(struct smc_link *link,
				       struct smc_wr_v2_buf **wr_buf,
				       struct smc_wr_tx_pend_priv **pend)
{
	int rc;

	rc = smc_wr_tx_get_v2_slot(link, smc_llc_tx_handler, wr_buf, pend);
	if (rc < 0)
		return rc;
	return 0;
}

static void smc_llc_init_msg_hdr(struct smc_llc_hdr *hdr,
				 struct smc_link_group *lgr, size_t len)
{
	if (lgr->smc_version == SMC_V2) {
		hdr->common.llc_version = SMC_V2;
		hdr->length_v2 = len;
	} else {
		hdr->common.llc_version = 0;
		hdr->length = len;
	}
}

/* high-level API to send LLC confirm link */
int smc_llc_send_confirm_link(struct smc_link *link,
			      enum smc_llc_reqresp reqresp)
{
	struct smc_llc_msg_confirm_link *confllc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
	memset(confllc, 0, sizeof(*confllc));
	confllc->hd.common.llc_type = SMC_LLC_CONFIRM_LINK;
	smc_llc_init_msg_hdr(&confllc->hd, link->lgr, sizeof(*confllc));
	confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
	if (reqresp == SMC_LLC_RESP)
		confllc->hd.flags |= SMC_LLC_FLAG_RESP;
	memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1],
	       ETH_ALEN);
	memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
	hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
	confllc->link_num = link->link_id;
	memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE);
	confllc->max_links = SMC_LINKS_ADD_LNK_MAX;
	if (link->lgr->smc_version == SMC_V2 &&
	    link->lgr->peer_smc_release >= SMC_RELEASE_1) {
		confllc->max_conns = link->lgr->max_conns;
		confllc->max_links = link->lgr->max_links;
	}
	/* send llc message */
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* send LLC confirm rkey request */
static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
				     struct smc_buf_desc *rmb_desc)
{
	struct smc_llc_msg_confirm_rkey *rkeyllc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	struct smc_link *link;
	int i, rc, rtok_ix;

	if (!smc_wr_tx_link_hold(send_link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
	memset(rkeyllc, 0, sizeof(*rkeyllc));
	rkeyllc->hd.common.llc_type = SMC_LLC_CONFIRM_RKEY;
	smc_llc_init_msg_hdr(&rkeyllc->hd, send_link->lgr, sizeof(*rkeyllc));

	rtok_ix = 1;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		link = &send_link->lgr->lnk[i];
		if (smc_link_active(link) && link != send_link) {
			rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
			rkeyllc->rtoken[rtok_ix].rmb_key =
				htonl(rmb_desc->mr[link->link_idx]->rkey);
			rkeyllc->rtoken[rtok_ix].rmb_vaddr = rmb_desc->is_vm ?
				cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) :
				cpu_to_be64((u64)sg_dma_address
					    (rmb_desc->sgt[link->link_idx].sgl));
			rtok_ix++;
		}
	}
	/* rkey of send_link is in rtoken[0] */
	rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1;
	rkeyllc->rtoken[0].rmb_key =
		htonl(rmb_desc->mr[send_link->link_idx]->rkey);
	rkeyllc->rtoken[0].rmb_vaddr = rmb_desc->is_vm ?
		cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) :
		cpu_to_be64((u64)sg_dma_address
			    (rmb_desc->sgt[send_link->link_idx].sgl));
	/* send llc message */
	rc = smc_wr_tx_send(send_link, pend);
put_out:
	smc_wr_tx_link_put(send_link);
	return rc;
}

/* send LLC delete rkey request */
static int smc_llc_send_delete_rkey(struct smc_link *link,
				    struct smc_buf_desc *rmb_desc)
{
	struct smc_llc_msg_delete_rkey *rkeyllc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
	memset(rkeyllc, 0, sizeof(*rkeyllc));
	rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY;
	smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc));
	rkeyllc->num_rkeys = 1;
	rkeyllc->rkey[0] = htonl(rmb_desc->mr[link->link_idx]->rkey);
	/* send llc message */
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* return first buffer from any of the next buf lists */
static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
						  int *buf_lst)
{
	struct smc_buf_desc *buf_pos;

	while (*buf_lst < SMC_RMBE_SIZES) {
		buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
						   struct smc_buf_desc, list);
		if (buf_pos)
			return buf_pos;
		(*buf_lst)++;
	}
	return NULL;
}

/* return next rmb from buffer lists */
static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
						 int *buf_lst,
						 struct smc_buf_desc *buf_pos)
{
	struct smc_buf_desc *buf_next;

	if (!buf_pos)
		return _smc_llc_get_next_rmb(lgr, buf_lst);

	if (list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
		(*buf_lst)++;
		return _smc_llc_get_next_rmb(lgr, buf_lst);
	}
	buf_next = list_next_entry(buf_pos, list);
	return buf_next;
}

static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
						  int *buf_lst)
{
	*buf_lst = 0;
	return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
}

static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
			       struct smc_link *link, struct smc_link *link_new)
{
	struct smc_link_group *lgr = link->lgr;
	struct smc_buf_desc *buf_pos;
	int prim_lnk_idx, lnk_idx, i;
	struct smc_buf_desc *rmb;
	int len = sizeof(*ext);
	int buf_lst;

	ext->v2_direct = !lgr->uses_gateway;
	memcpy(ext->client_target_gid, link_new->gid, SMC_GID_SIZE);

	prim_lnk_idx = link->link_idx;
	lnk_idx = link_new->link_idx;
	down_write(&lgr->rmbs_lock);
	ext->num_rkeys = lgr->conns_num;
	if (!ext->num_rkeys)
		goto out;
	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
	for (i = 0; i < ext->num_rkeys; i++) {
		while (buf_pos && !(buf_pos)->used)
			buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
		if (!buf_pos)
			break;
		rmb = buf_pos;
		ext->rt[i].rmb_key = htonl(rmb->mr[prim_lnk_idx]->rkey);
		ext->rt[i].rmb_key_new = htonl(rmb->mr[lnk_idx]->rkey);
		ext->rt[i].rmb_vaddr_new = rmb->is_vm ?
			cpu_to_be64((uintptr_t)rmb->cpu_addr) :
			cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
		buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
	}
	len += i * sizeof(ext->rt[0]);
out:
	up_write(&lgr->rmbs_lock);
	return len;
}

/* send ADD LINK request or response */
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
			  struct smc_link *link_new,
			  enum smc_llc_reqresp reqresp)
{
	struct smc_llc_msg_add_link_v2_ext *ext = NULL;
	struct smc_llc_msg_add_link *addllc;
	struct smc_wr_tx_pend_priv *pend;
	int len = sizeof(*addllc);
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	if (link->lgr->smc_version == SMC_V2) {
		struct smc_wr_v2_buf *wr_buf;

		rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
		if (rc)
			goto put_out;
		addllc = (struct smc_llc_msg_add_link *)wr_buf;
		ext = (struct smc_llc_msg_add_link_v2_ext *)
						&wr_buf->raw[sizeof(*addllc)];
		memset(ext, 0, SMC_WR_TX_SIZE);
	} else {
		struct smc_wr_buf *wr_buf;

		rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
		if (rc)
			goto put_out;
		addllc = (struct smc_llc_msg_add_link *)wr_buf;
	}

	memset(addllc, 0, sizeof(*addllc));
	addllc->hd.common.llc_type = SMC_LLC_ADD_LINK;
	if (reqresp == SMC_LLC_RESP)
		addllc->hd.flags |= SMC_LLC_FLAG_RESP;
	memcpy(addllc->sender_mac, mac, ETH_ALEN);
	memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
	if (link_new) {
		addllc->link_num = link_new->link_id;
		hton24(addllc->sender_qp_num, link_new->roce_qp->qp_num);
		hton24(addllc->initial_psn, link_new->psn_initial);
		if (reqresp == SMC_LLC_REQ)
			addllc->qp_mtu = link_new->path_mtu;
		else
			addllc->qp_mtu = min(link_new->path_mtu,
					     link_new->peer_mtu);
	}
	if (ext && link_new)
		len += smc_llc_fill_ext_v2(ext, link, link_new);
	smc_llc_init_msg_hdr(&addllc->hd, link->lgr, len);
	/* send llc message */
	if (link->lgr->smc_version == SMC_V2)
		rc = smc_wr_tx_v2_send(link, pend, len);
	else
		rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* send DELETE LINK request or response */
int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
			     enum smc_llc_reqresp reqresp, bool orderly,
			     u32 reason)
{
	struct smc_llc_msg_del_link *delllc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	delllc = (struct smc_llc_msg_del_link *)wr_buf;

	memset(delllc, 0, sizeof(*delllc));
	delllc->hd.common.llc_type = SMC_LLC_DELETE_LINK;
	smc_llc_init_msg_hdr(&delllc->hd, link->lgr, sizeof(*delllc));
	if (reqresp == SMC_LLC_RESP)
		delllc->hd.flags |= SMC_LLC_FLAG_RESP;
	if (orderly)
		delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
	if (link_del_id)
		delllc->link_num = link_del_id;
	else
		delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
	delllc->reason = htonl(reason);
	/* send llc message */
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* send LLC test link request */
static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
{
	struct smc_llc_msg_test_link *testllc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	testllc = (struct smc_llc_msg_test_link *)wr_buf;
	memset(testllc, 0, sizeof(*testllc));
	testllc->hd.common.llc_type = SMC_LLC_TEST_LINK;
	smc_llc_init_msg_hdr(&testllc->hd, link->lgr, sizeof(*testllc));
	memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
	/* send llc message */
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* schedule an llc send on link, may wait for buffers */
static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
{
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* schedule an llc send on link, may wait for buffers,
 * and wait for send completion notification.
 * @return 0 on success
 */
static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf)
{
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
	rc = smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/********************************* receive ***********************************/

static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
				  enum smc_lgr_type lgr_new_t)
{
	int i;

	if (lgr->type == SMC_LGR_SYMMETRIC ||
	    (lgr->type != SMC_LGR_SINGLE &&
	     (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
	      lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)))
		return -EMLINK;

	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) {
		for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--)
			if (lgr->lnk[i].state == SMC_LNK_UNUSED)
				return i;
	} else {
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
			if (lgr->lnk[i].state == SMC_LNK_UNUSED)
				return i;
	}
	return -EMLINK;
}

/* send one add_link_continue msg */
static int smc_llc_add_link_cont(struct smc_link *link,
				 struct smc_link *link_new, u8 *num_rkeys_todo,
				 int *buf_lst, struct smc_buf_desc **buf_pos)
{
	struct smc_llc_msg_add_link_cont *addc_llc;
	struct smc_link_group *lgr = link->lgr;
	int prim_lnk_idx, lnk_idx, i, rc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	struct smc_buf_desc *rmb;
	u8 n;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf;
	memset(addc_llc, 0, sizeof(*addc_llc));

	prim_lnk_idx = link->link_idx;
	lnk_idx = link_new->link_idx;
	addc_llc->link_num = link_new->link_id;
	addc_llc->num_rkeys = *num_rkeys_todo;
	n = *num_rkeys_todo;
	for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) {
		while (*buf_pos && !(*buf_pos)->used)
			*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
		if (!*buf_pos) {
			addc_llc->num_rkeys = addc_llc->num_rkeys -
					      *num_rkeys_todo;
			*num_rkeys_todo = 0;
			break;
		}
		rmb = *buf_pos;

		addc_llc->rt[i].rmb_key = htonl(rmb->mr[prim_lnk_idx]->rkey);
		addc_llc->rt[i].rmb_key_new = htonl(rmb->mr[lnk_idx]->rkey);
		addc_llc->rt[i].rmb_vaddr_new = rmb->is_vm ?
			cpu_to_be64((uintptr_t)rmb->cpu_addr) :
			cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));

		(*num_rkeys_todo)--;
		*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
	}
	addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT;
	addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
	if (lgr->role == SMC_CLNT)
		addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

static int smc_llc_cli_rkey_exchange(struct smc_link *link,
				     struct smc_link *link_new)
{
	struct smc_llc_msg_add_link_cont *addc_llc;
	struct smc_link_group *lgr = link->lgr;
	u8 max, num_rkeys_send, num_rkeys_recv;
	struct smc_llc_qentry *qentry;
	struct smc_buf_desc *buf_pos;
	int buf_lst;
	int rc = 0;
	int i;

	down_write(&lgr->rmbs_lock);
	num_rkeys_send = lgr->conns_num;
	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
	do {
		qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_TIME,
				      SMC_LLC_ADD_LINK_CONT);
		if (!qentry) {
			rc = -ETIMEDOUT;
			break;
		}
		addc_llc = &qentry->msg.add_link_cont;
		num_rkeys_recv = addc_llc->num_rkeys;
		max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
		for (i = 0; i < max; i++) {
			smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
				       addc_llc->rt[i].rmb_key,
				       addc_llc->rt[i].rmb_vaddr_new,
				       addc_llc->rt[i].rmb_key_new);
			num_rkeys_recv--;
		}
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		rc = smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
					   &buf_lst, &buf_pos);
		if (rc)
			break;
	} while (num_rkeys_send || num_rkeys_recv);

	up_write(&lgr->rmbs_lock);
	return rc;
}

/* prepare and send an add link reject response */
static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
{
	qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
	qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
	qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
	smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
			     sizeof(qentry->msg));
	return smc_llc_send_message(qentry->link, &qentry->msg);
}

static int smc_llc_cli_conf_link(struct smc_link *link,
				 struct smc_init_info *ini,
				 struct smc_link *link_new,
				 enum smc_lgr_type lgr_new_t)
{
	struct smc_link_group *lgr = link->lgr;
	struct smc_llc_qentry *qentry = NULL;
	int rc = 0;

	/* receive CONFIRM LINK request over RoCE fabric */
	qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_FIRST_TIME, 0);
	if (!qentry) {
		rc = smc_llc_send_delete_link(link, link_new->link_id,
					      SMC_LLC_REQ, false,
					      SMC_LLC_DEL_LOST_PATH);
		return -ENOLINK;
	}
	if (qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
		/* received DELETE_LINK instead */
		qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
		smc_llc_send_message(link, &qentry->msg);
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		return -ENOLINK;
	}
	smc_llc_save_peer_uid(qentry);
	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);

	rc = smc_ib_modify_qp_rts(link_new);
	if (rc) {
		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
					 false, SMC_LLC_DEL_LOST_PATH);
		return -ENOLINK;
	}
	smc_wr_remember_qp_attr(link_new);

	rc = smcr_buf_reg_lgr(link_new);
	if (rc) {
		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
					 false, SMC_LLC_DEL_LOST_PATH);
		return -ENOLINK;
	}

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link_new, SMC_LLC_RESP);
	if (rc) {
		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
					 false, SMC_LLC_DEL_LOST_PATH);
		return -ENOLINK;
	}
	smc_llc_link_active(link_new);
	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
		smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
	else
		smcr_lgr_set_type(lgr, lgr_new_t);
	return 0;
}

static void smc_llc_save_add_link_rkeys(struct smc_link *link,
					struct smc_link *link_new)
{
	struct smc_llc_msg_add_link_v2_ext *ext;
	struct smc_link_group *lgr = link->lgr;
	int max, i;

	ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
						     SMC_WR_TX_SIZE);
	max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
	down_write(&lgr->rmbs_lock);
	for (i = 0; i < max; i++) {
		smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
			       ext->rt[i].rmb_key,
			       ext->rt[i].rmb_vaddr_new,
			       ext->rt[i].rmb_key_new);
	}
	up_write(&lgr->rmbs_lock);
}

static void smc_llc_save_add_link_info(struct smc_link *link,
				       struct smc_llc_msg_add_link *add_llc)
{
	link->peer_qpn = ntoh24(add_llc->sender_qp_num);
	memcpy(link->peer_gid, add_llc->sender_gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, add_llc->sender_mac, ETH_ALEN);
	link->peer_psn = ntoh24(add_llc->initial_psn);
	link->peer_mtu = add_llc->qp_mtu;
}

/* as an SMC client, process an add link request */
int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
{
	struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
	struct smc_link_group *lgr = smc_get_lgr(link);
	struct smc_init_info *ini = NULL;
	struct smc_link *lnk_new = NULL;
	int lnk_idx, rc = 0;

	if (!llc->qp_mtu)
		goto out_reject;

	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
	if (!ini) {
		rc = -ENOMEM;
		goto out_reject;
	}

	if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
		rc = 0;
		goto out_reject;
	}

	ini->vlan_id = lgr->vlan_id;
	if (lgr->smc_version == SMC_V2) {
		ini->check_smcrv2 = true;
		ini->smcrv2.saddr = lgr->saddr;
		ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid);
	}
	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
	if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
	    (lgr->smc_version == SMC_V2 ||
	     !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN))) {
		if (!ini->ib_dev && !ini->smcrv2.ib_dev_v2)
			goto out_reject;
		lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
	}
	if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
		ini->smcrv2.ib_dev_v2 = link->smcibdev;
		ini->smcrv2.ib_port_v2 = link->ibport;
	} else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
		ini->ib_dev = link->smcibdev;
		ini->ib_port = link->ibport;
	}
	lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
	if (lnk_idx < 0)
		goto out_reject;
	lnk_new = &lgr->lnk[lnk_idx];
	rc = smcr_link_init(lgr, lnk_new, lnk_idx, ini);
	if (rc)
		goto out_reject;
	smc_llc_save_add_link_info(lnk_new, llc);
	lnk_new->link_id = llc->link_num;	/* SMC server assigns link id */
	smc_llc_link_set_uid(lnk_new);

	rc = smc_ib_ready_link(lnk_new);
	if (rc)
		goto out_clear_lnk;

	rc = smcr_buf_map_lgr(lnk_new);
	if (rc)
		goto out_clear_lnk;

	rc = smc_llc_send_add_link(link,
				   lnk_new->smcibdev->mac[lnk_new->ibport - 1],
				   lnk_new->gid, lnk_new, SMC_LLC_RESP);
	if (rc)
		goto out_clear_lnk;
	if (lgr->smc_version == SMC_V2) {
		smc_llc_save_add_link_rkeys(link, lnk_new);
	} else {
		rc = smc_llc_cli_rkey_exchange(link, lnk_new);
		if (rc) {
			rc = 0;
			goto out_clear_lnk;
		}
	}
	rc = smc_llc_cli_conf_link(link, ini, lnk_new, lgr_new_t);
	if (!rc)
		goto out;
out_clear_lnk:
	lnk_new->state = SMC_LNK_INACTIVE;
	smcr_link_clear(lnk_new, false);
out_reject:
	smc_llc_cli_add_link_reject(qentry);
out:
	kfree(ini);
	kfree(qentry);
	return rc;
}

static void smc_llc_send_request_add_link(struct smc_link *link)
{
	struct smc_llc_msg_req_add_link_v2 *llc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_v2_buf *wr_buf;
	struct smc_gidlist gidlist;
	int rc, len, i;

	if (!smc_wr_tx_link_hold(link))
		return;
	if (link->lgr->type == SMC_LGR_SYMMETRIC ||
	    link->lgr->type == SMC_LGR_ASYMMETRIC_PEER)
		goto put_out;

	smc_fill_gid_list(link->lgr, &gidlist, link->smcibdev, link->gid);
	if (gidlist.len <= 1)
		goto put_out;

	rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	llc = (struct smc_llc_msg_req_add_link_v2 *)wr_buf;
	memset(llc, 0, SMC_WR_TX_SIZE);

	llc->hd.common.llc_type = SMC_LLC_REQ_ADD_LINK;
	for (i = 0; i < gidlist.len; i++)
		memcpy(llc->gid[i], gidlist.list[i], sizeof(gidlist.list[0]));
	llc->gid_cnt = gidlist.len;
	len = sizeof(*llc) + (gidlist.len * sizeof(gidlist.list[0]));
	smc_llc_init_msg_hdr(&llc->hd, link->lgr, len);
	rc = smc_wr_tx_v2_send(link, pend, len);
	if (!rc)
		/* set REQ_ADD_LINK flow and wait for response from peer */
		link->lgr->llc_flow_lcl.type = SMC_LLC_FLOW_REQ_ADD_LINK;
put_out:
	smc_wr_tx_link_put(link);
}

/* as an SMC client, invite server to start the add_link processing */
static void smc_llc_cli_add_link_invite(struct smc_link *link,
					struct smc_llc_qentry *qentry)
{
	struct smc_link_group *lgr = smc_get_lgr(link);
	struct smc_init_info *ini = NULL;

	if (lgr->smc_version == SMC_V2) {
		smc_llc_send_request_add_link(link);
		goto out;
	}

	if (lgr->type == SMC_LGR_SYMMETRIC ||
	    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
		goto out;

	if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
		goto out;

	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
	if (!ini)
		goto out;

	ini->vlan_id = lgr->vlan_id;
	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
	if (!ini->ib_dev)
		goto out;

	smc_llc_send_add_link(link, ini->ib_dev->mac[ini->ib_port - 1],
			      ini->ib_gid, NULL, SMC_LLC_REQ);
out:
	kfree(ini);
	kfree(qentry);
}

static bool smc_llc_is_empty_llc_message(union smc_llc_msg *llc)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(llc->raw.data); i++)
		if (llc->raw.data[i])
			return false;
	return true;
}

static bool smc_llc_is_local_add_link(union smc_llc_msg *llc)
{
	if (llc->raw.hdr.common.llc_type == SMC_LLC_ADD_LINK &&
	    smc_llc_is_empty_llc_message(llc))
		return true;
	return false;
}

static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
{
	struct smc_llc_qentry *qentry;

	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);

	down_write(&lgr->llc_conf_mutex);
	if (smc_llc_is_local_add_link(&qentry->msg))
		smc_llc_cli_add_link_invite(qentry->link, qentry);
	else
		smc_llc_cli_add_link(qentry->link, qentry);
	up_write(&lgr->llc_conf_mutex);
}

static int smc_llc_active_link_count(struct smc_link_group *lgr)
{
	int i, link_count = 0;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		link_count++;
	}
	return link_count;
}

/* find the asymmetric link when 3 links are established  */
static struct smc_link *smc_llc_find_asym_link(struct smc_link_group *lgr)
{
	int asym_idx = -ENOENT;
	int i, j, k;
	bool found;

	/* determine asymmetric link */
	found = false;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
			if (!smc_link_usable(&lgr->lnk[i]) ||
			    !smc_link_usable(&lgr->lnk[j]))
				continue;
			if (!memcmp(lgr->lnk[i].gid, lgr->lnk[j].gid,
				    SMC_GID_SIZE)) {
				found = true;	/* asym_lnk is i or j */
				break;
			}
		}
		if (found)
			break;
	}
	if (!found)
		goto out; /* no asymmetric link */
	for (k = 0; k < SMC_LINKS_PER_LGR_MAX; k++) {
		if (!smc_link_usable(&lgr->lnk[k]))
			continue;
		if (k != i &&
		    !memcmp(lgr->lnk[i].peer_gid, lgr->lnk[k].peer_gid,
			    SMC_GID_SIZE)) {
			asym_idx = i;
			break;
		}
		if (k != j &&
		    !memcmp(lgr->lnk[j].peer_gid, lgr->lnk[k].peer_gid,
			    SMC_GID_SIZE)) {
			asym_idx = j;
			break;
		}
	}
out:
	return (asym_idx < 0) ? NULL : &lgr->lnk[asym_idx];
}

static void smc_llc_delete_asym_link(struct smc_link_group *lgr)
{
	struct smc_link *lnk_new = NULL, *lnk_asym;
	struct smc_llc_qentry *qentry;
	int rc;

	lnk_asym = smc_llc_find_asym_link(lgr);
	if (!lnk_asym)
		return; /* no asymmetric link */
	if (!smc_link_downing(&lnk_asym->state))
		return;
	lnk_new = smc_switch_conns(lgr, lnk_asym, false);
	smc_wr_tx_wait_no_pending_sends(lnk_asym);
	if (!lnk_new)
		goto out_free;
	/* change flow type from ADD_LINK into DEL_LINK */
	lgr->llc_flow_lcl.type = SMC_LLC_FLOW_DEL_LINK;
	rc = smc_llc_send_delete_link(lnk_new, lnk_asym->link_id, SMC_LLC_REQ,
				      true, SMC_LLC_DEL_NO_ASYM_NEEDED);
	if (rc) {
		smcr_link_down_cond(lnk_new);
		goto out_free;
	}
	qentry = smc_llc_wait(lgr, lnk_new, SMC_LLC_WAIT_TIME,
			      SMC_LLC_DELETE_LINK);
	if (!qentry) {
		smcr_link_down_cond(lnk_new);
		goto out_free;
	}
	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
out_free:
	smcr_link_clear(lnk_asym, true);
}

static int smc_llc_srv_rkey_exchange(struct smc_link *link,
				     struct smc_link *link_new)
{
	struct smc_llc_msg_add_link_cont *addc_llc;
	struct smc_link_group *lgr = link->lgr;
	u8 max, num_rkeys_send, num_rkeys_recv;
	struct smc_llc_qentry *qentry = NULL;
	struct smc_buf_desc *buf_pos;
	int buf_lst;
	int rc = 0;
	int i;

	down_write(&lgr->rmbs_lock);
	num_rkeys_send = lgr->conns_num;
	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
	do {
		smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
				      &buf_lst, &buf_pos);
		qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME,
				      SMC_LLC_ADD_LINK_CONT);
		if (!qentry) {
			rc = -ETIMEDOUT;
			goto out;
		}
		addc_llc = &qentry->msg.add_link_cont;
		num_rkeys_recv = addc_llc->num_rkeys;
		max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
		for (i = 0; i < max; i++) {
			smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
				       addc_llc->rt[i].rmb_key,
				       addc_llc->rt[i].rmb_vaddr_new,
				       addc_llc->rt[i].rmb_key_new);
			num_rkeys_recv--;
		}
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
	} while (num_rkeys_send || num_rkeys_recv);
out:
	up_write(&lgr->rmbs_lock);
	return rc;
}

static int smc_llc_srv_conf_link(struct smc_link *link,
				 struct smc_link *link_new,
				 enum smc_lgr_type lgr_new_t)
{
	struct smc_link_group *lgr = link->lgr;
	struct smc_llc_qentry *qentry = NULL;
	int rc;

	/* send CONFIRM LINK request over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link_new, SMC_LLC_REQ);
	if (rc)
		return -ENOLINK;
	/* receive CONFIRM LINK response over the RoCE fabric */
	qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0);
	if (!qentry ||
	    qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
		/* send DELETE LINK */
		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
					 false, SMC_LLC_DEL_LOST_PATH);
		if (qentry)
			smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		return -ENOLINK;
	}
	smc_llc_save_peer_uid(qentry);
	smc_llc_link_active(link_new);
	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
		smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
	else
		smcr_lgr_set_type(lgr, lgr_new_t);
	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
	return 0;
}

static void smc_llc_send_req_add_link_response(struct smc_llc_qentry *qentry)
{
	qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
	smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
			     sizeof(qentry->msg));
	memset(&qentry->msg.raw.data, 0, sizeof(qentry->msg.raw.data));
	smc_llc_send_message(qentry->link, &qentry->msg);
}

int smc_llc_srv_add_link(struct smc_link *link,
			 struct smc_llc_qentry *req_qentry)
{
	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
	struct smc_link_group *lgr = link->lgr;
	struct smc_llc_msg_add_link *add_llc;
	struct smc_llc_qentry *qentry = NULL;
	bool send_req_add_link_resp = false;
	struct smc_link *link_new = NULL;
	struct smc_init_info *ini = NULL;
	int lnk_idx, rc = 0;

	if (req_qentry &&
	    req_qentry->msg.raw.hdr.common.llc_type == SMC_LLC_REQ_ADD_LINK)
		send_req_add_link_resp = true;

	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
	if (!ini) {
		rc = -ENOMEM;
		goto out;
	}

	if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
		rc = 0;
		goto out;
	}

	/* ignore client add link recommendation, start new flow */
	ini->vlan_id = lgr->vlan_id;
	if (lgr->smc_version == SMC_V2) {
		ini->check_smcrv2 = true;
		ini->smcrv2.saddr = lgr->saddr;
		if (send_req_add_link_resp) {
			struct smc_llc_msg_req_add_link_v2 *req_add =
				&req_qentry->msg.req_add_link;

			ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]);
		}
	}
	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
	if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
		ini->smcrv2.ib_dev_v2 = link->smcibdev;
		ini->smcrv2.ib_port_v2 = link->ibport;
	} else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
		ini->ib_dev = link->smcibdev;
		ini->ib_port = link->ibport;
	}
	lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
	if (lnk_idx < 0) {
		rc = 0;
		goto out;
	}

	rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, ini);
	if (rc)
		goto out;
	link_new = &lgr->lnk[lnk_idx];

	rc = smcr_buf_map_lgr(link_new);
	if (rc)
		goto out_err;

	rc = smc_llc_send_add_link(link,
				   link_new->smcibdev->mac[link_new->ibport-1],
				   link_new->gid, link_new, SMC_LLC_REQ);
	if (rc)
		goto out_err;
	send_req_add_link_resp = false;
	/* receive ADD LINK response over the RoCE fabric */
	qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK);
	if (!qentry) {
		rc = -ETIMEDOUT;
		goto out_err;
	}
	add_llc = &qentry->msg.add_link;
	if (add_llc->hd.flags & SMC_LLC_FLAG_ADD_LNK_REJ) {
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		rc = -ENOLINK;
		goto out_err;
	}
	if (lgr->type == SMC_LGR_SINGLE &&
	    (!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
	     (lgr->smc_version == SMC_V2 ||
	      !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN)))) {
		lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
	}
	smc_llc_save_add_link_info(link_new, add_llc);
	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);

	rc = smc_ib_ready_link(link_new);
	if (rc)
		goto out_err;
	rc = smcr_buf_reg_lgr(link_new);
	if (rc)
		goto out_err;
	if (lgr->smc_version == SMC_V2) {
		smc_llc_save_add_link_rkeys(link, link_new);
	} else {
		rc = smc_llc_srv_rkey_exchange(link, link_new);
		if (rc)
			goto out_err;
	}
	rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t);
	if (rc)
		goto out_err;
	kfree(ini);
	return 0;
out_err:
	if (link_new) {
		link_new->state = SMC_LNK_INACTIVE;
		smcr_link_clear(link_new, false);
	}
out:
	kfree(ini);
	if (send_req_add_link_resp)
		smc_llc_send_req_add_link_response(req_qentry);
	return rc;
}

static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
{
	struct smc_link *link = lgr->llc_flow_lcl.qentry->link;
	struct smc_llc_qentry *qentry;
	int rc;

	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);

	down_write(&lgr->llc_conf_mutex);
	rc = smc_llc_srv_add_link(link, qentry);
	if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
		/* delete any asymmetric link */
		smc_llc_delete_asym_link(lgr);
	}
	up_write(&lgr->llc_conf_mutex);
	kfree(qentry);
}

/* enqueue a local add_link req to trigger a new add_link flow */
void smc_llc_add_link_local(struct smc_link *link)
{
	struct smc_llc_msg_add_link add_llc = {};

	add_llc.hd.common.llc_type = SMC_LLC_ADD_LINK;
	smc_llc_init_msg_hdr(&add_llc.hd, link->lgr, sizeof(add_llc));
	/* no dev and port needed */
	smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc);
}

/* worker to process an add link message */
static void smc_llc_add_link_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  llc_add_link_work);

	if (list_empty(&lgr->list)) {
		/* link group is terminating */
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		goto out;
	}

	if (lgr->role == SMC_CLNT)
		smc_llc_process_cli_add_link(lgr);
	else
		smc_llc_process_srv_add_link(lgr);
out:
	if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_REQ_ADD_LINK)
		smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}

/* enqueue a local del_link msg to trigger a new del_link flow,
 * called only for role SMC_SERV
 */
void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
{
	struct smc_llc_msg_del_link del_llc = {};

	del_llc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
	smc_llc_init_msg_hdr(&del_llc.hd, link->lgr, sizeof(del_llc));
	del_llc.link_num = del_link_id;
	del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH);
	del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
	smc_llc_enqueue(link, (union smc_llc_msg *)&del_llc);
}

static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
{
	struct smc_link *lnk_del = NULL, *lnk_asym, *lnk;
	struct smc_llc_msg_del_link *del_llc;
	struct smc_llc_qentry *qentry;
	int active_links;
	int lnk_idx;

	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
	lnk = qentry->link;
	del_llc = &qentry->msg.delete_link;

	if (del_llc->hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
		smc_lgr_terminate_sched(lgr);
		goto out;
	}
	down_write(&lgr->llc_conf_mutex);
	/* delete single link */
	for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) {
		if (lgr->lnk[lnk_idx].link_id != del_llc->link_num)
			continue;
		lnk_del = &lgr->lnk[lnk_idx];
		break;
	}
	del_llc->hd.flags |= SMC_LLC_FLAG_RESP;
	if (!lnk_del) {
		/* link was not found */
		del_llc->reason = htonl(SMC_LLC_DEL_NOLNK);
		smc_llc_send_message(lnk, &qentry->msg);
		goto out_unlock;
	}
	lnk_asym = smc_llc_find_asym_link(lgr);

	del_llc->reason = 0;
	smc_llc_send_message(lnk, &qentry->msg); /* response */

	if (smc_link_downing(&lnk_del->state))
		smc_switch_conns(lgr, lnk_del, false);
	smcr_link_clear(lnk_del, true);

	active_links = smc_llc_active_link_count(lgr);
	if (lnk_del == lnk_asym) {
		/* expected deletion of asym link, don't change lgr state */
	} else if (active_links == 1) {
		smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	} else if (!active_links) {
		smcr_lgr_set_type(lgr, SMC_LGR_NONE);
		smc_lgr_terminate_sched(lgr);
	}
out_unlock:
	up_write(&lgr->llc_conf_mutex);
out:
	kfree(qentry);
}

/* try to send a DELETE LINK ALL request on any active link,
 * waiting for send completion
 */
void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
{
	struct smc_llc_msg_del_link delllc = {};
	int i;

	delllc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
	smc_llc_init_msg_hdr(&delllc.hd, lgr, sizeof(delllc));
	if (ord)
		delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
	delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
	delllc.reason = htonl(rsn);

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_sendable(&lgr->lnk[i]))
			continue;
		if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
			break;
	}
}

static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
{
	struct smc_llc_msg_del_link *del_llc;
	struct smc_link *lnk, *lnk_del;
	struct smc_llc_qentry *qentry;
	int active_links;
	int i;

	down_write(&lgr->llc_conf_mutex);
	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
	lnk = qentry->link;
	del_llc = &qentry->msg.delete_link;

	if (qentry->msg.delete_link.hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
		/* delete entire lgr */
		smc_llc_send_link_delete_all(lgr, true, ntohl(
					      qentry->msg.delete_link.reason));
		smc_lgr_terminate_sched(lgr);
		goto out;
	}
	/* delete single link */
	lnk_del = NULL;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].link_id == del_llc->link_num) {
			lnk_del = &lgr->lnk[i];
			break;
		}
	}
	if (!lnk_del)
		goto out; /* asymmetric link already deleted */

	if (smc_link_downing(&lnk_del->state)) {
		if (smc_switch_conns(lgr, lnk_del, false))
			smc_wr_tx_wait_no_pending_sends(lnk_del);
	}
	if (!list_empty(&lgr->list)) {
		/* qentry is either a request from peer (send it back to
		 * initiate the DELETE_LINK processing), or a locally
		 * enqueued DELETE_LINK request (forward it)
		 */
		if (!smc_llc_send_message(lnk, &qentry->msg)) {
			struct smc_llc_qentry *qentry2;

			qentry2 = smc_llc_wait(lgr, lnk, SMC_LLC_WAIT_TIME,
					       SMC_LLC_DELETE_LINK);
			if (qentry2)
				smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		}
	}
	smcr_link_clear(lnk_del, true);

	active_links = smc_llc_active_link_count(lgr);
	if (active_links == 1) {
		smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	} else if (!active_links) {
		smcr_lgr_set_type(lgr, SMC_LGR_NONE);
		smc_lgr_terminate_sched(lgr);
	}

	if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) {
		/* trigger setup of asymm alt link */
		smc_llc_add_link_local(lnk);
	}
out:
	up_write(&lgr->llc_conf_mutex);
	kfree(qentry);
}

static void smc_llc_delete_link_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  llc_del_link_work);

	if (list_empty(&lgr->list)) {
		/* link group is terminating */
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		goto out;
	}

	if (lgr->role == SMC_CLNT)
		smc_llc_process_cli_delete_link(lgr);
	else
		smc_llc_process_srv_delete_link(lgr);
out:
	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}

/* process a confirm_rkey request from peer, remote flow */
static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
{
	struct smc_llc_msg_confirm_rkey *llc;
	struct smc_llc_qentry *qentry;
	struct smc_link *link;
	int num_entries;
	int rk_idx;
	int i;

	qentry = lgr->llc_flow_rmt.qentry;
	llc = &qentry->msg.confirm_rkey;
	link = qentry->link;

	num_entries = llc->rtoken[0].num_rkeys;
	if (num_entries > SMC_LLC_RKEYS_PER_MSG)
		goto out_err;
	/* first rkey entry is for receiving link */
	rk_idx = smc_rtoken_add(link,
				llc->rtoken[0].rmb_vaddr,
				llc->rtoken[0].rmb_key);
	if (rk_idx < 0)
		goto out_err;

	for (i = 1; i <= min_t(u8, num_entries, SMC_LLC_RKEYS_PER_MSG - 1); i++)
		smc_rtoken_set2(lgr, rk_idx, llc->rtoken[i].link_id,
				llc->rtoken[i].rmb_vaddr,
				llc->rtoken[i].rmb_key);
	/* max links is 3 so there is no need to support conf_rkey_cont msgs */
	goto out;
out_err:
	llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
	llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
out:
	llc->hd.flags |= SMC_LLC_FLAG_RESP;
	smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
	smc_llc_send_message(link, &qentry->msg);
	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
}

/* process a delete_rkey request from peer, remote flow */
static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
{
	struct smc_llc_msg_delete_rkey *llc;
	struct smc_llc_qentry *qentry;
	struct smc_link *link;
	u8 err_mask = 0;
	int i, max;

	qentry = lgr->llc_flow_rmt.qentry;
	llc = &qentry->msg.delete_rkey;
	link = qentry->link;

	if (lgr->smc_version == SMC_V2) {
		struct smc_llc_msg_delete_rkey_v2 *llcv2;

		memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc));
		llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2;
		llcv2->num_inval_rkeys = 0;

		max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
		for (i = 0; i < max; i++) {
			if (smc_rtoken_delete(link, llcv2->rkey[i]))
				llcv2->num_inval_rkeys++;
		}
		memset(&llc->rkey[0], 0, sizeof(llc->rkey));
		memset(&llc->reserved2, 0, sizeof(llc->reserved2));
		smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
		if (llcv2->num_inval_rkeys) {
			llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
			llc->err_mask = llcv2->num_inval_rkeys;
		}
		goto finish;
	}

	max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
	for (i = 0; i < max; i++) {
		if (smc_rtoken_delete(link, llc->rkey[i]))
			err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
	}
	if (err_mask) {
		llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
		llc->err_mask = err_mask;
	}
finish:
	llc->hd.flags |= SMC_LLC_FLAG_RESP;
	smc_llc_send_message(link, &qentry->msg);
	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
}

static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type)
{
	pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu LLC protocol violation: "
			    "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr->net->net_cookie, type);
	smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL);
	smc_lgr_terminate_sched(lgr);
}

/* flush the llc event queue */
static void smc_llc_event_flush(struct smc_link_group *lgr)
{
	struct smc_llc_qentry *qentry, *q;

	spin_lock_bh(&lgr->llc_event_q_lock);
	list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) {
		list_del_init(&qentry->list);
		kfree(qentry);
	}
	spin_unlock_bh(&lgr->llc_event_q_lock);
}

static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
{
	union smc_llc_msg *llc = &qentry->msg;
	struct smc_link *link = qentry->link;
	struct smc_link_group *lgr = link->lgr;

	if (!smc_link_usable(link))
		goto out;

	switch (llc->raw.hdr.common.llc_type) {
	case SMC_LLC_TEST_LINK:
		llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
		smc_llc_send_message(link, llc);
		break;
	case SMC_LLC_ADD_LINK:
		if (list_empty(&lgr->list))
			goto out;	/* lgr is terminating */
		if (lgr->role == SMC_CLNT) {
			if (smc_llc_is_local_add_link(llc)) {
				if (lgr->llc_flow_lcl.type ==
				    SMC_LLC_FLOW_ADD_LINK)
					break;	/* add_link in progress */
				if (smc_llc_flow_start(&lgr->llc_flow_lcl,
						       qentry)) {
					schedule_work(&lgr->llc_add_link_work);
				}
				return;
			}
			if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK &&
			    !lgr->llc_flow_lcl.qentry) {
				/* a flow is waiting for this message */
				smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
							qentry);
				wake_up(&lgr->llc_msg_waiter);
				return;
			}
			if (lgr->llc_flow_lcl.type ==
					SMC_LLC_FLOW_REQ_ADD_LINK) {
				/* server started add_link processing */
				lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK;
				smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
							qentry);
				schedule_work(&lgr->llc_add_link_work);
				return;
			}
			if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
				schedule_work(&lgr->llc_add_link_work);
			}
		} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
			/* as smc server, handle client suggestion */
			schedule_work(&lgr->llc_add_link_work);
		}
		return;
	case SMC_LLC_CONFIRM_LINK:
	case SMC_LLC_ADD_LINK_CONT:
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* a flow is waiting for this message */
			smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
			wake_up(&lgr->llc_msg_waiter);
			return;
		}
		break;
	case SMC_LLC_DELETE_LINK:
		if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK &&
		    !lgr->llc_flow_lcl.qentry) {
			/* DEL LINK REQ during ADD LINK SEQ */
			smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
			wake_up(&lgr->llc_msg_waiter);
		} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
			schedule_work(&lgr->llc_del_link_work);
		}
		return;
	case SMC_LLC_CONFIRM_RKEY:
		/* new request from remote, assign to remote flow */
		if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
			/* process here, does not wait for more llc msgs */
			smc_llc_rmt_conf_rkey(lgr);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
		}
		return;
	case SMC_LLC_CONFIRM_RKEY_CONT:
		/* not used because max links is 3, and 3 rkeys fit into
		 * one CONFIRM_RKEY message
		 */
		break;
	case SMC_LLC_DELETE_RKEY:
		/* new request from remote, assign to remote flow */
		if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
			/* process here, does not wait for more llc msgs */
			smc_llc_rmt_delete_rkey(lgr);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
		}
		return;
	case SMC_LLC_REQ_ADD_LINK:
		/* handle response here, smc_llc_flow_stop() cannot be called
		 * in tasklet context
		 */
		if (lgr->role == SMC_CLNT &&
		    lgr->llc_flow_lcl.type == SMC_LLC_FLOW_REQ_ADD_LINK &&
		    (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP)) {
			smc_llc_flow_stop(link->lgr, &lgr->llc_flow_lcl);
		} else if (lgr->role == SMC_SERV) {
			if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
				/* as smc server, handle client suggestion */
				lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK;
				schedule_work(&lgr->llc_add_link_work);
			}
			return;
		}
		break;
	default:
		smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type);
		break;
	}
out:
	kfree(qentry);
}

/* worker to process llc messages on the event queue */
static void smc_llc_event_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  llc_event_work);
	struct smc_llc_qentry *qentry;

	if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
		qentry = lgr->delayed_event;
		lgr->delayed_event = NULL;
		if (smc_link_usable(qentry->link))
			smc_llc_event_handler(qentry);
		else
			kfree(qentry);
	}

again:
	spin_lock_bh(&lgr->llc_event_q_lock);
	if (!list_empty(&lgr->llc_event_q)) {
		qentry = list_first_entry(&lgr->llc_event_q,
					  struct smc_llc_qentry, list);
		list_del_init(&qentry->list);
		spin_unlock_bh(&lgr->llc_event_q_lock);
		smc_llc_event_handler(qentry);
		goto again;
	}
	spin_unlock_bh(&lgr->llc_event_q_lock);
}

/* process llc responses in tasklet context */
static void smc_llc_rx_response(struct smc_link *link,
				struct smc_llc_qentry *qentry)
{
	enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type;
	struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl;
	u8 llc_type = qentry->msg.raw.hdr.common.llc_type;

	switch (llc_type) {
	case SMC_LLC_TEST_LINK:
		if (smc_link_active(link))
			complete(&link->llc_testlink_resp);
		break;
	case SMC_LLC_ADD_LINK:
	case SMC_LLC_ADD_LINK_CONT:
	case SMC_LLC_CONFIRM_LINK:
		if (flowtype != SMC_LLC_FLOW_ADD_LINK || flow->qentry)
			break;	/* drop out-of-flow response */
		goto assign;
	case SMC_LLC_DELETE_LINK:
		if (flowtype != SMC_LLC_FLOW_DEL_LINK || flow->qentry)
			break;	/* drop out-of-flow response */
		goto assign;
	case SMC_LLC_CONFIRM_RKEY:
	case SMC_LLC_DELETE_RKEY:
		if (flowtype != SMC_LLC_FLOW_RKEY || flow->qentry)
			break;	/* drop out-of-flow response */
		goto assign;
	case SMC_LLC_CONFIRM_RKEY_CONT:
		/* not used because max links is 3 */
		break;
	default:
		smc_llc_protocol_violation(link->lgr,
					   qentry->msg.raw.hdr.common.type);
		break;
	}
	kfree(qentry);
	return;
assign:
	/* assign responses to the local flow, we requested them */
	smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
	wake_up(&link->lgr->llc_msg_waiter);
}

static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
{
	struct smc_link_group *lgr = link->lgr;
	struct smc_llc_qentry *qentry;
	unsigned long flags;

	qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
	if (!qentry)
		return;
	qentry->link = link;
	INIT_LIST_HEAD(&qentry->list);
	memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));

	/* process responses immediately */
	if ((llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) &&
	    llc->raw.hdr.common.llc_type != SMC_LLC_REQ_ADD_LINK) {
		smc_llc_rx_response(link, qentry);
		return;
	}

	/* add requests to event queue */
	spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
	list_add_tail(&qentry->list, &lgr->llc_event_q);
	spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
	queue_work(system_highpri_wq, &lgr->llc_event_work);
}

/* copy received msg and add it to the event queue */
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	union smc_llc_msg *llc = buf;

	if (wc->byte_len < sizeof(*llc))
		return; /* short message */
	if (!llc->raw.hdr.common.llc_version) {
		if (llc->raw.hdr.length != sizeof(*llc))
			return; /* invalid message */
	} else {
		if (llc->raw.hdr.length_v2 < sizeof(*llc))
			return; /* invalid message */
	}

	smc_llc_enqueue(link, llc);
}

/***************************** worker, utils *********************************/

static void smc_llc_testlink_work(struct work_struct *work)
{
	struct smc_link *link = container_of(to_delayed_work(work),
					     struct smc_link, llc_testlink_wrk);
	unsigned long next_interval;
	unsigned long expire_time;
	u8 user_data[16] = { 0 };
	int rc;

	if (!smc_link_active(link))
		return;		/* don't reschedule worker */
	expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
	if (time_is_after_jiffies(expire_time)) {
		next_interval = expire_time - jiffies;
		goto out;
	}
	reinit_completion(&link->llc_testlink_resp);
	smc_llc_send_test_link(link, user_data);
	/* receive TEST LINK response over RoCE fabric */
	rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
						       SMC_LLC_WAIT_TIME);
	if (!smc_link_active(link))
		return;		/* link state changed */
	if (rc <= 0) {
		smcr_link_down_cond_sched(link);
		return;
	}
	next_interval = link->llc_testlink_time;
out:
	schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
}

void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);

	INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
	INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work);
	INIT_WORK(&lgr->llc_del_link_work, smc_llc_delete_link_work);
	INIT_LIST_HEAD(&lgr->llc_event_q);
	spin_lock_init(&lgr->llc_event_q_lock);
	spin_lock_init(&lgr->llc_flow_lock);
	init_waitqueue_head(&lgr->llc_flow_waiter);
	init_waitqueue_head(&lgr->llc_msg_waiter);
	init_rwsem(&lgr->llc_conf_mutex);
	lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time);
}

/* called after lgr was removed from lgr_list */
void smc_llc_lgr_clear(struct smc_link_group *lgr)
{
	smc_llc_event_flush(lgr);
	wake_up_all(&lgr->llc_flow_waiter);
	wake_up_all(&lgr->llc_msg_waiter);
	cancel_work_sync(&lgr->llc_event_work);
	cancel_work_sync(&lgr->llc_add_link_work);
	cancel_work_sync(&lgr->llc_del_link_work);
	if (lgr->delayed_event) {
		kfree(lgr->delayed_event);
		lgr->delayed_event = NULL;
	}
}

int smc_llc_link_init(struct smc_link *link)
{
	init_completion(&link->llc_testlink_resp);
	INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
	return 0;
}

void smc_llc_link_active(struct smc_link *link)
{
	pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu link added: id %*phN, "
			    "peerid %*phN, ibdev %s, ibport %d\n",
			    SMC_LGR_ID_SIZE, &link->lgr->id,
			    link->lgr->net->net_cookie,
			    SMC_LGR_ID_SIZE, &link->link_uid,
			    SMC_LGR_ID_SIZE, &link->peer_link_uid,
			    link->smcibdev->ibdev->name, link->ibport);
	link->state = SMC_LNK_ACTIVE;
	if (link->lgr->llc_testlink_time) {
		link->llc_testlink_time = link->lgr->llc_testlink_time;
		schedule_delayed_work(&link->llc_testlink_wrk,
				      link->llc_testlink_time);
	}
}

/* called in worker context */
void smc_llc_link_clear(struct smc_link *link, bool log)
{
	if (log)
		pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu link removed: id %*phN"
				    ", peerid %*phN, ibdev %s, ibport %d\n",
				    SMC_LGR_ID_SIZE, &link->lgr->id,
				    link->lgr->net->net_cookie,
				    SMC_LGR_ID_SIZE, &link->link_uid,
				    SMC_LGR_ID_SIZE, &link->peer_link_uid,
				    link->smcibdev->ibdev->name, link->ibport);
	complete(&link->llc_testlink_resp);
	cancel_delayed_work_sync(&link->llc_testlink_wrk);
}

/* register a new rtoken at the remote peer (for all links) */
int smc_llc_do_confirm_rkey(struct smc_link *send_link,
			    struct smc_buf_desc *rmb_desc)
{
	struct smc_link_group *lgr = send_link->lgr;
	struct smc_llc_qentry *qentry = NULL;
	int rc = 0;

	rc = smc_llc_send_confirm_rkey(send_link, rmb_desc);
	if (rc)
		goto out;
	/* receive CONFIRM RKEY response from server over RoCE fabric */
	qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_RKEY);
	if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
		rc = -EFAULT;
out:
	if (qentry)
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
	return rc;
}

/* unregister an rtoken at the remote peer */
int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
			   struct smc_buf_desc *rmb_desc)
{
	struct smc_llc_qentry *qentry = NULL;
	struct smc_link *send_link;
	int rc = 0;

	send_link = smc_llc_usable_link(lgr);
	if (!send_link)
		return -ENOLINK;

	/* protected by llc_flow control */
	rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
	if (rc)
		goto out;
	/* receive DELETE RKEY response from server over RoCE fabric */
	qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_DELETE_RKEY);
	if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
		rc = -EFAULT;
out:
	if (qentry)
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
	return rc;
}

void smc_llc_link_set_uid(struct smc_link *link)
{
	__be32 link_uid;

	link_uid = htonl(*((u32 *)link->lgr->id) + link->link_id);
	memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE);
}

/* save peers link user id, used for debug purposes */
void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry)
{
	memcpy(qentry->link->peer_link_uid, qentry->msg.confirm_link.link_uid,
	       SMC_LGR_ID_SIZE);
}

/* evaluate confirm link request or response */
int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
			   enum smc_llc_reqresp type)
{
	if (type == SMC_LLC_REQ) {	/* SMC server assigns link_id */
		qentry->link->link_id = qentry->msg.confirm_link.link_num;
		smc_llc_link_set_uid(qentry->link);
	}
	if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
		return -ENOTSUPP;
	return 0;
}

/***************************** init, exit, misc ******************************/

static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_CONFIRM_LINK
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_TEST_LINK
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_ADD_LINK
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_ADD_LINK_CONT
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_DELETE_LINK
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_CONFIRM_RKEY
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_CONFIRM_RKEY_CONT
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_DELETE_RKEY
	},
	/* V2 types */
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_CONFIRM_LINK_V2
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_TEST_LINK_V2
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_ADD_LINK_V2
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_DELETE_LINK_V2
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_REQ_ADD_LINK_V2
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_CONFIRM_RKEY_V2
	},
	{
		.handler	= smc_llc_rx_handler,
		.type		= SMC_LLC_DELETE_RKEY_V2
	},
	{
		.handler	= NULL,
	}
};

int __init smc_llc_init(void)
{
	struct smc_wr_rx_handler *handler;
	int rc = 0;

	for (handler = smc_llc_rx_handlers; handler->handler; handler++) {
		INIT_HLIST_NODE(&handler->list);
		rc = smc_wr_rx_register_handler(handler);
		if (rc)
			break;
	}
	return rc;
}