/*
 * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/ratelimit.h>

#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
#include "ib_mr.h"

/*
 * Convert IB-specific error message to RDS error message and call core
 * completion handler.
 */
static void rds_ib_send_complete(struct rds_message *rm,
				 int wc_status,
				 void (*complete)(struct rds_message *rm, int status))
{
	…
}

static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
				   struct rm_data_op *op,
				   int wc_status)
{
	…
}

static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
				   struct rm_rdma_op *op,
				   int wc_status)
{
	…
}

static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
				     struct rm_atomic_op *op,
				     int wc_status)
{
	…
}

/*
 * Unmap the resources associated with a struct send_work.
 *
 * Returns the rm for no better reason than that it is currently
 * unobtainable except by switching on wr.opcode, and the caller,
 * the event handler, needs it.
 */
static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
						struct rds_ib_send_work *send,
						int wc_status)
{
	…
}

void rds_ib_send_init_ring(struct rds_ib_connection *ic)
{
	…
}

void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
{
	…
}

/*
 * The only fast-path caller always has a non-zero nr, so we don't
 * bother testing nr before performing the atomic sub.
 */
static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
{
	…
}

/*
 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
 * operations performed in the send path.  When the sender allocs (and
 * potentially unallocs) the next free entry in the ring, it never alters
 * which entry is the next to be freed, and that is all this code is
 * concerned with.
 */
void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
	…
}

/*
 * This is the main function for allocating credits when sending
 * messages.
 *
 * Conceptually, we have two counters:
 *  - send credits: this tells us how many WRs we're allowed
 *    to submit without overrunning the receiver's queue.  For
 *    each SEND WR we post, we decrement this by one.
 *
 *  - posted credits: this tells us how many WRs we recently
 *    posted to the receive queue.  This value is transferred
 *    to the peer as a "credit update" in an RDS header field.
 *    Every time we transmit credits to the peer, we subtract
 *    the amount of transferred credits from this counter.
 *
 * It is essential that we avoid situations where both sides have
 * exhausted their send credits, and are unable to send new credits
 * to the peer.  We achieve this by requiring that we send at least
 * one credit update to the peer before exhausting our credits.
 * When new credits arrive, we subtract one credit that is withheld
 * until we've posted new buffers and are ready to transmit these
 * credits (see rds_ib_send_add_credits below).
 *
 * The RDS send code is essentially single-threaded; rds_send_xmit
 * sets RDS_IN_XMIT to ensure exclusive access to the send ring.
 * However, the ACK sending code is independent and can race with
 * message SENDs.
 *
 * In the send path, we need to update the counters for send credits
 * and the counter of posted buffers atomically - when we use the
 * last available credit, we cannot allow another thread to race us
 * and grab the posted credits counter.  Hence, we either have to use
 * a spinlock to protect the credit counter, or use atomics.
 *
 * Spinlocks shared between the send and the receive path are bad,
 * because they create unnecessary delays.  An early implementation
 * using a spinlock showed a 5% degradation in throughput at some
 * loads.
 *
 * This implementation avoids spinlocks completely, putting both
 * counters into a single atomic: the receive path updates that atomic
 * with atomic_add when fresh credits arrive, and the send path uses
 * atomic_cmpxchg when both counters have to change together.
 */
int rds_ib_send_grab_credits(struct rds_ib_connection *ic,
			     u32 wanted, u32 *adv_credits,
			     int need_posted, int max_posted)
{
	…
}
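/*
 * Purely illustrative sketch, not part of RDS and not built with this
 * file: a minimal userspace rendition of the scheme described above,
 * using C11 <stdatomic.h> instead of the kernel's atomic_t API.  Both
 * counters share one atomic word; the receive path adds credits with a
 * plain atomic add, and the send path updates both counters at once
 * with a compare-and-swap loop.  All names here (credit_state,
 * credits_grab, ...) are hypothetical, and the withholding logic is
 * simplified relative to rds_ib_send_grab_credits().
 */
#include <stdatomic.h>

#define POSTED_SHIFT	16		/* posted credits live in the high bits */
#define CREDIT_MASK	0xffffu		/* send credits live in the low bits */

static atomic_uint credit_state;	/* (posted << POSTED_SHIFT) | send */

/*
 * Receive path: the peer granted us @credits more send credits.
 * Assumes the send credit count stays below (1 << POSTED_SHIFT).
 */
static void credits_add(unsigned int credits)
{
	atomic_fetch_add(&credit_state, credits);
}

/*
 * Send path: try to take up to @wanted send credits, plus one posted
 * credit to advertise to the peer if any receive buffers were posted.
 * Returns the number of send credits actually obtained.
 */
static unsigned int credits_grab(unsigned int wanted, unsigned int *advertise)
{
	unsigned int old, new, send, posted, got;

	old = atomic_load(&credit_state);
	do {
		send = old & CREDIT_MASK;
		posted = old >> POSTED_SHIFT;

		got = wanted < send ? wanted : send;
		/* This simplified path never hands out the last send credit. */
		if (got && got == send)
			got--;

		/* Advertise at most one posted credit per call. */
		*advertise = posted ? 1 : 0;

		new = ((posted - *advertise) << POSTED_SHIFT) | (send - got);
		/* On failure, 'old' is refreshed and the loop retries. */
	} while (!atomic_compare_exchange_weak(&credit_state, &old, new));

	return got;
}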
void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits)
{
	…
}

void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
{
	…
}

static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
					     struct rds_ib_send_work *send,
					     bool notify)
{
	…
}

/*
 * This can be called multiple times for a given message.  The first time
 * we see a message we map its scatterlist into the IB device so that
 * we can provide that mapped address to the IB scatter gather entries
 * in the IB work requests.  We translate the scatterlist into a series
 * of work requests that fragment the message.  These work requests
 * complete in order, so we pass ownership of the message to the
 * completion handler once we send the final fragment.  (A simplified,
 * standalone sketch of this fragmentation idea appears at the end of
 * this file.)
 *
 * The RDS core uses the c_send_lock to only enter this function once
 * per connection.  This makes sure that the tx ring alloc/unalloc pairs
 * don't get out of sync and confuse the ring.
 */
int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
		unsigned int hdr_off, unsigned int sg, unsigned int off)
{
	…
}

/*
 * Issue an atomic operation.
 * A simplified version of the rdma case: we always map a single SG entry
 * of just 8 bytes, for the return value from the atomic operation.
 */
int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
{
	…
}

int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
{
	…
}

void rds_ib_xmit_path_complete(struct rds_conn_path *cp)
{
	…
}
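/*
 * Purely illustrative sketch, not part of RDS and not built with this
 * file: a standalone restatement of the fragmentation idea described
 * above rds_ib_xmit().  A message is carved into units of at most
 * max_frag bytes, and only the final unit is flagged, because the work
 * requests complete in order and ownership of the message passes to the
 * completion handler only after the last fragment has been sent.  All
 * names here (struct frag, build_fragments, ...) are hypothetical.
 */
#include <stddef.h>
#include <stdbool.h>

struct frag {
	size_t offset;	/* byte offset of this fragment within the message */
	size_t len;	/* number of bytes carried by this fragment */
	bool last;	/* set only on the final fragment of the message */
};

/*
 * Split @msg_len bytes into fragments of at most @max_frag bytes each.
 * Returns the number of fragments written into @frags, or 0 if more
 * than @max_frags entries would be needed (the caller would then back
 * off and retry later, roughly as the real send path does when the
 * send ring runs out of free entries).
 */
static size_t build_fragments(size_t msg_len, size_t max_frag,
			      struct frag *frags, size_t max_frags)
{
	size_t off = 0, n = 0;

	if (!max_frag)
		return 0;

	while (off < msg_len) {
		size_t len = msg_len - off;

		if (n == max_frags)
			return 0;
		if (len > max_frag)
			len = max_frag;

		frags[n].offset = off;
		frags[n].len = len;
		frags[n].last = (off + len == msg_len);
		off += len;
		n++;
	}
	return n;
}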