/*
 * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/ratelimit.h>

#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
#include "ib_mr.h"

/*
 * Convert IB-specific error message to RDS error message and call core
 * completion handler.
 */
static void rds_ib_send_complete(struct rds_message *rm,
				 int wc_status,
				 void (*complete)(struct rds_message *rm, int status))
{
	…
}

static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
				   struct rm_data_op *op,
				   int wc_status)
{
	…
}

static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
				   struct rm_rdma_op *op,
				   int wc_status)
{
	…
}

static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
				     struct rm_atomic_op *op,
				     int wc_status)
{
	…
}

/*
 * Unmap the resources associated with a struct send_work.
 *
 * Returns the rm for no better reason than that it is currently
 * unobtainable except by switching on wr.opcode, and the caller,
 * the event handler, needs it.
 */
static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
						struct rds_ib_send_work *send,
						int wc_status)
{
	…
}

void rds_ib_send_init_ring(struct rds_ib_connection *ic)
{
	…
}

void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
{
	…
}

/*
 * The only fast-path caller always has a non-zero nr, so we don't
 * bother testing nr before performing the atomic sub.
 */
static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
{
	…
}

/*
 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
 * operations performed in the send path.  When the sender allocs (and
 * potentially unallocs) the next free entry in the ring, it never alters
 * which entry is the next to be freed, and that is all this code is
 * concerned with.
 */
void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
	…
}

/*
 * This is the main function for allocating credits when sending
 * messages.
 *
 * Conceptually, we have two counters:
 *  - send credits: this tells us how many WRs we're allowed
 *    to submit without overrunning the receiver's queue.  For
 *    each SEND WR we post, we decrement this by one.
 *
 *  - posted credits: this tells us how many WRs we recently
 *    posted to the receive queue.  This value is transferred
 *    to the peer as a "credit update" in an RDS header field.
 *    Every time we transmit credits to the peer, we subtract
 *    the amount of transferred credits from this counter.
 *
 * It is essential that we avoid situations where both sides have
 * exhausted their send credits, and are unable to send new credits
 * to the peer.  We achieve this by requiring that we send at least
 * one credit update to the peer before exhausting our credits.
 * When new credits arrive, we subtract one credit that is withheld
 * until we've posted new buffers and are ready to transmit these
 * credits (see rds_ib_send_add_credits below).
 *
 * The RDS send code is essentially single-threaded; rds_send_xmit
 * sets RDS_IN_XMIT to ensure exclusive access to the send ring.
 * However, the ACK sending code is independent and can race with
 * message SENDs.
 *
 * In the send path, we need to update the counters for send credits
 * and the counter of posted buffers atomically - when we use the
 * last available credit, we cannot allow another thread to race us
 * and grab the posted credits counter.  Hence, we either have to use
 * a spinlock to protect the credit counter, or use atomics.
 *
 * Spinlocks shared between the send and the receive path are bad,
 * because they create unnecessary delays.  An early implementation
 * using a spinlock showed a 5% degradation in throughput at some
 * loads.
 *
 * This implementation avoids spinlocks completely, putting both
 * counters into a single atomic: the receive path updates that atomic
 * with atomic_add when fresh credits arrive, and the send path uses
 * atomic_cmpxchg when both counters have to change together.
 */
int rds_ib_send_grab_credits(struct rds_ib_connection *ic,
			     u32 wanted, u32 *adv_credits,
			     int need_posted, int max_posted)
{
	…
}
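/*
 * Purely illustrative sketch, not part of RDS and not built with this
 * file: a minimal userspace rendition of the scheme described above,
 * using C11 <stdatomic.h> instead of the kernel's atomic_t API.  Both
 * counters share one atomic word; the receive path adds credits with a
 * plain atomic add, and the send path updates both counters at once
 * with a compare-and-swap loop.  All names here (credit_state,
 * credits_grab, ...) are hypothetical, and the withholding logic is
 * simplified relative to rds_ib_send_grab_credits().
 */
#include <stdatomic.h>

#define POSTED_SHIFT	16		/* posted credits live in the high bits */
#define CREDIT_MASK	0xffffu		/* send credits live in the low bits */

static atomic_uint credit_state;	/* (posted << POSTED_SHIFT) | send */

/*
 * Receive path: the peer granted us @credits more send credits.
 * Assumes the send credit count stays below (1 << POSTED_SHIFT).
 */
static void credits_add(unsigned int credits)
{
	atomic_fetch_add(&credit_state, credits);
}

/*
 * Send path: try to take up to @wanted send credits, plus one posted
 * credit to advertise to the peer if any receive buffers were posted.
 * Returns the number of send credits actually obtained.
 */
static unsigned int credits_grab(unsigned int wanted, unsigned int *advertise)
{
	unsigned int old, new, send, posted, got;

	old = atomic_load(&credit_state);
	do {
		send = old & CREDIT_MASK;
		posted = old >> POSTED_SHIFT;

		got = wanted < send ? wanted : send;
		/* This simplified path never hands out the last send credit. */
		if (got && got == send)
			got--;

		/* Advertise at most one posted credit per call. */
		*advertise = posted ? 1 : 0;

		new = ((posted - *advertise) << POSTED_SHIFT) | (send - got);
		/* On failure, 'old' is refreshed and the loop retries. */
	} while (!atomic_compare_exchange_weak(&credit_state, &old, new));

	return got;
}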
void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits)
{
	…
}

void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
{
	…
}

static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
					     struct rds_ib_send_work *send,
					     bool notify)
{
	…
}

/*
 * This can be called multiple times for a given message.  The first time
 * we see a message we map its scatterlist into the IB device so that
 * we can provide that mapped address to the IB scatter gather entries
 * in the IB work requests.  We translate the scatterlist into a series
 * of work requests that fragment the message.  These work requests
 * complete in order, so we pass ownership of the message to the
 * completion handler once we send the final fragment.  (A simplified,
 * standalone sketch of this fragmentation idea appears at the end of
 * this file.)
 *
 * The RDS core uses the c_send_lock to only enter this function once
 * per connection.  This makes sure that the tx ring alloc/unalloc pairs
 * don't get out of sync and confuse the ring.
 */
int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
		unsigned int hdr_off, unsigned int sg, unsigned int off)
{
	…
}

/*
 * Issue an atomic operation.
 * A simplified version of the rdma case: we always map a single SG entry
 * of just 8 bytes, for the return value from the atomic operation.
 */
int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
{
	…
}

int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
{
	…
}

void rds_ib_xmit_path_complete(struct rds_conn_path *cp)
{
	…
}
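/*
 * Purely illustrative sketch, not part of RDS and not built with this
 * file: a standalone restatement of the fragmentation idea described
 * above rds_ib_xmit().  A message is carved into units of at most
 * max_frag bytes, and only the final unit is flagged, because the work
 * requests complete in order and ownership of the message passes to the
 * completion handler only after the last fragment has been sent.  All
 * names here (struct frag, build_fragments, ...) are hypothetical.
 */
#include <stddef.h>
#include <stdbool.h>

struct frag {
	size_t offset;	/* byte offset of this fragment within the message */
	size_t len;	/* number of bytes carried by this fragment */
	bool last;	/* set only on the final fragment of the message */
};

/*
 * Split @msg_len bytes into fragments of at most @max_frag bytes each.
 * Returns the number of fragments written into @frags, or 0 if more
 * than @max_frags entries would be needed (the caller would then back
 * off and retry later, roughly as the real send path does when the
 * send ring runs out of free entries).
 */
static size_t build_fragments(size_t msg_len, size_t max_frag,
			      struct frag *frags, size_t max_frags)
{
	size_t off = 0, n = 0;

	if (!max_frag)
		return 0;

	while (off < msg_len) {
		size_t len = msg_len - off;

		if (n == max_frags)
			return 0;
		if (len > max_frag)
			len = max_frag;

		frags[n].offset = off;
		frags[n].len = len;
		frags[n].last = (off + len == msg_len);
		off += len;
		n++;
	}
	return n;
}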