// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Bernard Metzler <[email protected]> */ /* Copyright (c) 2008-2019, IBM Corporation */ #include <linux/errno.h> #include <linux/types.h> #include <linux/net.h> #include <linux/scatterlist.h> #include <linux/highmem.h> #include <net/tcp.h> #include <rdma/iw_cm.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> #include "siw.h" #include "siw_verbs.h" #include "siw_mem.h" #define MAX_HDR_INLINE … static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) { … } static struct page *siw_get_page(struct siw_mem *mem, struct siw_sge *sge, unsigned long offset, int *pbl_idx) { … } /* * Copy short payload at provided destination payload address */ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) { … } #define PKT_FRAGMENTED … #define PKT_COMPLETE … /* * siw_qp_prepare_tx() * * Prepare tx state for sending out one fpdu. Builds complete pkt * if no user data or only immediate data are present. * * returns PKT_COMPLETE if complete pkt built, PKT_FRAGMENTED otherwise. */ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) { … } /* * Send out one complete control type FPDU, or header of FPDU carrying * data. Used for fixed sized packets like Read.Requests or zero length * SENDs, WRITEs, READ.Responses, or header only. */ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s, int flags) { … } /* * 0copy TCP transmit interface: Use MSG_SPLICE_PAGES. * * Using sendpage to push page by page appears to be less efficient * than using sendmsg, even if data are copied. * * A general performance limitation might be the extra four bytes * trailer checksum segment to be pushed after user data. */ static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, size_t size) { … } /* * siw_0copy_tx() * * Pushes list of pages to TCP socket. If pages from multiple * SGE's, all referenced pages of each SGE are pushed in one * shot. 
*/ static int siw_0copy_tx(struct socket *s, struct page **page, struct siw_sge *sge, unsigned int offset, unsigned int size) { … } #define MAX_TRAILER … static void siw_unmap_pages(struct kvec *iov, unsigned long kmap_mask, int len) { … } /* * siw_tx_hdt() tries to push a complete packet to TCP where all * packet fragments are referenced by the elements of one iovec. * For the data portion, each involved page must be referenced by * one extra element. All sge's data can be non-aligned to page * boundaries. Two more elements are referencing iWARP header * and trailer: * MAX_ARRAY = 64KB/PAGE_SIZE + 1 + 2 * (SIW_MAX_SGE - 1) + HDR + TRL */ #define MAX_ARRAY … /* * Write out iov referencing hdr, data and trailer of current FPDU. * Update transmit state dependent on write return status */ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) { … } static void siw_update_tcpseg(struct siw_iwarp_tx *c_tx, struct socket *s) { … } /* * siw_prepare_fpdu() * * Prepares transmit context to send out one FPDU if FPDU will contain * user data and user data are not immediate data. * Computes maximum FPDU length to fill up TCP MSS if possible. * * @qp: QP from which to transmit * @wqe: Current WQE causing transmission * * TODO: Take into account real available sendspace on socket * to avoid header misalignment due to send pausing within * fpdu transmission */ static void siw_prepare_fpdu(struct siw_qp *qp, struct siw_wqe *wqe) { … } /* * siw_check_sgl_tx() * * Check permissions for a list of SGE's (SGL). * A successful check will have all memory referenced * for transmission resolved and assigned to the WQE. * * @pd: Protection Domain SGL should belong to * @wqe: WQE to be checked * @perms: requested access permissions * */ static int siw_check_sgl_tx(struct ib_pd *pd, struct siw_wqe *wqe, enum ib_access_flags perms) { … } /* * siw_qp_sq_proc_tx() * * Process one WQE which needs transmission on the wire. 
*/ static int siw_qp_sq_proc_tx(struct siw_qp *qp, struct siw_wqe *wqe) { … } static int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe) { … } static int siw_qp_sq_proc_local(struct siw_qp *qp, struct siw_wqe *wqe) { … } /* * siw_qp_sq_process() * * Core TX path routine for RDMAP/DDP/MPA using a TCP kernel socket. * Sends RDMAP payload for the current SQ WR @wqe of @qp in one or more * MPA FPDUs, each containing a DDP segment. * * SQ processing may occur in user context as a result of posting * new WQE's or from siw_tx_thread context. Processing in * user context is limited to non-kernel verbs users. * * SQ processing may get paused anytime, possibly in the middle of a WR * or FPDU, if insufficient send space is available. SQ processing * gets resumed from siw_tx_thread, if send space becomes available again. * * Must be called with the QP state read-locked. * * Note: * An outbound RREQ can be satisfied by the corresponding RRESP * _before_ it gets assigned to the ORQ. This happens regularly * in RDMA READ via loopback case. Since both outbound RREQ and * inbound RRESP can be handled by the same CPU, locking the ORQ * is dead-lock prone and thus not an option. With that, the * RREQ gets assigned to the ORQ _before_ being sent - see * siw_activate_tx() - and pulled back in case of send failure. */ int siw_qp_sq_process(struct siw_qp *qp) { … } static void siw_sq_resume(struct siw_qp *qp) { … } struct tx_task_t { … }; static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g); int siw_create_tx_threads(void) { … } void siw_stop_tx_threads(void) { … } int siw_run_sq(void *data) { … } int siw_sq_start(struct siw_qp *qp) { … }