/* linux/net/rds/rds.h */

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_RDS_H
#define _RDS_RDS_H

#include <net/sock.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <rdma/rdma_cm.h>
#include <linux/mutex.h>
#include <linux/rds.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/in6.h>

#include "info.h"

/*
 * RDS Network protocol version
 */
/*
 * Named protocol versions, plus function-like macros that take either a
 * version value (v) or a (maj, min) pair.
 * NOTE(review): no replacement text is visible for these macros here, so
 * the numeric encoding of a version cannot be confirmed from this header.
 */
#define RDS_PROTOCOL_3_0
#define RDS_PROTOCOL_3_1
#define RDS_PROTOCOL_4_0
#define RDS_PROTOCOL_4_1
#define RDS_PROTOCOL_VERSION
#define RDS_PROTOCOL_MAJOR(v)
#define RDS_PROTOCOL_MINOR(v)
#define RDS_PROTOCOL(maj, min)
#define RDS_PROTOCOL_COMPAT_VERSION

/* The following ports, 16385, 18634, 18635, are registered with IANA as
 * the ports to be used for RDS over TCP and UDP.  Currently, only RDS over
 * TCP and RDS over IB/RDMA are implemented.  18634 is the historical value
 * used for the RDMA_CM listener port.  RDS/TCP uses port 16385.  After
 * IPv6 work, RDMA_CM also uses 16385 as the listener port.  18634 is kept
 * to ensure compatibility with older RDS modules.  Those ports are defined
 * in each transport's header file.
 */
#define RDS_PORT

/* Define KERNEL_HAS_ATOMIC64 whenever ATOMIC64_INIT is defined. */
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
#endif
#ifdef RDS_DEBUG
#define rdsdebug(fmt, args...)
#else
/* sigh, pr_debug() causes unused variable warnings */
/*
 * No-op stand-in for rdsdebug() used when RDS_DEBUG is not defined.
 *
 * It is a real function rather than an empty macro so that the arguments
 * still count as used and are still checked against the format string
 * through __printf(1, 2).
 *
 * @fmt: printf-style format string.  It is never written through, so it
 *       is const-qualified; callers holding a const string can pass it
 *       without a discarded-qualifier warning.
 */
static inline __printf(1, 2)
void rdsdebug(const char *fmt, ...)
{
}
#endif

/*
 * Fragment geometry.
 * NOTE(review): the values are not visible here; presumably RDS_FRAG_SIZE
 * is derived from RDS_FRAG_SHIFT — confirm.
 */
#define RDS_FRAG_SHIFT
#define RDS_FRAG_SIZE

/* Used to limit both RDMA and non-RDMA RDS message to 1MB */
#define RDS_MAX_MSG_SIZE

/*
 * Congestion map sizing, named as bytes, pages and bits per page.
 * NOTE(review): the values are not visible here.
 */
#define RDS_CONG_MAP_BYTES
#define RDS_CONG_MAP_PAGES
#define RDS_CONG_MAP_PAGE_BITS

/*
 * Congestion map object taken by the rds_cong_*() helpers declared further
 * down (rds_cong_set_bit(), rds_cong_clear_bit(), rds_cong_wait(),
 * rds_cong_queue_updates(), rds_cong_map_updated()).
 */
struct rds_cong_map {};


/*
 * This is how we will track the connection state:
 * A connection is always in one of the following
 * states. Updates to the state are atomic and imply
 * a memory barrier.
 */
enum {};

/* Bits for c_flags */
#define RDS_LL_SEND_FULL
#define RDS_RECONNECT_PENDING
#define RDS_IN_XMIT
#define RDS_RECV_REFILL
#define RDS_DESTROY_PENDING

/* Max number of multipaths per RDS connection. Must be a power of 2 */
#define RDS_MPATH_WORKERS
#define RDS_MPATH_HASH(rs, n)

#define IS_CANONICAL(laddr, faddr)

/*
 * Per mpath connection state.  The path-level entry points declared later
 * in this header (rds_conn_path_drop(), rds_send_xmit(),
 * rds_queue_reconnect(), ...) each take a pointer to one of these.
 */
struct rds_conn_path {};

/*
 * One rds_connection per RDS address pair.  rds_conn_create() and
 * rds_conn_create_outgoing() return a pointer to one, given a local and a
 * foreign address.
 */
struct rds_connection {};

/*
 * Accessor yielding a struct net pointer for @conn.
 * NOTE(review): the body is not visible here; confirm which field of the
 * connection backs it.
 */
static inline
struct net *rds_conn_net(struct rds_connection *conn)
{}

/*
 * Setter counterpart of rds_conn_net(): takes @conn and the struct net
 * pointer @net and returns nothing.
 * NOTE(review): the body is not visible here; confirm what it stores.
 */
static inline
void rds_conn_net_set(struct rds_connection *conn, struct net *net)
{}

#define RDS_FLAG_CONG_BITMAP
#define RDS_FLAG_ACK_REQUIRED
#define RDS_FLAG_RETRANSMITTED
#define RDS_MAX_ADV_CREDIT

/* RDS_FLAG_PROBE_PORT is the reserved sport used for sending a ping
 * probe to exchange control information before establishing a connection.
 * Currently the control information that is exchanged is the number of
 * supported paths. If the peer is a legacy (older kernel revision) peer,
 * it would return a pong message without additional control information
 * that would then alert the sender that the peer was an older rev.
 */
#define RDS_FLAG_PROBE_PORT
#define RDS_HS_PROBE(sport, dport)
/*
 * Maximum space available for extension headers.
 */
#define RDS_HEADER_EXT_SPACE

/*
 * RDS message header.  Operated on by rds_message_populate_header(),
 * rds_message_add_extension(), rds_message_next_extension() and the
 * rds_message_make_checksum()/rds_message_verify_checksum() helpers.
 */
struct rds_header {};

/*
 * Reserved - indicates end of extensions
 */
#define RDS_EXTHDR_NONE

/*
 * This extension header is included in the very
 * first message that is sent on a new connection,
 * and identifies the protocol level. This will help
 * rolling updates if a future change requires breaking
 * the protocol.
 * NB: This is no longer true for IB, where we do a version
 * negotiation during the connection setup phase (protocol
 * version information is included in the RDMA CM private data).
 */
#define RDS_EXTHDR_VERSION
struct rds_ext_header_version {};

/*
 * This extension header is included in the RDS message
 * chasing an RDMA operation.
 */
#define RDS_EXTHDR_RDMA
struct rds_ext_header_rdma {};

/*
 * This extension header tells the peer about the
 * destination <R_Key,offset> of the requested RDMA
 * operation.
 */
#define RDS_EXTHDR_RDMA_DEST
struct rds_ext_header_rdma_dest {};

/* Extension header announcing number of paths.
 * Implicit length = 2 bytes.
 */
#define RDS_EXTHDR_NPATHS
#define RDS_EXTHDR_GEN_NUM

#define __RDS_EXTHDR_MAX
#define RDS_RX_MAX_TRACES
#define RDS_MSG_RX_HDR
#define RDS_MSG_RX_START
#define RDS_MSG_RX_END
#define RDS_MSG_RX_CMSG

/* The following values are whitelisted for usercopy */
struct rds_inc_usercopy {};

/*
 * Incoming message.  Taken by rds_inc_init()/rds_inc_path_init(),
 * rds_inc_put(), rds_recv_incoming() and
 * rds_message_inc_copy_to_user().
 */
struct rds_incoming {};

/*
 * NOTE(review): members are not visible here.  Presumably the object
 * managed through rds_get_mr()/rds_free_mr() and released via
 * __rds_put_mr_final() — confirm.
 */
struct rds_mr {};

/*
 * RDMA cookie helpers: the first takes an (r_key, offset) pair and yields
 * an rds_rdma_cookie_t; the other two take a cookie and yield a u32.
 * NOTE(review): the bodies are not visible here, so the bit layout of the
 * cookie must be confirmed against the implementation.
 */
static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
{}

static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
{}

static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
{}

/* atomic operation types */
#define RDS_ATOMIC_TYPE_CSWP
#define RDS_ATOMIC_TYPE_FADD

/*
 * m_sock_item and m_conn_item are on lists that are serialized under
 * conn->c_lock.  m_sock_item has additional meaning in that once it is empty
 * the message will not be put back on the retransmit list after being sent.
 * messages that are canceled while being sent rely on this.
 *
 * m_inc is used by loopback so that it can pass an incoming message straight
 * back up into the rx path.  It embeds a wire header which is also used by
 * the send path, which is kind of awkward.
 *
 * m_sock_item indicates the message's presence on a socket's send or receive
 * queue.  m_rs will point to that socket.
 *
 * m_daddr is used by cancellation to prune messages to a given destination.
 *
 * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock
 * nesting.  As paths iterate over messages on a sock, or conn, they must
 * also lock the conn, or sock, to remove the message from those lists too.
 * Testing the flag to determine if the message is still on the lists lets
 * us avoid testing the list_head directly.  That means each path can use
 * the message's list_head to keep it on a local list while juggling locks
 * without confusing the other path.
 *
 * m_ack_seq is an optional field set by transports who need a different
 * sequence number range to invalidate.  They can use this in a callback
 * that they pass to rds_send_drop_acked() to see if each message has been
 * acked.  The HAS_ACK_SEQ flag can be used to detect messages which haven't
 * had ack_seq set yet.
 */
#define RDS_MSG_ON_SOCK
#define RDS_MSG_ON_CONN
#define RDS_MSG_HAS_ACK_SEQ
#define RDS_MSG_ACK_REQUIRED
#define RDS_MSG_RETRANSMITTED
#define RDS_MSG_MAPPED
#define RDS_MSG_PAGEVEC
#define RDS_MSG_FLUSH

/*
 * Zero-copy bookkeeping types.
 * NOTE(review): members are not visible here.
 */
struct rds_znotifier {};

struct rds_msg_zcopy_info {};

struct rds_msg_zcopy_queue {};

/*
 * Initialiser for a struct rds_msg_zcopy_queue.  The same queue type is
 * taken by rds_notify_msg_zcopy_purge(), declared with the message.c
 * prototypes.
 * NOTE(review): the body is not visible here.
 */
static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q)
{}

/* Taken by rds_rdma_extra_size() and rds_cmsg_rdma_args(). */
struct rds_iov_vector {};

/* NOTE(review): presumably a collection of rds_iov_vector; members not visible here. */
struct rds_iov_vector_arr {};

/*
 * The RDS message itself.  Its m_* fields are described in the long
 * comment that precedes the RDS_MSG_* flag definitions.
 */
struct rds_message {};

/*
 * The RDS notifier is used (optionally) to tell the application about
 * completed RDMA operations. Rather than keeping the whole rds message
 * around on the queue, we allocate a small notifier that is put on the
 * socket's notifier_list. Notifications are delivered to the application
 * through control messages.
 */
struct rds_notifier {};

/* Available as part of RDS core, so doesn't need to participate
 * in get_preferred transport etc
 */
#define RDS_TRANS_LOOP

/**
 * struct rds_transport -  transport specific behavioural hooks
 *
 * @xmit: .xmit is called by rds_send_xmit() to tell the transport to send
 *        part of a message.  The caller serializes on the send_sem so this
 *        doesn't need to be reentrant for a given conn.  The header must be
 *        sent before the data payload.  .xmit must be prepared to send a
 *        message with no data payload.  .xmit should return the number of
 *        bytes that were sent down the connection, including header bytes.
 *        Returning 0 tells the caller that it doesn't need to perform any
 *        additional work now.  This is usually the case when the transport has
 *        filled the sending queue for its connection and will handle
 *        triggering the rds thread to continue the send when space becomes
 *        available.  Returning -EAGAIN tells the caller to retry the send
 *        immediately.  Returning -ENOMEM tells the caller to retry the send at
 *        some point in the future.
 *
 * @conn_shutdown: conn_shutdown stops traffic on the given connection.  Once
 *                 it returns the connection can not call rds_recv_incoming().
 *                 This will only be called once after conn_connect returns
 *                 non-zero success.  The caller serializes this with
 *                 the send and connecting paths (xmit_* and conn_*).  The
 *                 transport is responsible for other serialization, including
 *                 rds_recv_incoming().  This is called in process context but
 *                 should try hard not to block.
 */

struct rds_transport {};

/* Bind hash table key length.  It is the sum of the size of a struct
 * in6_addr, a scope_id  and a port.
 */
#define RDS_BOUND_KEY_LEN

/*
 * RDS socket.  Most socket-facing entry points in this header
 * (rds_sock_addref(), rds_remove_bound(), rds_send_drop_to(),
 * rds_get_mr(), ...) take a pointer to one.
 */
struct rds_sock {};

/*
 * Convert between a struct sock and the corresponding struct rds_sock.
 * NOTE(review): the bodies are not visible here; confirm how the two
 * objects are related before relying on either direction.
 */
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
{}
static inline struct sock *rds_rs_to_sk(struct rds_sock *rs)
{}

/*
 * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value
 * to account for overhead.  We don't account for overhead, we just apply
 * the number of payload bytes to the specified value.
 *
 * NOTE(review): the bodies are not visible here; given the note above
 * they presumably scale the socket's sk_sndbuf/sk_rcvbuf back down to the
 * specified value — confirm.
 */
static inline int rds_sk_sndbuf(struct rds_sock *rs)
{}
static inline int rds_sk_rcvbuf(struct rds_sock *rs)
{}

/*
 * Statistics block.  A per-CPU instance named rds_stats is declared with
 * the stats.c prototypes below.
 */
struct rds_statistics {};

/* af_rds.c */
/* NOTE(review): presumably a get/put reference pair on @rs; confirm in af_rds.c. */
void rds_sock_addref(struct rds_sock *rs);
void rds_sock_put(struct rds_sock *rs);
void rds_wake_sk_sleep(struct rds_sock *rs);
/* struct sock flavour of the wake-up helper; the body is not visible here. */
static inline void __rds_wake_sk_sleep(struct sock *sk)
{}
extern wait_queue_head_t rds_poll_waitq;


/* bind.c */
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
void rds_remove_bound(struct rds_sock *rs);
/*
 * Lookup by (address, port, scope_id) — the same three components the
 * RDS_BOUND_KEY_LEN comment lists as making up the bind hash key.
 */
struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
				__u32 scope_id);
int rds_bind_lock_init(void);
void rds_bind_lock_destroy(void);

/* cong.c */
/*
 * Congestion tracking.  The per-port helpers take the map and a
 * network-byte-order port (__be16).  Every parameter is named so the
 * prototypes document themselves.
 */
int rds_cong_get_maps(struct rds_connection *conn);
void rds_cong_add_conn(struct rds_connection *conn);
void rds_cong_remove_conn(struct rds_connection *conn);
void rds_cong_set_bit(struct rds_cong_map *map, __be16 port);
void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port);
int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rds_sock *rs);
void rds_cong_queue_updates(struct rds_cong_map *map);
void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask);
int rds_cong_updated_since(unsigned long *recent);
void rds_cong_add_socket(struct rds_sock *rs);
void rds_cong_remove_socket(struct rds_sock *rs);
void rds_cong_exit(void);
struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);

/* connection.c */
extern u32 rds_gen_num;
int rds_conn_init(void);
void rds_conn_exit(void);
/*
 * rds_conn_create() and rds_conn_create_outgoing() share one parameter
 * list: network namespace, local and foreign address, transport, tos,
 * allocation flags and dev_if.
 * NOTE(review): how the two differ is not visible from this header.
 */
struct rds_connection *rds_conn_create(struct net *net,
				       const struct in6_addr *laddr,
				       const struct in6_addr *faddr,
				       struct rds_transport *trans,
				       u8 tos, gfp_t gfp,
				       int dev_if);
struct rds_connection *rds_conn_create_outgoing(struct net *net,
						const struct in6_addr *laddr,
						const struct in6_addr *faddr,
						struct rds_transport *trans,
						u8 tos, gfp_t gfp, int dev_if);
/* Shutdown and drop entry points; some take a single path, others the whole connection. */
void rds_conn_shutdown(struct rds_conn_path *cpath);
void rds_conn_destroy(struct rds_connection *conn);
void rds_conn_drop(struct rds_connection *conn);
void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
void rds_conn_connect_if_down(struct rds_connection *conn);
void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
void rds_check_all_paths(struct rds_connection *conn);
/*
 * @visitor receives a connection and a void pointer.
 * NOTE(review): presumably @buffer is scratch space of @item_len bytes
 * handed to @visitor — confirm in connection.c.
 */
void rds_for_each_conn_info(struct socket *sock, unsigned int len,
			  struct rds_info_iterator *iter,
			  struct rds_info_lengths *lens,
			  int (*visitor)(struct rds_connection *, void *),
			  u64 *buffer,
			  size_t item_len);

/*
 * printf-style error report for a connection path.  The format string is
 * argument 2 and the variadic arguments start at 3, matching the
 * __printf(2, 3) annotation.  Callers use the rds_conn_path_error()
 * wrapper macro.
 */
__printf(2, 3)
void __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...);
#define rds_conn_path_error(cp, fmt...)

/*
 * Connection state helpers.
 *
 * The rds_conn_path_*() variants take a single path; the rds_conn_*()
 * variants take the whole connection.
 * NOTE(review): the bodies are not visible here.  Presumably they read or
 * update the atomically maintained connection state described in the
 * state-tracking comment near the top of this header, and the
 * connection-level variants act on one designated path — confirm against
 * the implementation.
 */

/* @old and @new are state values; NOTE(review): meaning of the int result not visible here. */
static inline int
rds_conn_path_transition(struct rds_conn_path *cp, int old, int new)
{}

static inline int
rds_conn_transition(struct rds_connection *conn, int old, int new)
{}

/* State getters. */
static inline int
rds_conn_path_state(struct rds_conn_path *cp)
{}

static inline int
rds_conn_state(struct rds_connection *conn)
{}

/* Predicates named after individual states (up, down, connecting). */
static inline int
rds_conn_path_up(struct rds_conn_path *cp)
{}

static inline int
rds_conn_path_down(struct rds_conn_path *cp)
{}

static inline int
rds_conn_up(struct rds_connection *conn)
{}

static inline int
rds_conn_path_connecting(struct rds_conn_path *cp)
{}

static inline int
rds_conn_connecting(struct rds_connection *conn)
{}

/* message.c */
/* Allocation: @nents is an entry count, @gfp the allocation flags. */
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
			       bool zcopy);
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
/* Header helpers: ports are passed in network byte order (__be16). */
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
				 __be16 dport, u64 seq);
/*
 * Extension header access.  The extension types are the RDS_EXTHDR_*
 * values defined earlier in this header.
 */
int rds_message_add_extension(struct rds_header *hdr,
			      unsigned int type, const void *data, unsigned int len);
int rds_message_next_extension(struct rds_header *hdr,
			       unsigned int *pos, void *buf, unsigned int *buflen);
int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
/* NOTE(review): presumably a get/put reference pair on @rm; confirm in message.c. */
void rds_message_addref(struct rds_message *rm);
void rds_message_put(struct rds_message *rm);
void rds_message_wait(struct rds_message *rm);
void rds_message_unmapped(struct rds_message *rm);
void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *info);

/*
 * Header checksum helpers: one takes a writable header and returns
 * nothing, the other takes a const header and returns an int.
 * NOTE(review): the bodies are not visible here; the checksum algorithm
 * and the meaning of the int result must be confirmed against the
 * implementation.
 */
static inline void rds_message_make_checksum(struct rds_header *hdr)
{}

static inline int rds_message_verify_checksum(const struct rds_header *hdr)
{}


/* page.c */
/* Takes a scatterlist entry, a byte count and allocation flags; returns an int status. */
int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
			     gfp_t gfp);
void rds_page_exit(void);

/* recv.c */
/*
 * Incoming-message setup.  rds_inc_init() takes the whole connection,
 * rds_inc_path_init() a single path.  The path parameter is named @cp,
 * as it is in the other path-level prototypes in this header.
 */
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
		  struct in6_addr *saddr);
void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
		       struct in6_addr *saddr);
void rds_inc_put(struct rds_incoming *inc);
void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
		       struct in6_addr *daddr,
		       struct rds_incoming *inc, gfp_t gfp);
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
		int msg_flags);
void rds_clear_recv_queue(struct rds_sock *rs);
int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
/*
 * Info copy helpers: the first takes IPv4 addresses (__be32), the rds6_
 * variant takes struct in6_addr pointers.
 */
void rds_inc_info_copy(struct rds_incoming *inc,
		       struct rds_info_iterator *iter,
		       __be32 saddr, __be32 daddr, int flip);
void rds6_inc_info_copy(struct rds_incoming *inc,
			struct rds_info_iterator *iter,
			struct in6_addr *saddr, struct in6_addr *daddr,
			int flip);

/* send.c */
int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
void rds_send_path_reset(struct rds_conn_path *conn);
int rds_send_xmit(struct rds_conn_path *cp);
struct sockaddr_in;
void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest);
is_acked_func;
void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
			 is_acked_func is_acked);
void rds_send_path_drop_acked(struct rds_conn_path *cp, u64 ack,
			      is_acked_func is_acked);
void rds_send_ping(struct rds_connection *conn, int cp_index);
int rds_send_pong(struct rds_conn_path *cp, __be16 dport);

/* rdma.c */
void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
/* Socket-option style entry points: each takes the option value and its length. */
int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen);
int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen);
int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen);
void rds_rdma_drop_keys(struct rds_sock *rs);
int rds_rdma_extra_size(struct rds_rdma_args *args,
			struct rds_iov_vector *iov);
/* Control-message handlers: each receives the socket, the message being built and one cmsghdr. */
int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
			  struct cmsghdr *cmsg);
int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
			  struct cmsghdr *cmsg,
			  struct rds_iov_vector *vec);
int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
			  struct cmsghdr *cmsg);
void rds_rdma_free_op(struct rm_rdma_op *ro);
void rds_atomic_free_op(struct rm_atomic_op *ao);
/* Completion notifications carrying a work-completion status. */
void rds_rdma_send_complete(struct rds_message *rm, int wc_status);
void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
		    struct cmsghdr *cmsg);

/* Takes a struct kref pointer; NOTE(review): presumably the kref release callback for an MR — confirm in rdma.c. */
void __rds_put_mr_final(struct kref *kref);

/*
 * Boolean predicate on @conn.
 * NOTE(review): the body is not visible here; presumably related to the
 * RDS_DESTROY_PENDING bit listed among the c_flags bits — confirm.
 */
static inline bool rds_destroy_pending(struct rds_connection *conn)
{}

enum {};

/* stats.c */
/* Per-CPU instance of struct rds_statistics. */
DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
/*
 * Counter update macros; the _which forms take the per-CPU variable
 * explicitly.
 * NOTE(review): replacement text is not visible here.
 */
#define rds_stats_inc_which(which, member)
#define rds_stats_inc(member)
#define rds_stats_add_which(which, member, count)
#define rds_stats_add(member, count)
int rds_stats_init(void);
void rds_stats_exit(void);
/* @values and @names are parallel arrays of @nr entries (NOTE(review): confirm in stats.c). */
void rds_stats_info_copy(struct rds_info_iterator *iter,
			 uint64_t *values, const char *const *names,
			 size_t nr);

/* sysctl.c */
int rds_sysctl_init(void);
void rds_sysctl_exit(void);
/* Tunables defined in sysctl.c; the reconnect bounds are expressed in jiffies. */
extern unsigned long rds_sysctl_sndbuf_min;
extern unsigned long rds_sysctl_sndbuf_default;
extern unsigned long rds_sysctl_sndbuf_max;
extern unsigned long rds_sysctl_reconnect_min_jiffies;
extern unsigned long rds_sysctl_reconnect_max_jiffies;
extern unsigned int  rds_sysctl_max_unacked_packets;
extern unsigned int  rds_sysctl_max_unacked_bytes;
extern unsigned int  rds_sysctl_ping_enable;
extern unsigned long rds_sysctl_trace_flags;
extern unsigned int  rds_sysctl_trace_level;

/* threads.c */
int rds_threads_init(void);
void rds_threads_exit(void);
extern struct workqueue_struct *rds_wq;
void rds_queue_reconnect(struct rds_conn_path *cp);
/* Work handlers; each receives the work_struct it was queued with. */
void rds_connect_worker(struct work_struct *work);
void rds_shutdown_worker(struct work_struct *work);
void rds_send_worker(struct work_struct *work);
void rds_recv_worker(struct work_struct *work);
/* Path-level and connection-level completion notifications. */
void rds_connect_path_complete(struct rds_conn_path *cp, int curr);
void rds_connect_complete(struct rds_connection *conn);
int rds_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2);

/* transport.c */
void rds_trans_register(struct rds_transport *trans);
void rds_trans_unregister(struct rds_transport *trans);
/*
 * Transport lookup by namespace, address and scope_id.
 * NOTE(review): presumably paired with rds_trans_put() to release the
 * returned transport — confirm in transport.c.
 */
struct rds_transport *rds_trans_get_preferred(struct net *net,
					      const struct in6_addr *addr,
					      __u32 scope_id);
void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
				       unsigned int avail);
/* Lookup by transport type value. */
struct rds_transport *rds_trans_get(int t_type);

#endif