// SPDX-License-Identifier: GPL-2.0-only /* * TCP NV: TCP with Congestion Avoidance * * TCP-NV is a successor of TCP-Vegas that has been developed to * deal with the issues that occur in modern networks. * Like TCP-Vegas, TCP-NV supports true congestion avoidance, * the ability to detect congestion before packet losses occur. * When congestion (queue buildup) starts to occur, TCP-NV * predicts what the cwnd size should be for the current * throughput and it reduces the cwnd proportionally to * the difference between the current cwnd and the predicted cwnd. * * NV is only recommended for traffic within a data center, and when * all the flows are NV (at least those within the data center). This * is due to the inherent unfairness between flows using losses to * detect congestion (congestion control) and those that use queue * buildup to detect congestion (congestion avoidance). * * Note: High NIC coalescence values may lower the performance of NV * due to the increased noise in RTT values. In particular, we have * seen issues with rx-frames values greater than 8. * * TODO: * 1) Add mechanism to deal with reverse congestion. 
*/ #include <linux/module.h> #include <linux/math64.h> #include <net/tcp.h> #include <linux/inet_diag.h> /* TCP NV parameters * * nv_pad Max number of queued packets allowed in network * nv_pad_buffer Do not grow cwnd if this close to nv_pad * nv_reset_period How often (in seconds) to reset min_rtt * nv_min_cwnd Don't decrease cwnd below this if there are no losses * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected * nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8 * nv_rtt_factor RTT averaging factor * nv_loss_dec_factor Decrease cwnd to this (80%) when losses occur * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping * slow-start due to congestion * nv_stop_rtt_cnt Only grow cwnd for this many RTTs after non-congestion * nv_rtt_min_cnt Wait these many RTTs before making congestion decision * nv_cwnd_growth_rate_neg * nv_cwnd_growth_rate_pos * How quickly to double growth rate (not rate) of cwnd when not * congested. One value (nv_cwnd_growth_rate_neg) for when * rate < 1 pkt/RTT (after losses). The other (nv_cwnd_growth_rate_pos) * otherwise. 
*/ static int nv_pad __read_mostly = …; static int nv_pad_buffer __read_mostly = …; static int nv_reset_period __read_mostly = …; /* in seconds */ static int nv_min_cwnd __read_mostly = …; static int nv_cong_dec_mult __read_mostly = …; /* = 30% */ static int nv_ssthresh_factor __read_mostly = …; /* = 1 */ static int nv_rtt_factor __read_mostly = …; /* = 1/2*old + 1/2*new */ static int nv_loss_dec_factor __read_mostly = …; /* => 80% */ static int nv_cwnd_growth_rate_neg __read_mostly = …; static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */ static int nv_dec_eval_min_calls __read_mostly = …; static int nv_inc_eval_min_calls __read_mostly = …; static int nv_ssthresh_eval_min_calls __read_mostly = …; static int nv_stop_rtt_cnt __read_mostly = …; static int nv_rtt_min_cnt __read_mostly = …; module_param(nv_pad, int, 0644); MODULE_PARM_DESC(…) …; module_param(nv_reset_period, int, 0644); MODULE_PARM_DESC(…) …; module_param(nv_min_cwnd, int, 0644); MODULE_PARM_DESC(…) …; /* TCP NV Parameters */ struct tcpnv { … }; #define NV_INIT_RTT … #define NV_MIN_CWND … #define NV_MIN_CWND_GROW … #define NV_TSO_CWND_BOUND … static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk) { … } static void tcpnv_init(struct sock *sk) { … } /* If provided, apply upper (base_rtt) and lower (lower_bound_rtt) * bounds to RTT. 
*/ inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val) { … } static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) { … } static u32 tcpnv_recalc_ssthresh(struct sock *sk) { … } static void tcpnv_state(struct sock *sk, u8 new_state) { … } /* Do congestion avoidance calculations for TCP-NV */ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) { … } /* Extract info for Tcp socket info provided via netlink */ static size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { … } static struct tcp_congestion_ops tcpnv __read_mostly = …; static int __init tcpnv_register(void) { … } static void __exit tcpnv_unregister(void) { … } module_init(…) …; module_exit(tcpnv_unregister); MODULE_AUTHOR(…) …; MODULE_LICENSE(…) …; MODULE_DESCRIPTION(…) …; MODULE_VERSION(…) …;