# SPDX-License-Identifier: GPL-2.0
# This test sends a >1Gbps stream of traffic from H1, to the switch, which
# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the
# switch and forwarded to the port under test $swp3, which is also 1Gbps.
#
# This way, $swp3 should be 100% filled with traffic without any of it spilling
# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That
# is what H2 is used for--it sends the extra traffic to create backlog.
#
# A RED Qdisc is installed on $swp3. The configuration is such that the minimum
# and maximum size are 1 byte apart, so there is a very clear border under which
# no marking or dropping takes place, and above which everything is marked or
# dropped.
#
# The test uses the buffer build-up behavior to test the installed RED.
#
# In order to test WRED, $swp3 actually contains RED under PRIO, with two
# different configurations. Traffic is prioritized using 802.1p and relies on
# the implicit mlxsw configuration, where packet priority is taken 1:1 from the
# 802.1p marking.
#
# +--------------------------+ +--------------------------+
# | H1 | | H2 |
# | + $h1.10 | | + $h2.10 |
# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
# | | | | | |
# | | $h1.11 + | | | $h2.11 + |
# | | 192.0.2.17/28 | | | | 192.0.2.18/28 | |
# | | | | | | | |
# | \______ ______/ | | \______ ______/ |
# | \ / | | \ / |
# | + $h1 | | + $h2 |
# +-------------|------------+ +-------------|------------+
# | >1Gbps |
# +-------------|------------------------------------------------|------------+
# | SW + $swp1 + $swp2 |
# | _______/ \___________ ___________/ \_______ |
# | / \ / \ |
# | +-|-----------------+ | +-|-----------------+ | |
# | | + $swp1.10 | | | + $swp2.10 | | |
# | | | | .-------------+ $swp5.10 | | |
# | | BR1_10 | | | | | | |
# | | | | | | BR2_10 | | |
# | | + $swp2.10 | | | | | | |
# | +-|-----------------+ | | | + $swp3.10 | | |
# | | | | +-|-----------------+ | |
# | | +-----------------|-+ | | +-----------------|-+ |
# | | | $swp1.11 + | | | | $swp2.11 + | |
# | | | | | .-----------------+ $swp5.11 | |
# | | | BR1_11 | | | | | | |
# | | | | | | | | BR2_11 | |
# | | | $swp2.11 + | | | | | | |
# | | +-----------------|-+ | | | | $swp3.11 + | |
# | | | | | | +-----------------|-+ |
# | \_______ ___________/ | | \___________ _______/ |
# | \ / \ / \ / |
# | + $swp4 + $swp5 + $swp3 |
# +-------------|----------------------|-------------------------|------------+
# | | | 1Gbps
# \________1Gbps_________/ |
# +----------------------------|------------+
# | H3 + $h3 |
# | _____________________/ \_______ |
# | / \ |
# | | | |
# | + $h3.10 $h3.11 + |
# | 192.0.2.3/28 192.0.2.19/28 |
# +-----------------------------------------+
NUM_NETIFS=8
CHECK_TC="yes"
lib_dir=$(dirname $0)/../../../net/forwarding
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
source mlxsw_lib.sh
ipaddr()
{
local host=$1; shift
local vlan=$1; shift
echo 192.0.2.$((16 * (vlan - 10) + host))
}
host_create()
{
local dev=$1; shift
local host=$1; shift
simple_if_init $dev
mtu_set $dev 10000
vlan_create $dev 10 v$dev $(ipaddr $host 10)/28
ip link set dev $dev.10 type vlan egress 0:0
vlan_create $dev 11 v$dev $(ipaddr $host 11)/28
ip link set dev $dev.11 type vlan egress 0:1
}
host_destroy()
{
local dev=$1; shift
vlan_destroy $dev 11
vlan_destroy $dev 10
mtu_restore $dev
simple_if_fini $dev
}
h1_create()
{
host_create $h1 1
}
h1_destroy()
{
host_destroy $h1
}
h2_create()
{
host_create $h2 2
tc qdisc add dev $h2 clsact
# Some of the tests in this suite use multicast traffic. As this traffic
# enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
# e.g. traffic ingressing through $swp2 is flooded to $swp3 (the
# intended destination) and $swp5 (which is intended as ingress for
# another stream of traffic).
#
# This is generally not a problem, but if the $swp5 throughput is lower
# than $swp2 throughput, there will be a build-up at $swp5. That may
# cause packets to fail to queue up at $swp3 due to shared buffer
# quotas, and the test to spuriously fail.
#
# Prevent this by adding a shaper which limits the traffic in $h2 to
# 1Gbps.
tc qdisc replace dev $h2 root handle 10: tbf rate 1gbit \
burst 128K limit 1G
}
h2_destroy()
{
tc qdisc del dev $h2 root handle 10:
tc qdisc del dev $h2 clsact
host_destroy $h2
}
h3_create()
{
host_create $h3 3
}
h3_destroy()
{
host_destroy $h3
}
switch_create()
{
local intf
local vlan
ip link add dev br1_10 type bridge
ip link add dev br1_11 type bridge
ip link add dev br2_10 type bridge
ip link add dev br2_11 type bridge
for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do
ip link set dev $intf up
mtu_set $intf 10000
done
for intf in $swp1 $swp4; do
for vlan in 10 11; do
vlan_create $intf $vlan
ip link set dev $intf.$vlan master br1_$vlan
ip link set dev $intf.$vlan up
done
done
for intf in $swp2 $swp3 $swp5; do
for vlan in 10 11; do
vlan_create $intf $vlan
ip link set dev $intf.$vlan master br2_$vlan
ip link set dev $intf.$vlan up
done
done
ip link set dev $swp4.10 type vlan egress 0:0
ip link set dev $swp4.11 type vlan egress 0:1
for intf in $swp1 $swp2 $swp5; do
for vlan in 10 11; do
ip link set dev $intf.$vlan type vlan ingress 0:0 1:1
done
done
for intf in $swp3 $swp4; do
tc qdisc replace dev $intf root handle 1: tbf rate 1gbit \
burst 128K limit 1G
done
ip link set dev br1_10 up
ip link set dev br1_11 up
ip link set dev br2_10 up
ip link set dev br2_11 up
local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
devlink_port_pool_th_save $swp3 8
devlink_port_pool_th_set $swp3 8 $size
}
switch_destroy()
{
local intf
local vlan
devlink_port_pool_th_restore $swp3 8
ip link set dev br2_11 down
ip link set dev br2_10 down
ip link set dev br1_11 down
ip link set dev br1_10 down
for intf in $swp4 $swp3; do
tc qdisc del dev $intf root handle 1:
done
for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do
for vlan in 11 10; do
ip link set dev $intf.$vlan down
ip link set dev $intf.$vlan nomaster
vlan_destroy $intf $vlan
done
mtu_restore $intf
ip link set dev $intf down
done
ip link del dev br2_11
ip link del dev br2_10
ip link del dev br1_11
ip link del dev br1_10
}
setup_prepare()
{
h1=${NETIFS[p1]}
swp1=${NETIFS[p2]}
swp2=${NETIFS[p3]}
h2=${NETIFS[p4]}
swp3=${NETIFS[p5]}
h3=${NETIFS[p6]}
swp4=${NETIFS[p7]}
swp5=${NETIFS[p8]}
h3_mac=$(mac_get $h3)
vrf_prepare
h1_create
h2_create
h3_create
switch_create
}
cleanup()
{
pre_cleanup
switch_destroy
h3_destroy
h2_destroy
h1_destroy
vrf_cleanup
}
ping_ipv4()
{
ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10"
ping_test $h1.11 $(ipaddr 3 11) " from host 1, vlan 11"
ping_test $h2.10 $(ipaddr 3 10) " from host 2, vlan 10"
ping_test $h2.11 $(ipaddr 3 11) " from host 2, vlan 11"
}
get_tc()
{
local vlan=$1; shift
echo $((vlan - 10))
}
get_qdisc_handle()
{
local vlan=$1; shift
local tc=$(get_tc $vlan)
local band=$((8 - tc))
# Handle is 107: for TC1, 108: for TC0.
echo "10$band:"
}
get_qdisc_backlog()
{
local vlan=$1; shift
qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .backlog
}
get_mc_transmit_queue()
{
local vlan=$1; shift
local tc=$(($(get_tc $vlan) + 8))
ethtool_stats_get $swp3 tc_transmit_queue_tc_$tc
}
get_nmarked()
{
local vlan=$1; shift
ethtool_stats_get $swp3 ecn_marked
}
get_qdisc_nmarked()
{
local vlan=$1; shift
busywait_for_counter 1100 +1 \
qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .marked
}
get_qdisc_npackets()
{
local vlan=$1; shift
busywait_for_counter 1100 +1 \
qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets
}
send_packets()
{
local vlan=$1; shift
local proto=$1; shift
local pkts=$1; shift
$MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
-A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
-t $proto -q -c $pkts "$@"
}
# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
# success. After 10 failed attempts it bails out and returns 1. It dumps the
# backlog size to stdout.
build_backlog()
{
local vlan=$1; shift
local size=$1; shift
local proto=$1; shift
local tc=$((vlan - 10))
local band=$((8 - tc))
local cur=-1
local i=0
while :; do
local cur=$(busywait 1100 until_counter_is "> $cur" \
get_qdisc_backlog $vlan)
local diff=$((size - cur))
local pkts=$(((diff + 7999) / 8000))
if ((cur >= size)); then
echo $cur
return 0
elif ((i++ > 10)); then
echo $cur
return 1
fi
send_packets $vlan $proto $pkts "$@"
done
}
check_marking()
{
local get_nmarked=$1; shift
local vlan=$1; shift
local cond=$1; shift
local npackets_0=$(get_qdisc_npackets $vlan)
local nmarked_0=$($get_nmarked $vlan)
sleep 5
local npackets_1=$(get_qdisc_npackets $vlan)
local nmarked_1=$($get_nmarked $vlan)
local nmarked_d=$((nmarked_1 - nmarked_0))
local npackets_d=$((npackets_1 - npackets_0))
local pct=$((100 * nmarked_d / npackets_d))
echo $pct
((pct $cond))
}
ecn_test_common()
{
local name=$1; shift
local get_nmarked=$1; shift
local vlan=$1; shift
local limit=$1; shift
local backlog
local pct
# Build the below-the-limit backlog using UDP. We could use TCP just
# fine, but this way we get a proof that UDP is accepted when queue
# length is below the limit. The main stream is using TCP, and if the
# limit is misconfigured, we would see this traffic being ECN marked.
RET=0
backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
check_err $? "Could not build the requested backlog"
pct=$(check_marking "$get_nmarked" $vlan "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
log_test "TC $((vlan - 10)): $name backlog < limit"
# Now push TCP, because non-TCP traffic would be early-dropped after the
# backlog crosses the limit, and we want to make sure that the backlog
# is above the limit.
RET=0
backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
check_err $? "Could not build the requested backlog"
pct=$(check_marking "$get_nmarked" $vlan ">= 95")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
log_test "TC $((vlan - 10)): $name backlog > limit"
}
__do_ecn_test()
{
local get_nmarked=$1; shift
local vlan=$1; shift
local limit=$1; shift
local name=${1-ECN}; shift
start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
$h3_mac tos=0x01
sleep 1
ecn_test_common "$name" "$get_nmarked" $vlan $limit
# Up there we saw that UDP gets accepted when backlog is below the
# limit. Now that it is above, it should all get dropped, and backlog
# building should fail.
RET=0
build_backlog $vlan $((2 * limit)) udp >/dev/null
check_fail $? "UDP traffic went into backlog instead of being early-dropped"
log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped"
stop_traffic
sleep 1
}
do_ecn_test()
{
local vlan=$1; shift
local limit=$1; shift
__do_ecn_test get_nmarked "$vlan" "$limit"
}
do_ecn_test_perband()
{
local vlan=$1; shift
local limit=$1; shift
mlxsw_only_on_spectrum 3+ || return
__do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN"
}
do_ecn_nodrop_test()
{
local vlan=$1; shift
local limit=$1; shift
local name="ECN nodrop"
start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
$h3_mac tos=0x01
sleep 1
ecn_test_common "$name" get_nmarked $vlan $limit
# Up there we saw that UDP gets accepted when backlog is below the
# limit. Now that it is above, in nodrop mode, make sure it goes to
# backlog as well.
RET=0
build_backlog $vlan $((2 * limit)) udp >/dev/null
check_err $? "UDP traffic was early-dropped instead of getting into backlog"
log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped"
stop_traffic
sleep 1
}
do_red_test()
{
local vlan=$1; shift
local limit=$1; shift
local backlog
local pct
# Use ECN-capable TCP to verify there's no marking even though the queue
# is above limit.
start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
$h3_mac tos=0x01
# Pushing below the queue limit should work.
RET=0
backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
check_err $? "Could not build the requested backlog"
pct=$(check_marking get_nmarked $vlan "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
log_test "TC $((vlan - 10)): RED backlog < limit"
# Pushing above should not.
RET=0
backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
check_fail $? "Traffic went into backlog instead of being early-dropped"
pct=$(check_marking get_nmarked $vlan "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
local diff=$((limit - backlog))
pct=$((100 * diff / limit))
((-10 <= pct && pct <= 10))
check_err $? "backlog $backlog / $limit expected <= 10% distance"
log_test "TC $((vlan - 10)): RED backlog > limit"
stop_traffic
sleep 1
}
do_mc_backlog_test()
{
local vlan=$1; shift
local limit=$1; shift
local backlog
local pct
RET=0
start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc
start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc
qbl=$(busywait 5000 until_counter_is ">= 500000" \
get_qdisc_backlog $vlan)
check_err $? "Could not build MC backlog"
# Verify that we actually see the backlog on BUM TC. Do a busywait as
# well, performance blips might cause false fail.
local ebl
ebl=$(busywait 5000 until_counter_is ">= 500000" \
get_mc_transmit_queue $vlan)
check_err $? "MC backlog reported by qdisc not visible in ethtool"
stop_traffic
stop_traffic
log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
}
do_mark_test()
{
local vlan=$1; shift
local limit=$1; shift
local subtest=$1; shift
local fetch_counter=$1; shift
local should_fail=$1; shift
local base
mlxsw_only_on_spectrum 2+ || return
RET=0
start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
$h3_mac tos=0x01
# Create a bit of a backlog and observe no mirroring due to marks.
qevent_rule_install_$subtest
build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01 >/dev/null
base=$($fetch_counter)
count=$(busywait 1100 until_counter_is ">= $((base + 1))" \
$fetch_counter)
check_fail $? "Spurious packets ($base -> $count) observed without buffer pressure"
# Above limit, everything should be mirrored, we should see lots of
# packets.
build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null
busywait_for_counter 1100 +10000 \
$fetch_counter > /dev/null
check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd"
# When the rule is uninstalled, there should be no mirroring.
qevent_rule_uninstall_$subtest
busywait_for_counter 1100 +10 \
$fetch_counter > /dev/null
check_fail $? "Spurious packets observed after uninstall"
if ((should_fail)); then
log_test "TC $((vlan - 10)): marked packets not $subtest'd"
else
log_test "TC $((vlan - 10)): marked packets $subtest'd"
fi
stop_traffic
sleep 1
}
do_drop_test()
{
local vlan=$1; shift
local limit=$1; shift
local trigger=$1; shift
local subtest=$1; shift
local fetch_counter=$1; shift
local base
local now
mlxsw_only_on_spectrum 2+ || return
RET=0
start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac
# Create a bit of a backlog and observe no mirroring due to drops.
qevent_rule_install_$subtest
base=$($fetch_counter)
build_backlog $vlan $((2 * limit / 3)) udp >/dev/null
busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null
check_fail $? "Spurious packets observed without buffer pressure"
# Push to the queue until it's at the limit. The configured limit is
# rounded by the qdisc and then by the driver, so this is the best we
# can do to get to the real limit of the system.
build_backlog $vlan $((3 * limit / 2)) udp >/dev/null
base=$($fetch_counter)
send_packets $vlan udp 11
now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter)
check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen"
# When no extra traffic is injected, there should be no mirroring.
busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
check_fail $? "Spurious packets observed"
# When the rule is uninstalled, there should be no mirroring.
qevent_rule_uninstall_$subtest
send_packets $vlan udp 11
busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
check_fail $? "Spurious packets observed after uninstall"
log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd"
stop_traffic
sleep 1
}
qevent_rule_install_mirror()
{
tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
action mirred egress mirror dev $swp2 hw_stats disabled
}
qevent_rule_uninstall_mirror()
{
tc filter del block 10 pref 1234 handle 102 matchall
}
qevent_counter_fetch_mirror()
{
tc_rule_handle_stats_get "dev $h2 ingress" 101
}
do_drop_mirror_test()
{
local vlan=$1; shift
local limit=$1; shift
local qevent_name=$1; shift
tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
flower skip_sw ip_proto udp \
action drop
do_drop_test "$vlan" "$limit" "$qevent_name" mirror \
qevent_counter_fetch_mirror
tc filter del dev $h2 ingress pref 1 handle 101 flower
}
do_mark_mirror_test()
{
local vlan=$1; shift
local limit=$1; shift
tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
flower skip_sw ip_proto tcp \
action drop
do_mark_test "$vlan" "$limit" mirror \
qevent_counter_fetch_mirror \
$(: should_fail=)0
tc filter del dev $h2 ingress pref 1 handle 101 flower
}
qevent_rule_install_trap()
{
tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
action trap hw_stats disabled
}
qevent_rule_uninstall_trap()
{
tc filter del block 10 pref 1234 handle 102 matchall
}
qevent_counter_fetch_trap()
{
local trap_name=$1; shift
devlink_trap_rx_packets_get "$trap_name"
}
do_drop_trap_test()
{
local vlan=$1; shift
local limit=$1; shift
local trap_name=$1; shift
do_drop_test "$vlan" "$limit" "$trap_name" trap \
"qevent_counter_fetch_trap $trap_name"
}
qevent_rule_install_trap_fwd()
{
tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
action trap_fwd hw_stats disabled
}
qevent_rule_uninstall_trap_fwd()
{
tc filter del block 10 pref 1234 handle 102 matchall
}