#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Test that packets are sampled when tc-sample is used and that reported
# metadata is correct. Two sets of hosts (with and without LAG) are used, since
# metadata extraction in mlxsw is a bit different when LAG is involved.
#
# +---------------------------------+       +---------------------------------+
# | H1 (vrf)                        |       | H3 (vrf)                        |
# |    + $h1                        |       |    + $h3_lag                    |
# |    | 192.0.2.1/28               |       |    | 192.0.2.17/28              |
# |    |                            |       |    |                            |
# |    |  default via 192.0.2.2     |       |    |  default via 192.0.2.18    |
# +----|----------------------------+       +----|----------------------------+
#      |                                         |
# +----|-----------------------------------------|----------------------------+
# |    | 192.0.2.2/28                            | 192.0.2.18/28              |
# |    + $rp1                                    + $rp3_lag                   |
# |                                                                           |
# |    + $rp2                                    + $rp4_lag                   |
# |    | 198.51.100.2/28                         | 198.51.100.18/28           |
# +----|-----------------------------------------|----------------------------+
#      |                                         |
# +----|----------------------------+       +----|----------------------------+
# |    |  default via 198.51.100.2  |       |    |  default via 198.51.100.18 |
# |    |                            |       |    |                            |
# |    | 198.51.100.1/28            |       |    | 198.51.100.17/28           |
# |    + $h2                        |       |    + $h4_lag                    |
# | H2 (vrf)                        |       | H4 (vrf)                        |
# +---------------------------------+       +---------------------------------+
# Locate the shared forwarding selftest library relative to this script. $0 is
# quoted so that a checkout path containing whitespace does not break the
# lookup.
lib_dir=$(dirname "$0")/../../../net/forwarding

# Test functions defined in this file; presumably consumed by tests_run from
# the sourced library (confirm in lib.sh).
ALL_TESTS="
tc_sample_rate_test
tc_sample_max_rate_test
tc_sample_conflict_test
tc_sample_group_conflict_test
tc_sample_md_iif_test
tc_sample_md_lag_iif_test
tc_sample_md_oif_test
tc_sample_md_lag_oif_test
tc_sample_md_out_tc_test
tc_sample_md_out_tc_occ_test
tc_sample_md_latency_test
tc_sample_acl_group_conflict_test
tc_sample_acl_rate_test
tc_sample_acl_max_rate_test
"
# setup_prepare reads NETIFS[p1] through NETIFS[p8].
NUM_NETIFS=8

source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"
source mlxsw_lib.sh

# Available at https://github.com/Mellanox/libpsample
require_command psample

# Allocate the capture file last. The EXIT trap that removes it is installed
# only at the bottom of this script, so creating the file before the library
# is sourced or before require_command runs would leave it behind if any of
# those steps ends the script early.
# NOTE(review): assumes lib.sh / require_command may exit on unmet
# prerequisites - confirm against lib.sh.
CAPTURE_FILE=$(mktemp)
# Set up host H1: initialise $h1 with its address through simple_if_init and
# install a default route via the router in VRF v$h1.
h1_create()
{
	local host_addr=192.0.2.1/28
	local router_addr=192.0.2.2

	simple_if_init $h1 $host_addr
	ip -4 route add default vrf v$h1 nexthop via $router_addr
}
# Undo h1_create in reverse order: drop the default route from VRF v$h1, then
# release $h1 through simple_if_fini.
h1_destroy()
{
	local host_addr=192.0.2.1/28
	local router_addr=192.0.2.2

	ip -4 route del default vrf v$h1 nexthop via $router_addr
	simple_if_fini $h1 $host_addr
}
# Set up host H2: initialise $h2 with its address through simple_if_init and
# install a default route via the router in VRF v$h2.
h2_create()
{
	local host_addr=198.51.100.1/28
	local router_addr=198.51.100.2

	simple_if_init $h2 $host_addr
	ip -4 route add default vrf v$h2 nexthop via $router_addr
}
# Undo h2_create in reverse order: drop the default route from VRF v$h2, then
# release $h2 through simple_if_fini.
h2_destroy()
{
	local host_addr=198.51.100.1/28
	local router_addr=198.51.100.2

	ip -4 route del default vrf v$h2 nexthop via $router_addr
	simple_if_fini $h2 $host_addr
}
# Set up host H3 behind a LAG: enslave $h3 to an 802.3ad bond named
# ${h3}_bond, initialise the bond with the host address and add a default
# route via the router in the bond's VRF.
h3_create()
{
	local lag=${h3}_bond

	# The port is taken down before it is enslaved to the bond.
	ip link set dev $h3 down
	ip link add name $lag type bond mode 802.3ad
	ip link set dev $h3 master $lag

	simple_if_init $lag 192.0.2.17/28
	ip -4 route add default vrf v$lag nexthop via 192.0.2.18
}
# Undo h3_create in reverse order: remove the default route, release the bond
# through simple_if_fini, detach $h3 from the bond and delete the bond.
h3_destroy()
{
	local lag=${h3}_bond

	ip -4 route del default vrf v$lag nexthop via 192.0.2.18
	simple_if_fini $lag 192.0.2.17/28

	ip link set dev $h3 nomaster
	ip link del dev $lag
}
# Set up host H4 behind a LAG: enslave $h4 to an 802.3ad bond named
# ${h4}_bond, initialise the bond with the host address and add a default
# route via the router in the bond's VRF.
h4_create()
{
	local lag=${h4}_bond

	# The port is taken down before it is enslaved to the bond.
	ip link set dev $h4 down
	ip link add name $lag type bond mode 802.3ad
	ip link set dev $h4 master $lag

	simple_if_init $lag 198.51.100.17/28
	ip -4 route add default vrf v$lag nexthop via 198.51.100.18
}
# Undo h4_create in reverse order: remove the default route, release the bond
# through simple_if_fini, detach $h4 from the bond and delete the bond.
h4_destroy()
{
	local lag=${h4}_bond

	ip -4 route del default vrf v$lag nexthop via 198.51.100.18
	simple_if_fini $lag 198.51.100.17/28

	ip link set dev $h4 nomaster
	ip link del dev $lag
}
# Bring up one plain router port, assign its address and attach a clsact
# qdisc to it.
# $1 - port name, $2 - address with prefix length
__router_port_create()
{
	local port=$1; shift
	local addr=$1; shift

	ip link set dev $port up
	__addr_add_del $port add $addr
	tc qdisc add dev $port clsact
}

# Create an 802.3ad bond named <port>_bond on top of one router port, assign
# the address to the bond, attach a clsact qdisc to the physical port and
# bring the bond up.
# $1 - port name, $2 - address with prefix length
__router_lag_create()
{
	local port=$1; shift
	local addr=$1; shift
	local lag=${port}_bond

	ip link add name $lag type bond mode 802.3ad
	ip link set dev $port master $lag
	__addr_add_del $lag add $addr
	tc qdisc add dev $port clsact
	ip link set dev $lag up
}

# Configure the four router ports: $rp1 and $rp2 as plain ports, $rp3 and
# $rp4 as LAG members.
router_create()
{
	__router_port_create "$rp1" 192.0.2.2/28
	__router_port_create "$rp2" 198.51.100.2/28

	__router_lag_create "$rp3" 192.0.2.18/28
	__router_lag_create "$rp4" 198.51.100.18/28
}
# Tear down one LAG router port: take the bond <port>_bond down, remove the
# clsact qdisc from the physical port, drop the bond address, detach the port
# and delete the bond.
# $1 - port name, $2 - address with prefix length
__router_lag_destroy()
{
	local port=$1; shift
	local addr=$1; shift
	local lag=${port}_bond

	ip link set dev $lag down
	tc qdisc del dev $port clsact
	__addr_add_del $lag del $addr
	ip link set dev $port nomaster
	ip link del dev $lag
}

# Tear down one plain router port: remove its clsact qdisc, drop its address
# and take it down.
# $1 - port name, $2 - address with prefix length
__router_port_destroy()
{
	local port=$1; shift
	local addr=$1; shift

	tc qdisc del dev $port clsact
	__addr_add_del $port del $addr
	ip link set dev $port down
}

# Undo router_create, handling the ports in the opposite order
# ($rp4, $rp3, $rp2, $rp1).
router_destroy()
{
	__router_lag_destroy "$rp4" 198.51.100.18/28
	__router_lag_destroy "$rp3" 192.0.2.18/28

	__router_port_destroy "$rp2" 198.51.100.2/28
	__router_port_destroy "$rp1" 192.0.2.2/28
}
# Map the NETIFS entries p1..p8 onto the global port variables used by the
# tests, then build the topology: VRFs, the four hosts and the router.
setup_prepare()
{
	local step

	# Plain pair: H1 - router - H2.
	h1=${NETIFS[p1]}
	rp1=${NETIFS[p2]}
	rp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	# LAG pair: H3 - router - H4.
	h3=${NETIFS[p5]}
	rp3=${NETIFS[p6]}
	h4=${NETIFS[p7]}
	rp4=${NETIFS[p8]}

	for step in vrf_prepare h1_create h2_create h3_create h4_create \
		    router_create; do
		$step
	done
}
# EXIT handler: run the library's pre_cleanup hook, delete the psample capture
# file and dismantle the topology in the reverse order of setup_prepare.
cleanup()
{
	local step

	pre_cleanup

	rm -f $CAPTURE_FILE

	for step in router_destroy h4_destroy h3_destroy h2_destroy \
		    h1_destroy vrf_cleanup; do
		$step
	done
}
# Start a fresh capture: discard any previous capture file, launch psample in
# the background with both stdout and stderr written to $CAPTURE_FILE, and
# pause for a second before returning.
psample_capture_start()
{
	rm -f $CAPTURE_FILE

	psample > $CAPTURE_FILE 2>&1 &

	sleep 1
}
# Stop the capture: signal the current background job and, if that succeeded,
# wait for it to finish. Everything written to stderr while this function
# runs is discarded.
psample_capture_stop()
{
	kill %% && wait %%
} 2>/dev/null
# Check the accuracy of a matchall sampling rule (rate 32, group 1) bound to
# the ingress of $rp1.
# $1 - description appended to the logged test name
# $2 - destination IP of the UDP stream sent from $h1
__tc_sample_rate_test()
{
	local desc=$1; shift
	local dip=$1; shift
	local -a flt=(dev $rp1 ingress protocol all pref 1 handle 101 matchall)
	local sampled deviation

	RET=0

	tc filter add "${flt[@]}" skip_sw action sample rate 32 group 1
	check_err $? "Failed to configure sampling rule"

	# 320000 packets at a rate of 1:32 should yield about 10000 samples.
	psample_capture_start
	ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
		-B $dip -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	sampled=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
	deviation=$((100 * (sampled - 10000) / 10000))
	(( deviation >= -25 && deviation <= 25 ))
	check_err $? "Expected 10000 packets, got $sampled packets, which is $deviation% off. Required accuracy is +-25%"

	log_test "tc sample rate ($desc)"

	tc filter del "${flt[@]}"
}
# Run the sampling-rate check for two destinations: H2's address
# ("forward") and the address configured on $rp1 ("local receive").
tc_sample_rate_test()
{
	# Description / destination pairs, consumed two at a time.
	set -- "forward" 198.51.100.1 \
	       "local receive" 192.0.2.2

	while (( $# )); do
		__tc_sample_rate_test "$1" $2
		shift 2
	done
}
# Check the upper bound of the matchall sampling rate on $rp1: a rate of
# 35 * 10^8 must be accepted and a rate one above it must be rejected.
tc_sample_max_rate_test()
{
	local -a flt=(dev $rp1 ingress protocol all pref 1 handle 101 matchall)
	local max_rate=$((35 * 10 ** 8))
	local over_limit_rc

	RET=0

	tc filter add "${flt[@]}" skip_sw action sample rate $max_rate group 1
	check_err $? "Failed to configure sampling rule with max rate"

	tc filter del "${flt[@]}"

	tc filter add "${flt[@]}" skip_sw action sample \
		rate $((max_rate + 1)) group 1 &> /dev/null
	over_limit_rc=$?
	check_fail $over_limit_rc "Managed to configure sampling rate above maximum"

	log_test "tc sample maximum rate"

	# If the over-limit rule was accepted the test has already failed, but
	# the rule must still be removed: later tests reuse pref 1 / handle 101
	# on this port and would otherwise fail for an unrelated reason.
	if (( over_limit_rc == 0 )); then
		tc filter del "${flt[@]}"
	fi
}
# Check that a port accepts only one matchall sampling rule at a time, even
# when the second rule carries the same parameters, and that the second rule
# becomes configurable once the first one is removed.
tc_sample_conflict_test()
{
	local -a rule1=(dev $rp1 ingress protocol all pref 1 handle 101 matchall)
	local -a rule2=(dev $rp1 ingress protocol all pref 2 handle 102 matchall)
	local -a act=(skip_sw action sample rate 1024 group 1)

	RET=0

	tc filter add "${rule1[@]}" "${act[@]}"
	check_err $? "Failed to configure sampling rule"

	# Same parameters, different preference and handle: must be refused.
	tc filter add "${rule2[@]}" "${act[@]}" &> /dev/null
	check_fail $? "Managed to configure second sampling rule"

	# With the first rule gone, the second one must be accepted.
	tc filter del "${rule1[@]}"

	tc filter add "${rule2[@]}" "${act[@]}"
	check_err $? "Failed to configure sampling rule after deletion"

	log_test "tc sample conflict test"

	tc filter del "${rule2[@]}"
}
# Check that two matchall sampling rules with different psample groups cannot
# be configured on the same port.
tc_sample_group_conflict_test()
{
	local -a rule1=(dev $rp1 ingress protocol all pref 1 handle 101 matchall)
	local -a rule2=(dev $rp1 ingress protocol all pref 2 handle 102 matchall)
	local conflict_rc

	RET=0

	tc filter add "${rule1[@]}" skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	tc filter add "${rule2[@]}" skip_sw action sample rate 1024 group 2 \
		&> /dev/null
	conflict_rc=$?
	check_fail $conflict_rc "Managed to configure sampling rule with conflicting group"

	log_test "tc sample group conflict test"

	# If the conflicting rule was accepted the test has already failed, but
	# the rule must not be left on the port where it would disturb the
	# tests that follow.
	if (( conflict_rc == 0 )); then
		tc filter del "${rule2[@]}"
	fi
	tc filter del "${rule1[@]}"
}
# Check that packets sampled on the ingress of $rp1 are reported with the
# ifindex of $rp1 as their in-ifindex.
tc_sample_md_iif_test()
{
	local -a flt=(dev $rp1 ingress protocol all pref 1 handle 101 matchall)
	local ifindex

	RET=0

	tc filter add "${flt[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]')
	# The trailing space in the pattern prevents a match on a longer
	# number that merely starts with the same digits.
	grep -q -e "in-ifindex $ifindex " $CAPTURE_FILE
	check_err $? "Sampled packets do not have expected in-ifindex"

	log_test "tc sample iif"

	tc filter del "${flt[@]}"
}
# LAG variant of the in-ifindex check: sample on the ingress of $rp3 while H3
# sends through its bond, and expect the ifindex of $rp3 as in-ifindex.
tc_sample_md_lag_iif_test()
{
	local -a flt=(dev $rp3 ingress protocol all pref 1 handle 101 matchall)
	local lag=${h3}_bond
	local ifindex

	RET=0

	tc filter add "${flt[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec v$lag $MZ $lag -c 3200 -d 1msec -p 64 \
		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]')
	grep -q -e "in-ifindex $ifindex " $CAPTURE_FILE
	check_err $? "Sampled packets do not have expected in-ifindex"

	log_test "tc sample lag iif"

	tc filter del "${flt[@]}"
}
# Check that packets sampled on the ingress of $rp1 and sent towards H2 are
# reported with the ifindex of $rp2 as their out-ifindex.
tc_sample_md_oif_test()
{
	local -a flt=(dev $rp1 ingress protocol all pref 1 handle 101 matchall)
	local ifindex

	RET=0

	tc filter add "${flt[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]')
	grep -q -e "out-ifindex $ifindex " $CAPTURE_FILE
	check_err $? "Sampled packets do not have expected out-ifindex"

	log_test "tc sample oif"

	tc filter del "${flt[@]}"
}
# LAG variant of the out-ifindex check: sample on the ingress of $rp3 while
# H3 sends towards H4, and expect the ifindex of $rp4 as out-ifindex.
tc_sample_md_lag_oif_test()
{
	local -a flt=(dev $rp3 ingress protocol all pref 1 handle 101 matchall)
	local lag=${h3}_bond
	local ifindex

	RET=0

	tc filter add "${flt[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec v$lag $MZ $lag -c 3200 -d 1msec -p 64 \
		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]')
	grep -q -e "out-ifindex $ifindex " $CAPTURE_FILE
	check_err $? "Sampled packets do not have expected out-ifindex"

	log_test "tc sample lag oif"

	tc filter del "${flt[@]}"
}
# Check the out-tc attribute of packets sampled on the ingress of $rp1: first
# with the default egress configuration on $rp2, then with a prio qdisc whose
# priomap sends every priority to band 0.
tc_sample_md_out_tc_test()
{
	local -a flt=(dev $rp1 ingress protocol all pref 1 handle 101 matchall)
	local -a traffic=(ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64
			  -A 192.0.2.1 -B 198.51.100.1
			  -t udp dp=52768,sp=42768 -q)

	RET=0

	# Output traffic class is not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add "${flt[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	# By default, all the packets should go to the same traffic class (0).
	psample_capture_start
	"${traffic[@]}"
	psample_capture_stop

	grep -q -e "out-tc 0 " $CAPTURE_FILE
	check_err $? "Sampled packets do not have expected out-tc (0)"

	# Map all priorities to highest traffic class (7) and check reported
	# out-tc.
	tc qdisc replace dev $rp2 root handle 1: \
		prio bands 3 priomap 0 0 0 0 0 0 0 0

	psample_capture_start
	"${traffic[@]}"
	psample_capture_stop

	grep -q -e "out-tc 7 " $CAPTURE_FILE
	check_err $? "Sampled packets do not have expected out-tc (7)"

	log_test "tc sample out-tc"

	tc qdisc del dev $rp2 root handle 1:
	tc filter del "${flt[@]}"
}
# Check the out-tc-occ attribute: congest the egress of $rp2 with a tbf
# shaper, read the qdisc backlog, and require the occupancy in the last
# captured sample to be within +-1% of that backlog.
tc_sample_md_out_tc_occ_test()
{
	local backlog pct occ
	local pos_re='^[1-9][0-9]*$'
	local uint_re='^(0|[1-9][0-9]*)$'

	RET=0

	# Output traffic class occupancy is not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	# Configure a shaper on egress to create congestion.
	tc qdisc replace dev $rp2 root handle 1: \
		tbf rate 1Mbit burst 256k limit 1M

	psample_capture_start

	ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q &

	# Allow congestion to reach steady state.
	sleep 10

	backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')

	# Kill mausezahn.
	{ kill %% && wait %%; } 2>/dev/null

	psample_capture_stop

	# Record last congestion sample. The value is taken from the 16th
	# space-separated field of the psample output line.
	occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
		cut -d ' ' -f 16)

	# Validate both readings before doing arithmetic on them. A zero or
	# non-numeric backlog would otherwise raise a division-by-zero error
	# that aborts this function before the result is logged and before
	# the qdisc and filter below are removed.
	if [[ $backlog =~ $pos_re && $occ =~ $uint_re ]]; then
		pct=$((100 * (occ - backlog) / backlog))
		(( -1 <= pct && pct <= 1 ))
		check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-1%"
	else
		check_err 1 "Cannot compare congestion: recorded backlog is \"$backlog\", sampled congestion is \"$occ\""
	fi

	log_test "tc sample out-tc-occ"

	tc qdisc del dev $rp2 root handle 1:
	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
}
# Check that packets sampled on the egress of $rp2 carry a latency attribute.
tc_sample_md_latency_test()
{
	local -a flt=(dev $rp2 egress protocol all pref 1 handle 101 matchall)

	RET=0

	# Egress sampling not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add "${flt[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	grep -q -e "latency " $CAPTURE_FILE
	check_err $? "Sampled packets do not have latency attribute"

	log_test "tc sample latency"

	tc filter del "${flt[@]}"
}
# Check that flower sampling rules on the same port may share a psample
# group, but that a rule with a different group is rejected.
tc_sample_acl_group_conflict_test()
{
	local -a rule1=(dev $rp1 ingress protocol ip pref 1 handle 101 flower)
	local -a rule2=(dev $rp1 ingress protocol ip pref 2 handle 102 flower)
	local -a rule3=(dev $rp1 ingress protocol ip pref 3 handle 103 flower)
	local conflict_rc

	RET=0

	# Test that two flower sampling rules cannot be configured on the same
	# port with different groups.

	# Policy-based sampling is not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add "${rule1[@]}" skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	tc filter add "${rule2[@]}" skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule with same group"

	tc filter add "${rule3[@]}" skip_sw action sample rate 1024 group 2 \
		&> /dev/null
	conflict_rc=$?
	check_fail $conflict_rc "Managed to configure sampling rule with conflicting group"

	log_test "tc sample (w/ flower) group conflict test"

	# If the conflicting rule was accepted the test has already failed, but
	# the rule must not be left on the port where it would disturb the
	# tests that follow.
	if (( conflict_rc == 0 )); then
		tc filter del "${rule3[@]}"
	fi
	tc filter del "${rule2[@]}"
	tc filter del "${rule1[@]}"
}
# Check flower-based sampling at rate 32: a rule matching the stream's
# destination must sample about 10000 of 320000 packets, and a rule matching
# a different destination must sample nothing.
# $1 - binding point passed to tc (the callers use ingress / egress)
# $2 - port the rule is installed on
__tc_sample_acl_rate_test()
{
	local bind=$1; shift
	local port=$1; shift
	local -a flt=(dev $port $bind protocol ip pref 1 handle 101 flower)
	local -a act=(action sample rate 32 group 1)
	local sampled deviation

	RET=0

	# Policy-based sampling is not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add "${flt[@]}" skip_sw dst_ip 198.51.100.1 "${act[@]}"
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	sampled=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
	deviation=$((100 * (sampled - 10000) / 10000))
	(( deviation >= -25 && deviation <= 25 ))
	check_err $? "Expected 10000 packets, got $sampled packets, which is $deviation% off. Required accuracy is +-25%"

	# Setup a filter that should not match any packet and make sure packets
	# are not sampled.
	tc filter del "${flt[@]}"

	tc filter add "${flt[@]}" skip_sw dst_ip 198.51.100.10 "${act[@]}"
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	grep -q -e "group 1 " $CAPTURE_FILE
	check_fail $? "Sampled packets when should not"

	log_test "tc sample (w/ flower) rate ($bind)"

	tc filter del "${flt[@]}"
}
# Run the flower sampling-rate check twice: bound to the ingress of $rp1 and
# bound to the egress of $rp2.
tc_sample_acl_rate_test()
{
	# Binding point / port pairs, consumed two at a time.
	set -- ingress "$rp1" \
	       egress "$rp2"

	while (( $# )); do
		__tc_sample_acl_rate_test $1 $2
		shift 2
	done
}
# Check the upper bound of the flower sampling rate on $rp1: a rate of
# 2^24 - 1 must be accepted and a rate of 2^24 must be rejected.
tc_sample_acl_max_rate_test()
{
	local -a flt=(dev $rp1 ingress protocol ip pref 1 handle 101 flower)
	local max_rate=$((2 ** 24 - 1))
	local over_limit_rc

	RET=0

	# Policy-based sampling is not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add "${flt[@]}" skip_sw action sample rate $max_rate group 1
	check_err $? "Failed to configure sampling rule with max rate"

	tc filter del "${flt[@]}"

	tc filter add "${flt[@]}" skip_sw action sample \
		rate $((max_rate + 1)) group 1 &> /dev/null
	over_limit_rc=$?
	check_fail $over_limit_rc "Managed to configure sampling rate above maximum"

	log_test "tc sample (w/ flower) maximum rate"

	# If the over-limit rule was accepted the test has already failed, but
	# the rule must still be removed so that it does not stay on the port
	# after the test.
	if (( over_limit_rc == 0 )); then
		tc filter del "${flt[@]}"
	fi
}
# Install the teardown handler before anything is configured, so that the
# topology and the capture file are removed on every exit path.
trap cleanup EXIT
# Build the topology defined in this file.
setup_prepare
# Library helpers, not defined in this file: presumably setup_wait blocks
# until the interfaces are ready and tests_run executes the functions listed
# in ALL_TESTS - confirm in lib.sh.
setup_wait
tests_run
# EXIT_STATUS is not set in this file; presumably the library maintains it as
# the aggregate test result - confirm in lib.sh.
exit $EXIT_STATUS