linux/tools/testing/selftests/net/netfilter/nft_queue.sh

#!/bin/bash
#
# This tests nf_queue:
# 1. can process packets from all hooks
# 2. support running nfqueue from more than one base chain
#
# shellcheck disable=SC2162,SC2317

source lib.sh
ret=0
timeout=5

# EXIT handler: kill everything still running inside the test namespaces,
# call cleanup_all_ns (provided by lib.sh) and delete the scratch files.
#
# Globals read: ns1 ns2 ns3 nsrouter TMPINPUT TMPFILE0 TMPFILE1 TMPFILE2 TMPFILE3
cleanup()
{
	local ns

	# ns3 must be swept too: test_udp_ct_race starts a socat listener there.
	# kill complains when a namespace has no processes left; that is expected,
	# hence the stderr redirect.
	for ns in "$ns1" "$ns2" "$ns3" "$nsrouter"; do
		ip netns pids "$ns" | xargs kill 2>/dev/null
	done

	cleanup_all_ns

	# -f keeps rm quiet for files that were never created.
	rm -f "$TMPINPUT" "$TMPFILE0" "$TMPFILE1" "$TMPFILE2" "$TMPFILE3"
}

# Required tools.  checktool comes from lib.sh; presumably it ends the
# test early when the probe command fails -- confirm against lib.sh.
checktool "nft --version" "test without nft tool"
checktool "socat -h" "run test without socat"

# Used by the sctp tests; the result is not checked here.
modprobe -q sctp

# Installed before any namespace or temp file exists, so every later exit
# path runs cleanup().
trap cleanup EXIT

# ns1, ns2 and ns3 are the end hosts, nsrouter sits between them.
setup_ns ns1 ns2 ns3 nsrouter

# Scratch files for listener output; cleanup() removes them again.
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
TMPFILE2=$(mktemp)
TMPFILE3=$(mktemp)

# Payload for the tcp/sctp transfers: COUNT blocks of 1M zeroes, written
# sparsely.  A smaller file is used when KSFT_MACHINE_SLOW is "yes".
TMPINPUT=$(mktemp)
COUNT=200
[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25
dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"

# Wire the namespaces together: router device vethN is paired with eth0 of
# host namespace N+1.  The first pair doubles as the veth support probe.
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
	echo "SKIP: No virtual ethernet pair device support in kernel"
	exit $ksft_skip
fi
ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3"

# Router side: vethN owns 10.0.(N+1).1/24 and dead:(N+1)::1/64.
for idx in 0 1 2; do
	ip -net "$nsrouter" link set "veth$idx" up
	ip -net "$nsrouter" addr add "10.0.$((idx + 1)).1/24" dev "veth$idx"
	ip -net "$nsrouter" addr add "dead:$((idx + 1))::1/64" dev "veth$idx" nodad
done

# Host side: bring all links up first ...
for peer in "$ns1" "$ns2" "$ns3"; do
	ip -net "$peer" link set eth0 up
done

# ... then give each host the .99 address and default routes via the router.
idx=0
for peer in "$ns1" "$ns2" "$ns3"; do
	idx=$((idx + 1))
	ip -net "$peer" addr add "10.0.$idx.99/24" dev eth0
	ip -net "$peer" addr add "dead:$idx::99/64" dev eth0 nodad
	ip -net "$peer" route add default via "10.0.$idx.1"
	ip -net "$peer" route add default via "dead:$idx::1"
done
unset idx peer

# Load table "inet $name" into the router namespace.
#
# One base chain is registered per hook (prerouting, input, forward,
# output, postrouting), all at priority $prio, and each jumps to the
# shared "nfq" chain:
#  - nfq:     IPv4 icmp is queued without an explicit queue number and
#             ICMPv6 echo request/reply goes to queue 1, both with "bypass".
#  - forward: additionally queues tcp dport 12345 to queue 2 (no bypass).
#  - output:  additionally queues tcp dport 12345 and tcp sport 23456 to
#             queue 3 (no bypass).
#
# $1 - table name
# $2 - priority used for every base chain
load_ruleset() {
	local name=$1
	local prio=$2

ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
table inet $name {
	chain nfq {
		ip protocol icmp queue bypass
		icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
	}
	chain pre {
		type filter hook prerouting priority $prio; policy accept;
		jump nfq
	}
	chain input {
		type filter hook input priority $prio; policy accept;
		jump nfq
	}
	chain forward {
		type filter hook forward priority $prio; policy accept;
		tcp dport 12345 queue num 2
		jump nfq
	}
	chain output {
		type filter hook output priority $prio; policy accept;
		tcp dport 12345 queue num 3
		tcp sport 23456 queue num 3
		jump nfq
	}
	chain post {
		type filter hook postrouting priority $prio; policy accept;
		jump nfq
	}
}
EOF
}

# Load table "inet countrules" into the router namespace: one base chain
# per hook (prerouting, input, forward, output, postrouting) at priority
# $prio, each holding nothing but a "counter" rule.
#
# $1 - priority used for every base chain
load_counter_ruleset() {
	local prio=$1

ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
table inet countrules {
	chain pre {
		type filter hook prerouting priority $prio; policy accept;
		counter
	}
	chain input {
		type filter hook input priority $prio; policy accept;
		counter
	}
	chain forward {
		type filter hook forward priority $prio; policy accept;
		counter
	}
	chain output {
		type filter hook output priority $prio; policy accept;
		counter
	}
	chain post {
		type filter hook postrouting priority $prio; policy accept;
		counter
	}
}
EOF
}

# Ping ns2's addresses from ns1 (one echo request each, IPv4 then IPv6).
# Returns 0 when both pings succeed, 1 when IPv4 fails, 2 when IPv6 fails.
test_ping() {
	ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null || return 1
	ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null || return 2

	return 0
}

# Ping the router's veth1 addresses from ns1 (IPv4 then IPv6).
# Returns 0 when both pings succeed, 3 when IPv4 fails, 4 when IPv6 fails.
test_ping_router() {
	ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null || return 3
	ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null || return 4

	return 0
}

# Check that a queue rule without "bypass" drops traffic while nobody
# listens on the queue.
#
# A temporary table "$proto blackh" queues all forwarded packets to queue
# 600, then ns1 pings ns2.  The ping is expected to FAIL; the script exits
# with status 1 if it succeeds or if the table cannot be removed afterwards.
#
# $1 - nft family, "ip" or "ip6"
test_queue_blackhole() {
	local proto=$1
	local lret

ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
table $proto blackh {
	chain forward {
	type filter hook forward priority 0; policy accept;
		queue num 600
	}
}
EOF
	if [ "$proto" = "ip" ]; then
		ip netns exec "$ns1" ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
		lret=$?
	elif [ "$proto" = "ip6" ]; then
		ip netns exec "$ns1" ping -W 2 -c 1 -q dead:2::99 > /dev/null
		lret=$?
	else
		# unknown family: force the "expected failure" branch to pass
		lret=111
	fi

	# queue without bypass keyword should drop traffic if no listener exists.
	if [ "$lret" -eq 0 ]; then
		echo "FAIL: $proto expected failure, got $lret" 1>&2
		exit 1
	fi

	if ! ip netns exec "$nsrouter" nft delete table "$proto" blackh; then
		echo "FAIL: $proto: Could not delete blackh table" 1>&2
		exit 1
	fi

	echo "PASS: $proto: statement with no listener results in packet drop"
}

# Succeed once queue number $2 is listed in namespace $1.
#
# $1 - network namespace to look in
# $2 - queue number
nf_queue_wait()
{
	local ns="$1" queue="$2"
	local qfile="/proc/self/net/netfilter/nfnetlink_queue"

	# No such file means the nfnetlink_queue module is not loaded yet.
	# We do not load it ourselves: the nf_queue helper spawned in the
	# background asks for the functionality, so the kernel autoloads the
	# module and the file shows up once that has completed.
	[ -f "$qfile" ] || return 1

	ip netns exec "$ns" cat "$qfile" | grep -q "^ *$queue "
}

# Run listeners on queue 0 and queue 1 in the router namespace, generate
# ping traffic through and to the router, and verify that each listener
# reports exactly "$1 packets total" as its last output line.
#
# Exits the script on any failure: with the status of the failing ping
# helper (1-4) for connectivity problems, with 1 for a packet count mismatch.
#
# $1 - number of packets each listener is expected to report
test_queue()
{
	local expected="$1"
	local last=""
	local file rc

	# spawn nf_queue listeners
	ip netns exec "$nsrouter" ./nf_queue -c -q 0 -t "$timeout" > "$TMPFILE0" &
	ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE1" &

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1

	# The helper's status must be captured explicitly.  The global $ret is
	# still 0 at this point, so reporting/exiting with it would turn a
	# connectivity failure into a successful exit.
	test_ping
	rc=$?
	if [ "$rc" -ne 0 ]; then
		echo "FAIL: netns routing/connectivity with active listener on queues 0 and 1: $rc" 1>&2
		exit "$rc"
	fi

	test_ping_router
	rc=$?
	if [ "$rc" -ne 0 ]; then
		echo "FAIL: netns router unreachable listener on queue 0 and 1: $rc" 1>&2
		exit "$rc"
	fi

	# Both listeners were started with -t $timeout; wait for them to finish
	# so their final statistics line is in the output files.
	# NOTE: a bare "wait" always returns 0, so this stores 0 in ret.
	wait
	ret=$?

	for file in "$TMPFILE0" "$TMPFILE1"; do
		last=$(tail -n1 "$file")
		if [ "$last" != "$expected packets total" ]; then
			echo "FAIL: Expected $expected packets total, but got $last" 1>&2
			ip netns exec "$nsrouter" nft list ruleset
			exit 1
		fi
	done

	echo "PASS: Expected and received $last"
}

# Succeed when ss, run against namespace $1 for listening TCP sockets with
# source port 12345, prints something containing "12345".
listener_ready()
{
	local ns="$1"

	[[ "$(ss -N "$ns" -lnt -o "sport = :12345")" == *12345* ]]
}

# Send $TMPINPUT over TCP from ns1 to a socat listener in ns2 while an
# nf_queue listener serves queue 2 in the router namespace.
# Prints PASS when the receiver exits successfully.
# NOTE(review): a failing receiver prints nothing and leaves $ret alone.
test_tcp_forward()
{
	local queue_pid listener_pid

	ip netns exec "$nsrouter" ./nf_queue -q 2 &
	queue_pid=$!

	timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
	listener_pid=$!

	# both the tcp receiver and the queue listener must be up before sending
	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2

	ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null

	if wait "$listener_pid"; then
		echo "PASS: tcp and nfqueue in forward chain"
	fi
	kill "$queue_pid"
}

# Send $TMPINPUT over loopback TCP inside the router namespace while an
# nf_queue listener serves queue 3.
# Prints PASS when the receiver exits successfully.
# NOTE(review): a failing receiver prints nothing and leaves $ret alone.
test_tcp_localhost()
{
	local listener_pid queue_pid

	timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
	listener_pid=$!

	ip netns exec "$nsrouter" ./nf_queue -q 3 &
	queue_pid=$!

	# both the tcp receiver and the queue listener must be up before sending
	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3

	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null

	if wait "$listener_pid"; then
		echo "PASS: tcp via loopback"
	fi
	kill "$queue_pid"
}

# Run the connect_close helper (port 23456) inside the router namespace
# while an nf_queue listener serves queue 3, then stop the listener.
# NOTE(review): connect_close's exit status is not looked at.
test_tcp_localhost_connectclose()
{
	local queue_pid

	ip netns exec "$nsrouter" ./nf_queue -q 3 &
	queue_pid=$!

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3

	timeout 10 ip netns exec "$nsrouter" ./connect_close -p 23456 -t 3

	kill "$queue_pid"
	if wait; then
		echo "PASS: tcp via loopback with connect/close"
	fi
}

# Loopback TCP transfer inside the router namespace with two queue
# listeners chained together.
#
# The ruleset queues (rate limited) tcp dport 12345 packets to queue 0 from
# both the output and the postrouting hook.  The queue 0 listener is started
# with "-Q 1" -- presumably making it hand each packet on to queue 1 -- so
# both listeners are expected to print identical statistics.
#
# Sets ret=1 when the two outputs differ.
test_tcp_localhost_requeue()
{
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
table inet filter {
	chain output {
		type filter hook output priority 0; policy accept;
		tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
	}
	chain post {
		type filter hook postrouting priority 0; policy accept;
		tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
	}
}
EOF
	timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &

	# listener for the packets that get re-queued to queue 1
	ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE2" &

	# listener the ruleset queues to (queue 0); re-queues to queue 1
	ip netns exec "$nsrouter" ./nf_queue -G -d 150 -c -q 0 -Q 1 -t "$timeout" > "$TMPFILE3" &

	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
	# The queue rules have no "bypass": make sure both listeners are bound
	# before the first packet is sent.
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1

	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" > /dev/null

	# wait for the receiver and both listeners to exit
	wait

	if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
		echo "FAIL: lost packets during requeue?!" 1>&2
		# make the failure visible in the script's exit status
		ret=1
		return
	fi

	echo "PASS: tcp via loopback and re-queueing"
}

# Queue locally generated icmp echo requests that leave ns1 through a vrf.
#
# eth0 of ns1 is enslaved to a new vrf device "tvrf"; the output and
# postrouting chains each carry one counting queue rule per output device
# (tvrf and eth0).  After a single ping through the vrf every one of those
# four rules must show "counter packets 1".
#
# Prints SKIP when the vrf device cannot be created; sets ret=1 on a
# counter mismatch.
test_icmp_vrf() {
	local n d

	if ! ip -net "$ns1" link add tvrf type vrf table 9876;then
		echo "SKIP: Could not add vrf device"
		return
	fi

	ip -net "$ns1" li set eth0 master tvrf
	ip -net "$ns1" li set tvrf up

	ip -net "$ns1" route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
ip netns exec "$ns1" nft -f /dev/stdin <<EOF
flush ruleset
table inet filter {
	chain output {
		type filter hook output priority 0; policy accept;
		meta oifname "tvrf" icmp type echo-request counter queue num 1
		meta oifname "eth0" icmp type echo-request counter queue num 1
	}
	chain post {
		type filter hook postrouting priority 0; policy accept;
		meta oifname "tvrf" icmp type echo-request counter queue num 1
		meta oifname "eth0" icmp type echo-request counter queue num 1
	}
}
EOF
	ip netns exec "$ns1" ./nf_queue -q 1 &
	local nfqpid=$!

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1

	# the ping result itself is not checked, only the rule counters below
	ip netns exec "$ns1" ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null

	for n in output post; do
		for d in tvrf eth0; do
			if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then
				kill "$nfqpid"
				echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
				ip netns exec "$ns1" nft list ruleset
				ret=1
				return
			fi
		done
	done

	kill "$nfqpid"
	echo "PASS: icmp+nfqueue via vrf"
}

# Succeed when ss, run with -S against namespace $1 for listening sockets
# with source port 12345, prints something containing "12345".
sctp_listener_ready()
{
	local ns="$1"

	[[ "$(ss -S -N "$ns" -ln -o "sport = :12345")" == *12345* ]]
}

# Compare a sent file with the received copy.  On a mismatch report it on
# stderr, together with an ls -l line for each file, and set ret=1.
#
# $1 - input (sent) file
# $2 - output (received) file
# $3 - test name used in the failure message
check_output_files()
{
	local input="$1" output="$2" what="$3"

	cmp "$input" "$output" && return 0

	{
		echo "FAIL: $what: input and output file differ"
		echo -n " Input file"
		ls -l "$input"
		echo -n "Output file"
		ls -l "$output"
	} 1>&2
	ret=1
}

# Send $TMPINPUT over SCTP from ns1 to ns2 while the router queues
# forwarded sctp dport 12345 packets to queue 10, then compare the received
# data with what was sent.
#
# Exits the script when the temporary table cannot be removed; a content
# mismatch is recorded by check_output_files.
test_sctp_forward()
{
	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
table inet sctpq {
        chain forward {
        type filter hook forward priority 0; policy accept;
                sctp dport 12345 queue num 10
        }
}
EOF
	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
	local rpid=$!

	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"

	ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
	local nfqpid=$!

	# The queue rule has no "bypass", so packets arriving before the
	# listener is bound to queue 10 are dropped.  Wait for it, like the
	# tcp tests do.
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 10

	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null

	if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then
		echo "FAIL:  Could not delete sctpq table"
		exit 1
	fi

	# the receiver must have exited before the output file is complete
	wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain"
	kill "$nfqpid"

	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
}

test_sctp_output()
{
        ip netns exec "$ns1" nft -f /dev/stdin <<EOF
table inet sctpq {
        chain output {
        type filter hook output priority 0; policy accept;
                sctp dport 12345 queue num 11
        }
}
EOF
	# reduce test file size, software segmentation causes sk wmem increase.
	dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"

	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
	local rpid=$!

	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"

	ip netns exec "$ns1" ./nf_queue -q 11 &
	local nfqpid=$!

	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null

	if ! ip netns exec "$ns1" nft delete table inet sctpq; then
		echo "FAIL:  Could not delete sctpq table"
		exit 1
	fi

	# must wait before checking completeness of output file.
	wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO"
	kill "$nfqpid"

	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
}

# Succeed when ss, run with -S and -u against namespace $1 for listening
# sockets with source port 12345, prints something containing "12345".
udp_listener_ready()
{
	local ns="$1"

	[[ "$(ss -S -N "$ns" -uln -o "sport = :12345")" == *12345* ]]
}

# Succeed only when both files given as $1 and $2 exist and are non-empty.
output_files_written()
{
	local f

	for f in "$1" "$2"; do
		[ -s "$f" ] || return 1
	done

	return 0
}

# Two udp packets with identical addresses and ports are sent to 10.6.6.6,
# which the router DNATs alternately to ns2 and ns3.  The postrouting hook
# queues them to a listener started with "-d 1000".
#
# Expected outcome: exactly one udp conntrack entry for dport 12345 on the
# router, and one received line in each of ns2 and ns3.
# Sets ret=1 on any deviation.
#
# NOTE(review): this relies on the conntrack tool, which is not probed with
# checktool in the part of the script visible here.
test_udp_ct_race()
{
	local nfqpid numlines1 numlines2

        ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
table inet udpq {
	chain prerouting {
		type nat hook prerouting priority dstnat - 5; policy accept;
		ip daddr 10.6.6.6 udp dport 12345 counter dnat to numgen inc mod 2 map { 0 : 10.0.2.99, 1 : 10.0.3.99 }
	}
        chain postrouting {
		type filter hook postrouting priority srcnat - 5; policy accept;
		udp dport 12345 counter queue num 12
        }
}
EOF
	:> "$TMPFILE1"
	:> "$TMPFILE2"

	# receivers; they stay around until their timeout fires or cleanup()
	# kills them
	timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE1",trunc &
	timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE2",trunc &

	ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 &
	nfqpid=$!

	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2"
	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3"
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12

	# Send two packets, one should end up in ns2, other in ns3.
	# This is because nfqueue will delay packet for long enough so that
	# second packet will not find existing conntrack entry.
	echo "Packet 1" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221
	echo "Packet 2" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221

	busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2"

	kill "$nfqpid"

	# -x: the count must be exactly 1; a prefix match would accept 10-19 too.
	if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -qx 1'; then
		echo "FAIL: Expected One udp conntrack entry"
		ip netns exec "$nsrouter" conntrack -L -p udp --dport 12345
		ret=1
	fi

	if ! ip netns exec "$nsrouter" nft delete table inet udpq; then
		echo "FAIL: Could not delete udpq table"
		ret=1
		return
	fi

	numlines1=$(wc -l < "$TMPFILE1")
	numlines2=$(wc -l < "$TMPFILE2")

	if [ "$numlines1" -ne 1 ] || [ "$numlines2" -ne 1 ]; then
		ret=1
		echo "FAIL: uneven udp packet distribution: $numlines1 $numlines2"
		echo -n "$TMPFILE1: ";cat "$TMPFILE1"
		echo -n "$TMPFILE2: ";cat "$TMPFILE2"
		return
	fi

	echo "PASS: both udp receivers got one packet each"
}

# Let a queue listener exit while packets are still queued and check that
# the kernel taint value did not change from 0.
#
# ns1 queues its own icmp traffic to queue 0; the listener is started with
# "-d 30000" and killed right after a short flood ping to 127.0.0.1.
# Nothing is reported when the kernel was already tainted beforehand.
# Sets ret=1 when the kernel is tainted afterwards.
test_queue_removal()
{
	read tainted_then < /proc/sys/kernel/tainted

	ip netns exec "$ns1" nft -f - <<EOF
flush ruleset
table ip filter {
	chain output {
		type filter hook output priority 0; policy accept;
		ip protocol icmp queue num 0
	}
}
EOF
	ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 &
	local listener=$!

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0

	ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null
	kill "$listener"

	ip netns exec "$ns1" nft flush ruleset

	# a pre-existing taint makes the comparison below meaningless
	[ "$tainted_then" -ne 0 ] && return

	read tainted_now < /proc/sys/kernel/tainted
	if ! [ "$tainted_now" -eq 0 ]; then
		echo "TAINT: queue program exiting while packets queued"
		ret=1
		return 0
	fi

	echo "PASS: queue program exiting while packets queued"
}

# The router has to forward between its veth devices.
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null

load_ruleset "filter" 0

# Capture test_ping's status explicitly.  Inside the else branch of a plain
# "if test_ping" $ret would still be 0, so the script would report and exit
# with success although connectivity is broken.
test_ping
ret=$?
if [ "$ret" -eq 0 ]; then
	# queue bypass works (rules were skipped, no listener)
	echo "PASS: ${ns1} can reach ${ns2}"
else
	echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
	exit "$ret"
fi

test_queue_blackhole ip
test_queue_blackhole ip6

# dummy ruleset to add base chains between the
# queueing rules.  We don't want the second reinject
# to re-execute the old hooks.
load_counter_ruleset 10

# we are hooking all: prerouting/input/forward/output/postrouting.
# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
# so we expect that userspace program receives 10 packets.
test_queue 10

# same.  We queue to a second program as well.
load_ruleset "filter2" 20
test_queue 20
ip netns exec "$ns1" nft flush ruleset

# test_tcp_forward, test_tcp_localhost and test_tcp_localhost_connectclose
# start listeners on queues 2 and 3, i.e. they use the queue rules loaded
# into the router by load_ruleset above.  The remaining tests install their
# own rulesets.
test_tcp_forward
test_tcp_localhost
test_tcp_localhost_connectclose
test_tcp_localhost_requeue
test_sctp_forward
test_sctp_output
test_udp_ct_race

# should be last, adds vrf device in ns1 and changes routes
test_icmp_vrf
test_queue_removal

exit $ret