// SPDX-License-Identifier: GPL-2.0-only /* PIPAPO: PIle PAcket POlicies: AVX2 packet lookup routines * * Copyright (c) 2019-2020 Red Hat GmbH * * Author: Stefano Brivio <[email protected]> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <uapi/linux/netfilter/nf_tables.h> #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/compiler.h> #include <asm/fpu/api.h> #include "nft_set_pipapo_avx2.h" #include "nft_set_pipapo.h" #define NFT_PIPAPO_LONGS_PER_M256 … /* Load from memory into YMM register with non-temporal hint ("stream load"), * that is, don't fetch lines from memory into the cache. This avoids pushing * precious packet data out of the cache hierarchy, and is appropriate when: * * - loading buckets from lookup tables, as they are not going to be used * again before packets are entirely classified * * - loading the result bitmap from the previous field, as it's never used * again */ #define NFT_PIPAPO_AVX2_LOAD(reg, loc) … /* Stream a single lookup table bucket into YMM register given lookup table, * group index, value of packet bits, bucket size. */ #define NFT_PIPAPO_AVX2_BUCKET_LOAD4(reg, lt, group, v, bsize) … #define NFT_PIPAPO_AVX2_BUCKET_LOAD8(reg, lt, group, v, bsize) … /* Bitwise AND: the staple operation of this algorithm */ #define NFT_PIPAPO_AVX2_AND(dst, a, b) … /* Jump to label if @reg is zero */ #define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) … /* Store 256 bits from YMM register into memory. Contrary to bucket load * operation, we don't bypass the cache here, as stored matching results * are always used shortly after. */ #define NFT_PIPAPO_AVX2_STORE(loc, reg) … /* Zero out a complete YMM register, @reg */ #define NFT_PIPAPO_AVX2_ZERO(reg) … /** * nft_pipapo_avx2_prepare() - Prepare before main algorithm body * * This zeroes out ymm15, which is later used whenever we need to clear a * memory location, by storing its content into memory. */ static void nft_pipapo_avx2_prepare(void) { … } /** * nft_pipapo_avx2_fill() - Fill a bitmap region with ones * @data: Base memory area * @start: First bit to set * @len: Count of bits to fill * * This is nothing else than a version of bitmap_set(), as used e.g. by * pipapo_refill(), tailored for the microarchitectures using it and better * suited for the specific usage: it's very likely that we'll set a small number * of bits, not crossing a word boundary, and correct branch prediction is * critical here. * * This function doesn't actually use any AVX2 instruction. */ static void nft_pipapo_avx2_fill(unsigned long *data, int start, int len) { … } /** * nft_pipapo_avx2_refill() - Scan bitmap, select mapping table item, set bits * @offset: Start from given bitmap (equivalent to bucket) offset, in longs * @map: Bitmap to be scanned for set bits * @dst: Destination bitmap * @mt: Mapping table containing bit set specifiers * @last: Return index of first set bit, if this is the last field * * This is an alternative implementation of pipapo_refill() suitable for usage * with AVX2 lookup routines: we know there are four words to be scanned, at * a given offset inside the map, for each matching iteration. * * This function doesn't actually use any AVX2 instruction. * * Return: first set bit index if @last, index of first filled word otherwise. */ static int nft_pipapo_avx2_refill(int offset, unsigned long *map, unsigned long *dst, union nft_pipapo_map_bucket *mt, bool last) { … } /** * nft_pipapo_avx2_lookup_4b_2() - AVX2-based lookup for 2 four-bit groups * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * Load buckets from lookup table corresponding to the values of each 4-bit * group of packet bytes, and perform a bitwise intersection between them. If * this is the first field in the set, simply AND the buckets together * (equivalent to using an all-ones starting bitmap), use the provided starting * bitmap otherwise. Then call nft_pipapo_avx2_refill() to generate the next * working bitmap, @fill. * * This is used for 8-bit fields (i.e. protocol numbers). * * Out-of-order (and superscalar) execution is vital here, so it's critical to * avoid false data dependencies. CPU and compiler could (mostly) take care of * this on their own, but the operation ordering is explicitly given here with * a likely execution order in mind, to highlight possible stalls. That's why * a number of logically distinct operations (i.e. loading buckets, intersecting * buckets) are interleaved. * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_lookup_4b_4() - AVX2-based lookup for 4 four-bit groups * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * See nft_pipapo_avx2_lookup_4b_2(). * * This is used for 16-bit fields (i.e. ports). * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_lookup_4b_8() - AVX2-based lookup for 8 four-bit groups * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * See nft_pipapo_avx2_lookup_4b_2(). * * This is used for 32-bit fields (i.e. IPv4 addresses). * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_lookup_4b_12() - AVX2-based lookup for 12 four-bit groups * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * See nft_pipapo_avx2_lookup_4b_2(). * * This is used for 48-bit fields (i.e. MAC addresses/EUI-48). * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_lookup_4b_32() - AVX2-based lookup for 32 four-bit groups * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * See nft_pipapo_avx2_lookup_4b_2(). * * This is used for 128-bit fields (i.e. IPv6 addresses). * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_lookup_8b_1() - AVX2-based lookup for one eight-bit group * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * See nft_pipapo_avx2_lookup_4b_2(). * * This is used for 8-bit fields (i.e. protocol numbers). * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_lookup_8b_2() - AVX2-based lookup for 2 eight-bit groups * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * See nft_pipapo_avx2_lookup_4b_2(). * * This is used for 16-bit fields (i.e. ports). * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_lookup_8b_4() - AVX2-based lookup for 4 eight-bit groups * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * See nft_pipapo_avx2_lookup_4b_2(). * * This is used for 32-bit fields (i.e. IPv4 addresses). * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_lookup_8b_6() - AVX2-based lookup for 6 eight-bit groups * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * See nft_pipapo_avx2_lookup_4b_2(). * * This is used for 48-bit fields (i.e. MAC addresses/EUI-48). * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_lookup_8b_16() - AVX2-based lookup for 16 eight-bit groups * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * See nft_pipapo_avx2_lookup_4b_2(). * * This is used for 128-bit fields (i.e. IPv6 addresses). * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes * @mdata: Matching data, including mapping table * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index * * This function should never be called, but is provided for the case the field * size doesn't match any of the known data types. Matching rate is * substantially lower than AVX2 routines. * * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_slow(const struct nft_pipapo_match *mdata, unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) { … } /** * nft_pipapo_avx2_estimate() - Set size, space and lookup complexity * @desc: Set description, element count and field description used * @features: Flags: NFT_SET_INTERVAL needs to be there * @est: Storage for estimation data * * Return: true if set is compatible and AVX2 available, false otherwise. */ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est) { … } /** * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation * @net: Network namespace * @set: nftables API set representation * @key: nftables API element representation containing key data * @ext: nftables API extension pointer, filled with matching reference * * For more details, see DOC: Theory of Operation in nft_set_pipapo.c. * * This implementation exploits the repetitive characteristic of the algorithm * to provide a fast, vectorised version using the AVX2 SIMD instruction set. * * Return: true on match, false otherwise. */ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { … }