linux/net/ipv4/fib_trie.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 *   Robert Olsson <[email protected]> Uppsala Universitet
 *     & Swedish University of Agricultural Sciences.
 *
 *   Jens Laas <[email protected]> Swedish University of
 *     Agricultural Sciences.
 *
 *   Hans Liss <[email protected]>  Uppsala Universitet
 *
 * This work is based on the LPC-trie which is originally described in:
 *
 * An experimental study of compression methods for dynamic tries
 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
 * https://www.csc.kth.se/~snilsson/software/dyntrie2/
 *
 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson.
 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999.
 *
 * Code from fib_hash has been reused which includes the following header:
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 FIB: lookup engine and maintenance routines.
 *
 * Authors:	Alexey Kuznetsov, <[email protected]>
 *
 * Substantial contributions to this work come from:
 *
 *		David S. Miller, <[email protected]>
 *		Stephen Hemminger <[email protected]>
 *		Paul E. McKenney <[email protected]>
 *		Patrick McHardy <[email protected]>
 */
#include <linux/cache.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_wait.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/notifier.h>
#include <net/net_namespace.h>
#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/fib_notifier.h>
#include <trace/events/fib.h>
#include "fib_lookup.h"

static int call_fib_entry_notifier(struct notifier_block *nb,
				   enum fib_event_type event_type, u32 dst,
				   int dst_len, struct fib_alias *fa,
				   struct netlink_ext_ack *extack)
{}

static int call_fib_entry_notifiers(struct net *net,
				    enum fib_event_type event_type, u32 dst,
				    int dst_len, struct fib_alias *fa,
				    struct netlink_ext_ack *extack)
{}

#define MAX_STAT_DEPTH

#define KEYLENGTH
#define KEY_MAX

typedef unsigned int t_key;

#define IS_TRIE(n)
#define IS_TNODE(n)
#define IS_LEAF(n)

struct key_vector {};

struct tnode {};

#define TNODE_SIZE(n)
#define LEAF_SIZE

#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie_use_stats {};
#endif

struct trie_stat {};

struct trie {};

static struct key_vector *resize(struct trie *t, struct key_vector *tn);
static unsigned int tnode_free_size;

/*
 * Force a synchronize_rcu() once this much memory is outstanding behind
 * call_rcu(); this is especially useful before resizing the root node with
 * PREEMPT_NONE configs. The value was obtained experimentally, aiming to
 * avoid visible slowdown.
 */
unsigned int sysctl_fib_sync_mem =;
unsigned int sysctl_fib_sync_mem_min =;
unsigned int sysctl_fib_sync_mem_max =;
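
/* An illustrative sketch only (not the kernel's actual free path): one way a
 * deferred-free routine could use sysctl_fib_sync_mem as a cap on memory
 * queued behind call_rcu(), forcing a grace period once the running total
 * crosses the threshold.  The function name and the way the byte count is
 * passed in are assumptions made for this example.
 */
static inline void example_flush_deferred_frees(unsigned int queued_bytes)
{
	tnode_free_size += queued_bytes;

	if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
		tnode_free_size = 0;
		synchronize_rcu();
	}
}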

static struct kmem_cache *fn_alias_kmem __ro_after_init;
static struct kmem_cache *trie_leaf_kmem __ro_after_init;

static inline struct tnode *tn_info(struct key_vector *kv)
{}

/* caller must hold RTNL */
#define node_parent(tn)
#define get_child(tn, i)

/* caller must hold RCU read lock or RTNL */
#define node_parent_rcu(tn)
#define get_child_rcu(tn, i)

/* wrapper for rcu_assign_pointer */
static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
{}

#define NODE_INIT_PARENT(n, p)

/* This provides us with the number of children in this node; in the case of
 * a leaf it will return 0, meaning none of the children are accessible.
 */
static inline unsigned long child_length(const struct key_vector *tn)
{}
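
/* A minimal standalone sketch of the rule stated above, with the node's
 * 'bits' and leaf-ness passed in directly rather than read from a
 * struct key_vector: a tnode with 'bits' index bits has 1 << bits child
 * slots, while a leaf has none.  This is not the real child_length().
 */
static inline unsigned long example_child_length(unsigned char bits,
						 bool is_leaf)
{
	return is_leaf ? 0 : (1ul << bits);
}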

#define get_cindex(key, kv)

static inline unsigned long get_index(t_key key, struct key_vector *kv)
{}

/* To understand this stuff, an understanding of keys and all their bits is
 * necessary. Every node in the trie has a key associated with it, but not
 * all of the bits in that key are significant.
 *
 * Consider a node 'n' and its parent 'tp'.
 *
 * If n is a leaf, every bit in its key is significant. Its presence is
 * necessitated by path compression, since during a tree traversal (when
 * searching for a leaf - unless we are doing an insertion) we will completely
 * ignore all skipped bits we encounter. Thus we need to verify, at the end of
 * a potentially successful search, that we have indeed been walking the
 * correct key path.
 *
 * Note that if the correct key is present in the tree, we can never "miss"
 * it by following the wrong path. Path compression ensures that segments of
 * the key that are the same for all keys with a given prefix are skipped,
 * but the skipped part *is* identical for each node in the subtrie below the
 * skipped bit! The insertion code in this implementation takes care of that.
 *
 * If n is an internal node - a 'tnode' here - the various parts of its key
 * have many different meanings.
 *
 * Example:
 * _________________________________________________________________
 * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
 * -----------------------------------------------------------------
 *  31  30  29  28  27  26  25  24  23  22  21  20  19  18  17  16
 *
 * _________________________________________________________________
 * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
 * -----------------------------------------------------------------
 *  15  14  13  12  11  10   9   8   7   6   5   4   3   2   1   0
 *
 * tp->pos = 22
 * tp->bits = 3
 * n->pos = 13
 * n->bits = 4
 *
 * First, let's just ignore the bits that come before the parent tp, that is
 * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this
 * point we do not use them for anything.
 *
 * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
 * index into the parent's child array. That is, they will be used to find
 * 'n' among tp's children.
 *
 * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits
 * for the node n.
 *
 * All the bits we have seen so far are significant to the node n. The rest
 * of the bits are really not needed or indeed known in n->key.
 *
 * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
 * n's child array, and will of course be different for each child.
 *
 * The rest of the bits, from 0 to (n->pos - 1) - "u" - are completely unknown
 * at this point.
 */
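
/* A worked example of the layout above, as a hedged standalone sketch (not
 * the get_cindex()/get_index() helpers defined earlier): the "N" bits that
 * index into tp's child array are key >> tp->pos, masked down to tp->bits
 * bits.  With tp->pos = 22 and tp->bits = 3, the key 0x0a800000 (10.128.0.0)
 * has N bits 24..22 equal to 0b010, so n would sit in child slot 2 of tp.
 */
static inline unsigned long example_child_index(t_key key, unsigned char pos,
						unsigned char bits)
{
	return (key >> pos) & ((1ul << bits) - 1);
}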

static const int halve_threshold =;
static const int inflate_threshold =;
static const int halve_threshold_root =;
static const int inflate_threshold_root =;

static void __alias_free_mem(struct rcu_head *head)
{}

static inline void alias_free_mem_rcu(struct fib_alias *fa)
{}

#define TNODE_VMALLOC_MAX

static void __node_free_rcu(struct rcu_head *head)
{}

#define node_free(n)

static struct tnode *tnode_alloc(int bits)
{}

static inline void empty_child_inc(struct key_vector *n)
{}

static inline void empty_child_dec(struct key_vector *n)
{}

static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
{}

static struct key_vector *tnode_new(t_key key, int pos, int bits)
{}

/* Check whether a tnode 'n' is "full", i.e. it is an internal node
 * and no bits are skipped. See discussion in dyntree paper p. 6
 */
static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
{}
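
/* A sketch of the "full" test described above, assuming (per the key/bits
 * comment earlier) that a child n of tn skips no bits exactly when n's index
 * bits start where tn's index bits end, i.e. n->pos + n->bits == tn->pos.
 * The real tnode_full() may order or combine its checks differently.
 */
static inline bool example_tnode_full(const struct key_vector *tn,
				      const struct key_vector *n)
{
	return n && IS_TNODE(n) && (n->pos + n->bits == tn->pos);
}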

/* Add a child at position i overwriting the old value.
 * Update the value of full_children and empty_children.
 */
static void put_child(struct key_vector *tn, unsigned long i,
		      struct key_vector *n)
{}

static void update_children(struct key_vector *tn)
{}

static inline void put_child_root(struct key_vector *tp, t_key key,
				  struct key_vector *n)
{}

static inline void tnode_free_init(struct key_vector *tn)
{}

static inline void tnode_free_append(struct key_vector *tn,
				     struct key_vector *n)
{}

static void tnode_free(struct key_vector *tn)
{}

static struct key_vector *replace(struct trie *t,
				  struct key_vector *oldtnode,
				  struct key_vector *tn)
{}

static struct key_vector *inflate(struct trie *t,
				  struct key_vector *oldtnode)
{}

static struct key_vector *halve(struct trie *t,
				struct key_vector *oldtnode)
{}

static struct key_vector *collapse(struct trie *t,
				   struct key_vector *oldtnode)
{}

static unsigned char update_suffix(struct key_vector *tn)
{}

/* From "Implementing a dynamic compressed trie" by Stefan Nilsson of
 * the Helsinki University of Technology and Matti Tikkanen of Nokia
 * Telecommunications, page 6:
 * "A node is doubled if the ratio of non-empty children to all
 * children in the *doubled* node is at least 'high'."
 *
 * 'high' in this instance is the variable 'inflate_threshold'. It
 * is expressed as a percentage, so the right-hand side gets multiplied
 * by child_length(); instead of also multiplying it by 2 (since the
 * child array will be doubled by inflate()) and multiplying the
 * left-hand side by 100 (to handle the percentage thing), we simply
 * multiply the left-hand side by 50.
 *
 * The left-hand side may look a bit weird: child_length(tn)
 * - tn->empty_children is of course the number of non-null children
 * in the current node. tn->full_children is the number of "full"
 * children, that is non-null tnodes with a skip value of 0.
 * All of those will be doubled in the resulting inflated tnode, so
 * we just count them one extra time here.
 *
 * A clearer way to write this would be:
 *
 * to_be_doubled = tn->full_children;
 * not_to_be_doubled = child_length(tn) - tn->empty_children -
 *     tn->full_children;
 *
 * new_child_length = child_length(tn) * 2;
 *
 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
 *      new_child_length;
 * if (new_fill_factor >= inflate_threshold)
 *
 * ...and so on, though it would mess up the while () loop.
 *
 * anyway,
 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
 *      inflate_threshold
 *
 * avoid a division:
 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
 *      inflate_threshold * new_child_length
 *
 * expand not_to_be_doubled and to_be_doubled, and shorten:
 * 100 * (child_length(tn) - tn->empty_children +
 *    tn->full_children) >= inflate_threshold * new_child_length
 *
 * expand new_child_length:
 * 100 * (child_length(tn) - tn->empty_children +
 *    tn->full_children) >=
 *      inflate_threshold * child_length(tn) * 2
 *
 * shorten again:
 * 50 * (tn->full_children + child_length(tn) -
 *    tn->empty_children) >= inflate_threshold *
 *    child_length(tn)
 *
 */
static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
{}
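
/* A standalone sketch of the final inequality derived in the block comment
 * above.  The real should_inflate() also chooses between inflate_threshold
 * and inflate_threshold_root (to keep the root node larger) and applies
 * further sanity checks, so treat this as the core test only.
 */
static inline bool example_inflate_check(unsigned long children,
					 unsigned long empty_children,
					 unsigned long full_children,
					 unsigned int threshold)
{
	unsigned long used = children - empty_children + full_children;

	/* 50 * (full + children - empty) >= inflate_threshold * children */
	return 50 * used >= (unsigned long)threshold * children;
}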

static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
{}

static inline bool should_collapse(struct key_vector *tn)
{}

#define MAX_WORK
static struct key_vector *resize(struct trie *t, struct key_vector *tn)
{}

static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
{}

static void node_push_suffix(struct key_vector *tn, unsigned char slen)
{}

/* rcu_read_lock needs to be held by the caller on the read side */
static struct key_vector *fib_find_node(struct trie *t,
					struct key_vector **tp, u32 key)
{}

/* Return the first fib alias matching DSCP with
 * priority less than or equal to PRIO.
 * If 'find_first' is set, return the first matching
 * fib alias, regardless of DSCP and priority.
 */
static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
					dscp_t dscp, u32 prio, u32 tb_id,
					bool find_first)
{}

static struct fib_alias *
fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
{}

void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
{}
EXPORT_SYMBOL_GPL(fib_alias_hw_flags_set);

static void trie_rebalance(struct trie *t, struct key_vector *tn)
{}

static int fib_insert_node(struct trie *t, struct key_vector *tp,
			   struct fib_alias *new, t_key key)
{}

static int fib_insert_alias(struct trie *t, struct key_vector *tp,
			    struct key_vector *l, struct fib_alias *new,
			    struct fib_alias *fa, t_key key)
{}

static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
{}

static void fib_remove_alias(struct trie *t, struct key_vector *tp,
			     struct key_vector *l, struct fib_alias *old);

/* Caller must hold RTNL. */
int fib_table_insert(struct net *net, struct fib_table *tb,
		     struct fib_config *cfg, struct netlink_ext_ack *extack)
{}

static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
{}

bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
			 const struct flowi4 *flp)
{}

/* should be called with rcu_read_lock */
int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
		     struct fib_result *res, int fib_flags)
{}
EXPORT_SYMBOL_GPL(fib_table_lookup);

static void fib_remove_alias(struct trie *t, struct key_vector *tp,
			     struct key_vector *l, struct fib_alias *old)
{}

static void fib_notify_alias_delete(struct net *net, u32 key,
				    struct hlist_head *fah,
				    struct fib_alias *fa_to_delete,
				    struct netlink_ext_ack *extack)
{}

/* Caller must hold RTNL. */
int fib_table_delete(struct net *net, struct fib_table *tb,
		     struct fib_config *cfg, struct netlink_ext_ack *extack)
{}

/* Scan for the next leaf starting at the provided key value */
static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
{}

static void fib_trie_free(struct fib_table *tb)
{}

struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
{}

/* Caller must hold RTNL */
void fib_table_flush_external(struct fib_table *tb)
{}

/* Caller must hold RTNL. */
int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
{}

/* derived from fib_trie_free */
static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
				     struct nl_info *info)
{}

void fib_info_notify_update(struct net *net, struct nl_info *info)
{}

static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
			   struct notifier_block *nb,
			   struct netlink_ext_ack *extack)
{}

static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{}

int fib_notify(struct net *net, struct notifier_block *nb,
	       struct netlink_ext_ack *extack)
{}

static void __trie_free_rcu(struct rcu_head *head)
{}

void fib_free_table(struct fib_table *tb)
{}

static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
			     struct sk_buff *skb, struct netlink_callback *cb,
			     struct fib_dump_filter *filter)
{}

/* rcu_read_lock needs to be held by the caller on the read side */
int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
		   struct netlink_callback *cb, struct fib_dump_filter *filter)
{}

void __init fib_trie_init(void)
{}

struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
{}

#ifdef CONFIG_PROC_FS
/* Depth-first trie walk iterator */
struct fib_trie_iter {};

static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
{}

static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
					     struct trie *t)
{}

static void trie_collect_stats(struct trie *t, struct trie_stat *s)
{}

/*
 *	This outputs /proc/net/fib_triestats
 */
static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
{}

#ifdef CONFIG_IP_FIB_TRIE_STATS
static void trie_show_usage(struct seq_file *seq,
			    const struct trie_use_stats __percpu *stats)
{}
#endif /*  CONFIG_IP_FIB_TRIE_STATS */

static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
{}


static int fib_triestat_seq_show(struct seq_file *seq, void *v)
{}

static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
{}

static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{}

static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{}

static void fib_trie_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{}

static void seq_indent(struct seq_file *seq, int n)
{}

static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
{}

static const char *const rtn_type_names[__RTN_MAX] =;

static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
{}

/* Pretty print the trie */
static int fib_trie_seq_show(struct seq_file *seq, void *v)
{}

static const struct seq_operations fib_trie_seq_ops =;

struct fib_route_iter {};

static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
					    loff_t pos)
{}

static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{}

static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{}

static void fib_route_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{}

static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
{}

/*
 *	This outputs /proc/net/route.
 *	The format of the file is not supposed to be changed
 *	and needs to be the same as the fib_hash output to avoid breaking
 *	legacy utilities.
 */
static int fib_route_seq_show(struct seq_file *seq, void *v)
{}

static const struct seq_operations fib_route_seq_ops =;

int __net_init fib_proc_init(struct net *net)
{}

void __net_exit fib_proc_exit(struct net *net)
{}

#endif /* CONFIG_PROC_FS */