// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <[email protected]>
 *		Alan Cox, <[email protected]>
 *		Linus Torvalds, <[email protected]>
 *		Alexey Kuznetsov, <[email protected]>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		([email protected])	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD;
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/socket.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/inet_dscp.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>

#include "fib_lookup.h"

#define RT_GC_TIMEOUT (300*HZ)

#define DEFAULT_MIN_PMTU (512 + 20 + 20)
#define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)
#define DEFAULT_MIN_ADVMSS 256
static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;

static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;

/*
 *	Interface to generic destination cache.
 */

INDIRECT_CALLABLE_SCOPE
struct dst_entry	*ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int		ipv4_mtu(const struct dst_entry *dst);
static void		ipv4_negative_advice(struct sock *sk,
					     struct dst_entry *dst);
static void		 ipv4_link_failure(struct sk_buff *skb);
static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu,
					   bool confirm_neigh);
static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		ipv4_dst_destroy(struct dst_entry *dst);

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);

static struct dst_ops ipv4_dst_ops = {};

#define ECN_OR_COST(class)	TC_PRIO_##class

const __u8 ip_tos2prio[16] = {};
EXPORT_SYMBOL(ip_tos2prio);
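
/* Illustrative sketch, not part of this file: ip_tos2prio maps the RFC 1349
 * TOS nibble to a queueing priority. A lookup helper along the lines of
 * rt_tos2priority() in <net/route.h> indexes it with the TOS bits of the
 * IPv4 header; example_tos2priority() below is a hypothetical name.
 */
#if 0
static inline char example_tos2priority(u8 tos)
{
	/* IPTOS_TOS() masks the byte down to the 4 TOS bits (mask 0x1e);
	 * shifting right by one turns them into an index 0..15, so the
	 * 16-entry table above covers every value.
	 */
	return ip_tos2prio[IPTOS_TOS(tos) >> 1];
}
#endif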

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{}

static const struct seq_operations rt_cache_seq_ops = {};

static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{}

static const struct seq_operations rt_cpu_seq_ops = {};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{}
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {};

static int __init ip_rt_proc_init(void)
{}

#else
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */

static inline bool rt_is_expired(const struct rtable *rth)
{}

void rt_cache_flush(struct net *net)
{}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{}

static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{}

/* Hash tables of size 2048..262144 depending on RAM size.
 * Each bucket uses 8 bytes.
 */
static u32 ip_idents_mask __read_mostly;
static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;
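
/* A minimal sketch of how the arrays above are meant to be used, assuming
 * (per the comment) a power-of-two table sized from available RAM at boot
 * in ip_rt_init(), so a mask can stand in for a modulo. The 8 bytes per
 * bucket are a 4-byte atomic identifier plus a 4-byte timestamp, held in
 * two parallel arrays indexed by the same mask:
 */
#if 0
	u32 bucket = hash & ip_idents_mask;	/* table size is 2^n */
	atomic_t *id = ip_idents + bucket;	/* 4 bytes: IP ID counter */
	u32 *tstamp = ip_tstamps + bucket;	/* 4 bytes: last use time */
#endif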

/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used. This makes it hard for an attacker
 * to infer how many packets were sent between two points in time.
 */
static u32 ip_idents_reserve(u32 hash, int segs)
{}
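
/* Sketch of the perturbation scheme described above; the in-tree
 * ip_idents_reserve() is similar in spirit but not identical. If a bucket
 * sat idle since 'old', the generator skips ahead by a random amount so the
 * numeric gap between two observed IDs does not reveal how many packets
 * were sent in between. example_idents_reserve() is a hypothetical name.
 */
#if 0
static u32 example_idents_reserve(u32 hash, int segs)
{
	u32 bucket = hash & ip_idents_mask;
	u32 now = (u32)jiffies;
	u32 old = READ_ONCE(ip_tstamps[bucket]);
	u32 delta = 0;

	/* Only one CPU wins the cmpxchg and applies the random skip. */
	if (old != now && cmpxchg(&ip_tstamps[bucket], old, now) == old)
		delta = get_random_u32_below(now - old);

	/* Reserve 'segs' identifiers, returning the first of the range. */
	return atomic_add_return(segs + delta, &ip_idents[bucket]) - segs;
}
#endif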

void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{}
EXPORT_SYMBOL(__ip_select_ident);

static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
			     const struct sock *sk, const struct iphdr *iph,
			     int oif, __u8 tos, u8 prot, u32 mark,
			     int flow_flags)
{}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{}

static DEFINE_SPINLOCK(fnhe_lock);

static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{}

static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash)
{}

static u32 fnhe_hashfun(__be32 daddr)
{}

static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{}

static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
				  __be32 gw, u32 pmtu, bool lock,
				  unsigned long expires)
{}

static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{}

static void ipv4_negative_advice(struct sock *sk,
				 struct dst_entry *dst)
{}

/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them altogether,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   forgot the redirected route and start sending redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */

void ip_rt_send_redirect(struct sk_buff *skb)
{}
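
/* A sketch of the backoff described in the comment above, with hypothetical
 * per-destination state ('rate_last', 'n_redirects', 'gw'); the in-tree code
 * keeps the equivalent counters in the inet_peer cache. Sending stops after
 * ip_rt_redirect_number redirects, the gaps between them grow exponentially,
 * and a quiet period of ip_rt_redirect_silence resets the counter:
 */
#if 0
	if (time_after(jiffies, rate_last + ip_rt_redirect_silence))
		n_redirects = 0;	/* host seems to have forgotten it */

	if (n_redirects < ip_rt_redirect_number &&
	    time_after(jiffies,
		       rate_last + (ip_rt_redirect_load << n_redirects))) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		rate_last = jiffies;
		n_redirects++;
	}
#endif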

static int ip_error(struct sk_buff *skb)
{}

static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{}

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu,
			      bool confirm_neigh)
{}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		      int oif, u8 protocol)
{}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);

static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{}

void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);

void ipv4_redirect(struct sk_buff *skb, struct net *net,
		   int oif, u8 protocol)
{}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);

INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
							 u32 cookie)
{}
EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);

static void ipv4_send_dest_unreach(struct sk_buff *skb)
{}

static void ipv4_link_failure(struct sk_buff *skb)
{}

static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
{}

/*
 * We do not cache the source address of the outgoing interface,
 * because it is used only by the IP RR, TS and SRR options,
 * so it stays out of the fast path.
 *
 * BTW remember: "addr" is allowed to be unaligned
 * in IP options!
 */

void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{}
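
/* Minimal sketch of the unaligned store this implies: because 'addr' points
 * into the options block it may sit at any byte offset, so the address is
 * copied bytewise rather than stored as a 32-bit word.
 * example_store_addr() is a hypothetical name.
 */
#if 0
static void example_store_addr(u8 *addr, __be32 src)
{
	memcpy(addr, &src, sizeof(src));	/* safe at any alignment */
}
#endif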

#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{}
#endif

static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{}

INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
{}
EXPORT_INDIRECT_CALLABLE(ipv4_mtu);

static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
{}

static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
					       __be32 daddr)
{}

/* MTU selection:
 * 1. mtu on route is locked - use it
 * 2. mtu from nexthop exception
 * 3. mtu from egress device
 */

u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
{}
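
/* Sketch of the three-step precedence listed above, assuming an 'nhc'/'fi'
 * pair taken from the fib_result; details such as forwarding sysctls and
 * lwtunnel headroom are elided.
 */
#if 0
	u32 mtu = 0;

	/* 1. A locked MTU metric on the route wins outright. */
	if (fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
		mtu = fi->fib_mtu;

	/* 2. Otherwise use an MTU learned through a nexthop exception,
	 *    e.g. from an ICMP "fragmentation needed".
	 */
	if (!mtu) {
		struct fib_nh_exception *fnhe = find_exception(nhc, daddr);

		if (fnhe && !time_after(jiffies, fnhe->fnhe_expires))
			mtu = fnhe->fnhe_pmtu;
	}

	/* 3. Fall back to the egress device. */
	if (!mtu)
		mtu = min(READ_ONCE(nhc->nhc_dev->mtu), IP_MAX_MTU);
#endif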

static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr, const bool do_cache)
{}

static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
{}

struct uncached_list {};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);

void rt_add_uncached_list(struct rtable *rt)
{}

void rt_del_uncached_list(struct rtable *rt)
{}

static void ipv4_dst_destroy(struct dst_entry *dst)
{}

void rt_flush_dev(struct net_device *dev)
{}

static bool rt_cache_valid(const struct rtable *rt)
{}

static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag,
			   const bool do_cache)
{}

struct rtable *rt_dst_alloc(struct net_device *dev,
			    unsigned int flags, u16 type,
			    bool noxfrm)
{}
EXPORT_SYMBOL(rt_dst_alloc);

struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
{}
EXPORT_SYMBOL(rt_dst_clone);

/* called in rcu_read_lock() section */
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			  u8 tos, struct net_device *dev,
			  struct in_device *in_dev, u32 *itag)
{}

/* called in rcu_read_lock() section */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{}

static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     __be32 daddr,
				     __be32 saddr)
{}

/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
/* To make ICMP packets follow the right flow, the multipath hash is
 * calculated from the inner IP addresses.
 */
static void ip_multipath_l3_keys(const struct sk_buff *skb,
				 struct flow_keys *hash_keys)
{}
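
/* Illustrative sketch of the rule above, with validation elided: the real
 * ip_multipath_l3_keys() reads the headers safely via skb_header_pointer()
 * before trusting them. For ICMP errors, hashing the quoted (inner)
 * addresses makes the error take the same multipath leg as the flow that
 * triggered it.
 */
#if 0
	const struct iphdr *outer = ip_hdr(skb);
	const struct icmphdr *icmph;
	const struct iphdr *inner;

	hash_keys->addrs.v4addrs.src = outer->saddr;
	hash_keys->addrs.v4addrs.dst = outer->daddr;

	if (outer->protocol == IPPROTO_ICMP) {
		icmph = (const struct icmphdr *)((const u8 *)outer +
						 outer->ihl * 4);
		if (icmp_is_err(icmph->type)) {
			inner = (const struct iphdr *)(icmph + 1);
			hash_keys->addrs.v4addrs.src = inner->saddr;
			hash_keys->addrs.v4addrs.dst = inner->daddr;
		}
	}
#endif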

static u32 fib_multipath_custom_hash_outer(const struct net *net,
					   const struct sk_buff *skb,
					   bool *p_has_inner)
{}

static u32 fib_multipath_custom_hash_inner(const struct net *net,
					   const struct sk_buff *skb,
					   bool has_inner)
{}

static u32 fib_multipath_custom_hash_skb(const struct net *net,
					 const struct sk_buff *skb)
{}

static u32 fib_multipath_custom_hash_fl4(const struct net *net,
					 const struct flowi4 *fl4)
{}

/* If skb is set it is used to build the hash keys; fl4 may then be NULL. */
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
		       const struct sk_buff *skb, struct flow_keys *flkeys)
{}
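
/* Usage sketch for the helper above: callers hash either the packet or the
 * flow, then feed the result to fib_select_multipath() to pick a nexthop.
 */
#if 0
	int h;

	/* Forwarding path: derive the hash from the packet itself. */
	h = fib_multipath_hash(net, NULL, skb, NULL);

	/* Locally generated traffic: no skb yet, so hash the flow. */
	h = fib_multipath_hash(net, fl4, NULL, NULL);

	fib_select_multipath(res, h);
#endif
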
#endif /* CONFIG_IP_ROUTE_MULTIPATH */

static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos,
			    struct flow_keys *hkeys)
{}

/* Implements the same saddr-related checks as ip_route_input_slow(),
 * assuming daddr is valid and the destination is not a local broadcast
 * address. Uses the provided hint instead of performing a full route lookup.
 */
int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
		      u8 tos, struct net_device *dev,
		      const struct sk_buff *hint)
{}

/* get device for dst_alloc with local routes */
static struct net_device *ip_rt_get_dev(struct net *net,
					const struct fib_result *res)
{}

/*
 *	NOTE. We drop all packets that have a local source
 *	address, because every properly looped-back packet
 *	must already have the correct destination attached by the
 *	output routine. Changes in the enforced policies must also
 *	be applied to ip_route_use_hint().
 *
 *	This approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with a 100% guarantee.
 *
 *	called with rcu_read_lock()
 */

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev,
			       struct fib_result *res)
{}

/* called with rcu_read_lock held */
static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			      u8 tos, struct net_device *dev, struct fib_result *res)
{}

int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			 u8 tos, struct net_device *dev)
{}
EXPORT_SYMBOL(ip_route_input_noref);

/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{}

/*
 * Major route resolver routine.
 */

struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
					const struct sk_buff *skb)
{}
EXPORT_SYMBOL_GPL(ip_route_output_key_hash);

struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
					    struct fib_result *res,
					    const struct sk_buff *skb)
{}

static struct dst_ops ipv4_dst_blackhole_ops = {};

struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{}

struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
				    const struct sock *sk)
{}
EXPORT_SYMBOL_GPL(ip_route_output_flow);

/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
			struct rtable *rt, u32 table_id, dscp_t dscp,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq, unsigned int flags)
{}

static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
			    struct netlink_callback *cb, u32 table_id,
			    struct fnhe_hash_bucket *bucket, int genid,
			    int *fa_index, int fa_start, unsigned int flags)
{}

int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
		       u32 table_id, struct fib_info *fi,
		       int *fa_index, int fa_start, unsigned int flags)
{}

static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
						   u8 ip_proto, __be16 sport,
						   __be16 dport)
{}

static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
				       const struct nlmsghdr *nlh,
				       struct nlattr **tb,
				       struct netlink_ext_ack *extack)
{}

static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{}

void ip_rt_multicast_event(struct in_device *in_dev)
{}

#ifdef CONFIG_SYSCTL
static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
static int ip_min_valid_pmtu __read_mostly	= IPV4_MIN_MTU;

static int ipv4_sysctl_rtcache_flush(const struct ctl_table *__ctl, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{}

static struct ctl_table ipv4_route_table[] = {};

static const char ipv4_route_flush_procname[] = "flush";

static struct ctl_table ipv4_route_netns_table[] = {};

static __net_init int sysctl_route_net_init(struct net *net)
{}

static __net_exit void sysctl_route_net_exit(struct net *net)
{}

static __net_initdata struct pernet_operations sysctl_route_ops = {};
#endif

static __net_init int netns_ip_rt_init(struct net *net)
{}

static struct pernet_operations __net_initdata ip_rt_ops = {};

static __net_init int rt_genid_init(struct net *net)
{}

static __net_initdata struct pernet_operations rt_genid_ops = {};

static int __net_init ipv4_inetpeer_init(struct net *net)
{}

static void __net_exit ipv4_inetpeer_exit(struct net *net)
{}

static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {};

#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */

int __init ip_rt_init(void)
{}

#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{}
#endif