/* linux/samples/bpf/xdp_router_ipv4.bpf.c */

/* Copyright (C) 2017 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */

#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"

/* Ethernet constants used by the packet parser in this file. They are
 * defined locally; presumably because vmlinux.h provides types but no
 * preprocessor macros -- TODO confirm.
 */

/* Number of bytes in an Ethernet MAC address; this many bytes are copied
 * into eth->h_dest and eth->h_source when a packet is forwarded.
 */
#define ETH_ALEN	6
/* EtherTypes that identify a VLAN-tagged frame (compared with h_proto). */
#define ETH_P_8021Q	0x8100
#define ETH_P_8021AD	0x88A8

/* Value stored in lpm_map: the route chosen by a longest-prefix match.
 *
 * NOTE(review): the layout is presumably shared with the user-space loader
 * that populates the map -- keep field order and sizes in sync with it.
 */
struct trie_value {
	/* Not read by the XDP program in this file. */
	__u8 prefix[4];
	/* The first ETH_ALEN bytes are copied into the Ethernet source
	 * address of a forwarded packet.
	 */
	__be64 value;
	/* Key passed to bpf_redirect_map() on tx_port. */
	int ifindex;
	/* Not read by the XDP program in this file. */
	int metric;
	/* arp_table key tried when the packet's destination address has no
	 * arp_table entry of its own; when zero, the packet is dropped
	 * instead.
	 */
	__be32 gw;
};

/* Key for lpm_trie: eight bytes that can be addressed either as two 32-bit
 * words or as individual bytes. The XDP program writes the prefix length
 * (32) into b32[0] and the four bytes of the IPv4 destination address into
 * b8[4]..b8[7].
 */
union key_4 {
	u32 b32[2];
	u8 b8[8];
};

/* Next-hop link-layer data embedded in struct direct_map. */
struct arp_entry {
	/* The first ETH_ALEN bytes are copied into the Ethernet destination
	 * address; a zero value makes the program skip the exact-match path.
	 */
	__be64 mac;
	/* Not read by the XDP program in this file; presumably the IPv4
	 * address this entry resolves -- TODO confirm against the loader.
	 */
	__be32 dst;
};

/* Value stored in exact_match: everything needed to forward a packet to a
 * specific destination address without a trie or ARP lookup.
 */
struct direct_map {
	/* Destination side: arp.mac supplies the Ethernet destination. */
	struct arp_entry arp;
	/* Key passed to bpf_redirect_map() on tx_port. */
	int ifindex;
	/* The first ETH_ALEN bytes are copied into the Ethernet source
	 * address; a zero value makes the program skip the exact-match path.
	 */
	__be64 mac;
};

/* Map for trie implementation: the longest-prefix-match routing table.
 *
 * The key is 8 bytes (filled in through union key_4) and the value is a
 * struct trie_value. Holds at most 50 entries.
 *
 * NOTE(review): BPF_F_NO_PREALLOC is believed to be mandatory for LPM trie
 * maps -- check the kernel's LPM trie documentation before changing it.
 */
struct {
	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
	__uint(key_size, 8);
	__uint(value_size, sizeof(struct trie_value));
	__uint(max_entries, 50);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} lpm_map SEC(".maps");

/* Map for ARP table: hash from a 32-bit big-endian IPv4 address to a
 * 64-bit value whose first ETH_ALEN bytes are used as the Ethernet
 * destination address. The XDP program looks it up with the packet's
 * destination address and, failing that, with the route's gateway.
 * Holds at most 50 entries.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, __be32);
	__type(value, __be64);
	__uint(max_entries, 50);
} arp_table SEC(".maps");

/* Map to keep the exact match entries in the route table: hash from an
 * IPv4 destination address to a struct direct_map. The XDP program
 * consults it before the LPM trie and uses the entry only when both of its
 * MAC fields are non-zero. Holds at most 50 entries.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, __be32);
	__type(value, struct direct_map);
	__uint(max_entries, 50);
} exact_match SEC(".maps");

/* Device map used as the redirect target. The XDP program passes the
 * egress ifindex taken from the route entry as the key to
 * bpf_redirect_map(). Keys and values are int-sized; up to 100 slots.
 *
 * NOTE(review): the entries are presumably installed by the user-space
 * loader -- nothing in this file writes to the map.
 */
struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
	__uint(max_entries, 100);
} tx_port SEC(".maps");

/* XDP entry point: a minimal IPv4 forwarder.
 *
 * For every frame the program:
 *   - increments the "processed" counter in slot 0 of rx_cnt (a map declared
 *     outside this file), when that lookup succeeds;
 *   - skips at most one 802.1Q / 802.1ad tag to find the payload protocol;
 *   - hands ARP frames to the kernel stack;
 *   - for IPv4, resolves source MAC, destination MAC and egress ifindex,
 *     first from exact_match and otherwise from lpm_map plus arp_table,
 *     rewrites the Ethernet addresses and redirects through tx_port.
 *
 * Returns XDP_REDIRECT when bpf_redirect_map() reports success, XDP_PASS
 * for ARP frames and for IPv4 frames whose gateway has no arp_table entry
 * yet, and XDP_DROP otherwise (truncated headers, other protocols, no
 * route, no gateway, failed redirect). Each outcome bumps the matching
 * counter in the rx_cnt record when one is available.
 *
 * NOTE(review): the IPv4 header is only read (daddr); TTL and checksum are
 * left untouched, and the IP version / header length are not validated.
 */
SEC("xdp")
int xdp_router_ipv4_prog(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off = sizeof(*eth);
	struct datarec *rec;
	__be16 h_proto;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (rec)
		NO_TEAR_INC(rec->processed);

	/* The Ethernet header must lie completely inside the packet. */
	if (data + nh_off > data_end)
		goto drop;

	h_proto = eth->h_proto;
	if (h_proto == bpf_htons(ETH_P_8021Q) ||
	    h_proto == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vhdr;

		/* Only a single VLAN tag is unwrapped. */
		vhdr = data + nh_off;
		nh_off += sizeof(struct vlan_hdr);
		if (data + nh_off > data_end)
			goto drop;

		h_proto = vhdr->h_vlan_encapsulated_proto;
	}

	switch (bpf_ntohs(h_proto)) {
	case ETH_P_ARP:
		if (rec)
			NO_TEAR_INC(rec->xdp_pass);
		return XDP_PASS;
	case ETH_P_IP: {
		struct iphdr *iph = data + nh_off;
		struct direct_map *direct_entry;
		__be64 *dest_mac, *src_mac;
		int forward_to;
		int ret;

		/* Compare like with like: both sides are void pointers. */
		if ((void *)(iph + 1) > data_end)
			goto drop;

		direct_entry = bpf_map_lookup_elem(&exact_match, &iph->daddr);

		/* Check for exact match, this would give a faster lookup */
		if (direct_entry && direct_entry->mac &&
		    direct_entry->arp.mac) {
			src_mac = &direct_entry->mac;
			dest_mac = &direct_entry->arp.mac;
			forward_to = direct_entry->ifindex;
		} else {
			struct trie_value *prefix_value;
			union key_4 key4;

			/* Look up in the trie for lpm: full 32-bit prefix
			 * followed by the destination address bytes.
			 */
			key4.b32[0] = 32;
			key4.b8[4] = iph->daddr & 0xff;
			key4.b8[5] = (iph->daddr >> 8) & 0xff;
			key4.b8[6] = (iph->daddr >> 16) & 0xff;
			key4.b8[7] = (iph->daddr >> 24) & 0xff;

			prefix_value = bpf_map_lookup_elem(&lpm_map, &key4);
			if (!prefix_value)
				goto drop;

			forward_to = prefix_value->ifindex;
			/* Address of a member of a non-NULL map value; it
			 * cannot be NULL, so no check is needed.
			 */
			src_mac = &prefix_value->value;

			dest_mac = bpf_map_lookup_elem(&arp_table, &iph->daddr);
			if (!dest_mac) {
				/* Destination is not a known neighbour: fall
				 * back to the route's gateway, if it has one.
				 */
				if (!prefix_value->gw)
					goto drop;

				dest_mac = bpf_map_lookup_elem(&arp_table,
							       &prefix_value->gw);
				if (!dest_mac) {
					/* Forward the packet to the kernel in
					 * order to trigger ARP discovery for
					 * the default gw.
					 */
					if (rec)
						NO_TEAR_INC(rec->xdp_pass);
					return XDP_PASS;
				}
			}
		}

		/* Both MAC pointers are valid on every path that gets here. */
		__builtin_memcpy(eth->h_dest, dest_mac, ETH_ALEN);
		__builtin_memcpy(eth->h_source, src_mac, ETH_ALEN);

		ret = bpf_redirect_map(&tx_port, forward_to, 0);
		if (ret == XDP_REDIRECT) {
			if (rec)
				NO_TEAR_INC(rec->xdp_redirect);
			return ret;
		}

		/* Redirect was refused: account for it as a drop. */
		goto drop;
	}
	default:
		break;
	}
drop:
	if (rec)
		NO_TEAR_INC(rec->xdp_drop);

	return XDP_DROP;
}

/* License string placed in the "license" section via SEC(). */
char _license[] SEC("license") = "GPL";