// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include <linux/tcp.h>
#include <linux/bpf.h>
#include <netinet/in.h>
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
int page_size = 0; /* userspace should set it */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
#endif
#define SOL_CUSTOM 0xdeadbeef
struct sockopt_sk {
__u8 val;
};
struct {
__uint(type, BPF_MAP_TYPE_SK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, struct sockopt_sk);
} socket_storage_map SEC(".maps");
SEC("cgroup/getsockopt")
int _getsockopt(struct bpf_sockopt *ctx)
{
__u8 *optval_end = ctx->optval_end;
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
struct bpf_sock *sk;
/* Bypass AF_NETLINK. */
sk = ctx->sk;
if (sk && sk->family == AF_NETLINK)
goto out;
/* Make sure bpf_get_netns_cookie is callable.
*/
if (bpf_get_netns_cookie(NULL) == 0)
return 0;
if (bpf_get_netns_cookie(ctx) == 0)
return 0;
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
goto out;
}
if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
/* Not interested in SOL_SOCKET:SO_SNDBUF;
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
goto out;
}
if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
/* Not interested in SOL_TCP:TCP_CONGESTION;
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
goto out;
}
if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
* It has a custom implementation for performance
* reasons.
*/
/* Check that optval contains address (__u64) */
if (optval + sizeof(__u64) > optval_end)
return 0; /* bounds check */
if (((struct tcp_zerocopy_receive *)optval)->address != 0)
return 0; /* unexpected data */
goto out;
}
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
if (optval + 1 > optval_end)
return 0; /* bounds check */
ctx->retval = 0; /* Reset system call return value to zero */
/* Always export 0x55 */
optval[0] = 0x55;
ctx->optlen = 1;
/* Userspace buffer is PAGE_SIZE * 2, but BPF
* program can only see the first PAGE_SIZE
* bytes of data.
*/
if (optval_end - optval != page_size)
return 0; /* unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM)
return 0; /* deny everything except custom level */
if (optval + 1 > optval_end)
return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
if (!storage)
return 0; /* couldn't get sk storage */
if (!ctx->retval)
return 0; /* kernel should not have handled
* SOL_CUSTOM, something is wrong!
*/
ctx->retval = 0; /* Reset system call return value to zero */
optval[0] = storage->val;
ctx->optlen = 1;
return 1;
out:
/* optval larger than PAGE_SIZE use kernel's buffer. */
if (ctx->optlen > page_size)
ctx->optlen = 0;
return 1;
}
SEC("cgroup/setsockopt")
int _setsockopt(struct bpf_sockopt *ctx)
{
__u8 *optval_end = ctx->optval_end;
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
struct bpf_sock *sk;
/* Bypass AF_NETLINK. */
sk = ctx->sk;
if (sk && sk->family == AF_NETLINK)
goto out;
/* Make sure bpf_get_netns_cookie is callable.
*/
if (bpf_get_netns_cookie(NULL) == 0)
return 0;
if (bpf_get_netns_cookie(ctx) == 0)
return 0;
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
return 1;
}
if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
/* Overwrite SO_SNDBUF value */
if (optval + sizeof(__u32) > optval_end)
return 0; /* bounds check */
*(__u32 *)optval = 0x55AA;
ctx->optlen = 4;
return 1;
}
if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
/* Always use cubic */
if (optval + 5 > optval_end)
return 0; /* bounds check */
memcpy(optval, "cubic", 5);
ctx->optlen = 5;
return 1;
}
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
/* Original optlen is larger than PAGE_SIZE. */
if (ctx->optlen != page_size * 2)
return 0; /* unexpected data size */
if (optval + 1 > optval_end)
return 0; /* bounds check */
/* Make sure we can trim the buffer. */
optval[0] = 0;
ctx->optlen = 1;
/* Usepace buffer is PAGE_SIZE * 2, but BPF
* program can only see the first PAGE_SIZE
* bytes of data.
*/
if (optval_end - optval != page_size)
return 0; /* unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM)
return 0; /* deny everything except custom level */
if (optval + 1 > optval_end)
return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
if (!storage)
return 0; /* couldn't get sk storage */
storage->val = optval[0];
ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
* setsockopt handler.
*/
return 1;
out:
/* optval larger than PAGE_SIZE use kernel's buffer. */
if (ctx->optlen > page_size)
ctx->optlen = 0;
return 1;
}