/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/module.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_socket.h>
#include <net/inet_sock.h>
#include <net/tcp.h>
struct nft_socket {
enum nft_socket_keys key:8;
u8 level; /* cgroupv2 level to extract */
u8 level_user; /* cgroupv2 level provided by userspace */
u8 len;
union {
u8 dreg;
};
};
static void nft_socket_wildcard(const struct nft_pktinfo *pkt,
struct nft_regs *regs, struct sock *sk,
u32 *dest)
{
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
nft_reg_store8(dest, inet_sk(sk)->inet_rcv_saddr == 0);
break;
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case NFPROTO_IPV6:
nft_reg_store8(dest, ipv6_addr_any(&sk->sk_v6_rcv_saddr));
break;
#endif
default:
regs->verdict.code = NFT_BREAK;
return;
}
}
#ifdef CONFIG_SOCK_CGROUP_DATA
static noinline bool
nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level)
{
struct cgroup *cgrp;
u64 cgid;
if (!sk_fullsock(sk))
return false;
cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level);
if (!cgrp)
return false;
cgid = cgroup_id(cgrp);
memcpy(dest, &cgid, sizeof(u64));
return true;
}
/* process context only, uses current->nsproxy. */
static noinline int nft_socket_cgroup_subtree_level(void)
{
struct cgroup *cgrp = cgroup_get_from_path("/");
int level;
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
level = cgrp->level;
cgroup_put(cgrp);
if (WARN_ON_ONCE(level > 255))
return -ERANGE;
if (WARN_ON_ONCE(level < 0))
return -EINVAL;
return level;
}
#endif
static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
{
const struct net_device *indev = nft_in(pkt);
const struct sk_buff *skb = pkt->skb;
struct sock *sk = NULL;
if (!indev)
return NULL;
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev);
break;
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case NFPROTO_IPV6:
sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev);
break;
#endif
default:
WARN_ON_ONCE(1);
break;
}
return sk;
}
static void nft_socket_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_socket *priv = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
struct sock *sk = skb->sk;
u32 *dest = ®s->data[priv->dreg];
if (sk && !net_eq(nft_net(pkt), sock_net(sk)))
sk = NULL;
if (!sk)
sk = nft_socket_do_lookup(pkt);
if (!sk) {
regs->verdict.code = NFT_BREAK;
return;
}
switch(priv->key) {
case NFT_SOCKET_TRANSPARENT:
nft_reg_store8(dest, inet_sk_transparent(sk));
break;
case NFT_SOCKET_MARK:
if (sk_fullsock(sk)) {
*dest = READ_ONCE(sk->sk_mark);
} else {
regs->verdict.code = NFT_BREAK;
goto out_put_sk;
}
break;
case NFT_SOCKET_WILDCARD:
if (!sk_fullsock(sk)) {
regs->verdict.code = NFT_BREAK;
goto out_put_sk;
}
nft_socket_wildcard(pkt, regs, sk, dest);
break;
#ifdef CONFIG_SOCK_CGROUP_DATA
case NFT_SOCKET_CGROUPV2:
if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
regs->verdict.code = NFT_BREAK;
goto out_put_sk;
}
break;
#endif
default:
WARN_ON(1);
regs->verdict.code = NFT_BREAK;
}
out_put_sk:
if (sk != skb->sk)
sock_gen_put(sk);
}
static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = {
[NFTA_SOCKET_KEY] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_SOCKET_DREG] = { .type = NLA_U32 },
[NFTA_SOCKET_LEVEL] = NLA_POLICY_MAX(NLA_BE32, 255),
};
static int nft_socket_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_socket *priv = nft_expr_priv(expr);
unsigned int len;
if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY])
return -EINVAL;
switch(ctx->family) {
case NFPROTO_IPV4:
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case NFPROTO_IPV6:
#endif
case NFPROTO_INET:
break;
default:
return -EOPNOTSUPP;
}
priv->key = ntohl(nla_get_be32(tb[NFTA_SOCKET_KEY]));
switch(priv->key) {
case NFT_SOCKET_TRANSPARENT:
case NFT_SOCKET_WILDCARD:
len = sizeof(u8);
break;
case NFT_SOCKET_MARK:
len = sizeof(u32);
break;
#ifdef CONFIG_SOCK_CGROUP_DATA
case NFT_SOCKET_CGROUPV2: {
unsigned int level;
int err;
if (!tb[NFTA_SOCKET_LEVEL])
return -EINVAL;
level = ntohl(nla_get_be32(tb[NFTA_SOCKET_LEVEL]));
if (level > 255)
return -EOPNOTSUPP;
err = nft_socket_cgroup_subtree_level();
if (err < 0)
return err;
priv->level_user = level;
level += err;
/* Implies a giant cgroup tree */
if (WARN_ON_ONCE(level > 255))
return -EOPNOTSUPP;
priv->level = level;
len = sizeof(u64);
break;
}
#endif
default:
return -EOPNOTSUPP;
}
priv->len = len;
return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, len);
}
static int nft_socket_dump(struct sk_buff *skb,
const struct nft_expr *expr, bool reset)
{
const struct nft_socket *priv = nft_expr_priv(expr);
if (nla_put_be32(skb, NFTA_SOCKET_KEY, htonl(priv->key)))
return -1;
if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
return -1;
if (priv->key == NFT_SOCKET_CGROUPV2 &&
nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user)))
return -1;
return 0;
}
static bool nft_socket_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
const struct nft_socket *priv = nft_expr_priv(expr);
const struct nft_socket *socket;
if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
nft_reg_track_update(track, expr, priv->dreg, priv->len);
return false;
}
socket = nft_expr_priv(track->regs[priv->dreg].selector);
if (priv->key != socket->key ||
priv->dreg != socket->dreg ||
priv->level != socket->level) {
nft_reg_track_update(track, expr, priv->dreg, priv->len);
return false;
}
if (!track->regs[priv->dreg].bitwise)
return true;
return nft_expr_reduce_bitwise(track, expr);
}
static int nft_socket_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
ctx->family != NFPROTO_INET)
return -EOPNOTSUPP;
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_LOCAL_OUT));
}
static struct nft_expr_type nft_socket_type;
static const struct nft_expr_ops nft_socket_ops = {
.type = &nft_socket_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_socket)),
.eval = nft_socket_eval,
.init = nft_socket_init,
.dump = nft_socket_dump,
.validate = nft_socket_validate,
.reduce = nft_socket_reduce,
};
static struct nft_expr_type nft_socket_type __read_mostly = {
.name = "socket",
.ops = &nft_socket_ops,
.policy = nft_socket_policy,
.maxattr = NFTA_SOCKET_MAX,
.owner = THIS_MODULE,
};
static int __init nft_socket_module_init(void)
{
return nft_register_expr(&nft_socket_type);
}
static void __exit nft_socket_module_exit(void)
{
nft_unregister_expr(&nft_socket_type);
}
module_init(nft_socket_module_init);
module_exit(nft_socket_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Máté Eckl");
MODULE_DESCRIPTION("nf_tables socket match module");
MODULE_ALIAS_NFT_EXPR("socket");