/*
* Copyright (C) 2018 Felix Fietkau <nbd@nbd.name>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <linux/netfilter/xt_FLOWOFFLOAD.h>
#include <net/ip.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_flow_table.h>
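
/*
 * A single software flow table is shared by all FLOWOFFLOAD rules.  Ingress
 * hooks are attached lazily to the devices seen on offloaded flows; a
 * delayed work item registers new hooks and garbage-collects unused ones.
 */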
static struct nf_flowtable nf_flowtable;
static HLIST_HEAD(hooks);
static DEFINE_SPINLOCK(hooks_lock);
static struct delayed_work hook_work;
struct xt_flowoffload_hook {
struct hlist_node list;
struct nf_hook_ops ops;
struct net *net;
bool registered;
bool used;
};
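
/* Ingress hook: hand IPv4/IPv6 packets to the flow table fast path. */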
static unsigned int
xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
switch (skb->protocol) {
case htons(ETH_P_IP):
return nf_flow_offload_ip_hook(priv, skb, state);
case htons(ETH_P_IPV6):
return nf_flow_offload_ipv6_hook(priv, skb, state);
}
return NF_ACCEPT;
}
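
/* Provided by the flow table core; declared here for use below. */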
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void (*iter)(struct flow_offload *flow, void *data),
void *data);
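
/*
 * Allocate an ingress hook for @dev and schedule the work item that will
 * register it.  Called with hooks_lock held, hence the atomic allocation.
 */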
static int
xt_flowoffload_create_hook(struct net_device *dev)
{
struct xt_flowoffload_hook *hook;
struct nf_hook_ops *ops;
hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
if (!hook)
return -ENOMEM;
ops = &hook->ops;
ops->pf = NFPROTO_NETDEV;
ops->hooknum = NF_NETDEV_INGRESS;
ops->priority = 10;
ops->priv = &nf_flowtable;
ops->hook = xt_flowoffload_net_hook;
ops->dev = dev;
hlist_add_head(&hook->list, &hooks);
mod_delayed_work(system_power_efficient_wq, &hook_work, 0);
return 0;
}
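
/* Find the hook attached to @dev, if any.  Called with hooks_lock held. */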
static struct xt_flowoffload_hook *
flow_offload_lookup_hook(struct net_device *dev)
{
struct xt_flowoffload_hook *hook;
hlist_for_each_entry(hook, &hooks, list) {
if (hook->ops.dev == dev)
return hook;
}
return NULL;
}
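
/* Mark the hook for @dev as in use, creating it if it does not exist yet. */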
static void
xt_flowoffload_check_device(struct net_device *dev)
{
struct xt_flowoffload_hook *hook;
spin_lock_bh(&hooks_lock);
hook = flow_offload_lookup_hook(dev);
if (hook)
hook->used = true;
else
xt_flowoffload_create_hook(dev);
spin_unlock_bh(&hooks_lock);
}
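
/*
 * Register any hooks added since the last run.  hooks_lock is dropped
 * around nf_register_net_hook(), so the list walk restarts afterwards.
 */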
static void
xt_flowoffload_register_hooks(void)
{
struct xt_flowoffload_hook *hook;
restart:
hlist_for_each_entry(hook, &hooks, list) {
if (hook->registered)
continue;
hook->registered = true;
hook->net = dev_net(hook->ops.dev);
spin_unlock_bh(&hooks_lock);
nf_register_net_hook(hook->net, &hook->ops);
spin_lock_bh(&hooks_lock);
goto restart;
}
}
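
/*
 * Unregister and free hooks that no active flow referenced in the last
 * pass.  As above, the lock is dropped around the unregister call.
 */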
static void
xt_flowoffload_cleanup_hooks(void)
{
struct xt_flowoffload_hook *hook;
restart:
hlist_for_each_entry(hook, &hooks, list) {
if (hook->used || !hook->registered)
continue;
hlist_del(&hook->list);
spin_unlock_bh(&hooks_lock);
nf_unregister_net_hook(hook->net, &hook->ops);
kfree(hook);
spin_lock_bh(&hooks_lock);
goto restart;
}
}
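
/*
 * Per-flow iterator callback: mark the hooks matching the flow's input
 * and output devices as used and note that at least one flow exists.
 */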
static void
xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
{
struct flow_offload_tuple *tuple = &flow->tuplehash[0].tuple;
struct xt_flowoffload_hook *hook;
bool *found = data;
struct rtable *rt = (struct rtable *)tuple->dst_cache;
spin_lock_bh(&hooks_lock);
hlist_for_each_entry(hook, &hooks, list) {
if (hook->ops.dev->ifindex != tuple->iifidx &&
hook->ops.dev->ifindex != rt->dst.dev->ifindex)
continue;
hook->used = true;
*found = true;
}
spin_unlock_bh(&hooks_lock);
}
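
/*
 * Periodic worker: register pending hooks, clear the used flags, walk the
 * flow table to re-mark hooks still in use, drop the rest, and re-arm
 * itself while offloaded flows remain.
 */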
static void
xt_flowoffload_hook_work(struct work_struct *work)
{
struct xt_flowoffload_hook *hook;
bool found = false;
int err;
spin_lock_bh(&hooks_lock);
xt_flowoffload_register_hooks();
hlist_for_each_entry(hook, &hooks, list)
hook->used = false;
spin_unlock_bh(&hooks_lock);
err = nf_flow_table_iterate(&nf_flowtable, xt_flowoffload_check_hook,
&found);
if (err && err != -EAGAIN)
goto out;
spin_lock_bh(&hooks_lock);
xt_flowoffload_cleanup_hooks();
spin_unlock_bh(&hooks_lock);
out:
if (found)
queue_delayed_work(system_power_efficient_wq, &hook_work, HZ);
}
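
/* Skip packets with an IPsec security path or with IPv4 options. */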
static bool
xt_flowoffload_skip(struct sk_buff *skb, int family)
{
if (skb_sec_path(skb))
return true;
if (family == NFPROTO_IPV4) {
const struct ip_options *opt = &(IPCB(skb)->opt);
if (unlikely(opt->optlen))
return true;
}
return false;
}
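
/*
 * Resolve the route towards the source address of @ct's tuple in
 * direction @dir via @ifindex, for use as the flow's cached dst.
 */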
static struct dst_entry *
xt_flowoffload_dst(const struct nf_conn *ct, enum ip_conntrack_dir dir,
const struct xt_action_param *par, int ifindex)
{
struct dst_entry *dst = NULL;
struct flowi fl;
memset(&fl, 0, sizeof(fl));
switch (xt_family(par)) {
case NFPROTO_IPV4:
fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = ifindex;
break;
case NFPROTO_IPV6:
fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6;
fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
fl.u.ip6.flowi6_oif = ifindex;
break;
}
nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
return dst;
}
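
/*
 * Fill in the cached route for both directions of the flow; fail if
 * either lookup misses or an xfrm transform is involved.
 */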
static int
xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
const struct xt_action_param *par,
struct nf_flow_route *route, enum ip_conntrack_dir dir)
{
struct dst_entry *this_dst, *other_dst;
this_dst = xt_flowoffload_dst(ct, !dir, par, xt_out(par)->ifindex);
other_dst = xt_flowoffload_dst(ct, dir, par, xt_in(par)->ifindex);
route->tuple[dir].dst = this_dst;
route->tuple[!dir].dst = other_dst;
if (!this_dst || !other_dst)
return -ENOENT;
if (dst_xfrm(this_dst) || dst_xfrm(other_dst))
return -EINVAL;
return 0;
}
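
/*
 * Target entry point: offload established TCP and UDP conntrack entries
 * onto the software flow table and, if requested, the hardware path.
 */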
static unsigned int
flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_flowoffload_target_info *info = par->targinfo;
struct tcphdr _tcph, *tcph = NULL;
enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
struct nf_flow_route route;
struct flow_offload *flow = NULL;
struct nf_conn *ct;
struct net *net;
if (xt_flowoffload_skip(skb, xt_family(par)))
return XT_CONTINUE;
ct = nf_ct_get(skb, &ctinfo);
if (ct == NULL)
return XT_CONTINUE;
switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
case IPPROTO_TCP:
if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
return XT_CONTINUE;
tcph = skb_header_pointer(skb, par->thoff,
sizeof(_tcph), &_tcph);
if (unlikely(!tcph || tcph->fin || tcph->rst))
return XT_CONTINUE;
break;
case IPPROTO_UDP:
break;
default:
return XT_CONTINUE;
}
if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
ct->status & IPS_SEQ_ADJUST)
return XT_CONTINUE;
if (!nf_ct_is_confirmed(ct))
return XT_CONTINUE;
if (!xt_in(par) || !xt_out(par))
return XT_CONTINUE;
if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
return XT_CONTINUE;
dir = CTINFO2DIR(ctinfo);
if (xt_flowoffload_route(skb, ct, par, &route, dir) == 0)
flow = flow_offload_alloc(ct, &route);
dst_release(route.tuple[dir].dst);
dst_release(route.tuple[!dir].dst);
if (!flow)
goto err_flow_route;
if (tcph) {
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
if (flow_offload_add(&nf_flowtable, flow) < 0)
goto err_flow_add;
xt_flowoffload_check_device(xt_in(par));
xt_flowoffload_check_device(xt_out(par));
net = read_pnet(&nf_flowtable.ft_net);
if (!net)
write_pnet(&nf_flowtable.ft_net, xt_net(par));
if (info->flags & XT_FLOWOFFLOAD_HW)
nf_flow_offload_hw_add(xt_net(par), flow, ct);
return XT_CONTINUE;
err_flow_add:
flow_offload_free(flow);
err_flow_route:
clear_bit(IPS_OFFLOAD_BIT, &ct->status);
return XT_CONTINUE;
}
static int flowoffload_chk(const struct xt_tgchk_param *par)
{
struct xt_flowoffload_target_info *info = par->targinfo;
if (info->flags & ~XT_FLOWOFFLOAD_MASK)
return -EINVAL;
return 0;
}
static struct xt_target offload_tg_reg __read_mostly = {
.family = NFPROTO_UNSPEC,
.name = "FLOWOFFLOAD",
.revision = 0,
.targetsize = sizeof(struct xt_flowoffload_target_info),
.usersize = sizeof(struct xt_flowoffload_target_info),
.checkentry = flowoffload_chk,
.target = flowoffload_tg,
.me = THIS_MODULE,
};
static int xt_flowoffload_table_init(struct nf_flowtable *table)
{
table->flags = NF_FLOWTABLE_F_HW;
nf_flow_table_init(table);
return 0;
}
static void xt_flowoffload_table_cleanup(struct nf_flowtable *table)
{
nf_flow_table_free(table);
}
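
/*
 * On NETDEV_UNREGISTER, tear down the hook attached to the device and
 * flush its flows from the table.
 */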
static int flow_offload_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct xt_flowoffload_hook *hook = NULL;
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
spin_lock_bh(&hooks_lock);
hook = flow_offload_lookup_hook(dev);
if (hook) {
hlist_del(&hook->list);
}
spin_unlock_bh(&hooks_lock);
if (hook) {
nf_unregister_net_hook(hook->net, &hook->ops);
kfree(hook);
}
nf_flow_table_cleanup(dev);
return NOTIFY_DONE;
}
static struct notifier_block flow_offload_netdev_notifier = {
.notifier_call = flow_offload_netdev_event,
};
static int __init xt_flowoffload_tg_init(void)
{
int ret;
register_netdevice_notifier(&flow_offload_netdev_notifier);
INIT_DELAYED_WORK(&hook_work, xt_flowoffload_hook_work);
ret = xt_flowoffload_table_init(&nf_flowtable);
if (ret)
return ret;
ret = xt_register_target(&offload_tg_reg);
if (ret)
xt_flowoffload_table_cleanup(&nf_flowtable);
return ret;
}
static void __exit xt_flowoffload_tg_exit(void)
{
xt_unregister_target(&offload_tg_reg);
xt_flowoffload_table_cleanup(&nf_flowtable);
unregister_netdevice_notifier(&flow_offload_netdev_notifier);
}
MODULE_LICENSE("GPL");
module_init(xt_flowoffload_tg_init);
module_exit(xt_flowoffload_tg_exit);