/*
* Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <linux/version.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <net/xfrm.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <linux/inetdevice.h>
#include <linux/debugfs.h>
#include <linux/netfilter.h>
#include <crypto/rng.h>
#include <crypto/aes.h>
#include <crypto/gcm.h>
#include <crypto/ghash.h>
#include "sfe_xfrm.h"
/*
* Structure to store necessary fields during xfrm state walk.
*/
struct sfe_xfrm_iter_data {
struct net_device *ndev;
ssize_t max_len;
char *buf;
};
/*
* Structure to map crypto init function to xfrm state algo.
*/
struct sfe_xfrm_algo {
const char *algo_name; /* Crypto algorithm name */
int (*crypto_init)(struct sfe_xfrm_sa *sa, struct xfrm_state *xs, bool is_decap);
/* pointer to crypto init */
};
static int sfe_xfrm_crypto_init_gcm(struct sfe_xfrm_sa *sa, struct xfrm_state *xs, bool is_decap);
static void sfe_xfrm_dev_final(struct kref *kref);
static struct sfe_xfrm_algo xfrm_algo[] = {
{.algo_name = "rfc4106(gcm(aes))", .crypto_init = sfe_xfrm_crypto_init_gcm},
};
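/*
 * The table above maps an XFRM AEAD algorithm name to its crypto init
 * handler; sfe_xfrm_crypto_init() walks it and matches on algo_name with
 * strncmp(). As an illustration only (not part of this driver), supporting
 * another AEAD suite would mean adding one more entry, e.g.:
 *
 *	{.algo_name = "rfc4309(ccm(aes))", .crypto_init = sfe_xfrm_crypto_init_ccm},
 *
 * together with a matching, hypothetical sfe_xfrm_crypto_init_ccm() handler.
 */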
/*
* Original ESP protocol handlers
*/
static const struct net_protocol *linux_esp_handler;
/*
 * sfe_xfrm_genkey()
 * Generate the authentication key by encrypting the key buffer in place with the SA cipher transform.
 */
static int sfe_xfrm_genkey(struct crypto_sync_skcipher *etfm, uint8_t *key, uint16_t len)
{
uint8_t iv[AES_BLOCK_SIZE] = {0};
struct scatterlist sg[1];
int ret;
/*
* TODO: Move this to heap
*/
SYNC_SKCIPHER_REQUEST_ON_STACK(ereq, etfm);
sg_init_one(sg, key, len);
skcipher_request_set_sync_tfm(ereq, etfm);
skcipher_request_set_callback(ereq, 0, NULL, NULL);
skcipher_request_set_crypt(ereq, sg, sg, len, iv);
ret = crypto_skcipher_encrypt(ereq);
if (ret) {
pr_debug("%px: Failed to encrypt; err(%d)\n", etfm, ret);
return ret;
}
return 0;
}
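/*
 * Note on sfe_xfrm_genkey(): the key buffer passed in by
 * sfe_xfrm_crypto_init_gcm() is an all-zero AES block and the transform is
 * ctr(aes) keyed with the SA encryption key. Encrypting a zero block with a
 * zero IV/counter therefore yields E_K(0^128), i.e. the GHASH hash subkey H
 * defined by GCM, which is subsequently programmed into the ghash shash
 * transform via crypto_shash_setkey().
 */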
/*
* sfe_xfrm_crypto_init_gcm()
* Crypto init func for GCM.
*/
static int sfe_xfrm_crypto_init_gcm(struct sfe_xfrm_sa *sa, struct xfrm_state *xs, bool is_decap)
{
uint8_t authkey[AES_BLOCK_SIZE] = {0};
uint8_t *cipher_key;
unsigned int cipher_keylen;
unsigned int key_len = 0;
int err;
key_len = ALIGN(xs->aead->alg_key_len, BITS_PER_BYTE) / BITS_PER_BYTE;
/*
 * Cipher key.
 * For rfc4106(gcm(aes)), the AEAD key material is the AES key followed by a
 * 4-byte salt (nonce), hence the subtraction below.
 */
cipher_keylen = key_len - 4; /* Subtract nonce */
cipher_key = xs->aead->alg_key;
/*
* Allocate the cipher context
*/
sa->enc_tfm = crypto_alloc_sync_skcipher("ctr(aes)", 0, 0);
if (IS_ERR(sa->enc_tfm)) {
pr_err("%px: Error allocating tfm for skcipher: ctr(aes)\n", sa);
return PTR_ERR(sa->enc_tfm);
}
/*
* Setup cipher keys
*/
err = crypto_sync_skcipher_setkey(sa->enc_tfm, cipher_key, cipher_keylen);
if (err) {
pr_err("%px: Failed to set the key for skcipher: ctr(aes)\n", sa);
goto fail2;
}
if (is_decap) {
struct sfe_xfrm_sa_state_dec *dec = &sa->state.dec;
dec->icv_len = xs->aead->alg_icv_len / BITS_PER_BYTE;
dec->iv_len = GCM_RFC4106_IV_SIZE;
memcpy(&dec->nonce, cipher_key + cipher_keylen, 4);
dec->auth_decrypt = sfe_xfrm_auth_decrypt_gcm;
} else {
struct sfe_xfrm_sa_state_enc *enc = &sa->state.enc;
enc->icv_len = xs->aead->alg_icv_len / BITS_PER_BYTE;
enc->iv_len = GCM_RFC4106_IV_SIZE;
enc->blk_len = ALIGN(crypto_sync_skcipher_blocksize(sa->enc_tfm), 4);
memcpy(&enc->nonce, cipher_key + cipher_keylen, 4);
get_random_bytes(&enc->salt, sizeof(enc->salt));
get_random_bytes(&enc->iv_seq, sizeof(enc->iv_seq));
enc->mtu_overhead += SFE_XFRM_MAX_ESP_GCM_OVERHEAD;
enc->encrypt_auth = sfe_xfrm_encrypt_auth_gcm;
}
/*
* Generate authentication key
*/
err = sfe_xfrm_genkey(sa->enc_tfm, authkey, sizeof(authkey));
if (err) {
pr_warn("%px: Failed to generate authentication key for GCM\n", sa);
goto fail2;
}
/*
* Allocate authentication context.
* Use GHASH CE Driver.
*/
sa->auth_tfm = crypto_alloc_shash("__driver-ghash-ce", CRYPTO_ALG_INTERNAL, CRYPTO_ALG_INTERNAL);
if (IS_ERR(sa->auth_tfm)) {
pr_err("%px: Error allocating tfm for shash: ghash\n", sa);
err = PTR_ERR(sa->auth_tfm);
goto fail2;
}
/*
* Setup authentication key
*/
err = crypto_shash_setkey(sa->auth_tfm, authkey, sizeof(authkey));
if (err) {
pr_err("%px: Failed to set the key for auth: ghash\n", sa);
goto fail1;
}
/*
 * Wipe the key material from memory now that the transforms are keyed.
 */
memzero_explicit(authkey, sizeof(authkey));
memzero_explicit(cipher_key, cipher_keylen);
pr_info("skcipher driver name: %s\n", crypto_tfm_alg_driver_name(crypto_skcipher_tfm((struct crypto_skcipher *)sa->enc_tfm)));
pr_info("shash driver name: %s\n", crypto_tfm_alg_driver_name(crypto_shash_tfm(sa->auth_tfm)));
return 0;
fail1:
crypto_free_shash(sa->auth_tfm);
fail2:
crypto_free_sync_skcipher(sa->enc_tfm);
return err;
}
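/*
 * Note: "__driver-ghash-ce" appears to be the internal (CRYPTO_ALG_INTERNAL)
 * name of the ARM Crypto Extensions GHASH implementation, so this init path
 * assumes a platform that provides that driver; on platforms without it the
 * crypto_alloc_shash() call above fails and the SA setup is rejected.
 */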
/*
* sfe_xfrm_open_ndev()
* Netdevice open handler.
*/
static int sfe_xfrm_open_ndev(struct net_device *ndev)
{
netif_start_queue(ndev);
return 0;
}
/*
* sfe_xfrm_stop_ndev()
* Netdevice stop handler.
*/
static int sfe_xfrm_stop_ndev(struct net_device *ndev)
{
netif_stop_queue(ndev);
return 0;
}
/*
* sfe_xfrm_get_dev_stats()
* Fetch all the device statistics.
*/
static void sfe_xfrm_get_dev_stats(struct sfe_xfrm_dev *dev, struct sfe_xfrm_dev_stats *stats)
{
int cpu;
int i;
/*
 * All statistics are 64-bit, so we can simply accumulate them word by word.
 */
for_each_possible_cpu(cpu) {
const struct sfe_xfrm_dev_stats *sp = per_cpu_ptr(dev->stats_pcpu, cpu);
uint64_t *stats_ptr = (uint64_t *)stats;
uint64_t *sp_ptr = (uint64_t *)sp;
for (i = 0; i < SFE_XFRM_DEV_STATS_DWORDS; i++, stats_ptr++, sp_ptr++) {
*stats_ptr += *sp_ptr;
}
}
}
/*
* sfe_xfrm_get_sa_stats()
* Fetch all the SA statistics.
*/
static void sfe_xfrm_get_sa_stats(struct sfe_xfrm_sa *sa, struct sfe_xfrm_sa_stats *stats)
{
int cpu;
int i;
/*
 * All statistics are 64-bit, so we can simply accumulate them word by word.
 */
for_each_possible_cpu(cpu) {
const struct sfe_xfrm_sa_stats *sp = per_cpu_ptr(sa->stats_pcpu, cpu);
uint64_t *stats_ptr = (uint64_t *)stats;
uint64_t *sp_ptr = (uint64_t *)sp;
for (i = 0; i < SFE_XFRM_SA_STATS_DWORDS; i++, stats_ptr++, sp_ptr++) {
*stats_ptr += *sp_ptr;
}
}
}
/*
* sfe_xfrm_dump_sa_stats()
* Print the SA statistics.
*/
static int sfe_xfrm_dump_sa_stats(struct xfrm_state *xs, int count, void *ptr)
{
struct sfe_xfrm_iter_data *iter = (struct sfe_xfrm_iter_data *)ptr;
struct sfe_xfrm_sa_stats stats = {0};
struct sfe_xfrm_sa *sa;
ssize_t len = 0;
bool is_encap;
char *hdr;
/*
 * Not an NSS-offloaded SA; skip it.
 */
if (!(xs->xflags & XFRM_STATE_OFFLOAD_NSS))
return 0;
/*
* SA belongs to a different NETDEVICE
*/
if (xs->offload_dev != iter->ndev)
return 0;
BUG_ON(xs->props.family == AF_INET6);
sa = xs->data;
/*
* We need to first fetch the stats values
* from the SA object.
*/
sfe_xfrm_get_sa_stats(sa, &stats);
is_encap = (sa->state.flags & SFE_XFRM_SA_FLAG_ENC);
hdr = is_encap ? "Encap SA" : "Decap SA";
len += snprintf(iter->buf + len, iter->max_len - len,
"%s: (src:%pI4n dst:%pI4n spi:0x%X sport:%u dport:%u flags:0x%x)\n", hdr,
sa->hdr.src_ip, sa->hdr.dst_ip, htonl(sa->hdr.spi), htons(sa->hdr.sport),
htons(sa->hdr.dport), sa->state.flags);
len += snprintf(iter->buf + len, iter->max_len - len, "\tTx packets: %llu\n", stats.tx_pkts);
len += snprintf(iter->buf + len, iter->max_len - len, "\tTx bytes: %llu\n", stats.tx_bytes);
len += snprintf(iter->buf + len, iter->max_len - len, "\tRx packets: %llu\n", stats.rx_pkts);
len += snprintf(iter->buf + len, iter->max_len - len, "\tRx bytes: %llu\n", stats.rx_bytes);
len += snprintf(iter->buf + len, iter->max_len - len, "\tDst cache error: %llu\n", stats.fail_dst_cache);
len += snprintf(iter->buf + len, iter->max_len - len, "\tEnqueue error: %llu\n", stats.fail_enqueue);
len += snprintf(iter->buf + len, iter->max_len - len, "\tTransformation error: %llu\n", stats.fail_transform);
len += snprintf(iter->buf + len, iter->max_len - len, "\tRoute error: %llu\n", stats.fail_route);
len += snprintf(iter->buf + len, iter->max_len - len, "\tSP allocation error: %llu\n", stats.fail_sp_alloc);
len += snprintf(iter->buf + len, iter->max_len - len, "\tSequence error: %llu\n", stats.fail_seq);
iter->buf += len;
iter->max_len -= len;
/*
* Stop processing if the available length is zero.
*/
return !iter->max_len;
}
/*
* sfe_xfrm_dump_dev_stats()
* Print the Device statistics.
*/
static ssize_t sfe_xfrm_dump_dev_stats(struct sfe_xfrm_dev_stats *stats, char *buf, ssize_t max_len)
{
ssize_t len = 0;
len = snprintf(buf, max_len, "SA Alloc: %llu\n", stats->sa_alloc);
len += snprintf(buf + len, max_len - len, "SA Free: %llu\n", stats->sa_free);
len += snprintf(buf + len, max_len - len, "Device Encapsulation Statistics:\n");
len += snprintf(buf + len, max_len - len, "\tTx packets: %llu\n", stats->tx_pkts);
len += snprintf(buf + len, max_len - len, "\tTx bytes: %llu\n", stats->tx_bytes);
len += snprintf(buf + len, max_len - len, "\tTx linearize: %llu\n", stats->tx_linearize);
len += snprintf(buf + len, max_len - len, "\tTx Fail: %llu\n", stats->tx_fail);
len += snprintf(buf + len, max_len - len, "\tTx Fail SA: %llu\n", stats->tx_fail_sa);
len += snprintf(buf + len, max_len - len, "\tTx Fail shared: %llu\n", stats->tx_fail_shared);
len += snprintf(buf + len, max_len - len, "\tTx Fail headroom: %llu\n", stats->tx_fail_hroom);
len += snprintf(buf + len, max_len - len, "\tTx Fail tailroom: %llu\n", stats->tx_fail_troom);
len += snprintf(buf + len, max_len - len, "\tTx Fail Linearize: %llu\n", stats->tx_fail_linearize);
len += snprintf(buf + len, max_len - len, "Device Decapsulation Statistics:\n");
len += snprintf(buf + len, max_len - len, "\tRx packets: %llu\n", stats->rx_pkts);
len += snprintf(buf + len, max_len - len, "\tRx bytes: %llu\n", stats->rx_bytes);
len += snprintf(buf + len, max_len - len, "\tRx linearize: %llu\n", stats->rx_linearize);
len += snprintf(buf + len, max_len - len, "\tRx dummy: %llu\n", stats->rx_dummy);
len += snprintf(buf + len, max_len - len, "\tRx Fail: %llu\n", stats->rx_fail);
len += snprintf(buf + len, max_len - len, "\tRx Fail SA: %llu\n", stats->rx_fail_sa);
len += snprintf(buf + len, max_len - len, "\tRx Fail Linearize: %llu\n", stats->rx_fail_linearize);
return len;
}
/*
* sfe_xfrm_dump_all_stats()
* Read all device and SA statistics.
*/
static ssize_t sfe_xfrm_dump_all_stats(struct file *fp, char __user *ubuf, size_t sz, loff_t *ppos)
{
struct sfe_xfrm_dev *dev = fp->private_data;
struct sfe_xfrm_dev_stats dev_stats = {0};
struct sfe_xfrm_iter_data iter = {0};
struct xfrm_state_walk walk;
int32_t sa_count;
ssize_t len = 0;
ssize_t max_len;
char *buf;
/*
* Fetch the stats values from the device object.
*/
sfe_xfrm_get_dev_stats(dev, &dev_stats);
sa_count = dev_stats.sa_alloc - dev_stats.sa_free;
if (sa_count < 0)
sa_count = 0;
/*
* Calculate required string buffer for stats.
*/
max_len = SFE_XFRM_DEV_STATS_DWORDS * SFE_XFRM_MAX_STR_LEN; /* Members */
max_len += SFE_XFRM_MAX_STR_LEN; /* Encap heading */
max_len += SFE_XFRM_MAX_STR_LEN; /* Decap heading */
max_len += SFE_XFRM_MAX_STR_LEN * sa_count; /* SA header */
max_len += SFE_XFRM_SA_STATS_DWORDS * SFE_XFRM_MAX_STR_LEN * sa_count; /* SA Members */
/*
* Allocate the buffer.
*/
buf = vzalloc(max_len);
if (!buf) {
pr_warn("%px: failed to allocate print buffer (%zu)", dev, max_len);
return 0;
}
/*
* Print the device statistics.
*/
len += sfe_xfrm_dump_dev_stats(&dev_stats, buf, max_len);
/*
* No active SA.
*/
if (!sa_count) {
goto done;
}
/*
* Initialize the walk object for ESP xfrm state.
*/
xfrm_state_walk_init(&walk, IPPROTO_ESP, NULL);
/*
 * The callback (sfe_xfrm_dump_sa_stats) needs the fields below to
 * selectively print the relevant SA stats.
 *
 * Since there is no direct way to pass these fields to the callback,
 * we hand them over through the iterator object (iter).
 */
iter.ndev = dev->ndev;
iter.buf = buf + len;
iter.max_len = max_len - len;
xfrm_state_walk(&init_net, &walk, sfe_xfrm_dump_sa_stats, &iter);
xfrm_state_walk_done(&walk, &init_net);
len = iter.buf - buf;
done:
len = simple_read_from_buffer(ubuf, sz, ppos, buf, len);
vfree(buf);
return len;
}
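/*
 * The per-device debugfs file created in sfe_xfrm_dev_add_ref() uses the read
 * handler above. Assuming the module's debugfs directory (g_xfrm->dentry,
 * created elsewhere) is named "sfe_xfrm", the stats of the first tunnel device
 * could be read from the shell roughly as:
 *
 *	cat /sys/kernel/debug/sfe_xfrm/ipsectun0
 *
 * The directory name is only an assumption for illustration; this file fixes
 * only the per-device file name (ndev->name, "ipsectun%d").
 */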
/*
* sfe_xfrm_get_rtnl_stats()
* Handler to fetch netdevice rtnl statistics.
*/
static void sfe_xfrm_get_rtnl_stats(struct net_device *ndev, struct rtnl_link_stats64 *rtnl_stats)
{
struct sfe_xfrm_dev *dev = netdev_priv(ndev);
struct sfe_xfrm_dev_stats stats = {0};
memset(rtnl_stats, 0, sizeof(*rtnl_stats));
sfe_xfrm_get_dev_stats(dev, &stats);
rtnl_stats->tx_packets = stats.tx_pkts;
rtnl_stats->tx_bytes = stats.tx_bytes;
rtnl_stats->tx_dropped = stats.tx_fail;
rtnl_stats->rx_packets = stats.rx_pkts;
rtnl_stats->rx_bytes = stats.rx_bytes;
rtnl_stats->rx_dropped = stats.rx_fail;
}
/*
* sfe_xfrm_mtu_set()
* Update device MTU.
*/
static int sfe_xfrm_mtu_set(struct net_device *ndev, int mtu)
{
ndev->mtu = mtu;
return 0;
}
/*
 * sfe_xfrm_xmit()
 * Called for IPv4/IPv6 packets that are to be transformed.
 */
static int sfe_xfrm_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return dev_queue_xmit(skb);
}
/*
 * sfe_xfrm_v4_output()
 * Called for IPv4 plaintext packets submitted for IPsec transformation.
 */
static int sfe_xfrm_v4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct sfe_xfrm_sa_state_enc *enc;
struct sfe_xfrm_sa *sa;
struct xfrm_state *xs;
bool expand_skb;
/*
* No xfrm_state associated; Drop
*/
xs = skb_dst(skb)->xfrm;
if (!xs) {
pr_warn("%px: Failed to offload; No xfrm_state associated: drop\n", skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
goto drop;
}
/*
* Only process packets for XFRM state managed by IPsec offload
*/
if (!(xs->xflags & XFRM_STATE_OFFLOAD_NSS)) {
pr_debug("%px: state is not offloaded; xfrm_state %p :drop\n", skb, xs);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
goto drop;
}
/*
* Unshare the SKB as we will be modifying it.
*/
if (unlikely(skb_shared(skb))) {
skb = skb_unshare(skb, GFP_NOWAIT | __GFP_NOWARN);
if (!skb) {
goto drop;
}
}
skb->dev = xs->offload_dev;
sa = xs->data;
enc = &sa->state.enc;
/*
* Expand the SKB if needed.
*/
expand_skb = (skb_headroom(skb) < enc->head_room) || (skb_tailroom(skb) < enc->tail_room);
if (expand_skb && pskb_expand_head(skb, enc->head_room, enc->tail_room, GFP_NOWAIT | __GFP_NOWARN)) {
pr_debug("%px: Failed to expand SKB head(%u) or tail(%u)\n", skb, skb_headroom(skb), skb_tailroom(skb));
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
goto drop;
}
/*
* Call the Post routing hooks.
*/
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb_dst(skb)->dev, sfe_xfrm_xmit);
drop:
dev_kfree_skb_any(skb);
return -EINVAL;
}
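/*
 * Encap path summary for sfe_xfrm_v4_output(): outbound plaintext packets
 * whose dst carries an offloaded xfrm_state are retargeted at the IPsec
 * netdevice (xs->offload_dev), expanded to guarantee ESP head/tail room and
 * passed through the NF_INET_POST_ROUTING hooks. The okfn, sfe_xfrm_xmit(),
 * simply calls dev_queue_xmit(), so the actual ESP transformation happens in
 * the device's ndo_start_xmit handler, sfe_xfrm_enc() (implemented elsewhere).
 */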
/*
* sfe_xfrm_dev_free()
* Free netdevice memory.
*/
static void sfe_xfrm_dev_free(struct net_device *ndev)
{
struct sfe_xfrm_dev *dev = netdev_priv(ndev);
/*
* There should be no active references at this point.
*/
WARN_ON(kref_read(&dev->ref));
free_percpu(dev->stats_pcpu);
debugfs_remove_recursive(dev->dentry);
free_netdev(ndev);
pr_info("%px: IPsec device freed\n", ndev);
}
/*
* IPsec device callbacks.
*/
static const struct net_device_ops xfrm_dev_ops = {
.ndo_open = sfe_xfrm_open_ndev,
.ndo_stop = sfe_xfrm_stop_ndev,
.ndo_start_xmit = sfe_xfrm_enc,
.ndo_get_stats64 = sfe_xfrm_get_rtnl_stats,
.ndo_change_mtu = sfe_xfrm_mtu_set,
};
/*
* sfe_xfrm_dev_setup()
* Setup ipsec connection device.
*/
static void sfe_xfrm_dev_setup(struct net_device *ndev)
{
ndev->addr_len = ETH_ALEN;
ndev->mtu = ETH_DATA_LEN - SFE_XFRM_DEV_MAX_HEADROOM;
ndev->hard_header_len = SFE_XFRM_DEV_MAX_HEADROOM;
ndev->needed_headroom = SFE_XFRM_DEV_MAX_HEADROOM;
ndev->needed_tailroom = SFE_XFRM_DEV_MAX_TAILROOM;
ndev->type = SFE_XFRM_DEV_ARPHRD;
ndev->ethtool_ops = NULL;
ndev->header_ops = NULL;
ndev->netdev_ops = &xfrm_dev_ops;
ndev->priv_destructor = sfe_xfrm_dev_free;
/*
* Assign random ethernet address.
*/
random_ether_addr(ndev->dev_addr);
memset(ndev->broadcast, 0xff, ndev->addr_len);
memcpy(ndev->perm_addr, ndev->dev_addr, ndev->addr_len);
}
/*
 * sfe_xfrm_udp_override()
 * Override the UDP socket encap receive handler for ESP-over-UDP decapsulation SAs.
 */
static void sfe_xfrm_udp_override(struct sfe_xfrm_sa *sa)
{
struct sfe_xfrm *g_xfrm = &g_sfe_xfrm;
uint32_t src_ip, dst_ip;
struct udp_sock *up;
struct sock *sk;
/*
* Only ESP-over-UDP & decapsulation flow will be processed
*/
if (!(sa->state.flags & SFE_XFRM_SA_FLAG_UDP) || (sa->state.flags & SFE_XFRM_SA_FLAG_ENC))
return;
src_ip = sa->hdr.src_ip[0];
dst_ip = sa->hdr.dst_ip[0];
rcu_read_lock();
sk = __udp4_lib_lookup(&init_net, src_ip, sa->hdr.sport, dst_ip, sa->hdr.dport, 0, 0, &udp_table, NULL);
if (!sk) {
rcu_read_unlock();
pr_err("%px: Failed to lookup UDP socket dst(%pI4h) dport(0x%X)\n", g_xfrm, sa->hdr.dst_ip, sa->hdr.dport);
return;
}
up = udp_sk(sk);
if (up->encap_type != UDP_ENCAP_ESPINUDP) {
rcu_read_unlock();
pr_err("%px: Socket type is not UDP_ENCAP_ESPINUDP (%u)\n", up, up->encap_type);
return;
}
if (READ_ONCE(up->encap_rcv) != sfe_xfrm_dec_natt) {
xchg(&up->encap_rcv, sfe_xfrm_dec_natt);
pr_debug("%px: Overriden socket encap handler\n", up);
}
rcu_read_unlock();
}
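/*
 * NAT-T note for sfe_xfrm_udp_override(): for an ESP-over-UDP decapsulation
 * SA, the matching UDP_ENCAP_ESPINUDP socket is looked up and its encap_rcv
 * callback is swapped to sfe_xfrm_dec_natt (implemented elsewhere), so that
 * UDP-encapsulated ESP packets are decapsulated by this module instead of
 * taking the native xfrm input path.
 */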
/*
* sfe_xfrm_crypto_init()
* Allocate skcipher and shash crypto transform.
*/
static int sfe_xfrm_crypto_init(struct xfrm_state *xs, struct sfe_xfrm_sa *sa, bool is_decap)
{
struct sfe_xfrm_algo *xalg = xfrm_algo;
char *alg_name;
uint32_t i;
/*
 * TODO: Only combined-mode (AEAD) cipher/authentication is supported.
 */
alg_name = xs->aead ? xs->aead->alg_name : NULL;
/*
* Perform algorithm specific crypto initialisation.
*/
for (i = 0; i < ARRAY_SIZE(xfrm_algo); i++, xalg++) {
if (alg_name && !strncmp(xalg->algo_name, alg_name, strlen(xalg->algo_name))) {
return xalg->crypto_init(sa, xs, is_decap);
}
}
pr_warn("%px: Unsupported algorithm for IPsec\n", xs);
return -1;
}
/*
* sfe_xfrm_sa_add()
* Add new IPsec SA for given xfrm state.
*/
static int sfe_xfrm_sa_add(struct sfe_xfrm_dev *dev, struct xfrm_state *xs)
{
struct sfe_xfrm_dev_stats *dev_stats = this_cpu_ptr(dev->stats_pcpu);
struct sfe_xfrm *g_xfrm = &g_sfe_xfrm;
struct net_device *ndev = dev->ndev;
struct dst_cache *dst_cache = NULL;
struct sfe_xfrm_sa *sa;
bool is_decap = false;
bool is_natt = false;
int err;
/*
* SA flag shouldn't be set
*/
BUG_ON(xs->xflags & XFRM_STATE_OFFLOAD_NSS);
BUG_ON(xs->props.family == AF_INET6);
/*
* SA object allocation.
*/
sa = kmem_cache_alloc(g_xfrm->sa_cache, GFP_KERNEL | __GFP_ZERO);
if (!sa) {
pr_warn("%px: Failed to allocate SA\n", ndev);
return -ENOMEM;
}
sa->stats_pcpu = alloc_percpu_gfp(struct sfe_xfrm_sa_stats, GFP_KERNEL | __GFP_ZERO);
if (!sa->stats_pcpu) {
pr_err("%px: Failed to allocate stats memory for SA\n", ndev);
err = -ENOMEM;
goto fail_pcpu;
}
/*
 * Initialise the SA object and find the SA direction.
 * For a decapsulation SA, the destination address is local.
 */
if (xs->props.family == AF_INET) {
struct sfe_xfrm_sa_state_enc *enc;
struct net_device *local_dev;
sa->hdr.src_ip[0] = xs->props.saddr.a4;
sa->hdr.dst_ip[0] = xs->id.daddr.a4;
/*
* Check for NAT-T flow
*/
is_natt = !!xs->encap;
sa->state.flags |= is_natt ? SFE_XFRM_SA_FLAG_UDP : 0;
/*
* Find the SA direction (encap or decap)
*/
local_dev = ip_dev_find(&init_net, xs->id.daddr.a4);
if (local_dev) {
dev_put(local_dev);
is_decap = true;
goto init;
}
/*
* Destination is remote hence this is an encapsulation SA
*/
enc = &sa->state.enc;
sa->state.flags |= SFE_XFRM_SA_FLAG_ENC;
dst_cache = &enc->dst_cache;
err = dst_cache_init(dst_cache, GFP_KERNEL);
if (err) {
pr_err("%px: Failed to initialize dst for SA\n", ndev);
goto fail_dst;
}
enc->mtu_overhead = sizeof(struct iphdr);
enc->head_room = SFE_XFRM_DEV_MAX_HEADROOM;
enc->tail_room = SFE_XFRM_DEV_MAX_TAILROOM;
enc->esp_seq = 1;
enc->esp_offset = sizeof(struct iphdr);
enc->add_hdr = sfe_xfrm_add_hdr_v4;
enc->ip_send = sfe_xfrm_ip4_send;
if (is_natt) {
enc->mtu_overhead += sizeof(struct udphdr);
enc->esp_offset += sizeof(struct udphdr);
enc->add_hdr = sfe_xfrm_add_hdr_natt;
}
}
init:
/*
* Allocate the transform pointer for the
* skcipher and shash.
*/
err = sfe_xfrm_crypto_init(xs, sa, is_decap);
if (err) {
pr_err("%px: Crypto Initialisation failed for SA\n", sa);
goto fail_crypto;
}
/*
* Dereference: sfe_xfrm_sa_del()
*/
kref_get(&dev->ref);
sa->dev = dev;
sa->ndev = ndev;
sa->ifindex = ndev->ifindex;
sa->xs = xs;
sa->hdr.spi = xs->id.spi;
sa->hdr.sport = is_natt ? xs->encap->encap_sport : 0;
sa->hdr.dport = is_natt ? xs->encap->encap_dport : 0;
sfe_xfrm_udp_override(sa);
/*
 * Make this SA active. For the old SA, we wait for all RCU readers during SA deletion.
 */
if (!is_decap) {
uint32_t new_mtu = ETH_DATA_LEN - sa->state.enc.mtu_overhead;
rtnl_lock();
dev_set_mtu(ndev, new_mtu);
rtnl_unlock();
spin_lock_bh(&g_xfrm->lock);
rcu_assign_pointer(dev->sa, sa);
spin_unlock_bh(&g_xfrm->lock);
}
WRITE_ONCE(xs->data, sa);
dev_stats->sa_alloc++;
return 0;
fail_crypto:
if (dst_cache) {
dst_cache_destroy(dst_cache);
}
fail_dst:
free_percpu(sa->stats_pcpu);
fail_pcpu:
kmem_cache_free(g_xfrm->sa_cache, sa);
return err;
}
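/*
 * Notes on sfe_xfrm_sa_add():
 *
 * - Direction is inferred from the destination address: if ip_dev_find()
 *   resolves xs->id.daddr to a local interface the SA is treated as a
 *   decapsulation SA, otherwise as an encapsulation SA.
 * - The encap SA is published with rcu_assign_pointer() under g_xfrm->lock;
 *   the datapath (in other files) is expected to read dev->sa under RCU,
 *   which is why sfe_xfrm_sa_del() waits in synchronize_rcu() before freeing.
 */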
/*
* sfe_xfrm_sa_del()
* Delete existing IPsec SA for given xfrm state.
*/
static void sfe_xfrm_sa_del(struct sfe_xfrm_dev *dev, struct xfrm_state *xs)
{
struct sfe_xfrm_dev_stats *dev_stats;
struct sfe_xfrm *g_xfrm = &g_sfe_xfrm;
struct sfe_xfrm_sa *sa, *dev_sa;
sa = READ_ONCE(xs->data);
BUG_ON(!sa);
/*
* SA flag should be set
*/
BUG_ON((xs->xflags & XFRM_STATE_OFFLOAD_NSS) == 0);
/*
 * If the SA being deleted is the active
 * encap SA, then set dev->sa to NULL.
 *
 * TODO: Change it to reference counting
 */
spin_lock_bh(&g_xfrm->lock);
dev_sa = rcu_dereference_protected(dev->sa, lockdep_is_held(&g_xfrm->lock));
if (dev_sa == sa) {
rcu_assign_pointer(dev->sa, NULL);
}
spin_unlock_bh(&g_xfrm->lock);
synchronize_rcu();
/*
* SA free
*/
dev_stats = this_cpu_ptr(dev->stats_pcpu);
dev_stats->sa_free++;
sa->xs = NULL;
/*
* Deallocate the Crypto resources
*/
crypto_free_sync_skcipher(sa->enc_tfm);
crypto_free_shash(sa->auth_tfm);
/*
* Reference: sfe_xfrm_sa_add
*/
kref_put(&dev->ref, sfe_xfrm_dev_final);
sa->dev = NULL;
if (sa->state.flags & SFE_XFRM_SA_FLAG_ENC) {
dst_cache_destroy(&sa->state.enc.dst_cache);
}
free_percpu(sa->stats_pcpu);
kmem_cache_free(g_xfrm->sa_cache, sa);
}
/*
* IPsec device stats callback.
*/
const struct file_operations sfe_xfrm_dev_file_ops = {
.open = simple_open,
.llseek = default_llseek,
.read = sfe_xfrm_dump_all_stats,
};
/*
 * sfe_xfrm_dev_add_ref()
 * Look up the IPsec device for the given reqid and take a reference; create it if not found.
 */
static struct sfe_xfrm_dev *sfe_xfrm_dev_add_ref(int64_t devid)
{
struct sfe_xfrm *g_xfrm = &g_sfe_xfrm;
struct sfe_xfrm_dev *dev;
struct list_head *cur;
struct net_device *ndev;
int status;
/*
* Fetch the net_device from the db for the given ID.
*/
spin_lock_bh(&g_xfrm->lock);
list_for_each(cur, &g_xfrm->dev_head) {
dev = list_entry(cur, struct sfe_xfrm_dev, node);
/*
 * Ensure that we do not take a reference if the final
 * release (sfe_xfrm_dev_final) is already executing.
 */
if ((dev->xfrm_reqid == devid) && kref_get_unless_zero(&dev->ref)) {
break;
}
}
spin_unlock_bh(&g_xfrm->lock);
/*
* Entry is found
*/
if (cur != &g_xfrm->dev_head) {
return dev;
}
/*
* Netdevice not created for this id.
* Allocate new IPsec device for the given XFRM reqid.
*/
ndev = alloc_netdev(sizeof(*dev), "ipsectun%d", NET_NAME_ENUM, sfe_xfrm_dev_setup);
if (!ndev) {
pr_err("%px: Failed to allocate IPsec device\n", g_xfrm);
return NULL;
}
/*
* Initialize device private structure.
*/
dev = netdev_priv(ndev);
dev->ndev = ndev;
dev->xfrm_reqid = devid;
rcu_assign_pointer(dev->sa, NULL);
dev->stats_pcpu = alloc_percpu_gfp(struct sfe_xfrm_dev_stats, GFP_KERNEL | __GFP_ZERO);
if (!dev->stats_pcpu) {
pr_err("%px: Failed to allocate stats memory for encap\n", ndev);
ndev->priv_destructor(ndev);
return NULL;
}
rtnl_lock();
/*
* Register netdevice with kernel.
* Note: Linux will invoke the destructor upon failure
*/
status = register_netdevice(ndev);
if (status < 0) {
pr_err("%px: Failed to register netdevce, error(%d)\n", ndev, status);
rtnl_unlock();
return NULL;
}
/*
* Set netdevice to UP state.
*/
status = dev_open(ndev, NULL);
if (status < 0) {
pr_err("%px: Failed to Open netdevce, error(%d)\n", ndev, status);
unregister_netdevice(ndev);
rtnl_unlock();
return NULL;
}
rtnl_unlock();
dev->dentry = debugfs_create_file(ndev->name, S_IRUGO, g_xfrm->dentry, dev, &sfe_xfrm_dev_file_ops);
if (IS_ERR_OR_NULL(dev->dentry)) {
pr_warn("%px: Failed to allocate dentry for %s\n", ndev, ndev->name);
}
kref_init(&dev->ref);
/*
* Add the net_device entry into the db.
*/
spin_lock_bh(&g_xfrm->lock);
list_add(&dev->node, &g_xfrm->dev_head);
spin_unlock_bh(&g_xfrm->lock);
return dev;
}
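/*
 * Device lifetime note for sfe_xfrm_dev_add_ref(): devices are keyed by the
 * XFRM reqid, so all SAs sharing a reqid share one netdevice. The lookup uses
 * kref_get_unless_zero() so a device whose last reference is already being
 * dropped (sfe_xfrm_dev_final() in flight) is never handed out; in that case
 * the walk continues and, if no other match is found, a fresh device is
 * created.
 */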
/*
 * sfe_xfrm_dev_final()
 * Final release of the IPsec device; called when the last reference is dropped.
 */
static void sfe_xfrm_dev_final(struct kref *kref)
{
struct sfe_xfrm_dev *dev = container_of(kref, struct sfe_xfrm_dev, ref);
struct sfe_xfrm *g_xfrm = &g_sfe_xfrm;
struct net_device *ndev = dev->ndev;
/*
* Delete the net_device entry from the db.
*/
BUG_ON(dev->xfrm_reqid < 0);
spin_lock_bh(&g_xfrm->lock);
list_del_init(&dev->node);
spin_unlock_bh(&g_xfrm->lock);
/*
* Bring down the device and unregister from linux.
*/
unregister_netdev(ndev);
}
/*
* sfe_xfrm_esp_init_state()
* Initialize IPsec xfrm state of type ESP.
*/
static int sfe_xfrm_esp_init_state(struct xfrm_state *xs)
{
struct sfe_xfrm_dev *dev;
int ret;
/*
* SA flag shouldn't be set
*/
BUG_ON(xs->xflags & XFRM_STATE_OFFLOAD_NSS);
/*
 * Verify whether the xfrm state can be offloaded.
 */
if (xs->props.mode != XFRM_MODE_TUNNEL) {
pr_warn("%px: SFE/IPsec transport mode not supported\n", xs);
return -ENOTSUPP;
}
if (xs->encap && (xs->encap->encap_type != UDP_ENCAP_ESPINUDP)) {
pr_warn("%px: SFE/IPsec UDP encap type(%d) not supported\n", xs, xs->encap->encap_type);
return -ENOTSUPP;
}
/*
* Before adding a new SA object, run through the db and find out
* if there is a net device already created for the given (id).
* If not present, then create one.
*/
dev = sfe_xfrm_dev_add_ref(xs->props.reqid);
if (!dev) {
pr_err("%px: Unable to fetch/add netdevice for this id %d\n", xs, xs->props.reqid);
return -1;
}
/*
* Create and add the SA object.
*/
ret = sfe_xfrm_sa_add(dev, xs);
if (ret < 0) {
pr_warn("%px: unable to offload xfrm_state\n", xs);
/*
* Reference: sfe_xfrm_dev_add_ref()
*/
kref_put(&dev->ref, sfe_xfrm_dev_final);
return ret;
}
xs->offload_dev = dev->ndev;
xs->xflags |= XFRM_STATE_OFFLOAD_NSS;
return 0;
}
/*
* sfe_xfrm_esp_deinit_state()
* Destroy IPsec xfrm state of type ESP.
*/
static void sfe_xfrm_esp_deinit_state(struct xfrm_state *xs)
{
struct net_device *ndev;
struct sfe_xfrm_dev *dev;
/*
* Check if the xfrm state is already offloaded or not.
*/
if (unlikely(!(xs->xflags & XFRM_STATE_OFFLOAD_NSS))) {
pr_warn("%px: xfrm_state is not offloaded\n", xs);
return;
}
/*
* Fetch the net_device from the xfrm state.
*/
ndev = xs->offload_dev;
dev = netdev_priv(ndev);
sfe_xfrm_sa_del(dev, xs);
/*
* Reference: sfe_xfrm_dev_add_ref()
*/
kref_put(&dev->ref, sfe_xfrm_dev_final);
return;
}
/*
* sfe_xfrm_esp_get_mtu()
* Get mtu for inner packet.
*/
static uint32_t sfe_xfrm_esp_get_mtu(struct xfrm_state *xs, int mtu)
{
struct net_device *ndev;
/*
* WARN_ON if the xfrm_state is not offloaded.
*/
WARN_ON(!(xs->xflags & XFRM_STATE_OFFLOAD_NSS));
/*
 * Since each encap SA is tracked using a unique netdevice,
 * the net_device MTU is the same as the SA MTU.
 */
ndev = xs->offload_dev;
BUG_ON(!ndev);
/*
* FIXME: return the overhead value
*/
return ndev->mtu;
}
/*
 * Trap IPv4 packets to be sent for IPsec encapsulation.
 */
static struct xfrm_state_afinfo xfrm_v4_afinfo = {
.family = AF_INET,
.proto = IPPROTO_IPIP,
.output = sfe_xfrm_v4_output,
.output_finish = NULL,
.extract_input = NULL,
.extract_output = NULL,
.transport_finish = NULL,
.local_error = NULL,
};
/*
 * ESP protocol specific init/de-init handlers for IPv4.
 */
static const struct xfrm_type xfrm_v4_type = {
.description = "SFE ESP4",
.owner = THIS_MODULE,
.proto = IPPROTO_ESP,
.flags = XFRM_TYPE_REPLAY_PROT,
.init_state = sfe_xfrm_esp_init_state,
.destructor = sfe_xfrm_esp_deinit_state,
.get_mtu = sfe_xfrm_esp_get_mtu,
.input = NULL,
.output = NULL,
};
/*
* IPv4 ESP handler
*/
static struct net_protocol esp_protocol = {
.handler = sfe_xfrm_dec_esp4,
.no_policy = 1,
.netns_ok = 1,
};
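/*
 * This net_protocol instance replaces the kernel's native ESP input handler:
 * sfe_xfrm_ctrl_init() swaps it in with inet_update_protocol() and saves the
 * original handler in linux_esp_handler, presumably so it can be restored
 * (or chained to) when the module is unloaded or a packet cannot be handled
 * here.
 */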
/*
 * sfe_xfrm_override_afinfo()
 * Override the native Linux afinfo object.
 */
static void sfe_xfrm_override_afinfo(uint16_t family)
{
const struct xfrm_type *type_dstopts, *type_routing;
const struct xfrm_type *type_ipip, *type_ipv6;
const struct xfrm_type *type_ah, *type_comp;
struct xfrm_state_afinfo *afinfo = NULL;
const struct xfrm_type *base;
/*
* Override ESP type.
*/
if (family == AF_INET) {
base = &xfrm_v4_type;
afinfo = xfrm_state_update_afinfo(AF_INET, &xfrm_v4_afinfo);
}
/*
* TODO: Add ipv6 support
*/
BUG_ON(family == AF_INET6);
xfrm_register_type(base, family);
type_ah = afinfo->type_ah;
type_comp = afinfo->type_comp;
type_ipip = afinfo->type_ipip;
type_ipv6 = afinfo->type_ipip6;
type_dstopts = afinfo->type_dstopts;
type_routing = afinfo->type_routing;
/*
* Register types
*
* Propagating the registered xfrm_type from
* old afinfo object into new object.
*/
if (type_ah) {
xfrm_register_type(type_ah, family);
}
if (type_comp) {
xfrm_register_type(type_comp, family);
}
if (type_ipip) {
xfrm_register_type(type_ipip, family);
}
if (type_ipv6) {
xfrm_register_type(type_ipv6, family);
}
if (type_dstopts) {
xfrm_register_type(type_dstopts, family);
}
if (type_routing) {
xfrm_register_type(type_routing, family);
}
}
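/*
 * The override above installs a new xfrm_state_afinfo for AF_INET whose
 * output hook is sfe_xfrm_v4_output, registers the SFE ESP xfrm_type against
 * it, and then re-registers every other xfrm_type (AH, IPComp, IP-in-IP and
 * the rest) carried over from the previous afinfo object so the rest of the
 * stack keeps working unchanged.
 */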
/*
 * sfe_xfrm_ctrl_init()
 * Control-plane initialization; override the native ESP handler and xfrm_state afinfo.
 */
void sfe_xfrm_ctrl_init(void)
{
int err;
err = inet_update_protocol(&esp_protocol, IPPROTO_ESP, &linux_esp_handler);
BUG_ON(err < 0);
/*
 * Override the xfrm_state afinfo.
 */
sfe_xfrm_override_afinfo(AF_INET);
}
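/*
 * Usage sketch (illustrative only): once this module has taken over the ESP
 * handlers, a tunnel-mode AES-GCM state such as the one below would be picked
 * up by sfe_xfrm_esp_init_state() and offloaded. Addresses, reqid, SPI and
 * key material are placeholders:
 *
 *	ip xfrm state add src 192.168.1.1 dst 192.168.2.1 proto esp spi 0x1000 \
 *		reqid 1 mode tunnel \
 *		aead "rfc4106(gcm(aes))" 0x<aes-key-plus-4-byte-salt> 128
 *
 * The AEAD key string carries the AES key followed by the 4-byte salt, which
 * matches the "subtract nonce" handling in sfe_xfrm_crypto_init_gcm().
 */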