| /* |
| * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. |
| * |
| * Permission to use, copy, modify, and/or distribute this software for any |
| * purpose with or without fee is hereby granted, provided that the above |
| * copyright notice and this permission notice appear in all copies. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| */ |
| |
| #include <linux/version.h> |
| #include <linux/types.h> |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| #include <net/xfrm.h> |
| #include <net/protocol.h> |
| #include <net/ip6_route.h> |
| #include <linux/inetdevice.h> |
| #include <linux/debugfs.h> |
| #include <linux/netfilter.h> |
| #include <crypto/rng.h> |
| #include <crypto/aes.h> |
| #include <crypto/gcm.h> |
| #include <crypto/ghash.h> |
| #include "sfe_xfrm.h" |
| |
/*
 * Cursor handed to the xfrm state walk callback.
 *
 * sfe_xfrm_dump_all_stats() fills it in; sfe_xfrm_dump_sa_stats() advances
 * buf and shrinks max_len each time it appends the text for one SA.
 */
struct sfe_xfrm_iter_data {
	struct net_device *ndev;	/* Only SAs whose offload_dev equals this are printed */
	ssize_t max_len;		/* Space still available at buf */
	char *buf;			/* Next write position in the print buffer */
};
| |
/*
 * One entry of the algorithm dispatch table: an xfrm algorithm name paired
 * with the routine that sets up the crypto transforms for it.
 */
struct sfe_xfrm_algo {
	/* Crypto algorithm name as it appears in the xfrm state */
	const char *algo_name;

	/* Crypto init handler; returns 0 on success, non-zero on failure */
	int (*crypto_init)(struct sfe_xfrm_sa *sa, struct xfrm_state *xs, bool is_decap);
};
| |
| static int sfe_xfrm_crypto_init_gcm(struct sfe_xfrm_sa *sa, struct xfrm_state *xs, bool is_decap); |
| static void sfe_xfrm_dev_final(struct kref *kref); |
| |
| static struct sfe_xfrm_algo xfrm_algo[] = { |
| {.algo_name = "rfc4106(gcm(aes))", .crypto_init = sfe_xfrm_crypto_init_gcm}, |
| }; |
| |
/*
 * Original ESP protocol handler.
 *
 * Passed by address to inet_update_protocol() in sfe_xfrm_ctrl_init();
 * presumably receives the handler that was registered before the override
 * (confirm against the inet_update_protocol() implementation).
 */
static const struct net_protocol *linux_esp_handler;
| |
| /* |
| * sfe_xfrm_genkey() |
| */ |
| static int sfe_xfrm_genkey(struct crypto_sync_skcipher *etfm, uint8_t *key, uint16_t len) |
| { |
| uint8_t iv[AES_BLOCK_SIZE] = {0}; |
| struct scatterlist sg[1]; |
| int ret; |
| |
| /* |
| * TODO: Move this to heap |
| */ |
| SYNC_SKCIPHER_REQUEST_ON_STACK(ereq, etfm); |
| |
| sg_init_one(sg, key, len); |
| skcipher_request_set_sync_tfm(ereq, etfm); |
| skcipher_request_set_callback(ereq, 0, NULL, NULL); |
| skcipher_request_set_crypt(ereq, sg, sg, len, iv); |
| |
| ret = crypto_skcipher_encrypt(ereq); |
| if (ret) { |
| pr_debug("%px: Failed to encrypt; err(%d)\n", etfm, ret); |
| return ret; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * sfe_xfrm_crypto_init_gcm() |
| * Crypto init func for GCM. |
| */ |
| static int sfe_xfrm_crypto_init_gcm(struct sfe_xfrm_sa *sa, struct xfrm_state *xs, bool is_decap) |
| { |
| uint8_t authkey[AES_BLOCK_SIZE] = {0}; |
| uint8_t *cipher_key; |
| unsigned int cipher_keylen; |
| unsigned int key_len = 0; |
| int err; |
| |
| key_len = ALIGN(xs->aead->alg_key_len, BITS_PER_BYTE) / BITS_PER_BYTE; |
| |
| /* |
| * Cipher key |
| */ |
| cipher_keylen = key_len - 4; /* Subtract nonce */ |
| cipher_key = xs->aead->alg_key; |
| |
| /* |
| * Allocate the cipher context |
| */ |
| sa->enc_tfm = crypto_alloc_sync_skcipher("ctr(aes)", 0, 0); |
| if (IS_ERR(sa->enc_tfm)) { |
| pr_err("%px: Error allocating tfm for skcipher: ctr(aes)\n", sa); |
| return PTR_ERR(sa->enc_tfm); |
| } |
| |
| /* |
| * Setup cipher keys |
| */ |
| err = crypto_sync_skcipher_setkey(sa->enc_tfm, cipher_key, cipher_keylen); |
| if (err) { |
| pr_err("%px: Failed to set the key for skcipher: ctr(aes)\n", sa); |
| goto fail2; |
| } |
| |
| if (is_decap) { |
| struct sfe_xfrm_sa_state_dec *dec = &sa->state.dec; |
| |
| dec->icv_len = xs->aead->alg_icv_len / BITS_PER_BYTE; |
| dec->iv_len = GCM_RFC4106_IV_SIZE; |
| |
| memcpy(&dec->nonce, cipher_key + cipher_keylen, 4); |
| |
| dec->auth_decrypt = sfe_xfrm_auth_decrypt_gcm; |
| } else { |
| struct sfe_xfrm_sa_state_enc *enc = &sa->state.enc; |
| |
| enc->icv_len = xs->aead->alg_icv_len / BITS_PER_BYTE; |
| enc->iv_len = GCM_RFC4106_IV_SIZE; |
| enc->blk_len = ALIGN(crypto_sync_skcipher_blocksize(sa->enc_tfm), 4); |
| memcpy(&enc->nonce, cipher_key + cipher_keylen, 4); |
| |
| get_random_bytes(&enc->salt, sizeof(enc->salt)); |
| get_random_bytes(&enc->iv_seq, sizeof(enc->iv_seq)); |
| |
| enc->mtu_overhead += SFE_XFRM_MAX_ESP_GCM_OVERHEAD; |
| enc->encrypt_auth = sfe_xfrm_encrypt_auth_gcm; |
| } |
| |
| /* |
| * Generate authentication key |
| */ |
| err = sfe_xfrm_genkey(sa->enc_tfm, authkey, sizeof(authkey)); |
| if (err) { |
| pr_warn("%px: Failed to generate authentication key for GCM\n", sa); |
| goto fail2; |
| } |
| |
| /* |
| * Allocate authentication context. |
| * Use GHASH CE Driver. |
| */ |
| sa->auth_tfm = crypto_alloc_shash("__driver-ghash-ce", CRYPTO_ALG_INTERNAL, CRYPTO_ALG_INTERNAL); |
| if (IS_ERR(sa->auth_tfm)) { |
| pr_err("%px: Error allocating tfm for shash: ghash\n", sa); |
| err = PTR_ERR(sa->auth_tfm); |
| goto fail2; |
| } |
| |
| /* |
| * Setup authentication key |
| */ |
| err = crypto_shash_setkey(sa->auth_tfm, authkey, sizeof(authkey)); |
| if (err) { |
| pr_err("%px: Failed to set the key for auth: ghash\n", sa); |
| goto fail1; |
| } |
| |
| /* |
| * Reset the memory that was allocated for confidentiality |
| */ |
| memzero_explicit(authkey, sizeof(authkey)); |
| memzero_explicit(cipher_key, cipher_keylen); |
| |
| pr_info("skcipher driver name: %s\n", crypto_tfm_alg_driver_name(crypto_skcipher_tfm((struct crypto_skcipher *)sa->enc_tfm))); |
| pr_info("shash driver name: %s\n", crypto_tfm_alg_driver_name(crypto_shash_tfm(sa->auth_tfm))); |
| return 0; |
| |
| fail1: |
| crypto_free_shash(sa->auth_tfm); |
| fail2: |
| crypto_free_sync_skcipher(sa->enc_tfm); |
| return err; |
| } |
| |
/*
 * sfe_xfrm_open_ndev()
 *	ndo_open callback: start the transmit queue of the IPsec netdevice.
 *
 * Always returns 0.
 */
static int sfe_xfrm_open_ndev(struct net_device *ndev)
{
	netif_start_queue(ndev);

	return 0;
}
| |
/*
 * sfe_xfrm_stop_ndev()
 *	ndo_stop callback: stop the transmit queue of the IPsec netdevice.
 *
 * Always returns 0.
 */
static int sfe_xfrm_stop_ndev(struct net_device *ndev)
{
	netif_stop_queue(ndev);

	return 0;
}
| |
| /* |
| * sfe_xfrm_get_dev_stats() |
| * Fetch all the device statistics. |
| */ |
| static void sfe_xfrm_get_dev_stats(struct sfe_xfrm_dev *dev, struct sfe_xfrm_dev_stats *stats) |
| { |
| int cpu; |
| int i; |
| |
| /* |
| * All statistics are 64bit. So we can just iterate by words. |
| */ |
| for_each_possible_cpu(cpu) { |
| const struct sfe_xfrm_dev_stats *sp = per_cpu_ptr(dev->stats_pcpu, cpu); |
| uint64_t *stats_ptr = (uint64_t *)stats; |
| uint64_t *sp_ptr = (uint64_t *)sp; |
| |
| for (i = 0; i < SFE_XFRM_DEV_STATS_DWORDS; i++, stats_ptr++, sp_ptr++) { |
| *stats_ptr += *sp_ptr; |
| } |
| } |
| } |
| |
| /* |
| * sfe_xfrm_get_sa_stats() |
| * Fetch all the SA statistics. |
| */ |
| static void sfe_xfrm_get_sa_stats(struct sfe_xfrm_sa *sa, struct sfe_xfrm_sa_stats *stats) |
| { |
| int cpu; |
| int i; |
| |
| /* |
| * All statistics are 64bit. So we can just iterate by words. |
| */ |
| for_each_possible_cpu(cpu) { |
| const struct sfe_xfrm_sa_stats *sp = per_cpu_ptr(sa->stats_pcpu, cpu); |
| uint64_t *stats_ptr = (uint64_t *)stats; |
| uint64_t *sp_ptr = (uint64_t *)sp; |
| |
| for (i = 0; i < SFE_XFRM_SA_STATS_DWORDS; i++, stats_ptr++, sp_ptr++) { |
| *stats_ptr += *sp_ptr; |
| } |
| } |
| } |
| |
| /* |
| * sfe_xfrm_dump_sa_stats() |
| * Print the SA statistics. |
| */ |
| static int sfe_xfrm_dump_sa_stats(struct xfrm_state *xs, int count, void *ptr) |
| { |
| struct sfe_xfrm_iter_data *iter = (struct sfe_xfrm_iter_data *)ptr; |
| struct sfe_xfrm_sa_stats stats = {0}; |
| struct sfe_xfrm_sa *sa; |
| ssize_t len = 0; |
| bool is_encap; |
| char *hdr; |
| |
| /* |
| * Non NSS offloaded SA |
| */ |
| if (!(xs->xflags & XFRM_STATE_OFFLOAD_NSS)) |
| return 0; |
| |
| /* |
| * SA belongs to a different NETDEVICE |
| */ |
| if (xs->offload_dev != iter->ndev) |
| return 0; |
| |
| BUG_ON(xs->props.family == AF_INET6); |
| sa = xs->data; |
| |
| /* |
| * We need to first fetch the stats values |
| * from the SA object. |
| */ |
| sfe_xfrm_get_sa_stats(sa, &stats); |
| |
| is_encap = (sa->state.flags & SFE_XFRM_SA_FLAG_ENC); |
| hdr = is_encap ? "Encap SA" : "Decap SA"; |
| |
| len += snprintf(iter->buf + len, iter->max_len - len, |
| "%s: (src:%pI4n dst:%pI4n spi:0x%X sport:%u dport:%u flags:0x%x)\n", hdr, |
| sa->hdr.src_ip, sa->hdr.dst_ip, htonl(sa->hdr.spi), htons(sa->hdr.sport), |
| htons(sa->hdr.dport), sa->state.flags); |
| |
| len += snprintf(iter->buf + len, iter->max_len - len, "\tTx packets: %llu\n", stats.tx_pkts); |
| len += snprintf(iter->buf + len, iter->max_len - len, "\tTx bytes: %llu\n", stats.tx_bytes); |
| len += snprintf(iter->buf + len, iter->max_len - len, "\tRx packets: %llu\n", stats.rx_pkts); |
| len += snprintf(iter->buf + len, iter->max_len - len, "\tRx bytes: %llu\n", stats.rx_bytes); |
| len += snprintf(iter->buf + len, iter->max_len - len, "\tDst cache error: %llu\n", stats.fail_dst_cache); |
| len += snprintf(iter->buf + len, iter->max_len - len, "\tEnqueue error: %llu\n", stats.fail_enqueue); |
| len += snprintf(iter->buf + len, iter->max_len - len, "\tTransformation error: %llu\n", stats.fail_transform); |
| len += snprintf(iter->buf + len, iter->max_len - len, "\tRoute error: %llu\n", stats.fail_route); |
| len += snprintf(iter->buf + len, iter->max_len - len, "\tSP allocation error: %llu\n", stats.fail_sp_alloc); |
| len += snprintf(iter->buf + len, iter->max_len - len, "\tSequence error: %llu\n", stats.fail_seq); |
| |
| iter->buf += len; |
| iter->max_len -= len; |
| |
| /* |
| * Stop processing if the available length is zero. |
| */ |
| return !iter->max_len; |
| } |
| |
| /* |
| * sfe_xfrm_dump_dev_stats() |
| * Print the Device statistics. |
| */ |
| static ssize_t sfe_xfrm_dump_dev_stats(struct sfe_xfrm_dev_stats *stats, char *buf, ssize_t max_len) |
| { |
| ssize_t len = 0; |
| |
| len = snprintf(buf, max_len, "SA Alloc: %llu\n", stats->sa_alloc); |
| len += snprintf(buf + len, max_len - len, "SA Free: %llu\n", stats->sa_free); |
| len += snprintf(buf + len, max_len - len, "Device Encapsulation Statistics:\n"); |
| len += snprintf(buf + len, max_len - len, "\tTx packets: %llu\n", stats->tx_pkts); |
| len += snprintf(buf + len, max_len - len, "\tTx bytes: %llu\n", stats->tx_bytes); |
| len += snprintf(buf + len, max_len - len, "\tTx linearize: %llu\n", stats->tx_linearize); |
| len += snprintf(buf + len, max_len - len, "\tTx Fail: %llu\n", stats->tx_fail); |
| len += snprintf(buf + len, max_len - len, "\tTx Fail SA: %llu\n", stats->tx_fail_sa); |
| len += snprintf(buf + len, max_len - len, "\tTx Fail shared: %llu\n", stats->tx_fail_shared); |
| len += snprintf(buf + len, max_len - len, "\tTx Fail headroom: %llu\n", stats->tx_fail_hroom); |
| len += snprintf(buf + len, max_len - len, "\tTx Fail tailroom: %llu\n", stats->tx_fail_troom); |
| len += snprintf(buf + len, max_len - len, "\tTx Fail Linearize: %llu\n", stats->tx_fail_linearize); |
| |
| len += snprintf(buf + len, max_len - len, "Device Decapsulation Statistics:\n"); |
| len += snprintf(buf + len, max_len - len, "\tRx packets: %llu\n", stats->rx_pkts); |
| len += snprintf(buf + len, max_len - len, "\tRx bytes: %llu\n", stats->rx_bytes); |
| len += snprintf(buf + len, max_len - len, "\tRx linearize: %llu\n", stats->rx_linearize); |
| len += snprintf(buf + len, max_len - len, "\tRx dummy: %llu\n", stats->rx_dummy); |
| len += snprintf(buf + len, max_len - len, "\tRx Fail: %llu\n", stats->rx_fail); |
| len += snprintf(buf + len, max_len - len, "\tRx Fail SA: %llu\n", stats->rx_fail_sa); |
| len += snprintf(buf + len, max_len - len, "\tRx Fail Linearize: %llu\n", stats->rx_fail_linearize); |
| |
| return len; |
| } |
| |
| /* |
| * sfe_xfrm_dump_all_stats() |
| * Read all device and SA statistics. |
| */ |
| static ssize_t sfe_xfrm_dump_all_stats(struct file *fp, char __user *ubuf, size_t sz, loff_t *ppos) |
| { |
| struct sfe_xfrm_dev *dev = fp->private_data; |
| struct sfe_xfrm_dev_stats dev_stats = {0}; |
| struct sfe_xfrm_iter_data iter = {0}; |
| struct xfrm_state_walk walk; |
| int32_t sa_count; |
| ssize_t len = 0; |
| ssize_t max_len; |
| char *buf; |
| |
| /* |
| * Fetch the stats values from the device object. |
| */ |
| sfe_xfrm_get_dev_stats(dev, &dev_stats); |
| sa_count = dev_stats.sa_alloc - dev_stats.sa_free; |
| if (sa_count < 0) |
| sa_count = 0; |
| |
| /* |
| * Calculate required string buffer for stats. |
| */ |
| max_len = SFE_XFRM_DEV_STATS_DWORDS * SFE_XFRM_MAX_STR_LEN; /* Members */ |
| max_len += SFE_XFRM_MAX_STR_LEN; /* Encap heading */ |
| max_len += SFE_XFRM_MAX_STR_LEN; /* Decap heading */ |
| max_len += SFE_XFRM_MAX_STR_LEN * sa_count; /* SA header */ |
| max_len += SFE_XFRM_SA_STATS_DWORDS * SFE_XFRM_MAX_STR_LEN * sa_count ; /* SA Members */ |
| |
| /* |
| * Allocate the buffer. |
| */ |
| buf = vzalloc(max_len); |
| if (!buf) { |
| pr_warn("%px: failed to allocate print buffer (%zu)", dev, max_len); |
| return 0; |
| } |
| |
| /* |
| * Print the device statistics. |
| */ |
| len += sfe_xfrm_dump_dev_stats(&dev_stats, buf, max_len); |
| |
| /* |
| * No active SA. |
| */ |
| if (!sa_count) { |
| goto done; |
| } |
| |
| /* |
| * Initialize the walk object for ESP xfrm state. |
| */ |
| xfrm_state_walk_init(&walk, IPPROTO_ESP, NULL); |
| |
| /* |
| * We need the below fields to selectively |
| * print the necessary SA stats. |
| * |
| * Since there is no direct way to pass these fields to the |
| * (sfe_xfrm_dump_sa_stats) callback, we pass this as a pointer |
| * (iter). |
| */ |
| iter.ndev = dev->ndev; |
| iter.buf = buf + len; |
| iter.max_len = max_len - len; |
| |
| xfrm_state_walk(&init_net, &walk, sfe_xfrm_dump_sa_stats, &iter); |
| xfrm_state_walk_done(&walk, &init_net); |
| len = iter.buf - buf; |
| |
| done: |
| len = simple_read_from_buffer(ubuf, sz, ppos, buf, len); |
| vfree(buf); |
| return len; |
| } |
| |
| /* |
| * sfe_xfrm_get_rtnl_stats() |
| * Handler to fetch netdevice rtnl statistics. |
| */ |
| static void sfe_xfrm_get_rtnl_stats(struct net_device *ndev, struct rtnl_link_stats64 *rtnl_stats) |
| { |
| struct sfe_xfrm_dev *dev = netdev_priv(ndev); |
| struct sfe_xfrm_dev_stats stats = {0}; |
| |
| memset(rtnl_stats, 0, sizeof(*rtnl_stats)); |
| sfe_xfrm_get_dev_stats(dev, &stats); |
| |
| rtnl_stats->tx_packets = stats.tx_pkts; |
| rtnl_stats->tx_bytes = stats.tx_bytes; |
| rtnl_stats->tx_dropped = stats.tx_fail; |
| rtnl_stats->rx_packets = stats.rx_pkts; |
| rtnl_stats->rx_bytes = stats.rx_bytes; |
| rtnl_stats->rx_dropped = stats.rx_fail; |
| } |
| |
| /* |
| * sfe_xfrm_mtu_set() |
| * Update device MTU. |
| */ |
| static int sfe_xfrm_mtu_set(struct net_device *ndev, int mtu) |
| { |
| ndev->mtu = mtu; |
| return 0; |
| } |
| |
/*
 * sfe_xfrm_xmit()
 *	Continuation of the post routing hook in sfe_xfrm_v4_output():
 *	queue the packet for transmission on skb->dev.
 *
 * Returns the result of dev_queue_xmit().
 */
static int sfe_xfrm_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int status = dev_queue_xmit(skb);

	return status;
}
| |
| /* |
| * sfe_xfrm_v4_output() |
| * Called for IPv4 Plain text packets submitted for IPSec transformation. |
| */ |
| static int sfe_xfrm_v4_output(struct net *net, struct sock *sk, struct sk_buff *skb) |
| { |
| struct sfe_xfrm_sa_state_enc *enc; |
| struct sfe_xfrm_sa *sa; |
| struct xfrm_state *xs; |
| bool expand_skb; |
| |
| /* |
| * No xfrm_state associated; Drop |
| */ |
| xs = skb_dst(skb)->xfrm; |
| if (!xs) { |
| pr_warn("%px: Failed to offload; No xfrm_state associated: drop\n", skb); |
| XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); |
| goto drop; |
| } |
| |
| /* |
| * Only process packets for XFRM state managed by IPsec offload |
| */ |
| if (!(xs->xflags & XFRM_STATE_OFFLOAD_NSS)) { |
| pr_debug("%px: state is not offloaded; xfrm_state %p :drop\n", skb, xs); |
| XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); |
| goto drop; |
| } |
| |
| /* |
| * Unshare the SKB as we will be modifying it. |
| */ |
| if (unlikely(skb_shared(skb))) { |
| skb = skb_unshare(skb, GFP_NOWAIT | __GFP_NOWARN); |
| if (!skb) { |
| goto drop; |
| } |
| } |
| |
| skb->dev = xs->offload_dev; |
| sa = xs->data; |
| enc = &sa->state.enc; |
| |
| /* |
| * Expand the SKB if needed. |
| */ |
| expand_skb = (skb_headroom(skb) < enc->head_room) || (skb_tailroom(skb) < enc->tail_room); |
| if (expand_skb && pskb_expand_head(skb, enc->head_room, enc->tail_room, GFP_NOWAIT | __GFP_NOWARN)) { |
| pr_debug("%px: Failed to expand SKB head(%u) or tail(%u)\n", skb, skb_headroom(skb), skb_tailroom(skb)); |
| XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); |
| goto drop; |
| } |
| |
| /* |
| * Call the Post routing hooks. |
| */ |
| return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb_dst(skb)->dev, sfe_xfrm_xmit); |
| |
| drop: |
| dev_kfree_skb_any(skb); |
| return -EINVAL; |
| } |
| |
| /* |
| * sfe_xfrm_dev_free() |
| * Free netdevice memory. |
| */ |
| static void sfe_xfrm_dev_free(struct net_device *ndev) |
| { |
| struct sfe_xfrm_dev *dev = netdev_priv(ndev); |
| |
| /* |
| * There should be no active references at this point. |
| */ |
| WARN_ON(kref_read(&dev->ref)); |
| |
| free_percpu(dev->stats_pcpu); |
| debugfs_remove_recursive(dev->dentry); |
| free_netdev(ndev); |
| pr_info("%px: IPsec device freed\n", ndev); |
| } |
| |
| /* |
| * IPsec device callbacks. |
| */ |
| static const struct net_device_ops xfrm_dev_ops = { |
| .ndo_open = sfe_xfrm_open_ndev, |
| .ndo_stop = sfe_xfrm_stop_ndev, |
| .ndo_start_xmit = sfe_xfrm_enc, |
| .ndo_get_stats64 = sfe_xfrm_get_rtnl_stats, |
| .ndo_change_mtu = sfe_xfrm_mtu_set, |
| }; |
| |
| /* |
| * sfe_xfrm_dev_setup() |
| * Setup ipsec connection device. |
| */ |
| static void sfe_xfrm_dev_setup(struct net_device *ndev) |
| { |
| ndev->addr_len = ETH_ALEN; |
| ndev->mtu = ETH_DATA_LEN - SFE_XFRM_DEV_MAX_HEADROOM; |
| |
| ndev->hard_header_len = SFE_XFRM_DEV_MAX_HEADROOM; |
| ndev->needed_headroom = SFE_XFRM_DEV_MAX_HEADROOM; |
| ndev->needed_tailroom = SFE_XFRM_DEV_MAX_TAILROOM; |
| ndev->type = SFE_XFRM_DEV_ARPHRD; |
| ndev->ethtool_ops = NULL; |
| ndev->header_ops = NULL; |
| ndev->netdev_ops = &xfrm_dev_ops; |
| ndev->priv_destructor = sfe_xfrm_dev_free; |
| |
| /* |
| * Assign random ethernet address. |
| */ |
| random_ether_addr(ndev->dev_addr); |
| memset(ndev->broadcast, 0xff, ndev->addr_len); |
| memcpy(ndev->perm_addr, ndev->dev_addr, ndev->addr_len); |
| } |
| |
| /* |
| * sfe_xfrm_udp_override() |
| */ |
| static void sfe_xfrm_udp_override(struct sfe_xfrm_sa *sa) |
| { |
| struct sfe_xfrm *g_xfrm = &g_sfe_xfrm; |
| uint32_t src_ip, dst_ip; |
| struct udp_sock *up; |
| struct sock *sk; |
| |
| /* |
| * Only ESP-over-UDP & decapsulation flow will be processed |
| */ |
| if (!(sa->state.flags & SFE_XFRM_SA_FLAG_UDP) || (sa->state.flags & SFE_XFRM_SA_FLAG_ENC)) |
| return; |
| |
| src_ip = sa->hdr.src_ip[0]; |
| dst_ip = sa->hdr.dst_ip[0]; |
| |
| rcu_read_lock(); |
| sk = __udp4_lib_lookup(&init_net, src_ip, sa->hdr.sport, dst_ip, sa->hdr.dport, 0, 0, &udp_table, NULL); |
| if (!sk) { |
| rcu_read_unlock(); |
| pr_err("%px: Failed to lookup UDP socket dst(%pI4h) dport(0x%X)\n", g_xfrm, sa->hdr.dst_ip, sa->hdr.dport); |
| return; |
| } |
| |
| up = udp_sk(sk); |
| if (up->encap_type != UDP_ENCAP_ESPINUDP) { |
| rcu_read_unlock(); |
| pr_err("%px: Socket type is not UDP_ENCAP_ESPINUDP (%u)\n", up, up->encap_type); |
| return; |
| } |
| |
| if (READ_ONCE(up->encap_rcv) != sfe_xfrm_dec_natt) { |
| xchg(&up->encap_rcv, sfe_xfrm_dec_natt); |
| pr_debug("%px: Overriden socket encap handler\n", up); |
| } |
| |
| rcu_read_unlock(); |
| } |
| |
| /* |
| * sfe_xfrm_crypto_init() |
| * Allocate skcipher and shash crypto transform. |
| */ |
| static int sfe_xfrm_crypto_init(struct xfrm_state *xs, struct sfe_xfrm_sa *sa, bool is_decap) |
| { |
| struct sfe_xfrm_algo *xalg = xfrm_algo; |
| char *alg_name; |
| uint32_t i; |
| |
| /* |
| * TODO: Only supports combined mode Cipher/authentication |
| */ |
| alg_name = xs->aead ? xs->aead->alg_name : NULL; |
| |
| /* |
| * Perform algorithm specific crypto initialisation. |
| */ |
| for (i = 0; i < ARRAY_SIZE(xfrm_algo); i++, xalg++) { |
| if (alg_name && !strncmp(xalg->algo_name, alg_name, strlen(xalg->algo_name))) { |
| return xalg->crypto_init(sa, xs, is_decap); |
| } |
| } |
| |
| pr_warn("%px: Unsupported algorithm for IPsec\n", xs); |
| return -1; |
| } |
| |
| /* |
| * sfe_xfrm_sa_add() |
| * Add new IPsec SA for given xfrm state. |
| */ |
| static int sfe_xfrm_sa_add(struct sfe_xfrm_dev *dev, struct xfrm_state *xs) |
| { |
| struct sfe_xfrm_dev_stats *dev_stats = this_cpu_ptr(dev->stats_pcpu); |
| struct sfe_xfrm *g_xfrm = &g_sfe_xfrm; |
| struct net_device *ndev = dev->ndev; |
| struct dst_cache *dst_cache = NULL; |
| struct sfe_xfrm_sa *sa; |
| bool is_decap = false; |
| bool is_natt = false; |
| int err; |
| |
| /* |
| * SA flag shouldn't be set |
| */ |
| BUG_ON(xs->xflags & XFRM_STATE_OFFLOAD_NSS); |
| BUG_ON(xs->props.family == AF_INET6); |
| |
| /* |
| * SA object allocation. |
| */ |
| sa = kmem_cache_alloc(g_xfrm->sa_cache, GFP_KERNEL | __GFP_ZERO); |
| if (!sa) { |
| pr_warn("%px: Failed to allocate SA\n", ndev); |
| return -ENOMEM; |
| } |
| |
| sa->stats_pcpu = alloc_percpu_gfp(struct sfe_xfrm_sa_stats, GFP_KERNEL | __GFP_ZERO); |
| if (!sa->stats_pcpu) { |
| pr_err("%px: Failed to allocate stats memory for SA\n", ndev); |
| err = -ENOMEM; |
| goto fail_pcpu; |
| } |
| |
| /* |
| * Initialise the SA object. Find the SA direction. |
| * For Decapsulation SA, the Destination address is local. |
| */ |
| if (xs->props.family == AF_INET) { |
| struct sfe_xfrm_sa_state_enc *enc; |
| struct net_device *local_dev; |
| |
| sa->hdr.src_ip[0] = xs->props.saddr.a4; |
| sa->hdr.dst_ip[0] = xs->id.daddr.a4; |
| |
| /* |
| * Check for NAT-T flow |
| */ |
| is_natt = !!xs->encap; |
| sa->state.flags |= is_natt ? SFE_XFRM_SA_FLAG_UDP : 0; |
| |
| /* |
| * Find the SA direction (encap or decap) |
| */ |
| local_dev = ip_dev_find(&init_net, xs->id.daddr.a4); |
| if (local_dev) { |
| dev_put(local_dev); |
| is_decap = true; |
| |
| goto init; |
| } |
| |
| /* |
| * Destination is remote hence this is an encapsulation SA |
| */ |
| enc = &sa->state.enc; |
| sa->state.flags |= SFE_XFRM_SA_FLAG_ENC; |
| dst_cache = &enc->dst_cache; |
| |
| err = dst_cache_init(dst_cache, GFP_KERNEL); |
| if (err) { |
| pr_err("%px: Failed to initialize dst for SA\n", ndev); |
| goto fail_dst; |
| } |
| |
| enc->mtu_overhead = sizeof(struct iphdr); |
| enc->head_room = SFE_XFRM_DEV_MAX_HEADROOM; |
| enc->tail_room = SFE_XFRM_DEV_MAX_TAILROOM; |
| enc->esp_seq = 1; |
| enc->esp_offset = sizeof(struct iphdr); |
| enc->add_hdr = sfe_xfrm_add_hdr_v4; |
| enc->ip_send = sfe_xfrm_ip4_send; |
| if (is_natt) { |
| enc->mtu_overhead += sizeof(struct udphdr); |
| enc->esp_offset += sizeof(struct udphdr); |
| enc->add_hdr = sfe_xfrm_add_hdr_natt; |
| } |
| } |
| |
| init: |
| /* |
| * Allocate the transform pointer for the |
| * skcipher and shash. |
| */ |
| err = sfe_xfrm_crypto_init(xs, sa, is_decap); |
| if (err) { |
| pr_err("%px: Crypto Initialisation failed for SA\n", sa); |
| goto fail_crypto; |
| } |
| |
| /* |
| * Dereference: sfe_xfrm_sa_del() |
| */ |
| kref_get(&dev->ref); |
| |
| sa->dev = dev; |
| sa->ndev = ndev; |
| sa->ifindex = ndev->ifindex; |
| sa->xs = xs; |
| sa->hdr.spi = xs->id.spi; |
| sa->hdr.sport = is_natt ? xs->encap->encap_sport : 0; |
| sa->hdr.dport = is_natt ? xs->encap->encap_dport : 0; |
| |
| sfe_xfrm_udp_override(sa); |
| |
| /* |
| * Make this SA active. For old SA, We wait for all RCU readers during SA deletion. |
| */ |
| if (!is_decap) { |
| uint32_t new_mtu = ETH_DATA_LEN - sa->state.enc.mtu_overhead; |
| |
| rtnl_lock(); |
| dev_set_mtu(ndev, new_mtu); |
| rtnl_unlock(); |
| |
| spin_lock_bh(&g_xfrm->lock); |
| rcu_assign_pointer(dev->sa, sa); |
| spin_unlock_bh(&g_xfrm->lock); |
| } |
| |
| WRITE_ONCE(xs->data, sa); |
| dev_stats->sa_alloc++; |
| return 0; |
| |
| fail_crypto: |
| if (dst_cache) { |
| dst_cache_destroy(dst_cache); |
| } |
| |
| fail_dst: |
| free_percpu(sa->stats_pcpu); |
| |
| fail_pcpu: |
| kmem_cache_free(g_xfrm->sa_cache, sa); |
| return err; |
| } |
| |
| /* |
| * sfe_xfrm_sa_del() |
| * Delete existing IPsec SA for given xfrm state. |
| */ |
| static void sfe_xfrm_sa_del(struct sfe_xfrm_dev *dev, struct xfrm_state *xs) |
| { |
| struct sfe_xfrm_dev_stats *dev_stats; |
| struct sfe_xfrm *g_xfrm = &g_sfe_xfrm; |
| struct sfe_xfrm_sa *sa, *dev_sa; |
| |
| sa = READ_ONCE(xs->data); |
| BUG_ON(!sa); |
| |
| /* |
| * SA flag should be set |
| */ |
| BUG_ON((xs->xflags & XFRM_STATE_OFFLOAD_NSS) == 0); |
| |
| /* |
| * if the SA being deleted is the Active |
| * encap SA, then set the dev->sa to NULL. |
| * |
| * TODO: Change it to reference counting |
| */ |
| spin_lock_bh(&g_xfrm->lock); |
| dev_sa = rcu_dereference_protected(dev->sa, lockdep_is_held(&g_xfrm->lock)); |
| if (dev_sa == sa) { |
| rcu_assign_pointer(dev->sa, NULL); |
| } |
| |
| spin_unlock_bh(&g_xfrm->lock); |
| synchronize_rcu(); |
| |
| /* |
| * SA free |
| */ |
| dev_stats = this_cpu_ptr(dev->stats_pcpu); |
| dev_stats->sa_free++; |
| |
| sa->xs = NULL; |
| |
| /* |
| * Deallocate the Crypto resources |
| */ |
| crypto_free_sync_skcipher(sa->enc_tfm); |
| crypto_free_shash(sa->auth_tfm); |
| |
| /* |
| * Reference: sfe_xfrm_sa_add |
| */ |
| kref_put(&dev->ref, sfe_xfrm_dev_final); |
| sa->dev = NULL; |
| |
| if (sa->state.flags & SFE_XFRM_SA_FLAG_ENC) { |
| dst_cache_destroy(&sa->state.enc.dst_cache); |
| } |
| |
| free_percpu(sa->stats_pcpu); |
| kmem_cache_free(g_xfrm->sa_cache, sa); |
| } |
| |
| /* |
| * IPsec device stats callback. |
| */ |
| const struct file_operations sfe_xfrm_dev_file_ops = { |
| .open = simple_open, |
| .llseek = default_llseek, |
| .read = sfe_xfrm_dump_all_stats, |
| }; |
| |
| /* |
| * sfe_xfrm_dev_add_ref() |
| * Add new IPsec device for given reqid. |
| */ |
| static struct sfe_xfrm_dev *sfe_xfrm_dev_add_ref(int64_t devid) |
| { |
| struct sfe_xfrm *g_xfrm = &g_sfe_xfrm; |
| struct sfe_xfrm_dev *dev; |
| struct list_head *cur; |
| struct net_device *ndev; |
| int status; |
| |
| /* |
| * Fetch the net_device from the db for the given ID. |
| */ |
| spin_lock_bh(&g_xfrm->lock); |
| list_for_each(cur, &g_xfrm->dev_head) { |
| dev = list_entry(cur, struct sfe_xfrm_dev, node); |
| |
| /* |
| * Ensure that we are not incrementing the reference |
| * if the final is already executing |
| */ |
| if ((dev->xfrm_reqid == devid) && kref_get_unless_zero(&dev->ref)) { |
| break; |
| } |
| } |
| |
| spin_unlock_bh(&g_xfrm->lock); |
| |
| /* |
| * Entry is found |
| */ |
| if (cur != &g_xfrm->dev_head) { |
| return dev; |
| } |
| |
| /* |
| * Netdevice not created for this id. |
| * Allocate new IPsec device for the given XFRM reqid. |
| */ |
| ndev = alloc_netdev(sizeof(*dev), "ipsectun%d", NET_NAME_ENUM, sfe_xfrm_dev_setup); |
| if (!ndev) { |
| pr_err("%px: Failed to allocate IPsec device\n", g_xfrm); |
| return NULL; |
| } |
| |
| /* |
| * Initialize device private structure. |
| */ |
| dev = netdev_priv(ndev); |
| |
| dev->ndev = ndev; |
| dev->xfrm_reqid = devid; |
| rcu_assign_pointer(dev->sa, NULL); |
| |
| dev->stats_pcpu = alloc_percpu_gfp(struct sfe_xfrm_dev_stats, GFP_KERNEL | __GFP_ZERO); |
| if (!dev->stats_pcpu) { |
| pr_err("%px: Failed to allocate stats memory for encap\n", ndev); |
| ndev->priv_destructor(ndev); |
| return NULL; |
| } |
| |
| rtnl_lock(); |
| |
| /* |
| * Register netdevice with kernel. |
| * Note: Linux will invoke the destructor upon failure |
| */ |
| status = register_netdevice(ndev); |
| if (status < 0) { |
| pr_err("%px: Failed to register netdevce, error(%d)\n", ndev, status); |
| rtnl_unlock(); |
| return NULL; |
| } |
| |
| /* |
| * Set netdevice to UP state. |
| */ |
| status = dev_open(ndev, NULL); |
| if (status < 0) { |
| pr_err("%px: Failed to Open netdevce, error(%d)\n", ndev, status); |
| unregister_netdevice(ndev); |
| rtnl_unlock(); |
| return NULL; |
| } |
| |
| rtnl_unlock(); |
| |
| dev->dentry = debugfs_create_file(ndev->name, S_IRUGO, g_xfrm->dentry, dev, &sfe_xfrm_dev_file_ops); |
| if (IS_ERR_OR_NULL(dev->dentry)) { |
| pr_warn("%px: Failed to allocate dentry for %s\n", ndev, ndev->name); |
| } |
| |
| kref_init(&dev->ref); |
| |
| /* |
| * Add the net_device entry into the db. |
| */ |
| spin_lock_bh(&g_xfrm->lock); |
| list_add(&dev->node, &g_xfrm->dev_head); |
| spin_unlock_bh(&g_xfrm->lock); |
| |
| return dev; |
| } |
| |
| /* |
| * sfe_xfrm_dev_final() |
| * Delete existing IPsec device. |
| */ |
| static void sfe_xfrm_dev_final(struct kref *kref) |
| { |
| struct sfe_xfrm_dev *dev = container_of(kref, struct sfe_xfrm_dev, ref); |
| struct sfe_xfrm *g_xfrm = &g_sfe_xfrm; |
| struct net_device *ndev = dev->ndev; |
| |
| /* |
| * Delete the net_device entry from the db. |
| */ |
| BUG_ON(dev->xfrm_reqid < 0); |
| |
| spin_lock_bh(&g_xfrm->lock); |
| list_del_init(&dev->node); |
| spin_unlock_bh(&g_xfrm->lock); |
| |
| /* |
| * Bring down the device and unregister from linux. |
| */ |
| unregister_netdev(ndev); |
| } |
| |
| /* |
| * sfe_xfrm_esp_init_state() |
| * Initialize IPsec xfrm state of type ESP. |
| */ |
| static int sfe_xfrm_esp_init_state(struct xfrm_state *xs) |
| { |
| struct sfe_xfrm_dev *dev; |
| int ret; |
| |
| /* |
| * SA flag shouldn't be set |
| */ |
| BUG_ON(xs->xflags & XFRM_STATE_OFFLOAD_NSS); |
| |
| /* |
| * verify whether the xfrm state can be offloaded or not. |
| */ |
| if (xs->props.mode != XFRM_MODE_TUNNEL) { |
| pr_warn("%px: SFE/IPsec transport mode not supported\n", xs); |
| return -ENOTSUPP; |
| } |
| |
| if (xs->encap && (xs->encap->encap_type != UDP_ENCAP_ESPINUDP)) { |
| pr_warn("%px: SFE/IPsec UDP encap type(%d) not supported\n", xs, xs->encap->encap_type); |
| return -ENOTSUPP; |
| } |
| |
| /* |
| * Before adding a new SA object, run through the db and find out |
| * if there is a net device already created for the given (id). |
| * If not present, then create one. |
| */ |
| dev = sfe_xfrm_dev_add_ref(xs->props.reqid); |
| if (!dev) { |
| pr_err("%px: Unable to fetch/add netdevice for this id %d\n", xs, xs->props.reqid); |
| return -1; |
| } |
| |
| /* |
| * Create and add the SA object. |
| */ |
| ret = sfe_xfrm_sa_add(dev, xs); |
| if (ret < 0) { |
| pr_warn("%px: unable to offload xfrm_state\n", xs); |
| |
| /* |
| * Reference: sfe_xfrm_dev_add_ref() |
| */ |
| kref_put(&dev->ref, sfe_xfrm_dev_final); |
| return ret; |
| } |
| |
| xs->offload_dev = dev->ndev; |
| xs->xflags |= XFRM_STATE_OFFLOAD_NSS; |
| return 0; |
| } |
| |
| /* |
| * sfe_xfrm_esp_deinit_state() |
| * Destroy IPsec xfrm state of type ESP. |
| */ |
| static void sfe_xfrm_esp_deinit_state(struct xfrm_state *xs) |
| { |
| struct net_device *ndev; |
| struct sfe_xfrm_dev *dev; |
| |
| /* |
| * Check if the xfrm state is already offloaded or not. |
| */ |
| if (unlikely(!(xs->xflags & XFRM_STATE_OFFLOAD_NSS))) { |
| pr_warn("%px: xfrm_state is not offloaded\n", xs); |
| return; |
| } |
| |
| /* |
| * Fetch the net_device from the xfrm state. |
| */ |
| ndev = xs->offload_dev; |
| dev = netdev_priv(ndev); |
| |
| sfe_xfrm_sa_del(dev, xs); |
| |
| /* |
| * Reference: sfe_xfrm_dev_add_ref() |
| */ |
| kref_put(&dev->ref, sfe_xfrm_dev_final); |
| return; |
| } |
| |
| /* |
| * sfe_xfrm_esp_get_mtu() |
| * Get mtu for inner packet. |
| */ |
| static uint32_t sfe_xfrm_esp_get_mtu(struct xfrm_state *xs, int mtu) |
| { |
| struct net_device *ndev; |
| |
| /* |
| * WARN_ON if the xfrm_state is not offloaded. |
| */ |
| WARN_ON(!(xs->xflags & XFRM_STATE_OFFLOAD_NSS)); |
| |
| /* |
| * since we are tracking each encap SA using a unique |
| * netdevice, hence net_device mtu is the same as SA mtu. |
| */ |
| ndev = xs->offload_dev; |
| BUG_ON(!ndev); |
| |
| /* |
| * FIXME: return the overhead value |
| */ |
| return ndev->mtu; |
| } |
| |
| /* |
| * Trapping ipv4 packets to be sent for ipsec encapsulation. |
| */ |
| static struct xfrm_state_afinfo xfrm_v4_afinfo = { |
| .family = AF_INET, |
| .proto = IPPROTO_IPIP, |
| .output = sfe_xfrm_v4_output, |
| .output_finish = NULL, |
| .extract_input = NULL, |
| .extract_output = NULL, |
| .transport_finish = NULL, |
| .local_error = NULL, |
| }; |
| |
| /* |
| * ESP proto specific init/de-init handlers for ipv4. |
| */ |
| static const struct xfrm_type xfrm_v4_type = { |
| .description = "SFE ESP4", |
| .owner = THIS_MODULE, |
| .proto = IPPROTO_ESP, |
| .flags = XFRM_TYPE_REPLAY_PROT, |
| .init_state = sfe_xfrm_esp_init_state, |
| .destructor = sfe_xfrm_esp_deinit_state, |
| .get_mtu = sfe_xfrm_esp_get_mtu, |
| .input = NULL, |
| .output = NULL, |
| }; |
| |
| /* |
| * IPv4 ESP handler |
| */ |
| static struct net_protocol esp_protocol = { |
| .handler = sfe_xfrm_dec_esp4, |
| .no_policy = 1, |
| .netns_ok = 1, |
| }; |
| |
| /* |
| * sfe_xfrm_override_afinfo() |
| * Override the native linux afinfo object. |
| */ |
| static void sfe_xfrm_override_afinfo(uint16_t family) |
| { |
| const struct xfrm_type *type_dstopts, *type_routing; |
| const struct xfrm_type *type_ipip, *type_ipv6; |
| const struct xfrm_type *type_ah, *type_comp; |
| struct xfrm_state_afinfo *afinfo = NULL; |
| const struct xfrm_type *base; |
| |
| /* |
| * Override ESP type. |
| */ |
| if (family == AF_INET) { |
| base = &xfrm_v4_type; |
| afinfo = xfrm_state_update_afinfo(AF_INET, &xfrm_v4_afinfo); |
| } |
| |
| /* |
| * TODO: Add ipv6 support |
| */ |
| BUG_ON(family == AF_INET6); |
| |
| xfrm_register_type(base, family); |
| |
| type_ah = afinfo->type_ah; |
| type_comp = afinfo->type_comp; |
| type_ipip = afinfo->type_ipip; |
| type_ipv6 = afinfo->type_ipip6; |
| type_dstopts = afinfo->type_dstopts; |
| type_routing = afinfo->type_routing; |
| |
| /* |
| * Register types |
| * |
| * Propagating the registered xfrm_type from |
| * old afinfo object into new object. |
| */ |
| if (type_ah) { |
| xfrm_register_type(type_ah, family); |
| } |
| |
| if (type_comp) { |
| xfrm_register_type(type_comp, family); |
| } |
| |
| if (type_ipip) { |
| xfrm_register_type(type_ipip, family); |
| } |
| |
| if (type_ipv6) { |
| xfrm_register_type(type_ipv6, family); |
| } |
| |
| if (type_dstopts) { |
| xfrm_register_type(type_dstopts, family); |
| } |
| |
| if (type_routing) { |
| xfrm_register_type(type_routing, family); |
| } |
| } |
| |
| /* |
| * sfe_xfrm_ctrl_init() |
| * initialization function |
| */ |
| void sfe_xfrm_ctrl_init(void) |
| { |
| int err; |
| |
| err = inet_update_protocol(&esp_protocol, IPPROTO_ESP, &linux_esp_handler); |
| BUG_ON(err < 0); |
| |
| /* |
| * overide the xfrm_state afinfo. |
| */ |
| sfe_xfrm_override_afinfo(AF_INET); |
| } |