Project import generated by Copybara.

GitOrigin-RevId: 7c20c0fc9d25f1105432955e3f8e88efad9b63f4
diff --git a/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp.h b/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp.h
index 54c9ca3..a8bc27c 100644
--- a/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp.h
+++ b/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp.h
@@ -43,6 +43,9 @@
 
 extern int tx_requeue_stop;
 extern int tx_desc_threshold_size;
+extern int tx_completion_load_balance;
+extern int tx_completion_load_balance_threshold;
+extern int tx_completion_load_balance_cpu;
 
 /*
  * syn_dp_info
diff --git a/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_cfg_tx.c b/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_cfg_tx.c
index bf5e19a..83238be 100644
--- a/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_cfg_tx.c
+++ b/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_cfg_tx.c
@@ -48,6 +48,11 @@
 	tx_info->tx_idx = 0;
 	tx_info->busy_tx_desc_cnt = 0;
 
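+	/*
+	 * Pre-initialise the call-single-data used to defer Tx skb freeing
+	 * to the load-balance CPU; 'info' carries the per-call free budget.
+	 */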
+	tx_info->tx_free_batch_size = SYN_DP_NAPI_BUDGET_TX;
+	tx_info->csd.func = syn_dp_free_tx_done_skb;
+	tx_info->csd.info = &tx_info->tx_free_batch_size;
+	tx_info->csd.flags = 0;
+
 	return NSS_DP_SUCCESS;
 }
 
@@ -82,7 +87,9 @@
 	int i;
 	struct sk_buff *skb;
 	uint32_t busy_tx_desc_cnt = atomic_read((atomic_t *)&tx_info->busy_tx_desc_cnt);
+	int budget = SYN_DP_TX_DESC_SIZE;
 
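+	/*
+	 * Drain skbs parked on the deferred-free list before cleaning the
+	 * ring, up to one full ring's worth per call.
+	 */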
+	syn_dp_free_tx_done_skb(&budget);
+
 	/*
 	 * Tx Ring cleaning
 	 */
diff --git a/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_tx.c b/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_tx.c
index 70c395f..29e6a4c 100644
--- a/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_tx.c
+++ b/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_tx.c
@@ -409,6 +409,11 @@
 	tx_info->tx_idx = syn_dp_tx_inc_index(tx_idx, 1);
 }
 
+/*
+ * syn_dp_schedule_free_tx_skb()
+ *	Kick the deferred Tx-skb free routine on the load-balance CPU.
+ */
+void syn_dp_schedule_free_tx_skb(call_single_data_t *csd)
+{
+	smp_call_function_single_async(tx_completion_load_balance_cpu, csd);
+}
+
 /*
  * syn_dp_tx_complete()
  *	Xmit complete, clear descriptor and free the skb
@@ -502,13 +507,34 @@
 	 * in performance.
 	 * All the completed skb's shinfo are prefetched and skb's are freed in batch.
 	 */
-	for (free_idx = 0; free_idx < count; free_idx++) {
-		if (likely((free_idx + 1) < count)) {
-			prefetch((void *)tx_info->shinfo_addr_virt[free_idx+1]);
+
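+	/*
+	 * With Tx-completion load balancing enabled and a large enough
+	 * batch, queue the skbs for another CPU to free instead of freeing
+	 * them inline here.
+	 */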
+	if (unlikely(tx_completion_load_balance &&
+			count > tx_completion_load_balance_threshold)) {
+		unsigned long flags;
+		struct list_head skb_list;
+		struct sk_buff *skb;
+
+		INIT_LIST_HEAD(&skb_list);
+		for (free_idx = 0; free_idx < count; free_idx++) {
+			skb = tx_info->skb_free_list[free_idx];
+			tx_info->skb_free_list[free_idx] = NULL;
+			tx_info->shinfo_addr_virt[free_idx] = 0;
+			list_add_tail(&skb->list, &skb_list);
 		}
-		dev_kfree_skb_any(tx_info->skb_free_list[free_idx]);
-		tx_info->skb_free_list[free_idx] = NULL;
-		tx_info->shinfo_addr_virt[free_idx] = 0;
+		atomic_add(count, &syn_dp_tx_done_skb_count);
+		spin_lock_irqsave(&syn_dp_tx_done_skb_list_lock, flags);
+		list_splice_tail_init(&skb_list, &syn_dp_tx_done_skb_list);
+		spin_unlock_irqrestore(&syn_dp_tx_done_skb_list_lock, flags);
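+		/*
+		 * Kick the remote CPU. If a previous IPI is still pending,
+		 * the kick is a no-op and the queued skbs are picked up by
+		 * the in-flight invocation or by the next kick.
+		 */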
+		syn_dp_schedule_free_tx_skb(&tx_info->csd);
+	} else {
+		for (free_idx = 0; free_idx < count; free_idx++) {
+			if (likely((free_idx + 1) < count)) {
+				prefetch((void *)tx_info->shinfo_addr_virt[free_idx+1]);
+			}
+			dev_kfree_skb_any(tx_info->skb_free_list[free_idx]);
+			tx_info->skb_free_list[free_idx] = NULL;
+			tx_info->shinfo_addr_virt[free_idx] = 0;
+		}
 	}
 
 	atomic64_add(tx_packets, (atomic64_t *)&tx_info->tx_stats.tx_packets);
@@ -598,3 +624,42 @@
 	atomic_inc((atomic_t *)&tx_info->busy_tx_desc_cnt);
 	return 0;
 }
+
+/*
+ * syn_dp_free_tx_done_skb()
+ *	Free Tx-completed skbs deferred to the load-balance CPU.
+ *
+ * Called on the load-balance CPU via async IPI, and directly during
+ * Tx ring cleanup; 'data' points at the caller's free budget.
+ */
+void syn_dp_free_tx_done_skb(void *data)
+{
+	int max = *(int *)data;
+	int count = 0;
+	unsigned long flags;
+	struct sk_buff *skb, *next;
+	struct list_head work_list;
+
+	INIT_LIST_HEAD(&work_list);
+	spin_lock_irqsave(&syn_dp_tx_done_skb_list_lock, flags);
+	list_splice_init(&syn_dp_tx_done_skb_list, &work_list);
+	spin_unlock_irqrestore(&syn_dp_tx_done_skb_list_lock, flags);
+
+	if (list_empty(&work_list))
+		return;
+
+	prefetchw(list_first_entry(&work_list, struct sk_buff, list));
+
+	list_for_each_entry_safe(skb, next, &work_list, list) {
+		prefetch(skb_shinfo(skb));
+
+		/*
+		 * 'next' points back at the list head once we are on the
+		 * last entry, so only prefetch genuine skbs.
+		 */
+		if (&next->list != &work_list)
+			prefetchw(next);
+
+		count++;
+		skb_list_del_init(skb);
+		dev_kfree_skb_any(skb);
+		if (count >= max)
+			break;
+	}
+
+	atomic_sub(count, &syn_dp_tx_done_skb_count);
+	if (list_empty(&work_list))
+		return;
+
+	/*
+	 * Budget exhausted: return the remaining skbs to the shared list
+	 * for the next invocation.
+	 */
+	spin_lock_irqsave(&syn_dp_tx_done_skb_list_lock, flags);
+	list_splice_init(&work_list, &syn_dp_tx_done_skb_list);
+	spin_unlock_irqrestore(&syn_dp_tx_done_skb_list_lock, flags);
+}
diff --git a/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_tx.h b/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_tx.h
index 8ffbaa8..3f74b7f 100644
--- a/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_tx.h
+++ b/qca-nss-dp/hal/dp_ops/syn_gmac_dp/syn_dp_tx.h
@@ -23,6 +23,8 @@
 #define SYN_DP_TX_DESC_MAX_INDEX	(SYN_DP_TX_DESC_SIZE - 1)
 #define SYN_DP_TX_INVALID_DESC_INDEX	SYN_DP_TX_DESC_SIZE
 #define NSS_DP_TX_MAX_DESC_SIZE		SYN_DP_TX_DESC_SIZE
+
 /*
  * syn_dp_tx_buf
  */
@@ -51,6 +53,8 @@
 					/* GMAC driver Tx statistics */
 	struct net_device *netdev;	/* Net-device corresponding to the GMAC */
 	struct device *dev;		/* Platform device corresponding to the GMAC */
+	call_single_data_t csd;		/* CSD for deferring Tx skb free to the load-balance CPU */
+	int tx_free_batch_size;		/* Per-call budget for the deferred skb free */
 	struct sk_buff *skb_free_list[SYN_DP_NAPI_BUDGET_TX];
 					/* Array to hold SKBs before free during Tx completion */
 	size_t shinfo_addr_virt[SYN_DP_NAPI_BUDGET_TX];
@@ -85,4 +89,8 @@
 	return tx_info->tx_comp_idx;
 }
 
+extern void syn_dp_free_tx_done_skb(void *data);
+extern spinlock_t syn_dp_tx_done_skb_list_lock;
+extern struct list_head syn_dp_tx_done_skb_list;
+extern atomic_t syn_dp_tx_done_skb_count;
 #endif /*  __NSS_DP_SYN_DP_TX__ */
diff --git a/qca-nss-dp/nss_dp_main.c b/qca-nss-dp/nss_dp_main.c
index 5ae217f..f9607de 100644
--- a/qca-nss-dp/nss_dp_main.c
+++ b/qca-nss-dp/nss_dp_main.c
@@ -107,6 +107,10 @@
 MODULE_PARM_DESC(nss_dp_rx_mitigation_pkt_cnt, "Rx mitigation packet count value");
 #endif
 
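+/*
+ * Deferred Tx-completion free list shared by the GMAC Tx rings; entries
+ * are drained on the configured load-balance CPU.
+ */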
+spinlock_t syn_dp_tx_done_skb_list_lock;
+struct list_head syn_dp_tx_done_skb_list;
+atomic_t syn_dp_tx_done_skb_count = ATOMIC_INIT(0);
+
 /*
  * Sysctl table
  */
@@ -114,6 +118,9 @@
 
 int tx_requeue_stop;
 int tx_desc_threshold_size = 0;
+int tx_completion_load_balance = 0;
+int tx_completion_load_balance_threshold = 6;
+int tx_completion_load_balance_cpu = 1;
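+
+/*
+ * Example usage (hypothetical paths; the sysctl directory depends on how
+ * nss_dp_table is registered on the platform):
+ *   echo 1  > /proc/sys/<nss-dp-dir>/tx_completion_load_balance
+ *   echo 16 > /proc/sys/<nss-dp-dir>/tx_completion_load_balance_threshold
+ *   echo 1  > /proc/sys/<nss-dp-dir>/tx_completion_load_balance_cpu
+ */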
 
 /*
  * nss_dp_tx_requeue_stop()
@@ -181,6 +188,108 @@
 	return ret;
 }
 
+/*
+ * nss_dp_tx_completion_load_balance()
+ * 	Tx completion load balance sysctl handler
+ */
+static int nss_dp_tx_completion_load_balance(struct ctl_table *ctl, int write,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+	int current_value = tx_completion_load_balance;
+
+	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+
+	if (!write) {
+		return ret;
+	}
+
+	if (ret) {
+		pr_err("Errno: -%d.\n", ret);
+		return ret;
+	}
+
+	/*
+	 * Check if tx_completion_load_balance is holding a valid value
+	 */
+	if ((tx_completion_load_balance != 1) &&
+			(tx_completion_load_balance != 0)) {
+		pr_err(" Invalid input. Valid values are 0/1\n");
+		tx_completion_load_balance = current_value;
+		return -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * nss_dp_tx_completion_load_balance_threshold()
+ * 	tx_completion_load_balance_threshold sysctl handler
+ */
+static int nss_dp_tx_completion_load_balance_threshold(struct ctl_table *ctl,
+		int write, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+	int current_value = tx_completion_load_balance_threshold;
+
+	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+
+	if (!write) {
+		return ret;
+	}
+
+	if (ret) {
+		pr_err("Errno: -%d.\n", ret);
+		return ret;
+	}
+
+	/*
+	 * Check if tx_completion_load_balance_threshold is holding a valid value
+	 */
+	if (tx_completion_load_balance_threshold < 0) {
+		pr_err(" Invalid input\n");
+		tx_completion_load_balance_threshold = current_value;
+		return -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * nss_dp_tx_completion_load_balance_cpu()
+ * 	tx_completion_load_balance_cpu sysctl handler
+ */
+static int nss_dp_tx_completion_load_balance_cpu(struct ctl_table *ctl,
+		int write, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	int current_value = tx_completion_load_balance_cpu;
+
+	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+
+	if (!write) {
+		return ret;
+	}
+
+	if (ret) {
+		pr_err("Errno: -%d.\n", ret);
+		return ret;
+	}
+
+	/*
+	 * Check if tx_completion_load_balance_cpu is holding a valid value
+	 */
+	if (tx_completion_load_balance_cpu < 0 ||
+			tx_completion_load_balance_cpu > 1) {
+		pr_err("Invalid input: valid CPU values are 0-1\n");
+		tx_completion_load_balance_cpu = current_value;
+		return -EINVAL;
+	}
+	return ret;
+}
+
 /* nss_dp_table
  *	Sysctl entries which are part of nss dp
  */
@@ -199,6 +308,27 @@
 		.mode                   = 0666,
 		.proc_handler           = &nss_dp_tx_desc_threshold_set,
 	},
+	{
+		.procname               = "tx_completion_load_balance",
+		.data                   = &tx_completion_load_balance,
+		.maxlen                 = sizeof(int),
+		.mode                   = 0666,
+		.proc_handler           = &nss_dp_tx_completion_load_balance,
+	},
+	{
+		.procname               = "tx_completion_load_balance_threshold",
+		.data                   = &tx_completion_load_balance_threshold,
+		.maxlen                 = sizeof(int),
+		.mode                   = 0666,
+		.proc_handler           = &nss_dp_tx_completion_load_balance_threshold,
+	},
+	{
+		.procname               = "tx_completion_load_balance_cpu",
+		.data                   = &tx_completion_load_balance_cpu,
+		.maxlen                 = sizeof(int),
+		.mode                   = 0666,
+		.proc_handler           = &nss_dp_tx_completion_load_balance_cpu,
+	},
 	{ }
 };
 
@@ -992,6 +1122,9 @@
 	}
 #endif
 
+	spin_lock_init(&syn_dp_tx_done_skb_list_lock);
+	INIT_LIST_HEAD(&syn_dp_tx_done_skb_list);
+
 	/* TODO: Features: CSUM, tx/rx offload... configure */
 
 	/* Register the network interface */
diff --git a/qca-nss-ecm/ecm_classifier_mark.c b/qca-nss-ecm/ecm_classifier_mark.c
index d054d8e..cd112fa 100644
--- a/qca-nss-ecm/ecm_classifier_mark.c
+++ b/qca-nss-ecm/ecm_classifier_mark.c
@@ -523,8 +523,8 @@
 	int dst_port;
 	ip_addr_t src_ip;
 	ip_addr_t dst_ip;
-	struct in6_addr src_ip6;
-	struct in6_addr dest_ip6;
+	struct in6_addr src_ip6 = IN6ADDR_ANY_INIT;
+	struct in6_addr dest_ip6 = IN6ADDR_ANY_INIT;
 	struct ecm_db_connection_instance *ci;
 	ecm_classifier_mark_sync_to_ipv6_callback_t cb = NULL;
 	ecm_classifier_mark_type_t type = ECM_CLASSIFIER_MARK_TYPE_L2_ENCAP;
diff --git a/qca-nss-ecm/ecm_classifier_pcc.c b/qca-nss-ecm/ecm_classifier_pcc.c
index 6c6fcd4..71f5c2a 100644
--- a/qca-nss-ecm/ecm_classifier_pcc.c
+++ b/qca-nss-ecm/ecm_classifier_pcc.c
@@ -828,8 +828,8 @@
 	}
 #ifdef ECM_IPV6_ENABLE
 	if (ip_version == 6) {
-		struct in6_addr src_ip6;
-		struct in6_addr dest_ip6;
+		struct in6_addr src_ip6 = IN6ADDR_ANY_INIT;
+		struct in6_addr dest_ip6 = IN6ADDR_ANY_INIT;
 		ECM_IP_ADDR_TO_NIN6_ADDR(src_ip6, src_ip);
 		ECM_IP_ADDR_TO_NIN6_ADDR(dest_ip6, dst_ip);
 
diff --git a/qca-nss-ecm/ecm_db/ecm_db.c b/qca-nss-ecm/ecm_db/ecm_db.c
index 722d6d5..3b899e9 100644
--- a/qca-nss-ecm/ecm_db/ecm_db.c
+++ b/qca-nss-ecm/ecm_db/ecm_db.c
@@ -275,7 +275,7 @@
 	while (ci) {
 		struct ecm_db_connection_instance *cin;
 		struct in6_addr prefix_addr;
-		struct in6_addr ecm_in6;
+		struct in6_addr ecm_in6 = IN6ADDR_ANY_INIT;
 		ip_addr_t ecm_addr;
 		struct ecm_db_iface_instance *interfaces[ECM_DB_IFACE_HEIRARCHY_MAX];
 		int32_t if_first;
diff --git a/qca-nss-ecm/ecm_db/ecm_db_connection.c b/qca-nss-ecm/ecm_db/ecm_db_connection.c
index fd68f23..717aa3a 100644
--- a/qca-nss-ecm/ecm_db/ecm_db_connection.c
+++ b/qca-nss-ecm/ecm_db/ecm_db_connection.c
@@ -1239,7 +1239,7 @@
 	/*
 	 * Remove from database if inserted
 	 */
-	if (!ci->flags & ECM_DB_CONNECTION_FLAGS_INSERTED) {
+	if (!(ci->flags & ECM_DB_CONNECTION_FLAGS_INSERTED)) {
 		spin_unlock_bh(&ecm_db_lock);
 	} else {
 		struct ecm_db_listener_instance *li;
diff --git a/qca-nss-ecm/ecm_db/ecm_db_host.c b/qca-nss-ecm/ecm_db/ecm_db_host.c
index 8820576..1bc3a94 100644
--- a/qca-nss-ecm/ecm_db/ecm_db_host.c
+++ b/qca-nss-ecm/ecm_db/ecm_db_host.c
@@ -249,7 +249,7 @@
 	/*
 	 * Remove from database if inserted
 	 */
-	if (!hi->flags & ECM_DB_HOST_FLAGS_INSERTED) {
+	if (!(hi->flags & ECM_DB_HOST_FLAGS_INSERTED)) {
 		spin_unlock_bh(&ecm_db_lock);
 	} else {
 		struct ecm_db_listener_instance *li;
diff --git a/qca-nss-ecm/ecm_db/ecm_db_iface.c b/qca-nss-ecm/ecm_db/ecm_db_iface.c
index 53c254c..108540c 100644
--- a/qca-nss-ecm/ecm_db/ecm_db_iface.c
+++ b/qca-nss-ecm/ecm_db/ecm_db_iface.c
@@ -1032,7 +1032,7 @@
 	/*
 	 * Remove from database if inserted
 	 */
-	if (!ii->flags & ECM_DB_IFACE_FLAGS_INSERTED) {
+	if (!(ii->flags & ECM_DB_IFACE_FLAGS_INSERTED)) {
 		spin_unlock_bh(&ecm_db_lock);
 	} else {
 		struct ecm_db_listener_instance *li;
@@ -1510,7 +1510,7 @@
 	/*
 	 * Remove from database if inserted
 	 */
-	if (!ii->flags & ECM_DB_IFACE_FLAGS_INSERTED) {
+	if (!(ii->flags & ECM_DB_IFACE_FLAGS_INSERTED)) {
 		return;
 	}
 
diff --git a/qca-nss-ecm/ecm_db/ecm_db_mapping.c b/qca-nss-ecm/ecm_db/ecm_db_mapping.c
index 1d06ef4..329c7cf 100644
--- a/qca-nss-ecm/ecm_db/ecm_db_mapping.c
+++ b/qca-nss-ecm/ecm_db/ecm_db_mapping.c
@@ -390,7 +390,7 @@
 	/*
 	 * Remove from database if inserted
 	 */
-	if (!mi->flags & ECM_DB_MAPPING_FLAGS_INSERTED) {
+	if (!(mi->flags & ECM_DB_MAPPING_FLAGS_INSERTED)) {
 		spin_unlock_bh(&ecm_db_lock);
 	} else {
 		struct ecm_db_listener_instance *li;
diff --git a/qca-nss-ecm/ecm_db/ecm_db_node.c b/qca-nss-ecm/ecm_db/ecm_db_node.c
index 18d6aba..8221e77 100644
--- a/qca-nss-ecm/ecm_db/ecm_db_node.c
+++ b/qca-nss-ecm/ecm_db/ecm_db_node.c
@@ -254,7 +254,7 @@
 	/*
 	 * Remove from database if inserted
 	 */
-	if (!ni->flags & ECM_DB_NODE_FLAGS_INSERTED) {
+	if (!(ni->flags & ECM_DB_NODE_FLAGS_INSERTED)) {
 		spin_unlock_bh(&ecm_db_lock);
 	} else {
 		struct ecm_db_listener_instance *li;
diff --git a/qca-nss-ecm/ecm_interface.c b/qca-nss-ecm/ecm_interface.c
index 52bd11b..7058f15 100644
--- a/qca-nss-ecm/ecm_interface.c
+++ b/qca-nss-ecm/ecm_interface.c
@@ -330,7 +330,7 @@
  */
 static struct net_device *ecm_interface_dev_find_by_local_addr_ipv6(ip_addr_t addr)
 {
-	struct in6_addr addr6;
+	struct in6_addr addr6 = IN6ADDR_ANY_INIT;
 	struct net_device *dev;
 
 	ECM_IP_ADDR_TO_NIN6_ADDR(addr6, addr);
@@ -421,7 +421,7 @@
  */
 static bool ecm_interface_mac_addr_get_ipv6(ip_addr_t addr, uint8_t *mac_addr, bool *on_link, ip_addr_t gw_addr)
 {
-	struct in6_addr daddr;
+	struct in6_addr daddr = IN6ADDR_ANY_INIT;
 	struct ecm_interface_route ecm_rt;
 	struct neighbour *neigh;
 	struct rt6_info *rt;
@@ -726,7 +726,7 @@
  */
 static bool ecm_interface_mac_addr_get_ipv6_no_route(struct net_device *dev, ip_addr_t addr, uint8_t *mac_addr)
 {
-	struct in6_addr daddr;
+	struct in6_addr daddr = IN6ADDR_ANY_INIT;
 	struct neighbour *neigh;
 	struct net_device *local_dev;
 
@@ -1129,7 +1129,7 @@
  */
 static bool ecm_interface_find_route_by_addr_ipv6(ip_addr_t addr, struct ecm_interface_route *ecm_rt)
 {
-	struct in6_addr naddr;
+	struct in6_addr naddr = IN6ADDR_ANY_INIT;
 
 	ECM_IP_ADDR_TO_NIN6_ADDR(naddr, addr);
 
@@ -1193,7 +1193,8 @@
  */
 void ecm_interface_send_neighbour_solicitation(struct net_device *dev, ip_addr_t addr)
 {
-	struct in6_addr dst_addr, src_addr;
+	struct in6_addr dst_addr = IN6ADDR_ANY_INIT;
+	struct in6_addr src_addr;
 	struct in6_addr mc_dst_addr;
 	struct rt6_info *rt6i;
 	struct neighbour *neigh;
@@ -1321,7 +1322,7 @@
 	struct neighbour *neigh;
 	struct rt6_info *rt;
 	struct dst_entry *dst;
-	struct in6_addr ipv6_addr;
+	struct in6_addr ipv6_addr = IN6ADDR_ANY_INIT;
 
 	ECM_IP_ADDR_TO_NIN6_ADDR(ipv6_addr, addr);
 
@@ -4156,7 +4157,10 @@
 #endif
 		   ) {
 			br_dev_src = ecm_interface_get_and_hold_dev_master(in_dev);
-			DEBUG_ASSERT(br_dev_src, "Expected a master\n");
+			if (!br_dev_src) {
+				DEBUG_WARN("Expected a master\n");
+				return 0;
+			}
 
 			/*
 			 * Source netdev is part of a bridge. First make sure that this bridge
diff --git a/qca-nss-ecm/frontends/cmn/ecm_ipv4.c b/qca-nss-ecm/frontends/cmn/ecm_ipv4.c
index e5ceae4..f542566 100644
--- a/qca-nss-ecm/frontends/cmn/ecm_ipv4.c
+++ b/qca-nss-ecm/frontends/cmn/ecm_ipv4.c
@@ -1,7 +1,7 @@
 /*
  **************************************************************************
  * Copyright (c) 2014-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for
  * any purpose with or without fee is hereby granted, provided that the
@@ -475,7 +475,11 @@
 				} else {
 					struct net_device *master;
 					master = ecm_interface_get_and_hold_dev_master(dev);
-					DEBUG_ASSERT(master, "%px: Expected a master\n", feci);
+					if (!master) {
+						DEBUG_WARN("%px: Expected a master\n", feci);
+						return NULL;
+					}
+
 					ecm_interface_send_arp_request(master, addr, on_link, gw_addr);
 					dev_put(master);
 				}
@@ -1508,50 +1512,6 @@
 	DEBUG_TRACE("%px: IPv4 CMN Routing: %s\n", out, out->name);
 
 	/*
-	 * If operations have stopped then do not process packets
-	 */
-	spin_lock_bh(&ecm_ipv4_lock);
-	if (unlikely(ecm_front_end_ipv4_stopped)) {
-		spin_unlock_bh(&ecm_ipv4_lock);
-		DEBUG_TRACE("Front end stopped\n");
-		return NF_ACCEPT;
-	}
-	spin_unlock_bh(&ecm_ipv4_lock);
-
-	/*
-	 * Don't process broadcast or multicast
-	 */
-	if (skb->pkt_type == PACKET_BROADCAST) {
-		DEBUG_TRACE("Broadcast, ignoring: %px\n", skb);
-		return NF_ACCEPT;
-	}
-
-#ifndef ECM_INTERFACE_PPTP_ENABLE
-	/*
-	 * skip pptp because we don't accelerate them
-	 */
-	if (ecm_interface_is_pptp(skb, out)) {
-		return NF_ACCEPT;
-	}
-#endif
-
-#ifndef ECM_INTERFACE_L2TPV2_ENABLE
-	/*
-	 * skip l2tpv2 because we don't accelerate them
-	 */
-	if (ecm_interface_is_l2tp_packet_by_version(skb, out, 2)) {
-		return NF_ACCEPT;
-	}
-#endif
-
-	/*
-	 * skip l2tpv3 because we don't accelerate them
-	 */
-	if (ecm_interface_is_l2tp_packet_by_version(skb, out, 3)) {
-		return NF_ACCEPT;
-	}
-
-	/*
 	 * Identify interface from where this packet came
 	 */
 	in = dev_get_by_index(&init_net, skb->skb_iif);
@@ -1562,13 +1522,56 @@
 		return NF_ACCEPT;
 	}
 
+	/*
+	 * If operations have stopped then do not process packets
+	 */
+	spin_lock_bh(&ecm_ipv4_lock);
+	if (unlikely(ecm_front_end_ipv4_stopped)) {
+		spin_unlock_bh(&ecm_ipv4_lock);
+		DEBUG_TRACE("Front end stopped\n");
+		goto skip_ipv4_route_flow;
+	}
+	spin_unlock_bh(&ecm_ipv4_lock);
+
+	/*
+	 * Don't process broadcast or multicast
+	 */
+	if (skb->pkt_type == PACKET_BROADCAST) {
+		DEBUG_TRACE("Broadcast, ignoring: %px\n", skb);
+		goto skip_ipv4_route_flow;
+	}
+
+#ifndef ECM_INTERFACE_PPTP_ENABLE
+	/*
+	 * skip pptp because we don't accelerate them
+	 */
+	if (ecm_interface_is_pptp(skb, out)) {
+		goto skip_ipv4_route_flow;
+	}
+#endif
+
+#ifndef ECM_INTERFACE_L2TPV2_ENABLE
+	/*
+	 * skip l2tpv2 because we don't accelerate them
+	 */
+	if (ecm_interface_is_l2tp_packet_by_version(skb, out, 2)) {
+		goto skip_ipv4_route_flow;
+	}
+#endif
+
+	/*
+	 * skip l2tpv3 because we don't accelerate them
+	 */
+	if (ecm_interface_is_l2tp_packet_by_version(skb, out, 3)) {
+		goto skip_ipv4_route_flow;
+	}
+
 #ifndef ECM_INTERFACE_OVS_BRIDGE_ENABLE
 	/*
 	 * skip OpenVSwitch flows because we don't accelerate them
 	 */
 	if (netif_is_ovs_master(out) || netif_is_ovs_master(in)) {
-		dev_put(in);
-		return NF_ACCEPT;
+		goto skip_ipv4_route_flow;
 	}
 #endif
 
@@ -1577,6 +1580,10 @@
 							can_accel, true, false, skb, 0);
 	dev_put(in);
 	return result;
+
+skip_ipv4_route_flow:
+	dev_put(in);
+	return NF_ACCEPT;
 }
 
 /*
@@ -1650,6 +1657,36 @@
 	DEBUG_TRACE("%px: IPv4 CMN Bridge: %s\n", out, out->name);
 
 	/*
+	 * Identify interface from where this packet came.
+	 * There are three scenarios to consider here:
+	 * 1. Packet came from a local source.
+	 *	Ignore - local is not handled.
+	 * 2. Packet came from a routed path.
+	 *	Ignore - it was handled in INET post routing.
+	 * 3. Packet is bridged from another port.
+	 *	Process.
+	 *
+	 * Begin by identifying case 1.
+	 * NOTE: We are given 'out' (which we implicitly know is a bridge port) so out interface's master is the 'bridge'.
+	 */
+	in = dev_get_by_index(&init_net, skb->skb_iif);
+	if (!in) {
+		/*
+		 * Case 1.
+		 */
+		bridge = ecm_interface_get_and_hold_dev_master((struct net_device *)out);
+
+		if (!bridge) {
+			DEBUG_WARN("Expected bridge\n");
+			return NF_ACCEPT;
+		}
+		DEBUG_TRACE("Local traffic: %px, ignoring traffic to bridge: %px (%s) \n", skb, bridge, bridge->name);
+		dev_put(bridge);
+		return NF_ACCEPT;
+	}
+	dev_put(in);
+
+	/*
 	 * If operations have stopped then do not process packets
 	 */
 	spin_lock_bh(&ecm_ipv4_lock);
@@ -1689,31 +1726,11 @@
 		return NF_ACCEPT;
 	}
 
-	/*
-	 * Identify interface from where this packet came.
-	 * There are three scenarios to consider here:
-	 * 1. Packet came from a local source.
-	 *	Ignore - local is not handled.
-	 * 2. Packet came from a routed path.
-	 *	Ignore - it was handled in INET post routing.
-	 * 3. Packet is bridged from another port.
-	 *	Process.
-	 *
-	 * Begin by identifying case 1.
-	 * NOTE: We are given 'out' (which we implicitly know is a bridge port) so out interface's master is the 'bridge'.
-	 */
 	bridge = ecm_interface_get_and_hold_dev_master((struct net_device *)out);
-	DEBUG_ASSERT(bridge, "Expected bridge\n");
-	in = dev_get_by_index(&init_net, skb->skb_iif);
-	if  (!in) {
-		/*
-		 * Case 1.
-		 */
-		DEBUG_TRACE("Local traffic: %px, ignoring traffic to bridge: %px (%s) \n", skb, bridge, bridge->name);
-		dev_put(bridge);
+	if (!bridge) {
+		DEBUG_WARN("Expected bridge\n");
 		return NF_ACCEPT;
 	}
-	dev_put(in);
 
 	/*
 	 * Case 2:
diff --git a/qca-nss-ecm/frontends/cmn/ecm_ipv6.c b/qca-nss-ecm/frontends/cmn/ecm_ipv6.c
index fc37eb8..dac809c 100644
--- a/qca-nss-ecm/frontends/cmn/ecm_ipv6.c
+++ b/qca-nss-ecm/frontends/cmn/ecm_ipv6.c
@@ -1,7 +1,7 @@
 /*
  **************************************************************************
  * Copyright (c) 2014-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Permission to use, copy, modify, and/or distribute this software for
  * any purpose with or without fee is hereby granted, provided that the
  * above copyright notice and this permission notice appear in all copies.
@@ -519,7 +519,10 @@
 				if (ecm_front_end_is_bridge_port(dev)) {
 					struct net_device *master;
 					master = ecm_interface_get_and_hold_dev_master(dev);
-					DEBUG_ASSERT(master, "%px: Expected a master\n", feci);
+					if (!master) {
+						DEBUG_WARN("%px: Expected a master for bridge port %s\n", feci, dev->name);
+						return NULL;
+					}
 					ecm_interface_send_neighbour_solicitation(master, gw_addr);
 					dev_put(master);
 				} else {
@@ -1264,50 +1267,6 @@
 	DEBUG_TRACE("%px: Routing: %s\n", out, out->name);
 
 	/*
-	 * If operations have stopped then do not process packets
-	 */
-	spin_lock_bh(&ecm_ipv6_lock);
-	if (unlikely(ecm_front_end_ipv6_stopped)) {
-		spin_unlock_bh(&ecm_ipv6_lock);
-		DEBUG_TRACE("Front end stopped\n");
-		return NF_ACCEPT;
-	}
-	spin_unlock_bh(&ecm_ipv6_lock);
-
-	/*
-	 * Don't process broadcast or multicast
-	 */
-	if (skb->pkt_type == PACKET_BROADCAST) {
-		DEBUG_TRACE("Broadcast, ignoring: %px\n", skb);
-		return NF_ACCEPT;
-	}
-
-#ifndef ECM_INTERFACE_PPTP_ENABLE
-	/*
-	 * skip pptp because we don't accelerate them
-	 */
-	if (ecm_interface_is_pptp(skb, out)) {
-		return NF_ACCEPT;
-	}
-#endif
-
-#ifndef ECM_INTERFACE_L2TPV2_ENABLE
-	/*
-	 * skip l2tpv2 because we don't accelerate them
-	 */
-	if (ecm_interface_is_l2tp_packet_by_version(skb, out, 2)) {
-		return NF_ACCEPT;
-	}
-#endif
-
-	/*
-	 * skip l2tpv3 because we don't accelerate them
-	 */
-	if (ecm_interface_is_l2tp_packet_by_version(skb, out, 3)) {
-		return NF_ACCEPT;
-	}
-
-	/*
 	 * Identify interface from where this packet came
 	 */
 	in = dev_get_by_index(&init_net, skb->skb_iif);
@@ -1318,13 +1277,56 @@
 		return NF_ACCEPT;
 	}
 
+	/*
+	 * If operations have stopped then do not process packets
+	 */
+	spin_lock_bh(&ecm_ipv6_lock);
+	if (unlikely(ecm_front_end_ipv6_stopped)) {
+		spin_unlock_bh(&ecm_ipv6_lock);
+		DEBUG_TRACE("Front end stopped\n");
+		goto skip_ipv6_route_flow;
+	}
+	spin_unlock_bh(&ecm_ipv6_lock);
+
+	/*
+	 * Don't process broadcast or multicast
+	 */
+	if (skb->pkt_type == PACKET_BROADCAST) {
+		DEBUG_TRACE("Broadcast, ignoring: %px\n", skb);
+		goto skip_ipv6_route_flow;
+	}
+
+#ifndef ECM_INTERFACE_PPTP_ENABLE
+	/*
+	 * skip pptp because we don't accelerate them
+	 */
+	if (ecm_interface_is_pptp(skb, out)) {
+		goto skip_ipv6_route_flow;
+	}
+#endif
+
+#ifndef ECM_INTERFACE_L2TPV2_ENABLE
+	/*
+	 * skip l2tpv2 because we don't accelerate them
+	 */
+	if (ecm_interface_is_l2tp_packet_by_version(skb, out, 2)) {
+		goto skip_ipv6_route_flow;
+	}
+#endif
+
+	/*
+	 * skip l2tpv3 because we don't accelerate them
+	 */
+	if (ecm_interface_is_l2tp_packet_by_version(skb, out, 3)) {
+		goto skip_ipv6_route_flow;
+	}
+
 #ifndef ECM_INTERFACE_OVS_BRIDGE_ENABLE
 	/*
 	 * skip OpenVSwitch flows because we don't accelerate them
 	 */
 	if (netif_is_ovs_master(out) || netif_is_ovs_master(in)) {
-		dev_put(in);
-		return NF_ACCEPT;
+		goto skip_ipv6_route_flow;
 	}
 #endif
 
@@ -1332,6 +1334,10 @@
 	result = ecm_ipv6_ip_process((struct net_device *)out, in, NULL, NULL, can_accel, true, false, skb, 0);
 	dev_put(in);
 	return result;
+
+skip_ipv6_route_flow:
+	dev_put(in);
+	return NF_ACCEPT;
 }
 
 /*
@@ -1405,6 +1411,36 @@
 	DEBUG_TRACE("%px: IPv6 CMN Bridge: %s\n", out, out->name);
 
 	/*
+	 * Identify interface from where this packet came.
+	 * There are three scenarios to consider here:
+	 * 1. Packet came from a local source.
+	 *	Ignore - local is not handled.
+	 * 2. Packet came from a routed path.
+	 *	Ignore - it was handled in INET post routing.
+	 * 3. Packet is bridged from another port.
+	 *	Process.
+	 *
+	 * Begin by identifying case 1.
+	 * NOTE: We are given 'out' (which we implicitly know is a bridge port) so out interface's master is the 'bridge'.
+	 */
+	in = dev_get_by_index(&init_net, skb->skb_iif);
+	if (!in) {
+		/*
+		 * Case 1.
+		 */
+		bridge = ecm_interface_get_and_hold_dev_master((struct net_device *)out);
+
+		if (!bridge) {
+			DEBUG_WARN("Expected bridge\n");
+			return NF_ACCEPT;
+		}
+		DEBUG_TRACE("Local traffic: %px, ignoring traffic to bridge: %px (%s) \n", skb, bridge, bridge->name);
+		dev_put(bridge);
+		return NF_ACCEPT;
+	}
+	dev_put(in);
+
+	/*
 	 * If operations have stopped then do not process packets
 	 */
 	spin_lock_bh(&ecm_ipv6_lock);
@@ -1444,31 +1480,11 @@
 		return NF_ACCEPT;
 	}
 
-	/*
-	 * Identify interface from where this packet came.
-	 * There are three scenarios to consider here:
-	 * 1. Packet came from a local source.
-	 *	Ignore - local is not handled.
-	 * 2. Packet came from a routed path.
-	 *	Ignore - it was handled in INET post routing.
-	 * 3. Packet is bridged from another port.
-	 *	Process.
-	 *
-	 * Begin by identifying case 1.
-	 * NOTE: We are given 'out' (which we implicitly know is a bridge port) so out interface's master is the 'bridge'.
-	 */
 	bridge = ecm_interface_get_and_hold_dev_master((struct net_device *)out);
-	DEBUG_ASSERT(bridge, "Expected bridge\n");
-	in = dev_get_by_index(&init_net, skb->skb_iif);
-	if (!in) {
-		/*
-		 * Case 1.
-		 */
-		DEBUG_TRACE("Local traffic: %px, ignoring traffic to bridge: %px (%s) \n", skb, bridge, bridge->name);
-		dev_put(bridge);
+	if (!bridge) {
+		DEBUG_WARN("Expected bridge\n");
 		return NF_ACCEPT;
 	}
-	dev_put(in);
 
 	/*
 	 * Case 2:
diff --git a/qca-nss-ecm/frontends/cmn/ecm_multicast_ipv4.c b/qca-nss-ecm/frontends/cmn/ecm_multicast_ipv4.c
index ea91eda..e61c1c7 100644
--- a/qca-nss-ecm/frontends/cmn/ecm_multicast_ipv4.c
+++ b/qca-nss-ecm/frontends/cmn/ecm_multicast_ipv4.c
@@ -682,7 +682,11 @@
 	 * interface list
 	 */
 	out_dev_master =  ecm_interface_get_and_hold_dev_master(out_dev);
-	DEBUG_ASSERT(out_dev_master, "Expected a master\n");
+	if (!out_dev_master) {
+		DEBUG_WARN("Expected a master\n");
+		goto done;
+	}
+
 	if_cnt = mc_bridge_ipv4_get_if(out_dev_master, ip_src, ip_grp, ECM_DB_MULTICAST_IF_MAX, dst_dev);
 	if (if_cnt <= 0) {
 		DEBUG_WARN("Not found a valid MCS if count %d\n", if_cnt);
diff --git a/qca-nss-ecm/frontends/cmn/ecm_multicast_ipv6.c b/qca-nss-ecm/frontends/cmn/ecm_multicast_ipv6.c
index e25318c..21046a5 100644
--- a/qca-nss-ecm/frontends/cmn/ecm_multicast_ipv6.c
+++ b/qca-nss-ecm/frontends/cmn/ecm_multicast_ipv6.c
@@ -366,7 +366,11 @@
 				if (ecm_front_end_is_bridge_port(dev)) {
 					struct net_device *master;
 					master = ecm_interface_get_and_hold_dev_master(dev);
-					DEBUG_ASSERT(master, "Expected a master\n");
+					if (!master) {
+						DEBUG_WARN("Expected a master\n");
+						return NULL;
+					}
+
 					ecm_interface_send_neighbour_solicitation(master, addr);
 					dev_put(master);
 				} else {
diff --git a/qca-nss-ecm/frontends/include/ecm_front_end_common.h b/qca-nss-ecm/frontends/include/ecm_front_end_common.h
index 0c70a1d..1c0a6d0 100644
--- a/qca-nss-ecm/frontends/include/ecm_front_end_common.h
+++ b/qca-nss-ecm/frontends/include/ecm_front_end_common.h
@@ -119,8 +119,8 @@
 	ip_addr_t ct_src_ip;
 
 	if (ip_version == 4) {
-		uint32_t flow_ip_32;
-		uint32_t ct_src_ip_32;
+		uint32_t flow_ip_32 = 0;
+		uint32_t ct_src_ip_32 = 0;
 		ECM_NIN4_ADDR_TO_IP_ADDR(ct_src_ip, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip);
 
 		/*
diff --git a/qca-nss-sfe/sfe.c b/qca-nss-sfe/sfe.c
index 0195d41..70f1c2c 100644
--- a/qca-nss-sfe/sfe.c
+++ b/qca-nss-sfe/sfe.c
@@ -132,6 +132,9 @@
 
 static struct sfe_ctx_instance_internal __sfe_ctx;
 
+/*
+ * When set, temporarily pause sfe_ipvX_periodic_sync() to reduce CPU overhead.
+ */
+bool sfe_pause_stats_sync = false;
+
 /*
  * Convert public SFE context to internal context
  */
@@ -1551,6 +1554,32 @@
 	__ATTR(bypass_mark, S_IWUSR | S_IRUGO, sfe_get_bypass_mark,
 	       sfe_set_bypass_mark);
 
+static ssize_t
+sfe_get_pause_stats_sync(struct device *dev, struct device_attribute *attr,
+		    char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", sfe_pause_stats_sync);
+}
+
+static ssize_t
+sfe_set_pause_stats_sync(struct device *dev, struct device_attribute *attr,
+		    const char *buf, size_t count)
+{
+	int ret;
+	bool enable;
+
+	ret = kstrtobool(buf, &enable);
+	if (ret) {
+		return ret;
+	}
+	sfe_pause_stats_sync = enable;
+	return count;
+}
+
+static const struct device_attribute sfe_pause_stats_sync_attr =
+	__ATTR(pause_stats_sync, S_IWUSR | S_IRUGO, sfe_get_pause_stats_sync,
+	       sfe_set_pause_stats_sync);
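+
+/*
+ * Example (the sysfs path is an assumption; it depends on where the
+ * sys_sfe kobject is created):
+ *   echo 1 > /sys/.../sfe/pause_stats_sync	(pause periodic stats sync)
+ *   echo 0 > /sys/.../sfe/pause_stats_sync	(resume)
+ */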
+
 /*
  * sfe_init_if()
  */
@@ -1603,6 +1632,13 @@
 		goto exit2;
 	}
 
+	result = sysfs_create_file(sfe_ctx->sys_sfe,
+				   &sfe_pause_stats_sync_attr.attr);
+	if (result) {
+		DEBUG_ERROR("failed to register Pause Stats sync  sysfs file: %d\n",
+			    result);
+		goto exit2;
+	}
+
 	spin_lock_init(&sfe_ctx->lock);
 
 	INIT_LIST_HEAD(&sfe_ctx->msg_queue);
diff --git a/qca-nss-sfe/sfe.h b/qca-nss-sfe/sfe.h
index 59203ad..c613c7f 100644
--- a/qca-nss-sfe/sfe.h
+++ b/qca-nss-sfe/sfe.h
@@ -327,4 +327,5 @@
 int sfe_init_if(void);
 void sfe_exit_if(void);
 
+extern bool sfe_pause_stats_sync;
 #endif /* __SFE_H */
diff --git a/qca-nss-sfe/sfe_ipv4.c b/qca-nss-sfe/sfe_ipv4.c
index 1fd2883..3650022 100644
--- a/qca-nss-sfe/sfe_ipv4.c
+++ b/qca-nss-sfe/sfe_ipv4.c
@@ -1859,6 +1859,9 @@
 	sfe_sync_rule_callback_t sync_rule_callback;
 	struct sfe_ipv4_connection *c;
 
+
+	/*
+	 * Stats sync is paused via sysfs; skip this round.
+	 */
+	if (sfe_pause_stats_sync)
+		goto done;
+
 	now_jiffies = get_jiffies_64();
 
 	rcu_read_lock();
diff --git a/qca-nss-sfe/sfe_ipv4_tcp.c b/qca-nss-sfe/sfe_ipv4_tcp.c
index b2d5ec9..5761564 100644
--- a/qca-nss-sfe/sfe_ipv4_tcp.c
+++ b/qca-nss-sfe/sfe_ipv4_tcp.c
@@ -133,6 +133,7 @@
 	bool bridge_flow;
 	bool fast_xmit;
 	netdev_features_t features;
+	bool segmentation_needed;
 
 	/*
 	 * Is our packet too short to contain a valid TCP header?
@@ -268,7 +269,8 @@
 	 * If our packet is larger than the MTU of the transmit interface then
 	 * we can't forward it easily.
 	 */
-	if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
+	segmentation_needed = len > cm->xmit_dev_mtu;
+	if (unlikely(segmentation_needed && !skb_is_gso(skb))) {
 		sfe_ipv4_sync_status(si, cm->connection, SFE_SYNC_REASON_STATS);
 		rcu_read_unlock();
 
@@ -555,6 +557,12 @@
 	}
 
 	/*
+	 * We're going to check for GSO flags when we transmit the packet so
+	 * start fetching the necessary cache line now.
+	 */
+	if (segmentation_needed)
+		prefetch(skb_shinfo(skb));
+
+	/*
 	 * From this point on we're good to modify the packet.
 	 */
 
@@ -724,18 +732,19 @@
 	this_cpu_inc(si->stats_pcpu->packets_forwarded64);
 
 	/*
-	 * We're going to check for GSO flags when we transmit the packet so
-	 * start fetching the necessary cache line now.
-	 */
-	prefetch(skb_shinfo(skb));
-
-	/*
 	 * We do per packet condition check before we could fast xmit the
 	 * packet.
 	 */
-	if (likely(fast_xmit && dev_fast_xmit(skb, xmit_dev, features))) {
-		this_cpu_inc(si->stats_pcpu->packets_fast_xmited64);
-		return 1;
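+	/*
+	 * An oversized GSO skb must take the regular xmit path so it can be
+	 * segmented; once we see one, clear fast xmit for this connection.
+	 */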
+	if (likely(fast_xmit)) {
+		if (likely(!segmentation_needed || !skb_is_gso(skb))) {
+			if (likely(dev_fast_xmit(skb, xmit_dev, features))) {
+				this_cpu_inc(si->stats_pcpu->packets_fast_xmited64);
+				return 1;
+			}
+		} else {
+			cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_XMIT;
+			DEBUG_TRACE("%px: fast xmit disabled for xmit dev %s\n", skb, xmit_dev->name);
+		}
 	}
 
 	/*
diff --git a/qca-nss-sfe/sfe_ipv6.c b/qca-nss-sfe/sfe_ipv6.c
index 488f3a4..9670c7e 100644
--- a/qca-nss-sfe/sfe_ipv6.c
+++ b/qca-nss-sfe/sfe_ipv6.c
@@ -1839,6 +1839,8 @@
 	sfe_sync_rule_callback_t sync_rule_callback;
 	struct sfe_ipv6_connection *c;
 
+
+	/*
+	 * Stats sync is paused via sysfs; skip this round.
+	 */
+	if (sfe_pause_stats_sync)
+		goto done;
+
 	now_jiffies = get_jiffies_64();
 
 	rcu_read_lock();
diff --git a/qca-nss-sfe/sfe_ipv6_tcp.c b/qca-nss-sfe/sfe_ipv6_tcp.c
index 6ba30b3..672289c 100644
--- a/qca-nss-sfe/sfe_ipv6_tcp.c
+++ b/qca-nss-sfe/sfe_ipv6_tcp.c
@@ -132,6 +132,7 @@
 	bool bridge_flow;
 	bool fast_xmit;
 	netdev_features_t features;
+	bool segmentation_needed;
 
 	/*
 	 * Is our packet too short to contain a valid TCP header?
@@ -269,7 +270,8 @@
 	 * If our packet is larger than the MTU of the transmit interface then
 	 * we can't forward it easily.
 	 */
-	if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
+	segmentation_needed = len > cm->xmit_dev_mtu;
+	if (unlikely(segmentation_needed && !skb_is_gso(skb))) {
 		sfe_ipv6_sync_status(si, cm->connection, SFE_SYNC_REASON_STATS);
 		rcu_read_unlock();
 
@@ -566,6 +568,12 @@
 	}
 
 	/*
+	 * We're going to check for GSO flags when we transmit the packet so
+	 * start fetching the necessary cache line now.
+	 */
+	if (len > cm->xmit_dev_mtu) prefetch(skb_shinfo(skb));
+
+	/*
 	 * From this point on we're good to modify the packet.
 	 */
 
@@ -729,18 +737,19 @@
 	this_cpu_inc(si->stats_pcpu->packets_forwarded64);
 
 	/*
-	 * We're going to check for GSO flags when we transmit the packet so
-	 * start fetching the necessary cache line now.
-	 */
-	prefetch(skb_shinfo(skb));
-
-	/*
 	 * We do per packet condition check before we could fast xmit the
 	 * packet.
 	 */
-	if (likely(fast_xmit && dev_fast_xmit(skb, xmit_dev, features))) {
-		this_cpu_inc(si->stats_pcpu->packets_fast_xmited64);
-		return 1;
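+	/*
+	 * An oversized GSO skb must take the regular xmit path so it can be
+	 * segmented; once we see one, clear fast xmit for this connection.
+	 */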
+	if (likely(fast_xmit)) {
+		if (likely(!segmentation_needed || !skb_is_gso(skb))) {
+			if (likely(dev_fast_xmit(skb, xmit_dev, features))) {
+				this_cpu_inc(si->stats_pcpu->packets_fast_xmited64);
+				return 1;
+			}
+		} else {
+			cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_FAST_XMIT;
+			DEBUG_TRACE("%px: fast xmit disabled for xmit dev %s\n", skb, xmit_dev->name);
+		}
 	}
 
 	/*