| /* |
| * sfe_ipv4.c |
| * Shortcut forwarding engine - IPv4 edition. |
| * |
| * Copyright (c) 2013-2016, 2019-2020, The Linux Foundation. All rights reserved. |
| * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. |
| * |
| * Permission to use, copy, modify, and/or distribute this software for any |
| * purpose with or without fee is hereby granted, provided that the above |
| * copyright notice and this permission notice appear in all copies. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| */ |
| |
| #include <linux/module.h> |
| #include <linux/sysfs.h> |
| #include <linux/skbuff.h> |
| #include <linux/icmp.h> |
| #include <net/tcp.h> |
| #include <net/udp.h> |
| #include <net/vxlan.h> |
| #include <linux/etherdevice.h> |
| #include <linux/version.h> |
| #include <linux/lockdep.h> |
| #include <linux/refcount.h> |
| #include <linux/netfilter.h> |
| #include <linux/inetdevice.h> |
| #include <linux/netfilter_ipv4.h> |
| #include <net/protocol.h> |
| #include <net/gre.h> |
| |
| #include "sfe_debug.h" |
| #include "sfe_api.h" |
| #include "sfe.h" |
| #include "sfe_flow_cookie.h" |
| #include "sfe_ipv4.h" |
| #include "sfe_ipv4_udp.h" |
| #include "sfe_ipv4_tcp.h" |
| #include "sfe_ipv4_icmp.h" |
| #include "sfe_pppoe.h" |
| #include "sfe_ipv4_gre.h" |
| |
| static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = { |
| "UDP_HEADER_INCOMPLETE", |
| "UDP_NO_CONNECTION", |
| "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT", |
| "UDP_SMALL_TTL", |
| "UDP_NEEDS_FRAGMENTATION", |
| "TCP_HEADER_INCOMPLETE", |
| "TCP_NO_CONNECTION_SLOW_FLAGS", |
| "TCP_NO_CONNECTION_FAST_FLAGS", |
| "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT", |
| "TCP_SMALL_TTL", |
| "TCP_NEEDS_FRAGMENTATION", |
| "TCP_FLAGS", |
| "TCP_SEQ_EXCEEDS_RIGHT_EDGE", |
| "TCP_SMALL_DATA_OFFS", |
| "TCP_BAD_SACK", |
| "TCP_BIG_DATA_OFFS", |
| "TCP_SEQ_BEFORE_LEFT_EDGE", |
| "TCP_ACK_EXCEEDS_RIGHT_EDGE", |
| "TCP_ACK_BEFORE_LEFT_EDGE", |
| "ICMP_HEADER_INCOMPLETE", |
| "ICMP_UNHANDLED_TYPE", |
| "ICMP_IPV4_HEADER_INCOMPLETE", |
| "ICMP_IPV4_NON_V4", |
| "ICMP_IPV4_IP_OPTIONS_INCOMPLETE", |
| "ICMP_IPV4_UDP_HEADER_INCOMPLETE", |
| "ICMP_IPV4_TCP_HEADER_INCOMPLETE", |
| "ICMP_IPV4_UNHANDLED_PROTOCOL", |
| "ICMP_NO_CONNECTION", |
| "ICMP_FLUSHED_CONNECTION", |
| "HEADER_INCOMPLETE", |
| "HEADER_CSUM_BAD", |
| "BAD_TOTAL_LENGTH", |
| "NON_V4", |
| "NON_INITIAL_FRAGMENT", |
| "DATAGRAM_INCOMPLETE", |
| "IP_OPTIONS_INCOMPLETE", |
| "UNHANDLED_PROTOCOL", |
| "NO_HEADROOM", |
| "INVALID_PPPOE_SESSION", |
| "INCORRECT_PPPOE_PARSING", |
| "PPPOE_NOT_SET_IN_CME", |
| "INGRESS_VLAN_TAG_MISMATCH", |
| "INVALID_SOURCE_INTERFACE", |
| "GRE_HEADER_INCOMPLETE", |
| "GRE_NO_CONNECTION", |
| "GRE_IP_OPTIONS_OR_INITIAL_FRAGMENT", |
| "GRE_SMALL_TTL", |
| "GRE_NEEDS_FRAGMENTATION" |
| }; |
| |
| static struct sfe_ipv4 __si; |
| |
| /* |
| * sfe_ipv4_gen_ip_csum() |
| * Generate the IP checksum for an IPv4 header. |
| * |
| * Note that this function assumes that we have only 20 bytes of IP header. |
| */ |
| u16 sfe_ipv4_gen_ip_csum(struct iphdr *iph) |
| { |
| u32 sum; |
| u16 *i = (u16 *)iph; |
| |
| iph->check = 0; |
| |
| /* |
| * Generate the sum. |
| */ |
| sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9]; |
| |
| /* |
| * Fold it to ones-complement form. |
| */ |
| sum = (sum & 0xffff) + (sum >> 16); |
| sum = (sum & 0xffff) + (sum >> 16); |
| |
| return (u16)sum ^ 0xffff; |
| } |
| |
| /* |
| * sfe_ipv4_get_connection_match_hash() |
| * Generate the hash used in connection match lookups. |
| */ |
| static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, u8 protocol, |
| __be32 src_ip, __be16 src_port, |
| __be32 dest_ip, __be16 dest_port) |
| { |
| u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port); |
| return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK; |
| } |
| |
| /* |
| * sfe_ipv4_find_connection_match_rcu() |
| * Get the IPv4 flow match info that corresponds to a particular 5-tuple. |
| * |
| * On entry we must be holding the lock that protects the hash table. |
| */ |
| struct sfe_ipv4_connection_match * |
| sfe_ipv4_find_connection_match_rcu(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol, |
| __be32 src_ip, __be16 src_port, |
| __be32 dest_ip, __be16 dest_port) |
| { |
| struct sfe_ipv4_connection_match *cm = NULL; |
| unsigned int conn_match_idx; |
| struct hlist_head *lhead; |
| |
| WARN_ON_ONCE(!rcu_read_lock_held()); |
| |
| conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port); |
| |
| lhead = &si->hlist_conn_match_hash_head[conn_match_idx]; |
| |
| hlist_for_each_entry_rcu(cm, lhead, hnode) { |
| if (cm->match_src_port != src_port |
| || cm->match_dest_port != dest_port |
| || cm->match_src_ip != src_ip |
| || cm->match_dest_ip != dest_ip |
| || cm->match_protocol != protocol) { |
| continue; |
| } |
| |
| this_cpu_inc(si->stats_pcpu->connection_match_hash_hits64); |
| |
| break; |
| } |
| |
| return cm; |
| } |
| |
| /* |
| * sfe_ipv4_connection_match_update_summary_stats() |
| * Update the summary stats for a connection match entry. |
| * |
| * Stats are incremented atomically. So use atomic substraction to update summary |
| * stats. |
| */ |
| static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm, |
| u32 *packets, u32 *bytes) |
| { |
| u32 packet_count, byte_count; |
| |
| packet_count = atomic_read(&cm->rx_packet_count); |
| cm->rx_packet_count64 += packet_count; |
| atomic_sub(packet_count, &cm->rx_packet_count); |
| |
| byte_count = atomic_read(&cm->rx_byte_count); |
| cm->rx_byte_count64 += byte_count; |
| atomic_sub(byte_count, &cm->rx_byte_count); |
| |
| *packets = packet_count; |
| *bytes = byte_count; |
| } |
| |
| /* |
| * sfe_ipv4_connection_match_compute_translations() |
| * Compute port and address translations for a connection match entry. |
| */ |
| static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm) |
| { |
| /* |
| * Before we insert the entry look to see if this is tagged as doing address |
| * translations. If it is then work out the adjustment that we need to apply |
| * to the transport checksum. |
| */ |
| if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) { |
| /* |
| * Precompute an incremental checksum adjustment so we can |
| * edit packets in this stream very quickly. The algorithm is from RFC1624. |
| */ |
| u16 src_ip_hi = cm->match_src_ip >> 16; |
| u16 src_ip_lo = cm->match_src_ip & 0xffff; |
| u32 xlate_src_ip = ~cm->xlate_src_ip; |
| u16 xlate_src_ip_hi = xlate_src_ip >> 16; |
| u16 xlate_src_ip_lo = xlate_src_ip & 0xffff; |
| u16 xlate_src_port = ~cm->xlate_src_port; |
| u32 adj; |
| |
| /* |
| * When we compute this fold it down to a 16-bit offset |
| * as that way we can avoid having to do a double |
| * folding of the twos-complement result because the |
| * addition of 2 16-bit values cannot cause a double |
| * wrap-around! |
| */ |
| adj = src_ip_hi + src_ip_lo + cm->match_src_port |
| + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port; |
| adj = (adj & 0xffff) + (adj >> 16); |
| adj = (adj & 0xffff) + (adj >> 16); |
| cm->xlate_src_csum_adjustment = (u16)adj; |
| |
| } |
| |
| if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) { |
| /* |
| * Precompute an incremental checksum adjustment so we can |
| * edit packets in this stream very quickly. The algorithm is from RFC1624. |
| */ |
| u16 dest_ip_hi = cm->match_dest_ip >> 16; |
| u16 dest_ip_lo = cm->match_dest_ip & 0xffff; |
| u32 xlate_dest_ip = ~cm->xlate_dest_ip; |
| u16 xlate_dest_ip_hi = xlate_dest_ip >> 16; |
| u16 xlate_dest_ip_lo = xlate_dest_ip & 0xffff; |
| u16 xlate_dest_port = ~cm->xlate_dest_port; |
| u32 adj; |
| |
| /* |
| * When we compute this fold it down to a 16-bit offset |
| * as that way we can avoid having to do a double |
| * folding of the twos-complement result because the |
| * addition of 2 16-bit values cannot cause a double |
| * wrap-around! |
| */ |
| adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port |
| + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port; |
| adj = (adj & 0xffff) + (adj >> 16); |
| adj = (adj & 0xffff) + (adj >> 16); |
| cm->xlate_dest_csum_adjustment = (u16)adj; |
| } |
| |
| if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) { |
| u32 adj = ~cm->match_src_ip + cm->xlate_src_ip; |
| if (adj < cm->xlate_src_ip) { |
| adj++; |
| } |
| |
| adj = (adj & 0xffff) + (adj >> 16); |
| adj = (adj & 0xffff) + (adj >> 16); |
| cm->xlate_src_partial_csum_adjustment = (u16)adj; |
| } |
| |
| if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) { |
| u32 adj = ~cm->match_dest_ip + cm->xlate_dest_ip; |
| if (adj < cm->xlate_dest_ip) { |
| adj++; |
| } |
| |
| adj = (adj & 0xffff) + (adj >> 16); |
| adj = (adj & 0xffff) + (adj >> 16); |
| cm->xlate_dest_partial_csum_adjustment = (u16)adj; |
| } |
| |
| } |
| |
| /* |
| * sfe_ipv4_update_summary_stats() |
| * Update the summary stats. |
| */ |
| static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si, struct sfe_ipv4_stats *stats) |
| { |
| int i = 0; |
| |
| memset(stats, 0, sizeof(*stats)); |
| |
| for_each_possible_cpu(i) { |
| const struct sfe_ipv4_stats *s = per_cpu_ptr(si->stats_pcpu, i); |
| |
| stats->connection_create_requests64 += s->connection_create_requests64; |
| stats->connection_create_collisions64 += s->connection_create_collisions64; |
| stats->connection_create_failures64 += s->connection_create_failures64; |
| stats->connection_destroy_requests64 += s->connection_destroy_requests64; |
| stats->connection_destroy_misses64 += s->connection_destroy_misses64; |
| stats->connection_match_hash_hits64 += s->connection_match_hash_hits64; |
| stats->connection_match_hash_reorders64 += s->connection_match_hash_reorders64; |
| stats->connection_flushes64 += s->connection_flushes64; |
| stats->packets_dropped64 += s->packets_dropped64; |
| stats->packets_forwarded64 += s->packets_forwarded64; |
| stats->packets_fast_xmited64 += s->packets_fast_xmited64; |
| stats->packets_not_forwarded64 += s->packets_not_forwarded64; |
| stats->pppoe_encap_packets_forwarded64 += s->pppoe_encap_packets_forwarded64; |
| stats->pppoe_decap_packets_forwarded64 += s->pppoe_decap_packets_forwarded64; |
| stats->pppoe_bridge_packets_forwarded64 += s->pppoe_bridge_packets_forwarded64; |
| } |
| |
| } |
| |
| /* |
| * sfe_ipv4_insert_connection_match() |
| * Insert a connection match into the hash. |
| */ |
| static inline void sfe_ipv4_insert_connection_match(struct sfe_ipv4 *si, |
| struct sfe_ipv4_connection_match *cm) |
| { |
| unsigned int conn_match_idx |
| = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol, |
| cm->match_src_ip, cm->match_src_port, |
| cm->match_dest_ip, cm->match_dest_port); |
| |
| lockdep_assert_held(&si->lock); |
| |
| hlist_add_head_rcu(&cm->hnode, &si->hlist_conn_match_hash_head[conn_match_idx]); |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| if (!si->flow_cookie_enable) |
| return; |
| |
| /* |
| * Configure hardware to put a flow cookie in packet of this flow, |
| * then we can accelerate the lookup process when we received this packet. |
| */ |
| for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { |
| struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; |
| |
| if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) { |
| flow_cookie_set_func_t func; |
| |
| rcu_read_lock(); |
| func = rcu_dereference(si->flow_cookie_set_func); |
| if (func) { |
| if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port, |
| cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) { |
| entry->match = cm; |
| cm->flow_cookie = conn_match_idx; |
| } |
| } |
| rcu_read_unlock(); |
| |
| break; |
| } |
| } |
| #endif |
| } |
| |
| /* |
| * sfe_ipv4_remove_connection_match() |
| * Remove a connection match object from the hash. |
| */ |
| static inline void sfe_ipv4_remove_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm) |
| { |
| |
| lockdep_assert_held(&si->lock); |
| |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| if (si->flow_cookie_enable) { |
| /* |
| * Tell hardware that we no longer need a flow cookie in packet of this flow |
| */ |
| unsigned int conn_match_idx; |
| |
| for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { |
| struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; |
| |
| if (cm == entry->match) { |
| flow_cookie_set_func_t func; |
| |
| rcu_read_lock(); |
| func = rcu_dereference(si->flow_cookie_set_func); |
| if (func) { |
| func(cm->match_protocol, cm->match_src_ip, cm->match_src_port, |
| cm->match_dest_ip, cm->match_dest_port, 0); |
| } |
| rcu_read_unlock(); |
| |
| cm->flow_cookie = 0; |
| entry->match = NULL; |
| entry->last_clean_time = jiffies; |
| break; |
| } |
| } |
| } |
| #endif |
| |
| hlist_del_init_rcu(&cm->hnode); |
| |
| } |
| |
| /* |
| * sfe_ipv4_get_connection_hash() |
| * Generate the hash used in connection lookups. |
| */ |
| static inline unsigned int sfe_ipv4_get_connection_hash(u8 protocol, __be32 src_ip, __be16 src_port, |
| __be32 dest_ip, __be16 dest_port) |
| { |
| u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port); |
| return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK; |
| } |
| |
| /* |
| * sfe_ipv4_find_connection() |
| * Get the IPv4 connection info that corresponds to a particular 5-tuple. |
| * |
| * On entry we must be holding the lock that protects the hash table. |
| */ |
| static inline struct sfe_ipv4_connection *sfe_ipv4_find_connection(struct sfe_ipv4 *si, u32 protocol, |
| __be32 src_ip, __be16 src_port, |
| __be32 dest_ip, __be16 dest_port) |
| { |
| struct sfe_ipv4_connection *c; |
| unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port); |
| |
| lockdep_assert_held(&si->lock); |
| |
| c = si->conn_hash[conn_idx]; |
| |
| /* |
| * Will need connection entry for next create/destroy metadata, |
| * So no need to re-order entry for these requests |
| */ |
| while (c) { |
| if ((c->src_port == src_port) |
| && (c->dest_port == dest_port) |
| && (c->src_ip == src_ip) |
| && (c->dest_ip == dest_ip) |
| && (c->protocol == protocol)) { |
| return c; |
| } |
| |
| c = c->next; |
| } |
| |
| return NULL; |
| } |
| |
| /* |
| * sfe_ipv4_insert_connection() |
| * Insert a connection into the hash. |
| * |
| * On entry we must be holding the lock that protects the hash table. |
| */ |
| static void sfe_ipv4_insert_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c) |
| { |
| struct sfe_ipv4_connection **hash_head; |
| struct sfe_ipv4_connection *prev_head; |
| unsigned int conn_idx; |
| |
| lockdep_assert_held(&si->lock); |
| |
| /* |
| * Insert entry into the connection hash. |
| */ |
| conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port, |
| c->dest_ip, c->dest_port); |
| hash_head = &si->conn_hash[conn_idx]; |
| prev_head = *hash_head; |
| c->prev = NULL; |
| if (prev_head) { |
| prev_head->prev = c; |
| } |
| |
| c->next = prev_head; |
| *hash_head = c; |
| |
| /* |
| * Insert entry into the "all connections" list. |
| */ |
| if (si->all_connections_tail) { |
| c->all_connections_prev = si->all_connections_tail; |
| si->all_connections_tail->all_connections_next = c; |
| } else { |
| c->all_connections_prev = NULL; |
| si->all_connections_head = c; |
| } |
| |
| si->all_connections_tail = c; |
| c->all_connections_next = NULL; |
| si->num_connections++; |
| |
| /* |
| * Insert the connection match objects too. |
| */ |
| sfe_ipv4_insert_connection_match(si, c->original_match); |
| sfe_ipv4_insert_connection_match(si, c->reply_match); |
| } |
| |
| /* |
| * sfe_ipv4_remove_connection() |
| * Remove a sfe_ipv4_connection object from the hash. |
| */ |
| bool sfe_ipv4_remove_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c) |
| { |
| lockdep_assert_held(&si->lock); |
| |
| if (c->removed) { |
| DEBUG_ERROR("%px: Connection has been removed already\n", c); |
| return false; |
| } |
| |
| /* |
| * Remove the connection match objects. |
| */ |
| sfe_ipv4_remove_connection_match(si, c->reply_match); |
| sfe_ipv4_remove_connection_match(si, c->original_match); |
| |
| /* |
| * Unlink the connection. |
| */ |
| if (c->prev) { |
| c->prev->next = c->next; |
| } else { |
| unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port, |
| c->dest_ip, c->dest_port); |
| si->conn_hash[conn_idx] = c->next; |
| } |
| |
| if (c->next) { |
| c->next->prev = c->prev; |
| } |
| |
| /* |
| * Unlink connection from all_connections list |
| */ |
| if (c->all_connections_prev) { |
| c->all_connections_prev->all_connections_next = c->all_connections_next; |
| } else { |
| si->all_connections_head = c->all_connections_next; |
| } |
| |
| if (c->all_connections_next) { |
| c->all_connections_next->all_connections_prev = c->all_connections_prev; |
| } else { |
| si->all_connections_tail = c->all_connections_prev; |
| } |
| |
| /* |
| * If I am the next sync connection, move the sync to my next or head. |
| */ |
| if (unlikely(si->wc_next == c)) { |
| si->wc_next = c->all_connections_next; |
| } |
| |
| c->removed = true; |
| si->num_connections--; |
| return true; |
| } |
| |
| /* |
| * sfe_ipv4_gen_sync_connection() |
| * Sync a connection. |
| * |
| * On entry to this function we expect that the lock for the connection is either |
| * already held (while called from sfe_ipv4_periodic_sync() or isn't required |
| * (while called from sfe_ipv4_flush_connection()) |
| */ |
| static void sfe_ipv4_gen_sync_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c, |
| struct sfe_connection_sync *sis, sfe_sync_reason_t reason, |
| u64 now_jiffies) |
| { |
| struct sfe_ipv4_connection_match *original_cm; |
| struct sfe_ipv4_connection_match *reply_cm; |
| u32 packet_count, byte_count; |
| |
| /* |
| * Fill in the update message. |
| */ |
| sis->is_v6 = 0; |
| sis->protocol = c->protocol; |
| sis->src_ip.ip = c->src_ip; |
| sis->src_ip_xlate.ip = c->src_ip_xlate; |
| sis->dest_ip.ip = c->dest_ip; |
| sis->dest_ip_xlate.ip = c->dest_ip_xlate; |
| sis->src_port = c->src_port; |
| sis->src_port_xlate = c->src_port_xlate; |
| sis->dest_port = c->dest_port; |
| sis->dest_port_xlate = c->dest_port_xlate; |
| |
| original_cm = c->original_match; |
| reply_cm = c->reply_match; |
| sis->src_td_max_window = original_cm->protocol_state.tcp.max_win; |
| sis->src_td_end = original_cm->protocol_state.tcp.end; |
| sis->src_td_max_end = original_cm->protocol_state.tcp.max_end; |
| sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win; |
| sis->dest_td_end = reply_cm->protocol_state.tcp.end; |
| sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end; |
| |
| sfe_ipv4_connection_match_update_summary_stats(original_cm, &packet_count, &byte_count); |
| sis->src_new_packet_count = packet_count; |
| sis->src_new_byte_count = byte_count; |
| |
| sfe_ipv4_connection_match_update_summary_stats(reply_cm, &packet_count, &byte_count); |
| sis->dest_new_packet_count = packet_count; |
| sis->dest_new_byte_count = byte_count; |
| |
| sis->src_dev = original_cm->match_dev; |
| sis->src_packet_count = original_cm->rx_packet_count64; |
| sis->src_byte_count = original_cm->rx_byte_count64; |
| |
| sis->dest_dev = reply_cm->match_dev; |
| sis->dest_packet_count = reply_cm->rx_packet_count64; |
| sis->dest_byte_count = reply_cm->rx_byte_count64; |
| |
| sis->reason = reason; |
| |
| /* |
| * Get the time increment since our last sync. |
| */ |
| sis->delta_jiffies = now_jiffies - c->last_sync_jiffies; |
| c->last_sync_jiffies = now_jiffies; |
| } |
| |
| /* |
| * sfe_ipv4_free_connection_rcu() |
| * Called at RCU qs state to free the connection object. |
| */ |
| static void sfe_ipv4_free_connection_rcu(struct rcu_head *head) |
| { |
| struct sfe_ipv4_connection *c; |
| struct udp_sock *up; |
| struct sock *sk; |
| |
| /* |
| * We dont need spin lock as the connection is already removed from link list |
| */ |
| c = container_of(head, struct sfe_ipv4_connection, rcu); |
| |
| BUG_ON(!c->removed); |
| |
| DEBUG_TRACE("%px: connecton has been deleted\n", c); |
| |
| /* |
| * Decrease the refcount taken in function sfe_ipv4_create_rule(), |
| * during call of __udp4_lib_lookup() |
| */ |
| up = c->reply_match->up; |
| if (up) { |
| sk = (struct sock *)up; |
| sock_put(sk); |
| } |
| |
| /* |
| * Release our hold of the source and dest devices and free the memory |
| * for our connection objects. |
| */ |
| dev_put(c->original_dev); |
| dev_put(c->reply_dev); |
| kfree(c->original_match); |
| kfree(c->reply_match); |
| kfree(c); |
| } |
| |
| /* |
| * sfe_ipv4_sync_status() |
| * update a connection status to its connection manager. |
| * |
| * si: the ipv4 context |
| * c: which connection to be notified |
| * reason: what kind of notification: flush, stats or destroy |
| */ |
| void sfe_ipv4_sync_status(struct sfe_ipv4 *si, |
| struct sfe_ipv4_connection *c, |
| sfe_sync_reason_t reason) |
| { |
| struct sfe_connection_sync sis; |
| u64 now_jiffies; |
| sfe_sync_rule_callback_t sync_rule_callback; |
| |
| rcu_read_lock(); |
| sync_rule_callback = rcu_dereference(si->sync_rule_callback); |
| if (!sync_rule_callback) { |
| rcu_read_unlock(); |
| return; |
| } |
| |
| /* |
| * Generate a sync message and then sync. |
| */ |
| now_jiffies = get_jiffies_64(); |
| sfe_ipv4_gen_sync_connection(si, c, &sis, reason, now_jiffies); |
| sync_rule_callback(&sis); |
| |
| rcu_read_unlock(); |
| } |
| |
| /* |
| * sfe_ipv4_flush_connection() |
| * Flush a connection and free all associated resources. |
| * |
| * We need to be called with bottom halves disabled locally as we need to acquire |
| * the connection hash lock and release it again. In general we're actually called |
| * from within a BH and so we're fine, but we're also called when connections are |
| * torn down. |
| */ |
| void sfe_ipv4_flush_connection(struct sfe_ipv4 *si, |
| struct sfe_ipv4_connection *c, |
| sfe_sync_reason_t reason) |
| { |
| BUG_ON(!c->removed); |
| |
| this_cpu_inc(si->stats_pcpu->connection_flushes64); |
| sfe_ipv4_sync_status(si, c, reason); |
| |
| /* |
| * Release our hold of the source and dest devices and free the memory |
| * for our connection objects. |
| */ |
| call_rcu(&c->rcu, sfe_ipv4_free_connection_rcu); |
| } |
| |
| /* |
| * sfe_ipv4_exception_stats_inc() |
| * Increment exception stats. |
| */ |
| void sfe_ipv4_exception_stats_inc(struct sfe_ipv4 *si, enum sfe_ipv4_exception_events reason) |
| { |
| struct sfe_ipv4_stats *stats = this_cpu_ptr(si->stats_pcpu); |
| stats->exception_events64[reason]++; |
| stats->packets_not_forwarded64++; |
| } |
| |
| /* |
| * sfe_ipv4_is_loal_ip() |
| * Returns true if IP is local; returns false otherwise. |
| */ |
| static bool sfe_ipv4_is_local_ip(struct sfe_ipv4 *si, __be32 ip_addr) |
| { |
| struct net_device *dev; |
| |
| dev = ip_dev_find(&init_net, ip_addr); |
| if (dev) { |
| dev_put(dev); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* |
| * sfe_ipv4_recv() |
| * Handle packet receives and forwaring. |
| * |
| * Returns 1 if the packet is forwarded or 0 if it isn't. |
| */ |
| int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb, struct sfe_l2_info *l2_info, bool tun_outer) |
| { |
| struct sfe_ipv4 *si = &__si; |
| unsigned int len; |
| unsigned int tot_len; |
| unsigned int frag_off; |
| unsigned int ihl; |
| bool sync_on_find; |
| bool ip_options; |
| struct iphdr *iph; |
| u32 protocol; |
| |
| /* |
| * Check that we have space for an IP header here. |
| */ |
| len = skb->len; |
| if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr)))) { |
| sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE); |
| DEBUG_TRACE("len: %u is too short\n", len); |
| return 0; |
| } |
| |
| /* |
| * Validate ip csum if necessary. If ip_summed is set to CHECKSUM_UNNECESSARY, it is assumed |
| * that the L3 checksum is validated by the Rx interface or the tunnel interface that has |
| * generated the packet. |
| */ |
| iph = (struct iphdr *)skb->data; |
| if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY) && (ip_fast_csum((u8 *)iph, iph->ihl))) { |
| sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_HEADER_CSUM_BAD); |
| |
| DEBUG_TRACE("Bad IPv4 header csum: 0x%x\n", iph->check); |
| return 0; |
| } |
| |
| /* |
| * Check that our "total length" is large enough for an IP header. |
| */ |
| tot_len = ntohs(iph->tot_len); |
| if (unlikely(tot_len < sizeof(struct iphdr))) { |
| |
| sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH); |
| DEBUG_TRACE("tot_len: %u is too short\n", tot_len); |
| return 0; |
| } |
| |
| /* |
| * Is our IP version wrong? |
| */ |
| if (unlikely(iph->version != 4)) { |
| sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_NON_V4); |
| DEBUG_TRACE("IP version: %u\n", iph->version); |
| return 0; |
| } |
| |
| /* |
| * Does our datagram fit inside the skb? |
| */ |
| if (unlikely(tot_len > len)) { |
| DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len); |
| sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE); |
| return 0; |
| } |
| |
| /* |
| * Do we have a non-initial fragment? |
| */ |
| frag_off = ntohs(iph->frag_off); |
| if (unlikely(frag_off & IP_OFFSET)) { |
| sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT); |
| DEBUG_TRACE("non-initial fragment\n"); |
| return 0; |
| } |
| |
| /* |
| * If we have a (first) fragment then mark it to cause any connection to flush. |
| */ |
| sync_on_find = unlikely(frag_off & IP_MF) ? true : false; |
| |
| /* |
| * Do we have any IP options? That's definite a slow path! If we do have IP |
| * options we need to recheck our header size. |
| */ |
| ihl = iph->ihl << 2; |
| ip_options = unlikely(ihl != sizeof(struct iphdr)) ? true : false; |
| if (unlikely(ip_options)) { |
| if (unlikely(len < ihl)) { |
| |
| DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl); |
| sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE); |
| return 0; |
| } |
| |
| sync_on_find = true; |
| } |
| |
| protocol = iph->protocol; |
| if (IPPROTO_UDP == protocol) { |
| return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, sync_on_find, l2_info, tun_outer); |
| } |
| |
| if (IPPROTO_TCP == protocol) { |
| return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, sync_on_find, l2_info); |
| } |
| |
| if (IPPROTO_ICMP == protocol) { |
| return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl); |
| } |
| |
| #ifdef SFE_GRE_TUN_ENABLE |
| if (IPPROTO_GRE == protocol) { |
| return sfe_ipv4_recv_gre(si, skb, dev, len, iph, ihl, sync_on_find, l2_info, tun_outer); |
| } |
| #endif |
| |
| sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL); |
| |
| DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol); |
| return 0; |
| } |
| |
| static void |
| sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c, |
| struct sfe_ipv4_rule_create_msg *msg) |
| { |
| struct sfe_ipv4_connection_match *orig_cm; |
| struct sfe_ipv4_connection_match *repl_cm; |
| struct sfe_ipv4_tcp_connection_match *orig_tcp; |
| struct sfe_ipv4_tcp_connection_match *repl_tcp; |
| |
| orig_cm = c->original_match; |
| repl_cm = c->reply_match; |
| orig_tcp = &orig_cm->protocol_state.tcp; |
| repl_tcp = &repl_cm->protocol_state.tcp; |
| |
| /* update orig */ |
| if (orig_tcp->max_win < msg->tcp_rule.flow_max_window) { |
| orig_tcp->max_win = msg->tcp_rule.flow_max_window; |
| } |
| if ((s32)(orig_tcp->end - msg->tcp_rule.flow_end) < 0) { |
| orig_tcp->end = msg->tcp_rule.flow_end; |
| } |
| if ((s32)(orig_tcp->max_end - msg->tcp_rule.flow_max_end) < 0) { |
| orig_tcp->max_end = msg->tcp_rule.flow_max_end; |
| } |
| |
| /* update reply */ |
| if (repl_tcp->max_win < msg->tcp_rule.return_max_window) { |
| repl_tcp->max_win = msg->tcp_rule.return_max_window; |
| } |
| if ((s32)(repl_tcp->end - msg->tcp_rule.return_end) < 0) { |
| repl_tcp->end = msg->tcp_rule.return_end; |
| } |
| if ((s32)(repl_tcp->max_end - msg->tcp_rule.return_max_end) < 0) { |
| repl_tcp->max_end = msg->tcp_rule.return_max_end; |
| } |
| |
| /* update match flags */ |
| orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; |
| repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_NO_SEQ_CHECK) { |
| |
| orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; |
| repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; |
| } |
| } |
| |
| static void |
| sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c, |
| struct sfe_ipv4_rule_create_msg *msg) |
| { |
| switch (msg->tuple.protocol) { |
| case IPPROTO_TCP: |
| sfe_ipv4_update_tcp_state(c, msg); |
| break; |
| } |
| } |
| |
| /* |
| * sfe_ipv4_match_entry_set_vlan() |
| */ |
| static void sfe_ipv4_match_entry_set_vlan( |
| struct sfe_ipv4_connection_match *cm, |
| u32 primary_ingress_vlan_tag, |
| u32 primary_egress_vlan_tag, |
| u32 secondary_ingress_vlan_tag, |
| u32 secondary_egress_vlan_tag) |
| { |
| u16 tpid; |
| /* |
| * Prevent stacking header counts when updating. |
| */ |
| cm->ingress_vlan_hdr_cnt = 0; |
| cm->egress_vlan_hdr_cnt = 0; |
| memset(cm->ingress_vlan_hdr, 0, sizeof(cm->ingress_vlan_hdr)); |
| memset(cm->egress_vlan_hdr, 0, sizeof(cm->egress_vlan_hdr)); |
| |
| /* |
| * vlan_hdr[0] corresponds to outer tag |
| * vlan_hdr[1] corresponds to inner tag |
| * Extract the vlan information (tpid and tci) from rule message |
| */ |
| if ((primary_ingress_vlan_tag & VLAN_VID_MASK) != SFE_VLAN_ID_NOT_CONFIGURED) { |
| tpid = (u16)(primary_ingress_vlan_tag >> 16); |
| cm->ingress_vlan_hdr[0].tpid = ntohs(tpid); |
| cm->ingress_vlan_hdr[0].tci = (u16)primary_ingress_vlan_tag; |
| cm->ingress_vlan_hdr_cnt++; |
| } |
| |
| if ((secondary_ingress_vlan_tag & VLAN_VID_MASK) != SFE_VLAN_ID_NOT_CONFIGURED) { |
| tpid = (u16)(secondary_ingress_vlan_tag >> 16); |
| cm->ingress_vlan_hdr[1].tpid = ntohs(tpid); |
| cm->ingress_vlan_hdr[1].tci = (u16)secondary_ingress_vlan_tag; |
| cm->ingress_vlan_hdr_cnt++; |
| } |
| |
| if ((primary_egress_vlan_tag & VLAN_VID_MASK) != SFE_VLAN_ID_NOT_CONFIGURED) { |
| tpid = (u16)(primary_egress_vlan_tag >> 16); |
| cm->egress_vlan_hdr[0].tpid = ntohs(tpid); |
| cm->egress_vlan_hdr[0].tci = (u16)primary_egress_vlan_tag; |
| cm->egress_vlan_hdr_cnt++; |
| } |
| |
| if ((secondary_egress_vlan_tag & VLAN_VID_MASK) != SFE_VLAN_ID_NOT_CONFIGURED) { |
| tpid = (u16)(secondary_egress_vlan_tag >> 16); |
| cm->egress_vlan_hdr[1].tpid = ntohs(tpid); |
| cm->egress_vlan_hdr[1].tci = (u16)secondary_egress_vlan_tag; |
| cm->egress_vlan_hdr_cnt++; |
| } |
| } |
| |
| void sfe_ipv4_update_rule(struct sfe_ipv4_rule_create_msg *msg) |
| { |
| struct sfe_ipv4_connection *c; |
| struct sfe_ipv4 *si = &__si; |
| |
| spin_lock_bh(&si->lock); |
| |
| c = sfe_ipv4_find_connection(si, |
| msg->tuple.protocol, |
| msg->tuple.flow_ip, |
| msg->tuple.flow_ident, |
| msg->tuple.return_ip, |
| msg->tuple.return_ident); |
| if (c != NULL) { |
| sfe_ipv4_update_protocol_state(c, msg); |
| } |
| |
| spin_unlock_bh(&si->lock); |
| } |
| |
| /* |
| * sfe_ipv4_mark_rule_update() |
| * Updates the mark values of match entries. |
| */ |
| void sfe_ipv4_mark_rule_update(struct sfe_connection_mark *mark) |
| { |
| struct sfe_ipv4_connection *c; |
| struct sfe_ipv4 *si = &__si; |
| |
| spin_lock_bh(&si->lock); |
| c = sfe_ipv4_find_connection(si, mark->protocol, |
| mark->src_ip[0], |
| mark->src_port, |
| mark->dest_ip[0], |
| mark->dest_port); |
| if (!c) { |
| spin_unlock_bh(&si->lock); |
| DEBUG_WARN("%px: connection not found for mark update\n", mark); |
| return; |
| } |
| c->original_match->mark = mark->mark; |
| c->reply_match->mark = mark->mark; |
| spin_unlock_bh(&si->lock); |
| DEBUG_TRACE("%px: connection mark updated with %d\n", mark, mark->mark); |
| } |
| EXPORT_SYMBOL(sfe_ipv4_mark_rule_update); |
| |
| /* |
| * sfe_ipv4_xmit_eth_type_check() |
| * Checking if MAC header has to be written. |
| */ |
| static inline bool sfe_ipv4_xmit_eth_type_check(struct net_device *dev, u32 cm_flags) |
| { |
| if (!(dev->flags & IFF_NOARP)) { |
| return true; |
| } |
| |
| /* |
| * For PPPoE, since we are now supporting PPPoE encapsulation, we are writing L2 header. |
| */ |
| if (unlikely(cm_flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_ENCAP)) { |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* |
| * sfe_ipv4_create_rule() |
| * Create a forwarding rule. |
| */ |
| int sfe_ipv4_create_rule(struct sfe_ipv4_rule_create_msg *msg) |
| { |
| struct sfe_ipv4 *si = &__si; |
| struct sfe_ipv4_connection *c, *c_old; |
| struct sfe_ipv4_connection_match *original_cm; |
| struct sfe_ipv4_connection_match *reply_cm; |
| struct net_device *dest_dev; |
| struct net_device *src_dev; |
| struct sfe_ipv4_5tuple *tuple = &msg->tuple; |
| s32 flow_interface_num = msg->conn_rule.flow_top_interface_num; |
| s32 return_interface_num = msg->conn_rule.return_top_interface_num; |
| struct net *net; |
| struct sock *sk; |
| unsigned int src_if_idx; |
| |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_FLOW_BOTTOM_INTERFACE) { |
| flow_interface_num = msg->conn_rule.flow_interface_num; |
| } |
| |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_RETURN_BOTTOM_INTERFACE) { |
| return_interface_num = msg->conn_rule.return_interface_num; |
| } |
| |
| src_dev = dev_get_by_index(&init_net, flow_interface_num); |
| if (!src_dev) { |
| DEBUG_WARN("%px: Unable to find src_dev corresponding to %d\n", msg, |
| flow_interface_num); |
| this_cpu_inc(si->stats_pcpu->connection_create_failures64); |
| return -EINVAL; |
| } |
| |
| dest_dev = dev_get_by_index(&init_net, return_interface_num); |
| if (!dest_dev) { |
| DEBUG_WARN("%px: Unable to find dest_dev corresponding to %d\n", msg, |
| return_interface_num); |
| this_cpu_inc(si->stats_pcpu->connection_create_failures64); |
| dev_put(src_dev); |
| return -EINVAL; |
| } |
| |
| if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) || |
| (src_dev->reg_state != NETREG_REGISTERED))) { |
| dev_put(src_dev); |
| dev_put(dest_dev); |
| DEBUG_WARN("%px: src_dev=%s and dest_dev=%s are unregistered\n", msg, |
| src_dev->name, dest_dev->name); |
| this_cpu_inc(si->stats_pcpu->connection_create_failures64); |
| return -EINVAL; |
| } |
| |
| /* |
| * Allocate the various connection tracking objects. |
| */ |
| c = (struct sfe_ipv4_connection *)kzalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC); |
| if (unlikely(!c)) { |
| DEBUG_WARN("%px: memory allocation of connection entry failed\n", msg); |
| this_cpu_inc(si->stats_pcpu->connection_create_failures64); |
| dev_put(src_dev); |
| dev_put(dest_dev); |
| return -ENOMEM; |
| } |
| |
| original_cm = (struct sfe_ipv4_connection_match *)kzalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC); |
| if (unlikely(!original_cm)) { |
| DEBUG_WARN("%px: memory allocation of connection match entry failed\n", msg); |
| this_cpu_inc(si->stats_pcpu->connection_create_failures64); |
| kfree(c); |
| dev_put(src_dev); |
| dev_put(dest_dev); |
| return -ENOMEM; |
| } |
| |
| reply_cm = (struct sfe_ipv4_connection_match *)kzalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC); |
| if (unlikely(!reply_cm)) { |
| DEBUG_WARN("%px: memory allocation of connection match entry failed\n", msg); |
| this_cpu_inc(si->stats_pcpu->connection_create_failures64); |
| kfree(original_cm); |
| kfree(c); |
| dev_put(src_dev); |
| dev_put(dest_dev); |
| return -ENOMEM; |
| } |
| |
| this_cpu_inc(si->stats_pcpu->connection_create_requests64); |
| |
| spin_lock_bh(&si->lock); |
| |
| /* |
| * Check to see if there is already a flow that matches the rule we're |
| * trying to create. If there is then we can't create a new one. |
| */ |
| c_old = sfe_ipv4_find_connection(si, |
| msg->tuple.protocol, |
| msg->tuple.flow_ip, |
| msg->tuple.flow_ident, |
| msg->tuple.return_ip, |
| msg->tuple.return_ident); |
| |
| if (c_old != NULL) { |
| this_cpu_inc(si->stats_pcpu->connection_create_collisions64); |
| |
| /* |
| * If we already have the flow then it's likely that this |
| * request to create the connection rule contains more |
| * up-to-date information. Check and update accordingly. |
| */ |
| sfe_ipv4_update_protocol_state(c, msg); |
| spin_unlock_bh(&si->lock); |
| |
| kfree(reply_cm); |
| kfree(original_cm); |
| kfree(c); |
| |
| dev_put(src_dev); |
| dev_put(dest_dev); |
| |
| DEBUG_TRACE("%px: connection already exists - p:%d\n" |
| " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n", |
| msg, tuple->protocol, |
| src_dev->name, msg->conn_rule.flow_mac, &tuple->flow_ip, ntohs(tuple->flow_ident), |
| dest_dev->name, msg->conn_rule.return_mac, &tuple->return_ip, ntohs(tuple->return_ident)); |
| |
| return -EADDRINUSE; |
| } |
| |
| /* |
| * Fill in the "original" direction connection matching object. |
| * Note that the transmit MAC address is "dest_mac_xlate" because |
| * we always know both ends of a connection by their translated |
| * addresses and not their public addresses. |
| */ |
| original_cm->match_dev = src_dev; |
| original_cm->match_protocol = tuple->protocol; |
| original_cm->match_src_ip = tuple->flow_ip; |
| original_cm->match_src_port = netif_is_vxlan(src_dev) ? 0 : tuple->flow_ident; |
| original_cm->match_dest_ip = tuple->return_ip; |
| original_cm->match_dest_port = tuple->return_ident; |
| |
| original_cm->xlate_src_ip = msg->conn_rule.flow_ip_xlate; |
| original_cm->xlate_src_port = msg->conn_rule.flow_ident_xlate; |
| original_cm->xlate_dest_ip = msg->conn_rule.return_ip_xlate; |
| original_cm->xlate_dest_port = msg->conn_rule.return_ident_xlate; |
| |
| if (tuple->protocol == IPPROTO_GRE) { |
| /* |
| * the PPTP is 4 tuple lookup. |
| * During th rule lookup destination call id from packet |
| * is matched against destination port in cm. |
| */ |
| original_cm->match_src_port = 0; |
| original_cm->xlate_src_port = 0; |
| } |
| |
| original_cm->xmit_dev = dest_dev; |
| original_cm->xmit_dev_mtu = msg->conn_rule.return_mtu; |
| |
| original_cm->connection = c; |
| original_cm->counter_match = reply_cm; |
| |
| /* |
| * UDP Socket is valid only in decap direction. |
| */ |
| RCU_INIT_POINTER(original_cm->up, NULL); |
| |
| if (msg->valid_flags & SFE_RULE_CREATE_MARK_VALID) { |
| original_cm->mark = msg->mark_rule.flow_mark; |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_MARK; |
| } |
| if (msg->valid_flags & SFE_RULE_CREATE_QOS_VALID) { |
| original_cm->priority = msg->qos_rule.flow_qos_tag; |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; |
| } |
| if (msg->valid_flags & SFE_RULE_CREATE_DSCP_MARKING_VALID) { |
| original_cm->dscp = msg->dscp_rule.flow_dscp << SFE_IPV4_DSCP_SHIFT; |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK; |
| } |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_BRIDGE_FLOW) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_BRIDGE_FLOW; |
| } |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_FLOW_TRANSMIT_FAST) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_XMIT_DEV_ADMISSION; |
| } |
| |
| /* |
| * Add VLAN rule to original_cm |
| */ |
| if (msg->valid_flags & SFE_RULE_CREATE_VLAN_VALID) { |
| struct sfe_vlan_rule *vlan_primary_rule = &msg->vlan_primary_rule; |
| struct sfe_vlan_rule *vlan_secondary_rule = &msg->vlan_secondary_rule; |
| sfe_ipv4_match_entry_set_vlan(original_cm, |
| vlan_primary_rule->ingress_vlan_tag, |
| vlan_primary_rule->egress_vlan_tag, |
| vlan_secondary_rule->ingress_vlan_tag, |
| vlan_secondary_rule->egress_vlan_tag); |
| |
| if ((msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_RETURN_BOTTOM_INTERFACE) && |
| original_cm->egress_vlan_hdr_cnt > 0) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_INSERT_EGRESS_VLAN_TAG; |
| original_cm->l2_hdr_size += original_cm->egress_vlan_hdr_cnt * VLAN_HLEN; |
| } |
| } |
| |
| if ((IPPROTO_GRE == tuple->protocol) && !sfe_ipv4_is_local_ip(si, original_cm->match_dest_ip)) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PASSTHROUGH; |
| } |
| |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| original_cm->flow_cookie = 0; |
| #endif |
| #ifdef CONFIG_XFRM |
| if (msg->valid_flags & SFE_RULE_CREATE_DIRECTION_VALID) { |
| original_cm->flow_accel = msg->direction_rule.flow_accel; |
| } else { |
| original_cm->flow_accel = 1; |
| } |
| #endif |
| /* |
| * If l2_features are disabled and flow uses l2 features such as macvlan/bridge/pppoe/vlan, |
| * bottom interfaces are expected to be disabled in the flow rule and always top interfaces |
| * are used. In such cases, do not use HW csum offload. csum offload is used only when we |
| * are sending directly to the destination interface that supports it. |
| */ |
| if (likely(dest_dev->features & NETIF_F_HW_CSUM) && sfe_dev_has_hw_csum(dest_dev)) { |
| if ((msg->conn_rule.return_top_interface_num == msg->conn_rule.return_interface_num) || |
| (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_RETURN_BOTTOM_INTERFACE)) { |
| |
| /* |
| * Dont enable CSUM offload |
| */ |
| #if 0 |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD; |
| #endif |
| } |
| } |
| |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_FLOW_SRC_INTERFACE_CHECK) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_SRC_INTERFACE_CHECK; |
| } |
| |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_FLOW_SRC_INTERFACE_CHECK_NO_FLUSH) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_SRC_INTERFACE_CHECK_NO_FLUSH; |
| } |
| |
| /* |
| * Adding PPPoE parameters to original and reply entries based on the direction where |
| * PPPoE header is valid in ECM rule. |
| * |
| * If PPPoE is valid in flow direction (from interface is PPPoE), then |
| * original cm will have PPPoE at ingress (strip PPPoE header) |
| * reply cm will have PPPoE at egress (add PPPoE header) |
| * |
| * If PPPoE is valid in return direction (to interface is PPPoE), then |
| * original cm will have PPPoE at egress (add PPPoE header) |
| * reply cm will have PPPoE at ingress (strip PPPoE header) |
| */ |
| if (msg->valid_flags & SFE_RULE_CREATE_PPPOE_DECAP_VALID) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_DECAP; |
| original_cm->pppoe_session_id = msg->pppoe_rule.flow_pppoe_session_id; |
| ether_addr_copy(original_cm->pppoe_remote_mac, msg->pppoe_rule.flow_pppoe_remote_mac); |
| |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_ENCAP; |
| reply_cm->l2_hdr_size += SFE_PPPOE_SESSION_HEADER_SIZE; |
| reply_cm->pppoe_session_id = msg->pppoe_rule.flow_pppoe_session_id; |
| ether_addr_copy(reply_cm->pppoe_remote_mac, msg->pppoe_rule.flow_pppoe_remote_mac); |
| } |
| |
| if (msg->valid_flags & SFE_RULE_CREATE_PPPOE_ENCAP_VALID) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_ENCAP; |
| original_cm->l2_hdr_size += SFE_PPPOE_SESSION_HEADER_SIZE; |
| original_cm->pppoe_session_id = msg->pppoe_rule.return_pppoe_session_id; |
| ether_addr_copy(original_cm->pppoe_remote_mac, msg->pppoe_rule.return_pppoe_remote_mac); |
| |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_DECAP; |
| reply_cm->pppoe_session_id = msg->pppoe_rule.return_pppoe_session_id; |
| ether_addr_copy(reply_cm->pppoe_remote_mac, msg->pppoe_rule.return_pppoe_remote_mac); |
| } |
| |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_RETURN_SRC_INTERFACE_CHECK) { |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_SRC_INTERFACE_CHECK; |
| } |
| |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_RETURN_SRC_INTERFACE_CHECK_NO_FLUSH) { |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_SRC_INTERFACE_CHECK_NO_FLUSH; |
| } |
| |
| /* |
| * For the non-arp interface, we don't write L2 HDR. |
| */ |
| if (sfe_ipv4_xmit_eth_type_check(dest_dev, original_cm->flags)) { |
| |
| /* |
| * Check whether the rule has configured a specific source MAC address to use. |
| * This is needed when virtual L3 interfaces such as br-lan, macvlan, vlan are used during egress |
| */ |
| |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_BRIDGE_FLOW) { |
| ether_addr_copy((u8 *)original_cm->xmit_src_mac, (u8 *)msg->conn_rule.flow_mac); |
| } else { |
| if ((msg->valid_flags & SFE_RULE_CREATE_SRC_MAC_VALID) && |
| (msg->src_mac_rule.mac_valid_flags & SFE_SRC_MAC_RETURN_VALID)) { |
| ether_addr_copy((u8 *)original_cm->xmit_src_mac, (u8 *)msg->src_mac_rule.return_src_mac); |
| } else { |
| ether_addr_copy((u8 *)original_cm->xmit_src_mac, (u8 *)dest_dev->dev_addr); |
| } |
| } |
| |
| ether_addr_copy((u8 *)original_cm->xmit_dest_mac, (u8 *)msg->conn_rule.return_mac); |
| |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; |
| original_cm->l2_hdr_size += ETH_HLEN; |
| |
| /* |
| * If our dev writes Ethernet headers then we can write a really fast |
| * version. |
| */ |
| if (dest_dev->header_ops) { |
| if (dest_dev->header_ops->create == eth_header) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; |
| } |
| } |
| } |
| |
| /* |
| * Fill in the "reply" direction connection matching object. |
| */ |
| reply_cm->match_dev = dest_dev; |
| reply_cm->match_protocol = tuple->protocol; |
| reply_cm->match_src_ip = msg->conn_rule.return_ip_xlate; |
| |
| /* |
| * Keep source port as 0 for VxLAN tunnels. |
| */ |
| if (netif_is_vxlan(src_dev) || netif_is_vxlan(dest_dev)) { |
| reply_cm->match_src_port = 0; |
| } else { |
| reply_cm->match_src_port = msg->conn_rule.return_ident_xlate; |
| } |
| |
| reply_cm->match_dest_ip = msg->conn_rule.flow_ip_xlate; |
| reply_cm->match_dest_port = msg->conn_rule.flow_ident_xlate; |
| |
| reply_cm->xlate_src_ip = tuple->return_ip; |
| reply_cm->xlate_src_port = tuple->return_ident; |
| reply_cm->xlate_dest_ip = tuple->flow_ip; |
| reply_cm->xlate_dest_port = tuple->flow_ident; |
| |
| if (tuple->protocol == IPPROTO_GRE) { |
| /* |
| * the PPTP is 4 tuple lookup. |
| * During th rule lookup destination call id from packet |
| * is matched against destination port in cm. |
| */ |
| reply_cm->match_src_port = 0; |
| reply_cm->xlate_src_port = 0; |
| } |
| |
| reply_cm->xmit_dev = src_dev; |
| reply_cm->xmit_dev_mtu = msg->conn_rule.flow_mtu; |
| |
| reply_cm->connection = c; |
| reply_cm->counter_match = original_cm; |
| |
| if (msg->valid_flags & SFE_RULE_CREATE_MARK_VALID) { |
| reply_cm->mark = msg->mark_rule.return_mark; |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_MARK; |
| } |
| if (msg->valid_flags & SFE_RULE_CREATE_QOS_VALID) { |
| reply_cm->priority = msg->qos_rule.return_qos_tag; |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; |
| } |
| |
| if (msg->valid_flags & SFE_RULE_CREATE_DSCP_MARKING_VALID) { |
| reply_cm->dscp = msg->dscp_rule.return_dscp << SFE_IPV4_DSCP_SHIFT; |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK; |
| } |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_BRIDGE_FLOW) { |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_BRIDGE_FLOW; |
| } |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_RETURN_TRANSMIT_FAST) { |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_XMIT_DEV_ADMISSION; |
| } |
| |
| if ((IPPROTO_GRE == tuple->protocol) && !sfe_ipv4_is_local_ip(si, reply_cm->match_dest_ip)) { |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PASSTHROUGH; |
| } |
| |
| /* |
| * Setup UDP Socket if found to be valid for decap. |
| */ |
| RCU_INIT_POINTER(reply_cm->up, NULL); |
| net = dev_net(reply_cm->match_dev); |
| src_if_idx = src_dev->ifindex; |
| |
| rcu_read_lock(); |
| |
| /* |
| * Look for the associated sock object. |
| * __udp4_lib_lookup() holds a reference for this sock object, |
| * which will be released in sfe_ipv4_free_connection_rcu() |
| */ |
| #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) |
| sk = __udp4_lib_lookup(net, reply_cm->match_dest_ip, reply_cm->match_dest_port, |
| reply_cm->xlate_src_ip, reply_cm->xlate_src_port, src_if_idx, &udp_table); |
| #else |
| sk = __udp4_lib_lookup(net, reply_cm->match_dest_ip, reply_cm->match_dest_port, |
| reply_cm->xlate_src_ip, reply_cm->xlate_src_port, src_if_idx, 0, &udp_table, NULL); |
| #endif |
| |
| rcu_read_unlock(); |
| |
| /* |
| * We set the UDP sock pointer as valid only for decap direction. |
| */ |
| if (sk && udp_sk(sk)->encap_type) { |
| #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) |
| if (!atomic_add_unless(&sk->sk_refcnt, 1, 0)) { |
| #else |
| if (!refcount_inc_not_zero(&sk->sk_refcnt)) { |
| #endif |
| spin_unlock_bh(&si->lock); |
| kfree(reply_cm); |
| kfree(original_cm); |
| kfree(c); |
| |
| DEBUG_TRACE("%px: sfe: unable to take reference for socket(%px) p:%d\n" |
| " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n", |
| msg, sk, tuple->protocol, |
| src_dev->name, msg->conn_rule.flow_mac, &tuple->flow_ip, ntohs(tuple->flow_ident), |
| dest_dev->name, msg->conn_rule.return_mac, &tuple->return_ip, ntohs(tuple->return_ident)); |
| |
| dev_put(src_dev); |
| dev_put(dest_dev); |
| |
| return -ESHUTDOWN; |
| } |
| |
| rcu_assign_pointer(reply_cm->up, udp_sk(sk)); |
| |
| DEBUG_INFO("%px: Sock(%px) lookup success with reply_cm direction\n", msg, sk); |
| DEBUG_INFO("%px: SFE connection -\n" |
| " s: %s:%pI4(%pI4):%u(%u)\n" |
| " d: %s:%pI4(%pI4):%u(%u)\n", |
| msg, reply_cm->match_dev->name, &reply_cm->match_src_ip, &reply_cm->xlate_src_ip, |
| ntohs(reply_cm->match_src_port), ntohs(reply_cm->xlate_src_port), |
| reply_cm->xmit_dev->name, &reply_cm->match_dest_ip, &reply_cm->xlate_dest_ip, |
| ntohs(reply_cm->match_dest_port), ntohs(reply_cm->xlate_dest_port)); |
| } |
| |
| /* |
| * Add VLAN rule to reply_cm |
| */ |
| if (msg->valid_flags & SFE_RULE_CREATE_VLAN_VALID) { |
| struct sfe_vlan_rule *vlan_primary_rule = &msg->vlan_primary_rule; |
| struct sfe_vlan_rule *vlan_secondary_rule = &msg->vlan_secondary_rule; |
| sfe_ipv4_match_entry_set_vlan(reply_cm, |
| vlan_primary_rule->egress_vlan_tag, |
| vlan_primary_rule->ingress_vlan_tag, |
| vlan_secondary_rule->egress_vlan_tag, |
| vlan_secondary_rule->ingress_vlan_tag); |
| |
| if ((msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_FLOW_BOTTOM_INTERFACE) && |
| reply_cm->egress_vlan_hdr_cnt > 0) { |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_INSERT_EGRESS_VLAN_TAG; |
| reply_cm->l2_hdr_size += reply_cm->egress_vlan_hdr_cnt * VLAN_HLEN; |
| } |
| } |
| |
| /* |
| * the net_protocol handler will be used only in decap path |
| * for non passthrough case. |
| */ |
| original_cm->proto = NULL; |
| reply_cm->proto = NULL; |
| |
| #ifdef SFE_GRE_TUN_ENABLE |
| if ((IPPROTO_GRE == tuple->protocol) && !(reply_cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PASSTHROUGH)) { |
| rcu_read_lock(); |
| reply_cm->proto = rcu_dereference(inet_protos[IPPROTO_GRE]); |
| rcu_read_unlock(); |
| |
| if (unlikely(!reply_cm->proto)) { |
| kfree(reply_cm); |
| kfree(original_cm); |
| kfree(c); |
| dev_put(src_dev); |
| dev_put(dest_dev); |
| DEBUG_WARN("sfe: GRE proto handler is not registered\n"); |
| return -EPERM; |
| } |
| } |
| #endif |
| |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| reply_cm->flow_cookie = 0; |
| #endif |
| #ifdef CONFIG_XFRM |
| if (msg->valid_flags & SFE_RULE_CREATE_DIRECTION_VALID) { |
| reply_cm->flow_accel = msg->direction_rule.return_accel; |
| } else { |
| reply_cm->flow_accel = 1; |
| } |
| |
| #endif |
| /* |
| * If l2_features are disabled and flow uses l2 features such as macvlan/bridge/pppoe/vlan, |
| * bottom interfaces are expected to be disabled in the flow rule and always top interfaces |
| * are used. In such cases, do not use HW csum offload. csum offload is used only when we |
| * are sending directly to the destination interface that supports it. |
| */ |
| if (likely(src_dev->features & NETIF_F_HW_CSUM) && sfe_dev_has_hw_csum(src_dev)) { |
| if ((msg->conn_rule.flow_top_interface_num == msg->conn_rule.flow_interface_num) || |
| (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_FLOW_BOTTOM_INTERFACE)) { |
| /* |
| * Dont enable CSUM offload |
| */ |
| #if 0 |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD; |
| #endif |
| } |
| } |
| |
| /* |
| * For the non-arp interface, we don't write L2 HDR. |
| */ |
| if (sfe_ipv4_xmit_eth_type_check(src_dev, reply_cm->flags)) { |
| |
| /* |
| * Check whether the rule has configured a specific source MAC address to use. |
| * This is needed when virtual L3 interfaces such as br-lan, macvlan, vlan are used during egress |
| */ |
| |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_BRIDGE_FLOW) { |
| ether_addr_copy((u8 *)reply_cm->xmit_src_mac, (u8 *)msg->conn_rule.return_mac); |
| } else { |
| if ((msg->valid_flags & SFE_RULE_CREATE_SRC_MAC_VALID) && |
| (msg->src_mac_rule.mac_valid_flags & SFE_SRC_MAC_FLOW_VALID)) { |
| ether_addr_copy((u8 *)reply_cm->xmit_src_mac, (u8 *)msg->src_mac_rule.flow_src_mac); |
| } else { |
| ether_addr_copy((u8 *)reply_cm->xmit_src_mac, (u8 *)src_dev->dev_addr); |
| } |
| } |
| |
| ether_addr_copy((u8 *)reply_cm->xmit_dest_mac, (u8 *)msg->conn_rule.flow_mac); |
| |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; |
| reply_cm->l2_hdr_size += ETH_HLEN; |
| |
| /* |
| * If our dev writes Ethernet headers then we can write a really fast |
| * version. |
| */ |
| if (src_dev->header_ops) { |
| if (src_dev->header_ops->create == eth_header) { |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; |
| } |
| } |
| } |
| |
| if ((tuple->return_ip != msg->conn_rule.return_ip_xlate) || |
| (tuple->return_ident != msg->conn_rule.return_ident_xlate)) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST; |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC; |
| } |
| |
| if ((tuple->flow_ip != msg->conn_rule.flow_ip_xlate) || |
| (tuple->flow_ident != msg->conn_rule.flow_ident_xlate)) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC; |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST; |
| } |
| |
| /* |
| * Initialize the protocol-specific information that we track. |
| */ |
| switch (tuple->protocol) { |
| case IPPROTO_TCP: |
| original_cm->protocol_state.tcp.win_scale = msg->tcp_rule.flow_window_scale; |
| original_cm->protocol_state.tcp.max_win = msg->tcp_rule.flow_max_window ? msg->tcp_rule.flow_max_window : 1; |
| original_cm->protocol_state.tcp.end = msg->tcp_rule.flow_end; |
| original_cm->protocol_state.tcp.max_end = msg->tcp_rule.flow_max_end; |
| |
| reply_cm->protocol_state.tcp.win_scale = msg->tcp_rule.return_window_scale; |
| reply_cm->protocol_state.tcp.max_win = msg->tcp_rule.return_max_window ? msg->tcp_rule.return_max_window : 1; |
| reply_cm->protocol_state.tcp.end = msg->tcp_rule.return_end; |
| reply_cm->protocol_state.tcp.max_end = msg->tcp_rule.return_max_end; |
| |
| if (msg->rule_flags & SFE_RULE_CREATE_FLAG_NO_SEQ_CHECK) { |
| original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; |
| reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; |
| } |
| break; |
| } |
| |
| /* |
| * Fill in the ipv4_connection object. |
| */ |
| c->protocol = tuple->protocol; |
| c->src_ip = tuple->flow_ip; |
| c->src_ip_xlate = msg->conn_rule.flow_ip_xlate; |
| c->src_port = tuple->flow_ident; |
| c->src_port_xlate = msg->conn_rule.flow_ident_xlate; |
| c->original_dev = src_dev; |
| c->original_match = original_cm; |
| c->dest_ip = tuple->return_ip; |
| c->dest_ip_xlate = msg->conn_rule.return_ip_xlate; |
| c->dest_port = tuple->return_ident; |
| c->dest_port_xlate = msg->conn_rule.return_ident_xlate; |
| c->reply_dev = dest_dev; |
| c->reply_match = reply_cm; |
| c->debug_read_seq = 0; |
| c->last_sync_jiffies = get_jiffies_64(); |
| c->removed = false; |
| |
| sfe_ipv4_connection_match_compute_translations(original_cm); |
| sfe_ipv4_connection_match_compute_translations(reply_cm); |
| sfe_ipv4_insert_connection(si, c); |
| |
| spin_unlock_bh(&si->lock); |
| |
| /* |
| * We have everything we need! |
| */ |
| DEBUG_INFO("NEW connection - p: %d\n" |
| "original_cm: match_dev=src_dev: %s %d %pM\n" |
| " xmit_dev=dest_dev: %s %d %pM\n" |
| " xmit_src_mac: %pM\n" |
| " xmit_dest_mac: %pM\n" |
| " flags: %x l2_hdr: %u\n" |
| "flow_ip: %pI4:%u\n" |
| "flow_ip_xlate: %pI4:%u\n" |
| "flow_mac: %pM\n" |
| "reply_cm: match_dev=dest_dev: %s %d %pM\n" |
| " xmit_dev=src_dev: %s %d %pM\n" |
| " xmit_src_mac: %pM\n" |
| " xmit_dest_mac: %pM\n" |
| " flags: %x l2_hdr: %u\n" |
| "return_ip: %pI4:%u\n" |
| "return_ip_xlate: %pI4:%u\n" |
| "return_mac: %pM\n" |
| "flags: valid=%x src_mac_valid=%x\n", |
| tuple->protocol, |
| original_cm->match_dev->name, original_cm->match_dev->ifindex, original_cm->match_dev->dev_addr, |
| original_cm->xmit_dev->name, original_cm->xmit_dev->ifindex, original_cm->xmit_dev->dev_addr, |
| original_cm->xmit_src_mac, original_cm->xmit_dest_mac, original_cm->flags, original_cm->l2_hdr_size, |
| &tuple->flow_ip, ntohs(tuple->flow_ident), |
| &msg->conn_rule.flow_ip_xlate, ntohs(msg->conn_rule.flow_ident_xlate), |
| msg->conn_rule.flow_mac, |
| reply_cm->match_dev->name, reply_cm->match_dev->ifindex, reply_cm->match_dev->dev_addr, |
| reply_cm->xmit_dev->name, reply_cm->xmit_dev->ifindex, reply_cm->xmit_dev->dev_addr, |
| reply_cm->xmit_src_mac, reply_cm->xmit_dest_mac, reply_cm->flags, reply_cm->l2_hdr_size, |
| &tuple->return_ip, ntohs(tuple->return_ident), |
| &msg->conn_rule.return_ip_xlate, ntohs(msg->conn_rule.return_ident_xlate), |
| msg->conn_rule.return_mac, |
| msg->valid_flags, msg->src_mac_rule.mac_valid_flags); |
| |
| return 0; |
| } |
| |
| /* |
| * sfe_ipv4_destroy_rule() |
| * Destroy a forwarding rule. |
| */ |
| void sfe_ipv4_destroy_rule(struct sfe_ipv4_rule_destroy_msg *msg) |
| { |
| struct sfe_ipv4 *si = &__si; |
| struct sfe_ipv4_connection *c; |
| bool ret; |
| struct sfe_ipv4_5tuple *tuple = &msg->tuple; |
| |
| this_cpu_inc(si->stats_pcpu->connection_destroy_requests64); |
| spin_lock_bh(&si->lock); |
| |
| /* |
| * Check to see if we have a flow that matches the rule we're trying |
| * to destroy. If there isn't then we can't destroy it. |
| */ |
| c = sfe_ipv4_find_connection(si, tuple->protocol, tuple->flow_ip, tuple->flow_ident, |
| tuple->return_ip, tuple->return_ident); |
| if (!c) { |
| spin_unlock_bh(&si->lock); |
| this_cpu_inc(si->stats_pcpu->connection_destroy_misses64); |
| |
| DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n", |
| tuple->protocol, &tuple->flow_ip, ntohs(tuple->flow_ident), |
| &tuple->return_ip, ntohs(tuple->return_ident)); |
| return; |
| } |
| |
| /* |
| * Remove our connection details from the hash tables. |
| */ |
| ret = sfe_ipv4_remove_connection(si, c); |
| spin_unlock_bh(&si->lock); |
| |
| if (ret) { |
| sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_DESTROY); |
| } |
| |
| DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n", |
| tuple->protocol, &tuple->flow_ip, ntohs(tuple->flow_ident), |
| &tuple->return_ip, ntohs(tuple->return_ident)); |
| } |
| |
| /* |
| * sfe_ipv4_register_sync_rule_callback() |
| * Register a callback for rule synchronization. |
| */ |
| void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback) |
| { |
| struct sfe_ipv4 *si = &__si; |
| |
| spin_lock_bh(&si->lock); |
| rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback); |
| spin_unlock_bh(&si->lock); |
| } |
| |
| /* |
| * sfe_ipv4_get_debug_dev() |
| */ |
| static ssize_t sfe_ipv4_get_debug_dev(struct device *dev, |
| struct device_attribute *attr, |
| char *buf) |
| { |
| struct sfe_ipv4 *si = &__si; |
| ssize_t count; |
| int num; |
| |
| spin_lock_bh(&si->lock); |
| num = si->debug_dev; |
| spin_unlock_bh(&si->lock); |
| |
| count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num); |
| return count; |
| } |
| |
| /* |
| * sysfs attributes. |
| */ |
| static const struct device_attribute sfe_ipv4_debug_dev_attr = |
| __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv4_get_debug_dev, NULL); |
| |
| /* |
| * sfe_ipv4_destroy_all_rules_for_dev() |
| * Destroy all connections that match a particular device. |
| * |
| * If we pass dev as NULL then this destroys all connections. |
| */ |
| void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev) |
| { |
| struct sfe_ipv4 *si = &__si; |
| struct sfe_ipv4_connection *c; |
| bool ret; |
| |
| another_round: |
| spin_lock_bh(&si->lock); |
| |
| for (c = si->all_connections_head; c; c = c->all_connections_next) { |
| /* |
| * Does this connection relate to the device we are destroying? |
| */ |
| if (!dev |
| || (dev == c->original_dev) |
| || (dev == c->reply_dev)) { |
| break; |
| } |
| } |
| |
| if (c) { |
| ret = sfe_ipv4_remove_connection(si, c); |
| } |
| |
| spin_unlock_bh(&si->lock); |
| |
| if (c) { |
| if (ret) { |
| sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_DESTROY); |
| } |
| goto another_round; |
| } |
| } |
| |
| /* |
| * sfe_ipv4_periodic_sync() |
| */ |
| static void sfe_ipv4_periodic_sync(struct work_struct *work) |
| { |
| struct sfe_ipv4 *si = container_of((struct delayed_work *)work, struct sfe_ipv4, sync_dwork); |
| u64 now_jiffies; |
| int quota; |
| sfe_sync_rule_callback_t sync_rule_callback; |
| struct sfe_ipv4_connection *c; |
| |
| now_jiffies = get_jiffies_64(); |
| |
| rcu_read_lock(); |
| sync_rule_callback = rcu_dereference(si->sync_rule_callback); |
| if (!sync_rule_callback) { |
| rcu_read_unlock(); |
| goto done; |
| } |
| |
| spin_lock_bh(&si->lock); |
| |
| /* |
| * If we have reached the end of the connection list, walk from |
| * the connection head. |
| */ |
| c = si->wc_next; |
| if (unlikely(!c)) { |
| c = si->all_connections_head; |
| } |
| |
| /* |
| * Get an estimate of the number of connections to parse in this sync. |
| */ |
| quota = (si->num_connections + 63) / 64; |
| |
| /* |
| * Walk the "all connection" list and sync the connection state. |
| */ |
| while (likely(c && quota)) { |
| struct sfe_ipv4_connection_match *cm; |
| struct sfe_ipv4_connection_match *counter_cm; |
| struct sfe_connection_sync sis; |
| |
| cm = c->original_match; |
| counter_cm = c->reply_match; |
| |
| /* |
| * Didn't receive packets in the original direction or reply |
| * direction, move to the next connection. |
| */ |
| if ((!atomic_read(&cm->rx_packet_count)) && !(atomic_read(&counter_cm->rx_packet_count))) { |
| c = c->all_connections_next; |
| continue; |
| } |
| |
| quota--; |
| |
| sfe_ipv4_gen_sync_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies); |
| |
| si->wc_next = c->all_connections_next; |
| |
| /* |
| * We don't want to be holding the lock when we sync! |
| */ |
| spin_unlock_bh(&si->lock); |
| sync_rule_callback(&sis); |
| spin_lock_bh(&si->lock); |
| |
| /* |
| * c must be set and used in the same lock/unlock window; |
| * because c could be removed when we don't hold the lock, |
| * so delay grabbing until after the callback and relock. |
| */ |
| c = si->wc_next; |
| } |
| |
| /* |
| * At the end of the sync, put the wc_next to the connection we left. |
| */ |
| si->wc_next = c; |
| |
| spin_unlock_bh(&si->lock); |
| rcu_read_unlock(); |
| |
| done: |
| schedule_delayed_work_on(si->work_cpu, (struct delayed_work *)work, ((HZ + 99) / 100)); |
| } |
| |
| #define CHAR_DEV_MSG_SIZE 768 |
| |
| /* |
| * sfe_ipv4_debug_dev_read_start() |
| * Generate part of the XML output. |
| */ |
| static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, |
| int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) |
| { |
| int bytes_read; |
| |
| si->debug_read_seq++; |
| |
| bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n"); |
| if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { |
| return false; |
| } |
| |
| *length -= bytes_read; |
| *total_read += bytes_read; |
| |
| ws->state++; |
| return true; |
| } |
| |
| /* |
| * sfe_ipv4_debug_dev_read_connections_start() |
| * Generate part of the XML output. |
| */ |
| static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, |
| int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) |
| { |
| int bytes_read; |
| |
| bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n"); |
| if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { |
| return false; |
| } |
| |
| *length -= bytes_read; |
| *total_read += bytes_read; |
| |
| ws->state++; |
| return true; |
| } |
| |
| /* |
| * sfe_ipv4_debug_dev_read_connections_connection() |
| * Generate part of the XML output. |
| */ |
| static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, |
| int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) |
| { |
| struct sfe_ipv4_connection *c; |
| struct sfe_ipv4_connection_match *original_cm; |
| struct sfe_ipv4_connection_match *reply_cm; |
| int bytes_read; |
| int protocol; |
| struct net_device *src_dev; |
| __be32 src_ip; |
| __be32 src_ip_xlate; |
| __be16 src_port; |
| __be16 src_port_xlate; |
| u64 src_rx_packets; |
| u64 src_rx_bytes; |
| struct net_device *dest_dev; |
| __be32 dest_ip; |
| __be32 dest_ip_xlate; |
| __be16 dest_port; |
| __be16 dest_port_xlate; |
| u64 dest_rx_packets; |
| u64 dest_rx_bytes; |
| u64 last_sync_jiffies; |
| u32 src_mark, dest_mark, src_priority, dest_priority, src_dscp, dest_dscp; |
| u32 packet, byte, original_cm_flags; |
| u16 pppoe_session_id; |
| u8 pppoe_remote_mac[ETH_ALEN]; |
| u32 original_fast_xmit, reply_fast_xmit; |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| int src_flow_cookie, dst_flow_cookie; |
| #endif |
| |
| spin_lock_bh(&si->lock); |
| |
| for (c = si->all_connections_head; c; c = c->all_connections_next) { |
| if (c->debug_read_seq < si->debug_read_seq) { |
| c->debug_read_seq = si->debug_read_seq; |
| break; |
| } |
| } |
| |
| /* |
| * If there were no connections then move to the next state. |
| */ |
| if (!c || c->removed) { |
| spin_unlock_bh(&si->lock); |
| ws->state++; |
| return true; |
| } |
| |
| original_cm = c->original_match; |
| reply_cm = c->reply_match; |
| |
| protocol = c->protocol; |
| src_dev = c->original_dev; |
| src_ip = c->src_ip; |
| src_ip_xlate = c->src_ip_xlate; |
| src_port = c->src_port; |
| src_port_xlate = c->src_port_xlate; |
| src_priority = original_cm->priority; |
| src_dscp = original_cm->dscp >> SFE_IPV4_DSCP_SHIFT; |
| |
| sfe_ipv4_connection_match_update_summary_stats(original_cm, &packet, &byte); |
| sfe_ipv4_connection_match_update_summary_stats(reply_cm, &packet, &byte); |
| |
| src_rx_packets = original_cm->rx_packet_count64; |
| src_rx_bytes = original_cm->rx_byte_count64; |
| src_mark = original_cm->mark; |
| original_fast_xmit = (original_cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_XMIT); |
| dest_dev = c->reply_dev; |
| dest_ip = c->dest_ip; |
| dest_ip_xlate = c->dest_ip_xlate; |
| dest_port = c->dest_port; |
| dest_port_xlate = c->dest_port_xlate; |
| dest_priority = reply_cm->priority; |
| dest_dscp = reply_cm->dscp >> SFE_IPV4_DSCP_SHIFT; |
| dest_rx_packets = reply_cm->rx_packet_count64; |
| dest_rx_bytes = reply_cm->rx_byte_count64; |
| dest_mark = reply_cm->mark; |
| reply_fast_xmit = (reply_cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_XMIT); |
| last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies; |
| original_cm_flags = original_cm->flags; |
| pppoe_session_id = original_cm->pppoe_session_id; |
| ether_addr_copy(pppoe_remote_mac, original_cm->pppoe_remote_mac); |
| |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| src_flow_cookie = original_cm->flow_cookie; |
| dst_flow_cookie = reply_cm->flow_cookie; |
| #endif |
| spin_unlock_bh(&si->lock); |
| |
| bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection " |
| "protocol=\"%u\" " |
| "src_dev=\"%s\" " |
| "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" " |
| "src_port=\"%u\" src_port_xlate=\"%u\" " |
| "src_priority=\"%u\" src_dscp=\"%u\" " |
| "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" " |
| "src_mark=\"%08x\" " |
| "src_fast_xmit=\"%s\" " |
| "dest_dev=\"%s\" " |
| "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" " |
| "dest_port=\"%u\" dest_port_xlate=\"%u\" " |
| "dest_priority=\"%u\" dest_dscp=\"%u\" " |
| "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" " |
| "dest_mark=\"%08x\" " |
| "reply_fast_xmit=\"%s\" " |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" " |
| #endif |
| "last_sync=\"%llu\" ", |
| protocol, |
| src_dev->name, |
| &src_ip, &src_ip_xlate, |
| ntohs(src_port), ntohs(src_port_xlate), |
| src_priority, src_dscp, |
| src_rx_packets, src_rx_bytes, |
| src_mark, |
| original_fast_xmit ? "Yes" : "No", |
| dest_dev->name, |
| &dest_ip, &dest_ip_xlate, |
| ntohs(dest_port), ntohs(dest_port_xlate), |
| dest_priority, dest_dscp, |
| dest_rx_packets, dest_rx_bytes, |
| dest_mark, |
| reply_fast_xmit ? "Yes" : "No", |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| src_flow_cookie, dst_flow_cookie, |
| #endif |
| last_sync_jiffies); |
| |
| if (original_cm_flags &= (SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_DECAP | SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_ENCAP)) { |
| bytes_read += snprintf(msg + bytes_read, CHAR_DEV_MSG_SIZE, "pppoe_session_id=\"%u\" pppoe_server MAC=\"%pM\" ", |
| pppoe_session_id, pppoe_remote_mac); |
| } |
| |
| bytes_read += snprintf(msg + bytes_read, CHAR_DEV_MSG_SIZE, "/>\n"); |
| |
| if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { |
| return false; |
| } |
| |
| *length -= bytes_read; |
| *total_read += bytes_read; |
| |
| return true; |
| } |
| |
| /* |
| * sfe_ipv4_debug_dev_read_connections_end() |
| * Generate part of the XML output. |
| */ |
| static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, |
| int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) |
| { |
| int bytes_read; |
| |
| bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n"); |
| if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { |
| return false; |
| } |
| |
| *length -= bytes_read; |
| *total_read += bytes_read; |
| |
| ws->state++; |
| return true; |
| } |
| |
| /* |
| * sfe_ipv4_debug_dev_read_exceptions_start() |
| * Generate part of the XML output. |
| */ |
| static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, |
| int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) |
| { |
| int bytes_read; |
| |
| bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n"); |
| if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { |
| return false; |
| } |
| |
| *length -= bytes_read; |
| *total_read += bytes_read; |
| |
| ws->state++; |
| return true; |
| } |
| |
| /* |
| * sfe_ipv4_debug_dev_read_exceptions_exception() |
| * Generate part of the XML output. |
| */ |
| static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, |
| int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) |
| { |
| int i; |
| u64 val = 0; |
| |
| for_each_possible_cpu(i) { |
| const struct sfe_ipv4_stats *s = per_cpu_ptr(si->stats_pcpu, i); |
| val += s->exception_events64[ws->iter_exception]; |
| } |
| |
| if (val) { |
| int bytes_read; |
| |
| bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, |
| "\t\t<exception name=\"%s\" count=\"%llu\" />\n", |
| sfe_ipv4_exception_events_string[ws->iter_exception], |
| val); |
| if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { |
| return false; |
| } |
| |
| *length -= bytes_read; |
| *total_read += bytes_read; |
| } |
| |
| ws->iter_exception++; |
| if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) { |
| ws->iter_exception = 0; |
| ws->state++; |
| } |
| |
| return true; |
| } |
| |
| /* |
| * sfe_ipv4_debug_dev_read_exceptions_end() |
| * Generate part of the XML output. |
| */ |
| static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, |
| int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) |
| { |
| int bytes_read; |
| |
| bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n"); |
| if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { |
| return false; |
| } |
| |
| *length -= bytes_read; |
| *total_read += bytes_read; |
| |
| ws->state++; |
| return true; |
| } |
| |
| /* |
| * sfe_ipv4_debug_dev_read_stats() |
| * Generate part of the XML output. |
| */ |
| static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, |
| int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) |
| { |
| int bytes_read; |
| struct sfe_ipv4_stats stats; |
| unsigned int num_conn; |
| |
| sfe_ipv4_update_summary_stats(si, &stats); |
| |
| spin_lock_bh(&si->lock); |
| num_conn = si->num_connections; |
| spin_unlock_bh(&si->lock); |
| |
| bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats " |
| "num_connections=\"%u\" " |
| "pkts_dropped=\"%llu\" " |
| "pkts_fast_xmited=\"%llu\" " |
| "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" " |
| "create_requests=\"%llu\" create_collisions=\"%llu\" " |
| "create_failures=\"%llu\" " |
| "destroy_requests=\"%llu\" destroy_misses=\"%llu\" " |
| "flushes=\"%llu\" " |
| "hash_hits=\"%llu\" hash_reorders=\"%llu\" " |
| "pppoe_encap_pkts_fwded=\"%llu\" " |
| "pppoe_decap_pkts_fwded=\"%llu\" " |
| "pppoe_bridge_pkts_fwded=\"%llu\" />\n", |
| num_conn, |
| stats.packets_dropped64, |
| stats.packets_fast_xmited64, |
| stats.packets_forwarded64, |
| stats.packets_not_forwarded64, |
| stats.connection_create_requests64, |
| stats.connection_create_collisions64, |
| stats.connection_create_failures64, |
| stats.connection_destroy_requests64, |
| stats.connection_destroy_misses64, |
| stats.connection_flushes64, |
| stats.connection_match_hash_hits64, |
| stats.connection_match_hash_reorders64, |
| stats.pppoe_encap_packets_forwarded64, |
| stats.pppoe_decap_packets_forwarded64, |
| stats.pppoe_bridge_packets_forwarded64); |
| if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { |
| return false; |
| } |
| |
| *length -= bytes_read; |
| *total_read += bytes_read; |
| |
| ws->state++; |
| return true; |
| } |
| |
| /* |
| * sfe_ipv4_debug_dev_read_end() |
| * Generate part of the XML output. |
| */ |
| static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, |
| int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) |
| { |
| int bytes_read; |
| |
| bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n"); |
| if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { |
| return false; |
| } |
| |
| *length -= bytes_read; |
| *total_read += bytes_read; |
| |
| ws->state++; |
| return true; |
| } |
| |
| /* |
| * Array of write functions that write various XML elements that correspond to |
| * our XML output state machine. |
| */ |
| static sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = { |
| sfe_ipv4_debug_dev_read_start, |
| sfe_ipv4_debug_dev_read_connections_start, |
| sfe_ipv4_debug_dev_read_connections_connection, |
| sfe_ipv4_debug_dev_read_connections_end, |
| sfe_ipv4_debug_dev_read_exceptions_start, |
| sfe_ipv4_debug_dev_read_exceptions_exception, |
| sfe_ipv4_debug_dev_read_exceptions_end, |
| sfe_ipv4_debug_dev_read_stats, |
| sfe_ipv4_debug_dev_read_end, |
| }; |
| |
| /* |
| * sfe_ipv4_debug_dev_read() |
| * Send info to userspace upon read request from user |
| */ |
| static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset) |
| { |
| char msg[CHAR_DEV_MSG_SIZE]; |
| int total_read = 0; |
| struct sfe_ipv4_debug_xml_write_state *ws; |
| struct sfe_ipv4 *si = &__si; |
| |
| ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data; |
| while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) { |
| if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) { |
| continue; |
| } |
| } |
| |
| return total_read; |
| } |
| |
| /* |
| * sfe_ipv4_debug_dev_open() |
| */ |
| static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file) |
| { |
| struct sfe_ipv4_debug_xml_write_state *ws; |
| |
| ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data; |
| if (!ws) { |
| ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL); |
| if (!ws) { |
| return -ENOMEM; |
| } |
| |
| ws->state = SFE_IPV4_DEBUG_XML_STATE_START; |
| file->private_data = ws; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * sfe_ipv4_debug_dev_release() |
| */ |
| static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file) |
| { |
| struct sfe_ipv4_debug_xml_write_state *ws; |
| |
| ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data; |
| if (ws) { |
| /* |
| * We've finished with our output so free the write state. |
| */ |
| kfree(ws); |
| file->private_data = NULL; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * File operations used in the debug char device |
| */ |
| static struct file_operations sfe_ipv4_debug_dev_fops = { |
| .read = sfe_ipv4_debug_dev_read, |
| .open = sfe_ipv4_debug_dev_open, |
| .release = sfe_ipv4_debug_dev_release |
| }; |
| |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| /* |
| * sfe_register_flow_cookie_cb |
| * register a function in SFE to let SFE use this function to configure flow cookie for a flow |
| * |
| * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE |
| * can use this function to configure flow cookie for a flow. |
| * return: 0, success; !=0, fail |
| */ |
| int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb) |
| { |
| struct sfe_ipv4 *si = &__si; |
| |
| BUG_ON(!cb); |
| |
| if (si->flow_cookie_set_func) { |
| return -1; |
| } |
| |
| rcu_assign_pointer(si->flow_cookie_set_func, cb); |
| return 0; |
| } |
| |
| /* |
| * sfe_unregister_flow_cookie_cb |
| * unregister function which is used to configure flow cookie for a flow |
| * |
| * return: 0, success; !=0, fail |
| */ |
| int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb) |
| { |
| struct sfe_ipv4 *si = &__si; |
| |
| RCU_INIT_POINTER(si->flow_cookie_set_func, NULL); |
| return 0; |
| } |
| |
| /* |
| * sfe_ipv4_get_flow_cookie() |
| */ |
| static ssize_t sfe_ipv4_get_flow_cookie(struct device *dev, |
| struct device_attribute *attr, |
| char *buf) |
| { |
| struct sfe_ipv4 *si = &__si; |
| return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable); |
| } |
| |
| /* |
| * sfe_ipv4_set_flow_cookie() |
| */ |
| static ssize_t sfe_ipv4_set_flow_cookie(struct device *dev, |
| struct device_attribute *attr, |
| const char *buf, size_t size) |
| { |
| struct sfe_ipv4 *si = &__si; |
| si->flow_cookie_enable = simple_strtol(buf, NULL, 0); |
| |
| return size; |
| } |
| |
| /* |
| * sysfs attributes. |
| */ |
| static const struct device_attribute sfe_ipv4_flow_cookie_attr = |
| __ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie); |
| #endif /*CONFIG_NF_FLOW_COOKIE*/ |
| |
| /* |
| * sfe_ipv4_get_cpu() |
| */ |
| static ssize_t sfe_ipv4_get_cpu(struct device *dev, |
| struct device_attribute *attr, |
| char *buf) |
| { |
| struct sfe_ipv4 *si = &__si; |
| return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->work_cpu); |
| } |
| |
| /* |
| * sfe_ipv4_set_cpu() |
| */ |
| static ssize_t sfe_ipv4_set_cpu(struct device *dev, |
| struct device_attribute *attr, |
| const char *buf, size_t size) |
| { |
| struct sfe_ipv4 *si = &__si; |
| int work_cpu; |
| work_cpu = simple_strtol(buf, NULL, 0); |
| if ((work_cpu >= 0) && (work_cpu <= NR_CPUS)) { |
| si->work_cpu = work_cpu; |
| } else { |
| dev_err(dev, "%s is not in valid range[0,%d]", buf, NR_CPUS); |
| } |
| return size; |
| } |
| |
| /* |
| * sysfs attributes. |
| */ |
| static const struct device_attribute sfe_ipv4_cpu_attr = |
| __ATTR(stats_work_cpu, S_IWUSR | S_IRUGO, sfe_ipv4_get_cpu, sfe_ipv4_set_cpu); |
| |
| /* |
| * DSCP rewrite table |
| */ |
| static ssize_t |
| sfe_ipv4_get_dscp_rewrite_mark_to_match(struct device *dev, |
| struct device_attribute *attr, |
| char *buf) |
| { |
| struct sfe_ipv4 *si = &__si; |
| return snprintf(buf, (ssize_t)PAGE_SIZE, "0x%x\n", |
| si->dscp_rewrite_mark_to_match); |
| } |
| |
| static ssize_t |
| sfe_ipv4_set_dscp_rewrite_mark_to_match(struct device *dev, |
| struct device_attribute *attr, |
| const char *buf, size_t size) |
| { |
| struct sfe_ipv4 *si = &__si; |
| int ret; |
| u32 mark_to_match; |
| |
| ret = kstrtou32(buf, 0, &mark_to_match); |
| if (ret) |
| return ret; |
| si->dscp_rewrite_mark_to_match = mark_to_match; |
| return size; |
| } |
| |
| static const struct device_attribute sfe_ipv4_dscp_rewrite_mark_to_match_attr = |
| __ATTR(dscp_rewrite_mark_to_match, S_IWUSR | S_IRUGO, |
| sfe_ipv4_get_dscp_rewrite_mark_to_match, |
| sfe_ipv4_set_dscp_rewrite_mark_to_match); |
| |
| static ssize_t |
| sfe_ipv4_get_dscp_rewrite_dscp_to_set(struct device *dev, |
| struct device_attribute *attr, |
| char *buf) |
| { |
| struct sfe_ipv4 *si = &__si; |
| return snprintf(buf, (ssize_t)PAGE_SIZE, "0x%x\n", |
| si->dscp_rewrite_dscp_to_set >> SFE_IPV4_DSCP_SHIFT); |
| } |
| |
| static ssize_t |
| sfe_ipv4_set_dscp_rewrite_dscp_to_set(struct device *dev, |
| struct device_attribute *attr, |
| const char *buf, size_t size) |
| { |
| struct sfe_ipv4 *si = &__si; |
| int ret; |
| u32 dscp_to_set; |
| |
| ret = kstrtou32(buf, 0, &dscp_to_set); |
| if (ret) |
| return ret; |
| si->dscp_rewrite_dscp_to_set = dscp_to_set << SFE_IPV4_DSCP_SHIFT; |
| return size; |
| } |
| |
| static const struct device_attribute sfe_ipv4_dscp_rewrite_dscp_to_set_attr = |
| __ATTR(dscp_rewrite_dscp_to_set, S_IWUSR | S_IRUGO, |
| sfe_ipv4_get_dscp_rewrite_dscp_to_set, |
| sfe_ipv4_set_dscp_rewrite_dscp_to_set); |
| |
| /* |
| * sfe_ipv4_conn_match_hash_init() |
| * Initialize conn match hash lists |
| */ |
| static void sfe_ipv4_conn_match_hash_init(struct sfe_ipv4 *si, int len) |
| { |
| struct hlist_head *hash_list = si->hlist_conn_match_hash_head; |
| int i; |
| |
| for (i = 0; i < len; i++) { |
| INIT_HLIST_HEAD(&hash_list[i]); |
| } |
| } |
| |
| #ifdef SFE_PROCESS_LOCAL_OUT |
| /* |
| * sfe_ipv4_local_out() |
| * Called for packets from ip_local_out() - post encapsulation & other packets |
| */ |
| static unsigned int sfe_ipv4_local_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) |
| { |
| struct sfe_l2_info l2_info = {0}; |
| |
| DEBUG_TRACE("%px: sfe: sfe_ipv4_local_out hook called.\n", skb); |
| |
| if (likely(skb->skb_iif)) { |
| return sfe_ipv4_recv(skb->dev, skb, &l2_info, true) ? NF_STOLEN : NF_ACCEPT; |
| } |
| |
| return NF_ACCEPT; |
| } |
| |
| /* |
| * struct nf_hook_ops sfe_ipv4_ops_local_out[] |
| * Hooks into netfilter local out packet monitoring points. |
| */ |
| static struct nf_hook_ops sfe_ipv4_ops_local_out[] __read_mostly = { |
| |
| /* |
| * Local out routing hook is used to monitor packets. |
| */ |
| { |
| .hook = sfe_ipv4_local_out, |
| .pf = PF_INET, |
| .hooknum = NF_INET_LOCAL_OUT, |
| .priority = NF_IP_PRI_FIRST, |
| }, |
| }; |
| #endif |
| |
| /* |
| * sfe_ipv4_init() |
| */ |
| int sfe_ipv4_init(void) |
| { |
| struct sfe_ipv4 *si = &__si; |
| int result = -1; |
| |
| DEBUG_INFO("SFE IPv4 init\n"); |
| |
| sfe_ipv4_conn_match_hash_init(si, ARRAY_SIZE(si->hlist_conn_match_hash_head)); |
| |
| si->stats_pcpu = alloc_percpu_gfp(struct sfe_ipv4_stats, GFP_KERNEL | __GFP_ZERO); |
| if (!si->stats_pcpu) { |
| DEBUG_ERROR("failed to allocate stats memory for sfe_ipv4\n"); |
| goto exit0; |
| } |
| |
| /* |
| * Create sys/sfe_ipv4 |
| */ |
| si->sys_ipv4 = kobject_create_and_add("sfe_ipv4", NULL); |
| if (!si->sys_ipv4) { |
| DEBUG_ERROR("failed to register sfe_ipv4\n"); |
| goto exit1; |
| } |
| |
| /* |
| * Create files, one for each parameter supported by this module. |
| */ |
| result = sysfs_create_file(si->sys_ipv4, &sfe_ipv4_debug_dev_attr.attr); |
| if (result) { |
| DEBUG_ERROR("failed to register debug dev file: %d\n", result); |
| goto exit2; |
| } |
| |
| result = sysfs_create_file(si->sys_ipv4, &sfe_ipv4_cpu_attr.attr); |
| if (result) { |
| DEBUG_ERROR("failed to register debug dev file: %d\n", result); |
| goto exit3; |
| } |
| |
| result = sysfs_create_file(si->sys_ipv4, |
| &sfe_ipv4_dscp_rewrite_mark_to_match_attr.attr); |
| if (result) { |
| DEBUG_ERROR("failed to register DSCP rewrite mark_to_match file: %d\n", |
| result); |
| goto exit4; |
| } |
| result = sysfs_create_file(si->sys_ipv4, |
| &sfe_ipv4_dscp_rewrite_dscp_to_set_attr.attr); |
| if (result) { |
| DEBUG_ERROR("failed to register DSCP rewrite dscp_to_set file: %d\n", |
| result); |
| goto exit5; |
| } |
| |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| result = sysfs_create_file(si->sys_ipv4, &sfe_ipv4_flow_cookie_attr.attr); |
| if (result) { |
| DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result); |
| goto exit6; |
| } |
| #endif /* CONFIG_NF_FLOW_COOKIE */ |
| |
| #ifdef SFE_PROCESS_LOCAL_OUT |
| #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) |
| result = nf_register_hooks(sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out)); |
| #else |
| result = nf_register_net_hooks(&init_net, sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out)); |
| #endif |
| if (result < 0) { |
| DEBUG_ERROR("can't register nf local out hook: %d\n", result); |
| goto exit7; |
| } |
| DEBUG_INFO("Register nf local out hook success: %d\n", result); |
| #endif |
| /* |
| * Register our debug char device. |
| */ |
| result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops); |
| if (result < 0) { |
| DEBUG_ERROR("Failed to register chrdev: %d\n", result); |
| goto exit8; |
| } |
| |
| si->debug_dev = result; |
| si->work_cpu = WORK_CPU_UNBOUND; |
| |
| /* |
| * Create a work to handle periodic statistics. |
| */ |
| INIT_DELAYED_WORK(&(si->sync_dwork), sfe_ipv4_periodic_sync); |
| schedule_delayed_work_on(si->work_cpu, &(si->sync_dwork), ((HZ + 99) / 100)); |
| |
| spin_lock_init(&si->lock); |
| return 0; |
| |
| exit8: |
| #ifdef SFE_PROCESS_LOCAL_OUT |
| DEBUG_TRACE("sfe: Unregister local out hook\n"); |
| #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) |
| nf_unregister_hooks(sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out)); |
| #else |
| nf_unregister_net_hooks(&init_net, sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out)); |
| #endif |
| exit7: |
| #endif |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_flow_cookie_attr.attr); |
| |
| exit6: |
| #endif /* CONFIG_NF_FLOW_COOKIE */ |
| sysfs_remove_file(si->sys_ipv4, |
| &sfe_ipv4_dscp_rewrite_dscp_to_set_attr.attr); |
| exit5: |
| sysfs_remove_file(si->sys_ipv4, |
| &sfe_ipv4_dscp_rewrite_mark_to_match_attr.attr); |
| exit4: |
| sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_cpu_attr.attr); |
| exit3: |
| sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_debug_dev_attr.attr); |
| |
| exit2: |
| kobject_put(si->sys_ipv4); |
| |
| exit1: |
| free_percpu(si->stats_pcpu); |
| |
| exit0: |
| return result; |
| } |
| |
| /* |
| * sfe_ipv4_exit() |
| */ |
| void sfe_ipv4_exit(void) |
| { |
| struct sfe_ipv4 *si = &__si; |
| |
| DEBUG_INFO("SFE IPv4 exit\n"); |
| /* |
| * Destroy all connections. |
| */ |
| sfe_ipv4_destroy_all_rules_for_dev(NULL); |
| |
| cancel_delayed_work_sync(&si->sync_dwork); |
| |
| unregister_chrdev(si->debug_dev, "sfe_ipv4"); |
| |
| #ifdef SFE_PROCESS_LOCAL_OUT |
| DEBUG_TRACE("sfe: Unregister local out hook\n"); |
| #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) |
| nf_unregister_hooks(sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out)); |
| #else |
| nf_unregister_net_hooks(&init_net, sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out)); |
| #endif |
| #endif |
| |
| #ifdef CONFIG_NF_FLOW_COOKIE |
| sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_flow_cookie_attr.attr); |
| #endif /* CONFIG_NF_FLOW_COOKIE */ |
| sysfs_remove_file(si->sys_ipv4, |
| &sfe_ipv4_dscp_rewrite_dscp_to_set_attr.attr); |
| sysfs_remove_file(si->sys_ipv4, |
| &sfe_ipv4_dscp_rewrite_mark_to_match_attr.attr); |
| sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_debug_dev_attr.attr); |
| |
| sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_cpu_attr.attr); |
| |
| kobject_put(si->sys_ipv4); |
| |
| free_percpu(si->stats_pcpu); |
| } |
| |
#ifdef CONFIG_NF_FLOW_COOKIE
/*
 * Flow cookie callback registration is exported for use by other modules.
 */
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
#endif