| /* |
| * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org> |
| * (C) 2011 by Vyatta Inc. <http://www.vyatta.com> |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, write to the Free Software |
| * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| */ |
| |
| #include "cache.h" |
| #include "hash.h" |
| #include "log.h" |
| #include "conntrackd.h" |
| #include "netlink.h" |
| #include "event.h" |
| #include "jhash.h" |
| #include "network.h" |
| |
| #include <errno.h> |
| #include <string.h> |
| #include <time.h> |
| #include <libnetfilter_conntrack/libnetfilter_conntrack.h> |
| |
| static uint32_t |
| cache_hash4_exp(const struct nf_conntrack *ct, const struct hashtable *table) |
| { |
| uint32_t a[4] = { |
| [0] = nfct_get_attr_u32(ct, ATTR_IPV4_SRC), |
| [1] = nfct_get_attr_u32(ct, ATTR_IPV4_DST), |
| [2] = nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 | |
| nfct_get_attr_u8(ct, ATTR_L4PROTO), |
| [3] = nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 | |
| nfct_get_attr_u16(ct, ATTR_PORT_DST), |
| }; |
| |
| /* |
| * Instead of returning hash % table->hashsize (implying a divide) |
| * we return the high 32 bits of the (hash * table->hashsize) that will |
| * give results between [0 and hashsize-1] and same hash distribution, |
| * but using a multiply, less expensive than a divide. See: |
| * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html |
| */ |
| return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32; |
| } |
| |
| static uint32_t |
| cache_hash6_exp(const struct nf_conntrack *ct, const struct hashtable *table) |
| { |
| uint32_t a[10]; |
| |
| memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); |
| memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_DST), sizeof(uint32_t)*4); |
| a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 | |
| nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO); |
| a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 | |
| nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST); |
| |
| return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32; |
| } |
| |
| static uint32_t |
| cache_exp_hash(const void *data, const struct hashtable *table) |
| { |
| int ret = 0; |
| const struct nf_expect *exp = data; |
| const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER); |
| |
| switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) { |
| case AF_INET: |
| ret = cache_hash4_exp(ct, table); |
| break; |
| case AF_INET6: |
| ret = cache_hash6_exp(ct, table); |
| break; |
| default: |
| dlog(LOG_ERR, "unknown layer 3 proto in hash"); |
| break; |
| } |
| return ret; |
| } |
| |
| static int cache_exp_cmp(const void *data1, const void *data2) |
| { |
| const struct cache_object *obj = data1; |
| const struct nf_expect *exp = data2; |
| |
| return nfexp_cmp(obj->ptr, exp, 0); |
| } |
| |
| static void *cache_exp_alloc(void) |
| { |
| return nfexp_new(); |
| } |
| |
| static void cache_exp_free(void *ptr) |
| { |
| nfexp_destroy(ptr); |
| } |
| |
| static void cache_exp_copy(void *dst, void *src, unsigned int flags) |
| { |
| /* XXX: add nfexp_copy(...) to libnetfilter_conntrack. */ |
| memcpy(dst, src, nfexp_maxsize()); |
| } |
| |
| static int cache_exp_dump_step(void *data1, void *n) |
| { |
| char buf[1024]; |
| int size; |
| struct __dump_container *container = data1; |
| struct cache_object *obj = n; |
| char *data = obj->data; |
| unsigned i; |
| |
| /* |
| * XXX: Do not dump the entries that are scheduled to expire. |
| * These entries talk about already destroyed connections |
| * that we keep for some time just in case that we have to |
| * resent some lost messages. We do not show them to the |
| * user as he may think that the firewall replicas are not |
| * in sync. The branch below is a hack as it is quite |
| * specific and it breaks conntrackd modularity. Probably |
| * there's a nicer way to do this but until I come up with it... |
| */ |
| if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD) |
| return 0; |
| |
| /* do not show cached timeout, this may confuse users */ |
| if (nfexp_attr_is_set(obj->ptr, ATTR_EXP_TIMEOUT)) |
| nfexp_attr_unset(obj->ptr, ATTR_EXP_TIMEOUT); |
| |
| memset(buf, 0, sizeof(buf)); |
| size = nfexp_snprintf(buf, sizeof(buf),obj->ptr, |
| NFCT_T_UNKNOWN, container->type, 0); |
| |
| for (i = 0; i < obj->cache->num_features; i++) { |
| if (obj->cache->features[i]->dump) { |
| size += obj->cache->features[i]->dump(obj, data, |
| buf+size, |
| container->type); |
| data += obj->cache->features[i]->size; |
| } |
| } |
| if (container->type != NFCT_O_XML) { |
| long tm = time(NULL); |
| size += sprintf(buf+size, " [active since %lds]", |
| tm - obj->lifetime); |
| } |
| size += sprintf(buf+size, "\n"); |
| if (send(container->fd, buf, size, 0) == -1) { |
| if (errno != EPIPE) |
| return -1; |
| } |
| |
| return 0; |
| } |
| |
| static int cache_exp_commit_step(void *data, void *n) |
| { |
| struct cache_object *obj = n; |
| struct __commit_container *tmp = data; |
| int ret, retry = 1, timeout; |
| struct nf_expect *exp = obj->ptr; |
| |
| if (CONFIG(commit_timeout)) { |
| timeout = CONFIG(commit_timeout); |
| } else { |
| timeout = time(NULL) - obj->lastupdate; |
| if (timeout < 0) { |
| /* XXX: Arbitrarily set the timer to one minute, how |
| * can this happen? For example, an adjustment due to |
| * daylight-saving. Probably other situations can |
| * trigger this. */ |
| timeout = 60; |
| } |
| /* calculate an estimation of the current timeout */ |
| timeout = nfexp_get_attr_u32(exp, ATTR_EXP_TIMEOUT) - timeout; |
| if (timeout < 0) { |
| timeout = 60; |
| } |
| } |
| |
| retry: |
| if (nl_create_expect(tmp->h, exp, timeout) == -1) { |
| if (errno == EEXIST && retry == 1) { |
| ret = nl_destroy_expect(tmp->h, exp); |
| if (ret == 0 || (ret == -1 && errno == ENOENT)) { |
| if (retry) { |
| retry = 0; |
| goto retry; |
| } |
| } |
| dlog(LOG_ERR, "commit-destroy: %s", strerror(errno)); |
| dlog_exp(STATE(log), exp, NFCT_O_PLAIN); |
| tmp->c->stats.commit_fail++; |
| } else { |
| dlog(LOG_ERR, "commit-create: %s", strerror(errno)); |
| dlog_exp(STATE(log), exp, NFCT_O_PLAIN); |
| tmp->c->stats.commit_fail++; |
| } |
| } else { |
| tmp->c->stats.commit_ok++; |
| } |
| /* keep iterating even if we have found errors */ |
| return 0; |
| } |
| |
| static int |
| cache_exp_commit(struct cache *c, struct nfct_handle *h, int clientfd) |
| { |
| unsigned int commit_ok, commit_fail; |
| struct timeval commit_stop, res; |
| struct __commit_container tmp = { |
| .h = h, |
| .c = c, |
| }; |
| |
| /* we already have one commit in progress, skip this. The clientfd |
| * descriptor has to be closed by the caller. */ |
| if (clientfd && STATE_SYNC(commit).clientfd != -1) |
| return -1; |
| |
| switch(STATE_SYNC(commit).state) { |
| case COMMIT_STATE_INACTIVE: |
| gettimeofday(&STATE_SYNC(commit).stats.start, NULL); |
| STATE_SYNC(commit).stats.ok = c->stats.commit_ok; |
| STATE_SYNC(commit).stats.fail = c->stats.commit_fail; |
| STATE_SYNC(commit).clientfd = clientfd; |
| case COMMIT_STATE_MASTER: |
| STATE_SYNC(commit).current = |
| hashtable_iterate_limit(c->h, &tmp, |
| STATE_SYNC(commit).current, |
| CONFIG(general).commit_steps, |
| cache_exp_commit_step); |
| if (STATE_SYNC(commit).current < CONFIG(hashsize)) { |
| STATE_SYNC(commit).state = COMMIT_STATE_MASTER; |
| /* give it another step as soon as possible */ |
| write_evfd(STATE_SYNC(commit).evfd); |
| return 1; |
| } |
| |
| /* calculate the time that commit has taken */ |
| gettimeofday(&commit_stop, NULL); |
| timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res); |
| |
| /* calculate new entries committed */ |
| commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok; |
| commit_fail = |
| c->stats.commit_fail - STATE_SYNC(commit).stats.fail; |
| |
| /* log results */ |
| dlog(LOG_NOTICE, "Committed %u new expectations", commit_ok); |
| |
| if (commit_fail) |
| dlog(LOG_NOTICE, "%u expectations can't be " |
| "committed", commit_fail); |
| |
| dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds", |
| res.tv_sec, res.tv_usec); |
| |
| /* prepare the state machine for new commits */ |
| STATE_SYNC(commit).current = 0; |
| STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; |
| |
| return 0; |
| } |
| return 1; |
| } |
| |
| static struct nethdr * |
| cache_exp_build_msg(const struct cache_object *obj, int type) |
| { |
| return BUILD_NETMSG_FROM_EXP(obj->ptr, type); |
| } |
| |
| /* template to cache expectations coming from the kernel. */ |
| struct cache_ops cache_sync_internal_exp_ops = { |
| .hash = cache_exp_hash, |
| .cmp = cache_exp_cmp, |
| .alloc = cache_exp_alloc, |
| .free = cache_exp_free, |
| .copy = cache_exp_copy, |
| .dump_step = cache_exp_dump_step, |
| .commit = NULL, |
| .build_msg = cache_exp_build_msg, |
| }; |
| |
| /* template to cache expectations coming from the network. */ |
| struct cache_ops cache_sync_external_exp_ops = { |
| .hash = cache_exp_hash, |
| .cmp = cache_exp_cmp, |
| .alloc = cache_exp_alloc, |
| .free = cache_exp_free, |
| .copy = cache_exp_copy, |
| .dump_step = cache_exp_dump_step, |
| .commit = cache_exp_commit, |
| .build_msg = NULL, |
| }; |