numam-dpdk/lib/librte_ip_frag/ip_frag_internal.c
Anatoly Burakov 2505824261 ip_frag: fix order of key compare arguments
When using key compare function, it uses key length of the first
argument to determine how long should be the keys that are compared.
However, currently we are passing a key from the fragmentation table as
first argument. the problem with this is that this key is potentially
uninitialized (i.e. contains all zeroes, including key length). this
leads to a nasty bug of comparing only the key id's and not keys
themselves.

Of course, a safer way would be to do RTE_MAX between key lengths, but
since this compare is done per-packet, every cycle counts, so we just
use the key whose length is guaranteed to be correct because it comes
from an actual packet.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Thomas Monjalon <thomas.monjalon@6wind.com>
2014-06-26 22:51:07 +02:00

419 lines
12 KiB
C

/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stddef.h>
#include <rte_jhash.h>
#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
#include <rte_hash_crc.h>
#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
#include "ip_frag_common.h"
#define PRIME_VALUE 0xeaad8405
#define IP_FRAG_TBL_POS(tbl, sig) \
((tbl)->pkt + ((sig) & (tbl)->entry_mask))
#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) ((s)->f += (v))
#else
#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) do {} while (0)
#endif /* IP_FRAG_TBL_STAT */
/* local frag table helper functions */
static inline void
ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
struct ip_frag_pkt *fp)
{
ip_frag_free(fp, dr);
ip_frag_key_invalidate(&fp->key);
TAILQ_REMOVE(&tbl->lru, fp, lru);
tbl->use_entries--;
IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
}
static inline void
ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl, struct ip_frag_pkt *fp,
const struct ip_frag_key *key, uint64_t tms)
{
fp->key = key[0];
ip_frag_reset(fp, tms);
TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
tbl->use_entries++;
IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1);
}
static inline void
ip_frag_tbl_reuse(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
struct ip_frag_pkt *fp, uint64_t tms)
{
ip_frag_free(fp, dr);
ip_frag_reset(fp, tms);
TAILQ_REMOVE(&tbl->lru, fp, lru);
TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1);
}
static inline void
ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
{
uint32_t v;
const uint32_t *p;
p = (const uint32_t *)&key->src_dst;
#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
v = rte_hash_crc_4byte(p[1], v);
v = rte_hash_crc_4byte(key->id, v);
#else
v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE);
#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
*v1 = v;
*v2 = (v << 7) + (v >> 14);
}
static inline void
ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
{
uint32_t v;
const uint32_t *p;
p = (const uint32_t *) &key->src_dst;
#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
v = rte_hash_crc_4byte(p[1], v);
v = rte_hash_crc_4byte(p[2], v);
v = rte_hash_crc_4byte(p[3], v);
v = rte_hash_crc_4byte(p[4], v);
v = rte_hash_crc_4byte(p[5], v);
v = rte_hash_crc_4byte(p[6], v);
v = rte_hash_crc_4byte(p[7], v);
v = rte_hash_crc_4byte(key->id, v);
#else
v = rte_jhash_3words(p[0], p[1], p[2], PRIME_VALUE);
v = rte_jhash_3words(p[3], p[4], p[5], v);
v = rte_jhash_3words(p[6], p[7], key->id, v);
#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
*v1 = v;
*v2 = (v << 7) + (v >> 14);
}
struct rte_mbuf *
ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
{
uint32_t idx;
fp->frag_size += len;
/* this is the first fragment. */
if (ofs == 0) {
idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ?
IP_FIRST_FRAG_IDX : UINT32_MAX;
/* this is the last fragment. */
} else if (more_frags == 0) {
fp->total_size = ofs + len;
idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ?
IP_LAST_FRAG_IDX : UINT32_MAX;
/* this is the intermediate fragment. */
} else if ((idx = fp->last_idx) <
sizeof (fp->frags) / sizeof (fp->frags[0])) {
fp->last_idx++;
}
/*
* errorneous packet: either exceeed max allowed number of fragments,
* or duplicate first/last fragment encountered.
*/
if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {
/* report an error. */
if (fp->key.key_len == IPV4_KEYLEN)
IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
"total_size: %u, frag_size: %u, last_idx: %u\n"
"first fragment: ofs: %u, len: %u\n"
"last fragment: ofs: %u, len: %u\n\n",
__func__, __LINE__,
fp, fp->key.src_dst[0], fp->key.id,
fp->total_size, fp->frag_size, fp->last_idx,
fp->frags[IP_FIRST_FRAG_IDX].ofs,
fp->frags[IP_FIRST_FRAG_IDX].len,
fp->frags[IP_LAST_FRAG_IDX].ofs,
fp->frags[IP_LAST_FRAG_IDX].len);
else
IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
"ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
"total_size: %u, frag_size: %u, last_idx: %u\n"
"first fragment: ofs: %u, len: %u\n"
"last fragment: ofs: %u, len: %u\n\n",
__func__, __LINE__,
fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
fp->total_size, fp->frag_size, fp->last_idx,
fp->frags[IP_FIRST_FRAG_IDX].ofs,
fp->frags[IP_FIRST_FRAG_IDX].len,
fp->frags[IP_LAST_FRAG_IDX].ofs,
fp->frags[IP_LAST_FRAG_IDX].len);
/* free all fragments, invalidate the entry. */
ip_frag_free(fp, dr);
ip_frag_key_invalidate(&fp->key);
IP_FRAG_MBUF2DR(dr, mb);
return (NULL);
}
fp->frags[idx].ofs = ofs;
fp->frags[idx].len = len;
fp->frags[idx].mb = mb;
mb = NULL;
/* not all fragments are collected yet. */
if (likely (fp->frag_size < fp->total_size)) {
return (mb);
/* if we collected all fragments, then try to reassemble. */
} else if (fp->frag_size == fp->total_size &&
fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) {
if (fp->key.key_len == IPV4_KEYLEN)
mb = ipv4_frag_reassemble(fp);
else
mb = ipv6_frag_reassemble(fp);
}
/* errorenous set of fragments. */
if (mb == NULL) {
/* report an error. */
if (fp->key.key_len == IPV4_KEYLEN)
IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
"total_size: %u, frag_size: %u, last_idx: %u\n"
"first fragment: ofs: %u, len: %u\n"
"last fragment: ofs: %u, len: %u\n\n",
__func__, __LINE__,
fp, fp->key.src_dst[0], fp->key.id,
fp->total_size, fp->frag_size, fp->last_idx,
fp->frags[IP_FIRST_FRAG_IDX].ofs,
fp->frags[IP_FIRST_FRAG_IDX].len,
fp->frags[IP_LAST_FRAG_IDX].ofs,
fp->frags[IP_LAST_FRAG_IDX].len);
else
IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
"ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
"total_size: %u, frag_size: %u, last_idx: %u\n"
"first fragment: ofs: %u, len: %u\n"
"last fragment: ofs: %u, len: %u\n\n",
__func__, __LINE__,
fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
fp->total_size, fp->frag_size, fp->last_idx,
fp->frags[IP_FIRST_FRAG_IDX].ofs,
fp->frags[IP_FIRST_FRAG_IDX].len,
fp->frags[IP_LAST_FRAG_IDX].ofs,
fp->frags[IP_LAST_FRAG_IDX].len);
/* free associated resources. */
ip_frag_free(fp, dr);
}
/* we are done with that entry, invalidate it. */
ip_frag_key_invalidate(&fp->key);
return (mb);
}
/*
* Find an entry in the table for the corresponding fragment.
* If such entry is not present, then allocate a new one.
* If the entry is stale, then free and reuse it.
*/
struct ip_frag_pkt *
ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
const struct ip_frag_key *key, uint64_t tms)
{
struct ip_frag_pkt *pkt, *free, *stale, *lru;
uint64_t max_cycles;
/*
* Actually the two line below are totally redundant.
* they are here, just to make gcc 4.6 happy.
*/
free = NULL;
stale = NULL;
max_cycles = tbl->max_cycles;
IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);
if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {
/*timed-out entry, free and invalidate it*/
if (stale != NULL) {
ip_frag_tbl_del(tbl, dr, stale);
free = stale;
/*
* we found a free entry, check if we can use it.
* If we run out of free entries in the table, then
* check if we have a timed out entry to delete.
*/
} else if (free != NULL &&
tbl->max_entries <= tbl->use_entries) {
lru = TAILQ_FIRST(&tbl->lru);
if (max_cycles + lru->start < tms) {
ip_frag_tbl_del(tbl, dr, lru);
} else {
free = NULL;
IP_FRAG_TBL_STAT_UPDATE(&tbl->stat,
fail_nospace, 1);
}
}
/* found a free entry to reuse. */
if (free != NULL) {
ip_frag_tbl_add(tbl, free, key, tms);
pkt = free;
}
/*
* we found the flow, but it is already timed out,
* so free associated resources, reposition it in the LRU list,
* and reuse it.
*/
} else if (max_cycles + pkt->start < tms) {
ip_frag_tbl_reuse(tbl, dr, pkt, tms);
}
IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));
tbl->last = pkt;
return (pkt);
}
struct ip_frag_pkt *
ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
const struct ip_frag_key *key, uint64_t tms,
struct ip_frag_pkt **free, struct ip_frag_pkt **stale)
{
struct ip_frag_pkt *p1, *p2;
struct ip_frag_pkt *empty, *old;
uint64_t max_cycles;
uint32_t i, assoc, sig1, sig2;
empty = NULL;
old = NULL;
max_cycles = tbl->max_cycles;
assoc = tbl->bucket_entries;
if (tbl->last != NULL && ip_frag_key_cmp(key, &tbl->last->key) == 0)
return (tbl->last);
/* different hashing methods for IPv4 and IPv6 */
if (key->key_len == IPV4_KEYLEN)
ipv4_frag_hash(key, &sig1, &sig2);
else
ipv6_frag_hash(key, &sig1, &sig2);
p1 = IP_FRAG_TBL_POS(tbl, sig1);
p2 = IP_FRAG_TBL_POS(tbl, sig2);
for (i = 0; i != assoc; i++) {
if (p1->key.key_len == IPV4_KEYLEN)
IP_FRAG_LOG(DEBUG, "%s:%d:\n"
"tbl: %p, max_entries: %u, use_entries: %u\n"
"ipv6_frag_pkt line0: %p, index: %u from %u\n"
"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
__func__, __LINE__,
tbl, tbl->max_entries, tbl->use_entries,
p1, i, assoc,
p1[i].key.src_dst[0], p1[i].key.id, p1[i].start);
else
IP_FRAG_LOG(DEBUG, "%s:%d:\n"
"tbl: %p, max_entries: %u, use_entries: %u\n"
"ipv6_frag_pkt line0: %p, index: %u from %u\n"
"key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
__func__, __LINE__,
tbl, tbl->max_entries, tbl->use_entries,
p1, i, assoc,
IPv6_KEY_BYTES(p1[i].key.src_dst), p1[i].key.id, p1[i].start);
if (ip_frag_key_cmp(key, &p1[i].key) == 0)
return (p1 + i);
else if (ip_frag_key_is_empty(&p1[i].key))
empty = (empty == NULL) ? (p1 + i) : empty;
else if (max_cycles + p1[i].start < tms)
old = (old == NULL) ? (p1 + i) : old;
if (p2->key.key_len == IPV4_KEYLEN)
IP_FRAG_LOG(DEBUG, "%s:%d:\n"
"tbl: %p, max_entries: %u, use_entries: %u\n"
"ipv6_frag_pkt line1: %p, index: %u from %u\n"
"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
__func__, __LINE__,
tbl, tbl->max_entries, tbl->use_entries,
p2, i, assoc,
p2[i].key.src_dst[0], p2[i].key.id, p2[i].start);
else
IP_FRAG_LOG(DEBUG, "%s:%d:\n"
"tbl: %p, max_entries: %u, use_entries: %u\n"
"ipv6_frag_pkt line1: %p, index: %u from %u\n"
"key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
__func__, __LINE__,
tbl, tbl->max_entries, tbl->use_entries,
p2, i, assoc,
IPv6_KEY_BYTES(p2[i].key.src_dst), p2[i].key.id, p2[i].start);
if (ip_frag_key_cmp(key, &p2[i].key) == 0)
return (p2 + i);
else if (ip_frag_key_is_empty(&p2[i].key))
empty = (empty == NULL) ?( p2 + i) : empty;
else if (max_cycles + p2[i].start < tms)
old = (old == NULL) ? (p2 + i) : old;
}
*free = empty;
*stale = old;
return (NULL);
}