tcp/lro: Implement hash table for LRO entries.
This significantly improves HTTP workload performance and reduces HTTP workload latency. Reviewed by: rrs, gallatin, hps Obtained from: rrs, gallatin Sponsored by: Netflix (rrs, gallatin) , Microsoft (sephe) Differential Revision: https://reviews.freebsd.org/D6689
This commit is contained in:
parent
fa5000a4f3
commit
05cde7efa6
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=303656
@ -68,19 +68,24 @@ static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures");
|
||||
#endif
|
||||
|
||||
static void tcp_lro_rx_done(struct lro_ctrl *lc);
|
||||
static int tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m,
|
||||
uint32_t csum, int use_hash);
|
||||
|
||||
static __inline void
|
||||
tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_entry *le)
|
||||
tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket,
|
||||
struct lro_entry *le)
|
||||
{
|
||||
|
||||
LIST_INSERT_HEAD(&lc->lro_active, le, next);
|
||||
LIST_INSERT_HEAD(bucket, le, hash_next);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
tcp_lro_active_remove(struct lro_entry *le)
|
||||
{
|
||||
|
||||
LIST_REMOVE(le, next);
|
||||
LIST_REMOVE(le, next); /* active list */
|
||||
LIST_REMOVE(le, hash_next); /* hash bucket */
|
||||
}
|
||||
|
||||
int
|
||||
@ -95,7 +100,7 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
|
||||
{
|
||||
struct lro_entry *le;
|
||||
size_t size;
|
||||
unsigned i;
|
||||
unsigned i, elements;
|
||||
|
||||
lc->lro_bad_csum = 0;
|
||||
lc->lro_queued = 0;
|
||||
@ -110,6 +115,18 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
|
||||
LIST_INIT(&lc->lro_free);
|
||||
LIST_INIT(&lc->lro_active);
|
||||
|
||||
/* create hash table to accelerate entry lookup */
|
||||
if (lro_entries > lro_mbufs)
|
||||
elements = lro_entries;
|
||||
else
|
||||
elements = lro_mbufs;
|
||||
lc->lro_hash = phashinit_flags(elements, M_LRO, &lc->lro_hashsz,
|
||||
HASH_NOWAIT);
|
||||
if (lc->lro_hash == NULL) {
|
||||
memset(lc, 0, sizeof(*lc));
|
||||
return (ENOMEM);
|
||||
}
|
||||
|
||||
/* compute size to allocate */
|
||||
size = (lro_mbufs * sizeof(struct lro_mbuf_sort)) +
|
||||
(lro_entries * sizeof(*le));
|
||||
@ -147,6 +164,13 @@ tcp_lro_free(struct lro_ctrl *lc)
|
||||
m_freem(le->m_head);
|
||||
}
|
||||
|
||||
/* free hash table */
|
||||
if (lc->lro_hash != NULL) {
|
||||
free(lc->lro_hash, M_LRO);
|
||||
lc->lro_hash = NULL;
|
||||
}
|
||||
lc->lro_hashsz = 0;
|
||||
|
||||
/* free mbuf array, if any */
|
||||
for (x = 0; x != lc->lro_mbuf_count; x++)
|
||||
m_freem(lc->lro_mbuf_data[x].mb);
|
||||
@ -487,7 +511,7 @@ tcp_lro_flush_all(struct lro_ctrl *lc)
|
||||
}
|
||||
|
||||
/* add packet to LRO engine */
|
||||
if (tcp_lro_rx(lc, mb, 0) != 0) {
|
||||
if (tcp_lro_rx2(lc, mb, 0, 0) != 0) {
|
||||
/* input packet to network layer */
|
||||
(*lc->ifp->if_input)(lc->ifp, mb);
|
||||
lc->lro_queued++;
|
||||
@ -561,8 +585,8 @@ tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
|
||||
}
|
||||
#endif
|
||||
|
||||
int
|
||||
tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
|
||||
static int
|
||||
tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
|
||||
{
|
||||
struct lro_entry *le;
|
||||
struct ether_header *eh;
|
||||
@ -578,6 +602,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
|
||||
tcp_seq seq;
|
||||
int error, ip_len, l;
|
||||
uint16_t eh_type, tcp_data_len;
|
||||
struct lro_head *bucket;
|
||||
|
||||
/* We expect a contiguous header [eh, ip, tcp]. */
|
||||
|
||||
@ -670,8 +695,41 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
|
||||
|
||||
seq = ntohl(th->th_seq);
|
||||
|
||||
if (!use_hash) {
|
||||
bucket = &lc->lro_hash[0];
|
||||
} else if (M_HASHTYPE_ISHASH(m)) {
|
||||
bucket = &lc->lro_hash[m->m_pkthdr.flowid % lc->lro_hashsz];
|
||||
} else {
|
||||
uint32_t hash;
|
||||
|
||||
switch (eh_type) {
|
||||
#ifdef INET
|
||||
case ETHERTYPE_IP:
|
||||
hash = ip4->ip_src.s_addr + ip4->ip_dst.s_addr;
|
||||
break;
|
||||
#endif
|
||||
#ifdef INET6
|
||||
case ETHERTYPE_IPV6:
|
||||
hash = ip6->ip6_src.s6_addr32[0] +
|
||||
ip6->ip6_dst.s6_addr32[0];
|
||||
hash += ip6->ip6_src.s6_addr32[1] +
|
||||
ip6->ip6_dst.s6_addr32[1];
|
||||
hash += ip6->ip6_src.s6_addr32[2] +
|
||||
ip6->ip6_dst.s6_addr32[2];
|
||||
hash += ip6->ip6_src.s6_addr32[3] +
|
||||
ip6->ip6_dst.s6_addr32[3];
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
hash = 0;
|
||||
break;
|
||||
}
|
||||
hash += th->th_sport + th->th_dport;
|
||||
bucket = &lc->lro_hash[hash % lc->lro_hashsz];
|
||||
}
|
||||
|
||||
/* Try to find a matching previous segment. */
|
||||
LIST_FOREACH(le, &lc->lro_active, next) {
|
||||
LIST_FOREACH(le, bucket, hash_next) {
|
||||
if (le->eh_type != eh_type)
|
||||
continue;
|
||||
if (le->source_port != th->th_sport ||
|
||||
@ -779,7 +837,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
|
||||
/* Start a new segment chain. */
|
||||
le = LIST_FIRST(&lc->lro_free);
|
||||
LIST_REMOVE(le, next);
|
||||
tcp_lro_active_insert(lc, le);
|
||||
tcp_lro_active_insert(lc, bucket, le);
|
||||
getmicrotime(&le->mtime);
|
||||
|
||||
/* Start filling in details. */
|
||||
@ -837,6 +895,13 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
|
||||
{
|
||||
|
||||
return tcp_lro_rx2(lc, m, csum, 1);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
|
||||
{
|
||||
|
@ -40,6 +40,7 @@
|
||||
|
||||
struct lro_entry {
|
||||
LIST_ENTRY(lro_entry) next;
|
||||
LIST_ENTRY(lro_entry) hash_next;
|
||||
struct mbuf *m_head;
|
||||
struct mbuf *m_tail;
|
||||
union {
|
||||
@ -95,6 +96,8 @@ struct lro_ctrl {
|
||||
unsigned short lro_ackcnt_lim; /* max # of aggregated ACKs */
|
||||
unsigned lro_length_lim; /* max len of aggregated data */
|
||||
|
||||
u_long lro_hashsz;
|
||||
struct lro_head *lro_hash;
|
||||
struct lro_head lro_active;
|
||||
struct lro_head lro_free;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user