cxgbe(4): Updates to the hardware L2 table management code.

- Add full support for IPv6 addresses.

- Read the size of the L2 table during attach.  Do not assume that PCIe
  physical function 4 of the card has all of the table to itself.

- Use FNV instead of Jenkins to hash L3 addresses and drop the private
  copy of jhash.h from the driver.

MFC after:	1 week
This commit is contained in:
Navdeep Parhar 2013-01-14 20:36:22 +00:00
parent 4c10c25e33
commit 0a0a697c73
7 changed files with 149 additions and 190 deletions

View File

@ -1,140 +0,0 @@
#ifndef _JHASH_H
#define _JHASH_H
/* jhash.h: Jenkins hash support.
*
* Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
*
* http://burtleburtle.net/bob/hash/
*
* These are the credits from Bob's sources:
*
* lookup2.c, by Bob Jenkins, December 1996, Public Domain.
* hash(), hash2(), hash3, and mix() are externally useful functions.
* Routines to test the hash are included if SELF_TEST is defined.
* You can use this free for any purpose. It has no warranty.
*
* $FreeBSD$
*/
/*
 * Mix three 32-bit values in place (the mix() step credited to Bob
 * Jenkins' lookup2.c in the header above).
 *
 * NOTE: Arguments are modified, and each one is evaluated several times,
 * so pass plain lvalues with no side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and "__jhash_mix(a, b, c);" is safe in an unbraced
 * if/else body.
 */
#define __jhash_mix(a, b, c) \
do { \
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13); \
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 8); \
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13); \
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12); \
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 16); \
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5); \
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3); \
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 10); \
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15); \
} while (0)
/* The golden ratio: an arbitrary value */
#define JHASH_GOLDEN_RATIO 0x9e3779b9
/*
 * The most generic version: hashes an arbitrary sequence of bytes.
 * No alignment or length assumptions are made about the input key.
 *
 * key     - bytes to hash
 * length  - number of bytes at key
 * initval - seed value the third state word starts from
 *
 * Returns the 32-bit hash.
 */
static inline u32 jhash(const void *key, u32 length, u32 initval)
{
	const u8 *p = key;
	u32 x = JHASH_GOLDEN_RATIO;
	u32 y = JHASH_GOLDEN_RATIO;
	u32 z = initval;
	u32 left;

	/* Consume the key 12 bytes at a time, assembling each word LSB first. */
	for (left = length; left >= 12; left -= 12, p += 12) {
		x += (p[0] + ((u32)p[1] << 8) + ((u32)p[2] << 16) +
		    ((u32)p[3] << 24));
		y += (p[4] + ((u32)p[5] << 8) + ((u32)p[6] << 16) +
		    ((u32)p[7] << 24));
		z += (p[8] + ((u32)p[9] << 8) + ((u32)p[10] << 16) +
		    ((u32)p[11] << 24));
		__jhash_mix(x, y, z);
	}

	/*
	 * The total length goes into z; the trailing bytes folded into z
	 * below start at bit 8, so they do not share its low byte.
	 */
	z += length;

	/* Fold in the remaining 0..11 bytes; every case falls through. */
	switch (left) {
	case 11:
		z += ((u32)p[10] << 24);
		/* FALLTHROUGH */
	case 10:
		z += ((u32)p[9] << 16);
		/* FALLTHROUGH */
	case 9:
		z += ((u32)p[8] << 8);
		/* FALLTHROUGH */
	case 8:
		y += ((u32)p[7] << 24);
		/* FALLTHROUGH */
	case 7:
		y += ((u32)p[6] << 16);
		/* FALLTHROUGH */
	case 6:
		y += ((u32)p[5] << 8);
		/* FALLTHROUGH */
	case 5:
		y += p[4];
		/* FALLTHROUGH */
	case 4:
		x += ((u32)p[3] << 24);
		/* FALLTHROUGH */
	case 3:
		x += ((u32)p[2] << 16);
		/* FALLTHROUGH */
	case 2:
		x += ((u32)p[1] << 8);
		/* FALLTHROUGH */
	case 1:
		x += p[0];
		break;
	default:
		break;
	}

	__jhash_mix(x, y, z);

	return z;
}
/*
 * A special optimized version that handles one or more u32s.
 *
 * k       - words to hash; only read, never written (hence const,
 *           matching jhash()'s const key)
 * length  - number of u32s (not bytes) at k
 * initval - seed value the third state word starts from
 *
 * Returns the 32-bit hash.
 */
static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
{
	u32 a, b, c, len;

	a = b = JHASH_GOLDEN_RATIO;
	c = initval;
	len = length;

	/* Consume the key three words at a time. */
	while (len >= 3) {
		a += k[0];
		b += k[1];
		c += k[2];
		__jhash_mix(a, b, c);
		k += 3;
		len -= 3;
	}

	/* The length is folded in as a byte count (words * 4). */
	c += length * 4;

	/* Fold in the remaining 0..2 words. */
	switch (len) {
	case 2:
		b += k[1];
		/* FALLTHROUGH */
	case 1:
		a += k[0];
		break;
	default:
		break;
	}

	__jhash_mix(a, b, c);

	return c;
}
/*
 * Special ultra-optimized versions that know they are hashing exactly
 * 3, 2 or 1 word(s).
 *
 * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
 * done at the end is not done here.
 *
 * jhash_3words: a, b and c are the three words to hash and initval is the
 * seed.  Returns the third word after a single mixing round.
 */
static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
{
	/* The three adjustments are independent of one another. */
	c += initval;
	b += JHASH_GOLDEN_RATIO;
	a += JHASH_GOLDEN_RATIO;

	__jhash_mix(a, b, c);

	return c;
}
/* Hash exactly two words: same as jhash_3words() with a zero third word. */
static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
{
	const u32 no_third_word = 0;

	return jhash_3words(a, b, no_third_word, initval);
}
/* Hash exactly one word: same as jhash_3words() with the other two zero. */
static inline u32 jhash_1word(u32 a, u32 initval)
{
	const u32 unused_word = 0;

	return jhash_3words(a, unused_word, unused_word, initval);
}
#endif /* _JHASH_H */

View File

@ -104,8 +104,8 @@ struct tid_info {
};
struct t4_range {
unsigned int start;
unsigned int size;
u_int start;
u_int size;
};
struct t4_virt_res { /* virtualized HW resources */
@ -117,6 +117,7 @@ struct t4_virt_res { /* virtualized HW resources */
struct t4_range qp;
struct t4_range cq;
struct t4_range ocq;
struct t4_range l2t;
};
#ifdef TCP_OFFLOAD

View File

@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include "common/common.h"
#include "common/jhash.h"
#include "common/t4_msg.h"
#include "t4_l2t.h"
@ -78,7 +77,7 @@ t4_alloc_l2e(struct l2t_data *d)
return (NULL);
/* there's definitely a free entry */
for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e)
for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e)
if (atomic_load_acq_int(&e->refcnt) == 0)
goto found;
@ -115,6 +114,7 @@ t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
{
struct wrqe *wr;
struct cpl_l2t_write_req *req;
int idx = e->idx + sc->vres.l2t.start;
mtx_assert(&e->lock, MA_OWNED);
@ -124,10 +124,10 @@ t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
req = wrtod(wr);
INIT_TP_WR(req, 0);
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx |
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx |
V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id)));
req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync));
req->l2t_idx = htons(e->idx);
req->l2t_idx = htons(idx);
req->vlan = htons(e->vlan);
memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
@ -183,18 +183,24 @@ t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan,
int
t4_init_l2t(struct adapter *sc, int flags)
{
int i;
int i, l2t_size;
struct l2t_data *d;
d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags);
l2t_size = sc->vres.l2t.size;
if (l2t_size < 2) /* At least 1 bucket for IP and 1 for IPv6 */
return (EINVAL);
d = malloc(sizeof(*d) + l2t_size * sizeof (struct l2t_entry), M_CXGBE,
M_ZERO | flags);
if (!d)
return (ENOMEM);
d->l2t_size = l2t_size;
d->rover = d->l2tab;
atomic_store_rel_int(&d->nfree, L2T_SIZE);
atomic_store_rel_int(&d->nfree, l2t_size);
rw_init(&d->lock, "L2T");
for (i = 0; i < L2T_SIZE; i++) {
for (i = 0; i < l2t_size; i++) {
struct l2t_entry *e = &d->l2tab[i];
e->idx = i;
@ -215,7 +221,7 @@ t4_free_l2t(struct l2t_data *d)
{
int i;
for (i = 0; i < L2T_SIZE; i++)
for (i = 0; i < d->l2t_size; i++)
mtx_destroy(&d->l2tab[i].lock);
rw_destroy(&d->lock);
free(d, M_CXGBE);
@ -229,11 +235,11 @@ do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
{
const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
unsigned int tid = GET_TID(rpl);
unsigned int idx = tid & (L2T_SIZE - 1);
unsigned int idx = tid % L2T_SIZE;
if (__predict_false(rpl->status != CPL_ERR_NONE)) {
log(LOG_ERR,
"Unexpected L2T_WRITE_RPL status %u for entry %u\n",
"Unexpected L2T_WRITE_RPL (%u) for entry at hw_idx %u\n",
rpl->status, idx);
return (EINVAL);
}
@ -269,7 +275,7 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS)
struct l2t_entry *e;
struct sbuf *sb;
int rc, i, header = 0;
char ip[60];
char ip[INET6_ADDRSTRLEN];
if (l2t == NULL)
return (ENXIO);
@ -283,7 +289,7 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS)
return (ENOMEM);
e = &l2t->l2tab[0];
for (i = 0; i < L2T_SIZE; i++, e++) {
for (i = 0; i < l2t->l2t_size; i++, e++) {
mtx_lock(&e->lock);
if (e->state == L2T_STATE_UNUSED)
goto skip;
@ -295,11 +301,15 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS)
}
if (e->state == L2T_STATE_SWITCHING)
ip[0] = 0;
else
snprintf(ip, sizeof(ip), "%s",
inet_ntoa(*(struct in_addr *)&e->addr));
else {
inet_ntop(e->ipv6 ? AF_INET6 : AF_INET, &e->addr[0],
&ip[0], sizeof(ip));
}
/* XXX: e->ifp may not be around */
/*
* XXX: e->ifp may not be around.
* XXX: IPv6 addresses may not align properly in the output.
*/
sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
" %u %2u %c %5u %s",
e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],

View File

@ -60,7 +60,7 @@ enum {
struct l2t_entry {
uint16_t state; /* entry state */
uint16_t idx; /* entry index */
uint32_t addr; /* next hop IP address */
uint32_t addr[4]; /* next hop IP or IPv6 address */
struct ifnet *ifp; /* outgoing interface */
uint16_t smt_idx; /* SMT index */
uint16_t vlan; /* VLAN TCI (id: 0-11, prio: 13-15) */
@ -70,15 +70,17 @@ struct l2t_entry {
struct mtx lock;
volatile int refcnt; /* entry reference count */
uint16_t hash; /* hash bucket the entry is on */
uint8_t ipv6; /* entry is for an IPv6 address */
uint8_t lport; /* associated offload logical port */
uint8_t dmac[ETHER_ADDR_LEN]; /* next hop's MAC address */
};
struct l2t_data {
struct rwlock lock;
u_int l2t_size;
volatile int nfree; /* number of free entries */
struct l2t_entry *rover;/* starting point for next allocation */
struct l2t_entry l2tab[L2T_SIZE];
struct l2t_entry l2tab[];
};

View File

@ -1866,7 +1866,9 @@ get_params__post_init(struct adapter *sc)
param[1] = FW_PARAM_PFVF(EQ_START);
param[2] = FW_PARAM_PFVF(FILTER_START);
param[3] = FW_PARAM_PFVF(FILTER_END);
rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
param[4] = FW_PARAM_PFVF(L2T_START);
param[5] = FW_PARAM_PFVF(L2T_END);
rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
if (rc != 0) {
device_printf(sc->dev,
"failed to query parameters (post_init): %d.\n", rc);
@ -1877,6 +1879,11 @@ get_params__post_init(struct adapter *sc)
sc->sge.eq_start = val[1];
sc->tids.ftid_base = val[2];
sc->tids.nftids = val[3] - val[2] + 1;
sc->vres.l2t.start = val[4];
sc->vres.l2t.size = val[5] - val[4] + 1;
KASSERT(sc->vres.l2t.size <= L2T_SIZE,
("%s: L2 table size (%u) larger than expected (%u)",
__func__, sc->vres.l2t.size, L2T_SIZE));
/* get capabilities */
bzero(&caps, sizeof(caps));

View File

@ -27,6 +27,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
@ -34,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/fnv_hash.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
@ -48,28 +50,89 @@ __FBSDID("$FreeBSD$");
#include <netinet/toecore.h>
#include "common/common.h"
#include "common/jhash.h"
#include "common/t4_msg.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
#define VLAN_NONE 0xfff
#define SA(x) ((struct sockaddr *)(x))
#define SIN(x) ((struct sockaddr_in *)(x))
#define SINADDR(x) (SIN(x)->sin_addr.s_addr)
/*
 * Take a reference on L2 table entry e.  If this is the entry's first
 * reference (refcnt was 0 before the increment) it stops counting as free,
 * so the table's nfree counter is decremented as well.
 */
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{

	/* The fetch-and-add yields the pre-increment value. */
	if (atomic_fetchadd_int(&e->refcnt, 1) != 0)
		return;		/* entry was already in use */

	/* 0 -> 1 transition */
	atomic_subtract_int(&d->nfree, 1);
}
static inline unsigned int
arp_hash(const uint32_t key, int ifindex)
static inline u_int
l2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex)
{
return jhash_2words(key, ifindex, 0) & (L2T_SIZE - 1);
u_int hash, half = d->l2t_size / 2, start = 0;
const void *key;
size_t len;
KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
("%s: sa %p has unexpected sa_family %d", __func__, sa,
sa->sa_family));
if (sa->sa_family == AF_INET) {
const struct sockaddr_in *sin = (const void *)sa;
key = &sin->sin_addr;
len = sizeof(sin->sin_addr);
} else {
const struct sockaddr_in6 *sin6 = (const void *)sa;
key = &sin6->sin6_addr;
len = sizeof(sin6->sin6_addr);
start = half;
}
hash = fnv_32_buf(key, len, FNV1_32_INIT);
hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash);
hash %= half;
return (hash + start);
}
/*
 * Compare the L3 address in sa against the one stored in entry e.
 *
 * Returns 0 when they match and nonzero otherwise.  For AF_INET only
 * e->addr[0] is compared; for any other family all of e->addr is compared
 * against the IPv6 address.  e->ipv6 is not consulted here.
 */
static inline int
l2_cmp(const struct sockaddr *sa, struct l2t_entry *e)
{
	/* Both views alias sa; only the one matching sa_family is read. */
	const struct sockaddr_in *v4 = (const void *)sa;
	const struct sockaddr_in6 *v6 = (const void *)sa;

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (sa->sa_family == AF_INET)
		return (e->addr[0] != v4->sin_addr.s_addr);

	return (memcmp(&e->addr[0], &v6->sin6_addr, sizeof(e->addr)));
}
/*
 * Record the L3 address from sa in entry e and flag the entry's family.
 *
 * For AF_INET only e->addr[0] is written (the other words keep whatever
 * they held) and e->ipv6 is cleared; for any other family all of e->addr
 * is overwritten with the IPv6 address and e->ipv6 is set.
 */
static inline void
l2_store(const struct sockaddr *sa, struct l2t_entry *e)
{
	/* Both views alias sa; only the one matching sa_family is read. */
	const struct sockaddr_in *v4 = (const void *)sa;
	const struct sockaddr_in6 *v6 = (const void *)sa;
	const int is_v6 = (sa->sa_family != AF_INET);

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (is_v6)
		memcpy(&e->addr[0], &v6->sin6_addr, sizeof(e->addr));
	else
		e->addr[0] = v4->sin_addr.s_addr;

	e->ipv6 = is_v6;
}
/*
@ -100,7 +163,7 @@ send_pending(struct adapter *sc, struct l2t_entry *e)
static void
resolution_failed_for_wr(struct wrqe *wr)
{
log(LOG_ERR, "%s: leaked work request %p, wr_len %d", __func__, wr,
log(LOG_ERR, "%s: leaked work request %p, wr_len %d\n", __func__, wr,
wr->wr_len);
/* free(wr, M_CXGBE); */
@ -175,15 +238,25 @@ resolve_entry(struct adapter *sc, struct l2t_entry *e)
struct tom_data *td = sc->tom_softc;
struct toedev *tod = &td->tod;
struct sockaddr_in sin = {0};
struct sockaddr_in6 sin6 = {0};
struct sockaddr *sa;
uint8_t dmac[ETHER_ADDR_LEN];
uint16_t vtag = VLAN_NONE;
int rc;
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
SINADDR(&sin) = e->addr;
if (e->ipv6 == 0) {
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_addr.s_addr = e->addr[0];
sa = (void *)&sin;
} else {
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(struct sockaddr_in6);
memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr));
sa = (void *)&sin6;
}
rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag);
if (rc == EWOULDBLOCK)
return (rc);
@ -263,7 +336,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
struct adapter *sc = iq->adapter;
const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
unsigned int tid = GET_TID(rpl);
unsigned int idx = tid & (L2T_SIZE - 1);
unsigned int idx = tid % L2T_SIZE;
int rc;
rc = do_l2t_write_rpl(iq, rss, m);
@ -271,7 +344,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
return (rc);
if (tid & F_SYNC_WR) {
struct l2t_entry *e = &sc->l2t->l2tab[idx];
struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start];
mtx_lock(&e->lock);
if (e->state != L2T_STATE_SWITCHING) {
@ -310,21 +383,22 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
struct l2t_entry *e;
struct l2t_data *d = pi->adapter->l2t;
uint32_t addr = SINADDR(sa);
int hash = arp_hash(addr, ifp->if_index);
unsigned int smt_idx = pi->port_id;
u_int hash, smt_idx = pi->port_id;
if (sa->sa_family != AF_INET)
return (NULL); /* XXX: no IPv6 support right now */
KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
("%s: sa %p has unexpected sa_family %d", __func__, sa,
sa->sa_family));
#ifndef VLAN_TAG
if (ifp->if_type == IFT_L2VLAN)
return (NULL);
#endif
hash = l2_hash(d, sa, ifp->if_index);
rw_wlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next) {
if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
if (l2_cmp(sa, e) == 0 && e->ifp == ifp &&
e->smt_idx == smt_idx) {
l2t_hold(d, e);
goto done;
}
@ -338,7 +412,7 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
d->l2tab[hash].first = e;
e->state = L2T_STATE_RESOLVING;
e->addr = addr;
l2_store(sa, e);
e->ifp = ifp;
e->smt_idx = smt_idx;
e->hash = hash;
@ -368,14 +442,14 @@ t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
struct adapter *sc = tod->tod_softc;
struct l2t_entry *e;
struct l2t_data *d = sc->l2t;
uint32_t addr = SINADDR(sa);
int hash = arp_hash(addr, ifp->if_index);
u_int hash;
KASSERT(d != NULL, ("%s: no L2 table", __func__));
hash = l2_hash(d, sa, ifp->if_index);
rw_rlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next) {
if (e->addr == addr && e->ifp == ifp) {
if (l2_cmp(sa, e) == 0 && e->ifp == ifp) {
mtx_lock(&e->lock);
if (atomic_load_acq_int(&e->refcnt))
goto found;

View File

@ -10,15 +10,20 @@ CXGBE = ${.CURDIR}/../../../dev/cxgbe
KMOD = t4_tom
SRCS = t4_tom.c t4_connect.c t4_listen.c t4_cpl_io.c t4_tom_l2t.c t4_ddp.c
SRCS+= device_if.h bus_if.h pci_if.h
SRCS+= opt_inet.h
SRCS+= opt_inet.h opt_inet6.h
CFLAGS+= -I${CXGBE}
.if !defined(KERNBUILDDIR)
.if ${MK_INET_SUPPORT} != "no"
opt_inet.h:
echo "#define INET 1" > ${.TARGET}
echo "#define TCP_OFFLOAD 1" >> ${.TARGET}
@echo "#define INET 1" > ${.TARGET}
@echo "#define TCP_OFFLOAD 1" >> ${.TARGET}
.endif
.if ${MK_INET6_SUPPORT} != "no"
opt_inet6.h:
@echo "#define INET6 1" > ${.TARGET}
.endif
.endif