Factor out mbuf hashing code from LAGG driver so that other network
drivers can use it. This avoids some code duplication. Add missing
default case to all switch statements while at it. Also move the
hashing of the IPv6 flow field to layer 4 because the IPv6 flow field
is constant on a per-L4-connection basis and not on a per-L3-network basis.

Differential Revision:	https://reviews.freebsd.org/D1987
Sponsored by:		Mellanox Technologies
MFC after:		1 month
This commit is contained in:
Hans Petter Selasky 2015-03-11 16:02:24 +00:00
parent 5ba11c4c2e
commit b7ba031ff7
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=279891
9 changed files with 107 additions and 264 deletions

View File

@ -3140,6 +3140,7 @@ kern/uipc_debug.c optional ddb
kern/uipc_domain.c standard
kern/uipc_mbuf.c standard
kern/uipc_mbuf2.c standard
kern/uipc_mbufhash.c standard
kern/uipc_mqueue.c optional p1003_1b_mqueue
kern/uipc_sem.c optional p1003_1b_semaphores
kern/uipc_shm.c standard
@ -3857,9 +3858,6 @@ ofed/drivers/net/mlx4/sys_tune.c optional mlx4ib | mlxen \
ofed/drivers/net/mlx4/en_cq.c optional mlxen \
no-depend obj-prefix "mlx4_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
ofed/drivers/net/mlx4/utils.c optional mlxen \
no-depend obj-prefix "mlx4_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
ofed/drivers/net/mlx4/en_main.c optional mlxen \
no-depend obj-prefix "mlx4_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"

View File

@ -25,58 +25,34 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/fnv_hash.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/hash.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_llc.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#endif
#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#endif
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif
#include <net/if_vlan_var.h>
#include "utils.h"
/* XXX this code should be factored out */
/* XXX copied from if_lagg.c */
static const void *
mlx4_en_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
m_ether_tcpip_hash_gethdr(const struct mbuf *m, const u_int off,
const u_int len, void *buf)
{
if (m->m_pkthdr.len < (off + len)) {
return (NULL);
} else if (m->m_len < (off + len)) {
@ -87,22 +63,18 @@ mlx4_en_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
}
uint32_t
mlx4_en_hashmbuf(uint32_t flags, struct mbuf *m, uint32_t key)
m_ether_tcpip_hash_init(void)
{
uint32_t seed;
seed = arc4random();
return (fnv_32_buf(&seed, sizeof(seed), FNV1_32_INIT));
}
uint32_t
m_ether_tcpip_hash(const uint32_t flags, const struct mbuf *m,
const uint32_t key)
{
uint16_t etype;
uint32_t p = key;
int off;
struct ether_header *eh;
const struct ether_vlan_header *vlan;
#ifdef INET
const struct ip *ip;
const uint32_t *ports;
int iphlen;
#endif
#ifdef INET6
const struct ip6_hdr *ip6;
uint32_t flow;
#endif
union {
#ifdef INET
struct ip ip;
@ -113,47 +85,57 @@ mlx4_en_hashmbuf(uint32_t flags, struct mbuf *m, uint32_t key)
struct ether_vlan_header vlan;
uint32_t port;
} buf;
struct ether_header *eh;
const struct ether_vlan_header *vlan;
#ifdef INET
const struct ip *ip;
#endif
#ifdef INET6
const struct ip6_hdr *ip6;
#endif
uint32_t p;
int off;
uint16_t etype;
p = key;
off = sizeof(*eh);
if (m->m_len < off)
goto out;
goto done;
eh = mtod(m, struct ether_header *);
etype = ntohs(eh->ether_type);
if (flags & MLX4_F_HASHL2) {
p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
if (flags & MBUF_HASHFLAG_L2) {
p = fnv_32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
p = fnv_32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
}
/* Special handling for encapsulating VLAN frames */
if ((m->m_flags & M_VLANTAG) && (flags & MLX4_F_HASHL2)) {
p = hash32_buf(&m->m_pkthdr.ether_vtag,
if ((m->m_flags & M_VLANTAG) && (flags & MBUF_HASHFLAG_L2)) {
p = fnv_32_buf(&m->m_pkthdr.ether_vtag,
sizeof(m->m_pkthdr.ether_vtag), p);
} else if (etype == ETHERTYPE_VLAN) {
vlan = mlx4_en_gethdr(m, off, sizeof(*vlan), &buf);
vlan = m_ether_tcpip_hash_gethdr(m, off, sizeof(*vlan), &buf);
if (vlan == NULL)
goto out;
goto done;
if (flags & MLX4_F_HASHL2)
p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
if (flags & MBUF_HASHFLAG_L2)
p = fnv_32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
etype = ntohs(vlan->evl_proto);
off += sizeof(*vlan) - sizeof(*eh);
}
switch (etype) {
#ifdef INET
case ETHERTYPE_IP:
ip = mlx4_en_gethdr(m, off, sizeof(*ip), &buf);
ip = m_ether_tcpip_hash_gethdr(m, off, sizeof(*ip), &buf);
if (ip == NULL)
goto out;
if (flags & MLX4_F_HASHL3) {
p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
}
if (!(flags & MLX4_F_HASHL4))
break;
switch (ip->ip_p) {
if (flags & MBUF_HASHFLAG_L3) {
p = fnv_32_buf(&ip->ip_src, sizeof(struct in_addr), p);
p = fnv_32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
}
if (flags & MBUF_HASHFLAG_L4) {
const uint32_t *ports;
int iphlen;
switch (ip->ip_p) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_SCTP:
@ -161,29 +143,39 @@ mlx4_en_hashmbuf(uint32_t flags, struct mbuf *m, uint32_t key)
if (iphlen < sizeof(*ip))
break;
off += iphlen;
ports = mlx4_en_gethdr(m, off, sizeof(*ports), &buf);
ports = m_ether_tcpip_hash_gethdr(m,
off, sizeof(*ports), &buf);
if (ports == NULL)
break;
p = hash32_buf(ports, sizeof(*ports), p);
p = fnv_32_buf(ports, sizeof(*ports), p);
break;
default:
break;
}
}
break;
#endif
#ifdef INET6
case ETHERTYPE_IPV6:
if (!(flags & MLX4_F_HASHL3))
break;
ip6 = mlx4_en_gethdr(m, off, sizeof(*ip6), &buf);
ip6 = m_ether_tcpip_hash_gethdr(m, off, sizeof(*ip6), &buf);
if (ip6 == NULL)
goto out;
break;
if (flags & MBUF_HASHFLAG_L3) {
p = fnv_32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
p = fnv_32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
}
if (flags & MBUF_HASHFLAG_L4) {
uint32_t flow;
p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
/* IPv6 flow label */
flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
p = fnv_32_buf(&flow, sizeof(flow), p);
}
break;
#endif
default:
break;
}
out:
done:
return (p);
}

View File

@ -4,7 +4,7 @@
KMOD = mlxen
SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
SRCS += en_cq.c en_main.c en_netdev.c en_port.c en_resources.c
SRCS += en_rx.c en_tx.c utils.c
SRCS += en_rx.c en_tx.c
SRCS += opt_inet.h opt_inet6.h
CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4
CFLAGS+= -I${.CURDIR}/../../ofed/include/

View File

@ -35,7 +35,6 @@ __FBSDID("$FreeBSD$");
#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/fnv_hash.h>
#include <sys/malloc.h>
#include <sys/kernel.h> /* hz */
#include <sys/socket.h> /* for net/if.h */
@ -758,16 +757,13 @@ void
lacp_attach(struct lagg_softc *sc)
{
struct lacp_softc *lsc;
uint32_t seed;
lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO);
sc->sc_psc = lsc;
lsc->lsc_softc = sc;
seed = arc4random();
lsc->lsc_hashkey = FNV1_32_INIT;
lsc->lsc_hashkey = fnv_32_buf(&seed, sizeof(seed), lsc->lsc_hashkey);
lsc->lsc_hashkey = m_ether_tcpip_hash_init();
lsc->lsc_active_aggregator = NULL;
lsc->lsc_strict_mode = 1;
LACP_LOCK_INIT(lsc);
@ -843,7 +839,7 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
hash = m->m_pkthdr.flowid >> sc->flowid_shift;
else
hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey);
hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
hash %= pm->pm_count;
lp = pm->pm_map[hash];

View File

@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fnv_hash.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/taskqueue.h>
@ -131,7 +130,6 @@ static int lagg_media_change(struct ifnet *);
static void lagg_media_status(struct ifnet *, struct ifmediareq *);
static struct lagg_port *lagg_link_active(struct lagg_softc *,
struct lagg_port *);
static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
/* Simple round robin */
static void lagg_rr_attach(struct lagg_softc *);
@ -490,7 +488,7 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
sc->flowid_shift = V_def_flowid_shift;
/* Hash all layers by default */
sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4;
lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
@ -1349,7 +1347,15 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
LAGG_WUNLOCK(sc);
break;
case SIOCGLAGGFLAGS:
rf->rf_flags = sc->sc_flags;
rf->rf_flags = 0;
LAGG_RLOCK(sc, &tracker);
if (sc->sc_flags & MBUF_HASHFLAG_L2)
rf->rf_flags |= LAGG_F_HASHL2;
if (sc->sc_flags & MBUF_HASHFLAG_L3)
rf->rf_flags |= LAGG_F_HASHL3;
if (sc->sc_flags & MBUF_HASHFLAG_L4)
rf->rf_flags |= LAGG_F_HASHL4;
LAGG_RUNLOCK(sc, &tracker);
break;
case SIOCSLAGGHASH:
error = priv_check(td, PRIV_NET_LAGG);
@ -1360,8 +1366,13 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
LAGG_WLOCK(sc);
sc->sc_flags &= ~LAGG_F_HASHMASK;
sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
sc->sc_flags = 0;
if (rf->rf_flags & LAGG_F_HASHL2)
sc->sc_flags |= MBUF_HASHFLAG_L2;
if (rf->rf_flags & LAGG_F_HASHL3)
sc->sc_flags |= MBUF_HASHFLAG_L3;
if (rf->rf_flags & LAGG_F_HASHL4)
sc->sc_flags |= MBUF_HASHFLAG_L4;
LAGG_WUNLOCK(sc);
break;
case SIOCGLAGGPORT:
@ -1806,120 +1817,6 @@ lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
return (rval);
}
/*
 * Return a pointer to `len` bytes of packet data starting at offset `off`.
 *
 * - If the packet header length (m_pkthdr.len) is shorter than off + len,
 *   the bytes are not available and NULL is returned.
 * - If this mbuf's own length (m_len) is shorter than off + len, the bytes
 *   are copied into the caller-supplied `buf` with m_copydata() and `buf`
 *   is returned; `buf` must therefore hold at least `len` bytes.
 * - Otherwise a pointer directly into the mbuf data (mtod(m) + off) is
 *   returned and `buf` is left untouched.
 */
static const void *
lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
{
/* Requested range lies beyond the end of the packet. */
if (m->m_pkthdr.len < (off + len)) {
return (NULL);
} else if (m->m_len < (off + len)) {
/* Range is not fully inside this mbuf: gather it into buf. */
m_copydata(m, off, len, buf);
return (buf);
}
/* Range is fully inside this mbuf: hand back an in-place pointer. */
return (mtod(m, char *) + off);
}
/*
 * Compute a 32-bit hash over selected header fields of the frame in `m`.
 *
 * The hash starts from `key` and is extended with fnv_32_buf() for each
 * field selected by the LAGG_F_HASHL2 / LAGG_F_HASHL3 / LAGG_F_HASHL4 bits
 * in sc->sc_flags:
 *   L2: Ethernet source and destination addresses, plus the VLAN tag.
 *   L3: IPv4 or IPv6 source and destination addresses; for IPv6 also the
 *       flow label (this branch does not consult the L4 flag).
 *   L4: for IPv4 TCP/UDP/SCTP, the 32-bit word that follows the IP header
 *       (referred to as `ports` below).
 *
 * If a header cannot be obtained, the value accumulated so far is returned;
 * if the mbuf is shorter than an Ethernet header, `key` is returned as is.
 */
uint32_t
lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
{
uint16_t etype;
uint32_t p = key;
int off;
struct ether_header *eh;
const struct ether_vlan_header *vlan;
#ifdef INET
const struct ip *ip;
const uint32_t *ports;
int iphlen;
#endif
#ifdef INET6
const struct ip6_hdr *ip6;
uint32_t flow;
#endif
/* Scratch space lagg_gethdr() copies into when a header is not in place. */
union {
#ifdef INET
struct ip ip;
#endif
#ifdef INET6
struct ip6_hdr ip6;
#endif
struct ether_vlan_header vlan;
uint32_t port;
} buf;
/* The Ethernet header is read in place, so it must fit in m_len. */
off = sizeof(*eh);
if (m->m_len < off)
goto out;
eh = mtod(m, struct ether_header *);
etype = ntohs(eh->ether_type);
if (sc->sc_flags & LAGG_F_HASHL2) {
p = fnv_32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
p = fnv_32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
}
/* Special handling for encapsulating VLAN frames */
if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
/* Tag is carried in the packet header rather than in the frame. */
p = fnv_32_buf(&m->m_pkthdr.ether_vtag,
sizeof(m->m_pkthdr.ether_vtag), p);
} else if (etype == ETHERTYPE_VLAN) {
/* Tag is in the frame: read it, then continue with the inner type. */
vlan = lagg_gethdr(m, off, sizeof(*vlan), &buf);
if (vlan == NULL)
goto out;
if (sc->sc_flags & LAGG_F_HASHL2)
p = fnv_32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
etype = ntohs(vlan->evl_proto);
/* Advance by the size difference between the two header layouts. */
off += sizeof(*vlan) - sizeof(*eh);
}
/* Note: neither switch below has a default case. */
switch (etype) {
#ifdef INET
case ETHERTYPE_IP:
ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
if (ip == NULL)
goto out;
if (sc->sc_flags & LAGG_F_HASHL3) {
p = fnv_32_buf(&ip->ip_src, sizeof(struct in_addr), p);
p = fnv_32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
}
if (!(sc->sc_flags & LAGG_F_HASHL4))
break;
switch (ip->ip_p) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_SCTP:
/* ip_hl counts 32-bit words; reject lengths below the fixed header. */
iphlen = ip->ip_hl << 2;
if (iphlen < sizeof(*ip))
break;
off += iphlen;
/* Hash the first 32 bits that follow the IP header. */
ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
if (ports == NULL)
break;
p = fnv_32_buf(ports, sizeof(*ports), p);
break;
}
break;
#endif
#ifdef INET6
case ETHERTYPE_IPV6:
/* The whole IPv6 branch, including the flow label, is gated on L3. */
if (!(sc->sc_flags & LAGG_F_HASHL3))
break;
ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
if (ip6 == NULL)
goto out;
p = fnv_32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
p = fnv_32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
p = fnv_32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
break;
#endif
}
out:
/* Return whatever has been accumulated up to this point. */
return (p);
}
int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
@ -2087,15 +1984,12 @@ lagg_lb_attach(struct lagg_softc *sc)
{
struct lagg_port *lp;
struct lagg_lb *lb;
uint32_t seed;
lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
seed = arc4random();
lb->lb_key = FNV1_32_INIT;
lb->lb_key = fnv_32_buf(&seed, sizeof(seed), lb->lb_key);
lb->lb_key = m_ether_tcpip_hash_init();
sc->sc_psc = lb;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
@ -2160,7 +2054,7 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
p = m->m_pkthdr.flowid >> sc->flowid_shift;
else
p = lagg_hashmbuf(sc, m, lb->lb_key);
p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
p %= sc->sc_count;
lp = lb->lb_ports[p];

View File

@ -279,7 +279,6 @@ extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
extern void (*lagg_linkstate_p)(struct ifnet *, int );
int lagg_enqueue(struct ifnet *, struct mbuf *);
uint32_t lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t);
SYSCTL_DECL(_net_link_lagg);

View File

@ -49,7 +49,6 @@
#include <netinet/udp.h>
#include "mlx4_en.h"
#include "utils.h"
enum {
MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */
@ -699,10 +698,10 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct mbuf *mb,
tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
}
static unsigned long hashrandom;
static uint32_t hashrandom;
static void hashrandom_init(void *arg)
{
hashrandom = random();
hashrandom = m_ether_tcpip_hash_init();
}
SYSINIT(hashrandom_init, SI_SUB_KLD, SI_ORDER_SECOND, &hashrandom_init, NULL);
@ -724,7 +723,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb)
if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE)
queue_index = mb->m_pkthdr.flowid;
else
queue_index = mlx4_en_hashmbuf(MLX4_F_HASHL3 | MLX4_F_HASHL4, mb, hashrandom);
queue_index = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 | MBUF_HASHFLAG_L4, mb, hashrandom);
return ((queue_index % rings_p_up) + (up * rings_p_up));
}

View File

@ -1,44 +0,0 @@
/*
* Copyright (c) 2014 Mellanox Technologies Ltd. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _MLX4_UTILS_H_
#define _MLX4_UTILS_H_
/* Lagg flags */
#define MLX4_F_HASHL2 0x00000001 /* hash layer 2 */
#define MLX4_F_HASHL3 0x00000002 /* hash layer 3 */
#define MLX4_F_HASHL4 0x00000004 /* hash layer 4 */
#define MLX4_F_HASHMASK 0x00000007
uint32_t mlx4_en_hashmbuf(uint32_t flags, struct mbuf *m, uint32_t key);
#endif /* _MLX4_UTILS_H_ */

View File

@ -1190,6 +1190,15 @@ rt_m_getfib(struct mbuf *m)
((_m)->m_pkthdr.fibnum) = (_fib); \
} while (0)
/* flags passed as first argument for "m_ether_tcpip_hash()" */
#define MBUF_HASHFLAG_L2 (1 << 2)
#define MBUF_HASHFLAG_L3 (1 << 3)
#define MBUF_HASHFLAG_L4 (1 << 4)
/* mbuf hashing helper routines */
uint32_t m_ether_tcpip_hash_init(void);
uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t);
#ifdef MBUF_PROFILING
void m_profile(struct mbuf *m);
#define M_PROFILE(m) m_profile(m)