netmap: add load balancer program
Add the lb program, which is able to load-balance input traffic received from a netmap port over M groups, with N netmap pipes in each group. Each received packet is forwarded to one of the pipes chosen from each group (using an L3/L4 connection-consistent hash function). This also adds a man page for lb and some cross-references in related man pages. Reviewed by: bcr, 0mp Approved by: gnn (mentor) Differential Revision: https://reviews.freebsd.org/D17735
This commit is contained in:
parent
9eb7d595e1
commit
689f146bdd
@ -27,7 +27,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd October 23, 2018
|
||||
.Dd October 28, 2018
|
||||
.Dt NETMAP 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -1073,8 +1073,11 @@ Other
|
||||
clients attached to the same switch can now communicate
|
||||
with the network card or the host.
|
||||
.Sh SEE ALSO
|
||||
.Xr pkt-gen 8 ,
|
||||
.Xr bridge 8
|
||||
.Xr vale 4 ,
|
||||
.Xr vale-ctl 4 ,
|
||||
.Xr bridge 8 ,
|
||||
.Xr lb 8 ,
|
||||
.Xr pkt-gen 8
|
||||
.Pp
|
||||
.Pa http://info.iet.unipi.it/~luigi/netmap/
|
||||
.Pp
|
||||
|
@ -3,7 +3,7 @@
|
||||
#
|
||||
# For multiple programs using a single source file each,
|
||||
# we can just define 'progs' and create custom targets.
|
||||
PROGS = pkt-gen nmreplay bridge vale-ctl
|
||||
PROGS = pkt-gen nmreplay bridge vale-ctl lb
|
||||
|
||||
CLEANFILES = $(PROGS) *.o
|
||||
MAN=
|
||||
@ -34,3 +34,6 @@ nmreplay: nmreplay.o
|
||||
|
||||
vale-ctl: vale-ctl.o
|
||||
$(CC) $(CFLAGS) -o vale-ctl vale-ctl.o
|
||||
|
||||
lb: lb.o pkt_hash.o
|
||||
$(CC) $(CFLAGS) -o lb lb.o pkt_hash.o $(LDFLAGS)
|
||||
|
@ -1,9 +1,13 @@
|
||||
$FreeBSD$
|
||||
|
||||
This directory contains examples that use netmap
|
||||
This directory contains applications that use the netmap API
|
||||
|
||||
pkt-gen a packet sink/source using the netmap API
|
||||
pkt-gen a multi-function packet generator and traffic sink
|
||||
|
||||
bridge a two-port jumper wire, also using the native API
|
||||
bridge a two-port jumper wire, also using the netmap API
|
||||
|
||||
vale-ctl the program to control VALE bridges
|
||||
vale-ctl the program to control and inspect VALE switches
|
||||
|
||||
lb an L3/L4 load balancer
|
||||
|
||||
nmreplay a tool to playback a pcap file to a netmap port
|
||||
|
@ -23,7 +23,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd October 23, 2018
|
||||
.Dd October 28, 2018
|
||||
.Dt BRIDGE 8
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -71,7 +71,8 @@ Disable zero-copy mode.
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
.Xr netmap 4 ,
|
||||
.Xr pkt-gen 8
|
||||
.Xr pkt-gen 8 ,
|
||||
.Xr lb 8
|
||||
.Sh AUTHORS
|
||||
.An -nosplit
|
||||
.Nm
|
||||
|
@ -7,30 +7,37 @@
|
||||
|
||||
/* counters to accumulate statistics */
|
||||
struct my_ctrs {
|
||||
uint64_t pkts, bytes, events, drop;
|
||||
uint64_t pkts, bytes, events;
|
||||
uint64_t drop, drop_bytes;
|
||||
uint64_t min_space;
|
||||
struct timeval t;
|
||||
uint32_t oq_n; /* number of elements in overflow queue (used in lb) */
|
||||
};
|
||||
|
||||
/* very crude code to print a number in normalized form.
|
||||
* Caller has to make sure that the buffer is large enough.
|
||||
*/
|
||||
static const char *
|
||||
norm2(char *buf, double val, char *fmt)
|
||||
norm2(char *buf, double val, char *fmt, int normalize)
|
||||
{
|
||||
char *units[] = { "", "K", "M", "G", "T" };
|
||||
u_int i;
|
||||
|
||||
for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++)
|
||||
val /= 1000;
|
||||
if (normalize)
|
||||
for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++)
|
||||
val /= 1000;
|
||||
else
|
||||
i=0;
|
||||
sprintf(buf, fmt, val, units[i]);
|
||||
return buf;
|
||||
}
|
||||
|
||||
static __inline const char *
|
||||
norm(char *buf, double val)
|
||||
norm(char *buf, double val, int normalize)
|
||||
{
|
||||
return norm2(buf, val, "%.3f %s");
|
||||
if (normalize)
|
||||
return norm2(buf, val, "%.3f %s", normalize);
|
||||
else
|
||||
return norm2(buf, val, "%.0f %s", normalize);
|
||||
}
|
||||
|
||||
static __inline int
|
||||
@ -89,7 +96,7 @@ timespec_sub(struct timespec a, struct timespec b)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
static __inline uint64_t
|
||||
wait_for_next_report(struct timeval *prev, struct timeval *cur,
|
||||
int report_interval)
|
||||
{
|
||||
@ -106,3 +113,4 @@ wait_for_next_report(struct timeval *prev, struct timeval *cur,
|
||||
return delta.tv_sec* 1000000 + delta.tv_usec;
|
||||
}
|
||||
#endif /* CTRS_H_ */
|
||||
|
||||
|
130
tools/tools/netmap/lb.8
Normal file
130
tools/tools/netmap/lb.8
Normal file
@ -0,0 +1,130 @@
|
||||
.\" Copyright (c) 2017 Corelight, Inc. and Universita` di Pisa
|
||||
.\" All rights reserved.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd October 28, 2018
|
||||
.Dt LB 8
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm lb
|
||||
.Nd netmap-based load balancer
|
||||
.Sh SYNOPSIS
|
||||
.Bk -words
|
||||
.Bl -tag -width "lb"
|
||||
.It Nm
|
||||
.Op Fl i Ar port
|
||||
.Op Fl p Ar pipe-group
|
||||
.Op Fl B Ar extra-buffers
|
||||
.Op Fl b Ar batch-size
|
||||
.Op Fl w Ar wait-link
|
||||
.El
|
||||
.Ek
|
||||
.Sh DESCRIPTION
|
||||
.Nm
|
||||
reads packets from an input netmap port and sends them to a number of netmap pipes,
|
||||
trying to balance the packets received by each pipe.
|
||||
Packets belonging to the
|
||||
same connection will always be sent to the same pipe.
|
||||
.Pp
|
||||
Command line options are listed below.
|
||||
.Bl -tag -width Ds
|
||||
.It Fl i Ar port
|
||||
Name of a netmap port.
|
||||
It must be supplied exactly once to identify
|
||||
the input port.
|
||||
Any netmap port type (e.g., physical interface, VALE switch, pipe,
|
||||
monitor port) can be used.
|
||||
.It Fl p Ar name Ns Cm \&: Ns Ar number | number
|
||||
Add a new pipe group of the given number of pipes.
|
||||
The pipe group will receive all the packets read from the input port, balanced
|
||||
among the available pipes.
|
||||
The receiving ends of the pipes
|
||||
will be called
|
||||
.Dq Ar name Ns Em }0
|
||||
to
|
||||
.Dq Ar name No Ns Em } Ns Aq Ar number No - 1 .
|
||||
The name is optional and defaults to
|
||||
the name of the input port (stripped of any netmap operator).
|
||||
If the name is omitted, the colon can be omitted as well.
|
||||
.Pp
|
||||
This option can be supplied multiple times to define a sequence of pipe groups,
|
||||
each group receiving all the packets in turn.
|
||||
.Pp
|
||||
If no
|
||||
.Fl p
|
||||
option is given, a single group of two pipes with default name is assumed.
|
||||
.Pp
|
||||
It is allowed to use the same name for several groups.
|
||||
The pipe numbering in each
|
||||
group will start from where the previous identically-named group left off.
|
||||
.It Fl B Ar extra-buffers
|
||||
Try to reserve the given number of extra buffers.
|
||||
Extra buffers are shared among
|
||||
all pipes in all groups and work as an extension of the pipe rings.
|
||||
If a pipe ring is full for whatever reason,
|
||||
.Nm
|
||||
tries to use extra buffers before dropping any packets directed to that pipe.
|
||||
.Pp
|
||||
If all extra buffers are busy, some are stolen from the pipe with the longest
|
||||
backlog.
|
||||
This gives preference to newer packets over old ones, and prevents a
|
||||
stalled pipe from depleting the pool of extra buffers.
|
||||
.It Fl b Ar batch-size
|
||||
Maximum number of packets processed between two read operations from the input port.
|
||||
Higher values of batch-size improve performance by amortizing read operations,
|
||||
but increase the risk of filling up the port internal queues.
|
||||
.It Fl w Ar wait-link
|
||||
Number of seconds to wait before transmitting.
|
||||
It defaults to 2, and may be useful when talking to physical
|
||||
ports to let link negotiation complete before starting transmission.
|
||||
.El
|
||||
.Sh LIMITATIONS
|
||||
The group chaining assumes that the applications on the receiving end of the
|
||||
pipes are read-only: they must not modify the buffers or the pipe ring slots
|
||||
in any way.
|
||||
.Pp
|
||||
The group naming is currently implemented by creating a persistent VALE port
|
||||
with the given name.
|
||||
If
|
||||
.Nm
|
||||
does not exit cleanly the ports will not be removed.
|
||||
Please use
|
||||
.Xr vale-ctl 4
|
||||
to remove any stale persistent VALE port.
|
||||
.Sh SEE ALSO
|
||||
.Xr netmap 4 ,
|
||||
.Xr bridge 8 ,
|
||||
.Xr pkt-gen 8
|
||||
.Pp
|
||||
.Pa http://info.iet.unipi.it/~luigi/netmap/
|
||||
.Sh AUTHORS
|
||||
.An -nosplit
|
||||
.Nm
|
||||
has been written by
|
||||
.An Seth Hall
|
||||
at Corelight, USA.
|
||||
The facilities related to extra buffers and pipe groups have been added by
|
||||
.An Giuseppe Lettieri
|
||||
at University of Pisa, Italy, under contract by Corelight, USA.
|
1027
tools/tools/netmap/lb.c
Normal file
1027
tools/tools/netmap/lb.c
Normal file
File diff suppressed because it is too large
Load Diff
396
tools/tools/netmap/pkt_hash.c
Normal file
396
tools/tools/netmap/pkt_hash.c
Normal file
@ -0,0 +1,396 @@
|
||||
/*
|
||||
** Copyright (c) 2015, Asim Jamshed, Robin Sommer, Seth Hall
|
||||
** and the International Computer Science Institute. All rights reserved.
|
||||
**
|
||||
** Redistribution and use in source and binary forms, with or without
|
||||
** modification, are permitted provided that the following conditions are met:
|
||||
**
|
||||
** (1) Redistributions of source code must retain the above copyright
|
||||
** notice, this list of conditions and the following disclaimer.
|
||||
**
|
||||
** (2) Redistributions in binary form must reproduce the above copyright
|
||||
** notice, this list of conditions and the following disclaimer in the
|
||||
** documentation and/or other materials provided with the distribution.
|
||||
**
|
||||
**
|
||||
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
** POSSIBILITY OF SUCH DAMAGE.
|
||||
**/
|
||||
/* $FreeBSD$ */
|
||||
/* for func prototypes */
|
||||
#include "pkt_hash.h"
|
||||
|
||||
/* Make Linux headers choose BSD versions of some of the data structures */
|
||||
#define __FAVOR_BSD
|
||||
|
||||
/* for types */
|
||||
#include <sys/types.h>
|
||||
/* for [n/h]to[h/n][ls] */
|
||||
#include <netinet/in.h>
|
||||
/* iphdr */
|
||||
#include <netinet/ip.h>
|
||||
/* ipv6hdr */
|
||||
#include <netinet/ip6.h>
|
||||
/* tcphdr */
|
||||
#include <netinet/tcp.h>
|
||||
/* udphdr */
|
||||
#include <netinet/udp.h>
|
||||
/* eth hdr */
|
||||
#include <net/ethernet.h>
|
||||
/* for memset */
|
||||
#include <string.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
//#include <libnet.h>
|
||||
/*---------------------------------------------------------------------*/
|
||||
/**
 * Fill cache[0 .. cache_len-1] with successive 32-bit windows of the bit
 * stream obtained by repeating the two-byte key {0x50, 0x6d} forever.
 *
 * cache[0] is the window at bit offset 0 (0x506d506d); every following
 * entry is the previous one shifted left by one bit with the next bit of
 * the key stream appended.  Because the key is 16 bits long the sequence
 * repeats every 16 entries.
 *
 * The table is built only once, the first time sym_hash_fn is called.
 *
 * @param cache      output array with room for at least cache_len entries
 * @param cache_len  number of entries to produce (nothing is written if <= 0)
 */
static void
build_sym_key_cache(uint32_t *cache, int cache_len)
{
	static const uint8_t key[] = { 0x50, 0x6d };

	/* The first 32 bits of the key stream: key[0] key[1] key[0] key[1]. */
	uint32_t result = (((uint32_t)key[0]) << 24) |
	    (((uint32_t)key[1]) << 16) |
	    (((uint32_t)key[0]) << 8)  |
	    ((uint32_t)key[1]);

	/* Index of the next key-stream bit that is not yet in 'result'. */
	uint32_t idx = 32;
	int i;

	for (i = 0; i < cache_len; i++, idx++) {
		uint8_t shift = (idx % 8);
		uint32_t bit;

		cache[i] = result;
		/* Pick bit 'shift' (counted from the MSB) of the current key byte. */
		bit = ((key[(idx/8) & 1] << shift) & 0x80) ? 1 : 0;
		result = ((result << 1) | bit);
	}
}
|
||||
|
||||
/*
 * Build the per-byte lookup table used by sym_hash_fn.
 *
 * For every byte position i (0..3) and every byte value j (0..255),
 * byte_cache[j][i] is the XOR of the key-cache words key_cache[8*i + k]
 * for each bit k (counted from the most significant bit) that is set in j.
 * Consequently byte_cache[0][i] is 0 for every i.
 *
 * Only the first 32 entries of the key cache are read here, although
 * KEY_CACHE_LEN entries are generated.
 */
static void
build_byte_cache(uint32_t byte_cache[256][4])
{
#define KEY_CACHE_LEN			96
	int i, j, k;
	uint32_t key_cache[KEY_CACHE_LEN];

	build_sym_key_cache(key_cache, KEY_CACHE_LEN);

	for (i = 0; i < 4; i++) {
		for (j = 0; j < 256; j++) {
			uint8_t b = j;

			byte_cache[j][i] = 0;
			/* Walk the bits of j from MSB to LSB. */
			for (k = 0; k < 8; k++) {
				if (b & 0x80)
					byte_cache[j][i] ^= key_cache[8 * i + k];
				b <<= 1U;
			}
		}
	}
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------*/
|
||||
/**
 * Computes the symmetric hash based on the 4-tuple header data.
 *
 * Each input byte selects one entry of a lazily built lookup table and all
 * selected entries are XORed together.  The bytes are extracted with shifts
 * (most significant byte first), so the result does not depend on the byte
 * order of the host; on little-endian hosts this yields exactly the bytes
 * the previous pointer-cast implementation used.
 *
 * @param sip  source address, host byte order
 * @param dip  destination address, host byte order
 * @param sp   source port, host byte order
 * @param dp   destination port, host byte order; although declared as a
 *             32-bit value, only its low 16 bits take part in the hash
 * @return     the 32-bit hash
 *
 * The lookup table is initialized on the first call through a plain static
 * flag, without any synchronization.
 */
static uint32_t
sym_hash_fn(uint32_t sip, uint32_t dip, uint16_t sp, uint32_t dp)
{
	static int first_time = 1;
	static uint32_t byte_cache[256][4];

	if (first_time) {
		build_byte_cache(byte_cache);
		first_time = 0;
	}

	return byte_cache[(sip >> 24) & 0xff][0] ^
	    byte_cache[(sip >> 16) & 0xff][1] ^
	    byte_cache[(sip >> 8) & 0xff][2] ^
	    byte_cache[sip & 0xff][3] ^
	    byte_cache[(dip >> 24) & 0xff][0] ^
	    byte_cache[(dip >> 16) & 0xff][1] ^
	    byte_cache[(dip >> 8) & 0xff][2] ^
	    byte_cache[dip & 0xff][3] ^
	    byte_cache[(sp >> 8) & 0xff][0] ^
	    byte_cache[sp & 0xff][1] ^
	    byte_cache[(dp >> 8) & 0xff][2] ^
	    byte_cache[dp & 0xff][3];
}
|
||||
static uint32_t decode_gre_hash(const uint8_t *, uint8_t, uint8_t);
|
||||
/*---------------------------------------------------------------------*/
|
||||
/**
|
||||
** Parser + hash function for the IPv4 packet
|
||||
**/
|
||||
static uint32_t
|
||||
decode_ip_n_hash(struct ip *iph, uint8_t hash_split, uint8_t seed)
|
||||
{
|
||||
uint32_t rc = 0;
|
||||
|
||||
if (hash_split == 2) {
|
||||
rc = sym_hash_fn(ntohl(iph->ip_src.s_addr),
|
||||
ntohl(iph->ip_dst.s_addr),
|
||||
ntohs(0xFFFD) + seed,
|
||||
ntohs(0xFFFE) + seed);
|
||||
} else {
|
||||
struct tcphdr *tcph = NULL;
|
||||
struct udphdr *udph = NULL;
|
||||
|
||||
switch (iph->ip_p) {
|
||||
case IPPROTO_TCP:
|
||||
tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ip_hl<<2));
|
||||
rc = sym_hash_fn(ntohl(iph->ip_src.s_addr),
|
||||
ntohl(iph->ip_dst.s_addr),
|
||||
ntohs(tcph->th_sport) + seed,
|
||||
ntohs(tcph->th_dport) + seed);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
udph = (struct udphdr *)((uint8_t *)iph + (iph->ip_hl<<2));
|
||||
rc = sym_hash_fn(ntohl(iph->ip_src.s_addr),
|
||||
ntohl(iph->ip_dst.s_addr),
|
||||
ntohs(udph->uh_sport) + seed,
|
||||
ntohs(udph->uh_dport) + seed);
|
||||
break;
|
||||
case IPPROTO_IPIP:
|
||||
/* tunneling */
|
||||
rc = decode_ip_n_hash((struct ip *)((uint8_t *)iph + (iph->ip_hl<<2)),
|
||||
hash_split, seed);
|
||||
break;
|
||||
case IPPROTO_GRE:
|
||||
rc = decode_gre_hash((uint8_t *)iph + (iph->ip_hl<<2),
|
||||
hash_split, seed);
|
||||
break;
|
||||
case IPPROTO_ICMP:
|
||||
case IPPROTO_ESP:
|
||||
case IPPROTO_PIM:
|
||||
case IPPROTO_IGMP:
|
||||
default:
|
||||
/*
|
||||
** the hash strength (although weaker but) should still hold
|
||||
** even with 2 fields
|
||||
**/
|
||||
rc = sym_hash_fn(ntohl(iph->ip_src.s_addr),
|
||||
ntohl(iph->ip_dst.s_addr),
|
||||
ntohs(0xFFFD) + seed,
|
||||
ntohs(0xFFFE) + seed);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
/*---------------------------------------------------------------------*/
|
||||
/**
|
||||
** Parser + hash function for the IPv6 packet
|
||||
**/
|
||||
static uint32_t
|
||||
decode_ipv6_n_hash(struct ip6_hdr *ipv6h, uint8_t hash_split, uint8_t seed)
|
||||
{
|
||||
uint32_t saddr, daddr;
|
||||
uint32_t rc = 0;
|
||||
|
||||
/* Get only the first 4 octets */
|
||||
saddr = ipv6h->ip6_src.s6_addr[0] |
|
||||
(ipv6h->ip6_src.s6_addr[1] << 8) |
|
||||
(ipv6h->ip6_src.s6_addr[2] << 16) |
|
||||
(ipv6h->ip6_src.s6_addr[3] << 24);
|
||||
daddr = ipv6h->ip6_dst.s6_addr[0] |
|
||||
(ipv6h->ip6_dst.s6_addr[1] << 8) |
|
||||
(ipv6h->ip6_dst.s6_addr[2] << 16) |
|
||||
(ipv6h->ip6_dst.s6_addr[3] << 24);
|
||||
|
||||
if (hash_split == 2) {
|
||||
rc = sym_hash_fn(ntohl(saddr),
|
||||
ntohl(daddr),
|
||||
ntohs(0xFFFD) + seed,
|
||||
ntohs(0xFFFE) + seed);
|
||||
} else {
|
||||
struct tcphdr *tcph = NULL;
|
||||
struct udphdr *udph = NULL;
|
||||
|
||||
switch(ntohs(ipv6h->ip6_ctlun.ip6_un1.ip6_un1_nxt)) {
|
||||
case IPPROTO_TCP:
|
||||
tcph = (struct tcphdr *)(ipv6h + 1);
|
||||
rc = sym_hash_fn(ntohl(saddr),
|
||||
ntohl(daddr),
|
||||
ntohs(tcph->th_sport) + seed,
|
||||
ntohs(tcph->th_dport) + seed);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
udph = (struct udphdr *)(ipv6h + 1);
|
||||
rc = sym_hash_fn(ntohl(saddr),
|
||||
ntohl(daddr),
|
||||
ntohs(udph->uh_sport) + seed,
|
||||
ntohs(udph->uh_dport) + seed);
|
||||
break;
|
||||
case IPPROTO_IPIP:
|
||||
/* tunneling */
|
||||
rc = decode_ip_n_hash((struct ip *)(ipv6h + 1),
|
||||
hash_split, seed);
|
||||
break;
|
||||
case IPPROTO_IPV6:
|
||||
/* tunneling */
|
||||
rc = decode_ipv6_n_hash((struct ip6_hdr *)(ipv6h + 1),
|
||||
hash_split, seed);
|
||||
break;
|
||||
case IPPROTO_GRE:
|
||||
rc = decode_gre_hash((uint8_t *)(ipv6h + 1), hash_split, seed);
|
||||
break;
|
||||
case IPPROTO_ICMP:
|
||||
case IPPROTO_ESP:
|
||||
case IPPROTO_PIM:
|
||||
case IPPROTO_IGMP:
|
||||
default:
|
||||
/*
|
||||
** the hash strength (although weaker but) should still hold
|
||||
** even with 2 fields
|
||||
**/
|
||||
rc = sym_hash_fn(ntohl(saddr),
|
||||
ntohl(daddr),
|
||||
ntohs(0xFFFD) + seed,
|
||||
ntohs(0xFFFE) + seed);
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
/*---------------------------------------------------------------------*/
|
||||
/**
 * Fallback hash for frames whose protocol is not decoded elsewhere
 * (a temporary solution until hashes for other protocols are filled in;
 * see decode_vlan_n_hash and pkt_hdr_hash).
 *
 * The hash is computed over the last four octets of the source and
 * destination MAC addresses, with fixed placeholder values in place of
 * the ports.
 *
 * @param ethh  start of the Ethernet header
 * @param seed  value added to both placeholder ports before hashing
 * @return      the 32-bit hash
 */
static uint32_t
decode_others_n_hash(struct ether_header *ethh, uint8_t seed)
{
	uint32_t saddr, daddr, rc;

	/*
	 * The casts keep the shifts unsigned: a uint8_t promoted to int and
	 * shifted by 24 could overflow into the sign bit.
	 */
	saddr = (uint32_t)ethh->ether_shost[5] |
		((uint32_t)ethh->ether_shost[4] << 8) |
		((uint32_t)ethh->ether_shost[3] << 16) |
		((uint32_t)ethh->ether_shost[2] << 24);
	daddr = (uint32_t)ethh->ether_dhost[5] |
		((uint32_t)ethh->ether_dhost[4] << 8) |
		((uint32_t)ethh->ether_dhost[3] << 16) |
		((uint32_t)ethh->ether_dhost[2] << 24);

	rc = sym_hash_fn(ntohl(saddr),
			 ntohl(daddr),
			 ntohs(0xFFFD) + seed,
			 ntohs(0xFFFE) + seed);

	return rc;
}
|
||||
/*---------------------------------------------------------------------*/
|
||||
/**
|
||||
** Parser + hash function for VLAN packet
|
||||
**/
|
||||
static inline uint32_t
|
||||
decode_vlan_n_hash(struct ether_header *ethh, uint8_t hash_split, uint8_t seed)
|
||||
{
|
||||
uint32_t rc = 0;
|
||||
struct vlanhdr *vhdr = (struct vlanhdr *)(ethh + 1);
|
||||
|
||||
switch (ntohs(vhdr->proto)) {
|
||||
case ETHERTYPE_IP:
|
||||
rc = decode_ip_n_hash((struct ip *)(vhdr + 1),
|
||||
hash_split, seed);
|
||||
break;
|
||||
case ETHERTYPE_IPV6:
|
||||
rc = decode_ipv6_n_hash((struct ip6_hdr *)(vhdr + 1),
|
||||
hash_split, seed);
|
||||
break;
|
||||
case ETHERTYPE_ARP:
|
||||
default:
|
||||
/* others */
|
||||
rc = decode_others_n_hash(ethh, seed);
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------*/
|
||||
/**
 * General parser + hash function: the entry point of this file.
 *
 * Interprets 'buffer' as an Ethernet frame and dispatches on its
 * ether_type: IPv4, IPv6 and VLAN-tagged frames go to their decoders, and
 * everything else (ARP included) is hashed on the MAC addresses.
 *
 * @param buffer      start of the Ethernet header
 * @param hash_split  passed through to the protocol decoders
 * @param seed        passed through to the selected decoder
 * @return            the 32-bit hash
 *
 * NOTE: no frame length is passed in, so none of the header accesses
 * performed here or in the decoders is bounds-checked.
 */
uint32_t
pkt_hdr_hash(const unsigned char *buffer, uint8_t hash_split, uint8_t seed)
{
	uint32_t rc = 0;
	struct ether_header *ethh = (struct ether_header *)buffer;

	switch (ntohs(ethh->ether_type)) {
	case ETHERTYPE_IP:
		rc = decode_ip_n_hash((struct ip *)(ethh + 1),
				      hash_split, seed);
		break;
	case ETHERTYPE_IPV6:
		rc = decode_ipv6_n_hash((struct ip6_hdr *)(ethh + 1),
					hash_split, seed);
		break;
	case ETHERTYPE_VLAN:
		rc = decode_vlan_n_hash(ethh, hash_split, seed);
		break;
	case ETHERTYPE_ARP:
	default:
		/* others */
		rc = decode_others_n_hash(ethh, seed);
		break;
	}

	return rc;
}
|
||||
|
||||
/*---------------------------------------------------------------------*/
|
||||
/**
 * Parser + hash function for the GRE packet.
 *
 * Skips the GRE header and hashes the encapsulated IPv4, IPv6 or Ethernet
 * (Transparent Ethernet Bridging) payload.  The header is 4 bytes long,
 * plus 4 bytes for each optional field announced by the flag bits in the
 * first octet: Checksum (0x80), Key (0x20) and Sequence Number (0x10).
 *
 * @param grehdr      start of the GRE header
 * @param hash_split  passed through to the payload decoder
 * @param seed        passed through to the payload decoder
 * @return            the hash of the payload, or 0 when the payload
 *                    protocol is not handled or the Routing flag (0x40)
 *                    is set, since the variable-length routing field
 *                    cannot be skipped safely without a packet length
 */
static uint32_t
decode_gre_hash(const uint8_t *grehdr, uint8_t hash_split, uint8_t seed)
{
	/* Flag bits in the first octet of the GRE header. */
	enum {
		GRE_F_CSUM    = 0x80,
		GRE_F_ROUTING = 0x40,
		GRE_F_KEY     = 0x20,
		GRE_F_SEQ     = 0x10
	};
	uint32_t rc = 0;
	const uint8_t flags = grehdr[0];
	/* Assemble the protocol type bytewise: grehdr + 2 may be unaligned. */
	const uint16_t proto = (uint16_t)((grehdr[2] << 8) | grehdr[3]);
	int len = 4;

	if (flags & GRE_F_ROUTING)
		return rc;
	if (flags & GRE_F_CSUM)
		len += 4;	/* Checksum + Reserved1/Offset */
	if (flags & GRE_F_KEY)
		len += 4;	/* Key */
	if (flags & GRE_F_SEQ)
		len += 4;	/* Sequence Number */

	switch (proto) {
	case ETHERTYPE_IP:
		rc = decode_ip_n_hash((struct ip *)(grehdr + len),
				      hash_split, seed);
		break;
	case ETHERTYPE_IPV6:
		rc = decode_ipv6_n_hash((struct ip6_hdr *)(grehdr + len),
					hash_split, seed);
		break;
	case 0x6558: /* Transparent Ethernet Bridging */
		rc = pkt_hdr_hash(grehdr + len, hash_split, seed);
		break;
	default:
		/* others */
		break;
	}
	return rc;
}
|
||||
/*---------------------------------------------------------------------*/
|
||||
|
79
tools/tools/netmap/pkt_hash.h
Normal file
79
tools/tools/netmap/pkt_hash.h
Normal file
@ -0,0 +1,79 @@
|
||||
/*
|
||||
** Copyright (c) 2015, Asim Jamshed, Robin Sommer, Seth Hall
|
||||
** and the International Computer Science Institute. All rights reserved.
|
||||
**
|
||||
** Redistribution and use in source and binary forms, with or without
|
||||
** modification, are permitted provided that the following conditions are met:
|
||||
**
|
||||
** (1) Redistributions of source code must retain the above copyright
|
||||
** notice, this list of conditions and the following disclaimer.
|
||||
**
|
||||
** (2) Redistributions in binary form must reproduce the above copyright
|
||||
** notice, this list of conditions and the following disclaimer in the
|
||||
** documentation and/or other materials provided with the distribution.
|
||||
**
|
||||
**
|
||||
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
** POSSIBILITY OF SUCH DAMAGE.
|
||||
**/
|
||||
/* $FreeBSD$ */
|
||||
#ifndef LB_PKT_HASH_H
|
||||
#define LB_PKT_HASH_H
|
||||
/*---------------------------------------------------------------------*/
|
||||
/**
|
||||
** Packet header hashing function utility - This file contains functions
|
||||
** that parse the packet headers and compute hash values based on
|
||||
** the header fields. Please see pkt_hash.c for more details...
|
||||
**/
|
||||
/*---------------------------------------------------------------------*/
|
||||
/* for type def'n */
|
||||
#include <stdint.h>
|
||||
/*---------------------------------------------------------------------*/
|
||||
/*
 * Branch-prediction hints.  With GNU-compatible compilers the condition is
 * normalized to 0/1 and passed to __builtin_expect; otherwise the macros
 * expand to the bare condition.
 */
#ifdef __GNUC__
#define likely(x)       __builtin_expect(!!(x), 1)
#define unlikely(x)     __builtin_expect(!!(x), 0)
#else
#define likely(x)       (x)
#define unlikely(x)     (x)
#endif

/*
 * Byte-swap helpers usable in constant expressions.
 *
 * These macros swap the bytes unconditionally, whatever the byte order of
 * the host is.  The argument is evaluated more than once, so it must not
 * have side effects.
 */
#define HTONS(n) (((((unsigned short)(n) & 0xFF)) << 8) | \
		  (((unsigned short)(n) & 0xFF00) >> 8))
/* Swapping twice is the identity, so both directions share one body. */
#define NTOHS(n) HTONS(n)

#define HTONL(n) (((((unsigned long)(n) & 0xFF)) << 24) | \
		  ((((unsigned long)(n) & 0xFF00)) << 8) | \
		  ((((unsigned long)(n) & 0xFF0000)) >> 8) | \
		  ((((unsigned long)(n) & 0xFF000000)) >> 24))
#define NTOHL(n) HTONL(n)
|
||||
/*---------------------------------------------------------------------*/
|
||||
/*
 * VLAN header as overlaid on the bytes that follow the Ethernet header of
 * a tagged frame.  Both fields hold raw packet bytes; 'proto' is converted
 * with ntohs() before being compared with the ETHERTYPE_* values.
 */
typedef struct vlanhdr {
	uint16_t pri_cfi_vlan;	/* tag word; presumably priority, CFI and VLAN id */
	uint16_t proto;		/* type of the encapsulated protocol */
} vlanhdr;
|
||||
/*---------------------------------------------------------------------*/
|
||||
/**
|
||||
** Analyzes the packet header and computes a corresponding
|
||||
** hash function.
|
||||
**/
|
||||
uint32_t
|
||||
pkt_hdr_hash(const unsigned char *buffer,
|
||||
uint8_t hash_split,
|
||||
uint8_t seed);
|
||||
/*---------------------------------------------------------------------*/
|
||||
#endif /* LB_PKT_HASH_H */
|
||||
|
Loading…
Reference in New Issue
Block a user