Merge r261582, r261601, r261610, r261613, r261627, r261640, r261641, r261823,
r261825, r261859, r261875, r261883, r261911, r262027, r262028, r262029,
r262030, r262162 from head.

  Large flowtable revamp. See the commit messages of the merged revisions
  for details.

Sponsored by:	Netflix
This commit is contained in:
glebius 2014-03-04 15:14:47 +00:00
parent 1b9278cc98
commit ed41469327
13 changed files with 868 additions and 1549 deletions

View File

@ -438,6 +438,7 @@ TCP_SIGNATURE opt_inet.h
VLAN_ARRAY opt_vlan.h
XBONEHACK
FLOWTABLE opt_route.h
FLOWTABLE_HASH_ALL opt_route.h
#
# SCTP

File diff suppressed because it is too large Load Diff

View File

@ -1,83 +1,56 @@
/**************************************************************************
Copyright (c) 2008-2010, BitGravity Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the BitGravity Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
$FreeBSD$
***************************************************************************/
/*-
* Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 2008-2010, BitGravity Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the BitGravity Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#ifndef _NET_FLOWTABLE_H_
#define _NET_FLOWTABLE_H_
/*
 * Flowtable statistics counters.
 *
 * The structure is declared outside of the _KERNEL section, so it is
 * visible to userland as well.  Every member is a 64-bit counter; the
 * per-field notes below use the labels under which netstat(1) reports
 * them.
 */
struct flowtable_stat {
uint64_t ft_collisions;		/* collisions */
uint64_t ft_misses;		/* misses */
uint64_t ft_free_checks;	/* free checks */
uint64_t ft_frees;		/* frees */
uint64_t ft_hits;		/* hits */
uint64_t ft_lookups;		/* lookups */
uint64_t ft_fail_lle_invalid;	/* lookups with unresolved Layer 2 address */
uint64_t ft_inserts;		/* inserts */
};
#ifdef _KERNEL
#define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */
#define FL_PCPU (1<<1) /* pcpu cache */
#define FL_NOAUTO (1<<2) /* don't automatically add flentry on miss */
#define FL_IPV6 (1<<9)
#define FL_TCP (1<<11)
#define FL_SCTP (1<<12)
#define FL_UDP (1<<13)
#define FL_DEBUG (1<<14)
#define FL_DEBUG_ALL (1<<15)
struct flowtable;
struct flentry;
struct route;
struct route_in6;
VNET_DECLARE(struct flowtable *, ip_ft);
#define V_ip_ft VNET(ip_ft)
VNET_DECLARE(struct flowtable *, ip6_ft);
#define V_ip6_ft VNET(ip6_ft)
struct flowtable *flowtable_alloc(char *name, int nentry, int flags);
/*
* Given a flow table, look up the L3 and L2 information and
* return it in the route.
*
* Given a flow table, look up the L3 and L2 information
* and return it in the route.
*/
struct flentry *flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af);
struct flentry *flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
struct sockaddr_storage *dsa, uint32_t fibnum, int flags);
int kern_flowtable_insert(struct flowtable *ft, struct sockaddr_storage *ssa,
struct sockaddr_storage *dsa, struct route *ro, uint32_t fibnum, int flags);
void flow_invalidate(struct flentry *fl);
void flowtable_route_flush(struct flowtable *ft, struct rtentry *rt);
void flow_to_route(struct flentry *fl, struct route *ro);
void flow_to_route_in6(struct flentry *fl, struct route_in6 *ro);
int flowtable_lookup(sa_family_t, struct mbuf *, struct route *);
void flowtable_route_flush(sa_family_t, struct rtentry *);
#endif /* _KERNEL */
#endif
#endif /* !_NET_FLOWTABLE_H_ */

View File

@ -1298,18 +1298,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
}
#ifdef FLOWTABLE
else if (rt0 != NULL) {
switch (dst->sa_family) {
#ifdef INET6
case AF_INET6:
flowtable_route_flush(V_ip6_ft, rt0);
break;
#endif
#ifdef INET
case AF_INET:
flowtable_route_flush(V_ip_ft, rt0);
break;
#endif
}
flowtable_route_flush(dst->sa_family, rt0);
RTFREE(rt0);
}
#endif

View File

@ -62,7 +62,6 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <net/netisr.h>
#include <net/vnet.h>
#include <net/flowtable.h>
#include <netinet/in.h>
#include <netinet/in_kdtrace.h>
@ -198,16 +197,6 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
"IP stealth mode, no TTL decrementation on forwarding");
#endif
#ifdef FLOWTABLE
static VNET_DEFINE(int, ip_output_flowtable_size) = 2048;
VNET_DEFINE(struct flowtable *, ip_ft);
#define V_ip_output_flowtable_size VNET(ip_output_flowtable_size)
SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
&VNET_NAME(ip_output_flowtable_size), 2048,
"number of entries in the per-cpu output flow caches");
#endif
static void ip_freef(struct ipqhead *, struct ipq *);
/*
@ -309,24 +298,6 @@ ip_init(void)
printf("%s: WARNING: unable to register pfil hook, "
"error %d\n", __func__, i);
#ifdef FLOWTABLE
if (TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
&V_ip_output_flowtable_size)) {
if (V_ip_output_flowtable_size < 256)
V_ip_output_flowtable_size = 256;
if (!powerof2(V_ip_output_flowtable_size)) {
printf("flowtable must be power of 2 size\n");
V_ip_output_flowtable_size = 2048;
}
} else {
/*
* round up to the next power of 2
*/
V_ip_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
}
V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU);
#endif
/* Skip initialization of globals for non-default instances. */
if (!IS_DEFAULT_VNET(curvnet))
return;

View File

@ -32,6 +32,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_ipfw.h"
#include "opt_ipsec.h"
#include "opt_kdtrace.h"
@ -154,19 +155,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
}
#ifdef FLOWTABLE
if (ro->ro_rt == NULL) {
struct flentry *fle;
/*
* The flow table returns route entries valid for up to 30
* seconds; we rely on the remainder of ip_output() taking no
* longer than that long for the stability of ro_rt. The
* flow ID assignment must have happened before this point.
*/
fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET);
if (fle != NULL)
flow_to_route(fle, ro);
}
if (ro->ro_rt == NULL)
(void )flowtable_lookup(AF_INET, m, ro);
#endif
if (opt) {

View File

@ -126,10 +126,6 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
#ifdef FLOWTABLE
#include <net/flowtable.h>
#endif
/*
* TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/
@ -575,16 +571,6 @@ SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_RW,
&VNET_NAME(ip6stealth), 0, "");
#endif
#ifdef FLOWTABLE
VNET_DEFINE(int, ip6_output_flowtable_size) = 2048;
VNET_DEFINE(struct flowtable *, ip6_ft);
#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
&VNET_NAME(ip6_output_flowtable_size), 2048,
"number of entries in the per-cpu output flow caches");
#endif
/* net.inet6.icmp6 */
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, "");

View File

@ -119,12 +119,6 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
#ifdef FLOWTABLE
#include <net/flowtable.h>
VNET_DECLARE(int, ip6_output_flowtable_size);
#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
#endif
extern struct domain inet6domain;
u_char ip6_protox[IPPROTO_MAX];
@ -194,24 +188,6 @@ ip6_init(void)
nd6_init();
frag6_init();
#ifdef FLOWTABLE
if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size",
&V_ip6_output_flowtable_size)) {
if (V_ip6_output_flowtable_size < 256)
V_ip6_output_flowtable_size = 256;
if (!powerof2(V_ip6_output_flowtable_size)) {
printf("flowtable must be power of 2 size\n");
V_ip6_output_flowtable_size = 2048;
}
} else {
/*
* round up to the next power of 2
*/
V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
}
V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU);
#endif
V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
/* Skip global initialization stuff for non-default instances. */

View File

@ -521,19 +521,8 @@ skip_ipsec2:;
ro = &opt->ip6po_route;
dst = (struct sockaddr_in6 *)&ro->ro_dst;
#ifdef FLOWTABLE
if (ro->ro_rt == NULL) {
struct flentry *fle;
/*
* The flow table returns route entries valid for up to 30
* seconds; we rely on the remainder of ip_output() taking no
* longer than that long for the stability of ro_rt. The
* flow ID assignment must have happened before this point.
*/
fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
if (fle != NULL)
flow_to_route_in6(fle, ro);
}
if (ro->ro_rt == NULL)
(void )flowtable_lookup(AF_INET6, m, (struct route *)ro);
#endif
again:
/*

View File

@ -5,7 +5,8 @@
PROG= netstat
SRCS= if.c inet.c main.c mbuf.c mroute.c netisr.c route.c \
unix.c atalk.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c
unix.c atalk.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c \
flowtable.c
WARNS?= 3
CFLAGS+=-fno-strict-aliasing

View File

@ -0,0 +1,84 @@
/*-
* Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/sysctl.h>
#include <net/flowtable.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include "netstat.h"
/*
 * Print every counter of a flowtable_stat structure, one per line.
 * A counter that is zero is shown only when sflag is 1 or less.
 */
static void
print_stats(struct flowtable_stat *stat)
{
/*
 * Emit a single counter.  "fmt" receives the value as uintmax_t followed
 * by the string returned by "suffix" (plural or plurales) for that value.
 */
#define	FT_COUNTER(field, fmt, suffix) do {				\
	if (stat->field != 0 || sflag <= 1)				\
		printf(fmt, (uintmax_t)stat->field,			\
		    suffix(stat->field));				\
} while (0)

	FT_COUNTER(ft_lookups, "\t%ju lookup%s\n", plural);
	FT_COUNTER(ft_hits, "\t%ju hit%s\n", plural);
	FT_COUNTER(ft_misses, "\t%ju miss%s\n", plurales);
	FT_COUNTER(ft_inserts, "\t%ju insert%s\n", plural);
	FT_COUNTER(ft_collisions, "\t%ju collision%s\n", plural);
	FT_COUNTER(ft_free_checks, "\t%ju free check%s\n", plural);
	FT_COUNTER(ft_frees, "\t%ju free%s\n", plural);
	FT_COUNTER(ft_fail_lle_invalid,
	    "\t%ju lookup%s with not resolved Layer 2 address\n", plural);

#undef FT_COUNTER
}
/*
 * Fetch and print the IPv4 and IPv6 flowtable statistics.
 *
 * Nothing is printed unless "live" is set.  Each address family is
 * queried through its own sysctl node; a family whose node cannot be
 * read is silently skipped.
 */
void
flowtable_stats(void)
{
	struct flowtable_stat stat;
	size_t len;

	if (!live)
		return;

	/*
	 * sysctlbyname() treats the length as an in/out argument: on return
	 * it holds the number of bytes copied.  Reset it before every call,
	 * and only print when the whole structure was filled in, so that no
	 * uninitialized members are ever reported.
	 */
	len = sizeof(stat);
	if (sysctlbyname("net.flowtable.ip4.stat", &stat, &len, NULL, 0) == 0 &&
	    len == sizeof(stat)) {
		printf("flowtable for IPv4:\n");
		print_stats(&stat);
	}

	len = sizeof(stat);
	if (sysctlbyname("net.flowtable.ip6.stat", &stat, &len, NULL, 0) == 0 &&
	    len == sizeof(stat)) {
		printf("flowtable for IPv6:\n");
		print_stats(&stat);
	}
}

View File

@ -556,9 +556,10 @@ main(int argc, char *argv[])
exit(0);
}
if (rflag) {
if (sflag)
if (sflag) {
rt_stats(nl[N_RTSTAT].n_value, nl[N_RTTRASH].n_value);
else
flowtable_stats();
} else
routepr(nl[N_RTREE].n_value, fib);
exit(0);
}

View File

@ -124,6 +124,7 @@ void intpr(int, void (*)(char *));
void pr_rthdr(int);
void pr_family(int);
void rt_stats(u_long, u_long);
void flowtable_stats(void);
char *ipx_pnet(struct sockaddr *);
char *ipx_phost(struct sockaddr *);
char *ns_phost(struct sockaddr *);