o Revamp API between flowtable and netinet, netinet6.

  - ip_output() and ip6_output() simply call flowtable_lookup(),
    passing mbuf and address family. That's the only code under
    #ifdef FLOWTABLE in the protocols code now.
o Revamp statistics gathering and export.
  - Remove hand made pcpu stats, and utilize counter(9).
  - Snapshot of statistics is available via 'netstat -rs'.
  - All sysctls are moved into net.flowtable namespace, since
    spreading them over net.inet isn't correct.
o Properly separate at compile time INET and INET6 parts.
o General cleanup.
  - Remove chain of multiple flowtables. We simply have one for
    IPv4 and one for IPv6.
  - Flowtables are allocated in flowtable.c, symbols are static.
  - With proper argument to SYSINIT() we no longer need flowtable_ready.
  - Hash salt doesn't need to be per-VNET.
  - Removed rudimentary debugging, which is quite useless in the dtrace era.

The runtime behavior of flowtable shouldn't be changed by this commit.

Sponsored by:	Netflix
Sponsored by:	Nginx, Inc.
This commit is contained in:
Gleb Smirnoff 2014-02-07 15:18:23 +00:00
parent 8044460976
commit 5d6d7e756b
12 changed files with 414 additions and 558 deletions

File diff suppressed because it is too large Load Diff

View File

@ -32,6 +32,15 @@ POSSIBILITY OF SUCH DAMAGE.
#ifndef _NET_FLOWTABLE_H_
#define _NET_FLOWTABLE_H_
/*
 * Snapshot of flowtable statistics.
 *
 * Declared outside of the _KERNEL section, so it is visible to userland:
 * netstat(1) reads one of these through the net.flowtable.ip4.stat and
 * net.flowtable.ip6.stat sysctls and reports the fields as lookups, hits,
 * misses, collisions, free checks and frees.
 *
 * NOTE(review): the layout is presumably shared verbatim between the kernel
 * exporter and userland consumers, so fields should not be reordered or
 * resized without updating both sides -- confirm against flowtable.c.
 */
struct flowtable_stat {
uint64_t ft_collisions;
uint64_t ft_misses;
uint64_t ft_free_checks;
uint64_t ft_frees;
uint64_t ft_hits;
uint64_t ft_lookups;
};
#ifdef _KERNEL
#define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */
@ -50,34 +59,20 @@ struct flentry;
struct route;
struct route_in6;
VNET_DECLARE(struct flowtable *, ip_ft);
#define V_ip_ft VNET(ip_ft)
VNET_DECLARE(struct flowtable *, ip6_ft);
#define V_ip6_ft VNET(ip6_ft)
struct flowtable *flowtable_alloc(char *name, int nentry, int flags);
/*
* Given a flow table, look up the L3 and L2 information and
* return it in the route.
*
*/
struct flentry *flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af);
struct flentry *flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
struct sockaddr_storage *dsa, uint32_t fibnum, int flags);
int kern_flowtable_insert(struct flowtable *ft, struct sockaddr_storage *ssa,
struct sockaddr_storage *dsa, struct route *ro, uint32_t fibnum, int flags);
void flow_invalidate(struct flentry *fl);
void flowtable_route_flush(struct flowtable *ft, struct rtentry *rt);
struct flentry *flowtable_lookup(sa_family_t, struct mbuf *);
void flowtable_route_flush(sa_family_t, struct rtentry *);
#ifdef INET
void flow_to_route(struct flentry *fl, struct route *ro);
#endif
#ifdef INET6
void flow_to_route_in6(struct flentry *fl, struct route_in6 *ro);
#endif
#endif /* _KERNEL */
#endif
#endif /* _NET_FLOWTABLE_H_ */

View File

@ -1352,18 +1352,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
}
#ifdef FLOWTABLE
else if (rt0 != NULL) {
switch (dst->sa_family) {
#ifdef INET6
case AF_INET6:
flowtable_route_flush(V_ip6_ft, rt0);
break;
#endif
#ifdef INET
case AF_INET:
flowtable_route_flush(V_ip_ft, rt0);
break;
#endif
}
flowtable_route_flush(dst->sa_family, rt0);
RTFREE(rt0);
}
#endif

View File

@ -61,7 +61,6 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <net/netisr.h>
#include <net/vnet.h>
#include <net/flowtable.h>
#include <netinet/in.h>
#include <netinet/in_kdtrace.h>
@ -197,16 +196,6 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
"IP stealth mode, no TTL decrementation on forwarding");
#endif
#ifdef FLOWTABLE
static VNET_DEFINE(int, ip_output_flowtable_size) = 2048;
VNET_DEFINE(struct flowtable *, ip_ft);
#define V_ip_output_flowtable_size VNET(ip_output_flowtable_size)
SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
&VNET_NAME(ip_output_flowtable_size), 2048,
"number of entries in the per-cpu output flow caches");
#endif
static void ip_freef(struct ipqhead *, struct ipq *);
/*
@ -308,24 +297,6 @@ ip_init(void)
printf("%s: WARNING: unable to register pfil hook, "
"error %d\n", __func__, i);
#ifdef FLOWTABLE
if (TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
&V_ip_output_flowtable_size)) {
if (V_ip_output_flowtable_size < 256)
V_ip_output_flowtable_size = 256;
if (!powerof2(V_ip_output_flowtable_size)) {
printf("flowtable must be power of 2 size\n");
V_ip_output_flowtable_size = 2048;
}
} else {
/*
* round up to the next power of 2
*/
V_ip_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
}
V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU);
#endif
/* Skip initialization of globals for non-default instances. */
if (!IS_DEFAULT_VNET(curvnet))
return;

View File

@ -32,6 +32,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_ipfw.h"
#include "opt_ipsec.h"
#include "opt_mbuf_stress_test.h"
@ -162,7 +163,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
* longer than that long for the stability of ro_rt. The
* flow ID assignment must have happened before this point.
*/
fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET);
fle = flowtable_lookup(AF_INET, m);
if (fle != NULL)
flow_to_route(fle, ro);
}

View File

@ -127,10 +127,6 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
#ifdef FLOWTABLE
#include <net/flowtable.h>
#endif
/*
* TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/
@ -576,16 +572,6 @@ SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_RW,
&VNET_NAME(ip6stealth), 0, "");
#endif
#ifdef FLOWTABLE
VNET_DEFINE(int, ip6_output_flowtable_size) = 2048;
VNET_DEFINE(struct flowtable *, ip6_ft);
#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
&VNET_NAME(ip6_output_flowtable_size), 2048,
"number of entries in the per-cpu output flow caches");
#endif
/* net.inet6.icmp6 */
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, "");

View File

@ -119,12 +119,6 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
#ifdef FLOWTABLE
#include <net/flowtable.h>
VNET_DECLARE(int, ip6_output_flowtable_size);
#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
#endif
extern struct domain inet6domain;
u_char ip6_protox[IPPROTO_MAX];
@ -194,24 +188,6 @@ ip6_init(void)
nd6_init();
frag6_init();
#ifdef FLOWTABLE
if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size",
&V_ip6_output_flowtable_size)) {
if (V_ip6_output_flowtable_size < 256)
V_ip6_output_flowtable_size = 256;
if (!powerof2(V_ip6_output_flowtable_size)) {
printf("flowtable must be power of 2 size\n");
V_ip6_output_flowtable_size = 2048;
}
} else {
/*
* round up to the next power of 2
*/
V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
}
V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU);
#endif
V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
/* Skip global initialization stuff for non-default instances. */

View File

@ -531,7 +531,7 @@ skip_ipsec2:;
* longer than that long for the stability of ro_rt. The
* flow ID assignment must have happened before this point.
*/
fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
fle = flowtable_lookup(AF_INET6, m);
if (fle != NULL)
flow_to_route_in6(fle, ro);
}

View File

@ -5,7 +5,8 @@
PROG= netstat
SRCS= if.c inet.c main.c mbuf.c mroute.c netisr.c route.c \
unix.c atalk.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c
unix.c atalk.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c \
flowtable.c
WARNS?= 3
CFLAGS+=-fno-strict-aliasing

View File

@ -0,0 +1,81 @@
/*-
* Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/sysctl.h>
#include <net/flowtable.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include "netstat.h"
/*
 * Print the counters of a single flowtable, one counter per line.
 *
 * A counter whose value is zero is still printed unless sflag is
 * greater than 1, in which case only non-zero counters are shown.
 */
static void
print_stats(struct flowtable_stat *stat)
{

/*
 * Emit one counter.  'suffix' names the pluralising helper to use, so that
 * "miss" can become "misses" while the other labels just get an "s".
 */
#define	FT_PRINT(field, fmt, suffix) do {				\
	if (stat->field != 0 || sflag <= 1)				\
		printf(fmt, (uintmax_t)stat->field,			\
		    suffix(stat->field));				\
} while (0)

	FT_PRINT(ft_lookups, "\t%ju lookup%s\n", plural);
	FT_PRINT(ft_hits, "\t%ju hit%s\n", plural);
	FT_PRINT(ft_misses, "\t%ju miss%s\n", plurales);
	FT_PRINT(ft_collisions, "\t%ju collision%s\n", plural);
	FT_PRINT(ft_free_checks, "\t%ju free check%s\n", plural);
	FT_PRINT(ft_frees, "\t%ju free%s\n", plural);

#undef FT_PRINT
}
/*
 * Fetch and print the IPv4 and IPv6 flowtable statistics.
 *
 * Each table is queried through its own sysctl node; a table whose sysctl
 * cannot be read is silently skipped.  Nothing is printed when 'live' is
 * not set (NOTE(review): presumably this means netstat is not examining
 * the running kernel, where sysctl data would not apply -- confirm).
 */
void
flowtable_stats(void)
{
	struct flowtable_stat stat;
	size_t len;

	if (!live)
		return;

	len = sizeof(stat);
	if (sysctlbyname("net.flowtable.ip4.stat", &stat, &len, NULL, 0) == 0) {
		printf("flowtable for IPv4:\n");
		print_stats(&stat);
	}

	/*
	 * sysctlbyname() treats the length as a value-result argument and
	 * may have rewritten it above, so restore the full buffer size
	 * before issuing the second query.
	 */
	len = sizeof(stat);
	if (sysctlbyname("net.flowtable.ip6.stat", &stat, &len, NULL, 0) == 0) {
		printf("flowtable for IPv6:\n");
		print_stats(&stat);
	}
}

View File

@ -571,9 +571,10 @@ main(int argc, char *argv[])
exit(0);
}
if (rflag) {
if (sflag)
if (sflag) {
rt_stats();
else
flowtable_stats();
} else
routepr(fib, af);
exit(0);
}

View File

@ -125,6 +125,7 @@ void intpr(int, void (*)(char *), int);
void pr_rthdr(int);
void pr_family(int);
void rt_stats(void);
void flowtable_stats(void);
char *ipx_pnet(struct sockaddr *);
char *ipx_phost(struct sockaddr *);
char *ns_phost(struct sockaddr *);