09b44517ca
Two new functions are provided, bit_ffs_at() and bit_ffc_at(), which allow for efficient searching of set or cleared bits starting from any bit offset within the bit string. Performance is improved by operating on longs instead of bytes and using ffsl() for searches within a long. ffsl() is a compiler builtin in both clang and gcc for most architectures, converting what was a brute force while loop search into a couple of instructions. All of the bitstring(3) API continues to be contained in the header file. Some of the functions are large enough that perhaps they should be uninlined and moved to a library, but that is beyond the scope of this commit. sys/sys/bitstring.h: Convert the majority of the existing bit string implementation from macros to inline functions. Properly protect the implementation from inadvertant macro expansion when included in a user's program by prefixing all private macros/functions and local variables with '_'. Add bit_ffs_at() and bit_ffc_at(). Implement bit_ffs() and bit_ffc() in terms of their "at" counterparts. Provide a kernel implementation of bit_alloc(), making the full API usable in the kernel. Improve code documenation. share/man/man3/bitstring.3: Add pre-exisiting API bit_ffc() to the synopsis. Document new APIs. Document the initialization state of the bit strings allocated/declared by bit_alloc() and bit_decl(). Correct documentation for bitstr_size(). The original code comments indicate the size is in bytes, not "elements of bitstr_t". The new implementation follows this lead. Only hastd assumed "elements" rather than bytes and it has been corrected. etc/mtree/BSD.tests.dist: tests/sys/Makefile: tests/sys/sys/Makefile: tests/sys/sys/bitstring.c: Add tests for all existing and new functionality. include/bitstring.h Include all headers needed by sys/bitstring.h lib/libbluetooth/bluetooth.h: usr.sbin/bluetooth/hccontrol/le.c: Include bitstring.h instead of sys/bitstring.h. sbin/hastd/activemap.c: Correct usage of bitstr_size(). sys/dev/xen/blkback/blkback.c Use new bit_alloc. sys/kern/subr_unit.c: Remove hard-coded assumption that sizeof(bitstr_t) is 1. Get rid of unrb.busy, which caches the number of bits set in unrb.map. When INVARIANTS are disabled, nothing needs to know that information. callapse_unr can be adapted to use bit_ffs and bit_ffc instead. Eliminating unrb.busy saves memory, simplifies the code, and provides a slight speedup when INVARIANTS are disabled. sys/net/flowtable.c: Use the new kernel implementation of bit-alloc, instead of hacking the old libc-dependent macro. sys/sys/param.h Update __FreeBSD_version to indicate availability of new API Submitted by: gibbs, asomers Reviewed by: gibbs, ngie MFC after: 4 weeks Sponsored by: Spectra Logic Corp Differential Revision: https://reviews.freebsd.org/D6004
1190 lines
28 KiB
C
1190 lines
28 KiB
C
/*-
|
|
* Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
|
|
* Copyright (c) 2008-2010, BitGravity Inc.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Neither the name of the BitGravity Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "opt_route.h"
|
|
#include "opt_mpath.h"
|
|
#include "opt_ddb.h"
|
|
#include "opt_inet.h"
|
|
#include "opt_inet6.h"
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/types.h>
|
|
#include <sys/bitstring.h>
|
|
#include <sys/condvar.h>
|
|
#include <sys/callout.h>
|
|
#include <sys/hash.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/kthread.h>
|
|
#include <sys/limits.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/mbuf.h>
|
|
#include <sys/pcpu.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/queue.h>
|
|
#include <sys/sbuf.h>
|
|
#include <sys/sched.h>
|
|
#include <sys/smp.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/syslog.h>
|
|
#include <sys/sysctl.h>
|
|
#include <vm/uma.h>
|
|
|
|
#include <net/if.h>
|
|
#include <net/if_llatbl.h>
|
|
#include <net/if_var.h>
|
|
#include <net/route.h>
|
|
#include <net/flowtable.h>
|
|
#include <net/vnet.h>
|
|
|
|
#include <netinet/in.h>
|
|
#include <netinet/in_systm.h>
|
|
#include <netinet/in_var.h>
|
|
#include <netinet/if_ether.h>
|
|
#include <netinet/ip.h>
|
|
#ifdef INET6
|
|
#include <netinet/ip6.h>
|
|
#endif
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
#include <netinet/tcp.h>
|
|
#include <netinet/udp.h>
|
|
#include <netinet/sctp.h>
|
|
#endif
|
|
|
|
#include <ddb/ddb.h>
|
|
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
#define KEY_PORTS (sizeof(uint16_t) * 2)
|
|
#define KEY_ADDRS 2
|
|
#else
|
|
#define KEY_PORTS 0
|
|
#define KEY_ADDRS 1
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
#define KEY_ADDR_LEN sizeof(struct in6_addr)
|
|
#else
|
|
#define KEY_ADDR_LEN sizeof(struct in_addr)
|
|
#endif
|
|
|
|
#define KEYLEN ((KEY_ADDR_LEN * KEY_ADDRS + KEY_PORTS) / sizeof(uint32_t))
|
|
|
|
struct flentry {
|
|
uint32_t f_hash; /* hash flowing forward */
|
|
uint32_t f_key[KEYLEN]; /* address(es and ports) */
|
|
uint32_t f_uptime; /* uptime at last access */
|
|
uint16_t f_fibnum; /* fib index */
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
uint8_t f_proto; /* protocol */
|
|
uint8_t f_flags; /* stale? */
|
|
#define FL_STALE 1
|
|
#endif
|
|
SLIST_ENTRY(flentry) f_next; /* pointer to collision entry */
|
|
struct rtentry *f_rt; /* rtentry for flow */
|
|
struct llentry *f_lle; /* llentry for flow */
|
|
};
|
|
#undef KEYLEN
|
|
|
|
SLIST_HEAD(flist, flentry);
|
|
/* Make sure we can use pcpu_zone_ptr for struct flist. */
|
|
CTASSERT(sizeof(struct flist) == sizeof(void *));
|
|
|
|
struct flowtable {
|
|
counter_u64_t *ft_stat;
|
|
int ft_size;
|
|
/*
|
|
* ft_table is a malloc(9)ed array of pointers. Pointers point to
|
|
* memory from UMA_ZONE_PCPU zone.
|
|
* ft_masks is per-cpu pointer itself. Each instance points
|
|
* to a malloc(9)ed bitset, that is private to corresponding CPU.
|
|
*/
|
|
struct flist **ft_table;
|
|
bitstr_t **ft_masks;
|
|
bitstr_t *ft_tmpmask;
|
|
};
|
|
|
|
#define FLOWSTAT_ADD(ft, name, v) \
|
|
counter_u64_add((ft)->ft_stat[offsetof(struct flowtable_stat, name) / sizeof(uint64_t)], (v))
|
|
#define FLOWSTAT_INC(ft, name) FLOWSTAT_ADD(ft, name, 1)
|
|
|
|
static struct proc *flowcleanerproc;
|
|
static uint32_t flow_hashjitter;
|
|
|
|
static struct cv flowclean_f_cv;
|
|
static struct cv flowclean_c_cv;
|
|
static struct mtx flowclean_lock;
|
|
static uint32_t flowclean_cycles;
|
|
|
|
/*
|
|
* TODO:
|
|
* - add sysctls to resize && flush flow tables
|
|
* - Add per flowtable sysctls for statistics and configuring timeouts
|
|
* - add saturation counter to rtentry to support per-packet load-balancing
|
|
* add flag to indicate round-robin flow, add list lookup from head
|
|
for flows
|
|
* - add sysctl / device node / syscall to support exporting and importing
|
|
* of flows with flag to indicate that a flow was imported so should
|
|
* not be considered for auto-cleaning
|
|
* - support explicit connection state (currently only ad-hoc for DSR)
|
|
* - idetach() cleanup for options VIMAGE builds.
|
|
*/
|
|
#ifdef INET
|
|
static VNET_DEFINE(struct flowtable, ip4_ft);
|
|
#define V_ip4_ft VNET(ip4_ft)
|
|
#endif
|
|
#ifdef INET6
|
|
static VNET_DEFINE(struct flowtable, ip6_ft);
|
|
#define V_ip6_ft VNET(ip6_ft)
|
|
#endif
|
|
|
|
static uma_zone_t flow_zone;
|
|
|
|
static VNET_DEFINE(int, flowtable_enable) = 1;
|
|
#define V_flowtable_enable VNET(flowtable_enable)
|
|
|
|
static SYSCTL_NODE(_net, OID_AUTO, flowtable, CTLFLAG_RD, NULL,
|
|
"flowtable");
|
|
SYSCTL_INT(_net_flowtable, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
|
|
&VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");
|
|
SYSCTL_UMA_MAX(_net_flowtable, OID_AUTO, maxflows, CTLFLAG_RW,
|
|
&flow_zone, "Maximum number of flows allowed");
|
|
|
|
static MALLOC_DEFINE(M_FTABLE, "flowtable", "flowtable hashes and bitstrings");
|
|
|
|
static struct flentry *
|
|
flowtable_lookup_common(struct flowtable *, uint32_t *, int, uint32_t);
|
|
|
|
#ifdef INET
|
|
static struct flentry *
|
|
flowtable_lookup_ipv4(struct mbuf *m, struct route *ro)
|
|
{
|
|
struct flentry *fle;
|
|
struct sockaddr_in *sin;
|
|
struct ip *ip;
|
|
uint32_t fibnum;
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
uint32_t key[3];
|
|
int iphlen;
|
|
uint16_t sport, dport;
|
|
uint8_t proto;
|
|
#endif
|
|
|
|
ip = mtod(m, struct ip *);
|
|
|
|
if (ip->ip_src.s_addr == ip->ip_dst.s_addr ||
|
|
(ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
|
|
(ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
|
|
return (NULL);
|
|
|
|
fibnum = M_GETFIB(m);
|
|
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
iphlen = ip->ip_hl << 2;
|
|
proto = ip->ip_p;
|
|
|
|
switch (proto) {
|
|
case IPPROTO_TCP: {
|
|
struct tcphdr *th;
|
|
|
|
th = (struct tcphdr *)((char *)ip + iphlen);
|
|
sport = th->th_sport;
|
|
dport = th->th_dport;
|
|
if (th->th_flags & (TH_RST|TH_FIN))
|
|
fibnum |= (FL_STALE << 24);
|
|
break;
|
|
}
|
|
case IPPROTO_UDP: {
|
|
struct udphdr *uh;
|
|
|
|
uh = (struct udphdr *)((char *)ip + iphlen);
|
|
sport = uh->uh_sport;
|
|
dport = uh->uh_dport;
|
|
break;
|
|
}
|
|
case IPPROTO_SCTP: {
|
|
struct sctphdr *sh;
|
|
|
|
sh = (struct sctphdr *)((char *)ip + iphlen);
|
|
sport = sh->src_port;
|
|
dport = sh->dest_port;
|
|
/* XXXGL: handle stale? */
|
|
break;
|
|
}
|
|
default:
|
|
sport = dport = 0;
|
|
break;
|
|
}
|
|
|
|
key[0] = ip->ip_dst.s_addr;
|
|
key[1] = ip->ip_src.s_addr;
|
|
key[2] = (dport << 16) | sport;
|
|
fibnum |= proto << 16;
|
|
|
|
fle = flowtable_lookup_common(&V_ip4_ft, key, 3 * sizeof(uint32_t),
|
|
fibnum);
|
|
|
|
#else /* !FLOWTABLE_HASH_ALL */
|
|
|
|
fle = flowtable_lookup_common(&V_ip4_ft, (uint32_t *)&ip->ip_dst,
|
|
sizeof(struct in_addr), fibnum);
|
|
|
|
#endif /* FLOWTABLE_HASH_ALL */
|
|
|
|
if (fle == NULL)
|
|
return (NULL);
|
|
|
|
sin = (struct sockaddr_in *)&ro->ro_dst;
|
|
sin->sin_family = AF_INET;
|
|
sin->sin_len = sizeof(*sin);
|
|
sin->sin_addr = ip->ip_dst;
|
|
|
|
return (fle);
|
|
}
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
/*
|
|
* PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
|
|
* then it sets p to point at the offset "len" in the mbuf. WARNING: the
|
|
* pointer might become stale after other pullups (but we never use it
|
|
* this way).
|
|
*/
|
|
#define PULLUP_TO(_len, p, T) \
|
|
do { \
|
|
int x = (_len) + sizeof(T); \
|
|
if ((m)->m_len < x) \
|
|
return (NULL); \
|
|
p = (mtod(m, char *) + (_len)); \
|
|
} while (0)
|
|
|
|
#define TCP(p) ((struct tcphdr *)(p))
|
|
#define SCTP(p) ((struct sctphdr *)(p))
|
|
#define UDP(p) ((struct udphdr *)(p))
|
|
|
|
static struct flentry *
|
|
flowtable_lookup_ipv6(struct mbuf *m, struct route *ro)
|
|
{
|
|
struct flentry *fle;
|
|
struct sockaddr_in6 *sin6;
|
|
struct ip6_hdr *ip6;
|
|
uint32_t fibnum;
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
uint32_t key[9];
|
|
void *ulp;
|
|
int hlen;
|
|
uint16_t sport, dport;
|
|
u_short offset;
|
|
uint8_t proto;
|
|
#else
|
|
uint32_t key[4];
|
|
#endif
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
|
if (in6_localaddr(&ip6->ip6_dst))
|
|
return (NULL);
|
|
|
|
fibnum = M_GETFIB(m);
|
|
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
hlen = sizeof(struct ip6_hdr);
|
|
proto = ip6->ip6_nxt;
|
|
offset = sport = dport = 0;
|
|
ulp = NULL;
|
|
while (ulp == NULL) {
|
|
switch (proto) {
|
|
case IPPROTO_ICMPV6:
|
|
case IPPROTO_OSPFIGP:
|
|
case IPPROTO_PIM:
|
|
case IPPROTO_CARP:
|
|
case IPPROTO_ESP:
|
|
case IPPROTO_NONE:
|
|
ulp = ip6;
|
|
break;
|
|
case IPPROTO_TCP:
|
|
PULLUP_TO(hlen, ulp, struct tcphdr);
|
|
dport = TCP(ulp)->th_dport;
|
|
sport = TCP(ulp)->th_sport;
|
|
if (TCP(ulp)->th_flags & (TH_RST|TH_FIN))
|
|
fibnum |= (FL_STALE << 24);
|
|
break;
|
|
case IPPROTO_SCTP:
|
|
PULLUP_TO(hlen, ulp, struct sctphdr);
|
|
dport = SCTP(ulp)->src_port;
|
|
sport = SCTP(ulp)->dest_port;
|
|
/* XXXGL: handle stale? */
|
|
break;
|
|
case IPPROTO_UDP:
|
|
PULLUP_TO(hlen, ulp, struct udphdr);
|
|
dport = UDP(ulp)->uh_dport;
|
|
sport = UDP(ulp)->uh_sport;
|
|
break;
|
|
case IPPROTO_HOPOPTS: /* RFC 2460 */
|
|
PULLUP_TO(hlen, ulp, struct ip6_hbh);
|
|
hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
|
|
proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
|
|
ulp = NULL;
|
|
break;
|
|
case IPPROTO_ROUTING: /* RFC 2460 */
|
|
PULLUP_TO(hlen, ulp, struct ip6_rthdr);
|
|
hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
|
|
proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
|
|
ulp = NULL;
|
|
break;
|
|
case IPPROTO_FRAGMENT: /* RFC 2460 */
|
|
PULLUP_TO(hlen, ulp, struct ip6_frag);
|
|
hlen += sizeof (struct ip6_frag);
|
|
proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
|
|
offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
|
|
IP6F_OFF_MASK;
|
|
ulp = NULL;
|
|
break;
|
|
case IPPROTO_DSTOPTS: /* RFC 2460 */
|
|
PULLUP_TO(hlen, ulp, struct ip6_hbh);
|
|
hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
|
|
proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
|
|
ulp = NULL;
|
|
break;
|
|
case IPPROTO_AH: /* RFC 2402 */
|
|
PULLUP_TO(hlen, ulp, struct ip6_ext);
|
|
hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
|
|
proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
|
|
ulp = NULL;
|
|
break;
|
|
default:
|
|
PULLUP_TO(hlen, ulp, struct ip6_ext);
|
|
break;
|
|
}
|
|
}
|
|
|
|
bcopy(&ip6->ip6_dst, &key[0], sizeof(struct in6_addr));
|
|
bcopy(&ip6->ip6_src, &key[4], sizeof(struct in6_addr));
|
|
key[8] = (dport << 16) | sport;
|
|
fibnum |= proto << 16;
|
|
|
|
fle = flowtable_lookup_common(&V_ip6_ft, key, 9 * sizeof(uint32_t),
|
|
fibnum);
|
|
#else /* !FLOWTABLE_HASH_ALL */
|
|
bcopy(&ip6->ip6_dst, &key[0], sizeof(struct in6_addr));
|
|
fle = flowtable_lookup_common(&V_ip6_ft, key, sizeof(struct in6_addr),
|
|
fibnum);
|
|
#endif /* FLOWTABLE_HASH_ALL */
|
|
|
|
if (fle == NULL)
|
|
return (NULL);
|
|
|
|
sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
|
|
sin6->sin6_family = AF_INET6;
|
|
sin6->sin6_len = sizeof(*sin6);
|
|
bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(struct in6_addr));
|
|
|
|
return (fle);
|
|
}
|
|
#endif /* INET6 */
|
|
|
|
static bitstr_t *
|
|
flowtable_mask(struct flowtable *ft)
|
|
{
|
|
|
|
/*
|
|
* flowtable_free_stale() calls w/o critical section, but
|
|
* with sched_bind(). Since pointer is stable throughout
|
|
* ft lifetime, it is safe, otherwise...
|
|
*
|
|
* CRITICAL_ASSERT(curthread);
|
|
*/
|
|
|
|
return (*(bitstr_t **)zpcpu_get(ft->ft_masks));
|
|
}
|
|
|
|
static struct flist *
|
|
flowtable_list(struct flowtable *ft, uint32_t hash)
|
|
{
|
|
|
|
CRITICAL_ASSERT(curthread);
|
|
return (zpcpu_get(ft->ft_table[hash % ft->ft_size]));
|
|
}
|
|
|
|
static int
|
|
flow_stale(struct flowtable *ft, struct flentry *fle, int maxidle)
|
|
{
|
|
|
|
if (((fle->f_rt->rt_flags & RTF_UP) == 0) ||
|
|
(fle->f_rt->rt_ifp == NULL) ||
|
|
!RT_LINK_IS_UP(fle->f_rt->rt_ifp) ||
|
|
(fle->f_lle->la_flags & LLE_VALID) == 0)
|
|
return (1);
|
|
|
|
if (time_uptime - fle->f_uptime > maxidle)
|
|
return (1);
|
|
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
if (fle->f_flags & FL_STALE)
|
|
return (1);
|
|
#endif
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
flow_full(void)
|
|
{
|
|
int count, max;
|
|
|
|
count = uma_zone_get_cur(flow_zone);
|
|
max = uma_zone_get_max(flow_zone);
|
|
|
|
return (count > (max - (max >> 3)));
|
|
}
|
|
|
|
static int
|
|
flow_matches(struct flentry *fle, uint32_t *key, int keylen, uint32_t fibnum)
|
|
{
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
uint8_t proto;
|
|
|
|
proto = (fibnum >> 16) & 0xff;
|
|
fibnum &= 0xffff;
|
|
#endif
|
|
|
|
CRITICAL_ASSERT(curthread);
|
|
|
|
/* Microoptimization for IPv4: don't use bcmp(). */
|
|
if (((keylen == sizeof(uint32_t) && (fle->f_key[0] == key[0])) ||
|
|
(bcmp(fle->f_key, key, keylen) == 0)) &&
|
|
fibnum == fle->f_fibnum &&
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
proto == fle->f_proto &&
|
|
#endif
|
|
(fle->f_rt->rt_flags & RTF_UP) &&
|
|
fle->f_rt->rt_ifp != NULL &&
|
|
(fle->f_lle->la_flags & LLE_VALID))
|
|
return (1);
|
|
|
|
return (0);
|
|
}
|
|
|
|
static struct flentry *
|
|
flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
|
|
int keylen, uint32_t fibnum0)
|
|
{
|
|
#ifdef INET6
|
|
struct route_in6 sro6;
|
|
#endif
|
|
#ifdef INET
|
|
struct route sro;
|
|
#endif
|
|
struct route *ro = NULL;
|
|
struct rtentry *rt;
|
|
struct lltable *lt = NULL;
|
|
struct llentry *lle;
|
|
struct sockaddr_storage *l3addr;
|
|
struct ifnet *ifp;
|
|
struct flist *flist;
|
|
struct flentry *fle, *iter;
|
|
bitstr_t *mask;
|
|
uint16_t fibnum = fibnum0;
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
uint8_t proto;
|
|
|
|
proto = (fibnum0 >> 16) & 0xff;
|
|
fibnum = fibnum0 & 0xffff;
|
|
#endif
|
|
|
|
/*
|
|
* This bit of code ends up locking the
|
|
* same route 3 times (just like ip_output + ether_output)
|
|
* - at lookup
|
|
* - in rt_check when called by arpresolve
|
|
* - dropping the refcount for the rtentry
|
|
*
|
|
* This could be consolidated to one if we wrote a variant
|
|
* of arpresolve with an rt_check variant that expected to
|
|
* receive the route locked
|
|
*/
|
|
#ifdef INET
|
|
if (ft == &V_ip4_ft) {
|
|
struct sockaddr_in *sin;
|
|
|
|
ro = &sro;
|
|
bzero(&sro.ro_dst, sizeof(sro.ro_dst));
|
|
|
|
sin = (struct sockaddr_in *)&sro.ro_dst;
|
|
sin->sin_family = AF_INET;
|
|
sin->sin_len = sizeof(*sin);
|
|
sin->sin_addr.s_addr = key[0];
|
|
}
|
|
#endif
|
|
#ifdef INET6
|
|
if (ft == &V_ip6_ft) {
|
|
struct sockaddr_in6 *sin6;
|
|
|
|
ro = (struct route *)&sro6;
|
|
sin6 = &sro6.ro_dst;
|
|
|
|
bzero(sin6, sizeof(*sin6));
|
|
sin6->sin6_family = AF_INET6;
|
|
sin6->sin6_len = sizeof(*sin6);
|
|
bcopy(key, &sin6->sin6_addr, sizeof(struct in6_addr));
|
|
}
|
|
#endif
|
|
|
|
ro->ro_rt = NULL;
|
|
#ifdef RADIX_MPATH
|
|
rtalloc_mpath_fib(ro, hash, fibnum);
|
|
#else
|
|
rtalloc_ign_fib(ro, 0, fibnum);
|
|
#endif
|
|
if (ro->ro_rt == NULL)
|
|
return (NULL);
|
|
|
|
rt = ro->ro_rt;
|
|
ifp = rt->rt_ifp;
|
|
|
|
if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
|
|
RTFREE(rt);
|
|
return (NULL);
|
|
}
|
|
|
|
#ifdef INET
|
|
if (ft == &V_ip4_ft)
|
|
lt = LLTABLE(ifp);
|
|
#endif
|
|
#ifdef INET6
|
|
if (ft == &V_ip6_ft)
|
|
lt = LLTABLE6(ifp);
|
|
#endif
|
|
|
|
if (rt->rt_flags & RTF_GATEWAY)
|
|
l3addr = (struct sockaddr_storage *)rt->rt_gateway;
|
|
else
|
|
l3addr = (struct sockaddr_storage *)&ro->ro_dst;
|
|
lle = llentry_alloc(ifp, lt, l3addr);
|
|
|
|
if (lle == NULL) {
|
|
RTFREE(rt);
|
|
return (NULL);
|
|
}
|
|
|
|
/* Don't insert the entry if the ARP hasn't yet finished resolving. */
|
|
if ((lle->la_flags & LLE_VALID) == 0) {
|
|
RTFREE(rt);
|
|
LLE_FREE(lle);
|
|
FLOWSTAT_INC(ft, ft_fail_lle_invalid);
|
|
return (NULL);
|
|
}
|
|
|
|
fle = uma_zalloc(flow_zone, M_NOWAIT | M_ZERO);
|
|
if (fle == NULL) {
|
|
RTFREE(rt);
|
|
LLE_FREE(lle);
|
|
return (NULL);
|
|
}
|
|
|
|
fle->f_hash = hash;
|
|
bcopy(key, &fle->f_key, keylen);
|
|
fle->f_rt = rt;
|
|
fle->f_lle = lle;
|
|
fle->f_fibnum = fibnum;
|
|
fle->f_uptime = time_uptime;
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
fle->f_proto = proto;
|
|
fle->f_flags = fibnum0 >> 24;
|
|
#endif
|
|
|
|
critical_enter();
|
|
mask = flowtable_mask(ft);
|
|
flist = flowtable_list(ft, hash);
|
|
|
|
if (SLIST_EMPTY(flist)) {
|
|
bit_set(mask, (hash % ft->ft_size));
|
|
SLIST_INSERT_HEAD(flist, fle, f_next);
|
|
goto skip;
|
|
}
|
|
|
|
/*
|
|
* find end of list and make sure that we were not
|
|
* preempted by another thread handling this flow
|
|
*/
|
|
SLIST_FOREACH(iter, flist, f_next) {
|
|
KASSERT(iter->f_hash % ft->ft_size == hash % ft->ft_size,
|
|
("%s: wrong hash", __func__));
|
|
if (flow_matches(iter, key, keylen, fibnum)) {
|
|
/*
|
|
* We probably migrated to an other CPU after
|
|
* lookup in flowtable_lookup_common() failed.
|
|
* It appeared that this CPU already has flow
|
|
* entry.
|
|
*/
|
|
iter->f_uptime = time_uptime;
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
iter->f_flags |= fibnum >> 24;
|
|
#endif
|
|
critical_exit();
|
|
FLOWSTAT_INC(ft, ft_collisions);
|
|
uma_zfree(flow_zone, fle);
|
|
return (iter);
|
|
}
|
|
}
|
|
|
|
SLIST_INSERT_HEAD(flist, fle, f_next);
|
|
skip:
|
|
critical_exit();
|
|
FLOWSTAT_INC(ft, ft_inserts);
|
|
|
|
return (fle);
|
|
}
|
|
|
|
int
|
|
flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro)
|
|
{
|
|
struct flentry *fle;
|
|
struct llentry *lle;
|
|
|
|
if (V_flowtable_enable == 0)
|
|
return (ENXIO);
|
|
|
|
switch (sa) {
|
|
#ifdef INET
|
|
case AF_INET:
|
|
fle = flowtable_lookup_ipv4(m, ro);
|
|
break;
|
|
#endif
|
|
#ifdef INET6
|
|
case AF_INET6:
|
|
fle = flowtable_lookup_ipv6(m, ro);
|
|
break;
|
|
#endif
|
|
default:
|
|
panic("%s: sa %d", __func__, sa);
|
|
}
|
|
|
|
if (fle == NULL)
|
|
return (EHOSTUNREACH);
|
|
|
|
if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE) {
|
|
M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
|
|
m->m_pkthdr.flowid = fle->f_hash;
|
|
}
|
|
|
|
ro->ro_rt = fle->f_rt;
|
|
ro->ro_flags |= RT_NORTREF;
|
|
lle = fle->f_lle;
|
|
if (lle != NULL && (lle->la_flags & LLE_VALID)) {
|
|
ro->ro_prepend = lle->r_linkdata;
|
|
ro->ro_plen = lle->r_hdrlen;
|
|
ro->ro_flags |= RT_MAY_LOOP;
|
|
if (lle->la_flags & LLE_IFADDR)
|
|
ro->ro_flags |= RT_L2_ME;
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
static struct flentry *
|
|
flowtable_lookup_common(struct flowtable *ft, uint32_t *key, int keylen,
|
|
uint32_t fibnum)
|
|
{
|
|
struct flist *flist;
|
|
struct flentry *fle;
|
|
uint32_t hash;
|
|
|
|
FLOWSTAT_INC(ft, ft_lookups);
|
|
|
|
hash = jenkins_hash32(key, keylen / sizeof(uint32_t), flow_hashjitter);
|
|
|
|
critical_enter();
|
|
flist = flowtable_list(ft, hash);
|
|
SLIST_FOREACH(fle, flist, f_next) {
|
|
KASSERT(fle->f_hash % ft->ft_size == hash % ft->ft_size,
|
|
("%s: wrong hash", __func__));
|
|
if (flow_matches(fle, key, keylen, fibnum)) {
|
|
fle->f_uptime = time_uptime;
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
fle->f_flags |= fibnum >> 24;
|
|
#endif
|
|
critical_exit();
|
|
FLOWSTAT_INC(ft, ft_hits);
|
|
return (fle);
|
|
}
|
|
}
|
|
critical_exit();
|
|
|
|
FLOWSTAT_INC(ft, ft_misses);
|
|
|
|
return (flowtable_insert(ft, hash, key, keylen, fibnum));
|
|
}
|
|
|
|
static void
|
|
flowtable_alloc(struct flowtable *ft)
|
|
{
|
|
|
|
ft->ft_table = malloc(ft->ft_size * sizeof(struct flist),
|
|
M_FTABLE, M_WAITOK);
|
|
for (int i = 0; i < ft->ft_size; i++)
|
|
ft->ft_table[i] = uma_zalloc(pcpu_zone_ptr, M_WAITOK | M_ZERO);
|
|
|
|
ft->ft_masks = uma_zalloc(pcpu_zone_ptr, M_WAITOK);
|
|
for (int i = 0; i < mp_ncpus; i++) {
|
|
bitstr_t **b;
|
|
|
|
b = zpcpu_get_cpu(ft->ft_masks, i);
|
|
*b = bit_alloc(ft->ft_size, M_FTABLE, M_WAITOK);
|
|
}
|
|
ft->ft_tmpmask = bit_alloc(ft->ft_size, M_FTABLE, M_WAITOK);
|
|
}
|
|
|
|
static void
|
|
flowtable_free_stale(struct flowtable *ft, struct rtentry *rt, int maxidle)
|
|
{
|
|
struct flist *flist, freelist;
|
|
struct flentry *fle, *fle1, *fleprev;
|
|
bitstr_t *mask, *tmpmask;
|
|
int curbit, tmpsize;
|
|
|
|
SLIST_INIT(&freelist);
|
|
mask = flowtable_mask(ft);
|
|
tmpmask = ft->ft_tmpmask;
|
|
tmpsize = ft->ft_size;
|
|
memcpy(tmpmask, mask, ft->ft_size/8);
|
|
curbit = 0;
|
|
fleprev = NULL; /* pacify gcc */
|
|
/*
|
|
* XXX Note to self, bit_ffs operates at the byte level
|
|
* and thus adds gratuitous overhead
|
|
*/
|
|
bit_ffs(tmpmask, ft->ft_size, &curbit);
|
|
while (curbit != -1) {
|
|
if (curbit >= ft->ft_size || curbit < -1) {
|
|
log(LOG_ALERT,
|
|
"warning: bad curbit value %d \n",
|
|
curbit);
|
|
break;
|
|
}
|
|
|
|
FLOWSTAT_INC(ft, ft_free_checks);
|
|
|
|
critical_enter();
|
|
flist = flowtable_list(ft, curbit);
|
|
#ifdef DIAGNOSTIC
|
|
if (SLIST_EMPTY(flist) && curbit > 0) {
|
|
log(LOG_ALERT,
|
|
"warning bit=%d set, but no fle found\n",
|
|
curbit);
|
|
}
|
|
#endif
|
|
SLIST_FOREACH_SAFE(fle, flist, f_next, fle1) {
|
|
if (rt != NULL && fle->f_rt != rt) {
|
|
fleprev = fle;
|
|
continue;
|
|
}
|
|
if (!flow_stale(ft, fle, maxidle)) {
|
|
fleprev = fle;
|
|
continue;
|
|
}
|
|
|
|
if (fle == SLIST_FIRST(flist))
|
|
SLIST_REMOVE_HEAD(flist, f_next);
|
|
else
|
|
SLIST_REMOVE_AFTER(fleprev, f_next);
|
|
SLIST_INSERT_HEAD(&freelist, fle, f_next);
|
|
}
|
|
if (SLIST_EMPTY(flist))
|
|
bit_clear(mask, curbit);
|
|
critical_exit();
|
|
|
|
bit_clear(tmpmask, curbit);
|
|
bit_ffs(tmpmask, tmpsize, &curbit);
|
|
}
|
|
|
|
SLIST_FOREACH_SAFE(fle, &freelist, f_next, fle1) {
|
|
FLOWSTAT_INC(ft, ft_frees);
|
|
if (fle->f_rt != NULL)
|
|
RTFREE(fle->f_rt);
|
|
if (fle->f_lle != NULL)
|
|
LLE_FREE(fle->f_lle);
|
|
uma_zfree(flow_zone, fle);
|
|
}
|
|
}
|
|
|
|
static void
|
|
flowtable_clean_vnet(struct flowtable *ft, struct rtentry *rt, int maxidle)
|
|
{
|
|
int i;
|
|
|
|
CPU_FOREACH(i) {
|
|
if (smp_started == 1) {
|
|
thread_lock(curthread);
|
|
sched_bind(curthread, i);
|
|
thread_unlock(curthread);
|
|
}
|
|
|
|
flowtable_free_stale(ft, rt, maxidle);
|
|
|
|
if (smp_started == 1) {
|
|
thread_lock(curthread);
|
|
sched_unbind(curthread);
|
|
thread_unlock(curthread);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
flowtable_route_flush(sa_family_t sa, struct rtentry *rt)
|
|
{
|
|
struct flowtable *ft;
|
|
|
|
switch (sa) {
|
|
#ifdef INET
|
|
case AF_INET:
|
|
ft = &V_ip4_ft;
|
|
break;
|
|
#endif
|
|
#ifdef INET6
|
|
case AF_INET6:
|
|
ft = &V_ip6_ft;
|
|
break;
|
|
#endif
|
|
default:
|
|
panic("%s: sa %d", __func__, sa);
|
|
}
|
|
|
|
flowtable_clean_vnet(ft, rt, 0);
|
|
}
|
|
|
|
static void
|
|
flowtable_cleaner(void)
|
|
{
|
|
VNET_ITERATOR_DECL(vnet_iter);
|
|
struct thread *td;
|
|
|
|
if (bootverbose)
|
|
log(LOG_INFO, "flowtable cleaner started\n");
|
|
td = curthread;
|
|
while (1) {
|
|
uint32_t flowclean_freq, maxidle;
|
|
|
|
/*
|
|
* The maximum idle time, as well as frequency are arbitrary.
|
|
*/
|
|
if (flow_full())
|
|
maxidle = 5;
|
|
else
|
|
maxidle = 30;
|
|
|
|
VNET_LIST_RLOCK();
|
|
VNET_FOREACH(vnet_iter) {
|
|
CURVNET_SET(vnet_iter);
|
|
#ifdef INET
|
|
flowtable_clean_vnet(&V_ip4_ft, NULL, maxidle);
|
|
#endif
|
|
#ifdef INET6
|
|
flowtable_clean_vnet(&V_ip6_ft, NULL, maxidle);
|
|
#endif
|
|
CURVNET_RESTORE();
|
|
}
|
|
VNET_LIST_RUNLOCK();
|
|
|
|
if (flow_full())
|
|
flowclean_freq = 4*hz;
|
|
else
|
|
flowclean_freq = 20*hz;
|
|
mtx_lock(&flowclean_lock);
|
|
thread_lock(td);
|
|
sched_prio(td, PPAUSE);
|
|
thread_unlock(td);
|
|
flowclean_cycles++;
|
|
cv_broadcast(&flowclean_f_cv);
|
|
cv_timedwait(&flowclean_c_cv, &flowclean_lock, flowclean_freq);
|
|
mtx_unlock(&flowclean_lock);
|
|
}
|
|
}
|
|
|
|
static void
|
|
flowtable_flush(void *unused __unused)
|
|
{
|
|
uint64_t start;
|
|
|
|
mtx_lock(&flowclean_lock);
|
|
start = flowclean_cycles;
|
|
while (start == flowclean_cycles) {
|
|
cv_broadcast(&flowclean_c_cv);
|
|
cv_wait(&flowclean_f_cv, &flowclean_lock);
|
|
}
|
|
mtx_unlock(&flowclean_lock);
|
|
}
|
|
|
|
static struct kproc_desc flow_kp = {
|
|
"flowcleaner",
|
|
flowtable_cleaner,
|
|
&flowcleanerproc
|
|
};
|
|
SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
|
|
|
|
static int
|
|
flowtable_get_size(char *name)
|
|
{
|
|
int size;
|
|
|
|
if (TUNABLE_INT_FETCH(name, &size)) {
|
|
if (size < 256)
|
|
size = 256;
|
|
if (!powerof2(size)) {
|
|
printf("%s must be power of 2\n", name);
|
|
size = 2048;
|
|
}
|
|
} else {
|
|
/*
|
|
* round up to the next power of 2
|
|
*/
|
|
size = 1 << fls((1024 + maxusers * 64) - 1);
|
|
}
|
|
|
|
return (size);
|
|
}
|
|
|
|
static void
|
|
flowtable_init(const void *unused __unused)
|
|
{
|
|
|
|
flow_hashjitter = arc4random();
|
|
|
|
flow_zone = uma_zcreate("flows", sizeof(struct flentry),
|
|
NULL, NULL, NULL, NULL, (64-1), UMA_ZONE_MAXBUCKET);
|
|
uma_zone_set_max(flow_zone, 1024 + maxusers * 64 * mp_ncpus);
|
|
|
|
cv_init(&flowclean_c_cv, "c_flowcleanwait");
|
|
cv_init(&flowclean_f_cv, "f_flowcleanwait");
|
|
mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
|
|
EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
|
|
EVENTHANDLER_PRI_ANY);
|
|
}
|
|
SYSINIT(flowtable_init, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
|
|
flowtable_init, NULL);
|
|
|
|
#ifdef INET
|
|
static SYSCTL_NODE(_net_flowtable, OID_AUTO, ip4, CTLFLAG_RD, NULL,
|
|
"Flowtable for IPv4");
|
|
|
|
static VNET_PCPUSTAT_DEFINE(struct flowtable_stat, ip4_ftstat);
|
|
VNET_PCPUSTAT_SYSINIT(ip4_ftstat);
|
|
VNET_PCPUSTAT_SYSUNINIT(ip4_ftstat);
|
|
SYSCTL_VNET_PCPUSTAT(_net_flowtable_ip4, OID_AUTO, stat, struct flowtable_stat,
|
|
ip4_ftstat, "Flowtable statistics for IPv4 "
|
|
"(struct flowtable_stat, net/flowtable.h)");
|
|
|
|
static void
|
|
flowtable_init_vnet_v4(const void *unused __unused)
|
|
{
|
|
|
|
V_ip4_ft.ft_size = flowtable_get_size("net.flowtable.ip4.size");
|
|
V_ip4_ft.ft_stat = VNET(ip4_ftstat);
|
|
flowtable_alloc(&V_ip4_ft);
|
|
}
|
|
VNET_SYSINIT(ft_vnet_v4, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
|
|
flowtable_init_vnet_v4, NULL);
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
static SYSCTL_NODE(_net_flowtable, OID_AUTO, ip6, CTLFLAG_RD, NULL,
|
|
"Flowtable for IPv6");
|
|
|
|
static VNET_PCPUSTAT_DEFINE(struct flowtable_stat, ip6_ftstat);
|
|
VNET_PCPUSTAT_SYSINIT(ip6_ftstat);
|
|
VNET_PCPUSTAT_SYSUNINIT(ip6_ftstat);
|
|
SYSCTL_VNET_PCPUSTAT(_net_flowtable_ip6, OID_AUTO, stat, struct flowtable_stat,
|
|
ip6_ftstat, "Flowtable statistics for IPv6 "
|
|
"(struct flowtable_stat, net/flowtable.h)");
|
|
|
|
static void
|
|
flowtable_init_vnet_v6(const void *unused __unused)
|
|
{
|
|
|
|
V_ip6_ft.ft_size = flowtable_get_size("net.flowtable.ip6.size");
|
|
V_ip6_ft.ft_stat = VNET(ip6_ftstat);
|
|
flowtable_alloc(&V_ip6_ft);
|
|
}
|
|
VNET_SYSINIT(flowtable_init_vnet_v6, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
|
|
flowtable_init_vnet_v6, NULL);
|
|
#endif /* INET6 */
|
|
|
|
#ifdef DDB
|
|
static bitstr_t *
|
|
flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
|
|
{
|
|
|
|
return (zpcpu_get_cpu(*ft->ft_masks, cpuid));
|
|
}
|
|
|
|
static struct flist *
|
|
flowtable_list_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
|
|
{
|
|
|
|
return (zpcpu_get_cpu(&ft->ft_table[hash % ft->ft_size], cpuid));
|
|
}
|
|
|
|
static void
|
|
flow_show(struct flowtable *ft, struct flentry *fle)
|
|
{
|
|
int idle_time;
|
|
int rt_valid, ifp_valid;
|
|
volatile struct rtentry *rt;
|
|
struct ifnet *ifp = NULL;
|
|
uint32_t *hashkey = fle->f_key;
|
|
|
|
idle_time = (int)(time_uptime - fle->f_uptime);
|
|
rt = fle->f_rt;
|
|
rt_valid = rt != NULL;
|
|
if (rt_valid)
|
|
ifp = rt->rt_ifp;
|
|
ifp_valid = ifp != NULL;
|
|
|
|
#ifdef INET
|
|
if (ft == &V_ip4_ft) {
|
|
char daddr[4*sizeof "123"];
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
char saddr[4*sizeof "123"];
|
|
uint16_t sport, dport;
|
|
#endif
|
|
|
|
inet_ntoa_r(*(struct in_addr *) &hashkey[0], daddr);
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);
|
|
dport = ntohs((uint16_t)(hashkey[2] >> 16));
|
|
sport = ntohs((uint16_t)(hashkey[2] & 0xffff));
|
|
db_printf("%s:%d->%s:%d", saddr, sport, daddr, dport);
|
|
#else
|
|
db_printf("%s ", daddr);
|
|
#endif
|
|
}
|
|
#endif /* INET */
|
|
#ifdef INET6
|
|
if (ft == &V_ip6_ft) {
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
|
|
hashkey[0], hashkey[1], hashkey[2],
|
|
hashkey[3], hashkey[4], hashkey[5],
|
|
hashkey[6], hashkey[7], hashkey[8]);
|
|
#else
|
|
db_printf("\n\tkey=%08x:%08x:%08x ",
|
|
hashkey[0], hashkey[1], hashkey[2]);
|
|
#endif
|
|
}
|
|
#endif /* INET6 */
|
|
|
|
db_printf("hash=%08x idle_time=%03d"
|
|
"\n\tfibnum=%02d rt=%p",
|
|
fle->f_hash, idle_time, fle->f_fibnum, fle->f_rt);
|
|
|
|
#ifdef FLOWTABLE_HASH_ALL
|
|
if (fle->f_flags & FL_STALE)
|
|
db_printf(" FL_STALE ");
|
|
#endif
|
|
if (rt_valid) {
|
|
if (rt->rt_flags & RTF_UP)
|
|
db_printf(" RTF_UP ");
|
|
}
|
|
if (ifp_valid) {
|
|
if (ifp->if_flags & IFF_LOOPBACK)
|
|
db_printf(" IFF_LOOPBACK ");
|
|
if (ifp->if_flags & IFF_UP)
|
|
db_printf(" IFF_UP ");
|
|
if (ifp->if_flags & IFF_POINTOPOINT)
|
|
db_printf(" IFF_POINTOPOINT ");
|
|
}
|
|
db_printf("\n");
|
|
}
|
|
|
|
static void
|
|
flowtable_show(struct flowtable *ft, int cpuid)
|
|
{
|
|
int curbit = 0;
|
|
bitstr_t *mask, *tmpmask;
|
|
|
|
if (cpuid != -1)
|
|
db_printf("cpu: %d\n", cpuid);
|
|
mask = flowtable_mask_pcpu(ft, cpuid);
|
|
tmpmask = ft->ft_tmpmask;
|
|
memcpy(tmpmask, mask, ft->ft_size/8);
|
|
/*
|
|
* XXX Note to self, bit_ffs operates at the byte level
|
|
* and thus adds gratuitous overhead
|
|
*/
|
|
bit_ffs(tmpmask, ft->ft_size, &curbit);
|
|
while (curbit != -1) {
|
|
struct flist *flist;
|
|
struct flentry *fle;
|
|
|
|
if (curbit >= ft->ft_size || curbit < -1) {
|
|
db_printf("warning: bad curbit value %d \n",
|
|
curbit);
|
|
break;
|
|
}
|
|
|
|
flist = flowtable_list_pcpu(ft, curbit, cpuid);
|
|
|
|
SLIST_FOREACH(fle, flist, f_next)
|
|
flow_show(ft, fle);
|
|
bit_clear(tmpmask, curbit);
|
|
bit_ffs(tmpmask, ft->ft_size, &curbit);
|
|
}
|
|
}
|
|
|
|
static void
|
|
flowtable_show_vnet(struct flowtable *ft)
|
|
{
|
|
|
|
int i;
|
|
|
|
CPU_FOREACH(i)
|
|
flowtable_show(ft, i);
|
|
}
|
|
|
|
DB_SHOW_COMMAND(flowtables, db_show_flowtables)
|
|
{
|
|
VNET_ITERATOR_DECL(vnet_iter);
|
|
|
|
VNET_FOREACH(vnet_iter) {
|
|
CURVNET_SET(vnet_iter);
|
|
#ifdef VIMAGE
|
|
db_printf("vnet %p\n", vnet_iter);
|
|
#endif
|
|
#ifdef INET
|
|
printf("IPv4:\n");
|
|
flowtable_show_vnet(&V_ip4_ft);
|
|
#endif
|
|
#ifdef INET6
|
|
printf("IPv6:\n");
|
|
flowtable_show_vnet(&V_ip6_ft);
|
|
#endif
|
|
CURVNET_RESTORE();
|
|
}
|
|
}
|
|
#endif
|