Robert Watson d4b5cae49b Reimplement the netisr framework in order to support parallel netisr
threads:

- Support up to one netisr thread per CPU, each processing its own
  workstream, or set of per-protocol queues.  Threads may be bound
  to specific CPUs, or allowed to migrate, based on a global policy.

  In the future it would be desirable to support topology-centric
  policies, such as "one netisr per package".

- Allow each protocol to advertise an ordering policy, which can
  currently be one of:

  NETISR_POLICY_SOURCE: packets must maintain ordering with respect to
    an implicit or explicit source (such as an interface or socket).

  NETISR_POLICY_FLOW: make use of mbuf flow identifiers to place work,
    as well as allowing protocols to provide a flow generation function
    for mbufs without flow identifiers (m2flow).  Falls back on
    NETISR_POLICY_SOURCE if no flow ID is available.

  NETISR_POLICY_CPU: allow protocols to inspect and assign a CPU for
    each packet handled by netisr (m2cpuid).

- Provide utility functions for querying the number of workstreams
  being used, as well as a mapping function from workstream to CPU ID,
  which protocols may use in work placement decisions.

- Add explicit interfaces to get and set per-protocol queue limits, and
  get and clear drop counters, which query data or apply changes across
  all workstreams.

- Add a more extensible netisr registration interface, in which
  protocols declare 'struct netisr_handler' structures for each
  registered NETISR_ type.  These include name, handler function,
  optional mbuf to flow ID function, optional mbuf to CPU ID function,
  queue limit, and ordering policy.  Padding is present to allow these
  to be expanded in the future.  If no queue limit is declared, then
  a default is used.

- Queue limits are now per-workstream, and raised from the previous
  IFQ_MAXLEN default of 50 to 256.

- All protocols are updated to use the new registration interface, and
  with the exception of netnatm, default queue limits.  Most protocols
  register as NETISR_POLICY_SOURCE, except IPv4 and IPv6, which use
  NETISR_POLICY_FLOW, and will therefore take advantage of driver-
  generated flow IDs if present.

- Formalize a non-packet based interface between interface polling and
  the netisr, rather than having polling pretend to be two protocols.
  Provide two explicit hooks in the netisr worker for start and end
  events for runs: netisr_poll() and netisr_pollmore(), as well as a
  function, netisr_sched_poll(), to allow the polling code to schedule
  netisr execution.  DEVICE_POLLING still embeds single-netisr
  assumptions in its implementation, so for now if it is compiled into
  the kernel, a single and un-bound netisr thread is enforced
  regardless of tunable configuration.

In the default configuration, the new netisr implementation maintains
the same basic assumptions as the previous implementation: a single,
un-bound worker thread processes all deferred work, and direct dispatch
is enabled by default wherever possible.

Performance measurement shows a marginal performance improvement over
the old implementation due to the use of batched dequeue.

An rmlock is used to synchronize use and registration/unregistration
using the framework; currently, synchronized use is disabled
(replicating current netisr policy) due to a measurable 3%-6% hit in
ping-pong micro-benchmarking.  It will be enabled once further rmlock
optimization has taken place.  However, in practice, netisrs are
rarely registered or unregistered at runtime.

A new man page for netisr will follow, but since one doesn't currently
exist, it hasn't been updated.

This change is not appropriate for MFC, although the polling shutdown
handler should be merged to 7-STABLE.

Bump __FreeBSD_version.

Reviewed by:	bz
2009-06-01 10:41:38 +00:00

271 lines
9.4 KiB
C

/*-
* Copyright (c) 2006-2008 University of Zagreb
* Copyright (c) 2006-2008 FreeBSD Foundation
*
* This software was developed by the University of Zagreb and the
* FreeBSD Foundation under sponsorship by the Stichting NLnet and the
* FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _NETINET6_VINET6_H_
#define _NETINET6_VINET6_H_
#include <sys/callout.h>
#include <sys/queue.h>
#include <sys/types.h>
#include <net/if_var.h>
#include <netinet/icmp6.h>
#include <netinet/in.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/raw_ip6.h>
#include <netinet6/scope6_var.h>
/*
 * Aggregate of the IPv6 (INET6) variables that the V_* symbol translation
 * macros in this header refer to.  Every member name carries a leading
 * underscore; each V_* macro passes the un-prefixed name to VNET_INET6(),
 * which forwards to VSYM(vnet_inet6, sym).  VSYM is not defined in this
 * header -- presumably it supplies the underscore prefix; confirm in
 * sys/vimage.h.
 *
 * NOTE(review): sizeof(struct vnet_inet6) is handed to VIMAGE_CTASSERT
 * together with SIZEOF_vnet_inet6 right after this definition, so a layout
 * change presumably requires updating that constant as well.
 */
struct vnet_inet6 {
struct in6_ifaddr * _in6_ifaddr;
u_int _frag6_nfragpackets;
u_int _frag6_nfrags;
struct ip6q _ip6q;
struct in6_addrpolicy _defaultaddrpolicy;
/* Untagged queue head: the first TAILQ_HEAD argument is left empty. */
TAILQ_HEAD(, addrsel_policyent) _addrsel_policytab;
u_int _in6_maxmtu;
int _ip6_auto_linklocal;
int _rtq_minreallyold6;
int _rtq_reallyold6;
int _rtq_toomany6;
struct ip6stat _ip6stat;
struct rip6stat _rip6stat;
struct icmp6stat _icmp6stat;
int _rtq_timeout6;
struct callout _rtq_timer6;
struct callout _rtq_mtutimer;
struct callout _nd6_slowtimo_ch;
struct callout _nd6_timer_ch;
struct callout _in6_tmpaddrtimer_ch;
int _nd6_inuse;
int _nd6_allocated;
int _nd6_onlink_ns_rfc4861;
struct nd_drhead _nd_defrouter;
struct nd_prhead _nd_prefix;
struct ifnet * _nd6_defifp;
int _nd6_defifindex;
struct scope6_id _sid_default;
/* Untagged queue head, as for _addrsel_policytab. */
TAILQ_HEAD(, dadq) _dadq;
int _dad_init;
int _icmp6errpps_count;
struct timeval _icmp6errppslim_last;
int _ip6_forwarding;
int _ip6_sendredirects;
int _ip6_defhlim;
int _ip6_defmcasthlim;
int _ip6_accept_rtadv;
int _ip6_maxfragpackets;
int _ip6_maxfrags;
int _ip6_log_interval;
int _ip6_hdrnestlimit;
int _ip6_dad_count;
int _ip6_auto_flowlabel;
int _ip6_use_deprecated;
int _ip6_rr_prune;
int _ip6_mcast_pmtu;
int _ip6_v6only;
int _ip6_keepfaith;
int _ip6stealth;
time_t _ip6_log_time;
int _pmtu_expire;
int _pmtu_probe;
u_long _rip6_sendspace;
u_long _rip6_recvspace;
int _icmp6_rediraccept;
int _icmp6_redirtimeout;
int _icmp6errppslim;
int _icmp6_nodeinfo;
int _udp6_sendspace;
int _udp6_recvspace;
int _ip6_prefer_tempaddr;
int _nd6_prune;
int _nd6_delay;
int _nd6_umaxtries;
int _nd6_mmaxtries;
int _nd6_useloopback;
int _nd6_gctimer;
int _nd6_maxndopt;
int _nd6_maxnudhint;
int _nd6_maxqueuelen;
int _nd6_debug;
int _nd6_recalc_reachtm_interval;
int _dad_ignore_ns;
int _dad_maxtry;
int _ip6_use_tempaddr;
int _ip6_desync_factor;
u_int32_t _ip6_temp_preferred_lifetime;
u_int32_t _ip6_temp_valid_lifetime;
struct socket * _ip6_mrouter;
int _ip6_mrouter_ver;
int _pim6;
u_int _mrt6debug;
int _ip6_temp_regen_advance;
int _ip6_use_defzone;
struct ip6_pktopts _ip6_opts;
struct timeval _mld_gsrdelay;
/* Untagged list head: the first LIST_HEAD argument is left empty. */
LIST_HEAD(, mld_ifinfo) _mli_head;
int _interface_timers_running6;
int _state_change_timers_running6;
int _current_state_timers_running6;
};
/*
 * Size guard: hands SIZEOF_vnet_inet6 and sizeof(struct vnet_inet6) to
 * VIMAGE_CTASSERT.  Neither the macro nor the constant is defined in this
 * header; see sys/vimage.h.  Presumably a mismatch between the two values
 * fails the build -- confirm against the macro definition.
 */
VIMAGE_CTASSERT(SIZEOF_vnet_inet6, sizeof(struct vnet_inet6));
/*
 * Declare vnet_inet6_0 only when neither VIMAGE nor VIMAGE_GLOBALS is
 * defined.  This is a declaration only; no definition appears in this
 * header.
 */
#if !defined(VIMAGE) && !defined(VIMAGE_GLOBALS)
extern struct vnet_inet6 vnet_inet6_0;
#endif /* !VIMAGE && !VIMAGE_GLOBALS */
/*
 * INIT_VNET_INET6(vnet) forwards to INIT_FROM_VNET with the VNET_MOD_INET6
 * module id, the struct vnet_inet6 type, and the name vnet_inet6.
 * VNET_INET6(sym) forwards to VSYM(vnet_inet6, sym).
 *
 * INIT_FROM_VNET, VNET_MOD_INET6 and VSYM are not defined in this header
 * (presumably they come from sys/vimage.h -- confirm), so what the
 * expansions finally produce cannot be told from here.
 */
#define INIT_VNET_INET6(vnet) \
INIT_FROM_VNET(vnet, VNET_MOD_INET6, struct vnet_inet6, vnet_inet6)
#define VNET_INET6(sym) VSYM(vnet_inet6, sym)
/*
 * Symbol translation macros.
 *
 * Each V_<name> expands to VNET_INET6(<name>).  Entries are listed in
 * ASCII order by name.
 *
 * NOTE(review): V_llinfo_nd6 has no matching _llinfo_nd6 member in
 * struct vnet_inet6 as declared in this header -- it looks stale; confirm
 * that no user remains before removing it.
 */
#define	V_addrsel_policytab	VNET_INET6(addrsel_policytab)
#define	V_current_state_timers_running6 \
	VNET_INET6(current_state_timers_running6)
#define	V_dad_ignore_ns	VNET_INET6(dad_ignore_ns)
#define	V_dad_init	VNET_INET6(dad_init)
#define	V_dad_maxtry	VNET_INET6(dad_maxtry)
#define	V_dadq	VNET_INET6(dadq)
#define	V_defaultaddrpolicy	VNET_INET6(defaultaddrpolicy)
#define	V_frag6_nfragpackets	VNET_INET6(frag6_nfragpackets)
#define	V_frag6_nfrags	VNET_INET6(frag6_nfrags)
#define	V_icmp6_nodeinfo	VNET_INET6(icmp6_nodeinfo)
#define	V_icmp6_rediraccept	VNET_INET6(icmp6_rediraccept)
#define	V_icmp6_redirtimeout	VNET_INET6(icmp6_redirtimeout)
#define	V_icmp6errpps_count	VNET_INET6(icmp6errpps_count)
#define	V_icmp6errppslim	VNET_INET6(icmp6errppslim)
#define	V_icmp6errppslim_last	VNET_INET6(icmp6errppslim_last)
#define	V_icmp6stat	VNET_INET6(icmp6stat)
#define	V_in6_ifaddr	VNET_INET6(in6_ifaddr)
#define	V_in6_maxmtu	VNET_INET6(in6_maxmtu)
#define	V_in6_tmpaddrtimer_ch	VNET_INET6(in6_tmpaddrtimer_ch)
#define	V_interface_timers_running6 \
	VNET_INET6(interface_timers_running6)
#define	V_ip6_accept_rtadv	VNET_INET6(ip6_accept_rtadv)
#define	V_ip6_auto_flowlabel	VNET_INET6(ip6_auto_flowlabel)
#define	V_ip6_auto_linklocal	VNET_INET6(ip6_auto_linklocal)
#define	V_ip6_dad_count	VNET_INET6(ip6_dad_count)
#define	V_ip6_defhlim	VNET_INET6(ip6_defhlim)
#define	V_ip6_defmcasthlim	VNET_INET6(ip6_defmcasthlim)
#define	V_ip6_desync_factor	VNET_INET6(ip6_desync_factor)
#define	V_ip6_forwarding	VNET_INET6(ip6_forwarding)
#define	V_ip6_hdrnestlimit	VNET_INET6(ip6_hdrnestlimit)
#define	V_ip6_keepfaith	VNET_INET6(ip6_keepfaith)
#define	V_ip6_log_interval	VNET_INET6(ip6_log_interval)
#define	V_ip6_log_time	VNET_INET6(ip6_log_time)
#define	V_ip6_maxfragpackets	VNET_INET6(ip6_maxfragpackets)
#define	V_ip6_maxfrags	VNET_INET6(ip6_maxfrags)
#define	V_ip6_mcast_pmtu	VNET_INET6(ip6_mcast_pmtu)
#define	V_ip6_mrouter	VNET_INET6(ip6_mrouter)
#define	V_ip6_mrouter_ver	VNET_INET6(ip6_mrouter_ver)
#define	V_ip6_opts	VNET_INET6(ip6_opts)
#define	V_ip6_prefer_tempaddr	VNET_INET6(ip6_prefer_tempaddr)
#define	V_ip6_rr_prune	VNET_INET6(ip6_rr_prune)
#define	V_ip6_sendredirects	VNET_INET6(ip6_sendredirects)
#define	V_ip6_temp_preferred_lifetime	VNET_INET6(ip6_temp_preferred_lifetime)
#define	V_ip6_temp_regen_advance	VNET_INET6(ip6_temp_regen_advance)
#define	V_ip6_temp_valid_lifetime	VNET_INET6(ip6_temp_valid_lifetime)
#define	V_ip6_use_defzone	VNET_INET6(ip6_use_defzone)
#define	V_ip6_use_deprecated	VNET_INET6(ip6_use_deprecated)
#define	V_ip6_use_tempaddr	VNET_INET6(ip6_use_tempaddr)
#define	V_ip6_v6only	VNET_INET6(ip6_v6only)
#define	V_ip6q	VNET_INET6(ip6q)
#define	V_ip6stat	VNET_INET6(ip6stat)
#define	V_ip6stealth	VNET_INET6(ip6stealth)
#define	V_llinfo_nd6	VNET_INET6(llinfo_nd6)
#define	V_mld_gsrdelay	VNET_INET6(mld_gsrdelay)
#define	V_mli_head	VNET_INET6(mli_head)
#define	V_mrt6debug	VNET_INET6(mrt6debug)
#define	V_nd6_allocated	VNET_INET6(nd6_allocated)
#define	V_nd6_debug	VNET_INET6(nd6_debug)
#define	V_nd6_defifindex	VNET_INET6(nd6_defifindex)
#define	V_nd6_defifp	VNET_INET6(nd6_defifp)
#define	V_nd6_delay	VNET_INET6(nd6_delay)
#define	V_nd6_gctimer	VNET_INET6(nd6_gctimer)
#define	V_nd6_inuse	VNET_INET6(nd6_inuse)
#define	V_nd6_maxndopt	VNET_INET6(nd6_maxndopt)
#define	V_nd6_maxnudhint	VNET_INET6(nd6_maxnudhint)
#define	V_nd6_maxqueuelen	VNET_INET6(nd6_maxqueuelen)
#define	V_nd6_mmaxtries	VNET_INET6(nd6_mmaxtries)
#define	V_nd6_onlink_ns_rfc4861	VNET_INET6(nd6_onlink_ns_rfc4861)
#define	V_nd6_prune	VNET_INET6(nd6_prune)
#define	V_nd6_recalc_reachtm_interval	VNET_INET6(nd6_recalc_reachtm_interval)
#define	V_nd6_slowtimo_ch	VNET_INET6(nd6_slowtimo_ch)
#define	V_nd6_timer_ch	VNET_INET6(nd6_timer_ch)
#define	V_nd6_umaxtries	VNET_INET6(nd6_umaxtries)
#define	V_nd6_useloopback	VNET_INET6(nd6_useloopback)
#define	V_nd_defrouter	VNET_INET6(nd_defrouter)
#define	V_nd_prefix	VNET_INET6(nd_prefix)
#define	V_pim6	VNET_INET6(pim6)
#define	V_pmtu_expire	VNET_INET6(pmtu_expire)
#define	V_pmtu_probe	VNET_INET6(pmtu_probe)
#define	V_rip6_recvspace	VNET_INET6(rip6_recvspace)
#define	V_rip6_sendspace	VNET_INET6(rip6_sendspace)
#define	V_rip6stat	VNET_INET6(rip6stat)
#define	V_rtq_minreallyold6	VNET_INET6(rtq_minreallyold6)
#define	V_rtq_mtutimer	VNET_INET6(rtq_mtutimer)
#define	V_rtq_reallyold6	VNET_INET6(rtq_reallyold6)
#define	V_rtq_timeout6	VNET_INET6(rtq_timeout6)
#define	V_rtq_timer6	VNET_INET6(rtq_timer6)
#define	V_rtq_toomany6	VNET_INET6(rtq_toomany6)
#define	V_sid_default	VNET_INET6(sid_default)
#define	V_state_change_timers_running6 \
	VNET_INET6(state_change_timers_running6)
#define	V_udp6_recvspace	VNET_INET6(udp6_recvspace)
#define	V_udp6_sendspace	VNET_INET6(udp6_sendspace)
#endif /* !_NETINET6_VINET6_H_ */