hselasky efa6326974 Implement kernel support for hardware rate limited sockets.
- Add RATELIMIT kernel configuration keyword which must be set to
enable the new functionality.

- Add support for hardware driven, Receive Side Scaling, RSS aware, rate
limited sendqueues and expose the functionality through the already
established SO_MAX_PACING_RATE setsockopt(). The API support rates in
the range from 1 to 4Gbytes/s which are suitable for regular TCP and
UDP streams. The setsockopt(2) manual page has been updated.

- Add rate limit function callback API to "struct ifnet" which supports
the following operations: if_snd_tag_alloc(), if_snd_tag_modify(),
if_snd_tag_query() and if_snd_tag_free().

- Add support to ifconfig to view, set and clear the IFCAP_TXRTLMT
flag, which tells if a network driver supports rate limiting or not.

- This patch also adds support for rate limiting through VLAN and LAGG
intermediate network devices.

- How rate limiting works:

1) The userspace application calls setsockopt() after accepting or
making a new connection to set the rate which is then stored in the
socket structure in the kernel. Later on when packets are transmitted
a check is made in the transmit path for rate changes. A rate change
implies a non-blocking ifp->if_snd_tag_alloc() call will be made to the
destination network interface, which then sets up a custom sendqueue
with the given rate limitation parameter. A "struct m_snd_tag" pointer is
returned which serves as a "snd_tag" hint in the m_pkthdr for the
subsequently transmitted mbufs.

2) When the network driver sees the "m->m_pkthdr.snd_tag" different
from NULL, it will move the packets into a designated rate limited sendqueue
given by the snd_tag pointer. It is up to the individual drivers how the rate
limited traffic will be rate limited.

3) Route changes are detected by the NIC drivers in the ifp->if_transmit()
routine when the ifnet pointer in the incoming snd_tag mismatches the
one of the network interface. The network adapter frees the mbuf and
returns EAGAIN which causes the ip_output() to release and clear the send
tag. Upon next ip_output() a new "snd_tag" will be tried allocated.

4) When the PCB is detached the custom sendqueue will be released by a
non-blocking ifp->if_snd_tag_free() call to the currently bound network
interface.

Reviewed by:		wblock (manpages), adrian, gallatin, scottl (network)
Differential Revision:	https://reviews.freebsd.org/D3687
Sponsored by:		Mellanox Technologies
MFC after:		3 months
2017-01-18 13:31:17 +00:00

144 lines
3.4 KiB
C

/*-
* Copyright (c) 2009 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* When an interface has been detached but not yet freed, we set the various
* ifnet function pointers to "ifdead" versions. This prevents unexpected
* calls from the network stack into the device driver after if_detach() has
* returned.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
static int
ifdead_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
struct route *ro)
{
m_freem(m);
return (ENXIO);
}
static void
ifdead_input(struct ifnet *ifp, struct mbuf *m)
{
m_freem(m);
}
static void
ifdead_start(struct ifnet *ifp)
{
}
static int
ifdead_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
return (ENXIO);
}
static int
ifdead_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
struct sockaddr *sa)
{
*llsa = NULL;
return (ENXIO);
}
static void
ifdead_qflush(struct ifnet *ifp)
{
}
static int
ifdead_transmit(struct ifnet *ifp, struct mbuf *m)
{
m_freem(m);
return (ENXIO);
}
static uint64_t
ifdead_get_counter(struct ifnet *ifp, ift_counter cnt)
{
return (0);
}
static int
ifdead_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
struct m_snd_tag **ppmt)
{
return (EOPNOTSUPP);
}
static int
ifdead_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
{
return (EOPNOTSUPP);
}
static int
ifdead_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
{
return (EOPNOTSUPP);
}
static void
ifdead_snd_tag_free(struct m_snd_tag *pmt)
{
}
void
if_dead(struct ifnet *ifp)
{
ifp->if_output = ifdead_output;
ifp->if_input = ifdead_input;
ifp->if_start = ifdead_start;
ifp->if_ioctl = ifdead_ioctl;
ifp->if_resolvemulti = ifdead_resolvemulti;
ifp->if_qflush = ifdead_qflush;
ifp->if_transmit = ifdead_transmit;
ifp->if_get_counter = ifdead_get_counter;
ifp->if_snd_tag_alloc = ifdead_snd_tag_alloc;
ifp->if_snd_tag_modify = ifdead_snd_tag_modify;
ifp->if_snd_tag_query = ifdead_snd_tag_query;
ifp->if_snd_tag_free = ifdead_snd_tag_free;
}