2012-06-19 07:34:13 +00:00
|
|
|
/*-
|
2017-11-27 14:52:40 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
|
|
|
*
|
2012-06-19 07:34:13 +00:00
|
|
|
* Copyright (c) 2012 Chelsio Communications, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
* Written by: Navdeep Parhar <np@FreeBSD.org>
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include "opt_inet.h"
|
2013-01-15 18:38:51 +00:00
|
|
|
#include "opt_inet6.h"
|
2012-06-19 07:34:13 +00:00
|
|
|
|
|
|
|
#ifdef TCP_OFFLOAD
|
|
|
|
#include <sys/param.h>
|
2013-10-29 11:17:49 +00:00
|
|
|
#include <sys/systm.h>
|
2012-06-19 07:34:13 +00:00
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/ktr.h>
|
|
|
|
#include <sys/module.h>
|
|
|
|
#include <sys/protosw.h>
|
|
|
|
#include <sys/domain.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/socketvar.h>
|
cxgbe(4): Add support for Connection Offload Policy (aka COP).
COP allows fine-grained control on whether to offload a TCP connection
using t4_tom, and what settings to apply to a connection selected for
offload. t4_tom must still be loaded and IFCAP_TOE must still be
enabled for full TCP offload to take place on an interface. The
difference is that IFCAP_TOE used to be the only knob and would enable
TOE for all new connections on the interface, but now the driver will
also consult the COP, if any, before offloading to the hardware TOE.
A policy is a plain text file with any number of rules, one per line.
Each rule has a "match" part consisting of a socket-type (L = listen,
A = active open, P = passive open, D = don't care) and a pcap-filter(7)
expression, and a "settings" part that specifies whether to offload the
connection or not and the parameters to use if so. The general format
of a rule is: [socket-type] expr => settings
Example. See cxgbetool(8) for more information.
[L] ip && port http => offload
[L] port 443 => !offload
[L] port ssh => offload
[P] src net 192.168/16 && dst port ssh => offload !nagle !timestamp cong newreno
[P] dst port ssh => offload !nagle ecn cong tahoe
[P] dst port http => offload
[A] dst port 443 => offload tls
[A] dst net 192.168/16 => offload !timestamp cong highspeed
The driver processes the rules for each new listen, active open, or
passive open and stops at the first match. There is an implicit rule at
the end of every policy that prohibits offload when no rule in the
policy matches:
[D] all => !offload
This is a reworked and expanded version of a patch submitted by
Krishnamraju Eraparaju @ Chelsio.
Sponsored by: Chelsio Communications
2018-04-14 19:07:56 +00:00
|
|
|
#include <sys/sysctl.h>
|
2012-06-19 07:34:13 +00:00
|
|
|
#include <net/ethernet.h>
|
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/if_types.h>
|
|
|
|
#include <net/if_vlan_var.h>
|
|
|
|
#include <net/route.h>
|
2020-04-22 07:53:43 +00:00
|
|
|
#include <net/route/nhop.h>
|
2012-06-19 07:34:13 +00:00
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_pcb.h>
|
|
|
|
#include <netinet/ip.h>
|
|
|
|
#define TCPSTATES
|
|
|
|
#include <netinet/tcp_fsm.h>
|
2016-01-27 05:15:53 +00:00
|
|
|
#include <netinet/tcp_var.h>
|
2012-06-19 07:34:13 +00:00
|
|
|
#include <netinet/toecore.h>
|
cxgbe(4): Add support for Connection Offload Policy (aka COP).
COP allows fine-grained control on whether to offload a TCP connection
using t4_tom, and what settings to apply to a connection selected for
offload. t4_tom must still be loaded and IFCAP_TOE must still be
enabled for full TCP offload to take place on an interface. The
difference is that IFCAP_TOE used to be the only knob and would enable
TOE for all new connections on the interface, but now the driver will
also consult the COP, if any, before offloading to the hardware TOE.
A policy is a plain text file with any number of rules, one per line.
Each rule has a "match" part consisting of a socket-type (L = listen,
A = active open, P = passive open, D = don't care) and a pcap-filter(7)
expression, and a "settings" part that specifies whether to offload the
connection or not and the parameters to use if so. The general format
of a rule is: [socket-type] expr => settings
Example. See cxgbetool(8) for more information.
[L] ip && port http => offload
[L] port 443 => !offload
[L] port ssh => offload
[P] src net 192.168/16 && dst port ssh => offload !nagle !timestamp cong newreno
[P] dst port ssh => offload !nagle ecn cong tahoe
[P] dst port http => offload
[A] dst port 443 => offload tls
[A] dst net 192.168/16 => offload !timestamp cong highspeed
The driver processes the rules for each new listen, active open, or
passive open and stops at the first match. There is an implicit rule at
the end of every policy that prohibits offload when no rule in the
policy matches:
[D] all => !offload
This is a reworked and expanded version of a patch submitted by
Krishnamraju Eraparaju @ Chelsio.
Sponsored by: Chelsio Communications
2018-04-14 19:07:56 +00:00
|
|
|
#include <netinet/cc/cc.h>
|
2012-06-19 07:34:13 +00:00
|
|
|
|
|
|
|
#include "common/common.h"
|
|
|
|
#include "common/t4_msg.h"
|
|
|
|
#include "common/t4_regs.h"
|
2013-07-04 17:55:52 +00:00
|
|
|
#include "common/t4_regs_values.h"
|
2018-11-29 01:15:53 +00:00
|
|
|
#include "t4_clip.h"
|
2012-06-19 07:34:13 +00:00
|
|
|
#include "tom/t4_tom_l2t.h"
|
|
|
|
#include "tom/t4_tom.h"
|
|
|
|
|
|
|
|
/*
|
2017-01-07 00:08:55 +00:00
|
|
|
* Active open succeeded.
|
2012-06-19 07:34:13 +00:00
|
|
|
*/
|
|
|
|
static int
|
|
|
|
do_act_establish(struct sge_iq *iq, const struct rss_header *rss,
|
|
|
|
struct mbuf *m)
|
|
|
|
{
|
|
|
|
struct adapter *sc = iq->adapter;
|
|
|
|
const struct cpl_act_establish *cpl = (const void *)(rss + 1);
|
2014-10-07 21:26:22 +00:00
|
|
|
u_int tid = GET_TID(cpl);
|
|
|
|
u_int atid = G_TID_TID(ntohl(cpl->tos_atid));
|
2012-06-19 07:34:13 +00:00
|
|
|
struct toepcb *toep = lookup_atid(sc, atid);
|
|
|
|
struct inpcb *inp = toep->inp;
|
|
|
|
|
|
|
|
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
|
|
|
|
KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__));
|
|
|
|
|
|
|
|
CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid);
|
|
|
|
free_atid(sc, atid);
|
|
|
|
|
2017-01-11 23:48:17 +00:00
|
|
|
CURVNET_SET(toep->vnet);
|
2012-06-19 07:34:13 +00:00
|
|
|
INP_WLOCK(inp);
|
|
|
|
toep->tid = tid;
|
2017-01-07 20:26:19 +00:00
|
|
|
insert_tid(sc, tid, toep, inp->inp_vflag & INP_IPV6 ? 2 : 1);
|
2012-06-19 07:34:13 +00:00
|
|
|
if (inp->inp_flags & INP_DROPPED) {
|
|
|
|
|
|
|
|
/* socket closed by the kernel before hw told us it connected */
|
|
|
|
|
|
|
|
send_flowc_wr(toep, NULL);
|
|
|
|
send_reset(sc, toep, be32toh(cpl->snd_isn));
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2018-12-19 01:37:00 +00:00
|
|
|
make_established(toep, be32toh(cpl->snd_isn) - 1,
|
|
|
|
be32toh(cpl->rcv_isn) - 1, cpl->tcp_opt);
|
2020-04-15 19:28:51 +00:00
|
|
|
inp->inp_flowtype = M_HASHTYPE_OPAQUE;
|
|
|
|
inp->inp_flowid = tid;
|
Support for TLS offload of TOE connections on T6 adapters.
The TOE engine in Chelsio T6 adapters supports offloading of TLS
encryption and TCP segmentation for offloaded connections. Sockets
using TLS are required to use a set of custom socket options to upload
RX and TX keys to the NIC and to enable RX processing. Currently
these socket options are implemented as TCP options in the vendor
specific range. A patched OpenSSL library will be made available in a
port / package for use with the TLS TOE support.
TOE sockets can either offload both transmit and reception of TLS
records or just transmit. TLS offload (both RX and TX) is enabled by
setting the dev.t6nex.<x>.tls sysctl to 1 and requires TOE to be
enabled on the relevant interface. Transmit offload can be used on
any "normal" or TLS TOE socket by using the custom socket option to
program a transmit key. This permits most TOE sockets to
transparently offload TLS when applications use a patched SSL library
(e.g. using LD_LIBRARY_PATH to request use of a patched OpenSSL
library). Receive offload can only be used with TOE sockets using the
TLS mode. The dev.t6nex.0.toe.tls_rx_ports sysctl can be set to a
list of TCP port numbers. Any connection with either a local or
remote port number in that list will be created as a TLS socket rather
than a plain TOE socket. Note that although this sysctl accepts an
arbitrary list of port numbers, the sysctl(8) tool is only able to set
sysctl nodes to a single value. A TLS socket will hang without
receiving data if used by an application that is not using a patched
SSL library. Thus, the tls_rx_ports node should be used with care.
For a server mostly concerned with offloading TLS transmit, this node
is not needed as plain TOE sockets will fall back to software crypto
when using an unpatched SSL library.
New per-interface statistics nodes are added giving counts of TLS
packets and payload bytes (payload bytes do not include TLS headers or
authentication tags/MACs) offloaded via the TOE engine, e.g.:
dev.cc.0.stats.rx_tls_octets: 149
dev.cc.0.stats.rx_tls_records: 13
dev.cc.0.stats.tx_tls_octets: 26501823
dev.cc.0.stats.tx_tls_records: 1620
TLS transmit work requests are constructed by a new variant of
t4_push_frames() called t4_push_tls_records() in tom/t4_tls.c.
TLS transmit work requests require a buffer containing IVs. If the
IVs are too large to fit into the work request, a separate buffer is
allocated when constructing a work request. This buffer is associated
with the transmit descriptor and freed when the descriptor is ACKed by
the adapter.
Received TLS frames use two new CPL messages. The first message is a
CPL_TLS_DATA containing the decryped payload of a single TLS record.
The handler places the mbuf containing the received payload on an
mbufq in the TOE pcb. The second message is a CPL_RX_TLS_CMP message
which includes a copy of the TLS header and indicates if there were
any errors. The handler for this message places the TLS header into
the socket buffer followed by the saved mbuf with the payload data.
Both of these handlers are contained in tom/t4_tls.c.
A few routines were exposed from t4_cpl_io.c for use by t4_tls.c
including send_rx_credits(), a new send_rx_modulate(), and
t4_close_conn().
TLS keys for both transmit and receive are stored in onboard memory
in the NIC in the "TLS keys" memory region.
In some cases a TLS socket can hang with pending data available in the
NIC that is not delivered to the host. As a workaround, TLS sockets
are more aggressive about sending CPL_RX_DATA_ACK messages anytime that
any data is read from a TLS socket. In addition, a fallback timer will
periodically send CPL_RX_DATA_ACK messages to the NIC for connections
that are still in the handshake phase. Once the connection has
finished the handshake and programmed RX keys via the socket option,
the timer is stopped.
A new function select_ulp_mode() is used to determine what sub-mode a
given TOE socket should use (plain TOE, DDP, or TLS). The existing
set_tcpddp_ulp_mode() function has been renamed to set_ulp_mode() and
handles initialization of TLS-specific state when necessary in
addition to DDP-specific state.
Since TLS sockets do not receive individual TCP segments but always
receive full TLS records, they can receive more data than is available
in the current window (e.g. if a 16k TLS record is received but the
socket buffer is itself 16k). To cope with this, just drop the window
to 0 when this happens, but track the overage and "eat" the overage as
it is read from the socket buffer not opening the window (or adding
rx_credits) for the overage bytes.
Reviewed by: np (earlier version)
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D14529
2018-03-13 23:05:51 +00:00
|
|
|
|
2012-06-19 07:34:13 +00:00
|
|
|
done:
|
|
|
|
INP_WUNLOCK(inp);
|
2017-01-11 23:48:17 +00:00
|
|
|
CURVNET_RESTORE();
|
2012-06-19 07:34:13 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2014-10-07 21:26:22 +00:00
|
|
|
void
|
|
|
|
act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status)
|
|
|
|
{
|
|
|
|
struct toepcb *toep = lookup_atid(sc, atid);
|
|
|
|
struct inpcb *inp = toep->inp;
|
|
|
|
struct toedev *tod = &toep->td->tod;
|
2018-07-04 02:47:16 +00:00
|
|
|
struct epoch_tracker et;
|
2014-10-07 21:26:22 +00:00
|
|
|
|
|
|
|
free_atid(sc, atid);
|
|
|
|
toep->tid = -1;
|
|
|
|
|
2017-01-11 23:48:17 +00:00
|
|
|
CURVNET_SET(toep->vnet);
|
2014-10-07 21:26:22 +00:00
|
|
|
if (status != EAGAIN)
|
2019-11-07 00:08:34 +00:00
|
|
|
NET_EPOCH_ENTER(et);
|
2014-10-07 21:26:22 +00:00
|
|
|
INP_WLOCK(inp);
|
|
|
|
toe_connect_failed(tod, inp, status);
|
|
|
|
final_cpl_received(toep); /* unlocks inp */
|
|
|
|
if (status != EAGAIN)
|
2019-11-07 00:08:34 +00:00
|
|
|
NET_EPOCH_EXIT(et);
|
2017-01-11 23:48:17 +00:00
|
|
|
CURVNET_RESTORE();
|
2014-10-07 21:26:22 +00:00
|
|
|
}
|
|
|
|
|
2017-01-07 00:08:55 +00:00
|
|
|
/*
|
|
|
|
* Active open failed.
|
|
|
|
*/
|
2012-06-19 07:34:13 +00:00
|
|
|
static int
|
|
|
|
do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
|
|
|
|
struct mbuf *m)
|
|
|
|
{
|
|
|
|
struct adapter *sc = iq->adapter;
|
|
|
|
const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1);
|
2014-10-07 21:26:22 +00:00
|
|
|
u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status)));
|
|
|
|
u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status));
|
2012-06-19 07:34:13 +00:00
|
|
|
struct toepcb *toep = lookup_atid(sc, atid);
|
2012-08-21 18:09:33 +00:00
|
|
|
int rc;
|
2012-06-19 07:34:13 +00:00
|
|
|
|
|
|
|
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
|
|
|
|
KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__));
|
|
|
|
|
|
|
|
CTR3(KTR_CXGBE, "%s: atid %u, status %u ", __func__, atid, status);
|
|
|
|
|
|
|
|
/* Ignore negative advice */
|
2013-01-26 03:01:51 +00:00
|
|
|
if (negative_advice(status))
|
2012-06-19 07:34:13 +00:00
|
|
|
return (0);
|
|
|
|
|
|
|
|
if (status && act_open_has_tid(status))
|
|
|
|
release_tid(sc, GET_TID(cpl), toep->ctrlq);
|
|
|
|
|
2012-08-21 18:09:33 +00:00
|
|
|
rc = act_open_rpl_status_to_errno(status);
|
2014-10-07 21:26:22 +00:00
|
|
|
act_open_failure_cleanup(sc, atid, rc);
|
2012-06-19 07:34:13 +00:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2016-07-05 01:29:24 +00:00
|
|
|
t4_init_connect_cpl_handlers(void)
|
2012-06-19 07:34:13 +00:00
|
|
|
{
|
|
|
|
|
2016-07-05 01:29:24 +00:00
|
|
|
t4_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
|
2018-04-30 21:47:30 +00:00
|
|
|
t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl,
|
|
|
|
CPL_COOKIE_TOM);
|
2012-06-19 07:34:13 +00:00
|
|
|
}
|
|
|
|
|
2017-01-27 23:08:30 +00:00
|
|
|
void
|
|
|
|
t4_uninit_connect_cpl_handlers(void)
|
|
|
|
{
|
|
|
|
|
|
|
|
t4_register_cpl_handler(CPL_ACT_ESTABLISH, NULL);
|
2018-04-30 21:47:30 +00:00
|
|
|
t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, NULL, CPL_COOKIE_TOM);
|
2017-01-27 23:08:30 +00:00
|
|
|
}
|
|
|
|
|
2013-01-15 18:38:51 +00:00
|
|
|
/*
 * Give up on offloading an active open.  Records the source line of the
 * bail-out in 'reason', stores the given errno in 'rc', and jumps to the
 * 'failed' label.  The function using this macro must provide all three.
 */
#define DONT_OFFLOAD_ACTIVE_OPEN(err)	do { \
	reason = __LINE__; \
	rc = (err); \
	goto failed; \
} while (0)
|
|
|
|
|
2013-03-30 02:26:20 +00:00
|
|
|
static inline int
|
|
|
|
act_open_cpl_size(struct adapter *sc, int isipv6)
|
|
|
|
{
|
2016-09-17 23:08:49 +00:00
|
|
|
int idx;
|
|
|
|
static const int sz_table[3][2] = {
|
|
|
|
{
|
|
|
|
sizeof (struct cpl_act_open_req),
|
|
|
|
sizeof (struct cpl_act_open_req6)
|
|
|
|
},
|
|
|
|
{
|
|
|
|
sizeof (struct cpl_t5_act_open_req),
|
|
|
|
sizeof (struct cpl_t5_act_open_req6)
|
|
|
|
},
|
|
|
|
{
|
|
|
|
sizeof (struct cpl_t6_act_open_req),
|
|
|
|
sizeof (struct cpl_t6_act_open_req6)
|
|
|
|
},
|
2013-03-30 02:26:20 +00:00
|
|
|
};
|
|
|
|
|
2016-09-17 23:08:49 +00:00
|
|
|
MPASS(chip_id(sc) >= CHELSIO_T4);
|
|
|
|
idx = min(chip_id(sc) - CHELSIO_T4, 2);
|
|
|
|
|
|
|
|
return (sz_table[idx][!!isipv6]);
|
2013-03-30 02:26:20 +00:00
|
|
|
}
|
|
|
|
|
2012-06-19 07:34:13 +00:00
|
|
|
/*
|
|
|
|
* active open (soconnect).
|
|
|
|
*
|
|
|
|
* State of affairs on entry:
|
|
|
|
* soisconnecting (so_state |= SS_ISCONNECTING)
|
|
|
|
* tcbinfo not locked (This has changed - used to be WLOCKed)
|
|
|
|
* inp WLOCKed
|
|
|
|
* tp->t_state = TCPS_SYN_SENT
|
|
|
|
* rtalloc1, RT_UNLOCK on rt.
|
|
|
|
*/
|
|
|
|
int
|
2020-04-22 07:53:43 +00:00
|
|
|
t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh,
|
2012-06-19 07:34:13 +00:00
|
|
|
struct sockaddr *nam)
|
|
|
|
{
|
|
|
|
struct adapter *sc = tod->tod_softc;
|
|
|
|
struct toepcb *toep = NULL;
|
|
|
|
struct wrqe *wr = NULL;
|
2020-04-22 07:53:43 +00:00
|
|
|
struct ifnet *rt_ifp = nh->nh_ifp;
|
2015-12-03 00:02:01 +00:00
|
|
|
struct vi_info *vi;
|
2019-08-27 04:19:40 +00:00
|
|
|
int qid_atid, rc, isipv6;
|
2012-06-19 07:34:13 +00:00
|
|
|
struct inpcb *inp = sotoinpcb(so);
|
|
|
|
struct tcpcb *tp = intotcpcb(inp);
|
2013-01-15 18:38:51 +00:00
|
|
|
int reason;
|
cxgbe(4): Add support for Connection Offload Policy (aka COP).
COP allows fine-grained control on whether to offload a TCP connection
using t4_tom, and what settings to apply to a connection selected for
offload. t4_tom must still be loaded and IFCAP_TOE must still be
enabled for full TCP offload to take place on an interface. The
difference is that IFCAP_TOE used to be the only knob and would enable
TOE for all new connections on the inteface, but now the driver will
also consult the COP, if any, before offloading to the hardware TOE.
A policy is a plain text file with any number of rules, one per line.
Each rule has a "match" part consisting of a socket-type (L = listen,
A = active open, P = passive open, D = don't care) and a pcap-filter(7)
expression, and a "settings" part that specifies whether to offload the
connection or not and the parameters to use if so. The general format
of a rule is: [socket-type] expr => settings
Example. See cxgbetool(8) for more information.
[L] ip && port http => offload
[L] port 443 => !offload
[L] port ssh => offload
[P] src net 192.168/16 && dst port ssh => offload !nagle !timestamp cong newreno
[P] dst port ssh => offload !nagle ecn cong tahoe
[P] dst port http => offload
[A] dst port 443 => offload tls
[A] dst net 192.168/16 => offload !timestamp cong highspeed
The driver processes the rules for each new listen, active open, or
passive open and stops at the first match. There is an implicit rule at
the end of every policy that prohibits offload when no rule in the
policy matches:
[D] all => !offload
This is a reworked and expanded version of a patch submitted by
Krishnamraju Eraparaju @ Chelsio.
Sponsored by: Chelsio Communications
2018-04-14 19:07:56 +00:00
|
|
|
struct offload_settings settings;
|
2019-12-13 23:33:54 +00:00
|
|
|
struct epoch_tracker et;
|
2018-08-17 19:22:46 +00:00
|
|
|
uint16_t vid = 0xfff, pcp = 0;
|
2012-06-19 07:34:13 +00:00
|
|
|
|
|
|
|
INP_WLOCK_ASSERT(inp);
|
2013-01-15 18:38:51 +00:00
|
|
|
KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
|
|
|
|
("%s: dest addr %p has family %u", __func__, nam, nam->sa_family));
|
2012-06-19 07:34:13 +00:00
|
|
|
|
|
|
|
if (rt_ifp->if_type == IFT_ETHER)
|
2015-12-03 00:02:01 +00:00
|
|
|
vi = rt_ifp->if_softc;
|
2012-06-19 07:34:13 +00:00
|
|
|
else if (rt_ifp->if_type == IFT_L2VLAN) {
|
2018-08-15 21:24:05 +00:00
|
|
|
struct ifnet *ifp = VLAN_TRUNKDEV(rt_ifp);
|
2012-06-19 07:34:13 +00:00
|
|
|
|
2015-12-03 00:02:01 +00:00
|
|
|
vi = ifp->if_softc;
|
2018-04-19 18:10:44 +00:00
|
|
|
VLAN_TAG(rt_ifp, &vid);
|
2018-08-17 19:22:46 +00:00
|
|
|
VLAN_PCP(rt_ifp, &pcp);
|
2012-06-19 07:34:13 +00:00
|
|
|
} else if (rt_ifp->if_type == IFT_IEEE8023ADLAG)
|
2013-01-15 18:38:51 +00:00
|
|
|
DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */
|
2012-06-19 07:34:13 +00:00
|
|
|
else
|
2013-01-15 18:38:51 +00:00
|
|
|
DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);
|
2021-03-24 01:01:01 +00:00
|
|
|
if (sc->flags & KERN_TLS_ON)
|
NIC KTLS for Chelsio T6 adapters.
This adds support for ifnet (NIC) KTLS using Chelsio T6 adapters.
Unlike the TOE-based KTLS in r353328, NIC TLS works with non-TOE
connections.
NIC KTLS on T6 is not able to use the normal TSO (LSO) path to segment
the encrypted TLS frames output by the crypto engine. Instead, the
TOE is placed into a special setup to permit "dummy" connections to be
associated with regular sockets using KTLS. This permits using the
TOE to segment the encrypted TLS records. However, this approach does
have some limitations:
1) Regular TOE sockets cannot be used when the TOE is in this special
mode. One can use either TOE and TOE-based KTLS or NIC KTLS, but
not both at the same time.
2) In NIC KTLS mode, the TOE is only able to accept a per-connection
timestamp offset that varies in the upper 4 bits. Put another way,
only connections whose timestamp offset has the 28 lower bits
cleared can use NIC KTLS and generate correct timestamps. The
driver will refuse to enable NIC KTLS on connections with a
timestamp offset with any of the lower 28 bits set. To use NIC
KTLS, users can either disable TCP timestamps by setting the
net.inet.tcp.rfc1323 sysctl to 0, or apply a local patch to the
tcp_new_ts_offset() function to clear the lower 28 bits of the
generated offset.
3) Because the TCP segmentation relies on fields mirrored in a TCB in
the TOE, not all fields in a TCP packet can be sent in the TCP
segments generated from a TLS record. Specifically, for packets
containing TCP options other than timestamps, the driver will
inject an "empty" TCP packet holding the requested options (e.g. a
SACK scoreboard) along with the segments from the TLS record.
These empty TCP packets are counted by the
dev.cc.N.txq.M.kern_tls_options sysctls.
Unlike TOE TLS which is able to buffer encrypted TLS records in
on-card memory to handle retransmits, NIC KTLS must re-encrypt TLS
records for retransmit requests as well as non-retransmit requests
that do not include the start of a TLS record but do include the
trailer. The T6 NIC KTLS code tries to optimize some of the cases for
requests to transmit partial TLS records. In particular it attempts
to minimize sending "waste" bytes that have to be given as input to
the crypto engine but are not needed on the wire to satisfy mbufs sent
from the TCP stack down to the driver.
TCP packets for TLS requests are broken down into the following
classes (with associated counters):
- Mbufs that send an entire TLS record in full do not have any waste
bytes (dev.cc.N.txq.M.kern_tls_full).
- Mbufs that send a short TLS record that ends before the end of the
trailer (dev.cc.N.txq.M.kern_tls_short). For sockets using AES-CBC,
the encryption must always start at the beginning, so if the mbuf
starts at an offset into the TLS record, the offset bytes will be
"waste" bytes. For sockets using AES-GCM, the encryption can start
at the 16 byte block before the starting offset capping the waste at
15 bytes.
- Mbufs that send a partial TLS record that has a non-zero starting
offset but ends at the end of the trailer
(dev.cc.N.txq.M.kern_tls_partial). In order to compute the
authentication hash stored in the trailer, the entire TLS record
must be sent as input to the crypto engine, so the bytes before the
offset are always "waste" bytes.
In addition, other per-txq sysctls are provided:
- dev.cc.N.txq.M.kern_tls_cbc: Count of sockets sent via this txq
using AES-CBC.
- dev.cc.N.txq.M.kern_tls_gcm: Count of sockets sent via this txq
using AES-GCM.
- dev.cc.N.txq.M.kern_tls_fin: Count of empty FIN-only packets sent to
compensate for the TOE engine not being able to set FIN on the last
segment of a TLS record if the TLS record mbuf had FIN set.
- dev.cc.N.txq.M.kern_tls_records: Count of TLS records sent via this
txq including full, short, and partial records.
- dev.cc.N.txq.M.kern_tls_octets: Count of non-waste bytes (TLS header
and payload) sent for TLS record requests.
- dev.cc.N.txq.M.kern_tls_waste: Count of waste bytes sent for TLS
record requests.
To enable NIC KTLS with T6, set the following tunables prior to
loading the cxgbe(4) driver:
hw.cxgbe.config_file=kern_tls
hw.cxgbe.kern_tls=1
Reviewed by: np
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D21962
2019-11-21 19:30:31 +00:00
|
|
|
DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);
|
2012-06-19 07:34:13 +00:00
|
|
|
|
cxgbe(4): Add support for Connection Offload Policy (aka COP).
COP allows fine-grained control on whether to offload a TCP connection
using t4_tom, and what settings to apply to a connection selected for
offload. t4_tom must still be loaded and IFCAP_TOE must still be
enabled for full TCP offload to take place on an interface. The
difference is that IFCAP_TOE used to be the only knob and would enable
TOE for all new connections on the inteface, but now the driver will
also consult the COP, if any, before offloading to the hardware TOE.
A policy is a plain text file with any number of rules, one per line.
Each rule has a "match" part consisting of a socket-type (L = listen,
A = active open, P = passive open, D = don't care) and a pcap-filter(7)
expression, and a "settings" part that specifies whether to offload the
connection or not and the parameters to use if so. The general format
of a rule is: [socket-type] expr => settings
Example. See cxgbetool(8) for more information.
[L] ip && port http => offload
[L] port 443 => !offload
[L] port ssh => offload
[P] src net 192.168/16 && dst port ssh => offload !nagle !timestamp cong newreno
[P] dst port ssh => offload !nagle ecn cong tahoe
[P] dst port http => offload
[A] dst port 443 => offload tls
[A] dst net 192.168/16 => offload !timestamp cong highspeed
The driver processes the rules for each new listen, active open, or
passive open and stops at the first match. There is an implicit rule at
the end of every policy that prohibits offload when no rule in the
policy matches:
[D] all => !offload
This is a reworked and expanded version of a patch submitted by
Krishnamraju Eraparaju @ Chelsio.
Sponsored by: Chelsio Communications
2018-04-14 19:07:56 +00:00
|
|
|
rw_rlock(&sc->policy_lock);
|
2018-08-17 19:22:46 +00:00
|
|
|
settings = *lookup_offload_policy(sc, OPEN_TYPE_ACTIVE, NULL,
|
|
|
|
EVL_MAKETAG(vid, pcp, 0), inp);
|
cxgbe(4): Add support for Connection Offload Policy (aka COP).
COP allows fine-grained control on whether to offload a TCP connection
using t4_tom, and what settings to apply to a connection selected for
offload. t4_tom must still be loaded and IFCAP_TOE must still be
enabled for full TCP offload to take place on an interface. The
difference is that IFCAP_TOE used to be the only knob and would enable
TOE for all new connections on the inteface, but now the driver will
also consult the COP, if any, before offloading to the hardware TOE.
A policy is a plain text file with any number of rules, one per line.
Each rule has a "match" part consisting of a socket-type (L = listen,
A = active open, P = passive open, D = don't care) and a pcap-filter(7)
expression, and a "settings" part that specifies whether to offload the
connection or not and the parameters to use if so. The general format
of a rule is: [socket-type] expr => settings
Example. See cxgbetool(8) for more information.
[L] ip && port http => offload
[L] port 443 => !offload
[L] port ssh => offload
[P] src net 192.168/16 && dst port ssh => offload !nagle !timestamp cong newreno
[P] dst port ssh => offload !nagle ecn cong tahoe
[P] dst port http => offload
[A] dst port 443 => offload tls
[A] dst net 192.168/16 => offload !timestamp cong highspeed
The driver processes the rules for each new listen, active open, or
passive open and stops at the first match. There is an implicit rule at
the end of every policy that prohibits offload when no rule in the
policy matches:
[D] all => !offload
This is a reworked and expanded version of a patch submitted by
Krishnamraju Eraparaju @ Chelsio.
Sponsored by: Chelsio Communications
2018-04-14 19:07:56 +00:00
|
|
|
rw_runlock(&sc->policy_lock);
|
|
|
|
if (!settings.offload)
|
|
|
|
DONT_OFFLOAD_ACTIVE_OPEN(EPERM);
|
|
|
|
|
2019-08-27 04:19:40 +00:00
|
|
|
toep = alloc_toepcb(vi, M_NOWAIT);
|
2012-06-19 07:34:13 +00:00
|
|
|
if (toep == NULL)
|
2013-01-15 18:38:51 +00:00
|
|
|
DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
|
2012-06-19 07:34:13 +00:00
|
|
|
|
2013-01-15 18:38:51 +00:00
|
|
|
toep->tid = alloc_atid(sc, toep);
|
|
|
|
if (toep->tid < 0)
|
|
|
|
DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
|
2012-06-19 07:34:13 +00:00
|
|
|
|
2015-12-03 00:02:01 +00:00
|
|
|
toep->l2te = t4_l2t_get(vi->pi, rt_ifp,
|
2020-04-22 07:53:43 +00:00
|
|
|
nh->nh_flags & NHF_GATEWAY ? &nh->gw_sa : nam);
|
2013-01-15 18:38:51 +00:00
|
|
|
if (toep->l2te == NULL)
|
|
|
|
DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
|
2012-06-19 07:34:13 +00:00
|
|
|
|
2019-08-27 04:19:40 +00:00
|
|
|
toep->vnet = so->so_vnet;
|
|
|
|
init_conn_params(vi, &settings, &inp->inp_inc, so, NULL,
|
|
|
|
toep->l2te->idx, &toep->params);
|
|
|
|
init_toepcb(vi, toep);
|
|
|
|
|
2013-01-15 18:38:51 +00:00
|
|
|
isipv6 = nam->sa_family == AF_INET6;
|
2013-03-30 02:26:20 +00:00
|
|
|
wr = alloc_wrqe(act_open_cpl_size(sc, isipv6), toep->ctrlq);
|
2012-06-19 07:34:13 +00:00
|
|
|
if (wr == NULL)
|
2013-01-15 18:38:51 +00:00
|
|
|
DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
|
2012-06-19 07:34:13 +00:00
|
|
|
|
2018-04-30 21:47:30 +00:00
|
|
|
qid_atid = V_TID_QID(toep->ofld_rxq->iq.abs_id) | V_TID_TID(toep->tid) |
|
|
|
|
V_TID_COOKIE(CPL_COOKIE_TOM);
|
2013-01-15 18:38:51 +00:00
|
|
|
|
|
|
|
if (isipv6) {
|
|
|
|
struct cpl_act_open_req6 *cpl = wrtod(wr);
|
2016-09-17 23:08:49 +00:00
|
|
|
struct cpl_t5_act_open_req6 *cpl5 = (void *)cpl;
|
|
|
|
struct cpl_t6_act_open_req6 *cpl6 = (void *)cpl;
|
2013-01-15 18:38:51 +00:00
|
|
|
|
2016-09-17 23:08:49 +00:00
|
|
|
if ((inp->inp_vflag & INP_IPV6) == 0)
|
2013-01-15 18:38:51 +00:00
|
|
|
DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);
|
|
|
|
|
2018-11-29 01:15:53 +00:00
|
|
|
toep->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL);
|
2013-01-15 18:38:51 +00:00
|
|
|
if (toep->ce == NULL)
|
|
|
|
DONT_OFFLOAD_ACTIVE_OPEN(ENOENT);
|
|
|
|
|
2016-09-17 23:08:49 +00:00
|
|
|
switch (chip_id(sc)) {
|
|
|
|
case CHELSIO_T4:
|
2013-07-04 17:55:52 +00:00
|
|
|
INIT_TP_WR(cpl, 0);
|
2015-12-03 00:02:01 +00:00
|
|
|
cpl->params = select_ntuple(vi, toep->l2te);
|
2016-09-17 23:08:49 +00:00
|
|
|
break;
|
|
|
|
case CHELSIO_T5:
|
|
|
|
INIT_TP_WR(cpl5, 0);
|
|
|
|
cpl5->iss = htobe32(tp->iss);
|
|
|
|
cpl5->params = select_ntuple(vi, toep->l2te);
|
|
|
|
break;
|
|
|
|
case CHELSIO_T6:
|
|
|
|
default:
|
|
|
|
INIT_TP_WR(cpl6, 0);
|
|
|
|
cpl6->iss = htobe32(tp->iss);
|
|
|
|
cpl6->params = select_ntuple(vi, toep->l2te);
|
|
|
|
break;
|
2013-07-04 17:55:52 +00:00
|
|
|
}
|
2013-01-15 18:38:51 +00:00
|
|
|
OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
|
|
|
|
qid_atid));
|
|
|
|
cpl->local_port = inp->inp_lport;
|
|
|
|
cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
|
|
|
|
cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
|
|
|
|
cpl->peer_port = inp->inp_fport;
|
|
|
|
cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0];
|
|
|
|
cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8];
|
2019-08-27 04:19:40 +00:00
|
|
|
cpl->opt0 = calc_options0(vi, &toep->params);
|
|
|
|
cpl->opt2 = calc_options2(vi, &toep->params);
|
|
|
|
|
|
|
|
CTR6(KTR_CXGBE,
|
|
|
|
"%s: atid %u, toep %p, inp %p, opt0 %#016lx, opt2 %#08x",
|
|
|
|
__func__, toep->tid, toep, inp, be64toh(cpl->opt0),
|
|
|
|
be32toh(cpl->opt2));
|
2013-07-04 17:55:52 +00:00
|
|
|
} else {
|
|
|
|
struct cpl_act_open_req *cpl = wrtod(wr);
|
2016-09-17 23:08:49 +00:00
|
|
|
struct cpl_t5_act_open_req *cpl5 = (void *)cpl;
|
|
|
|
struct cpl_t6_act_open_req *cpl6 = (void *)cpl;
|
2013-07-04 17:55:52 +00:00
|
|
|
|
2016-09-17 23:08:49 +00:00
|
|
|
switch (chip_id(sc)) {
|
|
|
|
case CHELSIO_T4:
|
2013-07-04 17:55:52 +00:00
|
|
|
INIT_TP_WR(cpl, 0);
|
2015-12-03 00:02:01 +00:00
|
|
|
cpl->params = select_ntuple(vi, toep->l2te);
|
2016-09-17 23:08:49 +00:00
|
|
|
break;
|
|
|
|
case CHELSIO_T5:
|
|
|
|
INIT_TP_WR(cpl5, 0);
|
|
|
|
cpl5->iss = htobe32(tp->iss);
|
|
|
|
cpl5->params = select_ntuple(vi, toep->l2te);
|
|
|
|
break;
|
|
|
|
case CHELSIO_T6:
|
|
|
|
default:
|
|
|
|
INIT_TP_WR(cpl6, 0);
|
|
|
|
cpl6->iss = htobe32(tp->iss);
|
|
|
|
cpl6->params = select_ntuple(vi, toep->l2te);
|
|
|
|
break;
|
2013-03-30 02:26:20 +00:00
|
|
|
}
|
2013-01-15 18:38:51 +00:00
|
|
|
OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
|
|
|
|
qid_atid));
|
|
|
|
inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port,
|
|
|
|
&cpl->peer_ip, &cpl->peer_port);
|
2019-08-27 04:19:40 +00:00
|
|
|
cpl->opt0 = calc_options0(vi, &toep->params);
|
|
|
|
cpl->opt2 = calc_options2(vi, &toep->params);
|
2012-06-19 07:34:13 +00:00
|
|
|
|
2019-08-27 04:19:40 +00:00
|
|
|
CTR6(KTR_CXGBE,
|
|
|
|
"%s: atid %u, toep %p, inp %p, opt0 %#016lx, opt2 %#08x",
|
|
|
|
__func__, toep->tid, toep, inp, be64toh(cpl->opt0),
|
|
|
|
be32toh(cpl->opt2));
|
|
|
|
}
|
2012-06-19 07:34:13 +00:00
|
|
|
|
2013-01-15 18:38:51 +00:00
|
|
|
offload_socket(so, toep);
|
2019-12-13 23:33:54 +00:00
|
|
|
NET_EPOCH_ENTER(et);
|
2013-01-15 18:38:51 +00:00
|
|
|
rc = t4_l2t_send(sc, wr, toep->l2te);
|
2019-12-13 23:33:54 +00:00
|
|
|
NET_EPOCH_EXIT(et);
|
2012-06-19 07:34:13 +00:00
|
|
|
if (rc == 0) {
|
2012-08-21 18:30:16 +00:00
|
|
|
toep->flags |= TPF_CPL_PENDING;
|
2012-06-19 07:34:13 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
undo_offload_socket(so);
|
2013-01-15 18:38:51 +00:00
|
|
|
reason = __LINE__;
|
2012-06-19 07:34:13 +00:00
|
|
|
failed:
|
2013-01-15 18:38:51 +00:00
|
|
|
CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc);
|
2012-06-19 07:34:13 +00:00
|
|
|
|
|
|
|
if (wr)
|
|
|
|
free_wrqe(wr);
|
2013-01-15 18:38:51 +00:00
|
|
|
|
|
|
|
if (toep) {
|
|
|
|
if (toep->tid >= 0)
|
|
|
|
free_atid(sc, toep->tid);
|
|
|
|
if (toep->l2te)
|
|
|
|
t4_l2t_release(toep->l2te);
|
|
|
|
if (toep->ce)
|
2018-11-29 01:15:53 +00:00
|
|
|
t4_release_lip(sc, toep->ce);
|
2012-06-19 07:34:13 +00:00
|
|
|
free_toepcb(toep);
|
2013-01-15 18:38:51 +00:00
|
|
|
}
|
2012-06-19 07:34:13 +00:00
|
|
|
|
|
|
|
return (rc);
|
|
|
|
}
|
|
|
|
#endif
|