2011-05-30 21:07:26 +00:00
|
|
|
/*-
|
2017-11-27 14:52:40 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
|
|
|
*
|
2012-06-19 07:34:13 +00:00
|
|
|
* Copyright (c) 2012 Chelsio Communications, Inc.
|
2011-05-30 21:07:26 +00:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include "opt_inet.h"
|
2012-06-29 19:51:06 +00:00
|
|
|
#include "opt_inet6.h"
|
2011-05-30 21:07:26 +00:00
|
|
|
|
|
|
|
#include <sys/param.h>
|
2013-10-28 07:29:16 +00:00
|
|
|
#include <sys/eventhandler.h>
|
2011-05-30 21:07:26 +00:00
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/module.h>
|
|
|
|
#include <sys/bus.h>
|
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/mutex.h>
|
|
|
|
#include <sys/rwlock.h>
|
|
|
|
#include <sys/socket.h>
|
2011-12-16 02:09:51 +00:00
|
|
|
#include <sys/sbuf.h>
|
2011-05-30 21:07:26 +00:00
|
|
|
#include <netinet/in.h>
|
|
|
|
|
|
|
|
#include "common/common.h"
|
|
|
|
#include "common/t4_msg.h"
|
|
|
|
#include "t4_l2t.h"
|
|
|
|
|
2011-12-16 02:09:51 +00:00
|
|
|
/*
|
|
|
|
* Module locking notes: There is a RW lock protecting the L2 table as a
|
|
|
|
* whole plus a spinlock per L2T entry. Entry lookups and allocations happen
|
|
|
|
* under the protection of the table lock, individual entry changes happen
|
|
|
|
* while holding that entry's spinlock. The table lock nests outside the
|
|
|
|
* entry locks. Allocations of new entries take the table lock as writers so
|
|
|
|
* no other lookups can happen while allocating new entries. Entry updates
|
|
|
|
* take the table lock as readers so multiple entries can be updated in
|
|
|
|
* parallel. An L2T entry can be dropped by decrementing its reference count
|
|
|
|
* and therefore can happen in parallel with entry allocation but no entry
|
|
|
|
* can change state or increment its ref count during allocation as both of
|
|
|
|
* these perform lookups.
|
|
|
|
*
|
2016-05-03 03:41:25 +00:00
|
|
|
* Note: We do not take references to ifnets in this module because both
|
2011-12-16 02:09:51 +00:00
|
|
|
* the TOE and the sockets already hold references to the interfaces and the
|
|
|
|
* lifetime of an L2T entry is fully contained in the lifetime of the TOE.
|
|
|
|
*/
|
|
|
|
|
2011-05-30 21:07:26 +00:00
|
|
|
/*
|
2011-12-16 02:09:51 +00:00
|
|
|
* Allocate a free L2T entry. Must be called with l2t_data.lock held.
|
2011-05-30 21:07:26 +00:00
|
|
|
*/
|
2012-06-19 07:34:13 +00:00
|
|
|
struct l2t_entry *
|
|
|
|
t4_alloc_l2e(struct l2t_data *d)
|
2011-12-16 02:09:51 +00:00
|
|
|
{
|
|
|
|
struct l2t_entry *end, *e, **p;
|
|
|
|
|
|
|
|
rw_assert(&d->lock, RA_WLOCKED);
|
|
|
|
|
|
|
|
if (!atomic_load_acq_int(&d->nfree))
|
|
|
|
return (NULL);
|
|
|
|
|
|
|
|
/* there's definitely a free entry */
|
2013-01-14 20:36:22 +00:00
|
|
|
for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e)
|
2011-12-16 02:09:51 +00:00
|
|
|
if (atomic_load_acq_int(&e->refcnt) == 0)
|
|
|
|
goto found;
|
|
|
|
|
2012-06-19 07:34:13 +00:00
|
|
|
for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e)
|
|
|
|
continue;
|
2011-12-16 02:09:51 +00:00
|
|
|
found:
|
|
|
|
d->rover = e + 1;
|
|
|
|
atomic_subtract_int(&d->nfree, 1);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The entry we found may be an inactive entry that is
|
|
|
|
* presently in the hash table. We need to remove it.
|
|
|
|
*/
|
|
|
|
if (e->state < L2T_STATE_SWITCHING) {
|
|
|
|
for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
|
|
|
|
if (*p == e) {
|
|
|
|
*p = e->next;
|
|
|
|
e->next = NULL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
e->state = L2T_STATE_UNUSED;
|
|
|
|
return (e);
|
|
|
|
}
|
|
|
|
|
2018-09-22 01:24:30 +00:00
|
|
|
static struct l2t_entry *
|
|
|
|
find_or_alloc_l2e(struct l2t_data *d, uint16_t vlan, uint8_t port, uint8_t *dmac)
|
|
|
|
{
|
|
|
|
struct l2t_entry *end, *e, **p;
|
|
|
|
struct l2t_entry *first_free = NULL;
|
|
|
|
|
|
|
|
for (e = &d->l2tab[0], end = &d->l2tab[d->l2t_size]; e != end; ++e) {
|
|
|
|
if (atomic_load_acq_int(&e->refcnt) == 0) {
|
|
|
|
if (!first_free)
|
|
|
|
first_free = e;
|
|
|
|
} else if (e->state == L2T_STATE_SWITCHING &&
|
|
|
|
memcmp(e->dmac, dmac, ETHER_ADDR_LEN) == 0 &&
|
|
|
|
e->vlan == vlan && e->lport == port)
|
|
|
|
return (e); /* Found existing entry that matches. */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (first_free == NULL)
|
|
|
|
return (NULL); /* No match and no room for a new entry. */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The entry we found may be an inactive entry that is
|
|
|
|
* presently in the hash table. We need to remove it.
|
|
|
|
*/
|
|
|
|
e = first_free;
|
|
|
|
if (e->state < L2T_STATE_SWITCHING) {
|
|
|
|
for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
|
|
|
|
if (*p == e) {
|
|
|
|
*p = e->next;
|
|
|
|
e->next = NULL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
e->state = L2T_STATE_UNUSED;
|
|
|
|
return (e);
|
|
|
|
}
|
|
|
|
|
NIC KTLS for Chelsio T6 adapters.
This adds support for ifnet (NIC) KTLS using Chelsio T6 adapters.
Unlike the TOE-based KTLS in r353328, NIC TLS works with non-TOE
connections.
NIC KTLS on T6 is not able to use the normal TSO (LSO) path to segment
the encrypted TLS frames output by the crypto engine. Instead, the
TOE is placed into a special setup to permit "dummy" connections to be
associated with regular sockets using KTLS. This permits using the
TOE to segment the encrypted TLS records. However, this approach does
have some limitations:
1) Regular TOE sockets cannot be used when the TOE is in this special
mode. One can use either TOE and TOE-based KTLS or NIC KTLS, but
not both at the same time.
2) In NIC KTLS mode, the TOE is only able to accept a per-connection
timestamp offset that varies in the upper 4 bits. Put another way,
only connections whose timestamp offset has the 28 lower bits
cleared can use NIC KTLS and generate correct timestamps. The
driver will refuse to enable NIC KTLS on connections with a
timestamp offset with any of the lower 28 bits set. To use NIC
KTLS, users can either disable TCP timestamps by setting the
net.inet.tcp.rfc1323 sysctl to 0, or apply a local patch to the
tcp_new_ts_offset() function to clear the lower 28 bits of the
generated offset.
3) Because the TCP segmentation relies on fields mirrored in a TCB in
the TOE, not all fields in a TCP packet can be sent in the TCP
segments generated from a TLS record. Specifically, for packets
containing TCP options other than timestamps, the driver will
inject an "empty" TCP packet holding the requested options (e.g. a
SACK scoreboard) along with the segments from the TLS record.
These empty TCP packets are counted by the
dev.cc.N.txq.M.kern_tls_options sysctls.
Unlike TOE TLS which is able to buffer encrypted TLS records in
on-card memory to handle retransmits, NIC KTLS must re-encrypt TLS
records for retransmit requests as well as non-retransmit requests
that do not include the start of a TLS record but do include the
trailer. The T6 NIC KTLS code tries to optimize some of the cases for
requests to transmit partial TLS records. In particular it attempts
to minimize sending "waste" bytes that have to be given as input to
the crypto engine but are not needed on the wire to satisfy mbufs sent
from the TCP stack down to the driver.
TCP packets for TLS requests are broken down into the following
classes (with associated counters):
- Mbufs that send an entire TLS record in full do not have any waste
bytes (dev.cc.N.txq.M.kern_tls_full).
- Mbufs that send a short TLS record that ends before the end of the
trailer (dev.cc.N.txq.M.kern_tls_short). For sockets using AES-CBC,
the encryption must always start at the beginning, so if the mbuf
starts at an offset into the TLS record, the offset bytes will be
"waste" bytes. For sockets using AES-GCM, the encryption can start
at the 16 byte block before the starting offset capping the waste at
15 bytes.
- Mbufs that send a partial TLS record that has a non-zero starting
offset but ends at the end of the trailer
(dev.cc.N.txq.M.kern_tls_partial). In order to compute the
authentication hash stored in the trailer, the entire TLS record
must be sent as input to the crypto engine, so the bytes before the
offset are always "waste" bytes.
In addition, other per-txq sysctls are provided:
- dev.cc.N.txq.M.kern_tls_cbc: Count of sockets sent via this txq
using AES-CBC.
- dev.cc.N.txq.M.kern_tls_gcm: Count of sockets sent via this txq
using AES-GCM.
- dev.cc.N.txq.M.kern_tls_fin: Count of empty FIN-only packets sent to
compensate for the TOE engine not being able to set FIN on the last
segment of a TLS record if the TLS record mbuf had FIN set.
- dev.cc.N.txq.M.kern_tls_records: Count of TLS records sent via this
txq including full, short, and partial records.
- dev.cc.N.txq.M.kern_tls_octets: Count of non-waste bytes (TLS header
and payload) sent for TLS record requests.
- dev.cc.N.txq.M.kern_tls_waste: Count of waste bytes sent for TLS
record requests.
To enable NIC KTLS with T6, set the following tunables prior to
loading the cxgbe(4) driver:
hw.cxgbe.config_file=kern_tls
hw.cxgbe.kern_tls=1
Reviewed by: np
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D21962
2019-11-21 19:30:31 +00:00
|
|
|
static void
|
|
|
|
mk_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync, int reply,
|
|
|
|
void *dst)
|
|
|
|
{
|
|
|
|
struct cpl_l2t_write_req *req;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
req = dst;
|
|
|
|
idx = e->idx + sc->vres.l2t.start;
|
|
|
|
INIT_TP_WR(req, 0);
|
|
|
|
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx |
|
|
|
|
V_SYNC_WR(sync) | V_TID_QID(e->iqid)));
|
|
|
|
req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!reply));
|
|
|
|
req->l2t_idx = htons(idx);
|
|
|
|
req->vlan = htons(e->vlan);
|
|
|
|
memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
|
|
|
|
}
|
2018-09-22 01:24:30 +00:00
|
|
|
|
2011-12-16 02:09:51 +00:00
|
|
|
/*
|
|
|
|
* Write an L2T entry. Must be called with the entry locked.
|
|
|
|
* The write may be synchronous or asynchronous.
|
|
|
|
*/
|
2012-06-19 07:34:13 +00:00
|
|
|
int
|
2016-07-05 01:29:24 +00:00
|
|
|
t4_write_l2e(struct l2t_entry *e, int sync)
|
2011-12-16 02:09:51 +00:00
|
|
|
{
|
2016-07-05 01:29:24 +00:00
|
|
|
struct sge_wrq *wrq;
|
|
|
|
struct adapter *sc;
|
2014-12-31 23:19:16 +00:00
|
|
|
struct wrq_cookie cookie;
|
2011-12-16 02:09:51 +00:00
|
|
|
struct cpl_l2t_write_req *req;
|
|
|
|
|
|
|
|
mtx_assert(&e->lock, MA_OWNED);
|
2016-07-05 01:29:24 +00:00
|
|
|
MPASS(e->wrq != NULL);
|
2011-12-16 02:09:51 +00:00
|
|
|
|
2016-07-05 01:29:24 +00:00
|
|
|
wrq = e->wrq;
|
|
|
|
sc = wrq->adapter;
|
|
|
|
|
|
|
|
req = start_wrq_wr(wrq, howmany(sizeof(*req), 16), &cookie);
|
2014-12-31 23:19:16 +00:00
|
|
|
if (req == NULL)
|
2011-12-16 02:09:51 +00:00
|
|
|
return (ENOMEM);
|
|
|
|
|
NIC KTLS for Chelsio T6 adapters.
This adds support for ifnet (NIC) KTLS using Chelsio T6 adapters.
Unlike the TOE-based KTLS in r353328, NIC TLS works with non-TOE
connections.
NIC KTLS on T6 is not able to use the normal TSO (LSO) path to segment
the encrypted TLS frames output by the crypto engine. Instead, the
TOE is placed into a special setup to permit "dummy" connections to be
associated with regular sockets using KTLS. This permits using the
TOE to segment the encrypted TLS records. However, this approach does
have some limitations:
1) Regular TOE sockets cannot be used when the TOE is in this special
mode. One can use either TOE and TOE-based KTLS or NIC KTLS, but
not both at the same time.
2) In NIC KTLS mode, the TOE is only able to accept a per-connection
timestamp offset that varies in the upper 4 bits. Put another way,
only connections whose timestamp offset has the 28 lower bits
cleared can use NIC KTLS and generate correct timestamps. The
driver will refuse to enable NIC KTLS on connections with a
timestamp offset with any of the lower 28 bits set. To use NIC
KTLS, users can either disable TCP timestamps by setting the
net.inet.tcp.rfc1323 sysctl to 0, or apply a local patch to the
tcp_new_ts_offset() function to clear the lower 28 bits of the
generated offset.
3) Because the TCP segmentation relies on fields mirrored in a TCB in
the TOE, not all fields in a TCP packet can be sent in the TCP
segments generated from a TLS record. Specifically, for packets
containing TCP options other than timestamps, the driver will
inject an "empty" TCP packet holding the requested options (e.g. a
SACK scoreboard) along with the segments from the TLS record.
These empty TCP packets are counted by the
dev.cc.N.txq.M.kern_tls_options sysctls.
Unlike TOE TLS which is able to buffer encrypted TLS records in
on-card memory to handle retransmits, NIC KTLS must re-encrypt TLS
records for retransmit requests as well as non-retransmit requests
that do not include the start of a TLS record but do include the
trailer. The T6 NIC KTLS code tries to optimize some of the cases for
requests to transmit partial TLS records. In particular it attempts
to minimize sending "waste" bytes that have to be given as input to
the crypto engine but are not needed on the wire to satisfy mbufs sent
from the TCP stack down to the driver.
TCP packets for TLS requests are broken down into the following
classes (with associated counters):
- Mbufs that send an entire TLS record in full do not have any waste
bytes (dev.cc.N.txq.M.kern_tls_full).
- Mbufs that send a short TLS record that ends before the end of the
trailer (dev.cc.N.txq.M.kern_tls_short). For sockets using AES-CBC,
the encryption must always start at the beginning, so if the mbuf
starts at an offset into the TLS record, the offset bytes will be
"waste" bytes. For sockets using AES-GCM, the encryption can start
at the 16 byte block before the starting offset capping the waste at
15 bytes.
- Mbufs that send a partial TLS record that has a non-zero starting
offset but ends at the end of the trailer
(dev.cc.N.txq.M.kern_tls_partial). In order to compute the
authentication hash stored in the trailer, the entire TLS record
must be sent as input to the crypto engine, so the bytes before the
offset are always "waste" bytes.
In addition, other per-txq sysctls are provided:
- dev.cc.N.txq.M.kern_tls_cbc: Count of sockets sent via this txq
using AES-CBC.
- dev.cc.N.txq.M.kern_tls_gcm: Count of sockets sent via this txq
using AES-GCM.
- dev.cc.N.txq.M.kern_tls_fin: Count of empty FIN-only packets sent to
compensate for the TOE engine not being able to set FIN on the last
segment of a TLS record if the TLS record mbuf had FIN set.
- dev.cc.N.txq.M.kern_tls_records: Count of TLS records sent via this
txq including full, short, and partial records.
- dev.cc.N.txq.M.kern_tls_octets: Count of non-waste bytes (TLS header
and payload) sent for TLS record requests.
- dev.cc.N.txq.M.kern_tls_waste: Count of waste bytes sent for TLS
record requests.
To enable NIC KTLS with T6, set the following tunables prior to
loading the cxgbe(4) driver:
hw.cxgbe.config_file=kern_tls
hw.cxgbe.kern_tls=1
Reviewed by: np
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D21962
2019-11-21 19:30:31 +00:00
|
|
|
mk_write_l2e(sc, e, sync, sync, req);
|
2011-12-16 02:09:51 +00:00
|
|
|
|
2016-07-05 01:29:24 +00:00
|
|
|
commit_wrq_wr(wrq, req, &cookie);
|
2011-12-16 02:09:51 +00:00
|
|
|
|
|
|
|
if (sync && e->state != L2T_STATE_SWITCHING)
|
|
|
|
e->state = L2T_STATE_SYNC_WRITE;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
NIC KTLS for Chelsio T6 adapters.
This adds support for ifnet (NIC) KTLS using Chelsio T6 adapters.
Unlike the TOE-based KTLS in r353328, NIC TLS works with non-TOE
connections.
NIC KTLS on T6 is not able to use the normal TSO (LSO) path to segment
the encrypted TLS frames output by the crypto engine. Instead, the
TOE is placed into a special setup to permit "dummy" connections to be
associated with regular sockets using KTLS. This permits using the
TOE to segment the encrypted TLS records. However, this approach does
have some limitations:
1) Regular TOE sockets cannot be used when the TOE is in this special
mode. One can use either TOE and TOE-based KTLS or NIC KTLS, but
not both at the same time.
2) In NIC KTLS mode, the TOE is only able to accept a per-connection
timestamp offset that varies in the upper 4 bits. Put another way,
only connections whose timestamp offset has the 28 lower bits
cleared can use NIC KTLS and generate correct timestamps. The
driver will refuse to enable NIC KTLS on connections with a
timestamp offset with any of the lower 28 bits set. To use NIC
KTLS, users can either disable TCP timestamps by setting the
net.inet.tcp.rfc1323 sysctl to 0, or apply a local patch to the
tcp_new_ts_offset() function to clear the lower 28 bits of the
generated offset.
3) Because the TCP segmentation relies on fields mirrored in a TCB in
the TOE, not all fields in a TCP packet can be sent in the TCP
segments generated from a TLS record. Specifically, for packets
containing TCP options other than timestamps, the driver will
inject an "empty" TCP packet holding the requested options (e.g. a
SACK scoreboard) along with the segments from the TLS record.
These empty TCP packets are counted by the
dev.cc.N.txq.M.kern_tls_options sysctls.
Unlike TOE TLS which is able to buffer encrypted TLS records in
on-card memory to handle retransmits, NIC KTLS must re-encrypt TLS
records for retransmit requests as well as non-retransmit requests
that do not include the start of a TLS record but do include the
trailer. The T6 NIC KTLS code tries to optimize some of the cases for
requests to transmit partial TLS records. In particular it attempts
to minimize sending "waste" bytes that have to be given as input to
the crypto engine but are not needed on the wire to satisfy mbufs sent
from the TCP stack down to the driver.
TCP packets for TLS requests are broken down into the following
classes (with associated counters):
- Mbufs that send an entire TLS record in full do not have any waste
bytes (dev.cc.N.txq.M.kern_tls_full).
- Mbufs that send a short TLS record that ends before the end of the
trailer (dev.cc.N.txq.M.kern_tls_short). For sockets using AES-CBC,
the encryption must always start at the beginning, so if the mbuf
starts at an offset into the TLS record, the offset bytes will be
"waste" bytes. For sockets using AES-GCM, the encryption can start
at the 16 byte block before the starting offset capping the waste at
15 bytes.
- Mbufs that send a partial TLS record that has a non-zero starting
offset but ends at the end of the trailer
(dev.cc.N.txq.M.kern_tls_partial). In order to compute the
authentication hash stored in the trailer, the entire TLS record
must be sent as input to the crypto engine, so the bytes before the
offset are always "waste" bytes.
In addition, other per-txq sysctls are provided:
- dev.cc.N.txq.M.kern_tls_cbc: Count of sockets sent via this txq
using AES-CBC.
- dev.cc.N.txq.M.kern_tls_gcm: Count of sockets sent via this txq
using AES-GCM.
- dev.cc.N.txq.M.kern_tls_fin: Count of empty FIN-only packets sent to
compensate for the TOE engine not being able to set FIN on the last
segment of a TLS record if the TLS record mbuf had FIN set.
- dev.cc.N.txq.M.kern_tls_records: Count of TLS records sent via this
txq including full, short, and partial records.
- dev.cc.N.txq.M.kern_tls_octets: Count of non-waste bytes (TLS header
and payload) sent for TLS record requests.
- dev.cc.N.txq.M.kern_tls_waste: Count of waste bytes sent for TLS
record requests.
To enable NIC KTLS with T6, set the following tunables prior to
loading the cxgbe(4) driver:
hw.cxgbe.config_file=kern_tls
hw.cxgbe.kern_tls=1
Reviewed by: np
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D21962
2019-11-21 19:30:31 +00:00
|
|
|
/*
|
|
|
|
* Allocate an L2T entry for use by a TLS connection. These entries are
|
|
|
|
* associated with a specific VLAN and destination MAC that never changes.
|
|
|
|
* However, multiple TLS connections might share a single entry.
|
|
|
|
*
|
|
|
|
* If a new L2T entry is allocated, a work request to initialize it is
|
|
|
|
* written to 'txq' and 'ndesc' will be set to 1. Otherwise, 'ndesc'
|
|
|
|
* will be set to 0.
|
|
|
|
*
|
|
|
|
* To avoid races, separate L2T entries are reserved for individual
|
|
|
|
* queues since the L2T entry update is written to a txq just prior to
|
|
|
|
* TLS work requests that will depend on it being written.
|
|
|
|
*/
|
|
|
|
struct l2t_entry *
|
|
|
|
t4_l2t_alloc_tls(struct adapter *sc, struct sge_txq *txq, void *dst,
|
|
|
|
int *ndesc, uint16_t vlan, uint8_t port, uint8_t *eth_addr)
|
|
|
|
{
|
|
|
|
struct l2t_data *d;
|
|
|
|
struct l2t_entry *e;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
TXQ_LOCK_ASSERT_OWNED(txq);
|
|
|
|
|
|
|
|
d = sc->l2t;
|
|
|
|
*ndesc = 0;
|
|
|
|
|
|
|
|
rw_rlock(&d->lock);
|
|
|
|
|
|
|
|
/* First, try to find an existing entry. */
|
|
|
|
for (i = 0; i < d->l2t_size; i++) {
|
|
|
|
e = &d->l2tab[i];
|
|
|
|
if (e->state != L2T_STATE_TLS)
|
|
|
|
continue;
|
|
|
|
if (e->vlan == vlan && e->lport == port &&
|
|
|
|
e->wrq == (struct sge_wrq *)txq &&
|
|
|
|
memcmp(e->dmac, eth_addr, ETHER_ADDR_LEN) == 0) {
|
|
|
|
if (atomic_fetchadd_int(&e->refcnt, 1) == 0) {
|
|
|
|
/*
|
|
|
|
* This entry wasn't held but is still
|
|
|
|
* valid, so decrement nfree.
|
|
|
|
*/
|
|
|
|
atomic_subtract_int(&d->nfree, 1);
|
|
|
|
}
|
|
|
|
KASSERT(e->refcnt > 0,
|
|
|
|
("%s: refcount overflow", __func__));
|
|
|
|
rw_runlock(&d->lock);
|
|
|
|
return (e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Don't bother rechecking if the upgrade fails since the txq is
|
|
|
|
* already locked.
|
|
|
|
*/
|
|
|
|
if (!rw_try_upgrade(&d->lock)) {
|
|
|
|
rw_runlock(&d->lock);
|
|
|
|
rw_wlock(&d->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Match not found, allocate a new entry. */
|
|
|
|
e = t4_alloc_l2e(d);
|
|
|
|
if (e == NULL) {
|
|
|
|
rw_wunlock(&d->lock);
|
|
|
|
return (e);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize the entry. */
|
|
|
|
e->state = L2T_STATE_TLS;
|
|
|
|
e->vlan = vlan;
|
|
|
|
e->lport = port;
|
|
|
|
e->iqid = sc->sge.fwq.abs_id;
|
|
|
|
e->wrq = (struct sge_wrq *)txq;
|
|
|
|
memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
|
|
|
|
atomic_store_rel_int(&e->refcnt, 1);
|
|
|
|
rw_wunlock(&d->lock);
|
|
|
|
|
|
|
|
/* Write out the work request. */
|
|
|
|
*ndesc = howmany(sizeof(struct cpl_l2t_write_req), EQ_ESIZE);
|
|
|
|
MPASS(*ndesc == 1);
|
|
|
|
mk_write_l2e(sc, e, 1, 0, dst);
|
|
|
|
|
|
|
|
return (e);
|
|
|
|
}
|
|
|
|
|
2011-12-16 02:09:51 +00:00
|
|
|
/*
|
|
|
|
* Allocate an L2T entry for use by a switching rule. Such need to be
|
|
|
|
* explicitly freed and while busy they are not on any hash chain, so normal
|
|
|
|
* address resolution updates do not see them.
|
|
|
|
*/
|
|
|
|
struct l2t_entry *
|
2018-09-22 01:24:30 +00:00
|
|
|
t4_l2t_alloc_switching(struct adapter *sc, uint16_t vlan, uint8_t port,
|
|
|
|
uint8_t *eth_addr)
|
2011-12-16 02:09:51 +00:00
|
|
|
{
|
2018-09-22 01:24:30 +00:00
|
|
|
struct l2t_data *d = sc->l2t;
|
2011-12-16 02:09:51 +00:00
|
|
|
struct l2t_entry *e;
|
2018-09-22 01:24:30 +00:00
|
|
|
int rc;
|
2011-12-16 02:09:51 +00:00
|
|
|
|
2012-12-21 19:28:17 +00:00
|
|
|
rw_wlock(&d->lock);
|
2018-09-22 01:24:30 +00:00
|
|
|
e = find_or_alloc_l2e(d, vlan, port, eth_addr);
|
2011-12-16 02:09:51 +00:00
|
|
|
if (e) {
|
2018-09-22 01:24:30 +00:00
|
|
|
if (atomic_load_acq_int(&e->refcnt) == 0) {
|
|
|
|
mtx_lock(&e->lock); /* avoid race with t4_l2t_free */
|
|
|
|
e->wrq = &sc->sge.ctrlq[0];
|
|
|
|
e->iqid = sc->sge.fwq.abs_id;
|
|
|
|
e->state = L2T_STATE_SWITCHING;
|
|
|
|
e->vlan = vlan;
|
|
|
|
e->lport = port;
|
|
|
|
memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
|
|
|
|
atomic_store_rel_int(&e->refcnt, 1);
|
|
|
|
atomic_subtract_int(&d->nfree, 1);
|
|
|
|
rc = t4_write_l2e(e, 0);
|
|
|
|
mtx_unlock(&e->lock);
|
|
|
|
if (rc != 0)
|
|
|
|
e = NULL;
|
|
|
|
} else {
|
|
|
|
MPASS(e->vlan == vlan);
|
|
|
|
MPASS(e->lport == port);
|
|
|
|
atomic_add_int(&e->refcnt, 1);
|
|
|
|
}
|
2011-12-16 02:09:51 +00:00
|
|
|
}
|
2012-12-21 19:28:17 +00:00
|
|
|
rw_wunlock(&d->lock);
|
2018-09-22 01:24:30 +00:00
|
|
|
return (e);
|
2011-12-16 02:09:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
t4_init_l2t(struct adapter *sc, int flags)
|
|
|
|
{
|
2013-01-14 20:36:22 +00:00
|
|
|
int i, l2t_size;
|
2011-12-16 02:09:51 +00:00
|
|
|
struct l2t_data *d;
|
|
|
|
|
2013-01-14 20:36:22 +00:00
|
|
|
l2t_size = sc->vres.l2t.size;
|
|
|
|
if (l2t_size < 2) /* At least 1 bucket for IP and 1 for IPv6 */
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
d = malloc(sizeof(*d) + l2t_size * sizeof (struct l2t_entry), M_CXGBE,
|
|
|
|
M_ZERO | flags);
|
2011-12-16 02:09:51 +00:00
|
|
|
if (!d)
|
|
|
|
return (ENOMEM);
|
|
|
|
|
2013-01-14 20:36:22 +00:00
|
|
|
d->l2t_size = l2t_size;
|
2011-12-16 02:09:51 +00:00
|
|
|
d->rover = d->l2tab;
|
2013-01-14 20:36:22 +00:00
|
|
|
atomic_store_rel_int(&d->nfree, l2t_size);
|
2011-12-16 02:09:51 +00:00
|
|
|
rw_init(&d->lock, "L2T");
|
|
|
|
|
2013-01-14 20:36:22 +00:00
|
|
|
for (i = 0; i < l2t_size; i++) {
|
2012-06-19 07:34:13 +00:00
|
|
|
struct l2t_entry *e = &d->l2tab[i];
|
|
|
|
|
|
|
|
e->idx = i;
|
|
|
|
e->state = L2T_STATE_UNUSED;
|
|
|
|
mtx_init(&e->lock, "L2T_E", NULL, MTX_DEF);
|
|
|
|
STAILQ_INIT(&e->wr_list);
|
|
|
|
atomic_store_rel_int(&e->refcnt, 0);
|
2011-12-16 02:09:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
sc->l2t = d;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
t4_free_l2t(struct l2t_data *d)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2013-01-14 20:36:22 +00:00
|
|
|
for (i = 0; i < d->l2t_size; i++)
|
2011-12-16 02:09:51 +00:00
|
|
|
mtx_destroy(&d->l2tab[i].lock);
|
|
|
|
rw_destroy(&d->lock);
|
|
|
|
free(d, M_CXGBE);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2012-06-19 07:34:13 +00:00
|
|
|
int
|
|
|
|
do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
|
|
|
|
struct mbuf *m)
|
|
|
|
{
|
|
|
|
const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
|
|
|
|
unsigned int tid = GET_TID(rpl);
|
2013-01-14 20:36:22 +00:00
|
|
|
unsigned int idx = tid % L2T_SIZE;
|
2012-06-19 07:34:13 +00:00
|
|
|
|
|
|
|
if (__predict_false(rpl->status != CPL_ERR_NONE)) {
|
|
|
|
log(LOG_ERR,
|
2013-01-14 20:36:22 +00:00
|
|
|
"Unexpected L2T_WRITE_RPL (%u) for entry at hw_idx %u\n",
|
2012-06-19 07:34:13 +00:00
|
|
|
rpl->status, idx);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2011-05-30 21:07:26 +00:00
|
|
|
static inline unsigned int
|
|
|
|
vlan_prio(const struct l2t_entry *e)
|
|
|
|
{
|
|
|
|
return e->vlan >> 13;
|
|
|
|
}
|
|
|
|
|
2011-12-16 02:09:51 +00:00
|
|
|
static char
|
|
|
|
l2e_state(const struct l2t_entry *e)
|
|
|
|
{
|
|
|
|
switch (e->state) {
|
|
|
|
case L2T_STATE_VALID: return 'V'; /* valid, fast-path entry */
|
|
|
|
case L2T_STATE_STALE: return 'S'; /* needs revalidation, but usable */
|
|
|
|
case L2T_STATE_SYNC_WRITE: return 'W';
|
2012-06-19 07:34:13 +00:00
|
|
|
case L2T_STATE_RESOLVING: return STAILQ_EMPTY(&e->wr_list) ? 'R' : 'A';
|
2011-12-16 02:09:51 +00:00
|
|
|
case L2T_STATE_SWITCHING: return 'X';
|
NIC KTLS for Chelsio T6 adapters.
This adds support for ifnet (NIC) KTLS using Chelsio T6 adapters.
Unlike the TOE-based KTLS in r353328, NIC TLS works with non-TOE
connections.
NIC KTLS on T6 is not able to use the normal TSO (LSO) path to segment
the encrypted TLS frames output by the crypto engine. Instead, the
TOE is placed into a special setup to permit "dummy" connections to be
associated with regular sockets using KTLS. This permits using the
TOE to segment the encrypted TLS records. However, this approach does
have some limitations:
1) Regular TOE sockets cannot be used when the TOE is in this special
mode. One can use either TOE and TOE-based KTLS or NIC KTLS, but
not both at the same time.
2) In NIC KTLS mode, the TOE is only able to accept a per-connection
timestamp offset that varies in the upper 4 bits. Put another way,
only connections whose timestamp offset has the 28 lower bits
cleared can use NIC KTLS and generate correct timestamps. The
driver will refuse to enable NIC KTLS on connections with a
timestamp offset with any of the lower 28 bits set. To use NIC
KTLS, users can either disable TCP timestamps by setting the
net.inet.tcp.rfc1323 sysctl to 0, or apply a local patch to the
tcp_new_ts_offset() function to clear the lower 28 bits of the
generated offset.
3) Because the TCP segmentation relies on fields mirrored in a TCB in
the TOE, not all fields in a TCP packet can be sent in the TCP
segments generated from a TLS record. Specifically, for packets
containing TCP options other than timestamps, the driver will
inject an "empty" TCP packet holding the requested options (e.g. a
SACK scoreboard) along with the segments from the TLS record.
These empty TCP packets are counted by the
dev.cc.N.txq.M.kern_tls_options sysctls.
Unlike TOE TLS which is able to buffer encrypted TLS records in
on-card memory to handle retransmits, NIC KTLS must re-encrypt TLS
records for retransmit requests as well as non-retransmit requests
that do not include the start of a TLS record but do include the
trailer. The T6 NIC KTLS code tries to optimize some of the cases for
requests to transmit partial TLS records. In particular it attempts
to minimize sending "waste" bytes that have to be given as input to
the crypto engine but are not needed on the wire to satisfy mbufs sent
from the TCP stack down to the driver.
TCP packets for TLS requests are broken down into the following
classes (with associated counters):
- Mbufs that send an entire TLS record in full do not have any waste
bytes (dev.cc.N.txq.M.kern_tls_full).
- Mbufs that send a short TLS record that ends before the end of the
trailer (dev.cc.N.txq.M.kern_tls_short). For sockets using AES-CBC,
the encryption must always start at the beginning, so if the mbuf
starts at an offset into the TLS record, the offset bytes will be
"waste" bytes. For sockets using AES-GCM, the encryption can start
at the 16 byte block before the starting offset capping the waste at
15 bytes.
- Mbufs that send a partial TLS record that has a non-zero starting
offset but ends at the end of the trailer
(dev.cc.N.txq.M.kern_tls_partial). In order to compute the
authentication hash stored in the trailer, the entire TLS record
must be sent as input to the crypto engine, so the bytes before the
offset are always "waste" bytes.
In addition, other per-txq sysctls are provided:
- dev.cc.N.txq.M.kern_tls_cbc: Count of sockets sent via this txq
using AES-CBC.
- dev.cc.N.txq.M.kern_tls_gcm: Count of sockets sent via this txq
using AES-GCM.
- dev.cc.N.txq.M.kern_tls_fin: Count of empty FIN-only packets sent to
compensate for the TOE engine not being able to set FIN on the last
segment of a TLS record if the TLS record mbuf had FIN set.
- dev.cc.N.txq.M.kern_tls_records: Count of TLS records sent via this
txq including full, short, and partial records.
- dev.cc.N.txq.M.kern_tls_octets: Count of non-waste bytes (TLS header
and payload) sent for TLS record requests.
- dev.cc.N.txq.M.kern_tls_waste: Count of waste bytes sent for TLS
record requests.
To enable NIC KTLS with T6, set the following tunables prior to
loading the cxgbe(4) driver:
hw.cxgbe.config_file=kern_tls
hw.cxgbe.kern_tls=1
Reviewed by: np
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D21962
2019-11-21 19:30:31 +00:00
|
|
|
case L2T_STATE_TLS: return 'T';
|
2011-12-16 02:09:51 +00:00
|
|
|
default: return 'U';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
sysctl_l2t(SYSCTL_HANDLER_ARGS)
|
|
|
|
{
|
|
|
|
struct adapter *sc = arg1;
|
|
|
|
struct l2t_data *l2t = sc->l2t;
|
|
|
|
struct l2t_entry *e;
|
|
|
|
struct sbuf *sb;
|
|
|
|
int rc, i, header = 0;
|
2013-01-14 20:36:22 +00:00
|
|
|
char ip[INET6_ADDRSTRLEN];
|
2011-12-16 02:09:51 +00:00
|
|
|
|
|
|
|
if (l2t == NULL)
|
|
|
|
return (ENXIO);
|
|
|
|
|
|
|
|
rc = sysctl_wire_old_buffer(req, 0);
|
|
|
|
if (rc != 0)
|
|
|
|
return (rc);
|
|
|
|
|
|
|
|
sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
|
|
|
|
if (sb == NULL)
|
|
|
|
return (ENOMEM);
|
|
|
|
|
|
|
|
e = &l2t->l2tab[0];
|
2013-01-14 20:36:22 +00:00
|
|
|
for (i = 0; i < l2t->l2t_size; i++, e++) {
|
2011-12-16 02:09:51 +00:00
|
|
|
mtx_lock(&e->lock);
|
|
|
|
if (e->state == L2T_STATE_UNUSED)
|
|
|
|
goto skip;
|
|
|
|
|
|
|
|
if (header == 0) {
|
|
|
|
sbuf_printf(sb, " Idx IP address "
|
|
|
|
"Ethernet address VLAN/P LP State Users Port");
|
|
|
|
header = 1;
|
|
|
|
}
|
NIC KTLS for Chelsio T6 adapters.
This adds support for ifnet (NIC) KTLS using Chelsio T6 adapters.
Unlike the TOE-based KTLS in r353328, NIC TLS works with non-TOE
connections.
NIC KTLS on T6 is not able to use the normal TSO (LSO) path to segment
the encrypted TLS frames output by the crypto engine. Instead, the
TOE is placed into a special setup to permit "dummy" connections to be
associated with regular sockets using KTLS. This permits using the
TOE to segment the encrypted TLS records. However, this approach does
have some limitations:
1) Regular TOE sockets cannot be used when the TOE is in this special
mode. One can use either TOE and TOE-based KTLS or NIC KTLS, but
not both at the same time.
2) In NIC KTLS mode, the TOE is only able to accept a per-connection
timestamp offset that varies in the upper 4 bits. Put another way,
only connections whose timestamp offset has the 28 lower bits
cleared can use NIC KTLS and generate correct timestamps. The
driver will refuse to enable NIC KTLS on connections with a
timestamp offset with any of the lower 28 bits set. To use NIC
KTLS, users can either disable TCP timestamps by setting the
net.inet.tcp.rfc1323 sysctl to 0, or apply a local patch to the
tcp_new_ts_offset() function to clear the lower 28 bits of the
generated offset.
3) Because the TCP segmentation relies on fields mirrored in a TCB in
the TOE, not all fields in a TCP packet can be sent in the TCP
segments generated from a TLS record. Specifically, for packets
containing TCP options other than timestamps, the driver will
inject an "empty" TCP packet holding the requested options (e.g. a
SACK scoreboard) along with the segments from the TLS record.
These empty TCP packets are counted by the
dev.cc.N.txq.M.kern_tls_options sysctls.
Unlike TOE TLS which is able to buffer encrypted TLS records in
on-card memory to handle retransmits, NIC KTLS must re-encrypt TLS
records for retransmit requests as well as non-retransmit requests
that do not include the start of a TLS record but do include the
trailer. The T6 NIC KTLS code tries to optimize some of the cases for
requests to transmit partial TLS records. In particular it attempts
to minimize sending "waste" bytes that have to be given as input to
the crypto engine but are not needed on the wire to satisfy mbufs sent
from the TCP stack down to the driver.
TCP packets for TLS requests are broken down into the following
classes (with associated counters):
- Mbufs that send an entire TLS record in full do not have any waste
bytes (dev.cc.N.txq.M.kern_tls_full).
- Mbufs that send a short TLS record that ends before the end of the
trailer (dev.cc.N.txq.M.kern_tls_short). For sockets using AES-CBC,
the encryption must always start at the beginning, so if the mbuf
starts at an offset into the TLS record, the offset bytes will be
"waste" bytes. For sockets using AES-GCM, the encryption can start
at the 16 byte block before the starting offset capping the waste at
15 bytes.
- Mbufs that send a partial TLS record that has a non-zero starting
offset but ends at the end of the trailer
(dev.cc.N.txq.M.kern_tls_partial). In order to compute the
authentication hash stored in the trailer, the entire TLS record
must be sent as input to the crypto engine, so the bytes before the
offset are always "waste" bytes.
In addition, other per-txq sysctls are provided:
- dev.cc.N.txq.M.kern_tls_cbc: Count of sockets sent via this txq
using AES-CBC.
- dev.cc.N.txq.M.kern_tls_gcm: Count of sockets sent via this txq
using AES-GCM.
- dev.cc.N.txq.M.kern_tls_fin: Count of empty FIN-only packets sent to
compensate for the TOE engine not being able to set FIN on the last
segment of a TLS record if the TLS record mbuf had FIN set.
- dev.cc.N.txq.M.kern_tls_records: Count of TLS records sent via this
txq including full, short, and partial records.
- dev.cc.N.txq.M.kern_tls_octets: Count of non-waste bytes (TLS header
and payload) sent for TLS record requests.
- dev.cc.N.txq.M.kern_tls_waste: Count of waste bytes sent for TLS
record requests.
To enable NIC KTLS with T6, set the following tunables prior to
loading the cxgbe(4) driver:
hw.cxgbe.config_file=kern_tls
hw.cxgbe.kern_tls=1
Reviewed by: np
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D21962
2019-11-21 19:30:31 +00:00
|
|
|
if (e->state >= L2T_STATE_SWITCHING)
|
2011-12-16 02:09:51 +00:00
|
|
|
ip[0] = 0;
|
2013-01-14 20:36:22 +00:00
|
|
|
else {
|
|
|
|
inet_ntop(e->ipv6 ? AF_INET6 : AF_INET, &e->addr[0],
|
|
|
|
&ip[0], sizeof(ip));
|
|
|
|
}
|
2011-12-16 02:09:51 +00:00
|
|
|
|
2013-01-14 20:36:22 +00:00
|
|
|
/*
|
|
|
|
* XXX: IPv6 addresses may not align properly in the output.
|
|
|
|
*/
|
2011-12-16 02:09:51 +00:00
|
|
|
sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
|
|
|
|
" %u %2u %c %5u %s",
|
|
|
|
e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
|
|
|
|
e->dmac[3], e->dmac[4], e->dmac[5],
|
|
|
|
e->vlan & 0xfff, vlan_prio(e), e->lport,
|
|
|
|
l2e_state(e), atomic_load_acq_int(&e->refcnt),
|
2016-07-01 23:18:49 +00:00
|
|
|
e->ifp ? e->ifp->if_xname : "-");
|
2011-12-16 02:09:51 +00:00
|
|
|
skip:
|
|
|
|
mtx_unlock(&e->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = sbuf_finish(sb);
|
|
|
|
sbuf_delete(sb);
|
|
|
|
|
|
|
|
return (rc);
|
|
|
|
}
|