478428c5d9
This adds support for ifnet (NIC) KTLS using Chelsio T6 adapters. Unlike the TOE-based KTLS in r353328, NIC TLS works with non-TOE connections. NIC KTLS on T6 is not able to use the normal TSO (LSO) path to segment the encrypted TLS frames output by the crypto engine. Instead, the TOE is placed into a special setup to permit "dummy" connections to be associated with regular sockets using KTLS. This permits using the TOE to segment the encrypted TLS records. However, this approach does have some limitations: 1) Regular TOE sockets cannot be used when the TOE is in this special mode. One can use either TOE and TOE-based KTLS or NIC KTLS, but not both at the same time. 2) In NIC KTLS mode, the TOE is only able to accept a per-connection timestamp offset that varies in the upper 4 bits. Put another way, only connections whose timestamp offset has the 28 lower bits cleared can use NIC KTLS and generate correct timestamps. The driver will refuse to enable NIC KTLS on connections with a timestamp offset with any of the lower 28 bits set. To use NIC KTLS, users can either disable TCP timestamps by setting the net.inet.tcp.rfc1323 sysctl to 0, or apply a local patch to the tcp_new_ts_offset() function to clear the lower 28 bits of the generated offset. 3) Because the TCP segmentation relies on fields mirrored in a TCB in the TOE, not all fields in a TCP packet can be sent in the TCP segments generated from a TLS record. Specifically, for packets containing TCP options other than timestamps, the driver will inject an "empty" TCP packet holding the requested options (e.g. a SACK scoreboard) along with the segments from the TLS record. These empty TCP packets are counted by the dev.cc.N.txq.M.kern_tls_options sysctls. 
Unlike TOE TLS which is able to buffer encrypted TLS records in on-card memory to handle retransmits, NIC KTLS must re-encrypt TLS records for retransmit requests as well as non-retransmit requests that do not include the start of a TLS record but do include the trailer. The T6 NIC KTLS code tries to optimize some of the cases for requests to transmit partial TLS records. In particular it attempts to minimize sending "waste" bytes that have to be given as input to the crypto engine but are not needed on the wire to satisfy mbufs sent from the TCP stack down to the driver. TCP packets for TLS requests are broken down into the following classes (with associated counters): - Mbufs that send an entire TLS record in full do not have any waste bytes (dev.cc.N.txq.M.kern_tls_full). - Mbufs that send a short TLS record that ends before the end of the trailer (dev.cc.N.txq.M.kern_tls_short). For sockets using AES-CBC, the encryption must always start at the beginning, so if the mbuf starts at an offset into the TLS record, the offset bytes will be "waste" bytes. For sockets using AES-GCM, the encryption can start at the 16 byte block before the starting offset capping the waste at 15 bytes. - Mbufs that send a partial TLS record that has a non-zero starting offset but ends at the end of the trailer (dev.cc.N.txq.M.kern_tls_partial). In order to compute the authentication hash stored in the trailer, the entire TLS record must be sent as input to the crypto engine, so the bytes before the offset are always "waste" bytes. In addition, other per-txq sysctls are provided: - dev.cc.N.txq.M.kern_tls_cbc: Count of sockets sent via this txq using AES-CBC. - dev.cc.N.txq.M.kern_tls_gcm: Count of sockets sent via this txq using AES-GCM. - dev.cc.N.txq.M.kern_tls_fin: Count of empty FIN-only packets sent to compensate for the TOE engine not being able to set FIN on the last segment of a TLS record if the TLS record mbuf had FIN set. 
- dev.cc.N.txq.M.kern_tls_records: Count of TLS records sent via this txq including full, short, and partial records. - dev.cc.N.txq.M.kern_tls_octets: Count of non-waste bytes (TLS header and payload) sent for TLS record requests. - dev.cc.N.txq.M.kern_tls_waste: Count of waste bytes sent for TLS record requests. To enable NIC KTLS with T6, set the following tunables prior to loading the cxgbe(4) driver: hw.cxgbe.config_file=kern_tls hw.cxgbe.kern_tls=1 Reviewed by: np Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D21962
265 lines
6.7 KiB
C
265 lines
6.7 KiB
C
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2010 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */
|
|
|
|
#ifndef __T4_OFFLOAD_H__
|
|
#define __T4_OFFLOAD_H__
|
|
#include <sys/param.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/condvar.h>
|
|
|
|
/*
 * Fill in the three header words of a ULPTX work request.
 *
 *	w	pointer to a header with wr_hi, wr_mid and wr_lo members
 *	wrlen	length of the work request in bytes; it is rounded up to
 *		16-byte units for the LEN16 field
 *	atomic	value placed in the ATOMIC field
 *	tid	value placed in the FLOWID field
 *
 * wr_hi and wr_mid are stored via htonl(), wr_lo is set to 0.
 */
#define INIT_ULPTX_WRH(w, wrlen, atomic, tid) do { \
	(w)->wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
	(w)->wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
	    V_FW_WR_FLOWID(tid)); \
	(w)->wr_lo = cpu_to_be64(0); \
} while (0)
|
|
|
|
/*
 * Same as INIT_ULPTX_WRH, but takes a pointer to a structure that embeds
 * the work request header as its "wr" member.
 */
#define INIT_ULPTX_WR(w, wrlen, atomic, tid) \
	INIT_ULPTX_WRH(&((w)->wr), wrlen, atomic, tid)
|
|
|
|
/*
 * Fill in the header of a TP work request.
 *
 *	w	pointer to a structure whose first-level member "wr" holds the
 *		wr_hi, wr_mid and wr_lo header words
 *	tid	value placed in the FLOWID field
 *
 * The immediate data length is everything in *w after the embedded header
 * (sizeof(*w) - sizeof(w->wr)) and LEN16 is sizeof(*w) rounded up to 16-byte
 * units.  The argument is parenthesized everywhere it is used so that an
 * expression (not only a plain identifier) can be passed as "w".
 */
#define INIT_TP_WR(w, tid) do { \
	(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \
	    V_FW_WR_IMMDLEN(sizeof(*(w)) - sizeof((w)->wr))); \
	(w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(sizeof(*(w)), 16)) | \
	    V_FW_WR_FLOWID(tid)); \
	(w)->wr.wr_lo = cpu_to_be64(0); \
} while (0)
|
|
|
|
/*
 * Initialize a TP work request header with INIT_TP_WR and then set the
 * opcode/tid word of the message to MK_OPCODE_TID(cpl, tid), stored via
 * htonl().
 */
#define INIT_TP_WR_MIT_CPL(w, cpl, tid) do { \
	INIT_TP_WR(w, tid); \
	OPCODE_TID(w) = htonl(MK_OPCODE_TID(cpl, tid)); \
} while (0)
|
|
|
|
/* Head of a tail queue of stid_region entries (see struct tid_info). */
TAILQ_HEAD(stid_head, stid_region);

/* Opaque here; only pointers to it are used in this header. */
struct listen_ctx;
|
|
|
|
struct stid_region {
|
|
TAILQ_ENTRY(stid_region) link;
|
|
u_int used; /* # of stids used by this region */
|
|
u_int free; /* # of contiguous stids free right after this region */
|
|
};
|
|
|
|
/*
 * Max # of ATIDs.  The absolute HW max is 14b (enough for 16K) but we reserve
 * the upper 3b for use as a cookie to demux the reply.  That leaves 11b for
 * the atid itself, i.e. 2^11 = 2048 values.
 */
#define MAX_ATIDS 2048U
|
|
|
|
/*
 * Slot in the atid table (tid_info.atid_tab).  A slot either carries a
 * caller-supplied pointer or a link to another slot; tid_info.afree points
 * at an entry of this type, presumably the head of a free list chained
 * through "next" (confirm against the allocator).
 */
union aopen_entry {
	void *data;
	union aopen_entry *next;
};
|
|
|
|
/* cxgbe_rate_tag flags (bit values, may be OR'ed together) */
enum {
	EO_FLOWC_PENDING	= (1 << 0),	/* flowc needs to be sent */
	EO_FLOWC_RPL_PENDING	= (1 << 1),	/* flowc credits due back */
	EO_SND_TAG_REF		= (1 << 2),	/* kernel has a ref on us */
	EO_FLUSH_RPL_PENDING	= (1 << 3),	/* credit flush rpl due back */
};
|
|
|
|
struct cxgbe_snd_tag {
|
|
struct m_snd_tag com;
|
|
int type;
|
|
};
|
|
|
|
struct cxgbe_rate_tag {
|
|
struct cxgbe_snd_tag com;
|
|
struct adapter *adapter;
|
|
u_int flags;
|
|
struct mtx lock;
|
|
int port_id;
|
|
int etid;
|
|
struct mbufq pending_tx, pending_fwack;
|
|
int plen;
|
|
struct sge_wrq *eo_txq;
|
|
uint32_t ctrl0;
|
|
uint16_t iqid;
|
|
int8_t schedcl;
|
|
uint64_t max_rate; /* in bytes/s */
|
|
uint8_t tx_total; /* total tx WR credits (in 16B units) */
|
|
uint8_t tx_credits; /* tx WR credits (in 16B units) available */
|
|
uint8_t tx_nocompl; /* tx WR credits since last compl request */
|
|
uint8_t ncompl; /* # of completions outstanding. */
|
|
};
|
|
|
|
static inline struct cxgbe_snd_tag *
|
|
mst_to_cst(struct m_snd_tag *t)
|
|
{
|
|
|
|
return (__containerof(t, struct cxgbe_snd_tag, com));
|
|
}
|
|
|
|
static inline struct cxgbe_rate_tag *
|
|
mst_to_crt(struct m_snd_tag *t)
|
|
{
|
|
return ((struct cxgbe_rate_tag *)mst_to_cst(t));
|
|
}
|
|
|
|
/*
 * Slot in the etid table (tid_info.etid_tab).  A slot holds either a pointer
 * to a rate tag or a link to another slot; tid_info.efree points at an entry
 * of this type.
 */
union etid_entry {
	struct cxgbe_rate_tag *cst;
	union etid_entry *next;
};
|
|
|
|
/*
|
|
* Holds the size, base address, start, end, etc. of various types of TIDs. The
|
|
* tables themselves are allocated dynamically.
|
|
*/
|
|
struct tid_info {
|
|
u_int nstids;
|
|
u_int stid_base;
|
|
|
|
u_int natids;
|
|
|
|
u_int nftids;
|
|
u_int ftid_base;
|
|
u_int ftid_end;
|
|
|
|
u_int nhpftids;
|
|
u_int hpftid_base;
|
|
u_int hpftid_end;
|
|
|
|
u_int ntids;
|
|
u_int tid_base;
|
|
|
|
u_int netids;
|
|
u_int etid_base;
|
|
u_int etid_end;
|
|
|
|
struct mtx stid_lock __aligned(CACHE_LINE_SIZE);
|
|
struct listen_ctx **stid_tab;
|
|
u_int stids_in_use;
|
|
u_int nstids_free_head; /* # of available stids at the beginning */
|
|
struct stid_head stids;
|
|
|
|
struct mtx atid_lock __aligned(CACHE_LINE_SIZE);
|
|
union aopen_entry *atid_tab;
|
|
union aopen_entry *afree;
|
|
u_int atids_in_use;
|
|
|
|
/* High priority filters and normal filters share the lock and cv. */
|
|
struct mtx ftid_lock __aligned(CACHE_LINE_SIZE);
|
|
struct cv ftid_cv;
|
|
struct filter_entry *ftid_tab;
|
|
struct filter_entry *hpftid_tab;
|
|
u_int ftids_in_use;
|
|
u_int hpftids_in_use;
|
|
|
|
/*
|
|
* hashfilter and TOE are mutually exclusive and both use ntids and
|
|
* tids_in_use. The lock and cv are used only by hashfilter.
|
|
*/
|
|
struct mtx hftid_lock __aligned(CACHE_LINE_SIZE);
|
|
struct cv hftid_cv;
|
|
void **tid_tab;
|
|
u_int tids_in_use;
|
|
|
|
void *hftid_hash_4t; /* LIST_HEAD(, filter_entry) *hftid_hash_4t; */
|
|
u_long hftid_4t_mask;
|
|
void *hftid_hash_tid; /* LIST_HEAD(, filter_entry) *hftid_hash_tid; */
|
|
u_long hftid_tid_mask;
|
|
|
|
struct mtx etid_lock __aligned(CACHE_LINE_SIZE);
|
|
union etid_entry *etid_tab;
|
|
union etid_entry *efree;
|
|
u_int etids_in_use;
|
|
};
|
|
|
|
struct t4_range {
|
|
u_int start;
|
|
u_int size;
|
|
};
|
|
|
|
struct t4_virt_res { /* virtualized HW resources */
|
|
struct t4_range ddp;
|
|
struct t4_range iscsi;
|
|
struct t4_range stag;
|
|
struct t4_range rq;
|
|
struct t4_range pbl;
|
|
struct t4_range qp;
|
|
struct t4_range cq;
|
|
struct t4_range srq;
|
|
struct t4_range ocq;
|
|
struct t4_range l2t;
|
|
struct t4_range key;
|
|
};
|
|
|
|
/*
 * Upper layer driver (ULD) ids.  ULD_MAX is the highest valid id, not a
 * count; keep it equal to the last enumerator when adding new ids.
 */
enum {
	ULD_TOM = 0,
	ULD_IWARP,
	ULD_ISCSI,
	ULD_MAX = ULD_ISCSI
};
|
|
|
|
struct adapter;
|
|
struct port_info;
|
|
struct uld_info {
|
|
SLIST_ENTRY(uld_info) link;
|
|
int refcount;
|
|
int uld_id;
|
|
int (*activate)(struct adapter *);
|
|
int (*deactivate)(struct adapter *);
|
|
};
|
|
|
|
/*
 * TOM tunables.  All settings are plain ints except tls_rx_ports, which
 * points to an array of ints whose element count is presumably
 * num_tls_rx_ports (confirm against the code that allocates it).
 */
struct tom_tunables {
	int cong_algorithm;
	int sndbuf;
	int ddp;
	int rx_coalesce;
	int tls;
	int *tls_rx_ports;
	int num_tls_rx_ports;
	int tx_align;
	int tx_zcopy;
	int cop_managed_offloading;
	int autorcvbuf_inc;
};
|
|
|
|
/* iWARP driver tunables */
struct iw_tunables {
	int wc_en;
};
|
|
|
|
/* TLS tunables */
struct tls_tunables {
	int inline_keys;
	int combo_wrs;
};
|
|
|
|
#ifdef TCP_OFFLOAD
/*
 * ULD management entry points; only declared when TCP_OFFLOAD is defined.
 * NOTE(review): the int argument of the activate/deactivate/active calls is
 * presumably a ULD_* id -- confirm against the definitions.
 */
int t4_register_uld(struct uld_info *);
int t4_unregister_uld(struct uld_info *);
int t4_activate_uld(struct adapter *, int);
int t4_deactivate_uld(struct adapter *, int);
int uld_active(struct adapter *, int);
#endif	/* TCP_OFFLOAD */
|
|
#endif
|