freebsd-dev/sys/dev/cxgbe/offload.h
John Baldwin bddf73433e NIC KTLS for Chelsio T6 adapters.
This adds support for ifnet (NIC) KTLS using Chelsio T6 adapters.
Unlike the TOE-based KTLS in r353328, NIC TLS works with non-TOE
connections.

NIC KTLS on T6 is not able to use the normal TSO (LSO) path to segment
the encrypted TLS frames output by the crypto engine.  Instead, the
TOE is placed into a special setup to permit "dummy" connections to be
associated with regular sockets using KTLS.  This permits using the
TOE to segment the encrypted TLS records.  However, this approach does
have some limitations:

1) Regular TOE sockets cannot be used when the TOE is in this special
   mode.  One can use either TOE and TOE-based KTLS or NIC KTLS, but
   not both at the same time.

2) In NIC KTLS mode, the TOE is only able to accept a per-connection
   timestamp offset that varies in the upper 4 bits.  Put another way,
   only connections whose timestamp offset has the 28 lower bits
   cleared can use NIC KTLS and generate correct timestamps.  The
   driver will refuse to enable NIC KTLS on connections with a
   timestamp offset with any of the lower 28 bits set.  To use NIC
   KTLS, users can either disable TCP timestamps by setting the
   net.inet.tcp.rfc1323 sysctl to 0, or apply a local patch to the
   tcp_new_ts_offset() function to clear the lower 28 bits of the
   generated offset.

3) Because the TCP segmentation relies on fields mirrored in a TCB in
   the TOE, not all fields in a TCP packet can be sent in the TCP
   segments generated from a TLS record.  Specifically, for packets
   containing TCP options other than timestamps, the driver will
   inject an "empty" TCP packet holding the requested options (e.g. a
   SACK scoreboard) along with the segments from the TLS record.
   These empty TCP packets are counted by the
   dev.cc.N.txq.M.kern_tls_options sysctls.

Unlike TOE TLS which is able to buffer encrypted TLS records in
on-card memory to handle retransmits, NIC KTLS must re-encrypt TLS
records for retransmit requests as well as non-retransmit requests
that do not include the start of a TLS record but do include the
trailer.  The T6 NIC KTLS code tries to optimize some of the cases for
requests to transmit partial TLS records.  In particular it attempts
to minimize sending "waste" bytes that have to be given as input to
the crypto engine but are not needed on the wire to satisfy mbufs sent
from the TCP stack down to the driver.

TCP packets for TLS requests are broken down into the following
classes (with associated counters):

- Mbufs that send an entire TLS record in full do not have any waste
  bytes (dev.cc.N.txq.M.kern_tls_full).

- Mbufs that send a short TLS record that ends before the end of the
  trailer (dev.cc.N.txq.M.kern_tls_short).  For sockets using AES-CBC,
  the encryption must always start at the beginning, so if the mbuf
  starts at an offset into the TLS record, the offset bytes will be
  "waste" bytes.  For sockets using AES-GCM, the encryption can start
  at the 16 byte block before the starting offset capping the waste at
  15 bytes.

- Mbufs that send a partial TLS record that has a non-zero starting
  offset but ends at the end of the trailer
  (dev.cc.N.txq.M.kern_tls_partial).  In order to compute the
  authentication hash stored in the trailer, the entire TLS record
  must be sent as input to the crypto engine, so the bytes before the
  offset are always "waste" bytes.

In addition, other per-txq sysctls are provided:

- dev.cc.N.txq.M.kern_tls_cbc: Count of sockets sent via this txq
  using AES-CBC.

- dev.cc.N.txq.M.kern_tls_gcm: Count of sockets sent via this txq
  using AES-GCM.

- dev.cc.N.txq.M.kern_tls_fin: Count of empty FIN-only packets sent to
  compensate for the TOE engine not being able to set FIN on the last
  segment of a TLS record if the TLS record mbuf had FIN set.

- dev.cc.N.txq.M.kern_tls_records: Count of TLS records sent via this
  txq including full, short, and partial records.

- dev.cc.N.txq.M.kern_tls_octets: Count of non-waste bytes (TLS header
  and payload) sent for TLS record requests.

- dev.cc.N.txq.M.kern_tls_waste: Count of waste bytes sent for TLS
  record requests.

To enable NIC KTLS with T6, set the following tunables prior to
loading the cxgbe(4) driver:

hw.cxgbe.config_file=kern_tls
hw.cxgbe.kern_tls=1

Reviewed by:	np
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D21962
2019-11-21 19:30:31 +00:00

265 lines
6.7 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2010 Chelsio Communications, Inc.
* All rights reserved.
* Written by: Navdeep Parhar <np@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#ifndef __T4_OFFLOAD_H__
#define __T4_OFFLOAD_H__
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/condvar.h>
#define INIT_ULPTX_WRH(w, wrlen, atomic, tid) do { \
(w)->wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
(w)->wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
V_FW_WR_FLOWID(tid)); \
(w)->wr_lo = cpu_to_be64(0); \
} while (0)
#define INIT_ULPTX_WR(w, wrlen, atomic, tid) \
INIT_ULPTX_WRH(&((w)->wr), wrlen, atomic, tid)
#define INIT_TP_WR(w, tid) do { \
(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \
V_FW_WR_IMMDLEN(sizeof(*w) - sizeof(w->wr))); \
(w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(sizeof(*w), 16)) | \
V_FW_WR_FLOWID(tid)); \
(w)->wr.wr_lo = cpu_to_be64(0); \
} while (0)
#define INIT_TP_WR_MIT_CPL(w, cpl, tid) do { \
INIT_TP_WR(w, tid); \
OPCODE_TID(w) = htonl(MK_OPCODE_TID(cpl, tid)); \
} while (0)
TAILQ_HEAD(stid_head, stid_region);
struct listen_ctx;
struct stid_region {
TAILQ_ENTRY(stid_region) link;
u_int used; /* # of stids used by this region */
u_int free; /* # of contiguous stids free right after this region */
};
/*
* Max # of ATIDs. The absolute HW max is 14b (enough for 16K) but we reserve
* the upper 3b for use as a cookie to demux the reply.
*/
#define MAX_ATIDS 2048U
union aopen_entry {
void *data;
union aopen_entry *next;
};
/* cxgbe_rate_tag flags */
enum {
EO_FLOWC_PENDING = (1 << 0), /* flowc needs to be sent */
EO_FLOWC_RPL_PENDING = (1 << 1), /* flowc credits due back */
EO_SND_TAG_REF = (1 << 2), /* kernel has a ref on us */
EO_FLUSH_RPL_PENDING = (1 << 3), /* credit flush rpl due back */
};
struct cxgbe_snd_tag {
struct m_snd_tag com;
int type;
};
struct cxgbe_rate_tag {
struct cxgbe_snd_tag com;
struct adapter *adapter;
u_int flags;
struct mtx lock;
int port_id;
int etid;
struct mbufq pending_tx, pending_fwack;
int plen;
struct sge_wrq *eo_txq;
uint32_t ctrl0;
uint16_t iqid;
int8_t schedcl;
uint64_t max_rate; /* in bytes/s */
uint8_t tx_total; /* total tx WR credits (in 16B units) */
uint8_t tx_credits; /* tx WR credits (in 16B units) available */
uint8_t tx_nocompl; /* tx WR credits since last compl request */
uint8_t ncompl; /* # of completions outstanding. */
};
static inline struct cxgbe_snd_tag *
mst_to_cst(struct m_snd_tag *t)
{
return (__containerof(t, struct cxgbe_snd_tag, com));
}
static inline struct cxgbe_rate_tag *
mst_to_crt(struct m_snd_tag *t)
{
return ((struct cxgbe_rate_tag *)mst_to_cst(t));
}
union etid_entry {
struct cxgbe_rate_tag *cst;
union etid_entry *next;
};
/*
* Holds the size, base address, start, end, etc. of various types of TIDs. The
* tables themselves are allocated dynamically.
*/
struct tid_info {
u_int nstids;
u_int stid_base;
u_int natids;
u_int nftids;
u_int ftid_base;
u_int ftid_end;
u_int nhpftids;
u_int hpftid_base;
u_int hpftid_end;
u_int ntids;
u_int tid_base;
u_int netids;
u_int etid_base;
u_int etid_end;
struct mtx stid_lock __aligned(CACHE_LINE_SIZE);
struct listen_ctx **stid_tab;
u_int stids_in_use;
u_int nstids_free_head; /* # of available stids at the beginning */
struct stid_head stids;
struct mtx atid_lock __aligned(CACHE_LINE_SIZE);
union aopen_entry *atid_tab;
union aopen_entry *afree;
u_int atids_in_use;
/* High priority filters and normal filters share the lock and cv. */
struct mtx ftid_lock __aligned(CACHE_LINE_SIZE);
struct cv ftid_cv;
struct filter_entry *ftid_tab;
struct filter_entry *hpftid_tab;
u_int ftids_in_use;
u_int hpftids_in_use;
/*
* hashfilter and TOE are mutually exclusive and both use ntids and
* tids_in_use. The lock and cv are used only by hashfilter.
*/
struct mtx hftid_lock __aligned(CACHE_LINE_SIZE);
struct cv hftid_cv;
void **tid_tab;
u_int tids_in_use;
void *hftid_hash_4t; /* LIST_HEAD(, filter_entry) *hftid_hash_4t; */
u_long hftid_4t_mask;
void *hftid_hash_tid; /* LIST_HEAD(, filter_entry) *hftid_hash_tid; */
u_long hftid_tid_mask;
struct mtx etid_lock __aligned(CACHE_LINE_SIZE);
union etid_entry *etid_tab;
union etid_entry *efree;
u_int etids_in_use;
};
struct t4_range {
u_int start;
u_int size;
};
struct t4_virt_res { /* virtualized HW resources */
struct t4_range ddp;
struct t4_range iscsi;
struct t4_range stag;
struct t4_range rq;
struct t4_range pbl;
struct t4_range qp;
struct t4_range cq;
struct t4_range srq;
struct t4_range ocq;
struct t4_range l2t;
struct t4_range key;
};
enum {
ULD_TOM = 0,
ULD_IWARP,
ULD_ISCSI,
ULD_MAX = ULD_ISCSI
};
struct adapter;
struct port_info;
struct uld_info {
SLIST_ENTRY(uld_info) link;
int refcount;
int uld_id;
int (*activate)(struct adapter *);
int (*deactivate)(struct adapter *);
};
struct tom_tunables {
int cong_algorithm;
int sndbuf;
int ddp;
int rx_coalesce;
int tls;
int *tls_rx_ports;
int num_tls_rx_ports;
int tx_align;
int tx_zcopy;
int cop_managed_offloading;
int autorcvbuf_inc;
};
/* iWARP driver tunables */
struct iw_tunables {
int wc_en;
};
struct tls_tunables {
int inline_keys;
int combo_wrs;
};
#ifdef TCP_OFFLOAD
int t4_register_uld(struct uld_info *);
int t4_unregister_uld(struct uld_info *);
int t4_activate_uld(struct adapter *, int);
int t4_deactivate_uld(struct adapter *, int);
int uld_active(struct adapter *, int);
#endif
#endif