/*
 * Source listing: freebsd-nq/sys/dev/cxgbe/offload.h
 * (258 lines, 6.7 KiB, C)
 */

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2010 Chelsio Communications, Inc.
* All rights reserved.
* Written by: Navdeep Parhar <np@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#ifndef __T4_OFFLOAD_H__
#define __T4_OFFLOAD_H__
/*
 * History note (2018-05-09 04:09:49 +00:00):
 *
 * cxgbe(4): Add support for hash filters.
 *
 * These filters reside in the card's memory instead of its TCAM and can be
 * configured via a new "hashfilter" subcommand in cxgbetool.  Hash and normal
 * TCAM filters can be used together.  The hardware does an exact-match of
 * packet fields for hash filters, unlike the masked match performed for TCAM
 * filters.  Any T5/T6 card with memory can support at least half a million
 * hash filters.  The sample config file with the driver configures 512K of
 * these; it is possible to double this to 1 million+ in some cases.
 *
 * The chip does an exact-match of fields of incoming datagrams with hash
 * filters and performs the action configured for the filter if it matches.
 * The fields to match are specified in a "filter mask" in the firmware config
 * file.  The filter mask always includes the 5-tuple (sip, dip, sport, dport,
 * ipproto).  It can, optionally, also include any subset of the filter mode
 * (see filterMode and filterMask in the firmware config file).  For example:
 *
 *     filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe
 *     filterMask = protocol, port, vlan
 *
 * Exact values of the 5-tuple, the physical port, and VLAN tag would have to
 * be provided while setting up a hash filter with the chip configuration
 * above.
 *
 * Hash filters support all actions supported by TCAM filters.  A packet that
 * hits a hash filter can be dropped, let through (with optional steering to a
 * specific queue or RSS region), switched out of another port (with optional
 * L2 rewrite of DMAC, SMAC, VLAN tag), or get NAT'ed.  (Support for some of
 * these will show up in the driver in a follow-up commit very shortly.)
 *
 * Sponsored by: Chelsio Communications
 */
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/condvar.h>
/*
 * Initialize the header words of a ULPTX work request.
 *
 * w      - pointer to a header with wr_hi, wr_mid and wr_lo members.
 * wrlen  - work request length; stored in the LEN16 field as
 *          DIV_ROUND_UP(wrlen, 16).
 * atomic - value for the ATOMIC field of wr_hi.
 * tid    - value for the FLOWID field of wr_mid.
 *
 * wr_hi and wr_mid are written via htonl(); wr_lo is set to zero.
 */
#define INIT_ULPTX_WRH(w, wrlen, atomic, tid) do {			\
	(w)->wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) |			\
	    V_FW_WR_ATOMIC(atomic));					\
	(w)->wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) |	\
	    V_FW_WR_FLOWID(tid));					\
	(w)->wr_lo = cpu_to_be64(0);					\
} while (0)
/*
 * Convenience wrapper around INIT_ULPTX_WRH for an object that embeds the
 * work request header as a member named 'wr'.
 */
#define INIT_ULPTX_WR(w, wrlen, atomic, tid)				\
	INIT_ULPTX_WRH(&((w)->wr), wrlen, atomic, tid)
/*
 * Initialize the embedded work request header ('wr' member) of a TP work
 * request.
 *
 * w   - pointer to the full work request; its first-level member 'wr' holds
 *       wr_hi, wr_mid and wr_lo.
 * tid - value for the FLOWID field of wr_mid.
 *
 * IMMDLEN is the size of the request minus the size of its 'wr' header, and
 * LEN16 is the size of the whole request in 16-byte units, rounded up.
 *
 * Every use of 'w' is parenthesized, including those inside sizeof, so that
 * arguments such as '&req' or a cast expression expand correctly.
 */
#define INIT_TP_WR(w, tid) do {						\
	(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) |			\
	    V_FW_WR_IMMDLEN(sizeof(*(w)) - sizeof((w)->wr)));		\
	(w)->wr.wr_mid =						\
	    htonl(V_FW_WR_LEN16(DIV_ROUND_UP(sizeof(*(w)), 16)) |	\
	    V_FW_WR_FLOWID(tid));					\
	(w)->wr.wr_lo = cpu_to_be64(0);					\
} while (0)
/*
 * Initialize a TP work request with INIT_TP_WR and then set its opcode/tid
 * word (OPCODE_TID(w)) from the given CPL opcode and tid.
 * NOTE(review): "MIT" presumably reads as "with" -- confirm.
 */
#define INIT_TP_WR_MIT_CPL(w, cpl, tid) do {				\
	INIT_TP_WR(w, tid);						\
	OPCODE_TID(w) = htonl(MK_OPCODE_TID(cpl, tid));			\
} while (0)
/* Tail queue head type (struct stid_head) for a list of stid regions. */
TAILQ_HEAD(stid_head, stid_region);
struct listen_ctx;

/*
 * One region of stids, linked into a struct stid_head via 'link'.
 */
struct stid_region {
	TAILQ_ENTRY(stid_region) link;
	u_int used;	/* # of stids used by this region */
	u_int free;	/* # of contiguous stids free right after this region */
};
/*
 * Max # of ATIDs.  The absolute HW max is 14 bits (enough for 16K) but we
 * reserve the upper 3 bits for use as a cookie to demux the reply.  That
 * leaves 11 bits, i.e. 2048 ATIDs.
 */
#define MAX_ATIDS 2048U
/*
 * An atid table slot: either an opaque data pointer or a link to another
 * slot.  NOTE(review): 'next' presumably chains unused slots (see the 'afree'
 * pointer in struct tid_info) -- confirm against the allocator.
 */
union aopen_entry {
	void *data;
	union aopen_entry *next;
};
/* cxgbe_rate_tag flags (one bit each) */
enum {
	EO_FLOWC_PENDING	= 0x1,	/* flowc needs to be sent */
	EO_FLOWC_RPL_PENDING	= 0x2,	/* flowc credits due back */
	EO_SND_TAG_REF		= 0x4,	/* kernel has a ref on us */
	EO_FLUSH_RPL_PENDING	= 0x8,	/* credit flush rpl due back */
};
/*
 * Driver wrapper around a generic struct m_snd_tag.  mst_to_cst() recovers
 * this structure from a pointer to its 'com' member.
 */
struct cxgbe_snd_tag {
	struct m_snd_tag com;
	int type;	/* NOTE(review): tag type; valid values not visible here */
};
/*
 * State for a rate tag.  'com' must stay the first member: mst_to_crt()
 * converts a struct cxgbe_snd_tag pointer to this type with a plain cast.
 * 'flags' holds EO_* bits.
 */
struct cxgbe_rate_tag {
	struct cxgbe_snd_tag com;
	struct adapter *adapter;
	u_int flags;
	struct mtx lock;
	int port_id;
	int etid;
	struct mbufq pending_tx;
	struct mbufq pending_fwack;
	int plen;
	struct sge_wrq *eo_txq;
	uint32_t ctrl0;
	uint16_t iqid;
	int8_t schedcl;
	uint64_t max_rate;	/* in bytes/s */
	uint8_t tx_total;	/* total tx WR credits (in 16B units) */
	uint8_t tx_credits;	/* tx WR credits (in 16B units) available */
	uint8_t tx_nocompl;	/* tx WR credits since last compl request */
	uint8_t ncompl;		/* # of completions outstanding. */
};
/*
 * Map a generic send tag to the struct cxgbe_snd_tag that contains it as
 * its 'com' member.
 */
static inline struct cxgbe_snd_tag *
mst_to_cst(struct m_snd_tag *t)
{
	struct cxgbe_snd_tag *cst;

	cst = __containerof(t, struct cxgbe_snd_tag, com);
	return (cst);
}
/*
 * Map a generic send tag to its struct cxgbe_rate_tag.  The tag is first
 * converted with mst_to_cst() and the result is then cast, which relies on
 * the struct cxgbe_snd_tag being the first member of struct cxgbe_rate_tag.
 */
static inline struct cxgbe_rate_tag *
mst_to_crt(struct m_snd_tag *t)
{
	struct cxgbe_snd_tag *cst = mst_to_cst(t);

	return ((struct cxgbe_rate_tag *)cst);
}
/*
 * An etid table slot: either a pointer to a rate tag or a link to another
 * slot.  NOTE(review): 'next' presumably chains unused slots (see the 'efree'
 * pointer in struct tid_info) -- confirm against the allocator.
 */
union etid_entry {
	struct cxgbe_rate_tag *cst;
	union etid_entry *next;
};
/*
 * Holds the size, base address, start, end, etc. of various types of TIDs.  The
 * tables themselves are allocated dynamically.
 *
 * The first group of members records counts and ranges for each TID type.
 * Each later group starts with a cache-line-aligned mutex, followed by the
 * table and the in-use counter for that TID type.
 */
struct tid_info {
	u_int nstids;
	u_int stid_base;

	u_int natids;

	u_int nftids;
	u_int ftid_base;
	u_int ftid_end;

	u_int nhpftids;
	u_int hpftid_base;
	u_int hpftid_end;

	u_int ntids;
	u_int tid_base;

	u_int netids;
	u_int etid_base;
	u_int etid_end;

	/* stids: the table holds listen contexts. */
	struct mtx stid_lock __aligned(CACHE_LINE_SIZE);
	struct listen_ctx **stid_tab;
	u_int stids_in_use;
	u_int nstids_free_head;	/* # of available stids at the beginning */
	struct stid_head stids;

	/* atids: at most MAX_ATIDS; see union aopen_entry. */
	struct mtx atid_lock __aligned(CACHE_LINE_SIZE);
	union aopen_entry *atid_tab;
	union aopen_entry *afree;
	u_int atids_in_use;

	/* High priority filters and normal filters share the lock and cv. */
	struct mtx ftid_lock __aligned(CACHE_LINE_SIZE);
	struct cv ftid_cv;
	struct filter_entry *ftid_tab;
	struct filter_entry *hpftid_tab;
	u_int ftids_in_use;
	u_int hpftids_in_use;

	/*
	 * hashfilter and TOE are mutually exclusive and both use ntids and
	 * tids_in_use.  The lock and cv are used only by hashfilter.
	 *
	 * Hash filters were added on 2018-05-09 ("cxgbe(4): Add support for
	 * hash filters").  Per that change they reside in the card's memory
	 * rather than its TCAM and are matched exactly instead of with a mask.
	 */
	struct mtx hftid_lock __aligned(CACHE_LINE_SIZE);
	struct cv hftid_cv;
	void **tid_tab;
	u_int tids_in_use;

	void *hftid_hash_4t;	/* LIST_HEAD(, filter_entry) *hftid_hash_4t; */
	u_long hftid_4t_mask;
	void *hftid_hash_tid;	/* LIST_HEAD(, filter_entry) *hftid_hash_tid; */
	u_long hftid_tid_mask;

	/* etids: the table holds rate tags; see union etid_entry. */
	struct mtx etid_lock __aligned(CACHE_LINE_SIZE);
	union etid_entry *etid_tab;
	union etid_entry *efree;
	u_int etids_in_use;
};
/* A range described by its starting value and its size. */
struct t4_range {
	u_int start;
	u_int size;
};
/*
 * Virtualized HW resources.  Each member is a (start, size) range for one
 * kind of resource.
 */
struct t4_virt_res {
	struct t4_range ddp;
	struct t4_range iscsi;
	struct t4_range stag;
	struct t4_range rq;
	struct t4_range pbl;
	struct t4_range qp;
	struct t4_range cq;
	struct t4_range srq;
	struct t4_range ocq;
	struct t4_range l2t;
	struct t4_range key;
};
/*
 * ULD identifiers.  ULD_MAX aliases the highest valid identifier; it is not
 * a count.
 */
enum {
	ULD_TOM		= 0,
	ULD_IWARP	= 1,
	ULD_ISCSI	= 2,
	ULD_MAX		= ULD_ISCSI
};
struct adapter;
struct port_info;

/*
 * Describes one ULD: list linkage, a reference count, its identifier, and
 * the callbacks used to activate and deactivate it on an adapter.  Both
 * callbacks take the adapter and return an int.
 * NOTE(review): uld_id is presumably one of the ULD_* values -- confirm.
 */
struct uld_info {
	SLIST_ENTRY(uld_info) link;
	int refcount;
	int uld_id;
	int (*activate)(struct adapter *);
	int (*deactivate)(struct adapter *);
};
/*
 * Tunables for "tom" (cf. ULD_TOM).  The per-field notes below are condensed
 * from the change descriptions that accompanied the fields.  The mapping of a
 * field to a specific sysctl is an assumption to be confirmed against the
 * driver.
 */
struct tom_tunables {
	int cong_algorithm;
	int sndbuf;
	int ddp;
	int rx_coalesce;

	/*
	 * TLS offload of TOE connections on T6 adapters (added 2018-03-13,
	 * https://reviews.freebsd.org/D14529).  Per that change:
	 *  - TLS offload (RX and TX) is enabled by setting the
	 *    dev.t6nex.<x>.tls sysctl to 1 and requires TOE to be enabled on
	 *    the interface.
	 *  - dev.t6nex.<x>.toe.tls_rx_ports takes a list of TCP port numbers;
	 *    a connection whose local or remote port is in the list is created
	 *    as a TLS socket rather than a plain TOE socket.  A TLS socket
	 *    hangs without receiving data if the application does not use a
	 *    patched SSL library, so the list should be used with care.
	 * NOTE(review): 'tls', 'tls_rx_ports' and 'num_tls_rx_ports' presumably
	 * back those sysctls, with num_tls_rx_ports the element count of the
	 * tls_rx_ports array -- confirm.
	 */
	int tls;
	int *tls_rx_ports;
	int num_tls_rx_ports;
	int tx_align;
	int tx_zcopy;

	/*
	 * Connection Offload Policy (COP), added 2018-04-14.  Per that change
	 * a policy is a list of rules "[socket-type] expr => settings"; the
	 * driver consults it for each new listen, active open, or passive
	 * open and stops at the first match, with an implicit final rule
	 * "[D] all => !offload".  See cxgbetool(8).
	 * NOTE(review): how cop_managed_offloading interacts with the policy
	 * is not visible here -- confirm.
	 */
	int cop_managed_offloading;
	int autorcvbuf_inc;
};
/*
 * iWARP driver tunables.
 */
struct iw_tunables {
	int wc_en;
};
/*
 * ULD registration and activation entry points.  They are declared only when
 * TCP_OFFLOAD is defined.
 * NOTE(review): the int argument of the last three functions is presumably a
 * ULD_* identifier, and the return values are presumably 0 or an errno --
 * confirm against the definitions.
 */
#ifdef TCP_OFFLOAD
int t4_register_uld(struct uld_info *);
int t4_unregister_uld(struct uld_info *);
int t4_activate_uld(struct adapter *, int);
int t4_deactivate_uld(struct adapter *, int);
int uld_active(struct adapter *, int);
#endif
#endif