cxgbe(4): Add support for hash filters.

These filters reside in the card's memory instead of its TCAM and can be
configured via a new "hashfilter" subcommand in cxgbetool.  Hash and
normal TCAM filters can be used together.  The hardware does an
exact-match of packet fields for hash filters, unlike the masked match
performed for TCAM filters.  Any T5/T6 card with memory can support at
least half a million hash filters.  The sample config file with the
driver configures 512K of these; it is possible to double this to 1
million+ in some cases.

The chip does an exact-match of fields of incoming datagrams with hash
filters and performs the action configured for the filter if it matches.
The fields to match are specified in a "filter mask" in the firmware
config file.  The filter mask always includes the 5-tuple (sip, dip,
sport, dport, ipproto).  It can, optionally, also include any subset of
the filter mode (see filterMode and filterMask in the firmware config
file).

For example:
filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe
filterMask = protocol, port, vlan

Exact values of the 5-tuple, the physical port, and VLAN tag would have
to be provided while setting up a hash filter with the chip
configuration above.

Hash filters support all actions supported by TCAM filters.  A packet
that hits a hash filter can be dropped, let through (with optional
steering to a specific queue or RSS region), switched out of another
port (with optional L2 rewrite of DMAC, SMAC, VLAN tag), or get NAT'ed.
(Support for some of these will show up in the driver in a follow-up
commit very shortly.)

Sponsored by:	Chelsio Communications
This commit is contained in:
np 2018-05-09 04:09:49 +00:00
parent d595ecd177
commit d85f4bf1d5
11 changed files with 1381 additions and 272 deletions

View File

@ -371,7 +371,7 @@ enum {
CPL_COOKIE_DDP0,
CPL_COOKIE_DDP1,
CPL_COOKIE_TOM,
CPL_COOKIE_AVAILABLE1,
CPL_COOKIE_HASHFILTER,
CPL_COOKIE_AVAILABLE2,
CPL_COOKIE_AVAILABLE3,
@ -1244,6 +1244,9 @@ int get_filter(struct adapter *, struct t4_filter *);
int set_filter(struct adapter *, struct t4_filter *);
int del_filter(struct adapter *, struct t4_filter *);
int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
/*
 * Reply handlers for hash filter operations.  mod_event registers them as
 * shared CPL handlers under CPL_COOKIE_HASHFILTER: t4_hashfilter_ao_rpl for
 * CPL_ACT_OPEN_RPL, t4_hashfilter_tcb_rpl for CPL_SET_TCB_RPL, and
 * t4_del_hashfilter_rpl for CPL_ABORT_RPL_RSS.
 */
int t4_hashfilter_ao_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
int t4_hashfilter_tcb_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
int t4_del_hashfilter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline struct wrqe *
alloc_wrqe(int wr_len, struct sge_wrq *wrq)

View File

@ -370,6 +370,7 @@ struct adapter_params {
resources for TOE operation. */
unsigned int bypass:1; /* this is a bypass card */
unsigned int ethoffload:1;
unsigned int hash_filter:1;
unsigned int ofldq_wr_cred;
unsigned int eo_wr_cred;
@ -458,6 +459,11 @@ static inline int is_ethoffload(const struct adapter *adap)
return adap->params.ethoffload;
}
/*
 * Return the adapter's params.hash_filter flag (a 1-bit field, so the
 * result is either 0 or 1).
 */
static inline int is_hashfilter(const struct adapter *adap)
{
	return (adap->params.hash_filter);
}
static inline int chip_id(struct adapter *adap)
{
return adap->params.chipid;

View File

@ -197,6 +197,30 @@ static inline int act_open_has_tid(int status)
status != CPL_ERR_CONN_EXIST);
}
/*
 * Map the status reported in an ACT_OPEN_RPL to an errno value.
 *
 * status: one of the CPL_ERR_* codes.
 * Returns the matching errno; any status without an explicit mapping
 * yields EIO.
 */
static inline int
act_open_rpl_status_to_errno(int status)
{
	int error;

	switch (status) {
	case CPL_ERR_CONN_RESET:
		error = ECONNREFUSED;
		break;
	case CPL_ERR_ARP_MISS:
		error = EHOSTUNREACH;
		break;
	case CPL_ERR_CONN_TIMEDOUT:
		error = ETIMEDOUT;
		break;
	case CPL_ERR_TCAM_FULL:
	case CPL_ERR_CONN_EXIST:
		/* Both statuses are reported to the caller as EAGAIN. */
		error = EAGAIN;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}
enum {
CPL_CONN_POLICY_AUTO = 0,
CPL_CONN_POLICY_ASK = 1,
@ -1040,6 +1064,14 @@ struct cpl_abort_req {
__u8 rsvd2[6];
};
/*
 * Abort request message whose layout begins directly at the opcode/TID
 * union; no header member precedes it in this definition.
 * NOTE(review): presumably the header-less counterpart of struct
 * cpl_abort_req -- confirm against that definition.
 */
struct cpl_abort_req_core {
union opcode_tid ot;
__be32 rsvd0;
__u8 rsvd1;
__u8 cmd;	/* NOTE(review): meaning of the values is not visible here */
__u8 rsvd2[6];
};
struct cpl_abort_rpl_rss {
RSS_HDR
union opcode_tid ot;
@ -1062,6 +1094,14 @@ struct cpl_abort_rpl {
__u8 rsvd2[6];
};
/*
 * Abort reply message whose layout begins directly at the opcode/TID
 * union; unlike struct cpl_abort_rpl_rss it has no RSS_HDR in front.
 * NOTE(review): presumably the header-less counterpart of struct
 * cpl_abort_rpl -- confirm against that definition.
 */
struct cpl_abort_rpl_core {
union opcode_tid ot;
__be32 rsvd0;
__u8 rsvd1;
__u8 cmd;	/* NOTE(review): meaning of the values is not visible here */
__u8 rsvd2[6];
};
struct cpl_peer_close {
RSS_HDR
union opcode_tid ot;

View File

@ -0,0 +1,265 @@
# Firmware configuration file.
#
# Global limits (some are hardware limits, others are due to the firmware).
# nvi = 128 virtual interfaces
# niqflint = 1023 ingress queues with freelists and/or interrupts
# nethctrl = 64K Ethernet or ctrl egress queues
# neq = 64K egress queues of all kinds, including freelists
# nexactf = 512 MPS TCAM entries, can oversubscribe.
[global]
rss_glb_config_mode = basicvirtual
rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp
# PL_TIMEOUT register
pl_timeout_value = 200 # the timeout value in units of us
sge_timer_value = 1, 5, 10, 50, 100, 200 # SGE_TIMER_VALUE* in usecs
reg[0x10c4] = 0x20000000/0x20000000 # GK_CONTROL, enable 5th thread
reg[0x7dc0] = 0x0e2f8849 # TP_SHIFT_CNT
#Tick granularities in kbps
tsch_ticks = 100000, 10000, 1000, 10
filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe
filterMask = port, protocol
tp_pmrx = 20, 512
tp_pmrx_pagesize = 16K
# TP number of RX channels (0 = auto)
tp_nrxch = 0
tp_pmtx = 40, 512
tp_pmtx_pagesize = 64K
# TP number of TX channels (0 = auto)
tp_ntxch = 0
# TP OFLD MTUs
tp_mtus = 88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600
# enable TP_OUT_CONFIG.IPIDSPLITMODE and CRXPKTENC
reg[0x7d04] = 0x00010008/0x00010008
# TP_GLOBAL_CONFIG
reg[0x7d08] = 0x00000800/0x00000800 # set IssFromCplEnable
# TP_PC_CONFIG
reg[0x7d48] = 0x00000000/0x00000400 # clear EnableFLMError
# TP_PC_CONFIG2
reg[0x7d4c] = 0x00010000/0x00010000 # set DisableNewPshFlag
# TP_PARA_REG0
reg[0x7d60] = 0x06000000/0x07000000 # set InitCWND to 6
# TP_PARA_REG3
reg[0x7d6c] = 0x28000000/0x28000000 # set EnableTnlCngHdr
# set RxMacCheck (Note:
# Only for hash filter,
# no tcp offload)
# LE_DB_CONFIG
reg[0x19c04] = 0x00000000/0x02040000 # LE IPv4 compression disabled
# EXTN_HASH_IPV4 Disable
# LE_DB_RSP_CODE_0
reg[0x19c74] = 0x00000004/0x0000000f # TCAM_ACTV_HIT = 4
# LE_DB_RSP_CODE_1
reg[0x19c78] = 0x08000000/0x0e000000 # HASH_ACTV_HIT = 4
# LE_DB_HASH_CONFIG
reg[0x19c28] = 0x00800000/0x01f00000 # LE Hash bucket size 8,
# MC configuration
mc_mode_brc[0] = 0 # mc0 - 1: enable BRC, 0: enable RBC
# PFs 0-3. These get 8 MSI/8 MSI-X vectors each. VFs are supported by
# these 4 PFs only.
[function "0"]
nvf = 4
wx_caps = all
r_caps = all
nvi = 2
rssnvi = 2
niqflint = 4
nethctrl = 4
neq = 8
nexactf = 4
cmask = all
pmask = 0x1
[function "1"]
nvf = 4
wx_caps = all
r_caps = all
nvi = 2
rssnvi = 2
niqflint = 4
nethctrl = 4
neq = 8
nexactf = 4
cmask = all
pmask = 0x2
[function "2"]
nvf = 4
wx_caps = all
r_caps = all
nvi = 2
rssnvi = 2
niqflint = 4
nethctrl = 4
neq = 8
nexactf = 4
cmask = all
pmask = 0x4
[function "3"]
nvf = 4
wx_caps = all
r_caps = all
nvi = 2
rssnvi = 2
niqflint = 4
nethctrl = 4
neq = 8
nexactf = 4
cmask = all
pmask = 0x8
# PF4 is the resource-rich PF that the bus/nexus driver attaches to.
# It gets 32 MSI/128 MSI-X vectors.
[function "4"]
wx_caps = all
r_caps = all
nvi = 32
rssnvi = 8
niqflint = 512
nethctrl = 1024
neq = 2048
nqpcq = 8192
nexactf = 456
cmask = all
pmask = all
nclip = 320
# TCAM has 6K cells; each region must start at a multiple of 128 cell.
# Each entry in these categories takes 2 cells each. nhash will use the
# TCAM iff there is room left (that is, the rest don't add up to 3072).
nfilter = 2032
nserver = 512
nhpfilter = 0
nhash = 524288
protocol = nic_hashfilter
tp_l2t = 4096
# PF5 is the SCSI Controller PF. It gets 32 MSI/40 MSI-X vectors.
# Not used right now.
[function "5"]
nvi = 1
rssnvi = 0
# PF6 is the FCoE Controller PF. It gets 32 MSI/40 MSI-X vectors.
# Not used right now.
[function "6"]
nvi = 1
rssnvi = 0
# The following function, 1023, is not an actual PCIE function but is used to
# configure and reserve firmware internal resources that come from the global
# resource pool.
#
[function "1023"]
wx_caps = all
r_caps = all
nvi = 4
rssnvi = 0
cmask = all
pmask = all
nexactf = 8
nfilter = 16
# For Virtual functions, we only allow NIC functionality and we only allow
# access to one port (1 << PF). Note that because of limitations in the
# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL
# and GTS registers, the number of Ingress and Egress Queues must be a power
# of 2.
#
[function "0/*"]
wx_caps = 0x82
r_caps = 0x86
nvi = 1
rssnvi = 1
niqflint = 2
nethctrl = 2
neq = 4
nexactf = 2
cmask = all
pmask = 0x1
[function "1/*"]
wx_caps = 0x82
r_caps = 0x86
nvi = 1
rssnvi = 1
niqflint = 2
nethctrl = 2
neq = 4
nexactf = 2
cmask = all
pmask = 0x2
[function "2/*"]
wx_caps = 0x82
r_caps = 0x86
nvi = 1
rssnvi = 1
niqflint = 2
nethctrl = 2
neq = 4
nexactf = 2
cmask = all
pmask = 0x1
[function "3/*"]
wx_caps = 0x82
r_caps = 0x86
nvi = 1
rssnvi = 1
niqflint = 2
nethctrl = 2
neq = 4
nexactf = 2
cmask = all
pmask = 0x2
# MPS has 192K buffer space for ingress packets from the wire as well as
# loopback path of the L2 switch.
[port "0"]
dcb = none
#bg_mem = 25
#lpbk_mem = 25
hwm = 60
lwm = 15
dwm = 30
[port "1"]
dcb = none
#bg_mem = 25
#lpbk_mem = 25
hwm = 60
lwm = 15
dwm = 30
[fini]
version = 0x1
checksum = 0xb577311e
#
# $FreeBSD$
#

View File

@ -32,6 +32,9 @@
#ifndef __T4_OFFLOAD_H__
#define __T4_OFFLOAD_H__
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/condvar.h>
#define INIT_ULPTX_WRH(w, wrlen, atomic, tid) do { \
(w)->wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
@ -100,11 +103,17 @@ struct tid_info {
u_int atids_in_use;
struct mtx ftid_lock __aligned(CACHE_LINE_SIZE);
struct cv ftid_cv;
struct filter_entry *ftid_tab;
u_int nftids;
u_int ftid_base;
u_int ftids_in_use;
struct mtx hftid_lock __aligned(CACHE_LINE_SIZE);
struct cv hftid_cv;
void **hftid_tab;
/* ntids, tids_in_use */
struct mtx etid_lock __aligned(CACHE_LINE_SIZE);
struct etid_entry *etid_tab;
u_int netids;

File diff suppressed because it is too large Load Diff

View File

@ -193,6 +193,7 @@ struct t4_filter_specification {
uint32_t hitcnts:1; /* count filter hits in TCB */
uint32_t prio:1; /* filter has priority over active/server */
uint32_t type:1; /* 0 => IPv4, 1 => IPv6 */
uint32_t hash:1; /* 0 => LE TCAM, 1 => Hash */
uint32_t action:2; /* drop, pass, switch */
uint32_t rpttid:1; /* report TID in RSS hash field */
uint32_t dirsteer:1; /* 0 => RSS, 1 => steer to iq */
@ -389,7 +390,7 @@ struct t4_offload_policy {
#define CHELSIO_T4_GET_FILTER_MODE _IOWR('f', T4_GET_FILTER_MODE, uint32_t)
#define CHELSIO_T4_SET_FILTER_MODE _IOW('f', T4_SET_FILTER_MODE, uint32_t)
#define CHELSIO_T4_GET_FILTER _IOWR('f', T4_GET_FILTER, struct t4_filter)
#define CHELSIO_T4_SET_FILTER _IOW('f', T4_SET_FILTER, struct t4_filter)
#define CHELSIO_T4_SET_FILTER _IOWR('f', T4_SET_FILTER, struct t4_filter)
#define CHELSIO_T4_DEL_FILTER _IOW('f', T4_DEL_FILTER, struct t4_filter)
#define CHELSIO_T4_GET_SGE_CONTEXT _IOWR('f', T4_GET_SGE_CONTEXT, \
struct t4_sge_context)

View File

@ -436,7 +436,8 @@ static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
FW_CAPS_CONFIG_SWITCH_EGRESS;
TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed);
static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
FW_CAPS_CONFIG_NIC_HASHFILTER;
TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);
static int t4_toecaps_allowed = -1;
@ -1375,6 +1376,9 @@ t4_detach_common(device_t dev)
free(sc->sge.iqmap, M_CXGBE);
free(sc->sge.eqmap, M_CXGBE);
free(sc->tids.ftid_tab, M_CXGBE);
free(sc->tids.hftid_tab, M_CXGBE);
free(sc->tids.atid_tab, M_CXGBE);
free(sc->tids.tid_tab, M_CXGBE);
free(sc->tt.tls_rx_ports, M_CXGBE);
t4_destroy_dma_tag(sc);
if (mtx_initialized(&sc->sc_lock)) {
@ -1385,8 +1389,16 @@ t4_detach_common(device_t dev)
}
callout_drain(&sc->sfl_callout);
if (mtx_initialized(&sc->tids.ftid_lock))
if (mtx_initialized(&sc->tids.ftid_lock)) {
mtx_destroy(&sc->tids.ftid_lock);
cv_destroy(&sc->tids.ftid_cv);
}
if (mtx_initialized(&sc->tids.hftid_lock)) {
mtx_destroy(&sc->tids.hftid_lock);
cv_destroy(&sc->tids.hftid_cv);
}
if (mtx_initialized(&sc->tids.atid_lock))
mtx_destroy(&sc->tids.atid_lock);
if (mtx_initialized(&sc->sfl_lock))
mtx_destroy(&sc->sfl_lock);
if (mtx_initialized(&sc->ifp_lock))
@ -3477,6 +3489,20 @@ partition_resources(struct adapter *sc, const struct firmware *default_cfg,
LIMIT_CAPS(fcoecaps);
#undef LIMIT_CAPS
if (caps.niccaps & htobe16(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
/*
* TOE and hashfilters are mutually exclusive. It is a config
* file or firmware bug if both are reported as available. Try
* to cope with the situation in non-debug builds by disabling
* TOE.
*/
MPASS(caps.toecaps == 0);
caps.toecaps = 0;
caps.rdmacaps = 0;
caps.iscsicaps = 0;
}
caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
@ -3630,18 +3656,22 @@ get_params__post_init(struct adapter *sc)
READ_CAPS(iscsicaps);
READ_CAPS(fcoecaps);
/*
* The firmware attempts memfree TOE configuration for -SO cards and
* will report toecaps=0 if it runs out of resources (this depends on
* the config file). It may not report 0 for other capabilities
* dependent on the TOE in this case. Set them to 0 here so that the
* driver doesn't bother tracking resources that will never be used.
*/
if (sc->toecaps == 0) {
sc->iscsicaps = 0;
sc->rdmacaps = 0;
}
if (sc->niccaps & FW_CAPS_CONFIG_NIC_HASHFILTER) {
MPASS(chip_id(sc) > CHELSIO_T4);
MPASS(sc->toecaps == 0);
sc->toecaps = 0;
param[0] = FW_PARAM_DEV(NTID);
rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
if (rc != 0) {
device_printf(sc->dev,
"failed to query HASHFILTER parameters: %d.\n", rc);
return (rc);
}
sc->tids.ntids = val[0];
sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
sc->params.hash_filter = 1;
}
if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
param[0] = FW_PARAM_PFVF(ETHOFLD_START);
param[1] = FW_PARAM_PFVF(ETHOFLD_END);
@ -3659,7 +3689,6 @@ get_params__post_init(struct adapter *sc)
sc->params.eo_wr_cred = val[2];
sc->params.ethoffload = 1;
}
if (sc->toecaps) {
/* query offload-related parameters */
param[0] = FW_PARAM_DEV(NTID);
@ -3682,6 +3711,17 @@ get_params__post_init(struct adapter *sc)
sc->vres.ddp.size = val[4] - val[3] + 1;
sc->params.ofldq_wr_cred = val[5];
sc->params.offload = 1;
} else {
/*
* The firmware attempts memfree TOE configuration for -SO cards
* and will report toecaps=0 if it runs out of resources (this
* depends on the config file). It may not report 0 for other
* capabilities dependent on the TOE in this case. Set them to
* 0 here so that the driver doesn't bother tracking resources
* that will never be used.
*/
sc->iscsicaps = 0;
sc->rdmacaps = 0;
}
if (sc->rdmacaps) {
param[0] = FW_PARAM_PFVF(STAG_START);
@ -9895,6 +9935,12 @@ mod_event(module_t mod, int cmd, void *arg)
t4_filter_rpl, CPL_COOKIE_FILTER);
t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL,
do_l2t_write_rpl, CPL_COOKIE_FILTER);
t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL,
t4_hashfilter_ao_rpl, CPL_COOKIE_HASHFILTER);
t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
t4_hashfilter_tcb_rpl, CPL_COOKIE_HASHFILTER);
t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS,
t4_del_hashfilter_rpl, CPL_COOKIE_HASHFILTER);
t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
sx_init(&t4_list_lock, "T4/T5 adapters");

View File

@ -288,6 +288,7 @@ cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS];
cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES];
cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES];
cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES];
cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES];
void
t4_register_an_handler(an_handler_t h)
@ -384,6 +385,22 @@ act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss,
return (act_open_rpl_handlers[cookie](iq, rss, m));
}
/*
 * Shared dispatcher for CPL_ABORT_RPL_RSS.  The reply is handed to the
 * handler registered under CPL_COOKIE_HASHFILTER when the adapter reports
 * hash filter support (is_hashfilter), and to the one registered under
 * CPL_COOKIE_TOM otherwise.  No mbuf is expected with this message.
 *
 * NOTE(review): the selected slot is called without a NULL check, so this
 * assumes a handler has been registered for the chosen cookie.
 *
 * Returns whatever the selected handler returns.
 */
static int
abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	u_int cookie;

	MPASS(m == NULL);

	cookie = is_hashfilter(sc) ? CPL_COOKIE_HASHFILTER : CPL_COOKIE_TOM;

	return (abort_rpl_rss_handlers[cookie](iq, rss, m));
}
static void
t4_init_shared_cpl_handlers(void)
{
@ -391,6 +408,7 @@ t4_init_shared_cpl_handlers(void)
t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler);
t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler);
t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler);
t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler);
}
void
@ -413,6 +431,9 @@ t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie)
case CPL_ACT_OPEN_RPL:
loc = (uintptr_t *)&act_open_rpl_handlers[cookie];
break;
case CPL_ABORT_RPL_RSS:
loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie];
break;
default:
MPASS(0);
return;

View File

@ -109,30 +109,6 @@ do_act_establish(struct sge_iq *iq, const struct rss_header *rss,
return (0);
}
/*
 * Map the status reported in an ACT_OPEN_RPL to an errno value.  A
 * CPL_ERR_CONN_EXIST status is additionally logged at LOG_ERR before it is
 * reported as EAGAIN.  Any status without an explicit mapping yields EIO.
 */
static inline int
act_open_rpl_status_to_errno(int status)
{
	int error;

	switch (status) {
	case CPL_ERR_CONN_RESET:
		error = ECONNREFUSED;
		break;
	case CPL_ERR_ARP_MISS:
		error = EHOSTUNREACH;
		break;
	case CPL_ERR_CONN_TIMEDOUT:
		error = ETIMEDOUT;
		break;
	case CPL_ERR_CONN_EXIST:
		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
		/* FALLTHROUGH */
	case CPL_ERR_TCAM_FULL:
		error = EAGAIN;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}
void
act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status)
{

View File

@ -1953,7 +1953,8 @@ t4_init_cpl_io_handlers(void)
t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
t4_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl,
CPL_COOKIE_TOM);
t4_register_cpl_handler(CPL_RX_DATA, do_rx_data);
t4_register_cpl_handler(CPL_FW4_ACK, do_fw4_ack);
}