cxgbe(4): Use large clusters for TOE rx queues when TOE+TLS is enabled.

Rx is more efficient within the chip when the receive buffer size
matches the TLS PDU size.

MFC after:	3 days
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D26127
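
The mechanism is the new max_rx_payload() helper added further down in this diff: for TOE rx queues it grows the target rx buffer payload from the usual MTU-based size to the largest TLS PDU the chip can deliver, which is what pushes the freelists onto large clusters. A lightly commented sketch of that rule, consolidated from the hunks below (not a standalone, buildable excerpt of the driver):

static inline int
max_rx_payload(struct adapter *sc, struct ifnet *ifp, const bool ofld)
{
	int maxp;

	/* Large enough for a full frame even with hw VLAN extraction off. */
	maxp = sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
	    ETHER_VLAN_ENCAP_LEN + ifp->if_mtu;

	/*
	 * TOE rx queue with inline TLS enabled and TLS key support in the
	 * firmware: raise the target to the chip's maximum rx TLS PDU so a
	 * whole PDU fits in a single (large) cluster.
	 */
	if (ofld && sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
	    maxp < sc->params.tp.max_rx_pdu)
		maxp = sc->params.tp.max_rx_pdu;

	return (maxp);
}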
Author:	Navdeep Parhar
Date:	2020-08-23 04:16:20 +00:00
Commit:	6a59b9940e
Parent:	c5bc28b273
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=364497
4 changed files with 68 additions and 11 deletions


@@ -246,6 +246,8 @@ struct tp_params {
uint32_t vlan_pri_map;
uint32_t ingress_config;
uint32_t max_rx_pdu;
uint32_t max_tx_pdu;
uint64_t hash_filter_mask;
__be16 err_vec_mask;


@@ -9614,7 +9614,7 @@ static void read_filter_mode_and_ingress_config(struct adapter *adap,
int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
{
int chan;
u32 v;
u32 tx_len, rx_len, r, v;
struct tp_params *tpp = &adap->params.tp;
v = t4_read_reg(adap, A_TP_TIMER_RESOLUTION);
@@ -9641,6 +9641,21 @@ int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
}
}
rx_len = t4_read_reg(adap, A_TP_PMM_RX_PAGE_SIZE);
tx_len = t4_read_reg(adap, A_TP_PMM_TX_PAGE_SIZE);
r = t4_read_reg(adap, A_TP_PARA_REG2);
rx_len = min(rx_len, G_MAXRXDATA(r));
tx_len = min(tx_len, G_MAXRXDATA(r));
r = t4_read_reg(adap, A_TP_PARA_REG7);
v = min(G_PMMAXXFERLEN0(r), G_PMMAXXFERLEN1(r));
rx_len = min(rx_len, v);
tx_len = min(tx_len, v);
tpp->max_tx_pdu = tx_len;
tpp->max_rx_pdu = rx_len;
return 0;
}
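
So max_rx_pdu (and max_tx_pdu) end up as the most restrictive of the PMM page size, the MAXRXDATA cap from TP_PARA_REG2, and the smaller of the two PMMAXXFERLEN fields from TP_PARA_REG7. A tiny, self-contained worked example of that clamping, using made-up register values (not read from any real adapter):

#include <stdio.h>

static unsigned int
umin(unsigned int a, unsigned int b)
{
	return (a < b ? a : b);
}

int
main(void)
{
	/* Made-up stand-ins for the register fields read above. */
	unsigned int rx_page = 16384;	/* A_TP_PMM_RX_PAGE_SIZE */
	unsigned int maxrxdata = 16384;	/* G_MAXRXDATA(A_TP_PARA_REG2) */
	unsigned int xfer0 = 8192;	/* G_PMMAXXFERLEN0(A_TP_PARA_REG7) */
	unsigned int xfer1 = 16384;	/* G_PMMAXXFERLEN1(A_TP_PARA_REG7) */
	unsigned int max_rx_pdu;

	max_rx_pdu = umin(umin(rx_page, maxrxdata), umin(xfer0, xfer1));
	printf("max_rx_pdu = %u\n", max_rx_pdu);	/* 8192 here */
	return (0);
}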


@@ -736,6 +736,7 @@ static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
#ifdef TCP_OFFLOAD
static int sysctl_tls(SYSCTL_HANDLER_ARGS);
static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
@@ -6607,8 +6608,9 @@ t4_sysctls(struct adapter *sc)
CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
sc->tt.tls = 0;
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tls", CTLFLAG_RW,
&sc->tt.tls, 0, "Inline TLS allowed");
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls", CTLTYPE_INT |
CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, sysctl_tls, "I",
"Inline TLS allowed");
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
@@ -9699,6 +9701,37 @@ sysctl_cpus(SYSCTL_HANDLER_ARGS)
}
#ifdef TCP_OFFLOAD
static int
sysctl_tls(SYSCTL_HANDLER_ARGS)
{
struct adapter *sc = arg1;
int i, j, v, rc;
struct vi_info *vi;
v = sc->tt.tls;
rc = sysctl_handle_int(oidp, &v, 0, req);
if (rc != 0 || req->newptr == NULL)
return (rc);
if (v != 0 && !(sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS))
return (ENOTSUP);
rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4stls");
if (rc)
return (rc);
sc->tt.tls = !!v;
for_each_port(sc, i) {
for_each_vi(sc->port[i], j, vi) {
if (vi->flags & VI_INIT_DONE)
t4_update_fl_bufsize(vi->ifp);
}
}
end_synchronized_op(sc, 0);
return (0);
}
static int
sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
{
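
The tls knob is now a proc handler rather than a plain int, so setting it can fail (ENOTSUP when the firmware lacks TLS key support) and, on success, it immediately re-evaluates the freelist buffer size of every initialized VI via t4_update_fl_bufsize(). A hypothetical userland sketch of flipping the knob; the OID path is an assumption, and the nexus name (t4nex/t5nex/t6nex) and unit number depend on the adapter:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int enable = 1;

	/* Assumed OID path; adjust the nexus name and unit as needed. */
	if (sysctlbyname("dev.t6nex.0.toe.tls", NULL, NULL, &enable,
	    sizeof(enable)) == -1)
		err(1, "enabling inline TLS");	/* ENOTSUP without TLSKEYS */
	printf("inline TLS enabled; TOE rx queues will use large clusters\n");
	return (0);
}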


@@ -1032,14 +1032,19 @@ t4_teardown_adapter_queues(struct adapter *sc)
return (0);
}
/* Maximum payload that can be delivered with a single iq descriptor */
/* Maximum payload that could arrive with a single iq descriptor. */
static inline int
mtu_to_max_payload(struct adapter *sc, int mtu)
max_rx_payload(struct adapter *sc, struct ifnet *ifp, const bool ofld)
{
int maxp;
/* large enough even when hw VLAN extraction is disabled */
return (sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
ETHER_VLAN_ENCAP_LEN + mtu);
maxp = sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
ETHER_VLAN_ENCAP_LEN + ifp->if_mtu;
if (ofld && sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
maxp < sc->params.tp.max_rx_pdu)
maxp = sc->params.tp.max_rx_pdu;
return (maxp);
}
int
@@ -1065,7 +1070,7 @@ t4_setup_vi_queues(struct vi_info *vi)
struct ifnet *ifp = vi->ifp;
struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev);
struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
int maxp, mtu = ifp->if_mtu;
int maxp;
/* Interrupt vector to start from (when using multiple vectors) */
intr_idx = vi->first_intr;
@@ -1109,7 +1114,7 @@ t4_setup_vi_queues(struct vi_info *vi)
* Allocate rx queues first because a default iqid is required when
* creating a tx queue.
*/
maxp = mtu_to_max_payload(sc, mtu);
maxp = max_rx_payload(sc, ifp, false);
oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq",
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queues");
for_each_rxq(vi, i, rxq) {
@@ -1131,6 +1136,7 @@ t4_setup_vi_queues(struct vi_info *vi)
intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq);
#endif
#ifdef TCP_OFFLOAD
maxp = max_rx_payload(sc, ifp, true);
oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq",
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queues for offloaded TCP connections");
for_each_ofld_rxq(vi, i, ofld_rxq) {
@@ -2144,9 +2150,9 @@ t4_update_fl_bufsize(struct ifnet *ifp)
struct sge_ofld_rxq *ofld_rxq;
#endif
struct sge_fl *fl;
int i, maxp, mtu = ifp->if_mtu;
int i, maxp;
maxp = mtu_to_max_payload(sc, mtu);
maxp = max_rx_payload(sc, ifp, false);
for_each_rxq(vi, i, rxq) {
fl = &rxq->fl;
@@ -2156,6 +2162,7 @@ t4_update_fl_bufsize(struct ifnet *ifp)
FL_UNLOCK(fl);
}
#ifdef TCP_OFFLOAD
maxp = max_rx_payload(sc, ifp, true);
for_each_ofld_rxq(vi, i, ofld_rxq) {
fl = &ofld_rxq->fl;