mlx5en: Fix for inlining issues in transmit path

1) Don't exceed the driver's own hardcoded TX inline limit.

The blueflame register size can be much greater than the hardcoded limit
for inlining. Make sure we don't exceed the driver's own limit, because this
also means that the maximum number of TX fragments becomes invalid and
then memory size assumptions in the TX path no longer hold up.

2) Make sure the mlx5_query_min_inline() function returns an error code.

3) Header inlining is required when using TSO.

4) Catch failure to compute inline header size for TSO.

5) Add support for UDP when computing inline header size.

6) Fix for inlining issues with regards to DSCP.

Make sure we inline 4 bytes beyond the ethernet and/or
VLAN header to work around a hardware bug when extracting
the DSCP field from the IPv4/v6 header.

Submitted by:   hselasky@
Approved by:    hselasky (mentor)
MFC after:      1 week
Sponsored by:   Mellanox Technologies
This commit is contained in:
Slava Shwartsman 2018-12-05 14:21:28 +00:00
parent d51ced5fae
commit 3e581cabf0
8 changed files with 260 additions and 115 deletions

View File

@ -222,20 +222,28 @@ int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
/*
 * Query the minimum TX inline mode required by the device.
 *
 * Returns 0 on success and stores the mode in *min_inline_mode,
 * or a negative errno on failure (-EINVAL for an unknown
 * wqe_inline_mode capability value).
 *
 * NOTE: the stripped diff left the pre-change "void" signature and
 * the old unchecked vport query call interleaved here; this is the
 * reconstructed post-change function.
 */
int mlx5_query_min_inline(struct mlx5_core_dev *mdev,
			  u8 *min_inline_mode)
{
	int err;

	switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
	case MLX5_CAP_INLINE_MODE_L2:
		*min_inline_mode = MLX5_INLINE_MODE_L2;
		err = 0;
		break;
	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
		/* mode is stored in the NIC vport context; propagate errors */
		err = mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode);
		break;
	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
		*min_inline_mode = MLX5_INLINE_MODE_NONE;
		err = 0;
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_min_inline);

View File

@ -619,7 +619,9 @@ struct mlx5e_sq {
u32 mkey_be;
u16 max_inline;
u8 min_inline_mode;
u8 vlan_inline_cap;
u8 min_insert_caps;
#define MLX5E_INSERT_VLAN 1
#define MLX5E_INSERT_NON_VLAN 2
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
@ -925,6 +927,7 @@ void mlx5e_drain_sq(struct mlx5e_sq *);
void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value);
void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value);
void mlx5e_resume_sq(struct mlx5e_sq *sq);
u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
void mlx5e_update_sq_inline(struct mlx5e_sq *sq);
void mlx5e_refresh_sq_inline(struct mlx5e_priv *priv);
#endif /* _MLX5_EN_H_ */

View File

@ -166,6 +166,7 @@ struct mlx5e_rl_priv_data {
int mlx5e_rl_init(struct mlx5e_priv *priv);
void mlx5e_rl_cleanup(struct mlx5e_priv *priv);
void mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl);
if_snd_tag_alloc_t mlx5e_rl_snd_tag_alloc;
if_snd_tag_modify_t mlx5e_rl_snd_tag_modify;
if_snd_tag_query_t mlx5e_rl_snd_tag_query;

View File

@ -374,6 +374,12 @@ mlx5e_trust_state_handler(SYSCTL_HANDLER_ARGS)
goto done;
priv->params_ethtool.trust_state = result;
/* update inline mode */
mlx5e_refresh_sq_inline(priv);
#ifdef RATELIMIT
mlx5e_rl_refresh_sq_inline(&priv->rl);
#endif
done:
PRIV_UNLOCK(priv);
return (err);

View File

@ -1126,6 +1126,52 @@ static const char *mlx5e_sq_stats_desc[] = {
MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};
void
mlx5e_update_sq_inline(struct mlx5e_sq *sq)
{
sq->max_inline = sq->priv->params.tx_max_inline;
sq->min_inline_mode = sq->priv->params.tx_min_inline_mode;
/*
* Check if trust state is DSCP or if inline mode is NONE which
* indicates CX-5 or newer hardware.
*/
if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP ||
sq->min_inline_mode == MLX5_INLINE_MODE_NONE) {
if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert))
sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN;
else
sq->min_insert_caps = MLX5E_INSERT_NON_VLAN;
} else {
sq->min_insert_caps = 0;
}
}
static void
mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
{
int i;
for (i = 0; i != c->num_tc; i++) {
mtx_lock(&c->sq[i].lock);
mlx5e_update_sq_inline(&c->sq[i]);
mtx_unlock(&c->sq[i].lock);
}
}
/* Refresh the inline parameters of all open channels' send queues. */
void
mlx5e_refresh_sq_inline(struct mlx5e_priv *priv)
{
	int ch;

	/* nothing to do while the channels are closed */
	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
		return;

	for (ch = 0; ch < priv->params.num_channels; ch++)
		mlx5e_refresh_sq_inline_sub(priv, priv->channel[ch]);
}
static int
mlx5e_create_sq(struct mlx5e_channel *c,
int tc,
@ -1180,9 +1226,8 @@ mlx5e_create_sq(struct mlx5e_channel *c,
sq->ifp = priv->ifp;
sq->priv = priv;
sq->tc = tc;
sq->max_inline = priv->params.tx_max_inline;
sq->min_inline_mode = priv->params.tx_min_inline_mode;
sq->vlan_inline_cap = MLX5_CAP_ETH(mdev, wqe_vlan_insert);
mlx5e_update_sq_inline(sq);
snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
@ -2992,18 +3037,24 @@ mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
/*
 * Compute the maximum number of TX inline bytes supported.
 *
 * Half the blueflame register is usable per doorbell, minus the WQE
 * overhead (keeping the 2-byte inline_hdr_start).  The result is
 * clamped to MLX5E_MAX_TX_INLINE so the driver's TX-fragment and
 * memory-size assumptions stay valid even when the blueflame
 * register is large.
 *
 * NOTE: the stripped diff left the pre-change computation lines
 * interleaved here; this is the reconstructed post-change function.
 */
static u16
mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
{
	uint32_t bf_buf_size = (1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U;

	bf_buf_size -= sizeof(struct mlx5e_tx_wqe) - 2;

	/* verify against driver hardware limit */
	if (bf_buf_size > MLX5E_MAX_TX_INLINE)
		bf_buf_size = MLX5E_MAX_TX_INLINE;

	return (bf_buf_size);
}
static void
static int
mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
struct mlx5e_priv *priv,
int num_comp_vectors)
{
int err;
/*
* TODO: Consider link speed for setting "log_sq_size",
* "log_rq_size" and "cq_moderation_xxx":
@ -3035,7 +3086,10 @@ mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
priv->params.default_vlan_prio = 0;
priv->counter_set_id = -1;
priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
if (err)
return (err);
/*
* hw lro is currently defaulted to off. when it won't anymore we
@ -3058,6 +3112,8 @@ mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
return (0);
}
static int
@ -3297,20 +3353,6 @@ mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
}
}
u8
mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev)
{
u8 min_inline_mode;
min_inline_mode = MLX5_INLINE_MODE_L2;
mlx5_query_min_inline(mdev, &min_inline_mode);
if (min_inline_mode == MLX5_INLINE_MODE_NONE &&
!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
min_inline_mode = MLX5_INLINE_MODE_L2;
return (min_inline_mode);
}
static void
mlx5e_add_hw_stats(struct mlx5e_priv *priv)
{
@ -3590,7 +3632,12 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
goto err_free_sysctl;
}
mlx5e_build_ifp_priv(mdev, priv, ncv);
err = mlx5e_build_ifp_priv(mdev, priv, ncv);
if (err) {
mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err);
goto err_free_sysctl;
}
snprintf(unit, sizeof(unit), "mce%u_wq",
device_get_unit(mdev->pdev->dev.bsddev));

View File

@ -137,9 +137,8 @@ mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
sq->mkey_be = cpu_to_be32(priv->mr.key);
sq->ifp = priv->ifp;
sq->priv = priv;
sq->max_inline = priv->params.tx_max_inline;
sq->min_inline_mode = priv->params.tx_min_inline_mode;
sq->vlan_inline_cap = MLX5_CAP_ETH(mdev, wqe_vlan_insert);
mlx5e_update_sq_inline(sq);
return (0);
@ -1233,6 +1232,32 @@ mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl)
return (0);
}
/*
 * Refresh the inline parameters of every allocated rate-limit send
 * queue across all worker threads.
 */
void
mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl)
{
	uint64_t w;
	uint64_t ch;

	for (w = 0; w < rl->param.tx_worker_threads_def; w++) {
		struct mlx5e_rl_worker *rlw = &rl->workers[w];

		for (ch = 0; ch < rl->param.tx_channels_per_worker_def; ch++) {
			struct mlx5e_sq *sq = rlw->channels[ch].sq;

			/* channel may not have an SQ allocated yet */
			if (sq == NULL)
				continue;
			mtx_lock(&sq->lock);
			mlx5e_update_sq_inline(sq);
			mtx_unlock(&sq->lock);
		}
	}
}
static int
mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value)
{

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -154,49 +154,53 @@ mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
}
static inline u16
mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct mbuf *mb)
mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
{
struct ether_vlan_header *eh;
uint16_t eth_type;
int min_inline;
switch(sq->min_inline_mode) {
case MLX5_INLINE_MODE_NONE:
/*
* When inline mode is NONE, we do not need to copy
* headers into WQEs, except when vlan tag framing is
* requested. Hardware might offload vlan tagging on
* transmit. This is a separate capability, which is
* known to be disabled on ConnectX-5 due to a hardware
* bug RM 931383. If vlan_inline_cap is not present and
* the packet has vlan tag, fall back to inlining.
*/
if ((mb->m_flags & M_VLANTAG) != 0 &&
sq->vlan_inline_cap == 0)
break;
return (0);
case MLX5_INLINE_MODE_L2:
/*
* Due to hardware limitations, when trust mode is
* DSCP, the hardware may request MLX5_INLINE_MODE_L2
* while it really needs all L2 headers and the 4 first
* bytes of the IP header (which include the
* TOS/traffic-class).
*
* To avoid doing a firmware command for querying the
* trust state and parsing the mbuf for doing
* unnecessary checks (VLAN/eth_type) in the fast path,
* we are going for the worth case (22 Bytes) if
* the mb->m_pkthdr.len allows it.
*/
if (mb->m_pkthdr.len > ETHER_HDR_LEN +
ETHER_VLAN_ENCAP_LEN + 4)
return (MIN(sq->max_inline, ETHER_HDR_LEN +
ETHER_VLAN_ENCAP_LEN + 4));
break;
eh = mtod(mb, struct ether_vlan_header *);
if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
goto max_inline;
} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
goto max_inline;
eth_type = ntohs(eh->evl_proto);
min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
} else {
eth_type = ntohs(eh->evl_encap_proto);
min_inline = ETHER_HDR_LEN;
}
return (MIN(sq->max_inline, mb->m_pkthdr.len));
switch (eth_type) {
case ETHERTYPE_IP:
case ETHERTYPE_IPV6:
/*
* Make sure the TOS(IPv4) or traffic class(IPv6)
* field gets inlined. Else the SQ may stall.
*/
min_inline += 4;
break;
default:
goto max_inline;
}
/*
* m_copydata() will be used on the remaining header which
* does not need to reside within the first m_len bytes of
* data:
*/
if (mb->m_pkthdr.len < min_inline)
goto max_inline;
return (min_inline);
max_inline:
return (MIN(mb->m_pkthdr.len, sq->max_inline));
}
static int
mlx5e_get_header_size(struct mbuf *mb)
mlx5e_get_full_header_size(struct mbuf *mb)
{
struct ether_vlan_header *eh;
struct tcphdr *th;
@ -210,31 +214,46 @@ mlx5e_get_header_size(struct mbuf *mb)
if (mb->m_len < ETHER_HDR_LEN)
return (0);
if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
if (mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
return (0);
eth_type = ntohs(eh->evl_proto);
eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
} else {
eth_type = ntohs(eh->evl_encap_proto);
eth_hdr_len = ETHER_HDR_LEN;
}
if (mb->m_len < eth_hdr_len)
return (0);
switch (eth_type) {
case ETHERTYPE_IP:
ip = (struct ip *)(mb->m_data + eth_hdr_len);
if (mb->m_len < eth_hdr_len + sizeof(*ip))
return (0);
if (ip->ip_p != IPPROTO_TCP)
switch (ip->ip_p) {
case IPPROTO_TCP:
ip_hlen = ip->ip_hl << 2;
eth_hdr_len += ip_hlen;
break;
case IPPROTO_UDP:
ip_hlen = ip->ip_hl << 2;
eth_hdr_len += ip_hlen + 8;
goto done;
default:
return (0);
ip_hlen = ip->ip_hl << 2;
eth_hdr_len += ip_hlen;
}
break;
case ETHERTYPE_IPV6:
ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
if (mb->m_len < eth_hdr_len + sizeof(*ip6))
return (0);
if (ip6->ip6_nxt != IPPROTO_TCP)
switch (ip6->ip6_nxt) {
case IPPROTO_TCP:
eth_hdr_len += sizeof(*ip6);
break;
case IPPROTO_UDP:
eth_hdr_len += sizeof(*ip6) + 8;
goto done;
default:
return (0);
eth_hdr_len += sizeof(*ip6);
}
break;
default:
return (0);
@ -244,7 +263,13 @@ mlx5e_get_header_size(struct mbuf *mb)
th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
tcp_hlen = th->th_off << 2;
eth_hdr_len += tcp_hlen;
if (mb->m_len < eth_hdr_len)
done:
/*
* m_copydata() will be used on the remaining header which
* does not need to reside within the first m_len bytes of
* data:
*/
if (mb->m_pkthdr.len < eth_hdr_len)
return (0);
return (eth_hdr_len);
}
@ -306,7 +331,11 @@ mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
wqe->eth.mss = cpu_to_be16(mss);
opcode = MLX5_OPCODE_LSO;
ihs = mlx5e_get_header_size(mb);
ihs = mlx5e_get_full_header_size(mb);
if (unlikely(ihs == 0)) {
err = EINVAL;
goto tx_drop;
}
payload_len = mb->m_pkthdr.len - ihs;
if (payload_len == 0)
num_pkts = 1;
@ -318,46 +347,72 @@ mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
sq->stats.tso_bytes += payload_len;
} else {
opcode = MLX5_OPCODE_SEND;
ihs = mlx5e_get_inline_hdr_size(sq, mb);
switch (sq->min_inline_mode) {
case MLX5_INLINE_MODE_IP:
case MLX5_INLINE_MODE_TCP_UDP:
ihs = mlx5e_get_full_header_size(mb);
if (unlikely(ihs == 0))
ihs = mlx5e_get_l2_header_size(sq, mb);
break;
case MLX5_INLINE_MODE_L2:
ihs = mlx5e_get_l2_header_size(sq, mb);
break;
case MLX5_INLINE_MODE_NONE:
/* FALLTHROUGH */
default:
if ((mb->m_flags & M_VLANTAG) != 0 &&
(sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
/* inlining VLAN data is not required */
wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
ihs = 0;
} else if ((mb->m_flags & M_VLANTAG) == 0 &&
(sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
/* inlining non-VLAN data is not required */
ihs = 0;
} else {
/* we are forced to inlining L2 header, if any */
ihs = mlx5e_get_l2_header_size(sq, mb);
}
break;
}
sq->mbuf[pi].num_bytes = max_t (unsigned int,
mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
}
if (ihs == 0) {
if ((mb->m_flags & M_VLANTAG) != 0) {
wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
} else {
wqe->eth.inline_hdr_sz = 0;
}
} else {
if ((mb->m_flags & M_VLANTAG) != 0) {
struct ether_vlan_header *eh = (struct ether_vlan_header
*)wqe->eth.inline_hdr_start;
/* Range checks */
if (ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN))
ihs = (MLX5E_MAX_TX_INLINE -
ETHER_VLAN_ENCAP_LEN);
else if (ihs < ETHER_HDR_LEN) {
err = EINVAL;
goto tx_drop;
}
m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
m_adj(mb, ETHER_HDR_LEN);
/* Insert 4 bytes VLAN tag into data stream */
eh->evl_proto = eh->evl_encap_proto;
eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
/* Copy rest of header data, if any */
m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh +
1));
m_adj(mb, ihs - ETHER_HDR_LEN);
/* Extend header by 4 bytes */
ihs += ETHER_VLAN_ENCAP_LEN;
} else {
m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start);
m_adj(mb, ihs);
if (likely(ihs == 0)) {
/* nothing to inline */
} else if (unlikely(ihs > sq->max_inline)) {
/* inline header size is too big */
err = EINVAL;
goto tx_drop;
} else if ((mb->m_flags & M_VLANTAG) != 0) {
struct ether_vlan_header *eh = (struct ether_vlan_header *)
wqe->eth.inline_hdr_start;
/* Range checks */
if (unlikely(ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN)))
ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN);
else if (unlikely(ihs < ETHER_HDR_LEN)) {
err = EINVAL;
goto tx_drop;
}
m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
m_adj(mb, ETHER_HDR_LEN);
/* Insert 4 bytes VLAN tag into data stream */
eh->evl_proto = eh->evl_encap_proto;
eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
/* Copy rest of header data, if any */
m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
m_adj(mb, ihs - ETHER_HDR_LEN);
/* Extend header by 4 bytes */
ihs += ETHER_VLAN_ENCAP_LEN;
wqe->eth.inline_hdr_sz = cpu_to_be16(ihs);
} else {
m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start);
m_adj(mb, ihs);
wqe->eth.inline_hdr_sz = cpu_to_be16(ihs);
}

View File

@ -88,7 +88,7 @@ int mlx5_set_nic_vport_current_mac(struct mlx5_core_dev *mdev, int vport,
bool other_vport, u8 *addr);
int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
u16 vport, u8 *min_inline);
void mlx5_query_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline);
int mlx5_query_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline);
int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
u16 vport, u8 min_inline);
int mlx5_modify_nic_vport_port_guid(struct mlx5_core_dev *mdev,