mlx5: support MTU configuration
Depending on the MTU and whether jumbo frames are enabled, RX queues may
switch between SG and non-SG modes for better performance.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
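To make the SG/non-SG decision concrete, here is a minimal standalone sketch of the frame-length arithmetic the patch applies in mlx5_dev_set_mtu() below. It is illustrative only: needs_scatter() is a hypothetical helper, mb_len stands for the RX mempool buffer length, and the constants mirror the usual DPDK definitions (RTE_PKTMBUF_HEADROOM assumes the default build configuration).

/* Hypothetical helper, not part of the patch: reproduces the frame-length
 * computation and the scattered (SG) toggle used by mlx5_dev_set_mtu(). */
#include <stdint.h>

#define ETHER_HDR_LEN 14                /* Ethernet header. */
#define ETHER_MAX_LEN 1518              /* Maximum standard frame. */
#define ETHER_MAX_VLAN_FRAME_LEN 1522   /* Maximum VLAN-tagged frame. */
#define RTE_PKTMBUF_HEADROOM 128        /* Default DPDK mbuf headroom. */

static int
needs_scatter(uint16_t mtu, unsigned int mb_len)
{
	/* MTU plus L2 header plus VLAN tag room, as in the patch. */
	unsigned int max_frame_len = mtu + ETHER_HDR_LEN +
		(ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN);

	/* Scattered (SG) RX is needed once a frame no longer fits in the
	 * data room of a single mbuf. */
	return max_frame_len > (mb_len - RTE_PKTMBUF_HEADROOM);
}

With a 2048-byte mbuf, for example, a 1500-byte MTU keeps the queue in non-SG mode while a 9000-byte MTU switches it to SG.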
@@ -140,6 +140,7 @@ static const struct eth_dev_ops mlx5_dev_ops = {
	.tx_queue_release = mlx5_tx_queue_release,
	.mac_addr_remove = mlx5_mac_addr_remove,
	.mac_addr_add = mlx5_mac_addr_add,
	.mtu_set = mlx5_dev_set_mtu,
};

static struct {
@@ -162,6 +162,7 @@ int priv_get_mtu(struct priv *, uint16_t *);
int priv_set_flags(struct priv *, unsigned int, unsigned int);
int mlx5_dev_configure(struct rte_eth_dev *);
void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *);
int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t);
int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
				struct rte_pci_addr *);
@@ -346,6 +346,23 @@ priv_get_mtu(struct priv *priv, uint16_t *mtu)
	return 0;
}

/**
 * Set device MTU.
 *
 * @param priv
 *   Pointer to private structure.
 * @param mtu
 *   MTU value to set.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
static int
priv_set_mtu(struct priv *priv, uint16_t mtu)
{
	return priv_set_sysfs_ulong(priv, "mtu", mtu);
}

/**
 * Set device flags.
 *
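For context, priv_set_sysfs_ulong(priv, "mtu", mtu) above updates the kernel netdevice through its sysfs attribute. A rough standalone equivalent of that write, assuming the interface name is already known (the PMD resolves it internally) and using a hypothetical sysfs_set_mtu() helper:

#include <stdio.h>

/* Hypothetical illustration only: write an MTU value to
 * /sys/class/net/<ifname>/mtu, which is what the sysfs-based
 * kernel MTU update amounts to. */
static int
sysfs_set_mtu(const char *ifname, unsigned long mtu)
{
	char path[128];
	FILE *file;
	int ret;

	snprintf(path, sizeof(path), "/sys/class/net/%s/mtu", ifname);
	file = fopen(path, "w");
	if (file == NULL)
		return -1;
	ret = (fprintf(file, "%lu", mtu) < 0) ? -1 : 0;
	if (fclose(file) != 0)
		ret = -1;
	return ret;
}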
@@ -517,6 +534,91 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
	priv_unlock(priv);
}

/**
 * DPDK callback to change the MTU.
 *
 * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be
 * received). Use this as a hint to enable/disable scattered packets support
 * and improve performance when it is not needed.
 * Since failure is not an option, reconfiguring queues on the fly is not
 * recommended.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mtu
 *   New MTU.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct priv *priv = dev->data->dev_private;
	int ret = 0;
	unsigned int i;
	uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
		mlx5_rx_burst;

	priv_lock(priv);
	/* Set kernel interface MTU first. */
	if (priv_set_mtu(priv, mtu)) {
		ret = errno;
		WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
		     strerror(ret));
		goto out;
	} else
		DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
	priv->mtu = mtu;
	/* Temporarily replace RX handler with a fake one, assuming it has not
	 * been copied elsewhere. */
	dev->rx_pkt_burst = removed_rx_burst;
	/* Make sure everyone has left mlx5_rx_burst() and uses
	 * removed_rx_burst() instead. */
	rte_wmb();
	usleep(1000);
	/* Reconfigure each RX queue. */
	for (i = 0; (i != priv->rxqs_n); ++i) {
		struct rxq *rxq = (*priv->rxqs)[i];
		unsigned int max_frame_len;
		int sp;

		if (rxq == NULL)
			continue;
		/* Calculate new maximum frame length according to MTU and
		 * toggle scattered support (sp) if necessary. */
		max_frame_len = (priv->mtu + ETHER_HDR_LEN +
				 (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
		sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM));
		/* Provide new values to rxq_setup(). */
		dev->data->dev_conf.rxmode.jumbo_frame = sp;
		dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
		ret = rxq_rehash(dev, rxq);
		if (ret) {
			/* Force SP RX if that queue requires it and abort. */
			if (rxq->sp)
				rx_func = mlx5_rx_burst_sp;
			break;
		}
		/* Reenable non-RSS queue attributes. No need to check
		 * for errors at this stage. */
		if (!priv->rss) {
			if (priv->started)
				rxq_mac_addrs_add(rxq);
		}
		/* Scattered burst function takes priority. */
		if (rxq->sp)
			rx_func = mlx5_rx_burst_sp;
	}
	/* Burst functions can now be called again. */
	rte_wmb();
	dev->rx_pkt_burst = rx_func;
out:
	priv_unlock(priv);
	assert(ret >= 0);
	return -ret;
}

/**
 * Get PCI information from struct ibv_device.
 *
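From the application side, this callback is reached through the generic ethdev API rather than called directly. A minimal usage sketch, with port 0 and a 9000-byte MTU as arbitrary example values:

#include <stdio.h>
#include <string.h>
#include <rte_ethdev.h>

/* Application-side sketch: rte_eth_dev_set_mtu() dispatches to the
 * .mtu_set callback registered in mlx5_dev_ops above. */
static void
example_set_mtu(void)
{
	int ret = rte_eth_dev_set_mtu(0, 9000);

	if (ret < 0)
		printf("cannot set MTU: %s\n", strerror(-ret));
}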
@@ -525,6 +525,184 @@ rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,

#endif /* RSS_SUPPORT */

/**
 * Reconfigure a RX queue with new parameters.
 *
 * rxq_rehash() does not allocate mbufs, which, if not done from the right
 * thread (such as a control thread), may corrupt the pool.
 * In case of failure, the queue is left untouched.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   RX queue pointer.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
{
	struct priv *priv = rxq->priv;
	struct rxq tmpl = *rxq;
	unsigned int mbuf_n;
	unsigned int desc_n;
	struct rte_mbuf **pool;
	unsigned int i, k;
	struct ibv_exp_qp_attr mod;
	struct ibv_recv_wr *bad_wr;
	int err;
	int parent = (rxq == &priv->rxq_parent);

	if (parent) {
		ERROR("%p: cannot rehash parent queue %p",
		      (void *)dev, (void *)rxq);
		return EINVAL;
	}
	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
	/* Number of descriptors and mbufs currently allocated. */
	desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
	mbuf_n = desc_n;
	/* Enable scattered packets support for this queue if necessary. */
	if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
	    (dev->data->dev_conf.rxmode.max_rx_pkt_len >
	     (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
		tmpl.sp = 1;
		desc_n /= MLX5_PMD_SGE_WR_N;
	} else
		tmpl.sp = 0;
DEBUG("%p: %s scattered packets support (%u WRs)",
|
||||
(void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n);
|
||||
/* If scatter mode is the same as before, nothing to do. */
|
||||
if (tmpl.sp == rxq->sp) {
|
||||
DEBUG("%p: nothing to do", (void *)dev);
|
||||
return 0;
|
||||
}
|
||||
/* Remove attached flows if RSS is disabled (no parent queue). */
|
||||
if (!priv->rss) {
|
||||
rxq_mac_addrs_del(&tmpl);
|
||||
/* Update original queue in case of failure. */
|
||||
memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
|
||||
}
|
||||
/* From now on, any failure will render the queue unusable.
|
||||
* Reinitialize QP. */
|
||||
mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
|
||||
err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
|
||||
if (err) {
|
||||
ERROR("%p: cannot reset QP: %s", (void *)dev, strerror(err));
|
||||
assert(err > 0);
|
||||
return err;
|
||||
}
|
||||
err = ibv_resize_cq(tmpl.cq, desc_n);
|
||||
if (err) {
|
||||
ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
|
||||
assert(err > 0);
|
||||
return err;
|
||||
}
|
||||
mod = (struct ibv_exp_qp_attr){
|
||||
/* Move the QP to this state. */
|
||||
.qp_state = IBV_QPS_INIT,
|
||||
/* Primary port number. */
|
||||
.port_num = priv->port
|
||||
};
|
||||
err = ibv_exp_modify_qp(tmpl.qp, &mod,
|
||||
(IBV_EXP_QP_STATE |
|
||||
#ifdef RSS_SUPPORT
|
||||
(parent ? IBV_EXP_QP_GROUP_RSS : 0) |
|
||||
#endif /* RSS_SUPPORT */
|
||||
IBV_EXP_QP_PORT));
|
||||
if (err) {
|
||||
ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
|
||||
(void *)dev, strerror(err));
|
||||
assert(err > 0);
|
||||
return err;
|
||||
};
|
||||
/* Reconfigure flows. Do not care for errors. */
|
||||
if (!priv->rss) {
|
||||
if (priv->started)
|
||||
rxq_mac_addrs_add(&tmpl);
|
||||
/* Update original queue in case of failure. */
|
||||
memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
|
||||
}
|
||||
/* Allocate pool. */
|
||||
pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
|
||||
if (pool == NULL) {
|
||||
ERROR("%p: cannot allocate memory", (void *)dev);
|
||||
return ENOBUFS;
|
||||
}
|
||||
	/* Snatch mbufs from original queue. */
	k = 0;
	if (rxq->sp) {
		struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt_sp *elt = &(*elts)[i];
			unsigned int j;

			for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
				assert(elt->bufs[j] != NULL);
				pool[k++] = elt->bufs[j];
			}
		}
	} else {
		struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt *elt = &(*elts)[i];
			struct rte_mbuf *buf = (void *)
				((uintptr_t)elt->sge.addr -
				 WR_ID(elt->wr.wr_id).offset);

			assert(WR_ID(elt->wr.wr_id).id == i);
			pool[k++] = buf;
		}
	}
	assert(k == mbuf_n);
	tmpl.elts_n = 0;
	tmpl.elts.sp = NULL;
	assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp);
	err = ((tmpl.sp) ?
	       rxq_alloc_elts_sp(&tmpl, desc_n, pool) :
	       rxq_alloc_elts(&tmpl, desc_n, pool));
	if (err) {
		ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
		rte_free(pool);
		assert(err > 0);
		return err;
	}
	assert(tmpl.elts_n == desc_n);
	assert(tmpl.elts.sp != NULL);
	rte_free(pool);
	/* Clean up original data. */
	rxq->elts_n = 0;
	rte_free(rxq->elts.sp);
	rxq->elts.sp = NULL;
	/* Post WRs. */
	err = ibv_post_recv(tmpl.qp,
			    (tmpl.sp ?
			     &(*tmpl.elts.sp)[0].wr :
			     &(*tmpl.elts.no_sp)[0].wr),
			    &bad_wr);
	if (err) {
		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
		      (void *)dev,
		      (void *)bad_wr,
		      strerror(err));
		goto skip_rtr;
	}
	mod = (struct ibv_exp_qp_attr){
		.qp_state = IBV_QPS_RTR
	};
	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
	if (err)
		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
		      (void *)dev, strerror(err));
skip_rtr:
	*rxq = tmpl;
	assert(err >= 0);
	return err;
}

/**
 * Configure a RX queue.
 *
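To put numbers on the descriptor bookkeeping in rxq_rehash(): a queue created with 256 elements in non-SG mode starts with desc_n = mbuf_n = 256. If the new MTU forces SG mode and MLX5_PMD_SGE_WR_N is 4 (its usual build-time default, assumed here), desc_n becomes 256 / 4 = 64 work requests of 4 scatter-gather entries each, and the same 256 mbufs are snatched from the old representation and reused instead of being reallocated.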
@@ -144,6 +144,7 @@ struct txq {
/* mlx5_rxq.c */

void rxq_cleanup(struct rxq *);
int rxq_rehash(struct rte_eth_dev *, struct rxq *);
int rxq_setup(struct rte_eth_dev *, struct rxq *, uint16_t, unsigned int,
	      const struct rte_eth_rxconf *, struct rte_mempool *);
int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,