mlx5: support MTU configuration

Depending on the MTU and whether jumbo frames are enabled, RX queues may
switch between scatter/gather (SG) and non-SG modes for better performance.
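
For context, a minimal usage sketch (not part of this commit): the new
mtu_set callback is reached through the public ethdev API. The port_id and
9000-byte MTU below are illustrative values.

        #include <rte_ethdev.h>

        /* Illustrative values; rte_eth_dev_set_mtu() dispatches to the
         * PMD's mtu_set callback, i.e. mlx5_dev_set_mtu() here. */
        uint8_t port_id = 0;
        int ret = rte_eth_dev_set_mtu(port_id, 9000);
        /* ret is 0 on success or a negative errno value on failure. */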

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>


@@ -140,6 +140,7 @@ static const struct eth_dev_ops mlx5_dev_ops = {
        .tx_queue_release = mlx5_tx_queue_release,
        .mac_addr_remove = mlx5_mac_addr_remove,
        .mac_addr_add = mlx5_mac_addr_add,
        .mtu_set = mlx5_dev_set_mtu,
};

static struct {


@@ -162,6 +162,7 @@ int priv_get_mtu(struct priv *, uint16_t *);
int priv_set_flags(struct priv *, unsigned int, unsigned int);
int mlx5_dev_configure(struct rte_eth_dev *);
void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *);
int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t);
int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
                                struct rte_pci_addr *);


@@ -346,6 +346,23 @@ priv_get_mtu(struct priv *priv, uint16_t *mtu)
        return 0;
}

/**
 * Set device MTU.
 *
 * @param priv
 *   Pointer to private structure.
 * @param mtu
 *   MTU value to set.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
static int
priv_set_mtu(struct priv *priv, uint16_t mtu)
{
        return priv_set_sysfs_ulong(priv, "mtu", mtu);
}
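
For reference, priv_set_sysfs_ulong() writes the value through the port's
sysfs attributes. A rough standalone equivalent, assuming the attribute
resolves to /sys/class/net/<ifname>/mtu (the helper below is hypothetical
and not part of the driver):

        #include <stdio.h>

        /* Hypothetical sketch of the sysfs write performed above;
         * "ifname" is the kernel netdevice associated with the port. */
        static int
        sysfs_set_mtu(const char *ifname, unsigned long mtu)
        {
                char path[128];
                FILE *f;

                snprintf(path, sizeof(path), "/sys/class/net/%s/mtu", ifname);
                f = fopen(path, "wb");
                if (f == NULL)
                        return -1;
                fprintf(f, "%lu", mtu);
                return fclose(f);
        }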

/**
 * Set device flags.
 *
@@ -517,6 +534,91 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
        priv_unlock(priv);
}

/**
 * DPDK callback to change the MTU.
 *
 * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be
 * received). Use this as a hint to toggle scattered packets support and
 * improve performance when it is not needed.
 * Since failure is not an option, reconfiguring queues on the fly is not
 * recommended.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mtu
 *   New MTU.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
        struct priv *priv = dev->data->dev_private;
        int ret = 0;
        unsigned int i;
        uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
                mlx5_rx_burst;

        priv_lock(priv);
        /* Set kernel interface MTU first. */
        if (priv_set_mtu(priv, mtu)) {
                ret = errno;
                WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
                     strerror(ret));
                goto out;
        } else
                DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
        priv->mtu = mtu;
        /* Temporarily replace RX handler with a fake one, assuming it has not
         * been copied elsewhere. */
        dev->rx_pkt_burst = removed_rx_burst;
        /* Make sure everyone has left mlx5_rx_burst() and uses
         * removed_rx_burst() instead. */
        rte_wmb();
        usleep(1000);
        /* Reconfigure each RX queue. */
        for (i = 0; (i != priv->rxqs_n); ++i) {
                struct rxq *rxq = (*priv->rxqs)[i];
                unsigned int max_frame_len;
                int sp;

                if (rxq == NULL)
                        continue;
                /* Calculate new maximum frame length according to MTU and
                 * toggle scattered support (sp) if necessary. */
                max_frame_len = (priv->mtu + ETHER_HDR_LEN +
                                 (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
                sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM));
                /* Provide new values to rxq_setup(). */
                dev->data->dev_conf.rxmode.jumbo_frame = sp;
                dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
                ret = rxq_rehash(dev, rxq);
                if (ret) {
                        /* Force SP RX if that queue requires it and abort. */
                        if (rxq->sp)
                                rx_func = mlx5_rx_burst_sp;
                        break;
                }
                /* Reenable non-RSS queue attributes. No need to check
                 * for errors at this stage. */
                if (!priv->rss) {
                        if (priv->started)
                                rxq_mac_addrs_add(rxq);
                }
                /* Scattered burst function takes priority. */
                if (rxq->sp)
                        rx_func = mlx5_rx_burst_sp;
        }
        /* Burst functions can now be called again. */
        rte_wmb();
        dev->rx_pkt_burst = rx_func;
out:
        priv_unlock(priv);
        assert(ret >= 0);
        return -ret;
}
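
To make the scatter decision above concrete: ETHER_MAX_VLAN_FRAME_LEN -
ETHER_MAX_LEN is the 4-byte VLAN allowance (1522 - 1518), so max_frame_len
works out to MTU + 14 (Ethernet header) + 4. A sketch with illustrative
numbers (the 2048-byte mbuf data room is an assumption, not a driver
default):

        /* With a 1500-byte MTU: 1500 + 14 + 4 = 1518 bytes, which fits in
         * 2048 - RTE_PKTMBUF_HEADROOM (128 by default) = 1920, so sp stays 0.
         * With a 9000-byte MTU: 9018 > 1920, so SG mode is enabled. */
        unsigned int mb_len = 2048; /* assumed mempool data room */
        unsigned int max_frame_len = 9000 + ETHER_HDR_LEN +
                (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN); /* 9018 */
        int sp = (max_frame_len > (mb_len - RTE_PKTMBUF_HEADROOM)); /* 1 */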

/**
 * Get PCI information from struct ibv_device.
 *


@@ -525,6 +525,184 @@ rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
#endif /* RSS_SUPPORT */

/**
 * Reconfigure a RX queue with new parameters.
 *
 * rxq_rehash() does not allocate mbufs, since allocating them from the
 * wrong thread (such as a control thread) may corrupt the pool.
 * In case of failure, the queue is left untouched.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   RX queue pointer.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
{
        struct priv *priv = rxq->priv;
        struct rxq tmpl = *rxq;
        unsigned int mbuf_n;
        unsigned int desc_n;
        struct rte_mbuf **pool;
        unsigned int i, k;
        struct ibv_exp_qp_attr mod;
        struct ibv_recv_wr *bad_wr;
        int err;
        int parent = (rxq == &priv->rxq_parent);

        if (parent) {
                ERROR("%p: cannot rehash parent queue %p",
                      (void *)dev, (void *)rxq);
                return EINVAL;
        }
        DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
        /* Number of descriptors and mbufs currently allocated. */
        desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
        mbuf_n = desc_n;
        /* Enable scattered packets support for this queue if necessary. */
        if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
            (dev->data->dev_conf.rxmode.max_rx_pkt_len >
             (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
                tmpl.sp = 1;
                desc_n /= MLX5_PMD_SGE_WR_N;
        } else
                tmpl.sp = 0;
        DEBUG("%p: %s scattered packets support (%u WRs)",
              (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n);
        /* If scatter mode is the same as before, nothing to do. */
        if (tmpl.sp == rxq->sp) {
                DEBUG("%p: nothing to do", (void *)dev);
                return 0;
        }
        /* Remove attached flows if RSS is disabled (no parent queue). */
        if (!priv->rss) {
                rxq_mac_addrs_del(&tmpl);
                /* Update original queue in case of failure. */
                memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
        }
        /* From now on, any failure will render the queue unusable.
         * Reinitialize QP. */
        mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
        err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
        if (err) {
                ERROR("%p: cannot reset QP: %s", (void *)dev, strerror(err));
                assert(err > 0);
                return err;
        }
        err = ibv_resize_cq(tmpl.cq, desc_n);
        if (err) {
                ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
                assert(err > 0);
                return err;
        }
        mod = (struct ibv_exp_qp_attr){
                /* Move the QP to this state. */
                .qp_state = IBV_QPS_INIT,
                /* Primary port number. */
                .port_num = priv->port
        };
        err = ibv_exp_modify_qp(tmpl.qp, &mod,
                                (IBV_EXP_QP_STATE |
#ifdef RSS_SUPPORT
                                 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
#endif /* RSS_SUPPORT */
                                 IBV_EXP_QP_PORT));
        if (err) {
                ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
                      (void *)dev, strerror(err));
                assert(err > 0);
                return err;
        }
        /* Reconfigure flows. Do not care for errors. */
        if (!priv->rss) {
                if (priv->started)
                        rxq_mac_addrs_add(&tmpl);
                /* Update original queue in case of failure. */
                memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
        }
        /* Allocate pool. */
        pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
        if (pool == NULL) {
                ERROR("%p: cannot allocate memory", (void *)dev);
                return ENOBUFS;
        }
        /* Snatch mbufs from original queue. */
        k = 0;
        if (rxq->sp) {
                struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;

                for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                        struct rxq_elt_sp *elt = &(*elts)[i];
                        unsigned int j;

                        for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
                                assert(elt->bufs[j] != NULL);
                                pool[k++] = elt->bufs[j];
                        }
                }
        } else {
                struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;

                for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                        struct rxq_elt *elt = &(*elts)[i];
                        struct rte_mbuf *buf = (void *)
                                ((uintptr_t)elt->sge.addr -
                                 WR_ID(elt->wr.wr_id).offset);

                        assert(WR_ID(elt->wr.wr_id).id == i);
                        pool[k++] = buf;
                }
        }
        assert(k == mbuf_n);
        tmpl.elts_n = 0;
        tmpl.elts.sp = NULL;
        assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp);
        err = ((tmpl.sp) ?
               rxq_alloc_elts_sp(&tmpl, desc_n, pool) :
               rxq_alloc_elts(&tmpl, desc_n, pool));
        if (err) {
                ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
                rte_free(pool);
                assert(err > 0);
                return err;
        }
        assert(tmpl.elts_n == desc_n);
        assert(tmpl.elts.sp != NULL);
        rte_free(pool);
        /* Clean up original data. */
        rxq->elts_n = 0;
        rte_free(rxq->elts.sp);
        rxq->elts.sp = NULL;
        /* Post WRs. */
        err = ibv_post_recv(tmpl.qp,
                            (tmpl.sp ?
                             &(*tmpl.elts.sp)[0].wr :
                             &(*tmpl.elts.no_sp)[0].wr),
                            &bad_wr);
        if (err) {
                ERROR("%p: ibv_post_recv() failed for WR %p: %s",
                      (void *)dev,
                      (void *)bad_wr,
                      strerror(err));
                goto skip_rtr;
        }
        mod = (struct ibv_exp_qp_attr){
                .qp_state = IBV_QPS_RTR
        };
        err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
        if (err)
                ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
                      (void *)dev, strerror(err));
skip_rtr:
        *rxq = tmpl;
        assert(err >= 0);
        return err;
}
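
One step worth spelling out: the descriptor count is rescaled when scatter
support is toggled, because an SG work request carries MLX5_PMD_SGE_WR_N
segments while the number of mbufs stays constant. With illustrative
numbers (elts_n of 256 is an assumption; MLX5_PMD_SGE_WR_N defaults to 4):

        /* Going from non-SG to SG mode: */
        unsigned int elts_n = 256;        /* assumed current WR count */
        unsigned int desc_n = elts_n * 1; /* 256 descriptors == 256 mbufs */
        unsigned int mbuf_n = desc_n;     /* mbufs to recycle: 256 */
        desc_n /= MLX5_PMD_SGE_WR_N;      /* 64 SG WRs x 4 segments each */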

/**
 * Configure a RX queue.
 *


@@ -144,6 +144,7 @@ struct txq {

/* mlx5_rxq.c */

void rxq_cleanup(struct rxq *);
int rxq_rehash(struct rte_eth_dev *, struct rxq *);
int rxq_setup(struct rte_eth_dev *, struct rxq *, uint16_t, unsigned int,
              const struct rte_eth_rxconf *, struct rte_mempool *);
int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,