mlx5: support RETA query and update
ConnectX-4 adapters do not have a constant indirection table size, which is set at runtime from the number of RX queues. The maximum size is retrieved using a hardware query and is normally 512. Since the current RETA API cannot handle a variable size, any query/update command causes it to be silently updated to RSS_INDIRECTION_TABLE_SIZE entries regardless of the original size. Also due to the underlying type of the configuration structure, the maximum size is limited to RSS_INDIRECTION_TABLE_SIZE (currently 128, at most 256 entries). A port stop/start must be done to apply the new RETA configuration. Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
This commit is contained in:
parent
647d1eaf75
commit
634efbc2c8
drivers/net/mlx5
@ -133,6 +133,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
|
|||||||
rte_free((*priv->rss_conf)[i]);
|
rte_free((*priv->rss_conf)[i]);
|
||||||
rte_free(priv->rss_conf);
|
rte_free(priv->rss_conf);
|
||||||
}
|
}
|
||||||
|
if (priv->reta_idx != NULL)
|
||||||
|
rte_free(priv->reta_idx);
|
||||||
priv_unlock(priv);
|
priv_unlock(priv);
|
||||||
memset(priv, 0, sizeof(*priv));
|
memset(priv, 0, sizeof(*priv));
|
||||||
}
|
}
|
||||||
@ -160,6 +162,8 @@ static const struct eth_dev_ops mlx5_dev_ops = {
|
|||||||
.mac_addr_remove = mlx5_mac_addr_remove,
|
.mac_addr_remove = mlx5_mac_addr_remove,
|
||||||
.mac_addr_add = mlx5_mac_addr_add,
|
.mac_addr_add = mlx5_mac_addr_add,
|
||||||
.mtu_set = mlx5_dev_set_mtu,
|
.mtu_set = mlx5_dev_set_mtu,
|
||||||
|
.reta_update = mlx5_dev_rss_reta_update,
|
||||||
|
.reta_query = mlx5_dev_rss_reta_query,
|
||||||
.rss_hash_update = mlx5_rss_hash_update,
|
.rss_hash_update = mlx5_rss_hash_update,
|
||||||
.rss_hash_conf_get = mlx5_rss_hash_conf_get,
|
.rss_hash_conf_get = mlx5_rss_hash_conf_get,
|
||||||
};
|
};
|
||||||
@ -373,7 +377,9 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
|
|||||||
DEBUG("L2 tunnel checksum offloads are %ssupported",
|
DEBUG("L2 tunnel checksum offloads are %ssupported",
|
||||||
(priv->hw_csum_l2tun ? "" : "not "));
|
(priv->hw_csum_l2tun ? "" : "not "));
|
||||||
|
|
||||||
priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
|
priv->ind_table_max_size =
|
||||||
|
RTE_MIN((unsigned int)RSS_INDIRECTION_TABLE_SIZE,
|
||||||
|
exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size);
|
||||||
DEBUG("maximum RX indirection table size is %u",
|
DEBUG("maximum RX indirection table size is %u",
|
||||||
priv->ind_table_max_size);
|
priv->ind_table_max_size);
|
||||||
|
|
||||||
|
@ -118,6 +118,8 @@ struct priv {
|
|||||||
/* RSS configuration array indexed by hash RX queue type. */
|
/* RSS configuration array indexed by hash RX queue type. */
|
||||||
struct rte_eth_rss_conf *(*rss_conf)[];
|
struct rte_eth_rss_conf *(*rss_conf)[];
|
||||||
struct rte_intr_handle intr_handle; /* Interrupt handler. */
|
struct rte_intr_handle intr_handle; /* Interrupt handler. */
|
||||||
|
unsigned int (*reta_idx)[]; /* RETA index table. */
|
||||||
|
unsigned int reta_idx_n; /* RETA index size. */
|
||||||
rte_spinlock_t lock; /* Lock for control functions. */
|
rte_spinlock_t lock; /* Lock for control functions. */
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -184,6 +186,11 @@ int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int,
|
|||||||
uint64_t);
|
uint64_t);
|
||||||
int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
|
int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
|
||||||
int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
|
int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
|
||||||
|
int priv_rss_reta_index_resize(struct priv *, unsigned int);
|
||||||
|
int mlx5_dev_rss_reta_query(struct rte_eth_dev *,
|
||||||
|
struct rte_eth_rss_reta_entry64 *, uint16_t);
|
||||||
|
int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
|
||||||
|
struct rte_eth_rss_reta_entry64 *, uint16_t);
|
||||||
|
|
||||||
/* mlx5_rxmode.c */
|
/* mlx5_rxmode.c */
|
||||||
|
|
||||||
|
@ -410,6 +410,9 @@ dev_configure(struct rte_eth_dev *dev)
|
|||||||
struct priv *priv = dev->data->dev_private;
|
struct priv *priv = dev->data->dev_private;
|
||||||
unsigned int rxqs_n = dev->data->nb_rx_queues;
|
unsigned int rxqs_n = dev->data->nb_rx_queues;
|
||||||
unsigned int txqs_n = dev->data->nb_tx_queues;
|
unsigned int txqs_n = dev->data->nb_tx_queues;
|
||||||
|
unsigned int i;
|
||||||
|
unsigned int j;
|
||||||
|
unsigned int reta_idx_n;
|
||||||
|
|
||||||
priv->rxqs = (void *)dev->data->rx_queues;
|
priv->rxqs = (void *)dev->data->rx_queues;
|
||||||
priv->txqs = (void *)dev->data->tx_queues;
|
priv->txqs = (void *)dev->data->tx_queues;
|
||||||
@ -418,11 +421,31 @@ dev_configure(struct rte_eth_dev *dev)
|
|||||||
(void *)dev, priv->txqs_n, txqs_n);
|
(void *)dev, priv->txqs_n, txqs_n);
|
||||||
priv->txqs_n = txqs_n;
|
priv->txqs_n = txqs_n;
|
||||||
}
|
}
|
||||||
|
if (rxqs_n > priv->ind_table_max_size) {
|
||||||
|
ERROR("cannot handle this many RX queues (%u)", rxqs_n);
|
||||||
|
return EINVAL;
|
||||||
|
}
|
||||||
if (rxqs_n == priv->rxqs_n)
|
if (rxqs_n == priv->rxqs_n)
|
||||||
return 0;
|
return 0;
|
||||||
INFO("%p: RX queues number update: %u -> %u",
|
INFO("%p: RX queues number update: %u -> %u",
|
||||||
(void *)dev, priv->rxqs_n, rxqs_n);
|
(void *)dev, priv->rxqs_n, rxqs_n);
|
||||||
priv->rxqs_n = rxqs_n;
|
priv->rxqs_n = rxqs_n;
|
||||||
|
/* If the requested number of RX queues is not a power of two, use the
|
||||||
|
* maximum indirection table size for better balancing.
|
||||||
|
* The result is always rounded to the next power of two. */
|
||||||
|
reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
|
||||||
|
priv->ind_table_max_size :
|
||||||
|
rxqs_n));
|
||||||
|
if (priv_rss_reta_index_resize(priv, reta_idx_n))
|
||||||
|
return ENOMEM;
|
||||||
|
/* When the number of RX queues is not a power of two, the remaining
|
||||||
|
* table entries are padded with reused WQs and hashes are not spread
|
||||||
|
* uniformly. */
|
||||||
|
for (i = 0, j = 0; (i != reta_idx_n); ++i) {
|
||||||
|
(*priv->reta_idx)[i] = j;
|
||||||
|
if (++j == rxqs_n)
|
||||||
|
j = 0;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -494,6 +517,12 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
|
|||||||
0);
|
0);
|
||||||
if (priv_get_ifname(priv, &ifname) == 0)
|
if (priv_get_ifname(priv, &ifname) == 0)
|
||||||
info->if_index = if_nametoindex(ifname);
|
info->if_index = if_nametoindex(ifname);
|
||||||
|
/* FIXME: RETA update/query API expects the callee to know the size of
|
||||||
|
* the indirection table. For this PMD, the size varies depending on
|
||||||
|
* the number of RX queues, so it is impossible to find the correct
|
||||||
|
* size unless it is fixed.
|
||||||
|
* The API should be updated to solve this problem. */
|
||||||
|
info->reta_size = priv->ind_table_max_size;
|
||||||
priv_unlock(priv);
|
priv_unlock(priv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -211,3 +211,166 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
|
|||||||
priv_unlock(priv);
|
priv_unlock(priv);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate/reallocate RETA index table.
|
||||||
|
*
|
||||||
|
* @param priv
|
||||||
|
* Pointer to private structure.
|
||||||
|
* @praram reta_size
|
||||||
|
* The size of the array to allocate.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* 0 on success, errno value on failure.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size)
|
||||||
|
{
|
||||||
|
void *mem;
|
||||||
|
unsigned int old_size = priv->reta_idx_n;
|
||||||
|
|
||||||
|
if (priv->reta_idx_n == reta_size)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
mem = rte_realloc(priv->reta_idx,
|
||||||
|
reta_size * sizeof((*priv->reta_idx)[0]), 0);
|
||||||
|
if (!mem)
|
||||||
|
return ENOMEM;
|
||||||
|
priv->reta_idx = mem;
|
||||||
|
priv->reta_idx_n = reta_size;
|
||||||
|
|
||||||
|
if (old_size < reta_size)
|
||||||
|
memset(&(*priv->reta_idx)[old_size], 0,
|
||||||
|
(reta_size - old_size) *
|
||||||
|
sizeof((*priv->reta_idx)[0]));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Query RETA table.
|
||||||
|
*
|
||||||
|
* @param priv
|
||||||
|
* Pointer to private structure.
|
||||||
|
* @param[in, out] reta_conf
|
||||||
|
* Pointer to the first RETA configuration structure.
|
||||||
|
* @param reta_size
|
||||||
|
* Number of entries.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* 0 on success, errno value on failure.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
priv_dev_rss_reta_query(struct priv *priv,
|
||||||
|
struct rte_eth_rss_reta_entry64 *reta_conf,
|
||||||
|
unsigned int reta_size)
|
||||||
|
{
|
||||||
|
unsigned int idx;
|
||||||
|
unsigned int i;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* See RETA comment in mlx5_dev_infos_get(). */
|
||||||
|
ret = priv_rss_reta_index_resize(priv, priv->ind_table_max_size);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
/* Fill each entry of the table even if its bit is not set. */
|
||||||
|
for (idx = 0, i = 0; (i != reta_size); ++i) {
|
||||||
|
idx = i / RTE_RETA_GROUP_SIZE;
|
||||||
|
reta_conf[idx].reta[i % RTE_RETA_GROUP_SIZE] =
|
||||||
|
(*priv->reta_idx)[i];
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update RETA table.
|
||||||
|
*
|
||||||
|
* @param priv
|
||||||
|
* Pointer to private structure.
|
||||||
|
* @param[in] reta_conf
|
||||||
|
* Pointer to the first RETA configuration structure.
|
||||||
|
* @param reta_size
|
||||||
|
* Number of entries.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* 0 on success, errno value on failure.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
priv_dev_rss_reta_update(struct priv *priv,
|
||||||
|
struct rte_eth_rss_reta_entry64 *reta_conf,
|
||||||
|
unsigned int reta_size)
|
||||||
|
{
|
||||||
|
unsigned int idx;
|
||||||
|
unsigned int i;
|
||||||
|
unsigned int pos;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* See RETA comment in mlx5_dev_infos_get(). */
|
||||||
|
ret = priv_rss_reta_index_resize(priv, priv->ind_table_max_size);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
for (idx = 0, i = 0; (i != reta_size); ++i) {
|
||||||
|
idx = i / RTE_RETA_GROUP_SIZE;
|
||||||
|
pos = i % RTE_RETA_GROUP_SIZE;
|
||||||
|
if (((reta_conf[idx].mask >> i) & 0x1) == 0)
|
||||||
|
continue;
|
||||||
|
assert(reta_conf[idx].reta[pos] < priv->rxqs_n);
|
||||||
|
(*priv->reta_idx)[i] = reta_conf[idx].reta[pos];
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DPDK callback to get the RETA indirection table.
|
||||||
|
*
|
||||||
|
* @param dev
|
||||||
|
* Pointer to Ethernet device structure.
|
||||||
|
* @param reta_conf
|
||||||
|
* Pointer to RETA configuration structure array.
|
||||||
|
* @param reta_size
|
||||||
|
* Size of the RETA table.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* 0 on success, negative errno value on failure.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
mlx5_dev_rss_reta_query(struct rte_eth_dev *dev,
|
||||||
|
struct rte_eth_rss_reta_entry64 *reta_conf,
|
||||||
|
uint16_t reta_size)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct priv *priv = dev->data->dev_private;
|
||||||
|
|
||||||
|
priv_lock(priv);
|
||||||
|
ret = priv_dev_rss_reta_query(priv, reta_conf, reta_size);
|
||||||
|
priv_unlock(priv);
|
||||||
|
return -ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DPDK callback to update the RETA indirection table.
|
||||||
|
*
|
||||||
|
* @param dev
|
||||||
|
* Pointer to Ethernet device structure.
|
||||||
|
* @param reta_conf
|
||||||
|
* Pointer to RETA configuration structure array.
|
||||||
|
* @param reta_size
|
||||||
|
* Size of the RETA table.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* 0 on success, negative errno value on failure.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
|
||||||
|
struct rte_eth_rss_reta_entry64 *reta_conf,
|
||||||
|
uint16_t reta_size)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct priv *priv = dev->data->dev_private;
|
||||||
|
|
||||||
|
priv_lock(priv);
|
||||||
|
ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
|
||||||
|
priv_unlock(priv);
|
||||||
|
return -ret;
|
||||||
|
}
|
||||||
|
@ -258,26 +258,6 @@ hash_rxq_flow_attr(const struct hash_rxq *hash_rxq,
|
|||||||
return flow_attr_size;
|
return flow_attr_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Return the base-2 logarithm of the input value, rounded up.
 *
 * The result is the exponent of the nearest power of two that is greater
 * than or equal to the input value; 0 is returned for inputs 0 and 1.
 *
 * @param v
 *   Input value.
 *
 * @return
 *   Exponent of the nearest power of two above input value.
 */
static unsigned int
log2above(unsigned int v)
{
	unsigned int exponent = 0;
	unsigned int inexact = 0;

	while (v >> 1) {
		/* Any bit set below the top one means v is not an exact
		 * power of two, so the result must be rounded up. */
		inexact |= (v & 1);
		v >>= 1;
		++exponent;
	}
	return exponent + inexact;
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the type corresponding to the n'th bit set.
|
* Return the type corresponding to the n'th bit set.
|
||||||
*
|
*
|
||||||
@ -360,14 +340,7 @@ priv_make_ind_table_init(struct priv *priv,
|
|||||||
int
|
int
|
||||||
priv_create_hash_rxqs(struct priv *priv)
|
priv_create_hash_rxqs(struct priv *priv)
|
||||||
{
|
{
|
||||||
/* If the requested number of WQs is not a power of two, use the
|
struct ibv_exp_wq *wqs[priv->reta_idx_n];
|
||||||
* maximum indirection table size for better balancing.
|
|
||||||
* The result is always rounded to the next power of two. */
|
|
||||||
unsigned int wqs_n =
|
|
||||||
(1 << log2above((priv->rxqs_n & (priv->rxqs_n - 1)) ?
|
|
||||||
priv->ind_table_max_size :
|
|
||||||
priv->rxqs_n));
|
|
||||||
struct ibv_exp_wq *wqs[wqs_n];
|
|
||||||
struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
|
struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
|
||||||
unsigned int ind_tables_n =
|
unsigned int ind_tables_n =
|
||||||
priv_make_ind_table_init(priv, &ind_table_init);
|
priv_make_ind_table_init(priv, &ind_table_init);
|
||||||
@ -393,25 +366,15 @@ priv_create_hash_rxqs(struct priv *priv)
|
|||||||
" indirection table cannot be created");
|
" indirection table cannot be created");
|
||||||
return EINVAL;
|
return EINVAL;
|
||||||
}
|
}
|
||||||
if ((wqs_n < priv->rxqs_n) || (wqs_n > priv->ind_table_max_size)) {
|
if (priv->rxqs_n & (priv->rxqs_n - 1)) {
|
||||||
ERROR("cannot handle this many RX queues (%u)", priv->rxqs_n);
|
|
||||||
err = ERANGE;
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
if (wqs_n != priv->rxqs_n) {
|
|
||||||
INFO("%u RX queues are configured, consider rounding this"
|
INFO("%u RX queues are configured, consider rounding this"
|
||||||
" number to the next power of two for better balancing",
|
" number to the next power of two for better balancing",
|
||||||
priv->rxqs_n);
|
priv->rxqs_n);
|
||||||
DEBUG("indirection table extended to assume %u WQs", wqs_n);
|
DEBUG("indirection table extended to assume %u WQs",
|
||||||
}
|
priv->reta_idx_n);
|
||||||
/* When the number of RX queues is not a power of two, the remaining
|
|
||||||
* table entries are padded with reused WQs and hashes are not spread
|
|
||||||
* uniformly. */
|
|
||||||
for (i = 0, j = 0; (i != wqs_n); ++i) {
|
|
||||||
wqs[i] = (*priv->rxqs)[j]->wq;
|
|
||||||
if (++j == priv->rxqs_n)
|
|
||||||
j = 0;
|
|
||||||
}
|
}
|
||||||
|
for (i = 0; (i != priv->reta_idx_n); ++i)
|
||||||
|
wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq;
|
||||||
/* Get number of hash RX queues to configure. */
|
/* Get number of hash RX queues to configure. */
|
||||||
for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
|
for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
|
||||||
hash_rxqs_n += ind_table_init[i].hash_types_n;
|
hash_rxqs_n += ind_table_init[i].hash_types_n;
|
||||||
@ -436,8 +399,8 @@ priv_create_hash_rxqs(struct priv *priv)
|
|||||||
unsigned int ind_tbl_size = ind_table_init[i].max_size;
|
unsigned int ind_tbl_size = ind_table_init[i].max_size;
|
||||||
struct ibv_exp_rwq_ind_table *ind_table;
|
struct ibv_exp_rwq_ind_table *ind_table;
|
||||||
|
|
||||||
if (wqs_n < ind_tbl_size)
|
if (priv->reta_idx_n < ind_tbl_size)
|
||||||
ind_tbl_size = wqs_n;
|
ind_tbl_size = priv->reta_idx_n;
|
||||||
ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
|
ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
|
||||||
errno = 0;
|
errno = 0;
|
||||||
ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
|
ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
|
||||||
|
@ -161,4 +161,24 @@ pmd_drv_log_basename(const char *s)
|
|||||||
\
|
\
|
||||||
snprintf(name, sizeof(name), __VA_ARGS__)
|
snprintf(name, sizeof(name), __VA_ARGS__)
|
||||||
|
|
||||||
|
/**
 * Return the base-2 logarithm of the input value, rounded up.
 *
 * The result is the exponent of the nearest power of two that is greater
 * than or equal to the input value; 0 is returned for inputs 0 and 1.
 *
 * @param v
 *   Input value.
 *
 * @return
 *   Exponent of the nearest power of two above input value.
 */
static inline unsigned int
log2above(unsigned int v)
{
	unsigned int exponent = 0;
	unsigned int inexact = 0;

	while (v >> 1) {
		/* Any bit set below the top one means v is not an exact
		 * power of two, so the result must be rounded up. */
		inexact |= (v & 1);
		v >>= 1;
		++exponent;
	}
	return exponent + inexact;
}
|
||||||
|
|
||||||
#endif /* RTE_PMD_MLX5_UTILS_H_ */
|
#endif /* RTE_PMD_MLX5_UTILS_H_ */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user