mlx5: support RETA query and update

ConnectX-4 adapters do not have a constant indirection table size, which is
set at runtime from the number of RX queues. The maximum size is retrieved
using a hardware query and is normally 512.

Since the current RETA API cannot handle a variable size, any query/update
command causes the indirection table to be silently resized to
RSS_INDIRECTION_TABLE_SIZE entries regardless of its original size.

Also due to the underlying type of the configuration structure, the maximum
size is limited to RSS_INDIRECTION_TABLE_SIZE (currently 128, at most 256
entries).

A port stop/start must be done to apply the new RETA configuration.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
This commit is contained in:
Nelio Laranjeiro 2015-11-02 19:11:57 +01:00 committed by Thomas Monjalon
parent 647d1eaf75
commit 634efbc2c8
6 changed files with 234 additions and 46 deletions

View File

@ -133,6 +133,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
rte_free((*priv->rss_conf)[i]);
rte_free(priv->rss_conf);
}
if (priv->reta_idx != NULL)
rte_free(priv->reta_idx);
priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
@ -160,6 +162,8 @@ static const struct eth_dev_ops mlx5_dev_ops = {
.mac_addr_remove = mlx5_mac_addr_remove,
.mac_addr_add = mlx5_mac_addr_add,
.mtu_set = mlx5_dev_set_mtu,
.reta_update = mlx5_dev_rss_reta_update,
.reta_query = mlx5_dev_rss_reta_query,
.rss_hash_update = mlx5_rss_hash_update,
.rss_hash_conf_get = mlx5_rss_hash_conf_get,
};
@ -373,7 +377,9 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
DEBUG("L2 tunnel checksum offloads are %ssupported",
(priv->hw_csum_l2tun ? "" : "not "));
priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
priv->ind_table_max_size =
RTE_MIN((unsigned int)RSS_INDIRECTION_TABLE_SIZE,
exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size);
DEBUG("maximum RX indirection table size is %u",
priv->ind_table_max_size);

View File

@ -118,6 +118,8 @@ struct priv {
/* RSS configuration array indexed by hash RX queue type. */
struct rte_eth_rss_conf *(*rss_conf)[];
struct rte_intr_handle intr_handle; /* Interrupt handler. */
unsigned int (*reta_idx)[]; /* RETA index table. */
unsigned int reta_idx_n; /* RETA index size. */
rte_spinlock_t lock; /* Lock for control functions. */
};
@ -184,6 +186,11 @@ int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int,
uint64_t);
int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
int priv_rss_reta_index_resize(struct priv *, unsigned int);
int mlx5_dev_rss_reta_query(struct rte_eth_dev *,
struct rte_eth_rss_reta_entry64 *, uint16_t);
int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
struct rte_eth_rss_reta_entry64 *, uint16_t);
/* mlx5_rxmode.c */

View File

@ -410,6 +410,9 @@ dev_configure(struct rte_eth_dev *dev)
struct priv *priv = dev->data->dev_private;
unsigned int rxqs_n = dev->data->nb_rx_queues;
unsigned int txqs_n = dev->data->nb_tx_queues;
unsigned int i;
unsigned int j;
unsigned int reta_idx_n;
priv->rxqs = (void *)dev->data->rx_queues;
priv->txqs = (void *)dev->data->tx_queues;
@ -418,11 +421,31 @@ dev_configure(struct rte_eth_dev *dev)
(void *)dev, priv->txqs_n, txqs_n);
priv->txqs_n = txqs_n;
}
if (rxqs_n > priv->ind_table_max_size) {
ERROR("cannot handle this many RX queues (%u)", rxqs_n);
return EINVAL;
}
if (rxqs_n == priv->rxqs_n)
return 0;
INFO("%p: RX queues number update: %u -> %u",
(void *)dev, priv->rxqs_n, rxqs_n);
priv->rxqs_n = rxqs_n;
/* If the requested number of RX queues is not a power of two, use the
* maximum indirection table size for better balancing.
* The result is always rounded to the next power of two. */
reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
priv->ind_table_max_size :
rxqs_n));
if (priv_rss_reta_index_resize(priv, reta_idx_n))
return ENOMEM;
/* When the number of RX queues is not a power of two, the remaining
* table entries are padded with reused WQs and hashes are not spread
* uniformly. */
for (i = 0, j = 0; (i != reta_idx_n); ++i) {
(*priv->reta_idx)[i] = j;
if (++j == rxqs_n)
j = 0;
}
return 0;
}
@ -494,6 +517,12 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
0);
if (priv_get_ifname(priv, &ifname) == 0)
info->if_index = if_nametoindex(ifname);
/* FIXME: RETA update/query API expects the callee to know the size of
* the indirection table, for this PMD the size varies depending on
* the number of RX queues, it becomes impossible to find the correct
* size if it is not fixed.
* The API should be updated to solve this problem. */
info->reta_size = priv->ind_table_max_size;
priv_unlock(priv);
}

View File

@ -211,3 +211,166 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
priv_unlock(priv);
return 0;
}
/**
 * Allocate/reallocate RETA index table.
 *
 * Nothing is done when the table already holds reta_size entries. When the
 * table grows, the newly added entries are zeroed. On allocation failure,
 * priv->reta_idx and priv->reta_idx_n are left unmodified.
 *
 * @param priv
 *   Pointer to private structure.
 * @param reta_size
 *   The size of the array to allocate (number of entries).
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size)
{
	const unsigned int prev_n = priv->reta_idx_n;
	unsigned int (*table)[];

	/* Already the requested size, keep the current table as is. */
	if (reta_size == prev_n)
		return 0;
	table = rte_realloc(priv->reta_idx,
			    reta_size * sizeof((*table)[0]), 0);
	if (table == NULL)
		return ENOMEM;
	priv->reta_idx = table;
	priv->reta_idx_n = reta_size;
	/* Only the tail added by a growing table needs to be cleared. */
	if (reta_size > prev_n)
		memset(&(*table)[prev_n], 0,
		       (reta_size - prev_n) * sizeof((*table)[0]));
	return 0;
}
/**
 * Query RETA table.
 *
 * The internal index table is resized to priv->ind_table_max_size entries
 * before being read (see RETA comment in mlx5_dev_infos_get()), then the
 * first reta_size entries are copied into reta_conf. Every requested entry
 * is filled, whether or not its mask bit is set.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] reta_conf
 *   Pointer to the first RETA configuration structure.
 * @param reta_size
 *   Number of entries.
 *
 * @return
 *   0 on success, errno value on failure (EINVAL when reta_size exceeds the
 *   maximum indirection table size).
 */
static int
priv_dev_rss_reta_query(struct priv *priv,
			struct rte_eth_rss_reta_entry64 *reta_conf,
			unsigned int reta_size)
{
	unsigned int idx;
	unsigned int i;
	int ret;

	/* The index table holds ind_table_max_size entries once resized
	 * below, reading more than that would run past its end. */
	if (reta_size > priv->ind_table_max_size)
		return EINVAL;
	/* See RETA comment in mlx5_dev_infos_get(). */
	ret = priv_rss_reta_index_resize(priv, priv->ind_table_max_size);
	if (ret)
		return ret;
	/* Fill each entry of the table even if its bit is not set. */
	for (i = 0; (i != reta_size); ++i) {
		idx = i / RTE_RETA_GROUP_SIZE;
		reta_conf[idx].reta[i % RTE_RETA_GROUP_SIZE] =
			(*priv->reta_idx)[i];
	}
	return 0;
}
/**
 * Update RETA table.
 *
 * The internal index table is resized to priv->ind_table_max_size entries
 * before being written (see RETA comment in mlx5_dev_infos_get()). Only
 * entries whose bit is set in the mask of their group are updated, the
 * others keep their current value.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] reta_conf
 *   Pointer to the first RETA configuration structure.
 * @param reta_size
 *   Number of entries.
 *
 * @return
 *   0 on success, errno value on failure (EINVAL when reta_size exceeds the
 *   maximum indirection table size).
 */
static int
priv_dev_rss_reta_update(struct priv *priv,
			 struct rte_eth_rss_reta_entry64 *reta_conf,
			 unsigned int reta_size)
{
	unsigned int idx;
	unsigned int i;
	unsigned int pos;
	int ret;

	/* The index table holds ind_table_max_size entries once resized
	 * below, writing more than that would run past its end. */
	if (reta_size > priv->ind_table_max_size)
		return EINVAL;
	/* See RETA comment in mlx5_dev_infos_get(). */
	ret = priv_rss_reta_index_resize(priv, priv->ind_table_max_size);
	if (ret)
		return ret;
	for (i = 0; (i != reta_size); ++i) {
		idx = i / RTE_RETA_GROUP_SIZE;
		pos = i % RTE_RETA_GROUP_SIZE;
		/* Each group carries its own mask with one bit per entry of
		 * that group, hence the bit to test is the position inside
		 * the group and not the absolute entry index (which would
		 * also shift past the width of the mask for groups other
		 * than the first one). */
		if (((reta_conf[idx].mask >> pos) & 0x1) == 0)
			continue;
		assert(reta_conf[idx].reta[pos] < priv->rxqs_n);
		(*priv->reta_idx)[i] = reta_conf[idx].reta[pos];
	}
	return 0;
}
/**
 * DPDK callback to get the RETA indirection table.
 *
 * The query itself is performed by priv_dev_rss_reta_query() while holding
 * the private structure lock.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param reta_conf
 *   Pointer to RETA configuration structure array.
 * @param reta_size
 *   Size of the RETA table.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_rss_reta_query(struct rte_eth_dev *dev,
			struct rte_eth_rss_reta_entry64 *reta_conf,
			uint16_t reta_size)
{
	struct priv *const private = dev->data->dev_private;
	int err;

	priv_lock(private);
	err = priv_dev_rss_reta_query(private, reta_conf, reta_size);
	priv_unlock(private);
	/* The helper reports a positive errno value, negate it. */
	return -err;
}
/**
 * DPDK callback to update the RETA indirection table.
 *
 * The update itself is performed by priv_dev_rss_reta_update() while holding
 * the private structure lock.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param reta_conf
 *   Pointer to RETA configuration structure array.
 * @param reta_size
 *   Size of the RETA table.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
			 struct rte_eth_rss_reta_entry64 *reta_conf,
			 uint16_t reta_size)
{
	struct priv *const private = dev->data->dev_private;
	int err;

	priv_lock(private);
	err = priv_dev_rss_reta_update(private, reta_conf, reta_size);
	priv_unlock(private);
	/* The helper reports a positive errno value, negate it. */
	return -err;
}

View File

@ -258,26 +258,6 @@ hash_rxq_flow_attr(const struct hash_rxq *hash_rxq,
return flow_attr_size;
}
/**
 * Return the base-2 logarithm of the input value, rounded up.
 *
 * This is the smallest exponent e such that (1 << e) is greater than or
 * equal to the input value; 0 is returned for inputs 0 and 1.
 *
 * @param v
 *   Input value.
 *
 * @return
 *   Rounded-up base-2 logarithm of the input value.
 */
static unsigned int
log2above(unsigned int v)
{
	unsigned int exponent = 0;
	unsigned int inexact = 0;

	/* Count how many times the value can be halved and remember whether
	 * any bit was dropped on the way (value is not a power of two). */
	while (v > 1) {
		inexact |= (v & 1);
		v >>= 1;
		++exponent;
	}
	return (exponent + inexact);
}
/**
* Return the type corresponding to the n'th bit set.
*
@ -360,14 +340,7 @@ priv_make_ind_table_init(struct priv *priv,
int
priv_create_hash_rxqs(struct priv *priv)
{
/* If the requested number of WQs is not a power of two, use the
* maximum indirection table size for better balancing.
* The result is always rounded to the next power of two. */
unsigned int wqs_n =
(1 << log2above((priv->rxqs_n & (priv->rxqs_n - 1)) ?
priv->ind_table_max_size :
priv->rxqs_n));
struct ibv_exp_wq *wqs[wqs_n];
struct ibv_exp_wq *wqs[priv->reta_idx_n];
struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
unsigned int ind_tables_n =
priv_make_ind_table_init(priv, &ind_table_init);
@ -393,25 +366,15 @@ priv_create_hash_rxqs(struct priv *priv)
" indirection table cannot be created");
return EINVAL;
}
if ((wqs_n < priv->rxqs_n) || (wqs_n > priv->ind_table_max_size)) {
ERROR("cannot handle this many RX queues (%u)", priv->rxqs_n);
err = ERANGE;
goto error;
}
if (wqs_n != priv->rxqs_n) {
if (priv->rxqs_n & (priv->rxqs_n - 1)) {
INFO("%u RX queues are configured, consider rounding this"
" number to the next power of two for better balancing",
priv->rxqs_n);
DEBUG("indirection table extended to assume %u WQs", wqs_n);
}
/* When the number of RX queues is not a power of two, the remaining
* table entries are padded with reused WQs and hashes are not spread
* uniformly. */
for (i = 0, j = 0; (i != wqs_n); ++i) {
wqs[i] = (*priv->rxqs)[j]->wq;
if (++j == priv->rxqs_n)
j = 0;
DEBUG("indirection table extended to assume %u WQs",
priv->reta_idx_n);
}
for (i = 0; (i != priv->reta_idx_n); ++i)
wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq;
/* Get number of hash RX queues to configure. */
for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
hash_rxqs_n += ind_table_init[i].hash_types_n;
@ -436,8 +399,8 @@ priv_create_hash_rxqs(struct priv *priv)
unsigned int ind_tbl_size = ind_table_init[i].max_size;
struct ibv_exp_rwq_ind_table *ind_table;
if (wqs_n < ind_tbl_size)
ind_tbl_size = wqs_n;
if (priv->reta_idx_n < ind_tbl_size)
ind_tbl_size = priv->reta_idx_n;
ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
errno = 0;
ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,

View File

@ -161,4 +161,24 @@ pmd_drv_log_basename(const char *s)
\
snprintf(name, sizeof(name), __VA_ARGS__)
/**
 * Return the base-2 logarithm of the input value, rounded up.
 *
 * This is the smallest exponent e such that (1 << e) is greater than or
 * equal to the input value; 0 is returned for inputs 0 and 1.
 *
 * @param v
 *   Input value.
 *
 * @return
 *   Rounded-up base-2 logarithm of the input value.
 */
static inline unsigned int
log2above(unsigned int v)
{
	unsigned int shifts = 0;
	unsigned int lost_bit = 0;

	/* Halve the value until only its top bit remains; any bit shifted
	 * out means the value is not a power of two and the result must be
	 * rounded up. */
	while ((v >> 1) != 0) {
		if (v & 1)
			lost_bit = 1;
		v >>= 1;
		++shifts;
	}
	return (shifts + lost_bit);
}
#endif /* RTE_PMD_MLX5_UTILS_H_ */