2018-01-29 14:11:31 +01:00
|
|
|
/* SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
* Copyright 2017 6WIND S.A.
|
2018-03-20 21:20:35 +02:00
|
|
|
* Copyright 2017 Mellanox Technologies, Ltd
|
2017-09-01 10:07:00 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @file
|
|
|
|
* Rx queues configuration for mlx4 driver.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
/* Verbs headers do not support -pedantic. */
|
|
|
|
#ifdef PEDANTIC
|
|
|
|
#pragma GCC diagnostic ignored "-Wpedantic"
|
|
|
|
#endif
|
2017-10-19 18:11:09 +02:00
|
|
|
#include <infiniband/mlx4dv.h>
|
2017-09-01 10:07:00 +02:00
|
|
|
#include <infiniband/verbs.h>
|
|
|
|
#ifdef PEDANTIC
|
|
|
|
#pragma GCC diagnostic error "-Wpedantic"
|
|
|
|
#endif
|
|
|
|
|
2017-10-12 14:29:57 +02:00
|
|
|
#include <rte_byteorder.h>
|
2017-09-01 10:07:00 +02:00
|
|
|
#include <rte_common.h>
|
|
|
|
#include <rte_errno.h>
|
2018-01-22 00:16:22 +00:00
|
|
|
#include <rte_ethdev_driver.h>
|
2017-10-12 14:19:29 +02:00
|
|
|
#include <rte_flow.h>
|
2017-09-01 10:07:00 +02:00
|
|
|
#include <rte_malloc.h>
|
|
|
|
#include <rte_mbuf.h>
|
|
|
|
#include <rte_mempool.h>
|
|
|
|
|
|
|
|
#include "mlx4.h"
|
2018-01-30 16:34:52 +01:00
|
|
|
#include "mlx4_glue.h"
|
2017-10-12 14:19:27 +02:00
|
|
|
#include "mlx4_flow.h"
|
2017-09-01 10:07:00 +02:00
|
|
|
#include "mlx4_rxtx.h"
|
|
|
|
#include "mlx4_utils.h"
|
|
|
|
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
/**
|
|
|
|
* Historical RSS hash key.
|
|
|
|
*
|
|
|
|
* This used to be the default for mlx4 in Linux before v3.19 switched to
|
|
|
|
* generating random hash keys through netdev_rss_key_fill().
|
|
|
|
*
|
|
|
|
* It is used in this PMD for consistency with past DPDK releases but can
|
|
|
|
* now be overridden through user configuration.
|
|
|
|
*
|
|
|
|
* Note: this is not const to work around API quirks.
|
|
|
|
*/
|
|
|
|
uint8_t
|
|
|
|
mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE] = {
|
|
|
|
0x2c, 0xc6, 0x81, 0xd1,
|
|
|
|
0x5b, 0xdb, 0xf4, 0xf7,
|
|
|
|
0xfc, 0xa2, 0x83, 0x19,
|
|
|
|
0xdb, 0x1a, 0x3e, 0x94,
|
|
|
|
0x6b, 0x9e, 0x38, 0xd9,
|
|
|
|
0x2c, 0x9c, 0x03, 0xd1,
|
|
|
|
0xad, 0x99, 0x44, 0xa7,
|
|
|
|
0xd9, 0x56, 0x3d, 0x59,
|
|
|
|
0x06, 0x3c, 0x25, 0xf3,
|
|
|
|
0xfc, 0x1f, 0xdc, 0x2a,
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Obtain a RSS context with specified properties.
|
|
|
|
*
|
|
|
|
* Used when creating a flow rule targeting one or several Rx queues.
|
|
|
|
*
|
|
|
|
* If a matching RSS context already exists, it is returned with its
|
|
|
|
* reference count incremented.
|
|
|
|
*
|
|
|
|
* @param priv
|
|
|
|
* Pointer to private structure.
|
|
|
|
* @param fields
|
|
|
|
* Fields for RSS processing (Verbs format).
|
|
|
|
* @param[in] key
|
|
|
|
* Hash key to use (whose size is exactly MLX4_RSS_HASH_KEY_SIZE).
|
|
|
|
* @param queues
|
|
|
|
* Number of target queues.
|
|
|
|
* @param[in] queue_id
|
|
|
|
* Target queues.
|
|
|
|
*
|
|
|
|
* @return
|
|
|
|
* Pointer to RSS context on success, NULL otherwise and rte_errno is set.
|
|
|
|
*/
|
|
|
|
struct mlx4_rss *
|
|
|
|
mlx4_rss_get(struct priv *priv, uint64_t fields,
|
2018-04-25 17:27:50 +02:00
|
|
|
const uint8_t key[MLX4_RSS_HASH_KEY_SIZE],
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
uint16_t queues, const uint16_t queue_id[])
|
|
|
|
{
|
|
|
|
struct mlx4_rss *rss;
|
|
|
|
size_t queue_id_size = sizeof(queue_id[0]) * queues;
|
|
|
|
|
|
|
|
LIST_FOREACH(rss, &priv->rss, next)
|
|
|
|
if (fields == rss->fields &&
|
|
|
|
queues == rss->queues &&
|
|
|
|
!memcmp(key, rss->key, MLX4_RSS_HASH_KEY_SIZE) &&
|
|
|
|
!memcmp(queue_id, rss->queue_id, queue_id_size)) {
|
|
|
|
++rss->refcnt;
|
|
|
|
return rss;
|
|
|
|
}
|
|
|
|
rss = rte_malloc(__func__, offsetof(struct mlx4_rss, queue_id) +
|
|
|
|
queue_id_size, 0);
|
|
|
|
if (!rss)
|
|
|
|
goto error;
|
|
|
|
*rss = (struct mlx4_rss){
|
|
|
|
.priv = priv,
|
|
|
|
.refcnt = 1,
|
|
|
|
.usecnt = 0,
|
|
|
|
.qp = NULL,
|
|
|
|
.ind = NULL,
|
|
|
|
.fields = fields,
|
|
|
|
.queues = queues,
|
|
|
|
};
|
|
|
|
memcpy(rss->key, key, MLX4_RSS_HASH_KEY_SIZE);
|
|
|
|
memcpy(rss->queue_id, queue_id, queue_id_size);
|
|
|
|
LIST_INSERT_HEAD(&priv->rss, rss, next);
|
|
|
|
return rss;
|
|
|
|
error:
|
|
|
|
rte_errno = ENOMEM;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Release a RSS context instance.
|
|
|
|
*
|
|
|
|
* Used when destroying a flow rule targeting one or several Rx queues.
|
|
|
|
*
|
|
|
|
* This function decrements the reference count of the context and destroys
|
|
|
|
* it after reaching 0. The context must have no users at this point; all
|
|
|
|
* prior calls to mlx4_rss_attach() must have been followed by matching
|
|
|
|
* calls to mlx4_rss_detach().
|
|
|
|
*
|
|
|
|
* @param rss
|
|
|
|
* RSS context to release.
|
|
|
|
*/
|
2017-11-02 19:14:19 +01:00
|
|
|
void
|
|
|
|
mlx4_rss_put(struct mlx4_rss *rss)
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
{
|
|
|
|
assert(rss->refcnt);
|
|
|
|
if (--rss->refcnt)
|
|
|
|
return;
|
|
|
|
assert(!rss->usecnt);
|
|
|
|
assert(!rss->qp);
|
|
|
|
assert(!rss->ind);
|
|
|
|
LIST_REMOVE(rss, next);
|
|
|
|
rte_free(rss);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Attach a user to a RSS context instance.
|
|
|
|
*
|
|
|
|
* Used when the RSS QP and indirection table objects must be instantiated,
|
|
|
|
* that is, when a flow rule must be enabled.
|
|
|
|
*
|
|
|
|
* This function increments the usage count of the context.
|
|
|
|
*
|
|
|
|
* @param rss
|
|
|
|
* RSS context to attach to.
|
2017-10-19 18:11:07 +02:00
|
|
|
*
|
|
|
|
* @return
|
|
|
|
* 0 on success, a negative errno value otherwise and rte_errno is set.
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
*/
|
2017-11-02 19:14:19 +01:00
|
|
|
int
|
|
|
|
mlx4_rss_attach(struct mlx4_rss *rss)
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
{
|
|
|
|
assert(rss->refcnt);
|
|
|
|
if (rss->usecnt++) {
|
|
|
|
assert(rss->qp);
|
|
|
|
assert(rss->ind);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct ibv_wq *ind_tbl[rss->queues];
|
|
|
|
struct priv *priv = rss->priv;
|
|
|
|
const char *msg;
|
2017-10-19 18:11:09 +02:00
|
|
|
unsigned int i = 0;
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!rte_is_power_of_2(RTE_DIM(ind_tbl))) {
|
2017-10-19 18:11:07 +02:00
|
|
|
ret = EINVAL;
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
msg = "number of RSS queues must be a power of two";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
for (i = 0; i != RTE_DIM(ind_tbl); ++i) {
|
|
|
|
uint16_t id = rss->queue_id[i];
|
|
|
|
struct rxq *rxq = NULL;
|
|
|
|
|
|
|
|
if (id < priv->dev->data->nb_rx_queues)
|
|
|
|
rxq = priv->dev->data->rx_queues[id];
|
|
|
|
if (!rxq) {
|
2017-10-19 18:11:07 +02:00
|
|
|
ret = EINVAL;
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
msg = "RSS target queue is not configured";
|
|
|
|
goto error;
|
|
|
|
}
|
2017-10-19 18:11:09 +02:00
|
|
|
ret = mlx4_rxq_attach(rxq);
|
|
|
|
if (ret) {
|
|
|
|
ret = -ret;
|
|
|
|
msg = "unable to attach RSS target queue";
|
|
|
|
goto error;
|
|
|
|
}
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
ind_tbl[i] = rxq->wq;
|
|
|
|
}
|
2018-01-30 16:34:52 +01:00
|
|
|
rss->ind = mlx4_glue->create_rwq_ind_table
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
(priv->ctx,
|
|
|
|
&(struct ibv_rwq_ind_table_init_attr){
|
|
|
|
.log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)),
|
|
|
|
.ind_tbl = ind_tbl,
|
|
|
|
.comp_mask = 0,
|
|
|
|
});
|
|
|
|
if (!rss->ind) {
|
2017-10-19 18:11:07 +02:00
|
|
|
ret = errno ? errno : EINVAL;
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
msg = "RSS indirection table creation failure";
|
|
|
|
goto error;
|
|
|
|
}
|
2018-01-30 16:34:52 +01:00
|
|
|
rss->qp = mlx4_glue->create_qp_ex
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
(priv->ctx,
|
|
|
|
&(struct ibv_qp_init_attr_ex){
|
|
|
|
.comp_mask = (IBV_QP_INIT_ATTR_PD |
|
|
|
|
IBV_QP_INIT_ATTR_RX_HASH |
|
|
|
|
IBV_QP_INIT_ATTR_IND_TABLE),
|
|
|
|
.qp_type = IBV_QPT_RAW_PACKET,
|
|
|
|
.pd = priv->pd,
|
|
|
|
.rwq_ind_tbl = rss->ind,
|
|
|
|
.rx_hash_conf = {
|
|
|
|
.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
|
|
|
|
.rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE,
|
|
|
|
.rx_hash_key = rss->key,
|
|
|
|
.rx_hash_fields_mask = rss->fields,
|
|
|
|
},
|
|
|
|
});
|
|
|
|
if (!rss->qp) {
|
2017-10-19 18:11:07 +02:00
|
|
|
ret = errno ? errno : EINVAL;
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
msg = "RSS hash QP creation failure";
|
|
|
|
goto error;
|
|
|
|
}
|
2018-01-30 16:34:52 +01:00
|
|
|
ret = mlx4_glue->modify_qp
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
(rss->qp,
|
|
|
|
&(struct ibv_qp_attr){
|
|
|
|
.qp_state = IBV_QPS_INIT,
|
|
|
|
.port_num = priv->port,
|
|
|
|
},
|
|
|
|
IBV_QP_STATE | IBV_QP_PORT);
|
|
|
|
if (ret) {
|
|
|
|
msg = "failed to switch RSS hash QP to INIT state";
|
|
|
|
goto error;
|
|
|
|
}
|
2018-01-30 16:34:52 +01:00
|
|
|
ret = mlx4_glue->modify_qp
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
(rss->qp,
|
|
|
|
&(struct ibv_qp_attr){
|
|
|
|
.qp_state = IBV_QPS_RTR,
|
|
|
|
},
|
|
|
|
IBV_QP_STATE);
|
|
|
|
if (ret) {
|
|
|
|
msg = "failed to switch RSS hash QP to RTR state";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
error:
|
2017-10-19 18:11:07 +02:00
|
|
|
if (rss->qp) {
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_qp(rss->qp));
|
2017-10-19 18:11:07 +02:00
|
|
|
rss->qp = NULL;
|
|
|
|
}
|
|
|
|
if (rss->ind) {
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
|
2017-10-19 18:11:07 +02:00
|
|
|
rss->ind = NULL;
|
|
|
|
}
|
2017-10-19 18:11:09 +02:00
|
|
|
while (i--)
|
|
|
|
mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
ERROR("mlx4: %s", msg);
|
|
|
|
--rss->usecnt;
|
2017-10-19 18:11:07 +02:00
|
|
|
rte_errno = ret;
|
|
|
|
return -ret;
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Detach a user from a RSS context instance.
|
|
|
|
*
|
|
|
|
* Used when disabling (not destroying) a flow rule.
|
|
|
|
*
|
|
|
|
* This function decrements the usage count of the context and destroys
|
|
|
|
* usage resources after reaching 0.
|
|
|
|
*
|
|
|
|
* @param rss
|
|
|
|
* RSS context to detach from.
|
|
|
|
*/
|
2017-11-02 19:14:19 +01:00
|
|
|
void
|
|
|
|
mlx4_rss_detach(struct mlx4_rss *rss)
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
{
|
2017-10-19 18:11:09 +02:00
|
|
|
struct priv *priv = rss->priv;
|
|
|
|
unsigned int i;
|
|
|
|
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
assert(rss->refcnt);
|
|
|
|
assert(rss->qp);
|
|
|
|
assert(rss->ind);
|
|
|
|
if (--rss->usecnt)
|
|
|
|
return;
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_qp(rss->qp));
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
rss->qp = NULL;
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
rss->ind = NULL;
|
2017-10-19 18:11:09 +02:00
|
|
|
for (i = 0; i != rss->queues; ++i)
|
|
|
|
mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
|
net/mlx4: add RSS flow rule action support
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.
Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.
The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.
The following hardware limitations apply to RSS contexts:
- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group
combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not
supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
long.
- The only supported hash algorithm is Toeplitz.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
2017-10-12 14:19:41 +02:00
|
|
|
}
|
|
|
|
|
2017-09-01 10:07:00 +02:00
|
|
|
/**
|
2017-10-19 18:11:09 +02:00
|
|
|
* Initialize common RSS context resources.
|
|
|
|
*
|
|
|
|
* Because ConnectX-3 hardware limitations require a fixed order in the
|
|
|
|
* indirection table, WQs must be allocated sequentially to be part of a
|
|
|
|
* common RSS context.
|
|
|
|
*
|
|
|
|
* Since a newly created WQ cannot be moved to a different context, this
|
|
|
|
* function allocates them all at once, one for each configured Rx queue,
|
|
|
|
* as well as all related resources (CQs and mbufs).
|
|
|
|
*
|
|
|
|
* This must therefore be done before creating any Rx flow rules relying on
|
|
|
|
* indirection tables.
|
|
|
|
*
|
|
|
|
* @param priv
|
|
|
|
* Pointer to private structure.
|
|
|
|
*
|
|
|
|
* @return
|
|
|
|
* 0 on success, a negative errno value otherwise and rte_errno is set.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
mlx4_rss_init(struct priv *priv)
|
|
|
|
{
|
|
|
|
struct rte_eth_dev *dev = priv->dev;
|
|
|
|
uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
|
|
|
|
uint32_t wq_num_prev = 0;
|
|
|
|
const char *msg;
|
|
|
|
unsigned int i;
|
|
|
|
int ret;
|
|
|
|
|
2018-04-26 18:26:13 +02:00
|
|
|
if (priv->rss_init)
|
|
|
|
return 0;
|
2017-10-19 18:11:09 +02:00
|
|
|
/* Prepare range for RSS contexts before creating the first WQ. */
|
2018-01-30 16:34:52 +01:00
|
|
|
ret = mlx4_glue->dv_set_context_attr
|
|
|
|
(priv->ctx,
|
|
|
|
MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ,
|
|
|
|
&log2_range);
|
2017-10-19 18:11:09 +02:00
|
|
|
if (ret) {
|
|
|
|
ERROR("cannot set up range size for RSS context to %u"
|
|
|
|
" (for %u Rx queues), error: %s",
|
|
|
|
1 << log2_range, dev->data->nb_rx_queues, strerror(ret));
|
|
|
|
rte_errno = ret;
|
|
|
|
return -ret;
|
|
|
|
}
|
|
|
|
for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
|
|
|
|
struct rxq *rxq = priv->dev->data->rx_queues[i];
|
|
|
|
struct ibv_cq *cq;
|
|
|
|
struct ibv_wq *wq;
|
|
|
|
uint32_t wq_num;
|
|
|
|
|
|
|
|
/* Attach the configured Rx queues. */
|
|
|
|
if (rxq) {
|
|
|
|
assert(!rxq->usecnt);
|
|
|
|
ret = mlx4_rxq_attach(rxq);
|
|
|
|
if (!ret) {
|
|
|
|
wq_num = rxq->wq->wq_num;
|
|
|
|
goto wq_num_check;
|
|
|
|
}
|
|
|
|
ret = -ret;
|
|
|
|
msg = "unable to create Rx queue resources";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* WQs are temporarily allocated for unconfigured Rx queues
|
|
|
|
* to maintain proper index alignment in indirection table
|
|
|
|
* by skipping unused WQ numbers.
|
|
|
|
*
|
|
|
|
* The reason this works at all even though these WQs are
|
|
|
|
* immediately destroyed is that WQNs are allocated
|
|
|
|
* sequentially and are guaranteed to never be reused in the
|
|
|
|
* same context by the underlying implementation.
|
|
|
|
*/
|
2018-01-30 16:34:52 +01:00
|
|
|
cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
|
2017-10-19 18:11:09 +02:00
|
|
|
if (!cq) {
|
|
|
|
ret = ENOMEM;
|
|
|
|
msg = "placeholder CQ creation failure";
|
|
|
|
goto error;
|
|
|
|
}
|
2018-01-30 16:34:52 +01:00
|
|
|
wq = mlx4_glue->create_wq
|
2017-10-19 18:11:09 +02:00
|
|
|
(priv->ctx,
|
|
|
|
&(struct ibv_wq_init_attr){
|
|
|
|
.wq_type = IBV_WQT_RQ,
|
|
|
|
.max_wr = 1,
|
|
|
|
.max_sge = 1,
|
|
|
|
.pd = priv->pd,
|
|
|
|
.cq = cq,
|
|
|
|
});
|
|
|
|
if (wq) {
|
|
|
|
wq_num = wq->wq_num;
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_wq(wq));
|
2017-10-19 18:11:09 +02:00
|
|
|
} else {
|
|
|
|
wq_num = 0; /* Shut up GCC 4.8 warnings. */
|
|
|
|
}
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_cq(cq));
|
2017-10-19 18:11:09 +02:00
|
|
|
if (!wq) {
|
|
|
|
ret = ENOMEM;
|
|
|
|
msg = "placeholder WQ creation failure";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
wq_num_check:
|
|
|
|
/*
|
|
|
|
* While guaranteed by the implementation, make sure WQ
|
|
|
|
* numbers are really sequential (as the saying goes,
|
|
|
|
* trust, but verify).
|
|
|
|
*/
|
|
|
|
if (i && wq_num - wq_num_prev != 1) {
|
|
|
|
if (rxq)
|
|
|
|
mlx4_rxq_detach(rxq);
|
|
|
|
ret = ERANGE;
|
|
|
|
msg = "WQ numbers are not sequential";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
wq_num_prev = wq_num;
|
|
|
|
}
|
2018-04-26 18:26:13 +02:00
|
|
|
priv->rss_init = 1;
|
2017-10-19 18:11:09 +02:00
|
|
|
return 0;
|
|
|
|
error:
|
|
|
|
ERROR("cannot initialize common RSS resources (queue %u): %s: %s",
|
|
|
|
i, msg, strerror(ret));
|
|
|
|
while (i--) {
|
|
|
|
struct rxq *rxq = priv->dev->data->rx_queues[i];
|
|
|
|
|
|
|
|
if (rxq)
|
|
|
|
mlx4_rxq_detach(rxq);
|
|
|
|
}
|
|
|
|
rte_errno = ret;
|
|
|
|
return -ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Release common RSS context resources.
|
|
|
|
*
|
|
|
|
* As the reverse of mlx4_rss_init(), this must be done after removing all
|
|
|
|
* flow rules relying on indirection tables.
|
|
|
|
*
|
|
|
|
* @param priv
|
|
|
|
* Pointer to private structure.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
mlx4_rss_deinit(struct priv *priv)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
2018-04-26 18:26:13 +02:00
|
|
|
if (!priv->rss_init)
|
|
|
|
return;
|
2017-10-19 18:11:09 +02:00
|
|
|
for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
|
|
|
|
struct rxq *rxq = priv->dev->data->rx_queues[i];
|
|
|
|
|
|
|
|
if (rxq) {
|
|
|
|
assert(rxq->usecnt == 1);
|
|
|
|
mlx4_rxq_detach(rxq);
|
|
|
|
}
|
|
|
|
}
|
2018-04-26 18:26:13 +02:00
|
|
|
priv->rss_init = 0;
|
2017-10-19 18:11:09 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Attach a user to a Rx queue.
|
|
|
|
*
|
|
|
|
* Used when the resources of an Rx queue must be instantiated for it to
|
|
|
|
* become in a usable state.
|
|
|
|
*
|
|
|
|
* This function increments the usage count of the Rx queue.
|
2017-09-01 10:07:00 +02:00
|
|
|
*
|
|
|
|
* @param rxq
|
|
|
|
* Pointer to Rx queue structure.
|
|
|
|
*
|
|
|
|
* @return
|
|
|
|
* 0 on success, negative errno value otherwise and rte_errno is set.
|
|
|
|
*/
|
2017-10-19 18:11:09 +02:00
|
|
|
int
|
|
|
|
mlx4_rxq_attach(struct rxq *rxq)
|
2017-09-01 10:07:00 +02:00
|
|
|
{
|
2017-10-19 18:11:09 +02:00
|
|
|
if (rxq->usecnt++) {
|
|
|
|
assert(rxq->cq);
|
|
|
|
assert(rxq->wq);
|
|
|
|
assert(rxq->wqes);
|
|
|
|
assert(rxq->rq_db);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct priv *priv = rxq->priv;
|
net/mlx4: add new memory region support
This is the new design of Memory Region (MR) for mlx PMD, in order to:
- Accommodate the new memory hotplug model.
- Support non-contiguous Mempool.
There are multiple layers for MR search.
L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most
Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized
array by linear search. L0/L1 is in an inline function -
mlx4_mr_lookup_cache().
If L1 misses, the bottom-half function is called to look up the address
from the bigger local cache of the queue. This is L2 - mlx4_mr_addr2mr_bh()
and it is not an inline function. Data structure for L2 is the Binary Tree.
If L2 misses, the search falls into the slowest path which takes locks in
order to access global device cache (priv->mr.cache) which is also a B-tree
and caches the original MR list (priv->mr.mr_list) of the device. Unless
the global cache is overflowed, it is all-inclusive of the MR list. This is
L3 - mlx4_mr_lookup_dev(). The size of the L3 cache table is limited and
can't be expanded on the fly due to deadlock. Refer to the comments in the
code for the details - mr_lookup_dev(). If L3 is overflowed, the list will
have to be searched directly bypassing the cache although it is slower.
If L3 misses, a new MR for the address should be created -
mlx4_mr_create(). When it creates a new MR, it tries to register adjacent
memsegs as much as possible which are virtually contiguous around the
address. This must take two locks - memory_hotplug_lock and
priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any
allocation/free of memory inside.
In the free callback of the memory hotplug event, freed space is searched
from the MR list and corresponding bits are cleared from the bitmap of MRs.
This can fragment a MR and the MR will have multiple search entries in the
caches. Once there's a change by the event, the global cache must be
rebuilt and all the per-queue caches will be flushed as well. If memory is
frequently freed in run-time, that may cause jitter on dataplane processing
in the worst case by incurring MR cache flush and rebuild. But, it would be
the least probable scenario.
To guarantee the most optimal performance, it is highly recommended to use
an EAL option - '--socket-mem'. Then, the reserved memory will be pinned
and won't be freed dynamically. And it is also recommended to configure
per-lcore cache of Mempool. Even though there're many MRs for a device or
MRs are highly fragmented, the cache of Mempool will be much helpful to
reduce misses on per-queue caches anyway.
'--legacy-mem' is also supported.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
2018-05-09 04:09:06 -07:00
|
|
|
struct rte_eth_dev *dev = priv->dev;
|
2017-10-12 14:29:57 +02:00
|
|
|
const uint32_t elts_n = 1 << rxq->elts_n;
|
|
|
|
const uint32_t sges_n = 1 << rxq->sges_n;
|
|
|
|
struct rte_mbuf *(*elts)[elts_n] = rxq->elts;
|
2017-10-19 18:11:09 +02:00
|
|
|
struct mlx4dv_obj mlxdv;
|
|
|
|
struct mlx4dv_rwq dv_rwq;
|
2017-10-25 18:37:27 +03:00
|
|
|
struct mlx4dv_cq dv_cq = { .comp_mask = MLX4DV_CQ_MASK_UAR, };
|
2017-10-19 18:11:09 +02:00
|
|
|
const char *msg;
|
|
|
|
struct ibv_cq *cq = NULL;
|
|
|
|
struct ibv_wq *wq = NULL;
|
2018-03-25 20:19:29 +00:00
|
|
|
uint32_t create_flags = 0;
|
|
|
|
uint32_t comp_mask = 0;
|
2017-10-19 18:11:09 +02:00
|
|
|
volatile struct mlx4_wqe_data_seg (*wqes)[];
|
2017-09-01 10:07:00 +02:00
|
|
|
unsigned int i;
|
2017-10-19 18:11:09 +02:00
|
|
|
int ret;
|
2017-09-01 10:07:00 +02:00
|
|
|
|
2017-10-12 14:29:57 +02:00
|
|
|
assert(rte_is_power_of_2(elts_n));
|
2018-01-30 16:34:52 +01:00
|
|
|
cq = mlx4_glue->create_cq(priv->ctx, elts_n / sges_n, NULL,
|
|
|
|
rxq->channel, 0);
|
2017-10-19 18:11:09 +02:00
|
|
|
if (!cq) {
|
|
|
|
ret = ENOMEM;
|
|
|
|
msg = "CQ creation failure";
|
|
|
|
goto error;
|
|
|
|
}
|
2018-03-25 20:19:29 +00:00
|
|
|
/* By default, FCS (CRC) is stripped by hardware. */
|
|
|
|
if (rxq->crc_present) {
|
|
|
|
create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
|
|
|
|
comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
|
|
|
|
}
|
2018-01-30 16:34:52 +01:00
|
|
|
wq = mlx4_glue->create_wq
|
2017-10-19 18:11:09 +02:00
|
|
|
(priv->ctx,
|
|
|
|
&(struct ibv_wq_init_attr){
|
|
|
|
.wq_type = IBV_WQT_RQ,
|
|
|
|
.max_wr = elts_n / sges_n,
|
|
|
|
.max_sge = sges_n,
|
|
|
|
.pd = priv->pd,
|
|
|
|
.cq = cq,
|
2018-03-25 20:19:29 +00:00
|
|
|
.comp_mask = comp_mask,
|
|
|
|
.create_flags = create_flags,
|
2017-10-19 18:11:09 +02:00
|
|
|
});
|
|
|
|
if (!wq) {
|
|
|
|
ret = errno ? errno : EINVAL;
|
|
|
|
msg = "WQ creation failure";
|
|
|
|
goto error;
|
|
|
|
}
|
2018-01-30 16:34:52 +01:00
|
|
|
ret = mlx4_glue->modify_wq
|
2017-10-19 18:11:09 +02:00
|
|
|
(wq,
|
|
|
|
&(struct ibv_wq_attr){
|
|
|
|
.attr_mask = IBV_WQ_ATTR_STATE,
|
|
|
|
.wq_state = IBV_WQS_RDY,
|
|
|
|
});
|
|
|
|
if (ret) {
|
|
|
|
msg = "WQ state change to IBV_WQS_RDY failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
/* Retrieve device queue information. */
|
|
|
|
mlxdv.cq.in = cq;
|
|
|
|
mlxdv.cq.out = &dv_cq;
|
|
|
|
mlxdv.rwq.in = wq;
|
|
|
|
mlxdv.rwq.out = &dv_rwq;
|
2018-01-30 16:34:52 +01:00
|
|
|
ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_RWQ | MLX4DV_OBJ_CQ);
|
2017-10-19 18:11:09 +02:00
|
|
|
if (ret) {
|
|
|
|
msg = "failed to obtain device information from WQ/CQ objects";
|
|
|
|
goto error;
|
|
|
|
}
|
net/mlx4: add new memory region support
This is the new design of Memory Region (MR) for mlx PMD, in order to:
- Accommodate the new memory hotplug model.
- Support non-contiguous Mempool.
There are multiple layers for MR search.
L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most
Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized
array by linear search. L0/L1 is in an inline function -
mlx4_mr_lookup_cache().
If L1 misses, the bottom-half function is called to look up the address
from the bigger local cache of the queue. This is L2 - mlx4_mr_addr2mr_bh()
and it is not an inline function. Data structure for L2 is the Binary Tree.
If L2 misses, the search falls into the slowest path which takes locks in
order to access global device cache (priv->mr.cache) which is also a B-tree
and caches the original MR list (priv->mr.mr_list) of the device. Unless
the global cache is overflowed, it is all-inclusive of the MR list. This is
L3 - mlx4_mr_lookup_dev(). The size of the L3 cache table is limited and
can't be expanded on the fly due to deadlock. Refer to the comments in the
code for the details - mr_lookup_dev(). If L3 is overflowed, the list will
have to be searched directly bypassing the cache although it is slower.
If L3 misses, a new MR for the address should be created -
mlx4_mr_create(). When it creates a new MR, it tries to register adjacent
memsegs as much as possible which are virtually contiguous around the
address. This must take two locks - memory_hotplug_lock and
priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any
allocation/free of memory inside.
In the free callback of the memory hotplug event, freed space is searched
from the MR list and corresponding bits are cleared from the bitmap of MRs.
This can fragment a MR and the MR will have multiple search entries in the
caches. Once there's a change by the event, the global cache must be
rebuilt and all the per-queue caches will be flushed as well. If memory is
frequently freed in run-time, that may cause jitter on dataplane processing
in the worst case by incurring MR cache flush and rebuild. But, it would be
the least probable scenario.
To guarantee the most optimal performance, it is highly recommended to use
an EAL option - '--socket-mem'. Then, the reserved memory will be pinned
and won't be freed dynamically. And it is also recommended to configure
per-lcore cache of Mempool. Even though there're many MRs for a device or
MRs are highly fragmented, the cache of Mempool will be much helpful to
reduce misses on per-queue caches anyway.
'--legacy-mem' is also supported.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
2018-05-09 04:09:06 -07:00
|
|
|
/* Pre-register Rx mempool. */
|
|
|
|
DEBUG("port %u Rx queue %u registering mp %s having %u chunks",
|
|
|
|
priv->dev->data->port_id, rxq->stats.idx,
|
|
|
|
rxq->mp->name, rxq->mp->nb_mem_chunks);
|
|
|
|
mlx4_mr_update_mp(dev, &rxq->mr_ctrl, rxq->mp);
|
2017-10-19 18:11:09 +02:00
|
|
|
wqes = (volatile struct mlx4_wqe_data_seg (*)[])
|
|
|
|
((uintptr_t)dv_rwq.buf.buf + dv_rwq.rq.offset);
|
2017-10-12 14:19:38 +02:00
|
|
|
for (i = 0; i != RTE_DIM(*elts); ++i) {
|
2017-10-19 18:11:09 +02:00
|
|
|
volatile struct mlx4_wqe_data_seg *scat = &(*wqes)[i];
|
2017-09-01 10:07:00 +02:00
|
|
|
struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
|
|
|
|
|
|
|
|
if (buf == NULL) {
|
2017-10-12 14:19:38 +02:00
|
|
|
while (i--) {
|
2017-10-12 14:29:57 +02:00
|
|
|
rte_pktmbuf_free_seg((*elts)[i]);
|
|
|
|
(*elts)[i] = NULL;
|
2017-10-12 14:19:38 +02:00
|
|
|
}
|
2017-10-19 18:11:09 +02:00
|
|
|
ret = ENOMEM;
|
|
|
|
msg = "cannot allocate mbuf";
|
|
|
|
goto error;
|
2017-09-01 10:07:00 +02:00
|
|
|
}
|
|
|
|
/* Headroom is reserved by rte_pktmbuf_alloc(). */
|
|
|
|
assert(buf->data_off == RTE_PKTMBUF_HEADROOM);
|
|
|
|
/* Buffer is supposed to be empty. */
|
|
|
|
assert(rte_pktmbuf_data_len(buf) == 0);
|
|
|
|
assert(rte_pktmbuf_pkt_len(buf) == 0);
|
2017-10-12 14:29:57 +02:00
|
|
|
/* Only the first segment keeps headroom. */
|
|
|
|
if (i % sges_n)
|
|
|
|
buf->data_off = 0;
|
|
|
|
buf->port = rxq->port_id;
|
|
|
|
buf->data_len = rte_pktmbuf_tailroom(buf);
|
|
|
|
buf->pkt_len = rte_pktmbuf_tailroom(buf);
|
|
|
|
buf->nb_segs = 1;
|
|
|
|
*scat = (struct mlx4_wqe_data_seg){
|
|
|
|
.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
|
|
|
|
uintptr_t)),
|
|
|
|
.byte_count = rte_cpu_to_be_32(buf->data_len),
|
net/mlx4: add new memory region support
This is the new design of Memory Region (MR) for mlx PMD, in order to:
- Accommodate the new memory hotplug model.
- Support non-contiguous Mempool.
There are multiple layers for MR search.
L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most
Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized
array by linear search. L0/L1 is in an inline function -
mlx4_mr_lookup_cache().
If L1 misses, the bottom-half function is called to look up the address
from the bigger local cache of the queue. This is L2 - mlx4_mr_addr2mr_bh()
and it is not an inline function. Data structure for L2 is the Binary Tree.
If L2 misses, the search falls into the slowest path which takes locks in
order to access global device cache (priv->mr.cache) which is also a B-tree
and caches the original MR list (priv->mr.mr_list) of the device. Unless
the global cache is overflowed, it is all-inclusive of the MR list. This is
L3 - mlx4_mr_lookup_dev(). The size of the L3 cache table is limited and
can't be expanded on the fly due to deadlock. Refer to the comments in the
code for the details - mr_lookup_dev(). If L3 is overflowed, the list will
have to be searched directly bypassing the cache although it is slower.
If L3 misses, a new MR for the address should be created -
mlx4_mr_create(). When it creates a new MR, it tries to register adjacent
memsegs as much as possible which are virtually contiguous around the
address. This must take two locks - memory_hotplug_lock and
priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any
allocation/free of memory inside.
In the free callback of the memory hotplug event, freed space is searched
from the MR list and corresponding bits are cleared from the bitmap of MRs.
This can fragment a MR and the MR will have multiple search entries in the
caches. Once there's a change by the event, the global cache must be
rebuilt and all the per-queue caches will be flushed as well. If memory is
frequently freed in run-time, that may cause jitter on dataplane processing
in the worst case by incurring MR cache flush and rebuild. But, it would be
the least probable scenario.
To guarantee the most optimal performance, it is highly recommended to use
an EAL option - '--socket-mem'. Then, the reserved memory will be pinned
and won't be freed dynamically. And it is also recommended to configure
per-lcore cache of Mempool. Even though there're many MRs for a device or
MRs are highly fragmented, the cache of Mempool will be much helpful to
reduce misses on per-queue caches anyway.
'--legacy-mem' is also supported.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
2018-05-09 04:09:06 -07:00
|
|
|
.lkey = mlx4_rx_mb2mr(rxq, buf),
|
2017-10-12 14:29:57 +02:00
|
|
|
};
|
|
|
|
(*elts)[i] = buf;
|
2017-09-01 10:07:00 +02:00
|
|
|
}
|
2017-10-12 14:29:57 +02:00
|
|
|
DEBUG("%p: allocated and configured %u segments (max %u packets)",
|
|
|
|
(void *)rxq, elts_n, elts_n / sges_n);
|
2017-10-19 18:11:09 +02:00
|
|
|
rxq->cq = cq;
|
|
|
|
rxq->wq = wq;
|
|
|
|
rxq->wqes = wqes;
|
|
|
|
rxq->rq_db = dv_rwq.rdb;
|
|
|
|
rxq->mcq.buf = dv_cq.buf.buf;
|
|
|
|
rxq->mcq.cqe_cnt = dv_cq.cqe_cnt;
|
|
|
|
rxq->mcq.set_ci_db = dv_cq.set_ci_db;
|
|
|
|
rxq->mcq.cqe_64 = (dv_cq.cqe_size & 64) ? 1 : 0;
|
2017-10-25 18:37:27 +03:00
|
|
|
rxq->mcq.arm_db = dv_cq.arm_db;
|
|
|
|
rxq->mcq.arm_sn = dv_cq.arm_sn;
|
|
|
|
rxq->mcq.cqn = dv_cq.cqn;
|
|
|
|
rxq->mcq.cq_uar = dv_cq.cq_uar;
|
|
|
|
rxq->mcq.cq_db_reg = (uint8_t *)dv_cq.cq_uar + MLX4_CQ_DOORBELL;
|
2017-10-19 18:11:09 +02:00
|
|
|
/* Update doorbell counter. */
|
|
|
|
rxq->rq_ci = elts_n / sges_n;
|
|
|
|
rte_wmb();
|
|
|
|
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
|
2017-09-01 10:07:00 +02:00
|
|
|
return 0;
|
2017-10-19 18:11:09 +02:00
|
|
|
error:
|
|
|
|
if (wq)
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_wq(wq));
|
2017-10-19 18:11:09 +02:00
|
|
|
if (cq)
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_cq(cq));
|
2018-04-26 18:26:13 +02:00
|
|
|
--rxq->usecnt;
|
2017-10-19 18:11:09 +02:00
|
|
|
rte_errno = ret;
|
|
|
|
ERROR("error while attaching Rx queue %p: %s: %s",
|
|
|
|
(void *)rxq, msg, strerror(ret));
|
|
|
|
return -ret;
|
2017-09-01 10:07:00 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2017-10-19 18:11:09 +02:00
|
|
|
* Detach a user from a Rx queue.
|
|
|
|
*
|
|
|
|
* This function decrements the usage count of the Rx queue and destroys
|
|
|
|
* usage resources after reaching 0.
|
2017-09-01 10:07:00 +02:00
|
|
|
*
|
|
|
|
* @param rxq
|
|
|
|
* Pointer to Rx queue structure.
|
|
|
|
*/
|
2017-10-19 18:11:09 +02:00
|
|
|
void
|
|
|
|
mlx4_rxq_detach(struct rxq *rxq)
|
2017-09-01 10:07:00 +02:00
|
|
|
{
|
|
|
|
unsigned int i;
|
2017-10-12 14:29:57 +02:00
|
|
|
struct rte_mbuf *(*elts)[1 << rxq->elts_n] = rxq->elts;
|
2017-09-01 10:07:00 +02:00
|
|
|
|
2017-10-19 18:11:09 +02:00
|
|
|
if (--rxq->usecnt)
|
|
|
|
return;
|
|
|
|
rxq->rq_ci = 0;
|
|
|
|
memset(&rxq->mcq, 0, sizeof(rxq->mcq));
|
|
|
|
rxq->rq_db = NULL;
|
|
|
|
rxq->wqes = NULL;
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_wq(rxq->wq));
|
2017-10-19 18:11:09 +02:00
|
|
|
rxq->wq = NULL;
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_cq(rxq->cq));
|
2017-10-19 18:11:09 +02:00
|
|
|
rxq->cq = NULL;
|
2017-10-12 14:29:57 +02:00
|
|
|
DEBUG("%p: freeing Rx queue elements", (void *)rxq);
|
2017-10-12 14:19:38 +02:00
|
|
|
for (i = 0; (i != RTE_DIM(*elts)); ++i) {
|
2017-10-12 14:29:57 +02:00
|
|
|
if (!(*elts)[i])
|
2017-10-12 14:19:38 +02:00
|
|
|
continue;
|
2017-10-12 14:29:57 +02:00
|
|
|
rte_pktmbuf_free_seg((*elts)[i]);
|
|
|
|
(*elts)[i] = NULL;
|
2017-10-12 14:19:38 +02:00
|
|
|
}
|
2017-09-01 10:07:00 +02:00
|
|
|
}
|
|
|
|
|
2018-01-10 11:17:03 +02:00
|
|
|
/**
|
|
|
|
* Returns the per-queue supported offloads.
|
|
|
|
*
|
|
|
|
* @param priv
|
|
|
|
* Pointer to private structure.
|
|
|
|
*
|
|
|
|
* @return
|
|
|
|
* Supported Tx offloads.
|
|
|
|
*/
|
|
|
|
uint64_t
|
|
|
|
mlx4_get_rx_queue_offloads(struct priv *priv)
|
|
|
|
{
|
2018-05-08 12:26:03 +00:00
|
|
|
uint64_t offloads = DEV_RX_OFFLOAD_SCATTER |
|
2018-06-29 13:41:13 +01:00
|
|
|
DEV_RX_OFFLOAD_CRC_STRIP |
|
|
|
|
DEV_RX_OFFLOAD_KEEP_CRC;
|
2018-01-10 11:17:03 +02:00
|
|
|
|
|
|
|
if (priv->hw_csum)
|
|
|
|
offloads |= DEV_RX_OFFLOAD_CHECKSUM;
|
|
|
|
return offloads;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the per-port supported offloads.
|
|
|
|
*
|
|
|
|
* @param priv
|
|
|
|
* Pointer to private structure.
|
|
|
|
*
|
|
|
|
* @return
|
|
|
|
* Supported Rx offloads.
|
|
|
|
*/
|
|
|
|
uint64_t
|
|
|
|
mlx4_get_rx_port_offloads(struct priv *priv)
|
|
|
|
{
|
|
|
|
uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER;
|
|
|
|
|
|
|
|
(void)priv;
|
|
|
|
return offloads;
|
|
|
|
}
|
|
|
|
|
2017-09-01 10:07:00 +02:00
|
|
|
/**
|
2017-10-12 14:19:37 +02:00
|
|
|
* DPDK callback to configure a Rx queue.
|
2017-09-01 10:07:00 +02:00
|
|
|
*
|
|
|
|
* @param dev
|
|
|
|
* Pointer to Ethernet device structure.
|
2017-10-12 14:19:37 +02:00
|
|
|
* @param idx
|
|
|
|
* Rx queue index.
|
2017-09-01 10:07:00 +02:00
|
|
|
* @param desc
|
|
|
|
* Number of descriptors to configure in queue.
|
|
|
|
* @param socket
|
|
|
|
* NUMA socket on which memory must be allocated.
|
|
|
|
* @param[in] conf
|
|
|
|
* Thresholds parameters.
|
|
|
|
* @param mp
|
|
|
|
* Memory pool for buffer allocations.
|
|
|
|
*
|
|
|
|
* @return
|
|
|
|
* 0 on success, negative errno value otherwise and rte_errno is set.
|
|
|
|
*/
|
2017-10-12 14:19:37 +02:00
|
|
|
int
|
|
|
|
mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
|
|
|
|
unsigned int socket, const struct rte_eth_rxconf *conf,
|
|
|
|
struct rte_mempool *mp)
|
2017-09-01 10:07:00 +02:00
|
|
|
{
|
|
|
|
struct priv *priv = dev->data->dev_private;
|
2017-10-12 14:19:37 +02:00
|
|
|
uint32_t mb_len = rte_pktmbuf_data_room_size(mp);
|
2017-10-12 14:29:57 +02:00
|
|
|
struct rte_mbuf *(*elts)[rte_align32pow2(desc)];
|
2017-10-12 14:19:37 +02:00
|
|
|
struct rxq *rxq;
|
2017-10-12 14:19:38 +02:00
|
|
|
struct mlx4_malloc_vec vec[] = {
|
|
|
|
{
|
|
|
|
.align = RTE_CACHE_LINE_SIZE,
|
|
|
|
.size = sizeof(*rxq),
|
|
|
|
.addr = (void **)&rxq,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.align = RTE_CACHE_LINE_SIZE,
|
|
|
|
.size = sizeof(*elts),
|
|
|
|
.addr = (void **)&elts,
|
|
|
|
},
|
|
|
|
};
|
2017-09-01 10:07:00 +02:00
|
|
|
int ret;
|
2018-03-25 20:19:29 +00:00
|
|
|
uint32_t crc_present;
|
2018-05-10 19:56:55 +08:00
|
|
|
uint64_t offloads;
|
|
|
|
|
|
|
|
offloads = conf->offloads | dev->data->dev_conf.rxmode.offloads;
|
2017-09-01 10:07:00 +02:00
|
|
|
|
2017-10-12 14:19:37 +02:00
|
|
|
DEBUG("%p: configuring queue %u for %u descriptors",
|
|
|
|
(void *)dev, idx, desc);
|
2018-05-10 19:56:55 +08:00
|
|
|
|
2017-10-12 14:19:37 +02:00
|
|
|
if (idx >= dev->data->nb_rx_queues) {
|
|
|
|
rte_errno = EOVERFLOW;
|
|
|
|
ERROR("%p: queue index out of range (%u >= %u)",
|
|
|
|
(void *)dev, idx, dev->data->nb_rx_queues);
|
|
|
|
return -rte_errno;
|
|
|
|
}
|
|
|
|
rxq = dev->data->rx_queues[idx];
|
|
|
|
if (rxq) {
|
|
|
|
rte_errno = EEXIST;
|
|
|
|
ERROR("%p: Rx queue %u already configured, release it first",
|
|
|
|
(void *)dev, idx);
|
|
|
|
return -rte_errno;
|
|
|
|
}
|
|
|
|
if (!desc) {
|
2017-09-01 10:07:00 +02:00
|
|
|
rte_errno = EINVAL;
|
|
|
|
ERROR("%p: invalid number of Rx descriptors", (void *)dev);
|
2017-10-12 14:19:37 +02:00
|
|
|
return -rte_errno;
|
|
|
|
}
|
2017-10-12 14:29:57 +02:00
|
|
|
if (desc != RTE_DIM(*elts)) {
|
|
|
|
desc = RTE_DIM(*elts);
|
|
|
|
WARN("%p: increased number of descriptors in Rx queue %u"
|
|
|
|
" to the next power of two (%u)",
|
|
|
|
(void *)dev, idx, desc);
|
|
|
|
}
|
2018-03-25 20:19:29 +00:00
|
|
|
/* By default, FCS (CRC) is stripped by hardware. */
|
2018-06-29 13:41:13 +01:00
|
|
|
crc_present = 0;
|
|
|
|
if (rte_eth_dev_must_keep_crc(offloads)) {
|
|
|
|
if (priv->hw_fcs_strip) {
|
|
|
|
crc_present = 1;
|
|
|
|
} else {
|
|
|
|
WARN("%p: CRC stripping has been disabled but will still"
|
|
|
|
" be performed by hardware, make sure MLNX_OFED and"
|
|
|
|
" firmware are up to date",
|
|
|
|
(void *)dev);
|
|
|
|
}
|
2018-03-25 20:19:29 +00:00
|
|
|
}
|
|
|
|
DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
|
|
|
|
" incoming frames to hide it",
|
|
|
|
(void *)dev,
|
|
|
|
crc_present ? "disabled" : "enabled",
|
|
|
|
crc_present << 2);
|
2017-10-12 14:19:37 +02:00
|
|
|
/* Allocate and initialize Rx queue. */
|
2017-10-12 14:19:38 +02:00
|
|
|
mlx4_zmallocv_socket("RXQ", vec, RTE_DIM(vec), socket);
|
2017-10-12 14:19:37 +02:00
|
|
|
if (!rxq) {
|
|
|
|
ERROR("%p: unable to allocate queue index %u",
|
|
|
|
(void *)dev, idx);
|
|
|
|
return -rte_errno;
|
2017-09-01 10:07:00 +02:00
|
|
|
}
|
2017-10-12 14:19:37 +02:00
|
|
|
*rxq = (struct rxq){
|
|
|
|
.priv = priv,
|
|
|
|
.mp = mp,
|
|
|
|
.port_id = dev->data->port_id,
|
2017-10-12 14:29:57 +02:00
|
|
|
.sges_n = 0,
|
|
|
|
.elts_n = rte_log2_u32(desc),
|
2017-10-12 14:19:38 +02:00
|
|
|
.elts = elts,
|
2017-10-12 14:29:59 +02:00
|
|
|
/* Toggle Rx checksum offload if hardware supports it. */
|
2018-01-10 11:17:03 +02:00
|
|
|
.csum = priv->hw_csum &&
|
2018-05-10 19:56:55 +08:00
|
|
|
(offloads & DEV_RX_OFFLOAD_CHECKSUM),
|
2018-01-10 11:17:03 +02:00
|
|
|
.csum_l2tun = priv->hw_csum_l2tun &&
|
2018-05-10 19:56:55 +08:00
|
|
|
(offloads & DEV_RX_OFFLOAD_CHECKSUM),
|
2018-03-25 20:19:29 +00:00
|
|
|
.crc_present = crc_present,
|
2017-11-09 10:59:33 +02:00
|
|
|
.l2tun_offload = priv->hw_csum_l2tun,
|
2017-10-13 11:31:05 +02:00
|
|
|
.stats = {
|
|
|
|
.idx = idx,
|
|
|
|
},
|
2017-10-12 14:19:37 +02:00
|
|
|
.socket = socket,
|
|
|
|
};
|
2017-09-01 10:07:00 +02:00
|
|
|
/* Enable scattered packets support for this queue if necessary. */
|
|
|
|
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
|
|
|
|
if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
|
|
|
|
(mb_len - RTE_PKTMBUF_HEADROOM)) {
|
|
|
|
;
|
2018-05-10 19:56:55 +08:00
|
|
|
} else if (offloads & DEV_RX_OFFLOAD_SCATTER) {
|
2017-10-12 14:29:57 +02:00
|
|
|
uint32_t size =
|
|
|
|
RTE_PKTMBUF_HEADROOM +
|
|
|
|
dev->data->dev_conf.rxmode.max_rx_pkt_len;
|
|
|
|
uint32_t sges_n;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Determine the number of SGEs needed for a full packet
|
|
|
|
* and round it to the next power of two.
|
|
|
|
*/
|
|
|
|
sges_n = rte_log2_u32((size / mb_len) + !!(size % mb_len));
|
|
|
|
rxq->sges_n = sges_n;
|
|
|
|
/* Make sure sges_n did not overflow. */
|
|
|
|
size = mb_len * (1 << rxq->sges_n);
|
|
|
|
size -= RTE_PKTMBUF_HEADROOM;
|
|
|
|
if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
|
|
|
|
rte_errno = EOVERFLOW;
|
|
|
|
ERROR("%p: too many SGEs (%u) needed to handle"
|
|
|
|
" requested maximum packet size %u",
|
|
|
|
(void *)dev,
|
|
|
|
1 << sges_n,
|
|
|
|
dev->data->dev_conf.rxmode.max_rx_pkt_len);
|
|
|
|
goto error;
|
|
|
|
}
|
2017-09-01 10:07:00 +02:00
|
|
|
} else {
|
|
|
|
WARN("%p: the requested maximum Rx packet size (%u) is"
|
|
|
|
" larger than a single mbuf (%u) and scattered"
|
|
|
|
" mode has not been requested",
|
|
|
|
(void *)dev,
|
|
|
|
dev->data->dev_conf.rxmode.max_rx_pkt_len,
|
|
|
|
mb_len - RTE_PKTMBUF_HEADROOM);
|
|
|
|
}
|
2017-10-12 14:29:57 +02:00
|
|
|
DEBUG("%p: maximum number of segments per packet: %u",
|
|
|
|
(void *)dev, 1 << rxq->sges_n);
|
|
|
|
if (desc % (1 << rxq->sges_n)) {
|
|
|
|
rte_errno = EINVAL;
|
|
|
|
ERROR("%p: number of Rx queue descriptors (%u) is not a"
|
|
|
|
" multiple of maximum segments per packet (%u)",
|
|
|
|
(void *)dev,
|
|
|
|
desc,
|
|
|
|
1 << rxq->sges_n);
|
|
|
|
goto error;
|
|
|
|
}
|
net/mlx4: add new memory region support
This is the new design of Memory Region (MR) for mlx PMD, in order to:
- Accommodate the new memory hotplug model.
- Support non-contiguous Mempool.
There are multiple layers for MR search.
L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most
Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized
array by linear search. L0/L1 is in an inline function -
mlx4_mr_lookup_cache().
If L1 misses, the bottom-half function is called to look up the address
from the bigger local cache of the queue. This is L2 - mlx4_mr_addr2mr_bh()
and it is not an inline function. Data structure for L2 is the Binary Tree.
If L2 misses, the search falls into the slowest path which takes locks in
order to access global device cache (priv->mr.cache) which is also a B-tree
and caches the original MR list (priv->mr.mr_list) of the device. Unless
the global cache is overflowed, it is all-inclusive of the MR list. This is
L3 - mlx4_mr_lookup_dev(). The size of the L3 cache table is limited and
can't be expanded on the fly due to deadlock. Refer to the comments in the
code for the details - mr_lookup_dev(). If L3 is overflowed, the list will
have to be searched directly bypassing the cache although it is slower.
If L3 misses, a new MR for the address should be created -
mlx4_mr_create(). When it creates a new MR, it tries to register adjacent
memsegs as much as possible which are virtually contiguous around the
address. This must take two locks - memory_hotplug_lock and
priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any
allocation/free of memory inside.
In the free callback of the memory hotplug event, freed space is searched
from the MR list and corresponding bits are cleared from the bitmap of MRs.
This can fragment a MR and the MR will have multiple search entries in the
caches. Once there's a change by the event, the global cache must be
rebuilt and all the per-queue caches will be flushed as well. If memory is
frequently freed in run-time, that may cause jitter on dataplane processing
in the worst case by incurring MR cache flush and rebuild. But, it would be
the least probable scenario.
To guarantee the most optimal performance, it is highly recommended to use
an EAL option - '--socket-mem'. Then, the reserved memory will be pinned
and won't be freed dynamically. And it is also recommended to configure
per-lcore cache of Mempool. Even though there're many MRs for a device or
MRs are highly fragmented, the cache of Mempool will be much helpful to
reduce misses on per-queue caches anyway.
'--legacy-mem' is also supported.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
2018-05-09 04:09:06 -07:00
|
|
|
if (mlx4_mr_btree_init(&rxq->mr_ctrl.cache_bh,
|
|
|
|
MLX4_MR_BTREE_CACHE_N, socket)) {
|
|
|
|
/* rte_errno is already set. */
|
|
|
|
goto error;
|
|
|
|
}
|
2017-09-01 10:07:00 +02:00
|
|
|
if (dev->data->dev_conf.intr_conf.rxq) {
|
2018-01-30 16:34:52 +01:00
|
|
|
rxq->channel = mlx4_glue->create_comp_channel(priv->ctx);
|
2017-10-12 14:19:37 +02:00
|
|
|
if (rxq->channel == NULL) {
|
2017-09-01 10:07:00 +02:00
|
|
|
rte_errno = ENOMEM;
|
|
|
|
ERROR("%p: Rx interrupt completion channel creation"
|
|
|
|
" failure: %s",
|
|
|
|
(void *)dev, strerror(rte_errno));
|
|
|
|
goto error;
|
|
|
|
}
|
2017-10-12 14:19:37 +02:00
|
|
|
if (mlx4_fd_set_non_blocking(rxq->channel->fd) < 0) {
|
2017-09-01 10:07:00 +02:00
|
|
|
ERROR("%p: unable to make Rx interrupt completion"
|
|
|
|
" channel non-blocking: %s",
|
|
|
|
(void *)dev, strerror(rte_errno));
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
}
|
2017-10-12 14:19:37 +02:00
|
|
|
DEBUG("%p: adding Rx queue %p to list", (void *)dev, (void *)rxq);
|
|
|
|
dev->data->rx_queues[idx] = rxq;
|
2017-10-19 18:11:05 +02:00
|
|
|
return 0;
|
2017-09-01 10:07:00 +02:00
|
|
|
error:
|
2017-10-12 14:19:37 +02:00
|
|
|
dev->data->rx_queues[idx] = NULL;
|
2017-09-01 10:07:00 +02:00
|
|
|
ret = rte_errno;
|
2017-10-12 14:19:37 +02:00
|
|
|
mlx4_rx_queue_release(rxq);
|
2017-09-01 10:07:00 +02:00
|
|
|
rte_errno = ret;
|
|
|
|
assert(rte_errno > 0);
|
|
|
|
return -rte_errno;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* DPDK callback to release a Rx queue.
|
|
|
|
*
|
|
|
|
* @param dpdk_rxq
|
|
|
|
* Generic Rx queue pointer.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
mlx4_rx_queue_release(void *dpdk_rxq)
|
|
|
|
{
|
|
|
|
struct rxq *rxq = (struct rxq *)dpdk_rxq;
|
|
|
|
struct priv *priv;
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
if (rxq == NULL)
|
|
|
|
return;
|
|
|
|
priv = rxq->priv;
|
2017-09-01 10:07:06 +02:00
|
|
|
for (i = 0; i != priv->dev->data->nb_rx_queues; ++i)
|
|
|
|
if (priv->dev->data->rx_queues[i] == rxq) {
|
2017-09-01 10:07:00 +02:00
|
|
|
DEBUG("%p: removing Rx queue %p from list",
|
|
|
|
(void *)priv->dev, (void *)rxq);
|
2017-09-01 10:07:06 +02:00
|
|
|
priv->dev->data->rx_queues[i] = NULL;
|
2017-09-01 10:07:00 +02:00
|
|
|
break;
|
|
|
|
}
|
2017-10-19 18:11:09 +02:00
|
|
|
assert(!rxq->cq);
|
|
|
|
assert(!rxq->wq);
|
|
|
|
assert(!rxq->wqes);
|
|
|
|
assert(!rxq->rq_db);
|
2017-10-12 14:19:37 +02:00
|
|
|
if (rxq->channel)
|
2018-01-30 16:34:52 +01:00
|
|
|
claim_zero(mlx4_glue->destroy_comp_channel(rxq->channel));
|
net/mlx4: add new memory region support
This is the new design of Memory Region (MR) for mlx PMD, in order to:
- Accommodate the new memory hotplug model.
- Support non-contiguous Mempool.
There are multiple layers for MR search.
L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most
Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized
array by linear search. L0/L1 is in an inline function -
mlx4_mr_lookup_cache().
If L1 misses, the bottom-half function is called to look up the address
from the bigger local cache of the queue. This is L2 - mlx4_mr_addr2mr_bh()
and it is not an inline function. Data structure for L2 is the Binary Tree.
If L2 misses, the search falls into the slowest path which takes locks in
order to access global device cache (priv->mr.cache) which is also a B-tree
and caches the original MR list (priv->mr.mr_list) of the device. Unless
the global cache is overflowed, it is all-inclusive of the MR list. This is
L3 - mlx4_mr_lookup_dev(). The size of the L3 cache table is limited and
can't be expanded on the fly due to deadlock. Refer to the comments in the
code for the details - mr_lookup_dev(). If L3 is overflowed, the list will
have to be searched directly bypassing the cache although it is slower.
If L3 misses, a new MR for the address should be created -
mlx4_mr_create(). When it creates a new MR, it tries to register adjacent
memsegs as much as possible which are virtually contiguous around the
address. This must take two locks - memory_hotplug_lock and
priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any
allocation/free of memory inside.
In the free callback of the memory hotplug event, freed space is searched
from the MR list and corresponding bits are cleared from the bitmap of MRs.
This can fragment a MR and the MR will have multiple search entries in the
caches. Once there's a change by the event, the global cache must be
rebuilt and all the per-queue caches will be flushed as well. If memory is
frequently freed in run-time, that may cause jitter on dataplane processing
in the worst case by incurring MR cache flush and rebuild. But, it would be
the least probable scenario.
To guarantee the most optimal performance, it is highly recommended to use
an EAL option - '--socket-mem'. Then, the reserved memory will be pinned
and won't be freed dynamically. And it is also recommended to configure
per-lcore cache of Mempool. Even though there're many MRs for a device or
MRs are highly fragmented, the cache of Mempool will be much helpful to
reduce misses on per-queue caches anyway.
'--legacy-mem' is also supported.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
2018-05-09 04:09:06 -07:00
|
|
|
mlx4_mr_btree_free(&rxq->mr_ctrl.cache_bh);
|
2017-09-01 10:07:00 +02:00
|
|
|
rte_free(rxq);
|
|
|
|
}
|