From 078b8b452e6baa735139baa2b89354a991a38948 Mon Sep 17 00:00:00 2001 From: Adrien Mazarguil Date: Thu, 12 Oct 2017 14:19:41 +0200 Subject: [PATCH] net/mlx4: add RSS flow rule action support This patch dissociates single-queue indirection tables and hash QP objects from Rx queue structures to relinquish their control to users through the RSS flow rule action, while simultaneously allowing multiple queues to be associated with RSS contexts. Flow rules share identical RSS contexts (hashed fields, hash key, target queues) to save on memory and other resources. The trade-off is some added complexity due to reference counter management for RSS contexts. The QUEUE action is re-implemented on top of an automatically-generated single-queue RSS context. The following hardware limitations apply to RSS contexts: - The number of queues in a group must be a power of two. - Queue indices must be consecutive, for instance the [0 1 2 3] set is allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not. - The first queue of a group must be aligned to a multiple of the context size, e.g. if queues [0 1 2 3 4] are defined globally, allowed group combinations are [0 1] and [2 3]; groups [1 2] and [3 4] are not supported. - RSS hash key, while configurable per context, must be exactly 40 bytes long. - The only supported hash algorithm is Toeplitz. 
Signed-off-by: Adrien Mazarguil Acked-by: Nelio Laranjeiro --- doc/guides/nics/features/mlx4.ini | 1 + drivers/net/mlx4/Makefile | 2 +- drivers/net/mlx4/mlx4.c | 13 ++ drivers/net/mlx4/mlx4.h | 2 + drivers/net/mlx4/mlx4_ethdev.c | 1 + drivers/net/mlx4/mlx4_flow.c | 181 ++++++++++++++++-- drivers/net/mlx4/mlx4_flow.h | 3 +- drivers/net/mlx4/mlx4_rxq.c | 303 +++++++++++++++++++++++------- drivers/net/mlx4/mlx4_rxtx.h | 24 ++- mk/rte.app.mk | 2 +- 10 files changed, 445 insertions(+), 87 deletions(-) diff --git a/doc/guides/nics/features/mlx4.ini b/doc/guides/nics/features/mlx4.ini index 6f8c82ac90..9750ebfd7a 100644 --- a/doc/guides/nics/features/mlx4.ini +++ b/doc/guides/nics/features/mlx4.ini @@ -16,6 +16,7 @@ Promiscuous mode = Y Allmulticast mode = Y Unicast MAC filter = Y Multicast MAC filter = Y +RSS hash = Y SR-IOV = Y VLAN filter = Y Basic stats = Y diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index 0515cd7efa..3b3a02047a 100644 --- a/drivers/net/mlx4/Makefile +++ b/drivers/net/mlx4/Makefile @@ -54,7 +54,7 @@ CFLAGS += -D_BSD_SOURCE CFLAGS += -D_DEFAULT_SOURCE CFLAGS += -D_XOPEN_SOURCE=600 CFLAGS += $(WERROR_FLAGS) -LDLIBS += -libverbs +LDLIBS += -libverbs -lmlx4 # A few warnings cannot be avoided in external headers. CFLAGS += -Wno-error=cast-qual diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index 52f8d5126f..0db9a19775 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -50,6 +50,7 @@ #pragma GCC diagnostic ignored "-Wpedantic" #endif #include +#include #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif @@ -99,8 +100,20 @@ mlx4_dev_configure(struct rte_eth_dev *dev) { struct priv *priv = dev->data->dev_private; struct rte_flow_error error; + uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues); int ret; + /* Prepare range for RSS contexts before creating the first WQ. 
*/ + ret = mlx4dv_set_context_attr(priv->ctx, + MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ, + &log2_range); + if (ret) { + ERROR("cannot set up range size for RSS context to %u" + " (for %u Rx queues), error: %s", + 1 << log2_range, dev->data->nb_rx_queues, strerror(ret)); + rte_errno = ret; + return -ret; + } /* Prepare internal flow rules. */ ret = mlx4_flow_sync(priv, &error); if (ret) { diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index b04a104600..f4da8c64e5 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -95,6 +95,7 @@ enum { #define MLX4_DRIVER_NAME "net_mlx4" struct mlx4_drop; +struct mlx4_rss; struct rxq; struct txq; struct rte_flow; @@ -114,6 +115,7 @@ struct priv { uint32_t isolated:1; /**< Toggle isolated mode. */ struct rte_intr_handle intr_handle; /**< Port interrupt handle. */ struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */ + LIST_HEAD(, mlx4_rss) rss; /**< Shared targets for Rx flow rules. */ LIST_HEAD(, rte_flow) flows; /**< Configured flow rule handles. */ struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES]; /**< Configured MAC addresses. Unused entries are zeroed. */ diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c index 661e25212a..3623909670 100644 --- a/drivers/net/mlx4/mlx4_ethdev.c +++ b/drivers/net/mlx4/mlx4_ethdev.c @@ -769,6 +769,7 @@ mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) info->tx_offload_capa = 0; if (mlx4_get_ifname(priv, &ifname) == 0) info->if_index = if_nametoindex(ifname); + info->hash_key_size = MLX4_RSS_HASH_KEY_SIZE; info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G | diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c index 41423cd8c4..2b60d761af 100644 --- a/drivers/net/mlx4/mlx4_flow.c +++ b/drivers/net/mlx4/mlx4_flow.c @@ -102,6 +102,62 @@ struct mlx4_drop { uint32_t refcnt; /**< Reference count. */ }; +/** + * Convert DPDK RSS hash fields to their Verbs equivalent. 
+ * + * @param rss_hf + * Hash fields in DPDK format (see struct rte_eth_rss_conf). + * + * @return + * A valid Verbs RSS hash fields mask for mlx4 on success, (uint64_t)-1 + * otherwise and rte_errno is set. + */ +static uint64_t +mlx4_conv_rss_hf(uint64_t rss_hf) +{ + enum { IPV4, IPV6, TCP, UDP, }; + const uint64_t in[] = { + [IPV4] = (ETH_RSS_IPV4 | + ETH_RSS_FRAG_IPV4 | + ETH_RSS_NONFRAG_IPV4_TCP | + ETH_RSS_NONFRAG_IPV4_UDP | + ETH_RSS_NONFRAG_IPV4_OTHER), + [IPV6] = (ETH_RSS_IPV6 | + ETH_RSS_FRAG_IPV6 | + ETH_RSS_NONFRAG_IPV6_TCP | + ETH_RSS_NONFRAG_IPV6_UDP | + ETH_RSS_NONFRAG_IPV6_OTHER | + ETH_RSS_IPV6_EX | + ETH_RSS_IPV6_TCP_EX | + ETH_RSS_IPV6_UDP_EX), + [TCP] = (ETH_RSS_NONFRAG_IPV4_TCP | + ETH_RSS_NONFRAG_IPV6_TCP | + ETH_RSS_IPV6_TCP_EX), + [UDP] = (ETH_RSS_NONFRAG_IPV4_UDP | + ETH_RSS_NONFRAG_IPV6_UDP | + ETH_RSS_IPV6_UDP_EX), + }; + const uint64_t out[RTE_DIM(in)] = { + [IPV4] = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4, + [IPV6] = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6, + [TCP] = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP, + [UDP] = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP, + }; + uint64_t seen = 0; + uint64_t conv = 0; + unsigned int i; + + for (i = 0; i != RTE_DIM(in); ++i) + if (rss_hf & in[i]) { + seen |= rss_hf & in[i]; + conv |= out[i]; + } + if (!(rss_hf & ~seen)) + return conv; + rte_errno = ENOTSUP; + return (uint64_t)-1; +} + /** * Merge Ethernet pattern item into flow rule handle. 
* @@ -663,6 +719,9 @@ mlx4_flow_prepare(struct priv *priv, for (action = actions; action->type; ++action) { switch (action->type) { const struct rte_flow_action_queue *queue; + const struct rte_flow_action_rss *rss; + const struct rte_eth_rss_conf *rss_conf; + unsigned int i; case RTE_FLOW_ACTION_TYPE_VOID: continue; @@ -670,23 +729,87 @@ mlx4_flow_prepare(struct priv *priv, flow->drop = 1; break; case RTE_FLOW_ACTION_TYPE_QUEUE: + if (flow->rss) + break; queue = action->conf; - if (queue->index >= priv->dev->data->nb_rx_queues) + flow->rss = mlx4_rss_get + (priv, 0, mlx4_rss_hash_key_default, 1, + &queue->index); + if (!flow->rss) { + msg = "not enough resources for additional" + " single-queue RSS context"; goto exit_action_not_supported; - flow->queue = 1; - flow->queue_id = queue->index; + } + break; + case RTE_FLOW_ACTION_TYPE_RSS: + if (flow->rss) + break; + rss = action->conf; + /* Default RSS configuration if none is provided. */ + rss_conf = + rss->rss_conf ? + rss->rss_conf : + &(struct rte_eth_rss_conf){ + .rss_key = mlx4_rss_hash_key_default, + .rss_key_len = MLX4_RSS_HASH_KEY_SIZE, + .rss_hf = (ETH_RSS_IPV4 | + ETH_RSS_NONFRAG_IPV4_UDP | + ETH_RSS_NONFRAG_IPV4_TCP | + ETH_RSS_IPV6 | + ETH_RSS_NONFRAG_IPV6_UDP | + ETH_RSS_NONFRAG_IPV6_TCP), + }; + /* Sanity checks. 
*/ + if (!rte_is_power_of_2(rss->num)) { + msg = "for RSS, mlx4 requires the number of" + " queues to be a power of two"; + goto exit_action_not_supported; + } + if (rss_conf->rss_key_len != + sizeof(flow->rss->key)) { + msg = "mlx4 supports exactly one RSS hash key" + " length: " + MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE); + goto exit_action_not_supported; + } + for (i = 1; i < rss->num; ++i) + if (rss->queue[i] - rss->queue[i - 1] != 1) + break; + if (i != rss->num) { + msg = "mlx4 requires RSS contexts to use" + " consecutive queue indices only"; + goto exit_action_not_supported; + } + if (rss->queue[0] % rss->num) { + msg = "mlx4 requires the first queue of a RSS" + " context to be aligned on a multiple" + " of the context size"; + goto exit_action_not_supported; + } + flow->rss = mlx4_rss_get + (priv, mlx4_conv_rss_hf(rss_conf->rss_hf), + rss_conf->rss_key, rss->num, rss->queue); + if (!flow->rss) { + msg = "either invalid parameters or not enough" + " resources for additional multi-queue" + " RSS context"; + goto exit_action_not_supported; + } break; default: goto exit_action_not_supported; } } - if (!flow->queue && !flow->drop) + if (!flow->rss && !flow->drop) return rte_flow_error_set (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "no valid action"); /* Validation ends here. */ - if (!addr) + if (!addr) { + if (flow->rss) + mlx4_rss_put(flow->rss); return 0; + } if (flow == &temp) { /* Allocate proper handle based on collected data. */ const struct mlx4_malloc_vec vec[] = { @@ -711,6 +834,7 @@ mlx4_flow_prepare(struct priv *priv, *flow = (struct rte_flow){ .ibv_attr = temp.ibv_attr, .ibv_attr_size = sizeof(*flow->ibv_attr), + .rss = temp.rss, }; *flow->ibv_attr = (struct ibv_flow_attr){ .type = IBV_FLOW_ATTR_NORMAL, @@ -727,7 +851,7 @@ mlx4_flow_prepare(struct priv *priv, item, msg ? 
msg : "item not supported"); exit_action_not_supported: return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, - action, "action not supported"); + action, msg ? msg : "action not supported"); } /** @@ -850,6 +974,8 @@ mlx4_flow_toggle(struct priv *priv, flow->ibv_flow = NULL; if (flow->drop) mlx4_drop_put(priv->drop); + else if (flow->rss) + mlx4_rss_detach(flow->rss); return 0; } assert(flow->ibv_attr); @@ -861,6 +987,8 @@ mlx4_flow_toggle(struct priv *priv, flow->ibv_flow = NULL; if (flow->drop) mlx4_drop_put(priv->drop); + else if (flow->rss) + mlx4_rss_detach(flow->rss); } err = EACCES; msg = ("priority level " @@ -868,24 +996,42 @@ mlx4_flow_toggle(struct priv *priv, " is reserved when not in isolated mode"); goto error; } - if (flow->queue) { - struct rxq *rxq = NULL; + if (flow->rss) { + struct mlx4_rss *rss = flow->rss; + int missing = 0; + unsigned int i; - if (flow->queue_id < priv->dev->data->nb_rx_queues) - rxq = priv->dev->data->rx_queues[flow->queue_id]; + /* Stop at the first nonexistent target queue. */ + for (i = 0; i != rss->queues; ++i) + if (rss->queue_id[i] >= + priv->dev->data->nb_rx_queues || + !priv->dev->data->rx_queues[rss->queue_id[i]]) { + missing = 1; + break; + } if (flow->ibv_flow) { - if (!rxq ^ !flow->drop) + if (missing ^ !flow->drop) return 0; /* Verbs flow needs updating. */ claim_zero(ibv_destroy_flow(flow->ibv_flow)); flow->ibv_flow = NULL; if (flow->drop) mlx4_drop_put(priv->drop); + else + mlx4_rss_detach(rss); + } + if (!missing) { + err = mlx4_rss_attach(rss); + if (err) { + err = -err; + msg = "cannot create indirection table or hash" + " QP to associate flow rule with"; + goto error; + } + qp = rss->qp; } - if (rxq) - qp = rxq->qp; /* A missing target queue drops traffic implicitly. 
*/ - flow->drop = !rxq; + flow->drop = missing; } if (flow->drop) { mlx4_drop_get(priv); @@ -904,6 +1050,8 @@ mlx4_flow_toggle(struct priv *priv, return 0; if (flow->drop) mlx4_drop_put(priv->drop); + else if (flow->rss) + mlx4_rss_detach(flow->rss); err = errno; msg = "flow rule rejected by device"; error: @@ -946,6 +1094,8 @@ mlx4_flow_create(struct rte_eth_dev *dev, } return flow; } + if (flow->rss) + mlx4_rss_put(flow->rss); rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, error->message); rte_free(flow); @@ -992,6 +1142,8 @@ mlx4_flow_destroy(struct rte_eth_dev *dev, if (err) return err; LIST_REMOVE(flow, next); + if (flow->rss) + mlx4_rss_put(flow->rss); rte_free(flow); return 0; } @@ -1320,6 +1472,7 @@ mlx4_flow_clean(struct priv *priv) while ((flow = LIST_FIRST(&priv->flows))) mlx4_flow_destroy(priv->dev, flow, NULL); + assert(LIST_EMPTY(&priv->rss)); } static const struct rte_flow_ops mlx4_flow_ops = { diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h index 134e14d93a..651fd37b60 100644 --- a/drivers/net/mlx4/mlx4_flow.h +++ b/drivers/net/mlx4/mlx4_flow.h @@ -70,8 +70,7 @@ struct rte_flow { uint32_t promisc:1; /**< This rule matches everything. */ uint32_t allmulti:1; /**< This rule matches all multicast traffic. */ uint32_t drop:1; /**< This rule drops packets. */ - uint32_t queue:1; /**< Target is a receive queue. */ - uint16_t queue_id; /**< Target queue. */ + struct mlx4_rss *rss; /**< Rx target. */ }; /* mlx4_flow.c */ diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c index 171fe3f5ab..483fe9b0db 100644 --- a/drivers/net/mlx4/mlx4_rxq.c +++ b/drivers/net/mlx4/mlx4_rxq.c @@ -64,6 +64,242 @@ #include "mlx4_rxtx.h" #include "mlx4_utils.h" +/** + * Historical RSS hash key. + * + * This used to be the default for mlx4 in Linux before v3.19 switched to + * generating random hash keys through netdev_rss_key_fill(). 
+ * + * It is used in this PMD for consistency with past DPDK releases but can + * now be overridden through user configuration. + * + * Note: this is not const to work around API quirks. + */ +uint8_t +mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE] = { + 0x2c, 0xc6, 0x81, 0xd1, + 0x5b, 0xdb, 0xf4, 0xf7, + 0xfc, 0xa2, 0x83, 0x19, + 0xdb, 0x1a, 0x3e, 0x94, + 0x6b, 0x9e, 0x38, 0xd9, + 0x2c, 0x9c, 0x03, 0xd1, + 0xad, 0x99, 0x44, 0xa7, + 0xd9, 0x56, 0x3d, 0x59, + 0x06, 0x3c, 0x25, 0xf3, + 0xfc, 0x1f, 0xdc, 0x2a, +}; + +/** + * Obtain a RSS context with specified properties. + * + * Used when creating a flow rule targeting one or several Rx queues. + * + * If a matching RSS context already exists, it is returned with its + * reference count incremented. + * + * @param priv + * Pointer to private structure. + * @param fields + * Fields for RSS processing (Verbs format). + * @param[in] key + * Hash key to use (whose size is exactly MLX4_RSS_HASH_KEY_SIZE). + * @param queues + * Number of target queues. + * @param[in] queue_id + * Target queues. + * + * @return + * Pointer to RSS context on success, NULL otherwise and rte_errno is set. 
+ */ +struct mlx4_rss * +mlx4_rss_get(struct priv *priv, uint64_t fields, + uint8_t key[MLX4_RSS_HASH_KEY_SIZE], + uint16_t queues, const uint16_t queue_id[]) +{ + struct mlx4_rss *rss; + size_t queue_id_size = sizeof(queue_id[0]) * queues; + + LIST_FOREACH(rss, &priv->rss, next) + if (fields == rss->fields && + queues == rss->queues && + !memcmp(key, rss->key, MLX4_RSS_HASH_KEY_SIZE) && + !memcmp(queue_id, rss->queue_id, queue_id_size)) { + ++rss->refcnt; + return rss; + } + rss = rte_malloc(__func__, offsetof(struct mlx4_rss, queue_id) + + queue_id_size, 0); + if (!rss) + goto error; + *rss = (struct mlx4_rss){ + .priv = priv, + .refcnt = 1, + .usecnt = 0, + .qp = NULL, + .ind = NULL, + .fields = fields, + .queues = queues, + }; + memcpy(rss->key, key, MLX4_RSS_HASH_KEY_SIZE); + memcpy(rss->queue_id, queue_id, queue_id_size); + LIST_INSERT_HEAD(&priv->rss, rss, next); + return rss; +error: + rte_errno = ENOMEM; + return NULL; +} + +/** + * Release a RSS context instance. + * + * Used when destroying a flow rule targeting one or several Rx queues. + * + * This function decrements the reference count of the context and destroys + * it after reaching 0. The context must have no users at this point; all + * prior calls to mlx4_rss_attach() must have been followed by matching + * calls to mlx4_rss_detach(). + * + * @param rss + * RSS context to release. + */ +void mlx4_rss_put(struct mlx4_rss *rss) +{ + assert(rss->refcnt); + if (--rss->refcnt) + return; + assert(!rss->usecnt); + assert(!rss->qp); + assert(!rss->ind); + LIST_REMOVE(rss, next); + rte_free(rss); +} + +/** + * Attach a user to a RSS context instance. + * + * Used when the RSS QP and indirection table objects must be instantiated, + * that is, when a flow rule must be enabled. + * + * This function increments the usage count of the context. + * + * @param rss + * RSS context to attach to. 
+ */ +int mlx4_rss_attach(struct mlx4_rss *rss) +{ + assert(rss->refcnt); + if (rss->usecnt++) { + assert(rss->qp); + assert(rss->ind); + return 0; + } + + struct ibv_wq *ind_tbl[rss->queues]; + struct priv *priv = rss->priv; + const char *msg; + unsigned int i; + int ret; + + if (!rte_is_power_of_2(RTE_DIM(ind_tbl))) { + msg = "number of RSS queues must be a power of two"; + goto error; + } + for (i = 0; i != RTE_DIM(ind_tbl); ++i) { + uint16_t id = rss->queue_id[i]; + struct rxq *rxq = NULL; + + if (id < priv->dev->data->nb_rx_queues) + rxq = priv->dev->data->rx_queues[id]; + if (!rxq) { + msg = "RSS target queue is not configured"; + goto error; + } + ind_tbl[i] = rxq->wq; + } + rss->ind = ibv_create_rwq_ind_table + (priv->ctx, + &(struct ibv_rwq_ind_table_init_attr){ + .log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)), + .ind_tbl = ind_tbl, + .comp_mask = 0, + }); + if (!rss->ind) { + msg = "RSS indirection table creation failure"; + goto error; + } + rss->qp = ibv_create_qp_ex + (priv->ctx, + &(struct ibv_qp_init_attr_ex){ + .comp_mask = (IBV_QP_INIT_ATTR_PD | + IBV_QP_INIT_ATTR_RX_HASH | + IBV_QP_INIT_ATTR_IND_TABLE), + .qp_type = IBV_QPT_RAW_PACKET, + .pd = priv->pd, + .rwq_ind_tbl = rss->ind, + .rx_hash_conf = { + .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, + .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE, + .rx_hash_key = rss->key, + .rx_hash_fields_mask = rss->fields, + }, + }); + if (!rss->qp) { + msg = "RSS hash QP creation failure"; + goto error; + } + ret = ibv_modify_qp + (rss->qp, + &(struct ibv_qp_attr){ + .qp_state = IBV_QPS_INIT, + .port_num = priv->port, + }, + IBV_QP_STATE | IBV_QP_PORT); + if (ret) { + msg = "failed to switch RSS hash QP to INIT state"; + goto error; + } + ret = ibv_modify_qp + (rss->qp, + &(struct ibv_qp_attr){ + .qp_state = IBV_QPS_RTR, + }, + IBV_QP_STATE); + if (ret) { + msg = "failed to switch RSS hash QP to RTR state"; + goto error; + } + return 0; +error: + ERROR("mlx4: %s", msg); + --rss->usecnt; + rte_errno = EINVAL; + 
return -rte_errno; +} + +/** + * Detach a user from a RSS context instance. + * + * Used when disabling (not destroying) a flow rule. + * + * This function decrements the usage count of the context and destroys + * usage resources after reaching 0. + * + * @param rss + * RSS context to detach from. + */ +void mlx4_rss_detach(struct mlx4_rss *rss) +{ + assert(rss->refcnt); + assert(rss->qp); + assert(rss->ind); + if (--rss->usecnt) + return; + claim_zero(ibv_destroy_qp(rss->qp)); + rss->qp = NULL; + claim_zero(ibv_destroy_rwq_ind_table(rss->ind)); + rss->ind = NULL; +} + /** * Allocate Rx queue elements. * @@ -295,57 +531,6 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, (void *)dev, strerror(rte_errno)); goto error; } - rxq->ind = ibv_create_rwq_ind_table - (priv->ctx, - &(struct ibv_rwq_ind_table_init_attr){ - .log_ind_tbl_size = 0, - .ind_tbl = (struct ibv_wq *[]){ - rxq->wq, - }, - .comp_mask = 0, - }); - if (!rxq->ind) { - rte_errno = errno ? errno : EINVAL; - ERROR("%p: indirection table creation failure: %s", - (void *)dev, strerror(errno)); - goto error; - } - rxq->qp = ibv_create_qp_ex - (priv->ctx, - &(struct ibv_qp_init_attr_ex){ - .comp_mask = (IBV_QP_INIT_ATTR_PD | - IBV_QP_INIT_ATTR_RX_HASH | - IBV_QP_INIT_ATTR_IND_TABLE), - .qp_type = IBV_QPT_RAW_PACKET, - .pd = priv->pd, - .rwq_ind_tbl = rxq->ind, - .rx_hash_conf = { - .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, - .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE, - .rx_hash_key = - (uint8_t [MLX4_RSS_HASH_KEY_SIZE]){ 0 }, - .rx_hash_fields_mask = 0, - }, - }); - if (!rxq->qp) { - rte_errno = errno ? 
errno : EINVAL; - ERROR("%p: QP creation failure: %s", - (void *)dev, strerror(rte_errno)); - goto error; - } - ret = ibv_modify_qp - (rxq->qp, - &(struct ibv_qp_attr){ - .qp_state = IBV_QPS_INIT, - .port_num = priv->port, - }, - IBV_QP_STATE | IBV_QP_PORT); - if (ret) { - rte_errno = ret; - ERROR("%p: QP state to IBV_QPS_INIT failed: %s", - (void *)dev, strerror(rte_errno)); - goto error; - } ret = mlx4_rxq_alloc_elts(rxq); if (ret) { ERROR("%p: RXQ allocation failed: %s", @@ -361,18 +546,6 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, strerror(rte_errno)); goto error; } - ret = ibv_modify_qp - (rxq->qp, - &(struct ibv_qp_attr){ - .qp_state = IBV_QPS_RTR, - }, - IBV_QP_STATE); - if (ret) { - rte_errno = ret; - ERROR("%p: QP state to IBV_QPS_RTR failed: %s", - (void *)dev, strerror(rte_errno)); - goto error; - } DEBUG("%p: adding Rx queue %p to list", (void *)dev, (void *)rxq); dev->data->rx_queues[idx] = rxq; /* Enable associated flows. */ @@ -417,10 +590,6 @@ mlx4_rx_queue_release(void *dpdk_rxq) } mlx4_flow_sync(priv, NULL); mlx4_rxq_free_elts(rxq); - if (rxq->qp) - claim_zero(ibv_destroy_qp(rxq->qp)); - if (rxq->ind) - claim_zero(ibv_destroy_rwq_ind_table(rxq->ind)); if (rxq->wq) claim_zero(ibv_destroy_wq(rxq->wq)); if (rxq->cq) diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h index 897fd2a371..eca966fe32 100644 --- a/drivers/net/mlx4/mlx4_rxtx.h +++ b/drivers/net/mlx4/mlx4_rxtx.h @@ -35,6 +35,7 @@ #define MLX4_RXTX_H_ #include +#include /* Verbs headers do not support -pedantic. */ #ifdef PEDANTIC @@ -74,8 +75,6 @@ struct rxq { struct ibv_mr *mr; /**< Memory region (for mp). */ struct ibv_cq *cq; /**< Completion queue. */ struct ibv_wq *wq; /**< Work queue. */ - struct ibv_rwq_ind_table *ind; /**< Indirection table. */ - struct ibv_qp *qp; /**< Queue pair. */ struct ibv_comp_channel *channel; /**< Rx completion channel. */ unsigned int port_id; /**< Port ID for incoming packets. 
*/ unsigned int elts_n; /**< (*elts)[] length. */ @@ -86,6 +85,20 @@ struct rxq { uint8_t data[]; /**< Remaining queue resources. */ }; +/** Shared flow target for Rx queues. */ +struct mlx4_rss { + LIST_ENTRY(mlx4_rss) next; /**< Next entry in list. */ + struct priv *priv; /**< Back pointer to private data. */ + uint32_t refcnt; /**< Reference count for this object. */ + uint32_t usecnt; /**< Number of users relying on @p qp and @p ind. */ + struct ibv_qp *qp; /**< Queue pair. */ + struct ibv_rwq_ind_table *ind; /**< Indirection table. */ + uint64_t fields; /**< Fields for RSS processing (Verbs format). */ + uint8_t key[MLX4_RSS_HASH_KEY_SIZE]; /**< Hash key to use. */ + uint16_t queues; /**< Number of target queues. */ + uint16_t queue_id[]; /**< Target queues. */ +}; + /** Tx element. */ struct txq_elt { struct ibv_send_wr wr; /**< Work request. */ @@ -126,6 +139,13 @@ struct txq { /* mlx4_rxq.c */ +uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE]; +struct mlx4_rss *mlx4_rss_get(struct priv *priv, uint64_t fields, + uint8_t key[MLX4_RSS_HASH_KEY_SIZE], + uint16_t queues, const uint16_t queue_id[]); +void mlx4_rss_put(struct mlx4_rss *rss); +int mlx4_rss_attach(struct mlx4_rss *rss); +void mlx4_rss_detach(struct mlx4_rss *rss); int mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, unsigned int socket, const struct rte_eth_rxconf *conf, diff --git a/mk/rte.app.mk b/mk/rte.app.mk index 83e042e416..8192b98bb0 100644 --- a/mk/rte.app.mk +++ b/mk/rte.app.mk @@ -135,7 +135,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += -lrte_pmd_kni endif _LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += -lrte_pmd_lio -_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -lrte_pmd_mlx4 -libverbs +_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -lrte_pmd_mlx4 -libverbs -lmlx4 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += -lrte_pmd_mlx5 -libverbs -lmlx5 _LDLIBS-$(CONFIG_RTE_LIBRTE_MRVL_PMD) += -lrte_pmd_mrvl -L$(LIBMUSDK_PATH)/lib -lmusdk 
_LDLIBS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += -lrte_pmd_nfp