numam-dpdk/drivers/net/mlx4/mlx4_flow.c
Olivier Matz 35b2d13fd6 net: add rte prefix to ether defines
Add 'RTE_' prefix to defines:
- rename ETHER_ADDR_LEN as RTE_ETHER_ADDR_LEN.
- rename ETHER_TYPE_LEN as RTE_ETHER_TYPE_LEN.
- rename ETHER_CRC_LEN as RTE_ETHER_CRC_LEN.
- rename ETHER_HDR_LEN as RTE_ETHER_HDR_LEN.
- rename ETHER_MIN_LEN as RTE_ETHER_MIN_LEN.
- rename ETHER_MAX_LEN as RTE_ETHER_MAX_LEN.
- rename ETHER_MTU as RTE_ETHER_MTU.
- rename ETHER_MAX_VLAN_FRAME_LEN as RTE_ETHER_MAX_VLAN_FRAME_LEN.
- rename ETHER_MAX_VLAN_ID as RTE_ETHER_MAX_VLAN_ID.
- rename ETHER_MAX_JUMBO_FRAME_LEN as RTE_ETHER_MAX_JUMBO_FRAME_LEN.
- rename ETHER_MIN_MTU as RTE_ETHER_MIN_MTU.
- rename ETHER_LOCAL_ADMIN_ADDR as RTE_ETHER_LOCAL_ADMIN_ADDR.
- rename ETHER_GROUP_ADDR as RTE_ETHER_GROUP_ADDR.
- rename ETHER_TYPE_IPv4 as RTE_ETHER_TYPE_IPv4.
- rename ETHER_TYPE_IPv6 as RTE_ETHER_TYPE_IPv6.
- rename ETHER_TYPE_ARP as RTE_ETHER_TYPE_ARP.
- rename ETHER_TYPE_VLAN as RTE_ETHER_TYPE_VLAN.
- rename ETHER_TYPE_RARP as RTE_ETHER_TYPE_RARP.
- rename ETHER_TYPE_QINQ as RTE_ETHER_TYPE_QINQ.
- rename ETHER_TYPE_ETAG as RTE_ETHER_TYPE_ETAG.
- rename ETHER_TYPE_1588 as RTE_ETHER_TYPE_1588.
- rename ETHER_TYPE_SLOW as RTE_ETHER_TYPE_SLOW.
- rename ETHER_TYPE_TEB as RTE_ETHER_TYPE_TEB.
- rename ETHER_TYPE_LLDP as RTE_ETHER_TYPE_LLDP.
- rename ETHER_TYPE_MPLS as RTE_ETHER_TYPE_MPLS.
- rename ETHER_TYPE_MPLSM as RTE_ETHER_TYPE_MPLSM.
- rename ETHER_VXLAN_HLEN as RTE_ETHER_VXLAN_HLEN.
- rename ETHER_ADDR_FMT_SIZE as RTE_ETHER_ADDR_FMT_SIZE.
- rename VXLAN_GPE_TYPE_IPV4 as RTE_VXLAN_GPE_TYPE_IPV4.
- rename VXLAN_GPE_TYPE_IPV6 as RTE_VXLAN_GPE_TYPE_IPV6.
- rename VXLAN_GPE_TYPE_ETH as RTE_VXLAN_GPE_TYPE_ETH.
- rename VXLAN_GPE_TYPE_NSH as RTE_VXLAN_GPE_TYPE_NSH.
- rename VXLAN_GPE_TYPE_MPLS as RTE_VXLAN_GPE_TYPE_MPLS.
- rename VXLAN_GPE_TYPE_GBP as RTE_VXLAN_GPE_TYPE_GBP.
- rename VXLAN_GPE_TYPE_VBNG as RTE_VXLAN_GPE_TYPE_VBNG.
- rename ETHER_VXLAN_GPE_HLEN as RTE_ETHER_VXLAN_GPE_HLEN.

Do not update the command line library to avoid adding a dependency to
librte_net.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
2019-05-24 13:34:45 +02:00

1626 lines
43 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2017 6WIND S.A.
* Copyright 2017 Mellanox Technologies, Ltd
*/
/**
* @file
* Flow API operations for mlx4 driver.
*/
#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
#include <stdalign.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>
/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_ethdev_driver.h>
#include <rte_ether.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
/* PMD headers. */
#include "mlx4.h"
#include "mlx4_glue.h"
#include "mlx4_flow.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
/** Static initializer for a list of subsequent item types. */
#define NEXT_ITEM(...) \
(const enum rte_flow_item_type []){ \
__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
}
/** Processor structure associated with a flow item. */
struct mlx4_flow_proc_item {
/** Bit-mask for fields supported by this PMD. */
const void *mask_support;
/** Bit-mask to use when @p item->mask is not provided. */
const void *mask_default;
/** Size in bytes for @p mask_support and @p mask_default. */
const unsigned int mask_sz;
/** Merge a pattern item into a flow rule handle. */
int (*merge)(struct rte_flow *flow,
const struct rte_flow_item *item,
const struct mlx4_flow_proc_item *proc,
struct rte_flow_error *error);
/** Size in bytes of the destination structure. */
const unsigned int dst_sz;
/** List of possible subsequent items. */
const enum rte_flow_item_type *const next_item;
};
/** Shared resources for drop flow rules. */
struct mlx4_drop {
struct ibv_qp *qp; /**< QP target. */
struct ibv_cq *cq; /**< CQ associated with above QP. */
struct mlx4_priv *priv; /**< Back pointer to private data. */
uint32_t refcnt; /**< Reference count. */
};
/**
* Convert supported RSS hash field types between DPDK and Verbs formats.
*
* This function returns the supported (default) set when @p types has
* special value 0.
*
* @param priv
* Pointer to private structure.
* @param types
* Depending on @p verbs_to_dpdk, hash types in either DPDK (see struct
* rte_eth_rss_conf) or Verbs format.
* @param verbs_to_dpdk
* A zero value converts @p types from DPDK to Verbs, a nonzero value
* performs the reverse operation.
*
* @return
* Converted RSS hash fields on success, (uint64_t)-1 otherwise and
* rte_errno is set.
*/
uint64_t
mlx4_conv_rss_types(struct mlx4_priv *priv, uint64_t types, int verbs_to_dpdk)
{
enum {
INNER,
IPV4, IPV4_1, IPV4_2, IPV6, IPV6_1, IPV6_2, IPV6_3,
TCP, UDP,
IPV4_TCP, IPV4_UDP, IPV6_TCP, IPV6_TCP_1, IPV6_UDP, IPV6_UDP_1,
};
enum {
VERBS_IPV4 = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
VERBS_IPV6 = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6,
VERBS_TCP = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
VERBS_UDP = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP,
};
static const uint64_t dpdk[] = {
[INNER] = 0,
[IPV4] = ETH_RSS_IPV4,
[IPV4_1] = ETH_RSS_FRAG_IPV4,
[IPV4_2] = ETH_RSS_NONFRAG_IPV4_OTHER,
[IPV6] = ETH_RSS_IPV6,
[IPV6_1] = ETH_RSS_FRAG_IPV6,
[IPV6_2] = ETH_RSS_NONFRAG_IPV6_OTHER,
[IPV6_3] = ETH_RSS_IPV6_EX,
[TCP] = 0,
[UDP] = 0,
[IPV4_TCP] = ETH_RSS_NONFRAG_IPV4_TCP,
[IPV4_UDP] = ETH_RSS_NONFRAG_IPV4_UDP,
[IPV6_TCP] = ETH_RSS_NONFRAG_IPV6_TCP,
[IPV6_TCP_1] = ETH_RSS_IPV6_TCP_EX,
[IPV6_UDP] = ETH_RSS_NONFRAG_IPV6_UDP,
[IPV6_UDP_1] = ETH_RSS_IPV6_UDP_EX,
};
static const uint64_t verbs[RTE_DIM(dpdk)] = {
[INNER] = IBV_RX_HASH_INNER,
[IPV4] = VERBS_IPV4,
[IPV4_1] = VERBS_IPV4,
[IPV4_2] = VERBS_IPV4,
[IPV6] = VERBS_IPV6,
[IPV6_1] = VERBS_IPV6,
[IPV6_2] = VERBS_IPV6,
[IPV6_3] = VERBS_IPV6,
[TCP] = VERBS_TCP,
[UDP] = VERBS_UDP,
[IPV4_TCP] = VERBS_IPV4 | VERBS_TCP,
[IPV4_UDP] = VERBS_IPV4 | VERBS_UDP,
[IPV6_TCP] = VERBS_IPV6 | VERBS_TCP,
[IPV6_TCP_1] = VERBS_IPV6 | VERBS_TCP,
[IPV6_UDP] = VERBS_IPV6 | VERBS_UDP,
[IPV6_UDP_1] = VERBS_IPV6 | VERBS_UDP,
};
const uint64_t *in = verbs_to_dpdk ? verbs : dpdk;
const uint64_t *out = verbs_to_dpdk ? dpdk : verbs;
uint64_t seen = 0;
uint64_t conv = 0;
unsigned int i;
if (!types) {
if (!verbs_to_dpdk)
return priv->hw_rss_sup;
types = priv->hw_rss_sup;
}
for (i = 0; i != RTE_DIM(dpdk); ++i)
if (in[i] && (types & in[i]) == in[i]) {
seen |= types & in[i];
conv |= out[i];
}
if ((verbs_to_dpdk || (conv & priv->hw_rss_sup) == conv) &&
!(types & ~seen))
return conv;
rte_errno = ENOTSUP;
return (uint64_t)-1;
}
/**
* Merge Ethernet pattern item into flow rule handle.
*
* Additional mlx4-specific constraints on supported fields:
*
* - No support for partial masks, except in the specific case of matching
* all multicast traffic (@p spec->dst and @p mask->dst equal to
* 01:00:00:00:00:00).
* - Not providing @p item->spec or providing an empty @p mask->dst is
* *only* supported if the rule doesn't specify additional matching
* criteria (i.e. rule is promiscuous-like).
*
* @param[in, out] flow
* Flow rule handle to update.
* @param[in] item
* Pattern item to merge.
* @param[in] proc
* Associated item-processing object.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_flow_merge_eth(struct rte_flow *flow,
const struct rte_flow_item *item,
const struct mlx4_flow_proc_item *proc,
struct rte_flow_error *error)
{
const struct rte_flow_item_eth *spec = item->spec;
const struct rte_flow_item_eth *mask =
spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
struct ibv_flow_spec_eth *eth;
const char *msg;
unsigned int i;
if (mask) {
uint32_t sum_dst = 0;
uint32_t sum_src = 0;
for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) {
sum_dst += mask->dst.addr_bytes[i];
sum_src += mask->src.addr_bytes[i];
}
if (sum_src) {
msg = "mlx4 does not support source MAC matching";
goto error;
} else if (!sum_dst) {
flow->promisc = 1;
} else if (sum_dst == 1 && mask->dst.addr_bytes[0] == 1) {
if (!(spec->dst.addr_bytes[0] & 1)) {
msg = "mlx4 does not support the explicit"
" exclusion of all multicast traffic";
goto error;
}
flow->allmulti = 1;
} else if (sum_dst != (UINT8_C(0xff) * RTE_ETHER_ADDR_LEN)) {
msg = "mlx4 does not support matching partial"
" Ethernet fields";
goto error;
}
}
if (!flow->ibv_attr)
return 0;
if (flow->promisc) {
flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
return 0;
}
if (flow->allmulti) {
flow->ibv_attr->type = IBV_FLOW_ATTR_MC_DEFAULT;
return 0;
}
++flow->ibv_attr->num_of_specs;
eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
*eth = (struct ibv_flow_spec_eth) {
.type = IBV_FLOW_SPEC_ETH,
.size = sizeof(*eth),
};
if (!mask) {
eth->val.dst_mac[0] = 0xff;
flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
flow->promisc = 1;
return 0;
}
memcpy(eth->val.dst_mac, spec->dst.addr_bytes, RTE_ETHER_ADDR_LEN);
memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, RTE_ETHER_ADDR_LEN);
/* Remove unwanted bits from values. */
for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i)
eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
return 0;
error:
return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
item, msg);
}
/**
* Merge VLAN pattern item into flow rule handle.
*
* Additional mlx4-specific constraints on supported fields:
*
* - Matching *all* VLAN traffic by omitting @p item->spec or providing an
* empty @p item->mask would also include non-VLAN traffic. Doing so is
* therefore unsupported.
* - No support for partial masks.
*
* @param[in, out] flow
* Flow rule handle to update.
* @param[in] item
* Pattern item to merge.
* @param[in] proc
* Associated item-processing object.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_flow_merge_vlan(struct rte_flow *flow,
const struct rte_flow_item *item,
const struct mlx4_flow_proc_item *proc,
struct rte_flow_error *error)
{
const struct rte_flow_item_vlan *spec = item->spec;
const struct rte_flow_item_vlan *mask =
spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
struct ibv_flow_spec_eth *eth;
const char *msg;
if (!mask || !mask->tci) {
msg = "mlx4 cannot match all VLAN traffic while excluding"
" non-VLAN traffic, TCI VID must be specified";
goto error;
}
if (mask->tci != RTE_BE16(0x0fff)) {
msg = "mlx4 does not support partial TCI VID matching";
goto error;
}
if (!flow->ibv_attr)
return 0;
eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
sizeof(*eth));
eth->val.vlan_tag = spec->tci;
eth->mask.vlan_tag = mask->tci;
eth->val.vlan_tag &= eth->mask.vlan_tag;
if (flow->ibv_attr->type == IBV_FLOW_ATTR_ALL_DEFAULT)
flow->ibv_attr->type = IBV_FLOW_ATTR_NORMAL;
return 0;
error:
return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
item, msg);
}
/**
* Merge IPv4 pattern item into flow rule handle.
*
* Additional mlx4-specific constraints on supported fields:
*
* - No support for partial masks.
*
* @param[in, out] flow
* Flow rule handle to update.
* @param[in] item
* Pattern item to merge.
* @param[in] proc
* Associated item-processing object.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_flow_merge_ipv4(struct rte_flow *flow,
const struct rte_flow_item *item,
const struct mlx4_flow_proc_item *proc,
struct rte_flow_error *error)
{
const struct rte_flow_item_ipv4 *spec = item->spec;
const struct rte_flow_item_ipv4 *mask =
spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
struct ibv_flow_spec_ipv4 *ipv4;
const char *msg;
if (mask &&
((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) ||
(uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) {
msg = "mlx4 does not support matching partial IPv4 fields";
goto error;
}
if (!flow->ibv_attr)
return 0;
++flow->ibv_attr->num_of_specs;
ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
*ipv4 = (struct ibv_flow_spec_ipv4) {
.type = IBV_FLOW_SPEC_IPV4,
.size = sizeof(*ipv4),
};
if (!spec)
return 0;
ipv4->val = (struct ibv_flow_ipv4_filter) {
.src_ip = spec->hdr.src_addr,
.dst_ip = spec->hdr.dst_addr,
};
ipv4->mask = (struct ibv_flow_ipv4_filter) {
.src_ip = mask->hdr.src_addr,
.dst_ip = mask->hdr.dst_addr,
};
/* Remove unwanted bits from values. */
ipv4->val.src_ip &= ipv4->mask.src_ip;
ipv4->val.dst_ip &= ipv4->mask.dst_ip;
return 0;
error:
return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
item, msg);
}
/**
* Merge UDP pattern item into flow rule handle.
*
* Additional mlx4-specific constraints on supported fields:
*
* - No support for partial masks.
* - Due to HW/FW limitation, flow rule priority is not taken into account
* when matching UDP destination ports, doing is therefore only supported
* at the highest priority level (0).
*
* @param[in, out] flow
* Flow rule handle to update.
* @param[in] item
* Pattern item to merge.
* @param[in] proc
* Associated item-processing object.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_flow_merge_udp(struct rte_flow *flow,
const struct rte_flow_item *item,
const struct mlx4_flow_proc_item *proc,
struct rte_flow_error *error)
{
const struct rte_flow_item_udp *spec = item->spec;
const struct rte_flow_item_udp *mask =
spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
struct ibv_flow_spec_tcp_udp *udp;
const char *msg;
if (mask &&
((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
(uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
msg = "mlx4 does not support matching partial UDP fields";
goto error;
}
if (mask && mask->hdr.dst_port && flow->priority) {
msg = "combining UDP destination port matching with a nonzero"
" priority level is not supported";
goto error;
}
if (!flow->ibv_attr)
return 0;
++flow->ibv_attr->num_of_specs;
udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
*udp = (struct ibv_flow_spec_tcp_udp) {
.type = IBV_FLOW_SPEC_UDP,
.size = sizeof(*udp),
};
if (!spec)
return 0;
udp->val.dst_port = spec->hdr.dst_port;
udp->val.src_port = spec->hdr.src_port;
udp->mask.dst_port = mask->hdr.dst_port;
udp->mask.src_port = mask->hdr.src_port;
/* Remove unwanted bits from values. */
udp->val.src_port &= udp->mask.src_port;
udp->val.dst_port &= udp->mask.dst_port;
return 0;
error:
return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
item, msg);
}
/**
* Merge TCP pattern item into flow rule handle.
*
* Additional mlx4-specific constraints on supported fields:
*
* - No support for partial masks.
*
* @param[in, out] flow
* Flow rule handle to update.
* @param[in] item
* Pattern item to merge.
* @param[in] proc
* Associated item-processing object.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_flow_merge_tcp(struct rte_flow *flow,
const struct rte_flow_item *item,
const struct mlx4_flow_proc_item *proc,
struct rte_flow_error *error)
{
const struct rte_flow_item_tcp *spec = item->spec;
const struct rte_flow_item_tcp *mask =
spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
struct ibv_flow_spec_tcp_udp *tcp;
const char *msg;
if (mask &&
((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
(uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
msg = "mlx4 does not support matching partial TCP fields";
goto error;
}
if (!flow->ibv_attr)
return 0;
++flow->ibv_attr->num_of_specs;
tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
*tcp = (struct ibv_flow_spec_tcp_udp) {
.type = IBV_FLOW_SPEC_TCP,
.size = sizeof(*tcp),
};
if (!spec)
return 0;
tcp->val.dst_port = spec->hdr.dst_port;
tcp->val.src_port = spec->hdr.src_port;
tcp->mask.dst_port = mask->hdr.dst_port;
tcp->mask.src_port = mask->hdr.src_port;
/* Remove unwanted bits from values. */
tcp->val.src_port &= tcp->mask.src_port;
tcp->val.dst_port &= tcp->mask.dst_port;
return 0;
error:
return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
item, msg);
}
/**
* Perform basic sanity checks on a pattern item.
*
* @param[in] item
* Item specification.
* @param[in] proc
* Associated item-processing object.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_flow_item_check(const struct rte_flow_item *item,
const struct mlx4_flow_proc_item *proc,
struct rte_flow_error *error)
{
const uint8_t *mask;
unsigned int i;
/* item->last and item->mask cannot exist without item->spec. */
if (!item->spec && (item->mask || item->last))
return rte_flow_error_set
(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
"\"mask\" or \"last\" field provided without a"
" corresponding \"spec\"");
/* No spec, no mask, no problem. */
if (!item->spec)
return 0;
mask = item->mask ?
(const uint8_t *)item->mask :
(const uint8_t *)proc->mask_default;
assert(mask);
/*
* Single-pass check to make sure that:
* - Mask is supported, no bits are set outside proc->mask_support.
* - Both item->spec and item->last are included in mask.
*/
for (i = 0; i != proc->mask_sz; ++i) {
if (!mask[i])
continue;
if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) !=
((const uint8_t *)proc->mask_support)[i])
return rte_flow_error_set
(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
item, "unsupported field found in \"mask\"");
if (item->last &&
(((const uint8_t *)item->spec)[i] & mask[i]) !=
(((const uint8_t *)item->last)[i] & mask[i]))
return rte_flow_error_set
(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
item,
"range between \"spec\" and \"last\""
" is larger than \"mask\"");
}
return 0;
}
/** Graph of supported items and associated actions. */
static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
[RTE_FLOW_ITEM_TYPE_END] = {
.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
},
[RTE_FLOW_ITEM_TYPE_ETH] = {
.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
RTE_FLOW_ITEM_TYPE_IPV4),
.mask_support = &(const struct rte_flow_item_eth){
/* Only destination MAC can be matched. */
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
},
.mask_default = &rte_flow_item_eth_mask,
.mask_sz = sizeof(struct rte_flow_item_eth),
.merge = mlx4_flow_merge_eth,
.dst_sz = sizeof(struct ibv_flow_spec_eth),
},
[RTE_FLOW_ITEM_TYPE_VLAN] = {
.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
.mask_support = &(const struct rte_flow_item_vlan){
/* Only TCI VID matching is supported. */
.tci = RTE_BE16(0x0fff),
},
.mask_default = &rte_flow_item_vlan_mask,
.mask_sz = sizeof(struct rte_flow_item_vlan),
.merge = mlx4_flow_merge_vlan,
.dst_sz = 0,
},
[RTE_FLOW_ITEM_TYPE_IPV4] = {
.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
RTE_FLOW_ITEM_TYPE_TCP),
.mask_support = &(const struct rte_flow_item_ipv4){
.hdr = {
.src_addr = RTE_BE32(0xffffffff),
.dst_addr = RTE_BE32(0xffffffff),
},
},
.mask_default = &rte_flow_item_ipv4_mask,
.mask_sz = sizeof(struct rte_flow_item_ipv4),
.merge = mlx4_flow_merge_ipv4,
.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
},
[RTE_FLOW_ITEM_TYPE_UDP] = {
.mask_support = &(const struct rte_flow_item_udp){
.hdr = {
.src_port = RTE_BE16(0xffff),
.dst_port = RTE_BE16(0xffff),
},
},
.mask_default = &rte_flow_item_udp_mask,
.mask_sz = sizeof(struct rte_flow_item_udp),
.merge = mlx4_flow_merge_udp,
.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
},
[RTE_FLOW_ITEM_TYPE_TCP] = {
.mask_support = &(const struct rte_flow_item_tcp){
.hdr = {
.src_port = RTE_BE16(0xffff),
.dst_port = RTE_BE16(0xffff),
},
},
.mask_default = &rte_flow_item_tcp_mask,
.mask_sz = sizeof(struct rte_flow_item_tcp),
.merge = mlx4_flow_merge_tcp,
.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
},
};
/**
* Make sure a flow rule is supported and initialize associated structure.
*
* @param priv
* Pointer to private structure.
* @param[in] attr
* Flow rule attributes.
* @param[in] pattern
* Pattern specification (list terminated by the END pattern item).
* @param[in] actions
* Associated actions (list terminated by the END action).
* @param[out] error
* Perform verbose error reporting if not NULL.
* @param[in, out] addr
* Buffer where the resulting flow rule handle pointer must be stored.
* If NULL, stop processing after validation stage.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_flow_prepare(struct mlx4_priv *priv,
const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
struct rte_flow_error *error,
struct rte_flow **addr)
{
const struct rte_flow_item *item;
const struct rte_flow_action *action;
const struct mlx4_flow_proc_item *proc;
struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
struct rte_flow *flow = &temp;
const char *msg = NULL;
int overlap;
if (attr->group)
return rte_flow_error_set
(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
NULL, "groups are not supported");
if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
return rte_flow_error_set
(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
NULL, "maximum priority level is "
MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
if (attr->egress)
return rte_flow_error_set
(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
NULL, "egress is not supported");
if (attr->transfer)
return rte_flow_error_set
(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
NULL, "transfer is not supported");
if (!attr->ingress)
return rte_flow_error_set
(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
NULL, "only ingress is supported");
fill:
overlap = 0;
proc = mlx4_flow_proc_item_list;
flow->priority = attr->priority;
/* Go over pattern. */
for (item = pattern; item->type; ++item) {
const struct mlx4_flow_proc_item *next = NULL;
unsigned int i;
int err;
if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
continue;
if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
flow->internal = 1;
continue;
}
if (flow->promisc || flow->allmulti) {
msg = "mlx4 does not support additional matching"
" criteria combined with indiscriminate"
" matching on Ethernet headers";
goto exit_item_not_supported;
}
for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
if (proc->next_item[i] == item->type) {
next = &mlx4_flow_proc_item_list[item->type];
break;
}
}
if (!next)
goto exit_item_not_supported;
proc = next;
/*
* Perform basic sanity checks only once, while handle is
* not allocated.
*/
if (flow == &temp) {
err = mlx4_flow_item_check(item, proc, error);
if (err)
return err;
}
if (proc->merge) {
err = proc->merge(flow, item, proc, error);
if (err)
return err;
}
flow->ibv_attr_size += proc->dst_sz;
}
/* Go over actions list. */
for (action = actions; action->type; ++action) {
/* This one may appear anywhere multiple times. */
if (action->type == RTE_FLOW_ACTION_TYPE_VOID)
continue;
/* Fate-deciding actions may appear exactly once. */
if (overlap) {
msg = "cannot combine several fate-deciding actions,"
" choose between DROP, QUEUE or RSS";
goto exit_action_not_supported;
}
overlap = 1;
switch (action->type) {
const struct rte_flow_action_queue *queue;
const struct rte_flow_action_rss *rss;
const uint8_t *rss_key;
uint32_t rss_key_len;
uint64_t fields;
unsigned int i;
case RTE_FLOW_ACTION_TYPE_DROP:
flow->drop = 1;
break;
case RTE_FLOW_ACTION_TYPE_QUEUE:
if (flow->rss)
break;
queue = action->conf;
if (queue->index >= ETH_DEV(priv)->data->nb_rx_queues) {
msg = "queue target index beyond number of"
" configured Rx queues";
goto exit_action_not_supported;
}
flow->rss = mlx4_rss_get
(priv, 0, mlx4_rss_hash_key_default, 1,
&queue->index);
if (!flow->rss) {
msg = "not enough resources for additional"
" single-queue RSS context";
goto exit_action_not_supported;
}
break;
case RTE_FLOW_ACTION_TYPE_RSS:
if (flow->rss)
break;
rss = action->conf;
/* Default RSS configuration if none is provided. */
if (rss->key_len) {
rss_key = rss->key;
rss_key_len = rss->key_len;
} else {
rss_key = mlx4_rss_hash_key_default;
rss_key_len = MLX4_RSS_HASH_KEY_SIZE;
}
/* Sanity checks. */
for (i = 0; i < rss->queue_num; ++i)
if (rss->queue[i] >=
ETH_DEV(priv)->data->nb_rx_queues)
break;
if (i != rss->queue_num) {
msg = "queue index target beyond number of"
" configured Rx queues";
goto exit_action_not_supported;
}
if (!rte_is_power_of_2(rss->queue_num)) {
msg = "for RSS, mlx4 requires the number of"
" queues to be a power of two";
goto exit_action_not_supported;
}
if (rss_key_len != sizeof(flow->rss->key)) {
msg = "mlx4 supports exactly one RSS hash key"
" length: "
MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE);
goto exit_action_not_supported;
}
for (i = 1; i < rss->queue_num; ++i)
if (rss->queue[i] - rss->queue[i - 1] != 1)
break;
if (i != rss->queue_num) {
msg = "mlx4 requires RSS contexts to use"
" consecutive queue indices only";
goto exit_action_not_supported;
}
if (rss->queue[0] % rss->queue_num) {
msg = "mlx4 requires the first queue of a RSS"
" context to be aligned on a multiple"
" of the context size";
goto exit_action_not_supported;
}
if (rss->func &&
rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
msg = "the only supported RSS hash function"
" is Toeplitz";
goto exit_action_not_supported;
}
if (rss->level) {
msg = "a nonzero RSS encapsulation level is"
" not supported";
goto exit_action_not_supported;
}
rte_errno = 0;
fields = mlx4_conv_rss_types(priv, rss->types, 0);
if (fields == (uint64_t)-1 && rte_errno) {
msg = "unsupported RSS hash type requested";
goto exit_action_not_supported;
}
flow->rss = mlx4_rss_get
(priv, fields, rss_key, rss->queue_num,
rss->queue);
if (!flow->rss) {
msg = "either invalid parameters or not enough"
" resources for additional multi-queue"
" RSS context";
goto exit_action_not_supported;
}
break;
default:
goto exit_action_not_supported;
}
}
/* When fate is unknown, drop traffic. */
if (!overlap)
flow->drop = 1;
/* Validation ends here. */
if (!addr) {
if (flow->rss)
mlx4_rss_put(flow->rss);
return 0;
}
if (flow == &temp) {
/* Allocate proper handle based on collected data. */
const struct mlx4_malloc_vec vec[] = {
{
.align = alignof(struct rte_flow),
.size = sizeof(*flow),
.addr = (void **)&flow,
},
{
.align = alignof(struct ibv_flow_attr),
.size = temp.ibv_attr_size,
.addr = (void **)&temp.ibv_attr,
},
};
if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec))) {
if (temp.rss)
mlx4_rss_put(temp.rss);
return rte_flow_error_set
(error, -rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"flow rule handle allocation failure");
}
/* Most fields will be updated by second pass. */
*flow = (struct rte_flow){
.ibv_attr = temp.ibv_attr,
.ibv_attr_size = sizeof(*flow->ibv_attr),
.rss = temp.rss,
};
*flow->ibv_attr = (struct ibv_flow_attr){
.type = IBV_FLOW_ATTR_NORMAL,
.size = sizeof(*flow->ibv_attr),
.priority = attr->priority,
.port = priv->port,
};
goto fill;
}
*addr = flow;
return 0;
exit_item_not_supported:
return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
item, msg ? msg : "item not supported");
exit_action_not_supported:
return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
action, msg ? msg : "action not supported");
}
/**
* Validate a flow supported by the NIC.
*
* @see rte_flow_validate()
* @see rte_flow_ops
*/
static int
mlx4_flow_validate(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
{
struct mlx4_priv *priv = dev->data->dev_private;
return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
}
/**
* Get a drop flow rule resources instance.
*
* @param priv
* Pointer to private structure.
*
* @return
* Pointer to drop flow resources on success, NULL otherwise and rte_errno
* is set.
*/
static struct mlx4_drop *
mlx4_drop_get(struct mlx4_priv *priv)
{
struct mlx4_drop *drop = priv->drop;
if (drop) {
assert(drop->refcnt);
assert(drop->priv == priv);
++drop->refcnt;
return drop;
}
drop = rte_malloc(__func__, sizeof(*drop), 0);
if (!drop)
goto error;
*drop = (struct mlx4_drop){
.priv = priv,
.refcnt = 1,
};
drop->cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
if (!drop->cq)
goto error;
drop->qp = mlx4_glue->create_qp
(priv->pd,
&(struct ibv_qp_init_attr){
.send_cq = drop->cq,
.recv_cq = drop->cq,
.qp_type = IBV_QPT_RAW_PACKET,
});
if (!drop->qp)
goto error;
priv->drop = drop;
return drop;
error:
if (drop->qp)
claim_zero(mlx4_glue->destroy_qp(drop->qp));
if (drop->cq)
claim_zero(mlx4_glue->destroy_cq(drop->cq));
if (drop)
rte_free(drop);
rte_errno = ENOMEM;
return NULL;
}
/**
* Give back a drop flow rule resources instance.
*
* @param drop
* Pointer to drop flow rule resources.
*/
static void
mlx4_drop_put(struct mlx4_drop *drop)
{
assert(drop->refcnt);
if (--drop->refcnt)
return;
drop->priv->drop = NULL;
claim_zero(mlx4_glue->destroy_qp(drop->qp));
claim_zero(mlx4_glue->destroy_cq(drop->cq));
rte_free(drop);
}
/**
* Toggle a configured flow rule.
*
* @param priv
* Pointer to private structure.
* @param flow
* Flow rule handle to toggle.
* @param enable
* Whether associated Verbs flow must be created or removed.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_flow_toggle(struct mlx4_priv *priv,
struct rte_flow *flow,
int enable,
struct rte_flow_error *error)
{
struct ibv_qp *qp = NULL;
const char *msg;
int err;
if (!enable) {
if (!flow->ibv_flow)
return 0;
claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
flow->ibv_flow = NULL;
if (flow->drop)
mlx4_drop_put(priv->drop);
else if (flow->rss)
mlx4_rss_detach(flow->rss);
return 0;
}
assert(flow->ibv_attr);
if (!flow->internal &&
!priv->isolated &&
flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
if (flow->ibv_flow) {
claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
flow->ibv_flow = NULL;
if (flow->drop)
mlx4_drop_put(priv->drop);
else if (flow->rss)
mlx4_rss_detach(flow->rss);
}
err = EACCES;
msg = ("priority level "
MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
" is reserved when not in isolated mode");
goto error;
}
if (flow->rss) {
struct mlx4_rss *rss = flow->rss;
int missing = 0;
unsigned int i;
/* Stop at the first nonexistent target queue. */
for (i = 0; i != rss->queues; ++i)
if (rss->queue_id[i] >=
ETH_DEV(priv)->data->nb_rx_queues ||
!ETH_DEV(priv)->data->rx_queues[rss->queue_id[i]]) {
missing = 1;
break;
}
if (flow->ibv_flow) {
if (missing ^ !flow->drop)
return 0;
/* Verbs flow needs updating. */
claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
flow->ibv_flow = NULL;
if (flow->drop)
mlx4_drop_put(priv->drop);
else
mlx4_rss_detach(rss);
}
if (!missing) {
err = mlx4_rss_attach(rss);
if (err) {
err = -err;
msg = "cannot create indirection table or hash"
" QP to associate flow rule with";
goto error;
}
qp = rss->qp;
}
/* A missing target queue drops traffic implicitly. */
flow->drop = missing;
}
if (flow->drop) {
if (flow->ibv_flow)
return 0;
mlx4_drop_get(priv);
if (!priv->drop) {
err = rte_errno;
msg = "resources for drop flow rule cannot be created";
goto error;
}
qp = priv->drop->qp;
}
assert(qp);
if (flow->ibv_flow)
return 0;
flow->ibv_flow = mlx4_glue->create_flow(qp, flow->ibv_attr);
if (flow->ibv_flow)
return 0;
if (flow->drop)
mlx4_drop_put(priv->drop);
else if (flow->rss)
mlx4_rss_detach(flow->rss);
err = errno;
msg = "flow rule rejected by device";
error:
return rte_flow_error_set
(error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
}
/**
* Create a flow.
*
* @see rte_flow_create()
* @see rte_flow_ops
*/
static struct rte_flow *
mlx4_flow_create(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
{
struct mlx4_priv *priv = dev->data->dev_private;
struct rte_flow *flow;
int err;
err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
if (err)
return NULL;
err = mlx4_flow_toggle(priv, flow, priv->started, error);
if (!err) {
struct rte_flow *curr = LIST_FIRST(&priv->flows);
/* New rules are inserted after internal ones. */
if (!curr || !curr->internal) {
LIST_INSERT_HEAD(&priv->flows, flow, next);
} else {
while (LIST_NEXT(curr, next) &&
LIST_NEXT(curr, next)->internal)
curr = LIST_NEXT(curr, next);
LIST_INSERT_AFTER(curr, flow, next);
}
return flow;
}
if (flow->rss)
mlx4_rss_put(flow->rss);
rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
error->message);
rte_free(flow);
return NULL;
}
/**
* Configure isolated mode.
*
* @see rte_flow_isolate()
* @see rte_flow_ops
*/
static int
mlx4_flow_isolate(struct rte_eth_dev *dev,
int enable,
struct rte_flow_error *error)
{
struct mlx4_priv *priv = dev->data->dev_private;
if (!!enable == !!priv->isolated)
return 0;
priv->isolated = !!enable;
if (mlx4_flow_sync(priv, error)) {
priv->isolated = !enable;
return -rte_errno;
}
return 0;
}
/**
* Destroy a flow rule.
*
* @see rte_flow_destroy()
* @see rte_flow_ops
*/
static int
mlx4_flow_destroy(struct rte_eth_dev *dev,
struct rte_flow *flow,
struct rte_flow_error *error)
{
struct mlx4_priv *priv = dev->data->dev_private;
int err = mlx4_flow_toggle(priv, flow, 0, error);
if (err)
return err;
LIST_REMOVE(flow, next);
if (flow->rss)
mlx4_rss_put(flow->rss);
rte_free(flow);
return 0;
}
/**
* Destroy user-configured flow rules.
*
* This function skips internal flows rules.
*
* @see rte_flow_flush()
* @see rte_flow_ops
*/
static int
mlx4_flow_flush(struct rte_eth_dev *dev,
struct rte_flow_error *error)
{
struct mlx4_priv *priv = dev->data->dev_private;
struct rte_flow *flow = LIST_FIRST(&priv->flows);
while (flow) {
struct rte_flow *next = LIST_NEXT(flow, next);
if (!flow->internal)
mlx4_flow_destroy(dev, flow, error);
flow = next;
}
return 0;
}
/**
* Helper function to determine the next configured VLAN filter.
*
* @param priv
* Pointer to private structure.
* @param vlan
* VLAN ID to use as a starting point.
*
* @return
* Next configured VLAN ID or a high value (>= 4096) if there is none.
*/
static uint16_t
mlx4_flow_internal_next_vlan(struct mlx4_priv *priv, uint16_t vlan)
{
while (vlan < 4096) {
if (ETH_DEV(priv)->data->vlan_filter_conf.ids[vlan / 64] &
(UINT64_C(1) << (vlan % 64)))
return vlan;
++vlan;
}
return vlan;
}
/**
* Generate internal flow rules.
*
* Various flow rules are created depending on the mode the device is in:
*
* 1. Promiscuous:
* port MAC + broadcast + catch-all (VLAN filtering is ignored).
* 2. All multicast:
* port MAC/VLAN + broadcast + catch-all multicast.
* 3. Otherwise:
* port MAC/VLAN + broadcast MAC/VLAN.
*
* About MAC flow rules:
*
* - MAC flow rules are generated from @p dev->data->mac_addrs
* (@p priv->mac array).
* - An additional flow rule for Ethernet broadcasts is also generated.
* - All these are per-VLAN if @p DEV_RX_OFFLOAD_VLAN_FILTER
* is enabled and VLAN filters are configured.
*
* @param priv
* Pointer to private structure.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
{
struct rte_flow_attr attr = {
.priority = MLX4_FLOW_PRIORITY_LAST,
.ingress = 1,
};
struct rte_flow_item_eth eth_spec;
const struct rte_flow_item_eth eth_mask = {
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
};
const struct rte_flow_item_eth eth_allmulti = {
.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
};
struct rte_flow_item_vlan vlan_spec;
const struct rte_flow_item_vlan vlan_mask = {
.tci = RTE_BE16(0x0fff),
};
struct rte_flow_item pattern[] = {
{
.type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
},
{
.type = RTE_FLOW_ITEM_TYPE_ETH,
.spec = &eth_spec,
.mask = &eth_mask,
},
{
/* Replaced with VLAN if filtering is enabled. */
.type = RTE_FLOW_ITEM_TYPE_END,
},
{
.type = RTE_FLOW_ITEM_TYPE_END,
},
};
/*
* Round number of queues down to their previous power of 2 to
* comply with RSS context limitations. Extra queues silently do not
* get RSS by default.
*/
uint32_t queues =
rte_align32pow2(ETH_DEV(priv)->data->nb_rx_queues + 1) >> 1;
uint16_t queue[queues];
struct rte_flow_action_rss action_rss = {
.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
.level = 0,
.types = 0,
.key_len = MLX4_RSS_HASH_KEY_SIZE,
.queue_num = queues,
.key = mlx4_rss_hash_key_default,
.queue = queue,
};
struct rte_flow_action actions[] = {
{
.type = RTE_FLOW_ACTION_TYPE_RSS,
.conf = &action_rss,
},
{
.type = RTE_FLOW_ACTION_TYPE_END,
},
};
struct rte_ether_addr *rule_mac = &eth_spec.dst;
rte_be16_t *rule_vlan =
(ETH_DEV(priv)->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_VLAN_FILTER) &&
!ETH_DEV(priv)->data->promiscuous ?
&vlan_spec.tci :
NULL;
uint16_t vlan = 0;
struct rte_flow *flow;
unsigned int i;
int err = 0;
/* Nothing to be done if there are no Rx queues. */
if (!queues)
goto error;
/* Prepare default RSS configuration. */
for (i = 0; i != queues; ++i)
queue[i] = i;
/*
* Set up VLAN item if filtering is enabled and at least one VLAN
* filter is configured.
*/
if (rule_vlan) {
vlan = mlx4_flow_internal_next_vlan(priv, 0);
if (vlan < 4096) {
pattern[2] = (struct rte_flow_item){
.type = RTE_FLOW_ITEM_TYPE_VLAN,
.spec = &vlan_spec,
.mask = &vlan_mask,
};
next_vlan:
*rule_vlan = rte_cpu_to_be_16(vlan);
} else {
rule_vlan = NULL;
}
}
for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) {
const struct rte_ether_addr *mac;
/* Broadcasts are handled by an extra iteration. */
if (i < RTE_DIM(priv->mac))
mac = &priv->mac[i];
else
mac = &eth_mask.dst;
if (rte_is_zero_ether_addr(mac))
continue;
/* Check if MAC flow rule is already present. */
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_NEXT(flow, next)) {
const struct ibv_flow_spec_eth *eth =
(const void *)((uintptr_t)flow->ibv_attr +
sizeof(*flow->ibv_attr));
unsigned int j;
if (!flow->mac)
continue;
assert(flow->ibv_attr->type == IBV_FLOW_ATTR_NORMAL);
assert(flow->ibv_attr->num_of_specs == 1);
assert(eth->type == IBV_FLOW_SPEC_ETH);
assert(flow->rss);
if (rule_vlan &&
(eth->val.vlan_tag != *rule_vlan ||
eth->mask.vlan_tag != RTE_BE16(0x0fff)))
continue;
if (!rule_vlan && eth->mask.vlan_tag)
continue;
for (j = 0; j != sizeof(mac->addr_bytes); ++j)
if (eth->val.dst_mac[j] != mac->addr_bytes[j] ||
eth->mask.dst_mac[j] != UINT8_C(0xff) ||
eth->val.src_mac[j] != UINT8_C(0x00) ||
eth->mask.src_mac[j] != UINT8_C(0x00))
break;
if (j != sizeof(mac->addr_bytes))
continue;
if (flow->rss->queues != queues ||
memcmp(flow->rss->queue_id, action_rss.queue,
queues * sizeof(flow->rss->queue_id[0])))
continue;
break;
}
if (!flow || !flow->internal) {
/* Not found, create a new flow rule. */
memcpy(rule_mac, mac, sizeof(*mac));
flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
actions, error);
if (!flow) {
err = -rte_errno;
goto error;
}
}
flow->select = 1;
flow->mac = 1;
}
if (rule_vlan) {
vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1);
if (vlan < 4096)
goto next_vlan;
}
/* Take care of promiscuous and all multicast flow rules. */
if (ETH_DEV(priv)->data->promiscuous ||
ETH_DEV(priv)->data->all_multicast) {
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_NEXT(flow, next)) {
if (ETH_DEV(priv)->data->promiscuous) {
if (flow->promisc)
break;
} else {
assert(ETH_DEV(priv)->data->all_multicast);
if (flow->allmulti)
break;
}
}
if (flow && flow->internal) {
assert(flow->rss);
if (flow->rss->queues != queues ||
memcmp(flow->rss->queue_id, action_rss.queue,
queues * sizeof(flow->rss->queue_id[0])))
flow = NULL;
}
if (!flow || !flow->internal) {
/* Not found, create a new flow rule. */
if (ETH_DEV(priv)->data->promiscuous) {
pattern[1].spec = NULL;
pattern[1].mask = NULL;
} else {
assert(ETH_DEV(priv)->data->all_multicast);
pattern[1].spec = &eth_allmulti;
pattern[1].mask = &eth_allmulti;
}
pattern[2] = pattern[3];
flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
actions, error);
if (!flow) {
err = -rte_errno;
goto error;
}
}
assert(flow->promisc || flow->allmulti);
flow->select = 1;
}
error:
/* Clear selection and clean up stale internal flow rules. */
flow = LIST_FIRST(&priv->flows);
while (flow && flow->internal) {
struct rte_flow *next = LIST_NEXT(flow, next);
if (!flow->select)
claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
error));
else
flow->select = 0;
flow = next;
}
return err;
}
/**
* Synchronize flow rules.
*
* This function synchronizes flow rules with the state of the device by
* taking into account isolated mode and whether target queues are
* configured.
*
* @param priv
* Pointer to private structure.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx4_flow_sync(struct mlx4_priv *priv, struct rte_flow_error *error)
{
struct rte_flow *flow;
int ret;
/* Internal flow rules are guaranteed to come first in the list. */
if (priv->isolated) {
/*
* Get rid of them in isolated mode, stop at the first
* non-internal rule found.
*/
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_FIRST(&priv->flows))
claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
error));
} else {
/* Refresh internal rules. */
ret = mlx4_flow_internal(priv, error);
if (ret)
return ret;
}
/* Toggle the remaining flow rules . */
LIST_FOREACH(flow, &priv->flows, next) {
ret = mlx4_flow_toggle(priv, flow, priv->started, error);
if (ret)
return ret;
}
if (!priv->started)
assert(!priv->drop);
return 0;
}
/**
* Clean up all flow rules.
*
* Unlike mlx4_flow_flush(), this function takes care of all remaining flow
* rules regardless of whether they are internal or user-configured.
*
* @param priv
* Pointer to private structure.
*/
void
mlx4_flow_clean(struct mlx4_priv *priv)
{
struct rte_flow *flow;
while ((flow = LIST_FIRST(&priv->flows)))
mlx4_flow_destroy(ETH_DEV(priv), flow, NULL);
assert(LIST_EMPTY(&priv->rss));
}
static const struct rte_flow_ops mlx4_flow_ops = {
.validate = mlx4_flow_validate,
.create = mlx4_flow_create,
.destroy = mlx4_flow_destroy,
.flush = mlx4_flow_flush,
.isolate = mlx4_flow_isolate,
};
/**
* Manage filter operations.
*
* @param dev
* Pointer to Ethernet device structure.
* @param filter_type
* Filter type.
* @param filter_op
* Operation to perform.
* @param arg
* Pointer to operation-specific structure.
*
* @return
* 0 on success, negative errno value otherwise and rte_errno is set.
*/
int
mlx4_filter_ctrl(struct rte_eth_dev *dev,
enum rte_filter_type filter_type,
enum rte_filter_op filter_op,
void *arg)
{
switch (filter_type) {
case RTE_ETH_FILTER_GENERIC:
if (filter_op != RTE_ETH_FILTER_GET)
break;
*(const void **)arg = &mlx4_flow_ops;
return 0;
default:
ERROR("%p: filter type (%d) not supported",
(void *)dev, filter_type);
break;
}
rte_errno = ENOTSUP;
return -rte_errno;
}