numam-dpdk/drivers/net/mlx5/mlx5_flow_tcf.c
Viacheslav Ovsiienko 95a464cecc net/mlx5: add E-switch VXLAN tunnel devices management
VXLAN interfaces are dynamically created for each local UDP port
of outer networks and then used as targets for TC "flower" filters
in order to perform encapsulation. These VXLAN interfaces are
system-wide: only one device with a given UDP port can exist in
the system (an attempt to create another device with the same
local UDP port returns EEXIST), so the PMD must maintain a device
instance database shared across PMD instances. These implicitly
created VXLAN devices are called VTEPs (Virtual Tunnel
End Points).

A VTEP is created at the moment a rule is applied: the link is
set up and the root ingress qdisc is initialized as well.

Encapsulation VTEPs are created on a per-port basis: a single
VTEP is attached to the outer interface and shared by all
encapsulation rules on that interface. The source UDP port is
selected automatically from the range 30000-60000.

For decapsulation, one VTEP is created per unique local UDP port
to accept tunnel traffic. The name of the created VTEP consists
of the prefix "vmlx_" followed by the UDP port number in decimal
without leading zeros (vmlx_4789). A VTEP may also be created in
the system beforehand, before the application is launched, which
allows UDP ports to be shared between primary and secondary
processes.
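
A minimal sketch of the name derivation (illustrative only; "name"
and "port" are hypothetical locals, IFNAMSIZ comes from <net/if.h>):

	char name[IFNAMSIZ];

	/* E.g. UDP port 4789 yields "vmlx_4789". */
	snprintf(name, sizeof(name), "vmlx_%u", (unsigned int)port);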

Suggested-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
2018-11-05 15:01:25 +01:00


/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2018 6WIND S.A.
* Copyright 2018 Mellanox Technologies, Ltd
*/
#include <assert.h>
#include <errno.h>
#include <libmnl/libmnl.h>
#include <linux/gen_stats.h>
#include <linux/if_ether.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/tc_act/tc_gact.h>
#include <linux/tc_act/tc_mirred.h>
#include <netinet/in.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_ether.h>
#include <rte_flow.h>
#include <rte_malloc.h>
#include <rte_common.h>
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_autoconf.h"
#ifdef HAVE_TC_ACT_VLAN
#include <linux/tc_act/tc_vlan.h>
#else /* HAVE_TC_ACT_VLAN */
#define TCA_VLAN_ACT_POP 1
#define TCA_VLAN_ACT_PUSH 2
#define TCA_VLAN_ACT_MODIFY 3
#define TCA_VLAN_PARMS 2
#define TCA_VLAN_PUSH_VLAN_ID 3
#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
#define TCA_VLAN_PAD 5
#define TCA_VLAN_PUSH_VLAN_PRIORITY 6
struct tc_vlan {
tc_gen;
int v_action;
};
#endif /* HAVE_TC_ACT_VLAN */
#ifdef HAVE_TC_ACT_PEDIT
#include <linux/tc_act/tc_pedit.h>
#else /* HAVE_TC_ACT_PEDIT */
enum {
TCA_PEDIT_UNSPEC,
TCA_PEDIT_TM,
TCA_PEDIT_PARMS,
TCA_PEDIT_PAD,
TCA_PEDIT_PARMS_EX,
TCA_PEDIT_KEYS_EX,
TCA_PEDIT_KEY_EX,
__TCA_PEDIT_MAX
};
enum {
TCA_PEDIT_KEY_EX_HTYPE = 1,
TCA_PEDIT_KEY_EX_CMD = 2,
__TCA_PEDIT_KEY_EX_MAX
};
enum pedit_header_type {
TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
__PEDIT_HDR_TYPE_MAX,
};
enum pedit_cmd {
TCA_PEDIT_KEY_EX_CMD_SET = 0,
TCA_PEDIT_KEY_EX_CMD_ADD = 1,
__PEDIT_CMD_MAX,
};
struct tc_pedit_key {
__u32 mask; /* AND */
__u32 val; /* XOR */
__u32 off; /* offset */
__u32 at;
__u32 offmask;
__u32 shift;
};
__extension__
struct tc_pedit_sel {
tc_gen;
unsigned char nkeys;
unsigned char flags;
struct tc_pedit_key keys[0];
};
#endif /* HAVE_TC_ACT_PEDIT */
#ifdef HAVE_TC_ACT_TUNNEL_KEY
#include <linux/tc_act/tc_tunnel_key.h>
#ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
#define TCA_TUNNEL_KEY_ENC_DST_PORT 9
#endif
#ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM
#define TCA_TUNNEL_KEY_NO_CSUM 10
#endif
#else /* HAVE_TC_ACT_TUNNEL_KEY */
#define TCA_ACT_TUNNEL_KEY 17
#define TCA_TUNNEL_KEY_ACT_SET 1
#define TCA_TUNNEL_KEY_ACT_RELEASE 2
#define TCA_TUNNEL_KEY_PARMS 2
#define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
#define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
#define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
#define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
#define TCA_TUNNEL_KEY_ENC_KEY_ID 7
#define TCA_TUNNEL_KEY_ENC_DST_PORT 9
#define TCA_TUNNEL_KEY_NO_CSUM 10
struct tc_tunnel_key {
tc_gen;
int t_action;
};
#endif /* HAVE_TC_ACT_TUNNEL_KEY */
/* Normally found in linux/netlink.h. */
#ifndef NETLINK_CAP_ACK
#define NETLINK_CAP_ACK 10
#endif
/* Normally found in linux/pkt_sched.h. */
#ifndef TC_H_MIN_INGRESS
#define TC_H_MIN_INGRESS 0xfff2u
#endif
/* Normally found in linux/pkt_cls.h. */
#ifndef TCA_CLS_FLAGS_SKIP_SW
#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
#endif
#ifndef HAVE_TCA_CHAIN
#define TCA_CHAIN 11
#endif
#ifndef HAVE_TCA_FLOWER_ACT
#define TCA_FLOWER_ACT 3
#endif
#ifndef HAVE_TCA_FLOWER_FLAGS
#define TCA_FLOWER_FLAGS 22
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
#define TCA_FLOWER_KEY_ETH_TYPE 8
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
#define TCA_FLOWER_KEY_ETH_DST 4
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
#define TCA_FLOWER_KEY_ETH_DST_MASK 5
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
#define TCA_FLOWER_KEY_ETH_SRC 6
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
#define TCA_FLOWER_KEY_IP_PROTO 9
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
#define TCA_FLOWER_KEY_IPV4_SRC 10
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
#define TCA_FLOWER_KEY_IPV4_DST 12
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
#define TCA_FLOWER_KEY_IPV6_SRC 14
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
#define TCA_FLOWER_KEY_IPV6_DST 16
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
#define TCA_FLOWER_KEY_TCP_SRC 18
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
#define TCA_FLOWER_KEY_TCP_DST 19
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
#define TCA_FLOWER_KEY_TCP_DST_MASK 36
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
#define TCA_FLOWER_KEY_UDP_SRC 20
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
#define TCA_FLOWER_KEY_UDP_DST 21
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
#define TCA_FLOWER_KEY_UDP_DST_MASK 38
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
#define TCA_FLOWER_KEY_VLAN_ID 23
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
#define TCA_FLOWER_KEY_VLAN_PRIO 24
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
#define TCA_FLOWER_KEY_ENC_KEY_ID 26
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
#define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
#define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
#define TCA_FLOWER_KEY_ENC_IPV4_DST 29
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
#define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
#define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
#define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
#define TCA_FLOWER_KEY_ENC_IPV6_DST 33
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
#define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
#define TCA_FLOWER_KEY_TCP_FLAGS 71
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
#define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
#endif
#ifndef HAVE_TC_ACT_GOTO_CHAIN
#define TC_ACT_GOTO_CHAIN 0x20000000
#endif
#ifndef IPV6_ADDR_LEN
#define IPV6_ADDR_LEN 16
#endif
#ifndef IPV4_ADDR_LEN
#define IPV4_ADDR_LEN 4
#endif
#ifndef TP_PORT_LEN
#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
#endif
#ifndef TTL_LEN
#define TTL_LEN 1
#endif
#ifndef TCA_ACT_MAX_PRIO
#define TCA_ACT_MAX_PRIO 32
#endif
/** UDP port range of VXLAN devices created by driver. */
#define MLX5_VXLAN_PORT_MIN 30000
#define MLX5_VXLAN_PORT_MAX 60000
#define MLX5_VXLAN_DEVICE_PFX "vmlx_"
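/* E.g. the decap VTEP for UDP port 4789 is named "vmlx_4789". */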
/** Tunnel action type, used for @p type in header structure. */
enum flow_tcf_tunact_type {
FLOW_TCF_TUNACT_VXLAN_DECAP,
FLOW_TCF_TUNACT_VXLAN_ENCAP,
};
/** Flags used for @p mask in tunnel action encap descriptors. */
#define FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
#define FLOW_TCF_ENCAP_ETH_DST (1u << 1)
#define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
#define FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
#define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
#define FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
#define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
#define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
#define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
/**
* Structure for holding netlink context.
* Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
* Using this (8KB) buffer size ensures that netlink messages will never be
* truncated.
*/
struct mlx5_flow_tcf_context {
struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
uint32_t seq; /* Message sequence number. */
uint32_t buf_size; /* Message buffer size. */
uint8_t *buf; /* Message buffer. */
};
/**
* Neigh rule structure. The neigh rule is applied via Netlink to
* the outer tunnel iface in order to provide the destination MAC
* address for the VXLAN encapsulation. The neigh rule is implicitly
* related to the Flow itself and can be shared by multiple Flows.
*/
struct tcf_neigh_rule {
LIST_ENTRY(tcf_neigh_rule) next;
uint32_t refcnt;
struct ether_addr eth;
uint16_t mask;
union {
struct {
rte_be32_t dst;
} ipv4;
struct {
uint8_t dst[IPV6_ADDR_LEN];
} ipv6;
};
};
/**
* Local rule structure. The local rule is applied via Netlink to
* the outer tunnel iface in order to provide the local and peer IP addresses
* of the VXLAN tunnel for encapsulation. The local rule is implicitly
* related to the Flow itself and can be shared by multiple Flows.
*/
struct tcf_local_rule {
LIST_ENTRY(tcf_local_rule) next;
uint32_t refcnt;
uint16_t mask;
union {
struct {
rte_be32_t dst;
rte_be32_t src;
} ipv4;
struct {
uint8_t dst[IPV6_ADDR_LEN];
uint8_t src[IPV6_ADDR_LEN];
} ipv6;
};
};
/** VXLAN virtual netdev. */
struct tcf_vtep {
LIST_ENTRY(tcf_vtep) next;
LIST_HEAD(, tcf_neigh_rule) neigh;
LIST_HEAD(, tcf_local_rule) local;
uint32_t refcnt;
unsigned int ifindex; /**< Own interface index. */
unsigned int ifouter; /**< Index of device attached to. */
uint16_t port;
uint8_t created;
};
/** Tunnel descriptor header, common for all tunnel types. */
struct flow_tcf_tunnel_hdr {
uint32_t type; /**< Tunnel action type. */
struct tcf_vtep *vtep; /**< Virtual tunnel endpoint device. */
unsigned int ifindex_org; /**< Original dst/src interface */
unsigned int *ifindex_ptr; /**< Interface ptr in message. */
};
struct flow_tcf_vxlan_decap {
struct flow_tcf_tunnel_hdr hdr;
uint16_t udp_port;
};
struct flow_tcf_vxlan_encap {
struct flow_tcf_tunnel_hdr hdr;
uint32_t mask;
struct {
struct ether_addr dst;
struct ether_addr src;
} eth;
union {
struct {
rte_be32_t dst;
rte_be32_t src;
} ipv4;
struct {
uint8_t dst[IPV6_ADDR_LEN];
uint8_t src[IPV6_ADDR_LEN];
} ipv6;
};
struct {
rte_be16_t src;
rte_be16_t dst;
} udp;
struct {
uint8_t vni[3];
} vxlan;
};
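/*
* Illustrative sketch (not actual driver code): a minimal IPv4
* encapsulation descriptor for the structure above, with made-up
* example values; real values come from the VXLAN_ENCAP item list:
*
* struct flow_tcf_vxlan_encap encap = {
*     .hdr.type = FLOW_TCF_TUNACT_VXLAN_ENCAP,
*     .mask = FLOW_TCF_ENCAP_IPV4_SRC | FLOW_TCF_ENCAP_IPV4_DST |
*             FLOW_TCF_ENCAP_UDP_DST | FLOW_TCF_ENCAP_VXLAN_VNI,
*     .ipv4.src = RTE_BE32(0xc0a80001),
*     .ipv4.dst = RTE_BE32(0xc0a80002),
*     .udp.dst = RTE_BE16(4789),
*     .vxlan.vni = { 0, 0, 42 },
* };
*/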
/** Structure used when extracting the values of a flow counter
* from a netlink message.
*/
struct flow_tcf_stats_basic {
bool valid;
struct gnet_stats_basic counters;
};
/** Empty masks for known item types. */
static const union {
struct rte_flow_item_port_id port_id;
struct rte_flow_item_eth eth;
struct rte_flow_item_vlan vlan;
struct rte_flow_item_ipv4 ipv4;
struct rte_flow_item_ipv6 ipv6;
struct rte_flow_item_tcp tcp;
struct rte_flow_item_udp udp;
struct rte_flow_item_vxlan vxlan;
} flow_tcf_mask_empty;
/** Supported masks for known item types. */
static const struct {
struct rte_flow_item_port_id port_id;
struct rte_flow_item_eth eth;
struct rte_flow_item_vlan vlan;
struct rte_flow_item_ipv4 ipv4;
struct rte_flow_item_ipv6 ipv6;
struct rte_flow_item_tcp tcp;
struct rte_flow_item_udp udp;
struct rte_flow_item_vxlan vxlan;
} flow_tcf_mask_supported = {
.port_id = {
.id = 0xffffffff,
},
.eth = {
.type = RTE_BE16(0xffff),
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
},
.vlan = {
/* PCP and VID only, no DEI. */
.tci = RTE_BE16(0xefff),
.inner_type = RTE_BE16(0xffff),
},
.ipv4.hdr = {
.next_proto_id = 0xff,
.src_addr = RTE_BE32(0xffffffff),
.dst_addr = RTE_BE32(0xffffffff),
},
.ipv6.hdr = {
.proto = 0xff,
.src_addr =
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff",
.dst_addr =
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff",
},
.tcp.hdr = {
.src_port = RTE_BE16(0xffff),
.dst_port = RTE_BE16(0xffff),
.tcp_flags = 0xff,
},
.udp.hdr = {
.src_port = RTE_BE16(0xffff),
.dst_port = RTE_BE16(0xffff),
},
.vxlan = {
.vni = "\xff\xff\xff",
},
};
#define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
#define SZ_NLATTR_NEST SZ_NLATTR_HDR
#define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
#define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
#define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
#define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
/** DPDK port to network interface index (ifindex) conversion. */
struct flow_tcf_ptoi {
uint16_t port_id; /**< DPDK port ID. */
unsigned int ifindex; /**< Network interface index. */
};
/* Due to a limitation on driver/FW. */
#define MLX5_TCF_GROUP_ID_MAX 3
#define MLX5_TCF_GROUP_PRIORITY_MAX 14
#define MLX5_TCF_FATE_ACTIONS \
(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
MLX5_FLOW_ACTION_JUMP)
#define MLX5_TCF_VLAN_ACTIONS \
(MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
#define MLX5_TCF_VXLAN_ACTIONS \
(MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP)
#define MLX5_TCF_PEDIT_ACTIONS \
(MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
#define MLX5_TCF_CONFIG_ACTIONS \
(MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
(MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
#define MAX_PEDIT_KEYS 128
#define SZ_PEDIT_KEY_VAL 4
#define NUM_OF_PEDIT_KEYS(sz) \
(((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
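/*
* E.g. NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN) is 2: a 6-byte MAC address
* needs two 4-byte pedit keys (ceiling division by SZ_PEDIT_KEY_VAL).
*/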
struct pedit_key_ex {
enum pedit_header_type htype;
enum pedit_cmd cmd;
};
struct pedit_parser {
struct tc_pedit_sel sel;
struct tc_pedit_key keys[MAX_PEDIT_KEYS];
struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
};
/**
* Allocate space for the implicitly created TC flow counter.
*
* @return
* A pointer to the counter data structure, NULL otherwise and
* rte_errno is set.
*/
static struct mlx5_flow_counter *
flow_tcf_counter_new(void)
{
struct mlx5_flow_counter *cnt;
/*
* E-Switch counters cannot be shared and their IDs are unknown,
* so all are currently returned with ID 0. Switching to unique
* numbers may be better in the future.
*/
struct mlx5_flow_counter tmpl = {
.ref_cnt = 1,
};
cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
if (!cnt) {
rte_errno = ENOMEM;
return NULL;
}
*cnt = tmpl;
/* Implicit counter, do not add to list. */
return cnt;
}
/**
* Set pedit key of MAC address
*
* @param[in] actions
* pointer to action specification
* @param[in,out] p_parser
* pointer to pedit_parser
*/
static void
flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
struct pedit_parser *p_parser)
{
int idx = p_parser->sel.nkeys;
uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
offsetof(struct ether_hdr, s_addr) :
offsetof(struct ether_hdr, d_addr);
const struct rte_flow_action_set_mac *conf =
(const struct rte_flow_action_set_mac *)actions->conf;
p_parser->keys[idx].off = off;
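/*
* ~UINT32_MAX is 0: tc-pedit ANDs the old word with the mask before
* XOR-ing in the value, so a zero mask replaces the whole 32-bit word.
*/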
p_parser->keys[idx].mask = ~UINT32_MAX;
p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
memcpy(&p_parser->keys[idx].val,
conf->mac_addr, SZ_PEDIT_KEY_VAL);
idx++;
p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
p_parser->keys[idx].mask = 0xFFFF0000;
p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
memcpy(&p_parser->keys[idx].val,
conf->mac_addr + SZ_PEDIT_KEY_VAL,
ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
p_parser->sel.nkeys = (++idx);
}
/**
* Set pedit key of decrement/set TTL
*
* @param[in] actions
* pointer to action specification
* @param[in,out] p_parser
* pointer to pedit_parser
* @param[in] item_flags
* flags of all items present
*/
static void
flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
struct pedit_parser *p_parser,
uint64_t item_flags)
{
int idx = p_parser->sel.nkeys;
p_parser->keys[idx].mask = 0xFFFFFF00;
if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
p_parser->keys[idx].off =
offsetof(struct ipv4_hdr, time_to_live);
}
if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
p_parser->keys[idx].off =
offsetof(struct ipv6_hdr, hop_limits);
}
if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
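/* Adding 0xFF to the 8-bit TTL field decrements it by one. */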
p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
p_parser->keys[idx].val = 0x000000FF;
} else {
p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
p_parser->keys[idx].val =
(__u32)((const struct rte_flow_action_set_ttl *)
actions->conf)->ttl_value;
}
p_parser->sel.nkeys = (++idx);
}
/**
* Set pedit key of transport (TCP/UDP) port value
*
* @param[in] actions
* pointer to action specification
* @param[in,out] p_parser
* pointer to pedit_parser
* @param[in] item_flags
* flags of all items present
*/
static void
flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
struct pedit_parser *p_parser,
uint64_t item_flags)
{
int idx = p_parser->sel.nkeys;
if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
/* The src/dst port offset is the same for TCP and UDP. */
p_parser->keys[idx].off =
actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
offsetof(struct tcp_hdr, src_port) :
offsetof(struct tcp_hdr, dst_port);
p_parser->keys[idx].mask = 0xFFFF0000;
p_parser->keys[idx].val =
(__u32)((const struct rte_flow_action_set_tp *)
actions->conf)->port;
p_parser->sel.nkeys = (++idx);
}
/**
* Set pedit key of ipv6 address
*
* @param[in] actions
* pointer to action specification
* @param[in,out] p_parser
* pointer to pedit_parser
*/
static void
flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
struct pedit_parser *p_parser)
{
int idx = p_parser->sel.nkeys;
int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
int off_base =
actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
offsetof(struct ipv6_hdr, src_addr) :
offsetof(struct ipv6_hdr, dst_addr);
const struct rte_flow_action_set_ipv6 *conf =
(const struct rte_flow_action_set_ipv6 *)actions->conf;
for (int i = 0; i < keys; i++, idx++) {
p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
p_parser->keys[idx].mask = ~UINT32_MAX;
memcpy(&p_parser->keys[idx].val,
conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
SZ_PEDIT_KEY_VAL);
}
p_parser->sel.nkeys += keys;
}
/**
* Set pedit key of ipv4 address
*
* @param[in] actions
* pointer to action specification
* @param[in,out] p_parser
* pointer to pedit_parser
*/
static void
flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
struct pedit_parser *p_parser)
{
int idx = p_parser->sel.nkeys;
p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
p_parser->keys[idx].off =
actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
offsetof(struct ipv4_hdr, src_addr) :
offsetof(struct ipv4_hdr, dst_addr);
p_parser->keys[idx].mask = ~UINT32_MAX;
p_parser->keys[idx].val =
((const struct rte_flow_action_set_ipv4 *)
actions->conf)->ipv4_addr;
p_parser->sel.nkeys = (++idx);
}
/**
* Create the pedit's netlink attributes in a netlink message
* on a pre-allocated message buffer.
*
* @param[in,out] nl
* pointer to pre-allocated netlink message buffer
* @param[in,out] actions
* pointer to pointer of actions specification.
* @param[in] item_flags
* flags of all items present
*/
static void
flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
const struct rte_flow_action **actions,
uint64_t item_flags)
{
struct pedit_parser p_parser;
struct nlattr *na_act_options;
struct nlattr *na_pedit_keys;
memset(&p_parser, 0, sizeof(p_parser));
mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
/* all modify header actions should be in one tc-pedit action */
for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
switch ((*actions)->type) {
case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
break;
case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
flow_tcf_pedit_key_set_tp_port(*actions,
&p_parser, item_flags);
break;
case RTE_FLOW_ACTION_TYPE_SET_TTL:
case RTE_FLOW_ACTION_TYPE_DEC_TTL:
flow_tcf_pedit_key_set_dec_ttl(*actions,
&p_parser, item_flags);
break;
case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
flow_tcf_pedit_key_set_mac(*actions, &p_parser);
break;
default:
goto pedit_mnl_msg_done;
}
}
pedit_mnl_msg_done:
p_parser.sel.action = TC_ACT_PIPE;
mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
sizeof(p_parser.sel) +
p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
&p_parser);
na_pedit_keys =
mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
for (int i = 0; i < p_parser.sel.nkeys; i++) {
struct nlattr *na_pedit_key =
mnl_attr_nest_start(nl,
TCA_PEDIT_KEY_EX | NLA_F_NESTED);
mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
p_parser.keys_ex[i].htype);
mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
p_parser.keys_ex[i].cmd);
mnl_attr_nest_end(nl, na_pedit_key);
}
mnl_attr_nest_end(nl, na_pedit_keys);
mnl_attr_nest_end(nl, na_act_options);
(*actions)--;
}
/**
* Calculate the maximum memory size of one TC-pedit action.
* One TC-pedit action can contain a set of keys, each defining
* a rewrite element (rte_flow action).
*
* @param[in,out] actions
* actions specification.
* @param[in,out] action_flags
* actions flags
* @return
* Max memory size of one TC-pedit action
*/
static int
flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
uint64_t *action_flags)
{
int pedit_size = 0;
int keys = 0;
uint64_t flags = 0;
pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
SZ_NLATTR_STRZ_OF("pedit") +
SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
switch ((*actions)->type) {
case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
break;
case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
/* TCP is the same as UDP. */
keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
break;
case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
/* TCP is the same as UDP. */
keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
flags |= MLX5_FLOW_ACTION_SET_TP_DST;
break;
case RTE_FLOW_ACTION_TYPE_SET_TTL:
keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
flags |= MLX5_FLOW_ACTION_SET_TTL;
break;
case RTE_FLOW_ACTION_TYPE_DEC_TTL:
keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
flags |= MLX5_FLOW_ACTION_DEC_TTL;
break;
case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
break;
case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
break;
default:
goto get_pedit_action_size_done;
}
}
get_pedit_action_size_done:
/* TCA_PEDIT_PARMS_EX. */
pedit_size +=
SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
keys * sizeof(struct tc_pedit_key));
pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS_EX. */
pedit_size += keys *
/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
SZ_NLATTR_DATA_OF(2));
(*action_flags) |= flags;
(*actions)--;
return pedit_size;
}
/**
* Retrieve mask for pattern item.
*
* This function does basic sanity checks on a pattern item in order to
* return the most appropriate mask for it.
*
* @param[in] item
* Item specification.
* @param[in] mask_default
* Default mask for pattern item as specified by the flow API.
* @param[in] mask_supported
* Mask fields supported by the implementation.
* @param[in] mask_empty
* Empty mask to return when there is no specification.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* Either @p item->mask or one of the mask parameters on success, NULL
* otherwise and rte_errno is set.
*/
static const void *
flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
const void *mask_supported, const void *mask_empty,
size_t mask_size, struct rte_flow_error *error)
{
const uint8_t *mask;
size_t i;
/* item->last and item->mask cannot exist without item->spec. */
if (!item->spec && (item->mask || item->last)) {
rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"\"mask\" or \"last\" field provided without"
" a corresponding \"spec\"");
return NULL;
}
/* No spec, no mask, no problem. */
if (!item->spec)
return mask_empty;
mask = item->mask ? item->mask : mask_default;
assert(mask);
/*
* Single-pass check to make sure that:
* - Mask is supported, no bits are set outside mask_supported.
* - Both item->spec and item->last are included in mask.
*/
for (i = 0; i != mask_size; ++i) {
if (!mask[i])
continue;
if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
((const uint8_t *)mask_supported)[i]) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"unsupported field found"
" in \"mask\"");
return NULL;
}
if (item->last &&
(((const uint8_t *)item->spec)[i] & mask[i]) !=
(((const uint8_t *)item->last)[i] & mask[i])) {
rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM_LAST,
item->last,
"range between \"spec\" and \"last\""
" not comprised in \"mask\"");
return NULL;
}
}
return mask;
}
/**
* Build a conversion table between port ID and ifindex.
*
* @param[in] dev
* Pointer to Ethernet device.
* @param[out] ptoi
* Pointer to ptoi table.
* @param[in] len
* Size of ptoi table provided.
*
* @return
* Size of ptoi table filled.
*/
static unsigned int
flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
unsigned int len)
{
unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
uint16_t port_id[n + 1];
unsigned int i;
unsigned int own = 0;
/* At least one port is needed when no switch domain is present. */
if (!n) {
n = 1;
port_id[0] = dev->data->port_id;
} else {
n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
}
if (n > len)
return 0;
for (i = 0; i != n; ++i) {
struct rte_eth_dev_info dev_info;
rte_eth_dev_info_get(port_id[i], &dev_info);
if (port_id[i] == dev->data->port_id)
own = i;
ptoi[i].port_id = port_id[i];
ptoi[i].ifindex = dev_info.if_index;
}
/* Ensure first entry of ptoi[] is the current device. */
if (own) {
ptoi[n] = ptoi[0];
ptoi[0] = ptoi[own];
ptoi[own] = ptoi[n];
}
/* An entry with zero ifindex terminates ptoi[]. */
ptoi[n].port_id = 0;
ptoi[n].ifindex = 0;
return n;
}
/**
* Verify the @p attr will be correctly understood by the E-switch.
*
* @param[in] attr
* Pointer to flow attributes
* @param[out] error
* Pointer to error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
struct rte_flow_error *error)
{
/*
* Supported attributes: groups, some priorities and ingress only.
* Group is supported only if the kernel supports chains. Don't care
* about transfer as it is the caller's problem.
*/
if (attr->group > MLX5_TCF_GROUP_ID_MAX)
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
"group ID larger than "
RTE_STR(MLX5_TCF_GROUP_ID_MAX)
" isn't supported");
else if (attr->group > 0 &&
attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
attr,
"lowest priority level is "
RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
" when group is configured");
else if (attr->priority > 0xfffe)
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
attr,
"lowest priority level is 0xfffe");
if (!attr->ingress)
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
attr, "only ingress is supported");
if (attr->egress)
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
attr, "egress is not supported");
return 0;
}
/**
* Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-Switch.
* The routine checks the L2 fields to be used in encapsulation header.
*
* @param[in] item
* Pointer to the item structure.
* @param[out] error
* Pointer to the error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
**/
static int
flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item *item,
struct rte_flow_error *error)
{
const struct rte_flow_item_eth *spec = item->spec;
const struct rte_flow_item_eth *mask = item->mask;
if (!spec) {
/*
* Specification for L2 addresses can be empty
* because they are optional and not required
* directly by the tc rule. The kernel tries to
* resolve them on its own.
*/
return 0;
}
if (!mask) {
/* If mask is not specified use the default one. */
mask = &rte_flow_item_eth_mask;
}
if (memcmp(&mask->dst,
&flow_tcf_mask_empty.eth.dst,
sizeof(flow_tcf_mask_empty.eth.dst))) {
if (memcmp(&mask->dst,
&rte_flow_item_eth_mask.dst,
sizeof(rte_flow_item_eth_mask.dst)))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"eth.dst\" field");
}
if (memcmp(&mask->src,
&flow_tcf_mask_empty.eth.src,
sizeof(flow_tcf_mask_empty.eth.src))) {
if (memcmp(&mask->src,
&rte_flow_item_eth_mask.src,
sizeof(rte_flow_item_eth_mask.src)))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"eth.src\" field");
}
if (mask->type != RTE_BE16(0x0000)) {
if (mask->type != RTE_BE16(0xffff))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"eth.type\" field");
DRV_LOG(WARNING,
"outer ethernet type field"
" cannot be forced for vxlan"
" encapsulation, parameter ignored");
}
return 0;
}
/**
* Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV4 item for E-Switch.
* The routine checks the IPv4 fields to be used in encapsulation header.
*
* @param[in] item
* Pointer to the item structure.
* @param[out] error
* Pointer to the error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
**/
static int
flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
struct rte_flow_error *error)
{
const struct rte_flow_item_ipv4 *spec = item->spec;
const struct rte_flow_item_ipv4 *mask = item->mask;
if (!spec) {
/*
* Specification for IP addresses cannot be empty
* because it is required by tunnel_key parameter.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"NULL outer ipv4 address"
" specification for vxlan"
" encapsulation");
}
if (!mask)
mask = &rte_flow_item_ipv4_mask;
if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"ipv4.hdr.dst_addr\" field"
" for vxlan encapsulation");
/* More IPv4 address validations can be put here. */
} else {
/*
* Kernel uses the destination IP address to determine
* the routing path and obtain the MAC destination
* address, so IP destination address must be
* specified in the tc rule.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"outer ipv4 destination address"
" must be specified for"
" vxlan encapsulation");
}
if (mask->hdr.src_addr != RTE_BE32(0x00000000)) {
if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"ipv4.hdr.src_addr\" field"
" for vxlan encapsulation");
/* More IPv4 address validations can be put here. */
} else {
/*
* Kernel uses the source IP address to select the
* interface for egress encapsulated traffic, so
* it must be specified in the tc rule.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"outer ipv4 source address"
" must be specified for"
" vxlan encapsulation");
}
return 0;
}
/**
* Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-Switch.
* The routine checks the IPv6 fields to be used in encapsulation header.
*
* @param[in] item
* Pointer to the item structure.
* @param[out] error
* Pointer to the error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
**/
static int
flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
struct rte_flow_error *error)
{
const struct rte_flow_item_ipv6 *spec = item->spec;
const struct rte_flow_item_ipv6 *mask = item->mask;
if (!spec) {
/*
* Specification for IP addresses cannot be empty
* because it is required by tunnel_key parameter.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"NULL outer ipv6 address"
" specification for"
" vxlan encapsulation");
}
if (!mask)
mask = &rte_flow_item_ipv6_mask;
if (memcmp(&mask->hdr.dst_addr,
&flow_tcf_mask_empty.ipv6.hdr.dst_addr,
IPV6_ADDR_LEN)) {
if (memcmp(&mask->hdr.dst_addr,
&rte_flow_item_ipv6_mask.hdr.dst_addr,
IPV6_ADDR_LEN))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"ipv6.hdr.dst_addr\" field"
" for vxlan encapsulation");
/* More IPv6 address validations can be put here. */
} else {
/*
* Kernel uses the destination IP address to determine
* the routing path and obtain the MAC destination
* address (neigh or gateway), so the IP destination address
* must be specified within the tc rule.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"outer ipv6 destination address"
" must be specified for"
" vxlan encapsulation");
}
if (memcmp(&mask->hdr.src_addr,
&flow_tcf_mask_empty.ipv6.hdr.src_addr,
IPV6_ADDR_LEN)) {
if (memcmp(&mask->hdr.src_addr,
&rte_flow_item_ipv6_mask.hdr.src_addr,
IPV6_ADDR_LEN))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"ipv6.hdr.src_addr\" field"
" for vxlan encapsulation");
/* More L3 address validation can be put here. */
} else {
/*
* Kernel uses the source IP address to select the
* interface for egress encapsulated traffic, so
* it must be specified in the tc rule.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"outer L3 source address"
" must be specified for"
" vxlan encapsulation");
}
return 0;
}
/**
* Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-Switch.
* The routine checks the UDP fields to be used in encapsulation header.
*
* @param[in] item
* Pointer to the item structure.
* @param[out] error
* Pointer to the error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
**/
static int
flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
struct rte_flow_error *error)
{
const struct rte_flow_item_udp *spec = item->spec;
const struct rte_flow_item_udp *mask = item->mask;
if (!spec) {
/*
* Specification for UDP ports cannot be empty
* because it is required by tunnel_key parameter.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"NULL UDP port specification "
" for vxlan encapsulation");
}
if (!mask)
mask = &rte_flow_item_udp_mask;
if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
if (mask->hdr.dst_port != RTE_BE16(0xffff))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"udp.hdr.dst_port\" field"
" for vxlan encapsulation");
if (!spec->hdr.dst_port)
return rte_flow_error_set
(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"outer UDP remote port cannot be"
" 0 for vxlan encapsulation");
} else {
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"outer UDP remote port"
" must be specified for"
" vxlan encapsulation");
}
if (mask->hdr.src_port != RTE_BE16(0x0000)) {
if (mask->hdr.src_port != RTE_BE16(0xffff))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"udp.hdr.src_port\" field"
" for vxlan encapsulation");
DRV_LOG(WARNING,
"outer UDP source port cannot be"
" forced for vxlan encapsulation,"
" parameter ignored");
}
return 0;
}
/**
* Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_VXLAN item for E-Switch.
* The routine checks the VNI field to be used in encapsulation header.
*
* @param[in] item
* Pointer to the item structure.
* @param[out] error
* Pointer to the error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
**/
static int
flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
struct rte_flow_error *error)
{
const struct rte_flow_item_vxlan *spec = item->spec;
const struct rte_flow_item_vxlan *mask = item->mask;
if (!spec) {
/* Outer VNI is required by tunnel_key parameter. */
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"NULL VNI specification"
" for vxlan encapsulation");
}
if (!mask)
mask = &rte_flow_item_vxlan_mask;
if (!mask->vni[0] && !mask->vni[1] && !mask->vni[2])
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"outer VNI must be specified "
"for vxlan encapsulation");
if (mask->vni[0] != 0xff ||
mask->vni[1] != 0xff ||
mask->vni[2] != 0xff)
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"vxlan.vni\" field");
if (!spec->vni[0] && !spec->vni[1] && !spec->vni[2])
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
"vxlan vni cannot be 0");
return 0;
}
/**
* Validate VXLAN_ENCAP action item list for E-Switch.
* The routine checks items to be used in encapsulation header.
*
* @param[in] action
* Pointer to the VXLAN_ENCAP action structure.
* @param[out] error
* Pointer to the error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
**/
static int
flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
struct rte_flow_error *error)
{
const struct rte_flow_item *items;
int ret;
uint32_t item_flags = 0;
if (!action->conf)
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION, action,
"Missing vxlan tunnel"
" action configuration");
items = ((const struct rte_flow_action_vxlan_encap *)
action->conf)->definition;
if (!items)
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION, action,
"Missing vxlan tunnel"
" encapsulation parameters");
for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
switch (items->type) {
case RTE_FLOW_ITEM_TYPE_VOID:
break;
case RTE_FLOW_ITEM_TYPE_ETH:
ret = mlx5_flow_validate_item_eth(items, item_flags,
error);
if (ret < 0)
return ret;
ret = flow_tcf_validate_vxlan_encap_eth(items, error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
break;
case RTE_FLOW_ITEM_TYPE_IPV4:
ret = mlx5_flow_validate_item_ipv4(items, item_flags,
error);
if (ret < 0)
return ret;
ret = flow_tcf_validate_vxlan_encap_ipv4(items, error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
break;
case RTE_FLOW_ITEM_TYPE_IPV6:
ret = mlx5_flow_validate_item_ipv6(items, item_flags,
error);
if (ret < 0)
return ret;
ret = flow_tcf_validate_vxlan_encap_ipv6(items, error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
break;
case RTE_FLOW_ITEM_TYPE_UDP:
ret = mlx5_flow_validate_item_udp(items, item_flags,
0xFF, error);
if (ret < 0)
return ret;
ret = flow_tcf_validate_vxlan_encap_udp(items, error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
break;
case RTE_FLOW_ITEM_TYPE_VXLAN:
ret = mlx5_flow_validate_item_vxlan(items,
item_flags, error);
if (ret < 0)
return ret;
ret = flow_tcf_validate_vxlan_encap_vni(items, error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_VXLAN;
break;
default:
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM, items,
"vxlan encap item not supported");
}
}
if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION, action,
"no outer IP layer found"
" for vxlan encapsulation");
if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION, action,
"no outer UDP layer found"
" for vxlan encapsulation");
if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION, action,
"no VXLAN VNI found"
" for vxlan encapsulation");
return 0;
}
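/*
* Illustrative sketch (not actual driver code) of a VXLAN_ENCAP item
* list accepted by the validator above; eth, ipv4, udp and vxlan are
* hypothetical locals holding the outer header values:
*
* struct rte_flow_item items[] = {
*     { .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &eth },
*     { .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4 },
*     { .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp },
*     { .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan },
*     { .type = RTE_FLOW_ITEM_TYPE_END },
* };
*/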
/**
* Validate RTE_FLOW_ITEM_TYPE_IPV4 item if VXLAN_DECAP action
* is present in actions list.
*
* @param[in] ipv4
* Outer IPv4 address item (if any, NULL otherwise).
* @param[out] error
* Pointer to the error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
**/
static int
flow_tcf_validate_vxlan_decap_ipv4(const struct rte_flow_item *ipv4,
struct rte_flow_error *error)
{
const struct rte_flow_item_ipv4 *spec = ipv4->spec;
const struct rte_flow_item_ipv4 *mask = ipv4->mask;
if (!spec) {
/*
* Specification for IP addresses cannot be empty
* because it is required as decap parameter.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
"NULL outer ipv4 address"
" specification for vxlan"
" for vxlan decapsulation");
}
if (!mask)
mask = &rte_flow_item_ipv4_mask;
if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"ipv4.hdr.dst_addr\" field");
/* More IP address validations can be put here. */
} else {
/*
* Kernel uses the destination IP address
* to determine the ingress network interface
* for traffic being decapsulated.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
"outer ipv4 destination address"
" must be specified for"
" vxlan decapsulation");
}
/* Source IP address is optional for decap. */
if (mask->hdr.src_addr != RTE_BE32(0x00000000) &&
mask->hdr.src_addr != RTE_BE32(0xffffffff))
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"ipv4.hdr.src_addr\" field");
return 0;
}
/**
* Validate RTE_FLOW_ITEM_TYPE_IPV6 item if VXLAN_DECAP action
* is present in actions list.
*
* @param[in] ipv6
* Outer IPv6 address item (if any, NULL otherwise).
* @param[out] error
* Pointer to the error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
**/
static int
flow_tcf_validate_vxlan_decap_ipv6(const struct rte_flow_item *ipv6,
struct rte_flow_error *error)
{
const struct rte_flow_item_ipv6 *spec = ipv6->spec;
const struct rte_flow_item_ipv6 *mask = ipv6->mask;
if (!spec) {
/*
* Specification for IP addresses cannot be empty
* because it is required as decap parameter.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
"NULL outer ipv6 address"
" specification for vxlan"
" decapsulation");
}
if (!mask)
mask = &rte_flow_item_ipv6_mask;
if (memcmp(&mask->hdr.dst_addr,
&flow_tcf_mask_empty.ipv6.hdr.dst_addr,
IPV6_ADDR_LEN)) {
if (memcmp(&mask->hdr.dst_addr,
&rte_flow_item_ipv6_mask.hdr.dst_addr,
IPV6_ADDR_LEN))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"ipv6.hdr.dst_addr\" field");
/* More IP address validations can be put here. */
} else {
/*
* Kernel uses the destination IP address
* to determine the ingress network interface
* for traffic being decapsulated.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
"outer ipv6 destination address must be "
"specified for vxlan decapsulation");
}
/* Source IP address is optional for decap. */
if (memcmp(&mask->hdr.src_addr,
&flow_tcf_mask_empty.ipv6.hdr.src_addr,
IPV6_ADDR_LEN)) {
if (memcmp(&mask->hdr.src_addr,
&rte_flow_item_ipv6_mask.hdr.src_addr,
IPV6_ADDR_LEN))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"ipv6.hdr.src_addr\" field");
}
return 0;
}
/**
* Validate RTE_FLOW_ITEM_TYPE_UDP item if VXLAN_DECAP action
* is present in actions list.
*
* @param[in] udp
* Outer UDP layer item (if any, NULL otherwise).
* @param[out] error
* Pointer to the error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
**/
static int
flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp,
struct rte_flow_error *error)
{
const struct rte_flow_item_udp *spec = udp->spec;
const struct rte_flow_item_udp *mask = udp->mask;
if (!spec)
/*
* Specification for UDP ports cannot be empty
* because it is required as decap parameter.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, udp,
"NULL UDP port specification"
" for VXLAN decapsulation");
if (!mask)
mask = &rte_flow_item_udp_mask;
if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
if (mask->hdr.dst_port != RTE_BE16(0xffff))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"udp.hdr.dst_port\" field");
if (!spec->hdr.dst_port)
return rte_flow_error_set
(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, udp,
"zero decap local UDP port");
} else {
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, udp,
"outer UDP destination port must be "
"specified for vxlan decapsulation");
}
if (mask->hdr.src_port != RTE_BE16(0x0000)) {
if (mask->hdr.src_port != RTE_BE16(0xffff))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
"no support for partial mask on"
" \"udp.hdr.src_port\" field");
DRV_LOG(WARNING,
"outer UDP local port cannot be "
"forced for VXLAN encapsulation, "
"parameter ignored");
}
return 0;
}
/**
* Validate flow for E-Switch.
*
* @param[in] dev
* Pointer to the Ethernet device structure.
* @param[in] attr
* Pointer to the flow attributes.
* @param[in] items
* Pointer to the list of items.
* @param[in] actions
* Pointer to the list of actions.
* @param[out] error
* Pointer to the error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
flow_tcf_validate(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
{
union {
const struct rte_flow_item_port_id *port_id;
const struct rte_flow_item_eth *eth;
const struct rte_flow_item_vlan *vlan;
const struct rte_flow_item_ipv4 *ipv4;
const struct rte_flow_item_ipv6 *ipv6;
const struct rte_flow_item_tcp *tcp;
const struct rte_flow_item_udp *udp;
const struct rte_flow_item_vxlan *vxlan;
} spec, mask;
union {
const struct rte_flow_action_port_id *port_id;
const struct rte_flow_action_jump *jump;
const struct rte_flow_action_of_push_vlan *of_push_vlan;
const struct rte_flow_action_of_set_vlan_vid *
of_set_vlan_vid;
const struct rte_flow_action_of_set_vlan_pcp *
of_set_vlan_pcp;
const struct rte_flow_action_vxlan_encap *vxlan_encap;
const struct rte_flow_action_set_ipv4 *set_ipv4;
const struct rte_flow_action_set_ipv6 *set_ipv6;
} conf;
uint64_t item_flags = 0;
uint64_t action_flags = 0;
uint8_t next_protocol = -1;
unsigned int tcm_ifindex = 0;
uint8_t pedit_validated = 0;
struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
struct rte_eth_dev *port_id_dev = NULL;
bool in_port_id_set = false;
int ret;
claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
PTOI_TABLE_SZ_MAX(dev)));
ret = flow_tcf_validate_attributes(attr, error);
if (ret < 0)
return ret;
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
unsigned int i;
uint64_t current_action_flag = 0;
switch (actions->type) {
case RTE_FLOW_ACTION_TYPE_VOID:
break;
case RTE_FLOW_ACTION_TYPE_PORT_ID:
current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
if (!actions->conf)
break;
conf.port_id = actions->conf;
if (conf.port_id->original)
i = 0;
else
for (i = 0; ptoi[i].ifindex; ++i)
if (ptoi[i].port_id == conf.port_id->id)
break;
if (!ptoi[i].ifindex)
return rte_flow_error_set
(error, ENODEV,
RTE_FLOW_ERROR_TYPE_ACTION_CONF,
conf.port_id,
"missing data to convert port ID to"
" ifindex");
port_id_dev = &rte_eth_devices[conf.port_id->id];
break;
case RTE_FLOW_ACTION_TYPE_JUMP:
current_action_flag = MLX5_FLOW_ACTION_JUMP;
if (!actions->conf)
break;
conf.jump = actions->conf;
if (attr->group >= conf.jump->group)
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"can jump only to a group forward");
break;
case RTE_FLOW_ACTION_TYPE_DROP:
current_action_flag = MLX5_FLOW_ACTION_DROP;
break;
case RTE_FLOW_ACTION_TYPE_COUNT:
break;
case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
break;
case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
break;
case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION, actions,
"vlan modify is not supported,"
" set action must follow push action");
current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
break;
case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION, actions,
"vlan modify is not supported,"
" set action must follow push action");
current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
break;
case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
current_action_flag = MLX5_FLOW_ACTION_VXLAN_DECAP;
break;
case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
ret = flow_tcf_validate_vxlan_encap(actions, error);
if (ret < 0)
return ret;
current_action_flag = MLX5_FLOW_ACTION_VXLAN_ENCAP;
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
break;
case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
break;
case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
break;
case RTE_FLOW_ACTION_TYPE_SET_TTL:
current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
break;
case RTE_FLOW_ACTION_TYPE_DEC_TTL:
current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
break;
case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
break;
case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
break;
default:
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"action not supported");
}
if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
if (!actions->conf)
return rte_flow_error_set
(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION_CONF,
actions,
"action configuration not set");
}
if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
pedit_validated)
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"set actions should be "
"listed successively");
if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
(action_flags & MLX5_TCF_PEDIT_ACTIONS))
pedit_validated = 1;
if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
(action_flags & MLX5_TCF_FATE_ACTIONS))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"can't have multiple fate"
" actions");
if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
(action_flags & MLX5_TCF_VXLAN_ACTIONS))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"can't have multiple vxlan"
" actions");
if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
(action_flags & MLX5_TCF_VLAN_ACTIONS))
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"can't have vxlan and vlan"
" actions in the same rule");
action_flags |= current_action_flag;
}
for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
unsigned int i;
if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
items->type != RTE_FLOW_ITEM_TYPE_ETH)
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM,
items,
"only L2 inner item"
" is supported");
switch (items->type) {
case RTE_FLOW_ITEM_TYPE_VOID:
break;
case RTE_FLOW_ITEM_TYPE_PORT_ID:
mask.port_id = flow_tcf_item_mask
(items, &rte_flow_item_port_id_mask,
&flow_tcf_mask_supported.port_id,
&flow_tcf_mask_empty.port_id,
sizeof(flow_tcf_mask_supported.port_id),
error);
if (!mask.port_id)
return -rte_errno;
if (mask.port_id == &flow_tcf_mask_empty.port_id) {
in_port_id_set = 1;
break;
}
spec.port_id = items->spec;
if (mask.port_id->id && mask.port_id->id != 0xffffffff)
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK,
mask.port_id,
"no support for partial mask on"
" \"id\" field");
if (!mask.port_id->id)
i = 0;
else
for (i = 0; ptoi[i].ifindex; ++i)
if (ptoi[i].port_id == spec.port_id->id)
break;
if (!ptoi[i].ifindex)
return rte_flow_error_set
(error, ENODEV,
RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
spec.port_id,
"missing data to convert port ID to"
" ifindex");
if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
spec.port_id,
"cannot match traffic for"
" several port IDs through"
" a single flow rule");
tcm_ifindex = ptoi[i].ifindex;
in_port_id_set = 1;
break;
case RTE_FLOW_ITEM_TYPE_ETH:
ret = mlx5_flow_validate_item_eth(items, item_flags,
error);
if (ret < 0)
return ret;
item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
MLX5_FLOW_LAYER_INNER_L2 :
MLX5_FLOW_LAYER_OUTER_L2;
/* TODO:
* Redundant check due to different supported mask.
* Same for the rest of items.
*/
mask.eth = flow_tcf_item_mask
(items, &rte_flow_item_eth_mask,
&flow_tcf_mask_supported.eth,
&flow_tcf_mask_empty.eth,
sizeof(flow_tcf_mask_supported.eth),
error);
if (!mask.eth)
return -rte_errno;
if (mask.eth->type && mask.eth->type !=
RTE_BE16(0xffff))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK,
mask.eth,
"no support for partial mask on"
" \"type\" field");
break;
case RTE_FLOW_ITEM_TYPE_VLAN:
ret = mlx5_flow_validate_item_vlan(items, item_flags,
error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
mask.vlan = flow_tcf_item_mask
(items, &rte_flow_item_vlan_mask,
&flow_tcf_mask_supported.vlan,
&flow_tcf_mask_empty.vlan,
sizeof(flow_tcf_mask_supported.vlan),
error);
if (!mask.vlan)
return -rte_errno;
if ((mask.vlan->tci & RTE_BE16(0xe000) &&
(mask.vlan->tci & RTE_BE16(0xe000)) !=
RTE_BE16(0xe000)) ||
(mask.vlan->tci & RTE_BE16(0x0fff) &&
(mask.vlan->tci & RTE_BE16(0x0fff)) !=
RTE_BE16(0x0fff)) ||
(mask.vlan->inner_type &&
mask.vlan->inner_type != RTE_BE16(0xffff)))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK,
mask.vlan,
"no support for partial masks on"
" \"tci\" (PCP and VID parts) and"
" \"inner_type\" fields");
break;
case RTE_FLOW_ITEM_TYPE_IPV4:
ret = mlx5_flow_validate_item_ipv4(items, item_flags,
error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
mask.ipv4 = flow_tcf_item_mask
(items, &rte_flow_item_ipv4_mask,
&flow_tcf_mask_supported.ipv4,
&flow_tcf_mask_empty.ipv4,
sizeof(flow_tcf_mask_supported.ipv4),
error);
if (!mask.ipv4)
return -rte_errno;
if (mask.ipv4->hdr.next_proto_id &&
mask.ipv4->hdr.next_proto_id != 0xff)
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK,
mask.ipv4,
"no support for partial mask on"
" \"hdr.next_proto_id\" field");
else if (mask.ipv4->hdr.next_proto_id)
next_protocol =
((const struct rte_flow_item_ipv4 *)
(items->spec))->hdr.next_proto_id;
if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
ret = flow_tcf_validate_vxlan_decap_ipv4
(items, error);
if (ret < 0)
return ret;
}
break;
case RTE_FLOW_ITEM_TYPE_IPV6:
ret = mlx5_flow_validate_item_ipv6(items, item_flags,
error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
mask.ipv6 = flow_tcf_item_mask
(items, &rte_flow_item_ipv6_mask,
&flow_tcf_mask_supported.ipv6,
&flow_tcf_mask_empty.ipv6,
sizeof(flow_tcf_mask_supported.ipv6),
error);
if (!mask.ipv6)
return -rte_errno;
if (mask.ipv6->hdr.proto &&
mask.ipv6->hdr.proto != 0xff)
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK,
mask.ipv6,
"no support for partial mask on"
" \"hdr.proto\" field");
else if (mask.ipv6->hdr.proto)
next_protocol =
((const struct rte_flow_item_ipv6 *)
(items->spec))->hdr.proto;
if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
ret = flow_tcf_validate_vxlan_decap_ipv6
(items, error);
if (ret < 0)
return ret;
}
break;
case RTE_FLOW_ITEM_TYPE_UDP:
ret = mlx5_flow_validate_item_udp(items, item_flags,
next_protocol, error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
mask.udp = flow_tcf_item_mask
(items, &rte_flow_item_udp_mask,
&flow_tcf_mask_supported.udp,
&flow_tcf_mask_empty.udp,
sizeof(flow_tcf_mask_supported.udp),
error);
if (!mask.udp)
return -rte_errno;
if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
ret = flow_tcf_validate_vxlan_decap_udp
(items, error);
if (ret < 0)
return ret;
}
break;
case RTE_FLOW_ITEM_TYPE_TCP:
ret = mlx5_flow_validate_item_tcp
(items, item_flags,
next_protocol,
&flow_tcf_mask_supported.tcp,
error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
mask.tcp = flow_tcf_item_mask
(items, &rte_flow_item_tcp_mask,
&flow_tcf_mask_supported.tcp,
&flow_tcf_mask_empty.tcp,
sizeof(flow_tcf_mask_supported.tcp),
error);
if (!mask.tcp)
return -rte_errno;
break;
case RTE_FLOW_ITEM_TYPE_VXLAN:
if (!(action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP))
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM,
items,
"vni pattern should be followed by"
" vxlan decapsulation action");
ret = mlx5_flow_validate_item_vxlan(items,
item_flags, error);
if (ret < 0)
return ret;
item_flags |= MLX5_FLOW_LAYER_VXLAN;
mask.vxlan = flow_tcf_item_mask
(items, &rte_flow_item_vxlan_mask,
&flow_tcf_mask_supported.vxlan,
&flow_tcf_mask_empty.vxlan,
sizeof(flow_tcf_mask_supported.vxlan), error);
if (!mask.vxlan)
return -rte_errno;
if (mask.vxlan->vni[0] != 0xff ||
mask.vxlan->vni[1] != 0xff ||
mask.vxlan->vni[2] != 0xff)
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM_MASK,
mask.vxlan,
"no support for partial or "
"empty mask on \"vxlan.vni\" field");
break;
default:
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM,
items, "item not supported");
}
}
if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
(action_flags & MLX5_FLOW_ACTION_DROP))
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"set action is not compatible with "
"drop action");
if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
!(action_flags & MLX5_FLOW_ACTION_PORT_ID))
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"set action must be followed by "
"port_id action");
if (action_flags &
(MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"no ipv4 item found in"
" pattern");
}
if (action_flags &
(MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"no ipv6 item found in"
" pattern");
}
if (action_flags &
(MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
if (!(item_flags &
(MLX5_FLOW_LAYER_OUTER_L4_UDP |
MLX5_FLOW_LAYER_OUTER_L4_TCP)))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"no TCP/UDP item found in"
" pattern");
}
/*
* FW syndrome (0xA9C090):
* set_flow_table_entry: push vlan action fte in fdb can ONLY be
* forward to the uplink.
*/
if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
(action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
((struct priv *)port_id_dev->data->dev_private)->representor)
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION, actions,
"vlan push can only be applied"
" when forwarding to uplink port");
/*
* FW syndrome (0x294609):
* set_flow_table_entry: modify/pop/push actions in fdb flow table
* are supported only while forwarding to vport.
*/
if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
!(action_flags & MLX5_FLOW_ACTION_PORT_ID))
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION, actions,
"vlan actions are supported"
" only with port_id action");
if ((action_flags & MLX5_TCF_VXLAN_ACTIONS) &&
!(action_flags & MLX5_FLOW_ACTION_PORT_ID))
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION, NULL,
"vxlan actions are supported"
" only with port_id action");
if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION, actions,
"no fate action is found");
if (action_flags &
(MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
if (!(item_flags &
(MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"no IP found in pattern");
}
if (action_flags &
(MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"no ethernet found in"
" pattern");
}
if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
if (!(item_flags &
(MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
NULL,
"no outer IP pattern found"
" for vxlan decap action");
if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
NULL,
"no outer UDP pattern found"
" for vxlan decap action");
if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
NULL,
"no VNI pattern found"
" for vxlan decap action");
}
return 0;
}
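/*
 * Illustrative example of a rule accepted by the validation above
 * (hypothetical testpmd invocation): a VXLAN decapsulation rule needs
 * outer IP and UDP items, a VNI item and a fate action, e.g.:
 *
 *	flow create 0 transfer ingress
 *		pattern eth / ipv4 / udp dst is 4789 / vxlan vni is 100 / end
 *		actions vxlan_decap / port_id id 1 / end
 */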
/**
* Calculate maximum size of memory for flow items of Linux TC flower and
* extract specified items.
*
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[out] item_flags
 *   Pointer to bit mask of detected items.
*
* @return
* Maximum size of memory for items.
*/
static int
flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
uint64_t *item_flags)
{
int size = 0;
uint64_t flags = 0;
size += SZ_NLATTR_STRZ_OF("flower") +
SZ_NLATTR_NEST + /* TCA_OPTIONS. */
SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
if (attr->group > 0)
size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
switch (items->type) {
case RTE_FLOW_ITEM_TYPE_VOID:
break;
case RTE_FLOW_ITEM_TYPE_PORT_ID:
break;
case RTE_FLOW_ITEM_TYPE_ETH:
size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
/* dst/src MAC addr and mask. */
flags |= MLX5_FLOW_LAYER_OUTER_L2;
break;
case RTE_FLOW_ITEM_TYPE_VLAN:
size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
SZ_NLATTR_TYPE_OF(uint16_t) +
/* VLAN Ether type. */
SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
break;
case RTE_FLOW_ITEM_TYPE_IPV4:
size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
SZ_NLATTR_TYPE_OF(uint32_t) * 4;
/* dst/src IP addr and mask. */
flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
break;
case RTE_FLOW_ITEM_TYPE_IPV6:
size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
/* dst/src IP addr and mask. */
flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
break;
case RTE_FLOW_ITEM_TYPE_UDP:
size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
SZ_NLATTR_TYPE_OF(uint16_t) * 4;
/* dst/src port and mask. */
flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
break;
case RTE_FLOW_ITEM_TYPE_TCP:
size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
SZ_NLATTR_TYPE_OF(uint16_t) * 4;
/* dst/src port and mask. */
flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
break;
case RTE_FLOW_ITEM_TYPE_VXLAN:
size += SZ_NLATTR_TYPE_OF(uint32_t);
flags |= MLX5_FLOW_LAYER_VXLAN;
break;
default:
DRV_LOG(WARNING,
"unsupported item %p type %d,"
" items must be validated before flow creation",
(const void *)items, items->type);
break;
}
}
*item_flags = flags;
return size;
}
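/*
 * Note: the returned size is an upper bound. An Ethernet item, for
 * example, always reserves room for the ethertype attribute and both
 * address/mask pairs, even when the mask is empty and translation
 * ends up emitting none of them.
 */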
/**
 * Calculate size of memory to store the VXLAN encapsulation
 * related items in the Netlink message buffer. The item list
 * is specified by the RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action
 * and must be validated beforehand.
*
* @param[in] action
* RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
* List of pattern items to scan data from.
*
* @return
 *   The size of the part of the Netlink message buffer needed to store
 *   the VXLAN encapsulation item attributes.
*/
static int
flow_tcf_vxlan_encap_size(const struct rte_flow_action *action)
{
const struct rte_flow_item *items;
int size = 0;
assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
assert(action->conf);
items = ((const struct rte_flow_action_vxlan_encap *)
action->conf)->definition;
assert(items);
for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
switch (items->type) {
case RTE_FLOW_ITEM_TYPE_VOID:
break;
case RTE_FLOW_ITEM_TYPE_ETH:
			/* This item does not require a message buffer. */
break;
case RTE_FLOW_ITEM_TYPE_IPV4:
size += SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN) * 2;
break;
case RTE_FLOW_ITEM_TYPE_IPV6:
size += SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 2;
break;
case RTE_FLOW_ITEM_TYPE_UDP: {
const struct rte_flow_item_udp *udp = items->mask;
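			/*
			 * The destination port attribute below is always
			 * accounted for; the source port attribute only
			 * when its mask is non-zero (this mirrors
			 * flow_tcf_parse_vxlan_encap_udp()).
			 */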
size += SZ_NLATTR_TYPE_OF(uint16_t);
if (!udp || udp->hdr.src_port != RTE_BE16(0x0000))
size += SZ_NLATTR_TYPE_OF(uint16_t);
break;
}
case RTE_FLOW_ITEM_TYPE_VXLAN:
size += SZ_NLATTR_TYPE_OF(uint32_t);
break;
default:
assert(false);
DRV_LOG(WARNING,
"unsupported item %p type %d,"
" items must be validated"
" before flow creation",
(const void *)items, items->type);
return 0;
}
}
return size;
}
/**
* Calculate maximum size of memory for flow actions of Linux TC flower and
* extract specified actions.
*
* @param[in] actions
* Pointer to the list of actions.
* @param[out] action_flags
* Pointer to the detected actions.
*
* @return
* Maximum size of memory for actions.
*/
static int
flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
uint64_t *action_flags)
{
int size = 0;
uint64_t flags = 0;
size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
switch (actions->type) {
case RTE_FLOW_ACTION_TYPE_VOID:
break;
case RTE_FLOW_ACTION_TYPE_PORT_ID:
size += SZ_NLATTR_NEST + /* na_act_index. */
SZ_NLATTR_STRZ_OF("mirred") +
SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
SZ_NLATTR_TYPE_OF(struct tc_mirred);
flags |= MLX5_FLOW_ACTION_PORT_ID;
break;
case RTE_FLOW_ACTION_TYPE_JUMP:
size += SZ_NLATTR_NEST + /* na_act_index. */
SZ_NLATTR_STRZ_OF("gact") +
SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
SZ_NLATTR_TYPE_OF(struct tc_gact);
flags |= MLX5_FLOW_ACTION_JUMP;
break;
case RTE_FLOW_ACTION_TYPE_DROP:
size += SZ_NLATTR_NEST + /* na_act_index. */
SZ_NLATTR_STRZ_OF("gact") +
SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
SZ_NLATTR_TYPE_OF(struct tc_gact);
flags |= MLX5_FLOW_ACTION_DROP;
break;
case RTE_FLOW_ACTION_TYPE_COUNT:
break;
case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
goto action_of_vlan;
case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
goto action_of_vlan;
case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
goto action_of_vlan;
case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
goto action_of_vlan;
action_of_vlan:
size += SZ_NLATTR_NEST + /* na_act_index. */
SZ_NLATTR_STRZ_OF("vlan") +
SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
SZ_NLATTR_TYPE_OF(struct tc_vlan) +
SZ_NLATTR_TYPE_OF(uint16_t) +
/* VLAN protocol. */
SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
break;
case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
size += SZ_NLATTR_NEST + /* na_act_index. */
SZ_NLATTR_STRZ_OF("tunnel_key") +
SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
SZ_NLATTR_TYPE_OF(uint8_t);
size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
size += flow_tcf_vxlan_encap_size(actions) +
RTE_ALIGN_CEIL /* preceding encap params. */
(sizeof(struct flow_tcf_vxlan_encap),
MNL_ALIGNTO);
flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
break;
case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
size += SZ_NLATTR_NEST + /* na_act_index. */
SZ_NLATTR_STRZ_OF("tunnel_key") +
SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
SZ_NLATTR_TYPE_OF(uint8_t);
size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
size += RTE_ALIGN_CEIL /* preceding decap params. */
(sizeof(struct flow_tcf_vxlan_decap),
MNL_ALIGNTO);
flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
case RTE_FLOW_ACTION_TYPE_SET_TTL:
case RTE_FLOW_ACTION_TYPE_DEC_TTL:
case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
size += flow_tcf_get_pedit_actions_size(&actions,
&flags);
break;
default:
DRV_LOG(WARNING,
"unsupported action %p type %d,"
" items must be validated before flow creation",
(const void *)actions, actions->type);
break;
}
}
*action_flags = flags;
return size;
}
/**
* Brand rtnetlink buffer with unique handle.
*
* This handle should be unique for a given network interface to avoid
* collisions.
*
* @param nlh
* Pointer to Netlink message.
* @param handle
* Unique 32-bit handle to use.
*/
static void
flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
{
struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
tcm->tcm_handle = handle;
DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
(void *)nlh, handle);
}
/**
* Prepare a flow object for Linux TC flower. It calculates the maximum size of
* memory required, allocates the memory, initializes Netlink message headers
 * and sets a unique TC message handle.
*
* @param[in] attr
* Pointer to the flow attributes.
* @param[in] items
* Pointer to the list of items.
* @param[in] actions
* Pointer to the list of actions.
* @param[out] item_flags
* Pointer to bit mask of all items detected.
* @param[out] action_flags
* Pointer to bit mask of all actions detected.
* @param[out] error
* Pointer to the error structure.
*
* @return
* Pointer to mlx5_flow object on success,
 *   otherwise NULL and rte_errno is set.
*/
static struct mlx5_flow *
flow_tcf_prepare(const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
uint64_t *item_flags, uint64_t *action_flags,
struct rte_flow_error *error)
{
size_t size = RTE_ALIGN_CEIL
(sizeof(struct mlx5_flow),
alignof(struct flow_tcf_tunnel_hdr)) +
MNL_ALIGN(sizeof(struct nlmsghdr)) +
MNL_ALIGN(sizeof(struct tcmsg));
struct mlx5_flow *dev_flow;
struct nlmsghdr *nlh;
struct tcmsg *tcm;
uint8_t *sp, *tun = NULL;
size += flow_tcf_get_items_and_size(attr, items, item_flags);
size += flow_tcf_get_actions_and_size(actions, action_flags);
dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
if (!dev_flow) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"not enough memory to create E-Switch flow");
return NULL;
}
sp = (uint8_t *)(dev_flow + 1);
if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) {
sp = RTE_PTR_ALIGN
(sp, alignof(struct flow_tcf_tunnel_hdr));
tun = sp;
sp += RTE_ALIGN_CEIL
(sizeof(struct flow_tcf_vxlan_encap),
MNL_ALIGNTO);
#ifndef NDEBUG
size -= RTE_ALIGN_CEIL
(sizeof(struct flow_tcf_vxlan_encap),
MNL_ALIGNTO);
#endif
} else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
sp = RTE_PTR_ALIGN
(sp, alignof(struct flow_tcf_tunnel_hdr));
tun = sp;
sp += RTE_ALIGN_CEIL
(sizeof(struct flow_tcf_vxlan_decap),
MNL_ALIGNTO);
#ifndef NDEBUG
size -= RTE_ALIGN_CEIL
(sizeof(struct flow_tcf_vxlan_decap),
MNL_ALIGNTO);
#endif
} else {
sp = RTE_PTR_ALIGN(sp, MNL_ALIGNTO);
}
nlh = mnl_nlmsg_put_header(sp);
tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
*dev_flow = (struct mlx5_flow){
.tcf = (struct mlx5_flow_tcf){
#ifndef NDEBUG
.nlsize = size - RTE_ALIGN_CEIL
(sizeof(struct mlx5_flow),
alignof(struct flow_tcf_tunnel_hdr)),
#endif
.tunnel = (struct flow_tcf_tunnel_hdr *)tun,
.nlh = nlh,
.tcm = tcm,
},
};
if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP)
dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_DECAP;
else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_ENCAP;
/*
* Generate a reasonably unique handle based on the address of the
* target buffer.
*
	 * This is straightforward on 32-bit systems where the flow pointer
	 * can be used directly. Otherwise, the pointer is shifted right by
	 * the base-2 logarithm of the previous power of two of the buffer
	 * size and its least significant 32 bits are used.
*/
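	/*
	 * For instance (a sketch, 64-bit case): with a total buffer size of
	 * 1400 bytes, rte_align32prevpow2(1400) yields 1024 and the pointer
	 * is shifted right by rte_log2_u32(1024) = 10 bits.
	 */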
if (sizeof(dev_flow) <= 4)
flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
else
flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
rte_log2_u32(rte_align32prevpow2(size)));
return dev_flow;
}
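/*
 * Resulting device flow buffer layout for tunnel rules (a sketch):
 *
 *	+----------------------------+ <- dev_flow (MNL_ALIGNTO aligned)
 *	| struct mlx5_flow           |
 *	+----------------------------+ <- flow_tcf_tunnel_hdr alignment
 *	| encap/decap parameters     |
 *	+----------------------------+
 *	| struct nlmsghdr            |
 *	| struct tcmsg               |
 *	| TC flower attributes ...   |
 *	+----------------------------+
 */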
/**
* Make adjustments for supporting count actions.
*
* @param[in] dev
* Pointer to the Ethernet device structure.
* @param[in] dev_flow
* Pointer to mlx5_flow.
* @param[out] error
* Pointer to error structure.
*
* @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
struct mlx5_flow *dev_flow,
struct rte_flow_error *error)
{
struct rte_flow *flow = dev_flow->flow;
if (!flow->counter) {
flow->counter = flow_tcf_counter_new();
if (!flow->counter)
return rte_flow_error_set(error, rte_errno,
RTE_FLOW_ERROR_TYPE_ACTION,
NULL,
"cannot get counter"
" context.");
}
return 0;
}
/**
* Convert VXLAN VNI to 32-bit integer.
*
* @param[in] vni
* VXLAN VNI in 24-bit wire format.
*
* @return
 *   VXLAN VNI as a 32-bit integer value in network byte order.
*/
static inline rte_be32_t
vxlan_vni_as_be32(const uint8_t vni[3])
{
union {
uint8_t vni[4];
rte_be32_t dword;
} ret = {
.vni = { 0, vni[0], vni[1], vni[2] },
};
return ret.dword;
}
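/*
 * Example: VNI bytes { 0x12, 0x34, 0x56 } are laid out as
 * { 0x00, 0x12, 0x34, 0x56 }, i.e. 0x00123456 in network byte order,
 * the format expected by the TCA_*_ENC_KEY_ID Netlink attributes.
 */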
/**
* Helper function to process RTE_FLOW_ITEM_TYPE_ETH entry in configuration
* of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the MAC address fields
 * in the encapsulation parameters structure. The item must be
 * prevalidated; no validation checks are performed by this function.
*
* @param[in] spec
* RTE_FLOW_ITEM_TYPE_ETH entry specification.
* @param[in] mask
* RTE_FLOW_ITEM_TYPE_ETH entry mask.
* @param[out] encap
* Structure to fill the gathered MAC address data.
*/
static void
flow_tcf_parse_vxlan_encap_eth(const struct rte_flow_item_eth *spec,
const struct rte_flow_item_eth *mask,
struct flow_tcf_vxlan_encap *encap)
{
/* Item must be validated before. No redundant checks. */
assert(spec);
if (!mask || !memcmp(&mask->dst,
&rte_flow_item_eth_mask.dst,
sizeof(rte_flow_item_eth_mask.dst))) {
/*
		 * Ethernet addresses are not supported by
		 * tc as tunnel_key parameters. The destination
		 * address is needed to form the encap packet
		 * header and is retrieved by the kernel from
		 * implicit sources (ARP table, etc.);
		 * address masks are not supported at all.
*/
encap->eth.dst = spec->dst;
encap->mask |= FLOW_TCF_ENCAP_ETH_DST;
}
if (!mask || !memcmp(&mask->src,
&rte_flow_item_eth_mask.src,
sizeof(rte_flow_item_eth_mask.src))) {
/*
* Ethernet addresses are not supported by
* tc as tunnel_key parameters. Source ethernet
* address is ignored anyway.
*/
encap->eth.src = spec->src;
encap->mask |= FLOW_TCF_ENCAP_ETH_SRC;
}
}
/**
* Helper function to process RTE_FLOW_ITEM_TYPE_IPV4 entry in configuration
* of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the IPV4 address fields
 * in the encapsulation parameters structure. The item must be
 * prevalidated; no validation checks are performed by this function.
*
* @param[in] spec
* RTE_FLOW_ITEM_TYPE_IPV4 entry specification.
* @param[out] encap
* Structure to fill the gathered IPV4 address data.
*/
static void
flow_tcf_parse_vxlan_encap_ipv4(const struct rte_flow_item_ipv4 *spec,
struct flow_tcf_vxlan_encap *encap)
{
/* Item must be validated before. No redundant checks. */
assert(spec);
encap->ipv4.dst = spec->hdr.dst_addr;
encap->ipv4.src = spec->hdr.src_addr;
encap->mask |= FLOW_TCF_ENCAP_IPV4_SRC |
FLOW_TCF_ENCAP_IPV4_DST;
}
/**
* Helper function to process RTE_FLOW_ITEM_TYPE_IPV6 entry in configuration
* of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the IPV6 address fields
 * in the encapsulation parameters structure. The item must be
 * prevalidated; no validation checks are performed by this function.
*
* @param[in] spec
* RTE_FLOW_ITEM_TYPE_IPV6 entry specification.
* @param[out] encap
* Structure to fill the gathered IPV6 address data.
*/
static void
flow_tcf_parse_vxlan_encap_ipv6(const struct rte_flow_item_ipv6 *spec,
struct flow_tcf_vxlan_encap *encap)
{
/* Item must be validated before. No redundant checks. */
assert(spec);
memcpy(encap->ipv6.dst, spec->hdr.dst_addr, IPV6_ADDR_LEN);
memcpy(encap->ipv6.src, spec->hdr.src_addr, IPV6_ADDR_LEN);
encap->mask |= FLOW_TCF_ENCAP_IPV6_SRC |
FLOW_TCF_ENCAP_IPV6_DST;
}
/**
* Helper function to process RTE_FLOW_ITEM_TYPE_UDP entry in configuration
* of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the UDP port fields
 * in the encapsulation parameters structure. The item must be
 * prevalidated; no validation checks are performed by this function.
*
* @param[in] spec
* RTE_FLOW_ITEM_TYPE_UDP entry specification.
* @param[in] mask
* RTE_FLOW_ITEM_TYPE_UDP entry mask.
* @param[out] encap
* Structure to fill the gathered UDP port data.
*/
static void
flow_tcf_parse_vxlan_encap_udp(const struct rte_flow_item_udp *spec,
const struct rte_flow_item_udp *mask,
struct flow_tcf_vxlan_encap *encap)
{
assert(spec);
encap->udp.dst = spec->hdr.dst_port;
encap->mask |= FLOW_TCF_ENCAP_UDP_DST;
if (!mask || mask->hdr.src_port != RTE_BE16(0x0000)) {
encap->udp.src = spec->hdr.src_port;
		encap->mask |= FLOW_TCF_ENCAP_UDP_SRC;
}
}
/**
* Helper function to process RTE_FLOW_ITEM_TYPE_VXLAN entry in configuration
* of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the VNI fields
 * in the encapsulation parameters structure. The item must be
 * prevalidated; no validation checks are performed by this function.
*
* @param[in] spec
* RTE_FLOW_ITEM_TYPE_VXLAN entry specification.
* @param[out] encap
* Structure to fill the gathered VNI address data.
*/
static void
flow_tcf_parse_vxlan_encap_vni(const struct rte_flow_item_vxlan *spec,
struct flow_tcf_vxlan_encap *encap)
{
	/* Item must be validated before. No redundant checks. */
assert(spec);
memcpy(encap->vxlan.vni, spec->vni, sizeof(encap->vxlan.vni));
encap->mask |= FLOW_TCF_ENCAP_VXLAN_VNI;
}
/**
* Populate consolidated encapsulation object from list of pattern items.
*
 * Helper function to process the configuration of actions such as
 * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. The item list must be validated
 * beforehand since there is no way to return a meaningful error.
*
* @param[in] action
* RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
* List of pattern items to gather data from.
 * @param[out] encap
 *   Structure to fill with gathered data.
*/
static void
flow_tcf_vxlan_encap_parse(const struct rte_flow_action *action,
struct flow_tcf_vxlan_encap *encap)
{
union {
const struct rte_flow_item_eth *eth;
const struct rte_flow_item_ipv4 *ipv4;
const struct rte_flow_item_ipv6 *ipv6;
const struct rte_flow_item_udp *udp;
const struct rte_flow_item_vxlan *vxlan;
} spec, mask;
const struct rte_flow_item *items;
assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
assert(action->conf);
items = ((const struct rte_flow_action_vxlan_encap *)
action->conf)->definition;
assert(items);
for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
switch (items->type) {
case RTE_FLOW_ITEM_TYPE_VOID:
break;
case RTE_FLOW_ITEM_TYPE_ETH:
mask.eth = items->mask;
spec.eth = items->spec;
flow_tcf_parse_vxlan_encap_eth(spec.eth, mask.eth,
encap);
break;
case RTE_FLOW_ITEM_TYPE_IPV4:
spec.ipv4 = items->spec;
flow_tcf_parse_vxlan_encap_ipv4(spec.ipv4, encap);
break;
case RTE_FLOW_ITEM_TYPE_IPV6:
spec.ipv6 = items->spec;
flow_tcf_parse_vxlan_encap_ipv6(spec.ipv6, encap);
break;
case RTE_FLOW_ITEM_TYPE_UDP:
mask.udp = items->mask;
spec.udp = items->spec;
flow_tcf_parse_vxlan_encap_udp(spec.udp, mask.udp,
encap);
break;
case RTE_FLOW_ITEM_TYPE_VXLAN:
spec.vxlan = items->spec;
flow_tcf_parse_vxlan_encap_vni(spec.vxlan, encap);
break;
default:
assert(false);
DRV_LOG(WARNING,
"unsupported item %p type %d,"
" items must be validated"
" before flow creation",
(const void *)items, items->type);
encap->mask = 0;
return;
}
}
}
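/*
 * Example: for a definition list of eth / ipv4 / udp / vxlan items the
 * resulting encap->mask combines FLOW_TCF_ENCAP_ETH_*,
 * FLOW_TCF_ENCAP_IPV4_*, FLOW_TCF_ENCAP_UDP_* and
 * FLOW_TCF_ENCAP_VXLAN_VNI bits, which flow_tcf_translate() checks
 * when emitting the TCA_TUNNEL_KEY_ENC_* attributes.
 */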
/**
* Translate flow for Linux TC flower and construct Netlink message.
*
* @param[in] priv
* Pointer to the priv structure.
* @param[in, out] flow
* Pointer to the sub flow.
* @param[in] attr
* Pointer to the flow attributes.
* @param[in] items
* Pointer to the list of items.
* @param[in] actions
* Pointer to the list of actions.
* @param[out] error
* Pointer to the error structure.
*
* @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
{
union {
const struct rte_flow_item_port_id *port_id;
const struct rte_flow_item_eth *eth;
const struct rte_flow_item_vlan *vlan;
const struct rte_flow_item_ipv4 *ipv4;
const struct rte_flow_item_ipv6 *ipv6;
const struct rte_flow_item_tcp *tcp;
const struct rte_flow_item_udp *udp;
const struct rte_flow_item_vxlan *vxlan;
} spec, mask;
union {
const struct rte_flow_action_port_id *port_id;
const struct rte_flow_action_jump *jump;
const struct rte_flow_action_of_push_vlan *of_push_vlan;
const struct rte_flow_action_of_set_vlan_vid *
of_set_vlan_vid;
const struct rte_flow_action_of_set_vlan_pcp *
of_set_vlan_pcp;
} conf;
union {
struct flow_tcf_tunnel_hdr *hdr;
struct flow_tcf_vxlan_decap *vxlan;
} decap = {
.hdr = NULL,
};
union {
struct flow_tcf_tunnel_hdr *hdr;
struct flow_tcf_vxlan_encap *vxlan;
} encap = {
.hdr = NULL,
};
struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
struct nlmsghdr *nlh = dev_flow->tcf.nlh;
struct tcmsg *tcm = dev_flow->tcf.tcm;
uint32_t na_act_index_cur;
bool eth_type_set = 0;
bool vlan_present = 0;
bool vlan_eth_type_set = 0;
bool ip_proto_set = 0;
struct nlattr *na_flower;
struct nlattr *na_flower_act;
struct nlattr *na_vlan_id = NULL;
struct nlattr *na_vlan_priority = NULL;
uint64_t item_flags = 0;
int ret;
claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
PTOI_TABLE_SZ_MAX(dev)));
if (dev_flow->tcf.tunnel) {
switch (dev_flow->tcf.tunnel->type) {
case FLOW_TCF_TUNACT_VXLAN_DECAP:
decap.vxlan = dev_flow->tcf.vxlan_decap;
break;
case FLOW_TCF_TUNACT_VXLAN_ENCAP:
encap.vxlan = dev_flow->tcf.vxlan_encap;
break;
/* New tunnel actions can be added here. */
default:
assert(false);
break;
}
}
nlh = dev_flow->tcf.nlh;
tcm = dev_flow->tcf.tcm;
/* Prepare API must have been called beforehand. */
assert(nlh != NULL && tcm != NULL);
tcm->tcm_family = AF_UNSPEC;
tcm->tcm_ifindex = ptoi[0].ifindex;
tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
/*
* Priority cannot be zero to prevent the kernel from picking one
* automatically.
*/
tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
RTE_BE16(ETH_P_ALL));
if (attr->group > 0)
mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
mnl_attr_put_strz(nlh, TCA_KIND, "flower");
na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
unsigned int i;
switch (items->type) {
case RTE_FLOW_ITEM_TYPE_VOID:
break;
case RTE_FLOW_ITEM_TYPE_PORT_ID:
mask.port_id = flow_tcf_item_mask
(items, &rte_flow_item_port_id_mask,
&flow_tcf_mask_supported.port_id,
&flow_tcf_mask_empty.port_id,
sizeof(flow_tcf_mask_supported.port_id),
error);
assert(mask.port_id);
if (mask.port_id == &flow_tcf_mask_empty.port_id)
break;
spec.port_id = items->spec;
if (!mask.port_id->id)
i = 0;
else
for (i = 0; ptoi[i].ifindex; ++i)
if (ptoi[i].port_id == spec.port_id->id)
break;
assert(ptoi[i].ifindex);
tcm->tcm_ifindex = ptoi[i].ifindex;
break;
case RTE_FLOW_ITEM_TYPE_ETH:
item_flags |= (item_flags & MLX5_FLOW_LAYER_VXLAN) ?
MLX5_FLOW_LAYER_INNER_L2 :
MLX5_FLOW_LAYER_OUTER_L2;
mask.eth = flow_tcf_item_mask
(items, &rte_flow_item_eth_mask,
&flow_tcf_mask_supported.eth,
&flow_tcf_mask_empty.eth,
sizeof(flow_tcf_mask_supported.eth),
error);
assert(mask.eth);
if (mask.eth == &flow_tcf_mask_empty.eth)
break;
spec.eth = items->spec;
if (decap.vxlan &&
!(item_flags & MLX5_FLOW_LAYER_VXLAN)) {
DRV_LOG(WARNING,
"outer L2 addresses cannot be forced"
" for vxlan decapsulation, parameter"
" ignored");
break;
}
if (mask.eth->type) {
mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
spec.eth->type);
eth_type_set = 1;
}
if (!is_zero_ether_addr(&mask.eth->dst)) {
mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
ETHER_ADDR_LEN,
spec.eth->dst.addr_bytes);
mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
ETHER_ADDR_LEN,
mask.eth->dst.addr_bytes);
}
if (!is_zero_ether_addr(&mask.eth->src)) {
mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
ETHER_ADDR_LEN,
spec.eth->src.addr_bytes);
mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
ETHER_ADDR_LEN,
mask.eth->src.addr_bytes);
}
assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
break;
case RTE_FLOW_ITEM_TYPE_VLAN:
assert(!encap.hdr);
assert(!decap.hdr);
item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
mask.vlan = flow_tcf_item_mask
(items, &rte_flow_item_vlan_mask,
&flow_tcf_mask_supported.vlan,
&flow_tcf_mask_empty.vlan,
sizeof(flow_tcf_mask_supported.vlan),
error);
assert(mask.vlan);
if (!eth_type_set)
mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
RTE_BE16(ETH_P_8021Q));
eth_type_set = 1;
vlan_present = 1;
if (mask.vlan == &flow_tcf_mask_empty.vlan)
break;
spec.vlan = items->spec;
if (mask.vlan->inner_type) {
mnl_attr_put_u16(nlh,
TCA_FLOWER_KEY_VLAN_ETH_TYPE,
spec.vlan->inner_type);
vlan_eth_type_set = 1;
}
if (mask.vlan->tci & RTE_BE16(0xe000))
mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
(rte_be_to_cpu_16
(spec.vlan->tci) >> 13) & 0x7);
if (mask.vlan->tci & RTE_BE16(0x0fff))
mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
rte_be_to_cpu_16
(spec.vlan->tci &
RTE_BE16(0x0fff)));
assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
break;
case RTE_FLOW_ITEM_TYPE_IPV4:
item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
mask.ipv4 = flow_tcf_item_mask
(items, &rte_flow_item_ipv4_mask,
&flow_tcf_mask_supported.ipv4,
&flow_tcf_mask_empty.ipv4,
sizeof(flow_tcf_mask_supported.ipv4),
error);
assert(mask.ipv4);
spec.ipv4 = items->spec;
if (!decap.vxlan) {
if (!eth_type_set && !vlan_eth_type_set)
mnl_attr_put_u16
(nlh,
vlan_present ?
TCA_FLOWER_KEY_VLAN_ETH_TYPE :
TCA_FLOWER_KEY_ETH_TYPE,
RTE_BE16(ETH_P_IP));
eth_type_set = 1;
vlan_eth_type_set = 1;
if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
break;
if (mask.ipv4->hdr.next_proto_id) {
mnl_attr_put_u8
(nlh, TCA_FLOWER_KEY_IP_PROTO,
spec.ipv4->hdr.next_proto_id);
ip_proto_set = 1;
}
} else {
assert(mask.ipv4 != &flow_tcf_mask_empty.ipv4);
}
if (mask.ipv4->hdr.src_addr) {
mnl_attr_put_u32
(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_IPV4_SRC :
TCA_FLOWER_KEY_IPV4_SRC,
spec.ipv4->hdr.src_addr);
mnl_attr_put_u32
(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK :
TCA_FLOWER_KEY_IPV4_SRC_MASK,
mask.ipv4->hdr.src_addr);
}
if (mask.ipv4->hdr.dst_addr) {
mnl_attr_put_u32
(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_IPV4_DST :
TCA_FLOWER_KEY_IPV4_DST,
spec.ipv4->hdr.dst_addr);
mnl_attr_put_u32
(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_IPV4_DST_MASK :
TCA_FLOWER_KEY_IPV4_DST_MASK,
mask.ipv4->hdr.dst_addr);
}
assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
break;
case RTE_FLOW_ITEM_TYPE_IPV6:
item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
mask.ipv6 = flow_tcf_item_mask
(items, &rte_flow_item_ipv6_mask,
&flow_tcf_mask_supported.ipv6,
&flow_tcf_mask_empty.ipv6,
sizeof(flow_tcf_mask_supported.ipv6),
error);
assert(mask.ipv6);
spec.ipv6 = items->spec;
if (!decap.vxlan) {
if (!eth_type_set || !vlan_eth_type_set) {
mnl_attr_put_u16
(nlh,
vlan_present ?
TCA_FLOWER_KEY_VLAN_ETH_TYPE :
TCA_FLOWER_KEY_ETH_TYPE,
RTE_BE16(ETH_P_IPV6));
}
eth_type_set = 1;
vlan_eth_type_set = 1;
if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
break;
if (mask.ipv6->hdr.proto) {
mnl_attr_put_u8
(nlh, TCA_FLOWER_KEY_IP_PROTO,
spec.ipv6->hdr.proto);
ip_proto_set = 1;
}
} else {
assert(mask.ipv6 != &flow_tcf_mask_empty.ipv6);
}
if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
mnl_attr_put(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_IPV6_SRC :
TCA_FLOWER_KEY_IPV6_SRC,
IPV6_ADDR_LEN,
spec.ipv6->hdr.src_addr);
mnl_attr_put(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK :
TCA_FLOWER_KEY_IPV6_SRC_MASK,
IPV6_ADDR_LEN,
mask.ipv6->hdr.src_addr);
}
if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
mnl_attr_put(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_IPV6_DST :
TCA_FLOWER_KEY_IPV6_DST,
IPV6_ADDR_LEN,
spec.ipv6->hdr.dst_addr);
mnl_attr_put(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_IPV6_DST_MASK :
TCA_FLOWER_KEY_IPV6_DST_MASK,
IPV6_ADDR_LEN,
mask.ipv6->hdr.dst_addr);
}
assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
break;
case RTE_FLOW_ITEM_TYPE_UDP:
item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
mask.udp = flow_tcf_item_mask
(items, &rte_flow_item_udp_mask,
&flow_tcf_mask_supported.udp,
&flow_tcf_mask_empty.udp,
sizeof(flow_tcf_mask_supported.udp),
error);
assert(mask.udp);
spec.udp = items->spec;
if (!decap.vxlan) {
if (!ip_proto_set)
mnl_attr_put_u8
(nlh, TCA_FLOWER_KEY_IP_PROTO,
IPPROTO_UDP);
if (mask.udp == &flow_tcf_mask_empty.udp)
break;
} else {
assert(mask.udp != &flow_tcf_mask_empty.udp);
decap.vxlan->udp_port =
rte_be_to_cpu_16
(spec.udp->hdr.dst_port);
}
if (mask.udp->hdr.src_port) {
mnl_attr_put_u16
(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_UDP_SRC_PORT :
TCA_FLOWER_KEY_UDP_SRC,
spec.udp->hdr.src_port);
mnl_attr_put_u16
(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK :
TCA_FLOWER_KEY_UDP_SRC_MASK,
mask.udp->hdr.src_port);
}
if (mask.udp->hdr.dst_port) {
mnl_attr_put_u16
(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_UDP_DST_PORT :
TCA_FLOWER_KEY_UDP_DST,
spec.udp->hdr.dst_port);
mnl_attr_put_u16
(nlh, decap.vxlan ?
TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK :
TCA_FLOWER_KEY_UDP_DST_MASK,
mask.udp->hdr.dst_port);
}
assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
break;
case RTE_FLOW_ITEM_TYPE_TCP:
item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
mask.tcp = flow_tcf_item_mask
(items, &rte_flow_item_tcp_mask,
&flow_tcf_mask_supported.tcp,
&flow_tcf_mask_empty.tcp,
sizeof(flow_tcf_mask_supported.tcp),
error);
assert(mask.tcp);
if (!ip_proto_set)
mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
IPPROTO_TCP);
if (mask.tcp == &flow_tcf_mask_empty.tcp)
break;
spec.tcp = items->spec;
if (mask.tcp->hdr.src_port) {
mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
spec.tcp->hdr.src_port);
mnl_attr_put_u16(nlh,
TCA_FLOWER_KEY_TCP_SRC_MASK,
mask.tcp->hdr.src_port);
}
if (mask.tcp->hdr.dst_port) {
mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
spec.tcp->hdr.dst_port);
mnl_attr_put_u16(nlh,
TCA_FLOWER_KEY_TCP_DST_MASK,
mask.tcp->hdr.dst_port);
}
if (mask.tcp->hdr.tcp_flags) {
mnl_attr_put_u16
(nlh,
TCA_FLOWER_KEY_TCP_FLAGS,
rte_cpu_to_be_16
(spec.tcp->hdr.tcp_flags));
mnl_attr_put_u16
(nlh,
TCA_FLOWER_KEY_TCP_FLAGS_MASK,
rte_cpu_to_be_16
(mask.tcp->hdr.tcp_flags));
}
assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
break;
case RTE_FLOW_ITEM_TYPE_VXLAN:
assert(decap.vxlan);
item_flags |= MLX5_FLOW_LAYER_VXLAN;
spec.vxlan = items->spec;
mnl_attr_put_u32(nlh,
TCA_FLOWER_KEY_ENC_KEY_ID,
vxlan_vni_as_be32(spec.vxlan->vni));
assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
break;
default:
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM,
NULL, "item not supported");
}
}
na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
na_act_index_cur = 1;
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
struct nlattr *na_act_index;
struct nlattr *na_act;
unsigned int vlan_act;
unsigned int i;
switch (actions->type) {
case RTE_FLOW_ACTION_TYPE_VOID:
break;
case RTE_FLOW_ACTION_TYPE_PORT_ID:
conf.port_id = actions->conf;
if (conf.port_id->original)
i = 0;
else
for (i = 0; ptoi[i].ifindex; ++i)
if (ptoi[i].port_id == conf.port_id->id)
break;
assert(ptoi[i].ifindex);
na_act_index =
mnl_attr_nest_start(nlh, na_act_index_cur++);
assert(na_act_index);
mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
assert(na_act);
if (encap.hdr) {
assert(dev_flow->tcf.tunnel);
dev_flow->tcf.tunnel->ifindex_ptr =
&((struct tc_mirred *)
mnl_attr_get_payload
(mnl_nlmsg_get_payload_tail
(nlh)))->ifindex;
}
mnl_attr_put(nlh, TCA_MIRRED_PARMS,
sizeof(struct tc_mirred),
&(struct tc_mirred){
.action = TC_ACT_STOLEN,
.eaction = TCA_EGRESS_REDIR,
.ifindex = ptoi[i].ifindex,
});
mnl_attr_nest_end(nlh, na_act);
mnl_attr_nest_end(nlh, na_act_index);
break;
case RTE_FLOW_ACTION_TYPE_JUMP:
conf.jump = actions->conf;
na_act_index =
mnl_attr_nest_start(nlh, na_act_index_cur++);
assert(na_act_index);
mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
assert(na_act);
mnl_attr_put(nlh, TCA_GACT_PARMS,
sizeof(struct tc_gact),
&(struct tc_gact){
.action = TC_ACT_GOTO_CHAIN |
conf.jump->group,
});
mnl_attr_nest_end(nlh, na_act);
mnl_attr_nest_end(nlh, na_act_index);
break;
case RTE_FLOW_ACTION_TYPE_DROP:
na_act_index =
mnl_attr_nest_start(nlh, na_act_index_cur++);
assert(na_act_index);
mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
assert(na_act);
mnl_attr_put(nlh, TCA_GACT_PARMS,
sizeof(struct tc_gact),
&(struct tc_gact){
.action = TC_ACT_SHOT,
});
mnl_attr_nest_end(nlh, na_act);
mnl_attr_nest_end(nlh, na_act_index);
break;
case RTE_FLOW_ACTION_TYPE_COUNT:
/*
* Driver adds the count action implicitly for
* each rule it creates.
*/
ret = flow_tcf_translate_action_count(dev,
dev_flow, error);
if (ret < 0)
return ret;
break;
case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
conf.of_push_vlan = NULL;
vlan_act = TCA_VLAN_ACT_POP;
goto action_of_vlan;
case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
conf.of_push_vlan = actions->conf;
vlan_act = TCA_VLAN_ACT_PUSH;
goto action_of_vlan;
case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
conf.of_set_vlan_vid = actions->conf;
if (na_vlan_id)
goto override_na_vlan_id;
vlan_act = TCA_VLAN_ACT_MODIFY;
goto action_of_vlan;
case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
conf.of_set_vlan_pcp = actions->conf;
if (na_vlan_priority)
goto override_na_vlan_priority;
vlan_act = TCA_VLAN_ACT_MODIFY;
goto action_of_vlan;
action_of_vlan:
na_act_index =
mnl_attr_nest_start(nlh, na_act_index_cur++);
assert(na_act_index);
mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
assert(na_act);
mnl_attr_put(nlh, TCA_VLAN_PARMS,
sizeof(struct tc_vlan),
&(struct tc_vlan){
.action = TC_ACT_PIPE,
.v_action = vlan_act,
});
if (vlan_act == TCA_VLAN_ACT_POP) {
mnl_attr_nest_end(nlh, na_act);
mnl_attr_nest_end(nlh, na_act_index);
break;
}
if (vlan_act == TCA_VLAN_ACT_PUSH)
mnl_attr_put_u16(nlh,
TCA_VLAN_PUSH_VLAN_PROTOCOL,
conf.of_push_vlan->ethertype);
na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
mnl_attr_nest_end(nlh, na_act);
mnl_attr_nest_end(nlh, na_act_index);
if (actions->type ==
RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
override_na_vlan_id:
na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
rte_be_to_cpu_16
(conf.of_set_vlan_vid->vlan_vid);
} else if (actions->type ==
RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
override_na_vlan_priority:
na_vlan_priority->nla_type =
TCA_VLAN_PUSH_VLAN_PRIORITY;
*(uint8_t *)mnl_attr_get_payload
(na_vlan_priority) =
conf.of_set_vlan_pcp->vlan_pcp;
}
break;
case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
assert(decap.vxlan);
assert(dev_flow->tcf.tunnel);
dev_flow->tcf.tunnel->ifindex_ptr =
(unsigned int *)&tcm->tcm_ifindex;
na_act_index =
mnl_attr_nest_start(nlh, na_act_index_cur++);
assert(na_act_index);
mnl_attr_put_strz(nlh, TCA_ACT_KIND, "tunnel_key");
na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
assert(na_act);
mnl_attr_put(nlh, TCA_TUNNEL_KEY_PARMS,
sizeof(struct tc_tunnel_key),
&(struct tc_tunnel_key){
.action = TC_ACT_PIPE,
.t_action = TCA_TUNNEL_KEY_ACT_RELEASE,
});
mnl_attr_nest_end(nlh, na_act);
mnl_attr_nest_end(nlh, na_act_index);
assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
break;
case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
assert(encap.vxlan);
flow_tcf_vxlan_encap_parse(actions, encap.vxlan);
na_act_index =
mnl_attr_nest_start(nlh, na_act_index_cur++);
assert(na_act_index);
mnl_attr_put_strz(nlh, TCA_ACT_KIND, "tunnel_key");
na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
assert(na_act);
mnl_attr_put(nlh, TCA_TUNNEL_KEY_PARMS,
sizeof(struct tc_tunnel_key),
&(struct tc_tunnel_key){
.action = TC_ACT_PIPE,
.t_action = TCA_TUNNEL_KEY_ACT_SET,
});
if (encap.vxlan->mask & FLOW_TCF_ENCAP_UDP_DST)
mnl_attr_put_u16(nlh,
TCA_TUNNEL_KEY_ENC_DST_PORT,
encap.vxlan->udp.dst);
if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV4_SRC)
mnl_attr_put_u32(nlh,
TCA_TUNNEL_KEY_ENC_IPV4_SRC,
encap.vxlan->ipv4.src);
if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV4_DST)
mnl_attr_put_u32(nlh,
TCA_TUNNEL_KEY_ENC_IPV4_DST,
encap.vxlan->ipv4.dst);
if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV6_SRC)
mnl_attr_put(nlh,
TCA_TUNNEL_KEY_ENC_IPV6_SRC,
sizeof(encap.vxlan->ipv6.src),
&encap.vxlan->ipv6.src);
if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV6_DST)
mnl_attr_put(nlh,
TCA_TUNNEL_KEY_ENC_IPV6_DST,
sizeof(encap.vxlan->ipv6.dst),
&encap.vxlan->ipv6.dst);
if (encap.vxlan->mask & FLOW_TCF_ENCAP_VXLAN_VNI)
mnl_attr_put_u32(nlh,
TCA_TUNNEL_KEY_ENC_KEY_ID,
vxlan_vni_as_be32
(encap.vxlan->vxlan.vni));
mnl_attr_put_u8(nlh, TCA_TUNNEL_KEY_NO_CSUM, 0);
mnl_attr_nest_end(nlh, na_act);
mnl_attr_nest_end(nlh, na_act_index);
assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
case RTE_FLOW_ACTION_TYPE_SET_TTL:
case RTE_FLOW_ACTION_TYPE_DEC_TTL:
case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
na_act_index =
mnl_attr_nest_start(nlh, na_act_index_cur++);
flow_tcf_create_pedit_mnl_msg(nlh,
&actions, item_flags);
mnl_attr_nest_end(nlh, na_act_index);
break;
default:
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"action not supported");
}
}
assert(na_flower);
assert(na_flower_act);
mnl_attr_nest_end(nlh, na_flower_act);
mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, decap.vxlan ?
0 : TCA_CLS_FLAGS_SKIP_SW);
mnl_attr_nest_end(nlh, na_flower);
if (dev_flow->tcf.tunnel && dev_flow->tcf.tunnel->ifindex_ptr)
dev_flow->tcf.tunnel->ifindex_org =
*dev_flow->tcf.tunnel->ifindex_ptr;
assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
return 0;
}
/**
* Send Netlink message with acknowledgment.
*
* @param tcf
* Flow context to use.
* @param nlh
* Message to send. This function always raises the NLM_F_ACK flag before
* sending.
* @param[in] msglen
 *   Message length. The message buffer may contain multiple commands and
 *   the nlmsg_len field does not always correspond to the actual message
 *   length. If 0 is specified, the nlmsg_len field in the header is used.
* @param[in] cb
* Callback handler for received message.
* @param[in] arg
* Context pointer for callback handler.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
flow_tcf_nl_ack(struct mlx5_flow_tcf_context *tcf,
struct nlmsghdr *nlh,
uint32_t msglen,
mnl_cb_t cb, void *arg)
{
unsigned int portid = mnl_socket_get_portid(tcf->nl);
uint32_t seq = tcf->seq++;
int err, ret;
assert(tcf->nl);
assert(tcf->buf);
if (!seq)
/* seq 0 is reserved for kernel event-driven notifications. */
seq = tcf->seq++;
nlh->nlmsg_seq = seq;
if (!msglen) {
msglen = nlh->nlmsg_len;
nlh->nlmsg_flags |= NLM_F_ACK;
}
ret = mnl_socket_sendto(tcf->nl, nlh, msglen);
err = (ret <= 0) ? errno : 0;
nlh = (struct nlmsghdr *)(tcf->buf);
/*
* The following loop postpones non-fatal errors until multipart
* messages are complete.
*/
if (ret > 0)
while (true) {
ret = mnl_socket_recvfrom(tcf->nl, tcf->buf,
tcf->buf_size);
if (ret < 0) {
err = errno;
if (err != ENOSPC)
break;
}
if (!err) {
ret = mnl_cb_run(nlh, ret, seq, portid,
cb, arg);
if (ret < 0) {
err = errno;
break;
}
}
			/* Keep receiving until the end of the multipart message. */
if (!(nlh->nlmsg_flags & NLM_F_MULTI) ||
nlh->nlmsg_type == NLMSG_DONE)
break;
}
if (!err)
return 0;
rte_errno = err;
return -err;
}
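/*
 * Typical usage sketch (mirrors flow_tcf_apply() below):
 *
 *	nlh->nlmsg_type = RTM_NEWTFILTER;
 *	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
 *	if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL))
 *		return -rte_errno; (rte_errno is set by the routine)
 *
 * A zero msglen makes the routine take the length from nlmsg_len and
 * raise NLM_F_ACK on the message itself.
 */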
#define MNL_BUF_EXTRA_SPACE 16
#define MNL_REQUEST_SIZE_MIN 256
#define MNL_REQUEST_SIZE_MAX 2048
#define MNL_REQUEST_SIZE RTE_MIN(RTE_MAX(sysconf(_SC_PAGESIZE), \
MNL_REQUEST_SIZE_MIN), MNL_REQUEST_SIZE_MAX)
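/*
 * For example, with a 4096-byte page size the request buffer size is
 * clamped down to MNL_REQUEST_SIZE_MAX (2048) bytes; with a hypothetical
 * 128-byte page it would be raised to MNL_REQUEST_SIZE_MIN (256).
 */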
/* VTEP device list is shared between PMD port instances. */
static LIST_HEAD(, tcf_vtep) vtep_list_vxlan = LIST_HEAD_INITIALIZER();
static pthread_mutex_t vtep_list_mutex = PTHREAD_MUTEX_INITIALIZER;
/**
* Deletes VTEP network device.
*
* @param[in] tcf
* Context object initialized by mlx5_flow_tcf_context_create().
* @param[in] vtep
 *   Object representing the network device to delete. Memory
 *   allocated for this object is freed by this routine.
*/
static void
flow_tcf_vtep_delete(struct mlx5_flow_tcf_context *tcf,
struct tcf_vtep *vtep)
{
struct nlmsghdr *nlh;
struct ifinfomsg *ifm;
alignas(struct nlmsghdr)
uint8_t buf[mnl_nlmsg_size(MNL_ALIGN(sizeof(*ifm))) +
MNL_BUF_EXTRA_SPACE];
int ret;
assert(!vtep->refcnt);
	/* Delete only interfaces we have actually created. */
if (vtep->created && vtep->ifindex) {
DRV_LOG(INFO, "VTEP delete (%d)", vtep->ifindex);
nlh = mnl_nlmsg_put_header(buf);
nlh->nlmsg_type = RTM_DELLINK;
nlh->nlmsg_flags = NLM_F_REQUEST;
ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
ifm->ifi_family = AF_UNSPEC;
ifm->ifi_index = vtep->ifindex;
assert(sizeof(buf) >= nlh->nlmsg_len);
ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL);
if (ret)
DRV_LOG(WARNING, "netlink: error deleting vxlan"
" encap/decap ifindex %u",
ifm->ifi_index);
}
rte_free(vtep);
}
/**
* Creates VTEP network device.
*
* @param[in] tcf
* Context object initialized by mlx5_flow_tcf_context_create().
* @param[in] ifouter
 *   Outer interface to attach the newly created VXLAN device to.
 *   If zero, the VXLAN device is not attached to any device.
 *   Such VTEPs are used for decapsulation and can be pre-created
 *   and shared between processes.
* @param[in] port
* UDP port of created VTEP device.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* Pointer to created device structure on success,
* NULL otherwise and rte_errno is set.
*/
#ifdef HAVE_IFLA_VXLAN_COLLECT_METADATA
static struct tcf_vtep*
flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf,
unsigned int ifouter,
uint16_t port, struct rte_flow_error *error)
{
struct tcf_vtep *vtep;
struct nlmsghdr *nlh;
struct ifinfomsg *ifm;
char name[sizeof(MLX5_VXLAN_DEVICE_PFX) + 24];
alignas(struct nlmsghdr)
uint8_t buf[mnl_nlmsg_size(sizeof(*ifm)) +
SZ_NLATTR_DATA_OF(sizeof(name)) +
SZ_NLATTR_NEST * 2 +
SZ_NLATTR_STRZ_OF("vxlan") +
SZ_NLATTR_DATA_OF(sizeof(uint32_t)) +
SZ_NLATTR_DATA_OF(sizeof(uint16_t)) +
SZ_NLATTR_DATA_OF(sizeof(uint8_t)) * 3 +
MNL_BUF_EXTRA_SPACE];
struct nlattr *na_info;
struct nlattr *na_vxlan;
rte_be16_t vxlan_port = rte_cpu_to_be_16(port);
int ret;
vtep = rte_zmalloc(__func__, sizeof(*vtep), alignof(struct tcf_vtep));
if (!vtep) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"unable to allocate memory for VTEP");
return NULL;
}
*vtep = (struct tcf_vtep){
.port = port,
.local = LIST_HEAD_INITIALIZER(),
.neigh = LIST_HEAD_INITIALIZER(),
};
memset(buf, 0, sizeof(buf));
nlh = mnl_nlmsg_put_header(buf);
nlh->nlmsg_type = RTM_NEWLINK;
nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
ifm->ifi_family = AF_UNSPEC;
ifm->ifi_type = 0;
ifm->ifi_index = 0;
ifm->ifi_flags = IFF_UP;
ifm->ifi_change = 0xffffffff;
snprintf(name, sizeof(name), "%s%u", MLX5_VXLAN_DEVICE_PFX, port);
mnl_attr_put_strz(nlh, IFLA_IFNAME, name);
na_info = mnl_attr_nest_start(nlh, IFLA_LINKINFO);
assert(na_info);
mnl_attr_put_strz(nlh, IFLA_INFO_KIND, "vxlan");
na_vxlan = mnl_attr_nest_start(nlh, IFLA_INFO_DATA);
if (ifouter)
mnl_attr_put_u32(nlh, IFLA_VXLAN_LINK, ifouter);
assert(na_vxlan);
mnl_attr_put_u8(nlh, IFLA_VXLAN_COLLECT_METADATA, 1);
mnl_attr_put_u8(nlh, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1);
mnl_attr_put_u8(nlh, IFLA_VXLAN_LEARNING, 0);
mnl_attr_put_u16(nlh, IFLA_VXLAN_PORT, vxlan_port);
mnl_attr_nest_end(nlh, na_vxlan);
mnl_attr_nest_end(nlh, na_info);
assert(sizeof(buf) >= nlh->nlmsg_len);
ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL);
if (ret) {
DRV_LOG(WARNING,
"netlink: VTEP %s create failure (%d)",
name, rte_errno);
if (rte_errno != EEXIST || ifouter)
/*
* Some unhandled error occurred or device is
* for encapsulation and cannot be shared.
*/
goto error;
} else {
		/*
		 * Mark the device as created by us. It must be
		 * explicitly deleted when no longer needed.
		 */
vtep->created = 1;
}
	/* Try to get the ifindex of the created or pre-existing device. */
ret = if_nametoindex(name);
if (!ret) {
DRV_LOG(WARNING,
"VTEP %s failed to get index (%d)", name, errno);
rte_flow_error_set
			(error, errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"netlink: failed to retrieve VTEP ifindex");
goto error;
}
vtep->ifindex = ret;
vtep->ifouter = ifouter;
memset(buf, 0, sizeof(buf));
nlh = mnl_nlmsg_put_header(buf);
nlh->nlmsg_type = RTM_NEWLINK;
nlh->nlmsg_flags = NLM_F_REQUEST;
ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
ifm->ifi_family = AF_UNSPEC;
ifm->ifi_type = 0;
ifm->ifi_index = vtep->ifindex;
ifm->ifi_flags = IFF_UP;
ifm->ifi_change = IFF_UP;
ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL);
if (ret) {
		rte_flow_error_set(error, rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"netlink: failed to set VTEP link up");
DRV_LOG(WARNING, "netlink: VTEP %s set link up failure (%d)",
name, rte_errno);
goto clean;
}
ret = mlx5_flow_tcf_init(tcf, vtep->ifindex, error);
if (ret) {
DRV_LOG(WARNING, "VTEP %s init failure (%d)", name, rte_errno);
goto clean;
}
DRV_LOG(INFO, "VTEP create (%d, %d)", vtep->port, vtep->ifindex);
vtep->refcnt = 1;
return vtep;
clean:
flow_tcf_vtep_delete(tcf, vtep);
return NULL;
error:
rte_free(vtep);
return NULL;
}
#else
static struct tcf_vtep*
flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf __rte_unused,
unsigned int ifouter __rte_unused,
uint16_t port __rte_unused,
struct rte_flow_error *error)
{
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"netlink: failed to create VTEP, "
"vxlan metadata are not supported by kernel");
return NULL;
}
#endif /* HAVE_IFLA_VXLAN_COLLECT_METADATA */
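/*
 * For reference, the pair of RTM_NEWLINK requests issued by
 * flow_tcf_vtep_create() is roughly equivalent to the following
 * iproute2 commands (an approximation, assuming iproute2 keyword
 * naming):
 *
 *	ip link add <name> type vxlan external nolearning
 *		udp6zerocsumrx dstport <port> [dev <ifouter>]
 *	ip link set <name> up
 */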
/**
* Acquire target interface index for VXLAN tunneling decapsulation.
 * In order to share the UDP port with other interfaces, the
 * VXLAN device is created without attachment to any interface
 * (if it is created at all).
*
* @param[in] tcf
* Context object initialized by mlx5_flow_tcf_context_create().
* @param[in] dev_flow
* Flow tcf object with tunnel structure pointer set.
* @param[out] error
* Perform verbose error reporting if not NULL.
* @return
* Interface descriptor pointer on success,
* NULL otherwise and rte_errno is set.
*/
static struct tcf_vtep*
flow_tcf_decap_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
struct mlx5_flow *dev_flow,
struct rte_flow_error *error)
{
struct tcf_vtep *vtep;
uint16_t port = dev_flow->tcf.vxlan_decap->udp_port;
LIST_FOREACH(vtep, &vtep_list_vxlan, next) {
if (vtep->port == port)
break;
}
if (vtep && vtep->ifouter) {
		rte_flow_error_set(error, EEXIST,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				   "Failed to create decap VTEP with specified"
				   " UDP port, attached device exists");
return NULL;
}
if (vtep) {
/* Device exists, just increment the reference counter. */
vtep->refcnt++;
assert(vtep->ifindex);
return vtep;
}
/* No decapsulation device exists, try to create the new one. */
vtep = flow_tcf_vtep_create(tcf, 0, port, error);
if (vtep)
LIST_INSERT_HEAD(&vtep_list_vxlan, vtep, next);
return vtep;
}
/**
 * Acquire target interface index for VXLAN tunneling encapsulation.
*
* @param[in] tcf
* Context object initialized by mlx5_flow_tcf_context_create().
* @param[in] ifouter
* Network interface index to attach VXLAN encap device to.
* @param[in] dev_flow
* Flow tcf object with tunnel structure pointer set.
* @param[out] error
* Perform verbose error reporting if not NULL.
* @return
* Interface descriptor pointer on success,
* NULL otherwise and rte_errno is set.
*/
static struct tcf_vtep*
flow_tcf_encap_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
unsigned int ifouter,
struct mlx5_flow *dev_flow __rte_unused,
struct rte_flow_error *error)
{
static uint16_t encap_port = MLX5_VXLAN_PORT_MIN - 1;
struct tcf_vtep *vtep;
assert(ifouter);
	/* Check whether an attached encap VTEP already exists. */
LIST_FOREACH(vtep, &vtep_list_vxlan, next) {
if (vtep->ifouter == ifouter)
break;
}
if (vtep) {
/* VTEP already exists, just increment the reference. */
vtep->refcnt++;
} else {
uint16_t pcnt;
		/* Not found, create a new attached VTEP. */
for (pcnt = 0; pcnt <= (MLX5_VXLAN_PORT_MAX
- MLX5_VXLAN_PORT_MIN); pcnt++) {
encap_port++;
			/* Wrap around the UDP port index. */
if (encap_port < MLX5_VXLAN_PORT_MIN ||
encap_port > MLX5_VXLAN_PORT_MAX)
encap_port = MLX5_VXLAN_PORT_MIN;
			/* Check whether the UDP port is already in use. */
LIST_FOREACH(vtep, &vtep_list_vxlan, next) {
if (vtep->port == encap_port)
break;
}
if (vtep) {
/* Port is in use, try the next one. */
vtep = NULL;
continue;
}
vtep = flow_tcf_vtep_create(tcf, ifouter,
encap_port, error);
if (vtep) {
LIST_INSERT_HEAD(&vtep_list_vxlan, vtep, next);
break;
}
if (rte_errno != EEXIST)
break;
}
if (!vtep)
return NULL;
}
assert(vtep->ifouter == ifouter);
assert(vtep->ifindex);
return vtep;
}
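/*
 * Note: encap_port above is a static round-robin cursor, so successive
 * VTEP creations walk the [MLX5_VXLAN_PORT_MIN, MLX5_VXLAN_PORT_MAX]
 * range, skipping ports already present in the shared VTEP list and
 * moving on to the next port when the kernel reports EEXIST for one
 * taken outside of this PMD.
 */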
/**
 * Acquire target interface index for tunneling of any type.
 * Create a new VTEP if needed.
*
* @param[in] tcf
* Context object initialized by mlx5_flow_tcf_context_create().
* @param[in] ifouter
* Network interface index to attach VXLAN encap device to.
* @param[in] dev_flow
* Flow tcf object with tunnel structure pointer set.
* @param[out] error
* Perform verbose error reporting if not NULL.
* @return
* Interface descriptor pointer on success,
* NULL otherwise and rte_errno is set.
*/
static struct tcf_vtep*
flow_tcf_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
unsigned int ifouter,
struct mlx5_flow *dev_flow,
struct rte_flow_error *error)
{
struct tcf_vtep *vtep = NULL;
assert(dev_flow->tcf.tunnel);
pthread_mutex_lock(&vtep_list_mutex);
switch (dev_flow->tcf.tunnel->type) {
case FLOW_TCF_TUNACT_VXLAN_ENCAP:
vtep = flow_tcf_encap_vtep_acquire(tcf, ifouter,
dev_flow, error);
break;
case FLOW_TCF_TUNACT_VXLAN_DECAP:
vtep = flow_tcf_decap_vtep_acquire(tcf, dev_flow, error);
break;
default:
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"unsupported tunnel type");
break;
}
pthread_mutex_unlock(&vtep_list_mutex);
return vtep;
}
/**
 * Release the tunneling interface descriptor. Decrements the reference
 * counter and actually removes the device when the counter reaches zero.
*
* @param[in] tcf
* Context object initialized by mlx5_flow_tcf_context_create().
* @param[in] vtep
* VTEP device descriptor structure.
* @param[in] dev_flow
* Flow tcf object with tunnel structure pointer set.
*/
static void
flow_tcf_vtep_release(struct mlx5_flow_tcf_context *tcf,
struct tcf_vtep *vtep,
struct mlx5_flow *dev_flow)
{
assert(dev_flow->tcf.tunnel);
pthread_mutex_lock(&vtep_list_mutex);
switch (dev_flow->tcf.tunnel->type) {
case FLOW_TCF_TUNACT_VXLAN_DECAP:
break;
case FLOW_TCF_TUNACT_VXLAN_ENCAP:
break;
default:
assert(false);
DRV_LOG(WARNING, "Unsupported tunnel type");
break;
}
assert(vtep->refcnt);
if (--vtep->refcnt == 0) {
LIST_REMOVE(vtep, next);
flow_tcf_vtep_delete(tcf, vtep);
}
pthread_mutex_unlock(&vtep_list_mutex);
}
/**
* Apply flow to E-Switch by sending Netlink message.
*
* @param[in] dev
* Pointer to Ethernet device.
* @param[in, out] flow
* Pointer to the sub flow.
* @param[out] error
* Pointer to the error structure.
*
* @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
struct rte_flow_error *error)
{
struct priv *priv = dev->data->dev_private;
struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
struct mlx5_flow *dev_flow;
struct nlmsghdr *nlh;
dev_flow = LIST_FIRST(&flow->dev_flows);
/* E-Switch flow can't be expanded. */
assert(!LIST_NEXT(dev_flow, next));
if (dev_flow->tcf.applied)
return 0;
nlh = dev_flow->tcf.nlh;
nlh->nlmsg_type = RTM_NEWTFILTER;
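	/* Create the rule, NLM_F_EXCL makes the kernel reject duplicates. */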
nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
if (dev_flow->tcf.tunnel) {
/*
* Replace the interface index, target for
* encapsulation, source for decapsulation.
*/
assert(!dev_flow->tcf.tunnel->vtep);
assert(dev_flow->tcf.tunnel->ifindex_ptr);
/* Acquire actual VTEP device when rule is being applied. */
dev_flow->tcf.tunnel->vtep =
flow_tcf_vtep_acquire(ctx,
dev_flow->tcf.tunnel->ifindex_org,
dev_flow, error);
if (!dev_flow->tcf.tunnel->vtep)
return -rte_errno;
		DRV_LOG(INFO, "Replace ifindex: %d->%d",
			dev_flow->tcf.tunnel->ifindex_org,
			dev_flow->tcf.tunnel->vtep->ifindex);
*dev_flow->tcf.tunnel->ifindex_ptr =
dev_flow->tcf.tunnel->vtep->ifindex;
}
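	/* Zero return from flow_tcf_nl_ack() means the rule was accepted. */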
if (!flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL)) {
dev_flow->tcf.applied = 1;
return 0;
}
return rte_flow_error_set(error, rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"netlink: failed to create TC flow rule");
}
/**
* Remove flow from E-Switch by sending Netlink message.
*
* @param[in] dev
* Pointer to Ethernet device.
* @param[in, out] flow
* Pointer to the sub flow.
*/
static void
flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
struct priv *priv = dev->data->dev_private;
struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
struct mlx5_flow *dev_flow;
struct nlmsghdr *nlh;
if (!flow)
return;
dev_flow = LIST_FIRST(&flow->dev_flows);
if (!dev_flow)
return;
/* E-Switch flow can't be expanded. */
assert(!LIST_NEXT(dev_flow, next));
if (dev_flow->tcf.applied) {
nlh = dev_flow->tcf.nlh;
nlh->nlmsg_type = RTM_DELTFILTER;
nlh->nlmsg_flags = NLM_F_REQUEST;
flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL);
if (dev_flow->tcf.tunnel) {
assert(dev_flow->tcf.tunnel->vtep);
flow_tcf_vtep_release(ctx,
dev_flow->tcf.tunnel->vtep,
dev_flow);
dev_flow->tcf.tunnel->vtep = NULL;
}
dev_flow->tcf.applied = 0;
}
}
/**
* Remove flow from E-Switch and release resources of the device flow.
*
* @param[in] dev
* Pointer to Ethernet device.
* @param[in, out] flow
* Pointer to the sub flow.
*/
static void
flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
{
struct mlx5_flow *dev_flow;
if (!flow)
return;
flow_tcf_remove(dev, flow);
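	/* Drop the counter reference, free it when the last one is gone. */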
if (flow->counter) {
if (--flow->counter->ref_cnt == 0) {
rte_free(flow->counter);
flow->counter = NULL;
}
}
dev_flow = LIST_FIRST(&flow->dev_flows);
if (!dev_flow)
return;
/* E-Switch flow can't be expanded. */
assert(!LIST_NEXT(dev_flow, next));
LIST_REMOVE(dev_flow, next);
rte_free(dev_flow);
}
/**
 * Helper routine for figuring out the space required for a parse buffer.
*
* @param array
 *   Array of values to use.
* @param idx
* Current location in array.
* @param value
* Value to compare with.
*
* @return
 *   The maximum of the given value and the array value at the given index.
*/
static uint16_t
flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
{
	return idx < 0 ? value : RTE_MAX(array[idx], value);
}
/**
* Parse rtnetlink message attributes filling the attribute table with the info
* retrieved.
*
 * @param[out] tb
 *   Attribute table to be filled.
 * @param max
 *   Maximum entry in the attribute table.
 * @param rta
* The attributes section in the message to be parsed.
* @param len
* The length of the attributes section in the message.
*/
static void
flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
struct rtattr *rta, int len)
{
unsigned short type;
memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
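	/* Record only the first attribute of each type. */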
while (RTA_OK(rta, len)) {
type = rta->rta_type;
if (type <= max && !tb[type])
tb[type] = rta;
rta = RTA_NEXT(rta, len);
}
}
/**
* Extract flow counters from flower action.
*
* @param rta
 *   Flower action stats properties in the Netlink message received.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   that we need to traverse in order to get to the requested object.
* @param idx
* Current location in rta_type table.
* @param[out] data
 *   Data holding the count statistics of the rte_flow retrieved from
 *   the message.
*
* @return
* 0 if data was found and retrieved, -1 otherwise.
*/
static int
flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
uint16_t rta_type[], int idx,
struct gnet_stats_basic *data)
{
int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
TCA_STATS_BASIC);
struct rtattr *tbs[tca_stats_max + 1];
if (rta == NULL || idx < 0)
return -1;
flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
RTA_DATA(rta), RTA_PAYLOAD(rta));
switch (rta_type[idx]) {
case TCA_STATS_BASIC:
if (tbs[TCA_STATS_BASIC]) {
memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
sizeof(*data)));
return 0;
}
break;
default:
break;
}
return -1;
}
/**
* Parse flower single action retrieving the requested action attribute,
* if found.
*
* @param arg
 *   Flower action properties in the Netlink message received.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   that we need to traverse in order to get to the requested object.
* @param idx
* Current location in rta_type table.
* @param[out] data
* Count statistics retrieved from the message query.
*
* @return
* 0 if data was found and retrieved, -1 otherwise.
*/
static int
flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
uint16_t rta_type[], int idx, void *data)
{
int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
struct rtattr *tb[tca_act_max + 1];
if (arg == NULL || idx < 0)
return -1;
flow_tcf_nl_parse_rtattr(tb, tca_act_max,
RTA_DATA(arg), RTA_PAYLOAD(arg));
if (tb[TCA_ACT_KIND] == NULL)
return -1;
switch (rta_type[idx]) {
case TCA_ACT_STATS:
if (tb[TCA_ACT_STATS])
return flow_tcf_nl_action_stats_parse_and_get
(tb[TCA_ACT_STATS],
rta_type, --idx,
(struct gnet_stats_basic *)data);
break;
default:
break;
}
return -1;
}
/**
* Parse flower action section in the message retrieving the requested
* attribute from the first action that provides it.
*
 * @param arg
 *   Flower action section in the Netlink message received.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   that we need to traverse in order to get to the requested object.
* @param idx
* Current location in rta_type table.
* @param[out] data
 *   Data retrieved from the message query.
*
* @return
* 0 if data was found and retrieved, -1 otherwise.
*/
static int
flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
uint16_t rta_type[], int idx, void *data)
{
struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
int i;
if (arg == NULL || idx < 0)
return -1;
flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
RTA_DATA(arg), RTA_PAYLOAD(arg));
switch (rta_type[idx]) {
/*
* flow counters are stored in the actions defined by the flow
* and not in the flow itself, therefore we need to traverse the
* flower chain of actions in search for them.
*
* Note that the index is not decremented here.
*/
case TCA_ACT_STATS:
for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
if (tb[i] &&
!flow_tcf_nl_parse_one_action_and_get(tb[i],
rta_type,
idx, data))
return 0;
}
break;
default:
break;
}
return -1;
}
/**
* Parse flower classifier options in the message, retrieving the requested
* attribute if found.
*
* @param opt
 *   Flower classifier options section in the Netlink message received.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   that we need to traverse in order to get to the requested object.
* @param idx
* Current location in rta_type table.
* @param[out] data
 *   Data retrieved from the message query.
*
* @return
* 0 if data was found and retrieved, -1 otherwise.
*/
static int
flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
uint16_t rta_type[], int idx, void *data)
{
int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
TCA_FLOWER_ACT);
struct rtattr *tb[tca_flower_max + 1];
if (!opt || idx < 0)
return -1;
flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
RTA_DATA(opt), RTA_PAYLOAD(opt));
switch (rta_type[idx]) {
case TCA_FLOWER_ACT:
if (tb[TCA_FLOWER_ACT])
return flow_tcf_nl_action_parse_and_get
(tb[TCA_FLOWER_ACT],
rta_type, --idx, data);
break;
default:
break;
}
return -1;
}
/**
* Parse Netlink reply on filter query, retrieving the flow counters.
*
 * @param cnlh
 *   Message received from Netlink.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   that we need to traverse in order to get to the requested object.
* @param idx
* Current location in rta_type table.
* @param[out] data
 *   Data retrieved from the message query.
*
* @return
* 0 if data was found and retrieved, -1 otherwise.
*/
static int
flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
uint16_t rta_type[], int idx, void *data)
{
struct nlmsghdr *nlh = cnlh;
struct tcmsg *t = NLMSG_DATA(nlh);
int len = nlh->nlmsg_len;
int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
struct rtattr *tb[tca_max + 1];
if (idx < 0)
return -1;
if (nlh->nlmsg_type != RTM_NEWTFILTER &&
nlh->nlmsg_type != RTM_GETTFILTER &&
nlh->nlmsg_type != RTM_DELTFILTER)
return -1;
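	/* Skip the fixed tcmsg header to get to the attributes. */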
len -= NLMSG_LENGTH(sizeof(*t));
if (len < 0)
return -1;
flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
/* Not a TC flower flow - bail out */
if (!tb[TCA_KIND] ||
strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
return -1;
switch (rta_type[idx]) {
case TCA_OPTIONS:
if (tb[TCA_OPTIONS])
return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
rta_type,
--idx, data);
break;
default:
break;
}
return -1;
}
/**
* A callback to parse Netlink reply on TC flower query.
*
* @param nlh
* Message received from Netlink.
* @param[out] data
 *   Pointer to the data area to be filled by the parsing routine,
 *   assumed to be a pointer to struct flow_tcf_stats_basic.
*
* @return
* MNL_CB_OK value.
*/
static int
flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
{
/*
* The backward sequence of rta_types to pass in order to get
* to the counters.
*/
uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
TCA_FLOWER_ACT, TCA_OPTIONS };
struct flow_tcf_stats_basic *sb_data = data;
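	/*
	 * The parse routines below take a non-const message pointer,
	 * use a union to strip the const qualifier without a cast.
	 */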
union {
const struct nlmsghdr *c;
struct nlmsghdr *nc;
} tnlh = { .c = nlh };
if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
RTE_DIM(rta_type) - 1,
(void *)&sb_data->counters))
sb_data->valid = true;
return MNL_CB_OK;
}
/**
* Query a TC flower rule for its statistics via netlink.
*
* @param[in] dev
* Pointer to Ethernet device.
* @param[in] flow
* Pointer to the sub flow.
* @param[out] data
 *   Data retrieved by the query.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
flow_tcf_query_count(struct rte_eth_dev *dev,
struct rte_flow *flow,
void *data,
struct rte_flow_error *error)
{
struct flow_tcf_stats_basic sb_data = { 0 };
struct rte_flow_query_count *qc = data;
struct priv *priv = dev->data->dev_private;
struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
struct mnl_socket *nl = ctx->nl;
struct mlx5_flow *dev_flow;
struct nlmsghdr *nlh;
uint32_t seq = priv->tcf_context->seq++;
ssize_t ret;
assert(qc);
dev_flow = LIST_FIRST(&flow->dev_flows);
/* E-Switch flow can't be expanded. */
assert(!LIST_NEXT(dev_flow, next));
if (!dev_flow->flow->counter)
goto notsup_exit;
nlh = dev_flow->tcf.nlh;
nlh->nlmsg_type = RTM_GETTFILTER;
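	/* NLM_F_ECHO requests the kernel to return the queried rule. */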
nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
nlh->nlmsg_seq = seq;
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
goto error_exit;
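	/* Read replies until the callback reports stop or an error. */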
do {
ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
if (ret <= 0)
break;
ret = mnl_cb_run(ctx->buf, ret, seq,
mnl_socket_get_portid(nl),
flow_tcf_nl_message_get_stats_basic,
(void *)&sb_data);
} while (ret > 0);
	if (sb_data.valid) {
		/* Return the delta from last reset. */
qc->hits_set = 1;
qc->bytes_set = 1;
qc->hits = sb_data.counters.packets - flow->counter->hits;
qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
if (qc->reset) {
flow->counter->hits = sb_data.counters.packets;
flow->counter->bytes = sb_data.counters.bytes;
}
return 0;
}
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "netlink: no counters found in the reply");
error_exit:
return rte_flow_error_set
(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
NULL, "netlink: failed to read flow rule counters");
notsup_exit:
return rte_flow_error_set
(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
NULL, "counters are not available.");
}
/**
* Query a flow.
*
* @see rte_flow_query()
* @see rte_flow_ops
*/
static int
flow_tcf_query(struct rte_eth_dev *dev,
struct rte_flow *flow,
const struct rte_flow_action *actions,
void *data,
struct rte_flow_error *error)
{
int ret = -EINVAL;
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
switch (actions->type) {
case RTE_FLOW_ACTION_TYPE_VOID:
break;
case RTE_FLOW_ACTION_TYPE_COUNT:
ret = flow_tcf_query_count(dev, flow, data, error);
break;
default:
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
"action not supported");
}
}
return ret;
}
const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
.validate = flow_tcf_validate,
.prepare = flow_tcf_prepare,
.translate = flow_tcf_translate,
.apply = flow_tcf_apply,
.remove = flow_tcf_remove,
.destroy = flow_tcf_destroy,
.query = flow_tcf_query,
};
/**
* Create and configure a libmnl socket for Netlink flow rules.
*
* @return
* A valid libmnl socket object pointer on success, NULL otherwise and
* rte_errno is set.
*/
static struct mnl_socket *
flow_tcf_mnl_socket_create(void)
{
struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
if (nl) {
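		/* Best effort: let the kernel strip request payload from ACKs. */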
mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
sizeof(int));
if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
return nl;
}
rte_errno = errno;
if (nl)
mnl_socket_close(nl);
return NULL;
}
/**
* Destroy a libmnl socket.
*
* @param nl
* Libmnl socket of the @p NETLINK_ROUTE kind.
*/
static void
flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
{
if (nl)
mnl_socket_close(nl);
}
/**
* Initialize ingress qdisc of a given network interface.
*
* @param ctx
* Pointer to tc-flower context to use.
* @param ifindex
* Index of network interface to initialize.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
unsigned int ifindex, struct rte_flow_error *error)
{
struct nlmsghdr *nlh;
struct tcmsg *tcm;
alignas(struct nlmsghdr)
uint8_t buf[mnl_nlmsg_size(sizeof(*tcm)) +
SZ_NLATTR_STRZ_OF("ingress") +
MNL_BUF_EXTRA_SPACE];
/* Destroy existing ingress qdisc and everything attached to it. */
nlh = mnl_nlmsg_put_header(buf);
nlh->nlmsg_type = RTM_DELQDISC;
nlh->nlmsg_flags = NLM_F_REQUEST;
tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
tcm->tcm_family = AF_UNSPEC;
tcm->tcm_ifindex = ifindex;
tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
tcm->tcm_parent = TC_H_INGRESS;
assert(sizeof(buf) >= nlh->nlmsg_len);
/* Ignore errors when qdisc is already absent. */
if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL) &&
rte_errno != EINVAL && rte_errno != ENOENT)
return rte_flow_error_set(error, rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"netlink: failed to remove ingress"
" qdisc");
/* Create fresh ingress qdisc. */
nlh = mnl_nlmsg_put_header(buf);
nlh->nlmsg_type = RTM_NEWQDISC;
nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
tcm->tcm_family = AF_UNSPEC;
tcm->tcm_ifindex = ifindex;
tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
tcm->tcm_parent = TC_H_INGRESS;
mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
assert(sizeof(buf) >= nlh->nlmsg_len);
if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL))
return rte_flow_error_set(error, rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"netlink: failed to create ingress"
" qdisc");
return 0;
}
/**
* Create libmnl context for Netlink flow rules.
*
* @return
 *   A valid tc-flower context object pointer on success, NULL otherwise
 *   and rte_errno is set.
*/
struct mlx5_flow_tcf_context *
mlx5_flow_tcf_context_create(void)
{
struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
sizeof(*ctx),
sizeof(uint32_t));
if (!ctx)
goto error;
ctx->nl = flow_tcf_mnl_socket_create();
if (!ctx->nl)
goto error;
ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
ctx->buf = rte_zmalloc(__func__,
ctx->buf_size, sizeof(uint32_t));
if (!ctx->buf)
goto error;
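	/* Start from a random sequence number to tell replies apart. */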
ctx->seq = random();
return ctx;
error:
mlx5_flow_tcf_context_destroy(ctx);
return NULL;
}
/**
* Destroy a libmnl context.
*
* @param ctx
 *   Pointer to the tc-flower context to destroy.
*/
void
mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
{
if (!ctx)
return;
flow_tcf_mnl_socket_destroy(ctx->nl);
rte_free(ctx->buf);
rte_free(ctx);
}