numam-dpdk/drivers/net/mlx5/mlx5.h

302 lines
12 KiB
C
Raw Normal View History

/*-
* BSD LICENSE
*
* Copyright 2015 6WIND S.A.
* Copyright 2015 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of 6WIND S.A. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef RTE_PMD_MLX5_H_
#define RTE_PMD_MLX5_H_
#include <stddef.h>
#include <stdint.h>
#include <limits.h>
#include <net/if.h>
#include <netinet/in.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_spinlock.h>
#include <rte_interrupts.h>
#include <rte_errno.h>
#include <rte_flow.h>
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
enum {
PCI_VENDOR_ID_MELLANOX = 0x15b3,
};
enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX4 = 0x1013,
PCI_DEVICE_ID_MELLANOX_CONNECTX4VF = 0x1014,
PCI_DEVICE_ID_MELLANOX_CONNECTX4LX = 0x1015,
PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016,
PCI_DEVICE_ID_MELLANOX_CONNECTX5 = 0x1017,
PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018,
PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019,
PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
};
struct mlx5_xstats_ctrl {
/* Number of device stats. */
uint16_t stats_n;
/* Index in the device counters table. */
uint16_t dev_table_idx[MLX5_MAX_XSTATS];
uint64_t base[MLX5_MAX_XSTATS];
};
struct priv {
struct rte_eth_dev *dev; /* Ethernet device of master process. */
struct ibv_context *ctx; /* Verbs context. */
struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
/*
* MAC addresses array and configuration bit-field.
* An extra entry that cannot be modified by the DPDK is reserved
* for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
*/
struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES];
BITFIELD_DECLARE(mac_configured, uint32_t, MLX5_MAX_MAC_ADDRESSES);
uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
/* Device properties. */
uint16_t mtu; /* Configured MTU. */
uint8_t port; /* Physical port number. */
unsigned int promisc_req:1; /* Promiscuous mode requested. */
unsigned int allmulti_req:1; /* All multicast mode requested. */
unsigned int hw_csum:1; /* Checksum offload is supported. */
unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */
unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */
unsigned int hw_padding:1; /* End alignment padding is supported. */
unsigned int sriov:1; /* This is a VF or PF with VF devices. */
unsigned int mps:2; /* Multi-packet send mode (0: disabled). */
unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
net/mlx5: handle Rx CQE compression Mini (compressed) completion queue entries (CQEs) are returned by the NIC when PCI back pressure is detected, in which case the first CQE64 contains common packet information followed by a number of CQE8 providing the rest, followed by a matching number of empty CQE64 entries to be used by software for decompression. Before decompression: 0 1 2 6 7 8 +-------+ +---------+ +-------+ +-------+ +-------+ +-------+ | CQE64 | | CQE64 | | CQE64 | | CQE64 | | CQE64 | | CQE64 | |-------| |---------| |-------| |-------| |-------| |-------| | ..... | | cqe8[0] | | | . | | | | | ..... | | ..... | | cqe8[1] | | | . | | | | | ..... | | ..... | | ....... | | | . | | | | | ..... | | ..... | | cqe8[7] | | | | | | | | ..... | +-------+ +---------+ +-------+ +-------+ +-------+ +-------+ After decompression: 0 1 ... 8 +-------+ +-------+ +-------+ | CQE64 | | CQE64 | | CQE64 | |-------| |-------| |-------| | ..... | | ..... | . | ..... | | ..... | | ..... | . | ..... | | ..... | | ..... | . | ..... | | ..... | | ..... | | ..... | +-------+ +-------+ +-------+ This patch does not perform the entire decompression step as it would be really expensive, instead the first CQE64 is consumed and an internal context is maintained to interpret the following CQE8 entries directly. Intermediate empty CQE64 entries are handed back to HW without further processing. Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Signed-off-by: Olga Shern <olgas@mellanox.com> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
2016-06-24 13:17:54 +00:00
unsigned int cqe_comp:1; /* Whether CQE compression is enabled. */
unsigned int pending_alarm:1; /* An alarm is pending. */
unsigned int tso:1; /* Whether TSO is supported. */
unsigned int tunnel_en:1;
unsigned int isolated:1; /* Whether isolated mode is enabled. */
unsigned int tx_vec_en:1; /* Whether Tx vector is enabled. */
unsigned int rx_vec_en:1; /* Whether Rx vector is enabled. */
/* Whether Tx offloads for tunneled packets are supported. */
unsigned int max_tso_payload_sz; /* Maximum TCP payload for TSO. */
unsigned int txq_inline; /* Maximum packet size for inlining. */
unsigned int txqs_inline; /* Queue number threshold for inlining. */
unsigned int inline_max_packet_sz; /* Max packet size for inlining. */
/* RX/TX queues. */
unsigned int rxqs_n; /* RX queues array size. */
unsigned int txqs_n; /* TX queues array size. */
struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
/* Indirection tables referencing all RX WQs. */
struct ibv_rwq_ind_table *(*ind_tables)[];
unsigned int ind_tables_n; /* Number of indirection tables. */
unsigned int ind_table_max_size; /* Maximum indirection table size. */
mlx5: refactor Rx code for the new verbs RSS API The new Verbs RSS API is lower-level than the previous one and much more flexible but requires RX queues to use Work Queues (WQs) internally instead of Queue Pairs (QPs), which are grouped in an indirection table used by a new kind of hash RX QPs. Hash RX QPs and the indirection table together replace the parent RSS QP while WQs are mostly similar to child QPs. RSS hash key is not configurable yet. Summary of changes: - Individual DPDK RX queues do not store flow properties anymore, this info is now part of the hash RX queues. - All functions affecting the parent queue when RSS is enabled or the basic queues otherwise are modified to affect hash RX queues instead. - Hash RX queues are also used when a single DPDK RX queue is configured (no RSS) to remove that special case. - Hash RX queues and indirection table are created/destroyed when device is started/stopped in addition to create/destroy flows. - Contrary to QPs, WQs are moved to the "ready" state before posting RX buffers, otherwise they are ignored. - Resource domain information is added to WQs for better performance. - CQs are not resized anymore when switching between non-SG and SG modes as it does not work correctly with WQs. Use the largest possible size instead, since CQ size does not have to be the same as the number of elements in the RX queue. This also applies to the maximum number of outstanding WRs in a WQ (max_recv_wr). Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Signed-off-by: Olga Shern <olgas@mellanox.com> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Signed-off-by: Or Ami <ora@mellanox.com> Signed-off-by: Yaacov Hazan <yaacovh@mellanox.com>
2015-10-30 18:55:06 +00:00
/* Hash RX QPs feeding the indirection table. */
struct hash_rxq (*hash_rxqs)[];
unsigned int hash_rxqs_n; /* Hash RX QPs array size. */
/* RSS configuration array indexed by hash RX queue type. */
struct rte_eth_rss_conf *(*rss_conf)[];
uint64_t rss_hf; /* RSS DPDK bit field of active RSS. */
struct rte_intr_handle intr_handle; /* Interrupt handler. */
unsigned int (*reta_idx)[]; /* RETA index table. */
unsigned int reta_idx_n; /* RETA index size. */
struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
uint32_t link_speed_capa; /* Link speed capabilities. */
struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
rte_spinlock_t lock; /* Lock for control functions. */
int primary_socket; /* Unix socket for primary process. */
struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
};
/**
* Lock private structure to protect it from concurrent access in the
* control path.
*
* @param priv
* Pointer to private structure.
*/
static inline void
priv_lock(struct priv *priv)
{
rte_spinlock_lock(&priv->lock);
}
/**
* Unlock private structure.
*
* @param priv
* Pointer to private structure.
*/
static inline void
priv_unlock(struct priv *priv)
{
rte_spinlock_unlock(&priv->lock);
}
/* mlx5.c */
int mlx5_getenv_int(const char *);
/* mlx5_ethdev.c */
struct priv *mlx5_get_priv(struct rte_eth_dev *dev);
int mlx5_is_secondary(void);
int priv_get_ifname(const struct priv *, char (*)[IF_NAMESIZE]);
int priv_ifreq(const struct priv *, int req, struct ifreq *);
int priv_is_ib_cntr(const char *);
int priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *);
int priv_get_num_vfs(struct priv *, uint16_t *);
int priv_get_mtu(struct priv *, uint16_t *);
int priv_set_flags(struct priv *, unsigned int, unsigned int);
int mlx5_dev_configure(struct rte_eth_dev *);
void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *);
const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev);
int mlx5_link_update(struct rte_eth_dev *, int);
int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t);
int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *);
int mlx5_dev_set_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *);
int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
struct rte_pci_addr *);
void mlx5_dev_link_status_handler(void *);
void mlx5_dev_interrupt_handler(void *);
void priv_dev_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *);
void priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
int mlx5_set_link_down(struct rte_eth_dev *dev);
int mlx5_set_link_up(struct rte_eth_dev *dev);
void priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev);
void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev);
/* mlx5_mac.c */
int priv_get_mac(struct priv *, uint8_t (*)[ETHER_ADDR_LEN]);
mlx5: refactor Rx code for the new verbs RSS API The new Verbs RSS API is lower-level than the previous one and much more flexible but requires RX queues to use Work Queues (WQs) internally instead of Queue Pairs (QPs), which are grouped in an indirection table used by a new kind of hash RX QPs. Hash RX QPs and the indirection table together replace the parent RSS QP while WQs are mostly similar to child QPs. RSS hash key is not configurable yet. Summary of changes: - Individual DPDK RX queues do not store flow properties anymore, this info is now part of the hash RX queues. - All functions affecting the parent queue when RSS is enabled or the basic queues otherwise are modified to affect hash RX queues instead. - Hash RX queues are also used when a single DPDK RX queue is configured (no RSS) to remove that special case. - Hash RX queues and indirection table are created/destroyed when device is started/stopped in addition to create/destroy flows. - Contrary to QPs, WQs are moved to the "ready" state before posting RX buffers, otherwise they are ignored. - Resource domain information is added to WQs for better performance. - CQs are not resized anymore when switching between non-SG and SG modes as it does not work correctly with WQs. Use the largest possible size instead, since CQ size does not have to be the same as the number of elements in the RX queue. This also applies to the maximum number of outstanding WRs in a WQ (max_recv_wr). Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Signed-off-by: Olga Shern <olgas@mellanox.com> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Signed-off-by: Or Ami <ora@mellanox.com> Signed-off-by: Yaacov Hazan <yaacovh@mellanox.com>
2015-10-30 18:55:06 +00:00
void hash_rxq_mac_addrs_del(struct hash_rxq *);
void priv_mac_addrs_disable(struct priv *);
void mlx5_mac_addr_remove(struct rte_eth_dev *, uint32_t);
mlx5: refactor Rx code for the new verbs RSS API The new Verbs RSS API is lower-level than the previous one and much more flexible but requires RX queues to use Work Queues (WQs) internally instead of Queue Pairs (QPs), which are grouped in an indirection table used by a new kind of hash RX QPs. Hash RX QPs and the indirection table together replace the parent RSS QP while WQs are mostly similar to child QPs. RSS hash key is not configurable yet. Summary of changes: - Individual DPDK RX queues do not store flow properties anymore, this info is now part of the hash RX queues. - All functions affecting the parent queue when RSS is enabled or the basic queues otherwise are modified to affect hash RX queues instead. - Hash RX queues are also used when a single DPDK RX queue is configured (no RSS) to remove that special case. - Hash RX queues and indirection table are created/destroyed when device is started/stopped in addition to create/destroy flows. - Contrary to QPs, WQs are moved to the "ready" state before posting RX buffers, otherwise they are ignored. - Resource domain information is added to WQs for better performance. - CQs are not resized anymore when switching between non-SG and SG modes as it does not work correctly with WQs. Use the largest possible size instead, since CQ size does not have to be the same as the number of elements in the RX queue. This also applies to the maximum number of outstanding WRs in a WQ (max_recv_wr). Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Signed-off-by: Olga Shern <olgas@mellanox.com> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Signed-off-by: Or Ami <ora@mellanox.com> Signed-off-by: Yaacov Hazan <yaacovh@mellanox.com>
2015-10-30 18:55:06 +00:00
int hash_rxq_mac_addrs_add(struct hash_rxq *);
int priv_mac_addr_add(struct priv *, unsigned int,
const uint8_t (*)[ETHER_ADDR_LEN]);
mlx5: refactor Rx code for the new verbs RSS API The new Verbs RSS API is lower-level than the previous one and much more flexible but requires RX queues to use Work Queues (WQs) internally instead of Queue Pairs (QPs), which are grouped in an indirection table used by a new kind of hash RX QPs. Hash RX QPs and the indirection table together replace the parent RSS QP while WQs are mostly similar to child QPs. RSS hash key is not configurable yet. Summary of changes: - Individual DPDK RX queues do not store flow properties anymore, this info is now part of the hash RX queues. - All functions affecting the parent queue when RSS is enabled or the basic queues otherwise are modified to affect hash RX queues instead. - Hash RX queues are also used when a single DPDK RX queue is configured (no RSS) to remove that special case. - Hash RX queues and indirection table are created/destroyed when device is started/stopped in addition to create/destroy flows. - Contrary to QPs, WQs are moved to the "ready" state before posting RX buffers, otherwise they are ignored. - Resource domain information is added to WQs for better performance. - CQs are not resized anymore when switching between non-SG and SG modes as it does not work correctly with WQs. Use the largest possible size instead, since CQ size does not have to be the same as the number of elements in the RX queue. This also applies to the maximum number of outstanding WRs in a WQ (max_recv_wr). Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Signed-off-by: Olga Shern <olgas@mellanox.com> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Signed-off-by: Or Ami <ora@mellanox.com> Signed-off-by: Yaacov Hazan <yaacovh@mellanox.com>
2015-10-30 18:55:06 +00:00
int priv_mac_addrs_enable(struct priv *);
int mlx5_mac_addr_add(struct rte_eth_dev *, struct ether_addr *, uint32_t,
uint32_t);
void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *);
/* mlx5_rss.c */
int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int,
uint64_t);
int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
int priv_rss_reta_index_resize(struct priv *, unsigned int);
int mlx5_dev_rss_reta_query(struct rte_eth_dev *,
struct rte_eth_rss_reta_entry64 *, uint16_t);
int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
struct rte_eth_rss_reta_entry64 *, uint16_t);
/* mlx5_rxmode.c */
int priv_special_flow_enable(struct priv *, enum hash_rxq_flow_type);
void priv_special_flow_disable(struct priv *, enum hash_rxq_flow_type);
int priv_special_flow_enable_all(struct priv *);
void priv_special_flow_disable_all(struct priv *);
void mlx5_promiscuous_enable(struct rte_eth_dev *);
void mlx5_promiscuous_disable(struct rte_eth_dev *);
void mlx5_allmulticast_enable(struct rte_eth_dev *);
void mlx5_allmulticast_disable(struct rte_eth_dev *);
/* mlx5_stats.c */
void priv_xstats_init(struct priv *);
void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);
void mlx5_stats_reset(struct rte_eth_dev *);
int mlx5_xstats_get(struct rte_eth_dev *,
struct rte_eth_xstat *, unsigned int);
void mlx5_xstats_reset(struct rte_eth_dev *);
int mlx5_xstats_get_names(struct rte_eth_dev *,
struct rte_eth_xstat_name *, unsigned int);
/* mlx5_vlan.c */
int mlx5_vlan_filter_set(struct rte_eth_dev *, uint16_t, int);
void mlx5_vlan_offload_set(struct rte_eth_dev *, int);
void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int);
/* mlx5_trigger.c */
int mlx5_dev_start(struct rte_eth_dev *);
void mlx5_dev_stop(struct rte_eth_dev *);
/* mlx5_flow.c */
int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type,
enum rte_filter_op, void *);
int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *,
const struct rte_flow_item [],
const struct rte_flow_action [],
struct rte_flow_error *);
struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
const struct rte_flow_attr *,
const struct rte_flow_item [],
const struct rte_flow_action [],
struct rte_flow_error *);
int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
struct rte_flow_error *);
int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
int priv_flow_start(struct priv *);
void priv_flow_stop(struct priv *);
int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
/* mlx5_socket.c */
int priv_socket_init(struct priv *priv);
int priv_socket_uninit(struct priv *priv);
void priv_socket_handle(struct priv *priv);
int priv_socket_connect(struct priv *priv);
#endif /* RTE_PMD_MLX5_H_ */