Add support for Multi-Physical Function Switch, MPFS, in mlx5en.

MPFS is a logical switch in the Mellanox device which forwards packets,
based on a hardware-driven L2 address table, to one or more physical
or virtual functions. Each physical or virtual function is required
to tell the MPFS, by using the MPFS firmware commands, which unicast
MAC addresses it is requesting from the physical port's traffic.
Broadcast and multicast traffic, however, is copied to all listening
physical and virtual functions and does not need a rule in the MPFS
switching table.

Linux commit:	eeb66cdb682678bfd1f02a4547e3649b38ffea7e
MFC after:	3 days
Sponsored by:	Mellanox Technologies
This commit is contained in:
Hans Petter Selasky 2019-10-02 09:22:22 +00:00
parent 2db3dd5061
commit 66b38bfe3d
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=352962
7 changed files with 306 additions and 21 deletions

View File

@ -4727,6 +4727,8 @@ dev/mlx5/mlx5_core/mlx5_main.c optional mlx5 pci \
compile-with "${OFED_C}"
dev/mlx5/mlx5_core/mlx5_mcg.c optional mlx5 pci \
compile-with "${OFED_C}"
dev/mlx5/mlx5_core/mlx5_mpfs.c optional mlx5 pci \
compile-with "${OFED_C}"
dev/mlx5/mlx5_core/mlx5_mr.c optional mlx5 pci \
compile-with "${OFED_C}"
dev/mlx5/mlx5_core/mlx5_pagealloc.c optional mlx5 pci \

View File

@ -717,6 +717,12 @@ struct mlx5_core_dev {
struct mlx5_rsvd_gids reserved_gids;
atomic_t roce_en;
} roce;
/* Driver-side bookkeeping for the MPFS L2 address table. */
struct {
/* taken around updates of "bitmap" */
spinlock_t spinlock;
/* upper bound on the number of L2 table indexes tracked by "bitmap" */
#define MLX5_MPFS_TABLE_MAX 32
/* one bit per L2 table index; a set bit marks the index as in use */
long bitmap[BITS_TO_LONGS(MLX5_MPFS_TABLE_MAX)];
} mpfs;
#ifdef CONFIG_MLX5_FPGA
struct mlx5_fpga_device *fpga;
#endif

View File

@ -38,6 +38,7 @@
#include <dev/mlx5/cq.h>
#include <dev/mlx5/qp.h>
#include <dev/mlx5/srq.h>
#include <dev/mlx5/mpfs.h>
#include <linux/delay.h>
#include <dev/mlx5/mlx5_ifc.h>
#include <dev/mlx5/mlx5_fpga/core.h>
@ -1130,10 +1131,16 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_free_comp_eqs;
}
err = mlx5_mpfs_init(dev);
if (err) {
mlx5_core_err(dev, "mpfs init failed %d\n", err);
goto err_fs;
}
err = mlx5_fpga_device_start(dev);
if (err) {
dev_err(&pdev->dev, "fpga device start failed %d\n", err);
goto err_fs;
goto err_mpfs;
}
err = mlx5_register_device(dev);
@ -1151,6 +1158,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
err_fpga:
mlx5_fpga_device_stop(dev);
err_mpfs:
mlx5_mpfs_destroy(dev);
err_fs:
mlx5_cleanup_fs(dev);
@ -1216,6 +1226,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_unregister_device(dev);
mlx5_fpga_device_stop(dev);
mlx5_mpfs_destroy(dev);
mlx5_cleanup_fs(dev);
unmap_bf_area(dev);
mlx5_wait_for_reclaim_vfs_pages(dev);

View File

@ -0,0 +1,125 @@
/*-
* Copyright (c) 2019, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <linux/types.h>
#include <linux/etherdevice.h>
#include <dev/mlx5/mlx5_ifc.h>
#include <dev/mlx5/device.h>
#include <dev/mlx5/mpfs.h>
#include <dev/mlx5/driver.h>
/*
 * Acquire and release the spinlock stored in "dev->mpfs". In this file
 * the lock is held whenever a bit in "dev->mpfs.bitmap" is set or cleared.
 */
#define MPFS_LOCK(dev) spin_lock(&(dev)->mpfs.spinlock)
#define MPFS_UNLOCK(dev) spin_unlock(&(dev)->mpfs.spinlock)
/*
 * Reserve a free L2 table index and program "mac" into it using the
 * SET_L2_TABLE_ENTRY firmware command.
 *
 * dev:     mlx5 core device
 * p_index: receives the table index on success
 * mac:     MAC address to install
 *
 * When the device does not report the "eswitch_flow_table" capability
 * nothing is programmed; "*p_index" is set to zero and zero is returned.
 *
 * Returns 0 on success, -ENOMEM when no free index is available, or
 * the error returned by mlx5_cmd_exec(). "*p_index" is left untouched
 * on failure.
 */
int
mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u32 *p_index, const u8 *mac)
{
	const u32 table_limit = MIN(1U << MLX5_CAP_GEN(dev, log_max_l2_table),
	    MLX5_MPFS_TABLE_MAX);
	u32 cmd_in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {};
	u32 cmd_out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {};
	u8 *mac_field;
	u32 slot;
	int status;

	if (!MLX5_CAP_GEN(dev, eswitch_flow_table)) {
		*p_index = 0;
		return (0);
	}

	/* pick the lowest unused index and mark it as taken */
	MPFS_LOCK(dev);
	slot = find_first_zero_bit(dev->mpfs.bitmap, table_limit);
	if (slot >= table_limit) {
		MPFS_UNLOCK(dev);
		return (-ENOMEM);
	}
	set_bit(slot, dev->mpfs.bitmap);
	MPFS_UNLOCK(dev);

	/* build the firmware command */
	MLX5_SET(set_l2_table_entry_in, cmd_in, opcode,
	    MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
	MLX5_SET(set_l2_table_entry_in, cmd_in, table_index, slot);

	/* the address is copied starting two bytes into the mac_address field */
	mac_field = MLX5_ADDR_OF(set_l2_table_entry_in, cmd_in, mac_address);
	ether_addr_copy(&mac_field[2], mac);

	status = mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
	    cmd_out, sizeof(cmd_out));
	if (status == 0) {
		*p_index = slot;
		return (status);
	}

	/* the command failed; hand the reserved index back */
	MPFS_LOCK(dev);
	clear_bit(slot, dev->mpfs.bitmap);
	MPFS_UNLOCK(dev);

	return (status);
}
/*
 * Remove the L2 table entry at "index" using the DELETE_L2_TABLE_ENTRY
 * firmware command and release the index for reuse.
 *
 * dev:   mlx5 core device
 * index: table index previously obtained from mlx5_mpfs_add_mac()
 *
 * Returns 0 on success, -EINVAL when "index" cannot refer to an entry
 * handed out by mlx5_mpfs_add_mac(), or the error returned by
 * mlx5_cmd_exec(). The index is only released when the firmware
 * command succeeds.
 */
int
mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u32 index)
{
	u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {};
	u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {};
	int err;

	if (!MLX5_CAP_GEN(dev, eswitch_flow_table)) {
		/* mlx5_mpfs_add_mac() only hands out index zero in this case */
		if (index != 0)
			return (-EINVAL);
		return (0);
	}

	/*
	 * The allocation bitmap holds MLX5_MPFS_TABLE_MAX bits. Reject
	 * anything beyond that, so that clear_bit() below can never
	 * touch memory outside of the bitmap.
	 */
	if (index >= MLX5_MPFS_TABLE_MAX)
		return (-EINVAL);

	MLX5_SET(delete_l2_table_entry_in, in, opcode,
	    MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
	MLX5_SET(delete_l2_table_entry_in, in, table_index, index);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err == 0) {
		MPFS_LOCK(dev);
		clear_bit(index, dev->mpfs.bitmap);
		MPFS_UNLOCK(dev);
	}
	return (err);
}
/*
 * Prepare the MPFS bookkeeping of "dev": initialize the spinlock and
 * mark every table index as unused. Always returns zero.
 */
int
mlx5_mpfs_init(struct mlx5_core_dev *dev)
{
	spin_lock_init(&dev->mpfs.spinlock);
	bitmap_zero(dev->mpfs.bitmap, MLX5_MPFS_TABLE_MAX);

	return (0);
}
/*
 * Tear down the MPFS bookkeeping of "dev". Any index still marked as
 * used at this point is reported as leaked before the spinlock is
 * destroyed.
 */
void
mlx5_mpfs_destroy(struct mlx5_core_dev *dev)
{
	const u32 leaked = bitmap_weight(dev->mpfs.bitmap,
	    MLX5_MPFS_TABLE_MAX);

	if (leaked > 0) {
		dev_err(&dev->pdev->dev,
		    "Leaking %u MPFS MAC table entries\n", leaked);
	}

	spin_lock_destroy(&dev->mpfs.spinlock);
}

View File

@ -29,6 +29,7 @@
#include <linux/list.h>
#include <dev/mlx5/fs.h>
#include <dev/mlx5/mpfs.h>
#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
@ -54,6 +55,7 @@ enum {
struct mlx5e_eth_addr_hash_node {
LIST_ENTRY(mlx5e_eth_addr_hash_node) hlist;
u8 action;
u32 mpfs_index;
struct mlx5e_eth_addr_info ai;
};
@ -63,29 +65,23 @@ mlx5e_hash_eth_addr(const u8 * addr)
return (addr[5]);
}
static void
static bool
mlx5e_add_eth_addr_to_hash(struct mlx5e_eth_addr_hash_head *hash,
const u8 * addr)
struct mlx5e_eth_addr_hash_node *hn_new)
{
struct mlx5e_eth_addr_hash_node *hn;
int ix = mlx5e_hash_eth_addr(addr);
u32 ix = mlx5e_hash_eth_addr(hn_new->ai.addr);
LIST_FOREACH(hn, &hash[ix], hlist) {
if (bcmp(hn->ai.addr, addr, ETHER_ADDR_LEN) == 0) {
if (bcmp(hn->ai.addr, hn_new->ai.addr, ETHER_ADDR_LEN) == 0) {
if (hn->action == MLX5E_ACTION_DEL)
hn->action = MLX5E_ACTION_NONE;
return;
free(hn_new, M_MLX5EN);
return (false);
}
}
hn = malloc(sizeof(*hn), M_MLX5EN, M_NOWAIT | M_ZERO);
if (hn == NULL)
return;
ether_addr_copy(hn->ai.addr, addr);
hn->action = MLX5E_ACTION_ADD;
LIST_INSERT_HEAD(&hash[ix], hn, hlist);
LIST_INSERT_HEAD(&hash[ix], hn_new, hlist);
return (true);
}
static void
@ -757,6 +753,8 @@ mlx5e_execute_action(struct mlx5e_priv *priv,
case MLX5E_ACTION_DEL:
mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai);
if (hn->mpfs_index != -1U)
mlx5_mpfs_del_mac(priv->mdev, hn->mpfs_index);
mlx5e_del_eth_addr_from_hash(hn);
break;
@ -765,25 +763,57 @@ mlx5e_execute_action(struct mlx5e_priv *priv,
}
}
/*
 * Unlink the first node of list "fh", if there is one, and insert it
 * at the head of list "uh". Returns the moved node, or NULL when "fh"
 * is empty.
 */
static struct mlx5e_eth_addr_hash_node *
mlx5e_move_hn(struct mlx5e_eth_addr_hash_head *fh, struct mlx5e_eth_addr_hash_head *uh)
{
	struct mlx5e_eth_addr_hash_node *first = LIST_FIRST(fh);

	if (first == NULL)
		return (NULL);

	LIST_REMOVE(first, hlist);
	LIST_INSERT_HEAD(uh, first, hlist);

	return (first);
}
/*
 * Unlink and return the first node of list "fh". Returns NULL when the
 * list is empty. The caller becomes responsible for the returned node.
 */
static struct mlx5e_eth_addr_hash_node *
mlx5e_remove_hn(struct mlx5e_eth_addr_hash_head *fh)
{
	struct mlx5e_eth_addr_hash_node *first = LIST_FIRST(fh);

	if (first == NULL)
		return (NULL);

	LIST_REMOVE(first, hlist);

	return (first);
}
static void
mlx5e_sync_ifp_addr(struct mlx5e_priv *priv)
{
struct mlx5e_eth_addr_hash_head head_free;
struct mlx5e_eth_addr_hash_head head_uc;
struct mlx5e_eth_addr_hash_head head_mc;
struct mlx5e_eth_addr_hash_node *hn;
struct ifnet *ifp = priv->ifp;
struct ifaddr *ifa;
struct ifmultiaddr *ifma;
bool success = false;
size_t x;
size_t num;
PRIV_ASSERT_LOCKED(priv);
/* XXX adding this entry might not be needed */
mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc,
LLADDR((struct sockaddr_dl *)(ifp->if_addr->ifa_addr)));
LIST_INIT(&head_free);
LIST_INIT(&head_uc);
LIST_INIT(&head_mc);
retry:
num = 1;
if_addr_rlock(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_LINK)
continue;
mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc,
LLADDR((struct sockaddr_dl *)ifa->ifa_addr));
num++;
}
if_addr_runlock(ifp);
@ -791,10 +821,81 @@ mlx5e_sync_ifp_addr(struct mlx5e_priv *priv)
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_mc,
num++;
}
if_maddr_runlock(ifp);
/* allocate place holders */
for (x = 0; x != num; x++) {
hn = malloc(sizeof(*hn), M_MLX5EN, M_WAITOK | M_ZERO);
hn->action = MLX5E_ACTION_ADD;
hn->mpfs_index = -1U;
LIST_INSERT_HEAD(&head_free, hn, hlist);
}
hn = mlx5e_move_hn(&head_free, &head_uc);
if (hn == NULL)
goto cleanup;
ether_addr_copy(hn->ai.addr,
LLADDR((struct sockaddr_dl *)(ifp->if_addr->ifa_addr)));
if_addr_rlock(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_LINK)
continue;
hn = mlx5e_move_hn(&head_free, &head_uc);
if (hn == NULL)
break;
ether_addr_copy(hn->ai.addr,
LLADDR((struct sockaddr_dl *)ifa->ifa_addr));
}
if_addr_runlock(ifp);
if (ifa != NULL)
goto cleanup;
if_maddr_rlock(ifp);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
hn = mlx5e_move_hn(&head_free, &head_mc);
if (hn == NULL)
break;
ether_addr_copy(hn->ai.addr,
LLADDR((struct sockaddr_dl *)ifma->ifma_addr));
}
if_maddr_runlock(ifp);
if (ifma != NULL)
goto cleanup;
/* insert L2 unicast addresses into hash list */
while ((hn = mlx5e_remove_hn(&head_uc)) != NULL) {
if (mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc, hn) == 0)
continue;
if (hn->mpfs_index == -1U)
mlx5_mpfs_add_mac(priv->mdev, &hn->mpfs_index, hn->ai.addr);
}
/* insert L2 multicast addresses into hash list */
while ((hn = mlx5e_remove_hn(&head_mc)) != NULL) {
if (mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_mc, hn) == 0)
continue;
}
success = true;
cleanup:
while ((hn = mlx5e_remove_hn(&head_uc)) != NULL)
free(hn, M_MLX5EN);
while ((hn = mlx5e_remove_hn(&head_mc)) != NULL)
free(hn, M_MLX5EN);
while ((hn = mlx5e_remove_hn(&head_free)) != NULL)
free(hn, M_MLX5EN);
if (success == false)
goto retry;
}
static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type,
@ -1493,6 +1594,8 @@ mlx5e_open_flow_table(struct mlx5e_priv *priv)
void
mlx5e_close_flow_table(struct mlx5e_priv *priv)
{
mlx5e_handle_ifp_addr(priv);
mlx5e_destroy_inner_rss_flow_table(priv);
mlx5e_destroy_main_flow_table(priv);
mlx5e_destroy_vlan_flow_table(priv);

37
sys/dev/mlx5/mpfs.h Normal file
View File

@ -0,0 +1,37 @@
/*-
* Copyright (c) 2019, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MLX5_MPFS_H_
#define _MLX5_MPFS_H_
/* Only pointers to the core device are used here. */
struct mlx5_core_dev;
/*
 * Install "mac" into the MPFS L2 table and store the table index used
 * in "*p_index". Returns zero on success or a negative error value.
 */
int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u32 *p_index, const u8 *mac);
/*
 * Remove the MPFS L2 table entry at "index". Returns zero on success
 * or a negative error value.
 */
int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u32 index);
/* Set up the MPFS bookkeeping of "dev". Returns zero on success. */
int mlx5_mpfs_init(struct mlx5_core_dev *dev);
/* Release the MPFS bookkeeping of "dev". */
void mlx5_mpfs_destroy(struct mlx5_core_dev *dev);
#endif /* _MLX5_MPFS_H_ */

View File

@ -19,6 +19,7 @@ mlx5_health.c \
mlx5_mad.c \
mlx5_main.c \
mlx5_mcg.c \
mlx5_mpfs.c \
mlx5_mr.c \
mlx5_pagealloc.c \
mlx5_pd.c \