ice(4): Add RDMA Client Interface

This allows the "irdma" driver to communicate with the ice(4)
driver to allow it access to the underlying device's hardware
resources as well as synchronize access to shared resources.

This interface already existed in the standalone out-of-tree
1.34.2 driver; this commit adds and enables it in the in-kernel
driver.

Note:

Adds a hack to the module Makefile to compile the interface .m files

These are required for the RDMA client interface, but they don't
build as-is like the normal .c files. The source directory doesn't
seem to be included by default, so add lines that specifically
add them as libraries so that ice_rdma.h can be found and the
interface files will compile.

Signed-off-by: Eric Joyner <erj@FreeBSD.org>

MFC after:	1 week
Sponsored by:	Intel Corporation
Differential Revision:	https://reviews.freebsd.org/D30889
This commit is contained in:
Eric Joyner 2021-12-01 16:50:06 -08:00
parent f16e38162c
commit 8a13362d49
No known key found for this signature in database
GPG Key ID: 96F0C6FD61E05DE3
22 changed files with 2050 additions and 8 deletions

View File

@ -173,6 +173,12 @@ dev/ice/ice_fw_logging.c optional ice pci \
compile-with "${NORMAL_C} -I$S/dev/ice"
dev/ice/ice_fwlog.c optional ice pci \
compile-with "${NORMAL_C} -I$S/dev/ice"
dev/ice/ice_rdma.c optional ice pci \
compile-with "${NORMAL_C} -I$S/dev/ice"
dev/ice/irdma_if.m optional ice pci \
compile-with "${NORMAL_M} -I$S/dev/ice"
dev/ice/irdma_di_if.m optional ice pci \
compile-with "${NORMAL_M} -I$S/dev/ice"
ice_ddp.c optional ice_ddp \
compile-with "${AWK} -f $S/tools/fw_stub.awk ice_ddp.fw:ice_ddp:0x01031b00 -mice_ddp -c${.TARGET}" \
no-ctfconvert no-implicit-rule before-depend local \

View File

@ -232,6 +232,12 @@ dev/ice/ice_fw_logging.c optional ice pci \
compile-with "${NORMAL_C} -I$S/dev/ice"
dev/ice/ice_fwlog.c optional ice pci \
compile-with "${NORMAL_C} -I$S/dev/ice"
dev/ice/ice_rdma.c optional ice pci \
compile-with "${NORMAL_C} -I$S/dev/ice"
dev/ice/irdma_if.m optional ice pci \
compile-with "${NORMAL_M} -I$S/dev/ice"
dev/ice/irdma_di_if.m optional ice pci \
compile-with "${NORMAL_M} -I$S/dev/ice"
ice_ddp.c optional ice_ddp \
compile-with "${AWK} -f $S/tools/fw_stub.awk ice_ddp.fw:ice_ddp:0x01031b00 -mice_ddp -c${.TARGET}" \
no-ctfconvert no-implicit-rule before-depend local \

View File

@ -76,6 +76,12 @@ dev/ice/ice_fw_logging.c optional ice pci powerpc64 \
compile-with "${NORMAL_C} -I$S/dev/ice"
dev/ice/ice_fwlog.c optional ice pci powerpc64 \
compile-with "${NORMAL_C} -I$S/dev/ice"
dev/ice/ice_rdma.c optional ice pci powerpc64 \
compile-with "${NORMAL_C} -I$S/dev/ice"
dev/ice/irdma_if.m optional ice pci powerpc64 \
compile-with "${NORMAL_M} -I$S/dev/ice"
dev/ice/irdma_di_if.m optional ice pci powerpc64 \
compile-with "${NORMAL_M} -I$S/dev/ice"
ice_ddp.c optional ice_ddp powerpc64 \
compile-with "${AWK} -f $S/tools/fw_stub.awk ice_ddp.fw:ice_ddp:0x01031b00 -mice_ddp -c${.TARGET}" \
no-ctfconvert no-implicit-rule before-depend local \

View File

@ -2547,6 +2547,57 @@ struct ice_aqc_move_txqs_data {
struct ice_aqc_move_txqs_elem txqs[STRUCT_HACK_VAR_LEN];
};
/* Add Tx RDMA Queue Set (indirect 0x0C33) */
struct ice_aqc_add_rdma_qset {
	u8 num_qset_grps;	/* number of qset groups in the indirect buffer */
	u8 reserved[7];
	__le32 addr_high;	/* upper 32 bits of the buffer address */
	__le32 addr_low;	/* lower 32 bits of the buffer address */
};

/* This is the descriptor of each qset entry for the Add Tx RDMA Queue Set
 * command (0x0C33). Only used within struct ice_aqc_add_rdma_qset.
 */
struct ice_aqc_add_tx_rdma_qset_entry {
	__le16 tx_qset_id;	/* Tx qset identifier */
	u8 rsvd[2];
	/* scheduler node TEID; read back by the driver after the command
	 * completes (see ice_ena_vsi_rdma_qset())
	 */
	__le32 qset_teid;
	struct ice_aqc_txsched_elem info;	/* Tx scheduler element config */
};

/* The format of the command buffer for Add Tx RDMA Queue Set(0x0C33)
 * is an array of the following structs. Please note that the length of
 * each struct ice_aqc_add_rdma_qset is variable due to the variable
 * number of queues in each group!
 */
struct ice_aqc_add_rdma_qset_data {
	__le32 parent_teid;	/* TEID of the parent scheduler node */
	__le16 num_qsets;	/* count of entries in rdma_qsets[] */
	u8 rsvd[2];
	struct ice_aqc_add_tx_rdma_qset_entry rdma_qsets[STRUCT_HACK_VAR_LEN];
};

/* Move RDMA Queue Set (indirect 0x0C34) */
struct ice_aqc_move_rdma_qset_cmd {
	u8 num_rdma_qset;	/* Used by commands and response */
	u8 flags;
	u8 reserved[6];
	__le32 addr_high;	/* upper 32 bits of the buffer address */
	__le32 addr_low;	/* lower 32 bits of the buffer address */
};

/* Buffer for the Move RDMA Queue Set (0x0C34) command */
struct ice_aqc_move_rdma_qset_buffer_desc {
	__le16 tx_qset_id;	/* qset being moved */
	__le16 qset_teid;	/* TEID of the qset node */
};

struct ice_aqc_move_rdma_qset_buffer {
	__le32 src_parent_teid;		/* current parent scheduler node */
	__le32 dest_parent_teid;	/* new parent scheduler node */
	struct ice_aqc_move_rdma_qset_buffer_desc descs[STRUCT_HACK_VAR_LEN];
};
/* Download Package (indirect 0x0C40) */
/* Also used for Update Package (indirect 0x0C42 and 0x0C41) */
struct ice_aqc_download_pkg {
@ -2897,6 +2948,7 @@ struct ice_aq_desc {
struct ice_aqc_add_txqs add_txqs;
struct ice_aqc_dis_txqs dis_txqs;
struct ice_aqc_move_txqs move_txqs;
struct ice_aqc_add_rdma_qset add_rdma_qset;
struct ice_aqc_txqs_cleanup txqs_cleanup;
struct ice_aqc_add_get_update_free_vsi vsi_cmd;
struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res;
@ -3156,6 +3208,8 @@ enum ice_adminq_opc {
ice_aqc_opc_dis_txqs = 0x0C31,
ice_aqc_opc_txqs_cleanup = 0x0C31,
ice_aqc_opc_move_recfg_txqs = 0x0C32,
ice_aqc_opc_add_rdma_qset = 0x0C33,
ice_aqc_opc_move_rdma_qset = 0x0C34,
/* package commands */
ice_aqc_opc_download_pkg = 0x0C40,

View File

@ -1198,7 +1198,8 @@ enum ice_status ice_check_reset(struct ice_hw *hw)
GLNVM_ULD_POR_DONE_1_M |\
GLNVM_ULD_PCIER_DONE_2_M)
uld_mask = ICE_RESET_DONE_MASK;
uld_mask = ICE_RESET_DONE_MASK | (hw->func_caps.common_cap.iwarp ?
GLNVM_ULD_PE_DONE_M : 0);
/* Device is Active; check Global Reset processes are done */
for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
@ -2364,6 +2365,10 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
ice_debug(hw, ICE_DBG_INIT, "%s: mgmt_cem = %d\n", prefix,
caps->mgmt_cem);
break;
case ICE_AQC_CAPS_IWARP:
caps->iwarp = (number == 1);
ice_debug(hw, ICE_DBG_INIT, "%s: iwarp = %d\n", prefix, caps->iwarp);
break;
case ICE_AQC_CAPS_LED:
if (phys_id < ICE_MAX_SUPPORTED_GPIO_LED) {
caps->led[phys_id] = true;
@ -2481,6 +2486,16 @@ ice_recalc_port_limited_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps)
caps->maxtc = 4;
ice_debug(hw, ICE_DBG_INIT, "reducing maxtc to %d (based on #ports)\n",
caps->maxtc);
if (caps->iwarp) {
ice_debug(hw, ICE_DBG_INIT, "forcing RDMA off\n");
caps->iwarp = 0;
}
/* print message only when processing device capabilities
* during initialization.
*/
if (caps == &hw->dev_caps.common_cap)
ice_info(hw, "RDMA functionality is not available with the current device configuration.\n");
}
}
@ -4338,6 +4353,56 @@ ice_aq_move_recfg_lan_txq(struct ice_hw *hw, u8 num_qs, bool is_move,
return status;
}
/**
 * ice_aq_add_rdma_qsets
 * @hw: pointer to the hardware structure
 * @num_qset_grps: Number of RDMA Qset groups in the qset_list buffer
 * @qset_list: list of qset groups to be added
 * @buf_size: size of buffer for indirect command; must equal the sum of the
 *            variable-length group sizes in @qset_list
 * @cd: pointer to command details structure or NULL
 *
 * Add Tx RDMA Qsets (0x0C33). Returns ICE_ERR_PARAM on a NULL list, too many
 * groups, or a buffer-size mismatch; otherwise returns the AQ send status.
 */
enum ice_status
ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps,
		      struct ice_aqc_add_rdma_qset_data *qset_list,
		      u16 buf_size, struct ice_sq_cd *cd)
{
	struct ice_aqc_add_rdma_qset_data *list;
	struct ice_aqc_add_rdma_qset *cmd;
	struct ice_aq_desc desc;
	u16 i, sum_size = 0;

	ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__);

	cmd = &desc.params.add_rdma_qset;

	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_rdma_qset);

	if (!qset_list)
		return ICE_ERR_PARAM;

	if (num_qset_grps > ICE_LAN_TXQ_MAX_QGRPS)
		return ICE_ERR_PARAM;

	/* Walk every variable-length group to compute the total buffer size,
	 * stepping past each group's flexible rdma_qsets[] array.
	 */
	for (i = 0, list = qset_list; i < num_qset_grps; i++) {
		u16 num_qsets = LE16_TO_CPU(list->num_qsets);

		sum_size += ice_struct_size(list, rdma_qsets, num_qsets);
		list = (struct ice_aqc_add_rdma_qset_data *)(list->rdma_qsets +
							     num_qsets);
	}

	/* Reject buffers whose declared size disagrees with the contents */
	if (buf_size != sum_size)
		return ICE_ERR_PARAM;

	/* Indirect buffer is read by firmware */
	desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD);

	cmd->num_qset_grps = num_qset_grps;

	return ice_aq_send_cmd(hw, &desc, qset_list, buf_size, cd);
}
/* End of FW Admin Queue command wrappers */
/**
@ -5100,6 +5165,158 @@ ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
ICE_SCHED_NODE_OWNER_LAN);
}
/**
 * ice_cfg_vsi_rdma - configure the VSI RDMA queues
 * @pi: port information structure
 * @vsi_handle: software VSI handle
 * @tc_bitmap: TC bitmap
 * @max_rdmaqs: max RDMA queues array per TC
 *
 * This function adds/updates the VSI RDMA queues per TC. It is a thin
 * wrapper around ice_cfg_vsi_qs() with the queue owner set to RDMA, and
 * returns that function's status directly.
 */
enum ice_status
ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
		 u16 *max_rdmaqs)
{
	return ice_cfg_vsi_qs(pi, vsi_handle, tc_bitmap, max_rdmaqs,
			      ICE_SCHED_NODE_OWNER_RDMA);
}
/**
 * ice_ena_vsi_rdma_qset
 * @pi: port information structure
 * @vsi_handle: software VSI handle
 * @tc: TC number
 * @rdma_qset: pointer to RDMA qset IDs to enable
 * @num_qsets: number of RDMA qsets
 * @qset_teid: output array of qset node teids, filled on success
 *
 * This function adds RDMA qsets: it builds an Add Tx RDMA Queue Set command
 * buffer for a single qset group under the free qparent node for the given
 * TC, sends the command, and then mirrors each firmware-created leaf into
 * the software scheduler tree.
 */
enum ice_status
ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
		      u16 *rdma_qset, u16 num_qsets, u32 *qset_teid)
{
	struct ice_aqc_txsched_elem_data node = { 0 };
	struct ice_aqc_add_rdma_qset_data *buf;
	struct ice_sched_node *parent;
	enum ice_status status;
	struct ice_hw *hw;
	u16 i, buf_size;

	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
		return ICE_ERR_CFG;
	hw = pi->hw;

	if (!ice_is_vsi_valid(hw, vsi_handle))
		return ICE_ERR_PARAM;

	/* Single group with num_qsets variable-length entries */
	buf_size = ice_struct_size(buf, rdma_qsets, num_qsets);
	buf = (struct ice_aqc_add_rdma_qset_data *)ice_malloc(hw, buf_size);
	if (!buf)
		return ICE_ERR_NO_MEMORY;
	ice_acquire_lock(&pi->sched_lock);

	parent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
					    ICE_SCHED_NODE_OWNER_RDMA);
	if (!parent) {
		status = ICE_ERR_PARAM;
		goto rdma_error_exit;
	}
	buf->parent_teid = parent->info.node_teid;
	node.parent_teid = parent->info.node_teid;

	buf->num_qsets = CPU_TO_LE16(num_qsets);
	/* Each qset uses the default rate-limit profile and bandwidth weight
	 * for both committed (CIR) and excess (EIR) bandwidth.
	 */
	for (i = 0; i < num_qsets; i++) {
		buf->rdma_qsets[i].tx_qset_id = CPU_TO_LE16(rdma_qset[i]);
		buf->rdma_qsets[i].info.valid_sections =
			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
			ICE_AQC_ELEM_VALID_EIR;
		buf->rdma_qsets[i].info.generic = 0;
		buf->rdma_qsets[i].info.cir_bw.bw_profile_idx =
			CPU_TO_LE16(ICE_SCHED_DFLT_RL_PROF_ID);
		buf->rdma_qsets[i].info.cir_bw.bw_alloc =
			CPU_TO_LE16(ICE_SCHED_DFLT_BW_WT);
		buf->rdma_qsets[i].info.eir_bw.bw_profile_idx =
			CPU_TO_LE16(ICE_SCHED_DFLT_RL_PROF_ID);
		buf->rdma_qsets[i].info.eir_bw.bw_alloc =
			CPU_TO_LE16(ICE_SCHED_DFLT_BW_WT);
	}
	status = ice_aq_add_rdma_qsets(hw, 1, buf, buf_size, NULL);
	if (status != ICE_SUCCESS) {
		ice_debug(hw, ICE_DBG_RDMA, "add RDMA qset failed\n");
		goto rdma_error_exit;
	}
	/* Firmware filled in each qset_teid; add the corresponding leaf
	 * nodes at the lowest scheduler layer and report the TEIDs back.
	 */
	node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
	for (i = 0; i < num_qsets; i++) {
		node.node_teid = buf->rdma_qsets[i].qset_teid;
		status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1,
					    &node);
		if (status)
			break;
		qset_teid[i] = LE32_TO_CPU(node.node_teid);
	}
rdma_error_exit:
	ice_release_lock(&pi->sched_lock);
	ice_free(hw, buf);
	return status;
}
/**
 * ice_dis_vsi_rdma_qset - free RDMA resources
 * @pi: port_info struct
 * @count: number of RDMA qsets to free
 * @qset_teid: array of TEIDs of the qset nodes to remove
 * @q_id: list of queue IDs being disabled
 *
 * Disables each qset via a one-queue Disable Tx Queue command (with the
 * RDMA qset buffer-element type flag set) and removes the matching node
 * from the software scheduler tree. Qset TEIDs with no matching node are
 * silently skipped; the first AQ failure stops processing and its status
 * is returned.
 */
enum ice_status
ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
		      u16 *q_id)
{
	struct ice_aqc_dis_txq_item *qg_list;
	enum ice_status status = ICE_SUCCESS;
	struct ice_hw *hw;
	u16 qg_size;
	int i;

	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
		return ICE_ERR_CFG;

	hw = pi->hw;

	/* One reusable single-queue group buffer for all iterations */
	qg_size = ice_struct_size(qg_list, q_id, 1);
	qg_list = (struct ice_aqc_dis_txq_item *)ice_malloc(hw, qg_size);
	if (!qg_list)
		return ICE_ERR_NO_MEMORY;

	ice_acquire_lock(&pi->sched_lock);

	for (i = 0; i < count; i++) {
		struct ice_sched_node *node;

		node = ice_sched_find_node_by_teid(pi->root, qset_teid[i]);
		if (!node)
			continue;

		qg_list->parent_teid = node->info.parent_teid;
		qg_list->num_qs = 1;
		/* Mark the entry as an RDMA qset rather than a LAN queue */
		qg_list->q_id[0] =
			CPU_TO_LE16(q_id[i] |
				    ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET);

		status = ice_aq_dis_lan_txq(hw, 1, qg_list, qg_size,
					    ICE_NO_RESET, 0, NULL);
		if (status)
			break;

		ice_free_sched_node(pi, node);
	}

	ice_release_lock(&pi->sched_lock);
	ice_free(hw, qg_list);
	return status;
}
/**
* ice_is_main_vsi - checks whether the VSI is main VSI
* @hw: pointer to the HW struct

View File

@ -147,6 +147,11 @@ ice_aq_move_recfg_lan_txq(struct ice_hw *hw, u8 num_qs, bool is_move,
struct ice_aqc_move_txqs_data *buf, u16 buf_size,
u8 *txqs_moved, struct ice_sq_cd *cd);
enum ice_status
ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps,
struct ice_aqc_add_rdma_qset_data *qset_list,
u16 buf_size, struct ice_sq_cd *cd);
bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
@ -257,6 +262,15 @@ __ice_write_sr_word(struct ice_hw *hw, u32 offset, const u16 *data);
enum ice_status
__ice_write_sr_buf(struct ice_hw *hw, u32 offset, u16 words, const u16 *data);
enum ice_status
ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
u16 *max_rdmaqs);
enum ice_status
ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
u16 *rdma_qset, u16 num_qsets, u32 *qset_teid);
enum ice_status
ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
u16 *q_id);
enum ice_status
ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
u16 *q_handle, u16 *q_ids, u32 *q_teids,
enum ice_disq_rst_src rst_src, u16 vmvf_num,

View File

@ -45,6 +45,15 @@
#include <sys/sysctl.h>
/**
* @var ice_enable_irdma
* @brief boolean indicating if the iRDMA client interface is enabled
*
* Global sysctl variable indicating whether the RDMA client interface feature
* is enabled.
*/
bool ice_enable_irdma = true;
/**
* @var ice_enable_tx_fc_filter
* @brief boolean indicating if the Tx Flow Control filter should be enabled
@ -85,6 +94,15 @@ bool ice_enable_tx_lldp_filter = true;
*/
bool ice_enable_health_events = true;
/**
* @var ice_rdma_max_msix
* @brief maximum number of MSI-X vectors to reserve for RDMA interface
*
* Global sysctl variable indicating the maximum number of MSI-X vectors to
* reserve for a single RDMA interface.
*/
static uint16_t ice_rdma_max_msix = ICE_RDMA_MAX_MSIX;
/* sysctls marked as tunable, (i.e. with the CTLFLAG_TUN set) will
* automatically load tunable values, without the need to manually create the
* TUNABLE definition.
@ -105,6 +123,12 @@ SYSCTL_BOOL(_hw_ice, OID_AUTO, enable_health_events, CTLFLAG_RDTUN,
&ice_enable_health_events, 0,
"Enable FW health event reporting globally");
SYSCTL_BOOL(_hw_ice, OID_AUTO, irdma, CTLFLAG_RDTUN, &ice_enable_irdma, 0,
"Enable iRDMA client interface");
SYSCTL_U16(_hw_ice, OID_AUTO, rdma_max_msix, CTLFLAG_RDTUN, &ice_rdma_max_msix,
0, "Maximum number of MSI-X vectors to reserve per RDMA interface");
SYSCTL_BOOL(_hw_ice_debug, OID_AUTO, enable_tx_fc_filter, CTLFLAG_RDTUN,
&ice_enable_tx_fc_filter, 0,
"Drop Ethertype 0x8808 control frames originating from non-HW sources");

View File

@ -236,6 +236,11 @@ struct ice_softc {
struct mtx admin_mtx; /* mutex to protect the admin timer */
struct callout admin_timer; /* timer to trigger admin task */
/* iRDMA peer interface */
struct ice_rdma_entry rdma_entry;
int irdma_vectors;
u16 *rdma_imap;
struct ice_vsi **all_vsi; /* Array of VSI pointers */
u16 num_available_vsi; /* Size of VSI array */

View File

@ -3984,6 +3984,11 @@ ice_config_pfc(struct ice_softc *sc, u8 new_mode)
local_dcbx_cfg->pfc.willing = 0;
local_dcbx_cfg->pfc.mbc = 0;
/* Warn if PFC is being disabled with RoCE v2 in use */
if (new_mode == 0 && sc->rdma_entry.attached)
device_printf(dev,
"WARNING: Recommended that Priority Flow Control is enabled when RoCEv2 is in use\n");
status = ice_set_dcb_cfg(pi);
if (status) {
device_printf(dev,
@ -7800,6 +7805,8 @@ ice_do_dcb_reconfig(struct ice_softc *sc)
pi = sc->hw.port_info;
local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
ice_rdma_notify_dcb_qos_change(sc);
/* Set state when there's more than one TC */
tc_map = ice_dcb_get_tc_map(local_dcbx_cfg);
if (ice_dcb_num_tc(tc_map) > 1) {
@ -7826,6 +7833,9 @@ ice_do_dcb_reconfig(struct ice_softc *sc)
/* Change PF VSI configuration */
ice_dcb_recfg(sc);
/* Send new configuration to RDMA client driver */
ice_rdma_dcb_qos_update(sc, pi);
ice_request_stack_reinit(sc);
}
@ -8663,6 +8673,7 @@ ice_init_saved_phy_cfg(struct ice_softc *sc)
static int
ice_module_init(void)
{
ice_rdma_init();
return (0);
}
@ -8679,6 +8690,7 @@ ice_module_init(void)
static int
ice_module_exit(void)
{
ice_rdma_exit();
return (0);
}
@ -9029,8 +9041,17 @@ ice_alloc_intr_tracking(struct ice_softc *sc)
err = ENOMEM;
goto free_imgr;
}
if (!(sc->rdma_imap =
(u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
M_ICE, M_NOWAIT))) {
device_printf(dev, "Unable to allocate RDMA imap memory\n");
err = ENOMEM;
free(sc->pf_imap, M_ICE);
goto free_imgr;
}
for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) {
sc->pf_imap[i] = ICE_INVALID_RES_IDX;
sc->rdma_imap[i] = ICE_INVALID_RES_IDX;
}
return (0);
@ -9058,6 +9079,12 @@ ice_free_intr_tracking(struct ice_softc *sc)
free(sc->pf_imap, M_ICE);
sc->pf_imap = NULL;
}
if (sc->rdma_imap) {
ice_resmgr_release_map(&sc->imgr, sc->rdma_imap,
sc->lan_vectors);
free(sc->rdma_imap, M_ICE);
sc->rdma_imap = NULL;
}
ice_resmgr_destroy(&sc->imgr);
}

View File

@ -65,6 +65,8 @@
#include "ice_sched.h"
#include "ice_resmgr.h"
#include "ice_rdma_internal.h"
#include "ice_rss.h"
/* Hide debug sysctls unless INVARIANTS is enabled */

859
sys/dev/ice/ice_rdma.c Normal file
View File

@ -0,0 +1,859 @@
/* SPDX-License-Identifier: BSD-3-Clause */
/* Copyright (c) 2022, Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*$FreeBSD$*/
/**
* @file ice_rdma.c
* @brief RDMA client driver interface
*
* Functions to interface with the RDMA client driver, for enabling RDMA
* functionality for the ice driver.
*
* The RDMA client interface is based on a simple kobject interface which is
* defined by the irdma_if.m and irdma_di_if.m interfaces.
*
* The ice device driver provides the irdma_di_if.m interface methods, while
* the client RDMA driver provides the irdma_if.m interface methods as an
* extension on top of the irdma_di_if kobject.
*
* The initial connection between drivers is done via the RDMA client driver
* calling ice_rdma_register.
*/
#include "ice_iflib.h"
#include "ice_rdma_internal.h"
#include "irdma_if.h"
#include "irdma_di_if.h"
/**
 * @var ice_rdma
 * @brief global RDMA driver state
 *
 * Contains global state the driver uses to connect to a client RDMA interface
 * driver: the peer list, its lock, and the registered client's kobject class.
 */
static struct ice_rdma_state ice_rdma;

/*
 * Helper function prototypes
 */
static int ice_rdma_pf_attach_locked(struct ice_softc *sc);
static void ice_rdma_pf_detach_locked(struct ice_softc *sc);
static int ice_rdma_check_version(struct ice_rdma_info *info);
static void ice_rdma_cp_qos_info(struct ice_hw *hw,
				 struct ice_dcbx_cfg *dcbx_cfg,
				 struct ice_qos_params *qos_info);

/*
 * RDMA Device Interface prototypes (implementations of the irdma_di_if.m
 * kobject methods listed in ice_rdma_di_methods below)
 */
static int ice_rdma_pf_reset(struct ice_rdma_peer *peer);
static int ice_rdma_pf_msix_init(struct ice_rdma_peer *peer,
				 struct ice_rdma_msix_mapping *msix_info);
static int ice_rdma_qset_register_request(struct ice_rdma_peer *peer,
			     struct ice_rdma_qset_update *res);
static int ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer_dev,
				      bool enable);
static void ice_rdma_request_handler(struct ice_rdma_peer *peer,
				     struct ice_rdma_request *req);

/**
 * @var ice_rdma_di_methods
 * @brief RDMA driver interface methods
 *
 * Kobject methods implementing the driver-side interface for the RDMA peer
 * clients. This method table contains the operations which the client can
 * request from the driver.
 *
 * The client driver will then extend this kobject class with methods that the
 * driver can request from the client.
 */
static kobj_method_t ice_rdma_di_methods[] = {
	KOBJMETHOD(irdma_di_reset, ice_rdma_pf_reset),
	KOBJMETHOD(irdma_di_msix_init, ice_rdma_pf_msix_init),
	KOBJMETHOD(irdma_di_qset_register_request, ice_rdma_qset_register_request),
	KOBJMETHOD(irdma_di_vsi_filter_update, ice_rdma_update_vsi_filter),
	KOBJMETHOD(irdma_di_req_handler, ice_rdma_request_handler),
	KOBJMETHOD_END
};

/* Define ice_rdma_di class which will be extended by the iRDMA driver */
DEFINE_CLASS_0(ice_rdma_di, ice_rdma_di_class, ice_rdma_di_methods, sizeof(struct ice_rdma_peer));
/**
 * ice_rdma_pf_reset - RDMA client interface requested a reset
 * @peer: the RDMA peer client structure
 *
 * Implements IRDMA_DI_RESET, called by the RDMA client driver to request
 * a reset of an ice driver device. Always succeeds.
 */
static int
ice_rdma_pf_reset(struct ice_rdma_peer *peer)
{
	/* Bringing the interface down and back up satisfies the request. */
	ice_request_stack_reinit(ice_rdma_peer_to_sc(peer));

	return (0);
}
/**
 * ice_rdma_pf_msix_init - RDMA client interface request MSI-X initialization
 * @peer: the RDMA peer client structure
 * @msix_info: requested MSI-X mapping
 *
 * Implements IRDMA_DI_MSIX_INIT, called by the RDMA client driver to
 * initialize the MSI-X resources required for RDMA functionality.
 *
 * Currently a stub: it logs the request and returns ENOSYS. msix_info is
 * marked __unused because its only reference is the MPASS assertion, which
 * compiles away in non-INVARIANTS builds.
 */
static int
ice_rdma_pf_msix_init(struct ice_rdma_peer *peer,
		      struct ice_rdma_msix_mapping __unused *msix_info)
{
	struct ice_softc *sc = ice_rdma_peer_to_sc(peer);

	MPASS(msix_info != NULL);

	device_printf(sc->dev, "%s: iRDMA MSI-X initialization request is not yet implemented\n", __func__);

	/* TODO: implement MSI-X initialization for RDMA */
	return (ENOSYS);
}
/**
 * ice_rdma_qset_register_request - RDMA client interface request qset
 *      registration or unregistration
 * @peer: the RDMA peer client structure
 * @res: resources to be registered or unregistered
 *
 * Implements IRDMA_DI_QSET_REGISTER_REQUEST. For ICE_RDMA_QSET_ALLOC,
 * configures the VSI's RDMA queue limits per TC and enables each requested
 * qset, writing the firmware-assigned TEID back into the request. For
 * ICE_RDMA_QSET_FREE, disables the previously allocated qsets.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
ice_rdma_qset_register_request(struct ice_rdma_peer *peer, struct ice_rdma_qset_update *res)
{
	struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
	struct ice_vsi *vsi = NULL;
	struct ice_dcbx_cfg *dcbx_cfg;
	struct ice_hw *hw = &sc->hw;
	enum ice_status status;
	int count, i, ret = 0;
	uint32_t *qset_teid;
	uint16_t *qs_handle;
	uint16_t max_rdmaqs[ICE_MAX_TRAFFIC_CLASS];
	uint16_t vsi_id;
	uint8_t ena_tc = 0;

	if (!res)
		return -EINVAL;

	if (res->cnt_req > ICE_MAX_TXQ_PER_TXQG)
		return -EINVAL;

	/* Determine the qset count and target VSI from the request type */
	switch(res->res_type) {
	case ICE_RDMA_QSET_ALLOC:
		count = res->cnt_req;
		vsi_id = peer->pf_vsi_num;
		break;
	case ICE_RDMA_QSET_FREE:
		count = res->res_allocated;
		vsi_id = res->qsets.vsi_id;
		break;
	default:
		return -EINVAL;
	}

	/* Scratch arrays holding one TEID and one qs_handle per qset */
	qset_teid = (uint32_t *)ice_calloc(hw, count, sizeof(*qset_teid));
	if (!qset_teid)
		return -ENOMEM;

	qs_handle = (uint16_t *)ice_calloc(hw, count, sizeof(*qs_handle));
	if (!qs_handle) {
		ice_free(hw, qset_teid);
		return -ENOMEM;
	}

	ice_for_each_traffic_class(i)
		max_rdmaqs[i] = 0;

	/* Locate the software VSI matching the requested HW VSI number */
	for (i = 0; i < sc->num_available_vsi; i++) {
		if (sc->all_vsi[i] &&
		    ice_get_hw_vsi_num(hw, sc->all_vsi[i]->idx) == vsi_id) {
			vsi = sc->all_vsi[i];
			break;
		}
	}

	if (!vsi) {
		ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI\n");
		ret = -EINVAL;
		goto out;
	}
	if (sc != vsi->sc) {
		ice_debug(hw, ICE_DBG_RDMA, "VSI is tied to unexpected device\n");
		ret = -EXDEV;
		goto out;
	}

	/* Validate and collect per-qset parameters. NOTE(review): qset always
	 * points at the single res->qsets member, so every iteration reads
	 * the same entry — presumably count is expected to be 1 here; confirm
	 * against the irdma client's usage.
	 */
	for (i = 0; i < count; i++) {
		struct ice_rdma_qset_params *qset;

		qset = &res->qsets;
		if (qset->vsi_id != peer->pf_vsi_num) {
			ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI requested %d %d\n",
				  qset->vsi_id, peer->pf_vsi_num);
			ret = -EINVAL;
			goto out;
		}
		max_rdmaqs[qset->tc]++;
		qs_handle[i] = qset->qs_handle;
		qset_teid[i] = qset->teid;
	}

	switch(res->res_type) {
	case ICE_RDMA_QSET_ALLOC:
		/* Build the enabled-TC bitmap from the DCB priority table */
		dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
		for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
			ena_tc |= BIT(dcbx_cfg->etscfg.prio_table[i]);
		}

		ice_debug(hw, ICE_DBG_RDMA, "%s:%d ena_tc=%x\n", __func__, __LINE__, ena_tc);
		status = ice_cfg_vsi_rdma(hw->port_info, vsi->idx, ena_tc,
					  max_rdmaqs);
		if (status) {
			ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset config\n");
			ret = -EINVAL;
			goto out;
		}

		/* Enable each qset individually and report its TEID back */
		for (i = 0; i < count; i++) {
			struct ice_rdma_qset_params *qset;

			qset = &res->qsets;
			status = ice_ena_vsi_rdma_qset(hw->port_info, vsi->idx,
						       qset->tc, &qs_handle[i], 1,
						       &qset_teid[i]);
			if (status) {
				ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset enable\n");
				ret = -EINVAL;
				goto out;
			}
			qset->teid = qset_teid[i];
		}
		break;
	case ICE_RDMA_QSET_FREE:
		status = ice_dis_vsi_rdma_qset(hw->port_info, count, qset_teid, qs_handle);
		if (status)
			ret = -EINVAL;
		break;
	default:
		ret = -EINVAL;
		break;
	}

out:
	ice_free(hw, qs_handle);
	ice_free(hw, qset_teid);
	return ret;
}
/**
 * ice_rdma_update_vsi_filter - configure vsi information
 * when opening or closing rdma driver
 * @peer: the RDMA peer client structure
 * @enable: enable or disable the rdma filter
 *
 * Implements IRDMA_DI_VSI_FILTER_UPDATE. Enables or disables iWARP
 * filtering on the PF VSI and mirrors the result into the cached
 * q_opt_flags. Returns the ice_cfg_iwarp_fltr() status.
 */
static int
ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer,
			   bool enable)
{
	struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
	/* pf_vsi is embedded in the softc, so its address can never be NULL;
	 * the previous "if (!vsi)" check was dead code and has been removed.
	 */
	struct ice_vsi *vsi = &sc->pf_vsi;
	int ret;

	ret = ice_cfg_iwarp_fltr(&sc->hw, vsi->idx, enable);
	if (ret) {
		device_printf(sc->dev, "Failed to %sable iWARP filtering\n",
			      enable ? "en" : "dis");
	} else {
		/* Keep the cached VSI context flags in sync with hardware */
		if (enable)
			vsi->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
		else
			vsi->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
	}

	return ret;
}
/**
 * ice_rdma_request_handler - handle requests incoming from RDMA driver
 * @peer: the RDMA peer client structure
 * @req: structure containing request
 *
 * Implements IRDMA_DI_REQ_HANDLER: dispatches a client request to the
 * matching handler. Invalid pointers and unknown event types are logged
 * and ignored; ICE_RDMA_EVENT_RESET is currently a no-op.
 */
static void
ice_rdma_request_handler(struct ice_rdma_peer *peer,
			 struct ice_rdma_request *req)
{
	if (!req || !peer) {
		log(LOG_WARNING, "%s: peer or req are not valid\n", __func__);
		return;
	}

	switch(req->type) {
	case ICE_RDMA_EVENT_RESET:
		/* no handling implemented for reset events yet */
		break;
	case ICE_RDMA_EVENT_QSET_REGISTER:
		ice_rdma_qset_register_request(peer, &req->res);
		break;
	case ICE_RDMA_EVENT_VSI_FILTER_UPDATE:
		ice_rdma_update_vsi_filter(peer, req->enable_filter);
		break;
	default:
		log(LOG_WARNING, "%s: Event %d not supported\n", __func__, req->type);
		break;
	}
}
/**
 * ice_rdma_cp_qos_info - gather current QOS/DCB settings in LAN to pass
 * to RDMA driver
 * @hw: ice hw structure
 * @dcbx_cfg: current DCB settings in ice driver
 * @qos_info: destination of the DCB settings
 *
 * Fills qos_info with safe single-TC defaults, then overwrites them with
 * the live configuration: the UP-to-TC register mapping, per-TC relative
 * bandwidth, and the DCB application table.
 */
static void
ice_rdma_cp_qos_info(struct ice_hw *hw, struct ice_dcbx_cfg *dcbx_cfg,
		     struct ice_qos_params *qos_info)
{
	u32 up2tc;
	u8 j;
	u8 num_tc = 0;	/* NOTE: used as a bitmap of referenced TCs, not a count */
	u8 val_tc = 0;  /* number of TC for validation */
	u8 cnt_tc = 0;

	/* setup qos_info fields with defaults */
	qos_info->num_apps = 0;
	qos_info->num_tc = 1;

	for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++)
		qos_info->up2tc[j] = 0;

	qos_info->tc_info[0].rel_bw = 100;
	for (j = 1; j < IEEE_8021QAZ_MAX_TCS; j++)
		qos_info->tc_info[j].rel_bw = 0;

	/* gather current values */
	up2tc = rd32(hw, PRTDCB_TUP2TC);
	qos_info->num_apps = dcbx_cfg->numapps;

	/* Build a bitmap of every TC referenced by the priority table */
	for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) {
		num_tc |= BIT(dcbx_cfg->etscfg.prio_table[j]);
	}
	/* Count the TCs set contiguously from bit 0; stop at the first gap */
	for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) {
		if (num_tc & BIT(j)) {
			cnt_tc++;
			val_tc |= BIT(j);
		} else {
			break;
		}
	}
	/* Only trust the count when the TC bitmap is contiguous and nonzero;
	 * otherwise fall back to a single TC.
	 */
	qos_info->num_tc = (val_tc == num_tc && num_tc != 0) ? cnt_tc : 1;

	/* The register packs 3 bits of TC index per user priority */
	for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++)
		qos_info->up2tc[j] = (up2tc >> (j * 3)) & 0x7;

	for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++)
		qos_info->tc_info[j].rel_bw = dcbx_cfg->etscfg.tcbwtable[j];

	/* Copy the DCB application priority table entries verbatim */
	for (j = 0; j < qos_info->num_apps; j++) {
		qos_info->apps[j].priority = dcbx_cfg->app[j].priority;
		qos_info->apps[j].prot_id = dcbx_cfg->app[j].prot_id;
		qos_info->apps[j].selector = dcbx_cfg->app[j].selector;
	}
}
/**
 * ice_rdma_check_version - Check that the provided RDMA version is compatible
 * @info: the RDMA client information structure
 *
 * Verify that the client RDMA driver provided a version that is compatible
 * with the driver interface: the major version must match exactly, and the
 * client's minor (and, at equal minor, patch) version must not exceed the
 * driver's. Also requires a non-NULL kobject class.
 *
 * Returns 0 when compatible, ENOTSUP on a version mismatch, or EINVAL when
 * no kobject class was supplied.
 */
static int
ice_rdma_check_version(struct ice_rdma_info *info)
{
	/* Make sure the MAJOR version matches */
	if (info->major_version != ICE_RDMA_MAJOR_VERSION) {
		log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports major version %d.x.x\n",
		    __func__,
		    info->major_version, info->minor_version, info->patch_version,
		    ICE_RDMA_MAJOR_VERSION);
		return (ENOTSUP);
	}

	/*
	 * Make sure that the MINOR version is compatible.
	 *
	 * This means that the RDMA client driver version MUST not be greater
	 * than the version provided by the driver, as it would indicate that
	 * the RDMA client expects features which are not supported by the
	 * main driver.
	 */
	if (info->minor_version > ICE_RDMA_MINOR_VERSION) {
		log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to minor version %d.%d.x\n",
		    __func__,
		    info->major_version, info->minor_version, info->patch_version,
		    ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION);
		return (ENOTSUP);
	}

	/*
	 * Make sure that the PATCH version is compatible.
	 *
	 * This means that the RDMA client version MUST not be greater than
	 * the version provided by the driver, as it may indicate that the
	 * RDMA client expects certain backwards compatible bug fixes which
	 * are not implemented by this version of the main driver.
	 */
	if ((info->minor_version == ICE_RDMA_MINOR_VERSION) &&
	    (info->patch_version > ICE_RDMA_PATCH_VERSION)) {
		log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to patch version %d.%d.%d\n",
		    __func__,
		    info->major_version, info->minor_version, info->patch_version,
		    ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION, ICE_RDMA_PATCH_VERSION);
		return (ENOTSUP);
	}

	/* Make sure that the kobject class is initialized */
	if (info->rdma_class == NULL) {
		log(LOG_WARNING, "%s: the iRDMA driver did not specify a kobject interface\n",
		    __func__);
		return (EINVAL);
	}

	return (0);
}
/**
 * ice_rdma_register - Register an RDMA client driver
 * @info: the RDMA client information structure
 *
 * Called by the RDMA client driver on load. Used to initialize the RDMA
 * client driver interface and enable interop between the ice driver and the
 * RDMA client driver.
 *
 * The RDMA client driver must provide the version number it expects, along
 * with a pointer to a kobject class that extends the irdma_di_if class, and
 * implements the irdma_if class interface.
 *
 * Returns 0 on success, ECONNREFUSED when the interface is administratively
 * disabled, EBUSY when a client is already registered, or the status of
 * ice_rdma_check_version().
 */
int
ice_rdma_register(struct ice_rdma_info *info)
{
	struct ice_rdma_entry *entry;
	int err = 0;

	/* The global state (registered flag, class, peer list) is protected
	 * by the ice_rdma mutex.
	 */
	sx_xlock(&ice_rdma.mtx);

	if (!ice_enable_irdma) {
		log(LOG_INFO, "%s: The iRDMA driver interface has been disabled\n", __func__);
		err = (ECONNREFUSED);
		goto return_unlock;
	}

	/* Only a single client may register at a time */
	if (ice_rdma.registered) {
		log(LOG_WARNING, "%s: iRDMA driver already registered\n", __func__);
		err = (EBUSY);
		goto return_unlock;
	}

	/* Make sure the iRDMA version is compatible */
	err = ice_rdma_check_version(info);
	if (err)
		goto return_unlock;

	log(LOG_INFO, "%s: iRDMA driver registered using version %d.%d.%d\n",
	    __func__, info->major_version, info->minor_version, info->patch_version);

	ice_rdma.peer_class = info->rdma_class;

	/*
	 * Initialize the kobject interface and notify the RDMA client of each
	 * existing PF interface. Peers already fully initiated are also
	 * opened immediately.
	 */
	LIST_FOREACH(entry, &ice_rdma.peers, node) {
		kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class);
		IRDMA_PROBE(&entry->peer);
		if (entry->initiated)
			IRDMA_OPEN(&entry->peer);
	}
	ice_rdma.registered = true;

return_unlock:
	sx_xunlock(&ice_rdma.mtx);

	return (err);
}
/**
 * ice_rdma_unregister - Unregister an RDMA client driver
 *
 * Called by the RDMA client driver on unload. Used to de-initialize the RDMA
 * client driver interface and shut down communication between the ice driver
 * and the RDMA client driver.
 *
 * Returns 0 on success, or ENOENT if no client was registered.
 */
int
ice_rdma_unregister(void)
{
	struct ice_rdma_entry *entry;
	int err = 0;

	sx_xlock(&ice_rdma.mtx);

	if (!ice_rdma.registered) {
		log(LOG_WARNING, "%s: iRDMA driver was not previously registered\n",
		    __func__);
		err = ENOENT;
	} else {
		log(LOG_INFO, "%s: iRDMA driver unregistered\n", __func__);
		ice_rdma.registered = false;
		ice_rdma.peer_class = NULL;

		/*
		 * Release the kobject interface for each of the existing PF
		 * interfaces. Note that we do not notify the client about
		 * removing each PF, as it is assumed that the client will
		 * have already cleaned up any associated resources when it
		 * is unregistered.
		 */
		LIST_FOREACH(entry, &ice_rdma.peers, node)
			kobj_delete((kobj_t)&entry->peer, NULL);
	}

	sx_xunlock(&ice_rdma.mtx);

	return (err);
}
/**
 * ice_rdma_init - RDMA driver init routine
 *
 * Called during ice driver module initialization to setup the RDMA client
 * interface mutex and RDMA peer structure list.
 *
 * SX_DUPOK is used because multiple PF instances share this single global
 * lock; it suppresses witness duplicate-lock warnings — NOTE(review):
 * presumably, confirm against witness(4) expectations.
 */
void
ice_rdma_init(void)
{
	LIST_INIT(&ice_rdma.peers);
	sx_init_flags(&ice_rdma.mtx, "ice rdma interface", SX_DUPOK);

	/* No client driver is connected at module load time */
	ice_rdma.registered = false;
	ice_rdma.peer_class = NULL;
}
/**
 * ice_rdma_exit - RDMA driver exit routine
 *
 * Called during ice driver module exit to shutdown the RDMA client interface
 * mutex.
 *
 * By this point every PF must have detached and removed its entry from the
 * peer list; the assertion below enforces that invariant in debug kernels.
 */
void
ice_rdma_exit(void)
{
	MPASS(LIST_EMPTY(&ice_rdma.peers));
	sx_destroy(&ice_rdma.mtx);
}
/**
 * ice_rdma_pf_attach_locked - Prepare a PF for RDMA connections
 * @sc: the ice driver softc
 *
 * Initialize a peer entry for this PF and add it to the RDMA interface list.
 * Notify the client RDMA driver of a new PF device.
 *
 * @pre must be called while holding the ice_rdma mutex.
 *
 * @returns 0 on success (including the no-op case where the PF lacks RDMA
 * capability), or EEXIST if this PF's peer entry is already attached.
 */
static int
ice_rdma_pf_attach_locked(struct ice_softc *sc)
{
	struct ice_rdma_entry *entry;

	/* Do not attach the PF unless RDMA is supported */
	if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA))
		return (0);

	entry = &sc->rdma_entry;
	if (entry->attached) {
		device_printf(sc->dev, "iRDMA peer entry already exists\n");
		return (EEXIST);
	}

	/* Populate the peer structure the RDMA client will consume */
	entry->attached = true;
	entry->peer.dev = sc->dev;
	entry->peer.ifp = sc->ifp;
	entry->peer.pf_id = sc->hw.pf_id;
	entry->peer.pci_mem = sc->bar0.res;
	entry->peer.pf_vsi_num = ice_get_hw_vsi_num(&sc->hw, sc->pf_vsi.idx);
	/* Hand over the MSI-X range reserved for RDMA, if any was mapped */
	if (sc->rdma_imap && sc->rdma_imap[0] != ICE_INVALID_RES_IDX &&
	    sc->irdma_vectors > 0) {
		entry->peer.msix.base = sc->rdma_imap[0];
		entry->peer.msix.count = sc->irdma_vectors;
	}

	/* Gather DCB/QOS info into peer */
	memset(&entry->peer.initial_qos_info, 0, sizeof(entry->peer.initial_qos_info));
	ice_rdma_cp_qos_info(&sc->hw, &sc->hw.port_info->qos_cfg.local_dcbx_cfg,
			     &entry->peer.initial_qos_info);

	/*
	 * If the RDMA client driver has already registered, initialize the
	 * kobject and notify the client of a new PF
	 */
	if (ice_rdma.registered) {
		kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class);
		IRDMA_PROBE(&entry->peer);
	}

	LIST_INSERT_HEAD(&ice_rdma.peers, entry, node);

	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_en);

	return (0);
}
/**
 * ice_rdma_pf_attach - Notify the RDMA client of a new PF
 * @sc: the ice driver softc
 *
 * Called during PF attach to notify the RDMA client of a new PF.  This is a
 * thin serialized wrapper: it acquires the global RDMA interface lock and
 * delegates all the real work to ice_rdma_pf_attach_locked().
 *
 * @returns the result of ice_rdma_pf_attach_locked().
 */
int
ice_rdma_pf_attach(struct ice_softc *sc)
{
	int ret;

	sx_xlock(&ice_rdma.mtx);
	ret = ice_rdma_pf_attach_locked(sc);
	sx_xunlock(&ice_rdma.mtx);

	return (ret);
}
/**
 * ice_rdma_pf_detach_locked - Notify the RDMA client on PF detach
 * @sc: the ice driver softc
 *
 * Notify the RDMA peer client driver of removal of a PF, and release any
 * RDMA-specific resources associated with that PF. Remove the PF from the
 * list of available RDMA entries.
 *
 * @pre must be called while holding the ice_rdma mutex.
 */
static void
ice_rdma_pf_detach_locked(struct ice_softc *sc)
{
	struct ice_rdma_entry *entry;

	/* No need to detach the PF if RDMA is not enabled */
	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RDMA))
		return;

	entry = &sc->rdma_entry;
	if (!entry->attached) {
		/* Detach without a prior attach; nothing to undo */
		device_printf(sc->dev, "iRDMA peer entry was not attached\n");
		return;
	}

	/*
	 * If the RDMA client driver is registered, notify the client that
	 * a PF has been removed, and release the kobject reference.
	 */
	if (ice_rdma.registered) {
		IRDMA_REMOVE(&entry->peer);
		kobj_delete((kobj_t)&entry->peer, NULL);
	}

	LIST_REMOVE(entry, node);
	entry->attached = false;

	/* Mirror the teardown done in attach: clear the enabled feature bit */
	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_en);
}
/**
 * ice_rdma_pf_detach - Notify the RDMA client of a PF detaching
 * @sc: the ice driver softc
 *
 * Take the ice_rdma mutex and then notify the RDMA client that a PF has been
 * removed.  Serialized counterpart of ice_rdma_pf_attach().
 */
void
ice_rdma_pf_detach(struct ice_softc *sc)
{
	sx_xlock(&ice_rdma.mtx);
	ice_rdma_pf_detach_locked(sc);
	sx_xunlock(&ice_rdma.mtx);
}
/**
 * ice_rdma_pf_init - Notify the RDMA client that a PF has initialized
 * @sc: the ice driver softc
 *
 * Called by the ice driver when a PF has been initialized. Notifies the RDMA
 * client that a PF is up and ready to operate.
 *
 * @returns the result of IRDMA_OPEN() when a client is attached and
 * registered, otherwise 0.
 */
int
ice_rdma_pf_init(struct ice_softc *sc)
{
	struct ice_rdma_peer *peer = &sc->rdma_entry.peer;

	sx_xlock(&ice_rdma.mtx);

	/* Update the MTU */
	peer->mtu = sc->ifp->if_mtu;
	sc->rdma_entry.initiated = true;

	if (sc->rdma_entry.attached && ice_rdma.registered) {
		/*
		 * Drop the interface lock before invoking the client's OPEN
		 * method rather than holding it across the callback.
		 */
		sx_xunlock(&ice_rdma.mtx);
		return IRDMA_OPEN(peer);
	}

	sx_xunlock(&ice_rdma.mtx);

	return (0);
}
/**
 * ice_rdma_pf_stop - Notify the RDMA client of a stopped PF device
 * @sc: the ice driver softc
 *
 * Called by the ice driver when a PF is stopped. Notifies the RDMA client
 * driver that the PF has stopped and is not ready to operate.
 *
 * @returns the result of IRDMA_CLOSE() when a client is attached and
 * registered, otherwise 0.
 */
int
ice_rdma_pf_stop(struct ice_softc *sc)
{
	sx_xlock(&ice_rdma.mtx);

	sc->rdma_entry.initiated = false;
	if (sc->rdma_entry.attached && ice_rdma.registered) {
		/*
		 * As in ice_rdma_pf_init(), release the lock before calling
		 * into the client's CLOSE method.
		 */
		sx_xunlock(&ice_rdma.mtx);
		return IRDMA_CLOSE(&sc->rdma_entry.peer);
	}

	sx_xunlock(&ice_rdma.mtx);

	return (0);
}
/**
 * ice_rdma_link_change - Notify RDMA client of a change in link status
 * @sc: the ice driver softc
 * @linkstate: the link status
 * @baudrate: the link rate in bits per second
 *
 * Builds an ICE_RDMA_EVENT_LINK_CHANGE event carrying the new link state and
 * baudrate, and delivers it to the RDMA client's event handler while holding
 * the interface lock, provided a client is attached and registered.
 *
 * The link state is represented the same way as in the ifnet structure:
 * LINK_STATE_UNKNOWN, LINK_STATE_DOWN, or LINK_STATE_UP.
 */
void
ice_rdma_link_change(struct ice_softc *sc, int linkstate, uint64_t baudrate)
{
	struct ice_rdma_entry *entry = &sc->rdma_entry;
	struct ice_rdma_event ev;

	/* Zero the whole event before filling in the link-change fields */
	memset(&ev, 0, sizeof(ev));
	ev.type = ICE_RDMA_EVENT_LINK_CHANGE;
	ev.linkstate = linkstate;
	ev.baudrate = baudrate;

	sx_xlock(&ice_rdma.mtx);

	if (entry->attached && ice_rdma.registered)
		IRDMA_EVENT_HANDLER(&entry->peer, &ev);

	sx_xunlock(&ice_rdma.mtx);
}
/**
* ice_rdma_notify_dcb_qos_change - notify RDMA driver to pause traffic
* @sc: the ice driver softc
*
* Notify the RDMA driver that QOS/DCB settings are about to change.
* Once the function return, all the QPs should be suspended.
*/
void
ice_rdma_notify_dcb_qos_change(struct ice_softc *sc)
{
struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
struct ice_rdma_event event;
memset(&event, 0, sizeof(struct ice_rdma_event));
event.type = ICE_RDMA_EVENT_TC_CHANGE;
/* pre-event */
event.prep = true;
sx_xlock(&ice_rdma.mtx);
if (sc->rdma_entry.attached && ice_rdma.registered)
IRDMA_EVENT_HANDLER(peer, &event);
sx_xunlock(&ice_rdma.mtx);
}
/**
* ice_rdma_dcb_qos_update - pass the changed dcb settings to RDMA driver
* @sc: the ice driver softc
* @pi: the port info structure
*
* Pass the changed DCB settings to RDMA traffic. This function should be
* called only after ice_rdma_notify_dcb_qos_change has been called and
* returned before. After the function returns, all the RDMA traffic
* should be resumed.
*/
void
ice_rdma_dcb_qos_update(struct ice_softc *sc, struct ice_port_info *pi)
{
struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
struct ice_rdma_event event;
memset(&event, 0, sizeof(struct ice_rdma_event));
event.type = ICE_RDMA_EVENT_TC_CHANGE;
/* post-event */
event.prep = false;
/* gather current configuration */
ice_rdma_cp_qos_info(&sc->hw, &pi->qos_cfg.local_dcbx_cfg, &event.port_qos);
sx_xlock(&ice_rdma.mtx);
if (sc->rdma_entry.attached && ice_rdma.registered)
IRDMA_EVENT_HANDLER(peer, &event);
sx_xunlock(&ice_rdma.mtx);
}

311
sys/dev/ice/ice_rdma.h Normal file
View File

@ -0,0 +1,311 @@
/* SPDX-License-Identifier: BSD-3-Clause */
/* Copyright (c) 2021, Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*$FreeBSD$*/
/**
* @file ice_rdma.h
* @brief header file for RDMA client interface functions
*
* Contains definitions and function calls shared by the ice driver and the
* RDMA client interface driver.
*
* Since these definitions are shared between drivers it is important that any
* changes are considered carefully for backwards compatibility.
*/
#ifndef _ICE_RDMA_H_
#define _ICE_RDMA_H_
/*
* The RDMA client interface version is used to help determine
* incompatibilities between the interface definition shared between the main
* driver and the client driver.
*
* It will follows the semantic version guidelines, that is:
* Given the version number MAJOR.MINOR.PATCH, increment the:
*
* MAJOR version when you make incompatible changes,
* MINOR version when you add functionality in a backwards-compatible manner, and
* PATCH version when you make backwards-compatible bug fixes.
*
* Any change to this file, or one of the kobject interface files must come
* with an associated change in one of the MAJOR, MINOR, or PATCH versions,
* and care must be taken that backwards incompatible changes MUST increment
* the MAJOR version.
*
* Note: Until the MAJOR version is set to at least 1, the above semantic
* version guarantees may not hold, and this interface should not be
* considered stable.
*/
#define ICE_RDMA_MAJOR_VERSION 1
#define ICE_RDMA_MINOR_VERSION 0
#define ICE_RDMA_PATCH_VERSION 0
/**
* @def ICE_RDMA_MAX_MSIX
* @brief Maximum number of MSI-X vectors that will be reserved
*
* Defines the maximum number of MSI-X vectors that an RDMA interface will
* have reserved in advance. Does not guarantee that many vectors have
* actually been enabled.
*/
#define ICE_RDMA_MAX_MSIX 64
/**
* @struct ice_rdma_info
* @brief RDMA information from the client driver
*
* The RDMA client driver will fill in this structure and pass its contents
* back to the main driver using the ice_rdma_register function.
*
* It should fill the version in with the ICE_RDMA_* versions as defined in
* the ice_rdma.h header.
*
* Additionally it must provide a pointer to a kobject class which extends the
* ice_rdma_di_class with the operations defined in the rdma_if.m interface.
*
* If the version specified is not compatible, then the registration will
* of the RDMA driver will fail.
*/
struct ice_rdma_info {
	uint16_t major_version;		/* client's expected MAJOR interface version */
	uint16_t minor_version;		/* client's expected MINOR interface version */
	uint16_t patch_version;		/* client's expected PATCH interface version */

	kobj_class_t rdma_class;	/* client kobject class; must extend irdma_di_if */
};
/* Number of 802.1p user-priority levels shared with the RDMA client */
#define ICE_RDMA_MAX_USER_PRIORITY	8
/*
 * Note: ICE_RDMA_MAX_MSIX is defined once, earlier in this header; the
 * redundant duplicate definition that used to follow here was removed.
 */
/* Declare the ice_rdma_di kobject class */
DECLARE_CLASS(ice_rdma_di_class);
/**
 * @struct ice_rdma_msix_mapping
 * @brief MSI-X mapping requested by the peer RDMA driver
 *
 * Defines a mapping for MSI-X vectors being requested by the peer RDMA driver
 * for a given PF.
 */
struct ice_rdma_msix_mapping {
	uint8_t itr_indx;	/* ITR index — presumably the throttle-rate
				 * index to program for these vectors; confirm
				 * against the irdma client usage */
	int aeq_vector;		/* vector assigned to the AEQ */
	int ceq_cnt;		/* number of entries in ceq_vector */
	int *ceq_vector;	/* array of vectors assigned to CEQs */
};

/**
 * @struct ice_rdma_msix
 * @brief RDMA MSI-X vectors reserved for the peer RDMA driver
 *
 * Defines the segment of the MSI-X vectors for use by the RDMA driver. These
 * are reserved by the PF when it initializes.
 */
struct ice_rdma_msix {
	int base;	/* first MSI-X index reserved for RDMA */
	int count;	/* number of contiguous vectors reserved */
};
/**
 * @struct ice_qos_info
 * @brief QoS information to be shared with RDMA driver
 *
 * Per-TC QoS data.  Field semantics follow the DCB configuration this is
 * copied from (see ice_rdma_cp_qos_info callers); exact meanings should be
 * confirmed against the DCBX configuration structures.
 */
struct ice_qos_info {
	uint64_t tc_ctx;		/* TC context */
	uint8_t rel_bw;			/* relative bandwidth */
	uint8_t prio_type;		/* priority type */
	uint8_t egress_virt_up;		/* egress virtual user priority */
	uint8_t ingress_virt_up;	/* ingress virtual user priority */
};

/**
 * @struct ice_qos_app_priority_table
 * @brief Application priority data
 */
struct ice_qos_app_priority_table {
	uint16_t prot_id;	/* protocol identifier */
	uint8_t priority;	/* priority assigned to the application */
	uint8_t selector;	/* selector field (protocol ID interpretation) */
};

#define IEEE_8021QAZ_MAX_TCS 8		/* max traffic classes per 802.1Qaz */
#define ICE_TC_MAX_USER_PRIORITY 8	/* max 802.1p user priorities */
#define ICE_QOS_MAX_APPS 32		/* capacity of the apps[] table */
#define ICE_QOS_DSCP_NUM_VAL 64		/* number of DSCP code points */

/**
 * @struct ice_qos_params
 * @brief Holds all necessary data for RDMA to work with DCB
 *
 * Struct to hold QoS info
 */
struct ice_qos_params {
	struct ice_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];	/* per-TC info */
	uint8_t up2tc[ICE_TC_MAX_USER_PRIORITY];	/* user priority to TC map */
	uint8_t vsi_relative_bw;			/* VSI relative bandwidth */
	uint8_t vsi_priority_type;			/* VSI priority type */
	uint32_t num_apps;				/* valid entries in apps[] */
	uint8_t pfc_mode;				/* PFC mode */
	uint8_t dscp_map[ICE_QOS_DSCP_NUM_VAL];		/* DSCP to TC map */
	struct ice_qos_app_priority_table apps[ICE_QOS_MAX_APPS];	/* app table */
	uint8_t num_tc;					/* number of valid TCs */
};
/**
 * @struct ice_rdma_peer
 * @brief RDMA driver information
 *
 * Shared structure used by the RDMA client driver when talking with the main
 * device driver.
 *
 * Because the definition of this structure is shared between the two drivers,
 * its ABI should be handled carefully.
 */
struct ice_rdma_peer {
	/**
	 * The KOBJ_FIELDS macro must come first, in order for it to be used
	 * as a kobject.
	 */
	KOBJ_FIELDS;

	struct ifnet *ifp;		/* network interface of the PF */
	device_t dev;			/* PF device */
	struct resource *pci_mem;	/* PCI BAR 0 memory resource */
	struct ice_qos_params initial_qos_info;	/* QoS snapshot taken at attach */
	struct ice_rdma_msix msix;	/* MSI-X range reserved for RDMA */
	uint16_t mtu;			/* current interface MTU */
	uint16_t pf_vsi_num;		/* hardware VSI number of the PF VSI */
	uint8_t pf_id;			/* PF function identifier */
};

/**
 * @enum ice_res_type
 * @brief enum for type of resource registration
 *
 * enum for type of resource registration.
 * created for plausible compatibility with IDC
 */
enum ice_res_type {
	ICE_INVAL_RES = 0x0,
	ICE_RDMA_QSET_ALLOC = 0x8,
	ICE_RDMA_QSET_FREE = 0x18,
};

/**
 * @struct ice_rdma_qset_params
 * @brief struct to hold per RDMA Qset info
 */
struct ice_rdma_qset_params {
	uint32_t teid;		/* qset TEID */
	uint16_t qs_handle;	/* RDMA driver provides this */
	uint16_t vsi_id;	/* VSI index */
	uint8_t tc;		/* TC branch the QSet should belong to */
	uint8_t reserved[3];	/* padding; keeps ABI layout explicit */
};

/* Maximum number of Tx queues per Tx queue group */
#define ICE_MAX_TXQ_PER_TXQG 128

/**
 * @struct ice_rdma_qset_update
 * @brief struct used to register and unregister qsets for RDMA driver
 */
struct ice_rdma_qset_update {
	enum ice_res_type res_type;	/* allocate or free */
	uint16_t cnt_req;		/* number of qsets requested */
	uint16_t res_allocated;		/* number of qsets actually granted */
	uint32_t res_handle;		/* handle identifying the allocation */
	struct ice_rdma_qset_params qsets;	/* qset parameters */
};
/**
 * @enum ice_rdma_event_type
 * @brief enum for type of event from base driver
 */
enum ice_rdma_event_type {
	ICE_RDMA_EVENT_NONE = 0,
	ICE_RDMA_EVENT_LINK_CHANGE,
	ICE_RDMA_EVENT_MTU_CHANGE,
	ICE_RDMA_EVENT_TC_CHANGE,
	ICE_RDMA_EVENT_API_CHANGE,
	ICE_RDMA_EVENT_CRIT_ERR,
	ICE_RDMA_EVENT_RESET,
	ICE_RDMA_EVENT_QSET_REGISTER,
	ICE_RDMA_EVENT_VSI_FILTER_UPDATE,
	ICE_RDMA_EVENT_LAST	/* sentinel; must remain last */
};

/**
 * @struct ice_rdma_event
 * @brief struct for event information to pass to RDMA driver
 *
 * The anonymous union member that is valid depends on @type; the unions rely
 * on C11 anonymous struct/union support.
 */
struct ice_rdma_event {
	enum ice_rdma_event_type type;	/* selects the valid union member */
	union {
		/* link change event */
		struct {
			int linkstate;		/* LINK_STATE_* value */
			uint64_t baudrate;	/* link rate in bits per second */
		};

		/* MTU change event */
		struct {
			int mtu;		/* new MTU */
		};

		/*
		 * TC/QoS/DCB change event
		 * RESET event use prep variable only
		 * prep: if true, this is a pre-event, post-event otherwise
		 */
		struct {
			struct ice_qos_params port_qos;	/* current port QoS */
			bool prep;			/* pre/post indicator */
		};
	};
};

/**
 * @struct ice_rdma_request
 * @brief struct with data for a request from the RDMA driver
 *
 * As with ice_rdma_event, @type selects which anonymous union member holds
 * the request payload.
 */
struct ice_rdma_request {
	enum ice_rdma_event_type type;	/* selects the valid union member */
	union {
		struct {
			struct ice_rdma_qset_update res;	/* qset (de)registration */
		};
		struct {
			bool enable_filter;	/* iWARP filter on/off */
		};
	};
};
int ice_rdma_register(struct ice_rdma_info *info);
int ice_rdma_unregister(void);
#endif

View File

@ -0,0 +1,102 @@
/* SPDX-License-Identifier: BSD-3-Clause */
/* Copyright (c) 2022, Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*$FreeBSD$*/
/**
* @file ice_rdma_internal.h
* @brief internal header for the RMDA driver interface setup
*
* Contains the definitions and functions used by the ice driver to setup the
* RDMA driver interface. Functions and definitions in this file are not
* shared with the RDMA client driver.
*/
#ifndef _ICE_RDMA_INTERNAL_H_
#define _ICE_RDMA_INTERNAL_H_
#include "ice_rdma.h"
/* Forward declare the softc structure */
struct ice_softc;
/* Global sysctl variable indicating if the RDMA client interface is enabled */
extern bool ice_enable_irdma;
/**
 * @struct ice_rdma_entry
 * @brief RDMA peer list node
 *
 * Structure used to store peer entries for each PF in a linked list.
 */
struct ice_rdma_entry {
	LIST_ENTRY(ice_rdma_entry) node;	/* linkage in ice_rdma.peers */
	struct ice_rdma_peer peer;	/* peer data shared with the client */

	bool attached;		/* true once the PF peer entry is attached */
	bool initiated;		/* true once the PF has been initialized */
};

/* Convert between the embedded peer/entry and the owning softc */
#define ice_rdma_peer_to_entry(p) __containerof(p, struct ice_rdma_entry, peer)
#define ice_rdma_entry_to_sc(e) __containerof(e, struct ice_softc, rdma_entry)
#define ice_rdma_peer_to_sc(p) ice_rdma_entry_to_sc(ice_rdma_peer_to_entry(p))
/**
* @struct ice_rdma_peers
* @brief Head list structure for the RDMA entry list
*
* Type defining the head of the linked list of RDMA entries.
*/
LIST_HEAD(ice_rdma_peers, ice_rdma_entry);
/**
 * @struct ice_rdma_state
 * @brief global driver state for RDMA
 *
 * Contains global state shared across all PFs by the device driver, such as
 * the kobject class of the currently connected peer driver, and the linked
 * list of peer entries for each PF.
 */
struct ice_rdma_state {
	bool registered;		/* is a client driver registered? */
	kobj_class_t peer_class;	/* class supplied by the client */
	struct sx mtx;			/* protects every field in here and
					 * the peer entries on the list */

	struct ice_rdma_peers peers;	/* list of per-PF peer entries */
};
void ice_rdma_init(void);
void ice_rdma_exit(void);
int ice_rdma_pf_attach(struct ice_softc *sc);
void ice_rdma_pf_detach(struct ice_softc *sc);
int ice_rdma_pf_init(struct ice_softc *sc);
int ice_rdma_pf_stop(struct ice_softc *sc);
void ice_rdma_link_change(struct ice_softc *sc, int linkstate, uint64_t baudrate);
void ice_rdma_notify_dcb_qos_change(struct ice_softc *sc);
void ice_rdma_dcb_qos_update(struct ice_softc *sc, struct ice_port_info *pi);
#endif

View File

@ -620,6 +620,48 @@ ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
return ICE_SUCCESS;
}
/**
 * ice_alloc_rdma_q_ctx - allocate RDMA queue contexts for the given VSI and TC
 * @hw: pointer to the HW struct
 * @vsi_handle: VSI handle
 * @tc: TC number
 * @new_numqs: number of queues
 *
 * Allocates (or grows) the array of RDMA queue contexts for one TC of a VSI.
 * The array only ever grows: if new_numqs is less than or equal to the
 * current count the existing contexts are kept as-is.
 */
static enum ice_status
ice_alloc_rdma_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
{
	struct ice_vsi_ctx *vsi_ctx;
	struct ice_q_ctx *q_ctx;

	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
	if (!vsi_ctx)
		return ICE_ERR_PARAM;
	/* allocate RDMA queue contexts */
	if (!vsi_ctx->rdma_q_ctx[tc]) {
		vsi_ctx->rdma_q_ctx[tc] = (struct ice_q_ctx *)
			ice_calloc(hw, new_numqs, sizeof(*q_ctx));
		if (!vsi_ctx->rdma_q_ctx[tc])
			return ICE_ERR_NO_MEMORY;
		vsi_ctx->num_rdma_q_entries[tc] = new_numqs;
		return ICE_SUCCESS;
	}
	/* num queues are increased, update the queue contexts */
	if (new_numqs > vsi_ctx->num_rdma_q_entries[tc]) {
		u16 prev_num = vsi_ctx->num_rdma_q_entries[tc];

		/* grow by allocating a fresh zeroed array, copying the old
		 * entries over, then freeing the old array */
		q_ctx = (struct ice_q_ctx *)
			ice_calloc(hw, new_numqs, sizeof(*q_ctx));
		if (!q_ctx)
			return ICE_ERR_NO_MEMORY;
		ice_memcpy(q_ctx, vsi_ctx->rdma_q_ctx[tc],
			   prev_num * sizeof(*q_ctx), ICE_DMA_TO_NONDMA);
		ice_free(hw, vsi_ctx->rdma_q_ctx[tc]);
		vsi_ctx->rdma_q_ctx[tc] = q_ctx;
		vsi_ctx->num_rdma_q_entries[tc] = new_numqs;
	}
	return ICE_SUCCESS;
}
/**
* ice_aq_rl_profile - performs a rate limiting task
* @hw: pointer to the HW struct
@ -1904,13 +1946,22 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
if (!vsi_ctx)
return ICE_ERR_PARAM;
prev_numqs = vsi_ctx->sched.max_lanq[tc];
if (owner == ICE_SCHED_NODE_OWNER_LAN)
prev_numqs = vsi_ctx->sched.max_lanq[tc];
else
prev_numqs = vsi_ctx->sched.max_rdmaq[tc];
/* num queues are not changed or less than the previous number */
if (new_numqs <= prev_numqs)
return status;
status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
if (status)
return status;
if (owner == ICE_SCHED_NODE_OWNER_LAN) {
status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
if (status)
return status;
} else {
status = ice_alloc_rdma_q_ctx(hw, vsi_handle, tc, new_numqs);
if (status)
return status;
}
if (new_numqs)
ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
@ -1925,7 +1976,10 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
new_num_nodes, owner);
if (status)
return status;
vsi_ctx->sched.max_lanq[tc] = new_numqs;
if (owner == ICE_SCHED_NODE_OWNER_LAN)
vsi_ctx->sched.max_lanq[tc] = new_numqs;
else
vsi_ctx->sched.max_rdmaq[tc] = new_numqs;
return ICE_SUCCESS;
}
@ -1991,6 +2045,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
* recreate the child nodes all the time in these cases.
*/
vsi_ctx->sched.max_lanq[tc] = 0;
vsi_ctx->sched.max_rdmaq[tc] = 0;
}
/* update the VSI child nodes */
@ -2121,6 +2176,8 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
}
if (owner == ICE_SCHED_NODE_OWNER_LAN)
vsi_ctx->sched.max_lanq[i] = 0;
else
vsi_ctx->sched.max_rdmaq[i] = 0;
}
status = ICE_SUCCESS;
@ -2142,6 +2199,19 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
}
/**
 * ice_rm_vsi_rdma_cfg - remove VSI and its RDMA children nodes
 * @pi: port information structure
 * @vsi_handle: software VSI handle
 *
 * This function clears the VSI and its RDMA children nodes from scheduler tree
 * for all TCs.  Thin wrapper around ice_sched_rm_vsi_cfg() with the RDMA
 * node owner.
 */
enum ice_status ice_rm_vsi_rdma_cfg(struct ice_port_info *pi, u16 vsi_handle)
{
	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_RDMA);
}
/**
* ice_sched_is_tree_balanced - Check tree nodes are identical or not
* @hw: pointer to the HW struct

View File

@ -139,6 +139,7 @@ enum ice_status
ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
u8 owner, bool enable);
enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
enum ice_status ice_rm_vsi_rdma_cfg(struct ice_port_info *pi, u16 vsi_handle);
struct ice_sched_node *
ice_sched_get_vsi_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
u16 vsi_handle);

View File

@ -537,6 +537,10 @@ static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle)
ice_free(hw, vsi->lan_q_ctx[i]);
vsi->lan_q_ctx[i] = NULL;
}
if (vsi->rdma_q_ctx[i]) {
ice_free(hw, vsi->rdma_q_ctx[i]);
vsi->rdma_q_ctx[i] = NULL;
}
}
}
@ -658,6 +662,47 @@ ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
return ice_aq_update_vsi(hw, vsi_ctx, cd);
}
/**
 * ice_cfg_iwarp_fltr - enable/disable iWARP filtering on VSI
 * @hw: pointer to HW struct
 * @vsi_handle: VSI SW index
 * @enable: boolean for enable/disable
 *
 * Toggles the ICE_AQ_VSI_Q_OPT_PE_FLTR_EN queue-option flag for the given
 * VSI through an update-VSI admin queue command.  On success the new flags
 * are mirrored back into the cached VSI context so subsequent updates start
 * from the correct state.
 */
enum ice_status
ice_cfg_iwarp_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable)
{
	struct ice_vsi_ctx *upd_ctx, *cached;
	enum ice_status status;

	cached = ice_get_vsi_ctx(hw, vsi_handle);
	if (cached == NULL)
		return ICE_ERR_DOES_NOT_EXIST;

	/* Build a scratch context carrying only the queue-option section */
	upd_ctx = (struct ice_vsi_ctx *)ice_calloc(hw, 1, sizeof(*upd_ctx));
	if (upd_ctx == NULL)
		return ICE_ERR_NO_MEMORY;

	upd_ctx->info.q_opt_rss = cached->info.q_opt_rss;
	upd_ctx->info.q_opt_tc = cached->info.q_opt_tc;
	upd_ctx->info.q_opt_flags = cached->info.q_opt_flags;
	upd_ctx->info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_Q_OPT_VALID);

	if (enable)
		upd_ctx->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
	else
		upd_ctx->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;

	status = ice_update_vsi(hw, vsi_handle, upd_ctx, NULL);
	if (!status) {
		/* Keep the cached context in sync with hardware */
		cached->info.q_opt_flags = upd_ctx->info.q_opt_flags;
		cached->info.valid_sections |= upd_ctx->info.valid_sections;
	}

	ice_free(hw, upd_ctx);

	return status;
}
/**
* ice_aq_get_vsi_params
* @hw: pointer to the HW struct

View File

@ -77,6 +77,8 @@ struct ice_vsi_ctx {
u8 vf_num;
u16 num_lan_q_entries[ICE_MAX_TRAFFIC_CLASS];
struct ice_q_ctx *lan_q_ctx[ICE_MAX_TRAFFIC_CLASS];
u16 num_rdma_q_entries[ICE_MAX_TRAFFIC_CLASS];
struct ice_q_ctx *rdma_q_ctx[ICE_MAX_TRAFFIC_CLASS];
};
/* This is to be used by add/update mirror rule Admin Queue command */
@ -452,6 +454,8 @@ enum ice_status
ice_add_eth_mac(struct ice_hw *hw, struct LIST_HEAD_TYPE *em_list);
enum ice_status
ice_remove_eth_mac(struct ice_hw *hw, struct LIST_HEAD_TYPE *em_list);
enum ice_status
ice_cfg_iwarp_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable);
enum ice_status
ice_add_mac_with_sw_marker(struct ice_hw *hw, struct ice_fltr_info *f_info,

View File

@ -153,6 +153,7 @@ static inline u32 ice_round_to_num(u32 N, u32 R)
#define ICE_DBG_SW BIT_ULL(13)
#define ICE_DBG_SCHED BIT_ULL(14)
#define ICE_DBG_RDMA BIT_ULL(15)
#define ICE_DBG_PKG BIT_ULL(16)
#define ICE_DBG_RES BIT_ULL(17)
#define ICE_DBG_AQ_MSG BIT_ULL(24)
@ -404,6 +405,7 @@ struct ice_hw_common_caps {
u8 dcb;
u8 iscsi;
u8 mgmt_cem;
u8 iwarp;
/* WoL and APM support */
#define ICE_WOL_SUPPORT_M BIT(0)
@ -774,6 +776,7 @@ struct ice_sched_vsi_info {
struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
u16 max_lanq[ICE_MAX_TRAFFIC_CLASS];
u16 max_rdmaq[ICE_MAX_TRAFFIC_CLASS];
/* bw_t_info saves VSI BW information */
struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS];
};

View File

@ -675,12 +675,14 @@ ice_update_link_status(struct ice_softc *sc, bool update_media)
ice_set_default_local_lldp_mib(sc);
iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
ice_link_up_msg(sc);
update_media = true;
} else { /* link is down */
iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
update_media = true;
}
@ -788,6 +790,10 @@ ice_if_attach_post(if_ctx_t ctx)
/* Enable ITR 0 right away, so that we can handle admin interrupts */
ice_enable_intr(&sc->hw, sc->irqvs[0].me);
err = ice_rdma_pf_attach(sc);
if (err)
return (err);
/* Start the admin timer */
mtx_lock(&sc->admin_mtx);
callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
@ -884,6 +890,8 @@ ice_if_detach(if_ctx_t ctx)
mtx_unlock(&sc->admin_mtx);
mtx_destroy(&sc->admin_mtx);
ice_rdma_pf_detach(sc);
/* Free allocated media types */
ifmedia_removeall(sc->media);
@ -1308,6 +1316,7 @@ ice_allocate_msix(struct ice_softc *sc)
cpuset_t cpus;
int bar, queues, vectors, requested;
int err = 0;
int rdma;
/* Allocate the MSI-X bar */
bar = scctx->isc_msix_bar;
@ -1353,11 +1362,24 @@ ice_allocate_msix(struct ice_softc *sc)
queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
/*
* Choose a number of RDMA vectors based on the number of CPUs
* up to a maximum
*/
rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
/* Further limit by the user configurable tunable */
rdma = min(rdma, ice_rdma_max_msix);
} else {
rdma = 0;
}
/*
* Determine the number of vectors to request. Note that we also need
* to allocate one vector for administrative tasks.
*/
requested = queues + 1;
requested = rdma + queues + 1;
vectors = requested;
@ -1375,6 +1397,23 @@ ice_allocate_msix(struct ice_softc *sc)
device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
requested, vectors);
/*
* The OS didn't grant us the requested number of vectors.
* Check to see if we can reduce demands by limiting the
* number of vectors allocated to certain features.
*/
if (rdma >= diff) {
/* Reduce the number of RDMA vectors we reserve */
rdma -= diff;
diff = 0;
} else {
/* Disable RDMA and reduce the difference */
ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
diff -= rdma;
rdma = 0;
}
/*
* If we still have a difference, we need to reduce the number
* of queue pairs.
@ -1392,6 +1431,9 @@ ice_allocate_msix(struct ice_softc *sc)
}
device_printf(dev, "Using %d Tx and Rx queues\n", queues);
if (rdma)
device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
rdma);
device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
vectors);
@ -1400,6 +1442,8 @@ ice_allocate_msix(struct ice_softc *sc)
scctx->isc_ntxqsets = queues;
scctx->isc_intr = IFLIB_INTR_MSIX;
sc->irdma_vectors = rdma;
/* Interrupt allocation tracking isn't required in recovery mode,
* since neither RDMA nor VFs are enabled.
*/
@ -1407,13 +1451,21 @@ ice_allocate_msix(struct ice_softc *sc)
return (0);
/* Keep track of which interrupt indices are being used for what */
sc->lan_vectors = vectors;
sc->lan_vectors = vectors - rdma;
err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors);
if (err) {
device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
ice_err_str(err));
goto err_pci_release_msi;
}
err = ice_resmgr_assign_contiguous(&sc->imgr, sc->rdma_imap, rdma);
if (err) {
device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
ice_err_str(err));
ice_resmgr_release_map(&sc->imgr, sc->pf_imap,
sc->lan_vectors);
goto err_pci_release_msi;
}
return (0);
@ -1916,6 +1968,8 @@ ice_if_init(if_ctx_t ctx)
/* Configure promiscuous mode */
ice_if_promisc_set(ctx, if_getflags(sc->ifp));
ice_rdma_pf_init(sc);
ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
return;
@ -2061,6 +2115,9 @@ ice_transition_recovery_mode(struct ice_softc *sc)
/* Request that the device be re-initialized */
ice_request_stack_reinit(sc);
ice_rdma_pf_detach(sc);
ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
@ -2106,6 +2163,9 @@ ice_transition_safe_mode(struct ice_softc *sc)
ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
ice_rdma_pf_detach(sc);
ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
@ -2222,6 +2282,9 @@ ice_prepare_for_reset(struct ice_softc *sc)
if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
return;
/* stop the RDMA client */
ice_rdma_pf_stop(sc);
/* Release the main PF VSI queue mappings */
ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
sc->pf_vsi.num_tx_queues);
@ -2480,6 +2543,8 @@ ice_rebuild(struct ice_softc *sc)
ice_get_link_status(sc->hw.port_info, &sc->link_up);
ice_update_link_status(sc, true);
/* RDMA interface will be restarted by the stack re-init */
/* Configure interrupt causes for the administrative interrupt */
ice_configure_misc_interrupts(sc);
@ -2633,6 +2698,7 @@ ice_init_device_features(struct ice_softc *sc)
/* Set capabilities that all devices support */
ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
@ -2643,6 +2709,8 @@ ice_init_device_features(struct ice_softc *sc)
/* Disable features due to hardware limitations... */
if (!sc->hw.func_caps.common_cap.rss_table_size)
ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
if (!sc->hw.func_caps.common_cap.iwarp || !ice_enable_irdma)
ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
/* Disable features due to firmware limitations... */
if (!ice_is_fw_health_report_supported(&sc->hw))
ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
@ -2795,6 +2863,8 @@ ice_if_stop(if_ctx_t ctx)
return;
}
ice_rdma_pf_stop(sc);
/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
* return of these functions because there's nothing we can really do
* if they fail, and the functions already print error messages.

97
sys/dev/ice/irdma_di_if.m Normal file
View File

@ -0,0 +1,97 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2021, Intel Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# $FreeBSD$
/**
 * @file irdma_di_if.m
 * @brief RDMA client kobject driver interface
 *
 * KObject methods implemented by the ice driver. These functions are called
 * by the RDMA client driver to connect with the ice driver and request
 * operations or notify the driver of RDMA events.
 *
 * This .m file is translated by makeobjops.awk into the irdma_di_if.h
 * header listed in the module Makefile's interface headers.
 */
#include "ice_rdma.h"

INTERFACE irdma_di;
/**
 * reset - Request the ice driver to perform a reset
 * @peer: the RDMA peer structure
 *
 * Called by the RDMA client driver to request a reset of the ice device.
 * Returns an int status; NOTE(review): presumably 0 on success — confirm
 * against the ice driver's method implementation.
 */
METHOD int reset {
	struct ice_rdma_peer *peer;
};
/**
 * msix_init - Initialize MSI-X resources for the RDMA driver
 * @peer: the RDMA peer structure
 * @msix_info: the requested MSI-X mapping
 *
 * Called by the RDMA client driver to request initialization of the MSI-X
 * resources used for RDMA functionality. The ice driver reserves RDMA
 * vectors during interrupt allocation (sc->rdma_imap in
 * ice_allocate_msix()); NOTE(review): confirm @msix_info maps onto those
 * reserved vectors.
 */
METHOD int msix_init {
	struct ice_rdma_peer *peer;
	struct ice_rdma_msix_mapping *msix_info;
};
/**
 * qset_register_request - Request qset registration or deregistration
 * @peer: the RDMA peer client structure
 * @res: resources to be registered or unregistered
 *
 * Called by the RDMA client driver to ask the ice driver to register or
 * deregister the RDMA qsets described by @res.
 */
METHOD int qset_register_request {
	struct ice_rdma_peer *peer;
	struct ice_rdma_qset_update *res;
};
/**
 * vsi_filter_update - Enable or disable the VSI's RDMA filter
 * @peer: the RDMA peer client structure
 * @enable: true to enable, false to disable the RDMA filter
 *
 * Called by the RDMA client driver when it opens or closes, so the ice
 * driver can configure the VSI's RDMA filtering accordingly.
 */
METHOD int vsi_filter_update {
	struct ice_rdma_peer *peer;
	bool enable;
};
/**
 * req_handler - Handle a request incoming from the RDMA client driver
 * @peer: the RDMA peer client structure
 * @req: structure containing the request
 *
 * Called by the RDMA client driver to pass a generic ice_rdma_request to
 * the ice driver. Returns void; NOTE(review): any result is presumably
 * carried back through @req — confirm against the request structure.
 */
METHOD void req_handler {
	struct ice_rdma_peer *peer;
	struct ice_rdma_request *req;
};

106
sys/dev/ice/irdma_if.m Normal file
View File

@ -0,0 +1,106 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2021, Intel Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# $FreeBSD$
/**
 * @file irdma_if.m
 * @brief RDMA client kobject interface
 *
 * KObject methods implemented by the RDMA client driver. These functions will
 * be called from the ice driver to notify the RDMA client driver of device
 * driver events.
 */
#include "ice_rdma.h"

INTERFACE irdma;
/**
 * probe - Notify the RDMA client driver that a peer device has been created
 * @peer: the RDMA peer structure
 *
 * Called by the ice driver during attach to notify the RDMA client driver
 * that a new PF has been initialized. Returns an int status;
 * NOTE(review): presumably nonzero rejects the peer — confirm in the
 * client driver's implementation.
 */
METHOD int probe {
	struct ice_rdma_peer *peer;
};
/**
 * open - Notify the RDMA client driver that a peer device has been opened
 * @peer: the RDMA peer structure
 *
 * Called by the ice driver during the if_init routine (via
 * ice_rdma_pf_init) to notify the RDMA client driver that a PF has been
 * activated.
 */
METHOD int open {
	struct ice_rdma_peer *peer;
};
/**
 * close - Notify the RDMA client driver that a peer device has closed
 * @peer: the RDMA peer structure
 *
 * Called by the ice driver during the if_stop routine (via
 * ice_rdma_pf_stop) to notify the RDMA client driver that a PF has been
 * deactivated.
 */
METHOD int close {
	struct ice_rdma_peer *peer;
};
/**
 * remove - Notify the RDMA client driver that a peer device has been removed
 * @peer: the RDMA peer structure
 *
 * Called by the ice driver during detach to notify the RDMA client driver
 * that a PF has been removed.
 */
METHOD int remove {
	struct ice_rdma_peer *peer;
};
/**
 * link_change - Notify the RDMA client driver that link status has changed
 * @peer: the RDMA peer structure
 * @linkstate: the new link status
 * @baudrate: the link rate in bits per second
 *
 * Called by the ice driver when link status changes to notify the RDMA client
 * driver of the new status.
 */
METHOD void link_change {
	struct ice_rdma_peer *peer;
	int linkstate;
	uint64_t baudrate;
};
/**
 * event_handler - Notify the RDMA client driver of a driver event
 * @peer: the RDMA peer structure
 * @event: structure describing the event
 *
 * Called by the ice driver to deliver an ice_rdma_event notification to
 * the RDMA client driver. NOTE(review): the event types and payload are
 * declared in ice_rdma.h — confirm which events the client must handle.
 */
METHOD void event_handler {
	struct ice_rdma_peer *peer;
	struct ice_rdma_event *event;
};

View File

@ -3,12 +3,25 @@
.PATH: ${SRCTOP}/sys/dev/ice
KMOD = if_ice
# Interface headers
SRCS = device_if.h bus_if.h pci_if.h ifdi_if.h
SRCS += irdma_di_if.h irdma_if.h
# Option headers
SRCS += opt_inet.h opt_inet6.h opt_rss.h opt_iflib.h
# Core source
SRCS += ice_lib.c ice_osdep.c ice_resmgr.c ice_strings.c
SRCS += ice_iflib_recovery_txrx.c ice_iflib_txrx.c if_ice_iflib.c
SRCS += ice_fw_logging.c
# RDMA Client interface
# TODO: Is this the right way to compile this?
SRCS += irdma_di_if.c irdma_if.c
CFLAGS.irdma_di_if.c += -I${SRCTOP}/sys/dev/ice
CFLAGS.irdma_if.c += -I${SRCTOP}/sys/dev/ice
# Shared source
SRCS += ice_common.c ice_controlq.c ice_dcb.c ice_flex_pipe.c ice_flow.c
SRCS += ice_nvm.c ice_sched.c ice_switch.c ice_vlan_mode.c ice_fwlog.c