net/hns3: add simple Tx path

This patch adds a simple Tx process function. It can be used when
multi-segment packets are not needed, i.e. when no Tx offload other than
DEV_TX_OFFLOAD_MBUF_FAST_FREE is requested.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Authored by Wei Hu (Xavier) on 2020-09-09 17:23:35 +08:00; committed by Ferruh Yigit
Parent: 521ab3e933
Commit: 7ef933908f
5 changed files with 292 additions and 15 deletions
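As an illustration outside the patch itself, the sketch below shows how an application could make a port eligible for this simple Tx path, assuming a DPDK port driven by the hns3 PMD: request no Tx offload other than DEV_TX_OFFLOAD_MBUF_FAST_FREE at configure time, then (once queues are set up and the port is started) query the selected Tx burst mode, which this patch reports as "Scalar Simple". The helper name configure_simple_tx is hypothetical.

```c
/* Illustrative only (not part of this commit): configure a port so that the
 * hns3 PMD can pick hns3_xmit_pkts_simple. The helper name is hypothetical.
 */
#include <stdio.h>
#include <rte_ethdev.h>

static int
configure_simple_tx(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
{
	struct rte_eth_conf conf = { 0 };
	struct rte_eth_burst_mode mode;
	int ret;

	/* Requesting only MBUF_FAST_FREE (or no Tx offload at all) keeps the
	 * port eligible for the simple Tx path added by this patch.
	 */
	conf.txmode.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE;

	ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
	if (ret != 0)
		return ret;

	/* Queue setup and rte_eth_dev_start() are omitted here; after the port
	 * is started, the chosen Tx burst mode can be queried and should read
	 * "Scalar Simple" when the simple path was selected.
	 */
	if (rte_eth_tx_burst_mode_get(port_id, 0, &mode) == 0)
		printf("Tx burst mode: %s\n", mode.info);

	return ret;
}
```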

drivers/net/hns3/hns3_ethdev.c

@ -2352,6 +2352,7 @@ hns3_dev_configure(struct rte_eth_dev *dev)
goto cfg_err;
hns->rx_simple_allowed = true;
hns->tx_simple_allowed = true;
hns3_init_rx_ptype_tble(dev);
hw->adapter_state = HNS3_NIC_CONFIGURED;
@ -2511,6 +2512,10 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info)
.rx_drop_en = 1,
.offloads = 0,
};
info->default_txconf = (struct rte_eth_txconf) {
.tx_rs_thresh = HNS3_DEFAULT_TX_RS_THRESH,
.offloads = 0,
};
info->vmdq_queue_num = 0;
@ -5544,6 +5549,7 @@ static const struct eth_dev_ops hns3_eth_dev_ops = {
.rxq_info_get = hns3_rxq_info_get,
.txq_info_get = hns3_txq_info_get,
.rx_burst_mode_get = hns3_rx_burst_mode_get,
.tx_burst_mode_get = hns3_tx_burst_mode_get,
.flow_ctrl_get = hns3_flow_ctrl_get,
.flow_ctrl_set = hns3_flow_ctrl_set,
.priority_flow_ctrl_set = hns3_priority_flow_ctrl_set,

drivers/net/hns3/hns3_ethdev.h

@ -643,6 +643,7 @@ struct hns3_adapter {
};
bool rx_simple_allowed;
bool tx_simple_allowed;
struct hns3_ptype_table ptype_tbl __rte_cache_min_aligned;
};

drivers/net/hns3/hns3_ethdev_vf.c

@ -822,6 +822,7 @@ hns3vf_dev_configure(struct rte_eth_dev *dev)
goto cfg_err;
hns->rx_simple_allowed = true;
hns->tx_simple_allowed = true;
hns3_init_rx_ptype_tble(dev);
hw->adapter_state = HNS3_NIC_CONFIGURED;
@ -956,6 +957,10 @@ hns3vf_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info)
.rx_drop_en = 1,
.offloads = 0,
};
info->default_txconf = (struct rte_eth_txconf) {
.tx_rs_thresh = HNS3_DEFAULT_TX_RS_THRESH,
.offloads = 0,
};
info->vmdq_queue_num = 0;
@ -2540,6 +2545,7 @@ static const struct eth_dev_ops hns3vf_eth_dev_ops = {
.rxq_info_get = hns3_rxq_info_get,
.txq_info_get = hns3_txq_info_get,
.rx_burst_mode_get = hns3_rx_burst_mode_get,
.tx_burst_mode_get = hns3_tx_burst_mode_get,
.mac_addr_add = hns3vf_add_mac_addr,
.mac_addr_remove = hns3vf_remove_mac_addr,
.mac_addr_set = hns3vf_set_default_mac_addr,

drivers/net/hns3/hns3_rxtx.c

@ -1952,27 +1952,72 @@ hns3_get_rx_function(struct rte_eth_dev *dev)
return hns3_recv_scattered_pkts;
}
static int
hns3_tx_queue_conf_check(struct hns3_hw *hw, const struct rte_eth_txconf *conf,
uint16_t nb_desc, uint16_t *tx_rs_thresh,
uint16_t *tx_free_thresh, uint16_t idx)
{
#define HNS3_TX_RS_FREE_THRESH_GAP 8
uint16_t rs_thresh, free_thresh, fast_free_thresh;
if (nb_desc > HNS3_MAX_RING_DESC || nb_desc < HNS3_MIN_RING_DESC ||
nb_desc % HNS3_ALIGN_RING_DESC) {
hns3_err(hw, "number (%u) of tx descriptors is invalid",
nb_desc);
return -EINVAL;
}
rs_thresh = (conf->tx_rs_thresh > 0) ?
conf->tx_rs_thresh : HNS3_DEFAULT_TX_RS_THRESH;
free_thresh = (conf->tx_free_thresh > 0) ?
conf->tx_free_thresh : HNS3_DEFAULT_TX_FREE_THRESH;
if (rs_thresh + free_thresh > nb_desc || nb_desc % rs_thresh ||
rs_thresh >= nb_desc - HNS3_TX_RS_FREE_THRESH_GAP ||
free_thresh >= nb_desc - HNS3_TX_RS_FREE_THRESH_GAP) {
hns3_err(hw, "tx_rs_thresh (%d) tx_free_thresh (%d) nb_desc "
"(%d) of tx descriptors for port=%d queue=%d check "
"fail!",
rs_thresh, free_thresh, nb_desc, hw->data->port_id,
idx);
return -EINVAL;
}
if (conf->tx_free_thresh == 0) {
/* Fast free Tx memory buffer to improve cache hit rate */
fast_free_thresh = nb_desc - rs_thresh;
if (fast_free_thresh >=
HNS3_TX_FAST_FREE_AHEAD + HNS3_DEFAULT_TX_FREE_THRESH)
free_thresh = fast_free_thresh -
HNS3_TX_FAST_FREE_AHEAD;
}
*tx_rs_thresh = rs_thresh;
*tx_free_thresh = free_thresh;
return 0;
}
int
hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
unsigned int socket_id, const struct rte_eth_txconf *conf)
{
struct hns3_adapter *hns = dev->data->dev_private;
uint16_t tx_rs_thresh, tx_free_thresh;
struct hns3_hw *hw = &hns->hw;
struct hns3_queue_info q_info;
struct hns3_tx_queue *txq;
int tx_entry_len;
int ret;
if (dev->data->dev_started) {
hns3_err(hw, "tx_queue_setup after dev_start no supported");
return -EINVAL;
}
if (nb_desc > HNS3_MAX_RING_DESC || nb_desc < HNS3_MIN_RING_DESC ||
nb_desc % HNS3_ALIGN_RING_DESC) {
hns3_err(hw, "Number (%u) of tx descriptors is invalid",
nb_desc);
return -EINVAL;
}
ret = hns3_tx_queue_conf_check(hw, conf, nb_desc,
&tx_rs_thresh, &tx_free_thresh, idx);
if (ret)
return ret;
if (dev->data->tx_queues[idx] != NULL) {
hns3_tx_queue_release(dev->data->tx_queues[idx]);
@ -2005,11 +2050,15 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
txq->next_to_use = 0;
txq->next_to_clean = 0;
txq->tx_bd_ready = txq->nb_tx_desc - 1;
txq->tx_free_thresh = tx_free_thresh;
txq->tx_rs_thresh = tx_rs_thresh;
txq->port_id = dev->data->port_id;
txq->pvid_state = hw->port_base_vlan_cfg.state;
txq->configured = true;
txq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET +
idx * HNS3_TQP_REG_SIZE);
txq->io_tail_reg = (volatile void *)((char *)txq->io_base +
HNS3_RING_TX_TAIL_REG);
txq->min_tx_pkt_len = hw->min_tx_pkt_len;
txq->over_length_pkt_cnt = 0;
txq->exceed_limit_bd_pkt_cnt = 0;
@ -2024,12 +2073,6 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
return 0;
}
static inline void
hns3_queue_xmit(struct hns3_tx_queue *txq, uint32_t buf_num)
{
hns3_write_dev(txq, HNS3_RING_TX_TAIL_REG, buf_num);
}
static void
hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq)
{
@ -2798,6 +2841,154 @@ hns3_check_non_tso_pkt(uint16_t nb_buf, struct rte_mbuf **m_seg,
return 0;
}
static inline void
hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
{
struct hns3_entry *tx_entry;
struct hns3_desc *desc;
uint16_t tx_next_clean;
int i;
while (1) {
if (HNS3_GET_TX_QUEUE_PEND_BD_NUM(txq) < txq->tx_rs_thresh)
break;
/*
* All mbufs can be released only when the VLD bits of all
* descriptors in a batch are cleared.
*/
tx_next_clean = (txq->next_to_clean + txq->tx_rs_thresh - 1) %
txq->nb_tx_desc;
desc = &txq->tx_ring[tx_next_clean];
for (i = 0; i < txq->tx_rs_thresh; i++) {
if (rte_le_to_cpu_16(desc->tx.tp_fe_sc_vld_ra_ri) &
BIT(HNS3_TXD_VLD_B))
return;
desc--;
}
tx_entry = &txq->sw_ring[txq->next_to_clean];
for (i = 0; i < txq->tx_rs_thresh; i++)
rte_prefetch0((tx_entry + i)->mbuf);
for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) {
rte_mempool_put(tx_entry->mbuf->pool, tx_entry->mbuf);
tx_entry->mbuf = NULL;
}
txq->next_to_clean = (tx_next_clean + 1) % txq->nb_tx_desc;
txq->tx_bd_ready += txq->tx_rs_thresh;
}
}
static inline void
hns3_tx_backup_1mbuf(struct hns3_entry *tx_entry, struct rte_mbuf **pkts)
{
tx_entry->mbuf = pkts[0];
}
static inline void
hns3_tx_backup_4mbuf(struct hns3_entry *tx_entry, struct rte_mbuf **pkts)
{
hns3_tx_backup_1mbuf(&tx_entry[0], &pkts[0]);
hns3_tx_backup_1mbuf(&tx_entry[1], &pkts[1]);
hns3_tx_backup_1mbuf(&tx_entry[2], &pkts[2]);
hns3_tx_backup_1mbuf(&tx_entry[3], &pkts[3]);
}
static inline void
hns3_tx_setup_4bd(struct hns3_desc *txdp, struct rte_mbuf **pkts)
{
#define PER_LOOP_NUM 4
const uint16_t bd_flag = BIT(HNS3_TXD_VLD_B) | BIT(HNS3_TXD_FE_B);
uint64_t dma_addr;
uint32_t i;
for (i = 0; i < PER_LOOP_NUM; i++, txdp++, pkts++) {
dma_addr = rte_mbuf_data_iova(*pkts);
txdp->addr = rte_cpu_to_le_64(dma_addr);
txdp->tx.send_size = rte_cpu_to_le_16((*pkts)->data_len);
txdp->tx.paylen = 0;
txdp->tx.type_cs_vlan_tso_len = 0;
txdp->tx.ol_type_vlan_len_msec = 0;
txdp->tx.tp_fe_sc_vld_ra_ri = rte_cpu_to_le_16(bd_flag);
}
}
static inline void
hns3_tx_setup_1bd(struct hns3_desc *txdp, struct rte_mbuf **pkts)
{
const uint16_t bd_flag = BIT(HNS3_TXD_VLD_B) | BIT(HNS3_TXD_FE_B);
uint64_t dma_addr;
dma_addr = rte_mbuf_data_iova(*pkts);
txdp->addr = rte_cpu_to_le_64(dma_addr);
txdp->tx.send_size = rte_cpu_to_le_16((*pkts)->data_len);
txdp->tx.paylen = 0;
txdp->tx.type_cs_vlan_tso_len = 0;
txdp->tx.ol_type_vlan_len_msec = 0;
txdp->tx.tp_fe_sc_vld_ra_ri = rte_cpu_to_le_16(bd_flag);
}
static inline void
hns3_tx_fill_hw_ring(struct hns3_tx_queue *txq,
struct rte_mbuf **pkts,
uint16_t nb_pkts)
{
#define PER_LOOP_NUM 4
#define PER_LOOP_MASK (PER_LOOP_NUM - 1)
struct hns3_desc *txdp = &txq->tx_ring[txq->next_to_use];
struct hns3_entry *tx_entry = &txq->sw_ring[txq->next_to_use];
const uint32_t mainpart = (nb_pkts & ((uint32_t)~PER_LOOP_MASK));
const uint32_t leftover = (nb_pkts & ((uint32_t)PER_LOOP_MASK));
uint32_t i;
for (i = 0; i < mainpart; i += PER_LOOP_NUM) {
hns3_tx_backup_4mbuf(tx_entry + i, pkts + i);
hns3_tx_setup_4bd(txdp + i, pkts + i);
}
if (unlikely(leftover > 0)) {
for (i = 0; i < leftover; i++) {
hns3_tx_backup_1mbuf(tx_entry + mainpart + i,
pkts + mainpart + i);
hns3_tx_setup_1bd(txdp + mainpart + i,
pkts + mainpart + i);
}
}
}
uint16_t
hns3_xmit_pkts_simple(void *tx_queue,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
struct hns3_tx_queue *txq = tx_queue;
uint16_t nb_tx = 0;
hns3_tx_free_buffer_simple(txq);
nb_pkts = RTE_MIN(txq->tx_bd_ready, nb_pkts);
if (unlikely(nb_pkts == 0)) {
if (txq->tx_bd_ready == 0)
txq->queue_full_cnt++;
return 0;
}
txq->tx_bd_ready -= nb_pkts;
if (txq->next_to_use + nb_pkts > txq->nb_tx_desc) {
nb_tx = txq->nb_tx_desc - txq->next_to_use;
hns3_tx_fill_hw_ring(txq, tx_pkts, nb_tx);
txq->next_to_use = 0;
}
hns3_tx_fill_hw_ring(txq, tx_pkts + nb_tx, nb_pkts - nb_tx);
txq->next_to_use += nb_pkts - nb_tx;
hns3_write_reg_opt(txq->io_tail_reg, nb_pkts);
return nb_pkts;
}
uint16_t
hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
@ -2909,11 +3100,47 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
end_of_tx:
if (likely(nb_tx))
hns3_queue_xmit(txq, nb_hold);
hns3_write_reg_opt(txq->io_tail_reg, nb_hold);
return nb_tx;
}
int
hns3_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
struct rte_eth_burst_mode *mode)
{
eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
const char *info = NULL;
if (pkt_burst == hns3_xmit_pkts_simple)
info = "Scalar Simple";
else if (pkt_burst == hns3_xmit_pkts)
info = "Scalar";
if (info == NULL)
return -EINVAL;
snprintf(mode->info, sizeof(mode->info), "%s", info);
return 0;
}
static eth_tx_burst_t
hns3_get_tx_function(struct rte_eth_dev *dev, eth_tx_prep_t *prep)
{
uint64_t offloads = dev->data->dev_conf.txmode.offloads;
struct hns3_adapter *hns = dev->data->dev_private;
if (hns->tx_simple_allowed &&
offloads == (offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)) {
*prep = NULL;
return hns3_xmit_pkts_simple;
}
*prep = hns3_prep_pkts;
return hns3_xmit_pkts;
}
static uint16_t
hns3_dummy_rxtx_burst(void *dpdk_txq __rte_unused,
struct rte_mbuf **pkts __rte_unused,
@ -2925,12 +3152,13 @@ hns3_dummy_rxtx_burst(void *dpdk_txq __rte_unused,
void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev)
{
struct hns3_adapter *hns = eth_dev->data->dev_private;
eth_tx_prep_t prep = NULL;
if (hns->hw.adapter_state == HNS3_NIC_STARTED &&
rte_atomic16_read(&hns->hw.reset.resetting) == 0) {
eth_dev->rx_pkt_burst = hns3_get_rx_function(eth_dev);
eth_dev->tx_pkt_burst = hns3_xmit_pkts;
eth_dev->tx_pkt_prepare = hns3_prep_pkts;
eth_dev->tx_pkt_burst = hns3_get_tx_function(eth_dev, &prep);
eth_dev->tx_pkt_prepare = prep;
} else {
eth_dev->rx_pkt_burst = hns3_dummy_rxtx_burst;
eth_dev->tx_pkt_burst = hns3_dummy_rxtx_burst;
@ -2966,5 +3194,7 @@ hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
qinfo->nb_desc = txq->nb_tx_desc;
qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
}

drivers/net/hns3/hns3_rxtx.h

@ -13,6 +13,9 @@
#define HNS3_BULK_ALLOC_MBUF_NUM 32
#define HNS3_DEFAULT_RX_FREE_THRESH 32
#define HNS3_DEFAULT_TX_FREE_THRESH 32
#define HNS3_DEFAULT_TX_RS_THRESH 32
#define HNS3_TX_FAST_FREE_AHEAD 64
#define HNS3_512_BD_BUF_SIZE 512
#define HNS3_1K_BD_BUF_SIZE 1024
@ -282,6 +285,7 @@ struct hns3_rx_queue {
struct hns3_tx_queue {
void *io_base;
volatile void *io_tail_reg;
struct hns3_adapter *hns;
struct hns3_desc *tx_ring;
uint64_t tx_ring_phys_addr; /* TX ring DMA address */
@ -291,10 +295,32 @@ struct hns3_tx_queue {
uint16_t queue_id;
uint16_t port_id;
uint16_t nb_tx_desc;
/*
* index of next BD whose corresponding rte_mbuf can be released by
* driver.
*/
uint16_t next_to_clean;
/* index of next BD to be filled by driver to send packet */
uint16_t next_to_use;
/* num of remaining BDs ready to be filled by driver to send packet */
uint16_t tx_bd_ready;
/* free Tx mbufs when the number of available BDs drops below this value */
uint16_t tx_free_thresh;
/*
* For better performance in the Tx datapath, mbufs are released in
* batches of tx_rs_thresh.
* Checking only the VLD bit of the last descriptor in such a batch does
* not guarantee that all of its descriptors have been sent by the
* hardware, so the VLD bits of all descriptors in the batch must be
* cleared before the corresponding mbufs are freed.
* - tx_rs_thresh
*   Number of mbufs released at a time.
*/
uint16_t tx_rs_thresh;
/*
* port based vlan configuration state.
* value range: HNS3_PORT_BASE_VLAN_DISABLE / HNS3_PORT_BASE_VLAN_ENABLE
@ -360,6 +386,9 @@ struct hns3_tx_queue {
uint64_t pkt_padding_fail_cnt;
};
#define HNS3_GET_TX_QUEUE_PEND_BD_NUM(txq) \
((txq)->nb_tx_desc - 1 - (txq)->tx_bd_ready)
struct hns3_queue_info {
const char *type; /* point to queue memory name */
const char *ring_name; /* point to hardware ring name */
@ -525,8 +554,13 @@ int hns3_rx_burst_mode_get(struct rte_eth_dev *dev,
struct rte_eth_burst_mode *mode);
uint16_t hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
uint16_t hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
int hns3_tx_burst_mode_get(struct rte_eth_dev *dev,
__rte_unused uint16_t queue_id,
struct rte_eth_burst_mode *mode);
const uint32_t *hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev);
void hns3_init_rx_ptype_tble(struct rte_eth_dev *dev);
void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev);