net/mana: send packets

With all the TX queues created, MANA can send packets over those queues.
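For illustration only (not part of this patch): a minimal sketch of how an application reaches this TX path through the standard ethdev API. The helper name is hypothetical, and it assumes the MANA port and its TX queue are already configured and started and that the caller owns the mbuf array.

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/*
 * rte_eth_tx_burst() dispatches to mana_tx_burst() once the datapath is
 * started (dev->tx_pkt_burst is assigned below). Mbufs the driver did not
 * accept remain owned by the caller; they are freed here for simplicity.
 */
static uint16_t
send_one_burst(uint16_t port_id, uint16_t queue_id,
	       struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);

	for (uint16_t i = nb_sent; i < nb_pkts; i++)
		rte_pktmbuf_free(pkts[i]);

	return nb_sent;
}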

Signed-off-by: Long Li <longli@microsoft.com>
Long Li authored 2022-10-05 16:22:05 -07:00, committed by Ferruh Yigit
parent eb9994dd16
commit 7f322844f4
3 changed files with 316 additions and 0 deletions

@@ -56,6 +56,47 @@ struct mana_shared_data {

#define NOT_USING_CLIENT_DATA_UNIT 0

enum tx_packet_format_v2 {
	SHORT_PACKET_FORMAT = 0,
	LONG_PACKET_FORMAT = 1
};

struct transmit_short_oob_v2 {
	enum tx_packet_format_v2 packet_format : 2;
	uint32_t tx_is_outer_ipv4 : 1;
	uint32_t tx_is_outer_ipv6 : 1;
	uint32_t tx_compute_IP_header_checksum : 1;
	uint32_t tx_compute_TCP_checksum : 1;
	uint32_t tx_compute_UDP_checksum : 1;
	uint32_t suppress_tx_CQE_generation : 1;
	uint32_t VCQ_number : 24;
	uint32_t tx_transport_header_offset : 10;
	uint32_t VSQ_frame_num : 14;
	uint32_t short_vport_offset : 8;
};

struct transmit_long_oob_v2 {
	uint32_t tx_is_encapsulated_packet : 1;
	uint32_t tx_inner_is_ipv6 : 1;
	uint32_t tx_inner_TCP_options_present : 1;
	uint32_t inject_vlan_prior_tag : 1;
	uint32_t reserved1 : 12;
	uint32_t priority_code_point : 3;
	uint32_t drop_eligible_indicator : 1;
	uint32_t vlan_identifier : 12;
	uint32_t tx_inner_frame_offset : 10;
	uint32_t tx_inner_IP_header_relative_offset : 6;
	uint32_t long_vport_offset : 12;
	uint32_t reserved3 : 4;
	uint32_t reserved4 : 32;
	uint32_t reserved5 : 32;
};

struct transmit_oob_v2 {
	struct transmit_short_oob_v2 short_oob;
	struct transmit_long_oob_v2 long_oob;
};

enum gdma_queue_types {
	GDMA_QUEUE_TYPE_INVALID = 0,
	GDMA_QUEUE_SEND,
@@ -177,6 +218,17 @@ enum mana_cqe_type {
	CQE_RX_COALESCED_4 = 2,
	CQE_RX_OBJECT_FENCE = 3,
	CQE_RX_TRUNCATED = 4,

	CQE_TX_OKAY = 32,
	CQE_TX_SA_DROP = 33,
	CQE_TX_MTU_DROP = 34,
	CQE_TX_INVALID_OOB = 35,
	CQE_TX_INVALID_ETH_TYPE = 36,
	CQE_TX_HDR_PROCESSING_ERROR = 37,
	CQE_TX_VF_DISABLED = 38,
	CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39,
	CQE_TX_VPORT_DISABLED = 40,
	CQE_TX_VLAN_TAGGING_VIOLATION = 41,
};

struct mana_cqe_header {
@@ -185,6 +237,17 @@ struct mana_cqe_header {
	uint32_t vendor_err : 24;
}; /* HW DATA */

struct mana_tx_comp_oob {
	struct mana_cqe_header cqe_hdr;

	uint32_t tx_data_offset;

	uint32_t tx_sgl_offset : 5;
	uint32_t tx_wqe_offset : 27;

	uint32_t reserved[12];
}; /* HW DATA */

/* NDIS HASH Types */
#define NDIS_HASH_IPV4 RTE_BIT32(0)
#define NDIS_HASH_TCP_IPV4 RTE_BIT32(1)
@@ -348,6 +411,7 @@ struct mana_txq {
	uint32_t desc_ring_head, desc_ring_tail;

	struct mana_mr_btree mr_btree;

	struct mana_stats stats;
	unsigned int socket;
};
@@ -399,6 +463,8 @@ uint8_t *gdma_get_wqe_pointer(struct mana_gdma_queue *queue);

uint16_t mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **rx_pkts,
		       uint16_t pkts_n);

uint16_t mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts,
		       uint16_t pkts_n);

uint16_t mana_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
			       uint16_t pkts_n);

@@ -141,6 +141,7 @@ mana_mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
	case MANA_MP_REQ_START_RXTX:
		DRV_LOG(INFO, "Port %u starting datapath", dev->data->port_id);

		dev->tx_pkt_burst = mana_tx_burst;
		dev->rx_pkt_burst = mana_rx_burst;

		rte_mb();

@@ -164,3 +164,252 @@ get_vsq_frame_num(uint32_t vsq)
	v.gdma_txq_id = vsq;
	return v.vsq_frame;
}

uint16_t
mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct mana_txq *txq = dpdk_txq;
	struct mana_priv *priv = txq->priv;
	struct gdma_comp comp;
	int ret;
	void *db_page;
	uint16_t pkt_sent = 0;

	/* Process send completions from GDMA */
	while (gdma_poll_completion_queue(&txq->gdma_cq, &comp) == 1) {
		struct mana_txq_desc *desc =
			&txq->desc_ring[txq->desc_ring_tail];
		struct mana_tx_comp_oob *oob =
			(struct mana_tx_comp_oob *)&comp.completion_data[0];

		if (oob->cqe_hdr.cqe_type != CQE_TX_OKAY) {
			DRV_LOG(ERR,
				"mana_tx_comp_oob cqe_type %u vendor_err %u",
				oob->cqe_hdr.cqe_type, oob->cqe_hdr.vendor_err);
			txq->stats.errors++;
		} else {
			DRV_LOG(DEBUG, "mana_tx_comp_oob CQE_TX_OKAY");
			txq->stats.packets++;
		}

		if (!desc->pkt) {
			DRV_LOG(ERR, "mana_txq_desc has a NULL pkt");
		} else {
			txq->stats.bytes += desc->pkt->data_len;
			rte_pktmbuf_free(desc->pkt);
		}

		desc->pkt = NULL;
		txq->desc_ring_tail = (txq->desc_ring_tail + 1) % txq->num_desc;
		txq->gdma_sq.tail += desc->wqe_size_in_bu;
	}
	/* Post send requests to GDMA */
	for (uint16_t pkt_idx = 0; pkt_idx < nb_pkts; pkt_idx++) {
		struct rte_mbuf *m_pkt = tx_pkts[pkt_idx];
		struct rte_mbuf *m_seg = m_pkt;
		struct transmit_oob_v2 tx_oob = {0};
		struct one_sgl sgl = {0};
		uint16_t seg_idx;

		/* Drop the packet if it exceeds max segments */
		if (m_pkt->nb_segs > priv->max_send_sge) {
			DRV_LOG(ERR, "send packet segments %d exceeding max",
				m_pkt->nb_segs);
			continue;
		}

		/* Fill in the oob */
		tx_oob.short_oob.packet_format = SHORT_PACKET_FORMAT;
		tx_oob.short_oob.tx_is_outer_ipv4 =
			m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4 ? 1 : 0;
		tx_oob.short_oob.tx_is_outer_ipv6 =
			m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6 ? 1 : 0;

		tx_oob.short_oob.tx_compute_IP_header_checksum =
			m_pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ? 1 : 0;

		if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
		    RTE_MBUF_F_TX_TCP_CKSUM) {
			struct rte_tcp_hdr *tcp_hdr;

			/* HW needs partial TCP checksum */
			tcp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
					struct rte_tcp_hdr *,
					m_pkt->l2_len + m_pkt->l3_len);

			if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
				struct rte_ipv4_hdr *ip_hdr;

				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
						struct rte_ipv4_hdr *,
						m_pkt->l2_len);
				tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr,
							m_pkt->ol_flags);
			} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
				struct rte_ipv6_hdr *ip_hdr;

				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
						struct rte_ipv6_hdr *,
						m_pkt->l2_len);
				tcp_hdr->cksum = rte_ipv6_phdr_cksum(ip_hdr,
							m_pkt->ol_flags);
			} else {
				DRV_LOG(ERR, "Invalid input for TCP CKSUM");
			}

			tx_oob.short_oob.tx_compute_TCP_checksum = 1;
			tx_oob.short_oob.tx_transport_header_offset =
				m_pkt->l2_len + m_pkt->l3_len;
		}
		if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
		    RTE_MBUF_F_TX_UDP_CKSUM) {
			struct rte_udp_hdr *udp_hdr;

			/* HW needs partial UDP checksum */
			udp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
					struct rte_udp_hdr *,
					m_pkt->l2_len + m_pkt->l3_len);

			if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
				struct rte_ipv4_hdr *ip_hdr;

				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
						struct rte_ipv4_hdr *,
						m_pkt->l2_len);
				udp_hdr->dgram_cksum =
					rte_ipv4_phdr_cksum(ip_hdr,
							    m_pkt->ol_flags);
			} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
				struct rte_ipv6_hdr *ip_hdr;

				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
						struct rte_ipv6_hdr *,
						m_pkt->l2_len);
				udp_hdr->dgram_cksum =
					rte_ipv6_phdr_cksum(ip_hdr,
							    m_pkt->ol_flags);
			} else {
				DRV_LOG(ERR, "Invalid input for UDP CKSUM");
			}

			tx_oob.short_oob.tx_compute_UDP_checksum = 1;
		}
		tx_oob.short_oob.suppress_tx_CQE_generation = 0;
		tx_oob.short_oob.VCQ_number = txq->gdma_cq.id;

		tx_oob.short_oob.VSQ_frame_num =
			get_vsq_frame_num(txq->gdma_sq.id);
		tx_oob.short_oob.short_vport_offset = txq->tx_vp_offset;

		DRV_LOG(DEBUG, "tx_oob packet_format %u ipv4 %u ipv6 %u",
			tx_oob.short_oob.packet_format,
			tx_oob.short_oob.tx_is_outer_ipv4,
			tx_oob.short_oob.tx_is_outer_ipv6);

		DRV_LOG(DEBUG, "tx_oob checksum ip %u tcp %u udp %u offset %u",
			tx_oob.short_oob.tx_compute_IP_header_checksum,
			tx_oob.short_oob.tx_compute_TCP_checksum,
			tx_oob.short_oob.tx_compute_UDP_checksum,
			tx_oob.short_oob.tx_transport_header_offset);

		DRV_LOG(DEBUG, "pkt[%d]: buf_addr 0x%p, nb_segs %d, pkt_len %d",
			pkt_idx, m_pkt->buf_addr, m_pkt->nb_segs,
			m_pkt->pkt_len);
		/* Create SGL for packet data buffers */
		for (seg_idx = 0; seg_idx < m_pkt->nb_segs; seg_idx++) {
			struct mana_mr_cache *mr =
				mana_find_pmd_mr(&txq->mr_btree, priv, m_seg);

			if (!mr) {
				DRV_LOG(ERR, "failed to get MR, pkt_idx %u",
					pkt_idx);
				break;
			}

			sgl.gdma_sgl[seg_idx].address =
				rte_cpu_to_le_64(rte_pktmbuf_mtod(m_seg,
								  uint64_t));
			sgl.gdma_sgl[seg_idx].size = m_seg->data_len;
			sgl.gdma_sgl[seg_idx].memory_key = mr->lkey;

			DRV_LOG(DEBUG,
				"seg idx %u addr 0x%" PRIx64 " size %x key %x",
				seg_idx, sgl.gdma_sgl[seg_idx].address,
				sgl.gdma_sgl[seg_idx].size,
				sgl.gdma_sgl[seg_idx].memory_key);

			m_seg = m_seg->next;
		}

		/* Skip this packet if we can't populate all segments */
		if (seg_idx != m_pkt->nb_segs)
			continue;
		struct gdma_work_request work_req = {0};
		struct gdma_posted_wqe_info wqe_info = {0};

		work_req.gdma_header.struct_size = sizeof(work_req);
		wqe_info.gdma_header.struct_size = sizeof(wqe_info);

		work_req.sgl = sgl.gdma_sgl;
		work_req.num_sgl_elements = m_pkt->nb_segs;
		work_req.inline_oob_size_in_bytes =
			sizeof(struct transmit_short_oob_v2);
		work_req.inline_oob_data = &tx_oob;
		work_req.flags = 0;
		work_req.client_data_unit = NOT_USING_CLIENT_DATA_UNIT;

		ret = gdma_post_work_request(&txq->gdma_sq, &work_req,
					     &wqe_info);
		if (!ret) {
			struct mana_txq_desc *desc =
				&txq->desc_ring[txq->desc_ring_head];

			/* Update queue for tracking pending requests */
			desc->pkt = m_pkt;
			desc->wqe_size_in_bu = wqe_info.wqe_size_in_bu;
			txq->desc_ring_head =
				(txq->desc_ring_head + 1) % txq->num_desc;

			pkt_sent++;

			DRV_LOG(DEBUG, "nb_pkts %u pkt[%d] sent",
				nb_pkts, pkt_idx);
		} else {
			DRV_LOG(INFO, "pkt[%d] failed to post send ret %d",
				pkt_idx, ret);
			break;
		}
	}
	/* Ring hardware door bell */
	db_page = priv->db_page;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[priv->dev_data->port_id];
		struct mana_process_priv *process_priv = dev->process_private;

		db_page = process_priv->db_page;
	}

	if (pkt_sent) {
		ret = mana_ring_doorbell(db_page, GDMA_QUEUE_SEND,
					 txq->gdma_sq.id,
					 txq->gdma_sq.head *
						GDMA_WQE_ALIGNMENT_UNIT_SIZE);
		if (ret)
			DRV_LOG(ERR, "mana_ring_doorbell failed ret %d", ret);
	}

	return pkt_sent;
}
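
For reference only (not from this patch): a sketch of the mbuf metadata an application sets so that the checksum branches above take effect. The helper name is hypothetical; the field and flag names are the standard rte_mbuf TX offload ones, and the example assumes an untagged Ethernet/IPv4/TCP frame with headers starting at offset 0 and the matching TX offloads enabled at port configuration time.

#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>

/* Hypothetical helper: request IPv4 header and TCP checksum offload. */
static void
request_tcp_ipv4_csum_offload(struct rte_mbuf *m)
{
	m->l2_len = sizeof(struct rte_ether_hdr);
	m->l3_len = sizeof(struct rte_ipv4_hdr);
	m->ol_flags |= RTE_MBUF_F_TX_IPV4 |
		       RTE_MBUF_F_TX_IP_CKSUM |
		       RTE_MBUF_F_TX_TCP_CKSUM;

	/*
	 * With these flags set, mana_tx_burst() above seeds tcp_hdr->cksum
	 * with the pseudo-header checksum and sets
	 * tx_compute_IP_header_checksum, tx_compute_TCP_checksum and
	 * tx_transport_header_offset in the short OOB.
	 */
}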