vmxnet3: improve Rx performance
This patch includes two small performance optimizations on the Rx path:

(1) It adds unlikely hints on various infrequent error paths to the
    compiler to make branch prediction more efficient.

(2) It moves a constant assignment out of the packet polling loop,
    saving one branch per packet.

Performance evaluation configs:
- On the DPDK side, an l3 forwarding app runs inside a VM on ESXi,
  with one core assigned for polling.
- On the client side, pktgen/dpdk is used to generate 64B tcp packets
  at line rate (14.8M PPS).

Performance results on a Nehalem box (4 cores @ 2.8GHz x 2), with CPU
usage collected factoring out the idle loop cost:
- Before the patch: ~900K PPS, with 65% of a core used for DPDK.
- After the patch: the same packet rate, with only 45% of a core used.

Signed-off-by: Yong Wang <yongwang@vmware.com>
Parent: d768f6273c
Commit: 14680e3747
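Before the diff, a minimal compilable sketch of the two patterns the patch applies. The fill_ring function, struct ring, and the BTYPE_* constants below are hypothetical stand-ins (with malloc standing in for mbuf allocation), not the driver's real types; the unlikely() macro, however, is defined over GCC's __builtin_expect the same way DPDK's rte_branch_prediction.h defines it:

#include <stdint.h>
#include <stdlib.h>

/* Branch-prediction hint: tell the compiler the condition is rarely
 * true, so the hot path is laid out as the fall-through. */
#define unlikely(x) __builtin_expect(!!(x), 0)

/* Hypothetical descriptor/ring types, for illustration only. */
enum { BTYPE_HEAD, BTYPE_BODY };
struct desc { uint32_t btype; };
struct ring { struct desc *base; uint32_t next2fill; uint32_t size; };

static int fill_ring(struct ring *r, uint8_t ring_id)
{
	/* Pattern (2): ring_id is loop-invariant, so the buffer type is
	 * computed once here instead of re-branching per descriptor. */
	uint32_t val = (ring_id == 0) ? BTYPE_HEAD : BTYPE_BODY;
	uint32_t i;

	for (i = 0; i < r->size; i++) {
		void *buf = malloc(64);	/* stand-in for mbuf allocation */

		/* Pattern (1): allocation failure is the rare case. */
		if (unlikely(buf == NULL))
			return -1;

		r->base[r->next2fill].btype = val;
		r->next2fill = (r->next2fill + 1) % r->size;
		free(buf);	/* keep the sketch self-contained */
	}
	return 0;
}

This mirrors what the first two hunks below do to vmxnet3_post_rx_bufs: the val computation moves above the while loop, and the mbuf NULL check gains an unlikely() hint.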
@@ -451,6 +451,19 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
 	uint32_t i = 0, val = 0;
 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
 
+	if (ring_id == 0) {
+		/* Usually: One HEAD type buf per packet
+		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
+		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
+		 */
+
+		/* We use single packet buffer so all heads here */
+		val = VMXNET3_RXD_BTYPE_HEAD;
+	} else {
+		/* All BODY type buffers for 2nd ring */
+		val = VMXNET3_RXD_BTYPE_BODY;
+	}
+
 	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
 		struct Vmxnet3_RxDesc *rxd;
 		struct rte_mbuf *mbuf;
@@ -458,22 +471,9 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
 
 		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
 
-		if (ring->rid == 0) {
-			/* Usually: One HEAD type buf per packet
-			 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
-			 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
-			 */
-
-			/* We use single packet buffer so all heads here */
-			val = VMXNET3_RXD_BTYPE_HEAD;
-		} else {
-			/* All BODY type buffers for 2nd ring; which won't be used at all by ESXi */
-			val = VMXNET3_RXD_BTYPE_BODY;
-		}
-
 		/* Allocate blank mbuf for the current Rx Descriptor */
 		mbuf = rte_rxmbuf_alloc(rxq->mp);
-		if (mbuf == NULL) {
+		if (unlikely(mbuf == NULL)) {
 			PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
 			rxq->stats.rx_buf_alloc_failure++;
 			err = ENOMEM;
@@ -536,150 +536,140 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
 
-	if (rxq->stopped) {
+	if (unlikely(rxq->stopped)) {
 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
 		return 0;
 	}
 
 	while (rcd->gen == rxq->comp_ring.gen) {
 
 		if (nb_rx >= nb_pkts)
 			break;
 
 		idx = rcd->rxdIdx;
 		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
 
-		if (rcd->sop != 1 || rcd->eop != 1) {
+		if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
 			rte_pktmbuf_free_seg(rbi->m);
-
 			PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n)");
 			goto rcd_done;
+		}
 
-		} else {
-
-			PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
+		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
 
 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-			VMXNET3_ASSERT(rcd->len <= rxd->len);
-			VMXNET3_ASSERT(rbi->m);
+		VMXNET3_ASSERT(rcd->len <= rxd->len);
+		VMXNET3_ASSERT(rbi->m);
 #endif
-			if (rcd->len == 0) {
-				PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)",
-					   ring_idx, idx);
+		if (unlikely(rcd->len == 0)) {
+			PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)",
+				   ring_idx, idx);
 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-				VMXNET3_ASSERT(rcd->sop && rcd->eop);
+			VMXNET3_ASSERT(rcd->sop && rcd->eop);
 #endif
-				rte_pktmbuf_free_seg(rbi->m);
-
-				goto rcd_done;
-			}
+			rte_pktmbuf_free_seg(rbi->m);
+			goto rcd_done;
+		}
 
-			/* Assuming a packet is coming in a single packet buffer */
-			if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
-				PMD_RX_LOG(DEBUG,
-					   "Alert : Misbehaving device, incorrect "
-					   " buffer type used. iPacket dropped.");
-				rte_pktmbuf_free_seg(rbi->m);
-				goto rcd_done;
-			}
+		/* Assuming a packet is coming in a single packet buffer */
+		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
+			PMD_RX_LOG(DEBUG,
+				   "Alert : Misbehaving device, incorrect "
+				   " buffer type used. iPacket dropped.");
+			rte_pktmbuf_free_seg(rbi->m);
+			goto rcd_done;
+		}
 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-			VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
+		VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
 #endif
-			/* Get the packet buffer pointer from buf_info */
-			rxm = rbi->m;
+		/* Get the packet buffer pointer from buf_info */
+		rxm = rbi->m;
 
-			/* Clear descriptor associated buf_info to be reused */
-			rbi->m = NULL;
-			rbi->bufPA = 0;
+		/* Clear descriptor associated buf_info to be reused */
+		rbi->m = NULL;
+		rbi->bufPA = 0;
 
-			/* Update the index that we received a packet */
-			rxq->cmd_ring[ring_idx].next2comp = idx;
+		/* Update the index that we received a packet */
+		rxq->cmd_ring[ring_idx].next2comp = idx;
 
-			/* For RCD with EOP set, check if there is frame error */
-			if (rcd->err) {
-				rxq->stats.drop_total++;
-				rxq->stats.drop_err++;
-
-				if (!rcd->fcs) {
-					rxq->stats.drop_fcs++;
-					PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
-				}
-				PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
-					   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
-						 rxq->comp_ring.base), rcd->rxdIdx);
-				rte_pktmbuf_free_seg(rxm);
-
-				goto rcd_done;
-			}
+		/* For RCD with EOP set, check if there is frame error */
+		if (unlikely(rcd->err)) {
+			rxq->stats.drop_total++;
+			rxq->stats.drop_err++;
+
+			if (!rcd->fcs) {
+				rxq->stats.drop_fcs++;
+				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
+			}
+			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
+				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
+					 rxq->comp_ring.base), rcd->rxdIdx);
+			rte_pktmbuf_free_seg(rxm);
+			goto rcd_done;
+		}
 
-			/* Check for hardware stripped VLAN tag */
-			if (rcd->ts) {
-				PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
-					   rcd->tci);
-				rxm->ol_flags = PKT_RX_VLAN_PKT;
-#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-				VMXNET3_ASSERT(rxm &&
-					       rte_pktmbuf_mtod(rxm, void *));
-#endif
-				/* Copy vlan tag in packet buffer */
-				rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
-			} else {
-				rxm->ol_flags = 0;
-				rxm->vlan_tci = 0;
-			}
+		/* Check for hardware stripped VLAN tag */
+		if (rcd->ts) {
+			PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
+				   rcd->tci);
+			rxm->ol_flags = PKT_RX_VLAN_PKT;
+			/* Copy vlan tag in packet buffer */
+			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
+		} else {
+			rxm->ol_flags = 0;
+			rxm->vlan_tci = 0;
+		}
 
-			/* Initialize newly received packet buffer */
-			rxm->port = rxq->port_id;
-			rxm->nb_segs = 1;
-			rxm->next = NULL;
-			rxm->pkt_len = (uint16_t)rcd->len;
-			rxm->data_len = (uint16_t)rcd->len;
-			rxm->port = rxq->port_id;
-			rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		/* Initialize newly received packet buffer */
+		rxm->port = rxq->port_id;
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = (uint16_t)rcd->len;
+		rxm->data_len = (uint16_t)rcd->len;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
 
-			/* Check packet types, rx checksum errors, etc. Only support IPv4 so far. */
-			if (rcd->v4) {
-				struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
-				struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
-
-				if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
-					rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
-				else
-					rxm->ol_flags |= PKT_RX_IPV4_HDR;
-
-				if (!rcd->cnc) {
-					if (!rcd->ipc)
-						rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-
-					if ((rcd->tcp || rcd->udp) && !rcd->tuc)
-						rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-				}
-			}
+		/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
+		if (rcd->v4) {
+			struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
+			struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
+
+			if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
+				rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
+			else
+				rxm->ol_flags |= PKT_RX_IPV4_HDR;
+
+			if (!rcd->cnc) {
+				if (!rcd->ipc)
+					rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+
+				if ((rcd->tcp || rcd->udp) && !rcd->tuc)
+					rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+			}
+		}
 
-			rx_pkts[nb_rx++] = rxm;
+		rx_pkts[nb_rx++] = rxm;
 rcd_done:
-			rxq->cmd_ring[ring_idx].next2comp = idx;
-			VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
+		rxq->cmd_ring[ring_idx].next2comp = idx;
+		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
 
-			/* It's time to allocate some new buf and renew descriptors */
-			vmxnet3_post_rx_bufs(rxq, ring_idx);
-			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
-				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
-						       rxq->cmd_ring[ring_idx].next2fill);
-			}
+		/* It's time to allocate some new buf and renew descriptors */
+		vmxnet3_post_rx_bufs(rxq, ring_idx);
+		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
+			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
+					       rxq->cmd_ring[ring_idx].next2fill);
+		}
 
-			/* Advance to the next descriptor in comp_ring */
-			vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
+		/* Advance to the next descriptor in comp_ring */
+		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
 
-			rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
-			nb_rxd++;
-			if (nb_rxd > rxq->cmd_ring[0].size) {
-				PMD_RX_LOG(ERR,
-					   "Used up quota of receiving packets,"
-					   " relinquish control.");
-				break;
-			}
-		}
+		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
+		nb_rxd++;
+		if (nb_rxd > rxq->cmd_ring[0].size) {
+			PMD_RX_LOG(ERR,
+				   "Used up quota of receiving packets,"
+				   " relinquish control.");
+			break;
+		}
 	}
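For readers following the large vmxnet3_recv_pkts hunk above outside of diff context, here is a simplified, self-contained skeleton of the generation-bit polling structure the patch annotates. The types and the recv_burst helper are illustrative stand-ins, not the driver's API; only the overall loop shape matches the driver:

#include <stdint.h>

#define unlikely(x) __builtin_expect(!!(x), 0)

/* Illustrative stand-ins for the driver's completion-ring state. */
struct comp_desc { uint32_t gen; uint32_t err; uint16_t len; };
struct comp_ring {
	struct comp_desc *base;
	uint32_t next2proc;
	uint32_t gen;
	uint32_t size;
};

/* Poll completed descriptors: a descriptor belongs to software while
 * its generation bit matches the ring's; stop when it no longer
 * matches or the caller's burst quota is filled. */
static uint16_t recv_burst(struct comp_ring *cr, uint16_t *lens, uint16_t nb_pkts)
{
	uint16_t nb_rx = 0;
	struct comp_desc *rcd = &cr->base[cr->next2proc];

	while (rcd->gen == cr->gen && nb_rx < nb_pkts) {
		if (unlikely(rcd->err)) {
			/* rare path: count the error, recycle the buffer */
		} else {
			lens[nb_rx++] = rcd->len;
		}

		/* Advance, flipping the generation bit on wrap-around. */
		if (++cr->next2proc == cr->size) {
			cr->next2proc = 0;
			cr->gen ^= 1;
		}
		rcd = &cr->base[cr->next2proc];
	}
	return nb_rx;
}

The unlikely() hints added by the patch apply to exactly this kind of loop: a stopped queue, a malformed or zero-length descriptor, and a frame error are all off the fast path, so marking them as unlikely keeps the per-packet fall-through tight.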