virtio: mergeable buffers
This patch adds support for the mergeable receive buffer feature (VIRTIO_NET_F_MRG_RXBUF) to the DPDK virtio PMD, so it can receive jumbo frames larger than a single mbuf, e.g. 3K, 4K or even 9K.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
Acked-by: Huawei Xie <huawei.xie@intel.com>
Tested-by: Jingguo Fu <jingguox.fu@intel.com>
parent 3f6a696f10
commit 13ce5e7eb9
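Background for the change (an illustrative sketch, not part of the patch): once VIRTIO_NET_F_MRG_RXBUF is negotiated, the device prefixes each packet with the larger mergeable header, whose num_buffers field says how many receive buffers one frame occupies. The layouts below follow the virtio specification; the demo main() only prints the two sizes that hw->vtnet_hdr_size switches between.

    #include <stdint.h>
    #include <stdio.h>

    /* Per-buffer header from the virtio spec (10 bytes). */
    struct virtio_net_hdr {
    	uint8_t  flags;
    	uint8_t  gso_type;
    	uint16_t hdr_len;
    	uint16_t gso_size;
    	uint16_t csum_start;
    	uint16_t csum_offset;
    };

    /* Header used once VIRTIO_NET_F_MRG_RXBUF is negotiated (12 bytes);
     * num_buffers tells how many receive buffers carry this one frame. */
    struct virtio_net_hdr_mrg_rxbuf {
    	struct virtio_net_hdr hdr;
    	uint16_t num_buffers;
    };

    int main(void)
    {
    	/* The patch sets hw->vtnet_hdr_size to one of these two values. */
    	printf("plain: %zu bytes, mergeable: %zu bytes\n",
    	       sizeof(struct virtio_net_hdr),
    	       sizeof(struct virtio_net_hdr_mrg_rxbuf));
    	return 0;
    }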
@@ -337,7 +337,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
 			dev->data->port_id, queue_idx);
 		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
-			vq_size * sizeof(struct virtio_net_hdr),
+			vq_size * hw->vtnet_hdr_size,
 			socket_id, 0, CACHE_LINE_SIZE);
 		if (vq->virtio_net_hdr_mz == NULL) {
 			rte_free(vq);
@@ -346,7 +346,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 		vq->virtio_net_hdr_mem =
 			vq->virtio_net_hdr_mz->phys_addr;
 		memset(vq->virtio_net_hdr_mz->addr, 0,
-			vq_size * sizeof(struct virtio_net_hdr));
+			vq_size * hw->vtnet_hdr_size);
 	} else if (queue_type == VTNET_CQ) {
 		/* Allocate a page for control vq command, data and status */
 		snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
@@ -571,9 +571,6 @@ virtio_negotiate_features(struct virtio_hw *hw)
 	mask |= VIRTIO_NET_F_GUEST_TSO4 | VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN;
 	mask |= VTNET_LRO_FEATURES;
 
-	/* rx_mbuf should not be in multiple merged segments */
-	mask |= VIRTIO_NET_F_MRG_RXBUF;
-
 	/* not negotiating INDIRECT descriptor table support */
 	mask |= VIRTIO_RING_F_INDIRECT_DESC;
 
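Note on the hunk above: the PMD negotiates by subtraction, collecting refused features in mask and clearing them from what the host offers; deleting the VIRTIO_NET_F_MRG_RXBUF lines therefore lets the feature through. A standalone sketch of the pattern (the helper name and the 32-bit feature word are assumptions for illustration; the bit positions follow the virtio spec):

    #include <stdint.h>

    #define VIRTIO_NET_F_MRG_RXBUF      (1u << 15)
    #define VIRTIO_RING_F_INDIRECT_DESC (1u << 28)

    /* Hypothetical helper: keep every host feature not in the refusal mask. */
    static uint32_t negotiate_features(uint32_t host_features)
    {
    	uint32_t mask = 0;

    	/* Features this driver refuses. Before the patch,
    	 * VIRTIO_NET_F_MRG_RXBUF sat in this mask too, so it could
    	 * never be negotiated. */
    	mask |= VIRTIO_RING_F_INDIRECT_DESC;

    	return host_features & ~mask;
    }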
@@ -746,7 +743,6 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
 	}
 
 	eth_dev->dev_ops = &virtio_eth_dev_ops;
-	eth_dev->rx_pkt_burst = &virtio_recv_pkts;
 	eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
 
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
@@ -801,10 +797,13 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
 	virtio_negotiate_features(hw);
 
 	/* Setting up rx_header size for the device */
-	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
+	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-	else
+	} else {
+		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
+	}
 
 	/* Allocate memory for storing MAC addresses */
 	eth_dev->data->mac_addrs = rte_zmalloc("virtio", ETHER_ADDR_LEN, 0);
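The init hunk above ties the burst-function choice to the header size so the receive path and the descriptor layout always agree. A reduced sketch of that dispatch (struct and function names here are hypothetical stand-ins for struct virtio_hw and the real burst functions):

    #include <stdint.h>

    #define VIRTIO_NET_F_MRG_RXBUF (1u << 15)

    typedef uint16_t (*rx_burst_fn)(void *rxq, void **pkts, uint16_t nb);

    /* Stubs standing in for virtio_recv_pkts / virtio_recv_mergeable_pkts. */
    static uint16_t recv_plain(void *q, void **p, uint16_t n)
    { (void)q; (void)p; (void)n; return 0; }
    static uint16_t recv_mergeable(void *q, void **p, uint16_t n)
    { (void)q; (void)p; (void)n; return 0; }

    struct dev_sketch {
    	uint32_t    features;       /* negotiated feature bits */
    	uint16_t    vtnet_hdr_size; /* 10 bytes plain, 12 mergeable */
    	rx_burst_fn rx_pkt_burst;
    };

    /* Pick both the Rx routine and the per-buffer header size at once,
     * mirroring the patched eth_virtio_dev_init(). */
    static void setup_rx_path(struct dev_sketch *d)
    {
    	if (d->features & VIRTIO_NET_F_MRG_RXBUF) {
    		d->rx_pkt_burst = recv_mergeable;
    		d->vtnet_hdr_size = 12;
    	} else {
    		d->rx_pkt_burst = recv_plain;
    		d->vtnet_hdr_size = 10;
    	}
    }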
@@ -1009,7 +1008,7 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
 
 		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
 					dev->data->rx_queues[i])) != NULL) {
-			rte_pktmbuf_free_seg(buf);
+			rte_pktmbuf_free(buf);
 			mbuf_num++;
 		}
 
@@ -1028,7 +1027,8 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
 		mbuf_num = 0;
 		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
 					dev->data->tx_queues[i])) != NULL) {
-			rte_pktmbuf_free_seg(buf);
+			rte_pktmbuf_free(buf);
+
 			mbuf_num++;
 		}
 
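Throughout the patch, rte_pktmbuf_free_seg() calls become rte_pktmbuf_free(): the former returns exactly one segment to its pool, while the latter walks the pkt.next chain, which matters once a received frame may span several mbufs. A minimal model of the distinction (plain malloc/free stand in for mempool operations):

    #include <stdlib.h>

    /* Reduced mbuf: only the segment link matters for this point. */
    struct mbuf_sketch {
    	struct mbuf_sketch *next;
    };

    /* Counterpart of rte_pktmbuf_free_seg(): releases one segment only. */
    static void free_seg(struct mbuf_sketch *m)
    {
    	free(m);
    }

    /* Counterpart of rte_pktmbuf_free(): releases the whole chain, so no
     * segment leaks when a jumbo frame was spread over several buffers. */
    static void free_chain(struct mbuf_sketch *m)
    {
    	while (m != NULL) {
    		struct mbuf_sketch *next = m->next;
    		free_seg(m);
    		m = next;
    	}
    }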
@@ -104,6 +104,9 @@ int virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts);
 
+uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts);
+
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 
@@ -146,6 +146,7 @@ static inline int
 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
 {
 	struct vq_desc_extra *dxp;
+	struct virtio_hw *hw = vq->hw;
 	struct vring_desc *start_dp;
 	uint16_t needed = 1;
 	uint16_t head_idx, idx;
@@ -165,9 +166,11 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
 	dxp->ndescs = needed;
 
 	start_dp = vq->vq_ring.desc;
-	start_dp[idx].addr =
-		(uint64_t) (cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
-	start_dp[idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+	start_dp[idx].addr =
+		(uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
+		- hw->vtnet_hdr_size);
+	start_dp[idx].len =
+		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
 	start_dp[idx].flags = VRING_DESC_F_WRITE;
 	idx = start_dp[idx].next;
 	vq->vq_desc_head_idx = idx;
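In the refill hunk above, the descriptor is aimed vtnet_hdr_size bytes before the packet data, so the device writes the virtio header into the mbuf headroom and the payload starts exactly at pkt.data. A small arithmetic sketch (all constants illustrative; RTE_PKTMBUF_HEADROOM defaults to 128):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	const uint64_t buf_physaddr = 0x100000;  /* illustrative */
    	const uint32_t headroom = 128;           /* RTE_PKTMBUF_HEADROOM */
    	const uint32_t buf_len = 2048;           /* illustrative */
    	const uint16_t vtnet_hdr_size = 12;      /* mergeable header */

    	/* Descriptor start: header lands in headroom, data right after. */
    	uint64_t desc_addr = buf_physaddr + headroom - vtnet_hdr_size;
    	uint32_t desc_len  = buf_len - headroom + vtnet_hdr_size;

    	printf("desc addr=%#llx len=%u (header fills the %u bytes before data)\n",
    	       (unsigned long long)desc_addr, (unsigned)desc_len,
    	       (unsigned)vtnet_hdr_size);
    	return 0;
    }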
@@ -184,8 +187,10 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
 {
 	struct vq_desc_extra *dxp;
 	struct vring_desc *start_dp;
-	uint16_t needed = 2;
+	uint16_t seg_num = cookie->pkt.nb_segs;
+	uint16_t needed = 1 + seg_num;
 	uint16_t head_idx, idx;
+	uint16_t head_size = txvq->hw->vtnet_hdr_size;
 
 	if (unlikely(txvq->vq_free_cnt == 0))
 		return -ENOSPC;
@@ -198,19 +203,25 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
 	idx = head_idx;
 	dxp = &txvq->vq_descx[idx];
 	if (dxp->cookie != NULL)
-		rte_pktmbuf_free_seg(dxp->cookie);
+		rte_pktmbuf_free(dxp->cookie);
 	dxp->cookie = (void *)cookie;
 	dxp->ndescs = needed;
 
 	start_dp = txvq->vq_ring.desc;
-	start_dp[idx].addr =
-		txvq->virtio_net_hdr_mem + idx * sizeof(struct virtio_net_hdr);
-	start_dp[idx].len = sizeof(struct virtio_net_hdr);
+	start_dp[idx].addr =
+		txvq->virtio_net_hdr_mem + idx * head_size;
+	start_dp[idx].len = (uint32_t)head_size;
 	start_dp[idx].flags = VRING_DESC_F_NEXT;
-	idx = start_dp[idx].next;
-	start_dp[idx].addr = RTE_MBUF_DATA_DMA_ADDR(cookie);
-	start_dp[idx].len = cookie->pkt.data_len;
-	start_dp[idx].flags = 0;
+
+	for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
+		idx = start_dp[idx].next;
+		start_dp[idx].addr = RTE_MBUF_DATA_DMA_ADDR(cookie);
+		start_dp[idx].len = cookie->pkt.data_len;
+		start_dp[idx].flags = VRING_DESC_F_NEXT;
+		cookie = cookie->pkt.next;
+	}
+
+	start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
 	idx = start_dp[idx].next;
 	txvq->vq_desc_head_idx = idx;
 	if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
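With multi-segment transmit, one packet now consumes 1 + nb_segs descriptors: a header descriptor followed by one descriptor per segment, all linked with VRING_DESC_F_NEXT and terminated by clearing that flag on the last entry. A toy version of the chaining loop (struct names are simplified stand-ins; ring wrap-around and free-count bookkeeping are elided):

    #include <stdint.h>

    #define VRING_DESC_F_NEXT 1

    struct desc_sketch {
    	uint64_t addr;
    	uint32_t len;
    	uint16_t flags;
    	uint16_t next;
    };

    struct seg_sketch {
    	uint64_t dma_addr;
    	uint32_t data_len;
    	struct seg_sketch *next;
    };

    /* Chain one header descriptor plus one descriptor per segment,
     * mirroring the patched virtqueue_enqueue_xmit(). Returns the
     * index following the chain. */
    static uint16_t enqueue_chain(struct desc_sketch *dp, uint16_t idx,
    			      uint64_t hdr_addr, uint16_t hdr_size,
    			      struct seg_sketch *seg)
    {
    	dp[idx].addr = hdr_addr;
    	dp[idx].len = hdr_size;
    	dp[idx].flags = VRING_DESC_F_NEXT;

    	for (; seg != NULL; seg = seg->next) {
    		idx = dp[idx].next;
    		dp[idx].addr = seg->dma_addr;
    		dp[idx].len = seg->data_len;
    		dp[idx].flags = VRING_DESC_F_NEXT;
    	}

    	dp[idx].flags &= ~VRING_DESC_F_NEXT; /* terminate the chain */
    	return dp[idx].next;
    }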
@@ -284,7 +295,7 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
 			error = virtqueue_enqueue_recv_refill(vq, m);
 
 			if (error) {
-				rte_pktmbuf_free_seg(m);
+				rte_pktmbuf_free(m);
 				break;
 			}
 			nbufs++;
@@ -423,7 +434,7 @@ virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
 	error = virtqueue_enqueue_recv_refill(vq, m);
 	if (unlikely(error)) {
 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
-		rte_pktmbuf_free_seg(m);
+		rte_pktmbuf_free(m);
 	}
 }
 
@@ -433,13 +444,13 @@ uint16_t
 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	struct virtqueue *rxvq = rx_queue;
-	struct virtio_hw *hw = rxvq->hw;
 	struct rte_mbuf *rxm, *new_mbuf;
 	uint16_t nb_used, num, nb_rx = 0;
 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
 	int error;
 	uint32_t i, nb_enqueued = 0;
+	const uint32_t hdr_size = sizeof(struct virtio_net_hdr);
 
 	nb_used = VIRTQUEUE_NUSED(rxvq);
@@ -460,8 +471,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
 
-		if (unlikely(len[i]
-			     < (uint32_t)hw->vtnet_hdr_size + ETHER_HDR_LEN)) {
+		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
 			PMD_RX_LOG(ERR, "Packet drop");
 			nb_enqueued++;
 			virtio_discard_rxbuf(rxvq, rxm);
@@ -471,17 +481,16 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 		rxm->pkt.in_port = rxvq->port_id;
 		rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+
 		rxm->pkt.nb_segs = 1;
 		rxm->pkt.next = NULL;
-		rxm->pkt.pkt_len = (uint32_t)(len[i]
-					      - sizeof(struct virtio_net_hdr));
-		rxm->pkt.data_len = (uint16_t)(len[i]
-					       - sizeof(struct virtio_net_hdr));
+		rxm->pkt.pkt_len = (uint32_t)(len[i] - hdr_size);
+		rxm->pkt.data_len = (uint16_t)(len[i] - hdr_size);
 
 		VIRTIO_DUMP_PACKET(rxm, rxm->pkt.data_len);
 
 		rx_pkts[nb_rx++] = rxm;
-		rxvq->bytes += len[i] - sizeof(struct virtio_net_hdr);
+		rxvq->bytes += rx_pkts[nb_rx - 1]->pkt.pkt_len;
 	}
 
 	rxvq->packets += nb_rx;
@@ -498,11 +507,165 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		}
 		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
 		if (unlikely(error)) {
-			rte_pktmbuf_free_seg(new_mbuf);
+			rte_pktmbuf_free(new_mbuf);
 			break;
 		}
 		nb_enqueued++;
 	}
 
 	if (likely(nb_enqueued)) {
 		if (unlikely(virtqueue_kick_prepare(rxvq))) {
 			virtqueue_notify(rxvq);
 			PMD_RX_LOG(DEBUG, "Notified\n");
 		}
 	}
 
 	vq_update_avail_idx(rxvq);
 
 	return nb_rx;
 }
 
+uint16_t
+virtio_recv_mergeable_pkts(void *rx_queue,
+			struct rte_mbuf **rx_pkts,
+			uint16_t nb_pkts)
+{
+	struct virtqueue *rxvq = rx_queue;
+	struct rte_mbuf *rxm, *new_mbuf;
+	uint16_t nb_used, num, nb_rx = 0;
+	uint32_t len[VIRTIO_MBUF_BURST_SZ];
+	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
+	struct rte_mbuf *prev;
+	int error;
+	uint32_t i = 0, nb_enqueued = 0;
+	uint32_t seg_num = 0;
+	uint16_t extra_idx = 0;
+	uint32_t seg_res = 0;
+	const uint32_t hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+
+	nb_used = VIRTQUEUE_NUSED(rxvq);
+
+	rmb();
+
+	if (nb_used == 0)
+		return 0;
+
+	PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);
+
+	while (i < nb_used) {
+		struct virtio_net_hdr_mrg_rxbuf *header;
+
+		if (nb_rx == nb_pkts)
+			break;
+
+		num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
+		if (num != 1)
+			continue;
+
+		i++;
+
+		PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
+		PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);
+
+		rxm = rcv_pkts[0];
+
+		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
+			PMD_RX_LOG(ERR, "Packet drop\n");
+			nb_enqueued++;
+			virtio_discard_rxbuf(rxvq, rxm);
+			rxvq->errors++;
+			continue;
+		}
+
+		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
+			RTE_PKTMBUF_HEADROOM - hdr_size);
+		seg_num = header->num_buffers;
+
+		if (seg_num == 0)
+			seg_num = 1;
+
+		rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+		rxm->pkt.nb_segs = seg_num;
+		rxm->pkt.next = NULL;
+		rxm->pkt.pkt_len = (uint32_t)(len[0] - hdr_size);
+		rxm->pkt.data_len = (uint16_t)(len[0] - hdr_size);
+
+		rxm->pkt.in_port = rxvq->port_id;
+		rx_pkts[nb_rx] = rxm;
+		prev = rxm;
+
+		seg_res = seg_num - 1;
+
+		while (seg_res != 0) {
+			/*
+			 * Get extra segments for current uncompleted packet.
+			 */
+			uint32_t rcv_cnt =
+				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
+			if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
+				uint32_t rx_num =
+					virtqueue_dequeue_burst_rx(rxvq,
+					rcv_pkts, len, rcv_cnt);
+				i += rx_num;
+				rcv_cnt = rx_num;
+			} else {
+				PMD_RX_LOG(ERR,
+					"No enough segments for packet.\n");
+				nb_enqueued++;
+				virtio_discard_rxbuf(rxvq, rxm);
+				rxvq->errors++;
+				break;
+			}
+
+			extra_idx = 0;
+
+			while (extra_idx < rcv_cnt) {
+				rxm = rcv_pkts[extra_idx];
+
+				rxm->pkt.data =
+					(char *)rxm->buf_addr +
+					RTE_PKTMBUF_HEADROOM - hdr_size;
+				rxm->pkt.next = NULL;
+				rxm->pkt.pkt_len = (uint32_t)(len[extra_idx]);
+				rxm->pkt.data_len = (uint16_t)(len[extra_idx]);
+
+				if (prev)
+					prev->pkt.next = rxm;
+
+				prev = rxm;
+				rx_pkts[nb_rx]->pkt.pkt_len += rxm->pkt.pkt_len;
+				extra_idx++;
+			}
+
+			seg_res -= rcv_cnt;
+		}
+
+		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
+			rx_pkts[nb_rx]->pkt.data_len);
+
+		rxvq->bytes += rx_pkts[nb_rx]->pkt.pkt_len;
+		nb_rx++;
+	}
+
+	rxvq->packets += nb_rx;
+
+	/* Allocate new mbuf for the used descriptor */
+	error = ENOSPC;
+	while (likely(!virtqueue_full(rxvq))) {
+		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
+		if (unlikely(new_mbuf == NULL)) {
+			struct rte_eth_dev *dev
+				= &rte_eth_devices[rxvq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
+		if (unlikely(error)) {
+			rte_pktmbuf_free(new_mbuf);
+			break;
+		}
+		nb_enqueued++;
+	}
+
+	if (likely(nb_enqueued)) {
+		if (unlikely(virtqueue_kick_prepare(rxvq))) {
+			virtqueue_notify(rxvq);
+			PMD_RX_LOG(DEBUG, "Notified\n");
+		}
+	}
+
+	vq_update_avail_idx(rxvq);
+
+	return nb_rx;
+}
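In outline, virtio_recv_mergeable_pkts() dequeues the first buffer, reads num_buffers from its header, then keeps dequeuing and linking extra buffers onto the first mbuf until the frame is complete, accumulating pkt_len as it goes. A compact model of that stitching step (a sketch over pre-dequeued arrays; the real code pulls segments in bursts via virtqueue_dequeue_burst_rx() and discards the packet when the ring runs short):

    #include <stdint.h>
    #include <stddef.h>

    struct seg_sketch { struct seg_sketch *next; };

    /* Link num_buffers dequeued buffers into one packet chain and
     * accumulate the total length, as the new receive path does. */
    static struct seg_sketch *assemble(struct seg_sketch **bufs,
    				   const uint32_t *lens,
    				   uint16_t num_buffers,
    				   uint32_t *pkt_len)
    {
    	struct seg_sketch *head = bufs[0], *prev = head;
    	uint16_t i;

    	*pkt_len = lens[0];           /* header already stripped */
    	head->next = NULL;

    	for (i = 1; i < num_buffers; i++) {
    		bufs[i]->next = NULL;
    		prev->next = bufs[i];
    		prev = bufs[i];
    		*pkt_len += lens[i];  /* extra segments carry data only */
    	}
    	return head;
    }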
@@ -536,12 +699,16 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	num = (uint16_t)(likely(nb_used < VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
 
 	while (nb_tx < nb_pkts) {
-		if (virtqueue_full(txvq) && num) {
-			virtqueue_dequeue_pkt_tx(txvq);
-			num--;
+		int need = tx_pkts[nb_tx]->pkt.nb_segs - txvq->vq_free_cnt;
+		int deq_cnt = RTE_MIN(need, (int)num);
+
+		num -= (deq_cnt > 0) ? deq_cnt : 0;
+		while (deq_cnt > 0) {
+			virtqueue_dequeue_pkt_tx(txvq);
+			num--;
+			deq_cnt--;
 		}
 
-		if (!virtqueue_full(txvq)) {
+		if (tx_pkts[nb_tx]->pkt.nb_segs <= txvq->vq_free_cnt) {
 			txm = tx_pkts[nb_tx];
 			/* Enqueue Packet buffers */
 			error = virtqueue_enqueue_xmit(txvq, txm);
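The transmit loop above now reclaims just enough completed descriptors to fit the next packet instead of one per iteration: need is how many slots are missing, clamped by the num completions actually available. The arithmetic, isolated (a hedged sketch; names mirror the diff but the function itself is hypothetical):

    /* How many used Tx descriptors to reclaim before enqueueing a packet
     * with nb_segs segments, given free_cnt free slots and num completed
     * packets available in the used ring. */
    static int tx_descs_to_reclaim(int nb_segs, int free_cnt, int num)
    {
    	int need = nb_segs - free_cnt;
    	int deq_cnt = need < num ? need : num;   /* RTE_MIN(need, num) */

    	return deq_cnt > 0 ? deq_cnt : 0;
    }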
@@ -555,7 +722,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 				break;
 			}
 			nb_tx++;
-			txvq->bytes += txm->pkt.data_len;
+			txvq->bytes += txm->pkt.pkt_len;
 		} else {
 			PMD_TX_LOG(ERR, "No free tx descriptors to transmit");
 			break;