mbuf: add prefetch helpers

Some architectures (e.g. Power8) have a cache line size of 128 bytes,
so drivers should not assume that prefetching the second part of the
mbuf with rte_prefetch0(&m->cacheline1) is valid.

This commit adds helpers that drivers can use to prefetch the Rx or Tx
part of the mbuf, whatever the cache line size.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Reviewed-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Authored by Olivier Matz on 2016-05-18 18:02:08 +02:00, committed by Thomas Monjalon
parent 125e39139b, commit e89880957f
7 changed files with 55 additions and 17 deletions
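
The hunks below convert the drivers and the prepare_tx_burst() Tx path to the
new helpers. As a minimal sketch of the intended usage (not part of this
commit; the function name and loop are illustrative only), an Rx burst routine
could warm both halves of each mbuf like this:

#include <rte_mbuf.h>

/* Illustrative only: prefetch a burst of mbufs before their descriptors
 * are parsed. Only the two rte_mbuf_prefetch_part*() calls come from
 * this commit. */
static inline void
example_prefetch_mbuf_burst(struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t i;

	for (i = 0; i < nb_pkts; i++) {
		/* First 64 bytes: fields used early in the Rx path. */
		rte_mbuf_prefetch_part1(pkts[i]);
		/* Next 64 bytes: Tx-side fields. On targets where
		 * RTE_CACHE_LINE_SIZE is 128 (e.g. Power8), this expands
		 * to a no-op because part1 already pulled in the whole
		 * mbuf. */
		rte_mbuf_prefetch_part2(pkts[i]);
	}
}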


@@ -487,10 +487,10 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rte_compiler_barrier();
 
 		if (split_packet) {
-			rte_prefetch0(&rx_pkts[pos]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
 		}
 
 		/* D.1 pkt 3,4 convert format from desc to pktmbuf */


@@ -297,10 +297,10 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
 
 		if (split_packet) {
-			rte_prefetch0(&rx_pkts[pos]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
 		}
 
 		/* avoid compiler reorder optimization */


@@ -307,10 +307,10 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
 
 		if (split_packet) {
-			rte_prefetch0(&rx_pkts[pos]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
 		}
 
 		/* avoid compiler reorder optimization */


@@ -3283,8 +3283,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		 * Fetch initial bytes of packet descriptor into a
 		 * cacheline while allocating rep.
 		 */
-		rte_prefetch0(seg);
-		rte_prefetch0(&seg->cacheline1);
+		rte_mbuf_prefetch_part1(seg);
+		rte_mbuf_prefetch_part2(seg);
 		ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
 						    &flags);
 		if (unlikely(ret < 0)) {


@@ -1134,8 +1134,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		 * Fetch initial bytes of packet descriptor into a
 		 * cacheline while allocating rep.
 		 */
-		rte_prefetch0(seg);
-		rte_prefetch0(&seg->cacheline1);
+		rte_mbuf_prefetch_part1(seg);
+		rte_mbuf_prefetch_part2(seg);
 		ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci);
 		if (unlikely(ret < 0)) {
 			struct ibv_wc wc;


@@ -298,7 +298,7 @@ prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t nb_pkts, uint8_t port)
 	const int32_t prefetch_offset = 2;
 
 	for (i = 0; i < (nb_pkts - prefetch_offset); i++) {
-		rte_prefetch0(pkts[i + prefetch_offset]->cacheline1);
+		rte_mbuf_prefetch_part2(pkts[i + prefetch_offset]);
 		prepare_tx_pkt(pkts[i], port);
 	}
 	/* Process left packets */


@@ -842,6 +842,44 @@ struct rte_mbuf {
 	uint16_t timesync;
 } __rte_cache_aligned;
 
+/**
+ * Prefetch the first part of the mbuf
+ *
+ * The first 64 bytes of the mbuf correspond to fields that are used early
+ * in the receive path. If the cache line of the architecture is larger
+ * than 64B, the second part will also be prefetched.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ */
+static inline void
+rte_mbuf_prefetch_part1(struct rte_mbuf *m)
+{
+	rte_prefetch0(&m->cacheline0);
+}
+
+/**
+ * Prefetch the second part of the mbuf
+ *
+ * The next 64 bytes of the mbuf correspond to fields that are used in the
+ * transmit path. If the cache line of the architecture is larger than 64B,
+ * this function does nothing as it is expected that the full mbuf is
+ * already in cache.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ */
+static inline void
+rte_mbuf_prefetch_part2(struct rte_mbuf *m)
+{
+#if RTE_CACHE_LINE_SIZE == 64
+	rte_prefetch0(&m->cacheline1);
+#else
+	RTE_SET_USED(m);
+#endif
+}
+
 static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 
 /**
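
The prepare_tx_burst() hunk above shows the Tx-side pattern these helpers
target: prefetch the second part of an mbuf a couple of packets ahead of the
one being prepared, so its Tx fields are in cache when the headers are
written. A self-contained sketch of that pattern (everything except the
rte_mbuf_prefetch_part2() call is hypothetical):

#include <rte_mbuf.h>

/* Hypothetical Tx preparation loop: prefetch part2 of the mbuf that is
 * prefetch_offset packets ahead, then work on the current packet. */
static void
example_prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t nb_pkts)
{
	const int32_t prefetch_offset = 2;
	int32_t i;

	for (i = 0; i < (int32_t)nb_pkts - prefetch_offset; i++) {
		/* Warm the Tx fields of a packet we will touch shortly. */
		rte_mbuf_prefetch_part2(pkts[i + prefetch_offset]);
		/* ... build the headers of pkts[i] here ... */
	}
	/* The last prefetch_offset packets are then handled without
	 * lookahead, as in the example application. */
}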