mbuf: add prefetch helpers
Some architectures (e.g. Power8) have a cache line size of 128 bytes, so drivers should not assume that prefetching the second part of the mbuf with rte_prefetch0(&m->cacheline1) is valid.

This commit adds helpers that drivers can use to prefetch the rx or tx part of the mbuf, whatever the cache line size.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Reviewed-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
parent 125e39139b
commit e89880957f
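For illustration only (this sketch is not part of the patch), a burst-processing loop in a driver or application would use the new helpers roughly as below instead of touching m->cacheline1 directly; the prefetch distance and the process_rx_pkt()/process_rx_burst() names are hypothetical.

/* Sketch only, not part of the patch: generic burst loop using the new
 * helpers. PREFETCH_OFFSET, process_rx_pkt() and process_rx_burst() are
 * hypothetical names chosen for this example. */
#include <rte_mbuf.h>

#define PREFETCH_OFFSET 4

/* Hypothetical per-packet work that reads fields from the first 64B. */
static inline void
process_rx_pkt(struct rte_mbuf *m)
{
	(void)rte_pktmbuf_pkt_len(m);
}

static void
process_rx_burst(struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t i;

	for (i = 0; i < nb_pkts; i++) {
		if (i + PREFETCH_OFFSET < nb_pkts) {
			/* First 64B: fields used early in the rx path. */
			rte_mbuf_prefetch_part1(pkts[i + PREFETCH_OFFSET]);
			/* Second 64B: a no-op when the cache line is 128B,
			 * because part1 already pulled in the whole mbuf. */
			rte_mbuf_prefetch_part2(pkts[i + PREFETCH_OFFSET]);
		}
		process_rx_pkt(pkts[i]);
	}
}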
@@ -487,10 +487,10 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rte_compiler_barrier();
 
 		if (split_packet) {
-			rte_prefetch0(&rx_pkts[pos]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
 		}
 
 		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
@@ -297,10 +297,10 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
 
 		if (split_packet) {
-			rte_prefetch0(&rx_pkts[pos]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
 		}
 
 		/* avoid compiler reorder optimization */
@@ -307,10 +307,10 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
 
 		if (split_packet) {
-			rte_prefetch0(&rx_pkts[pos]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
 		}
 
 		/* avoid compiler reorder optimization */
@@ -3283,8 +3283,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		 * Fetch initial bytes of packet descriptor into a
 		 * cacheline while allocating rep.
 		 */
-		rte_prefetch0(seg);
-		rte_prefetch0(&seg->cacheline1);
+		rte_mbuf_prefetch_part1(seg);
+		rte_mbuf_prefetch_part2(seg);
 		ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
 						    &flags);
 		if (unlikely(ret < 0)) {
@@ -1134,8 +1134,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		 * Fetch initial bytes of packet descriptor into a
 		 * cacheline while allocating rep.
 		 */
-		rte_prefetch0(seg);
-		rte_prefetch0(&seg->cacheline1);
+		rte_mbuf_prefetch_part1(seg);
+		rte_mbuf_prefetch_part2(seg);
 		ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci);
 		if (unlikely(ret < 0)) {
 			struct ibv_wc wc;
@@ -298,7 +298,7 @@ prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t nb_pkts, uint8_t port)
 	const int32_t prefetch_offset = 2;
 
 	for (i = 0; i < (nb_pkts - prefetch_offset); i++) {
-		rte_prefetch0(pkts[i + prefetch_offset]->cacheline1);
+		rte_mbuf_prefetch_part2(pkts[i + prefetch_offset]);
 		prepare_tx_pkt(pkts[i], port);
 	}
 	/* Process left packets */
@@ -842,6 +842,44 @@ struct rte_mbuf {
 	uint16_t timesync;
 } __rte_cache_aligned;
 
+/**
+ * Prefetch the first part of the mbuf
+ *
+ * The first 64 bytes of the mbuf corresponds to fields that are used early
+ * in the receive path. If the cache line of the architecture is higher than
+ * 64B, the second part will also be prefetched.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ */
+static inline void
+rte_mbuf_prefetch_part1(struct rte_mbuf *m)
+{
+	rte_prefetch0(&m->cacheline0);
+}
+
+/**
+ * Prefetch the second part of the mbuf
+ *
+ * The next 64 bytes of the mbuf corresponds to fields that are used in the
+ * transmit path. If the cache line of the architecture is higher than 64B,
+ * this function does nothing as it is expected that the full mbuf is
+ * already in cache.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ */
+static inline void
+rte_mbuf_prefetch_part2(struct rte_mbuf *m)
+{
+#if RTE_CACHE_LINE_SIZE == 64
+	rte_prefetch0(&m->cacheline1);
+#else
+	RTE_SET_USED(m);
+#endif
+}
+
 static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 
 /**
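A brief aside on the layout assumption behind part2 being a compile-time no-op on 128-byte cache line targets: per the doxygen comments above, cacheline1 starts 64 bytes into the mbuf, so with a 128-byte line both markers sit in the same hardware cache line and the part1 prefetch already covers them. The tiny program below is not part of the patch; it only prints those two values to make the assumption visible.

/* Sketch only, not part of the patch: print the layout values that the
 * helpers rely on. */
#include <stddef.h>
#include <stdio.h>
#include <rte_mbuf.h>

int
main(void)
{
	/* The second marker starts 64 bytes into the mbuf, independent of
	 * the build-time cache line size. */
	printf("offsetof(struct rte_mbuf, cacheline1) = %zu\n",
	       offsetof(struct rte_mbuf, cacheline1));
	/* When this is 128, both markers share one hardware cache line and
	 * rte_mbuf_prefetch_part2() expands to RTE_SET_USED(m). */
	printf("RTE_CACHE_LINE_SIZE = %d\n", RTE_CACHE_LINE_SIZE);
	return 0;
}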