diff --git a/doc/guides/prog_guide/mempool_lib.rst b/doc/guides/prog_guide/mempool_lib.rst
index 3bb84b0a6f..f8b430d656 100644
--- a/doc/guides/prog_guide/mempool_lib.rst
+++ b/doc/guides/prog_guide/mempool_lib.rst
@@ -27,10 +27,10 @@ In debug mode (CONFIG_RTE_LIBRTE_MEMPOOL_DEBUG is enabled),
 statistics about get from/put in the pool are stored in the mempool structure.
 Statistics are per-lcore to avoid concurrent access to statistics counters.
 
-Memory Alignment Constraints
-----------------------------
+Memory Alignment Constraints on x86 architecture
+------------------------------------------------
 
-Depending on hardware memory configuration, performance can be greatly improved by adding a specific padding between objects.
+Depending on hardware memory configuration on X86 architecture, performance can be greatly improved by adding a specific padding between objects.
 The objective is to ensure that the beginning of each object starts on a different channel and rank in memory so that all channels are equally loaded.
 
 This is particularly true for packet buffers when doing L3 forwarding or flow classification.
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 08906df9ee..712c839a08 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -45,6 +45,7 @@ EAL_REGISTER_TAILQ(rte_mempool_tailq)
 #define CALC_CACHE_FLUSHTHRESH(c)	\
 	((typeof(c))((c) * CACHE_FLUSHTHRESH_MULTIPLIER))
 
+#if defined(RTE_ARCH_X86)
 /*
  * return the greatest common divisor between a and b (fast algorithm)
  *
@@ -74,12 +75,13 @@ static unsigned get_gcd(unsigned a, unsigned b)
 }
 
 /*
- * Depending on memory configuration, objects addresses are spread
+ * Depending on memory configuration on x86 arch, objects addresses are spread
  * between channels and ranks in RAM: the pool allocator will add
  * padding between objects. This function return the new size of the
  * object.
  */
-static unsigned optimize_object_size(unsigned obj_size)
+static unsigned int
+arch_mem_object_align(unsigned int obj_size)
 {
 	unsigned nrank, nchan;
 	unsigned new_obj_size;
@@ -99,6 +101,13 @@ static unsigned optimize_object_size(unsigned obj_size)
 		new_obj_size++;
 	return new_obj_size * RTE_MEMPOOL_ALIGN;
 }
+#else
+static unsigned int
+arch_mem_object_align(unsigned int obj_size)
+{
+	return obj_size;
+}
+#endif
 
 struct pagesz_walk_arg {
 	int socket_id;
@@ -234,8 +243,8 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
 	 */
 	if ((flags & MEMPOOL_F_NO_SPREAD) == 0) {
 		unsigned new_size;
-		new_size = optimize_object_size(sz->header_size + sz->elt_size +
-			sz->trailer_size);
+		new_size = arch_mem_object_align
+			(sz->header_size + sz->elt_size + sz->trailer_size);
 		sz->trailer_size = new_size - sz->header_size - sz->elt_size;
 	}
 
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 0a1dc6059f..a2c92727a6 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -260,7 +260,8 @@ struct rte_mempool {
 #endif
 } __rte_cache_aligned;
 
-#define MEMPOOL_F_NO_SPREAD 0x0001 /**< Do not spread among memory channels. */
+#define MEMPOOL_F_NO_SPREAD 0x0001
+		/**< Spreading among memory channels not required. */
 #define MEMPOOL_F_NO_CACHE_ALIGN 0x0002 /**< Do not align objs on cache lines.*/
 #define MEMPOOL_F_SP_PUT 0x0004 /**< Default put is "single-producer".*/
 #define MEMPOOL_F_SC_GET 0x0008 /**< Default get is "single-consumer".*/
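
---

For context on what the new RTE_ARCH_X86 guard keeps, below is a minimal standalone
sketch of the x86-only spreading computation. It is not part of the patch: the names
gcd() and spread_object_size() and the hardcoded channel/rank counts in main() are
illustrative assumptions; in the library the counts come from rte_memory_get_nchannel()
and rte_memory_get_nrank(), and the alignment unit is RTE_MEMPOOL_ALIGN.

#include <stdio.h>

/* Alignment unit in bytes, standing in for RTE_MEMPOOL_ALIGN (one cache line). */
#define MEMPOOL_ALIGN 64

/* Greatest common divisor (Euclid), as computed by get_gcd() on the x86 path. */
static unsigned int
gcd(unsigned int a, unsigned int b)
{
	while (b != 0) {
		unsigned int t = b;
		b = a % b;
		a = t;
	}
	return a;
}

/*
 * Round obj_size up to the next multiple of MEMPOOL_ALIGN whose size in
 * alignment units is coprime with nchan * nrank, so that consecutive
 * objects start on different memory channels and ranks.
 */
static unsigned int
spread_object_size(unsigned int obj_size, unsigned int nchan, unsigned int nrank)
{
	unsigned int units = (obj_size + MEMPOOL_ALIGN - 1) / MEMPOOL_ALIGN;

	while (gcd(units, nchan * nrank) != 1)
		units++;
	return units * MEMPOOL_ALIGN;
}

int main(void)
{
	/* Example: a 2176-byte object on an assumed 4-channel, 1-rank system. */
	unsigned int sz = spread_object_size(2176, 4, 1);

	/* 2176 B = 34 units; gcd(34, 4) = 2, so pad to 35 units = 2240 B. */
	printf("padded object size: %u bytes\n", sz);
	return 0;
}

Keeping the per-object size in cache-line units coprime with nchan * nrank makes
consecutive object addresses cycle through all channels instead of repeatedly
hitting the same one; on non-x86 targets the patch skips this padding entirely
and returns obj_size unchanged.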