From b77b5639726eeb5f60269bc82d21f6b87c3e6ebe Mon Sep 17 00:00:00 2001 From: Chao Zhu Date: Tue, 25 Nov 2014 17:17:14 -0500 Subject: [PATCH] mem: add huge page sizes for IBM Power IBM Power architecture has different huge page sizes (16MB, 16GB) than x86. This patch defines RTE_PGSIZE_16M and RTE_PGSIZE_16G in the rte_page_sizes enum and adds support for these huge page sizes to DPDK on the IBM Power architecture. Signed-off-by: Chao Zhu Acked-by: David Marchand --- app/test/test_memzone.c | 123 +++++++++++++++++++- lib/librte_eal/common/eal_common_memzone.c | 19 ++- lib/librte_eal/common/include/rte_memory.h | 9 +- lib/librte_eal/common/include/rte_memzone.h | 8 ++ lib/librte_eal/linuxapp/eal/eal.c | 9 +- 5 files changed, 156 insertions(+), 12 deletions(-) diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c index 381f643bb4..387dbbcc2e 100644 --- a/app/test/test_memzone.c +++ b/app/test/test_memzone.c @@ -133,6 +133,8 @@ test_memzone_reserve_flags(void) const struct rte_memseg *ms; int hugepage_2MB_avail = 0; int hugepage_1GB_avail = 0; + int hugepage_16MB_avail = 0; + int hugepage_16GB_avail = 0; const size_t size = 100; int i = 0; ms = rte_eal_get_physmem_layout(); @@ -141,12 +143,20 @@ test_memzone_reserve_flags(void) hugepage_2MB_avail = 1; if (ms[i].hugepage_sz == RTE_PGSIZE_1G) hugepage_1GB_avail = 1; + if (ms[i].hugepage_sz == RTE_PGSIZE_16M) + hugepage_16MB_avail = 1; + if (ms[i].hugepage_sz == RTE_PGSIZE_16G) + hugepage_16GB_avail = 1; } - /* Display the availability of 2MB and 1GB pages */ + /* Display the availability of 2MB, 1GB, 16MB and 16GB pages */ if (hugepage_2MB_avail) printf("2MB Huge pages available\n"); if (hugepage_1GB_avail) printf("1GB Huge pages available\n"); + if (hugepage_16MB_avail) + printf("16MB Huge pages available\n"); + if (hugepage_16GB_avail) + printf("16GB Huge pages available\n"); /* * If 2MB pages available, check that a small memzone is correctly * reserved from 2MB huge pages when requested by the RTE_MEMZONE_2MB flag. 
@@ -255,6 +265,117 @@ test_memzone_reserve_flags(void) } } } + /* + * This option is for IBM Power. If 16MB pages are available, check + * that a small memzone is correctly reserved from 16MB huge pages + * when requested by the RTE_MEMZONE_16MB flag. Also check that the + * RTE_MEMZONE_SIZE_HINT_ONLY flag only defaults to an available + * page size (i.e. 16GB) when 16MB pages are unavailable. + */ + if (hugepage_16MB_avail) { + mz = rte_memzone_reserve("flag_zone_16M", size, SOCKET_ID_ANY, + RTE_MEMZONE_16MB); + if (mz == NULL) { + printf("MEMZONE FLAG 16MB\n"); + return -1; + } + if (mz->hugepage_sz != RTE_PGSIZE_16M) { + printf("hugepage_sz not equal 16M\n"); + return -1; + } + + mz = rte_memzone_reserve("flag_zone_16M_HINT", size, + SOCKET_ID_ANY, RTE_MEMZONE_16MB|RTE_MEMZONE_SIZE_HINT_ONLY); + if (mz == NULL) { + printf("MEMZONE FLAG 2MB\n"); + return -1; + } + if (mz->hugepage_sz != RTE_PGSIZE_16M) { + printf("hugepage_sz not equal 16M\n"); + return -1; + } + + /* Check that, if 16GB huge pages are unavailable, the function fails + * unless the HINT flag is indicated + */ + if (!hugepage_16GB_avail) { + mz = rte_memzone_reserve("flag_zone_16G_HINT", size, + SOCKET_ID_ANY, + RTE_MEMZONE_16GB|RTE_MEMZONE_SIZE_HINT_ONLY); + if (mz == NULL) { + printf("MEMZONE FLAG 16GB & HINT\n"); + return -1; + } + if (mz->hugepage_sz != RTE_PGSIZE_16M) { + printf("hugepage_sz not equal 16M\n"); + return -1; + } + + mz = rte_memzone_reserve("flag_zone_16G", size, + SOCKET_ID_ANY, RTE_MEMZONE_16GB); + if (mz != NULL) { + printf("MEMZONE FLAG 16GB\n"); + return -1; + } + } + } + /* As with the 16MB tests above, for 16GB huge page requests */ + if (hugepage_16GB_avail) { + mz = rte_memzone_reserve("flag_zone_16G", size, SOCKET_ID_ANY, + RTE_MEMZONE_16GB); + if (mz == NULL) { + printf("MEMZONE FLAG 16GB\n"); + return -1; + } + if (mz->hugepage_sz != RTE_PGSIZE_16G) { + printf("hugepage_sz not equal 16G\n"); + return -1; + } + + mz = rte_memzone_reserve("flag_zone_16G_HINT", size, + SOCKET_ID_ANY, 
RTE_MEMZONE_16GB|RTE_MEMZONE_SIZE_HINT_ONLY); + if (mz == NULL) { + printf("MEMZONE FLAG 16GB\n"); + return -1; + } + if (mz->hugepage_sz != RTE_PGSIZE_16G) { + printf("hugepage_sz not equal 16G\n"); + return -1; + } + + /* Check that, if 16MB huge pages are unavailable, the function fails + * unless the HINT flag is indicated + */ + if (!hugepage_16MB_avail) { + mz = rte_memzone_reserve("flag_zone_16M_HINT", size, + SOCKET_ID_ANY, + RTE_MEMZONE_16MB|RTE_MEMZONE_SIZE_HINT_ONLY); + if (mz == NULL) { + printf("MEMZONE FLAG 16MB & HINT\n"); + return -1; + } + if (mz->hugepage_sz != RTE_PGSIZE_16G) { + printf("hugepage_sz not equal 16G\n"); + return -1; + } + mz = rte_memzone_reserve("flag_zone_16M", size, + SOCKET_ID_ANY, RTE_MEMZONE_16MB); + if (mz != NULL) { + printf("MEMZONE FLAG 16MB\n"); + return -1; + } + } + + if (hugepage_16MB_avail && hugepage_16GB_avail) { + mz = rte_memzone_reserve("flag_zone_16M_HINT", size, + SOCKET_ID_ANY, + RTE_MEMZONE_16MB|RTE_MEMZONE_16GB); + if (mz != NULL) { + printf("BOTH SIZES SET\n"); + return -1; + } + } + } return 0; } diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 5acd9ce491..f1fc4a78dc 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -216,10 +216,16 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, /* check flags for hugepage sizes */ if ((flags & RTE_MEMZONE_2MB) && - free_memseg[i].hugepage_sz == RTE_PGSIZE_1G ) + free_memseg[i].hugepage_sz == RTE_PGSIZE_1G) continue; if ((flags & RTE_MEMZONE_1GB) && - free_memseg[i].hugepage_sz == RTE_PGSIZE_2M ) + free_memseg[i].hugepage_sz == RTE_PGSIZE_2M) + continue; + if ((flags & RTE_MEMZONE_16MB) && + free_memseg[i].hugepage_sz == RTE_PGSIZE_16G) + continue; + if ((flags & RTE_MEMZONE_16GB) && + free_memseg[i].hugepage_sz == RTE_PGSIZE_16M) continue; /* this segment is the best until now */ @@ -256,7 +262,8 @@ memzone_reserve_aligned_thread_unsafe(const 
char *name, size_t len, * try allocating again without the size parameter otherwise -fail. */ if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) && - ((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB))) + ((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB) + || (flags & RTE_MEMZONE_16MB) || (flags & RTE_MEMZONE_16GB))) return memzone_reserve_aligned_thread_unsafe(name, len, socket_id, 0, align, bound); @@ -313,7 +320,8 @@ rte_memzone_reserve_aligned(const char *name, size_t len, const struct rte_memzone *mz = NULL; /* both sizes cannot be explicitly called for */ - if ((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB)) { + if (((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB)) + || ((flags & RTE_MEMZONE_16MB) && (flags & RTE_MEMZONE_16GB))) { rte_errno = EINVAL; return NULL; } @@ -344,7 +352,8 @@ rte_memzone_reserve_bounded(const char *name, size_t len, const struct rte_memzone *mz = NULL; /* both sizes cannot be explicitly called for */ - if ((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB)) { + if (((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB)) + || ((flags & RTE_MEMZONE_16MB) && (flags & RTE_MEMZONE_16GB))) { rte_errno = EINVAL; return NULL; } diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index 4cf8ea9c05..2ed2637e42 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -53,9 +53,12 @@ extern "C" { #endif enum rte_page_sizes { - RTE_PGSIZE_4K = 1 << 12, - RTE_PGSIZE_2M = RTE_PGSIZE_4K << 9, - RTE_PGSIZE_1G = RTE_PGSIZE_2M <<9 + RTE_PGSIZE_4K = 1ULL << 12, + RTE_PGSIZE_2M = 1ULL << 21, + RTE_PGSIZE_1G = 1ULL << 30, + RTE_PGSIZE_64K = 1ULL << 16, + RTE_PGSIZE_16M = 1ULL << 24, + RTE_PGSIZE_16G = 1ULL << 34 }; #define SOCKET_ID_ANY -1 /**< Any NUMA socket. 
*/ diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h index 50144094c4..7d47bffecc 100644 --- a/lib/librte_eal/common/include/rte_memzone.h +++ b/lib/librte_eal/common/include/rte_memzone.h @@ -60,6 +60,8 @@ extern "C" { #define RTE_MEMZONE_2MB 0x00000001 /**< Use 2MB pages. */ #define RTE_MEMZONE_1GB 0x00000002 /**< Use 1GB pages. */ +#define RTE_MEMZONE_16MB 0x00000100 /**< Use 16MB pages. */ +#define RTE_MEMZONE_16GB 0x00000200 /**< Use 16GB pages. */ #define RTE_MEMZONE_SIZE_HINT_ONLY 0x00000004 /**< Use available page size */ /** @@ -111,6 +113,8 @@ struct rte_memzone { * taken from 1GB or 2MB hugepages. * - RTE_MEMZONE_2MB - Reserve from 2MB pages * - RTE_MEMZONE_1GB - Reserve from 1GB pages + * - RTE_MEMZONE_16MB - Reserve from 16MB pages + * - RTE_MEMZONE_16GB - Reserve from 16GB pages * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if * the requested page size is unavailable. * If this flag is not set, the function @@ -156,6 +160,8 @@ const struct rte_memzone *rte_memzone_reserve(const char *name, * taken from 1GB or 2MB hugepages. * - RTE_MEMZONE_2MB - Reserve from 2MB pages * - RTE_MEMZONE_1GB - Reserve from 1GB pages + * - RTE_MEMZONE_16MB - Reserve from 16MB pages + * - RTE_MEMZONE_16GB - Reserve from 16GB pages * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if * the requested page size is unavailable. * If this flag is not set, the function @@ -206,6 +212,8 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name, * taken from 1GB or 2MB hugepages. * - RTE_MEMZONE_2MB - Reserve from 2MB pages * - RTE_MEMZONE_1GB - Reserve from 1GB pages + * - RTE_MEMZONE_16MB - Reserve from 16MB pages + * - RTE_MEMZONE_16GB - Reserve from 16GB pages * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if * the requested page size is unavailable. 
* If this flag is not set, the function diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 68cae884a8..1d1fbdd3a8 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -453,9 +453,12 @@ eal_parse_base_virtaddr(const char *arg) return -1; #endif - /* align the addr on 2M boundary */ - internal_config.base_virtaddr = RTE_PTR_ALIGN_CEIL((uintptr_t)addr, - RTE_PGSIZE_2M); + /* Align the addr on a 16MB boundary: 16MB is the minimum huge page + * size on IBM Power architecture. An address aligned to 16MB is + * also aligned to 2MB, so the same alignment can be used + * on x86. */ + internal_config.base_virtaddr = + RTE_PTR_ALIGN_CEIL((uintptr_t)addr, RTE_PGSIZE_16M); return 0; }