From 025a453103290ba2933a82c1b715b433331ace24 Mon Sep 17 00:00:00 2001 From: quackerd Date: Fri, 5 Feb 2021 06:22:43 +0000 Subject: [PATCH] FreeBSD contigmem and eal NUMA support --- config/rte_config.h | 3 +- kernel/freebsd/contigmem/contigmem.c | 170 ++++++++---- lib/librte_eal/freebsd/eal_hugepage_info.c | 38 ++- lib/librte_eal/freebsd/eal_lcore.c | 23 +- lib/librte_eal/freebsd/eal_memory.c | 305 +++++++++++---------- lib/librte_eal/freebsd/include/rte_os.h | 16 ++ 6 files changed, 349 insertions(+), 206 deletions(-) diff --git a/config/rte_config.h b/config/rte_config.h index a0b5160ff2..27ffb4ac38 100644 --- a/config/rte_config.h +++ b/config/rte_config.h @@ -27,7 +27,7 @@ #define RTE_VER_PREFIX "DPDK" /****** library defines ********/ - +#define RTE_LIBRTE_IEEE1588 1 /* EAL defines */ #define RTE_MAX_HEAPS 32 #define RTE_MAX_MEMSEG_LISTS 128 @@ -44,6 +44,7 @@ /* bsd module defines */ #define RTE_CONTIGMEM_MAX_NUM_BUFS 64 #define RTE_CONTIGMEM_DEFAULT_NUM_BUFS 1 +#define RTE_CONTIGMEM_DEFAULT_NODE_AFFINITY 0x1 #define RTE_CONTIGMEM_DEFAULT_BUF_SIZE (512*1024*1024) /* mempool defines */ diff --git a/kernel/freebsd/contigmem/contigmem.c b/kernel/freebsd/contigmem/contigmem.c index bd72f4d620..ff11a49ab6 100644 --- a/kernel/freebsd/contigmem/contigmem.c +++ b/kernel/freebsd/contigmem/contigmem.c @@ -20,6 +20,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include @@ -31,6 +32,15 @@ __FBSDID("$FreeBSD$"); #include #include +#define CONTIGMEM_DEBUG + +#ifdef CONTIGMEM_DEBUG +#define DBGPRINT(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define DBGPRINT(fmt, ...) 
+#endif + +#define NEXT_CPU_NULL (-1) struct contigmem_buffer { void *addr; int refcnt; @@ -39,6 +49,7 @@ struct contigmem_buffer { struct contigmem_vm_handle { int buffer_index; + int node_index; }; static int contigmem_load(void); @@ -49,19 +60,46 @@ static d_mmap_single_t contigmem_mmap_single; static d_open_t contigmem_open; static d_close_t contigmem_close; +static uint64_t contigmem_node_affinity = RTE_CONTIGMEM_DEFAULT_NODE_AFFINITY; static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS; static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE; +#define CONTIGMEM_MAX_NODECNT RTE_MAX_NUMA_NODES + static eventhandler_tag contigmem_eh_tag; -static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS]; +static struct contigmem_buffer contigmem_buffers[CONTIGMEM_MAX_NODECNT][RTE_CONTIGMEM_MAX_NUM_BUFS]; static struct cdev *contigmem_cdev = NULL; static int contigmem_refcnt; +static int contigmem_nodecnt; +/* + * offset: 0-11 bits page alignment + * 12-15: node idx + * 16-31: +*/ +static inline int offset_to_node(off_t offset) +{ + return (offset >> 12) & 0xF; +} + +static inline int offset_to_buffer(off_t offset) +{ + return (offset >> 16) & 0xFFFF; +} + +static inline off_t offset_make(int node, int buffer) +{ + return (((off_t)node & 0xF) << 12) | (((off_t)buffer & 0xFFFF) << 16); +} + +TUNABLE_QUAD("hw.contigmem.node_affinity", &contigmem_node_affinity); TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers); TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size); static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem"); +SYSCTL_QUAD(_hw_contigmem, OID_AUTO, node_affinity, CTLFLAG_RD, + &contigmem_node_affinity, 0, "The node/NUMA affinity of buffers."); SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD, &contigmem_num_buffers, 0, "Number of contigmem buffers allocated"); SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD, @@ -110,12 +148,34 @@ static struct cdevsw
contigmem_ops = { .d_close = contigmem_close, }; +static void +contigmem_free(void) +{ + for (int j = 0; j < contigmem_nodecnt; j++) { + for (int i = 0; i < contigmem_num_buffers; i++) { + if (contigmem_buffers[j][i].addr != NULL) { + DBGPRINT("contigmem_free: freeing contigmem node buffer %d, node %d, virt 0x%p, size 0x%lx\n", + i, j, contigmem_buffers[j][i].addr, contigmem_buffer_size); + contigfree(contigmem_buffers[j][i].addr, + contigmem_buffer_size, M_CONTIGMEM); + contigmem_buffers[j][i].addr = NULL; + } + if (mtx_initialized(&contigmem_buffers[j][i].mtx)) + mtx_destroy(&contigmem_buffers[j][i].mtx); + } + } +} + static int contigmem_load() { char index_string[8], description[32]; - int i, error = 0; + int i, j, error = 0; void *addr; + struct domainset ds; + struct domainset *rds; + ds.ds_policy = DOMAINSET_POLICY_ROUNDROBIN; + if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) { printf("%d buffers requested is greater than %d allowed\n", @@ -132,30 +192,51 @@ contigmem_load() goto error; } - for (i = 0; i < contigmem_num_buffers; i++) { - addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO, - 0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0); - if (addr == NULL) { - printf("contigmalloc failed for buffer %d\n", i); - error = ENOMEM; - goto error; + // init cputop + contigmem_nodecnt = vm_ndomains; + + if (contigmem_nodecnt > CONTIGMEM_MAX_NODECNT) { + printf("contigmem_load: too many NUMA nodes detected: %d\n", contigmem_nodecnt); + error = EINVAL; + goto error; + } + + DBGPRINT("contigmem_load: detected %d vmdomain\n", contigmem_nodecnt); + + for (j = 0; j < contigmem_nodecnt; j++) { + if ((contigmem_node_affinity & (1ULL << j)) == 0) { + DBGPRINT("contigmem_load: skipping node %d...\n", j); + continue; } + DOMAINSET_ZERO(&ds.ds_mask); + DOMAINSET_SET(j, &ds.ds_mask); + rds = domainset_create(&ds); + for (i = 0; i < contigmem_num_buffers; i++) { + DBGPRINT("contigmem_load: allocating 0x%lx bytes memory for buffer %d node %d\n",
contigmem_buffer_size, i, j); + addr = contigmalloc_domainset(contigmem_buffer_size, M_CONTIGMEM, rds, + M_ZERO, 0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0); + if (addr == NULL) { + printf("contigmalloc failed for buffer %d node %d\n", i, j); + error = ENOMEM; + goto error; + } - printf("%2u: virt=%p phys=%p\n", i, addr, - (void *)pmap_kextract((vm_offset_t)addr)); + printf("%2u: virt=%p phys=%p\n", i, addr, + (void *)pmap_kextract((vm_offset_t)addr)); - mtx_init(&contigmem_buffers[i].mtx, "contigmem", NULL, MTX_DEF); - contigmem_buffers[i].addr = addr; - contigmem_buffers[i].refcnt = 0; + mtx_init(&contigmem_buffers[j][i].mtx, "contigmem", NULL, MTX_DEF); + contigmem_buffers[j][i].addr = addr; + contigmem_buffers[j][i].refcnt = 0; - snprintf(index_string, sizeof(index_string), "%d", i); - snprintf(description, sizeof(description), - "phys addr for buffer %d", i); - SYSCTL_ADD_PROC(NULL, - &SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO, - index_string, CTLTYPE_U64 | CTLFLAG_RD, - (void *)(uintptr_t)i, 0, contigmem_physaddr, "LU", - description); + snprintf(index_string, sizeof(index_string), "%ld", offset_make(j, i)); + snprintf(description, sizeof(description), + "phys addr for node %d buffer %d", j, i); + SYSCTL_ADD_PROC(NULL, + &SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO, + index_string, CTLTYPE_U64 | CTLFLAG_RD, + (void*)offset_make(j, i), 0, contigmem_physaddr, "LU", + description); + } } contigmem_cdev = make_dev_credf(0, &contigmem_ops, 0, NULL, UID_ROOT, @@ -164,24 +245,13 @@ contigmem_load() return 0; error: - for (i = 0; i < contigmem_num_buffers; i++) { - if (contigmem_buffers[i].addr != NULL) { - contigfree(contigmem_buffers[i].addr, - contigmem_buffer_size, M_CONTIGMEM); - contigmem_buffers[i].addr = NULL; - } - if (mtx_initialized(&contigmem_buffers[i].mtx)) - mtx_destroy(&contigmem_buffers[i].mtx); - } - + contigmem_free(); return error; } static int contigmem_unload() { - int i; - if (contigmem_refcnt > 0) return EBUSY; @@ 
-191,14 +261,7 @@ contigmem_unload() if (contigmem_eh_tag != NULL) EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag); - for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) { - if (contigmem_buffers[i].addr != NULL) - contigfree(contigmem_buffers[i].addr, - contigmem_buffer_size, M_CONTIGMEM); - if (mtx_initialized(&contigmem_buffers[i].mtx)) - mtx_destroy(&contigmem_buffers[i].mtx); - } - + contigmem_free(); return 0; } @@ -206,9 +269,12 @@ static int contigmem_physaddr(SYSCTL_HANDLER_ARGS) { uint64_t physaddr; - int index = (int)(uintptr_t)arg1; + off_t offset = (off_t)arg1; + int index = offset_to_buffer(offset); + int node = offset_to_node(offset); - physaddr = (uint64_t)vtophys(contigmem_buffers[index].addr); + physaddr = (uint64_t)vtophys(contigmem_buffers[node][index].addr); + DBGPRINT("contigmem_physaddr sysctl: buffer %d node %d paddr 0x%lx\n", index, node, physaddr); return sysctl_handle_64(oidp, &physaddr, 0, req); } @@ -239,7 +305,8 @@ contigmem_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, struct contigmem_vm_handle *vmh = handle; struct contigmem_buffer *buf; - buf = &contigmem_buffers[vmh->buffer_index]; + DBGPRINT("contigmem_cdev_pager_ctor: buffer %d node %d\n", vmh->buffer_index, vmh->node_index); + buf = &contigmem_buffers[vmh->node_index][vmh->buffer_index]; atomic_add_int(&contigmem_refcnt, 1); @@ -258,7 +325,8 @@ contigmem_cdev_pager_dtor(void *handle) struct contigmem_vm_handle *vmh = handle; struct contigmem_buffer *buf; - buf = &contigmem_buffers[vmh->buffer_index]; + DBGPRINT("contigmem_cdev_pager_dtor: buffer %d node %d\n", vmh->buffer_index, vmh->node_index); + buf = &contigmem_buffers[vmh->node_index][vmh->buffer_index]; mtx_lock(&buf->mtx); buf->refcnt--; @@ -335,14 +403,17 @@ contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, { struct contigmem_vm_handle *vmh; uint64_t buffer_index; + uint64_t node_index; /* * The buffer index is encoded in the offset. 
Divide the offset by * PAGE_SIZE to get the index of the buffer requested by the user * app. */ - buffer_index = *offset / PAGE_SIZE; - if (buffer_index >= contigmem_num_buffers) + buffer_index = offset_to_buffer(*offset); + node_index = offset_to_node(*offset); + DBGPRINT("contigmem_mmap_single: buffer %lu node %lu size 0x%lx\n", buffer_index, node_index, size); + if (buffer_index >= contigmem_num_buffers || node_index >= contigmem_nodecnt) return EINVAL; if (size > contigmem_buffer_size) @@ -352,8 +423,9 @@ contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, if (vmh == NULL) return ENOMEM; vmh->buffer_index = buffer_index; + vmh->node_index = node_index; - *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index].addr); + *offset = (vm_ooffset_t)vtophys(contigmem_buffers[node_index][buffer_index].addr); *obj = cdev_pager_allocate(vmh, OBJT_DEVICE, &contigmem_cdev_pager_ops, size, nprot, *offset, curthread->td_ucred); diff --git a/lib/librte_eal/freebsd/eal_hugepage_info.c b/lib/librte_eal/freebsd/eal_hugepage_info.c index 408f054f7a..2e3e9e40f7 100644 --- a/lib/librte_eal/freebsd/eal_hugepage_info.c +++ b/lib/librte_eal/freebsd/eal_hugepage_info.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -13,6 +14,7 @@ #include "eal_hugepages.h" #include "eal_internal_cfg.h" #include "eal_filesystem.h" +#include "rte_build_config.h" #define CONTIGMEM_DEV "/dev/contigmem" @@ -48,6 +50,21 @@ create_shared_memory(const char *filename, const size_t mem_size) return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT); } +#define NEXT_CPU_NULL (-1) +static inline int +cmask_get_next_cpu(uint64_t *mask) +{ + int ffs = ffsll(*mask); + *mask &= ~(1ULL << (ffs - 1)); + return ffs - 1; +} + +static inline int +cmask_get_num_cpus(const uint64_t mask) +{ + return _mm_popcnt_u64(mask); +} + /* * No hugepage support on freebsd, but we dummy it, using contigmem driver */ @@ -57,6 +74,8 @@ eal_hugepage_info_init(void) size_t
sysctl_size; int num_buffers, fd, error; int64_t buffer_size; + uint64_t node_affinity = 0; + int num_nodes; struct internal_config *internal_conf = eal_get_internal_configuration(); @@ -76,6 +95,14 @@ eal_hugepage_info_init(void) return -1; } + sysctl_size = sizeof(node_affinity); + error = sysctlbyname("hw.contigmem.node_affinity", &node_affinity, &sysctl_size, NULL, 0); + num_nodes = cmask_get_num_cpus(node_affinity); + if (error != 0 || num_nodes == 0 || num_nodes > RTE_MAX_NUMA_NODES) { + RTE_LOG(ERR, EAL, "could not read or invalid sysctl hw.contigmem.node_affinity 0x%lx\n", node_affinity); + return -1; + } + sysctl_size = sizeof(buffer_size); error = sysctlbyname("hw.contigmem.buffer_size", &buffer_size, &sysctl_size, NULL, 0); @@ -101,9 +128,18 @@ eal_hugepage_info_init(void) RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n", num_buffers, (int)(buffer_size>>10)); + RTE_LOG(INFO, EAL, "Contigmem driver's node affinity is 0x%lx, %d domains in total\n", + node_affinity, num_nodes); + strlcpy(hpi->hugedir, CONTIGMEM_DEV, sizeof(hpi->hugedir)); hpi->hugepage_sz = buffer_size; - hpi->num_pages[0] = num_buffers; + for (int i = 0; i < RTE_MAX_NUMA_NODES; i++) { + if ((node_affinity & (1ULL << i)) != 0) { + hpi->num_pages[i] = num_buffers; + } else { + hpi->num_pages[i] = 0; + } + } hpi->lock_descriptor = fd; /* for no shared files mode, do not create shared memory config */ diff --git a/lib/librte_eal/freebsd/eal_lcore.c b/lib/librte_eal/freebsd/eal_lcore.c index d9ef4bc9c5..9ceafaedf2 100644 --- a/lib/librte_eal/freebsd/eal_lcore.c +++ b/lib/librte_eal/freebsd/eal_lcore.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2010-2014 Intel Corporation */ - +#include #include #include @@ -16,9 +16,10 @@ /* No topology information available on FreeBSD including NUMA info */ unsigned -eal_cpu_core_id(__rte_unused unsigned lcore_id) +eal_cpu_core_id(unsigned lcore_id) { - return 0; + /* DPDK uses 1-1 mapping */ + return lcore_id; }
static int @@ -36,9 +37,21 @@ eal_get_ncpus(void) } unsigned -eal_cpu_socket_id(__rte_unused unsigned cpu_id) +eal_cpu_socket_id(unsigned cpu_id) { - return 0; + int error; + int domain; + size_t domain_sz = sizeof(domain); + + char sysctl_str[32]; + snprintf(sysctl_str, sizeof(sysctl_str), "dev.cpu.%d.%%domain", cpu_id); + + error = sysctlbyname(sysctl_str, &domain, &domain_sz, NULL, 0); + if (error < 0) { + RTE_LOG(WARNING, EAL, "Failed to get socket id for core %u, returning 0...\n", cpu_id); + domain = 0; + } + return domain; } /* Check if a cpu is present by the presence of the diff --git a/lib/librte_eal/freebsd/eal_memory.c b/lib/librte_eal/freebsd/eal_memory.c index 78ac142b82..8a211cef81 100644 --- a/lib/librte_eal/freebsd/eal_memory.c +++ b/lib/librte_eal/freebsd/eal_memory.c @@ -107,114 +107,117 @@ rte_eal_hugepage_init(void) hpi = &internal_conf->hugepage_info[i]; page_sz = hpi->hugepage_sz; - max_pages = hpi->num_pages[0]; - mem_needed = RTE_ALIGN_CEIL(internal_conf->memory - total_mem, - page_sz); + for (int k = 0; k < RTE_MAX_NUMA_NODES; k++) { + max_pages = hpi->num_pages[k]; - n_pages = RTE_MIN(mem_needed / page_sz, max_pages); + mem_needed = RTE_ALIGN_CEIL(internal_conf->memory - total_mem, + page_sz); - for (j = 0; j < n_pages; j++) { - struct rte_memseg_list *msl; - struct rte_fbarray *arr; - struct rte_memseg *seg; - int msl_idx, ms_idx; - rte_iova_t physaddr; - int error; - size_t sysctl_size = sizeof(physaddr); - char physaddr_str[64]; - bool is_adjacent; + n_pages = RTE_MIN(mem_needed / page_sz, max_pages); - /* first, check if this segment is IOVA-adjacent to - * the previous one. 
- */ - snprintf(physaddr_str, sizeof(physaddr_str), - "hw.contigmem.physaddr.%d", j); - error = sysctlbyname(physaddr_str, &physaddr, - &sysctl_size, NULL, 0); - if (error < 0) { - RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u " - "from %s\n", j, hpi->hugedir); - return -1; - } + for (j = 0; j < n_pages; j++) { + struct rte_memseg_list *msl; + struct rte_fbarray *arr; + struct rte_memseg *seg; + int msl_idx, ms_idx; + rte_iova_t physaddr; + int error; + size_t sysctl_size = sizeof(physaddr); + char physaddr_str[64]; + bool is_adjacent; - is_adjacent = prev_end != 0 && physaddr == prev_end; - prev_end = physaddr + hpi->hugepage_sz; + /* first, check if this segment is IOVA-adjacent to + * the previous one. + */ + snprintf(physaddr_str, sizeof(physaddr_str), + "hw.contigmem.physaddr.%ld", offset_make(k, j)); + error = sysctlbyname(physaddr_str, &physaddr, + &sysctl_size, NULL, 0); + if (error < 0) { + RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u " + "from %s\n", j, hpi->hugedir); + return -1; + } - for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; - msl_idx++) { - bool empty, need_hole; - msl = &mcfg->memsegs[msl_idx]; + is_adjacent = prev_end != 0 && physaddr == prev_end; + prev_end = physaddr + hpi->hugepage_sz; + + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; + msl_idx++) { + bool empty, need_hole; + msl = &mcfg->memsegs[msl_idx]; + arr = &msl->memseg_arr; + + if (msl->page_sz != page_sz || msl->socket_id != k) + continue; + + empty = arr->count == 0; + + /* we need a hole if this isn't an empty memseg + * list, and if previous segment was not + * adjacent to current one. + */ + need_hole = !empty && !is_adjacent; + + /* we need 1, plus hole if not adjacent */ + ms_idx = rte_fbarray_find_next_n_free(arr, + 0, 1 + (need_hole ? 1 : 0)); + + /* memseg list is full? 
*/ + if (ms_idx < 0) + continue; + + if (need_hole && prev_ms_idx == ms_idx - 1) + ms_idx++; + prev_ms_idx = ms_idx; + + break; + } + if (msl_idx == RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n", + RTE_STR(RTE_MAX_MEMSEG_PER_TYPE), + RTE_STR(RTE_MAX_MEM_MB_PER_TYPE)); + return -1; + } arr = &msl->memseg_arr; + seg = rte_fbarray_get(arr, ms_idx); - if (msl->page_sz != page_sz) - continue; + addr = RTE_PTR_ADD(msl->base_va, + (size_t)msl->page_sz * ms_idx); - empty = arr->count == 0; + /* address is already mapped in memseg list, so using + * MAP_FIXED here is safe. + */ + addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE, + MAP_SHARED | MAP_FIXED, + hpi->lock_descriptor, + offset_make(k, j)); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", + j, hpi->hugedir); + return -1; + } - /* we need a hole if this isn't an empty memseg - * list, and if previous segment was not - * adjacent to current one. - */ - need_hole = !empty && !is_adjacent; + seg->addr = addr; + seg->iova = physaddr; + seg->hugepage_sz = page_sz; + seg->len = page_sz; + seg->nchannel = mcfg->nchannel; + seg->nrank = mcfg->nrank; + seg->socket_id = k; - /* we need 1, plus hole if not adjacent */ - ms_idx = rte_fbarray_find_next_n_free(arr, - 0, 1 + (need_hole ? 1 : 0)); + rte_fbarray_set_used(arr, ms_idx); - /* memseg list is full? */ - if (ms_idx < 0) - continue; + RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%" + PRIx64", len %zu, socket %d\n", + seg_idx++, addr, physaddr, page_sz, k); - if (need_hole && prev_ms_idx == ms_idx - 1) - ms_idx++; - prev_ms_idx = ms_idx; - - break; + total_mem += seg->len; } - if (msl_idx == RTE_MAX_MEMSEG_LISTS) { - RTE_LOG(ERR, EAL, "Could not find space for memseg. 
Please increase %s and/or %s in configuration.\n", - RTE_STR(RTE_MAX_MEMSEG_PER_TYPE), - RTE_STR(RTE_MAX_MEM_MB_PER_TYPE)); - return -1; - } - arr = &msl->memseg_arr; - seg = rte_fbarray_get(arr, ms_idx); - - addr = RTE_PTR_ADD(msl->base_va, - (size_t)msl->page_sz * ms_idx); - - /* address is already mapped in memseg list, so using - * MAP_FIXED here is safe. - */ - addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE, - MAP_SHARED | MAP_FIXED, - hpi->lock_descriptor, - j * EAL_PAGE_SIZE); - if (addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", - j, hpi->hugedir); - return -1; - } - - seg->addr = addr; - seg->iova = physaddr; - seg->hugepage_sz = page_sz; - seg->len = page_sz; - seg->nchannel = mcfg->nchannel; - seg->nrank = mcfg->nrank; - seg->socket_id = 0; - - rte_fbarray_set_used(arr, ms_idx); - - RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%" - PRIx64", len %zu\n", - seg_idx++, addr, physaddr, page_sz); - - total_mem += seg->len; } if (total_mem >= internal_conf->memory) break; - } + } if (total_mem < internal_conf->memory) { RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, " "requested: %" PRIu64 "M " @@ -334,7 +337,7 @@ static int memseg_primary_init(void) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int hpi_idx, msl_idx = 0; + int hpi_idx, msl_idx = 0, cur_socket; struct rte_memseg_list *msl; uint64_t max_mem, total_mem; struct internal_config *internal_conf = @@ -360,78 +363,80 @@ memseg_primary_init(void) /* create memseg lists */ for (hpi_idx = 0; hpi_idx < (int) internal_conf->num_hugepage_sizes; hpi_idx++) { - uint64_t max_type_mem, total_type_mem = 0; - uint64_t avail_mem; - int type_msl_idx, max_segs, avail_segs, total_segs = 0; struct hugepage_info *hpi; uint64_t hugepage_sz; hpi = &internal_conf->hugepage_info[hpi_idx]; hugepage_sz = hpi->hugepage_sz; - /* no NUMA support on FreeBSD */ + for (cur_socket = 0; cur_socket < (int)rte_socket_count(); cur_socket++) { + uint64_t 
avail_mem; + int type_msl_idx, max_segs, avail_segs, total_segs = 0, total_pages = 0; + uint64_t max_type_mem, total_type_mem = 0; + /* check if we've already exceeded total memory amount */ + if (total_mem >= max_mem) + goto endloop; - /* check if we've already exceeded total memory amount */ - if (total_mem >= max_mem) - break; + /* first, calculate theoretical limits according to config */ + max_type_mem = RTE_MIN(max_mem - total_mem, + (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20); + max_segs = RTE_MAX_MEMSEG_PER_TYPE; - /* first, calculate theoretical limits according to config */ - max_type_mem = RTE_MIN(max_mem - total_mem, - (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20); - max_segs = RTE_MAX_MEMSEG_PER_TYPE; + /* now, limit all of that to whatever will actually be + * available to us, because without dynamic allocation support, + * all of that extra memory will be sitting there being useless + * and slowing down core dumps in case of a crash. + * + * we need (N*2)-1 segments because we cannot guarantee that + * each segment will be IOVA-contiguous with the previous one, + * so we will allocate more and put spaces between segments + * that are non-contiguous. + */ + total_pages = hpi->num_pages[cur_socket]; + avail_segs = (total_pages * 2) - 1; + avail_mem = avail_segs * hugepage_sz; - /* now, limit all of that to whatever will actually be - * available to us, because without dynamic allocation support, - * all of that extra memory will be sitting there being useless - * and slowing down core dumps in case of a crash. - * - * we need (N*2)-1 segments because we cannot guarantee that - * each segment will be IOVA-contiguous with the previous one, - * so we will allocate more and put spaces between segments - * that are non-contiguous. 
- */ - avail_segs = (hpi->num_pages[0] * 2) - 1; - avail_mem = avail_segs * hugepage_sz; + max_type_mem = RTE_MIN(avail_mem, max_type_mem); + max_segs = RTE_MIN(avail_segs, max_segs); - max_type_mem = RTE_MIN(avail_mem, max_type_mem); - max_segs = RTE_MIN(avail_segs, max_segs); + type_msl_idx = 0; + while (total_type_mem < max_type_mem && + total_segs < max_segs) { + uint64_t cur_max_mem, cur_mem; + unsigned int n_segs; - type_msl_idx = 0; - while (total_type_mem < max_type_mem && - total_segs < max_segs) { - uint64_t cur_max_mem, cur_mem; - unsigned int n_segs; + if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, + "No more space in memseg lists, please increase %s\n", + RTE_STR(RTE_MAX_MEMSEG_LISTS)); + return -1; + } - if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { - RTE_LOG(ERR, EAL, - "No more space in memseg lists, please increase %s\n", - RTE_STR(RTE_MAX_MEMSEG_LISTS)); - return -1; - } - - msl = &mcfg->memsegs[msl_idx++]; - - cur_max_mem = max_type_mem - total_type_mem; - - cur_mem = get_mem_amount(hugepage_sz, - cur_max_mem); - n_segs = cur_mem / hugepage_sz; - - if (eal_memseg_list_init(msl, hugepage_sz, n_segs, - 0, type_msl_idx, false)) - return -1; - - total_segs += msl->memseg_arr.len; - total_type_mem = total_segs * hugepage_sz; - type_msl_idx++; - - if (memseg_list_alloc(msl)) { - RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); - return -1; + msl = &mcfg->memsegs[msl_idx++]; + + cur_max_mem = max_type_mem - total_type_mem; + + cur_mem = get_mem_amount(hugepage_sz, + cur_max_mem); + n_segs = cur_mem / hugepage_sz; + + if (eal_memseg_list_init(msl, hugepage_sz, n_segs, + cur_socket, type_msl_idx, false)) + return -1; + + total_segs += msl->memseg_arr.len; + total_type_mem = total_segs * hugepage_sz; + type_msl_idx++; + + if (memseg_list_alloc(msl)) { + RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); + return -1; + } } + total_mem += total_type_mem; } - total_mem += total_type_mem; } +endloop: return 0; } diff --git 
a/lib/librte_eal/freebsd/include/rte_os.h b/lib/librte_eal/freebsd/include/rte_os.h index eeb750cd81..ecb9d8d801 100644 --- a/lib/librte_eal/freebsd/include/rte_os.h +++ b/lib/librte_eal/freebsd/include/rte_os.h @@ -12,6 +12,7 @@ */ #include +#include typedef cpuset_t rte_cpuset_t; #define RTE_CPU_AND(dst, src1, src2) do \ @@ -49,4 +50,19 @@ typedef cpuset_t rte_cpuset_t; } while (0) #endif +static inline int offset_to_node(uint64_t offset) +{ + return (offset >> 12) & 0xF; +} + +static inline int offset_to_buffer(uint64_t offset) +{ + return (offset >> 16) & 0xFFFF; +} + +static inline uint64_t offset_make(int node, int buffer) +{ + return (((uint64_t)node & 0xF) << 12) | (((uint64_t)buffer & 0xFFFF) << 16); +} + #endif /* _RTE_OS_H_ */