FreeBSD contigmem and EAL NUMA support

quackerd 2021-02-05 06:22:43 +00:00 committed by quackerd
parent b1d36cf828
commit 025a453103
6 changed files with 349 additions and 206 deletions

View File

@@ -27,7 +27,7 @@
#define RTE_VER_PREFIX "DPDK"
/****** library defines ********/
#define RTE_LIBRTE_IEEE1588 1
/* EAL defines */
#define RTE_MAX_HEAPS 32
#define RTE_MAX_MEMSEG_LISTS 128
@@ -44,6 +44,7 @@
/* bsd module defines */
#define RTE_CONTIGMEM_MAX_NUM_BUFS 64
#define RTE_CONTIGMEM_DEFAULT_NUM_BUFS 1
#define RTE_CONTIGMEM_DEFAULT_NODE_AFFINITY 0x1
#define RTE_CONTIGMEM_DEFAULT_BUF_SIZE (512*1024*1024)
/* mempool defines */

View File

@@ -20,6 +20,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/eventhandler.h>
#include <sys/domainset.h>
#include <machine/bus.h>
@@ -31,6 +32,15 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#define CONTIGMEM_DEBUG
#ifdef CONTIGMEM_DEBUG
#define DBGPRINT(fmt, ...) printf(fmt, ##__VA_ARGS__)
#else
#define DBGPRINT(fmt, ...)
#endif
#define NEXT_CPU_NULL (-1)
struct contigmem_buffer {
void *addr;
int refcnt;
@@ -39,6 +49,7 @@ struct contigmem_buffer {
struct contigmem_vm_handle {
int buffer_index;
int node_index;
};
static int contigmem_load(void);
@@ -49,19 +60,46 @@ static d_mmap_single_t contigmem_mmap_single;
static d_open_t contigmem_open;
static d_close_t contigmem_close;
static uint64_t contigmem_node_affinity = RTE_CONTIGMEM_DEFAULT_NODE_AFFINITY;
static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS;
static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE;
#define CONTIGMEM_MAX_NODECNT RTE_MAX_NUMA_NODES
static eventhandler_tag contigmem_eh_tag;
static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
static struct contigmem_buffer contigmem_buffers[CONTIGMEM_MAX_NODECNT][RTE_CONTIGMEM_MAX_NUM_BUFS];
static struct cdev *contigmem_cdev = NULL;
static int contigmem_refcnt;
static int contigmem_nodecnt;
/*
 * mmap offset encoding:
 * bits 0-11:  page alignment
 * bits 12-15: node index
 * bits 16-31: buffer index
 */
static inline int offset_to_node(off_t offset)
{
return (offset >> 12) & 0xF;
}
static inline int offset_to_buffer(off_t offset)
{
return (offset >> 16) & 0xFFFF;
}
static inline off_t offset_make(int node, int buffer)
{
return (((off_t)node & 0xF) << 12) | (((off_t)buffer & 0xFFFF) << 16);
}
TUNABLE_QUAD("hw.contigmem.node_affinity", &contigmem_node_affinity);
TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers);
TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size);
static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem");
SYSCTL_QUAD(_hw_contigmem, OID_AUTO, node_affinity, CTLFLAG_RD,
&contigmem_node_affinity, 0, "The node/NUMA affinity of buffers.");
SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD,
&contigmem_num_buffers, 0, "Number of contigmem buffers allocated");
SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD,
@@ -110,12 +148,34 @@ static struct cdevsw contigmem_ops = {
.d_close = contigmem_close,
};
static void
contigmem_free()
{
for (int j = 0; j < contigmem_nodecnt; j++) {
for (int i = 0; i < contigmem_num_buffers; i++) {
if (contigmem_buffers[j][i].addr != NULL) {
DBGPRINT("contigmem_free: freeing contigmem node buffer %d, node %d, virt 0x%p, size 0x%lx\n",
i, j, contigmem_buffers[j][i].addr, contigmem_buffer_size);
contigfree(contigmem_buffers[j][i].addr,
contigmem_buffer_size, M_CONTIGMEM);
contigmem_buffers[j][i].addr = NULL;
}
if (mtx_initialized(&contigmem_buffers[j][i].mtx))
mtx_destroy(&contigmem_buffers[j][i].mtx);
}
}
}
static int
contigmem_load()
{
char index_string[8], description[32];
int i, error = 0;
int i, j, error = 0;
void *addr;
struct domainset ds;
struct domainset *rds;
ds.ds_policy = DOMAINSET_POLICY_ROUNDROBIN;
if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) {
printf("%d buffers requested is greater than %d allowed\n",
@@ -132,30 +192,51 @@ contigmem_load()
goto error;
}
for (i = 0; i < contigmem_num_buffers; i++) {
addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO,
0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
if (addr == NULL) {
printf("contigmalloc failed for buffer %d\n", i);
error = ENOMEM;
goto error;
/* detect the number of VM (NUMA) domains */
contigmem_nodecnt = vm_ndomains;
if (contigmem_nodecnt > CONTIGMEM_MAX_NODECNT) {
printf("contigmem_load: too many NUMA nodes detected: %d\n", contigmem_nodecnt);
error = EINVAL;
goto error;
}
DBGPRINT("contigmem_load: detected %d vmdomain\n", contigmem_nodecnt);
for (j = 0; j < contigmem_nodecnt; j++) {
if ((contigmem_node_affinity & (1ULL << j)) == 0) {
DBGPRINT("contigmem_load: skipping node %d...\n", j);
continue;
}
DOMAINSET_ZERO(&ds.ds_mask);
DOMAINSET_SET(j, &ds.ds_mask);
rds = domainset_create(&ds);
for (i = 0; i < contigmem_num_buffers; i++) {
DBGPRINT("contigmem_load: allocating 0x%lx bytes memory for buffer %d node %d\n", contigmem_buffer_size, i, j);
addr = contigmalloc_domainset(contigmem_buffer_size, M_CONTIGMEM, rds,
M_ZERO, 0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
if (addr == NULL) {
printf("contigmalloc failed for buffer %d node %d\n", i, j);
error = ENOMEM;
goto error;
}
printf("%2u: virt=%p phys=%p\n", i, addr,
(void *)pmap_kextract((vm_offset_t)addr));
printf("%2u: virt=%p phys=%p\n", i, addr,
(void *)pmap_kextract((vm_offset_t)addr));
mtx_init(&contigmem_buffers[i].mtx, "contigmem", NULL, MTX_DEF);
contigmem_buffers[i].addr = addr;
contigmem_buffers[i].refcnt = 0;
mtx_init(&contigmem_buffers[j][i].mtx, "contigmem", NULL, MTX_DEF);
contigmem_buffers[j][i].addr = addr;
contigmem_buffers[j][i].refcnt = 0;
snprintf(index_string, sizeof(index_string), "%d", i);
snprintf(description, sizeof(description),
"phys addr for buffer %d", i);
SYSCTL_ADD_PROC(NULL,
&SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO,
index_string, CTLTYPE_U64 | CTLFLAG_RD,
(void *)(uintptr_t)i, 0, contigmem_physaddr, "LU",
description);
snprintf(index_string, sizeof(index_string), "%ld", offset_make(j, i));
snprintf(description, sizeof(description),
"phys addr for node %d buffer %d", j, i);
SYSCTL_ADD_PROC(NULL,
&SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO,
index_string, CTLTYPE_U64 | CTLFLAG_RD,
(void *)(uintptr_t)offset_make(j, i), 0, contigmem_physaddr, "LU",
description);
}
}
contigmem_cdev = make_dev_credf(0, &contigmem_ops, 0, NULL, UID_ROOT,
@@ -164,24 +245,13 @@ contigmem_load()
return 0;
error:
for (i = 0; i < contigmem_num_buffers; i++) {
if (contigmem_buffers[i].addr != NULL) {
contigfree(contigmem_buffers[i].addr,
contigmem_buffer_size, M_CONTIGMEM);
contigmem_buffers[i].addr = NULL;
}
if (mtx_initialized(&contigmem_buffers[i].mtx))
mtx_destroy(&contigmem_buffers[i].mtx);
}
contigmem_free();
return error;
}
static int
contigmem_unload()
{
int i;
if (contigmem_refcnt > 0)
return EBUSY;
@@ -191,14 +261,7 @@ contigmem_unload()
if (contigmem_eh_tag != NULL)
EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag);
for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) {
if (contigmem_buffers[i].addr != NULL)
contigfree(contigmem_buffers[i].addr,
contigmem_buffer_size, M_CONTIGMEM);
if (mtx_initialized(&contigmem_buffers[i].mtx))
mtx_destroy(&contigmem_buffers[i].mtx);
}
contigmem_free();
return 0;
}
@@ -206,9 +269,12 @@ static int
contigmem_physaddr(SYSCTL_HANDLER_ARGS)
{
uint64_t physaddr;
int index = (int)(uintptr_t)arg1;
off_t offset = (off_t)(uintptr_t)arg1;
int index = offset_to_buffer(offset);
int node = offset_to_node(offset);
physaddr = (uint64_t)vtophys(contigmem_buffers[index].addr);
physaddr = (uint64_t)vtophys(contigmem_buffers[node][index].addr);
DBGPRINT("contigmem_physaddr sysctl: buffer %d node %d paddr 0x%lx\n", index, node, physaddr);
return sysctl_handle_64(oidp, &physaddr, 0, req);
}
@@ -239,7 +305,8 @@ contigmem_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
struct contigmem_vm_handle *vmh = handle;
struct contigmem_buffer *buf;
buf = &contigmem_buffers[vmh->buffer_index];
DBGPRINT("contigmem_cdev_pager_ctor: buffer %d node %d\n", vmh->buffer_index, vmh->node_index);
buf = &contigmem_buffers[vmh->node_index][vmh->buffer_index];
atomic_add_int(&contigmem_refcnt, 1);
@@ -258,7 +325,8 @@ contigmem_cdev_pager_dtor(void *handle)
struct contigmem_vm_handle *vmh = handle;
struct contigmem_buffer *buf;
buf = &contigmem_buffers[vmh->buffer_index];
DBGPRINT("contigmem_cdev_pager_dtor: buffer %d node %d\n", vmh->buffer_index, vmh->node_index);
buf = &contigmem_buffers[vmh->node_index][vmh->buffer_index];
mtx_lock(&buf->mtx);
buf->refcnt--;
@@ -335,14 +403,17 @@ contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
{
struct contigmem_vm_handle *vmh;
uint64_t buffer_index;
uint64_t node_index;
/*
 * The node and buffer indices are encoded in the offset: bits 12-15
 * carry the node index and bits 16-31 the buffer index (see
 * offset_to_node() and offset_to_buffer() above).
 */
buffer_index = *offset / PAGE_SIZE;
if (buffer_index >= contigmem_num_buffers)
buffer_index = offset_to_buffer(*offset);
node_index = offset_to_node(*offset);
DBGPRINT("contigmem_mmap_single: buffer %lu node %lu size 0x%lx\n", buffer_index, node_index, size);
if (buffer_index >= contigmem_num_buffers || node_index >= contigmem_nodecnt)
return EINVAL;
if (size > contigmem_buffer_size)
@@ -352,8 +423,9 @@ contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
if (vmh == NULL)
return ENOMEM;
vmh->buffer_index = buffer_index;
vmh->node_index = node_index;
*offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index].addr);
*offset = (vm_ooffset_t)vtophys(contigmem_buffers[node_index][buffer_index].addr);
*obj = cdev_pager_allocate(vmh, OBJT_DEVICE, &contigmem_cdev_pager_ops,
size, nprot, *offset, curthread->td_ucred);
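For context, here is a minimal userspace sketch of how a client would consume the new layout (illustrative only, not part of this commit): it rebuilds the offset encoding, queries the matching hw.contigmem.physaddr.<offset> sysctl, and mmaps that buffer through /dev/contigmem. Error handling is kept minimal.

/* Hypothetical userspace consumer of the per-node contigmem buffers. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

/* Same bit layout as the kernel helpers: node in bits 12-15,
 * buffer index in bits 16-31. */
static uint64_t
offset_make(int node, int buffer)
{
	return (((uint64_t)node & 0xF) << 12) |
	    (((uint64_t)buffer & 0xFFFF) << 16);
}

int
main(void)
{
	int node = 0, buf = 0;	/* first buffer on node 0 */
	uint64_t off = offset_make(node, buf);
	uint64_t physaddr;
	int64_t bufsz;
	size_t len;
	char name[64];

	len = sizeof(bufsz);
	if (sysctlbyname("hw.contigmem.buffer_size", &bufsz, &len, NULL, 0) != 0)
		return 1;

	/* The physaddr sysctl leaf is named after the encoded offset. */
	snprintf(name, sizeof(name), "hw.contigmem.physaddr.%ju",
	    (uintmax_t)off);
	len = sizeof(physaddr);
	if (sysctlbyname(name, &physaddr, &len, NULL, 0) != 0)
		return 1;

	int fd = open("/dev/contigmem", O_RDWR);
	if (fd < 0)
		return 1;

	/* contigmem_mmap_single() decodes node/buffer back out of the offset. */
	void *va = mmap(NULL, (size_t)bufsz, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, (off_t)off);
	if (va == MAP_FAILED)
		return 1;

	printf("node %d buffer %d: virt %p phys 0x%jx\n",
	    node, buf, va, (uintmax_t)physaddr);
	return 0;
}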

View File

@@ -5,6 +5,7 @@
#include <sys/sysctl.h>
#include <sys/mman.h>
#include <string.h>
#include <immintrin.h>
#include <rte_log.h>
#include <fcntl.h>
@@ -13,6 +14,7 @@
#include "eal_hugepages.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "rte_build_config.h"
#define CONTIGMEM_DEV "/dev/contigmem"
@@ -48,6 +50,21 @@ create_shared_memory(const char *filename, const size_t mem_size)
return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT);
}
#define NEXT_CPU_NULL (-1)
static inline int
cmask_get_next_cpu(uint64_t *mask)
{
int ffs = ffsll(*mask);
if (ffs == 0)
return NEXT_CPU_NULL; /* mask exhausted */
*mask &= ~(1ull << (ffs - 1));
return ffs - 1;
}
static inline int
cmask_get_num_cpus(const uint64_t mask)
{
return _mm_popcnt_u64(mask);
}
/*
 * No hugepage support on FreeBSD; we emulate it using the contigmem driver.
 */
@@ -57,6 +74,8 @@ eal_hugepage_info_init(void)
size_t sysctl_size;
int num_buffers, fd, error;
int64_t buffer_size;
uint64_t node_affinity = 0;
int num_nodes;
struct internal_config *internal_conf =
eal_get_internal_configuration();
@@ -76,6 +95,14 @@ eal_hugepage_info_init(void)
return -1;
}
sysctl_size = sizeof(node_affinity);
error = sysctlbyname("hw.contigmem.node_affinity", &node_affinity, &sysctl_size, NULL, 0);
num_nodes = cmask_get_num_cpus(node_affinity);
if (error != 0 || num_nodes == 0 || num_nodes > RTE_MAX_NUMA_NODES) {
RTE_LOG(ERR, EAL, "could not read or invalid sysctl hw.contigmem.node_affinity 0x%lx\n", node_affinity);
return -1;
}
sysctl_size = sizeof(buffer_size);
error = sysctlbyname("hw.contigmem.buffer_size", &buffer_size,
&sysctl_size, NULL, 0);
@@ -101,9 +128,18 @@ eal_hugepage_info_init(void)
RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n",
num_buffers, (int)(buffer_size>>10));
RTE_LOG(INFO, EAL, "Contigmem driver's node affinity is 0x%lx, %d domains in total\n",
node_affinity, num_nodes);
strlcpy(hpi->hugedir, CONTIGMEM_DEV, sizeof(hpi->hugedir));
hpi->hugepage_sz = buffer_size;
hpi->num_pages[0] = num_buffers;
for (int i = 0; i < num_nodes; i++) {
if ((node_affinity & (1ULL << i)) != 0) {
hpi->num_pages[i] = num_buffers;
} else {
hpi->num_pages[i] = 0;
}
}
hpi->lock_descriptor = fd;
/* for no shared files mode, do not create shared memory config */
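As an illustration (standalone sketch, with a made-up mask value), this is how the cmask helpers above enumerate the nodes selected by hw.contigmem.node_affinity:

/* Illustrative walk over a node-affinity mask; 0x5 (nodes 0 and 2)
 * is a made-up example value. */
#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffsll() */

#define NEXT_CPU_NULL (-1)

static inline int
cmask_get_next_cpu(uint64_t *mask)
{
	int bit = ffsll(*mask);
	if (bit == 0)
		return NEXT_CPU_NULL;	/* mask exhausted */
	*mask &= ~(1ull << (bit - 1));
	return bit - 1;
}

int
main(void)
{
	uint64_t mask = 0x5;
	int node;

	while ((node = cmask_get_next_cpu(&mask)) != NEXT_CPU_NULL)
		printf("node %d enabled\n", node);
	return 0;
}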

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2014 Intel Corporation
*/
#include <stdio.h>
#include <unistd.h>
#include <sys/sysctl.h>
@@ -16,9 +16,10 @@
/* Limited topology information is available on FreeBSD: lcores map 1:1 to CPUs; NUMA info comes from the dev.cpu.N.%domain sysctl */
unsigned
eal_cpu_core_id(__rte_unused unsigned lcore_id)
eal_cpu_core_id(unsigned lcore_id)
{
return 0;
/* DPDK uses a 1:1 lcore-to-core mapping */
return lcore_id;
}
static int
@@ -36,9 +37,21 @@ eal_get_ncpus(void)
}
unsigned
eal_cpu_socket_id(__rte_unused unsigned cpu_id)
eal_cpu_socket_id(unsigned cpu_id)
{
return 0;
int error;
int domain;
size_t domain_sz = sizeof(domain);
char sysctl_str[32];
snprintf(sysctl_str, sizeof(sysctl_str), "dev.cpu.%d.%%domain", cpu_id);
error = sysctlbyname(sysctl_str, &domain, &domain_sz, NULL, 0);
if (error < 0) {
RTE_LOG(WARNING, EAL, "Failed to get socket id for core %u, returning 0...\n", cpu_id);
domain = 0;
}
return domain;
}
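The dev.cpu.N.%domain sysctl used above can also be probed directly; here is a minimal standalone sketch (assuming a FreeBSD host) with the same fall-back-to-0 behaviour as eal_cpu_socket_id():

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int ncpu, domain, cpu;
	size_t sz = sizeof(ncpu);
	char name[32];

	if (sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0) != 0)
		return 1;
	for (cpu = 0; cpu < ncpu; cpu++) {
		/* "%%domain" emits the literal sysctl leaf name "%domain" */
		snprintf(name, sizeof(name), "dev.cpu.%d.%%domain", cpu);
		sz = sizeof(domain);
		if (sysctlbyname(name, &domain, &sz, NULL, 0) != 0)
			domain = 0;	/* fall back like eal_cpu_socket_id() */
		printf("cpu %d -> domain %d\n", cpu, domain);
	}
	return 0;
}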
/* Check if a cpu is present by the presence of the

View File

@@ -107,114 +107,117 @@ rte_eal_hugepage_init(void)
hpi = &internal_conf->hugepage_info[i];
page_sz = hpi->hugepage_sz;
max_pages = hpi->num_pages[0];
mem_needed = RTE_ALIGN_CEIL(internal_conf->memory - total_mem,
page_sz);
for (int k = 0; k < RTE_MAX_NUMA_NODES; k++) {
max_pages = hpi->num_pages[k];
n_pages = RTE_MIN(mem_needed / page_sz, max_pages);
mem_needed = RTE_ALIGN_CEIL(internal_conf->memory - total_mem,
page_sz);
for (j = 0; j < n_pages; j++) {
struct rte_memseg_list *msl;
struct rte_fbarray *arr;
struct rte_memseg *seg;
int msl_idx, ms_idx;
rte_iova_t physaddr;
int error;
size_t sysctl_size = sizeof(physaddr);
char physaddr_str[64];
bool is_adjacent;
n_pages = RTE_MIN(mem_needed / page_sz, max_pages);
/* first, check if this segment is IOVA-adjacent to
* the previous one.
*/
snprintf(physaddr_str, sizeof(physaddr_str),
"hw.contigmem.physaddr.%d", j);
error = sysctlbyname(physaddr_str, &physaddr,
&sysctl_size, NULL, 0);
if (error < 0) {
RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
"from %s\n", j, hpi->hugedir);
return -1;
}
for (j = 0; j < n_pages; j++) {
struct rte_memseg_list *msl;
struct rte_fbarray *arr;
struct rte_memseg *seg;
int msl_idx, ms_idx;
rte_iova_t physaddr;
int error;
size_t sysctl_size = sizeof(physaddr);
char physaddr_str[64];
bool is_adjacent;
is_adjacent = prev_end != 0 && physaddr == prev_end;
prev_end = physaddr + hpi->hugepage_sz;
/* first, check if this segment is IOVA-adjacent to
* the previous one.
*/
snprintf(physaddr_str, sizeof(physaddr_str),
"hw.contigmem.physaddr.%ld", offset_make(k, j));
error = sysctlbyname(physaddr_str, &physaddr,
&sysctl_size, NULL, 0);
if (error < 0) {
RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
"from %s\n", j, hpi->hugedir);
return -1;
}
for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
msl_idx++) {
bool empty, need_hole;
msl = &mcfg->memsegs[msl_idx];
is_adjacent = prev_end != 0 && physaddr == prev_end;
prev_end = physaddr + hpi->hugepage_sz;
for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
msl_idx++) {
bool empty, need_hole;
msl = &mcfg->memsegs[msl_idx];
arr = &msl->memseg_arr;
if (msl->page_sz != page_sz || msl->socket_id != k)
continue;
empty = arr->count == 0;
/* we need a hole if this isn't an empty memseg
* list, and if previous segment was not
* adjacent to current one.
*/
need_hole = !empty && !is_adjacent;
/* we need 1, plus hole if not adjacent */
ms_idx = rte_fbarray_find_next_n_free(arr,
0, 1 + (need_hole ? 1 : 0));
/* memseg list is full? */
if (ms_idx < 0)
continue;
if (need_hole && prev_ms_idx == ms_idx - 1)
ms_idx++;
prev_ms_idx = ms_idx;
break;
}
if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
return -1;
}
arr = &msl->memseg_arr;
seg = rte_fbarray_get(arr, ms_idx);
if (msl->page_sz != page_sz)
continue;
addr = RTE_PTR_ADD(msl->base_va,
(size_t)msl->page_sz * ms_idx);
empty = arr->count == 0;
/* address is already mapped in memseg list, so using
* MAP_FIXED here is safe.
*/
addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
MAP_SHARED | MAP_FIXED,
hpi->lock_descriptor,
offset_make(k, j));
if (addr == MAP_FAILED) {
RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
j, hpi->hugedir);
return -1;
}
/* we need a hole if this isn't an empty memseg
* list, and if previous segment was not
* adjacent to current one.
*/
need_hole = !empty && !is_adjacent;
seg->addr = addr;
seg->iova = physaddr;
seg->hugepage_sz = page_sz;
seg->len = page_sz;
seg->nchannel = mcfg->nchannel;
seg->nrank = mcfg->nrank;
seg->socket_id = k;
/* we need 1, plus hole if not adjacent */
ms_idx = rte_fbarray_find_next_n_free(arr,
0, 1 + (need_hole ? 1 : 0));
rte_fbarray_set_used(arr, ms_idx);
/* memseg list is full? */
if (ms_idx < 0)
continue;
RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
PRIx64", len %zu, socket %d\n",
seg_idx++, addr, physaddr, page_sz, k);
if (need_hole && prev_ms_idx == ms_idx - 1)
ms_idx++;
prev_ms_idx = ms_idx;
break;
total_mem += seg->len;
}
if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
return -1;
}
arr = &msl->memseg_arr;
seg = rte_fbarray_get(arr, ms_idx);
addr = RTE_PTR_ADD(msl->base_va,
(size_t)msl->page_sz * ms_idx);
/* address is already mapped in memseg list, so using
* MAP_FIXED here is safe.
*/
addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
MAP_SHARED | MAP_FIXED,
hpi->lock_descriptor,
j * EAL_PAGE_SIZE);
if (addr == MAP_FAILED) {
RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
j, hpi->hugedir);
return -1;
}
seg->addr = addr;
seg->iova = physaddr;
seg->hugepage_sz = page_sz;
seg->len = page_sz;
seg->nchannel = mcfg->nchannel;
seg->nrank = mcfg->nrank;
seg->socket_id = 0;
rte_fbarray_set_used(arr, ms_idx);
RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
PRIx64", len %zu\n",
seg_idx++, addr, physaddr, page_sz);
total_mem += seg->len;
}
if (total_mem >= internal_conf->memory)
break;
}
}
if (total_mem < internal_conf->memory) {
RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
"requested: %" PRIu64 "M "
@@ -334,7 +337,7 @@ static int
memseg_primary_init(void)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
int hpi_idx, msl_idx = 0;
int hpi_idx, msl_idx = 0, cur_socket;
struct rte_memseg_list *msl;
uint64_t max_mem, total_mem;
struct internal_config *internal_conf =
@@ -360,78 +363,80 @@ memseg_primary_init(void)
/* create memseg lists */
for (hpi_idx = 0; hpi_idx < (int) internal_conf->num_hugepage_sizes;
hpi_idx++) {
uint64_t max_type_mem, total_type_mem = 0;
uint64_t avail_mem;
int type_msl_idx, max_segs, avail_segs, total_segs = 0;
struct hugepage_info *hpi;
uint64_t hugepage_sz;
hpi = &internal_conf->hugepage_info[hpi_idx];
hugepage_sz = hpi->hugepage_sz;
/* no NUMA support on FreeBSD */
for (cur_socket = 0; cur_socket < (int)rte_socket_count(); cur_socket++) {
uint64_t avail_mem;
int type_msl_idx, max_segs, avail_segs, total_segs = 0, total_pages = 0;
uint64_t max_type_mem, total_type_mem = 0;
/* check if we've already exceeded total memory amount */
if (total_mem >= max_mem)
goto endloop;
/* check if we've already exceeded total memory amount */
if (total_mem >= max_mem)
break;
/* first, calculate theoretical limits according to config */
max_type_mem = RTE_MIN(max_mem - total_mem,
(uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
max_segs = RTE_MAX_MEMSEG_PER_TYPE;
/* first, calculate theoretical limits according to config */
max_type_mem = RTE_MIN(max_mem - total_mem,
(uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
max_segs = RTE_MAX_MEMSEG_PER_TYPE;
/* now, limit all of that to whatever will actually be
* available to us, because without dynamic allocation support,
* all of that extra memory will be sitting there being useless
* and slowing down core dumps in case of a crash.
*
* we need (N*2)-1 segments because we cannot guarantee that
* each segment will be IOVA-contiguous with the previous one,
* so we will allocate more and put spaces between segments
* that are non-contiguous.
*/
total_pages = hpi->num_pages[cur_socket];
if (total_pages == 0)
continue; /* no contigmem buffers on this socket */
avail_segs = (total_pages * 2) - 1;
avail_mem = avail_segs * hugepage_sz;
/* now, limit all of that to whatever will actually be
* available to us, because without dynamic allocation support,
* all of that extra memory will be sitting there being useless
* and slowing down core dumps in case of a crash.
*
* we need (N*2)-1 segments because we cannot guarantee that
* each segment will be IOVA-contiguous with the previous one,
* so we will allocate more and put spaces between segments
* that are non-contiguous.
*/
avail_segs = (hpi->num_pages[0] * 2) - 1;
avail_mem = avail_segs * hugepage_sz;
max_type_mem = RTE_MIN(avail_mem, max_type_mem);
max_segs = RTE_MIN(avail_segs, max_segs);
max_type_mem = RTE_MIN(avail_mem, max_type_mem);
max_segs = RTE_MIN(avail_segs, max_segs);
type_msl_idx = 0;
while (total_type_mem < max_type_mem &&
total_segs < max_segs) {
uint64_t cur_max_mem, cur_mem;
unsigned int n_segs;
type_msl_idx = 0;
while (total_type_mem < max_type_mem &&
total_segs < max_segs) {
uint64_t cur_max_mem, cur_mem;
unsigned int n_segs;
if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
RTE_LOG(ERR, EAL,
"No more space in memseg lists, please increase %s\n",
RTE_STR(RTE_MAX_MEMSEG_LISTS));
return -1;
}
if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
RTE_LOG(ERR, EAL,
"No more space in memseg lists, please increase %s\n",
RTE_STR(RTE_MAX_MEMSEG_LISTS));
return -1;
}
msl = &mcfg->memsegs[msl_idx++];
cur_max_mem = max_type_mem - total_type_mem;
cur_mem = get_mem_amount(hugepage_sz,
cur_max_mem);
n_segs = cur_mem / hugepage_sz;
if (eal_memseg_list_init(msl, hugepage_sz, n_segs,
0, type_msl_idx, false))
return -1;
total_segs += msl->memseg_arr.len;
total_type_mem = total_segs * hugepage_sz;
type_msl_idx++;
if (memseg_list_alloc(msl)) {
RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
return -1;
msl = &mcfg->memsegs[msl_idx++];
cur_max_mem = max_type_mem - total_type_mem;
cur_mem = get_mem_amount(hugepage_sz,
cur_max_mem);
n_segs = cur_mem / hugepage_sz;
if (eal_memseg_list_init(msl, hugepage_sz, n_segs,
cur_socket, type_msl_idx, false))
return -1;
total_segs += msl->memseg_arr.len;
total_type_mem = total_segs * hugepage_sz;
type_msl_idx++;
if (memseg_list_alloc(msl)) {
RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
return -1;
}
}
total_mem += total_type_mem;
}
total_mem += total_type_mem;
}
endloop:
return 0;
}
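To make the (N*2)-1 sizing above concrete: with N = 4 buffers on a socket, the worst case is that no buffer is IOVA-contiguous with the previous one, so every segment except the first is preceded by a one-slot hole, i.e. 4 + 3 = 7 = (4*2)-1 memseg entries. A tiny self-check of that arithmetic (illustrative only):

#include <assert.h>

/* Worst case: each mapped buffer needs one slot, plus a hole slot
 * before every buffer but the first. */
static int
worst_case_slots(int n_pages)
{
	return n_pages + (n_pages - 1);	/* == (n_pages * 2) - 1 */
}

int
main(void)
{
	int n;

	for (n = 1; n <= 64; n++)
		assert(worst_case_slots(n) == (n * 2) - 1);
	return 0;
}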

View File

@@ -12,6 +12,7 @@
*/
#include <pthread_np.h>
#include <stdint.h>
typedef cpuset_t rte_cpuset_t;
#define RTE_CPU_AND(dst, src1, src2) do \
@@ -49,4 +50,19 @@ typedef cpuset_t rte_cpuset_t;
} while (0)
#endif
static inline int offset_to_node(uint64_t offset)
{
return (offset >> 12) & 0xF;
}
static inline int offset_to_buffer(uint64_t offset)
{
return (offset >> 16) & 0xFFFF;
}
static inline uint64_t offset_make(int node, int buffer)
{
return (((uint64_t)node & 0xF) << 12) | (((uint64_t)buffer & 0xFFFF) << 16);
}
#endif /* _RTE_OS_H_ */