memory: fix for multi process support

Signed-off-by: Intel
Intel 2012-12-20 00:00:00 +01:00 committed by Thomas Monjalon
parent b6a468ad41
commit 916e4f4f4e
17 changed files with 552 additions and 157 deletions

View File

@ -35,6 +35,7 @@
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <unistd.h>
#include <sys/queue.h>
@ -51,6 +52,7 @@
#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_atomic.h>
#include "test.h"
@ -80,7 +82,9 @@
static rte_spinlock_t sl, sl_try;
static rte_spinlock_t sl_tab[RTE_MAX_LCORE];
static rte_spinlock_recursive_t slr;
static unsigned count;
static unsigned count = 0;
static rte_atomic32_t synchro;
static int
test_spinlock_per_core(__attribute__((unused)) void *arg)
@ -126,60 +130,77 @@ test_spinlock_recursive_per_core(__attribute__((unused)) void *arg)
return 0;
}
static volatile int count1, count2;
static rte_spinlock_t lk = RTE_SPINLOCK_INITIALIZER;
static unsigned int max = 10000000; /* 10M */
static volatile uint64_t looptime[RTE_MAX_LCORE];
static uint64_t lock_count[RTE_MAX_LCORE] = {0};
#define TIME_S 5
static int
load_loop_fn(__attribute__((unused)) void *dummy)
load_loop_fn(void *func_param)
{
uint64_t end, begin;
uint64_t time_diff = 0, begin;
uint64_t hz = rte_get_hpet_hz();
uint64_t lcount = 0;
const int use_lock = *(int*)func_param;
const unsigned lcore = rte_lcore_id();
/* slaves wait for synchro from master */
if (lcore != rte_get_master_lcore())
while (rte_atomic32_read(&synchro) == 0);
begin = rte_get_hpet_cycles();
unsigned int i = 0;
for (i = 0; i < max; i++) {
rte_spinlock_lock(&lk);
count1++;
rte_spinlock_unlock(&lk);
count2++;
while (time_diff / hz < TIME_S) {
if (use_lock)
rte_spinlock_lock(&lk);
lcount++;
if (use_lock)
rte_spinlock_unlock(&lk);
/* delay to make lock duty cycle slightly realistic */
rte_delay_us(1);
time_diff = rte_get_hpet_cycles() - begin;
}
end = rte_get_hpet_cycles();
looptime[rte_lcore_id()] = end - begin;
lock_count[lcore] = lcount;
return 0;
}
static int
test_spinlock_load(void)
test_spinlock_perf(void)
{
if (rte_lcore_count() <= 1) {
printf("no cores counted\n");
return -1;
}
printf ("Running %u tests.......\n", max);
printf ("Number of cores = %u\n", rte_lcore_count());
unsigned int i;
uint64_t total = 0;
int lock = 0;
const unsigned lcore = rte_lcore_id();
printf("\nTest with no lock on single core...\n");
load_loop_fn(&lock);
printf("Core [%u] count = %"PRIu64"\n", lcore, lock_count[lcore]);
memset(lock_count, 0, sizeof(lock_count));
printf("\nTest with lock on single core...\n");
lock = 1;
load_loop_fn(&lock);
printf("Core [%u] count = %"PRIu64"\n", lcore, lock_count[lcore]);
memset(lock_count, 0, sizeof(lock_count));
printf("\nTest with lock on %u cores...\n", rte_lcore_count());
/* Clear synchro and start slaves */
rte_atomic32_set(&synchro, 0);
rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
/* start synchro and launch test on master */
rte_atomic32_set(&synchro, 1);
load_loop_fn(&lock);
rte_eal_mp_remote_launch(load_loop_fn, NULL , CALL_MASTER);
rte_eal_mp_wait_lcore();
unsigned int k = 0;
uint64_t avgtime = 0;
RTE_LCORE_FOREACH(k) {
printf("Core [%u] time = %"PRIu64"\n", k, looptime[k]);
avgtime += looptime[k];
RTE_LCORE_FOREACH(i) {
printf("Core [%u] count = %"PRIu64"\n", i, lock_count[i]);
total += lock_count[i];
}
avgtime = avgtime / rte_lcore_count();
printf("Average time = %"PRIu64"\n", avgtime);
printf("Total count = %"PRIu64"\n", total);
int check = 0;
check = max * rte_lcore_count();
if (count1 == check && count2 != check)
printf("Passed Load test\n");
else {
printf("Failed load test\n");
return -1;
}
return 0;
}
@ -246,9 +267,6 @@ test_spinlock(void)
rte_eal_mp_wait_lcore();
if (test_spinlock_load() < 0)
return -1;
rte_spinlock_recursive_lock(&slr);
/*
@ -313,5 +331,8 @@ test_spinlock(void)
rte_spinlock_recursive_unlock(&slr);
rte_spinlock_recursive_unlock(&slr);
if (test_spinlock_perf() < 0)
return -1;
return ret;
}

View File

@ -43,6 +43,7 @@
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_log.h>
#include "eal_private.h"
@ -93,12 +94,18 @@ rte_dump_physmem_layout(void)
for (i=0; i<RTE_MAX_MEMSEG; i++) {
if (mcfg->memseg[i].addr == NULL)
break;
printf("phys:0x%"PRIx64", len:0x%"PRIx64", virt:%p, "
"socket_id:%"PRId32"\n",
printf("Segment %u: phys:0x%"PRIx64", len:0x%"PRIx64", "
"virt:%p, socket_id:%"PRId32", "
"hugepage_sz:0x%"PRIx64", nchannel:%"PRIx32", "
"nrank:%"PRIx32"\n", i,
mcfg->memseg[i].phys_addr,
mcfg->memseg[i].len,
mcfg->memseg[i].addr,
mcfg->memseg[i].socket_id);
mcfg->memseg[i].socket_id,
mcfg->memseg[i].hugepage_sz,
mcfg->memseg[i].nchannel,
mcfg->memseg[i].nrank);
}
}

View File

@ -46,6 +46,7 @@
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
@ -54,10 +55,28 @@
#include "eal_private.h"
/* internal copy of free memory segments */
static struct rte_memseg free_memseg[RTE_MAX_MEMSEG];
static struct rte_memseg *free_memseg = NULL;
/* pointer to last reserved memzone */
static unsigned memzone_idx;
static inline const struct rte_memzone *
memzone_lookup_thread_unsafe(const char *name)
{
const struct rte_mem_config *mcfg;
unsigned i = 0;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
/*
* the algorithm is not optimal (linear), but there are few
* zones and this function should be called at init only
*/
for (i = 0; i < RTE_MAX_MEMZONE && mcfg->memzone[i].addr != NULL; i++) {
if (!strncmp(name, mcfg->memzone[i].name, RTE_MEMZONE_NAMESIZE))
return &mcfg->memzone[i];
}
return NULL;
}
/*
* Return a pointer to a correctly filled memzone descriptor. If the
@ -71,60 +90,30 @@ rte_memzone_reserve(const char *name, uint64_t len, int socket_id,
len, socket_id, flags, CACHE_LINE_SIZE);
}
/*
* Return a pointer to a correctly filled memzone descriptor (with a
* specified alignment). If the allocation cannot be done, return NULL.
*/
const struct rte_memzone *
rte_memzone_reserve_aligned(const char *name, uint64_t len,
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, uint64_t len,
int socket_id, unsigned flags, unsigned align)
{
struct rte_config *config;
struct rte_mem_config *mcfg;
unsigned i = 0;
int memseg_idx = -1;
uint64_t requested_len;
uint64_t addr_offset, requested_len;
uint64_t memseg_len = 0;
phys_addr_t memseg_physaddr;
void *memseg_addr;
uintptr_t addr_offset;
/* if secondary processes return error */
if (rte_eal_process_type() == RTE_PROC_SECONDARY){
RTE_LOG(ERR, EAL, "%s(): Not allowed in secondary process\n", __func__);
rte_errno = E_RTE_SECONDARY;
return NULL;
}
/* if alignment is not a power of two */
if (!rte_is_power_of_2(align)) {
RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
align);
rte_errno = EINVAL;
return NULL;
}
/* alignment less than cache size is not allowed */
if (align < CACHE_LINE_SIZE)
align = CACHE_LINE_SIZE;
/* get pointer to global configuration */
config = rte_eal_get_configuration();
mcfg = rte_eal_get_configuration()->mem_config;
/* no more room in config */
if (memzone_idx >= RTE_MAX_MEMZONE) {
if (mcfg->memzone_idx >= RTE_MAX_MEMZONE) {
RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
rte_errno = ENOSPC;
return NULL;
}
/* both sizes cannot be explicitly called for */
if ((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB)) {
rte_errno = EINVAL;
return NULL;
}
/* zone already exist */
if (rte_memzone_lookup(name) != NULL) {
if ((memzone_lookup_thread_unsafe(name)) != NULL) {
RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
__func__, name);
rte_errno = EEXIST;
@ -133,9 +122,14 @@ rte_memzone_reserve_aligned(const char *name, uint64_t len,
/* align length on cache boundary */
len += CACHE_LINE_MASK;
len &= ~((uint64_t)CACHE_LINE_MASK);
len &= ~((uint64_t) CACHE_LINE_MASK);
/* save original length */
requested_len = len;
/* reserve extra space for future alignment */
if (len)
len += align;
/* save requested length */
requested_len = len;
@ -146,7 +140,6 @@ rte_memzone_reserve_aligned(const char *name, uint64_t len,
/* find the smallest segment matching requirements */
for (i = 0; i < RTE_MAX_MEMSEG; i++) {
/* last segment */
if (free_memseg[i].addr == NULL)
break;
@ -201,8 +194,8 @@ rte_memzone_reserve_aligned(const char *name, uint64_t len,
* try allocating again without the size parameter, otherwise fail.
*/
if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) &&
((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB)))
return rte_memzone_reserve_aligned(name, len - align,
((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB)))
return memzone_reserve_aligned_thread_unsafe(name, len - align,
socket_id, 0, align);
RTE_LOG(ERR, EAL, "%s(): No appropriate segment found\n", __func__);
@ -211,13 +204,12 @@ rte_memzone_reserve_aligned(const char *name, uint64_t len,
}
/* get offset needed to adjust alignment */
addr_offset = (uintptr_t) RTE_PTR_SUB(
RTE_ALIGN_CEIL(free_memseg[memseg_idx].addr, (uintptr_t) align),
(uintptr_t) free_memseg[memseg_idx].addr);
addr_offset = RTE_ALIGN_CEIL(free_memseg[memseg_idx].phys_addr, align) -
free_memseg[memseg_idx].phys_addr;
/* save aligned physical and virtual addresses */
memseg_physaddr = free_memseg[memseg_idx].phys_addr + addr_offset;
memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr, addr_offset);
memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr, (uintptr_t) addr_offset);
/* if we are looking for the biggest memzone */
if (requested_len == 0)
@ -233,7 +225,7 @@ rte_memzone_reserve_aligned(const char *name, uint64_t len,
(char *)free_memseg[memseg_idx].addr + len;
/* fill the zone in config */
struct rte_memzone *mz = &config->mem_config->memzone[memzone_idx++];
struct rte_memzone *mz = &mcfg->memzone[mcfg->memzone_idx++];
rte_snprintf(mz->name, sizeof(mz->name), "%s", name);
mz->phys_addr = memseg_physaddr;
mz->addr = memseg_addr;
@ -245,51 +237,93 @@ rte_memzone_reserve_aligned(const char *name, uint64_t len,
return mz;
}
/*
* Return a pointer to a correctly filled memzone descriptor (with a
* specified alignment). If the allocation cannot be done, return NULL.
*/
const struct rte_memzone *
rte_memzone_reserve_aligned(const char *name, uint64_t len,
int socket_id, unsigned flags, unsigned align)
{
struct rte_mem_config *mcfg;
const struct rte_memzone *mz = NULL;
/* both sizes cannot be explicitly called for */
if ((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB)) {
rte_errno = EINVAL;
return NULL;
}
/* if alignment is not a power of two */
if (!rte_is_power_of_2(align)) {
RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
align);
rte_errno = EINVAL;
return NULL;
}
/* alignment less than cache size is not allowed */
if (align < CACHE_LINE_SIZE)
align = CACHE_LINE_SIZE;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
rte_rwlock_write_lock(&mcfg->mlock);
mz = memzone_reserve_aligned_thread_unsafe(
name, len, socket_id, flags, align);
rte_rwlock_write_unlock(&mcfg->mlock);
return mz;
}
/*
* Lookup for the memzone identified by the given name
*/
const struct rte_memzone *
rte_memzone_lookup(const char *name)
{
const struct rte_mem_config *mcfg;
unsigned i = 0;
struct rte_mem_config *mcfg;
const struct rte_memzone *memzone = NULL;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
rte_rwlock_read_lock(&mcfg->mlock);
/*
* the algorithm is not optimal (linear), but there are few
* zones and this function should be called at init only
*/
for (i = 0; i < RTE_MAX_MEMZONE && mcfg->memzone[i].addr != NULL; i++) {
if (!strncmp(name, mcfg->memzone[i].name, RTE_MEMZONE_NAMESIZE))
return &mcfg->memzone[i];
}
return NULL;
memzone = memzone_lookup_thread_unsafe(name);
rte_rwlock_read_unlock(&mcfg->mlock);
return memzone;
}
/* Dump all reserved memory zones on console */
void
rte_memzone_dump(void)
{
const struct rte_mem_config *mcfg;
struct rte_mem_config *mcfg;
unsigned i = 0;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
rte_rwlock_read_lock(&mcfg->mlock);
/* dump all zones */
for (i=0; i<RTE_MAX_MEMZONE; i++) {
if (mcfg->memzone[i].addr == NULL)
break;
printf("name:<%s>, phys:0x%"PRIx64", len:0x%"PRIx64""
", virt:%p, socket_id:%"PRId32"\n",
printf("Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%"PRIx64""
", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
mcfg->memzone[i].name,
mcfg->memzone[i].phys_addr,
mcfg->memzone[i].len,
mcfg->memzone[i].addr,
mcfg->memzone[i].socket_id);
mcfg->memzone[i].socket_id,
mcfg->memzone[i].flags);
}
rte_rwlock_read_unlock(&mcfg->mlock);
}
/*
@ -337,25 +371,30 @@ memseg_sanitize(struct rte_memseg *memseg)
int
rte_eal_memzone_init(void)
{
struct rte_config *config;
struct rte_mem_config *mcfg;
const struct rte_memseg *memseg;
unsigned i = 0;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
/* mirror the runtime memsegs from config */
free_memseg = mcfg->free_memseg;
/* secondary processes don't need to initialise anything */
if (rte_eal_process_type() == RTE_PROC_SECONDARY)
return 0;
/* get pointer to global configuration */
config = rte_eal_get_configuration();
memseg = rte_eal_get_physmem_layout();
if (memseg == NULL) {
RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
return -1;
}
rte_rwlock_write_lock(&mcfg->mlock);
/* duplicate the memsegs from config */
memcpy(free_memseg, memseg, sizeof(free_memseg));
memcpy(free_memseg, memseg, sizeof(struct rte_memseg) * RTE_MAX_MEMSEG);
/* make all zones cache-aligned */
for (i=0; i<RTE_MAX_MEMSEG; i++) {
@ -363,13 +402,16 @@ rte_eal_memzone_init(void)
break;
if (memseg_sanitize(&free_memseg[i]) < 0) {
RTE_LOG(ERR, EAL, "%s(): Sanity check failed\n", __func__);
rte_rwlock_write_unlock(&mcfg->mlock);
return -1;
}
}
/* delete all zones */
memzone_idx = 0;
memset(config->mem_config->memzone, 0, sizeof(config->mem_config->memzone));
mcfg->memzone_idx = 0;
memset(mcfg->memzone, 0, sizeof(mcfg->memzone));
rte_rwlock_write_unlock(&mcfg->mlock);
return 0;
}

View File

@ -116,6 +116,7 @@ rte_dump_tailq(void)
mcfg = rte_eal_get_configuration()->mem_config;
rte_rwlock_read_lock(&mcfg->qlock);
for (i=0; i < RTE_MAX_TAILQ; i++) {
const struct rte_tailq_head *tailq = &mcfg->tailq_head[i];
const struct rte_dummy_head *head = &tailq->tailq_head;
@ -124,6 +125,7 @@ rte_dump_tailq(void)
(rte_tailq_names[i] != NULL ? rte_tailq_names[i]:"nil"),
head->tqh_first, head->tqh_last);
}
rte_rwlock_read_unlock(&mcfg->qlock);
}
int

View File

@ -148,7 +148,18 @@ enum rte_proc_type_t rte_eal_process_type(void);
int rte_eal_init(int argc, char **argv);
/**
* Utility macro to do a tailq 'INSERT' of rte_mem_config
* macro to get the tailq lock in mem_config
*/
#define RTE_EAL_TAILQ_RWLOCK (&rte_eal_get_configuration()->mem_config->qlock)
/**
* macro to get the mempool lock shared by multiple instances
*/
#define RTE_EAL_MEMPOOL_RWLOCK (&rte_eal_get_configuration()->mem_config->mplock)
/**
* Utility macro to do a thread-safe tailq 'INSERT' of rte_mem_config
*
* @param idx
* a kind of tailq defined in enum rte_tailq_t
@ -163,11 +174,13 @@ int rte_eal_init(int argc, char **argv);
#define RTE_EAL_TAILQ_INSERT_TAIL(idx, type, elm) do { \
struct type *list; \
list = RTE_TAILQ_LOOKUP_BY_IDX(idx, type); \
rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); \
TAILQ_INSERT_TAIL(list, elm, next); \
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); \
} while (0)
/**
* Utility macro to do a tailq 'REMOVE' of rte_mem_config
* Utility macro to do a thread-safe tailq 'REMOVE' of rte_mem_config
*
* @param idx
* a kind of tailq defined in enum rte_tailq_t
@ -182,7 +195,9 @@ int rte_eal_init(int argc, char **argv);
#define RTE_EAL_TAILQ_REMOVE(idx, type, elm) do { \
struct type *list; \
list = RTE_TAILQ_LOOKUP_BY_IDX(idx, type); \
rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); \
TAILQ_REMOVE(list, elm, next); \
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); \
} while (0)
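A hedged usage sketch of this macro pair, mirroring the mempool call site later in this commit (RTE_TAILQ_MEMPOOL and rte_mempool_list are the real index and head type; the rte_zmalloc() allocation is illustrative only, since real mempools come from rte_mempool_create()):

	#include <rte_eal.h>
	#include <rte_malloc.h>
	#include <rte_mempool.h>

	static void
	tailq_insert_remove_example(void)
	{
		/* illustrative object; normally created by rte_mempool_create() */
		struct rte_mempool *mp = rte_zmalloc("example_mp", sizeof(*mp), 0);

		if (mp == NULL)
			return;
		/* insert under qlock; now visible to other lcores and processes */
		RTE_EAL_TAILQ_INSERT_TAIL(RTE_TAILQ_MEMPOOL, rte_mempool_list, mp);
		/* remove under the same lock before freeing */
		RTE_EAL_TAILQ_REMOVE(RTE_TAILQ_MEMPOOL, rte_mempool_list, mp);
		rte_free(mp);
	}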

View File

@ -38,6 +38,7 @@
#include <rte_tailq.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_rwlock.h>
#ifdef __cplusplus
extern "C" {
@ -58,18 +59,44 @@ enum rte_tailq_t {
* support, the memory details should be shared across instances
*/
struct rte_mem_config {
volatile uint32_t magic; /**< Magic number - Sanity check. */
/* memory topology */
uint32_t nchannel; /**< Number of channels (0 if unknown). */
uint32_t nrank; /**< Number of ranks (0 if unknown). */
/**
* current lock nest order
* - qlock->mlock (ring/hash/lpm)
* - mplock->qlock->mlock (mempool)
* Notice:
* *ALWAYS* obtain qlock first when both qlock and mlock are needed
*/
rte_rwlock_t mlock; /**< used only by the memzone LIB, for thread safety. */
rte_rwlock_t qlock; /**< used for tailq operations, for thread safety. */
rte_rwlock_t mplock; /**< used only by the mempool LIB, for thread safety. */
uint32_t memzone_idx; /**< Index of memzone */
/* memory segments and zones */
struct rte_memseg memseg[RTE_MAX_MEMSEG]; /**< Physmem descriptors. */
struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */
/* Runtime Physmem descriptors. */
struct rte_memseg free_memseg[RTE_MAX_MEMSEG];
struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
} __attribute__((__packed__));
inline static void
rte_eal_mcfg_wait_complete(struct rte_mem_config* mcfg)
{
/* wait until the shared mem_config finishes initialising */
while(mcfg->magic != RTE_MAGIC)
rte_pause();
}
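A minimal sketch of the nesting documented above, assuming a single caller needed all three locks (the call sites in this commit actually take them at different layers):

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

	/* acquire in the documented order: mplock -> qlock -> mlock */
	rte_rwlock_write_lock(&mcfg->mplock);
	rte_rwlock_write_lock(&mcfg->qlock);
	rte_rwlock_write_lock(&mcfg->mlock);
	/* ... touch tailqs and memzones under the locks ... */
	rte_rwlock_write_unlock(&mcfg->mlock);	/* release in reverse order */
	rte_rwlock_write_unlock(&mcfg->qlock);
	rte_rwlock_write_unlock(&mcfg->mplock);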
#ifdef __cplusplus
}
#endif

View File

@ -66,9 +66,7 @@ enum rte_page_sizes {
*/
#define __rte_cache_aligned __attribute__((__aligned__(CACHE_LINE_SIZE)))
#ifndef __KERNEL__ /* so we can include this header in kernel modules */
typedef uint64_t phys_addr_t; /**< Physical address definition. */
#endif
/**
* Physical memory segment descriptor.

View File

@ -40,7 +40,7 @@
#include <unistd.h>
#include <pthread.h>
#include <getopt.h>
#include <fcntl.h>
#include <sys/file.h>
#include <stddef.h>
#include <errno.h>
#include <limits.h>
@ -55,6 +55,7 @@
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_log.h>
@ -66,6 +67,7 @@
#include <rte_pci.h>
#include <rte_common.h>
#include <rte_version.h>
#include <rte_atomic.h>
#include "eal_private.h"
#include "eal_thread.h"
@ -203,14 +205,13 @@ rte_eal_config_create(void)
}
rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
if (rte_mem_cfg_addr == MAP_FAILED){
rte_panic("Cannot mmap memory for rte_config\n");
}
memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
memcpy(rte_config.mem_config, &early_mem_config,
sizeof(early_mem_config));
}
/* attach to an existing shared memory config */
@ -224,13 +225,13 @@ rte_eal_config_attach(void)
return;
if (mem_cfg_fd < 0){
mem_cfg_fd = open(pathname, O_RDONLY);
mem_cfg_fd = open(pathname, O_RDWR);
if (mem_cfg_fd < 0)
rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
}
rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), PROT_READ,
MAP_SHARED, mem_cfg_fd, 0);
rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
close(mem_cfg_fd);
if (rte_mem_cfg_addr == MAP_FAILED)
rte_panic("Cannot mmap memory for rte_config\n");
@ -274,6 +275,7 @@ rte_config_init(void)
break;
case RTE_PROC_SECONDARY:
rte_eal_config_attach();
rte_eal_mcfg_wait_complete(rte_config.mem_config);
break;
case RTE_PROC_AUTO:
case RTE_PROC_INVALID:
@ -281,6 +283,25 @@ rte_config_init(void)
}
}
/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
static void
eal_hugedirs_unlock(void)
{
int i;
for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
{
/* skip uninitialized */
if (internal_config.hugepage_info[i].lock_descriptor == 0)
continue;
/* unlock the hugepage directory */
flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
close(internal_config.hugepage_info[i].lock_descriptor);
/* reset the field */
internal_config.hugepage_info[i].lock_descriptor = 0;
}
}
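The descriptors released here are taken in eal_hugepage_info_init() (later in this commit) and held across memory init, so no other process can clear or remap the hugepages in the meantime. A condensed sketch of that lifecycle, with a hypothetical mount point:

	int fd = open("/mnt/huge", O_RDONLY);	/* eal_hugepage_info_init() */
	flock(fd, LOCK_EX);			/* held while clearing and mapping pages */
	/* ... clear stale map_* files, count and map hugepages ... */
	flock(fd, LOCK_UN);			/* eal_hugedirs_unlock(), called from */
	close(fd);				/* rte_eal_init() after rte_eal_memory_init() */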
/* display usage */
static void
eal_usage(const char *prgname)
@ -494,6 +515,10 @@ eal_parse_args(int argc, char **argv)
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
internal_config.socket_mem[i] = 0;
/* zero out hugedir descriptors */
for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
internal_config.hugepage_info[i].lock_descriptor = 0;
while ((opt = getopt_long(argc, argvopt, "b:c:m:n:r:v",
lgopts, &option_index)) != EOF) {
@ -663,12 +688,30 @@ eal_check_mem_on_local_socket(void)
"memory on local socket!\n");
}
static int
sync_func(__attribute__((unused)) void *arg)
{
return 0;
}
inline static void
rte_eal_mcfg_complete(void)
{
/* ALL shared mem_config related INIT DONE */
if (rte_config.process_type == RTE_PROC_PRIMARY)
rte_config.mem_config->magic = RTE_MAGIC;
}
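Paired with rte_eal_mcfg_wait_complete() from rte_eal_memconfig.h, this forms the primary/secondary readiness handshake; condensed:

	/* primary, once all shared mem_config init is done: */
	rte_config.mem_config->magic = RTE_MAGIC;

	/* secondary, right after attaching the shared mapping: */
	while (mcfg->magic != RTE_MAGIC)
		rte_pause();	/* spin until the primary finishes init */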
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
{
int i, fctret, ret;
pthread_t thread_id;
static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
if (!rte_atomic32_test_and_set(&run_once))
return -1;
thread_id = pthread_self();
@ -679,7 +722,9 @@ rte_eal_init(int argc, char **argv)
if (fctret < 0)
exit(1);
if (eal_hugepage_info_init() < 0)
if (internal_config.no_hugetlbfs == 0 &&
internal_config.process_type != RTE_PROC_SECONDARY &&
eal_hugepage_info_init() < 0)
rte_panic("Cannot get hugepage information\n");
if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
@ -690,14 +735,18 @@ rte_eal_init(int argc, char **argv)
}
rte_srand(rte_rdtsc());
rte_config_init();
rte_config_init();
if (rte_eal_cpu_init() < 0)
rte_panic("Cannot detect lcores\n");
if (rte_eal_memory_init() < 0)
rte_panic("Cannot init memory\n");
/* the directories are locked during eal_hugepage_info_init */
eal_hugedirs_unlock();
if (rte_eal_memzone_init() < 0)
rte_panic("Cannot init memzone\n");
@ -724,6 +773,8 @@ rte_eal_init(int argc, char **argv)
eal_check_mem_on_local_socket();
rte_eal_mcfg_complete();
RTE_LCORE_FOREACH_SLAVE(i) {
/*
@ -746,6 +797,13 @@ rte_eal_init(int argc, char **argv)
eal_thread_init_master(rte_config.master_lcore);
/*
* Launch a dummy function on all slave lcores, so that master lcore
* knows they are all ready when this function returns.
*/
rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
rte_eal_mp_wait_lcore();
return fctret;
}

View File

@ -34,25 +34,28 @@
#include <string.h>
#include <sys/types.h>
#include <sys/file.h>
#include <dirent.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <fnmatch.h>
#include <inttypes.h>
#include <stdarg.h>
#include <unistd.h>
#include <errno.h>
#include <sys/queue.h>
#include "rte_memory.h"
#include "rte_memzone.h"
#include "rte_tailq.h"
#include "rte_eal.h"
#include "rte_launch.h"
#include "rte_per_lcore.h"
#include "rte_lcore.h"
#include "rte_debug.h"
#include "rte_log.h"
#include "rte_common.h"
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_common.h>
#include "rte_string_fns.h"
#include "eal_internal_cfg.h"
#include "eal_hugepages.h"
@ -63,9 +66,16 @@ static const char sys_dir_path[] = "/sys/kernel/mm/hugepages";
static int32_t
get_num_hugepages(const char *subdir)
{
const char nr_hp_file[] = "nr_hugepages";
char path[BUFSIZ];
unsigned num_pages = 0;
char path[PATH_MAX];
long unsigned num_pages = 0;
const char *nr_hp_file;
/* if secondary process, just look at the number of hugepages,
* otherwise look at number of free hugepages */
if (internal_config.process_type == RTE_PROC_SECONDARY)
nr_hp_file = "nr_hugepages";
else
nr_hp_file = "free_hugepages";
rte_snprintf(path, sizeof(path), "%s/%s/%s",
sys_dir_path, subdir, nr_hp_file);
@ -73,7 +83,10 @@ get_num_hugepages(const char *subdir)
if (eal_parse_sysfs_value(path, &num_pages) < 0)
return 0;
return num_pages;
if (num_pages == 0)
RTE_LOG(ERR, EAL, "Error - no free hugepages available!\n");
return (int32_t)num_pages;
}
static uint64_t
@ -169,9 +182,79 @@ static inline void
swap_hpi(struct hugepage_info *a, struct hugepage_info *b)
{
char buf[sizeof(*a)];
memcpy(buf, a, sizeof(*a));
memcpy(a, b, sizeof(*a));
memcpy(b, buf, sizeof(*a));
memcpy(buf, a, sizeof(buf));
memcpy(a, b, sizeof(buf));
memcpy(b, buf, sizeof(buf));
}
/*
* Clear the hugepage directory of any leftover hugepage files.
* Each file is checked for a lock (i.e. whether it is in use by
* another DPDK process) and removed only if unlocked.
*/
static int
clear_hugedir(const char * hugedir)
{
DIR *dir;
struct dirent *dirent;
int dir_fd, fd, lck_result;
const char filter[] = "*map_*"; /* matches hugepage files */
/* open directory */
dir = opendir(hugedir);
if (!dir) {
RTE_LOG(INFO, EAL, "Unable to open hugepage directory %s\n",
hugedir);
goto error;
}
dir_fd = dirfd(dir);
dirent = readdir(dir);
if (!dirent) {
RTE_LOG(INFO, EAL, "Unable to read hugepage directory %s\n",
hugedir);
goto error;
}
while(dirent != NULL){
/* skip files that don't match the hugepage pattern */
if (fnmatch(filter, dirent->d_name, 0) > 0) {
dirent = readdir(dir);
continue;
}
/* try and lock the file */
fd = openat(dir_fd, dirent->d_name, O_RDONLY);
/* skip to next file */
if (fd == -1) {
dirent = readdir(dir);
continue;
}
/* non-blocking lock */
lck_result = flock(fd, LOCK_EX | LOCK_NB);
/* if lock succeeds, unlock and remove the file */
if (lck_result != -1) {
flock(fd, LOCK_UN);
unlinkat(dir_fd, dirent->d_name, 0);
}
close (fd);
dirent = readdir(dir);
}
closedir(dir);
return 0;
error:
if (dir)
closedir(dir);
RTE_LOG(INFO, EAL, "Error while clearing hugepage dir: %s\n",
strerror(errno));
return -1;
}
/*
@ -206,6 +289,18 @@ eal_hugepage_info_init(void)
(unsigned) get_num_hugepages(dirent->d_name),
(unsigned long long)hpi->hugepage_sz);
} else {
/* take an exclusive lock on the hugepage directory */
hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
/* if blocking lock failed */
if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
RTE_LOG(CRIT, EAL, "Failed to lock hugepage directory!\n");
return -1;
}
/* clear unused pages out of the hugepage dir */
if (clear_hugedir(hpi->hugedir) == -1)
return -1;
/* for now, put all pages into socket 0,
* later they will be sorted */
hpi->num_pages[0] = get_num_hugepages(dirent->d_name);

View File

@ -44,11 +44,13 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/queue.h>
#include <fcntl.h>
#include <sys/file.h>
#include <unistd.h>
#include <limits.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <rte_log.h>
#include <rte_memory.h>
@ -56,6 +58,7 @@
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_common.h>
@ -111,6 +114,44 @@ aslr_enabled(void)
}
}
/*
* Increase limit for open files for current process
*/
static int
increase_open_file_limit(void)
{
struct rlimit limit;
/* read current limits */
if (getrlimit(RLIMIT_NOFILE, &limit) != 0) {
RTE_LOG(ERR, EAL, "Error reading resource limit: %s\n",
strerror(errno));
return -1;
}
/* check if current soft limit matches the hard limit */
if (limit.rlim_cur < limit.rlim_max) {
/* set soft limit to match hard limit */
limit.rlim_cur = limit.rlim_max;
}
else {
/* we can't increase the soft limit so now we try to increase
* soft and hard limit. this might fail when run as non-root.
*/
limit.rlim_cur *= 2;
limit.rlim_max *= 2;
}
/* set current resource limit */
if (setrlimit(RLIMIT_NOFILE, &limit) != 0) {
RTE_LOG(ERR, EAL, "Error increasing open files limit: %s\n",
strerror(errno));
return -1;
}
return 0;
}
/*
* Try to mmap *size bytes in /dev/zero. If it is successful, return the
* pointer to the mmap'd area and keep *size unmodified. Else, retry
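The body of this function falls outside the hunk; a hedged sketch of the probe it describes (probe_virtual_area is a hypothetical name, and *size is assumed to be a multiple of hugepage_sz):

	#include <fcntl.h>
	#include <stdint.h>
	#include <sys/mman.h>
	#include <unistd.h>

	static void *
	probe_virtual_area(uint64_t *size, uint64_t hugepage_sz)
	{
		void *addr = MAP_FAILED;
		int fd = open("/dev/zero", O_RDONLY);

		if (fd < 0)
			return NULL;
		/* shrink the request until a free virtual area is found */
		while (addr == MAP_FAILED && *size > 0) {
			addr = mmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
			if (addr == MAP_FAILED)
				*size -= hugepage_sz;
		}
		close(fd);
		if (addr == MAP_FAILED)
			return NULL;
		/* release the probe mapping; the caller re-maps hugepages here */
		munmap(addr, *size);
		return addr;
	}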
@ -219,6 +260,7 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
vma_len = hugepage_sz;
}
/* try to create hugepage file */
fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
if (fd < 0) {
RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
@ -243,9 +285,11 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
hugepg_tbl[i].final_va = virtaddr;
}
/* close the file descriptor, files will be locked later */
close(fd);
vma_addr = (char *)vma_addr + hugepage_sz;
vma_len -= hugepage_sz;
close(fd);
}
return 0;
}
@ -518,7 +562,30 @@ unmap_unneeded_hugepages(struct hugepage *hugepg_tbl,
munmap(hp->final_va, hp->size);
hp->final_va = NULL;
}
/* lock the page and skip */
else {
/* try and open the hugepage file */
while ((fd = open(hp->filepath, O_CREAT | O_RDWR, 0755)) < 0) {
/* if we can't open due to resource limits */
if (errno == EMFILE) {
RTE_LOG(INFO, EAL, "Increasing open file limit\n");
/* if we manage to increase resource limit, try again */
if (increase_open_file_limit() == 0)
continue;
}
else
RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
strerror(errno));
return -1;
}
/* try and lock the hugepage */
if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
RTE_LOG(ERR, EAL, "Locking hugepage file failed!\n");
close(fd);
return -1;
}
hp->page_lock = fd;
pages_found++;
}
} /* match page */

View File

@ -49,7 +49,8 @@ struct hugepage {
int socket_id; /**< NUMA socket ID */
int file_id; /**< the '%d' in HUGEFILE_FMT */
int memseg_id; /**< the memory segment to which page belongs */
char filepath[MAX_HUGEPAGE_PATH]; /**< Path to backing file on filesystem */
char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
int page_lock; /**< descriptor for hugepage file */
};
/**

View File

@ -53,6 +53,7 @@ struct hugepage_info {
const char *hugedir; /**< dir where hugetlbfs is mounted */
uint32_t num_pages[RTE_MAX_NUMA_NODES];
/**< number of hugepages of that size on each socket */
int lock_descriptor; /**< file descriptor for hugepage dir */
};
/**

View File

@ -51,6 +51,7 @@
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
#include <rte_log.h>
#include <rte_spinlock.h>
#include "rte_fbk_hash.h"
@ -79,10 +80,12 @@ rte_fbk_hash_find_existing(const char *name)
return NULL;
}
rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_FOREACH(h, fbk_hash_list, next) {
if (strncmp(name, h->name, RTE_FBK_HASH_NAMESIZE) == 0)
break;
}
rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
if (h == NULL)
rte_errno = ENOENT;
return h;
@ -129,19 +132,22 @@ rte_fbk_hash_create(const struct rte_fbk_hash_params *params)
rte_snprintf(hash_name, sizeof(hash_name), "FBK_%s", params->name);
rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
/* guarantee there is no existing entry */
TAILQ_FOREACH(ht, fbk_hash_list, next) {
if (strncmp(params->name, ht->name, RTE_FBK_HASH_NAMESIZE) == 0)
break;
}
if (ht != NULL)
return NULL;
goto exit;
/* Allocate memory for table. */
ht = (struct rte_fbk_hash_table *)rte_malloc_socket(hash_name, mem_size,
0, params->socket_id);
if (ht == NULL)
return NULL;
goto exit;
memset(ht, 0, mem_size);
/* Set up hash table context. */
@ -165,6 +171,10 @@ rte_fbk_hash_create(const struct rte_fbk_hash_params *params)
}
TAILQ_INSERT_TAIL(fbk_hash_list, ht, next);
exit:
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
return ht;
}

View File

@ -55,6 +55,8 @@
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
#include <rte_log.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include "rte_hash.h"
@ -149,10 +151,13 @@ rte_hash_find_existing(const char *name)
return NULL;
}
rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_FOREACH(h, hash_list, next) {
if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
break;
}
rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
if (h == NULL)
rte_errno = ENOENT;
return h;
@ -205,19 +210,21 @@ rte_hash_create(const struct rte_hash_parameters *params)
/* Total memory required for hash context */
mem_size = hash_tbl_size + sig_tbl_size + key_tbl_size;
rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
/* guarantee there is no existing entry */
TAILQ_FOREACH(h, hash_list, next) {
if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
break;
}
if (h != NULL)
return NULL;
goto exit;
h = (struct rte_hash *)rte_zmalloc_socket(hash_name, mem_size,
CACHE_LINE_SIZE, params->socket_id);
if (h == NULL) {
RTE_LOG(ERR, HASH, "memory allocation failed\n");
return NULL;
goto exit;
}
/* Setup hash context */
@ -237,6 +244,10 @@ rte_hash_create(const struct rte_hash_parameters *params)
DEFAULT_HASH_FUNC : params->hash_func;
TAILQ_INSERT_TAIL(hash_list, h, next);
exit:
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
return h;
}

View File

@ -52,6 +52,8 @@
#include <rte_per_lcore.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include "rte_lpm.h"
@ -126,10 +128,12 @@ rte_lpm_find_existing(const char *name)
return NULL;
}
rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_FOREACH(l, lpm_list, next) {
if (strncmp(name, l->name, RTE_LPM_NAMESIZE) == 0)
break;
}
rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
if (l == NULL)
rte_errno = ENOENT;
@ -179,20 +183,22 @@ rte_lpm_create(const char *name, int socket_id, int max_rules,
/* Determine the amount of memory to allocate. */
mem_size = sizeof(*lpm) + (sizeof(lpm->rules_tbl[0]) * max_rules);
rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
/* guarantee there is no existing entry */
TAILQ_FOREACH(lpm, lpm_list, next) {
if (strncmp(name, lpm->name, RTE_LPM_NAMESIZE) == 0)
break;
}
if (lpm != NULL)
return NULL;
goto exit;
/* Allocate memory to store the LPM data structures. */
lpm = (struct rte_lpm *)rte_zmalloc_socket(mem_name, mem_size,
CACHE_LINE_SIZE, socket_id);
if (lpm == NULL) {
RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
return NULL;
goto exit;
}
/* Save user arguments. */
@ -201,6 +207,9 @@ rte_lpm_create(const char *name, int socket_id, int max_rules,
TAILQ_INSERT_TAIL(lpm_list, lpm, next);
exit:
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
return lpm;
}

View File

@ -56,6 +56,7 @@
#include <rte_ring.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_spinlock.h>
#include "rte_mempool.h"
@ -178,6 +179,8 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
if (flags & MEMPOOL_F_SC_GET)
rg_flags |= RING_F_SC_DEQ;
rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);
/* allocate the ring that will be used to store objects */
/* Ring functions will return appropriate errors if we are
* running as a secondary process etc., so no checks are made here */
@ -185,7 +188,7 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
rte_snprintf(rg_name, sizeof(rg_name), "MP_%s", name);
r = rte_ring_create(rg_name, rte_align32pow2(n+1), socket_id, rg_flags);
if (r == NULL)
return NULL;
goto exit;
/*
* In header, we have at least the pointer to the pool, and
@ -236,6 +239,7 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
mempool_size = total_elt_size * n +
sizeof(struct rte_mempool) + private_data_size;
rte_snprintf(mz_name, sizeof(mz_name), "MP_%s", name);
mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags);
/*
@ -243,7 +247,7 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
* space for the ring as we cannot free it
*/
if (mz == NULL)
return NULL;
goto exit;
/* init the mempool structure */
mp = mz->addr;
@ -289,6 +293,9 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
RTE_EAL_TAILQ_INSERT_TAIL(RTE_TAILQ_MEMPOOL, rte_mempool_list, mp);
exit:
rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);
return mp;
}
@ -398,6 +405,9 @@ rte_mempool_audit(const struct rte_mempool *mp)
{
mempool_audit_cache(mp);
mempool_audit_cookies(mp);
/* For case where mempool DEBUG is not set, and cache size is 0 */
RTE_SET_USED(mp);
}
/* dump the status of the mempool on the console */
@ -465,9 +475,13 @@ rte_mempool_list_dump(void)
return;
}
rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);
TAILQ_FOREACH(mp, mempool_list, next) {
rte_mempool_dump(mp);
}
rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
}
/* search a mempool from its name */
@ -483,10 +497,15 @@ rte_mempool_lookup(const char *name)
return NULL;
}
rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);
TAILQ_FOREACH(mp, mempool_list, next) {
if (strncmp(name, mp->name, RTE_MEMPOOL_NAMESIZE) == 0)
break;
}
rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
if (mp == NULL)
rte_errno = ENOENT;

View File

@ -86,6 +86,7 @@
#include <rte_branch_prediction.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_spinlock.h>
#include "rte_ring.h"
@ -138,6 +139,8 @@ rte_ring_create(const char *name, unsigned count, int socket_id,
rte_snprintf(mz_name, sizeof(mz_name), "RG_%s", name);
ring_size = count * sizeof(void *) + sizeof(struct rte_ring);
rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
/* reserve a memory zone for this ring. If we can't get rte_config or
* we are a secondary process, the memzone_reserve function will set
* rte_errno for us appropriately - hence no check in this function */
@ -162,6 +165,7 @@ rte_ring_create(const char *name, unsigned count, int socket_id,
r = NULL;
RTE_LOG(ERR, RING, "Cannot reserve memory\n");
}
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
return r;
}
@ -252,9 +256,13 @@ rte_ring_list_dump(void)
return;
}
rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_FOREACH(mp, ring_list, next) {
rte_ring_dump(mp);
}
rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
}
/* search a ring from its name */
@ -271,11 +279,15 @@ rte_ring_lookup(const char *name)
return NULL;
}
rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_FOREACH(r, ring_list, next) {
if (strncmp(name, r->name, RTE_RING_NAMESIZE) == 0)
break;
}
rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
if (r == NULL)
rte_errno = ENOENT;