ivshmem: library changes for mmaping using ivshmem

These library changes provide a new Intel DPDK feature for communicating
with virtual machines using QEMU's IVSHMEM mechanism.

The feature works by providing a command line for QEMU to map several hugepages
into a single IVSHMEM device. For the guest to know what is inside any given IVSHMEM
device (and to distinguish between Intel(R) DPDK and non-Intel(R) DPDK IVSHMEM
devices), a metadata file is also mapped into the IVSHMEM segment. No work needs to
be done by the guest application to map IVSHMEM devices into memory; they are
automatically recognized by the Intel(R) DPDK Environment Abstraction Layer (EAL).

Changes in this patch:
* Changes to EAL to allow mapping of all hugepages in a memseg into a single file
* Changes to EAL to allow ivshmem devices to be transparently mapped in
  the process running on the guest.
* New ivshmem library to create and manage metadata exported to guest VMs
* New ivshmem compilation targets
* Mempool and ring changes to allow export of structures to a VM and allow
  a VM to attach to those structures.
* New autotests to unit test this functionality.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
This commit is contained in:
Bruce Richardson 2014-02-11 10:28:51 +00:00 committed by David Marchand
parent 013615a784
commit 40b966a211
23 changed files with 3092 additions and 106 deletions

View File

@ -92,6 +92,7 @@ SRCS-$(CONFIG_RTE_APP_TEST) += test_kni.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_power.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_common.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_timer_perf.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_ivshmem.c
ifeq ($(CONFIG_RTE_APP_TEST),y)
SRCS-$(CONFIG_RTE_LIBRTE_ACL) += test_acl.c
@ -107,6 +108,7 @@ CFLAGS_test_kni.o += -wd1478
else
CFLAGS_test_kni.o += -Wno-deprecated-declarations
endif
CFLAGS += -D_GNU_SOURCE
# this application needs libraries first
DEPDIRS-$(CONFIG_RTE_APP_TEST) += lib

View File

@ -174,6 +174,12 @@ parallel_test_group_list = [
"Func" : default_autotest,
"Report" : None,
},
{
"Name" : "IVSHMEM autotest",
"Command" : "ivshmem_autotest",
"Func" : default_autotest,
"Report" : None,
},
{
"Name" : "Memcpy autotest",
"Command" : "memcpy_autotest",

View File

@ -184,6 +184,8 @@ static void cmd_autotest_parsed(void *parsed_result,
ret |= test_power();
if (all || !strcmp(res->autotest, "common_autotest"))
ret |= test_common();
/* OR the result into ret, like the other autotests, so that failures
 * recorded by earlier tests are not overwritten */
if (all || !strcmp(res->autotest, "ivshmem_autotest"))
	ret |= test_ivshmem();
#ifdef RTE_LIBRTE_PMD_RING
if (all || !strcmp(res->autotest, "ring_pmd_autotest"))
ret |= test_pmd_ring();
@ -224,7 +226,7 @@ cmdline_parse_token_string_t cmd_autotest_autotest =
"memcpy_perf_autotest#ring_perf_autotest#"
"red_autotest#meter_autotest#sched_autotest#"
"memcpy_perf_autotest#kni_autotest#"
"pm_autotest#"
"pm_autotest#ivshmem_autotest#"
#ifdef RTE_LIBRTE_ACL
"acl_autotest#"
#endif

View File

@ -86,6 +86,7 @@ do_recursive_call(void)
{ "test_memory_flags", no_action },
{ "test_file_prefix", no_action },
{ "test_no_huge_flag", no_action },
{ "test_ivshmem", test_ivshmem },
};
if (recursive_call == NULL)

View File

@ -95,6 +95,7 @@ int test_kni(void);
int test_power(void);
int test_common(void);
int test_pmd_ring(void);
int test_ivshmem(void);
int test_pci_run;

441
app/test/test_ivshmem.c Normal file
View File

@ -0,0 +1,441 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <fcntl.h>
#include <limits.h>
#include <unistd.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <stdio.h>
#include <cmdline_parse.h>
#include "test.h"
#ifdef RTE_LIBRTE_IVSHMEM
#include <rte_common.h>
#include <rte_ivshmem.h>
#include <rte_string_fns.h>
#include "process.h"
#define DUPLICATE_METADATA "duplicate"
#define METADATA_NAME "metadata"
#define NONEXISTENT_METADATA "nonexistent"
#define FIRST_TEST 'a'
/*
 * Call process_dup() with ARGV, the number of elements in ARGV and the
 * string "test_ivshmem".
 *
 * ARGV must be a real array (not a pointer), because the element count is
 * computed with sizeof.
 * NOTE(review): process_dup() is declared in process.h; presumably it
 * re-launches the test binary to run the named test - confirm there.
 */
#define launch_proc(ARGV) process_dup(ARGV, \
sizeof(ARGV)/(sizeof(ARGV[0])), "test_ivshmem")
/*
 * Test assertion helper.
 *
 * If cond evaluates to false, print a failure line containing the name of
 * the enclosing function and msg, then return -1 from the enclosing
 * function. It can therefore only be used inside functions returning int.
 */
#define ASSERT(cond,msg) do { \
if (!(cond)) { \
printf("**** TEST %s() failed: %s\n", \
__func__, msg); \
return -1; \
} \
} while(0)
/*
 * Work out the file prefix of the running process from the name of the
 * file open on file descriptor 3 and write it into prefix.
 *
 * The file name is expected to look like ".<prefix>_config"; the leading
 * character and the trailing "_config" are stripped.
 *
 * @param prefix  destination buffer
 * @param size    size of the destination buffer in bytes
 * @return prefix on success, NULL if the link cannot be read or the file
 *         name is too short to contain a prefix
 */
static char*
get_current_prefix(char * prefix, int size)
{
	char path[PATH_MAX] = {0};
	char buf[PATH_MAX] = {0};
	const char *name;
	size_t name_len;
	ssize_t link_len;

	/* get file for config (fd is always 3) */
	rte_snprintf(path, sizeof(path), "/proc/self/fd/%d", 3);

	/* readlink() does not NUL-terminate its output, so keep one byte
	 * spare and terminate explicitly; return NULL on error */
	link_len = readlink(path, buf, sizeof(buf) - 1);
	if (link_len == -1)
		return NULL;
	buf[link_len] = '\0';

	/* get the basename; it is used in place rather than copied back over
	 * buf, since formatting a buffer into itself is undefined */
	name = basename(buf);
	name_len = strlen(name);

	/* the name must hold at least the leading char and "_config" */
	if (name_len < sizeof("_config"))
		return NULL;

	/* copy string all the way from second char up to start of _config;
	 * the precision argument of "%.*s" must be an int */
	rte_snprintf(prefix, size, "%.*s",
			(int)(name_len - sizeof("_config")), &name[1]);

	return prefix;
}
/*
 * Map the metadata file belonging to the given metadata name.
 *
 * The file is looked up as "/var/run/.dpdk_ivshmem_metadata_<name>" and
 * mapped read/write and shared.
 *
 * @param name  metadata name used to build the file path
 * @return pointer to the mapped metadata, or NULL if the file cannot be
 *         opened or mapped (never MAP_FAILED)
 */
static struct rte_ivshmem_metadata*
mmap_metadata(const char *name)
{
	int fd;
	char pathname[PATH_MAX];
	void *map;

	rte_snprintf(pathname, sizeof(pathname),
			"/var/run/.dpdk_ivshmem_metadata_%s", name);

	/* no O_CREAT, so no mode argument is needed */
	fd = open(pathname, O_RDWR);
	if (fd < 0)
		return NULL;

	map = mmap(NULL, sizeof(struct rte_ivshmem_metadata),
			PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	/* close on every path: an existing mapping stays valid after close,
	 * and on failure the descriptor must not leak */
	close(fd);

	if (map == MAP_FAILED)
		return NULL;

	return (struct rte_ivshmem_metadata *) map;
}
/*
 * Create the DUPLICATE_METADATA metadata, which another process will later
 * try to create again.
 *
 * Returns 0 on success, -1 (via ASSERT) if the metadata cannot be created.
 */
static int
create_duplicate(void)
{
	int rc = rte_ivshmem_metadata_create(DUPLICATE_METADATA);

	ASSERT(rc == 0, "Creating metadata failed");

	return 0;
}
/*
 * Fill the METADATA_NAME metadata with RTE_LIBRTE_IVSHMEM_MAX_ENTRIES
 * memzones, then check that adding one more memzone is rejected.
 *
 * Returns 0 on success, -1 (via ASSERT) on the first failed check.
 */
static int
test_ivshmem_create_lots_of_memzones(void)
{
	char mz_name[IVSHMEM_NAME_LEN];
	const struct rte_memzone *zone;
	int idx = 0;
	int rc;

	rc = rte_ivshmem_metadata_create(METADATA_NAME);
	ASSERT(rc == 0, "Failed to create metadata");

	/* every one of these additions is expected to succeed */
	while (idx < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES) {
		rte_snprintf(mz_name, sizeof(mz_name), "mz_%i", idx);

		zone = rte_memzone_reserve(mz_name, CACHE_LINE_SIZE,
				SOCKET_ID_ANY, 0);
		ASSERT(zone != NULL, "Failed to reserve memzone");

		rc = rte_ivshmem_metadata_add_memzone(zone, METADATA_NAME);
		ASSERT(rc == 0, "Failed to add memzone");

		idx++;
	}

	/* the metadata is now expected to be full */
	zone = rte_memzone_reserve("one too many", CACHE_LINE_SIZE,
			SOCKET_ID_ANY, 0);
	ASSERT(zone != NULL, "Failed to reserve memzone");

	rc = rte_ivshmem_metadata_add_memzone(zone, METADATA_NAME);
	ASSERT(rc < 0, "Metadata should have been full");

	return 0;
}
/*
 * Check that the same memzone cannot be added to a metadata twice.
 *
 * Returns 0 on success, -1 (via ASSERT) on the first failed check.
 */
static int
test_ivshmem_create_duplicate_memzone(void)
{
	const struct rte_memzone *zone;
	int rc;

	rc = rte_ivshmem_metadata_create(METADATA_NAME);
	ASSERT(rc == 0, "Failed to create metadata");

	zone = rte_memzone_reserve("mz", CACHE_LINE_SIZE, SOCKET_ID_ANY, 0);
	ASSERT(zone != NULL, "Failed to reserve memzone");

	/* first addition must be accepted ... */
	rc = rte_ivshmem_metadata_add_memzone(zone, METADATA_NAME);
	ASSERT(rc == 0, "Failed to add memzone");

	/* ... and the second one refused */
	rc = rte_ivshmem_metadata_add_memzone(zone, METADATA_NAME);
	ASSERT(rc < 0, "Added the same memzone twice");

	return 0;
}
/*
 * Exercise the public rte_ivshmem metadata API with invalid and valid
 * arguments.
 *
 * The test first creates a ring, a mempool and a memzone, then checks that
 * every add/create/cmdline call rejects NULL arguments and a nonexistent
 * metadata name, and finally that the same calls succeed on the valid
 * METADATA_NAME metadata.
 *
 * Returns 0 on success, -1 (via ASSERT) on the first failed check.
 */
static int
test_ivshmem_api_test(void)
{
const struct rte_memzone * mz;
struct rte_mempool * mp;
struct rte_ring * r;
char buf[BUFSIZ];
/* buf starts out empty so that "nothing was written" can be checked */
memset(buf, 0, sizeof(buf));
r = rte_ring_create("ring", 1, SOCKET_ID_ANY, 0);
mp = rte_mempool_create("mempool", 1, 1, 1, 1, NULL, NULL, NULL, NULL,
SOCKET_ID_ANY, 0);
mz = rte_memzone_reserve("memzone", 64, SOCKET_ID_ANY, 0);
ASSERT(r != NULL, "Failed to create ring");
ASSERT(mp != NULL, "Failed to create mempool");
ASSERT(mz != NULL, "Failed to reserve memzone");
/* try to create NULL metadata */
ASSERT(rte_ivshmem_metadata_create(NULL) < 0,
"Created metadata with NULL name");
/* create valid metadata to do tests on */
ASSERT(rte_ivshmem_metadata_create(METADATA_NAME) == 0,
"Failed to create metadata");
/* test adding memzone */
ASSERT(rte_ivshmem_metadata_add_memzone(NULL, NULL) < 0,
"Added NULL memzone to NULL metadata");
ASSERT(rte_ivshmem_metadata_add_memzone(NULL, METADATA_NAME) < 0,
"Added NULL memzone");
ASSERT(rte_ivshmem_metadata_add_memzone(mz, NULL) < 0,
"Added memzone to NULL metadata");
ASSERT(rte_ivshmem_metadata_add_memzone(mz, NONEXISTENT_METADATA) < 0,
"Added memzone to nonexistent metadata");
/* test adding ring */
ASSERT(rte_ivshmem_metadata_add_ring(NULL, NULL) < 0,
"Added NULL ring to NULL metadata");
ASSERT(rte_ivshmem_metadata_add_ring(NULL, METADATA_NAME) < 0,
"Added NULL ring");
ASSERT(rte_ivshmem_metadata_add_ring(r, NULL) < 0,
"Added ring to NULL metadata");
ASSERT(rte_ivshmem_metadata_add_ring(r, NONEXISTENT_METADATA) < 0,
"Added ring to nonexistent metadata");
/* test adding mempool */
ASSERT(rte_ivshmem_metadata_add_mempool(NULL, NULL) < 0,
"Added NULL mempool to NULL metadata");
ASSERT(rte_ivshmem_metadata_add_mempool(NULL, METADATA_NAME) < 0,
"Added NULL mempool");
ASSERT(rte_ivshmem_metadata_add_mempool(mp, NULL) < 0,
"Added mempool to NULL metadata");
ASSERT(rte_ivshmem_metadata_add_mempool(mp, NONEXISTENT_METADATA) < 0,
"Added mempool to nonexistent metadata");
/* test creating command line */
/* each failing call below must also leave buf untouched (still empty) */
ASSERT(rte_ivshmem_metadata_cmdline_generate(NULL, sizeof(buf), METADATA_NAME) < 0,
"Written command line into NULL buffer");
ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, 0, METADATA_NAME) < 0,
"Written command line into small buffer");
ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf), NULL) < 0,
"Written command line for NULL metadata");
ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf),
NONEXISTENT_METADATA) < 0,
"Writen command line for nonexistent metadata");
ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
/* add stuff to config */
ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) == 0,
"Failed to add memzone to valid config");
ASSERT(rte_ivshmem_metadata_add_ring(r, METADATA_NAME) == 0,
"Failed to add ring to valid config");
ASSERT(rte_ivshmem_metadata_add_mempool(mp, METADATA_NAME) == 0,
"Failed to add mempool to valid config");
/* create config */
ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf),
METADATA_NAME) == 0, "Failed to write command-line");
/* check if something was written */
ASSERT(strnlen(buf, sizeof(buf)) != 0, "Buffer is empty");
/* make sure we don't segfault */
rte_ivshmem_metadata_dump(NULL);
/* dump our metadata */
rte_ivshmem_metadata_dump(METADATA_NAME);
return 0;
}
/*
 * Check that creating the DUPLICATE_METADATA metadata fails in this
 * process.
 *
 * Returns 0 if the creation was refused, -1 (via ASSERT) otherwise.
 */
static int
test_ivshmem_create_duplicate_metadata(void)
{
	int rc = rte_ivshmem_metadata_create(DUPLICATE_METADATA);

	ASSERT(rc < 0, "Creating duplicate metadata should have failed");

	return 0;
}
/*
 * Create the METADATA_NAME metadata, map its backing file and check that
 * the header and the first entry are initialized as expected.
 *
 * Returns 0 on success, -1 (via ASSERT) on the first failed check.
 */
static int
test_ivshmem_create_metadata_config(void)
{
	struct rte_ivshmem_metadata *metadata;

	rte_ivshmem_metadata_create(METADATA_NAME);

	metadata = mmap_metadata(METADATA_NAME);

	/* mmap_metadata() reports failure as NULL, not MAP_FAILED; checking
	 * for MAP_FAILED would let a NULL pointer through to the reads below */
	ASSERT(metadata != NULL, "Metadata mmaping failed");

	ASSERT(metadata->magic_number == IVSHMEM_MAGIC,
			"Magic number is not that magic");
	ASSERT(strncmp(metadata->name, METADATA_NAME, sizeof(metadata->name)) == 0,
			"Name has not been set up");

	/* the first entry must still be zeroed */
	ASSERT(metadata->entry[0].offset == 0, "Offest is not initialized");
	ASSERT(metadata->entry[0].mz.addr == 0, "mz.addr is not initialized");
	ASSERT(metadata->entry[0].mz.len == 0, "mz.len is not initialized");

	return 0;
}
/*
 * Create RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES / 2 metadata files named
 * "test_<i>" and check the magic number and name stored in each of them.
 *
 * Returns 0 on success, -1 (via ASSERT) on the first failed check.
 */
static int
test_ivshmem_create_multiple_metadata_configs(void)
{
	int i;
	char name[IVSHMEM_NAME_LEN];
	struct rte_ivshmem_metadata *metadata;

	for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES / 2; i++) {
		rte_snprintf(name, sizeof(name), "test_%d", i);
		rte_ivshmem_metadata_create(name);

		metadata = mmap_metadata(name);

		/* mmap_metadata() returns NULL on failure; fail the test
		 * instead of dereferencing a NULL pointer */
		ASSERT(metadata != NULL, "Metadata mmaping failed");

		ASSERT(metadata->magic_number == IVSHMEM_MAGIC,
				"Magic number is not that magic");
		ASSERT(strncmp(metadata->name, name, sizeof(metadata->name)) == 0,
				"Name has not been set up");
	}

	return 0;
}
/*
 * Create RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES metadata files, all of which
 * must succeed, then check that one further creation is refused.
 *
 * Note that the final call reuses the last generated name.
 *
 * Returns 0 on success, -1 (via ASSERT) on the first failed check.
 */
static int
test_ivshmem_create_too_many_metadata_configs(void)
{
	char name[IVSHMEM_NAME_LEN];
	int count = 0;
	int rc;

	while (count < RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES) {
		rte_snprintf(name, sizeof(name), "test_%d", count);

		rc = rte_ivshmem_metadata_create(name);
		ASSERT(rc == 0, "Create config file failed");

		count++;
	}

	rc = rte_ivshmem_metadata_create(name);
	ASSERT(rc < 0, "Create config file didn't fail");

	return 0;
}
/*
 * Identifiers of the individual sub-tests.
 *
 * The primary process encodes each identifier as the character
 * FIRST_TEST + id and passes it to a secondary process through the
 * RTE_IVSHMEM_TEST_ID environment variable; test_ivshmem() decodes it and
 * dispatches to the matching test function. The launch order is therefore
 * the order of the enumerators below.
 */
enum rte_ivshmem_tests {
_test_ivshmem_api_test = 0,
_test_ivshmem_create_metadata_config,
_test_ivshmem_create_multiple_metadata_configs,
_test_ivshmem_create_too_many_metadata_configs,
_test_ivshmem_create_duplicate_metadata,
_test_ivshmem_create_lots_of_memzones,
_test_ivshmem_create_duplicate_memzone,
/* number of tests; not a test itself, used as the loop bound */
_last_test,
};
#define RTE_IVSHMEM_TEST_ID "RTE_IVSHMEM_TEST_ID"
/*
 * Run every sub-test listed in enum rte_ivshmem_tests, one after another,
 * each through launch_proc() with "--proc-type=secondary" and the file
 * prefix of the current process.
 *
 * The sub-test to run is passed through the RTE_IVSHMEM_TEST_ID
 * environment variable as a one-character string (FIRST_TEST + id).
 *
 * Returns 0 if all launches returned 0, -1 as soon as one step fails.
 */
static int
launch_all_tests_on_secondary_processes(void)
{
	int id;
	/* setenv() needs a NUL-terminated string, not the address of a
	 * single char: keep the id in a two-byte buffer */
	char testid[2] = {0};
	char tmp[PATH_MAX] = {0};
	char prefix[PATH_MAX] = {0};

	/* without a prefix the secondary processes cannot attach */
	if (get_current_prefix(tmp, sizeof(tmp)) == NULL)
		return -1;

	rte_snprintf(prefix, sizeof(prefix), "--file-prefix=%s", tmp);

	const char *argv[] = { prgname, "-c", "1", "-n", "3",
			"--proc-type=secondary", prefix };

	for (id = 0; id < _last_test; id++) {
		testid[0] = (char)(FIRST_TEST + id);

		if (setenv(RTE_IVSHMEM_TEST_ID, testid, 1) != 0)
			return -1;

		if (launch_proc(argv) != 0)
			return -1;
	}

	return 0;
}
/*
 * Entry point of the IVSHMEM autotest.
 *
 * In the primary process: create the DUPLICATE_METADATA metadata and run
 * all sub-tests through launch_all_tests_on_secondary_processes().
 *
 * In any other process: read the sub-test id from the RTE_IVSHMEM_TEST_ID
 * environment variable and run the matching sub-test.
 *
 * Returns the result of the launched/run test(s); -1 if the environment
 * variable is missing or holds an unknown id.
 */
int
test_ivshmem(void)
{
	const char *testid_str;
	int testid;

	/* We want to have a clean execution for every test without exposing
	 * private global data structures in rte_ivshmem so we launch each test
	 * on a different secondary process. */
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {

		/* first, create metadata */
		ASSERT(create_duplicate() == 0, "Creating metadata failed");

		return launch_all_tests_on_secondary_processes();
	}

	/* getenv() returns NULL when the variable is not set, e.g. when the
	 * secondary process was not started by the primary test */
	testid_str = getenv(RTE_IVSHMEM_TEST_ID);
	ASSERT(testid_str != NULL, "Test ID is not set in the environment");

	testid = *testid_str - FIRST_TEST;

	printf("Secondary process running test %d \n", testid);

	switch (testid) {
	case _test_ivshmem_api_test:
		return test_ivshmem_api_test();
	case _test_ivshmem_create_metadata_config:
		return test_ivshmem_create_metadata_config();
	case _test_ivshmem_create_multiple_metadata_configs:
		return test_ivshmem_create_multiple_metadata_configs();
	case _test_ivshmem_create_too_many_metadata_configs:
		return test_ivshmem_create_too_many_metadata_configs();
	case _test_ivshmem_create_duplicate_metadata:
		return test_ivshmem_create_duplicate_metadata();
	case _test_ivshmem_create_lots_of_memzones:
		return test_ivshmem_create_lots_of_memzones();
	case _test_ivshmem_create_duplicate_memzone:
		return test_ivshmem_create_duplicate_memzone();
	default:
		break;
	}

	/* unknown test id */
	return -1;
}
#else /* RTE_LIBRTE_IVSHMEM */
/*
 * Stand-in used when the binary is built without RTE_LIBRTE_IVSHMEM:
 * prints a notice and reports success.
 */
int
test_ivshmem(void)
{
	static const char notice[] =
		"This binary was not compiled with IVSHMEM support!\n";

	fputs(notice, stdout);
	return 0;
}
#endif /* RTE_LIBRTE_IVSHMEM */

View File

@ -0,0 +1,49 @@
# BSD LICENSE
#
# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# use default config
#
#include "defconfig_x86_64-default-linuxapp-gcc"
#
# Compile IVSHMEM library
#
CONFIG_RTE_LIBRTE_IVSHMEM=y
CONFIG_RTE_LIBRTE_IVSHMEM_DEBUG=n
CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS=4
CONFIG_RTE_LIBRTE_IVSHMEM_MAX_ENTRIES=128
CONFIG_RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES=32
# Set EAL to single file segments
CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS=y

View File

@ -0,0 +1,49 @@
# BSD LICENSE
#
# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# use default config
#
#include "defconfig_x86_64-default-linuxapp-icc"
#
# Compile IVSHMEM library
#
CONFIG_RTE_LIBRTE_IVSHMEM=y
CONFIG_RTE_LIBRTE_IVSHMEM_DEBUG=n
CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS=4
CONFIG_RTE_LIBRTE_IVSHMEM_MAX_ENTRIES=128
CONFIG_RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES=32
# Set EAL to single file segments
CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS=y

View File

@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += librte_ivshmem
endif
include $(RTE_SDK)/mk/rte.sharelib.mk

View File

@ -479,11 +479,17 @@ rte_eal_memzone_init(void)
rte_rwlock_write_lock(&mcfg->mlock);
/* duplicate the memsegs from config */
memcpy(free_memseg, memseg, sizeof(struct rte_memseg) * RTE_MAX_MEMSEG);
/* fill in uninitialized free_memsegs */
for (i = 0; i < RTE_MAX_MEMSEG; i++) {
if (memseg[i].addr == NULL)
break;
if (free_memseg[i].addr != NULL)
continue;
memcpy(&free_memseg[i], &memseg[i], sizeof(struct rte_memseg));
}
/* make all zones cache-aligned */
for (i=0; i<RTE_MAX_MEMSEG; i++) {
for (i = 0; i < RTE_MAX_MEMSEG; i++) {
if (free_memseg[i].addr == NULL)
break;
if (memseg_sanitize(&free_memseg[i]) < 0) {

View File

@ -128,6 +128,28 @@ int rte_eal_log_init(const char *id, int facility);
*/
int rte_eal_pci_init(void);
#ifdef RTE_LIBRTE_IVSHMEM
/**
* Init the memory from IVSHMEM devices
*
* This function is private to EAL.
*
* @return
* 0 on success, negative on error
*/
int rte_eal_ivshmem_init(void);
/**
* Init objects in IVSHMEM devices
*
* This function is private to EAL.
*
* @return
* 0 on success, negative on error
*/
int rte_eal_ivshmem_obj_init(void);
#endif
struct rte_pci_driver;
struct rte_pci_device;

View File

@ -79,6 +79,9 @@ struct rte_memseg {
void *addr; /**< Start virtual address. */
uint64_t addr_64; /**< Makes sure addr is always 64 bits */
};
#ifdef RTE_LIBRTE_IVSHMEM
phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
#endif
size_t len; /**< Length of the segment. */
size_t hugepage_sz; /**< The pagesize of underlying memory */
int32_t socket_id; /**< NUMA socket ID. */

View File

@ -75,6 +75,9 @@ struct rte_memzone {
void *addr; /**< Start virtual address. */
uint64_t addr_64; /**< Makes sure addr is always 64-bits */
};
#ifdef RTE_LIBRTE_IVSHMEM
phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
#endif
size_t len; /**< Length of the memzone. */
size_t hugepage_sz; /**< The page size of underlying memory */

View File

@ -41,6 +41,7 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_ring
CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
CFLAGS += -I$(RTE_SDK)/lib/librte_ether
CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem
CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_ring
CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_pcap
CFLAGS += $(WERROR_FLAGS) -O3
@ -57,6 +58,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_timer.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_interrupts.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c
endif
# from common dir
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
@ -75,6 +79,9 @@ CFLAGS_eal.o := -D_GNU_SOURCE
CFLAGS_eal_thread.o := -D_GNU_SOURCE
CFLAGS_eal_log.o := -D_GNU_SOURCE
CFLAGS_eal_common_log.o := -D_GNU_SOURCE
CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
CFLAGS_eal_pci.o := -D_GNU_SOURCE
CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
# workaround for a gcc bug with noreturn attribute
# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603

View File

@ -935,6 +935,14 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_cpu_init() < 0)
rte_panic("Cannot detect lcores\n");
if (rte_eal_pci_init() < 0)
rte_panic("Cannot init PCI\n");
#ifdef RTE_LIBRTE_IVSHMEM
if (rte_eal_ivshmem_init() < 0)
rte_panic("Cannot init IVSHMEM\n");
#endif
if (rte_eal_memory_init() < 0)
rte_panic("Cannot init memory\n");
@ -947,6 +955,11 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_tailqs_init() < 0)
rte_panic("Cannot init tail queues for objects\n");
#ifdef RTE_LIBRTE_IVSHMEM
if (rte_eal_ivshmem_obj_init() < 0)
rte_panic("Cannot init IVSHMEM objects\n");
#endif
if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0)
rte_panic("Cannot init logs\n");
@ -959,9 +972,6 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_timer_init() < 0)
rte_panic("Cannot init HPET or TSC timers\n");
if (rte_eal_pci_init() < 0)
rte_panic("Cannot init PCI\n");
RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n",
rte_config.master_lcore, (int)thread_id);

View File

@ -0,0 +1,953 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <string.h>
#include <sys/queue.h>
#include <rte_log.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_ivshmem.h>
#include <rte_tailq_elem.h>
#include "eal_internal_cfg.h"
#include "eal_private.h"
#define PCI_VENDOR_ID_IVSHMEM 0x1Af4
#define PCI_DEVICE_ID_IVSHMEM 0x1110
#define IVSHMEM_MAGIC 0x0BADC0DE
#define IVSHMEM_METADATA_SIZE 0x1000
#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2"
#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config"
#define PHYS 0x1
#define VIRT 0x2
#define IOREMAP 0x4
#define FULL (PHYS|VIRT|IOREMAP)
#define METADATA_SIZE_ALIGNED \
(RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
#define CONTAINS(x,y)\
(((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len))
#define DIM(x) (sizeof(x)/sizeof(x[0]))
/* Record describing one IVSHMEM PCI device; stored in the pci_devs array of
 * struct ivshmem_shared_config. */
struct ivshmem_pci_device {
/* NOTE(review): presumably the path of the device resource file (see
 * IVSHMEM_RESOURCE_PATH) - the code filling it is not visible here */
char path[PATH_MAX];
/* NOTE(review): presumably the physical address of the device memory
 * inside the VM, as for the ioremap_addr field of rte_memseg - confirm */
phys_addr_t ioremap_addr;
};
/* data type to store in config: one memory segment found in the metadata of
 * an IVSHMEM device */
struct ivshmem_segment {
/* metadata entry, copied from the device metadata by read_metadata() */
struct rte_ivshmem_metadata_entry entry;
/* NOTE(review): use of this field is not visible in this part of the file */
uint64_t align;
/* path passed to read_metadata() for the device holding this segment */
char path[PATH_MAX];
};
/* IVSHMEM state gathered by EAL, reached through the ivshmem_config pointer. */
struct ivshmem_shared_config {
/* segments collected from device metadata */
struct ivshmem_segment segment[RTE_MAX_MEMSEG];
/* index of the next free slot in segment[]; read_metadata() appends
 * from this index and stores the updated value back */
uint32_t segment_idx;
/* known IVSHMEM PCI devices */
struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS];
/* NOTE(review): presumably the next free slot in pci_devs[], by analogy
 * with segment_idx - confirm against the code that fills pci_devs */
uint32_t pci_devs_idx;
};
static struct ivshmem_shared_config * ivshmem_config;
static int memseg_idx;
static int pagesz;
/* Tailq heads to add rings to */
TAILQ_HEAD(rte_ring_list, rte_ring);
/*
* Utility functions
*/
/*
 * Tell whether a PCI device is an IVSHMEM device, i.e. whether both its
 * vendor and device IDs match the IVSHMEM ones.
 *
 * Returns 1 on match, 0 otherwise.
 */
static int
is_ivshmem_device(struct rte_pci_device * dev)
{
	if (dev->id.vendor_id != PCI_VENDOR_ID_IVSHMEM)
		return 0;

	return dev->id.device_id == PCI_DEVICE_ID_IVSHMEM;
}
/*
 * Map the metadata structure stored at the end of the file behind fd.
 *
 * The mapping starts METADATA_SIZE_ALIGNED bytes before the end of the
 * file (len is the file length) and covers one struct rte_ivshmem_metadata.
 *
 * Returns the mmap() result, i.e. MAP_FAILED on error.
 */
static void *
map_metadata(int fd, uint64_t len)
{
	const size_t map_len = sizeof(struct rte_ivshmem_metadata);
	const size_t tail_len = METADATA_SIZE_ALIGNED;
	const int prot = PROT_READ | PROT_WRITE;

	return mmap(NULL, map_len, prot, MAP_SHARED, fd, len - tail_len);
}
/*
 * Undo map_metadata(): unmap one struct rte_ivshmem_metadata at ptr.
 */
static void
unmap_metadata(void * ptr)
{
	const size_t map_len = sizeof(struct rte_ivshmem_metadata);

	munmap(ptr, map_len);
}
static int
has_ivshmem_metadata(int fd, uint64_t len)
{
struct rte_ivshmem_metadata metadata;
void * ptr;
ptr = map_metadata(fd, len);
if (ptr == MAP_FAILED)
return -1;
metadata = *(struct rte_ivshmem_metadata*) (ptr);
unmap_metadata(ptr);
return metadata.magic_number == IVSHMEM_MAGIC;
}
/*
 * Remove element idx from the segment table ms of len elements: all later
 * elements move down by one slot and the last slot is zeroed.
 */
static void
remove_segment(struct ivshmem_segment * ms, int len, int idx)
{
	const int last = len - 1;

	/* shift the tail down in one go; nothing to shift when idx is
	 * already the last element */
	if (idx < last)
		memmove(&ms[idx], &ms[idx + 1],
				(size_t)(last - idx) * sizeof(struct ivshmem_segment));

	memset(&ms[last], 0, sizeof(struct ivshmem_segment));
}
/* Non-zero if the start of either half-open range [s, e) lies inside the
 * other range. */
static inline int
ranges_intersect(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
{
	return (s1 >= s2 && s1 < e2) || (s2 >= s1 && s2 < e1);
}

/*
 * Report in which address spaces two memzones overlap.
 *
 * Each zone is treated as a range of mz->len bytes starting at its virtual
 * (addr_64), physical (phys_addr) and ioremap (ioremap_addr) address.
 *
 * Returns a bitmask of VIRT, PHYS and IOREMAP; 0 if the zones do not
 * overlap anywhere.
 */
static int
overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
{
	int flags = 0;

	/* virtual addresses */
	if (ranges_intersect(mz1->addr_64, mz1->addr_64 + mz1->len,
			mz2->addr_64, mz2->addr_64 + mz2->len))
		flags |= VIRT;

	/* physical addresses */
	if (ranges_intersect(mz1->phys_addr, mz1->phys_addr + mz1->len,
			mz2->phys_addr, mz2->phys_addr + mz2->len))
		flags |= PHYS;

	/* ioremap addresses */
	if (ranges_intersect(mz1->ioremap_addr, mz1->ioremap_addr + mz1->len,
			mz2->ioremap_addr, mz2->ioremap_addr + mz2->len))
		flags |= IOREMAP;

	return flags;
}
/* Non-zero if one of the half-open ranges [s, e) starts exactly where the
 * other one ends. */
static inline int
ranges_touch(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
{
	return s1 == e2 || s2 == e1;
}

/*
 * Report in which address spaces two memzones are directly adjacent.
 *
 * Each zone is treated as a range of mz->len bytes starting at its virtual
 * (addr_64), physical (phys_addr) and ioremap (ioremap_addr) address.
 *
 * Returns a bitmask of VIRT, PHYS and IOREMAP; 0 if the zones are not
 * adjacent anywhere.
 */
static int
adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
{
	int flags = 0;

	/* virtually adjacent? */
	if (ranges_touch(mz1->addr_64, mz1->addr_64 + mz1->len,
			mz2->addr_64, mz2->addr_64 + mz2->len))
		flags |= VIRT;

	/* physically adjacent? */
	if (ranges_touch(mz1->phys_addr, mz1->phys_addr + mz1->len,
			mz2->phys_addr, mz2->phys_addr + mz2->len))
		flags |= PHYS;

	/* ioremap-adjacent? */
	if (ranges_touch(mz1->ioremap_addr, mz1->ioremap_addr + mz1->len,
			mz2->ioremap_addr, mz2->ioremap_addr + mz2->len))
		flags |= IOREMAP;

	return flags;
}
/*
 * Look for a pair of segments in ms[0..len) that still needs to be
 * concatenated.
 *
 * Returns 1 if such a pair exists, 0 otherwise.
 */
static int
has_adjacent_segments(struct ivshmem_segment * ms, int len)
{
	int first, second, flags;

	for (first = 0; first < len; first++) {
		for (second = first + 1; second < len; second++) {
			flags = adjacent(&ms[first].entry.mz, &ms[second].entry.mz);

			/* ioremap-adjacent segments are skipped: per the original
			 * author's note, that would indicate that they are from
			 * different PCI devices and thus don't need to be
			 * concatenated */
			if ((flags & IOREMAP) != 0)
				continue;

			/* adjacent virtually and/or physically */
			if ((flags & (VIRT|PHYS)) != 0)
				return 1;
		}
	}

	return 0;
}
/*
 * Look for a pair of segments in ms[0..len) whose memzones overlap in any
 * address space (virtual, physical or ioremap).
 *
 * Returns 1 if such a pair exists, 0 otherwise.
 */
static int
has_overlapping_segments(struct ivshmem_segment * ms, int len)
{
	int first, second;

	for (first = 0; first < len; first++) {
		const struct rte_memzone * lhs = &ms[first].entry.mz;

		for (second = first + 1; second < len; second++) {
			if (overlap(lhs, &ms[second].entry.mz) != 0)
				return 1;
		}
	}

	return 0;
}
/*
 * qsort() comparison function for struct ivshmem_segment.
 *
 * Unallocated segments (NULL memzone address) are ordered after all
 * allocated ones; allocated segments are ordered by ascending physical
 * address.
 *
 * Returns a negative value, zero or a positive value if a sorts before,
 * equal to or after b.
 */
static int
seg_compare(const void * a, const void * b)
{
	const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a;
	const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b;

	/* move unallocated zones to the end */
	if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL)
		return 0;
	if (s1->entry.mz.addr == NULL)
		return 1;
	if (s2->entry.mz.addr == NULL)
		return -1;

	/* a three-way result is required: returning only "greater than"
	 * (0 or 1) reports a < b as "equal", which is not a consistent
	 * ordering for qsort() */
	return (s1->entry.mz.phys_addr > s2->entry.mz.phys_addr) -
			(s1->entry.mz.phys_addr < s2->entry.mz.phys_addr);
}
#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
/*
 * Log one metadata entry at DEBUG level: the virtual, physical and ioremap
 * address ranges of its memzone, the memzone length and the entry offset.
 */
static void
entry_dump(struct rte_ivshmem_metadata_entry *e)
{
	RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr,
			RTE_PTR_ADD(e->mz.addr, e->mz.len));
	RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n",
			e->mz.phys_addr,
			e->mz.phys_addr + e->mz.len);
	RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n",
			e->mz.ioremap_addr,
			e->mz.ioremap_addr + e->mz.len);
	/* mz.len is a size_t, which is not 64 bits wide on every target:
	 * convert explicitly to match the PRIx64 conversion */
	RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", (uint64_t) e->mz.len);
	RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset);
}
#endif
/*
* Actual useful code
*/
/*
 * Read through the metadata mapped from an IVSHMEM device and append every
 * initialized entry to ivshmem_config->segment[].
 *
 * path     - path of the device resource file; stored with each segment
 * path_len - size passed to rte_snprintf() when copying path
 * fd       - open descriptor of the device resource file
 * flen     - length handed to map_metadata()
 *
 * Returns 0 on success, -1 if the metadata cannot be mapped or if there are
 * more entries than free segment slots. On failure
 * ivshmem_config->segment_idx is left unchanged.
 */
static int
read_metadata(char * path, int path_len, int fd, uint64_t flen)
{
	struct rte_ivshmem_metadata metadata;
	struct rte_ivshmem_metadata_entry * entry;
	int idx, i;
	void * ptr;

	ptr = map_metadata(fd, flen);

	if (ptr == MAP_FAILED)
		return -1;

	/* work on a private copy so the mapping can be dropped right away */
	metadata = *(struct rte_ivshmem_metadata*) (ptr);

	unmap_metadata(ptr);

	RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name);

	idx = ivshmem_config->segment_idx;

	for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES; i++) {
		entry = &metadata.entry[i];

		/* stop on uninitialized memzone */
		if (entry->mz.len == 0)
			break;

		/* only complain about a full table when there really is another
		 * entry to store; a table that is exactly full is not an error */
		if (idx >= RTE_MAX_MEMSEG) {
			RTE_LOG(ERR, EAL, "Not enough memory segments!\n");
			return -1;
		}

		/* copy metadata entry */
		memcpy(&ivshmem_config->segment[idx].entry, entry,
				sizeof(struct rte_ivshmem_metadata_entry));

		/* copy path */
		rte_snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path);

		idx++;
	}
	ivshmem_config->segment_idx = idx;

	return 0;
}
/*
 * Check through each segment and look for adjacent or overlapping ones,
 * concatenating them in place.
 *
 * ms      - segment table; sorted and modified in place
 * tbl_len - number of entries in the table
 *
 * Returns the number of entries left in the table after concatenation, or
 * -1 if two segments overlap without the overlap being FULL.
 */
static int
cleanup_segments(struct ivshmem_segment * ms, int tbl_len)
{
struct ivshmem_segment * s, * tmp;
int i, j, concat, seg_adjacent, seg_overlapping;
uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2;
/* order the table with seg_compare() before scanning it */
qsort(ms, tbl_len, sizeof(struct ivshmem_segment),
seg_compare);
/* each pass of this loop merges at most one pair and then rescans.
 * NOTE(review): the body only changes the table for FULL adjacency/overlap
 * (or bails out on a partial overlap), so termination depends on the two
 * has_*_segments() predicates not reporting pairs the body ignores. */
while (has_overlapping_segments(ms, tbl_len) ||
has_adjacent_segments(ms, tbl_len)) {
for (i = 0; i < tbl_len; i++) {
s = &ms[i];
concat = 0;
for (j = i + 1; j < tbl_len; j++) {
tmp = &ms[j];
/* check if this segment is overlapping with existing segment,
* or is adjacent to existing segment */
seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz);
seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz);
/* check if segments fully overlap or are fully adjacent */
if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) {
#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
RTE_LOG(DEBUG, EAL, "Concatenating segments\n");
RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
entry_dump(&s->entry);
RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
entry_dump(&tmp->entry);
#endif
/* snapshot both segments before s is overwritten below */
start1 = s->entry.mz.addr_64;
start2 = tmp->entry.mz.addr_64;
p_start1 = s->entry.mz.phys_addr;
p_start2 = tmp->entry.mz.phys_addr;
i_start1 = s->entry.mz.ioremap_addr;
i_start2 = tmp->entry.mz.ioremap_addr;
end1 = s->entry.mz.addr_64 + s->entry.mz.len;
end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len;
/* settle for minimum start address and maximum length */
s->entry.mz.addr_64 = RTE_MIN(start1, start2);
s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2);
s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2);
s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset);
/* new length spans from the merged start to the furthest virtual end */
s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64;
concat = 1;
#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
RTE_LOG(DEBUG, EAL, "Resulting segment:\n");
entry_dump(&s->entry);
#endif
}
/* if segments not fully overlap, we have an error condition.
* adjacent segments can coexist.
*/
else if (seg_overlapping > 0) {
RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j);
#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
entry_dump(&s->entry);
RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
entry_dump(&tmp->entry);
#endif
return -1;
}
/* leave the inner loop with j still pointing at the merged-in segment */
if (concat)
break;
}
/* if we concatenated, remove segment at j */
if (concat) {
/* presumably drops entry j from the table - see remove_segment() */
remove_segment(ms, tbl_len, j);
tbl_len--;
/* restart the scan from the while() condition */
break;
}
}
}
return tbl_len;
}
static int
create_shared_config(void)
{
char path[PATH_MAX];
int fd;
/* build ivshmem config file path */
rte_snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
internal_config.hugefile_prefix);
fd = open(path, O_CREAT | O_RDWR);
if (fd < 0) {
RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno));
return -1;
}
/* try ex-locking first - if the file is locked, we have a problem */
if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno));
close(fd);
return -1;
}
ftruncate(fd, sizeof(struct ivshmem_shared_config));
ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (ivshmem_config == MAP_FAILED)
return -1;
memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config));
/* change the exclusive lock we got earlier to a shared lock */
if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
return -1;
}
close(fd);
return 0;
}
/* open shared config file and, if present, map the config.
 * having no config file is not an error condition, as we later check if
 * ivshmem_config is NULL (if it is, that means nothing was mapped).
 *
 * Returns 0 when the config was mapped, when no config file exists, or when
 * a stray (unlocked) config file was found and removed. Returns -1 on any
 * other failure; in that case the descriptor is closed, any mapping is
 * released and ivshmem_config is NULL. */
static int
open_shared_config(void)
{
	char path[PATH_MAX];
	int fd;

	/* build ivshmem config file path */
	rte_snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
			internal_config.hugefile_prefix);

	fd = open(path, O_RDONLY);

	/* if the file doesn't exist, just return success */
	if (fd < 0 && errno == ENOENT)
		return 0;
	/* else we have an error condition */
	else if (fd < 0) {
		RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
				path, strerror(errno));
		return -1;
	}

	/* try ex-locking first - if the lock *does* succeed, this means it's a
	 * stray config file, so it should be deleted.
	 */
	if (flock(fd, LOCK_EX | LOCK_NB) != -1) {

		/* if we can't remove the file, something is wrong */
		if (unlink(path) < 0) {
			RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path,
					strerror(errno));
			/* closing the descriptor also drops the lock taken above */
			close(fd);
			return -1;
		}

		/* release the lock */
		flock(fd, LOCK_UN);
		close(fd);

		/* return success as having a stray config file is equivalent to not
		 * having config file at all.
		 */
		return 0;
	}

	ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
			PROT_READ, MAP_SHARED, fd, 0);

	if (ivshmem_config == MAP_FAILED) {
		/* callers test ivshmem_config against NULL, not MAP_FAILED */
		ivshmem_config = NULL;
		close(fd);
		return -1;
	}

	/* place a shared lock on config file */
	if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
		RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
		munmap(ivshmem_config, sizeof(struct ivshmem_shared_config));
		ivshmem_config = NULL;
		close(fd);
		return -1;
	}

	close(fd);

	return 0;
}
/*
 * This function does the following:
 *
 * 1) Builds a table of ivshmem_segments with proper offset alignment
 * 2) Cleans up that table so that we don't have any overlapping or adjacent
 *    memory segments
 * 3) Creates memsegs from this table and maps them into memory.
 *
 * Returns 0 on success, -1 on failure. Mappings created before a failure
 * are left in place; file descriptors opened here are always closed.
 */
static inline int
map_all_segments(void)
{
	struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG];
	struct ivshmem_pci_device * pci_dev;
	struct rte_mem_config * mcfg;
	struct ivshmem_segment * seg;
	int fd, fd_zero;
	unsigned i, j;
	struct rte_memzone mz;
	struct rte_memseg ms;
	void * base_addr;
	uint64_t align, len;
	phys_addr_t ioremap_addr;

	memset(ms_tbl, 0, sizeof(ms_tbl));
	memset(&mz, 0, sizeof(struct rte_memzone));
	memset(&ms, 0, sizeof(struct rte_memseg));

	/* first, build a table of memsegs to map, to avoid failed mmaps due to
	 * overlaps
	 */
	for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) {
		if (i == RTE_MAX_MEMSEG) {
			RTE_LOG(ERR, EAL, "Too many segments requested!\n");
			return -1;
		}

		seg = &ivshmem_config->segment[i];

		/* copy segment to table */
		memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment));

		/* find ioremap addr. the lookup result must be reset for every
		 * segment, otherwise a segment whose device is unknown silently
		 * inherits the address found for a previous segment */
		ioremap_addr = 0;
		for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) {
			pci_dev = &ivshmem_config->pci_devs[j];
			if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) {
				ioremap_addr = pci_dev->ioremap_addr;
				break;
			}
		}
		if (ioremap_addr == 0) {
			RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n");
			return -1;
		}

		/* work out alignments: distance of the start address from the
		 * previous 0x1000 boundary, and the length rounded up to 0x1000 */
		align = seg->entry.mz.addr_64 -
				RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000);
		len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000);

		/* save original alignments */
		ms_tbl[i].align = align;

		/* create a memory zone */
		mz.addr_64 = seg->entry.mz.addr_64 - align;
		mz.len = len;
		mz.hugepage_sz = seg->entry.mz.hugepage_sz;
		mz.phys_addr = seg->entry.mz.phys_addr - align;

		/* find true physical address */
		mz.ioremap_addr = ioremap_addr + seg->entry.offset - align;

		ms_tbl[i].entry.offset = seg->entry.offset - align;

		memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone));
	}

	/* clean up the segments */
	memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx);

	if (memseg_idx < 0)
		return -1;

	mcfg = rte_eal_get_configuration()->mem_config;

	fd_zero = open("/dev/zero", O_RDWR);

	if (fd_zero < 0) {
		RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno));
		return -1;
	}

	/* create memsegs and put them into DPDK memory */
	for (i = 0; i < (unsigned) memseg_idx; i++) {

		seg = &ms_tbl[i];

		ms.addr_64 = seg->entry.mz.addr_64;
		ms.hugepage_sz = seg->entry.mz.hugepage_sz;
		ms.len = seg->entry.mz.len;
		ms.nchannel = rte_memory_get_nchannel();
		ms.nrank = rte_memory_get_nrank();
		ms.phys_addr = seg->entry.mz.phys_addr;
		ms.ioremap_addr = seg->entry.mz.ioremap_addr;
		ms.socket_id = seg->entry.mz.socket_id;

		/* probe the target range with /dev/zero first: the segment is only
		 * usable if it can be mapped at exactly the requested address */
		base_addr = mmap(ms.addr, ms.len,
				PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0);

		if (base_addr == MAP_FAILED || base_addr != ms.addr) {
			RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n");
			goto error;
		}

		fd = open(seg->path, O_RDWR);

		if (fd < 0) {
			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path,
					strerror(errno));
			goto error;
		}

		/* drop the probe mapping and map the device file in its place */
		munmap(ms.addr, ms.len);

		base_addr = mmap(ms.addr, ms.len,
				PROT_READ | PROT_WRITE, MAP_SHARED, fd,
				seg->entry.offset);

		if (base_addr == MAP_FAILED || base_addr != ms.addr) {
			RTE_LOG(ERR, EAL, "Cannot map segment into memory: "
					"expected %p got %p (%s)\n", ms.addr, base_addr,
					strerror(errno));
			close(fd);
			goto error;
		}

		RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at "
				"offset 0x%" PRIx64 "\n",
				ms.addr, ms.len, seg->entry.offset);

		/* put the pointers back into their real positions using original
		 * alignment */
		ms.addr_64 += seg->align;
		ms.phys_addr += seg->align;
		ms.ioremap_addr += seg->align;
		ms.len -= seg->align;

		/* at this point, the rest of DPDK memory is not initialized, so we
		 * expect memsegs to be empty */
		memcpy(&mcfg->memseg[i], &ms,
				sizeof(struct rte_memseg));
		memcpy(&mcfg->free_memseg[i], &ms,
				sizeof(struct rte_memseg));

		/* adjust the free_memseg so that there's no free space left */
		mcfg->free_memseg[i].ioremap_addr += mcfg->free_memseg[i].len;
		mcfg->free_memseg[i].phys_addr += mcfg->free_memseg[i].len;
		mcfg->free_memseg[i].addr_64 += mcfg->free_memseg[i].len;
		mcfg->free_memseg[i].len = 0;

		close(fd);

		RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n",
				ms.len);
	}

	close(fd_zero);

	return 0;

error:
	/* /dev/zero is only needed while probing address ranges */
	close(fd_zero);
	return -1;
}
/*
 * Object discovery for IVSHMEM; this happens at a later stage, after general
 * EAL memory initialization.
 *
 * Copies the memzone of every IVSHMEM segment into the memory config,
 * derives each memzone's ioremap address from the memseg that contains it,
 * and then inserts every memzone carrying the ring prefix into the ring
 * tailq.
 *
 * Returns 0 on success (or when there is nothing to do), -1 on failure.
 */
int
rte_eal_ivshmem_obj_init(void)
{
	struct rte_ring_list* ring_list = NULL;
	struct rte_mem_config * mcfg;
	struct ivshmem_segment * seg;
	struct rte_memzone * mz;
	struct rte_ring * r;
	unsigned i, ms, idx;
	uint64_t offset;

	/* secondary process would not need any object discovery - it'll all
	 * already be in shared config */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;
	/* nothing was mapped, so there is nothing to discover */
	if (ivshmem_config == NULL)
		return 0;

	/* check that we have an initialised ring tail queue */
	ring_list = RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_RING, rte_ring_list);
	if (ring_list == NULL) {
		RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n");
		return -1;
	}

	mcfg = rte_eal_get_configuration()->mem_config;

	/* create memzones */
	for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) {

		seg = &ivshmem_config->segment[i];

		/* add memzone */
		if (mcfg->memzone_idx == RTE_MAX_MEMZONE) {
			RTE_LOG(ERR, EAL, "No more memory zones available!\n");
			return -1;
		}

		idx = mcfg->memzone_idx;

		RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n",
				seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len);

		memcpy(&mcfg->memzone[idx], &seg->entry.mz,
				sizeof(struct rte_memzone));

		/* find ioremap address: look for the memseg holding this memzone */
		for (ms = 0; ms < RTE_MAX_MEMSEG; ms++) {
			if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx]))
				break;
		}
		if (ms == RTE_MAX_MEMSEG) {
			RTE_LOG(ERR, EAL, "Physical address of segment not found!\n");
			return -1;
		}

		/* same distance from the memseg start in ioremap space */
		offset = mcfg->memzone[idx].addr_64 -
				mcfg->memseg[ms].addr_64;
		mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr +
				offset;

		mcfg->memzone_idx++;
	}

	/* find rings */
	for (i = 0; i < mcfg->memzone_idx; i++) {
		mz = &mcfg->memzone[i];

		/* only memzones with a ring prefix are of interest */
		if (strncmp(mz->name, RTE_RING_MZ_PREFIX,
				sizeof(RTE_RING_MZ_PREFIX) - 1) == 0) {
			r = (struct rte_ring*) (mz->addr_64);

			TAILQ_INSERT_TAIL(ring_list, r, next);

			RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr);
		}
	}

#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
	rte_memzone_dump();
	rte_ring_list_dump();
#endif

	return 0;
}
/*
 * Initialize ivshmem structures.
 *
 * In a secondary process the shared config written by the primary is opened
 * (its absence is not an error). In any other process every IVSHMEM PCI
 * device in device_list is probed through its BAR2 resource file; devices
 * carrying DPDK metadata have their segments and ioremap address recorded
 * in the shared config, which is created when the first such device is
 * found. Finally, if a config exists, all segments are mapped.
 *
 * Returns 0 on success (including "no IVSHMEM configuration found"), -1 on
 * failure.
 */
int rte_eal_ivshmem_init(void)
{
	struct rte_pci_device * dev;
	struct rte_pci_resource * res;
	int fd, ret;
	char path[PATH_MAX];

	/* initialize everything to 0 */
	memset(path, 0, sizeof(path));
	ivshmem_config = NULL;

	pagesz = getpagesize();

	RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n");

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {

		if (open_shared_config() < 0) {
			RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n");
			return -1;
		}
	}
	else {

		TAILQ_FOREACH(dev, &device_list, next) {

			if (is_ivshmem_device(dev)) {

				/* IVSHMEM memory is always on BAR2 */
				res = &dev->mem_resource[2];

				/* if we don't have a BAR2 */
				if (res->len == 0)
					continue;

				/* construct pci device path */
				rte_snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH,
						dev->addr.domain, dev->addr.bus, dev->addr.devid,
						dev->addr.function);

				/* try to find memseg */
				fd = open(path, O_RDWR);
				if (fd < 0) {
					RTE_LOG(ERR, EAL, "Could not open %s\n", path);
					return -1;
				}

				/* check if it's a DPDK IVSHMEM device */
				ret = has_ivshmem_metadata(fd, res->len);

				/* is DPDK device */
				if (ret == 1) {

					/* config file creation is deferred until the first
					 * DPDK device is found. then, it has to be created
					 * only once. */
					if (ivshmem_config == NULL &&
							create_shared_config() < 0) {
						RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n");
						close(fd);
						return -1;
					}

					if (read_metadata(path, sizeof(path), fd, res->len) < 0) {
						RTE_LOG(ERR, EAL, "Could not read metadata from"
								" device %02x:%02x.%x!\n", dev->addr.bus,
								dev->addr.devid, dev->addr.function);
						close(fd);
						return -1;
					}

					/* NOTE(review): at this point read_metadata() has
					 * already recorded this device's segments, although the
					 * device itself will not be added to pci_devs - confirm
					 * this is the intended behaviour at the limit. */
					if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) {
						RTE_LOG(WARNING, EAL,
								"IVSHMEM PCI device limit exceeded. Increase "
								"CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in "
								"your config file.\n");
						/* break skips the close() at the bottom of the
						 * loop body, so release the BAR fd here */
						close(fd);
						break;
					}

					RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n",
							dev->addr.bus, dev->addr.devid, dev->addr.function);

					ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr;
					/* never hand a variable string over as the format */
					rte_snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path,
							sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path),
							"%s", path);

					ivshmem_config->pci_devs_idx++;
				}
				/* failed to read */
				else if (ret < 0) {
					RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n",
							strerror(errno));
					close(fd);
					return -1;
				}
				/* not a DPDK device */
				else
					RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n");

				/* close the BAR fd */
				close(fd);
			}
		}
	}

	/* ivshmem_config is not NULL only if config was created and/or mapped */
	if (ivshmem_config) {
		if (map_all_segments() < 0) {
			RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n");
			return -1;
		}
	}
	else {
		RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n");
	}

	return 0;
}
#endif

View File

@ -113,6 +113,68 @@ static uint64_t baseaddr_offset;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
/*
 * Translate a virtual address of the calling process into a physical
 * address by reading the matching entry of /proc/self/pagemap.
 *
 * Returns the physical address of the start of the page containing
 * virtaddr (PFN * page size), or (uint64_t) -1 if the pagemap cannot be
 * opened, positioned or read in full.
 */
static uint64_t
get_physaddr(void * virtaddr)
{
	int fd;
	uint64_t page, physaddr;
	unsigned long virt_pfn;
	int page_size;
	off_t offset;
	ssize_t nread;

	/* standard page size */
	page_size = getpagesize();

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0) {
		RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
			__func__, strerror(errno));
		return (uint64_t) -1;
	}

	/* pagemap holds one 64-bit entry per virtual page */
	virt_pfn = (unsigned long)virtaddr / page_size;
	offset = sizeof(uint64_t) * virt_pfn;
	if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
		RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
				__func__, strerror(errno));
		close(fd);
		return (uint64_t) -1;
	}

	nread = read(fd, &page, sizeof(uint64_t));
	if (nread < 0) {
		RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
				__func__, strerror(errno));
		close(fd);
		return (uint64_t) -1;
	}
	/* a short (or empty) read leaves 'page' partly uninitialized, so it
	 * must not be interpreted as a pagemap entry */
	if (nread != (ssize_t) sizeof(uint64_t)) {
		RTE_LOG(ERR, EAL, "%s(): short read from /proc/self/pagemap\n",
				__func__);
		close(fd);
		return (uint64_t) -1;
	}

	/*
	 * the pfn (page frame number) are bits 0-54 (see
	 * pagemap.txt in linux Documentation)
	 */
	physaddr = ((page & 0x7fffffffffffffULL) * page_size);
	close(fd);
	return physaddr;
}
/*
 * Fill in the physaddr field of the first hpi->num_pages[0] entries of
 * hugepg_tbl, looking each one up from its orig_va with get_physaddr().
 *
 * Returns 0 on success, -1 as soon as one lookup fails (entries after the
 * failing one are left untouched).
 */
static int
find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
	unsigned page_no = 0;

	while (page_no < hpi->num_pages[0]) {
		struct hugepage_file *hp = &hugepg_tbl[page_no];
		phys_addr_t pa = get_physaddr(hp->orig_va);

		/* all-ones is get_physaddr()'s error value */
		if (pa == (phys_addr_t) -1)
			return -1;

		hp->physaddr = pa;
		page_no++;
	}
	return 0;
}
/*
* Check whether address-space layout randomization is enabled in
* the kernel. This is important for multi-process as it can prevent
@ -209,7 +271,7 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
* map contiguous physical blocks in contiguous virtual blocks.
*/
static int
map_all_hugepages(struct hugepage *hugepg_tbl,
map_all_hugepages(struct hugepage_file *hugepg_tbl,
struct hugepage_info *hpi, int orig)
{
int fd;
@ -218,15 +280,25 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
void *vma_addr = NULL;
size_t vma_len = 0;
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
RTE_SET_USED(vma_len);
#endif
for (i = 0; i < hpi->num_pages[0]; i++) {
size_t hugepage_sz = hpi->hugepage_sz;
if (orig) {
hugepg_tbl[i].file_id = i;
hugepg_tbl[i].size = hugepage_sz;
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
hugepg_tbl[i].file_id);
#else
eal_get_hugefile_path(hugepg_tbl[i].filepath,
sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
hugepg_tbl[i].file_id);
#endif
hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
}
#ifndef RTE_ARCH_X86_64
@ -239,6 +311,8 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
continue;
}
#endif
#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
else if (vma_len == 0) {
unsigned j, num_pages;
@ -260,6 +334,7 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
if (vma_addr == NULL)
vma_len = hugepage_sz;
}
#endif
/* try to create hugepage file */
fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
@ -302,77 +377,189 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
return 0;
}
/*
 * Release the original mapping of each of the first hpi->num_pages[0]
 * entries in hugepg_tbl and clear its orig_va. Entries whose orig_va is
 * already NULL are skipped. Always returns 0.
 */
static int
unmap_all_hugepages_orig(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
{
	unsigned page_no;

	for (page_no = 0; page_no < hpi->num_pages[0]; page_no++) {
		struct hugepage *hp = &hugepg_tbl[page_no];

		/* nothing mapped for this entry */
		if (!hp->orig_va)
			continue;

		munmap(hp->orig_va, hpi->hugepage_sz);
		hp->orig_va = NULL;
	}
	return 0;
}
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
/*
* For each hugepage in hugepg_tbl, fill the physaddr value. We find
* it by browsing the /proc/self/pagemap special file.
* Remaps all hugepages into single file segments
*/
static int
find_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
int fd;
unsigned i;
uint64_t page;
unsigned long virt_pfn;
int page_size;
unsigned i = 0, j, num_pages, page_idx = 0;
void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
size_t vma_len = 0;
size_t hugepage_sz = hpi->hugepage_sz;
size_t total_size, offset;
char filepath[MAX_HUGEPAGE_PATH];
phys_addr_t physaddr;
int socket;
/* standard page size */
page_size = getpagesize();
while (i < hpi->num_pages[0]) {
fd = open("/proc/self/pagemap", O_RDONLY);
if (fd < 0) {
RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
__func__, strerror(errno));
return -1;
}
for (i = 0; i < hpi->num_pages[0]; i++) {
off_t offset;
virt_pfn = (unsigned long)hugepg_tbl[i].orig_va /
page_size;
offset = sizeof(uint64_t) * virt_pfn;
if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
__func__, strerror(errno));
close(fd);
return -1;
}
if (read(fd, &page, sizeof(uint64_t)) < 0) {
RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
__func__, strerror(errno));
close(fd);
return -1;
}
/*
* the pfn (page frame number) are bits 0-54 (see
* pagemap.txt in linux Documentation)
#ifndef RTE_ARCH_X86_64
/* for 32-bit systems, don't remap 1G pages, just reuse original
* map address as final map address.
*/
hugepg_tbl[i].physaddr = ((page & 0x7fffffffffffffULL) * page_size);
if (hugepage_sz == RTE_PGSIZE_1G){
hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
hugepg_tbl[i].orig_va = NULL;
i++;
continue;
}
#endif
/* reserve a virtual area for next contiguous
* physical block: count the number of
* contiguous physical pages. */
for (j = i+1; j < hpi->num_pages[0] ; j++) {
if (hugepg_tbl[j].physaddr != hugepg_tbl[j-1].physaddr + hugepage_sz)
break;
}
num_pages = j - i;
vma_len = num_pages * hugepage_sz;
socket = hugepg_tbl[i].socket_id;
/* get the biggest virtual memory area up to
* vma_len. If it fails, vma_addr is NULL, so
* let the kernel provide the address. */
vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
/* If we can't find a big enough virtual area, work out how many pages
* we are going to get */
if (vma_addr == NULL)
j = i + 1;
else if (vma_len != num_pages * hugepage_sz) {
num_pages = vma_len / hugepage_sz;
j = i + num_pages;
}
hugepg_tbl[page_idx].file_id = page_idx;
eal_get_hugefile_path(filepath,
sizeof(filepath),
hpi->hugedir,
hugepg_tbl[page_idx].file_id);
/* try to create hugepage file */
fd = open(filepath, O_CREAT | O_RDWR, 0755);
if (fd < 0) {
RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
return -1;
}
total_size = 0;
for (;i < j; i++) {
/* unmap current segment */
if (total_size > 0)
munmap(vma_addr, total_size);
/* unmap original page */
munmap(hugepg_tbl[i].orig_va, hugepage_sz);
unlink(hugepg_tbl[i].filepath);
total_size += hugepage_sz;
old_addr = vma_addr;
/* map new, bigger segment */
vma_addr = mmap(vma_addr, total_size,
PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
close(fd);
return -1;
}
/* touch the page. this is needed because kernel postpones mapping
* creation until the first page fault. with this, we pin down
* the page and it is marked as used and gets into process' pagemap.
*/
for (offset = 0; offset < total_size; offset += hugepage_sz)
*((volatile uint8_t*) RTE_PTR_ADD(vma_addr, offset));
}
/* set shared flock on the file. */
if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
__func__, strerror(errno));
close(fd);
return -1;
}
rte_snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
filepath);
physaddr = get_physaddr(vma_addr);
if (physaddr == (phys_addr_t) -1)
return -1;
hugepg_tbl[page_idx].final_va = vma_addr;
hugepg_tbl[page_idx].physaddr = physaddr;
hugepg_tbl[page_idx].repeated = num_pages;
hugepg_tbl[page_idx].socket_id = socket;
close(fd);
/* verify the memory segment - that is, check that every VA corresponds
* to the physical address we expect to see
*/
for (offset = 0; offset < vma_len; offset += hugepage_sz) {
uint64_t expected_physaddr;
expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
page_addr = RTE_PTR_ADD(vma_addr, offset);
physaddr = get_physaddr(page_addr);
if (physaddr != expected_physaddr) {
RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
"at %p (offset 0x%" PRIx64 ": 0x%" PRIx64
" (expected 0x%" PRIx64 ")\n",
page_addr, offset, physaddr, expected_physaddr);
return -1;
}
}
/* zero out the whole segment */
memset(hugepg_tbl[page_idx].final_va, 0, total_size);
page_idx++;
}
close(fd);
return 0;
/* zero out the rest */
memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
return page_idx;
}
#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */
/*
 * Release the original mapping of each of the first hpi->num_pages[0]
 * entries in hugepg_tbl and clear its orig_va. Entries whose orig_va is
 * already NULL are skipped. Always returns 0.
 */
static int
unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
	unsigned page_no;

	for (page_no = 0; page_no < hpi->num_pages[0]; page_no++) {
		struct hugepage_file *hp = &hugepg_tbl[page_no];

		/* nothing mapped for this entry */
		if (!hp->orig_va)
			continue;

		munmap(hp->orig_va, hpi->hugepage_sz);
		hp->orig_va = NULL;
	}
	return 0;
}
#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */
/*
* Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
* page.
*/
static int
find_numasocket(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
int socket_id;
char *end, *nodestr;
@ -455,12 +642,12 @@ error:
* is only done at init time.
*/
static int
sort_by_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
unsigned i, j;
int smallest_idx;
uint64_t smallest_addr;
struct hugepage tmp;
struct hugepage_file tmp;
for (i = 0; i < hpi->num_pages[0]; i++) {
smallest_addr = 0;
@ -486,10 +673,10 @@ sort_by_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
}
/* swap the 2 entries in the table */
memcpy(&tmp, &hugepg_tbl[smallest_idx], sizeof(struct hugepage));
memcpy(&tmp, &hugepg_tbl[smallest_idx], sizeof(struct hugepage_file));
memcpy(&hugepg_tbl[smallest_idx], &hugepg_tbl[i],
sizeof(struct hugepage));
memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage));
sizeof(struct hugepage_file));
memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage_file));
}
return 0;
}
@ -519,8 +706,8 @@ create_shared_memory(const char *filename, const size_t mem_size)
* destination is typically the shared memory.
*/
static int
copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size,
const struct hugepage * src, int src_size)
copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size,
const struct hugepage_file * src, int src_size)
{
int src_pos, dst_pos = 0;
@ -529,7 +716,7 @@ copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size,
/* error on overflow attempt */
if (dst_pos == dest_size)
return -1;
memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage));
memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file));
dst_pos++;
}
}
@ -541,7 +728,7 @@ copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size,
* ALL hugepages (not just those we need), additional unmapping needs to be done.
*/
static int
unmap_unneeded_hugepages(struct hugepage *hugepg_tbl,
unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
struct hugepage_info *hpi,
unsigned num_hp_info)
{
@ -556,9 +743,16 @@ unmap_unneeded_hugepages(struct hugepage *hugepg_tbl,
for (size = 0; size < num_hp_info; size++) {
for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
unsigned pages_found = 0;
/* traverse until we have unmapped all the unused pages */
for (page = 0; page < nrpages; page++) {
struct hugepage *hp = &hugepg_tbl[page];
struct hugepage_file *hp = &hugepg_tbl[page];
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
/* if this page was already cleared */
if (hp->final_va == NULL)
continue;
#endif
/* find a page that matches the criteria */
if ((hp->size == hpi[size].hugepage_sz) &&
@ -566,17 +760,67 @@ unmap_unneeded_hugepages(struct hugepage *hugepg_tbl,
/* if we skipped enough pages, unmap the rest */
if (pages_found == hpi[size].num_pages[socket]) {
munmap(hp->final_va, hp->size);
uint64_t unmap_len;
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
unmap_len = hp->size * hp->repeated;
#else
unmap_len = hp->size;
#endif
/* get start addr and len of the remaining segment */
munmap(hp->final_va, (size_t) unmap_len);
hp->final_va = NULL;
if (remove(hp->filepath) == -1) {
if (unlink(hp->filepath) == -1) {
RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
__func__, hp->filepath, strerror(errno));
return -1;
}
}
/* lock the page and skip */
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
/* else, check how much do we need to map */
else {
int nr_pg_left =
hpi[size].num_pages[socket] - pages_found;
/* if we need enough memory to fit into the segment */
if (hp->repeated <= nr_pg_left) {
pages_found += hp->repeated;
}
/* truncate the segment */
else {
uint64_t final_size = nr_pg_left * hp->size;
uint64_t seg_size = hp->repeated * hp->size;
void * unmap_va = RTE_PTR_ADD(hp->final_va,
final_size);
int fd;
munmap(unmap_va, seg_size - final_size);
fd = open(hp->filepath, O_RDWR);
if (fd < 0) {
RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
hp->filepath, strerror(errno));
return -1;
}
if (ftruncate(fd, final_size) < 0) {
RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n",
hp->filepath, strerror(errno));
return -1;
}
close(fd);
pages_found += nr_pg_left;
hp->repeated = nr_pg_left;
}
}
#else
/* else, lock the page and skip */
else
pages_found++;
#endif
} /* match page */
} /* foreach page */
@ -712,15 +956,18 @@ static int
rte_eal_hugepage_init(void)
{
struct rte_mem_config *mcfg;
struct hugepage *hugepage, *tmp_hp = NULL;
struct hugepage_file *hugepage, *tmp_hp = NULL;
struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
uint64_t memory[RTE_MAX_NUMA_NODES];
unsigned hp_offset;
int i, j, new_memseg;
int nrpages, total_pages = 0;
int nr_hugefiles, nr_hugepages = 0;
void *addr;
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
int new_pages_count[MAX_HUGEPAGE_SIZES];
#endif
memset(used_hp, 0, sizeof(used_hp));
@ -744,7 +991,7 @@ rte_eal_hugepage_init(void)
/* meanwhile, also initialize used_hp hugepage sizes in used_hp */
used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz;
total_pages += internal_config.hugepage_info[i].num_pages[0];
nr_hugepages += internal_config.hugepage_info[i].num_pages[0];
}
/*
@ -753,11 +1000,11 @@ rte_eal_hugepage_init(void)
* processing done on these pages, shared memory will be created
* at a later stage.
*/
tmp_hp = malloc(total_pages * sizeof(struct hugepage));
tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file));
if (tmp_hp == NULL)
goto fail;
memset(tmp_hp, 0, total_pages * sizeof(struct hugepage));
memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file));
hp_offset = 0; /* where we start the current page size entries */
@ -772,7 +1019,7 @@ rte_eal_hugepage_init(void)
*/
hpi = &internal_config.hugepage_info[i];
if (hpi->num_pages == 0)
if (hpi->num_pages[0] == 0)
continue;
/* map all hugepages available */
@ -783,7 +1030,7 @@ rte_eal_hugepage_init(void)
}
/* find physical addresses and sockets for each hugepage */
if (find_physaddr(&tmp_hp[hp_offset], hpi) < 0){
if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
(unsigned)(hpi->hugepage_sz / 0x100000));
goto fail;
@ -798,6 +1045,18 @@ rte_eal_hugepage_init(void)
if (sort_by_physaddr(&tmp_hp[hp_offset], hpi) < 0)
goto fail;
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
/* remap all hugepages into single file segments */
new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi);
if (new_pages_count[i] < 0){
RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
(unsigned)(hpi->hugepage_sz / 0x100000));
goto fail;
}
/* we have processed a num of hugepages of this size, so inc offset */
hp_offset += new_pages_count[i];
#else
/* remap all hugepages */
if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){
RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
@ -811,22 +1070,38 @@ rte_eal_hugepage_init(void)
/* we have processed a num of hugepages of this size, so inc offset */
hp_offset += hpi->num_pages[0];
#endif
}
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
nr_hugefiles = 0;
for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
nr_hugefiles += new_pages_count[i];
}
#else
nr_hugefiles = nr_hugepages;
#endif
/* clean out the numbers of pages */
for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++)
for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
internal_config.hugepage_info[i].num_pages[j] = 0;
/* get hugepages for each socket */
for (i = 0; i < total_pages; i++) {
for (i = 0; i < nr_hugefiles; i++) {
int socket = tmp_hp[i].socket_id;
/* find a hugepage info with right size and increment num_pages */
for (j = 0; j < (int) internal_config.num_hugepage_sizes; j++) {
if (tmp_hp[i].size ==
internal_config.hugepage_info[j].hugepage_sz) {
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
internal_config.hugepage_info[j].num_pages[socket] +=
tmp_hp[i].repeated;
#else
internal_config.hugepage_info[j].num_pages[socket]++;
#endif
}
}
}
@ -836,12 +1111,12 @@ rte_eal_hugepage_init(void)
memory[i] = internal_config.socket_mem[i];
/* calculate final number of pages */
nrpages = calc_num_pages_per_socket(memory,
nr_hugepages = calc_num_pages_per_socket(memory,
internal_config.hugepage_info, used_hp,
internal_config.num_hugepage_sizes);
/* error if not enough memory available */
if (nrpages < 0)
if (nr_hugepages < 0)
goto fail;
/* reporting in! */
@ -861,12 +1136,13 @@ rte_eal_hugepage_init(void)
/* create shared memory */
hugepage = create_shared_memory(eal_hugepage_info_path(),
nrpages * sizeof(struct hugepage));
nr_hugefiles * sizeof(struct hugepage_file));
if (hugepage == NULL) {
RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
goto fail;
}
memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file));
/*
* unmap pages that we won't need (looks at used_hp).
@ -883,8 +1159,8 @@ rte_eal_hugepage_init(void)
* this procedure only copies those hugepages that have final_va
* not NULL. has overflow protection.
*/
if (copy_hugepages_to_shared_mem(hugepage, nrpages,
tmp_hp, total_pages) < 0) {
if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,
tmp_hp, nr_hugefiles) < 0) {
RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
goto fail;
}
@ -893,9 +1169,16 @@ rte_eal_hugepage_init(void)
free(tmp_hp);
tmp_hp = NULL;
memset(mcfg->memseg, 0, sizeof(mcfg->memseg));
j = -1;
for (i = 0; i < nrpages; i++) {
/* find earliest free memseg - this is needed because in case of IVSHMEM,
* segments might have already been initialized */
for (j = 0; j < RTE_MAX_MEMSEG; j++)
if (mcfg->memseg[j].addr == NULL) {
/* move to previous segment and exit loop */
j--;
break;
}
for (i = 0; i < nr_hugefiles; i++) {
new_memseg = 0;
/* if this is a new section, create a new memseg */
@ -919,7 +1202,11 @@ rte_eal_hugepage_init(void)
mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
mcfg->memseg[j].addr = hugepage[i].final_va;
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated;
#else
mcfg->memseg[j].len = hugepage[i].size;
#endif
mcfg->memseg[j].socket_id = hugepage[i].socket_id;
mcfg->memseg[j].hugepage_sz = hugepage[i].size;
}
@ -930,21 +1217,19 @@ rte_eal_hugepage_init(void)
hugepage[i].memseg_id = j;
}
if (i < nrpages) {
if (i < nr_hugefiles) {
RTE_LOG(ERR, EAL, "Can only reserve %d pages "
"from %d requested\n"
"Current %s=%d is not enough\n"
"Please either increase it or request less amount "
"of memory.\n",
i, nrpages, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
RTE_MAX_MEMSEG);
return (-ENOMEM);
}
return 0;
fail:
if (tmp_hp)
free(tmp_hp);
@ -973,7 +1258,7 @@ static int
rte_eal_hugepage_attach(void)
{
const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
const struct hugepage *hp = NULL;
const struct hugepage_file *hp = NULL;
unsigned num_hp = 0;
unsigned i, s = 0; /* s used to track the segment number */
off_t size;
@ -1008,6 +1293,15 @@ rte_eal_hugepage_attach(void)
if (mcfg->memseg[s].len == 0)
break;
#ifdef RTE_LIBRTE_IVSHMEM
/*
* if segment has ioremap address set, it's an IVSHMEM segment and
* doesn't need mapping as it was already mapped earlier
*/
if (mcfg->memseg[s].ioremap_addr != 0)
continue;
#endif
/*
* fdzero is mmapped to get a contiguous block of virtual
* addresses of the appropriate memseg size.
@ -1018,9 +1312,9 @@ rte_eal_hugepage_attach(void)
if (base_addr == MAP_FAILED ||
base_addr != mcfg->memseg[s].addr) {
RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
"in /dev/zero to requested address [%p]\n",
"in /dev/zero to requested address [%p]: '%s'\n",
(unsigned long long)mcfg->memseg[s].len,
mcfg->memseg[s].addr);
mcfg->memseg[s].addr, strerror(errno));
if (aslr_enabled() > 0) {
RTE_LOG(ERR, EAL, "It is recommended to "
"disable ASLR in the kernel "
@ -1038,14 +1332,24 @@ rte_eal_hugepage_attach(void)
goto error;
}
num_hp = size / sizeof(struct hugepage);
RTE_LOG(DEBUG, EAL, "Analysing %u hugepages\n", num_hp);
num_hp = size / sizeof(struct hugepage_file);
RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp);
s = 0;
while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){
void *addr, *base_addr;
uintptr_t offset = 0;
size_t mapping_size;
#ifdef RTE_LIBRTE_IVSHMEM
/*
* if segment has ioremap address set, it's an IVSHMEM segment and
* doesn't need mapping as it was already mapped earlier
*/
if (mcfg->memseg[s].ioremap_addr != 0) {
s++;
continue;
}
#endif
/*
* free previously mapped memory so we can map the
* hugepages into the space
@ -1064,16 +1368,22 @@ rte_eal_hugepage_attach(void)
hp[i].filepath);
goto error;
}
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
mapping_size = hp[i].size * hp[i].repeated;
#else
mapping_size = hp[i].size;
#endif
addr = mmap(RTE_PTR_ADD(base_addr, offset),
hp[i].size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, fd, 0);
mapping_size, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
close(fd); /* close file both on success and on failure */
if (addr == MAP_FAILED) {
if (addr == MAP_FAILED ||
addr != RTE_PTR_ADD(base_addr, offset)) {
RTE_LOG(ERR, EAL, "Could not mmap %s\n",
hp[i].filepath);
goto error;
}
offset+=hp[i].size;
offset+=mapping_size;
}
}
RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,

View File

@ -46,6 +46,8 @@
#include <stdint.h>
#include <limits.h>
#include <unistd.h>
#include <stdlib.h>
#include <rte_string_fns.h>
#include "eal_internal_cfg.h"
@ -84,6 +86,7 @@ eal_hugepage_info_path(void)
/** String format for hugepage map files. */
#define HUGEFILE_FMT "%s/%smap_%d"
#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d"
static inline const char *
eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
@ -94,6 +97,17 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id
return buffer;
}
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
/**
 * Format the path of a temporary hugepage map file into a caller buffer.
 *
 * The path is built from TEMP_HUGEFILE_FMT using the hugepage directory,
 * the configured hugefile prefix and the file id. The last byte of the
 * buffer is always forced to NUL.
 *
 * @param buffer   destination buffer
 * @param buflen   size of the destination buffer in bytes
 * @param hugedir  hugetlbfs mount directory
 * @param f_id     numeric id substituted for the '%d' in the format
 * @return         the buffer pointer that was passed in
 */
static inline const char *
eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
{
	char *const last_byte = buffer + (buflen - 1);

	rte_snprintf(buffer, buflen, TEMP_HUGEFILE_FMT,
			hugedir, internal_config.hugefile_prefix, f_id);

	/* guarantee termination regardless of what the formatter did */
	*last_byte = '\0';

	return buffer;
}
#endif
/** define the default filename prefix for the %s values above */
#define HUGEFILE_PREFIX_DEFAULT "rte"

View File

@ -35,6 +35,8 @@
#define RTE_LINUXAPP_HUGEPAGES_H_
#include <stddef.h>
#include <stdint.h>
#include <limits.h>
#define MAX_HUGEPAGE_PATH PATH_MAX
@ -42,7 +44,7 @@
* Structure used to store information about hugepages that we mapped
* through the files in hugetlbfs.
*/
struct hugepage {
struct hugepage_file {
void *orig_va; /**< virtual addr of first mmap() */
void *final_va; /**< virtual addr of 2nd mmap() */
uint64_t physaddr; /**< physical addr */
@ -50,6 +52,9 @@ struct hugepage {
int socket_id; /**< NUMA socket ID */
int file_id; /**< the '%d' in HUGEFILE_FMT */
int memseg_id; /**< the memory segment to which page belongs */
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
int repeated; /**< number of times the page size is repeated */
#endif
char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
};

View File

@ -0,0 +1,48 @@
# BSD LICENSE
#
# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# pull in the common build variables
include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_ivshmem.a
# warnings as configured by the SDK, local headers on the include path
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
# all sources are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_IVSHMEM)-include := rte_ivshmem.h
# this lib depends on librte_mempool
DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_mempool
# generic library build rules
include $(RTE_SDK)/mk/rte.lib.mk

View File

@ -0,0 +1,884 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <fcntl.h>
#include <limits.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <rte_eal_memconfig.h>
#include <rte_memory.h>
#include <rte_ivshmem.h>
#include <rte_string_fns.h>
#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_spinlock.h>
#include <rte_common.h>
#include <rte_malloc.h>
#include "rte_ivshmem.h"
/* path of the metadata file backing a named IVSHMEM config */
#define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s"
/* QEMU option header: total device size in MB followed by the fd list */
#define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s"
/* one fd list element: ":<path>:0x<offset>:0x<length>" */
#define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64
#define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024
#define IVSHMEM_MAX_PAGES (1 << 12)
/* true when y starts at the physical address where x ends */
#define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr)
/* size of the metadata structure rounded up to the system page size */
#define METADATA_SIZE_ALIGNED \
	(RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
/*
 * Parse one hexadecimal address from the string cursor `in`.
 *
 * On success `addr` receives the value and `in` is advanced past the
 * delimiter `dlm` that must directly follow the number. On failure the
 * message `err` is logged and control jumps to the enclosing function's
 * `error` label, which therefore has to exist.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement (safe in unbraced if/else bodies).
 */
#define GET_PAGEMAP_ADDR(in,addr,dlm,err) \
do { \
	char *pagemap_end_; \
	errno = 0; \
	(addr) = strtoull((in), &pagemap_end_, 16); \
	if (errno != 0 || *pagemap_end_ != (dlm)) { \
		RTE_LOG(ERR, EAL, err); \
		goto error; \
	} \
	(in) = pagemap_end_ + 1; \
} while (0)
/* system page size; lazily set from getpagesize() when metadata is created */
static int pagesz;
/*
 * One contiguous piece of a backing file that has to be handed to QEMU.
 * Filled in by build_config() and consumed when the command line is
 * generated.
 */
struct memseg_cache_entry {
char filepath[PATH_MAX]; /* path of the mapped file, as read from /proc/self/maps */
uint64_t offset; /* distance of the first page from the start of that mapping */
uint64_t len; /* number of bytes covered by this entry */
};
/* Per metadata-file state kept by this process. */
struct ivshmem_config {
struct rte_ivshmem_metadata * metadata; /* mmap()ed metadata file; NULL marks a free slot */
struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES];
/**< account for multiple files per segment case */
struct flock lock; /* record lock description passed to fcntl() on the metadata file */
rte_spinlock_t sl; /* taken while entries are added, dumped or turned into a command line */
};
/* table of all metadata files created by this process */
static struct ivshmem_config
ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES];
/* taken while a slot of ivshmem_global_config is being allocated */
static rte_spinlock_t global_cfg_sl;
static struct ivshmem_config *
get_config_by_name(const char * name)
{
struct rte_ivshmem_metadata * config;
unsigned i;
for (i = 0; i < RTE_DIM(ivshmem_global_config); i++) {
config = ivshmem_global_config[i].metadata;
if (config == NULL)
return NULL;
if (strncmp(name, config->name, IVSHMEM_NAME_LEN) == 0)
return &ivshmem_global_config[i];
}
return NULL;
}
/*
 * Check whether two memzones share part of their virtual address ranges.
 *
 * Each zone is treated as the half-open range [addr_64, addr_64 + len).
 * Returns 1 when the start of one zone lies inside the other, 0 otherwise.
 */
static int
overlap(const struct rte_memzone * s1, const struct rte_memzone * s2)
{
	const uint64_t lo1 = s1->addr_64;
	const uint64_t hi1 = s1->addr_64 + s1->len;
	const uint64_t lo2 = s2->addr_64;
	const uint64_t hi2 = s2->addr_64 + s2->len;

	const int first_starts_in_second = (lo1 >= lo2) && (lo1 < hi2);
	const int second_starts_in_first = (lo2 >= lo1) && (lo2 < hi1);

	return (first_starts_in_second || second_starts_in_first) ? 1 : 0;
}
/*
 * Find the memzone whose start address equals the given address.
 *
 * Walks the memzone table of the EAL memory configuration and returns the
 * first zone that begins exactly at addr, or NULL when no zone does.
 */
static struct rte_memzone *
get_memzone_by_addr(const void * addr)
{
	struct rte_mem_config *mem_cfg = rte_eal_get_configuration()->mem_config;
	int idx;

	for (idx = 0; idx < RTE_MAX_MEMZONE; idx++) {
		struct rte_memzone *candidate = &mem_cfg->memzone[idx];

		if (candidate->addr_64 == (uint64_t) addr)
			return candidate;
	}

	return NULL;
}
/*
 * qsort() comparator for metadata entries.
 *
 * Unallocated entries (NULL address) compare equal to each other and sort
 * after every allocated entry. Allocated entries are ordered by ascending
 * physical address.
 *
 * Returns a negative value, zero or a positive value, as qsort() requires.
 * The result must be a proper three-way comparison: reporting "equal" for
 * a < b while reporting "greater" for b > a leaves the sort order undefined.
 */
static int
entry_compare(const void * a, const void * b)
{
	const struct rte_ivshmem_metadata_entry * e1 =
			(const struct rte_ivshmem_metadata_entry*) a;
	const struct rte_ivshmem_metadata_entry * e2 =
			(const struct rte_ivshmem_metadata_entry*) b;

	/* move unallocated zones to the end */
	if (e1->mz.addr == NULL && e2->mz.addr == NULL)
		return 0;
	if (e1->mz.addr == NULL)
		return 1;
	if (e2->mz.addr == NULL)
		return -1;

	/* three-way compare without risking overflow of a subtraction */
	return (e1->mz.phys_addr > e2->mz.phys_addr) -
			(e1->mz.phys_addr < e2->mz.phys_addr);
}
/*
 * Fills a hugepage cache entry for a given start virt_addr.
 *
 * Scans /proc/self/maps for the mapping that contains virt_addr and stores
 * in the entry:
 *  - filepath: the path printed for that mapping
 *  - offset:   virt_addr minus the start address of the mapping
 * The len field of the entry is not touched.
 *
 * Returns 0 on success. Returns -1 when the maps file cannot be opened or
 * parsed, when no file-backed mapping contains the address, or when the
 * path does not fit into the entry.
 */
static int
get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e)
{
	uint64_t start_addr, end_addr;
	char *start,*path_end;
	char buf[PATH_MAX*2];
	size_t path_len;
	FILE *f;

	start = NULL;
	path_end = NULL;
	start_addr = 0;

	memset(e->filepath, 0, sizeof(e->filepath));

	/* open /proc/self/maps */
	f = fopen("/proc/self/maps", "r");
	if (f == NULL) {
		RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n");
		return -1;
	}

	/* parse maps */
	while (fgets(buf, sizeof(buf), f) != NULL) {

		/* get endptr to end of start addr */
		start = buf;

		GET_PAGEMAP_ADDR(start,start_addr,'-',
				"Cannot find start address in maps!\n");

		/* if start address is bigger than our address, skip */
		if (start_addr > virt_addr)
			continue;

		GET_PAGEMAP_ADDR(start,end_addr,' ',
				"Cannot find end address in maps!\n");

		/* if end address is less than our address, skip */
		if (end_addr <= virt_addr)
			continue;

		/* find where the path starts */
		start = strchr(start, '/');

		if (start == NULL)
			continue;

		/* at this point, we know that this is our map.
		 * now let's find the file */
		path_end = strchr(start, '\n');
		break;
	}

	if (path_end == NULL) {
		RTE_LOG(ERR, EAL, "Hugefile path not found!\n");
		goto error;
	}

	/*
	 * The line buffer is larger than the destination, so the path length
	 * has to be checked against the destination before copying.
	 */
	path_len = RTE_PTR_DIFF(path_end, start);
	if (path_len >= sizeof(e->filepath)) {
		RTE_LOG(ERR, EAL, "Hugefile path is too long!\n");
		goto error;
	}

	/* calculate offset and copy the file path */
	memcpy(e->filepath, start, path_len);
	e->filepath[path_len] = '\0';

	e->offset = virt_addr - start_addr;

	fclose(f);

	return 0;
error:
	fclose(f);
	return -1;
}
/*
 * This is a complex function. What it does is the following:
 * 1. Goes through metadata and gets list of hugepages involved
 * 2. Sorts the hugepages by size (1G first)
 * 3. Goes through metadata again and writes correct offsets
 * 4. Goes through pages and finds out their filenames, offsets etc.
 *
 * All work is done on local copies; the config's metadata entries and
 * memseg cache are only overwritten once every pass has succeeded.
 *
 * Returns 0 on success, -1 on failure (unknown config, out of memory, page
 * table exhausted, or a backing file could not be determined).
 */
static int
build_config(struct rte_ivshmem_metadata * metadata)
{
	struct rte_ivshmem_metadata_entry * e_local;
	struct memseg_cache_entry * ms_local;
	struct rte_memseg pages[IVSHMEM_MAX_PAGES];
	struct rte_ivshmem_metadata_entry *entry;
	struct memseg_cache_entry * c_entry, * prev_entry;
	struct ivshmem_config * config;
	unsigned i, j, mz_iter, ms_iter;
	uint64_t biggest_len;
	int biggest_idx;

	/* return error if we try to use an unknown config file */
	config = get_config_by_name(metadata->name);
	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name);
		goto fail_e;
	}

	memset(pages, 0, sizeof(pages));

	e_local = malloc(sizeof(config->metadata->entry));
	if (e_local == NULL)
		goto fail_e;
	ms_local = malloc(sizeof(config->memseg_cache));
	if (ms_local == NULL)
		goto fail_ms;

	/* work on a local copy of the entries */
	memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry));

	/*
	 * The cache is rebuilt from scratch below, so start from an empty one.
	 * Seeding it with the previous cache would leave stale trailing
	 * entries behind whenever the new cache has fewer entries.
	 */
	memset(ms_local, 0, sizeof(config->memseg_cache));

	qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry),
			entry_compare);

	/* first pass - collect all huge pages */
	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
		int start_page = 0;

		entry = &e_local[mz_iter];

		uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64,
				entry->mz.hugepage_sz);
		uint64_t offset = entry->mz.addr_64 - start_addr;
		uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset,
				entry->mz.hugepage_sz);

		if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0)
			continue;

		/* find first unused page - mz are phys_addr sorted so we don't have to
		 * look out for holes */
		for (i = 0; i < RTE_DIM(pages); i++) {

			/* skip if we already have this page */
			if (pages[i].addr_64 == start_addr) {
				start_addr += entry->mz.hugepage_sz;
				len -= entry->mz.hugepage_sz;
				/* every page of this memzone is already known;
				 * stop here so that len cannot wrap around */
				if (len == 0)
					break;
				continue;
			}
			/* we found a new page */
			else if (pages[i].addr_64 == 0) {
				start_page = i;
				break;
			}
		}

		/* nothing left to add for this memzone */
		if (len == 0)
			continue;

		if (i == RTE_DIM(pages)) {
			RTE_LOG(ERR, EAL, "Cannot find unused page!\n");
			goto fail;
		}

		/* populate however many pages the memzone has */
		for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) {

			pages[i].addr_64 = start_addr;
			pages[i].len = entry->mz.hugepage_sz;
			start_addr += entry->mz.hugepage_sz;
			len -= entry->mz.hugepage_sz;
		}
		/* if there's still length left */
		if (len != 0) {
			RTE_LOG(ERR, EAL, "Not enough space for pages!\n");
			goto fail;
		}
	}

	/* second pass - sort pages by size */
	for (i = 0; i < RTE_DIM(pages); i++) {

		if (pages[i].addr == NULL)
			break;

		biggest_len = 0;
		biggest_idx = -1;

		/*
		 * browse all entries starting at 'i', and find the
		 * entry with the biggest length
		 */
		for (j=i; j< RTE_DIM(pages); j++) {
			if (pages[j].addr == NULL)
				break;
			if (biggest_len == 0 ||
					pages[j].len > biggest_len) {
				biggest_len = pages[j].len;
				biggest_idx = j;
			}
		}

		/* should not happen */
		if (biggest_idx == -1) {
			RTE_LOG(ERR, EAL, "Error sorting by size!\n");
			goto fail;
		}
		if (i != (unsigned) biggest_idx) {
			struct rte_memseg tmp;

			memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg));

			/* we don't want to break contiguousness, so instead of just
			 * swapping segments, we move all the preceding segments to the
			 * right and then put the old segment @ biggest_idx in place of
			 * segment @ i.
			 *
			 * The index counts down to i + 1 and copies from j - 1, so the
			 * unsigned counter never has to go below zero when i == 0. */
			for (j = biggest_idx; j > i; j--)
				memcpy(&pages[j], &pages[j-1], sizeof(struct rte_memseg));

			/* put old biggest segment to its new place */
			memcpy(&pages[i], &tmp, sizeof(struct rte_memseg));
		}
	}

	/* third pass - write correct offsets */
	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {

		uint64_t offset = 0;

		entry = &e_local[mz_iter];

		if (entry->mz.addr_64 == 0)
			break;

		/* find page for current memzone */
		for (i = 0; i < RTE_DIM(pages); i++) {
			/* we found our page */
			if (entry->mz.addr_64 >= pages[i].addr_64 &&
					entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) {
				entry->offset = (entry->mz.addr_64 - pages[i].addr_64) +
						offset;
				break;
			}
			offset += pages[i].len;
		}
		if (i == RTE_DIM(pages)) {
			RTE_LOG(ERR, EAL, "Page not found!\n");
			goto fail;
		}
	}

	ms_iter = 0;
	prev_entry = NULL;

	/* fourth pass - create proper memseg cache */
	for (i = 0; i < RTE_DIM(pages) &&
			ms_iter <= RTE_DIM(config->memseg_cache); i++) {
		if (pages[i].addr_64 == 0)
			break;

		if (ms_iter == RTE_DIM(pages)) {
			RTE_LOG(ERR, EAL, "The universe has collapsed!\n");
			goto fail;
		}

		c_entry = &ms_local[ms_iter];
		c_entry->len = pages[i].len;

		if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0)
			goto fail;

		/* if previous entry has the same filename and is contiguous,
		 * clear current entry and increase previous entry's length
		 */
		if (prev_entry != NULL &&
				strncmp(c_entry->filepath, prev_entry->filepath,
				sizeof(c_entry->filepath)) == 0 &&
				prev_entry->offset + prev_entry->len == c_entry->offset) {
			prev_entry->len += pages[i].len;
			memset(c_entry, 0, sizeof(struct memseg_cache_entry));
		}
		else {
			prev_entry = c_entry;
			ms_iter++;
		}
	}

	/* update current configuration with new valid data */
	memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry));
	memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache));

	free(ms_local);
	free(e_local);

	return 0;
fail:
	free(ms_local);
fail_ms:
	free(e_local);
fail_e:
	return -1;
}
/*
 * Add a memzone to the first free entry of a config's metadata and rebuild
 * the derived offsets and file cache.
 *
 * Entries are scanned in order; the memzone is rejected if it overlaps an
 * entry that is already in use. The config spinlock is held for the whole
 * operation.
 *
 * Returns 0 on success, -1 if the memzone overlaps an existing entry, if
 * no free entry is left, or if rebuilding the configuration fails. On a
 * rebuild failure the entry is released again.
 */
static int
add_memzone_to_metadata(const struct rte_memzone * mz,
		struct ivshmem_config * config)
{
	struct rte_ivshmem_metadata_entry * entry;
	unsigned i;

	rte_spinlock_lock(&config->sl);

	/* find free slot in this config */
	for (i = 0; i < RTE_DIM(config->metadata->entry); i++) {
		entry = &config->metadata->entry[i];

		/* compare the stored address, not the address of the field */
		if (entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) {
			RTE_LOG(ERR, EAL, "Overlapping memzones!\n");
			goto fail;
		}

		/* if addr is zero, the memzone is probably free */
		if (entry->mz.addr_64 == 0) {
			RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n",
					mz->name, mz->addr, config->metadata->name);
			memcpy(&entry->mz, mz, sizeof(struct rte_memzone));

			/* run config file parser */
			if (build_config(config->metadata) < 0) {
				/* do not leave a half-registered memzone behind */
				memset(&entry->mz, 0, sizeof(struct rte_memzone));
				goto fail;
			}

			break;
		}
	}

	/* if we reached the maximum, that means we have no place in config */
	if (i == RTE_DIM(config->metadata->entry)) {
		RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n",
				config->metadata->name);
		goto fail;
	}

	rte_spinlock_unlock(&config->sl);
	return 0;
fail:
	rte_spinlock_unlock(&config->sl);
	return -1;
}
/*
 * Register the memzone that starts at the ring's address with a config.
 *
 * Returns the result of add_memzone_to_metadata(), or -1 when no memzone
 * starts at that address.
 */
static int
add_ring_to_metadata(const struct rte_ring * r,
		struct ivshmem_config * config)
{
	struct rte_memzone *ring_mz = get_memzone_by_addr(r);

	if (ring_mz != NULL)
		return add_memzone_to_metadata(ring_mz, config);

	RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n");
	return -1;
}
/*
 * Register a mempool with a config.
 *
 * Two objects are added: the memzone that starts at the mempool's address,
 * followed by the mempool's ring.
 *
 * Returns -1 when the mempool's memzone cannot be found or added;
 * otherwise the result of adding the ring.
 */
static int
add_mempool_to_metadata(const struct rte_mempool * mp,
		struct ivshmem_config * config)
{
	struct rte_memzone *pool_mz = get_memzone_by_addr(mp);

	if (pool_mz == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n");
		return -1;
	}

	/* the pool's own memzone goes first ... */
	if (add_memzone_to_metadata(pool_mz, config) < 0)
		return -1;

	/* ... then the ring it uses */
	return add_ring_to_metadata(mp->ring, config);
}
/*
 * Public entry point: add a ring to the metadata file called name.
 * Returns 0 on success and -1 on bad arguments, unknown name or failure.
 */
int
rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name)
{
	struct ivshmem_config *cfg;

	if (r == NULL || name == NULL)
		return -1;

	cfg = get_config_by_name(name);
	if (cfg != NULL)
		return add_ring_to_metadata(r, cfg);

	RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
	return -1;
}
/*
 * Public entry point: add a memzone to the metadata file called name.
 * Returns 0 on success and -1 on bad arguments, unknown name or failure.
 */
int
rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name)
{
	struct ivshmem_config *cfg;

	if (mz == NULL || name == NULL)
		return -1;

	cfg = get_config_by_name(name);
	if (cfg != NULL)
		return add_memzone_to_metadata(mz, cfg);

	RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
	return -1;
}
/*
 * Public entry point: add a mempool to the metadata file called name.
 * Returns 0 on success and -1 on bad arguments, unknown name or failure.
 */
int
rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * name)
{
	struct ivshmem_config *cfg;

	if (mp == NULL || name == NULL)
		return -1;

	cfg = get_config_by_name(name);
	if (cfg != NULL)
		return add_mempool_to_metadata(mp, cfg);

	RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
	return -1;
}
/*
 * Write the path of the metadata file that belongs to the config called
 * name into buffer (at most bufflen bytes), using IVSHMEM_CONFIG_FILE_FMT.
 */
static inline void
ivshmem_config_path(char *buffer, size_t bufflen, const char *name)
{
	/* the formatter's return value is deliberately not used here */
	(void) rte_snprintf(buffer, bufflen,
			IVSHMEM_CONFIG_FILE_FMT,
			name);
}
/*
 * Create (or open), lock, size and map the metadata file for a config.
 *
 * The file is resized to METADATA_SIZE_ALIGNED and mapped shared and
 * read/write. The size argument is ignored and replaced by that value.
 * On success the file descriptor is intentionally left open: the record
 * lock taken with F_SETLK is tied to it.
 *
 * @param name  name of the config; used to build the file path
 * @param size  ignored, see above
 * @param lock  lock description passed to fcntl(F_SETLK)
 * @return      address of the mapping, or NULL on any failure
 */
static inline
void *ivshmem_metadata_create(const char *name, size_t size,
		struct flock *lock)
{
	int retval, fd;
	void *metadata_addr;
	char pathname[PATH_MAX];

	ivshmem_config_path(pathname, sizeof(pathname), name);

	fd = open(pathname, O_RDWR | O_CREAT, 0660);
	if (fd < 0) {
		RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname);
		return NULL;
	}

	size = METADATA_SIZE_ALIGNED;

	retval = fcntl(fd, F_SETLK, lock);
	if (retval < 0){
		close(fd);
		RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another "
				"process using it?\n", pathname);
		return NULL;
	}

	retval = ftruncate(fd, size);
	if (retval < 0){
		/* closing the descriptor also releases the record lock */
		close(fd);
		RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname);
		return NULL;
	}

	metadata_addr = mmap(NULL, size,
				PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	if (metadata_addr == MAP_FAILED){
		RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname);

		/*
		 * F_UNLCK is a lock type, not an fcntl() command, so it must not
		 * be passed as one. No explicit unlock is needed: closing the
		 * descriptor releases the process's record locks on the file.
		 */
		close(fd);

		return NULL;
	}

	return metadata_addr;
}
/*
 * Create a new metadata file called name and register it in the first free
 * slot of the global config table.
 *
 * The page size is read on first use. The slot's lock description covers
 * the page-aligned metadata size as a write lock starting at offset 0.
 * After mapping, the metadata is zeroed and stamped with the magic number
 * and the name.
 *
 * Returns 0 on success, -1 when name is NULL, when every slot is taken or
 * when the metadata file cannot be created.
 */
int rte_ivshmem_metadata_create(const char *name)
{
	struct ivshmem_config *slot = NULL;
	unsigned idx;

	if (pagesz == 0)
		pagesz = getpagesize();

	if (name == NULL)
		return -1;

	rte_spinlock_lock(&global_cfg_sl);

	/* pick the first slot that has no metadata attached yet */
	for (idx = 0; slot == NULL && idx < RTE_DIM(ivshmem_global_config); idx++) {
		if (ivshmem_global_config[idx].metadata == NULL)
			slot = &ivshmem_global_config[idx];
	}

	if (slot == NULL) {
		RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. "
		"Maximum has been reached\n");
		rte_spinlock_unlock(&global_cfg_sl);
		return -1;
	}

	slot->lock.l_type = F_WRLCK;
	slot->lock.l_whence = SEEK_SET;
	slot->lock.l_start = 0;
	slot->lock.l_len = METADATA_SIZE_ALIGNED;

	slot->metadata = (struct rte_ivshmem_metadata *)
			ivshmem_metadata_create(name,
					sizeof(struct rte_ivshmem_metadata),
					&slot->lock);

	if (slot->metadata == NULL) {
		rte_spinlock_unlock(&global_cfg_sl);
		return -1;
	}

	/* Metadata setup */
	memset(slot->metadata, 0, sizeof(struct rte_ivshmem_metadata));
	slot->metadata->magic_number = IVSHMEM_MAGIC;
	rte_snprintf(slot->metadata->name,
			sizeof(slot->metadata->name), "%s", name);

	rte_spinlock_unlock(&global_cfg_sl);

	return 0;
}
/*
 * Append one ":<path>:0x<offset>:0x<len>" element to the command line that
 * is being assembled.
 *
 * pos and remaining describe the free tail of the buffer and are advanced
 * on success. Returns 0 on success, -1 when the element does not fit.
 * NOTE(review): relies on rte_snprintf() reporting the untruncated length
 * the way snprintf() does - confirm against rte_string_fns.h.
 */
static int
ivshmem_cmdline_append_fd(char **pos, unsigned *remaining, const char *path,
		uint64_t offset, uint64_t len)
{
	int written;

	written = rte_snprintf(*pos, *remaining, IVSHMEM_QEMU_CMD_FD_FMT,
			path, offset, len);

	/* check before subtracting so the unsigned counter cannot wrap */
	if (written < 0 || (unsigned) written >= *remaining) {
		RTE_LOG(ERR, EAL, "Command line too long!\n");
		return -1;
	}

	*pos = RTE_PTR_ADD(*pos, written);
	*remaining -= (unsigned) written;

	return 0;
}

/*
 * Generate the QEMU "-device ivshmem" argument for the metadata file name.
 *
 * The argument lists every cached backing-file piece, then /dev/zero as
 * padding so that the total size is a power of two, then the metadata file
 * itself. The config spinlock is held while the line is assembled.
 *
 * @param buffer  destination for the command line
 * @param size    size of buffer in bytes
 * @param name    name of the metadata file
 * @return        0 on success; -1 on bad arguments, unknown name, or when
 *                the line does not fit the internal or the caller's buffer
 *                (buffer is then left as an empty string)
 */
int
rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name)
{
	const struct memseg_cache_entry * ms_cache, *entry;
	struct ivshmem_config * config;
	char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
	char cfg_file_path[PATH_MAX];
	unsigned remaining_len, iter;
	uint64_t shared_mem_size, zero_size, total_size, metadata_size;
	int written, ret;

	if (buffer == NULL || name == NULL)
		return -1;

	config = get_config_by_name(name);

	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
		return -1;
	}

	ret = -1;

	rte_spinlock_lock(&config->sl);

	if (size == 0) {
		RTE_LOG(ERR, EAL, "Buffer is too short!\n");
		goto out;
	}

	/* prepare metadata file path */
	rte_snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT,
			config->metadata->name);

	ms_cache = config->memseg_cache;

	cmdline[0] = '\0';
	cmdline_ptr = cmdline;
	remaining_len = sizeof(cmdline);

	shared_mem_size = 0;
	/* widen once so the value matches the 64-bit format conversions */
	metadata_size = (uint64_t) METADATA_SIZE_ALIGNED;

	/* bounds check comes first and uses the cache's own dimension */
	for (iter = 0; iter < RTE_DIM(config->memseg_cache) &&
			ms_cache[iter].len != 0; iter++) {

		entry = &ms_cache[iter];

		/* Offset and sizes within the current pathname */
		if (ivshmem_cmdline_append_fd(&cmdline_ptr, &remaining_len,
				entry->filepath, entry->offset, entry->len) < 0)
			goto out;

		shared_mem_size += entry->len;
	}

	total_size = rte_align64pow2(shared_mem_size + metadata_size);
	zero_size = total_size - shared_mem_size - metadata_size;

	/* add /dev/zero to command-line to fill the space */
	if (ivshmem_cmdline_append_fd(&cmdline_ptr, &remaining_len,
			"/dev/zero", (uint64_t) 0, zero_size) < 0)
		goto out;

	/* add metadata file to the end of command-line */
	if (ivshmem_cmdline_append_fd(&cmdline_ptr, &remaining_len,
			cfg_file_path, (uint64_t) 0, metadata_size) < 0)
		goto out;

	/* complete the command-line; the header makes the final line longer
	 * than the fd list, so the fit is checked on the finished output */
	written = rte_snprintf(buffer, size,
			IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
			total_size >> 20,
			cmdline);

	if (written < 0 || (unsigned) written >= size) {
		RTE_LOG(ERR, EAL, "Buffer is too short!\n");
		/* never hand back a silently truncated command line */
		buffer[0] = '\0';
		goto out;
	}

	ret = 0;
out:
	rte_spinlock_unlock(&config->sl);

	return ret;
}
/*
 * Print every used entry of the metadata file called name to stdout.
 *
 * Entries are printed in table order up to the first entry whose address
 * is NULL. When RTE_LIBRTE_IVSHMEM_DEBUG is defined, the backing file of
 * each hugepage covered by the entry is printed as well. Nothing is
 * printed for a NULL or unknown name (an error is logged for the latter).
 */
void
rte_ivshmem_metadata_dump(const char *name)
{
	unsigned i;
	struct ivshmem_config * config;
	struct rte_ivshmem_metadata_entry *entry;
#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
	uint64_t addr;
	uint64_t end, hugepage_sz;
	struct memseg_cache_entry e;
#endif

	if (name == NULL)
		return;

	/* return error if we try to use an unknown config file */
	config = get_config_by_name(name);
	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
		return;
	}

	rte_spinlock_lock(&config->sl);

	/* the index is checked before the entry is read, so a completely
	 * full table is never read one element past its end */
	for (i = 0; i < RTE_DIM(config->metadata->entry); i++) {

		entry = &config->metadata->entry[i];

		if (entry->mz.addr == NULL)
			break;

		/* values are widened to 64 bits to match the conversions */
		printf("Entry %u: name:<%-20s>, phys:0x%-15" PRIx64
				", len:0x%-15" PRIx64 ", "
				"virt:%-15p, off:0x%-15" PRIx64 "\n",
				i,
				entry->mz.name,
				(uint64_t) entry->mz.phys_addr,
				(uint64_t) entry->mz.len,
				entry->mz.addr,
				(uint64_t) entry->offset);

#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
		printf("\tHugepage files:\n");

		hugepage_sz = entry->mz.hugepage_sz;
		addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz);
		end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr),
				hugepage_sz);

		for (; addr < end; addr += hugepage_sz) {
			memset(&e, 0, sizeof(e));

			get_hugefile_by_virt_addr(addr, &e);

			printf("\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n",
					addr, addr + hugepage_sz, e.offset, e.filepath);
		}
#endif
	}

	rte_spinlock_unlock(&config->sl);
}

View File

@ -0,0 +1,163 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef RTE_IVSHMEM_H_
#define RTE_IVSHMEM_H_
#include <rte_memzone.h>
#include <rte_mempool.h>
/**
* @file
*
* The RTE IVSHMEM interface provides functions to create metadata files
* describing memory segments to be shared via QEMU IVSHMEM.
*/
#ifdef __cplusplus
extern "C" {
#endif
/** Value stored in rte_ivshmem_metadata.magic_number when metadata is created. */
#define IVSHMEM_MAGIC 0x0BADC0DE
/** Size in bytes of the name field of rte_ivshmem_metadata. */
#define IVSHMEM_NAME_LEN 32
/**
* Structure that holds IVSHMEM shared metadata entry.
*/
struct rte_ivshmem_metadata_entry {
struct rte_memzone mz; /**< shared memzone */
uint64_t offset; /**< offset of memzone within IVSHMEM device */
};
/**
* Structure that holds IVSHMEM metadata.
*
* An entry whose memzone address is zero is treated as unused.
*/
struct rte_ivshmem_metadata {
int magic_number; /**< magic number */
char name[IVSHMEM_NAME_LEN]; /**< name of the metadata file */
struct rte_ivshmem_metadata_entry entry[RTE_LIBRTE_IVSHMEM_MAX_ENTRIES];
/**< metadata entries */
};
/**
 * Create a metadata file identified by the given name.
 *
 * @param name
 *   Name of the metadata file to create.
 *
 * @return
 *   - Zero on success
 *   - A negative value on failure
 */
int rte_ivshmem_metadata_create(const char *name);
/**
 * Add a memzone to the metadata file with the given name.
 *
 * @param mz
 *   Memzone to add.
 * @param md_name
 *   Name of the metadata file the memzone is added to.
 *
 * @return
 *   - Zero on success
 *   - A negative value on failure
 */
int rte_ivshmem_metadata_add_memzone(const struct rte_memzone *mz,
		const char *md_name);
/**
 * Add a ring descriptor to the metadata file with the given name.
 *
 * @param r
 *   Ring descriptor to add.
 * @param md_name
 *   Name of the metadata file the ring is added to.
 *
 * @return
 *   - Zero on success
 *   - A negative value on failure
 */
int rte_ivshmem_metadata_add_ring(const struct rte_ring *r,
		const char *md_name);
/**
 * Add a mempool to the metadata file with the given name.
 *
 * @param mp
 *   Mempool to add.
 * @param md_name
 *   Name of the metadata file the mempool is added to.
 *
 * @return
 *   - Zero on success
 *   - A negative value on failure
 */
int rte_ivshmem_metadata_add_mempool(const struct rte_mempool *mp,
		const char *md_name);
/**
 * Generate the QEMU command-line for the IVSHMEM device described by a given
 * metadata file. Call this only after all objects have been added to that
 * metadata file.
 *
 * @param buffer
 *   Buffer to be filled with the command-line arguments.
 * @param size
 *   Size of the buffer.
 * @param name
 *   Name of the metadata file to generate QEMU command-line parameters for.
 *
 * @return
 *   - On success, zero
 *   - On failure, a negative value
 */
int rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size,
const char *name);
/**
 * Dump all metadata entries from a given metadata file to the console.
 *
 * @param name
 *   Name of the metadata file to be dumped to console.
 */
void rte_ivshmem_metadata_dump(const char *name);
#ifdef __cplusplus
}
#endif
#endif /* RTE_IVSHMEM_H_ */

View File

@ -64,6 +64,12 @@ LDLIBS += -lrte_kni
endif
endif
ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
LDLIBS += -lrte_ivshmem
endif
endif
ifeq ($(CONFIG_RTE_LIBRTE_E1000_PMD),y)
LDLIBS += -lrte_pmd_e1000
endif