xen: core library changes

Core support for using the Intel DPDK with Xen Dom0, including EAL and
mempool changes. These cover how memory mapping is done, and add support
for initializing a memory pool inside an already-allocated block of memory.
The KNI sample app is updated to call the KNI close function when running
on Xen.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Author: Bruce Richardson <bruce.richardson@intel.com>, 2014-02-12 15:32:25 +0000
Committed by: David Marchand
Parent: 40b966a211, commit: 148f963fb5
39 changed files with 2832 additions and 147 deletions


@ -58,6 +58,12 @@ SRCS-$(CONFIG_RTE_TEST_PMD) += csumonly.c
ifeq ($(CONFIG_RTE_LIBRTE_IEEE1588),y)
SRCS-$(CONFIG_RTE_TEST_PMD) += ieee1588fwd.c
endif
SRCS-$(CONFIG_RTE_TEST_PMD) += mempool_anon.c
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
CFLAGS_mempool_anon.o := -D_GNU_SOURCE
endif
CFLAGS_cmdline.o := -D_GNU_SOURCE
# this application needs libraries first
DEPDIRS-$(CONFIG_RTE_TEST_PMD) += lib


@ -974,10 +974,12 @@ pkt_fwd_config_display(struct fwd_config *cfg)
streamid_t sm_id;
printf("%s packet forwarding - ports=%d - cores=%d - streams=%d - "
"NUMA support %s\n",
cfg->fwd_eng->fwd_mode_name,
cfg->nb_fwd_ports, cfg->nb_fwd_lcores, cfg->nb_fwd_streams,
numa_support == 1 ? "enabled" : "disabled");
"NUMA support %s, MP over anonymous pages %s\n",
cfg->fwd_eng->fwd_mode_name,
cfg->nb_fwd_ports, cfg->nb_fwd_lcores, cfg->nb_fwd_streams,
numa_support == 1 ? "enabled" : "disabled",
mp_anon != 0 ? "enabled" : "disabled");
for (lc_id = 0; lc_id < cfg->nb_fwd_lcores; lc_id++) {
printf("Logical Core %u (socket %u) forwards packets on "
"%d streams:",

app/test-pmd/mempool_anon.c (new file, 201 lines)

@ -0,0 +1,201 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include "mempool_osdep.h"
#include <rte_errno.h>
#ifdef RTE_EXEC_ENV_LINUXAPP
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#define PAGEMAP_FNAME "/proc/self/pagemap"
/*
* the pfn (page frame number) is stored in bits 0-54 (see pagemap.txt in
* the linux Documentation).
*/
#define PAGEMAP_PFN_BITS 54
#define PAGEMAP_PFN_MASK RTE_LEN2MASK(PAGEMAP_PFN_BITS, phys_addr_t)
static int
get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz)
{
int32_t fd, rc;
uint32_t i, nb;
off_t ofs;
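/* each page has one 8-byte pagemap entry; seek to the entry for va's page */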
ofs = (uintptr_t)va / pg_sz * sizeof(*pa);
nb = pg_num * sizeof(*pa);
if ((fd = open(PAGEMAP_FNAME, O_RDONLY)) < 0)
return (ENOENT);
if ((rc = pread(fd, pa, nb, ofs)) < 0 || (rc -= nb) != 0) {
RTE_LOG(ERR, USER1, "failed read of %u bytes from \'%s\' "
"at offset %zu, error code: %d\n",
nb, PAGEMAP_FNAME, (size_t)ofs, errno);
rc = ENOENT;
}
close(fd);
for (i = 0; i != pg_num; i++)
pa[i] = (pa[i] & PAGEMAP_PFN_MASK) * pg_sz;
return (rc);
}
struct rte_mempool *
mempool_anon_create(const char *name, unsigned elt_num, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags)
{
struct rte_mempool *mp;
phys_addr_t *pa;
char *va, *uv;
uint32_t n, pg_num, pg_shift, pg_sz, total_size;
size_t sz;
ssize_t usz;
int32_t rc;
rc = ENOMEM;
mp = NULL;
pg_sz = getpagesize();
if (rte_is_power_of_2(pg_sz) == 0) {
rte_errno = EINVAL;
return (mp);
}
pg_shift = rte_bsf32(pg_sz);
total_size = rte_mempool_calc_obj_size(elt_size, flags, NULL);
/* calc max memory size and max number of pages needed. */
sz = rte_mempool_xmem_size(elt_num, total_size, pg_shift);
pg_num = sz >> pg_shift;
/* get a chunk of virtually contiguous memory. */
if ((va = mmap(NULL, sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS | MAP_LOCKED,
-1, 0)) == MAP_FAILED) {
RTE_LOG(ERR, USER1, "%s(%s) failed mmap of %zu bytes, "
"error code: %d\n",
__func__, name, sz, errno);
rte_errno = rc;
return (mp);
}
/* extract physical mappings of the allocated memory. */
if ((pa = calloc(pg_num, sizeof (*pa))) != NULL &&
(rc = get_phys_map(va, pa, pg_num, pg_sz)) == 0) {
/*
* Check that the allocated size is big enough to hold elt_num
* objects and calculate how many bytes are actually required.
*/
if ((usz = rte_mempool_xmem_usage(va, elt_num, total_size, pa,
pg_num, pg_shift)) < 0) {
n = -usz;
rc = ENOENT;
RTE_LOG(ERR, USER1, "%s(%s) only %u objects from %u "
"requested can be created over "
"mmaped region %p of %zu bytes\n",
__func__, name, n, elt_num, va, sz);
} else {
/* unmap unused pages if any */
if ((size_t)usz < sz) {
uv = va + usz;
usz = sz - usz;
RTE_LOG(INFO, USER1,
"%s(%s): unmap unused %zu of %zu "
"mmaped bytes @%p\n",
__func__, name, (size_t)usz, sz, uv);
munmap(uv, usz);
sz -= usz;
pg_num = sz >> pg_shift;
}
if ((mp = rte_mempool_xmem_create(name, elt_num,
elt_size, cache_size, private_data_size,
mp_init, mp_init_arg,
obj_init, obj_init_arg,
socket_id, flags, va, pa, pg_num,
pg_shift)) != NULL)
RTE_VERIFY(elt_num == mp->size);
}
}
if (mp == NULL) {
munmap(va, sz);
rte_errno = rc;
}
free(pa);
return (mp);
}
#else /* RTE_EXEC_ENV_LINUXAPP */
struct rte_mempool *
mempool_anon_create(__rte_unused const char *name,
__rte_unused unsigned elt_num, __rte_unused unsigned elt_size,
__rte_unused unsigned cache_size,
__rte_unused unsigned private_data_size,
__rte_unused rte_mempool_ctor_t *mp_init,
__rte_unused void *mp_init_arg,
__rte_unused rte_mempool_obj_ctor_t *obj_init,
__rte_unused void *obj_init_arg,
__rte_unused int socket_id, __rte_unused unsigned flags)
{
rte_errno = ENOTSUP;
return (NULL);
}
#endif /* RTE_EXEC_ENV_LINUXAPP */


@ -0,0 +1,54 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _MEMPOOL_OSDEP_H_
#define _MEMPOOL_OSDEP_H_
#include <rte_mempool.h>
/**
* @file
* mempool OS specific header.
*/
/*
* Create mempool over objects from mmap(..., MAP_ANONYMOUS, ...).
*/
struct rte_mempool *
mempool_anon_create(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags);
#endif /* _MEMPOOL_OSDEP_H_ */
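A minimal usage sketch of the API declared above (pool name, element size
and count are illustrative; error handling is reduced to a NULL check):

#include <stdio.h>
#include <rte_errno.h>
#include <rte_mempool.h>
#include "mempool_osdep.h"

static struct rte_mempool *
make_anon_pool(void)
{
	/* 1024 elements of 2KB each, no per-lcore cache, no private data */
	struct rte_mempool *mp = mempool_anon_create("anon_pool",
			1024, 2048, 0, 0,
			NULL, NULL,	/* no pool constructor */
			NULL, NULL,	/* no per-object constructor */
			SOCKET_ID_ANY, 0);

	if (mp == NULL)
		printf("mempool_anon_create failed, rte_errno: %d\n", rte_errno);
	return mp;
}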


@ -478,6 +478,7 @@ launch_args_parse(int argc, char** argv)
{ "coremask", 1, 0, 0 },
{ "portmask", 1, 0, 0 },
{ "numa", 0, 0, 0 },
{ "mp-anon", 0, 0, 0 },
{ "port-numa-config", 1, 0, 0 },
{ "ring-numa-config", 1, 0, 0 },
{ "socket-num", 1, 0, 0 },
@ -594,6 +595,9 @@ launch_args_parse(int argc, char** argv)
memset(rxring_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS);
memset(txring_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS);
}
if (!strcmp(lgopts[opt_idx].name, "mp-anon")) {
mp_anon = 1;
}
if (!strcmp(lgopts[opt_idx].name, "port-numa-config")) {
if (parse_portnuma_config(optarg))
rte_exit(EXIT_FAILURE,


@ -72,8 +72,12 @@
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_string_fns.h>
#ifdef RTE_LIBRTE_PMD_XENVIRT
#include <rte_eth_xenvirt.h>
#endif
#include "testpmd.h"
#include "mempool_osdep.h"
uint16_t verbose_level = 0; /**< Silent by default. */
@ -95,6 +99,11 @@ uint8_t numa_support = 0; /**< No numa support by default */
*/
uint8_t socket_num = UMA_NO_CONFIG;
/*
* Use anonymous mapped memory (which might not be physically contiguous) for mbufs.
*/
uint8_t mp_anon = 0;
/*
* Record the Ethernet address of peer target ports to which packets are
* forwarded.
@ -407,8 +416,7 @@ testpmd_mbuf_pool_ctor(struct rte_mempool *mp,
return;
}
mbp_ctor_arg = (struct mbuf_pool_ctor_arg *) opaque_arg;
mbp_priv = (struct rte_pktmbuf_pool_private *)
((char *)mp + sizeof(struct rte_mempool));
mbp_priv = rte_mempool_get_priv(mp);
mbp_priv->mbuf_data_room_size = mbp_ctor_arg->seg_buf_size;
}
@ -429,15 +437,40 @@ mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
mb_ctor_arg.seg_buf_size = mbp_ctor_arg.seg_buf_size;
mb_size = mb_ctor_arg.seg_buf_offset + mb_ctor_arg.seg_buf_size;
mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name));
rte_mp = rte_mempool_create(pool_name, nb_mbuf, (unsigned) mb_size,
#ifdef RTE_LIBRTE_PMD_XENVIRT
rte_mp = rte_mempool_gntalloc_create(pool_name, nb_mbuf, mb_size,
(unsigned) mb_mempool_cache,
sizeof(struct rte_pktmbuf_pool_private),
testpmd_mbuf_pool_ctor, &mbp_ctor_arg,
testpmd_mbuf_ctor, &mb_ctor_arg,
socket_id, 0);
#else
if (mp_anon != 0)
rte_mp = mempool_anon_create(pool_name, nb_mbuf, mb_size,
(unsigned) mb_mempool_cache,
sizeof(struct rte_pktmbuf_pool_private),
testpmd_mbuf_pool_ctor, &mbp_ctor_arg,
testpmd_mbuf_ctor, &mb_ctor_arg,
socket_id, 0);
else
rte_mp = rte_mempool_create(pool_name, nb_mbuf, mb_size,
(unsigned) mb_mempool_cache,
sizeof(struct rte_pktmbuf_pool_private),
testpmd_mbuf_pool_ctor, &mbp_ctor_arg,
testpmd_mbuf_ctor, &mb_ctor_arg,
socket_id, 0);
#endif
if (rte_mp == NULL) {
rte_exit(EXIT_FAILURE, "Creation of mbuf pool for socket %u "
"failed\n", socket_id);
} else if (verbose_level > 0) {
rte_mempool_dump(rte_mp);
}
}
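For illustration, the anonymous-memory pool is selected at run time with the
--mp-anon flag added by this patch (the remaining arguments are ordinary,
illustrative testpmd/EAL options):

./testpmd -c 0x3 -n 4 -- --mp-anon -i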
@ -1136,7 +1169,7 @@ all_ports_started(void)
return 1;
}
void
int
start_port(portid_t pid)
{
int diag, need_check_link_status = 0;
@ -1146,12 +1179,12 @@ start_port(portid_t pid)
if (test_done == 0) {
printf("Please stop forwarding first\n");
return;
return -1;
}
if (init_fwd_streams() < 0) {
printf("Fail from init_fwd_streams()\n");
return;
return -1;
}
if(dcb_config)
@ -1183,7 +1216,7 @@ start_port(portid_t pid)
printf("Fail to configure port %d\n", pi);
/* try to reconfigure port next time */
port->need_reconfig = 1;
return;
return -1;
}
}
if (port->need_reconfig_queues > 0) {
@ -1212,7 +1245,7 @@ start_port(portid_t pid)
printf("Fail to configure port %d tx queues\n", pi);
/* try to reconfigure queues next time */
port->need_reconfig_queues = 1;
return;
return -1;
}
/* setup rx queues */
for (qi = 0; qi < nb_rxq; qi++) {
@ -1225,7 +1258,7 @@ start_port(portid_t pid)
"No mempool allocation"
"on the socket %d\n",
rxring_numa[pi]);
return;
return -1;
}
diag = rte_eth_rx_queue_setup(pi, qi,
@ -1251,7 +1284,7 @@ start_port(portid_t pid)
printf("Fail to configure port %d rx queues\n", pi);
/* try to reconfigure queues next time */
port->need_reconfig_queues = 1;
return;
return -1;
}
}
/* start port */
@ -1280,6 +1313,7 @@ start_port(portid_t pid)
printf("Please stop the ports first\n");
printf("Done\n");
return 0;
}
void
@ -1732,7 +1766,8 @@ main(int argc, char** argv)
nb_rxq, nb_txq);
init_config();
start_port(RTE_PORT_ALL);
if (start_port(RTE_PORT_ALL) != 0)
rte_exit(EXIT_FAILURE, "Start ports failed\n");
/* set all ports to promiscuous mode by default */
for (port_id = 0; port_id < nb_ports; port_id++)


@ -271,6 +271,7 @@ extern uint8_t interactive;
extern uint8_t numa_support; /**< set by "--numa" parameter */
extern uint16_t port_topology; /**< set by "--port-topology" parameter */
extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
extern uint8_t mp_anon; /**< set by "--mp-anon" parameter */
#ifdef RTE_NIC_BYPASS
extern uint32_t bypass_timeout; /**< Store the NIC bypass watchdog timeout */
@ -489,7 +490,7 @@ void start_packet_forwarding(int with_tx_first);
void stop_packet_forwarding(void);
void init_port_config(void);
int init_port_dcb_config(portid_t pid,struct dcb_config *dcb_conf);
void start_port(portid_t pid);
int start_port(portid_t pid);
void stop_port(portid_t pid);
void close_port(portid_t pid);
int all_ports_stopped(void);


@ -53,7 +53,12 @@
static inline int
process_dup(const char *const argv[], int numargs, const char *env_value)
{
int num;
#ifdef RTE_LIBRTE_XEN_DOM0
char *argv_cpy[numargs + 2];
#else
char *argv_cpy[numargs + 1];
#endif
int i, fd, status;
char path[32];
@ -64,7 +69,14 @@ process_dup(const char *const argv[], int numargs, const char *env_value)
/* make a copy of the arguments to be passed to exec */
for (i = 0; i < numargs; i++)
argv_cpy[i] = strdup(argv[i]);
#ifdef RTE_LIBRTE_XEN_DOM0
argv_cpy[i] = strdup("--xen-dom0");
argv_cpy[i + 1] = NULL;
num = numargs + 1;
#else
argv_cpy[i] = NULL;
num = numargs;
#endif
/* close all open file descriptors, check /proc/self/fd to only
* call close on open fds. Exclude fds 0, 1 and 2*/
@ -74,7 +86,7 @@ process_dup(const char *const argv[], int numargs, const char *env_value)
close(fd);
}
printf("Running binary with argv[]:");
for (i = 0; i < numargs; i++)
for (i = 0; i < num; i++)
printf("'%s' ", argv_cpy[i]);
printf("\n");


@ -50,10 +50,12 @@
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_timer.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#ifdef RTE_LIBRTE_TIMER
#include <rte_timer.h>
#endif
#include "test.h"
@ -82,7 +84,11 @@ do_recursive_call(void)
{ "test_whitelist_flag", no_action },
{ "test_invalid_b_flag", no_action },
{ "test_invalid_r_flag", no_action },
#ifdef RTE_LIBRTE_XEN_DOM0
{ "test_dom0_misc_flags", no_action },
#else
{ "test_misc_flags", no_action },
#endif
{ "test_memory_flags", no_action },
{ "test_file_prefix", no_action },
{ "test_no_huge_flag", no_action },
@ -110,7 +116,9 @@ main(int argc, char **argv)
if (ret < 0)
return -1;
#ifdef RTE_LIBRTE_TIMER
rte_timer_subsystem_init();
#endif
argv += ret;


@ -644,6 +644,72 @@ test_no_huge_flag(void)
return 0;
}
#ifdef RTE_LIBRTE_XEN_DOM0
static int
test_dom0_misc_flags(void)
{
char prefix[PATH_MAX], tmp[PATH_MAX];
if (get_current_prefix(tmp, sizeof(tmp)) == NULL) {
printf("Error - unable to get current prefix!\n");
return -1;
}
rte_snprintf(prefix, sizeof(prefix), "--file-prefix=%s", tmp);
/* check that some general flags don't prevent things from working.
* In all cases apart from the first, the app should run.
* No further testing of the output is done.
*/
/* sanity check - failure with invalid option */
const char *argv0[] = {prgname, prefix, mp_flag, "-c", "1", "--invalid-opt"};
/* With --no-pci */
const char *argv1[] = {prgname, prefix, mp_flag, "-c", "1", "--no-pci"};
/* With -v */
const char *argv2[] = {prgname, prefix, mp_flag, "-c", "1", "-v"};
/* With valid --syslog */
const char *argv3[] = {prgname, prefix, mp_flag, "-c", "1",
"--syslog", "syslog"};
/* With empty --syslog (should fail) */
const char *argv4[] = {prgname, prefix, mp_flag, "-c", "1", "--syslog"};
/* With invalid --syslog */
const char *argv5[] = {prgname, prefix, mp_flag, "-c", "1", "--syslog", "error"};
/* With no-sh-conf */
const char *argv6[] = {prgname, "-c", "1", "-n", "2", "-m", "20",
"--no-shconf", "--file-prefix=noshconf" };
if (launch_proc(argv0) == 0) {
printf("Error - process ran ok with invalid flag\n");
return -1;
}
if (launch_proc(argv1) != 0) {
printf("Error - process did not run ok with --no-pci flag\n");
return -1;
}
if (launch_proc(argv2) != 0) {
printf("Error - process did not run ok with -v flag\n");
return -1;
}
if (launch_proc(argv3) != 0) {
printf("Error - process did not run ok with --syslog flag\n");
return -1;
}
if (launch_proc(argv4) == 0) {
printf("Error - process ran ok with empty --syslog flag\n");
return -1;
}
if (launch_proc(argv5) == 0) {
printf("Error - process ran ok with invalid --syslog flag\n");
return -1;
}
if (launch_proc(argv6) != 0) {
printf("Error - process did not run ok with --no-shconf flag\n");
return -1;
}
return 0;
}
#else
static int
test_misc_flags(void)
{
@ -736,6 +802,10 @@ test_misc_flags(void)
* effect on secondary processes) */
const char *argv10[] = {prgname, prefix, mp_flag, "-c", "1", "--huge-dir", "invalid"};
/* try running with base-virtaddr param */
const char *argv11[] = {prgname, "--file-prefix=virtaddr",
"-c", "1", "-n", "2", "--base-virtaddr=0x12345678"};
if (launch_proc(argv0) == 0) {
printf("Error - process ran ok with invalid flag\n");
@ -784,8 +854,13 @@ test_misc_flags(void)
printf("Error - secondary process did not run ok with invalid --huge-dir flag\n");
return -1;
}
if (launch_proc(argv11) != 0) {
printf("Error - process did not run ok with --base-virtaddr parameter\n");
return -1;
}
return 0;
}
#endif
static int
test_file_prefix(void)
@ -822,6 +897,9 @@ test_file_prefix(void)
printf("Error - unable to get current prefix!\n");
return -1;
}
#ifdef RTE_LIBRTE_XEN_DOM0
return 0;
#endif
/* check if files for current prefix are present */
if (process_hugefiles(prefix, HUGEPAGE_CHECK_EXISTS) != 1) {
@ -905,6 +983,7 @@ test_file_prefix(void)
static int
test_memory_flags(void)
{
const char* mem_size = NULL;
#ifdef RTE_EXEC_ENV_BSDAPP
/* BSD target doesn't support prefixes at this point */
const char * prefix = "";
@ -916,13 +995,20 @@ test_memory_flags(void)
}
rte_snprintf(prefix, sizeof(prefix), "--file-prefix=%s", tmp);
#endif
/* valid -m flag */
const char *argv0[] = {prgname, "-c", "10", "-n", "2",
"--file-prefix=" memtest, "-m", "2"};
#ifdef RTE_LIBRTE_XEN_DOM0
mem_size = "30";
#else
mem_size = "2";
#endif
/* valid -m flag and mp flag */
const char *argv1[] = {prgname, prefix, mp_flag, "-c", "10",
"-n", "2", "-m", "2"};
const char *argv0[] = {prgname, prefix, mp_flag, "-c", "10",
"-n", "2", "-m", mem_size};
/* valid -m flag */
const char *argv1[] = {prgname, "-c", "10", "-n", "2",
"--file-prefix=" memtest, "-m", mem_size};
/* invalid (zero) --socket-mem flag */
const char *argv2[] = {prgname, "-c", "10", "-n", "2",
@ -1016,10 +1102,12 @@ test_memory_flags(void)
#endif
if (launch_proc(argv1) != 0) {
printf("Error - secondary process failed with valid -m flag !\n");
printf("Error - process failed with valid -m flag!\n");
return -1;
}
#ifdef RTE_LIBRTE_XEN_DOM0
return 0;
#endif
if (launch_proc(argv2) == 0) {
printf("Error - process run ok with invalid (zero) --socket-mem!\n");
return -1;
@ -1132,7 +1220,11 @@ test_eal_flags(void)
return ret;
}
#ifdef RTE_LIBRTE_XEN_DOM0
ret = test_dom0_misc_flags();
#else
ret = test_misc_flags();
#endif
if (ret < 0) {
printf("Error in test_misc_flags()");
return ret;


@ -126,12 +126,14 @@ test_mempool_basic(void)
printf("get private data\n");
if (rte_mempool_get_priv(mp) !=
(char*) mp + sizeof(struct rte_mempool))
(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
return -1;
printf("get physical address of an object\n");
if (rte_mempool_virt2phy(mp, obj) !=
(phys_addr_t) (mp->phys_addr + (phys_addr_t) ((char*) obj - (char*) mp)))
if (MEMPOOL_IS_CONTIG(mp) &&
rte_mempool_virt2phy(mp, obj) !=
(phys_addr_t) (mp->phys_addr +
(phys_addr_t) ((char*) obj - (char*) mp)))
return -1;
printf("put the object back\n");
@ -428,6 +430,33 @@ test_mempool_same_name_twice_creation(void)
return 0;
}
/*
* Basic test for the mempool_xmem functions.
*/
static int
test_mempool_xmem_misc(void)
{
uint32_t elt_num, total_size;
size_t sz;
ssize_t usz;
elt_num = MAX_KEEP;
total_size = rte_mempool_calc_obj_size(MEMPOOL_ELT_SIZE, 0, NULL);
sz = rte_mempool_xmem_size(elt_num, total_size, MEMPOOL_PG_SHIFT_MAX);
usz = rte_mempool_xmem_usage(NULL, elt_num, total_size, 0, 1,
MEMPOOL_PG_SHIFT_MAX);
if(sz != (size_t)usz) {
printf("failure @ %s: rte_mempool_xmem_usage(%u, %u) "
"returns: %#zx, while expected: %#zx;\n",
__func__, elt_num, total_size, sz, (size_t)usz);
return (-1);
}
return (0);
}
int
test_mempool(void)
{
@ -487,6 +516,9 @@ test_mempool(void)
if (test_mempool_same_name_twice_creation() < 0)
return -1;
if (test_mempool_xmem_misc() < 0)
return -1;
rte_mempool_list_dump();
return 0;


@ -293,6 +293,11 @@ CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
#
#Compile Xen domain0 support
#
CONFIG_RTE_LIBRTE_XEN_DOM0=n
#
# Enable warning directives
#


@ -292,6 +292,11 @@ CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
#
#Compile Xen domain0 support
#
CONFIG_RTE_LIBRTE_XEN_DOM0=n
#
# Enable warning directives
#


@ -195,6 +195,9 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16
#
CONFIG_RTE_LIBRTE_PMD_PCAP=n
CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
#
# Do prefetch of packet data within PMD driver receive function
#
@ -293,6 +296,11 @@ CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
#
#Compile Xen domain0 support
#
CONFIG_RTE_LIBRTE_XEN_DOM0=n
#
# Enable warning directives
#


@ -292,6 +292,11 @@ CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
#
#Compile Xen domain0 support
#
CONFIG_RTE_LIBRTE_XEN_DOM0=n
#
# Enable warning directives
#


@ -939,6 +939,9 @@ main(int argc, char** argv)
continue;
kni_free_kni(port);
}
#ifdef RTE_LIBRTE_XEN_DOM0
rte_kni_close();
#endif
for (i = 0; i < RTE_MAX_ETHPORTS; i++)
if (kni_port_params_array[i]) {
rte_free(kni_port_params_array[i]);


@ -78,6 +78,11 @@ void rte_dump_registers(void);
#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
#define RTE_VERIFY(exp) do { \
if (!(exp)) \
rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
} while (0)
/*
* Provide notification of a critical non-recoverable error and stop.
*


@ -43,6 +43,10 @@
#include <stdint.h>
#include <stddef.h>
#ifdef RTE_EXEC_ENV_LINUXAPP
#include <exec-env/rte_dom0_common.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
@ -87,6 +91,10 @@ struct rte_memseg {
int32_t socket_id; /**< NUMA socket ID. */
uint32_t nchannel; /**< Number of channels. */
uint32_t nrank; /**< Number of ranks. */
#ifdef RTE_LIBRTE_XEN_DOM0
/**< store segment MFNs */
uint64_t mfn[DOM0_NUM_MEMBLOCK];
#endif
} __attribute__((__packed__));
@ -138,6 +146,42 @@ unsigned rte_memory_get_nchannel(void);
*/
unsigned rte_memory_get_nrank(void);
#ifdef RTE_LIBRTE_XEN_DOM0
/**
* Return the machine address corresponding to a physical address on
* Xen Dom0.
*
* @param memseg_id
*   Index of the memory segment that contains the physical address.
* @param phy_addr
*   The physical address to translate.
*
* @return
*   The machine address.
*/
phys_addr_t rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr);
/**
* Initialize memory for an application running on Xen Dom0.
*
* @return
*   0 on success, negative on error.
*/
int rte_xen_dom0_memory_init(void);
/**
* Attach to the memory segments of the primary process on Xen Dom0.
*
* @return
*   0 on success, negative on error.
*/
int rte_xen_dom0_memory_attach(void);
#endif
#ifdef __cplusplus
}
#endif
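A hedged sketch of how these two entry points pair up across processes.
EAL calls them internally during initialization; the direct calls below are
purely illustrative and assume RTE_LIBRTE_XEN_DOM0 is enabled:

#include <rte_memory.h>

/* primary process: reserve and map Dom0 memory via the dom0_mm driver */
static int
dom0_primary_setup(void)
{
	return rte_xen_dom0_memory_init();	/* 0 on success, negative on error */
}

/* secondary process: map the primary's segments at the same addresses */
static int
dom0_secondary_setup(void)
{
	return rte_xen_dom0_memory_attach();
}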


@ -36,5 +36,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += kni
endif
ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += xen_dom0
endif
include $(RTE_SDK)/mk/rte.subdir.mk


@ -50,6 +50,9 @@ CFLAGS += $(WERROR_FLAGS) -O3
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_hugepage_info.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_memory.c
ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_xen_memory.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_thread.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_log.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci.c
@ -90,7 +93,7 @@ CFLAGS_eal_thread.o += -Wno-return-type
CFLAGS_eal_hpet.o += -Wno-return-type
endif
INC := rte_per_lcore.h rte_lcore.h rte_interrupts.h rte_kni_common.h
INC := rte_per_lcore.h rte_lcore.h rte_interrupts.h rte_kni_common.h rte_dom0_common.h
SYMLINK-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP)-include/exec-env := \
$(addprefix include/exec-env/,$(INC))


@ -48,8 +48,6 @@
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/io.h>
#include <sys/user.h>
#include <linux/binfmts.h>
#include <rte_common.h>
#include <rte_debug.h>
@ -92,6 +90,7 @@
#define OPT_USE_DEVICE "use-device"
#define OPT_SYSLOG "syslog"
#define OPT_BASE_VIRTADDR "base-virtaddr"
#define OPT_XEN_DOM0 "xen-dom0"
#define RTE_EAL_BLACKLIST_SIZE 0x100
@ -335,6 +334,8 @@ eal_usage(const char *prgname)
" (multiple -b options are allowed)\n"
" -m MB : memory to allocate (see also --"OPT_SOCKET_MEM")\n"
" -r NUM : force number of memory ranks (don't detect)\n"
" --"OPT_XEN_DOM0" : support application running on Xen Domain0 "
"without hugetlbfs\n"
" --"OPT_SYSLOG" : set syslog facility\n"
" --"OPT_SOCKET_MEM" : memory to allocate on specific \n"
" sockets (use comma separated values)\n"
@ -409,7 +410,7 @@ eal_parse_coremask(const char *coremask)
if (coremask[0] == '0' && ((coremask[1] == 'x')
|| (coremask[1] == 'X')) )
coremask += 2;
i = strnlen(coremask, MAX_ARG_STRLEN);
i = strnlen(coremask, PATH_MAX);
while ((i > 0) && isblank(coremask[i - 1]))
i--;
if (i == 0)
@ -627,6 +628,7 @@ eal_parse_args(int argc, char **argv)
{OPT_USE_DEVICE, 1, 0, 0},
{OPT_SYSLOG, 1, NULL, 0},
{OPT_BASE_VIRTADDR, 1, 0, 0},
{OPT_XEN_DOM0, 0, 0, 0},
{0, 0, 0, 0}
};
@ -639,6 +641,7 @@ eal_parse_args(int argc, char **argv)
internal_config.hugepage_dir = NULL;
internal_config.force_sockets = 0;
internal_config.syslog_facility = LOG_DAEMON;
internal_config.xen_dom0_support = 0;
#ifdef RTE_LIBEAL_USE_HPET
internal_config.no_hpet = 0;
#else
@ -714,6 +717,16 @@ eal_parse_args(int argc, char **argv)
if (!strcmp(lgopts[option_index].name, OPT_NO_HUGE)) {
internal_config.no_hugetlbfs = 1;
}
if (!strcmp(lgopts[option_index].name, OPT_XEN_DOM0)) {
#ifdef RTE_LIBRTE_XEN_DOM0
internal_config.xen_dom0_support = 1;
#else
RTE_LOG(ERR, EAL, "Can't support DPDK app "
"running on Dom0, please configure"
" RTE_LIBRTE_XEN_DOM0=y\n");
return -1;
#endif
}
else if (!strcmp(lgopts[option_index].name, OPT_NO_PCI)) {
internal_config.no_pci = 1;
}
@ -810,7 +823,13 @@ eal_parse_args(int argc, char **argv)
eal_usage(prgname);
return -1;
}
/* --xen-dom0 doesn't make sense with --socket-mem */
if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
RTE_LOG(ERR, EAL, "Options --socket-mem cannot be specified "
"together with --xen_dom0!\n");
eal_usage(prgname);
return -1;
}
/* if no blacklist, parse a whitelist */
if (blacklist_index > 0) {
if (eal_dev_whitelist_exists()) {
@ -904,6 +923,7 @@ rte_eal_init(int argc, char **argv)
if (internal_config.no_hugetlbfs == 0 &&
internal_config.process_type != RTE_PROC_SECONDARY &&
internal_config.xen_dom0_support == 0 &&
eal_hugepage_info_init() < 0)
rte_panic("Cannot get hugepage information\n");


@ -984,6 +984,17 @@ rte_eal_hugepage_init(void)
return 0;
}
/* check if app runs on Xen Dom0 */
if (internal_config.xen_dom0_support) {
#ifdef RTE_LIBRTE_XEN_DOM0
/* use dom0_mm kernel driver to init memory */
if (rte_xen_dom0_memory_init() < 0)
return -1;
else
return 0;
#endif
}
/* calculate total number of hugepages available. at this point we haven't
* yet started sorting them so they all are on socket 0 */
@ -1271,6 +1282,17 @@ rte_eal_hugepage_attach(void)
"into secondary processes\n");
}
if (internal_config.xen_dom0_support) {
#ifdef RTE_LIBRTE_XEN_DOM0
if (rte_xen_dom0_memory_attach() < 0) {
RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay "
"process\n");
return -1;
}
return 0;
#endif
}
fd_zero = open("/dev/zero", O_RDONLY);
if (fd_zero < 0) {
RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");


@ -493,14 +493,14 @@ pci_uio_map_resource(struct rte_pci_device *dev)
* or uio:uioX */
rte_snprintf(dirname, sizeof(dirname),
"/sys/bus/pci/devices/" PCI_PRI_FMT "/uio",
SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
loc->domain, loc->bus, loc->devid, loc->function);
dir = opendir(dirname);
if (dir == NULL) {
/* retry with the parent directory */
rte_snprintf(dirname, sizeof(dirname),
"/sys/bus/pci/devices/" PCI_PRI_FMT,
SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
dir = opendir(dirname);


@ -0,0 +1,370 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <errno.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <stdarg.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/queue.h>
#include <sys/file.h>
#include <unistd.h>
#include <limits.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_common.h>
#include <rte_string_fns.h>
#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include <exec-env/rte_dom0_common.h>
#define PAGE_SIZE RTE_PGSIZE_4K
#define DEFAUL_DOM0_NAME "dom0-mem"
static int xen_fd = -1;
static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB";
/*
* Try to mmap *size bytes in /dev/zero. If it is successful, return the
* pointer to the mmap'd area and keep *size unmodified. Else, retry
* with a smaller zone: decrease *size by mem_size until it reaches
* 0, in which case return NULL. Note: this function returns an address
* which is a multiple of mem_size.
*/
static void *
xen_get_virtual_area(size_t *size, size_t mem_size)
{
void *addr;
int fd;
long aligned_addr;
RTE_LOG(INFO, EAL, "Ask a virtual area of 0x%zu bytes\n", *size);
fd = open("/dev/zero", O_RDONLY);
if (fd < 0){
RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
return NULL;
}
do {
addr = mmap(NULL, (*size) + mem_size, PROT_READ,
MAP_PRIVATE, fd, 0);
if (addr == MAP_FAILED)
*size -= mem_size;
} while (addr == MAP_FAILED && *size > 0);
if (addr == MAP_FAILED) {
close(fd);
RTE_LOG(INFO, EAL, "Cannot get a virtual area\n");
return NULL;
}
munmap(addr, (*size) + mem_size);
close(fd);
/* align addr to a mem_size boundary */
aligned_addr = (uintptr_t)addr;
aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size);
addr = (void *)(aligned_addr);
RTE_LOG(INFO, EAL, "Virtual area found at %p (size = 0x%zx)\n",
addr, *size);
return addr;
}
/**
* Get the memory size configuration from the
* /sys/kernel/mm/dom0-mm/memsize-mB/memsize file; the size unit is MB.
*/
static int
get_xen_memory_size(void)
{
char path[PATH_MAX];
unsigned long mem_size = 0;
static const char *file_name;
file_name = "memsize";
rte_snprintf(path, sizeof(path), "%s/%s",
sys_dir_path, file_name);
if (eal_parse_sysfs_value(path, &mem_size) < 0)
return -1;
if (mem_size == 0)
rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not"
" configured.\n",sys_dir_path, file_name);
if (mem_size % 2)
rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be an"
" even number.\n",sys_dir_path, file_name);
if (mem_size > DOM0_CONFIG_MEMSIZE)
rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger"
" than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE);
return mem_size;
}
/**
* Calculate the MFN (machine frame number) for a physical address in Xen
* Dom0, and return the corresponding machine address.
*/
phys_addr_t
rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr)
{
int mfn_id;
uint64_t mfn, mfn_offset;
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
struct rte_memseg *memseg = mcfg->memseg;
mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M;
/* MFNs are contiguous within each 2M block */
mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) %
RTE_PGSIZE_2M / PAGE_SIZE;
mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id];
/* return the machine address */
return (mfn * PAGE_SIZE + phy_addr % PAGE_SIZE);
}
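As a worked example with hypothetical numbers: if phy_addr lies 0x503000
bytes into its segment, then mfn_id = 0x503000 / 2M = 2 and
mfn_offset = (0x503000 % 2M) / 4K = 0x103 pages, so the machine address is
(mfn[2] + 0x103) * 4K plus the sub-page offset (here 0).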
int
rte_xen_dom0_memory_init(void)
{
void *vir_addr, *vma_addr = NULL;
int err, ret = 0;
uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0;
size_t vma_len = 0;
struct memory_info meminfo;
struct memseg_info seginfo[RTE_MAX_MEMSEG];
int flags, page_size = getpagesize();
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
struct rte_memseg *memseg = mcfg->memseg;
uint64_t total_mem = internal_config.memory;
memset(seginfo, 0, sizeof(seginfo));
memset(&meminfo, 0, sizeof(struct memory_info));
mem_size = get_xen_memory_size();
requested = (unsigned) (total_mem / 0x100000);
if (requested > mem_size)
/* if we didn't satisfy total memory requirements */
rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB,"
" available: %uMB\n", requested, mem_size);
else if (total_mem != 0)
mem_size = requested;
/* Check FD and open once */
if (xen_fd < 0) {
xen_fd = open(DOM0_MM_DEV, O_RDWR);
if (xen_fd < 0) {
RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
return -1;
}
}
meminfo.size = mem_size;
/* construct memory management name for Dom0 */
rte_snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s",
internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
/* Notify kernel driver to allocate memory */
ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo);
if (ret < 0) {
RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n");
err = -EIO;
goto fail;
}
/* Get number of memory segment from driver */
ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg);
if (ret < 0) {
RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n");
err = -EIO;
goto fail;
}
if(num_memseg > RTE_MAX_MEMSEG){
RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater"
" than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG);
err = -EIO;
goto fail;
}
/* get information on all memory segments */
ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo);
if (ret < 0) {
RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n");
err = -EIO;
goto fail;
}
/* map all memory segments to contiguous user space */
for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++)
{
vma_len = seginfo[memseg_idx].size;
/**
* get the biggest virtual memory area up to vma_len. If it fails,
* vma_addr is NULL, so let the kernel provide the address.
*/
vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M);
if (vma_addr == NULL) {
flags = MAP_SHARED;
vma_len = RTE_PGSIZE_2M;
} else
flags = MAP_SHARED | MAP_FIXED;
seginfo[memseg_idx].size = vma_len;
vir_addr = mmap(vma_addr, seginfo[memseg_idx].size,
PROT_READ|PROT_WRITE, flags, xen_fd,
memseg_idx * page_size);
if (vir_addr == MAP_FAILED) {
RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n",
DOM0_MM_DEV);
err = -EIO;
goto fail;
}
memseg[memseg_idx].addr = vir_addr;
memseg[memseg_idx].phys_addr = page_size *
seginfo[memseg_idx].pfn ;
memseg[memseg_idx].len = seginfo[memseg_idx].size;
for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++)
memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i];
/* MFNs are contiguous within 2M, so assume that page size is 2M */
memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M;
memseg[memseg_idx].nchannel = mcfg->nchannel;
memseg[memseg_idx].nrank = mcfg->nrank;
/* NUMA is not supported in Xen Dom0, so only socket 0 is set */
memseg[memseg_idx].socket_id = 0;
}
return 0;
fail:
if (xen_fd > 0) {
close(xen_fd);
xen_fd = -1;
}
return err;
}
/*
* This creates the memory mappings in the secondary process to match those of
* the primary process. It goes through each memory segment in the DPDK runtime
* configuration, mapping them in order to form a contiguous block in the
* virtual memory space.
*/
int
rte_xen_dom0_memory_attach(void)
{
const struct rte_mem_config *mcfg;
unsigned s = 0; /* s used to track the segment number */
int xen_fd = -1;
int ret = -1;
void *vir_addr;
char name[DOM0_NAME_MAX] = {0};
int page_size = getpagesize();
mcfg = rte_eal_get_configuration()->mem_config;
/* Check FD and open once */
if (xen_fd < 0) {
xen_fd = open(DOM0_MM_DEV, O_RDWR);
if (xen_fd < 0) {
RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
goto error;
}
}
/* construct memory management name for Dom0 */
rte_snprintf(name, DOM0_NAME_MAX, "%s-%s",
internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
/* attach to memory segments of primary process */
ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name);
if (ret) {
RTE_LOG(ERR, EAL,"attach memory segments fail.\n");
goto error;
}
/* map all segments into memory to make sure we get the addrs */
for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
/*
* the first memory segment with len==0 is the one that
* follows the last valid segment.
*/
if (mcfg->memseg[s].len == 0)
break;
vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd,
s * page_size);
if (vir_addr == MAP_FAILED) {
RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
"in %s to requested address [%p]\n",
(unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV,
mcfg->memseg[s].addr);
goto error;
}
}
return 0;
error:
if (xen_fd >= 0) {
close(xen_fd);
xen_fd = -1;
}
return -1;
}


@ -63,6 +63,7 @@ struct internal_config {
volatile unsigned force_nchannel; /**< force number of channels */
volatile unsigned force_nrank; /**< force number of ranks */
volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
volatile unsigned no_pci; /**< true to disable PCI */
volatile unsigned no_hpet; /**< true to disable HPET */
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping


@ -0,0 +1,107 @@
/*-
* This file is provided under a dual BSD/LGPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GNU LESSER GENERAL PUBLIC LICENSE
*
* Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
* Contact Information:
* Intel Corporation
*
*
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef _RTE_DOM0_COMMON_H_
#define _RTE_DOM0_COMMON_H_
#ifdef __KERNEL__
#include <linux/if.h>
#endif
#define DOM0_NAME_MAX 256
#define DOM0_MM_DEV "/dev/dom0_mm"
#define DOM0_CONTIG_NUM_ORDER 9 /**< order of a 2M contiguous block */
#define DOM0_NUM_MEMSEG 512 /**< Maximum number of memory segments. */
#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Size of a memory block (2M). */
#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum configurable memory size (4G), in MB. */
#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum number of 2M memory blocks. */
#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info)
#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *)
#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int)
#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *)
/**
* A structure used to store memory information.
*/
struct memory_info {
char name[DOM0_NAME_MAX];
uint64_t size;
};
/**
* A structure used to store memory segment information.
*/
struct memseg_info {
uint32_t idx;
uint64_t pfn;
uint64_t size;
uint64_t mfn[DOM0_NUM_MEMBLOCK];
};
/**
* A structure used to store memory block information.
*/
struct memblock_info {
uint8_t exchange_flag;
uint64_t vir_addr;
uint64_t pfn;
uint64_t mfn;
};
#endif /* _RTE_DOM0_COMMON_H_ */


@ -30,6 +30,10 @@
#include <linux/msi.h>
#include <linux/version.h>
#ifdef CONFIG_XEN_DOM0
#include <xen/xen.h>
#endif
/**
* MSI-X related macros, copy from linux/pci_regs.h in kernel 2.6.39,
* but none of them in kernel 2.6.35.
@ -312,6 +316,48 @@ igbuio_pci_irqhandler(int irq, struct uio_info *info)
return ret;
}
#ifdef CONFIG_XEN_DOM0
static int
igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma)
{
int idx;
idx = (int)vma->vm_pgoff;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_page_prot.pgprot |= _PAGE_IOMAP;
return remap_pfn_range(vma,
vma->vm_start,
info->mem[idx].addr >> PAGE_SHIFT,
vma->vm_end - vma->vm_start,
vma->vm_page_prot);
}
/**
* This is the uio device mmap method, which uses the igbuio mmap
* implementation in a Xen Dom0 environment.
*/
static int
igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
{
int idx;
if (vma->vm_pgoff >= MAX_UIO_MAPS)
return -EINVAL;
if(info->mem[vma->vm_pgoff].size == 0)
return -EINVAL;
idx = (int)vma->vm_pgoff;
switch (info->mem[idx].memtype) {
case UIO_MEM_PHYS:
return igbuio_dom0_mmap_phys(info, vma);
case UIO_MEM_LOGICAL:
case UIO_MEM_VIRTUAL:
default:
return -EINVAL;
}
}
#endif
/* Remap pci resources described by bar #pci_bar in uio resource n. */
static int
igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
@ -462,6 +508,11 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
udev->info.version = "0.1";
udev->info.handler = igbuio_pci_irqhandler;
udev->info.irqcontrol = igbuio_pci_irqcontrol;
#ifdef CONFIG_XEN_DOM0
/* check if the driver runs on Xen Dom0 */
if (xen_initial_domain())
udev->info.mmap = igbuio_dom0_pci_mmap;
#endif
udev->info.priv = udev;
udev->pdev = dev;
udev->mode = 0; /* set the default value for interrupt mode */


@ -0,0 +1,56 @@
# BSD LICENSE
#
# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include $(RTE_SDK)/mk/rte.vars.mk
#
# module name and path
#
MODULE = rte_dom0_mm
#
# CFLAGS
#
MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
MODULE_CFLAGS += -Wall -Werror
# this lib needs main eal
DEPDIRS-y += lib/librte_eal/linuxapp/eal
#
# all source are stored in SRCS-y
#
SRCS-y += dom0_mm_misc.c
include $(RTE_SDK)/mk/rte.module.mk


@ -0,0 +1,99 @@
/*-
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
* The full GNU General Public License is included in this distribution
* in the file called LICENSE.GPL.
*
* Contact Information:
* Intel Corporation
*
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef _DOM0_MM_DEV_H_
#define _DOM0_MM_DEV_H_
#include <linux/wait.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <exec-env/rte_dom0_common.h>
#define NUM_MEM_CTX 256 /**< Maximum number of memory contexts. */
#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum number of allowed exchange failures. */
/**
* A structure describing the private information for a dom0 device.
*/
struct dom0_mm_dev {
struct miscdevice miscdev;
uint32_t allocated_memsize;
uint32_t num_mem_ctx;
uint32_t config_memsize;
struct dom0_mm_data *mm_data[NUM_MEM_CTX];
struct mutex data_lock;
};
struct dom0_mm_data{
uint8_t fail_times;
uint32_t refcnt;
uint32_t num_memseg; /**< Number of memory segments. */
uint32_t mem_size; /**< Size of the requested memory, in MB. */
char name[DOM0_NAME_MAX];
/** Storing memory block information.*/
struct memblock_info block_info[DOM0_NUM_MEMBLOCK];
/** Storing memory segment information.*/
struct memseg_info seg_info[DOM0_NUM_MEMSEG];
};
#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args)
#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args)
#endif


@ -0,0 +1,620 @@
/*-
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
* The full GNU General Public License is included in this distribution
* in the file called LICENSE.GPL.
*
* Contact Information:
* Intel Corporation
*
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <xen/xen.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/interface/memory.h>
#include <rte_config.h>
#include <exec-env/rte_dom0_common.h>
#include "dom0_mm_dev.h"
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0");
static struct dom0_mm_dev dom0_dev;
static struct kobject *dom0_kobj = NULL;
static int dom0_open(struct inode *inode, struct file *file);
static int dom0_release(struct inode *inode, struct file *file);
static int dom0_ioctl(struct file *file, unsigned int ioctl_num,
unsigned long ioctl_param);
static int dom0_mmap(struct file *file, struct vm_area_struct *vma);
static int dom0_memory_free(struct dom0_mm_data *mm_data);
static const struct file_operations data_fops = {
.owner = THIS_MODULE,
.open = dom0_open,
.release = dom0_release,
.mmap = dom0_mmap,
.unlocked_ioctl = (void *)dom0_ioctl,
};
static ssize_t
show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf)
{
return snprintf(buf, 10, "%u\n", dom0_dev.allocated_memsize);
}
static ssize_t
show_memsize(struct device *dev, struct device_attribute *attr, char *buf)
{
return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize);
}
static ssize_t
store_memsize(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
int err = 0;
unsigned long mem_size;
if (0 != strict_strtoul(buf, 0, &mem_size))
return -EINVAL;
mutex_lock(&dom0_dev.data_lock);
if (0 == mem_size) {
err = -EINVAL;
goto fail;
} else if (mem_size < dom0_dev.allocated_memsize ||
mem_size > DOM0_CONFIG_MEMSIZE) {
XEN_ERR("configure memory size fail\n");
err = -EINVAL;
goto fail;
} else
dom0_dev.config_memsize = mem_size;
fail:
mutex_unlock(&dom0_dev.data_lock);
return err ? err : count;
}
static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize);
static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL);
static struct attribute *dev_attrs[] = {
&dev_attr_memsize.attr,
&dev_attr_memsize_rsvd.attr,
NULL,
};
/* the memory size unit is MB */
static const struct attribute_group dev_attr_grp = {
.name = "memsize-mB",
.attrs = dev_attrs,
};
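/*
 * The group shows up as /sys/kernel/mm/dom0-mm/memsize-mB/: writing,
 * e.g., 2048 to 'memsize' caps reservations at 2G, while the read-only
 * 'memsize_rsvd' reports how many MB are currently allocated.
 */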
static void
sort_viraddr(struct memblock_info *mb, int cnt)
{
int i,j;
uint64_t tmp_pfn;
uint64_t tmp_viraddr;
/* sort blocks by pfn, keeping each virtual address paired with its pfn */
for(i = 0; i < cnt; i ++) {
for(j = cnt - 1; j > i; j--) {
if(mb[j].pfn < mb[j - 1].pfn) {
tmp_pfn = mb[j - 1].pfn;
mb[j - 1].pfn = mb[j].pfn;
mb[j].pfn = tmp_pfn;
tmp_viraddr = mb[j - 1].vir_addr;
mb[j - 1].vir_addr = mb[j].vir_addr;
mb[j].vir_addr = tmp_viraddr;
}
}
}
}
static int
dom0_find_memdata(const char * mem_name)
{
unsigned i;
int idx = -1;
for(i = 0; i< NUM_MEM_CTX; i++) {
if(dom0_dev.mm_data[i] == NULL)
continue;
if (!strncmp(dom0_dev.mm_data[i]->name, mem_name,
sizeof(char) * DOM0_NAME_MAX)) {
idx = i;
break;
}
}
return idx;
}
static int
dom0_find_mempos(const char * mem_name)
{
unsigned i;
int idx = -1;
for(i = 0; i< NUM_MEM_CTX; i++) {
if(dom0_dev.mm_data[i] == NULL){
idx = i;
break;
}
}
return idx;
}
static int
dom0_memory_free(struct dom0_mm_data * mm_data)
{
int idx;
uint64_t vstart, vaddr;
uint32_t i, num_block, size;
if (!xen_pv_domain())
return -1;
/* each memory block is 2M */
num_block = mm_data->mem_size / 2;
if (num_block == 0)
return -1;
/* free memory and destroy the contiguous region in Xen */
for (i = 0; i< num_block; i++) {
vstart = mm_data->block_info[i].vir_addr;
if (vstart) {
if (mm_data->block_info[i].exchange_flag)
xen_destroy_contiguous_region(vstart,
DOM0_CONTIG_NUM_ORDER);
size = DOM0_MEMBLOCK_SIZE;
vaddr = vstart;
while (size > 0) {
ClearPageReserved(virt_to_page(vaddr));
vaddr += PAGE_SIZE;
size -= PAGE_SIZE;
}
free_pages(vstart, DOM0_CONTIG_NUM_ORDER);
}
}
/* reset global memory data */
idx = dom0_find_memdata(mm_data->name);
if (idx >= 0) {
dom0_dev.allocated_memsize -= mm_data->mem_size;
dom0_dev.mm_data[idx] = NULL;
dom0_dev.num_mem_ctx--;
}
memset(mm_data, 0, sizeof(struct dom0_mm_data));
vfree(mm_data);
return 0;
}
/**
* Find all memory segments in which physical addresses are contiguous.
*/
static void
find_memseg(int count, struct dom0_mm_data * mm_data)
{
int i = 0;
int j, k, idx = 0;
uint64_t zone_len, pfn, num_block;
while(i < count) {
if (mm_data->block_info[i].exchange_flag == 0) {
i++;
continue;
}
k = 0;
pfn = mm_data->block_info[i].pfn;
mm_data->seg_info[idx].pfn = pfn;
mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn;
for (j = i + 1; j < count; j++) {
/* a block whose MFN exchange failed ends the current segment */
if (mm_data->block_info[j].exchange_flag == 0)
break;
if (mm_data->block_info[j].pfn !=
(mm_data->block_info[j - 1].pfn +
DOM0_MEMBLOCK_SIZE / PAGE_SIZE))
break;
++k;
mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn;
}
num_block = j - i;
zone_len = num_block * DOM0_MEMBLOCK_SIZE;
mm_data->seg_info[idx].size = zone_len;
XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len);
i = i+ num_block;
idx++;
if (idx == DOM0_NUM_MEMSEG)
break;
}
mm_data->num_memseg = idx;
}
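/*
 * Worked example (4K system pages, so each 2M block spans 512 pfns):
 * sorted blocks with pfns {0x1000, 0x1200, 0x1400, 0x2000} yield two
 * segments: pfns 0x1000..0x15ff form one 6M memseg (three blocks, each
 * 512 pfns apart), while 0x2000 begins a second 2M memseg. A block
 * whose MFN exchange failed never starts a segment and also ends the
 * run that precedes it.
 */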
static int
dom0_prepare_memsegs(struct memory_info* meminfo, struct dom0_mm_data *mm_data)
{
uint64_t pfn, vstart, vaddr;
uint32_t i, num_block, size;
int idx;
/* allocate memory in 2M blocks */
num_block = meminfo->size / 2;
for (i = 0; i< num_block; i++) {
vstart = (unsigned long)
__get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER);
if (vstart == 0) {
XEN_ERR("allocate memory fail.\n");
mm_data->mem_size = 2 * i;
dom0_memory_free(mm_data);
return -ENOMEM;
}
size = DOM0_MEMBLOCK_SIZE;
vaddr = vstart;
while (size > 0) {
SetPageReserved(virt_to_page(vaddr));
vaddr += PAGE_SIZE;
size -= PAGE_SIZE;
}
pfn = virt_to_pfn(vstart);
mm_data->block_info[i].pfn = pfn;
mm_data->block_info[i].vir_addr = vstart;
}
sort_viraddr(mm_data->block_info, num_block);
for (i = 0; i< num_block; i++) {
/*
* This API exchanges MFNs to obtain a block of contiguous machine
* addresses; the maximum block size is 2M.
*/
if (xen_create_contiguous_region(mm_data->block_info[i].vir_addr,
DOM0_CONTIG_NUM_ORDER, 0) == 0) {
mm_data->block_info[i].exchange_flag = 1;
mm_data->block_info[i].mfn =
pfn_to_mfn(mm_data->block_info[i].pfn);
} else {
XEN_ERR("exchange memeory fail\n");
mm_data->block_info[i].exchange_flag = 0;
mm_data->fail_times++;
if (mm_data->fail_times > MAX_EXCHANGE_FAIL_TIME) {
mm_data->mem_size = meminfo->size;
dom0_memory_free(mm_data);
return -1;
}
}
}
find_memseg(num_block, mm_data);
/* update private memory data */
mm_data->refcnt++;
mm_data->mem_size = meminfo->size;
memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX);
mm_data->name[DOM0_NAME_MAX -1] = '\0';
/* update global memory data */
idx = dom0_find_mempos(meminfo->name);
if (idx < 0) {
dom0_memory_free(mm_data);
return -1;
}
dom0_dev.mm_data[idx] = mm_data;
dom0_dev.num_mem_ctx++;
dom0_dev.allocated_memsize += mm_data->mem_size;
return 0;
}
static int
dom0_check_memory (struct memory_info *meminfo)
{
int idx;
uint64_t mem_size;
/* round memory size to the next even number. */
if (meminfo->size % 2)
++meminfo->size;
mem_size = meminfo->size;
if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) {
XEN_ERR("Memory data space is full in Dom0 driver\n");
return -1;
}
idx = dom0_find_memdata(meminfo->name);
if (idx >= 0) {
XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n",
meminfo->name);
return -1;
}
if ((dom0_dev.allocated_memsize + mem_size) >
dom0_dev.config_memsize) {
XEN_ERR("total memory size can't be larger than config memory size.\n");
return -1;
}
return 0;
}
static int __init
dom0_init(void)
{
if (!xen_domain())
return -ENODEV;
/* Setup the misc device */
dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR;
dom0_dev.miscdev.name = "dom0_mm";
dom0_dev.miscdev.fops = &data_fops;
/* register misc char device */
if (misc_register(&dom0_dev.miscdev) != 0) {
XEN_ERR("Misc device registration failed\n");
return -EPERM;
}
mutex_init(&dom0_dev.data_lock);
dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj);
if (!dom0_kobj) {
XEN_ERR("dom0-mm object creation failed\n");
misc_deregister(&dom0_dev.miscdev);
return -ENOMEM;
}
if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) {
sysfs_remove_group(dom0_kobj, &dev_attr_grp);
kobject_put(dom0_kobj);
misc_deregister(&dom0_dev.miscdev);
return -EPERM;
}
XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n");
return 0;
}
static void __exit
dom0_exit(void)
{
sysfs_remove_group(dom0_kobj, &dev_attr_grp);
kobject_put(dom0_kobj);
misc_deregister(&dom0_dev.miscdev);
XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n");
}
static int
dom0_open(struct inode *inode, struct file *file)
{
file->private_data = NULL;
XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n");
return 0;
}
static int
dom0_release(struct inode *inode, struct file *file)
{
int ret = 0;
struct dom0_mm_data *mm_data = file->private_data;
if (mm_data == NULL)
return ret;
mutex_lock(&dom0_dev.data_lock);
if (--mm_data->refcnt == 0)
ret = dom0_memory_free(mm_data);
mutex_unlock(&dom0_dev.data_lock);
file->private_data = NULL;
XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n");
return ret;
}
static int
dom0_mmap(struct file *file, struct vm_area_struct *vm)
{
int status = 0;
uint32_t idx = vm->vm_pgoff;
uint64_t pfn, size = vm->vm_end - vm->vm_start;
struct dom0_mm_data *mm_data = file->private_data;
if(mm_data == NULL)
return -EINVAL;
mutex_lock(&dom0_dev.data_lock);
if (idx >= mm_data->num_memseg) {
mutex_unlock(&dom0_dev.data_lock);
return -EINVAL;
}
if (size > mm_data->seg_info[idx].size){
mutex_unlock(&dom0_dev.data_lock);
return -EINVAL;
}
XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size);
pfn = mm_data->seg_info[idx].pfn;
mutex_unlock(&dom0_dev.data_lock);
status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED);
return status;
}
static int
dom0_ioctl(struct file *file,
unsigned int ioctl_num,
unsigned long ioctl_param)
{
int idx, ret;
char name[DOM0_NAME_MAX] = {0};
struct memory_info meminfo;
struct dom0_mm_data *mm_data = file->private_data;
XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param);
/**
* Switch according to the ioctl called
*/
switch (_IOC_NR(ioctl_num)) {
case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG):
ret = copy_from_user(&meminfo, (void *)ioctl_param,
sizeof(struct memory_info));
if (ret)
return -EFAULT;
if (mm_data != NULL) {
XEN_ERR("Cannot create memory segment for the same"
" file descriptor\n");
return -EINVAL;
}
/* Allocate private data */
mm_data = vmalloc(sizeof(struct dom0_mm_data));
if (!mm_data) {
XEN_ERR("Unable to allocate device private data\n");
return -ENOMEM;
}
memset(mm_data, 0, sizeof(struct dom0_mm_data));
mutex_lock(&dom0_dev.data_lock);
/* check if we can allocate memory */
if (dom0_check_memory(&meminfo) < 0) {
mutex_unlock(&dom0_dev.data_lock);
vfree(mm_data);
return -EINVAL;
}
/* allocate memory and create memory segments */
if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) {
XEN_ERR("create memory segment fail.\n");
mutex_unlock(&dom0_dev.data_lock);
return -EIO;
}
file->private_data = mm_data;
mutex_unlock(&dom0_dev.data_lock);
break;
/* support multiple processes sharing the same memory mapping */
case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG):
ret = copy_from_user(name, (void *)ioctl_param,
sizeof(char) * DOM0_NAME_MAX);
if (ret)
return -EFAULT;
mutex_lock(&dom0_dev.data_lock);
idx = dom0_find_memdata(name);
if (idx < 0) {
mutex_unlock(&dom0_dev.data_lock);
return -EINVAL;
}
mm_data = dom0_dev.mm_data[idx];
mm_data->refcnt++;
file->private_data = mm_data;
mutex_unlock(&dom0_dev.data_lock);
break;
case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG):
ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg,
sizeof(int));
if (ret)
return -EFAULT;
break;
case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO):
ret = copy_to_user((void *)ioctl_param,
&mm_data->seg_info[0],
sizeof(struct memseg_info) *
mm_data->num_memseg);
if (ret)
return -EFAULT;
break;
default:
XEN_PRINT("IOCTL default \n");
break;
}
return 0;
}
module_init(dom0_init);
module_exit(dom0_exit);
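/*
 * Illustrative user-space sketch of the protocol this device exposes;
 * it is not kernel code, so it is kept under #if 0. It assumes the
 * memory_info/memseg_info layouts and RTE_DOM0_IOCTL_* numbers shared
 * through <exec-env/rte_dom0_common.h>; the helper name is made up.
 */
#if 0
static void *
map_first_memseg(const char *name, unsigned int size_mb)
{
	struct memory_info meminfo;
	struct memseg_info seg[DOM0_NUM_MEMSEG];
	int fd, num;

	fd = open("/dev/dom0_mm", O_RDWR);
	if (fd < 0)
		return NULL;
	/* reserve size_mb MB of machine-contiguous 2M blocks */
	memset(&meminfo, 0, sizeof(meminfo));
	strncpy(meminfo.name, name, DOM0_NAME_MAX - 1);
	meminfo.size = size_mb;
	if (ioctl(fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo) < 0)
		return NULL;
	/* fetch the segments the driver assembled from those blocks */
	ioctl(fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num);
	ioctl(fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seg);
	/* the mmap page offset selects the memseg index */
	return mmap(NULL, seg[0].size, PROT_READ | PROT_WRITE,
			MAP_SHARED, fd, 0);
}
#endif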

View File

@ -757,8 +757,7 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
(int) sizeof(struct rte_pktmbuf_pool_private));
return (-ENOSPC);
}
mbp_priv = (struct rte_pktmbuf_pool_private *)
((char *)mp + sizeof(struct rte_mempool));
mbp_priv = rte_mempool_get_priv(mp);
if ((uint32_t) (mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM) <
dev_info.min_rx_bufsize) {
PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d "

View File

@ -37,8 +37,10 @@ LIB = librte_mempool.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) := rte_mempool.c
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c
ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_dom0_mempool.c
endif
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h

View File

@ -0,0 +1,134 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <stdarg.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/queue.h>
#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_atomic.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_spinlock.h>
#include "rte_mempool.h"
static void
get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num,
uint32_t pg_sz, uint32_t memseg_id)
{
uint32_t i;
uint64_t virt_addr, mfn_id;
struct rte_mem_config *mcfg;
uint32_t page_size = getpagesize();
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
virt_addr =(uintptr_t) mcfg->memseg[memseg_id].addr;
for (i = 0; i != pg_num; i++) {
mfn_id = ((uintptr_t)va + i * pg_sz - virt_addr) / RTE_PGSIZE_2M;
pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size;
}
}
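/*
 * Worked example of the translation above, assuming 4K system pages:
 * for an object 6M past the start of the memseg, mfn_id = 3, so its
 * physical address is mfn[3] * 4096, the machine address of the fourth
 * 2M frame backing the segment.
 */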
/* create the mempool to support Dom0 */
struct rte_mempool *
rte_dom0_mempool_create(const char *name, unsigned elt_num, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags)
{
struct rte_mempool *mp = NULL;
phys_addr_t *pa;
char *va;
size_t sz;
uint32_t pg_num, pg_shift, pg_sz, total_size;
const struct rte_memzone *mz;
char mz_name[RTE_MEMZONE_NAMESIZE];
int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
pg_sz = RTE_PGSIZE_2M;
pg_shift = rte_bsf32(pg_sz);
total_size = rte_mempool_calc_obj_size(elt_size, flags, NULL);
/* calc max memory size and max number of pages needed. */
sz = rte_mempool_xmem_size(elt_num, total_size, pg_shift) +
RTE_PGSIZE_2M;
pg_num = sz >> pg_shift;
/* allocate space for the per-page physical addresses. */
pa = calloc(pg_num, sizeof (*pa));
if (pa == NULL)
return mp;
rte_snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME, name);
mz = rte_memzone_reserve(mz_name, sz, socket_id, mz_flags);
if (mz == NULL) {
free(pa);
return mp;
}
va = (char *)RTE_ALIGN_CEIL((uintptr_t)mz->addr, RTE_PGSIZE_2M);
/* extract physical mappings of the allocated memory. */
get_phys_map(va, pa, pg_num, pg_sz, mz->memseg_id);
mp = rte_mempool_xmem_create(name, elt_num, elt_size,
cache_size, private_data_size,
mp_init, mp_init_arg,
obj_init, obj_init_arg,
socket_id, flags, va, pa, pg_num, pg_shift);
free(pa);
return (mp);
}
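/*
 * Minimal usage sketch: when RTE_LIBRTE_XEN_DOM0 is enabled, the generic
 * rte_mempool_create() forwards here, so a typical packet-mbuf pool needs
 * no Xen-specific calls. Kept under #if 0; the pool name and sizes below
 * are illustrative only.
 */
#if 0
	struct rte_mempool *mp;

	mp = rte_mempool_create("mbuf_pool", 8191,
			2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
			32, sizeof(struct rte_pktmbuf_pool_private),
			rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
			rte_socket_id(), 0);
#endif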

View File

@ -119,6 +119,232 @@ static unsigned optimize_object_size(unsigned obj_size)
return new_obj_size * CACHE_LINE_SIZE;
}
static void
mempool_add_elem(struct rte_mempool *mp, void *obj, uint32_t obj_idx,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg)
{
struct rte_mempool **mpp;
obj = (char *)obj + mp->header_size;
/* set mempool ptr in header */
mpp = __mempool_from_obj(obj);
*mpp = mp;
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
__mempool_write_header_cookie(obj, 1);
__mempool_write_trailer_cookie(obj);
#endif
/* call the initializer */
if (obj_init)
obj_init(mp, obj_init_arg, obj, obj_idx);
/* enqueue in ring */
rte_ring_sp_enqueue(mp->ring, obj);
}
uint32_t
rte_mempool_obj_iter(void *vaddr, uint32_t elt_num, size_t elt_sz, size_t align,
const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg)
{
uint32_t i, j, k;
uint32_t pgn;
uintptr_t end, start, va;
uintptr_t pg_sz;
pg_sz = (uintptr_t)1 << pg_shift;
va = (uintptr_t)vaddr;
i = 0;
j = 0;
while (i != elt_num && j != pg_num) {
start = RTE_ALIGN_CEIL(va, align);
end = start + elt_sz;
pgn = (end >> pg_shift) - (start >> pg_shift);
pgn += j;
/* is there enough space left for the next element? */
if (pgn >= pg_num)
break;
for (k = j;
k != pgn &&
paddr[k] + pg_sz == paddr[k + 1];
k++)
;
/*
* if the next pgn pages are physically contiguous, use them to
* create the next element; otherwise skip ahead one page and leave
* that chunk unused.
*/
if (k == pgn) {
if (obj_iter != NULL)
obj_iter(obj_iter_arg, (void *)start,
(void *)end, i);
va = end;
j = pgn;
i++;
} else {
va = RTE_ALIGN_CEIL((va + 1), pg_sz);
j++;
}
}
return (i);
}
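/*
 * Worked example: with pg_shift = 21 (2M pages) and elt_sz = 3M, every
 * element straddles a page boundary, so an element is placed only when
 * paddr[j] + 2M == paddr[j + 1]; once the run of physically contiguous
 * pages ends, va jumps to the next 2M boundary and that page is left
 * unused.
 */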
/*
* Populate the mempool with objects.
*/
struct mempool_populate_arg {
struct rte_mempool *mp;
rte_mempool_obj_ctor_t *obj_init;
void *obj_init_arg;
};
static void
mempool_obj_populate(void *arg, void *start, void *end, uint32_t idx)
{
struct mempool_populate_arg *pa = arg;
mempool_add_elem(pa->mp, start, idx, pa->obj_init, pa->obj_init_arg);
pa->mp->elt_va_end = (uintptr_t)end;
}
static void
mempool_populate(struct rte_mempool *mp, size_t num, size_t align,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg)
{
uint32_t elt_sz;
struct mempool_populate_arg arg;
elt_sz = mp->elt_size + mp->header_size + mp->trailer_size;
arg.mp = mp;
arg.obj_init = obj_init;
arg.obj_init_arg = obj_init_arg;
mp->size = rte_mempool_obj_iter((void *)mp->elt_va_start,
num, elt_sz, align,
mp->elt_pa, mp->pg_num, mp->pg_shift,
mempool_obj_populate, &arg);
}
uint32_t
rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
struct rte_mempool_objsz *sz)
{
struct rte_mempool_objsz lsz;
sz = (sz != NULL) ? sz : &lsz;
/*
* In header, we have at least the pointer to the pool, and
* optionally a 64-bit cookie.
*/
sz->header_size = 0;
sz->header_size += sizeof(struct rte_mempool *); /* ptr to pool */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
sz->header_size += sizeof(uint64_t); /* cookie */
#endif
if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0)
sz->header_size = RTE_ALIGN_CEIL(sz->header_size,
CACHE_LINE_SIZE);
/* trailer contains the cookie in debug mode */
sz->trailer_size = 0;
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
sz->trailer_size += sizeof(uint64_t); /* cookie */
#endif
/* element size is at least 8-byte aligned */
sz->elt_size = RTE_ALIGN_CEIL(elt_size, sizeof(uint64_t));
/* expand trailer to next cache line */
if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) {
sz->total_size = sz->header_size + sz->elt_size +
sz->trailer_size;
sz->trailer_size += ((CACHE_LINE_SIZE -
(sz->total_size & CACHE_LINE_MASK)) &
CACHE_LINE_MASK);
}
/*
* increase trailer to add padding between objects in order to
* spread them across memory channels/ranks
*/
if ((flags & MEMPOOL_F_NO_SPREAD) == 0) {
unsigned new_size;
new_size = optimize_object_size(sz->header_size + sz->elt_size +
sz->trailer_size);
sz->trailer_size = new_size - sz->header_size - sz->elt_size;
}
/* this is the size of an object, including header and trailer */
sz->total_size = sz->header_size + sz->elt_size + sz->trailer_size;
return (sz->total_size);
}
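/*
 * Worked example (64-byte cache lines, debug disabled, no flags set):
 * elt_size = 201 is first rounded up to 208 (a multiple of 8), the
 * header becomes 64 (the pool pointer padded out to a cache line), and
 * the trailer grows from 0 to 48 so that 64 + 208 + 48 = 320 is a
 * multiple of 64; optimize_object_size() may then add further trailer
 * padding to spread objects across memory channels and ranks.
 */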
/*
* Calculate the maximum amount of memory required to store a given number of objects.
*/
size_t
rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz, uint32_t pg_shift)
{
size_t n, pg_num, pg_sz, sz;
pg_sz = (size_t)1 << pg_shift;
if ((n = pg_sz / elt_sz) > 0) {
pg_num = (elt_num + n - 1) / n;
sz = pg_num << pg_shift;
} else {
sz = RTE_ALIGN_CEIL(elt_sz, pg_sz) * elt_num;
}
return (sz);
}
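/*
 * Worked example: elt_sz = 2176 with pg_shift = 21 gives n = 2M / 2176
 * = 963 whole objects per page, so elt_num = 8192 needs pg_num =
 * ceil(8192 / 963) = 9 pages, i.e. 18M. The per-page count is rounded
 * down because objects are never allowed to straddle page boundaries
 * in this case.
 */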
/*
* Calculate how much memory is actually required within the given
* memory buffer to store the required number of elements.
*/
static void
mempool_lelem_iter(void *arg, __rte_unused void *start, void *end,
__rte_unused uint32_t idx)
{
*(uintptr_t *)arg = (uintptr_t)end;
}
ssize_t
rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
{
uint32_t n;
uintptr_t va, uv;
size_t pg_sz, usz;
pg_sz = (size_t)1 << pg_shift;
va = (uintptr_t)vaddr;
uv = va;
if ((n = rte_mempool_obj_iter(vaddr, elt_num, elt_sz, 1,
paddr, pg_num, pg_shift, mempool_lelem_iter,
&uv)) != elt_num) {
return (-n);
}
uv = RTE_ALIGN_CEIL(uv, pg_sz);
usz = uv - va;
return (usz);
}
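/*
 * The usage returned here can exceed rte_mempool_xmem_size() whenever
 * physical discontinuities in the buffer force elements to be skipped;
 * a negative return value -n means that only n of the elt_num elements
 * actually fit in the supplied buffer.
 */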
/* create the mempool */
struct rte_mempool *
rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
@ -126,18 +352,48 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags)
{
#ifdef RTE_LIBRTE_XEN_DOM0
return (rte_dom0_mempool_create(name, n, elt_size,
cache_size, private_data_size,
mp_init, mp_init_arg,
obj_init, obj_init_arg,
socket_id, flags));
#else
return (rte_mempool_xmem_create(name, n, elt_size,
cache_size, private_data_size,
mp_init, mp_init_arg,
obj_init, obj_init_arg,
socket_id, flags,
NULL, NULL, MEMPOOL_PG_NUM_DEFAULT, MEMPOOL_PG_SHIFT_MAX));
#endif
}
/*
* Create the mempool over an already allocated chunk of memory.
* The external memory buffer may consist of physically disjoint pages.
* Setting vaddr to NULL makes the mempool fall back to the original
* behaviour and allocate space for the mempool and its elements as one
* big chunk of physically contiguous memory.
*/
struct rte_mempool *
rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags, void *vaddr,
const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
{
char mz_name[RTE_MEMZONE_NAMESIZE];
char rg_name[RTE_RING_NAMESIZE];
struct rte_mempool *mp = NULL;
struct rte_ring *r;
const struct rte_memzone *mz;
size_t mempool_size, total_elt_size;
size_t mempool_size;
int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
int rg_flags = 0;
uint32_t header_size, trailer_size;
unsigned i;
void *obj;
void *obj;
struct rte_mempool_objsz objsz;
/* compilation-time checks */
RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
@ -156,13 +412,26 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
#endif
/* check that we have an initialised tail queue */
if (RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_MEMPOOL, rte_mempool_list) == NULL) {
if (RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_MEMPOOL,
rte_mempool_list) == NULL) {
rte_errno = E_RTE_NO_TAILQ;
return NULL;
}
/* asked cache too big */
if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE){
if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
rte_errno = EINVAL;
return NULL;
}
/* check that we have both VA and PA */
if (vaddr != NULL && paddr == NULL) {
rte_errno = EINVAL;
return NULL;
}
/* Check that pg_num and pg_shift parameters are valid. */
if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) {
rte_errno = EINVAL;
return NULL;
}
@ -177,6 +446,9 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
if (flags & MEMPOOL_F_SC_GET)
rg_flags |= RING_F_SC_DEQ;
/* calculate mempool object sizes. */
rte_mempool_calc_obj_size(elt_size, flags, &objsz);
rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);
/* allocate the ring that will be used to store objects */
@ -189,53 +461,21 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
goto exit;
/*
* In header, we have at least the pointer to the pool, and
* optionaly a 64 bits cookie.
* reserve a memory zone for this mempool: private data is
* cache-aligned
*/
header_size = 0;
header_size += sizeof(struct rte_mempool *); /* ptr to pool */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
header_size += sizeof(uint64_t); /* cookie */
#endif
if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0)
header_size = (header_size + CACHE_LINE_MASK) & (~CACHE_LINE_MASK);
/* trailer contains the cookie in debug mode */
trailer_size = 0;
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
trailer_size += sizeof(uint64_t); /* cookie */
#endif
/* element size is 8 bytes-aligned at least */
elt_size = (elt_size + 7) & (~7);
/* expand trailer to next cache line */
if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) {
total_elt_size = header_size + elt_size + trailer_size;
trailer_size += ((CACHE_LINE_SIZE -
(total_elt_size & CACHE_LINE_MASK)) &
CACHE_LINE_MASK);
}
/*
* increase trailer to add padding between objects in order to
* spread them accross memory channels/ranks
*/
if ((flags & MEMPOOL_F_NO_SPREAD) == 0) {
unsigned new_size;
new_size = optimize_object_size(header_size + elt_size +
trailer_size);
trailer_size = new_size - header_size - elt_size;
}
/* this is the size of an object, including header and trailer */
total_elt_size = header_size + elt_size + trailer_size;
/* reserve a memory zone for this mempool: private data is
* cache-aligned */
private_data_size = (private_data_size +
CACHE_LINE_MASK) & (~CACHE_LINE_MASK);
mempool_size = total_elt_size * n +
sizeof(struct rte_mempool) + private_data_size;
/*
* If the user provided an external memory buffer, then use it to
* store the mempool objects. Otherwise reserve a memzone big enough
* to hold the mempool header and metadata plus the mempool objects.
*/
mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
if (vaddr == NULL)
mempool_size += (size_t)objsz.total_size * n;
rte_snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name);
mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags);
@ -255,39 +495,42 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
mp->ring = r;
mp->size = n;
mp->flags = flags;
mp->elt_size = elt_size;
mp->header_size = header_size;
mp->trailer_size = trailer_size;
mp->elt_size = objsz.elt_size;
mp->header_size = objsz.header_size;
mp->trailer_size = objsz.trailer_size;
mp->cache_size = cache_size;
mp->cache_flushthresh = (uint32_t)(cache_size * CACHE_FLUSHTHRESH_MULTIPLIER);
mp->cache_flushthresh = (uint32_t)
(cache_size * CACHE_FLUSHTHRESH_MULTIPLIER);
mp->private_data_size = private_data_size;
/* calculate the address of the first element of a contiguous mempool. */
obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
private_data_size;
/* populate address translation fields. */
mp->pg_num = pg_num;
mp->pg_shift = pg_shift;
mp->pg_mask = RTE_LEN2MASK(mp->pg_shift, typeof(mp->pg_mask));
/* mempool elements allocated together with mempool */
if (vaddr == NULL) {
mp->elt_va_start = (uintptr_t)obj;
mp->elt_pa[0] = mp->phys_addr +
(mp->elt_va_start - (uintptr_t)mp);
/* mempool elements in a separate chunk of memory. */
} else {
mp->elt_va_start = (uintptr_t)vaddr;
memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
}
mp->elt_va_end = mp->elt_va_start;
/* call the initializer */
if (mp_init)
mp_init(mp, mp_init_arg);
/* fill the headers and trailers, and add objects in ring */
obj = (char *)mp + sizeof(struct rte_mempool) + private_data_size;
for (i = 0; i < n; i++) {
struct rte_mempool **mpp;
obj = (char *)obj + header_size;
/* set mempool ptr in header */
mpp = __mempool_from_obj(obj);
*mpp = mp;
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
__mempool_write_header_cookie(obj, 1);
__mempool_write_trailer_cookie(obj);
#endif
/* call the initializer */
if (obj_init)
obj_init(mp, obj_init_arg, obj, i);
/* enqueue in ring */
rte_ring_sp_enqueue(mp->ring, obj);
obj = (char *)obj + elt_size + trailer_size;
}
mempool_populate(mp, n, 1, obj_init, obj_init_arg);
RTE_EAL_TAILQ_INSERT_TAIL(RTE_TAILQ_MEMPOOL, rte_mempool_list, mp);
@ -355,21 +598,56 @@ rte_mempool_dump_cache(const struct rte_mempool *mp)
#ifndef __INTEL_COMPILER
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
struct mempool_audit_arg {
const struct rte_mempool *mp;
uintptr_t obj_end;
uint32_t obj_num;
};
static void
mempool_obj_audit(void *arg, void *start, void *end, uint32_t idx)
{
struct mempool_audit_arg *pa = arg;
void *obj;
obj = (char *)start + pa->mp->header_size;
pa->obj_end = (uintptr_t)end;
pa->obj_num = idx + 1;
__mempool_check_cookies(pa->mp, &obj, 1, 2);
}
static void
mempool_audit_cookies(const struct rte_mempool *mp)
{
unsigned i;
void *obj;
void * const *obj_table;
uint32_t elt_sz, num;
struct mempool_audit_arg arg;
obj = (char *)mp + sizeof(struct rte_mempool) + mp->private_data_size;
for (i = 0; i < mp->size; i++) {
obj = (char *)obj + mp->header_size;
obj_table = &obj;
__mempool_check_cookies(mp, obj_table, 1, 2);
obj = (char *)obj + mp->elt_size + mp->trailer_size;
elt_sz = mp->elt_size + mp->header_size + mp->trailer_size;
arg.mp = mp;
arg.obj_end = mp->elt_va_start;
arg.obj_num = 0;
num = rte_mempool_obj_iter((void *)mp->elt_va_start,
mp->size, elt_sz, 1,
mp->elt_pa, mp->pg_num, mp->pg_shift,
mempool_obj_audit, &arg);
if (num != mp->size) {
rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) "
"iterated only over %u elements\n",
mp, mp->size, num);
} else if (arg.obj_end != mp->elt_va_end || arg.obj_num != mp->size) {
rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) "
"last callback va_end: %#tx (%#tx expeceted), "
"num of objects: %u (%u expected)\n",
mp, mp->size,
arg.obj_end, mp->elt_va_end,
arg.obj_num, mp->size);
}
}
#ifndef __INTEL_COMPILER
#pragma GCC diagnostic error "-Wcast-qual"
#endif
@ -422,6 +700,7 @@ rte_mempool_dump(const struct rte_mempool *mp)
printf("mempool <%s>@%p\n", mp->name, mp);
printf(" flags=%x\n", mp->flags);
printf(" ring=<%s>@%p\n", mp->ring->name, mp->ring);
printf(" phys_addr=0x%" PRIx64 "\n", mp->phys_addr);
printf(" size=%"PRIu32"\n", mp->size);
printf(" header_size=%"PRIu32"\n", mp->header_size);
printf(" elt_size=%"PRIu32"\n", mp->elt_size);
@ -429,6 +708,19 @@ rte_mempool_dump(const struct rte_mempool *mp)
printf(" total_obj_size=%"PRIu32"\n",
mp->header_size + mp->elt_size + mp->trailer_size);
printf(" private_data_size=%"PRIu32"\n", mp->private_data_size);
printf(" pg_num=%"PRIu32"\n", mp->pg_num);
printf(" pg_shift=%"PRIu32"\n", mp->pg_shift);
printf(" pg_mask=%#tx\n", mp->pg_mask);
printf(" elt_va_start=%#tx\n", mp->elt_va_start);
printf(" elt_va_end=%#tx\n", mp->elt_va_end);
printf(" elt_pa[0]=0x%" PRIx64 "\n", mp->elt_pa[0]);
if (mp->size != 0)
printf(" avg bytes/object=%#Lf\n",
(long double)(mp->elt_va_end - mp->elt_va_start) /
mp->size);
cache_count = rte_mempool_dump_cache(mp);
common_count = rte_ring_count(mp->ring);
if ((cache_count + common_count) > mp->size)

View File

@ -108,14 +108,36 @@ struct rte_mempool_cache {
} __rte_cache_aligned;
#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
struct rte_mempool_objsz {
uint32_t elt_size; /**< Size of an element. */
uint32_t header_size; /**< Size of header (before elt). */
uint32_t trailer_size; /**< Size of trailer (after elt). */
uint32_t total_size;
/**< Total size of an object (header + elt + trailer). */
};
#define RTE_MEMPOOL_NAMESIZE 32 /**< Maximum length of a memory pool name. */
#define RTE_MEMPOOL_MZ_PREFIX "MP_"
/* "MP_<name>" */
#define RTE_MEMPOOL_MZ_FORMAT RTE_MEMPOOL_MZ_PREFIX "%s"
#ifdef RTE_LIBRTE_XEN_DOM0
/* "<name>_MP_elt" */
#define RTE_MEMPOOL_OBJ_NAME "%s_" RTE_MEMPOOL_MZ_PREFIX "elt"
#else
#define RTE_MEMPOOL_OBJ_NAME RTE_MEMPOOL_MZ_FORMAT
#endif /* RTE_LIBRTE_XEN_DOM0 */
#define MEMPOOL_PG_SHIFT_MAX (sizeof(uintptr_t) * CHAR_BIT - 1)
/** Mempool over one chunk of physically contiguous memory */
#define MEMPOOL_PG_NUM_DEFAULT 1
/**
* The RTE mempool structure.
*/
@ -128,7 +150,8 @@ struct rte_mempool {
int flags; /**< Flags of the mempool. */
uint32_t size; /**< Size of the mempool. */
uint32_t cache_size; /**< Size of per-lcore local cache. */
uint32_t cache_flushthresh; /**< Threshold before we flush excess elements. */
uint32_t cache_flushthresh;
/**< Threshold before we flush excess elements. */
uint32_t elt_size; /**< Size of an element. */
uint32_t header_size; /**< Size of header (before elt). */
@ -145,6 +168,20 @@ struct rte_mempool {
/** Per-lcore statistics. */
struct rte_mempool_debug_stats stats[RTE_MAX_LCORE];
#endif
/* Address translation support, starts from next cache line. */
/** Number of elements in the elt_pa array. */
uint32_t pg_num __rte_cache_aligned;
uint32_t pg_shift; /**< LOG2 of the physical page size. */
uintptr_t pg_mask; /**< physical page mask value. */
uintptr_t elt_va_start;
/**< Virtual address of the first mempool object. */
uintptr_t elt_va_end;
/**< Virtual address one past the last mempool object. */
phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT];
/**< Array of physical page addresses for the mempool objects buffer. */
} __rte_cache_aligned;
#define MEMPOOL_F_NO_SPREAD 0x0001 /**< Do not spread in memory. */
@ -171,6 +208,24 @@ struct rte_mempool {
#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
#endif
/**
* Calculate the size of the mempool header.
* @param mp
* Pointer to the memory pool.
* @param pgn
* Number of pages used to store the mempool objects.
*/
#define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \
RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
sizeof ((mp)->elt_pa[0]), CACHE_LINE_SIZE))
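/*
 * For the default pg_num = 1 this reduces to sizeof(struct rte_mempool);
 * e.g. a 9-page external buffer on a 64-bit build adds
 * RTE_ALIGN_CEIL(8 * sizeof(phys_addr_t), CACHE_LINE_SIZE) = 64 bytes
 * to extend the elt_pa[] array.
 */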
/**
* Return TRUE if the whole mempool is allocated in one contiguous block of memory.
*/
#define MEMPOOL_IS_CONTIG(mp) \
((mp)->pg_num == MEMPOOL_PG_NUM_DEFAULT && \
(mp)->phys_addr == (mp)->elt_pa[0])
/**
* @internal Get a pointer to a mempool pointer in the object header.
* @param obj
@ -331,6 +386,49 @@ static inline void __mempool_check_cookies(const struct rte_mempool *mp,
#define __mempool_check_cookies(mp, obj_table_const, n, free) do {} while(0)
#endif /* RTE_LIBRTE_MEMPOOL_DEBUG */
/**
* A mempool object iterator callback function.
*/
typedef void (*rte_mempool_obj_iter_t)(void * /*obj_iter_arg*/,
void * /*obj_start*/,
void * /*obj_end*/,
uint32_t /*obj_index */);
/*
* Iterate over objects of the given size and alignment in the provided
* chunk of memory, which may consist of disjoint physical pages.
* For each object, call the provided callback (if any).
* Used to populate a mempool, to walk through all elements of a
* mempool, or to estimate how many elements of the given size could be
* created in the given memory buffer.
* @param vaddr
* Virtual address of the memory buffer.
* @param elt_num
* Maximum number of objects to iterate through.
* @param elt_sz
* Size of each object.
* @param paddr
* Array of physical addresses of the pages that comprise the given
* memory buffer.
* @param pg_num
* Number of elements in the paddr array.
* @param pg_shift
* LOG2 of the physical page size.
* @param obj_iter
* Object iterator callback function (could be NULL).
* @param obj_iter_arg
* User-defined parameter for the object iterator callback function.
*
* @return
* Number of objects iterated through.
*/
uint32_t rte_mempool_obj_iter(void *vaddr,
uint32_t elt_num, size_t elt_sz, size_t align,
const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg);
/**
* An object constructor callback function for mempool.
*
@ -354,6 +452,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *);
*
* This function uses ``memzone_reserve()`` to allocate memory. The
* pool contains n elements of elt_size. Its size is set to n.
* All elements of the mempool are allocated together with the mempool header,
* in one physically contiguous chunk of memory.
*
* @param name
* The name of the mempool.
@ -436,6 +536,199 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags);
/**
* Creates a new mempool named *name* in memory.
*
* This function uses ``memzone_reserve()`` to allocate memory. The
* pool contains n elements of elt_size. Its size is set to n.
* Depending on the input parameters, mempool elements can either be
* allocated together with the mempool header, or an externally provided
* memory buffer can be used to store the mempool objects. In the latter
* case, that external memory buffer can consist of a set of disjoint
* physical pages.
*
* @param name
* The name of the mempool.
* @param n
* The number of elements in the mempool. The optimum size (in terms of
* memory usage) for a mempool is when n is a power of two minus one:
* n = (2^q - 1).
* @param elt_size
* The size of each element.
* @param cache_size
* If cache_size is non-zero, the rte_mempool library will try to
* limit the accesses to the common lockless pool, by maintaining a
* per-lcore object cache. This argument must be lower or equal to
* CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose
* cache_size to have "n modulo cache_size == 0": if this is
* not the case, some elements will always stay in the pool and will
* never be used. The access to the per-lcore table is of course
* faster than the multi-producer/consumer pool. The cache can be
* disabled if the cache_size argument is set to 0; it can be useful to
* avoid losing objects in the cache. Note that even if not used, the
* memory space for cache is always reserved in a mempool structure,
* except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0.
* @param private_data_size
* The size of the private data appended after the mempool
* structure. This is useful for storing some private data after the
* mempool structure, as is done for rte_mbuf_pool for example.
* @param mp_init
* A function pointer that is called for initialization of the pool,
* before object initialization. The user can initialize the private
* data in this function if needed. This parameter can be NULL if
* not needed.
* @param mp_init_arg
* An opaque pointer to data that can be used in the mempool
* constructor function.
* @param obj_init
* A function pointer that is called for each object at
* initialization of the pool. The user can set some meta data in
* objects if needed. This parameter can be NULL if not needed.
* The obj_init() function takes the mempool pointer, the init_arg,
* the object pointer and the object number as parameters.
* @param obj_init_arg
* An opaque pointer to data that can be used as an argument for
* each call to the object constructor function.
* @param socket_id
* The *socket_id* argument is the socket identifier in the case of
* NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
* constraint for the reserved zone.
* @param flags
* The *flags* arguments is an OR of following flags:
* - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread
* between channels in RAM: the pool allocator will add padding
* between objects depending on the hardware configuration. See
* Memory alignment constraints for details. If this flag is set,
* the allocator will just align them to a cache line.
* - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are
* cache-aligned. This flag removes this constraint, and no
* padding will be present between objects. This flag implies
* MEMPOOL_F_NO_SPREAD.
* - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior
* when using rte_mempool_put() or rte_mempool_put_bulk() is
* "single-producer". Otherwise, it is "multi-producers".
* - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
* when using rte_mempool_get() or rte_mempool_get_bulk() is
* "single-consumer". Otherwise, it is "multi-consumers".
* @param vaddr
* Virtual address of the externally allocated memory buffer.
* Will be used to store mempool objects.
* @param paddr
* Array of physical addresses of the pages that comprise the given
* memory buffer.
* @param pg_num
* Number of elements in the paddr array.
* @param pg_shift
* LOG2 of the physical page size.
* @return
* The pointer to the new allocated mempool, on success. NULL on error
* with rte_errno set appropriately. Possible rte_errno values include:
* - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
* - E_RTE_SECONDARY - function was called from a secondary process instance
* - E_RTE_NO_TAILQ - no tailq list could be got for the ring or mempool list
* - EINVAL - cache size provided is too large
* - ENOSPC - the maximum number of memzones has already been allocated
* - EEXIST - a memzone with the same name already exists
* - ENOMEM - no appropriate memory area found in which to create memzone
*/
struct rte_mempool *
rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags, void *vaddr,
const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift);
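/*
 * Illustrative calling sequence for the xmem API, kept under #if 0:
 * size the buffer with rte_mempool_xmem_size(), obtain the physical
 * address of each of its pages by some platform-specific means (the
 * get_buffer() and lookup_paddr() helpers below are hypothetical),
 * then hand both to rte_mempool_xmem_create().
 */
#if 0
	uint32_t elt_num = 8192;
	uint32_t total_sz = rte_mempool_calc_obj_size(2048, 0, NULL);
	size_t len = rte_mempool_xmem_size(elt_num, total_sz, 21);
	uint32_t pg_num = len >> 21;
	void *va = get_buffer(len);            /* hypothetical, 2M-aligned */
	phys_addr_t pa[pg_num];
	lookup_paddr(va, pa, pg_num);          /* hypothetical */

	struct rte_mempool *mp = rte_mempool_xmem_create("xpool", elt_num,
			2048, 32, 0, NULL, NULL, NULL, NULL,
			rte_socket_id(), 0, va, pa, pg_num, 21);
#endif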
#ifdef RTE_LIBRTE_XEN_DOM0
/**
* Creates a new mempool named *name* in memory on Xen Dom0.
*
* This function uses ``rte_mempool_xmem_create()`` to allocate memory. The
* pool contains n elements of elt_size. Its size is set to n.
* All elements of the mempool are allocated together with the mempool header,
* and the memory buffer can consist of a set of disjoint physical pages.
*
* @param name
* The name of the mempool.
* @param n
* The number of elements in the mempool. The optimum size (in terms of
* memory usage) for a mempool is when n is a power of two minus one:
* n = (2^q - 1).
* @param elt_size
* The size of each element.
* @param cache_size
* If cache_size is non-zero, the rte_mempool library will try to
* limit the accesses to the common lockless pool, by maintaining a
* per-lcore object cache. This argument must be lower or equal to
* CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose
* cache_size to have "n modulo cache_size == 0": if this is
* not the case, some elements will always stay in the pool and will
* never be used. The access to the per-lcore table is of course
* faster than the multi-producer/consumer pool. The cache can be
* disabled if the cache_size argument is set to 0; it can be useful to
* avoid losing objects in the cache. Note that even if not used, the
* memory space for cache is always reserved in a mempool structure,
* except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0.
* @param private_data_size
* The size of the private data appended after the mempool
* structure. This is useful for storing some private data after the
* mempool structure, as is done for rte_mbuf_pool for example.
* @param mp_init
* A function pointer that is called for initialization of the pool,
* before object initialization. The user can initialize the private
* data in this function if needed. This parameter can be NULL if
* not needed.
* @param mp_init_arg
* An opaque pointer to data that can be used in the mempool
* constructor function.
* @param obj_init
* A function pointer that is called for each object at
* initialization of the pool. The user can set some meta data in
* objects if needed. This parameter can be NULL if not needed.
* The obj_init() function takes the mempool pointer, the init_arg,
* the object pointer and the object number as parameters.
* @param obj_init_arg
* An opaque pointer to data that can be used as an argument for
* each call to the object constructor function.
* @param socket_id
* The *socket_id* argument is the socket identifier in the case of
* NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
* constraint for the reserved zone.
* @param flags
* The *flags* arguments is an OR of following flags:
* - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread
* between channels in RAM: the pool allocator will add padding
* between objects depending on the hardware configuration. See
* Memory alignment constraints for details. If this flag is set,
* the allocator will just align them to a cache line.
* - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are
* cache-aligned. This flag removes this constraint, and no
* padding will be present between objects. This flag implies
* MEMPOOL_F_NO_SPREAD.
* - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior
* when using rte_mempool_put() or rte_mempool_put_bulk() is
* "single-producer". Otherwise, it is "multi-producers".
* - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
* when using rte_mempool_get() or rte_mempool_get_bulk() is
* "single-consumer". Otherwise, it is "multi-consumers".
* @return
* The pointer to the new allocated mempool, on success. NULL on error
* with rte_errno set appropriately. Possible rte_errno values include:
* - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
* - E_RTE_SECONDARY - function was called from a secondary process instance
* - E_RTE_NO_TAILQ - no tailq list could be got for the ring or mempool list
* - EINVAL - cache size provided is too large
* - ENOSPC - the maximum number of memzones has already been allocated
* - EEXIST - a memzone with the same name already exists
* - ENOMEM - no appropriate memory area found in which to create memzone
*/
struct rte_mempool *
rte_dom0_mempool_create(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags);
#endif
/**
* Dump the status of the mempool to the console.
*
@ -959,13 +1252,13 @@ rte_mempool_empty(const struct rte_mempool *mp)
* @return
* The physical address of the elt element.
*/
static inline phys_addr_t rte_mempool_virt2phy(const struct rte_mempool *mp,
const void *elt)
static inline phys_addr_t
rte_mempool_virt2phy(const struct rte_mempool *mp, const void *elt)
{
uintptr_t off;
off = (const char *)elt - (const char *)mp;
return mp->phys_addr + off;
off = (const char *)elt - (const char *)mp->elt_va_start;
return (mp->elt_pa[off >> mp->pg_shift] + (off & mp->pg_mask));
}
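/*
 * Example: with pg_shift = 21, an element 5M past elt_va_start has
 * off = 5M, so the lookup reads elt_pa[2] (off >> 21) and adds the
 * in-page offset of 1M (off & pg_mask).
 */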
@ -991,7 +1284,7 @@ void rte_mempool_audit(const struct rte_mempool *mp);
*/
static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
{
return (char *)mp + sizeof(struct rte_mempool);
return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
}
/**
@ -1005,13 +1298,73 @@ void rte_mempool_list_dump(void);
* @param name
* The name of the mempool.
* @return
* The pointer to the mempool matching the name, or NULL if not found.NULL on error
* The pointer to the mempool matching the name, or NULL if not found.
* NULL on error
* with rte_errno set appropriately. Possible rte_errno values include:
* - ENOENT - required entry not available to return.
*
*/
struct rte_mempool *rte_mempool_lookup(const char *name);
/**
* Given a desired size of the mempool element and mempool flags,
* calculates header, trailer, body and total sizes of the mempool object.
* @param elt_size
* The size of each element.
* @param flags
* The flags used for the mempool creation.
* Consult rte_mempool_create() for more information about possible values.
* @param sz
* The calculated detailed sizes of the mempool object. May be NULL.
* @return
* Total size of the mempool object.
*/
uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
struct rte_mempool_objsz *sz);
/**
* Calculate the maximum amount of memory required to store a given number
* of objects.
* Assumes that the memory buffer will be aligned at a page boundary.
* Note that if the object size is bigger than the page size, then it
* assumes that we have a subset of physically contiguous pages big enough
* to store at least one object.
* @param elt_num
* Number of elements.
* @param elt_sz
* The size of each element.
* @param pg_shift
* LOG2 of the physical pages size.
* @return
* Required memory size aligned at page boundary.
*/
size_t rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz,
uint32_t pg_shift);
/**
* Calculate how much memory is actually required within the given
* memory buffer to store the required number of objects.
* @param vaddr
* Virtual address of the externally allocated memory buffer.
* Will be used to store mempool objects.
* @param elt_num
* Number of elements.
* @param elt_sz
* The size of each element.
* @param paddr
* Array of physical addresses of the pages that comprise the given
* memory buffer.
* @param pg_num
* Number of elements in the paddr array.
* @param pg_shift
* LOG2 of the physical page size.
* @return
* Number of bytes needed to store the given number of objects,
* aligned to the given page size.
* If the provided memory buffer is not big enough:
* (-1) * the actual number of elements that can be stored in that buffer.
*/
ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift);
#ifdef __cplusplus
}
#endif

View File

@ -1101,7 +1101,12 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
if ((mz = rte_memzone_lookup(z_name)) != 0)
return (mz);
#ifdef RTE_LIBRTE_XEN_DOM0
return rte_memzone_reserve_bounded(z_name, ring_size,
socket_id, 0, CACHE_LINE_SIZE, RTE_PGSIZE_2M);
#else
return rte_memzone_reserve(z_name, ring_size, socket_id, 0);
#endif
}
static void
@ -1277,7 +1282,11 @@ eth_em_tx_queue_setup(struct rte_eth_dev *dev,
txq->port_id = dev->data->port_id;
txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(queue_idx));
#ifndef RTE_LIBRTE_XEN_DOM0
txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
#else
txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
#endif
txq->tx_ring = (struct e1000_data_desc *) tz->addr;
PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
@ -1404,8 +1413,12 @@ eth_em_rx_queue_setup(struct rte_eth_dev *dev,
0 : ETHER_CRC_LEN);
rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(queue_idx));
rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(queue_idx));
rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(queue_idx));
#ifndef RTE_LIBRTE_XEN_DOM0
rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
#else
rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
#endif
rxq->rx_ring = (struct e1000_rx_desc *) rz->addr;
PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",

View File

@ -1086,8 +1086,13 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
if (mz)
return mz;
#ifdef RTE_LIBRTE_XEN_DOM0
return rte_memzone_reserve_bounded(z_name, ring_size,
socket_id, 0, IGB_ALIGN, RTE_PGSIZE_2M);
#else
return rte_memzone_reserve_aligned(z_name, ring_size,
socket_id, 0, IGB_ALIGN);
#endif
}
static void
@ -1240,9 +1245,12 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
txq->port_id = dev->data->port_id;
txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
#ifndef RTE_LIBRTE_XEN_DOM0
txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
#else
txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
#endif
txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
/* Allocate software ring */
txq->sw_ring = rte_zmalloc("txq->sw_ring",
sizeof(struct igb_tx_entry) * nb_desc,
@ -1372,7 +1380,11 @@ eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
}
rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
#ifndef RTE_LIBRTE_XEN_DOM0
rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
#else
rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
#endif
rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
/* Allocate software ring. */
@ -1838,8 +1850,7 @@ eth_igb_rx_init(struct rte_eth_dev *dev)
/*
* Configure RX buffer size.
*/
mbp_priv = (struct rte_pktmbuf_pool_private *)
((char *)rxq->mb_pool + sizeof(struct rte_mempool));
mbp_priv = rte_mempool_get_priv(rxq->mb_pool);
buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
RTE_PKTMBUF_HEADROOM);
if (buf_size >= 1024) {
@ -2093,8 +2104,7 @@ eth_igbvf_rx_init(struct rte_eth_dev *dev)
/*
* Configure RX buffer size.
*/
mbp_priv = (struct rte_pktmbuf_pool_private *)
((char *)rxq->mb_pool + sizeof(struct rte_mempool));
mbp_priv = rte_mempool_get_priv(rxq->mb_pool);
buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
RTE_PKTMBUF_HEADROOM);
if (buf_size >= 1024) {

View File

@ -1758,8 +1758,13 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
if (mz)
return mz;
#ifdef RTE_LIBRTE_XEN_DOM0
return rte_memzone_reserve_bounded(z_name, ring_size,
socket_id, 0, IXGBE_ALIGN, RTE_PGSIZE_2M);
#else
return rte_memzone_reserve_aligned(z_name, ring_size,
socket_id, 0, IXGBE_ALIGN);
socket_id, 0, IXGBE_ALIGN);
#endif
}
static void
@ -1971,8 +1976,11 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
else
txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
#ifndef RTE_LIBRTE_XEN_DOM0
txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
#else
txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
#endif
txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
/* Allocate software ring */
@ -2221,8 +2229,11 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
rxq->rdh_reg_addr =
IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
}
#ifndef RTE_LIBRTE_XEN_DOM0
rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
#else
rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
#endif
rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
/*
@ -3440,8 +3451,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
* The value is in 1 KB resolution. Valid values can be from
* 1 KB to 16 KB.
*/
mbp_priv = (struct rte_pktmbuf_pool_private *)
((char *)rxq->mb_pool + sizeof(struct rte_mempool));
mbp_priv = rte_mempool_get_priv(rxq->mb_pool);
buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
RTE_PKTMBUF_HEADROOM);
srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
@ -3712,8 +3722,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
* The value is in 1 KB resolution. Valid values can be from
* 1 KB to 16 KB.
*/
mbp_priv = (struct rte_pktmbuf_pool_private *)
((char *)rxq->mb_pool + sizeof(struct rte_mempool));
mbp_priv = rte_mempool_get_priv(rxq->mb_pool);
buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
RTE_PKTMBUF_HEADROOM);
srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &

View File

@ -140,8 +140,7 @@ eth_pcap_rx(void *queue,
break;
/* Now get the space available for data in the mbuf */
mbp_priv = (struct rte_pktmbuf_pool_private *)
((char *)pcap_q->mb_pool + sizeof(struct rte_mempool));
mbp_priv = rte_mempool_get_priv(pcap_q->mb_pool);
buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
RTE_PKTMBUF_HEADROOM);