xen: import xenvirt pmd and vhost_xen

This provides a para-virtualization packet switching solution based on the
Xen hypervisor's Grant Table mechanism, giving simple and fast packet
switching between guest domains and the host domain according to
MAC address or VLAN tag.

The solution comprises two components: a Poll Mode Driver (PMD) acting
as the front end in the guest domain and a switching back end in the
host domain.  XenStore is used to exchange configuration information
between the PMD front end and the switching back end,
including grant reference IDs for the shared Virtio RX/TX rings, MAC
address, device state, and so on.

The front end PMD can be found in the Intel DPDK directory
lib/librte_pmd_xenvirt and the back end example in examples/vhost_xen.
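
For orientation only (not part of the patch): the back end discovers each
guest's configuration under the XenStore path /local/domain/<domid>/control/dpdk,
using the node-name suffixes defined in xen_vhost.h (mempool_gref,
rx_vring_gref, tx_vring_gref, mempool_va, vring_flag, ether_addr). A minimal,
purely illustrative sketch of reading one such node with libxenstore, assuming
virtio device index 0, might look like:

#include <stdio.h>
#include <xenstore.h>

/* Illustrative sketch only: fetch the comma-separated grant references
 * published by a guest for RX vring 0 of domain <domid>. */
static char *
read_rx_gref_node(struct xs_handle *xs, int domid)
{
	char path[256];
	unsigned int len;

	snprintf(path, sizeof(path),
		"/local/domain/%d/control/dpdk/0_rx_vring_gref", domid);
	return xs_read(xs, XBT_NULL, path, &len); /* e.g. "gref1,gref2,..." */
}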

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Authored by Bruce Richardson on 2014-02-12 15:50:11 +00:00; committed by David Marchand
parent 148f963fb5
commit 47bd46112b
20 changed files with 5373 additions and 4 deletions

@@ -0,0 +1,51 @@
# BSD LICENSE
#
# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ifeq ($(RTE_SDK),)
$(error "Please define RTE_SDK environment variable")
endif
# Default target, can be overridden by command line or environment
RTE_TARGET ?= x86_64-default-linuxapp-gcc
include $(RTE_SDK)/mk/rte.vars.mk
# binary name
APP = vhost-switch
# all sources are stored in SRCS-y
SRCS-y := main.c vhost_monitor.c xenstore_parse.c
CFLAGS += -O2 -I/usr/local/include -D_FILE_OFFSET_BITS=64 -Wno-unused-parameter
CFLAGS += $(WERROR_FLAGS)
LDFLAGS += -lxenstore
include $(RTE_SDK)/mk/rte.extapp.mk

examples/vhost_xen/main.c (1541 lines, new file)

File diff suppressed because it is too large.

examples/vhost_xen/main.h (85 lines, new file)

@@ -0,0 +1,85 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _MAIN_H_
#define _MAIN_H_
#ifdef RTE_EXEC_ENV_BAREMETAL
#define MAIN _main
#else
#define MAIN main
#endif
//#define DEBUG
#ifdef DEBUG
#define LOG_LEVEL RTE_LOG_DEBUG
#define LOG_DEBUG(log_type, fmt, args...) \
RTE_LOG(DEBUG, log_type, fmt, ##args)
#else
#define LOG_LEVEL RTE_LOG_INFO
#define LOG_DEBUG(log_type, fmt, args...) do{} while(0)
#endif
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_CONFIG RTE_LOGTYPE_USER1
#define RTE_LOGTYPE_DATA RTE_LOGTYPE_USER2
#define RTE_LOGTYPE_PORT RTE_LOGTYPE_USER3
/*
* Device linked list structure for data path.
*/
struct virtio_net_data_ll
{
struct virtio_net *dev; /* Pointer to device created by configuration core. */
struct virtio_net_data_ll *next; /* Pointer to next device in linked list. */
};
/*
* Structure containing data core specific information.
*/
struct lcore_ll_info
{
struct virtio_net_data_ll *ll_root_free; /* Pointer to head in free linked list. */
struct virtio_net_data_ll *ll_root_used; /* Pointer to head of used linked list. */
uint32_t device_num; /* Number of devices on lcore. */
volatile uint8_t dev_removal_flag; /* Flag to synchronize device removal. */
};
struct lcore_info
{
struct lcore_ll_info *lcore_ll; /* Pointer to data core specific lcore_ll_info struct */
};
int MAIN(int argc, char **argv);
#endif /* _MAIN_H_ */

@@ -0,0 +1,595 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <dirent.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/xen-compat.h>
#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
#include <xs.h>
#else
#include <xenstore.h>
#endif
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_net.h>
#include <rte_ethdev.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include "virtio-net.h"
#include "xen_vhost.h"
struct virtio_watch {
struct xs_handle *xs;
int watch_fd;
};
/* device ops to add/remove device to/from data core. */
static struct virtio_net_device_ops const *notify_ops;
/* root address of the linked list in the configuration core. */
static struct virtio_net_config_ll *ll_root = NULL;
/* root address of VM. */
static struct xen_guestlist guest_root;
static struct virtio_watch watch;
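/*
* Lay out a virtqueue over the guest-shared ring memory at p: the
* descriptor table first, the available ring immediately after it, and
* the used ring aligned up to 'align' (the standard split-virtqueue layout).
*/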
static void
vq_vring_init(struct vhost_virtqueue *vq, unsigned int num, uint8_t *p,
unsigned long align)
{
vq->size = num;
vq->desc = (struct vring_desc *) p;
vq->avail = (struct vring_avail *) (p +
num * sizeof(struct vring_desc));
vq->used = (void *)
RTE_ALIGN_CEIL( (uintptr_t)(&vq->avail->ring[num]), align);
}
static int
init_watch(void)
{
struct xs_handle *xs;
int ret;
int fd;
/* get a connection to the daemon */
xs = xs_daemon_open();
if (xs == NULL) {
RTE_LOG(ERR, XENHOST, "xs_daemon_open failed\n");
return (-1);
}
ret = xs_watch(xs, "/local/domain", "mytoken");
if (ret == 0) {
RTE_LOG(ERR, XENHOST, "%s: xs_watch failed\n", __func__);
xs_daemon_close(xs);
return (-1);
}
/* We are notified of read availability on the watch via the file descriptor. */
fd = xs_fileno(xs);
watch.xs = xs;
watch.watch_fd = fd;
TAILQ_INIT(&guest_root);
return 0;
}
static struct xen_guest *
get_xen_guest(int dom_id)
{
struct xen_guest *guest = NULL;
TAILQ_FOREACH(guest, &guest_root, next) {
if(guest->dom_id == dom_id)
return guest;
}
return (NULL);
}
static struct xen_guest *
add_xen_guest(int32_t dom_id)
{
struct xen_guest *guest = NULL;
if ((guest = get_xen_guest(dom_id)) != NULL)
return guest;
guest = (struct xen_guest * )calloc(1, sizeof(struct xen_guest));
if (guest) {
RTE_LOG(INFO, XENHOST, " %s: add newly created guest, %d vrings so far\n", __func__, guest->vring_num);
TAILQ_INSERT_TAIL(&guest_root, guest, next);
guest->dom_id = dom_id;
}
return guest;
}
static void
cleanup_device(struct virtio_net_config_ll *ll_dev)
{
if (ll_dev == NULL)
return;
if (ll_dev->dev.virtqueue_rx) {
rte_free(ll_dev->dev.virtqueue_rx);
ll_dev->dev.virtqueue_rx = NULL;
}
if (ll_dev->dev.virtqueue_tx) {
rte_free(ll_dev->dev.virtqueue_tx);
ll_dev->dev.virtqueue_tx = NULL;
}
free(ll_dev);
}
/*
* Add entry containing a device to the device configuration linked list.
*/
static void
add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
{
struct virtio_net_config_ll *ll_dev = ll_root;
/* If ll_dev is NULL this is the first device, so fall through to the else branch. */
if (ll_dev) {
/* If the 1st device_id != 0 then we insert our device here. */
if (ll_dev->dev.device_fh != 0) {
new_ll_dev->dev.device_fh = 0;
new_ll_dev->next = ll_dev;
ll_root = new_ll_dev;
} else {
/* iterate through the list until we find an unused device_fh,
* and insert the device at that entry
*/
while ((ll_dev->next != NULL) && (ll_dev->dev.device_fh == (ll_dev->next->dev.device_fh - 1)))
ll_dev = ll_dev->next;
new_ll_dev->dev.device_fh = ll_dev->dev.device_fh + 1;
new_ll_dev->next = ll_dev->next;
ll_dev->next = new_ll_dev;
}
} else {
ll_root = new_ll_dev;
ll_root->dev.device_fh = 0;
}
}
/*
* Remove an entry from the device configuration linked list.
*/
static struct virtio_net_config_ll *
rm_config_ll_entry(struct virtio_net_config_ll *ll_dev, struct virtio_net_config_ll *ll_dev_last)
{
/* First remove the device and then clean it up. */
if (ll_dev == ll_root) {
ll_root = ll_dev->next;
cleanup_device(ll_dev);
return ll_root;
} else {
ll_dev_last->next = ll_dev->next;
cleanup_device(ll_dev);
return ll_dev_last->next;
}
}
/*
* Retrieves an entry from the devices configuration linked list.
*/
static struct virtio_net_config_ll *
get_config_ll_entry(unsigned int virtio_idx, unsigned int dom_id)
{
struct virtio_net_config_ll *ll_dev = ll_root;
/* Loop through linked list until the dom_id is found. */
while (ll_dev != NULL) {
if (ll_dev->dev.dom_id == dom_id && ll_dev->dev.virtio_idx == virtio_idx)
return ll_dev;
ll_dev = ll_dev->next;
}
return NULL;
}
/*
* Initialise all variables in device structure.
*/
static void
init_dev(struct virtio_net *dev)
{
RTE_SET_USED(dev);
}
static struct
virtio_net_config_ll *new_device(unsigned int virtio_idx, struct xen_guest *guest)
{
struct virtio_net_config_ll *new_ll_dev;
struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
size_t size, vq_ring_size, vq_size = VQ_DESC_NUM;
void *vq_ring_virt_mem;
uint64_t gpa;
uint32_t i;
/* Setup device and virtqueues. */
new_ll_dev = calloc(1, sizeof(struct virtio_net_config_ll));
virtqueue_rx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), CACHE_LINE_SIZE);
virtqueue_tx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), CACHE_LINE_SIZE);
if (new_ll_dev == NULL || virtqueue_rx == NULL || virtqueue_tx == NULL)
goto err;
new_ll_dev->dev.virtqueue_rx = virtqueue_rx;
new_ll_dev->dev.virtqueue_tx = virtqueue_tx;
new_ll_dev->dev.dom_id = guest->dom_id;
new_ll_dev->dev.virtio_idx = virtio_idx;
/* Initialise device and virtqueues. */
init_dev(&new_ll_dev->dev);
size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
(void)vq_ring_size;
vq_ring_virt_mem = guest->vring[virtio_idx].rxvring_addr;
vq_vring_init(virtqueue_rx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN);
virtqueue_rx->size = vq_size;
virtqueue_rx->vhost_hlen = sizeof(struct virtio_net_hdr);
vq_ring_virt_mem = guest->vring[virtio_idx].txvring_addr;
vq_vring_init(virtqueue_tx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN);
virtqueue_tx->size = vq_size;
memcpy(&new_ll_dev->dev.mac_address, &guest->vring[virtio_idx].addr, sizeof(struct ether_addr));
/* virtio_memory has to be one per domid */
new_ll_dev->dev.mem = malloc(sizeof(struct virtio_memory) + sizeof(struct virtio_memory_regions) * MAX_XENVIRT_MEMPOOL);
new_ll_dev->dev.mem->nregions = guest->pool_num;
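/*
* For each guest mempool, record the guest address range and
* address_offset = host virtual address - guest address, so that a
* guest buffer address can later be translated by adding the offset.
*/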
for (i = 0; i < guest->pool_num; i++) {
gpa = new_ll_dev->dev.mem->regions[i].guest_phys_address = (uint64_t)guest->mempool[i].gva;
new_ll_dev->dev.mem->regions[i].guest_phys_address_end = gpa + guest->mempool[i].mempfn_num * getpagesize();
new_ll_dev->dev.mem->regions[i].address_offset = (uint64_t)guest->mempool[i].hva - gpa;
}
new_ll_dev->next = NULL;
/* Add entry to device configuration linked list. */
add_config_ll_entry(new_ll_dev);
return new_ll_dev;
err:
if (new_ll_dev)
free(new_ll_dev);
if (virtqueue_rx)
rte_free(virtqueue_rx);
if (virtqueue_tx)
rte_free(virtqueue_tx);
return NULL;
}
static void
destroy_guest(struct xen_guest *guest)
{
uint32_t i;
for (i = 0; i < guest->vring_num; i++)
cleanup_vring(&guest->vring[i]);
/* clean mempool */
for (i = 0; i < guest->pool_num; i++)
cleanup_mempool(&guest->mempool[i]);
free(guest);
return;
}
/*
* This function will cleanup the device and remove it from device configuration linked list.
*/
static void
destroy_device(unsigned int virtio_idx, unsigned int dom_id)
{
struct virtio_net_config_ll *ll_dev_cur_ctx, *ll_dev_last = NULL;
struct virtio_net_config_ll *ll_dev_cur = ll_root;
/* clean virtio device */
struct xen_guest *guest = NULL;
guest = get_xen_guest(dom_id);
if (guest == NULL)
return;
/* Find the linked list entry for the device to be removed. */
ll_dev_cur_ctx = get_config_ll_entry(virtio_idx, dom_id);
while (ll_dev_cur != NULL) {
/* When the matching entry is found, remove it from the list. */
if (ll_dev_cur == ll_dev_cur_ctx) {
if ((ll_dev_cur->dev.flags & VIRTIO_DEV_RUNNING))
notify_ops->destroy_device(&(ll_dev_cur->dev));
ll_dev_cur = rm_config_ll_entry(ll_dev_cur, ll_dev_last);
} else {
ll_dev_last = ll_dev_cur;
ll_dev_cur = ll_dev_cur->next;
}
}
RTE_LOG(INFO, XENHOST, " %s guest:%p vring:%p rxvring:%p txvring:%p flag:%p\n",
__func__, guest, &guest->vring[virtio_idx], guest->vring[virtio_idx].rxvring_addr, guest->vring[virtio_idx].txvring_addr, guest->vring[virtio_idx].flag);
cleanup_vring(&guest->vring[virtio_idx]);
guest->vring[virtio_idx].removed = 1;
guest->vring_num -= 1;
}
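/*
* Scan every guest for vrings whose shared flag page has been cleared
* (i.e. the guest unmapped its rings), tear those devices down, and then
* free any guest that no longer has an active vring.
*/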
static void
watch_unmap_event(void)
{
int i;
struct xen_guest *guest = NULL;
bool remove_request;
TAILQ_FOREACH(guest, &guest_root, next) {
for (i = 0; i < MAX_VIRTIO; i++) {
if (guest->vring[i].dom_id && guest->vring[i].removed == 0 && *guest->vring[i].flag == 0) {
RTE_LOG(INFO, XENHOST, "\n\n");
RTE_LOG(INFO, XENHOST, " #####%s: (%d, %d) to be removed\n",
__func__,
guest->vring[i].dom_id,
i);
destroy_device(i, guest->dom_id);
RTE_LOG(INFO, XENHOST, " %s: DOM %u, vring num: %d\n",
__func__,
guest->dom_id,
guest->vring_num);
}
}
}
_find_next_remove:
guest = NULL;
remove_request = false;
TAILQ_FOREACH(guest, &guest_root, next) {
if (guest->vring_num == 0) {
remove_request = true;
break;
}
}
if (remove_request == true) {
TAILQ_REMOVE(&guest_root, guest, next);
RTE_LOG(INFO, XENHOST, " #####%s: destroy guest (%d)\n", __func__, guest->dom_id);
destroy_guest(guest);
goto _find_next_remove;
}
return;
}
/*
* If the guest starts first, or the host starts first, everything works.
* But if the guest has run for some time and the host then stops and
* restarts, the host's last_used_idx is reset to 0 while the guest's
* index is not; handling that case remains an open problem.
*/
static void virtio_init(void)
{
uint32_t len, e_num;
uint32_t i,j;
char **dom;
char *status;
int dom_id;
char path[PATH_MAX];
char node[PATH_MAX];
xs_transaction_t th;
struct xen_guest *guest;
struct virtio_net_config_ll *net_config;
char *end;
int val;
/* set up the environment to watch the xenstore nodes */
if (init_watch() < 0)
return;
dom = xs_directory(watch.xs, XBT_NULL, "/local/domain", &e_num);
for (i = 0; i < e_num; i++) {
errno = 0;
dom_id = strtol(dom[i], &end, 0);
if (errno != 0 || end == NULL || dom_id == 0)
continue;
for (j = 0; j < RTE_MAX_ETHPORTS; j++) {
rte_snprintf(node, PATH_MAX, "%s%d", VIRTIO_START, j);
rte_snprintf(path, PATH_MAX, XEN_VM_NODE_FMT,
dom_id, node);
th = xs_transaction_start(watch.xs);
status = xs_read(watch.xs, th, path, &len);
xs_transaction_end(watch.xs, th, false);
if (status == NULL)
break;
/* if there's any valid virtio device */
errno = 0;
val = strtol(status, &end, 0);
if (errno != 0 || end == NULL || end == status)
val = 0;
if (val == 1) {
guest = add_xen_guest(dom_id);
if (guest == NULL)
continue;
RTE_LOG(INFO, XENHOST, " a new virtio device exists in xenstore, creating it\n\n");
RTE_LOG(INFO, XENHOST, " parse_vringnode dom_id %d virtioidx %d\n",dom_id,j);
if (parse_vringnode(guest, j)) {
RTE_LOG(ERR, XENHOST, " there is invalid information in xenstore\n");
TAILQ_REMOVE(&guest_root, guest, next);
destroy_guest(guest);
continue;
}
/* if pool_num > 0, the mempool has already been parsed */
if (guest->pool_num == 0 && parse_mempoolnode(guest)) {
RTE_LOG(ERR, XENHOST, " there is invalid mempool information in xenstore\n");
TAILQ_REMOVE(&guest_root, guest, next);
destroy_guest(guest);
continue;
}
net_config = new_device(j, guest);
/* everything is ready now, add the device to a data core */
notify_ops->new_device(&net_config->dev);
}
}
}
free(dom);
return;
}
void
virtio_monitor_loop(void)
{
char **vec;
xs_transaction_t th;
char *buf;
unsigned int len;
unsigned int dom_id;
uint32_t virtio_idx;
struct xen_guest *guest;
struct virtio_net_config_ll *net_config;
enum fieldnames {
FLD_NULL = 0,
FLD_LOCAL,
FLD_DOMAIN,
FLD_ID,
FLD_CONTROL,
FLD_DPDK,
FLD_NODE,
_NUM_FLD
};
char *str_fld[_NUM_FLD];
char *str;
char *end;
virtio_init();
while (1) {
watch_unmap_event();
usleep(50);
vec = xs_check_watch(watch.xs);
if (vec == NULL)
continue;
th = xs_transaction_start(watch.xs);
buf = xs_read(watch.xs, th, vec[XS_WATCH_PATH],&len);
xs_transaction_end(watch.xs, th, false);
if (buf) {
/* a vhost-related node exists in xenstore */
if (rte_strsplit(vec[XS_WATCH_PATH], strnlen(vec[XS_WATCH_PATH], PATH_MAX),
str_fld, _NUM_FLD, '/') == _NUM_FLD) {
if (strstr(str_fld[FLD_NODE], VIRTIO_START)) {
errno = 0;
str = str_fld[FLD_ID];
dom_id = strtoul(str, &end, 0);
if (errno != 0 || end == NULL || end == str ) {
RTE_LOG(INFO, XENHOST, "invalid domain id\n");
continue;
}
errno = 0;
str = str_fld[FLD_NODE] + sizeof(VIRTIO_START) - 1;
virtio_idx = strtoul(str, &end, 0);
if (errno != 0 || end == NULL || end == str
|| virtio_idx >= MAX_VIRTIO) {
RTE_LOG(INFO, XENHOST, "invalid virtio idx\n");
continue;
}
RTE_LOG(INFO, XENHOST, " #####virtio dev (%d, %d) is started\n", dom_id, virtio_idx);
guest = add_xen_guest(dom_id);
if (guest == NULL)
continue;
guest->dom_id = dom_id;
if (parse_vringnode(guest, virtio_idx)) {
RTE_LOG(ERR, XENHOST, " there is invalid information in xenstore\n");
/* FIXME: distinguish a newly added guest from a pre-existing one before removing it */
TAILQ_REMOVE(&guest_root, guest, next);
destroy_guest(guest);
continue;
}
/* if pool_num > 0, the mempool has already been parsed */
if (guest->pool_num == 0 && parse_mempoolnode(guest)) {
RTE_LOG(ERR, XENHOST, " there is invalid mempool information in xenstore\n");
TAILQ_REMOVE(&guest_root, guest, next);
destroy_guest(guest);
continue;
}
net_config = new_device(virtio_idx, guest);
RTE_LOG(INFO, XENHOST, " Add to dataplane core\n");
notify_ops->new_device(&net_config->dev);
}
}
}
free(vec);
}
return;
}
/*
* Register ops so that we can add/remove device to data core.
*/
int
init_virtio_xen(struct virtio_net_device_ops const *const ops)
{
notify_ops = ops;
if (xenhost_init())
return -1;
return 0;
}

@@ -0,0 +1,115 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _VIRTIO_NET_H_
#define _VIRTIO_NET_H_
#include <stdint.h>
#define VQ_DESC_NUM 256
/* Used to indicate that the device is running on a data core */
#define VIRTIO_DEV_RUNNING 1
/*
* Structure contains variables relevant to TX/RX virtqueues.
*/
struct vhost_virtqueue
{
struct vring_desc *desc; /* Virtqueue descriptor ring. */
struct vring_avail *avail; /* Virtqueue available ring. */
struct vring_used *used; /* Virtqueue used ring. */
uint32_t size; /* Size of descriptor ring. */
uint32_t vhost_hlen; /* Vhost header length (varies depending on RX mergeable buffers). */
volatile uint16_t last_used_idx; /* Last index used on the available ring */
volatile uint16_t last_used_idx_res; /* Used for multiple devices reserving buffers. */
} __rte_cache_aligned;
/*
* Device structure contains all configuration information relating to the device.
*/
struct virtio_net
{
struct vhost_virtqueue *virtqueue_tx; /* Contains all TX virtqueue information. */
struct vhost_virtqueue *virtqueue_rx; /* Contains all RX virtqueue information. */
struct virtio_memory *mem; /* QEMU memory and memory region information. */
struct ether_addr mac_address; /* Device MAC address (Obtained on first TX packet). */
uint32_t flags; /* Device flags. Only used to check if device is running on data core. */
uint32_t vlan_tag; /* Vlan tag for device. Currently set to device_id (0-63). */
uint32_t vmdq_rx_q;
uint64_t device_fh; /* device identifier. */
uint16_t coreid;
volatile uint8_t ready; /* A device is set as ready if the MAC address has been set. */
volatile uint8_t remove; /* Device is marked for removal from the data core. */
uint32_t virtio_idx; /* Index of virtio device */
uint32_t dom_id; /* Domain id of xen guest */
} __rte_cache_aligned;
/*
* Device linked list structure for configuration.
*/
struct virtio_net_config_ll
{
struct virtio_net dev; /* Virtio device. */
struct virtio_net_config_ll *next; /* Next entry on linked list. */
};
/*
* Information relating to memory regions, including offsets to addresses in QEMU's memory file.
*/
struct virtio_memory_regions {
uint64_t guest_phys_address; /* Base guest physical address of region. */
uint64_t guest_phys_address_end; /* End guest physical address of region. */
uint64_t memory_size; /* Size of region. */
uint64_t userspace_address; /* Base userspace address of region. */
uint64_t address_offset; /* Offset of region for address translation. */
};
/*
* Memory structure includes region and mapping information.
*/
struct virtio_memory {
uint32_t nregions; /* Number of memory regions. */
struct virtio_memory_regions regions[0]; /* Memory region information. */
};
/*
* Device operations to add/remove device.
*/
struct virtio_net_device_ops {
int (* new_device)(struct virtio_net *); /* Add device. */
void (* destroy_device) (volatile struct virtio_net *); /* Remove device. */
};
struct vhost_net_device_ops const * get_virtio_net_callbacks(void);
#endif

@@ -0,0 +1,149 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _XEN_VHOST_H_
#define _XEN_VHOST_H_
#include <stdint.h>
#include <rte_tailq.h>
#include <rte_ether.h>
#include "virtio-net.h"
#define RTE_LOGTYPE_XENHOST RTE_LOGTYPE_USER1
#define XEN_VM_ROOTNODE_FMT "/local/domain/%d/control/dpdk"
#define XEN_VM_NODE_FMT "/local/domain/%d/control/dpdk/%s"
#define XEN_MEMPOOL_SUFFIX "mempool_gref"
#define XEN_RXVRING_SUFFIX "rx_vring_gref"
#define XEN_TXVRING_SUFFIX "tx_vring_gref"
#define XEN_GVA_SUFFIX "mempool_va"
#define XEN_VRINGFLAG_SUFFIX "vring_flag"
#define XEN_ADDR_SUFFIX "ether_addr"
#define VIRTIO_START "event_type_start_"
#define XEN_GREF_SPLITTOKEN ','
#define MAX_XENVIRT_MEMPOOL 16
#define MAX_VIRTIO 32
#define MAX_GREF_PER_NODE 64 /* 128 MB memory */
#define PAGE_SIZE 4096
#define PAGE_PFNNUM (PAGE_SIZE / sizeof(uint32_t))
#define XEN_GNTDEV_FNAME "/dev/xen/gntdev"
/* xen grant reference info in one grant node */
struct xen_gnt {
uint32_t gref; /* grant reference for this node */
union {
int gref; /* grant reference */
uint32_t pfn_num; /* guest pfn number of grant reference */
} gref_pfn[PAGE_PFNNUM];
} __attribute__((__packed__));
/* structure for mempool or vring node list */
struct xen_gntnode {
uint32_t gnt_num; /* grant reference number */
struct xen_gnt *gnt_info; /* grant reference info */
};
struct xen_vring {
uint32_t dom_id;
uint32_t virtio_idx; /* index of virtio device */
void *rxvring_addr; /* mapped virtual address of rxvring */
void *txvring_addr; /* mapped virtual address of txvring */
uint32_t rxpfn_num; /* number of gpfn for rxvring */
uint32_t txpfn_num; /* number of gpfn for txvring */
uint32_t *rxpfn_tbl; /* array of rxvring gpfn */
uint32_t *txpfn_tbl; /* array of txvring gpfn */
uint64_t *rx_pindex; /* index used to release rx grefs */
uint64_t *tx_pindex; /* index used to release tx grefs */
uint64_t flag_index;
uint8_t *flag; /* cleared to zero on guest unmap */
struct ether_addr addr; /* ethernet address of virtio device */
uint8_t removed;
};
struct xen_mempool {
uint32_t dom_id; /* guest domain id */
uint32_t pool_idx; /* index of memory pool */
void *gva; /* guest virtual address of mbuf pool */
void *hva; /* host virtual address of mbuf pool */
uint32_t mempfn_num; /* number of gpfn for mbuf pool */
uint32_t *mempfn_tbl; /* array of mbuf pool gpfn */
uint64_t *pindex; /* index used to release grefs */
};
struct xen_guest {
TAILQ_ENTRY(xen_guest) next;
int32_t dom_id; /* guest domain id */
uint32_t pool_num; /* number of mbuf pool of the guest */
uint32_t vring_num; /* number of virtio ports of the guest */
/* array contain the guest mbuf pool info */
struct xen_mempool mempool[MAX_XENVIRT_MEMPOOL];
/* array contain the guest rx/tx vring info */
struct xen_vring vring[MAX_VIRTIO];
};
TAILQ_HEAD(xen_guestlist, xen_guest);
int
parse_mempoolnode(struct xen_guest *guest);
int
xenhost_init(void);
int
parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx);
void
cleanup_mempool(struct xen_mempool *mempool);
void
cleanup_vring(struct xen_vring *vring);
void
virtio_monitor_loop(void);
int
init_virtio_xen(struct virtio_net_device_ops const * const);
#endif
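
The diff of examples/vhost_xen/main.c is suppressed above, so here is a hedged
sketch only (the callback names are hypothetical, not the ones main.c actually
uses) of how an application drives this API: register add/remove callbacks via
init_virtio_xen() and then run virtio_monitor_loop() on the configuration core.

/* Sketch; assumes xen_vhost.h / virtio-net.h are included. */
static int add_device(struct virtio_net *dev)
{
	/* attach the new device to a data core (application-specific) */
	(void)dev;
	return 0;
}

static void remove_device(volatile struct virtio_net *dev)
{
	/* detach the device from its data core (application-specific) */
	(void)dev;
}

static const struct virtio_net_device_ops virtio_ops = {
	.new_device     = add_device,
	.destroy_device = remove_device,
};

static void
config_core_main(void)
{
	if (init_virtio_xen(&virtio_ops) < 0)	/* opens xenstore and /dev/xen/gntdev */
		return;
	virtio_monitor_loop();			/* scans and watches xenstore; never returns */
}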

@@ -0,0 +1,786 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/sys/gntalloc.h>
#include <xen/sys/gntdev.h>
#include <xen/xen-compat.h>
#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
#include <xs.h>
#else
#include <xenstore.h>
#endif
#include <rte_common.h>
#include <rte_memory.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include <rte_log.h>
#include <rte_debug.h>
#include "xen_vhost.h"
/* xenstore handle */
static struct xs_handle *xs = NULL;
/* gntdev file descriptor to map grant pages */
static int d_fd = -1;
/*
* The grant node format in xenstore for vring/mpool is as follows:
* idx#_rx_vring_gref = "gref1#, gref2#, gref3#"
* idx#_mempool_gref = "gref1#, gref2#, gref3#"
* Each gref# is the grant reference of a shared page.
* Each shared page stores an array of grant_node_item entries.
*/
struct grant_node_item {
uint32_t gref;
uint32_t pfn;
} __attribute__((packed));
int cmdline_parse_etheraddr(void *tk, const char *srcbuf,
void *res);
/* Map grant ref refid at addr_ori*/
static void *
xen_grant_mmap(void *addr_ori, int domid, int refid, uint64_t *pindex)
{
struct ioctl_gntdev_map_grant_ref arg;
void *addr = NULL;
int pg_sz = getpagesize();
arg.count = 1;
arg.refs[0].domid = domid;
arg.refs[0].ref = refid;
int rv = ioctl(d_fd, IOCTL_GNTDEV_MAP_GRANT_REF, &arg);
if (rv) {
RTE_LOG(ERR, XENHOST, " %s: (%d,%d) %s (ioctl failed)\n", __func__,
domid, refid, strerror(errno));
return NULL;
}
if (addr_ori == NULL)
addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED,
d_fd, arg.index);
else
addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED,
d_fd, arg.index);
if (addr == MAP_FAILED) {
RTE_LOG(ERR, XENHOST, " %s: (%d, %d) %s (map failed)\n", __func__,
domid, refid, strerror(errno));
return NULL;
}
if (pindex)
*pindex = arg.index;
return addr;
}
/* Unmap one grant ref, and munmap must be called before this */
static int
xen_unmap_grant_ref(uint64_t index)
{
struct ioctl_gntdev_unmap_grant_ref arg;
int rv;
arg.count = 1;
arg.index = index;
rv = ioctl(d_fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &arg);
if (rv) {
RTE_LOG(ERR, XENHOST, " %s: index 0x%" PRIx64 " unmap failed\n", __func__, index);
return -1;
}
return 0;
}
/*
* Reserve a virtual address space.
* On success, returns the pointer. On failure, returns NULL.
*/
static void *
get_xen_virtual(size_t size, size_t page_sz)
{
void *addr;
uintptr_t aligned_addr;
addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED) {
RTE_LOG(ERR, XENHOST, "failed to get a virtual area\n");
return NULL;
}
aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz);
munmap(addr, aligned_addr - (uintptr_t)addr);
munmap((void *)(aligned_addr + size), page_sz + (uintptr_t)addr - aligned_addr);
addr = (void *)(aligned_addr);
return addr;
}
static void
free_xen_virtual(void *addr, size_t size, size_t page_sz __rte_unused)
{
if (addr)
munmap(addr, size);
}
/*
* Returns val str in xenstore.
* @param path
* Full path string for key
* @return
* Pointer to Val str, NULL on failure
*/
static char *
xen_read_node(char *path, uint32_t *len)
{
char *buf;
buf = xs_read(xs, XBT_NULL, path, len);
return buf;
}
static int
cal_pagenum(struct xen_gnt *gnt)
{
unsigned int i;
/*
* the items in the page are in the format of
* gref#,pfn#,...,gref#,pfn#
* FIXME: gref 0 is reserved by the system, so it is used as the terminator.
*/
for (i = 0; i < (PAGE_PFNNUM) / 2; i++) {
if (gnt->gref_pfn[i * 2].gref <= 0)
break;
}
return i;
}
/* Frees memory allocated to a grant node */
static void
xen_free_gntnode(struct xen_gntnode *gntnode)
{
if (gntnode == NULL)
return;
if (gntnode->gnt_info)
free(gntnode->gnt_info);
free(gntnode);
}
/*
* Parse a grant node.
* @param domid
* Guest domain id.
* @param path
* Full path string for a grant node, e.g. for the following (key, val) pair
* idx#_mempool_gref = "gref#, gref#, gref#"
* the path is '/local/domain/domid/control/dpdk/idx#_mempool_gref'.
* Each gref# names a shared page containing packed (gref, pfn) entries.
* @return
* Returns the pointer to xen_gntnode
*/
static struct xen_gntnode *
parse_gntnode(int dom_id, char *path)
{
char **gref_list = NULL;
uint32_t i, len, gref_num;
void *addr = NULL;
char *buf = NULL;
struct xen_gntnode *gntnode = NULL;
struct xen_gnt *gnt = NULL;
int pg_sz = getpagesize();
char *end;
uint64_t index;
if ((buf = xen_read_node(path, &len)) == NULL)
goto err;
gref_list = malloc(MAX_GREF_PER_NODE * sizeof(char *));
if (gref_list == NULL)
goto err;
gref_num = rte_strsplit(buf, len, gref_list, MAX_GREF_PER_NODE,
XEN_GREF_SPLITTOKEN);
if (gref_num == 0) {
RTE_LOG(ERR, XENHOST, " %s: invalid grant node format\n", __func__);
goto err;
}
gntnode = (struct xen_gntnode *)calloc(1, sizeof(struct xen_gntnode));
gnt = (struct xen_gnt *)calloc(gref_num, sizeof(struct xen_gnt));
if (gnt == NULL || gntnode == NULL)
goto err;
for (i = 0; i < gref_num; i++) {
errno = 0;
gnt[i].gref = strtol(gref_list[i], &end, 0);
if (errno != 0 || end == NULL || end == gref_list[i] ||
(*end != '\0' && *end != XEN_GREF_SPLITTOKEN)) {
RTE_LOG(ERR, XENHOST, " %s: parse grant node item failed\n", __func__);
goto err;
}
addr = xen_grant_mmap(NULL, dom_id, gnt[i].gref, &index);
if (addr == NULL) {
RTE_LOG(ERR, XENHOST, " %s: map gref %u failed\n", __func__, gnt[i].gref);
goto err;
}
RTE_LOG(INFO, XENHOST, " %s: map gref %u to %p\n", __func__, gnt[i].gref, addr);
memcpy(gnt[i].gref_pfn, addr, pg_sz);
if (munmap(addr, pg_sz)) {
RTE_LOG(INFO, XENHOST, " %s: unmap gref %u failed\n", __func__, gnt[i].gref);
goto err;
}
if (xen_unmap_grant_ref(index)) {
RTE_LOG(INFO, XENHOST, " %s: release gref %u failed\n", __func__, gnt[i].gref);
goto err;
}
}
gntnode->gnt_num = gref_num;
gntnode->gnt_info = gnt;
free(buf);
free(gref_list);
return gntnode;
err:
if (gnt)
free(gnt);
if (gntnode)
free(gntnode);
if (gref_list)
free(gref_list);
if (buf)
free(buf);
return NULL;
}
/*
* This function maps the grant node of a vring or mbuf pool into a contiguous virtual address space
* and returns the mapped address, pfn array and index array.
* @param gntnode
* Pointer to grant node
* @param domid
* Guest domain id
* @param ppfn
* Pointer to pfn array, caller should free this array
* @param pgs
* Pointer to number of pages
* @param ppindex
* Pointer to index array, used to release grefs when freeing this node
* @return
* Pointer to mapped virtual address, NULL on failure
*/
static void *
map_gntnode(struct xen_gntnode *gntnode, int domid, uint32_t **ppfn, uint32_t *pgs, uint64_t **ppindex)
{
struct xen_gnt *gnt;
uint32_t i, j;
size_t total_pages = 0;
void *addr;
uint32_t *pfn;
uint64_t *pindex;
uint32_t pfn_num = 0;
int pg_sz;
if (gntnode == NULL)
return NULL;
pg_sz = getpagesize();
for (i = 0; i < gntnode->gnt_num; i++) {
gnt = gntnode->gnt_info + i;
total_pages += cal_pagenum(gnt);
}
if ((addr = get_xen_virtual(total_pages * pg_sz, pg_sz)) == NULL) {
RTE_LOG(ERR, XENHOST, " %s: failed get_xen_virtual\n", __func__);
return NULL;
}
pfn = calloc(total_pages, (size_t)sizeof(uint32_t));
pindex = calloc(total_pages, (size_t)sizeof(uint64_t));
if (pfn == NULL || pindex == NULL) {
free_xen_virtual(addr, total_pages * pg_sz, pg_sz);
free(pfn);
free(pindex);
return NULL;
}
RTE_LOG(INFO, XENHOST, " %s: total pages:%zu, map to [%p, %p]\n", __func__, total_pages, addr, RTE_PTR_ADD(addr, total_pages * pg_sz - 1));
for (i = 0; i < gntnode->gnt_num; i++) {
gnt = gntnode->gnt_info + i;
for (j = 0; j < (PAGE_PFNNUM) / 2; j++) {
if ((gnt->gref_pfn[j * 2].gref) <= 0)
goto _end;
/*alternative: batch map, or through libxc*/
if (xen_grant_mmap(RTE_PTR_ADD(addr, pfn_num * pg_sz),
domid,
gnt->gref_pfn[j * 2].gref,
&pindex[pfn_num]) == NULL) {
goto mmap_failed;
}
pfn[pfn_num] = gnt->gref_pfn[j * 2 + 1].pfn_num;
pfn_num++;
}
}
mmap_failed:
if (pfn_num)
munmap(addr, pfn_num * pg_sz);
for (i = 0; i < pfn_num; i++) {
xen_unmap_grant_ref(pindex[i]);
}
free(pindex);
free(pfn);
return NULL;
_end:
if (ppindex)
*ppindex = pindex;
else
free(pindex);
if (ppfn)
*ppfn = pfn;
else
free(pfn);
if (pgs)
*pgs = total_pages;
return addr;
}
static int
parse_mpool_va(struct xen_mempool *mempool)
{
char path[PATH_MAX] = {0};
char *buf;
uint32_t len;
char *end;
int ret = -1;
errno = 0;
rte_snprintf(path, sizeof(path),
XEN_VM_ROOTNODE_FMT"/%d_"XEN_GVA_SUFFIX,
mempool->dom_id, mempool->pool_idx);
if((buf = xen_read_node(path, &len)) == NULL)
goto out;
mempool->gva = (void *)strtoul(buf, &end, 16);
if (errno != 0 || end == NULL || end == buf || *end != '\0') {
mempool->gva = NULL;
goto out;
}
ret = 0;
out:
if (buf)
free(buf);
return ret;
}
/*
* map mbuf pool
*/
static int
map_mempoolnode(struct xen_gntnode *gntnode,
struct xen_mempool *mempool)
{
if (gntnode == NULL || mempool == NULL)
return -1;
mempool->hva =
map_gntnode(gntnode, mempool->dom_id, &mempool->mempfn_tbl, &mempool->mempfn_num, &mempool->pindex);
RTE_LOG(INFO, XENHOST, " %s: map mempool at %p\n", __func__, (void *)mempool->hva);
if (mempool->hva)
return 0;
else {
return -1;
}
}
void
cleanup_mempool(struct xen_mempool *mempool)
{
int pg_sz = getpagesize();
uint32_t i;
if (mempool->hva)
munmap(mempool->hva, mempool->mempfn_num * pg_sz);
mempool->hva = NULL;
if (mempool->pindex) {
RTE_LOG(INFO, XENHOST, " %s: unmap dom %02u mempool%02u %u grefs\n",
__func__,
mempool->dom_id,
mempool->pool_idx,
mempool->mempfn_num);
for (i = 0; i < mempool->mempfn_num; i ++) {
xen_unmap_grant_ref(mempool->pindex[i]);
}
}
mempool->pindex = NULL;
if (mempool->mempfn_tbl)
free(mempool->mempfn_tbl);
mempool->mempfn_tbl = NULL;
}
/*
* process mempool node idx#_mempool_gref, idx = 0, 1, 2...
* until we encounter a node that doesn't exist.
*/
int
parse_mempoolnode(struct xen_guest *guest)
{
uint32_t i, len;
char path[PATH_MAX] = {0};
struct xen_gntnode *gntnode = NULL;
struct xen_mempool *mempool = NULL;
char *buf;
bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
guest->pool_num = 0;
while (1) {
/* check if null terminated */
rte_snprintf(path, sizeof(path),
XEN_VM_ROOTNODE_FMT"/%d_"XEN_MEMPOOL_SUFFIX,
guest->dom_id,
guest->pool_num);
if ((buf = xen_read_node(path, &len)) != NULL) {
/* this node exists */
free(buf);
} else {
if (guest->pool_num == 0) {
RTE_LOG(ERR, XENHOST, "no mempool found\n");
return -1;
}
break;
}
mempool = &guest->mempool[guest->pool_num];
mempool->dom_id = guest->dom_id;
mempool->pool_idx = guest->pool_num;
RTE_LOG(INFO, XENHOST, " %s: mempool %u parse gntnode %s\n", __func__, guest->pool_num, path);
gntnode = parse_gntnode(guest->dom_id, path);
if (gntnode == NULL)
goto err;
if (parse_mpool_va(mempool))
goto err;
RTE_LOG(INFO, XENHOST, " %s: mempool %u map gntnode %s\n", __func__, guest->pool_num, path);
if (map_mempoolnode(gntnode, mempool))
goto err;
xen_free_gntnode(gntnode);
guest->pool_num++;
}
return 0;
err:
if (gntnode)
xen_free_gntnode(gntnode);
for (i = 0; i < MAX_XENVIRT_MEMPOOL ; i++) {
cleanup_mempool(&guest->mempool[i]);
}
/* reinitialise mempool */
bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
return -1;
}
static int
xen_map_vringflag(struct xen_vring *vring)
{
char path[PATH_MAX] = {0};
char *buf;
uint32_t len,gref;
int pg_sz = getpagesize();
char *end;
rte_snprintf(path, sizeof(path),
XEN_VM_ROOTNODE_FMT"/%d_"XEN_VRINGFLAG_SUFFIX,
vring->dom_id, vring->virtio_idx);
if((buf = xen_read_node(path, &len)) == NULL)
goto err;
errno = 0;
gref = strtol(buf, &end, 0);
if (errno != 0 || end == NULL || end == buf) {
goto err;
}
vring->flag = xen_grant_mmap(0, vring->dom_id, gref, &vring->flag_index);
if (vring->flag == NULL || *vring->flag == 0)
goto err;
free(buf);
return 0;
err:
if (buf)
free(buf);
if (vring->flag) {
munmap(vring->flag, pg_sz);
vring->flag = NULL;
xen_unmap_grant_ref(vring->flag_index);
}
return -1;
}
static int
xen_map_rxvringnode(struct xen_gntnode *gntnode,
struct xen_vring *vring)
{
vring->rxvring_addr =
map_gntnode(gntnode, vring->dom_id, &vring->rxpfn_tbl, &vring->rxpfn_num, &vring->rx_pindex);
RTE_LOG(INFO, XENHOST, " %s: map rx vring at %p\n", __func__, (void *)vring->rxvring_addr);
if (vring->rxvring_addr)
return 0;
else
return -1;
}
static int
xen_map_txvringnode(struct xen_gntnode *gntnode,
struct xen_vring *vring)
{
vring->txvring_addr =
map_gntnode(gntnode, vring->dom_id, &vring->txpfn_tbl, &vring->txpfn_num, &vring->tx_pindex);
RTE_LOG(INFO, XENHOST, " %s: map tx vring at %p\n", __func__, (void *)vring->txvring_addr);
if (vring->txvring_addr)
return 0;
else
return -1;
}
void
cleanup_vring(struct xen_vring *vring)
{
int pg_sz = getpagesize();
uint32_t i;
RTE_LOG(INFO, XENHOST, " %s: cleanup dom %u vring %u\n", __func__, vring->dom_id, vring->virtio_idx);
if (vring->rxvring_addr) {
munmap(vring->rxvring_addr, vring->rxpfn_num * pg_sz);
RTE_LOG(INFO, XENHOST, " %s: unmap rx vring [%p, %p]\n",
__func__,
vring->rxvring_addr,
RTE_PTR_ADD(vring->rxvring_addr,
vring->rxpfn_num * pg_sz - 1));
}
vring->rxvring_addr = NULL;
if (vring->rx_pindex) {
RTE_LOG(INFO, XENHOST, " %s: unmap rx vring %u grefs\n", __func__, vring->rxpfn_num);
for (i = 0; i < vring->rxpfn_num; i++) {
xen_unmap_grant_ref(vring->rx_pindex[i]);
}
}
vring->rx_pindex = NULL;
if (vring->rxpfn_tbl)
free(vring->rxpfn_tbl);
vring->rxpfn_tbl = NULL;
if (vring->txvring_addr) {
munmap(vring->txvring_addr, vring->txpfn_num * pg_sz);
RTE_LOG(INFO, XENHOST, " %s: unmap tx vring [%p, %p]\n",
__func__,
vring->txvring_addr,
RTE_PTR_ADD(vring->txvring_addr,
vring->txpfn_num * pg_sz - 1));
}
vring->txvring_addr = NULL;
if (vring->tx_pindex) {
RTE_LOG(INFO, XENHOST, " %s: unmap tx vring %u grefs\n", __func__, vring->txpfn_num);
for (i = 0; i < vring->txpfn_num; i++) {
xen_unmap_grant_ref(vring->tx_pindex[i]);
}
}
vring->tx_pindex = NULL;
if (vring->txpfn_tbl)
free(vring->txpfn_tbl);
vring->txpfn_tbl = NULL;
if (vring->flag) {
if (!munmap((void *)vring->flag, pg_sz))
RTE_LOG(INFO, XENHOST, " %s: unmap flag page at %p\n", __func__, vring->flag);
if (!xen_unmap_grant_ref(vring->flag_index))
RTE_LOG(INFO, XENHOST, " %s: release flag ref index 0x%" PRIx64 "\n", __func__, vring->flag_index);
}
vring->flag = NULL;
return;
}
static int
xen_parse_etheraddr(struct xen_vring *vring)
{
char path[PATH_MAX] = {0};
char *buf;
uint32_t len;
int ret = -1;
rte_snprintf(path, sizeof(path),
XEN_VM_ROOTNODE_FMT"/%d_"XEN_ADDR_SUFFIX,
vring->dom_id, vring->virtio_idx);
if ((buf = xen_read_node(path, &len)) == NULL)
goto out;
if (cmdline_parse_etheraddr(NULL, buf, &vring->addr) < 0)
goto out;
ret = 0;
out:
if (buf)
free(buf);
return ret;
}
int
parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx)
{
char path[PATH_MAX] = {0};
struct xen_gntnode *rx_gntnode = NULL;
struct xen_gntnode *tx_gntnode = NULL;
struct xen_vring *vring = NULL;
/*check if null terminated */
rte_snprintf(path, sizeof(path),
XEN_VM_ROOTNODE_FMT"/%d_"XEN_RXVRING_SUFFIX,
guest->dom_id,
virtio_idx);
RTE_LOG(INFO, XENHOST, " %s: virtio %u parse rx gntnode %s\n", __func__, virtio_idx, path);
rx_gntnode = parse_gntnode(guest->dom_id, path);
if (rx_gntnode == NULL)
goto err;
/*check if null terminated */
rte_snprintf(path, sizeof(path),
XEN_VM_ROOTNODE_FMT"/%d_"XEN_TXVRING_SUFFIX,
guest->dom_id,
virtio_idx);
RTE_LOG(INFO, XENHOST, " %s: virtio %u parse tx gntnode %s\n", __func__, virtio_idx, path);
tx_gntnode = parse_gntnode(guest->dom_id, path);
if (tx_gntnode == NULL)
goto err;
vring = &guest->vring[virtio_idx];
bzero(vring, sizeof(*vring));
vring->dom_id = guest->dom_id;
vring->virtio_idx = virtio_idx;
if (xen_parse_etheraddr(vring) != 0)
goto err;
RTE_LOG(INFO, XENHOST, " %s: virtio %u map rx gntnode %s\n", __func__, virtio_idx, path);
if (xen_map_rxvringnode(rx_gntnode, vring) != 0)
goto err;
RTE_LOG(INFO, XENHOST, " %s: virtio %u map tx gntnode %s\n", __func__, virtio_idx, path);
if (xen_map_txvringnode(tx_gntnode, vring) != 0)
goto err;
if (xen_map_vringflag(vring) != 0)
goto err;
guest->vring_num++;
xen_free_gntnode(rx_gntnode);
xen_free_gntnode(tx_gntnode);
return 0;
err:
if (rx_gntnode)
xen_free_gntnode(rx_gntnode);
if (tx_gntnode)
xen_free_gntnode(tx_gntnode);
if (vring) {
cleanup_vring(vring);
bzero(vring, sizeof(*vring));
}
return -1;
}
/*
* Open xen grant dev driver
* @return
* 0 on success, -1 on failure.
*/
static int
xen_grant_init(void)
{
d_fd = open(XEN_GNTDEV_FNAME, O_RDWR);
return d_fd == -1? (-1): (0);
}
/*
* Initialise xenstore handle and open grant dev driver.
* @return
* 0 on success, -1 on failure.
*/
int
xenhost_init(void)
{
xs = xs_daemon_open();
if (xs == NULL) {
rte_panic("failed to initialize xenstore daemon handle");
return -1;
}
if (xen_grant_init())
return -1;
return 0;
}

@@ -45,6 +45,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
 DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net

@@ -39,6 +39,9 @@
 #ifdef RTE_LIBRTE_PMD_PCAP
 #include <rte_eth_pcap.h>
 #endif
+#ifdef RTE_LIBRTE_PMD_XENVIRT
+#include <rte_eth_xenvirt.h>
+#endif
 #include "eal_private.h"
 struct device_init {
@@ -59,6 +62,12 @@ struct device_init dev_types[] = {
 		.dev_prefix = RTE_ETH_PCAP_PARAM_NAME,
 		.init_fn = rte_pmd_pcap_init
 	},
+#endif
+#ifdef RTE_LIBRTE_PMD_XENVIRT
+	{
+		.dev_prefix = RTE_ETH_XENVIRT_PARAM_NAME,
+		.init_fn = rte_pmd_xenvirt_init
+	},
 #endif
 	{
 		.dev_prefix = "-nodev-",

@@ -51,6 +51,9 @@
 #ifdef RTE_LIBRTE_PMD_PCAP
 #include <rte_eth_pcap.h>
 #endif
+#ifdef RTE_LIBRTE_PMD_XENVIRT
+#include <rte_eth_xenvirt.h>
+#endif
 #include "eal_private.h"
 static char dev_list_str[4096];
@@ -102,6 +105,9 @@ is_valid_wl_entry(const char *device_str, size_t dev_buf_len)
 #endif
 #ifdef RTE_LIBRTE_PMD_PCAP
 	RTE_ETH_PCAP_PARAM_NAME,
+#endif
+#ifdef RTE_LIBRTE_PMD_XENVIRT
+	RTE_ETH_XENVIRT_PARAM_NAME,
 #endif
 	"-nodev-" /* dummy value to prevent compiler warnings */
 };

@@ -44,6 +44,7 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_ether
 CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem
 CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_ring
 CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_pcap
+CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_xenvirt
 CFLAGS += $(WERROR_FLAGS) -O3
 # specific to linuxapp exec-env

@@ -0,0 +1,58 @@
# BSD LICENSE
#
# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include $(RTE_SDK)/mk/rte.vars.mk
#
# library name
#
LIB = librte_pmd_xenvirt.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
#
# all sources are stored in SRCS-y
#
SRCS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += rte_eth_xenvirt.c rte_mempool_gntalloc.c rte_xen_lib.c
#
# Export include files
#
SYMLINK-y-include += rte_eth_xenvirt.h
# this lib depends upon:
DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_eal lib/librte_ether
DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_mempool lib/librte_mbuf
DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_net lib/librte_malloc
DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_cmdline
include $(RTE_SDK)/mk/rte.lib.mk

@@ -0,0 +1,706 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <errno.h>
#include <sys/user.h>
#include <linux/binfmts.h>
#include <xen/xen-compat.h>
#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
#include <xs.h>
#else
#include <xenstore.h>
#endif
#include <linux/virtio_ring.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <cmdline_parse.h>
#include <cmdline_parse_etheraddr.h>
#include "rte_xen_lib.h"
#include "virtqueue.h"
#include "rte_eth_xenvirt.h"
#define VQ_DESC_NUM 256
#define VIRTIO_MBUF_BURST_SZ 64
/* virtio_idx is incremented after a new device is created. */
static int virtio_idx = 0;
static const char *drivername = "xen dummy virtio PMD";
static struct rte_eth_link pmd_link = {
.link_speed = 10000,
.link_duplex = ETH_LINK_FULL_DUPLEX,
.link_status = 0
};
static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
struct rte_mbuf *m;
m = __rte_mbuf_raw_alloc(mp);
__rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
return m;
}
static uint16_t
eth_xenvirt_rx(void *q, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
struct virtqueue *rxvq = q;
struct rte_mbuf *rxm, *new_mbuf;
uint16_t nb_used, num;
uint32_t len[VIRTIO_MBUF_BURST_SZ];
uint32_t i;
struct pmd_internals *pi = rxvq->internals;
nb_used = VIRTQUEUE_NUSED(rxvq);
rte_compiler_barrier(); /* rmb */
num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
if (unlikely(num == 0)) return 0;
num = virtqueue_dequeue_burst(rxvq, rx_pkts, len, num);
PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num);
for (i = 0; i < num ; i ++) {
rxm = rx_pkts[i];
PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]);
rxm->pkt.next = NULL;
rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
rxm->pkt.data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr));
rxm->pkt.nb_segs = 1;
rxm->pkt.in_port = pi->port_id;
rxm->pkt.pkt_len = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr));
}
/* allocate new mbuf for the used descriptor */
while (likely(!virtqueue_full(rxvq))) {
new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
if (unlikely(new_mbuf == NULL)) {
break;
}
if (unlikely(virtqueue_enqueue_recv_refill(rxvq, new_mbuf))) {
rte_pktmbuf_free_seg(new_mbuf);
break;
}
}
pi->eth_stats.ipackets += num;
return num;
}
static uint16_t
eth_xenvirt_tx(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct virtqueue *txvq = tx_queue;
struct rte_mbuf *txm;
uint16_t nb_used, nb_tx, num, i;
int error;
uint32_t len[VIRTIO_MBUF_BURST_SZ];
struct rte_mbuf *snd_pkts[VIRTIO_MBUF_BURST_SZ];
struct pmd_internals *pi = txvq->internals;
nb_tx = 0;
if (unlikely(nb_pkts == 0))
return 0;
PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
nb_used = VIRTQUEUE_NUSED(txvq);
rte_compiler_barrier(); /* rmb */
num = (uint16_t)(likely(nb_used <= VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num);
for (i = 0; i < num ; i ++) {
/* mergeable not supported, one segment only */
rte_pktmbuf_free_seg(snd_pkts[i]);
}
while (nb_tx < nb_pkts) {
if (likely(!virtqueue_full(txvq))) {
/* TODO drop tx_pkts if it contains multiple segments */
txm = tx_pkts[nb_tx];
error = virtqueue_enqueue_xmit(txvq, txm);
if (unlikely(error)) {
if (error == ENOSPC)
PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0\n");
else if (error == EMSGSIZE)
PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1\n");
else
PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d\n", error);
break;
}
nb_tx++;
} else {
PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n");
/* virtqueue_notify not needed in our para-virt solution */
break;
}
}
pi->eth_stats.opackets += nb_tx;
return nb_tx;
}
static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
RTE_LOG(ERR, PMD, "%s\n", __func__);
return 0;
}
/*
* Create a shared page between guest and host.
* The host watches this page; when it is cleared on unmap, the host
* performs the necessary clean-up.
*/
static void
gntalloc_vring_flag(int vtidx)
{
char key_str[PATH_MAX];
char val_str[PATH_MAX];
uint32_t gref_tmp;
void *ptr;
if (grefwatch_from_alloc(&gref_tmp, &ptr)) {
RTE_LOG(ERR, PMD, "grefwatch_from_alloc error\n");
exit(0);
}
*(uint8_t *)ptr = MAP_FLAG;
rte_snprintf(val_str, sizeof(val_str), "%u", gref_tmp);
rte_snprintf(key_str, sizeof(key_str),
DPDK_XENSTORE_PATH"%d"VRING_FLAG_STR, vtidx);
xenstore_write(key_str, val_str);
}
/*
* Notify host this virtio device is started.
* Host could start polling this device.
*/
static void
dev_start_notify(int vtidx)
{
char key_str[PATH_MAX];
char val_str[PATH_MAX];
RTE_LOG(INFO, PMD, "%s: virtio %d is started\n", __func__, vtidx);
gntalloc_vring_flag(vtidx);
rte_snprintf(key_str, sizeof(key_str), "%s%s%d",
DPDK_XENSTORE_PATH, EVENT_TYPE_START_STR,
vtidx);
rte_snprintf(val_str, sizeof(val_str), "1");
xenstore_write(key_str, val_str);
}
/*
* Notify host this virtio device is stopped.
* Host could stop polling this device.
*/
static void
dev_stop_notify(int vtidx)
{
RTE_SET_USED(vtidx);
}
static int
update_mac_address(struct ether_addr *mac_addrs, int vtidx)
{
char key_str[PATH_MAX];
char val_str[PATH_MAX];
int rv;
if (mac_addrs == NULL) {
RTE_LOG(ERR, PMD, "%s: NULL pointer mac specified\n", __func__);
return -1;
}
rv = rte_snprintf(key_str, sizeof(key_str),
DPDK_XENSTORE_PATH"%d_ether_addr", vtidx);
if (rv == -1)
return rv;
rv = rte_snprintf(val_str, sizeof(val_str), "%02x:%02x:%02x:%02x:%02x:%02x",
mac_addrs->addr_bytes[0],
mac_addrs->addr_bytes[1],
mac_addrs->addr_bytes[2],
mac_addrs->addr_bytes[3],
mac_addrs->addr_bytes[4],
mac_addrs->addr_bytes[5]);
if (rv == -1)
return rv;
if (xenstore_write(key_str, val_str))
return rv;
return 0;
}
static int
eth_dev_start(struct rte_eth_dev *dev)
{
struct virtqueue *rxvq = dev->data->rx_queues[0];
struct virtqueue *txvq = dev->data->tx_queues[0];
struct rte_mbuf *m;
struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private;
int rv;
dev->data->dev_link.link_status = 1;
while (!virtqueue_full(rxvq)) {
m = rte_rxmbuf_alloc(rxvq->mpool);
if (m == NULL)
break;
/* Enqueue allocated buffers. */
if (virtqueue_enqueue_recv_refill(rxvq, m)) {
rte_pktmbuf_free_seg(m);
break;
}
}
rxvq->internals = pi;
txvq->internals = pi;
rv = update_mac_address(dev->data->mac_addrs, pi->virtio_idx);
if (rv)
return -1;
dev_start_notify(pi->virtio_idx);
return 0;
}
static void
eth_dev_stop(struct rte_eth_dev *dev)
{
struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private;
dev->data->dev_link.link_status = 0;
dev_stop_notify(pi->virtio_idx);
}
/*
* Notify host this virtio device is closed.
* Host could do necessary clean up to this device.
*/
static void
eth_dev_close(struct rte_eth_dev *dev)
{
RTE_SET_USED(dev);
}
static void
eth_dev_info(struct rte_eth_dev *dev,
struct rte_eth_dev_info *dev_info)
{
struct pmd_internals *internals = dev->data->dev_private;
RTE_SET_USED(internals);
dev_info->driver_name = drivername;
dev_info->max_mac_addrs = 1;
dev_info->max_rx_pktlen = (uint32_t)2048;
dev_info->max_rx_queues = (uint16_t)1;
dev_info->max_tx_queues = (uint16_t)1;
dev_info->min_rx_bufsize = 0;
dev_info->pci_dev = NULL;
}
static void
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct pmd_internals *internals = dev->data->dev_private;
if(stats)
rte_memcpy(stats, &internals->eth_stats, sizeof(*stats));
}
static void
eth_stats_reset(struct rte_eth_dev *dev)
{
struct pmd_internals *internals = dev->data->dev_private;
/* Reset software totals */
memset(&internals->eth_stats, 0, sizeof(internals->eth_stats));
}
static void
eth_queue_release(void *q __rte_unused)
{
}
static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
int wait_to_complete __rte_unused)
{
return 0;
}
/*
* Create shared vring between guest and host.
* Memory is allocated through the grant alloc driver, so it is not physically contiguous.
*/
static void *
gntalloc_vring_create(int queue_type, uint32_t size, int vtidx)
{
char key_str[PATH_MAX] = {0};
char val_str[PATH_MAX] = {0};
void *va = NULL;
int pg_size;
uint32_t pg_num;
uint32_t *gref_arr = NULL;
phys_addr_t *pa_arr = NULL;
uint64_t start_index;
int rv;
pg_size = getpagesize();
size = RTE_ALIGN_CEIL(size, pg_size);
pg_num = size / pg_size;
gref_arr = calloc(pg_num, sizeof(gref_arr[0]));
pa_arr = calloc(pg_num, sizeof(pa_arr[0]));
if (gref_arr == NULL || pa_arr == NULL) {
RTE_LOG(ERR, PMD, "%s: calloc failed\n", __func__);
goto out;
}
va = gntalloc(size, gref_arr, &start_index);
if (va == NULL) {
RTE_LOG(ERR, PMD, "%s: gntalloc failed\n", __func__);
goto out;
}
if (get_phys_map(va, pa_arr, pg_num, pg_size))
goto out;
/* write in xenstore gref and pfn for each page of vring */
if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) {
gntfree(va, size, start_index);
va = NULL;
goto out;
}
if (queue_type == VTNET_RQ)
rv = rte_snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"RXVRING_XENSTORE_STR, vtidx);
else
rv = rte_snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"TXVRING_XENSTORE_STR, vtidx);
if (rv == -1 || xenstore_write(key_str, val_str) == -1) {
gntfree(va, size, start_index);
va = NULL;
}
out:
if (pa_arr)
free(pa_arr);
if (gref_arr)
free(gref_arr);
return va;
}
static struct virtqueue *
virtio_queue_setup(struct rte_eth_dev *dev, int queue_type)
{
struct virtqueue *vq = NULL;
uint16_t vq_size = VQ_DESC_NUM;
int i = 0;
char vq_name[VIRTQUEUE_MAX_NAME_SZ];
size_t size;
struct vring *vr;
/* Allocate memory for virtqueue. */
if (queue_type == VTNET_RQ) {
rte_snprintf(vq_name, sizeof(vq_name), "port%d_rvq",
dev->data->port_id);
vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
vq_size * sizeof(struct vq_desc_extra), CACHE_LINE_SIZE);
if (vq == NULL) {
RTE_LOG(ERR, PMD, "%s: unabled to allocate virtqueue\n", __func__);
return NULL;
}
memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
} else if(queue_type == VTNET_TQ) {
rte_snprintf(vq_name, sizeof(vq_name), "port%d_tvq",
dev->data->port_id);
vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
vq_size * sizeof(struct vq_desc_extra), CACHE_LINE_SIZE);
if (vq == NULL) {
RTE_LOG(ERR, PMD, "%s: unabled to allocate virtqueue\n", __func__);
return NULL;
}
memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
}
memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
vq->vq_alignment = VIRTIO_PCI_VRING_ALIGN;
vq->vq_nentries = vq_size;
vq->vq_free_cnt = vq_size;
/* Calculate vring size according to virtio spec */
size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
/* Allocate memory for virtio vring through gntalloc driver*/
vq->vq_ring_virt_mem = gntalloc_vring_create(queue_type, vq->vq_ring_size,
((struct pmd_internals *)dev->data->dev_private)->virtio_idx);
memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
vr = &vq->vq_ring;
vring_init(vr, vq_size, vq->vq_ring_virt_mem, vq->vq_alignment);
/*
* Locally maintained last consumed index; this index trails
* vq_ring.used->idx.
*/
vq->vq_used_cons_idx = 0;
vq->vq_desc_head_idx = 0;
vq->vq_free_cnt = vq->vq_nentries;
memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
/* Chain all the descriptors in the ring with an END */
for (i = 0; i < vq_size - 1; i++)
vr->desc[i].next = (uint16_t)(i + 1);
vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
return vq;
}
static int
eth_rx_queue_setup(struct rte_eth_dev *dev,uint16_t rx_queue_id,
uint16_t nb_rx_desc __rte_unused,
unsigned int socket_id __rte_unused,
const struct rte_eth_rxconf *rx_conf __rte_unused,
struct rte_mempool *mb_pool)
{
struct virtqueue *vq;
vq = dev->data->rx_queues[rx_queue_id] = virtio_queue_setup(dev, VTNET_RQ);
vq->mpool = mb_pool;
return 0;
}
static int
eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
uint16_t nb_tx_desc __rte_unused,
unsigned int socket_id __rte_unused,
const struct rte_eth_txconf *tx_conf __rte_unused)
{
dev->data->tx_queues[tx_queue_id] = virtio_queue_setup(dev, VTNET_TQ);
return 0;
}
static struct eth_dev_ops ops = {
.dev_start = eth_dev_start,
.dev_stop = eth_dev_stop,
.dev_close = eth_dev_close,
.dev_configure = eth_dev_configure,
.dev_infos_get = eth_dev_info,
.rx_queue_setup = eth_rx_queue_setup,
.tx_queue_setup = eth_tx_queue_setup,
.rx_queue_release = eth_queue_release,
.tx_queue_release = eth_queue_release,
.link_update = eth_link_update,
.stats_get = eth_stats_get,
.stats_reset = eth_stats_reset,
};
static int
rte_eth_xenvirt_parse_args(struct xenvirt_dict *dict,
const char *name, const char *params)
{
int i;
char *pairs[RTE_ETH_XENVIRT_MAX_ARGS];
int num_of_pairs;
char *pair[2];
char *args;
int ret = -1;
if (params == NULL)
return 0;
args = rte_zmalloc(NULL, strlen(params) + 1, CACHE_LINE_SIZE);
if (args == NULL) {
RTE_LOG(ERR, PMD, "Couldn't parse %s device \n", name);
return -1;
}
rte_memcpy(args, params, strlen(params));
num_of_pairs = rte_strsplit(args, strnlen(args, MAX_ARG_STRLEN),
pairs,
RTE_ETH_XENVIRT_MAX_ARGS ,
RTE_ETH_XENVIRT_PAIRS_DELIM);
for (i = 0; i < num_of_pairs; i++) {
pair[0] = NULL;
pair[1] = NULL;
rte_strsplit(pairs[i], strnlen(pairs[i], MAX_ARG_STRLEN),
pair, 2,
RTE_ETH_XENVIRT_KEY_VALUE_DELIM);
if (pair[0] == NULL || pair[1] == NULL || pair[0][0] == 0
|| pair[1][0] == 0) {
RTE_LOG(ERR, PMD,
"Couldn't parse %s device,"
"wrong key or value \n", name);
goto err;
}
if (!strncmp(pair[0], RTE_ETH_XENVIRT_MAC_PARAM,
sizeof(RTE_ETH_XENVIRT_MAC_PARAM))) {
if (cmdline_parse_etheraddr(NULL,
pair[1],
&dict->addr) < 0) {
RTE_LOG(ERR, PMD,
"Invalid %s device ether address\n",
name);
goto err;
}
dict->addr_valid = 1;
}
}
ret = 0;
err:
rte_free(args);
return ret;
}
enum dev_action {
DEV_CREATE,
DEV_ATTACH
};
static int
eth_dev_xenvirt_create(const char *name, const char *params,
const unsigned numa_node,
enum dev_action action)
{
struct rte_eth_dev_data *data = NULL;
struct rte_pci_device *pci_dev = NULL;
struct pmd_internals *internals = NULL;
struct rte_eth_dev *eth_dev = NULL;
struct xenvirt_dict dict;
bzero(&dict, sizeof(struct xenvirt_dict));
RTE_LOG(INFO, PMD, "Creating virtio rings backed ethdev on numa socket %u\n",
numa_node);
RTE_SET_USED(action);
if (rte_eth_xenvirt_parse_args(&dict, name, params) < 0) {
RTE_LOG(ERR, PMD, "%s: Failed to parse ethdev parameters\n", __func__);
return -1;
}
/* now do all data allocation - for eth_dev structure, dummy pci driver
* and internal (private) data
*/
data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
if (data == NULL)
goto err;
pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, numa_node);
if (pci_dev == NULL)
goto err;
internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
if (internals == NULL)
goto err;
/* reserve an ethdev entry */
eth_dev = rte_eth_dev_allocate();
if (eth_dev == NULL)
goto err;
pci_dev->numa_node = numa_node;
data->dev_private = internals;
data->port_id = eth_dev->data->port_id;
data->nb_rx_queues = (uint16_t)1;
data->nb_tx_queues = (uint16_t)1;
data->dev_link = pmd_link;
data->mac_addrs = rte_zmalloc("xen_virtio", ETHER_ADDR_LEN, 0);
if(dict.addr_valid)
memcpy(&data->mac_addrs->addr_bytes, &dict.addr, sizeof(struct ether_addr));
else
eth_random_addr(&data->mac_addrs->addr_bytes[0]);
eth_dev->data = data;
eth_dev->dev_ops = &ops;
eth_dev->pci_dev = pci_dev;
eth_dev->rx_pkt_burst = eth_xenvirt_rx;
eth_dev->tx_pkt_burst = eth_xenvirt_tx;
internals->virtio_idx = virtio_idx++;
internals->port_id = eth_dev->data->port_id;
return 0;
err:
if (data)
rte_free(data);
if (pci_dev)
rte_free(pci_dev);
if (internals)
rte_free(internals);
return -1;
}
/*TODO: Support multiple process model */
int
rte_pmd_xenvirt_init(const char *name, const char *params)
{
if (virtio_idx == 0) {
if (xenstore_init() != 0) {
RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__);
return -1;
}
if (gntalloc_open() != 0) {
RTE_LOG(ERR, PMD, "%s: grant init failed\n", __func__);
return -1;
}
}
eth_dev_xenvirt_create(name, params, rte_socket_id(), DEV_CREATE);
return 0;
}


@ -0,0 +1,70 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _RTE_ETH_XENVIRT_H_
#define _RTE_ETH_XENVIRT_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <rte_mempool.h>
#include <rte_ring.h>
#define RTE_ETH_XENVIRT_PARAM_NAME "eth_xenvirt"
/**
* For use by the EAL only. Called as part of EAL init to set up any dummy NICs
* configured on command line.
*/
int rte_pmd_xenvirt_init(const char *name, const char *params);
/**
* Creates mempool for xen virtio PMD.
* This function uses memzone_reserve to allocate memory for meta data,
* and uses grant alloc driver to allocate memory for data area.
* The input parameters are exactly the same as rte_mempool_create.
*/
struct rte_mempool *
rte_mempool_gntalloc_create(const char *name, unsigned elt_num, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags);
#ifdef __cplusplus
}
#endif
#endif
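
As a rough usage sketch (not part of this patch), a guest application could bring the device up with the two calls declared above and then treat the resulting port like any other ethdev. The vdev name, "mac=" parameter string, pool sizing and helper name below are illustrative assumptions, not values taken from this commit.

#include <rte_mbuf.h>
#include <rte_lcore.h>
#include "rte_eth_xenvirt.h"

#define XEN_MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)

static struct rte_mempool *xen_pool;

static int
guest_xenvirt_setup(void)
{
	/* hypothetical vdev name and "mac=" parameter string */
	if (rte_pmd_xenvirt_init("eth_xenvirt0", "mac=00:11:22:33:44:55") < 0)
		return -1;
	/* mbuf pool whose data pages are grant-allocated and shared with the host */
	xen_pool = rte_mempool_gntalloc_create("xen_mbuf_pool", 4096, XEN_MBUF_SIZE,
			32, sizeof(struct rte_pktmbuf_pool_private),
			rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
			rte_socket_id(), 0);
	if (xen_pool == NULL)
		return -1;
	/* the port is then configured with one RX/TX queue and started
	 * through the usual rte_eth_dev_* calls, as for any other PMD */
	return 0;
}
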


@ -0,0 +1,298 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <string.h>
#include <xen/sys/gntalloc.h>
#include <rte_common.h>
#include <rte_mempool.h>
#include <rte_memory.h>
#include <rte_errno.h>
#include "rte_xen_lib.h"
#include "rte_eth_xenvirt.h"
struct _gntarr {
uint32_t gref;
phys_addr_t pa;
uint64_t index;
void *va;
};
struct _mempool_gntalloc_info {
struct rte_mempool *mp;
uint32_t pg_num;
uint32_t *gref_arr;
phys_addr_t *pa_arr;
void *va;
uint32_t mempool_idx;
uint64_t start_index;
};
static rte_atomic32_t global_xenvirt_mempool_idx = RTE_ATOMIC32_INIT(-1);
static int
compare(const void *p1, const void *p2)
{
return ((const struct _gntarr *)p1)->pa - ((const struct _gntarr *)p2)->pa;
}
static struct _mempool_gntalloc_info
_create_mempool(const char *name, unsigned elt_num, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags)
{
struct _mempool_gntalloc_info mgi;
struct rte_mempool *mp = NULL;
struct rte_mempool_objsz objsz;
uint32_t pg_num, rpg_num, pg_shift, pg_sz;
char *va, *orig_va, *uv; /* uv: from which, the pages could be freed */
ssize_t sz, usz; /* usz: unused size */
/*
* for each page allocated through xen_gntalloc driver,
* gref_arr:stores grant references,
* pa_arr: stores physical address,
* gnt_arr: stores all metadata
*/
uint32_t *gref_arr = NULL;
phys_addr_t *pa_arr = NULL;
struct _gntarr *gnt_arr = NULL;
/* start index of the grant references, used for dealloc */
uint64_t start_index;
uint32_t i, j;
int rv = 0;
struct ioctl_gntalloc_dealloc_gref arg;
mgi.mp = NULL;
va = orig_va = uv = NULL;
pg_num = rpg_num = 0;
sz = 0;
pg_sz = getpagesize();
if (rte_is_power_of_2(pg_sz) == 0) {
goto out;
}
pg_shift = rte_bsf32(pg_sz);
rte_mempool_calc_obj_size(elt_size, flags, &objsz);
sz = rte_mempool_xmem_size(elt_num, objsz.total_size, pg_shift);
pg_num = sz >> pg_shift;
pa_arr = calloc(pg_num, sizeof(pa_arr[0]));
gref_arr = calloc(pg_num, sizeof(gref_arr[0]));
gnt_arr = calloc(pg_num, sizeof(gnt_arr[0]));
if ((gnt_arr == NULL) || (gref_arr == NULL) || (pa_arr == NULL))
goto out;
/* grant index is continuous in ascending order */
orig_va = gntalloc(sz, gref_arr, &start_index);
if (orig_va == NULL)
goto out;
get_phys_map(orig_va, pa_arr, pg_num, pg_sz);
for (i = 0; i < pg_num; i++) {
gnt_arr[i].index = start_index + i * pg_sz;
gnt_arr[i].gref = gref_arr[i];
gnt_arr[i].pa = pa_arr[i];
gnt_arr[i].va = RTE_PTR_ADD(orig_va, i * pg_sz);
}
qsort(gnt_arr, pg_num, sizeof(struct _gntarr), compare);
va = get_xen_virtual(sz, pg_sz);
if (va == NULL) {
goto out;
}
/*
* map one by one, as the indexes aren't continuous any more.
* This creates pg_num VMAs; doesn't Linux have a limitation on this?
*/
for (i = 0; i < pg_num; i++) {
/* update gref_arr and pa_arr after sort */
gref_arr[i] = gnt_arr[i].gref;
pa_arr[i] = gnt_arr[i].pa;
gnt_arr[i].va = mmap(va + i * pg_sz, pg_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, gntalloc_fd, gnt_arr[i].index);
if ((gnt_arr[i].va == MAP_FAILED) || (gnt_arr[i].va != (va + i * pg_sz))) {
RTE_LOG(ERR, PMD, "failed to map %d pages\n", i);
goto mmap_failed;
}
}
/*
* Check that the allocated size is big enough to hold elt_num
* objects and calculate how many bytes are actually required.
*/
usz = rte_mempool_xmem_usage(va, elt_num, objsz.total_size, pa_arr, pg_num, pg_shift);
if (usz < 0) {
mp = NULL;
i = pg_num;
goto mmap_failed;
} else {
/* unmap unused pages if any */
uv = RTE_PTR_ADD(va, usz);
if ((usz = va + sz - uv) > 0) {
RTE_LOG(ERR, PMD,
"%s(%s): unmap unused %zu of %zu "
"mmaped bytes @%p orig:%p\n",
__func__, name, usz, sz, uv, va);
munmap(uv, usz);
i = (sz - usz) / pg_sz;
for (; i < pg_num; i++) {
arg.count = 1;
arg.index = gnt_arr[i].index;
rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg);
if (rv) {
/* shouldn't fail here */
RTE_LOG(ERR, PMD, "va=%p pa=%p index=%p %s\n",
gnt_arr[i].va,
(void *)gnt_arr[i].pa,
(void *)arg.index, strerror(errno));
rte_panic("gntdealloc failed when freeing pages\n");
}
}
rpg_num = (sz - usz) >> pg_shift;
} else
rpg_num = pg_num;
mp = rte_mempool_xmem_create(name, elt_num, elt_size,
cache_size, private_data_size,
mp_init, mp_init_arg,
obj_init, obj_init_arg,
socket_id, flags, va, pa_arr, rpg_num, pg_shift);
RTE_VERIFY(elt_num == mp->size);
}
mgi.mp = mp;
mgi.pg_num = rpg_num;
mgi.gref_arr = gref_arr;
mgi.pa_arr = pa_arr;
if (mp)
mgi.mempool_idx = rte_atomic32_add_return(&global_xenvirt_mempool_idx, 1);
mgi.start_index = start_index;
mgi.va = va;
if (mp == NULL) {
i = pg_num;
goto mmap_failed;
}
/*
* Unmap only, without deallocating the grant references.
* Unused pages have already been unmapped;
* unmapping them twice will fail, but that is safe.
*/
mmap_failed:
for (j = 0; j < i; j++) {
if (gnt_arr[i].va)
munmap(gnt_arr[i].va, pg_sz);
}
out:
if (gnt_arr)
free(gnt_arr);
if (orig_va)
munmap(orig_va, sz);
if (mp == NULL) {
if (gref_arr)
free(gref_arr);
if (pa_arr)
free(pa_arr);
/* some grefs have already been de-allocated from the list in the driver,
* so dealloc one by one; it is safe to deallocate twice
*/
if (orig_va) {
for (i = 0; i < pg_num; i++) {
arg.index = start_index + i * pg_sz;
rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg);
}
}
}
return mgi;
}
struct rte_mempool *
rte_mempool_gntalloc_create(const char *name, unsigned elt_num, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags)
{
int rv;
uint32_t i;
struct _mempool_gntalloc_info mgi;
struct ioctl_gntalloc_dealloc_gref arg;
int pg_sz = getpagesize();
mgi = _create_mempool(name, elt_num, elt_size,
cache_size, private_data_size,
mp_init, mp_init_arg,
obj_init, obj_init_arg,
socket_id, flags);
if (mgi.mp) {
rv = grant_gntalloc_mbuf_pool(mgi.mp,
mgi.pg_num,
mgi.gref_arr,
mgi.pa_arr,
mgi.mempool_idx);
free(mgi.gref_arr);
free(mgi.pa_arr);
if (rv == 0)
return mgi.mp;
/*
* in _create_mempool, unused pages have already been unmapped and deallocated;
* unmap and dealloc the remaining ones here.
*/
munmap(mgi.va, pg_sz * mgi.pg_num);
for (i = 0; i < mgi.pg_num; i++) {
arg.index = mgi.start_index + i * pg_sz;
rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg);
}
return NULL;
}
return NULL;
}


@ -0,0 +1,430 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <xen/xen-compat.h>
#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
#include <xs.h>
#else
#include <xenstore.h>
#endif
#include <xen/sys/gntalloc.h>
#include <rte_common.h>
#include <rte_string_fns.h>
#include "rte_xen_lib.h"
/*
* The grant node format in xenstore for vring/mpool is:
* 0_rx_vring_gref = "gref1#, gref2#, gref3#"
* 0_mempool_gref = "gref1#, gref2#, gref3#"
* each gref# is a grant reference for a shared page.
* In each shared page, we store the grant_node_item items.
*/
struct grant_node_item {
uint32_t gref;
uint32_t pfn;
} __attribute__((packed));
/* fd for xen_gntalloc driver, used to allocate grant pages*/
int gntalloc_fd = -1;
/* xenstore path for local domain, now it is '/local/domain/domid/' */
static char *dompath = NULL;
/* handle to xenstore read/write operations */
static struct xs_handle *xs = NULL;
/*
* Reserve a virtual address space.
* On success, returns the pointer. On failure, returns NULL.
*/
void *
get_xen_virtual(size_t size, size_t page_sz)
{
void *addr;
uintptr_t aligned_addr;
addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED) {
RTE_LOG(ERR, PMD, "failed get a virtual area\n");
return NULL;
}
aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz);
addr = (void *)(aligned_addr);
return addr;
}
/*
* Get the physical address for virtual memory starting at va.
*/
int
get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz)
{
int32_t fd, rc = 0;
uint32_t i, nb;
off_t ofs;
ofs = (uintptr_t)va / pg_sz * sizeof(*pa);
nb = pg_num * sizeof(*pa);
if ((fd = open(PAGEMAP_FNAME, O_RDONLY)) < 0 ||
(rc = pread(fd, pa, nb, ofs)) < 0 ||
(rc -= nb) != 0) {
RTE_LOG(ERR, PMD, "%s: failed read of %u bytes from \'%s\' "
"at offset %zu, error code: %d\n",
__func__, nb, PAGEMAP_FNAME, ofs, errno);
rc = ENOENT;
}
close(fd);
for (i = 0; i != pg_num; i++)
pa[i] = (pa[i] & PAGEMAP_PFN_MASK) * pg_sz;
return rc;
}
int
gntalloc_open(void)
{
gntalloc_fd = open(XEN_GNTALLOC_FNAME, O_RDWR);
return (gntalloc_fd != -1) ? 0 : -1;
}
void
gntalloc_close(void)
{
if (gntalloc_fd != -1)
close(gntalloc_fd);
gntalloc_fd = -1;
}
void *
gntalloc(size_t size, uint32_t *gref, uint64_t *start_index)
{
int page_size = getpagesize();
uint32_t i, pg_num;
void *va;
int rv;
struct ioctl_gntalloc_alloc_gref *arg;
struct ioctl_gntalloc_dealloc_gref arg_d;
if (size % page_size) {
RTE_LOG(ERR, PMD, "%s: %zu isn't multiple of page size\n",
__func__, size);
return NULL;
}
pg_num = size / page_size;
arg = malloc(sizeof(*arg) + (pg_num - 1) * sizeof(uint32_t));
if (arg == NULL)
return NULL;
arg->domid = DOM0_DOMID;
arg->flags = GNTALLOC_FLAG_WRITABLE;
arg->count = pg_num;
rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_ALLOC_GREF, arg);
if (rv) {
RTE_LOG(ERR, PMD, "%s: ioctl error\n", __func__);
free(arg);
return NULL;
}
va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gntalloc_fd, arg->index);
if (va == MAP_FAILED) {
RTE_LOG(ERR, PMD, "%s: mmap failed\n", __func__);
arg_d.count = pg_num;
arg_d.index = arg->index;
ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg_d);
free(arg);
return NULL;
}
if (gref) {
for (i = 0; i < pg_num; i++) {
gref[i] = arg->gref_ids[i];
}
}
if (start_index)
*start_index = arg->index;
free(arg);
return va;
}
int
grefwatch_from_alloc(uint32_t *gref, void **pptr)
{
int rv;
void *ptr;
int pg_size = getpagesize();
struct ioctl_gntalloc_alloc_gref arg = {
.domid = DOM0_DOMID,
.flags = GNTALLOC_FLAG_WRITABLE,
.count = 1
};
struct ioctl_gntalloc_dealloc_gref arg_d;
struct ioctl_gntalloc_unmap_notify notify = {
.action = UNMAP_NOTIFY_CLEAR_BYTE
};
rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_ALLOC_GREF, &arg);
if (rv) {
RTE_LOG(ERR, PMD, "%s: ioctl error\n", __func__);
return -1;
}
ptr = (void *)mmap(NULL, pg_size, PROT_READ|PROT_WRITE, MAP_SHARED, gntalloc_fd, arg.index);
arg_d.index = arg.index;
arg_d.count = 1;
if (ptr == MAP_FAILED) {
RTE_LOG(ERR, PMD, "%s: mmap failed\n", __func__);
ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d);
return -1;
}
if (pptr)
*pptr = ptr;
if (gref)
*gref = arg.gref_ids[0];
notify.index = arg.index;
rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_SET_UNMAP_NOTIFY, &notify);
if (rv) {
RTE_LOG(ERR, PMD, "%s: unmap notify failed\n", __func__);
munmap(ptr, pg_size);
ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d);
return -1;
}
return 0;
}
void
gntfree(void *va, size_t sz, uint64_t start_index)
{
struct ioctl_gntalloc_dealloc_gref arg_d;
if (va && sz) {
munmap(va, sz);
arg_d.count = sz / getpagesize();
arg_d.index = start_index;
ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d);
}
}
static int
xenstore_cleanup(void)
{
char store_path[PATH_MAX] = {0};
if (rte_snprintf(store_path, sizeof(store_path),
"%s%s", dompath, DPDK_XENSTORE_NODE) == -1)
return -1;
if (xs_rm(xs, XBT_NULL, store_path) == false) {
RTE_LOG(ERR, PMD, "%s: failed cleanup node\n", __func__);
return -1;
}
return 0;
}
int
xenstore_init(void)
{
unsigned int len, domid;
char *buf;
static int cleanup = 0;
char *end;
xs = xs_domain_open();
if (xs == NULL) {
RTE_LOG(ERR, PMD,"%s: xs_domain_open failed\n", __func__);
return -1;
}
buf = xs_read(xs, XBT_NULL, "domid", &len);
if (buf == NULL) {
RTE_LOG(ERR, PMD, "%s: failed read domid\n", __func__);
return -1;
}
errno = 0;
domid = strtoul(buf, &end, 0);
if (errno != 0 || end == NULL || end == buf || domid == 0)
return -1;
RTE_LOG(INFO, PMD, "retrieved dom ID = %d\n", domid);
dompath = xs_get_domain_path(xs, domid);
if (dompath == NULL)
return -1;
xs_transaction_start(xs); /* When to stop transaction */
if (cleanup == 0) {
if (xenstore_cleanup())
return -1;
cleanup = 1;
}
return 0;
}
int
xenstore_write(const char *key_str, const char *val_str)
{
char grant_path[PATH_MAX];
int rv, len;
if (xs == NULL) {
RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__);
return -1;
}
rv = rte_snprintf(grant_path, sizeof(grant_path), "%s%s", dompath, key_str);
if (rv == -1) {
RTE_LOG(ERR, PMD, "%s: rte_snprintf %s %s failed\n",
__func__, dompath, key_str);
return -1;
}
len = strnlen(val_str, PATH_MAX);
if (xs_write(xs, XBT_NULL, grant_path, val_str, len) == false) {
RTE_LOG(ERR, PMD, "%s: xs_write failed\n", __func__);
return -1;
}
return 0;
}
int
grant_node_create(uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, char *val_str, size_t str_size)
{
uint64_t start_index;
int pg_size;
uint32_t pg_shift;
void *ptr = NULL;
uint32_t count, entries_per_pg;
uint32_t i, j = 0, k = 0;
uint32_t *gref_tmp;
int first = 1;
char tmp_str[PATH_MAX] = {0};
int rv = -1;
pg_size = getpagesize();
if (rte_is_power_of_2(pg_size) == 0) {
return -1;
}
pg_shift = rte_bsf32(pg_size);
if (pg_size % sizeof(struct grant_node_item)) {
RTE_LOG(ERR, PMD, "pg_size isn't a multiple of grant node item\n");
return -1;
}
entries_per_pg = pg_size / sizeof(struct grant_node_item);
count = (pg_num + entries_per_pg - 1 ) / entries_per_pg;
gref_tmp = malloc(count * sizeof(uint32_t));
if (gref_tmp == NULL)
return -1;
ptr = gntalloc(pg_size * count, gref_tmp, &start_index);
if (ptr == NULL) {
RTE_LOG(ERR, PMD, "%s: gntalloc error of %d pages\n", __func__, count);
free(gref_tmp);
return -1;
}
while (j < pg_num) {
if (first) {
rv = rte_snprintf(val_str, str_size, "%u", gref_tmp[k]);
first = 0;
} else {
rte_snprintf(tmp_str, PATH_MAX, "%s", val_str);
rv = rte_snprintf(val_str, str_size, "%s,%u", tmp_str, gref_tmp[k]);
}
k++;
if (rv == -1)
break;
for (i = 0; i < entries_per_pg && j < pg_num ; i++) {
((struct grant_node_item *)ptr)->gref = gref_arr[j];
((struct grant_node_item *)ptr)->pfn = pa_arr[j] >> pg_shift;
ptr = RTE_PTR_ADD(ptr, sizeof(struct grant_node_item));
j++;
}
}
if (rv == -1) {
gntfree(ptr, pg_size * count, start_index);
} else
rv = 0;
free(gref_tmp);
return rv;
}
int
grant_gntalloc_mbuf_pool(struct rte_mempool *mpool, uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, int mempool_idx)
{
char key_str[PATH_MAX] = {0};
char val_str[PATH_MAX] = {0};
rte_snprintf(val_str, sizeof(val_str), "");
if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) {
return -1;
}
if (rte_snprintf(key_str, sizeof(key_str),
DPDK_XENSTORE_PATH"%d"MEMPOOL_XENSTORE_STR, mempool_idx) == -1)
return -1;
if (xenstore_write(key_str, val_str) == -1)
return -1;
if (rte_snprintf(key_str, sizeof(key_str),
DPDK_XENSTORE_PATH"%d"MEMPOOL_VA_XENSTORE_STR, mempool_idx) == -1)
return -1;
if (rte_snprintf(val_str, sizeof(val_str), "%p", (uintptr_t)mpool->elt_va_start) == -1)
return -1;
if (xenstore_write(key_str, val_str) == -1)
return -1;
return 0;
}
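
Taken together, for the first mempool of the first virtio device these calls leave entries like the following under the guest's own xenstore directory (xenstore_write() prepends the domain path; the domid and gref/address values are illustrative, matching the format comments in rte_xen_lib.h):

/local/domain/<domid>/control/dpdk/0_mempool_gref = "1537,1524,1533"
/local/domain/<domid>/control/dpdk/0_mempool_va   = "0x80340000"
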


@ -0,0 +1,113 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _RTE_XEN_DUMMY_PMD_H
#define _RTE_XEN_DUMMY_PMD_H
#include <stdint.h>
#include <rte_common.h>
#include <rte_mempool.h>
#include <rte_ether.h>
#define PAGEMAP_FNAME "/proc/self/pagemap"
#define XEN_GNTALLOC_FNAME "/dev/xen/gntalloc"
#define DPDK_XENSTORE_PATH "/control/dpdk/"
#define DPDK_XENSTORE_NODE "/control/dpdk"
/*format 0_mempool_gref = "1537,1524,1533" */
#define MEMPOOL_XENSTORE_STR "_mempool_gref"
/*format 0_mempool_va = 0x80340000 */
#define MEMPOOL_VA_XENSTORE_STR "_mempool_va"
/*format 0_rx_vring_gref = "1537,1524,1533" */
#define RXVRING_XENSTORE_STR "_rx_vring_gref"
/*format 0_tx_vring_gref = "1537,1524,1533" */
#define TXVRING_XENSTORE_STR "_tx_vring_gref"
#define VRING_FLAG_STR "_vring_flag"
/*format: event_type_start_0 = 1*/
#define EVENT_TYPE_START_STR "event_type_start_"
#define DOM0_DOMID 0
/*
* the pfn (page frame number) is in bits 0-54 (see pagemap.txt in the Linux
* Documentation).
*/
#define PAGEMAP_PFN_BITS 54
#define PAGEMAP_PFN_MASK RTE_LEN2MASK(PAGEMAP_PFN_BITS, phys_addr_t)
#define MAP_FLAG 0xA5
#define RTE_ETH_XENVIRT_PAIRS_DELIM ';'
#define RTE_ETH_XENVIRT_KEY_VALUE_DELIM '='
#define RTE_ETH_XENVIRT_MAX_ARGS 1
#define RTE_ETH_XENVIRT_MAC_PARAM "mac"
struct xenvirt_dict {
uint8_t addr_valid;
struct ether_addr addr;
};
extern int gntalloc_fd;
int
gntalloc_open(void);
void
gntalloc_close(void);
void *
gntalloc(size_t sz, uint32_t *gref, uint64_t *start_index);
void
gntfree(void *va, size_t sz, uint64_t start_index);
int
xenstore_init(void);
int
xenstore_write(const char *key_str, const char *val_str);
int
get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz);
void *
get_xen_virtual(size_t size, size_t page_sz);
int
grefwatch_from_alloc(uint32_t *gref, void **pptr);
int grant_node_create(uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, char *val_str, size_t str_size);
int
grant_gntalloc_mbuf_pool(struct rte_mempool *mpool, uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, int mempool_idx);
#endif
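
A minimal sketch of how these helpers compose (not code from this commit; the page count, the xenstore key suffix and the function name are assumptions): open the gntalloc driver and xenstore, grant-allocate a set of pages, resolve their physical addresses, publish the gref/pfn table via grant_node_create() and a xenstore write, and roll everything back with gntfree() on error.

#include <limits.h>
#include <unistd.h>
#include "rte_xen_lib.h"

static void *
share_pages_with_dom0(uint32_t pg_num)
{
	uint32_t pg_sz = getpagesize();
	uint32_t gref[pg_num];
	phys_addr_t pa[pg_num];
	char val[PATH_MAX];
	uint64_t start;
	void *va;

	if (gntalloc_open() != 0 || xenstore_init() != 0)
		return NULL;
	/* allocate pg_num writable pages granted to dom0 and map them locally */
	va = gntalloc((size_t)pg_num * pg_sz, gref, &start);
	if (va == NULL)
		return NULL;
	/* publish the gref/pfn table under an illustrative xenstore key */
	if (get_phys_map(va, pa, pg_num, pg_sz) != 0 ||
	    grant_node_create(pg_num, gref, pa, val, sizeof(val)) != 0 ||
	    xenstore_write(DPDK_XENSTORE_PATH "0_example_gref", val) != 0) {
		gntfree(va, (size_t)pg_num * pg_sz, start);
		return NULL;
	}
	return va;
}
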


@ -0,0 +1,70 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _VIRTIO_LOGS_H_
#define _VIRTIO_LOGS_H_
#include <rte_log.h>
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT
#define PMD_INIT_LOG(level, fmt, args...) \
RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
#else
#define PMD_INIT_LOG(level, fmt, args...) do { } while(0)
#define PMD_INIT_FUNC_TRACE() do { } while(0)
#endif
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX
#define PMD_RX_LOG(level, fmt, args...) \
RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args)
#else
#define PMD_RX_LOG(level, fmt, args...) do { } while(0)
#endif
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX
#define PMD_TX_LOG(level, fmt, args...) \
RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args)
#else
#define PMD_TX_LOG(level, fmt, args...) do { } while(0)
#endif
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER
#define PMD_DRV_LOG(level, fmt, args...) \
RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args)
#else
#define PMD_DRV_LOG(level, fmt, args...) do { } while(0)
#endif
#endif /* _VIRTIO_LOGS_H_ */


@ -0,0 +1,279 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _VIRTQUEUE_H_
#define _VIRTQUEUE_H_
#include <stdint.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_net.h>
#include <rte_atomic.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include "virtio_logs.h"
/* The alignment to use between consumer and producer parts of vring. */
#define VIRTIO_PCI_VRING_ALIGN 4096
/*
* Address translation is between gva<->hva,
* rather than gpa<->hva as in the virtio spec.
*/
#define RTE_MBUF_DATA_DMA_ADDR(mb) \
((uint64_t)((mb)->pkt.data))
enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
/**
* The maximum virtqueue size is 2^15. Use that value as the end of
* descriptor chain terminator since it will never be a valid index
* in the descriptor table. This is used to verify we are correctly
* handling vq_free_cnt.
*/
#define VQ_RING_DESC_CHAIN_END 32768
#define VIRTQUEUE_MAX_NAME_SZ 32
struct pmd_internals {
struct rte_eth_stats eth_stats;
int port_id;
int virtio_idx;
};
struct virtqueue {
char vq_name[VIRTQUEUE_MAX_NAME_SZ];
struct rte_mempool *mpool; /**< mempool for mbuf allocation */
uint16_t queue_id; /**< DPDK queue index. */
uint16_t vq_queue_index; /**< PCI queue index */
uint8_t port_id; /**< Device port identifier. */
void *vq_ring_virt_mem; /**< virtual address of vring*/
int vq_alignment;
int vq_ring_size;
struct vring vq_ring; /**< vring keeping desc, used and avail */
struct pmd_internals *internals; /**< virtio device internal info. */
uint16_t vq_nentries; /**< vring desc numbers */
uint16_t vq_desc_head_idx;
uint16_t vq_free_cnt; /**< num of desc available */
uint16_t vq_used_cons_idx; /**< Last consumed desc in used table, trails vq_ring.used->idx*/
struct vq_desc_extra {
void *cookie;
uint16_t ndescs;
} vq_descx[0] __rte_cache_aligned;
};
#ifdef RTE_LIBRTE_XENVIRT_DEBUG_DUMP
#define VIRTQUEUE_DUMP(vq) do { \
uint16_t used_idx, nused; \
used_idx = (vq)->vq_ring.used->idx; \
nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
PMD_INIT_LOG(DEBUG, \
"VQ: %s - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
" avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
" avail.flags=0x%x; used.flags=0x%x\n", \
(vq)->vq_name, (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \
(vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \
(vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \
(vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \
} while (0)
#else
#define VIRTQUEUE_DUMP(vq) do { } while (0)
#endif
/**
* Dump virtqueue internal structures, for debug purpose only.
*/
void virtqueue_dump(struct virtqueue *vq);
/**
* Get all mbufs to be freed.
*/
struct rte_mbuf * virtqueue_detatch_unused(struct virtqueue *vq);
static inline int __attribute__((always_inline))
virtqueue_full(const struct virtqueue *vq)
{
return (vq->vq_free_cnt == 0);
}
#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))
static inline void __attribute__((always_inline))
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
{
uint16_t avail_idx;
/*
* Place the head of the descriptor chain into the next slot and make
* it usable to the host. The chain is made available now rather than
* deferring to virtqueue_notify() in the hopes that if the host is
* currently running on another CPU, we can keep it processing the new
* descriptor.
*/
avail_idx = (uint16_t)(vq->vq_ring.avail->idx & (vq->vq_nentries - 1));
vq->vq_ring.avail->ring[avail_idx] = desc_idx;
rte_compiler_barrier(); /* wmb; for the IA memory model a compiler barrier is enough */
vq->vq_ring.avail->idx++;
}
static inline void __attribute__((always_inline))
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
struct vring_desc *dp;
struct vq_desc_extra *dxp;
dp = &vq->vq_ring.desc[desc_idx];
dxp = &vq->vq_descx[desc_idx];
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
while (dp->flags & VRING_DESC_F_NEXT) {
dp = &vq->vq_ring.desc[dp->next];
}
dxp->ndescs = 0;
/*
* We must append the existing free chain, if any, to the end of
* newly freed chain. If the virtqueue was completely used, then
* head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
*/
dp->next = vq->vq_desc_head_idx;
vq->vq_desc_head_idx = desc_idx;
}
static inline int __attribute__((always_inline))
virtqueue_enqueue_recv_refill(struct virtqueue *rxvq, struct rte_mbuf *cookie)
{
const uint16_t needed = 1;
const uint16_t head_idx = rxvq->vq_desc_head_idx;
struct vring_desc *start_dp = rxvq->vq_ring.desc;
struct vq_desc_extra *dxp;
if (unlikely(rxvq->vq_free_cnt == 0))
return -ENOSPC;
if (unlikely(rxvq->vq_free_cnt < needed))
return -EMSGSIZE;
if (unlikely(head_idx >= rxvq->vq_nentries))
return -EFAULT;
dxp = &rxvq->vq_descx[head_idx];
dxp->cookie = (void *)cookie;
dxp->ndescs = needed;
start_dp[head_idx].addr =
(uint64_t) ((uint64_t)cookie->buf_addr + RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
start_dp[head_idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
start_dp[head_idx].flags = VRING_DESC_F_WRITE;
rxvq->vq_desc_head_idx = start_dp[head_idx].next;
rxvq->vq_free_cnt = (uint16_t)(rxvq->vq_free_cnt - needed);
vq_ring_update_avail(rxvq, head_idx);
return 0;
}
static inline int __attribute__((always_inline))
virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
{
const uint16_t needed = 2;
struct vring_desc *start_dp = txvq->vq_ring.desc;
uint16_t head_idx = txvq->vq_desc_head_idx;
uint16_t idx = head_idx;
struct vq_desc_extra *dxp;
if (unlikely(txvq->vq_free_cnt == 0))
return -ENOSPC;
if (unlikely(txvq->vq_free_cnt < needed))
return -EMSGSIZE;
if (unlikely(head_idx >= txvq->vq_nentries))
return -EFAULT;
dxp = &txvq->vq_descx[idx];
dxp->cookie = (void *)cookie;
dxp->ndescs = needed;
start_dp = txvq->vq_ring.desc;
start_dp[idx].addr = 0;
/*
* TODO: save one desc here?
*/
start_dp[idx].len = sizeof(struct virtio_net_hdr);
start_dp[idx].flags = VRING_DESC_F_NEXT;
start_dp[idx].addr = (uintptr_t)NULL;
idx = start_dp[idx].next;
start_dp[idx].addr = RTE_MBUF_DATA_DMA_ADDR(cookie);
start_dp[idx].len = cookie->pkt.data_len;
start_dp[idx].flags = 0;
idx = start_dp[idx].next;
txvq->vq_desc_head_idx = idx;
txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
vq_ring_update_avail(txvq, head_idx);
return 0;
}
static inline uint16_t __attribute__((always_inline))
virtqueue_dequeue_burst(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint32_t *len, uint16_t num)
{
struct vring_used_elem *uep;
struct rte_mbuf *cookie;
uint16_t used_idx, desc_idx;
uint16_t i;
/* Caller does the check */
for (i = 0; i < num ; i ++) {
used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
uep = &vq->vq_ring.used->ring[used_idx];
desc_idx = (uint16_t) uep->id;
cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
if (unlikely(cookie == NULL)) {
PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
vq->vq_used_cons_idx);
RTE_LOG(ERR, PMD, "%s: inconsistent (%u, %u)\n", __func__, used_idx , desc_idx);
break;
}
len[i] = uep->len;
rx_pkts[i] = cookie;
vq->vq_used_cons_idx++;
vq_ring_free_chain(vq, desc_idx);
vq->vq_descx[desc_idx].cookie = NULL;
}
return i;
}
#endif /* _VIRTQUEUE_H_ */
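
For reference, a stripped-down receive poll built only from the inline helpers above would look roughly like the sketch below; it mirrors eth_xenvirt_rx() in rte_eth_xenvirt.c, except that it omits the virtio_net_hdr stripping the real function performs on each returned mbuf. The function name, the 64-entry burst bound and the use of the public rte_pktmbuf_alloc() (instead of the PMD's raw allocator) are assumptions for illustration.

#include <rte_mbuf.h>
#include "virtqueue.h"

static uint16_t
poll_rx_once(struct virtqueue *rxvq, struct rte_mbuf **pkts, uint16_t max)
{
	uint32_t len[64];
	uint16_t nb_used, n;
	struct rte_mbuf *m;

	nb_used = VIRTQUEUE_NUSED(rxvq);
	rte_compiler_barrier();            /* read used->idx before the entries */
	n = (uint16_t)(nb_used <= max ? nb_used : max);
	if (n > 64)
		n = 64;
	n = virtqueue_dequeue_burst(rxvq, pkts, len, n);

	/* hand freed descriptors back to the backend with fresh buffers */
	while (!virtqueue_full(rxvq)) {
		m = rte_pktmbuf_alloc(rxvq->mpool);
		if (m == NULL)
			break;
		if (virtqueue_enqueue_recv_refill(rxvq, m) != 0) {
			rte_pktmbuf_free_seg(m);
			break;
		}
	}
	return n;
}
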


@ -82,10 +82,6 @@ ifeq ($(CONFIG_RTE_LIBRTE_VIRTIO_PMD),y)
LDLIBS += -lrte_pmd_virtio
endif
ifeq ($(CONFIG_RTE_LIBRTE_CMDLINE),y)
LDLIBS += -lrte_cmdline
endif
ifeq ($(CONFIG_RTE_LIBRTE_TIMER),y)
LDLIBS += -lrte_timer
endif
@ -155,6 +151,16 @@ ifeq ($(CONFIG_RTE_LIBRTE_EAL),y)
LDLIBS += -lrte_eal
endif
ifeq ($(CONFIG_RTE_LIBRTE_PMD_XENVIRT),y)
LDLIBS += -lrte_pmd_xenvirt
LDLIBS += -lxenstore
endif
ifeq ($(CONFIG_RTE_LIBRTE_CMDLINE),y)
LDLIBS += -lrte_cmdline
endif
ifeq ($(CONFIG_RTE_LIBRTE_PMD_PCAP),y)
LDLIBS += -lrte_pmd_pcap -lpcap
endif