vhost: remove vhost-cuse

Remove the vhost-cuse code, including the eventfd_link kernel module,
which is used by vhost-cuse only.

The lib/virt/qemu-wrap.py is also removed, as it's mainly for vhost-cuse
usage.

As we have only one vhost implementation now, only one vhost config
option is needed. Thus, CONFIG_RTE_LIBRTE_VHOST_USER is removed.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
This commit is contained in:
Yuanhan Liu 2016-08-18 16:48:37 +08:00
parent 6545392887
commit 466d914b01
13 changed files with 4 additions and 1890 deletions

View File

@ -546,13 +546,9 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
CONFIG_RTE_LIBRTE_PDUMP=y
#
# Compile vhost library
# fuse-devel is needed to run vhost-cuse.
# fuse-devel enables user space char driver development
# vhost-user is turned on by default.
# Compile vhost user library
#
CONFIG_RTE_LIBRTE_VHOST=n
CONFIG_RTE_LIBRTE_VHOST_USER=y
CONFIG_RTE_LIBRTE_VHOST_NUMA=n
CONFIG_RTE_LIBRTE_VHOST_DEBUG=n

View File

@ -50,10 +50,6 @@ Deprecation Notices
and will be removed in 17.02.
It is replaced by ``rte_mempool_generic_get/put`` functions.
* The vhost-cuse will be removed in 16.11. Since v2.1, a large majority of
development effort has gone to vhost-user, such as multiple-queue, live
migration, reconnect etc. Therefore, vhost-user should be used instead.
* API will change for ``rte_port_source_params`` and ``rte_port_sink_params``
structures. The member ``file_name`` data type will be changed from
``char *`` to ``const char *``. This change targets release 16.11.

View File

@ -39,13 +39,8 @@ EXPORT_MAP := rte_vhost_version.map
LIBABIVER := 3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
CFLAGS += -I vhost_user
LDLIBS += -lpthread
else
CFLAGS += -I vhost_cuse
LDLIBS += -lfuse
endif
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
LDLIBS += -lnuma
@ -53,11 +48,9 @@ endif
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c
else
SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/vhost-net-user.c
SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/virtio-net-user.c
SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/fd_man.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h

View File

@ -1,41 +0,0 @@
# BSD LICENSE
#
# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Kernel build tree used to compile this out-of-tree module. Defaults to the
# tree of the running kernel; override on the command line or in the
# environment to build against a different one.
RTE_KERNELDIR ?= /lib/modules/$(shell uname -r)/build

# Tell kbuild to build eventfd_link.o as a loadable module.
obj-m += eventfd_link.o

# all and clean are commands, not files; without this a file named "all" or
# "clean" in this directory would silently disable them.
.PHONY: all clean

# $(MAKE) (not a literal "make") lets -j, -n and the jobserver reach kbuild.
# $(CURDIR) is set by make itself, so M= stays correct when this Makefile is
# entered through "make -C <dir>", where the inherited PWD would be stale.
all:
	$(MAKE) -C $(RTE_KERNELDIR) M=$(CURDIR) modules

clean:
	$(MAKE) -C $(RTE_KERNELDIR) M=$(CURDIR) clean

View File

@ -1,277 +0,0 @@
/*-
* GPL LICENSE SUMMARY
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
* The full GNU General Public License is included in this distribution
* in the file called LICENSE.GPL.
*
* Contact Information:
* Intel Corporation
*/
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/syscalls.h>
#include "eventfd_link.h"
/*
* get_files_struct is copied from fs/file.c
*/
struct files_struct *
get_files_struct(struct task_struct *task)
{
struct files_struct *files;
task_lock(task);
files = task->files;
if (files)
atomic_inc(&files->count);
task_unlock(task);
return files;
}
/*
 * Drop a reference obtained with get_files_struct() (extracted from
 * fs/file.c).
 *
 * This reduced copy has no teardown path: if the call releases the last
 * reference on @files it hits BUG().
 */
void
put_files_struct(struct files_struct *files)
{
	const int dropped_last_ref = atomic_dec_and_test(&files->count);

	if (dropped_last_ref)
		BUG();
}
/*
 * Look up descriptor @fd in @files and take a reference on its file.
 *
 * The lookup runs inside an RCU read-side section. Returns NULL when the
 * descriptor is not open, when the file was opened with FMODE_PATH, or when
 * its reference count has already dropped to zero.
 */
static struct file *
fget_from_files(struct files_struct *files, unsigned fd)
{
	struct file *found;

	rcu_read_lock();
	found = fcheck_files(files, fd);
	if (found != NULL &&
	    ((found->f_mode & FMODE_PATH) ||
	     !atomic_long_inc_not_zero(&found->f_count)))
		found = NULL;
	rcu_read_unlock();

	return found;
}
/*
 * EVENTFD_COPY2 handler: make a file that is open in another process
 * available through a newly allocated descriptor of the calling process.
 *
 * @arg: user-space pointer to a struct eventfd_copy2 (fd, pid, flags).
 *
 * Returns the new descriptor number on success, otherwise a negative errno:
 * -EFAULT (argument could not be copied in), -ESRCH (pid not found),
 * -ESTALE (target has no files struct), -EBADF (fd could not be taken from
 * the target), or whatever get_unused_fd_flags() reported.
 */
static long
eventfd_link_ioctl_copy2(unsigned long arg)
{
void __user *argp = (void __user *) arg;
struct task_struct *task_target = NULL;
struct file *file;
struct files_struct *files;
struct eventfd_copy2 eventfd_copy2;
/*
 * ret is pre-loaded with the error of the step that follows, so every
 * failure path only has to jump to the matching cleanup label.
 */
long ret = -EFAULT;
if (copy_from_user(&eventfd_copy2, argp, sizeof(struct eventfd_copy2)))
goto out;
/*
 * Find the task struct for the target pid
 */
ret = -ESRCH;
task_target =
get_pid_task(find_vpid(eventfd_copy2.pid), PIDTYPE_PID);
if (task_target == NULL) {
pr_info("Unable to find pid %d\n", eventfd_copy2.pid);
goto out;
}
ret = -ESTALE;
files = get_files_struct(task_target);
if (files == NULL) {
pr_info("Failed to get target files struct\n");
goto out_task;
}
ret = -EBADF;
file = fget_from_files(files, eventfd_copy2.fd);
/*
 * fget_from_files() took its own reference on the file, so the table
 * reference is dropped before the result is even checked.
 */
put_files_struct(files);
if (file == NULL) {
pr_info("Failed to get fd %d from target\n", eventfd_copy2.fd);
goto out_task;
}
/*
 * Install the file struct from the target process into the
 * newly allocated file descriptor of the source process.
 */
ret = get_unused_fd_flags(eventfd_copy2.flags);
if (ret < 0) {
/* No descriptor available: give the file reference back. */
fput(file);
goto out_task;
}
/*
 * The file reference is not dropped on this path; presumably
 * fd_install() takes it over for the new descriptor - confirm.
 */
fd_install(ret, file);
out_task:
put_task_struct(task_target);
out:
return ret;
}
/*
 * EVENTFD_COPY handler: close descriptor source_fd of the calling process
 * and re-point it at the file behind target_fd of process target_pid.
 *
 * @arg: user-space pointer to a struct eventfd_copy
 *       (target_fd, source_fd, target_pid).
 *
 * Returns 0 on success, otherwise a negative errno: -EFAULT (argument could
 * not be copied in), -ESRCH (pid not found), -ESTALE (a files struct is
 * missing) or -EBADF (a descriptor could not be taken).
 *
 * The caller's source_fd slot is cleared before the target lookup starts, so
 * a failure in the second half returns an error with source_fd already gone.
 */
static long
eventfd_link_ioctl_copy(unsigned long arg)
{
void __user *argp = (void __user *) arg;
struct task_struct *task_target = NULL;
struct file *file;
struct files_struct *files;
struct fdtable *fdt;
struct eventfd_copy eventfd_copy;
/*
 * ret is pre-loaded with the error of the step that follows, so every
 * failure path only has to jump to the matching cleanup label.
 */
long ret = -EFAULT;
if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))
goto out;
/*
 * Find the task struct for the target pid
 */
ret = -ESRCH;
task_target =
get_pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);
if (task_target == NULL) {
pr_info("Unable to find pid %d\n", eventfd_copy.target_pid);
goto out;
}
ret = -ESTALE;
files = get_files_struct(current);
if (files == NULL) {
pr_info("Failed to get current files struct\n");
goto out_task;
}
ret = -EBADF;
file = fget_from_files(files, eventfd_copy.source_fd);
if (file == NULL) {
pr_info("Failed to get fd %d from source\n",
eventfd_copy.source_fd);
put_files_struct(files);
goto out_task;
}
/*
 * Release the existing eventfd in the source process
 *
 * NOTE(review): fput() drops the reference taken by fget_from_files() and
 * filp_close() is then called on the same file, all while holding the
 * files->file_lock spinlock - confirm neither call can sleep here and that
 * the reference counting balances.
 */
spin_lock(&files->file_lock);
fput(file);
filp_close(file, files);
fdt = files_fdtable(files);
/* Empty the caller's slot so that it can be re-installed below. */
fdt->fd[eventfd_copy.source_fd] = NULL;
spin_unlock(&files->file_lock);
put_files_struct(files);
/*
 * Find the file struct associated with the target fd.
 */
ret = -ESTALE;
files = get_files_struct(task_target);
if (files == NULL) {
pr_info("Failed to get target files struct\n");
goto out_task;
}
ret = -EBADF;
file = fget_from_files(files, eventfd_copy.target_fd);
/*
 * fget_from_files() took its own reference on the file, so the table
 * reference is dropped before the result is even checked.
 */
put_files_struct(files);
if (file == NULL) {
pr_info("Failed to get fd %d from target\n",
eventfd_copy.target_fd);
goto out_task;
}
/*
 * Install the file struct from the target process into the
 * file descriptor of the source process.
 */
fd_install(eventfd_copy.source_fd, file);
ret = 0;
out_task:
put_task_struct(task_target);
out:
return ret;
}
/*
 * unlocked_ioctl entry point of the eventfd-link device.
 *
 * Dispatches EVENTFD_COPY and EVENTFD_COPY2 to their handlers and returns
 * the handler's result; any other command yields -ENOIOCTLCMD. The file
 * pointer @f is not used.
 */
static long
eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
{
	if (ioctl == EVENTFD_COPY)
		return eventfd_link_ioctl_copy(arg);

	if (ioctl == EVENTFD_COPY2)
		return eventfd_link_ioctl_copy2(arg);

	return -ENOIOCTLCMD;
}
/*
 * File operations of the eventfd-link device: only unlocked_ioctl is
 * provided, routed to eventfd_link_ioctl().
 */
static const struct file_operations eventfd_link_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = eventfd_link_ioctl,
};
/*
 * Misc device descriptor: named "eventfd-link", with a dynamically assigned
 * minor number, served by eventfd_link_fops.
 */
static struct miscdevice eventfd_link_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = "eventfd-link",
.fops = &eventfd_link_fops,
};
/*
 * Module entry point: register the eventfd-link misc device and hand the
 * result of misc_register() back to the module loader.
 */
static int __init
eventfd_link_init(void)
{
	int rc = misc_register(&eventfd_link_misc);

	return rc;
}
module_init(eventfd_link_init);
/*
 * Module exit point: unregister the misc device that eventfd_link_init()
 * registered.
 */
static void __exit
eventfd_link_exit(void)
{
misc_deregister(&eventfd_link_misc);
}
module_exit(eventfd_link_exit);
MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Anthony Fee");
MODULE_DESCRIPTION("Link eventfd");
MODULE_ALIAS("devname:eventfd-link");

View File

@ -1,94 +0,0 @@
/*-
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
* The full GNU General Public License is included in this distribution
* in the file called LICENSE.GPL.
*
* Contact Information:
* Intel Corporation
*
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef _EVENTFD_LINK_H_
#define _EVENTFD_LINK_H_
/*
 * Arguments for the EVENTFD_COPY ioctl.
 */
struct eventfd_copy {
unsigned target_fd; /* fd in the target process */
unsigned source_fd; /* fd in the calling process */
pid_t target_pid; /* pid of the target process */
};
/*
 * ioctl that closes source_fd in the calling process and re-installs it so
 * that it refers to the file behind target_fd of process target_pid.
 * NOTE: this one should be
 * #define EVENTFD_COPY _IOWR('D', 1, struct eventfd_copy) actually
 */
#define EVENTFD_COPY 1
/*
 * Arguments for the EVENTFD_COPY2 ioctl.
 */
struct eventfd_copy2 {
unsigned fd; /* fd to steal */
pid_t pid; /* pid of the process to steal from */
unsigned flags; /* flags to allocate new fd with */
};
/*
 * ioctl to copy an fd entry from the target process into a newly allocated
 * fd in the calling process; the new fd number is the ioctl's return value.
 */
#define EVENTFD_COPY2 _IOW('D', 2, struct eventfd_copy2)
#endif /* _EVENTFD_LINK_H_ */

View File

@ -1,387 +0,0 @@
#!/usr/bin/python
#/*
# * BSD LICENSE
# *
# * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
# * All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * * Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * * Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * * Neither the name of Intel Corporation nor the names of its
# * contributors may be used to endorse or promote products derived
# * from this software without specific prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#####################################################################
# This script is designed to modify the call to the QEMU emulator
# to support userspace vhost when starting a guest machine through
# libvirt with vhost enabled. The steps to enable this are as follows
# and should be run as root:
#
# 1. Place this script in a libvirtd's binary search PATH ($PATH)
# A good location would be in the same directory that the QEMU
# binary is located
#
# 2. Ensure that the script has the same owner/group and file
# permissions as the QEMU binary
#
# 3. Update the VM xml file using "virsh edit VM.xml"
#
# 3.a) Set the VM to use the launch script
#
# Set the emulator path contained in the
# <emulator></emulator> tags
#
# e.g. replace <emulator>/usr/bin/qemu-kvm</emulator>
# with <emulator>/usr/bin/qemu-wrap.py</emulator>
#
# 3.b) Set the VM's device's to use vhost-net offload
#
# <interface type="network">
# <model type="virtio"/>
# <driver name="vhost"/>
# </interface>
#
# 4. Enable libvirt to access our userspace device file by adding it to
# controllers cgroup for libvirtd using the following steps
#
# 4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
# 1) cgroup_controllers = [ ... "devices", ... ]
# 2) clear_emulator_capabilities = 0
# 3) user = "root"
# 4) group = "root"
# 5) cgroup_device_acl = [
# "/dev/null", "/dev/full", "/dev/zero",
# "/dev/random", "/dev/urandom",
# "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
# "/dev/rtc", "/dev/hpet", "/dev/net/tun",
# "/dev/<devbase-name>",
# "/dev/hugepages",
# ]
#
# 4.b) Disable SELinux or set to permissive mode
#
# 4.c) Mount cgroup device controller
# "mkdir /dev/cgroup"
# "mount -t cgroup none /dev/cgroup -o devices"
#
# 4.d) Set hugetlbfs_mount variable - ( Optional )
# VMs using userspace vhost must use hugepage backed
# memory. This can be enabled in the libvirt XML
# config by adding a memory backing section to the
# XML config e.g.
# <memoryBacking>
# <hugepages/>
# </memoryBacking>
# This memory backing section should be added after the
# <memory> and <currentMemory> sections. This will add
# flags "-mem-prealloc -mem-path <path>" to the QEMU
# command line. The hugetlbfs_mount variable can be used
# to override the default <path> passed through by libvirt.
#
# if "-mem-prealloc" or "-mem-path <path>" are not passed
# through and a vhost device is detected then these options will
# be automatically added by this script. This script will detect
# the system hugetlbfs mount point to be used for <path>. The
# default <path> for this script can be overridden by the
# hugetlbfs_dir variable in the configuration section of this script.
#
#
# 4.e) Restart the libvirtd system process
# e.g. on Fedora "systemctl restart libvirtd.service"
#
#
# 4.f) Edit the Configuration Parameters section of this script
# to point to the correct emulator location and set any
# addition options
#
# The script modifies the libvirtd Qemu call by modifying/adding
# options based on the configuration parameters below.
# NOTE:
# emul_path and us_vhost_path must be set
# All other parameters are optional
#####################################################################
#############################################
# Configuration Parameters
#############################################
#Path to QEMU binary
emul_path = "/usr/local/bin/qemu-system-x86_64"
#Path to userspace vhost device file
# This filename should match the --dev-basename parameters of
# the command used to launch the userspace vhost sample application e.g.
# if the sample app launch command is:
# ./build/vhost-switch ..... --dev-basename usvhost
# then this variable should be set to:
# us_vhost_path = "/dev/usvhost"
us_vhost_path = "/dev/usvhost"
#List of additional user defined emulation options. These options will
#be added to all Qemu calls
emul_opts_user = []
#List of additional user defined emulation options for vhost only.
#These options will only be added to vhost enabled guests
emul_opts_user_vhost = []
#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
# Set this variable to one to enable this option for all VMs
use_huge_all = 0
#Instead of autodetecting, override the hugetlbfs directory by setting
#this variable
hugetlbfs_dir = ""
#############################################
#############################################
# ****** Do Not Modify Below this Line ******
#############################################
import sys, os, subprocess
import time
import signal
#List of open userspace vhost file descriptors
fd_list = []
#additional virtio device flags when using userspace vhost
vhost_flags = [ "csum=off",
"gso=off",
"guest_tso4=off",
"guest_tso6=off",
"guest_ecn=off"
]
#String of the path to the Qemu process pid
qemu_pid = "/tmp/%d-qemu.pid" % os.getpid()
#############################################
# Signal handler to kill Qemu subprocess
#
# Reads the pid stored in the qemu_pid file
# and sends SIGTERM to that process group.
#   signum - number of the received signal (unused)
#   stack  - interrupted stack frame (unused)
#############################################
def kill_qemu_process(signum, stack):
    # The with-block closes the pid file even when its content cannot be
    # parsed; previously the handle leaked if int() or killpg() raised.
    with open(qemu_pid, 'r') as pidfile:
        pid = int(pidfile.read())

    os.killpg(pid, signal.SIGTERM)
#############################################
# Find the system hugefile mount point.
# Note:
# if multiple hugetlbfs mount points exist
# then the first one found will be used
#
# Returns hugetlbfs_dir when it is set,
# otherwise the mount point of the first
# hugetlbfs entry in /proc/mounts. Exits with
# status 1 when /proc/mounts does not exist or
# lists no hugetlbfs mount.
#############################################
def find_huge_mount():
    if len(hugetlbfs_dir):
        return hugetlbfs_dir

    if not os.access("/proc/mounts", os.F_OK):
        # print(...) with a single string behaves the same on Python 2 and 3
        print("/proc/mounts not found")
        exit(1)

    huge_mount = ""

    # The with-block guarantees the file is closed; the previous code said
    # "f.close" without parentheses, which never closed it.
    with open("/proc/mounts", "r") as mounts:
        for line in mounts:
            fields = line.split(" ")
            # field 1 is used as the mount point, field 2 as the fs type;
            # lines with fewer fields are skipped instead of raising
            if len(fields) > 2 and fields[2] == 'hugetlbfs':
                huge_mount = fields[1]
                break

    if len(huge_mount) == 0:
        print("Failed to find hugetlbfs mount point")
        exit(1)

    return huge_mount
#############################################
# Open the userspace vhost device file
# (us_vhost_path) read/write and return its
# file descriptor. Prints a message and exits
# with status 1 when the file does not exist.
#############################################
def get_vhost_fd():
    if not os.access(us_vhost_path, os.F_OK):
        print ("US-Vhost file %s not found" %us_vhost_path)
        exit (1)

    return os.open(us_vhost_path, os.O_RDWR)
#############################################
# Check for vhostfd. if found then replace
# with our own vhost fd and append any vhost
# flags onto the end
#
#   arg - comma separated option string that
#         follows "-netdev"
# Returns the rewritten option string. Every
# option starting with "vhost" is dropped; if
# at least one was present, "vhost=on" and
# "vhostfd=<fd>" are appended and the freshly
# opened fd is recorded in fd_list.
#############################################
def modify_netdev_arg(arg):
    netdev_opts = arg.split(",")

    # keep everything that is not a vhost* option
    new_opts = [opt for opt in netdev_opts if opt[:5] != "vhost"]

    # something was filtered out, so the guest asked for vhost
    if len(new_opts) != len(netdev_opts):
        #append vhost on option
        new_opts.append('vhost=on')
        #append vhostfd option
        new_fd = get_vhost_fd()
        new_opts.append('vhostfd=' + str(new_fd))
        # fd_list is only mutated, never rebound, so no "global" is needed
        fd_list.append(new_fd)

    # str.join keeps every option, including empty ones; the previous
    # hand-written loop silently dropped empty options at the front
    return ",".join(new_opts)
#############################################
# Main
#
# Rewrites the command line received in
# sys.argv, launches the QEMU binary at
# emul_path with it through a shell, forwards
# termination signals to QEMU, waits for it to
# finish and then closes the vhost descriptors
# and removes the pid file.
#############################################
def main():
    argv = sys.argv
    argc = len(argv)
    rewritten = []
    saw_prealloc = False
    saw_mem_path = False

    #walk the parameters; idx is advanced by hand because some options
    #consume the arguments that follow them
    idx = 0
    while idx < argc:
        cur = argv[idx]

        if cur == '-netdev':
            #the argument after -netdev may carry a vhostfd
            fds_before = len(fd_list)
            rewritten.append(cur)

            idx += 1
            cur = argv[idx]
            rewritten.append(modify_netdev_arg(cur))

            #a new fd means this is a vhost device: if -device is the
            #next arg, append the vhost flags to its option string
            if fds_before < len(fd_list):
                idx += 1
                cur = argv[idx]
                rewritten.append(cur)
                if cur == '-device':
                    idx += 1
                    rewritten.append(','.join([argv[idx]] + vhost_flags))
        else:
            #remember whether libvirt already passed the memory options
            if cur == '-mem-prealloc':
                saw_prealloc = True
            elif cur == '-mem-path':
                saw_mem_path = True
            rewritten.append(cur)

        idx += 1

    vhost_guest = len(fd_list) > 0

    #every chunk carries its own trailing space
    chunks = []

    #Qemu binary location
    chunks.append(emul_path + " ")

    #prealloc mem option if using vhost and not already present
    if vhost_guest and not saw_prealloc:
        chunks.append("-mem-prealloc ")

    #mem-path option if using vhost and not already present;
    #detects the hugetlbfs mount point
    if vhost_guest and not saw_mem_path:
        chunks.append("-mem-path " + find_huge_mount() + " ")

    #user options for every guest
    for opt in emul_opts_user:
        chunks.append(opt + " ")

    #user options for vhost guests only
    if vhost_guest:
        for opt in emul_opts_user_vhost:
            chunks.append(opt + " ")

    #updated libvirt options, skipping the 1st arg i.e. the call to
    #this script
    remaining = iter(rewritten)
    next(remaining)
    for item in remaining:
        chunks.append(str(item) + " ")

    chunks.append("-pidfile %s " % qemu_pid)
    emul_call = "".join(chunks)

    #Call QEMU in its own session so the whole group can be signalled
    process = subprocess.Popen(emul_call, shell=True, preexec_fn=os.setsid)

    for sig in (signal.SIGTERM, signal.SIGINT, signal.SIGHUP, signal.SIGQUIT):
        signal.signal(sig, kill_qemu_process)

    process.wait()

    #Close usvhost files
    for fd in fd_list:
        os.close(fd)

    #Cleanup temporary files
    if os.access(qemu_pid, os.F_OK):
        os.remove(qemu_pid)
if __name__ == "__main__":
main()

View File

@ -1,104 +0,0 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <rte_log.h>
#include "eventfd_link/eventfd_link.h"
#include "eventfd_copy.h"
#include "vhost-net.h"
static const char eventfd_cdev[] = "/dev/eventfd-link";
static int eventfd_link = -1;
/*
 * Open the eventfd-link character device once and cache its descriptor in
 * eventfd_link.
 *
 * Returns 0 when the device is open (already or newly), -1 when it cannot
 * be opened.
 */
int
eventfd_init(void)
{
	if (eventfd_link < 0) {
		eventfd_link = open(eventfd_cdev, O_RDWR);
		if (eventfd_link < 0) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"eventfd_link module is not loaded\n");
			return -1;
		}
	}

	return 0;
}
/*
 * Close the cached eventfd-link descriptor, if one is open.
 *
 * The cache is reset to -1 afterwards. Without the reset a later
 * eventfd_init() would report success while holding a closed descriptor,
 * and a second eventfd_free() would close the same number again.
 *
 * Always returns 0.
 */
int
eventfd_free(void)
{
	if (eventfd_link >= 0) {
		close(eventfd_link);
		eventfd_link = -1;
	}

	return 0;
}
/*
* This function uses the eventfd_link kernel module to copy an eventfd file
* descriptor provided by QEMU in to our process space.
*/
int
eventfd_copy(int target_fd, int target_pid)
{
int ret;
struct eventfd_copy2 eventfd_copy2;
/* Open the character device to the kernel module. */
/* TODO: check this earlier rather than fail until VM boots! */
if (eventfd_init() < 0)
return -1;
eventfd_copy2.fd = target_fd;
eventfd_copy2.pid = target_pid;
eventfd_copy2.flags = O_NONBLOCK | O_CLOEXEC;
/* Call the IOCTL to copy the eventfd. */
ret = ioctl(eventfd_link, EVENTFD_COPY2, &eventfd_copy2);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"EVENTFD_COPY2 ioctl failed\n");
return -1;
}
return ret;
}

View File

@ -1,45 +0,0 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Helpers around the eventfd-link character device. */
#ifndef _EVENTFD_H
#define _EVENTFD_H
/* Open the eventfd-link device if not open yet; 0 on success, -1 on error. */
int
eventfd_init(void);
/* Close the eventfd-link device if it is open; always returns 0. */
int
eventfd_free(void);
/*
 * Copy descriptor target_fd of process target_pid into the calling process;
 * returns the EVENTFD_COPY2 ioctl result, or -1 on failure.
 */
int
eventfd_copy(int target_fd, int target_pid);
#endif

View File

@ -1,431 +0,0 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <errno.h>
#include <fuse/cuse_lowlevel.h>
#include <linux/limits.h>
#include <linux/vhost.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <rte_ethdev.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include <rte_virtio_net.h>
#include "virtio-net-cdev.h"
#include "vhost-net.h"
#include "eventfd_copy.h"
#define FUSE_OPT_DUMMY "\0\0"
#define FUSE_OPT_FORE "-f\0\0"
#define FUSE_OPT_NOMULTI "-s\0\0"
static const uint32_t default_major = 231;
static const uint32_t default_minor = 1;
static const char cuse_device_name[] = "/dev/cuse";
static const char default_cdev[] = "vhost-net";
static struct fuse_session *session;
/*
 * Build a vhost_cuse_device_ctx for a FUSE request: the pid comes from the
 * request's fuse context, the device id (vid) from the file handle stored
 * in fi->fh. Only these two members are filled in.
 */
static struct vhost_cuse_device_ctx
fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
{
	struct fuse_ctx const *const caller = fuse_req_ctx(req);
	struct vhost_cuse_device_ctx dev_ctx;

	dev_ctx.pid = caller->pid;
	dev_ctx.vid = (int)fi->fh;

	return dev_ctx;
}
/*
 * Open handler: when the device is created in QEMU a new vhost device is
 * allocated here and its id is stored in the file handle (fi->fh) for the
 * requests that follow. Replies with EPERM when no device can be created.
 */
static void
vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
{
	const int vid = vhost_new_device();

	if (vid != -1) {
		fi->fh = vid;

		RTE_LOG(INFO, VHOST_CONFIG,
			"(%d) device configuration started\n", vid);
		fuse_reply_open(req, fi);
		return;
	}

	fuse_reply_err(req, EPERM);
}
/*
 * Release handler: when QEMU is shut down or killed the vhost device named
 * by the file handle is destroyed. The request is always answered with
 * error code 0.
 */
static void
vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
{
	const int vid = fuse_req_to_vhost_ctx(req, fi).vid;

	vhost_destroy_device(vid);
	RTE_LOG(INFO, VHOST_CONFIG, "(%d) device released\n", vid);
	fuse_reply_err(req, 0);
}
/*
 * Boilerplate for an ioctl that carries no data: call func(vid), keep its
 * return value in `result` and send it back with no output buffer.
 * Implicit arguments (must be in scope at the expansion site):
 * vid, req, result.
 */
#define VHOST_IOCTL(func) do { \
result = (func)(vid); \
fuse_reply_ioctl(req, result, NULL, 0); \
} while (0)
/*
 * Ask FUSE to retry the ioctl with buffers attached: size_r bytes to be read
 * from, and size_w bytes to be written to, the caller's `arg` pointer.
 * A size of zero omits that direction (its iovec count is passed as 0).
 * Implicit arguments: req, arg.
 */
#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
struct iovec iov_r = { arg, (size_r) }; \
struct iovec iov_w = { arg, (size_w) }; \
fuse_reply_ioctl_retry(req, &iov_r, \
(size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
} while (0)
/*
 * Boilerplate for an ioctl that only reads a `type` from the caller.
 * With no input buffer yet (in_bufsz == 0) a retry for sizeof(type) input
 * bytes is requested; otherwise the value is copied from in_buf into `var`,
 * func(vid, &var) is called and its result is replied with no output data.
 * NOTE(review): a non-zero in_bufsz is not compared against sizeof(type).
 * Implicit arguments: vid, req, result, in_bufsz, in_buf (and arg, through
 * VHOST_IOCTL_RETRY).
 */
#define VHOST_IOCTL_R(type, var, func) do { \
if (!in_bufsz) { \
VHOST_IOCTL_RETRY(sizeof(type), 0);\
} else { \
(var) = *(const type*)in_buf; \
result = func(vid, &(var)); \
fuse_reply_ioctl(req, result, NULL, 0);\
} \
} while (0)
/*
 * Boilerplate for an ioctl that only writes a `type` back to the caller.
 * With no output space yet (out_bufsz == 0) a retry for sizeof(type) output
 * bytes is requested; otherwise func(vid, &var) fills `var`, which is sent
 * back together with the result.
 * Implicit arguments: vid, req, result, out_bufsz (and arg, through
 * VHOST_IOCTL_RETRY).
 */
#define VHOST_IOCTL_W(type, var, func) do { \
if (!out_bufsz) { \
VHOST_IOCTL_RETRY(0, sizeof(type));\
} else { \
result = (func)(vid, &(var));\
fuse_reply_ioctl(req, result, &(var), sizeof(type));\
} \
} while (0)
/*
 * Boilerplate for an ioctl that reads a `type1` and writes back a `type2`.
 * With no input buffer yet (in_bufsz == 0) a retry for both directions is
 * requested; otherwise `var1` is copied from in_buf, func(vid, var1, &var2)
 * is called and `var2` is sent back together with the result.
 * Implicit arguments: vid, req, result, in_bufsz, in_buf (and arg, through
 * VHOST_IOCTL_RETRY).
 */
#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
if (!in_bufsz) { \
VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
} else { \
(var1) = *(const type1*) (in_buf); \
result = (func)(vid, (var1), &(var2)); \
fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
} \
} while (0)
/*
 * CUSE ioctl callback: dispatch one vhost ioctl for the device bound to the
 * file handle.
 *
 * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
 * of IOCTL a buffer is requested to read or to write: the first invocation
 * arrives without data and is answered with a "retry" reply naming the
 * buffer that is needed, after which the ioctl is delivered again with that
 * buffer attached.
 *
 * req       - FUSE request to reply to (exactly one reply is sent per call).
 * cmd       - ioctl number.
 * arg       - caller-side address of the ioctl argument, used for retries.
 * fi        - file info; fi->fh carries the device id.
 * flags     - unused.
 * in_buf    - data fetched from the caller, NULL/empty on the first pass.
 * in_bufsz  - number of valid bytes in in_buf.
 * out_bufsz - number of bytes the caller can receive.
 */
static void
vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, __rte_unused unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
	struct vhost_vring_file file;
	struct vhost_vring_state state;
	struct vhost_vring_addr addr;
	uint64_t features;
	uint32_t index;
	int result = 0;
	int vid = ctx.vid;

	switch (cmd) {
	case VHOST_NET_SET_BACKEND:
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: VHOST_NET_SET_BACKEND\n", ctx.vid);
		if (!in_buf) {
			VHOST_IOCTL_RETRY(sizeof(file), 0);
			break;
		}
		file = *(const struct vhost_vring_file *)in_buf;
		result = cuse_set_backend(ctx, &file);
		fuse_reply_ioctl(req, result, NULL, 0);
		break;

	case VHOST_GET_FEATURES:
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: VHOST_GET_FEATURES\n", vid);
		VHOST_IOCTL_W(uint64_t, features, vhost_get_features);
		break;

	case VHOST_SET_FEATURES:
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: VHOST_SET_FEATURES\n", vid);
		VHOST_IOCTL_R(uint64_t, features, vhost_set_features);
		break;

	case VHOST_RESET_OWNER:
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: VHOST_RESET_OWNER\n", vid);
		VHOST_IOCTL(vhost_reset_owner);
		break;

	case VHOST_SET_OWNER:
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: VHOST_SET_OWNER\n", vid);
		VHOST_IOCTL(vhost_set_owner);
		break;

	case VHOST_SET_MEM_TABLE: {
		/*
		 * Three passes: no data -> request the fixed header; header
		 * only -> request header plus nregions entries; full table ->
		 * apply it. The region count is always taken from the buffer
		 * being parsed (never from state remembered from an earlier
		 * pass, which could belong to another device) and the buffer
		 * size is checked before the table is handed on.
		 */
		const struct vhost_memory *mem = in_buf;
		size_t table_sz;

		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: VHOST_SET_MEM_TABLE\n", vid);

		if (in_bufsz == 0) {
			VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
			break;
		}

		/* Too short to even hold the header, or size would wrap. */
		if (in_bufsz < sizeof(struct vhost_memory) ||
		    mem->nregions > (SIZE_MAX - sizeof(struct vhost_memory)) /
				sizeof(struct vhost_memory_region)) {
			result = -1;
			fuse_reply_err(req, EINVAL);
			break;
		}

		table_sz = sizeof(struct vhost_memory) +
			sizeof(struct vhost_memory_region) * mem->nregions;

		if (in_bufsz == sizeof(struct vhost_memory)) {
			if (mem->nregions > 0) {
				VHOST_IOCTL_RETRY(table_sz, 0);
			} else {
				result = -1;
				fuse_reply_ioctl(req, result, NULL, 0);
			}
			break;
		}

		/* The buffer must cover every region it announces. */
		if (in_bufsz < table_sz) {
			result = -1;
			fuse_reply_err(req, EINVAL);
			break;
		}

		result = cuse_set_mem_table(ctx, mem, mem->nregions);
		if (result)
			fuse_reply_err(req, EINVAL);
		else
			fuse_reply_ioctl(req, result, NULL, 0);
		break;
	}

	case VHOST_SET_VRING_NUM:
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: VHOST_SET_VRING_NUM\n", vid);
		VHOST_IOCTL_R(struct vhost_vring_state, state,
			vhost_set_vring_num);
		break;

	case VHOST_SET_VRING_BASE:
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: VHOST_SET_VRING_BASE\n", vid);
		VHOST_IOCTL_R(struct vhost_vring_state, state,
			vhost_set_vring_base);
		break;

	case VHOST_GET_VRING_BASE:
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: VHOST_GET_VRING_BASE\n", vid);
		VHOST_IOCTL_RW(uint32_t, index,
			struct vhost_vring_state, state, vhost_get_vring_base);
		break;

	case VHOST_SET_VRING_ADDR:
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: VHOST_SET_VRING_ADDR\n", vid);
		VHOST_IOCTL_R(struct vhost_vring_addr, addr,
			vhost_set_vring_addr);
		break;

	case VHOST_SET_VRING_KICK:
	case VHOST_SET_VRING_CALL:
		if (cmd == VHOST_SET_VRING_KICK)
			LOG_DEBUG(VHOST_CONFIG,
				"(%d) IOCTL: VHOST_SET_VRING_KICK\n", vid);
		else
			LOG_DEBUG(VHOST_CONFIG,
				"(%d) IOCTL: VHOST_SET_VRING_CALL\n", vid);
		if (!in_buf)
			VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
		else {
			int fd;

			file = *(const struct vhost_vring_file *)in_buf;
			LOG_DEBUG(VHOST_CONFIG,
				"idx:%d fd:%d\n", file.index, file.fd);
			/*
			 * The fd number is only meaningful in the calling
			 * process; obtain a copy usable in this process.
			 */
			fd = eventfd_copy(file.fd, ctx.pid);
			if (fd < 0) {
				fuse_reply_ioctl(req, -1, NULL, 0);
				result = -1;
				break;
			}
			file.fd = fd;
			if (cmd == VHOST_SET_VRING_KICK)
				result = vhost_set_vring_kick(vid, &file);
			else
				result = vhost_set_vring_call(vid, &file);
			fuse_reply_ioctl(req, result, NULL, 0);
		}
		break;

	default:
		RTE_LOG(ERR, VHOST_CONFIG,
			"(%d) IOCTL: DOESN NOT EXIST\n", vid);
		result = -1;
		fuse_reply_ioctl(req, result, NULL, 0);
	}

	if (result < 0)
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: FAIL\n", vid);
	else
		LOG_DEBUG(VHOST_CONFIG,
			"(%d) IOCTL: SUCCESS\n", vid);
}
/*
 * Callback table given to cuse_lowlevel_setup(); only the open, release and
 * ioctl entry points are provided.
 */
static const struct cuse_lowlevel_ops vhost_net_ops = {
	.ioctl   = vhost_net_ioctl,
	.open    = vhost_net_open,
	.release = vhost_net_release,
};
/*
 * Register a vhost CUSE character device named /dev/<dev_name>.
 *
 * cuse_info is populated with the device name and the default major/minor
 * numbers, and the CUSE session is created with vhost_net_ops as its
 * callback table. The resulting session is stored in the file-level
 * `session` pointer.
 *
 * dev_name - name of the character device to create (must be non-empty and
 *            short enough to fit in a PATH_MAX buffer once prefixed).
 * flags    - must be 0; vhost-cuse supports no flags.
 *
 * Returns 0 on success, -1 on any failure.
 */
int
rte_vhost_driver_register(const char *dev_name, uint64_t flags)
{
	struct cuse_info cuse_info;
	char device_name[PATH_MAX] = "";
	char char_device_name[PATH_MAX] = "";
	const char *device_argv[] = { device_name };
	char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
	char fuse_opt_fore[] = FUSE_OPT_FORE;
	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
	int len;

	/* Passing NULL to "%s" below would be undefined behaviour. */
	if (dev_name == NULL || dev_name[0] == '\0') {
		RTE_LOG(ERR, VHOST_CONFIG,
			"vhost-cuse device name is missing\n");
		return -1;
	}

	if (flags) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"vhost-cuse does not support any flags so far\n");
		return -1;
	}

	if (access(cuse_device_name, R_OK | W_OK) < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"char device %s can't be accessed, maybe not exist\n",
			cuse_device_name);
		return -1;
	}

	if (eventfd_init() < 0)
		return -1;

	/*
	 * The device name is created. This is passed to QEMU so that it can
	 * register the device with our application. A truncated name would
	 * silently register a different device, so treat it as an error.
	 */
	len = snprintf(device_name, sizeof(device_name),
			"DEVNAME=%s", dev_name);
	if (len < 0 || (size_t)len >= sizeof(device_name)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"vhost-cuse device name is too long\n");
		return -1;
	}
	len = snprintf(char_device_name, sizeof(char_device_name),
			"/dev/%s", dev_name);
	if (len < 0 || (size_t)len >= sizeof(char_device_name)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"vhost-cuse device name is too long\n");
		return -1;
	}

	/* Check if device already exists. */
	if (access(char_device_name, F_OK) != -1) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"char device %s already exists\n", char_device_name);
		return -1;
	}

	memset(&cuse_info, 0, sizeof(cuse_info));
	cuse_info.dev_major = default_major;
	cuse_info.dev_minor = default_minor;
	cuse_info.dev_info_argc = 1;
	cuse_info.dev_info_argv = device_argv;
	cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;

	session = cuse_lowlevel_setup(
			(int)(sizeof(fuse_argv) / sizeof(fuse_argv[0])),
			fuse_argv, &cuse_info, &vhost_net_ops, 0, NULL);
	if (session == NULL)
		return -1;

	return 0;
}
/**
 * Unregister hook for the CUSE backend. Nothing is torn down here: the
 * device name is ignored and 0 is always returned.
 */
int
rte_vhost_driver_unregister(const char *dev_name __rte_unused)
{
	return 0;
}
/**
* The CUSE session is launched allowing the application to receive open,
* release and ioctl calls.
*/
int
rte_vhost_driver_session_start(void)
{
fuse_session_loop(session);
return 0;
}

View File

@ -1,433 +0,0 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#include <dirent.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <fuse/cuse_lowlevel.h>
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/if_tun.h>
#include <linux/if.h>
#include <errno.h>
#include <rte_log.h>
#include "rte_virtio_net.h"
#include "vhost-net.h"
#include "virtio-net-cdev.h"
#include "eventfd_copy.h"
/* Size of the line buffer used when reading /proc/<pid>/maps. */
static const uint32_t BUFSIZE = PATH_MAX;
/* Size of prot char array in procmap. */
#define PROT_SZ 5
/* Number of fields each maps line is split into (one per procmap member). */
#define PROCMAP_SZ 8
/*
 * One parsed line of /proc/<pid>/maps. host_memory_map() fills it from the
 * eight fields of a line; only va_start, len and fname are consulted
 * afterwards.
 */
struct procmap {
uint64_t va_start; /* Start virtual address of the mapping. */
uint64_t len; /* Parsed as the end address; becomes the mapping length once the matching entry is found. */
uint64_t pgoff; /* Not used. */
uint32_t maj; /* Not used. */
uint32_t min; /* Not used. */
uint32_t ino; /* Not used. */
char prot[PROT_SZ]; /* Not used. */
char fname[PATH_MAX]; /* File name, compared against the resolved /proc/<pid>/fd links. */
};
/*
 * Convert one field of a maps line from hexadecimal text.
 * Returns 0 and stores the number in *value, or -1 if the field is missing,
 * empty, has trailing characters or is out of range.
 */
static int
procmap_parse_hex(const char *field, uint64_t *value)
{
	char *end = NULL;

	if (field == NULL || *field == '\0')
		return -1;

	errno = 0;
	*value = strtoull(field, &end, 16);
	if (end == NULL || *end != '\0' || errno != 0)
		return -1;

	return 0;
}

/*
 * Locate the file containing QEMU's memory space and
 * map it to our address space.
 *
 * /proc/<pid>/maps is scanned for the mapping that starts at `addr`; the
 * file name recorded there is then looked up among the links in
 * /proc/<pid>/fd, and the matching link is opened and mmap()ed for the
 * length of the mapping.
 *
 * pid            - process owning the memory (QEMU).
 * addr           - start virtual address, in that process, of the mapping.
 * mapped_address - out: address of the mapping in this process.
 * mapped_size    - out: length of the mapping in bytes.
 *
 * Returns 0 on success, -1 on failure (the out parameters are then left
 * untouched).
 */
static int
host_memory_map(pid_t pid, uint64_t addr,
	uint64_t *mapped_address, uint64_t *mapped_size)
{
	/*
	 * Delimiter that ends each field of a maps line:
	 *   start-end perms offset major:minor inode path
	 * One entry per field, so the lookup can never run past the table.
	 */
	static const char *const field_delim[PROCMAP_SZ] = {
		"-", " ", " ", " ", ":", " ", " ", " "
	};
	struct dirent *dptr = NULL;
	struct procmap procmap;
	DIR *dp = NULL;
	int fd;
	int i;
	char memfile[PATH_MAX];
	char mapfile[PATH_MAX];
	char procdir[PATH_MAX];
	/* Start empty so the failure test below never reads garbage. */
	char resolved_path[PATH_MAX] = "";
	char *path = NULL;
	FILE *fmap;
	void *map;
	uint8_t found = 0;
	char line[BUFSIZE];
	char *str, *sp, *in[PROCMAP_SZ];
	uint64_t dev_major, dev_minor, inode;

	/* Path where mem files are located. */
	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
	/* Maps file used to locate mem file. */
	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);

	fmap = fopen(mapfile, "r");
	if (fmap == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Failed to open maps file for pid %d\n",
			pid);
		return -1;
	}

	/* Read through maps file until we find out base_address. */
	while (fgets(line, BUFSIZE, fmap) != 0) {
		str = line;

		/* Split line into fields. */
		for (i = 0; i < PROCMAP_SZ; i++) {
			in[i] = strtok_r(str, field_delim[i], &sp);
			if (in[i] == NULL) {
				fclose(fmap);
				return -1;
			}
			str = NULL;
		}

		/* Convert/Copy each field as needed. */
		if (procmap_parse_hex(in[0], &procmap.va_start) != 0 ||
		    procmap_parse_hex(in[1], &procmap.len) != 0 ||
		    procmap_parse_hex(in[3], &procmap.pgoff) != 0 ||
		    procmap_parse_hex(in[4], &dev_major) != 0 ||
		    procmap_parse_hex(in[5], &dev_minor) != 0 ||
		    procmap_parse_hex(in[6], &inode) != 0) {
			fclose(fmap);
			return -1;
		}
		procmap.maj = (uint32_t)dev_major;
		procmap.min = (uint32_t)dev_minor;
		procmap.ino = (uint32_t)inode;

		/*
		 * Bounded, NUL-terminated copies: the tokens live inside
		 * `line`, so copying a fixed PATH_MAX bytes from them would
		 * read past the end of that buffer.
		 */
		snprintf(procmap.prot, sizeof(procmap.prot), "%s", in[2]);
		snprintf(procmap.fname, sizeof(procmap.fname), "%s", in[7]);

		if (procmap.va_start == addr) {
			/* len held the end address until now. */
			procmap.len = procmap.len - procmap.va_start;
			found = 1;
			break;
		}
	}
	fclose(fmap);

	if (!found) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Failed to find memory file in pid %d maps file\n",
			pid);
		return -1;
	}

	/* Find the guest memory file among the process fds. */
	dp = opendir(procdir);
	if (dp == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Cannot open pid %d process directory\n",
			pid);
		return -1;
	}

	found = 0;

	/* Read the fd directory contents. */
	while (NULL != (dptr = readdir(dp))) {
		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
				pid, dptr->d_name);
		path = realpath(memfile, resolved_path);
		/*
		 * NOTE(review): a failed realpath() is tolerated as long as
		 * it left something in resolved_path -- presumably relying
		 * on the C library storing the unresolved path there (e.g.
		 * for an already-deleted backing file); confirm.
		 */
		if ((path == NULL) && (strlen(resolved_path) == 0)) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"Failed to resolve fd directory\n");
			closedir(dp);
			return -1;
		}
		if (strncmp(resolved_path, procmap.fname,
				strnlen(procmap.fname, PATH_MAX)) == 0) {
			found = 1;
			break;
		}
	}

	closedir(dp);

	if (found == 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Failed to find memory file for pid %d\n",
			pid);
		return -1;
	}

	/* Open the shared memory file and map the memory into this process. */
	fd = open(memfile, O_RDWR);
	if (fd == -1) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Failed to open %s for pid %d\n",
			memfile, pid);
		return -1;
	}

	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
			MAP_POPULATE|MAP_SHARED, fd, 0);
	/* The mapping stays valid after the descriptor is closed. */
	close(fd);

	if (map == MAP_FAILED) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Error mapping the file %s for pid %d\n",
			memfile, pid);
		return -1;
	}

	/* Store the memory address and size in the device data structure */
	*mapped_address = (uint64_t)(uintptr_t)map;
	*mapped_size = procmap.len;

	LOG_DEBUG(VHOST_CONFIG,
		"Mem File: %s->%s - Size: %llu - VA: %p\n",
		memfile, resolved_path,
		(unsigned long long)*mapped_size, map);

	return 0;
}
int
cuse_set_mem_table(struct vhost_cuse_device_ctx ctx,
const struct vhost_memory *mem_regions_addr, uint32_t nregions)
{
uint64_t size = offsetof(struct vhost_memory, regions);
uint32_t idx, valid_regions;
struct virtio_memory_regions *pregion;
struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
((uint64_t)(uintptr_t)mem_regions_addr + size);
uint64_t base_address = 0, mapped_address, mapped_size;
struct virtio_net *dev;
dev = get_device(ctx.vid);
if (dev == NULL)
return -1;
if (dev->mem && dev->mem->mapped_address) {
munmap((void *)(uintptr_t)dev->mem->mapped_address,
(size_t)dev->mem->mapped_size);
free(dev->mem);
dev->mem = NULL;
}
dev->mem = calloc(1, sizeof(struct virtio_memory) +
sizeof(struct virtio_memory_regions) * nregions);
if (dev->mem == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to allocate memory for dev->mem\n",
dev->vid);
return -1;
}
pregion = &dev->mem->regions[0];
for (idx = 0; idx < nregions; idx++) {
pregion[idx].guest_phys_address =
mem_regions[idx].guest_phys_addr;
pregion[idx].guest_phys_address_end =
pregion[idx].guest_phys_address +
mem_regions[idx].memory_size;
pregion[idx].memory_size =
mem_regions[idx].memory_size;
pregion[idx].userspace_address =
mem_regions[idx].userspace_addr;
LOG_DEBUG(VHOST_CONFIG,
"REGION: %u - GPA: %p - QVA: %p - SIZE (%"PRIu64")\n",
idx,
(void *)(uintptr_t)pregion[idx].guest_phys_address,
(void *)(uintptr_t)pregion[idx].userspace_address,
pregion[idx].memory_size);
/*set the base address mapping*/
if (pregion[idx].guest_phys_address == 0x0) {
base_address =
pregion[idx].userspace_address;
/* Map VM memory file */
if (host_memory_map(ctx.pid, base_address,
&mapped_address, &mapped_size) != 0) {
free(dev->mem);
dev->mem = NULL;
return -1;
}
dev->mem->mapped_address = mapped_address;
dev->mem->base_address = base_address;
dev->mem->mapped_size = mapped_size;
}
}
/* Check that we have a valid base address. */
if (base_address == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"Failed to find base address of qemu memory file.\n");
free(dev->mem);
dev->mem = NULL;
return -1;
}
valid_regions = nregions;
for (idx = 0; idx < nregions; idx++) {
if ((pregion[idx].userspace_address < base_address) ||
(pregion[idx].userspace_address >
(base_address + mapped_size)))
valid_regions--;
}
if (valid_regions != nregions) {
valid_regions = 0;
for (idx = nregions; 0 != idx--; ) {
if ((pregion[idx].userspace_address < base_address) ||
(pregion[idx].userspace_address >
(base_address + mapped_size))) {
memmove(&pregion[idx], &pregion[idx + 1],
sizeof(struct virtio_memory_regions) *
valid_regions);
} else
valid_regions++;
}
}
for (idx = 0; idx < valid_regions; idx++) {
pregion[idx].address_offset =
mapped_address - base_address +
pregion[idx].userspace_address -
pregion[idx].guest_phys_address;
}
dev->mem->nregions = valid_regions;
return 0;
}
/*
 * Look up the name of the tap interface behind tap_fd (a descriptor of
 * process `pid`) and record it for device `vid`.
 *
 * Returns -1 if the descriptor cannot be copied into this process; otherwise
 * 0, whether or not the TUNGETIFF query itself succeeded.
 */
static int
get_ifname(int vid, int tap_fd, int pid)
{
	struct ifreq ifr;
	int local_fd;
	int rc;

	local_fd = eventfd_copy(tap_fd, pid);
	if (local_fd < 0)
		return -1;

	rc = ioctl(local_fd, TUNGETIFF, &ifr);

	/* The copy was only needed for the query above. */
	if (close(local_fd) < 0)
		RTE_LOG(ERR, VHOST_CONFIG, "(%d) fd close failed\n", vid);

	if (rc < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"(%d) TUNGETIFF ioctl failed\n", vid);
	} else {
		uint32_t name_len;

		name_len = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name));
		vhost_set_ifname(vid, ifr.ifr_name, name_len);
	}

	return 0;
}
/*
 * Handle the set-backend request for the device named by ctx.vid.
 * When the device is not yet running and the request carries a live
 * descriptor, the tap interface name is recorded first; the request is then
 * passed on to vhost_set_backend().
 *
 * Returns -1 if the device does not exist, otherwise whatever
 * vhost_set_backend() returns.
 */
int
cuse_set_backend(struct vhost_cuse_device_ctx ctx,
	struct vhost_vring_file *file)
{
	struct virtio_net *dev = get_device(ctx.vid);

	if (dev == NULL)
		return -1;

	if ((dev->flags & VIRTIO_DEV_RUNNING) == 0 &&
			file->fd != VIRTIO_DEV_STOPPED)
		get_ifname(ctx.vid, file->fd, ctx.pid);

	return vhost_set_backend(ctx.vid, file);
}
void
vhost_backend_cleanup(struct virtio_net *dev)
{
/* Unmap QEMU memory file if mapped. */
if (dev->mem) {
munmap((void *)(uintptr_t)dev->mem->mapped_address,
(size_t)dev->mem->mapped_size);
free(dev->mem);
dev->mem = NULL;
}
}

View File

@ -1,56 +0,0 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _VIRTIO_NET_CDEV_H
#define _VIRTIO_NET_CDEV_H
#include <stdint.h>
#include <linux/vhost.h>
#include "vhost-net.h"
/*
 * Structure used to identify device context: which process issued the
 * request and which virtio-net device it addresses.
 */
struct vhost_cuse_device_ctx {
pid_t pid; /* PID of process calling the IOCTL. */
int vid; /* Virtio-net device ID */
};
/*
 * Install the guest memory table for the device named by ctx.vid.
 * mem_regions_addr points at a vhost_memory header followed by `nregions`
 * region entries. Returns 0 on success, -1 on failure.
 */
int
cuse_set_mem_table(struct vhost_cuse_device_ctx ctx,
const struct vhost_memory *mem_regions_addr, uint32_t nregions);
/*
 * Handle a set-backend request for the device named by ctx.vid.
 * Returns -1 if the device does not exist, otherwise the result of
 * vhost_set_backend().
 */
int
cuse_set_backend(struct vhost_cuse_device_ctx ctx, struct vhost_vring_file *);
#endif

View File

@ -157,9 +157,6 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lm
# Link libnuma with the vhost library when its NUMA option is enabled.
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lnuma
endif
# Link libfuse with the vhost library when vhost-user is disabled.
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),n)
_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lfuse
endif
_LDLIBS-$(CONFIG_RTE_PORT_PCAP) += -lpcap
endif # !CONFIG_RTE_BUILD_SHARED_LIBS