/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <errno.h>
#include <pthread.h>

#include <rte_log.h>
#include <rte_virtio_net.h>

#include "fd_man.h"
#include "vhost-net-user.h"
#include "vhost-net.h"
#include "virtio-net-user.h"

#define MAX_VIRTIO_BACKLOG 128

/*
 * Locking between the fdset event-dispatch thread and the callbacks (cb):
 *
 * - fdset_event_dispatch(), run from rte_vhost_driver_session_start() in a
 *   separate thread, loops forever processing vhost messages through the
 *   registered callbacks.
 * - It acquires the fdset mutex, fetches the cb and its context, marks the
 *   fdentry busy flag, then releases the mutex before invoking the cb.
 * - The vserver_new_vq_conn cb calls fdset_add(), which acquires the mutex
 *   and adds the new connection fd into the fdset.
 * - The vserver_message_handler cb frees its data context and marks the
 *   remove flag to request deletion of the connection fd from the fdset.
 * - After the cb returns, the dispatch loop clears the busy flag and, if a
 *   removal was requested, calls fdset_del(), which acquires the mutex,
 *   checks the busy flag and removes the connection fd from the fdset.
 * - rte_vhost_driver_unregister() runs in another thread: it acquires the
 *   mutex and calls fdset_del() to remove the listener fd from the fdset;
 *   only then may it free the data context.
 *
 * These steps ensure the fd data context is not freed while a callback is
 * still using it.  All VMs should have been shut down before
 * rte_vhost_driver_unregister() is called.
 */
static void vserver_new_vq_conn(int fd, void *data, int *remove);
static void vserver_message_handler(int fd, void *dat, int *remove);

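/* Per-connection context: the owning server and the vhost device id (vid). */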
struct connfd_ctx {
	struct vhost_server *vserver;
	int vid;
};

#define MAX_VHOST_SERVER 1024

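/*
 * Global server state: every registered vhost server, the fdset shared by
 * all of them, and a mutex protecting the server list.
 */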
struct _vhost_server {
	struct vhost_server *server[MAX_VHOST_SERVER];
	struct fdset fdset;
	int vserver_cnt;
	pthread_mutex_t server_mutex;
};

static struct _vhost_server g_vhost_server = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},

	.vserver_cnt = 0,
	.server_mutex = PTHREAD_MUTEX_INITIALIZER,
};

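/* Human-readable names of the vhost-user requests, used for logging. */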
static const char *vhost_message_str[VHOST_USER_MAX] = {
	[VHOST_USER_NONE] = "VHOST_USER_NONE",
	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
	[VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
	[VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
	[VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
	[VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
	[VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
	[VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
};

/**
 * Create a unix domain socket, bind to path and listen for connection.
 * @return
 *  socket fd or -1 on failure
 */
static int
uds_socket(const char *path)
{
	struct sockaddr_un un;
	int sockfd;
	int ret;

	if (path == NULL)
		return -1;

	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (sockfd < 0)
		return -1;
	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);

	memset(&un, 0, sizeof(un));
	un.sun_family = AF_UNIX;
	snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
	if (ret == -1) {
		RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n",
			sockfd, path);
		goto err;
	}
	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);

	ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
	if (ret == -1)
		goto err;

	return sockfd;

err:
	close(sockfd);
	return -1;
}

/* Return the number of bytes read on success, or a negative value on failure. */
static int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
		return ret;
	}

	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
		RTE_LOG(ERR, VHOST_CONFIG, "truncated msg\n");
		return -1;
	}

	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if ((cmsg->cmsg_level == SOL_SOCKET) &&
			(cmsg->cmsg_type == SCM_RIGHTS)) {
			memcpy(fds, CMSG_DATA(cmsg), fdsize);
			break;
		}
	}

	return ret;
}

/* Return the number of bytes read on success, or a negative value on failure. */
static int
read_vhost_message(int sockfd, struct VhostUserMsg *msg)
{
	int ret;

	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
	if (ret <= 0)
		return ret;

	if (msg && msg->size) {
		if (msg->size > sizeof(msg->payload)) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"invalid msg size: %d\n", msg->size);
			return -1;
		}
		ret = read(sockfd, &msg->payload, msg->size);
		if (ret <= 0)
			return ret;
		if (ret != (int)msg->size) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"read control message failed\n");
			return -1;
		}
	}

	return ret;
}

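/*
 * Send a message over sockfd; any file descriptors in fds are attached as
 * SCM_RIGHTS ancillary data.  Returns the number of bytes sent, or a
 * negative value on failure.
 */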
static int
send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds && fd_num > 0) {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	} else {
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	}

	do {
		ret = sendmsg(sockfd, &msgh, 0);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
		return ret;
	}

	return ret;
}

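/*
 * Set the version and reply flags, then send the message header plus
 * payload (no file descriptors are attached to replies).
 */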
static int
send_vhost_message(int sockfd, struct VhostUserMsg *msg)
{
	int ret;

	if (!msg)
		return 0;

	msg->flags &= ~VHOST_USER_VERSION_MASK;
	msg->flags |= VHOST_USER_VERSION;
	msg->flags |= VHOST_USER_REPLY_MASK;

	ret = send_fd_message(sockfd, (char *)msg,
		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);

	return ret;
}

/* Callback invoked when there is a new virtio connection. */
static void
vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
{
	struct vhost_server *vserver = (struct vhost_server *)dat;
	int conn_fd;
	struct connfd_ctx *ctx;
	int vid;
	struct vhost_device_ctx vdev_ctx = { (pid_t)0, 0 };
	unsigned int size;

	conn_fd = accept(fd, NULL, NULL);
	RTE_LOG(INFO, VHOST_CONFIG,
		"new virtio connection is %d\n", conn_fd);
	if (conn_fd < 0)
		return;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		close(conn_fd);
		return;
	}

	vid = vhost_new_device(vdev_ctx);
	if (vid == -1) {
		free(ctx);
		close(conn_fd);
		return;
	}

	vdev_ctx.vid = vid;
	size = strnlen(vserver->path, PATH_MAX);
	vhost_set_ifname(vdev_ctx, vserver->path, size);

	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);

	ctx->vserver = vserver;
	ctx->vid = vid;
	fdset_add(&g_vhost_server.fdset,
		conn_fd, vserver_message_handler, NULL, ctx);
}

/* Callback invoked when there is a message on the connection fd. */
static void
vserver_message_handler(int connfd, void *dat, int *remove)
{
	struct vhost_device_ctx ctx;
	struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat;
	struct VhostUserMsg msg;
	uint64_t features;
	int ret;

	ctx.vid = cfd_ctx->vid;
	ret = read_vhost_message(connfd, &msg);
	if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
		if (ret < 0)
			RTE_LOG(ERR, VHOST_CONFIG,
				"vhost read message failed\n");
		else if (ret == 0)
			RTE_LOG(INFO, VHOST_CONFIG,
				"vhost peer closed\n");
		else
			RTE_LOG(ERR, VHOST_CONFIG,
				"vhost read incorrect message\n");

		close(connfd);
		*remove = 1;
		free(cfd_ctx);
		vhost_destroy_device(ctx);

		return;
	}

	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
		vhost_message_str[msg.request]);
	switch (msg.request) {
	case VHOST_USER_GET_FEATURES:
		ret = vhost_get_features(ctx, &features);
		msg.payload.u64 = features;
		msg.size = sizeof(msg.payload.u64);
		send_vhost_message(connfd, &msg);
		break;
	case VHOST_USER_SET_FEATURES:
		features = msg.payload.u64;
		vhost_set_features(ctx, &features);
		break;

	case VHOST_USER_GET_PROTOCOL_FEATURES:
		msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
		msg.size = sizeof(msg.payload.u64);
		send_vhost_message(connfd, &msg);
		break;
	case VHOST_USER_SET_PROTOCOL_FEATURES:
		user_set_protocol_features(ctx, msg.payload.u64);
		break;

	case VHOST_USER_SET_OWNER:
		vhost_set_owner(ctx);
		break;
	case VHOST_USER_RESET_OWNER:
		vhost_reset_owner(ctx);
		break;

	case VHOST_USER_SET_MEM_TABLE:
		user_set_mem_table(ctx, &msg);
		break;

	case VHOST_USER_SET_LOG_BASE:
		user_set_log_base(ctx, &msg);

		/* it needs a reply */
		msg.size = sizeof(msg.payload.u64);
		send_vhost_message(connfd, &msg);
		break;
	case VHOST_USER_SET_LOG_FD:
		close(msg.fds[0]);
		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
		break;

	case VHOST_USER_SET_VRING_NUM:
		vhost_set_vring_num(ctx, &msg.payload.state);
		break;
	case VHOST_USER_SET_VRING_ADDR:
		vhost_set_vring_addr(ctx, &msg.payload.addr);
		break;
	case VHOST_USER_SET_VRING_BASE:
		vhost_set_vring_base(ctx, &msg.payload.state);
		break;

	case VHOST_USER_GET_VRING_BASE:
		ret = user_get_vring_base(ctx, &msg.payload.state);
		msg.size = sizeof(msg.payload.state);
		send_vhost_message(connfd, &msg);
		break;

	case VHOST_USER_SET_VRING_KICK:
		user_set_vring_kick(ctx, &msg);
		break;
	case VHOST_USER_SET_VRING_CALL:
		user_set_vring_call(ctx, &msg);
		break;

	case VHOST_USER_SET_VRING_ERR:
		if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
			close(msg.fds[0]);
		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
		break;

	case VHOST_USER_GET_QUEUE_NUM:
		msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
		msg.size = sizeof(msg.payload.u64);
		send_vhost_message(connfd, &msg);
		break;

	case VHOST_USER_SET_VRING_ENABLE:
		user_set_vring_enable(ctx, &msg.payload.state);
		break;
	case VHOST_USER_SEND_RARP:
		/*
		 * The RARP packet is broadcast by injecting it into the
		 * receiving mbuf array at rte_vhost_dequeue_burst(), so that
		 * after live migration the switch on the target host learns
		 * the VM's MAC address on the correct port (broadcasting it
		 * out of every host interface instead poisons the MAC
		 * learning table).
		 */
		user_send_rarp(ctx, &msg);
		break;

	default:
		break;

	}
}

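/*
 * Public driver API.  A minimal usage sketch (illustrative only; the socket
 * path below is an arbitrary example and error handling is omitted):
 *
 *	rte_vhost_driver_register("/tmp/vhost-user.sock");
 *	rte_vhost_driver_session_start();   (blocks, dispatching fd events)
 *
 * rte_vhost_driver_register() may also be called from another thread after
 * the session has started; rte_vhost_driver_unregister() removes a
 * registered server again.
 */
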
/**
 * Creates and initialises the vhost server.
 */
int
rte_vhost_driver_register(const char *path)
{
	struct vhost_server *vserver;

	pthread_mutex_lock(&g_vhost_server.server_mutex);

	if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: the number of servers reaches maximum\n");
		pthread_mutex_unlock(&g_vhost_server.server_mutex);
		return -1;
	}

	vserver = calloc(sizeof(struct vhost_server), 1);
	if (vserver == NULL) {
		pthread_mutex_unlock(&g_vhost_server.server_mutex);
		return -1;
	}

	vserver->listenfd = uds_socket(path);
	if (vserver->listenfd < 0) {
		free(vserver);
		pthread_mutex_unlock(&g_vhost_server.server_mutex);
		return -1;
	}

	vserver->path = strdup(path);

	fdset_add(&g_vhost_server.fdset, vserver->listenfd,
		vserver_new_vq_conn, NULL, vserver);

	g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
	pthread_mutex_unlock(&g_vhost_server.server_mutex);

	return 0;
}

/**
 * Unregister the specified vhost server.
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;

	pthread_mutex_lock(&g_vhost_server.server_mutex);

	for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
		if (!strcmp(g_vhost_server.server[i]->path, path)) {
			fdset_del(&g_vhost_server.fdset,
				g_vhost_server.server[i]->listenfd);

			close(g_vhost_server.server[i]->listenfd);
			free(g_vhost_server.server[i]->path);
			free(g_vhost_server.server[i]);

			unlink(path);

			count = --g_vhost_server.vserver_cnt;
			g_vhost_server.server[i] = g_vhost_server.server[count];
			g_vhost_server.server[count] = NULL;
			pthread_mutex_unlock(&g_vhost_server.server_mutex);

			return 0;
		}
	}
	pthread_mutex_unlock(&g_vhost_server.server_mutex);

	return -1;
}

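/*
 * Enter the fd event-dispatch loop.  This call does not return while the
 * session runs, so applications typically invoke it from a dedicated thread.
 */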
int
rte_vhost_driver_session_start(void)
{
	fdset_event_dispatch(&g_vhost_server.fdset);
	return 0;
}