numam-dpdk/lib/librte_vhost/fd_man.c
Yuanhan Liu af14759181 vhost: introduce API to start a specific driver
We used to use rte_vhost_driver_session_start() to trigger the vhost-user
session. It takes no argument, thus it's a global trigger. And it could
be problematic.

The issue is, currently, rte_vhost_driver_register(path, flags) actually
tries to put it into the session loop (by fdset_add). However, it needs
a set of APIs to set a vhost-user driver properly:
  * rte_vhost_driver_register(path, flags);
  * rte_vhost_driver_set_features(path, features);
  * rte_vhost_driver_callback_register(path, vhost_device_ops);

If a new vhost-user driver is registered after the trigger (think OVS-DPDK
that could add a port dynamically from cmdline), the current code will
effectively starts the session for the new driver just after the first
API rte_vhost_driver_register() is invoked, leaving later calls taking
no effect at all.

To handle the case properly, this patch introduce a new API,
rte_vhost_driver_start(path), to trigger a specific vhost-user driver.
To do that, the rte_vhost_driver_register(path, flags) is simplified
to create the socket only and let rte_vhost_driver_start(path) to
actually put it into the session loop.

Meanwhile, the rte_vhost_driver_session_start is removed: we could hide
the session thread internally (create the thread if it has not been
created). This would also simplify the application.

NOTE: the API order in prog guide is slightly adjusted for showing the
correct invoke order.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
2017-04-01 10:42:44 +02:00

301 lines
7.1 KiB
C

/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <rte_common.h>
#include <rte_log.h>
#include "fd_man.h"
#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
static int
get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
{
int i;
for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
;
return i;
}
static void
fdset_move(struct fdset *pfdset, int dst, int src)
{
pfdset->fd[dst] = pfdset->fd[src];
pfdset->rwfds[dst] = pfdset->rwfds[src];
}
static void
fdset_shrink_nolock(struct fdset *pfdset)
{
int i;
int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
for (i = 0; i < last_valid_idx; i++) {
if (pfdset->fd[i].fd != -1)
continue;
fdset_move(pfdset, i, last_valid_idx);
last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
}
pfdset->num = last_valid_idx + 1;
}
/*
* Find deleted fd entries and remove them
*/
static void
fdset_shrink(struct fdset *pfdset)
{
pthread_mutex_lock(&pfdset->fd_mutex);
fdset_shrink_nolock(pfdset);
pthread_mutex_unlock(&pfdset->fd_mutex);
}
/**
* Returns the index in the fdset for a given fd.
* @return
* index for the fd, or -1 if fd isn't in the fdset.
*/
static int
fdset_find_fd(struct fdset *pfdset, int fd)
{
int i;
for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
;
return i == pfdset->num ? -1 : i;
}
static void
fdset_add_fd(struct fdset *pfdset, int idx, int fd,
fd_cb rcb, fd_cb wcb, void *dat)
{
struct fdentry *pfdentry = &pfdset->fd[idx];
struct pollfd *pfd = &pfdset->rwfds[idx];
pfdentry->fd = fd;
pfdentry->rcb = rcb;
pfdentry->wcb = wcb;
pfdentry->dat = dat;
pfd->fd = fd;
pfd->events = rcb ? POLLIN : 0;
pfd->events |= wcb ? POLLOUT : 0;
pfd->revents = 0;
}
void
fdset_init(struct fdset *pfdset)
{
int i;
if (pfdset == NULL)
return;
for (i = 0; i < MAX_FDS; i++) {
pfdset->fd[i].fd = -1;
pfdset->fd[i].dat = NULL;
}
pfdset->num = 0;
}
/**
* Register the fd in the fdset with read/write handler and context.
*/
int
fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
int i;
if (pfdset == NULL || fd == -1)
return -1;
pthread_mutex_lock(&pfdset->fd_mutex);
i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
if (i == -1) {
fdset_shrink_nolock(pfdset);
i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
if (i == -1) {
pthread_mutex_unlock(&pfdset->fd_mutex);
return -2;
}
}
fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
pthread_mutex_unlock(&pfdset->fd_mutex);
return 0;
}
/**
* Unregister the fd from the fdset.
* Returns context of a given fd or NULL.
*/
void *
fdset_del(struct fdset *pfdset, int fd)
{
int i;
void *dat = NULL;
if (pfdset == NULL || fd == -1)
return NULL;
do {
pthread_mutex_lock(&pfdset->fd_mutex);
i = fdset_find_fd(pfdset, fd);
if (i != -1 && pfdset->fd[i].busy == 0) {
/* busy indicates r/wcb is executing! */
dat = pfdset->fd[i].dat;
pfdset->fd[i].fd = -1;
pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
pfdset->fd[i].dat = NULL;
i = -1;
}
pthread_mutex_unlock(&pfdset->fd_mutex);
} while (i != -1);
return dat;
}
/**
* This functions runs in infinite blocking loop until there is no fd in
* pfdset. It calls corresponding r/w handler if there is event on the fd.
*
* Before the callback is called, we set the flag to busy status; If other
* thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
* will wait until the flag is reset to zero(which indicates the callback is
* finished), then it could free the context after fdset_del.
*/
void *
fdset_event_dispatch(void *arg)
{
int i;
struct pollfd *pfd;
struct fdentry *pfdentry;
fd_cb rcb, wcb;
void *dat;
int fd, numfds;
int remove1, remove2;
int need_shrink;
struct fdset *pfdset = arg;
if (pfdset == NULL)
return NULL;
while (1) {
/*
* When poll is blocked, other threads might unregister
* listenfds from and register new listenfds into fdset.
* When poll returns, the entries for listenfds in the fdset
* might have been updated. It is ok if there is unwanted call
* for new listenfds.
*/
pthread_mutex_lock(&pfdset->fd_mutex);
numfds = pfdset->num;
pthread_mutex_unlock(&pfdset->fd_mutex);
poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
need_shrink = 0;
for (i = 0; i < numfds; i++) {
pthread_mutex_lock(&pfdset->fd_mutex);
pfdentry = &pfdset->fd[i];
fd = pfdentry->fd;
pfd = &pfdset->rwfds[i];
if (fd < 0) {
need_shrink = 1;
pthread_mutex_unlock(&pfdset->fd_mutex);
continue;
}
if (!pfd->revents) {
pthread_mutex_unlock(&pfdset->fd_mutex);
continue;
}
remove1 = remove2 = 0;
rcb = pfdentry->rcb;
wcb = pfdentry->wcb;
dat = pfdentry->dat;
pfdentry->busy = 1;
pthread_mutex_unlock(&pfdset->fd_mutex);
if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
rcb(fd, dat, &remove1);
if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
wcb(fd, dat, &remove2);
pfdentry->busy = 0;
/*
* fdset_del needs to check busy flag.
* We don't allow fdset_del to be called in callback
* directly.
*/
/*
* When we are to clean up the fd from fdset,
* because the fd is closed in the cb,
* the old fd val could be reused by when creates new
* listen fd in another thread, we couldn't call
* fd_set_del.
*/
if (remove1 || remove2) {
pfdentry->fd = -1;
need_shrink = 1;
}
}
if (need_shrink)
fdset_shrink(pfdset);
}
return NULL;
}