numam-dpdk/lib/librte_eal/common/eal_common_proc.c
Anatoly Burakov 0a529578f1 eal: clean up unused files on initialization
When creating process data structures, EAL will create many files
in EAL runtime directory. Because we allow multiple secondary
processes to run, each secondary process gets their own unique
file. With many secondary processes running and exiting on the
system, runtime directory will, over time, create enormous amounts
of sockets, fbarray files and other stuff that just sits there
unused because the process that allocated it has died a long time
ago. This may lead to exhaustion of disk (or RAM) space in the
runtime directory.

Fix this by removing every unlocked file at initialization that
matches either socket or fbarray naming convention. We cannot be
sure of any other files, so we'll leave them alone. Also, remove
similar code from mp socket code.

We do it at the end of init, rather than at the beginning, because
secondary process will use primary process' data structures even
if the primary itself has died, and we don't want to remove those
before we lock them.

Bugzilla ID: 106
Cc: stable@dpdk.org

Reported-by: Vipin Varghese <vipin.varghese@intel.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
2018-12-19 04:12:30 +01:00

1153 lines
27 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2016-2018 Intel Corporation
*/
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <fnmatch.h>
#include <inttypes.h>
#include <libgen.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/file.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <rte_alarm.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_tailq.h>
#include "eal_private.h"
#include "eal_filesystem.h"
#include "eal_internal_cfg.h"
static int mp_fd = -1;
static char mp_filter[PATH_MAX]; /* Filter for secondary process sockets */
static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
struct action_entry {
TAILQ_ENTRY(action_entry) next;
char action_name[RTE_MP_MAX_NAME_LEN];
rte_mp_t action;
};
/** Double linked list of actions. */
TAILQ_HEAD(action_entry_list, action_entry);
static struct action_entry_list action_entry_list =
TAILQ_HEAD_INITIALIZER(action_entry_list);
enum mp_type {
MP_MSG, /* Share message with peers, will not block */
MP_REQ, /* Request for information, Will block for a reply */
MP_REP, /* Response to previously-received request */
MP_IGN, /* Response telling requester to ignore this response */
};
struct mp_msg_internal {
int type;
struct rte_mp_msg msg;
};
struct async_request_param {
rte_mp_async_reply_t clb;
struct rte_mp_reply user_reply;
struct timespec end;
int n_responses_processed;
};
struct pending_request {
TAILQ_ENTRY(pending_request) next;
enum {
REQUEST_TYPE_SYNC,
REQUEST_TYPE_ASYNC
} type;
char dst[PATH_MAX];
struct rte_mp_msg *request;
struct rte_mp_msg *reply;
int reply_received;
RTE_STD_C11
union {
struct {
struct async_request_param *param;
} async;
struct {
pthread_cond_t cond;
} sync;
};
};
TAILQ_HEAD(pending_request_list, pending_request);
static struct {
struct pending_request_list requests;
pthread_mutex_t lock;
} pending_requests = {
.requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests),
.lock = PTHREAD_MUTEX_INITIALIZER,
/**< used in async requests only */
};
/* forward declarations */
static int
mp_send(struct rte_mp_msg *msg, const char *peer, int type);
/* for use with alarm callback */
static void
async_reply_handle(void *arg);
/* for use with process_msg */
static struct pending_request *
async_reply_handle_thread_unsafe(void *arg);
static void
trigger_async_action(struct pending_request *req);
static struct pending_request *
find_pending_request(const char *dst, const char *act_name)
{
struct pending_request *r;
TAILQ_FOREACH(r, &pending_requests.requests, next) {
if (!strcmp(r->dst, dst) &&
!strcmp(r->request->name, act_name))
break;
}
return r;
}
static void
create_socket_path(const char *name, char *buf, int len)
{
const char *prefix = eal_mp_socket_path();
if (strlen(name) > 0)
snprintf(buf, len, "%s_%s", prefix, name);
else
strlcpy(buf, prefix, len);
}
int
rte_eal_primary_proc_alive(const char *config_file_path)
{
int config_fd;
if (config_file_path)
config_fd = open(config_file_path, O_RDONLY);
else {
const char *path;
path = eal_runtime_config_path();
config_fd = open(path, O_RDONLY);
}
if (config_fd < 0)
return 0;
int ret = lockf(config_fd, F_TEST, 0);
close(config_fd);
return !!ret;
}
static struct action_entry *
find_action_entry_by_name(const char *name)
{
struct action_entry *entry;
TAILQ_FOREACH(entry, &action_entry_list, next) {
if (strncmp(entry->action_name, name, RTE_MP_MAX_NAME_LEN) == 0)
break;
}
return entry;
}
static int
validate_action_name(const char *name)
{
if (name == NULL) {
RTE_LOG(ERR, EAL, "Action name cannot be NULL\n");
rte_errno = EINVAL;
return -1;
}
if (strnlen(name, RTE_MP_MAX_NAME_LEN) == 0) {
RTE_LOG(ERR, EAL, "Length of action name is zero\n");
rte_errno = EINVAL;
return -1;
}
if (strnlen(name, RTE_MP_MAX_NAME_LEN) == RTE_MP_MAX_NAME_LEN) {
rte_errno = E2BIG;
return -1;
}
return 0;
}
int __rte_experimental
rte_mp_action_register(const char *name, rte_mp_t action)
{
struct action_entry *entry;
if (validate_action_name(name))
return -1;
entry = malloc(sizeof(struct action_entry));
if (entry == NULL) {
rte_errno = ENOMEM;
return -1;
}
strlcpy(entry->action_name, name, sizeof(entry->action_name));
entry->action = action;
pthread_mutex_lock(&mp_mutex_action);
if (find_action_entry_by_name(name) != NULL) {
pthread_mutex_unlock(&mp_mutex_action);
rte_errno = EEXIST;
free(entry);
return -1;
}
TAILQ_INSERT_TAIL(&action_entry_list, entry, next);
pthread_mutex_unlock(&mp_mutex_action);
return 0;
}
void __rte_experimental
rte_mp_action_unregister(const char *name)
{
struct action_entry *entry;
if (validate_action_name(name))
return;
pthread_mutex_lock(&mp_mutex_action);
entry = find_action_entry_by_name(name);
if (entry == NULL) {
pthread_mutex_unlock(&mp_mutex_action);
return;
}
TAILQ_REMOVE(&action_entry_list, entry, next);
pthread_mutex_unlock(&mp_mutex_action);
free(entry);
}
static int
read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
{
int msglen;
struct iovec iov;
struct msghdr msgh;
char control[CMSG_SPACE(sizeof(m->msg.fds))];
struct cmsghdr *cmsg;
int buflen = sizeof(*m) - sizeof(m->msg.fds);
memset(&msgh, 0, sizeof(msgh));
iov.iov_base = m;
iov.iov_len = buflen;
msgh.msg_name = s;
msgh.msg_namelen = sizeof(*s);
msgh.msg_iov = &iov;
msgh.msg_iovlen = 1;
msgh.msg_control = control;
msgh.msg_controllen = sizeof(control);
msglen = recvmsg(mp_fd, &msgh, 0);
if (msglen < 0) {
RTE_LOG(ERR, EAL, "recvmsg failed, %s\n", strerror(errno));
return -1;
}
if (msglen != buflen || (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
RTE_LOG(ERR, EAL, "truncted msg\n");
return -1;
}
/* read auxiliary FDs if any */
for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
if ((cmsg->cmsg_level == SOL_SOCKET) &&
(cmsg->cmsg_type == SCM_RIGHTS)) {
memcpy(m->msg.fds, CMSG_DATA(cmsg), sizeof(m->msg.fds));
break;
}
}
return 0;
}
static void
process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
{
struct pending_request *pending_req;
struct action_entry *entry;
struct rte_mp_msg *msg = &m->msg;
rte_mp_t action = NULL;
RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
if (m->type == MP_REP || m->type == MP_IGN) {
struct pending_request *req = NULL;
pthread_mutex_lock(&pending_requests.lock);
pending_req = find_pending_request(s->sun_path, msg->name);
if (pending_req) {
memcpy(pending_req->reply, msg, sizeof(*msg));
/* -1 indicates that we've been asked to ignore */
pending_req->reply_received =
m->type == MP_REP ? 1 : -1;
if (pending_req->type == REQUEST_TYPE_SYNC)
pthread_cond_signal(&pending_req->sync.cond);
else if (pending_req->type == REQUEST_TYPE_ASYNC)
req = async_reply_handle_thread_unsafe(
pending_req);
} else
RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
pthread_mutex_unlock(&pending_requests.lock);
if (req != NULL)
trigger_async_action(req);
return;
}
pthread_mutex_lock(&mp_mutex_action);
entry = find_action_entry_by_name(msg->name);
if (entry != NULL)
action = entry->action;
pthread_mutex_unlock(&mp_mutex_action);
if (!action) {
if (m->type == MP_REQ && !internal_config.init_complete) {
/* if this is a request, and init is not yet complete,
* and callback wasn't registered, we should tell the
* requester to ignore our existence because we're not
* yet ready to process this request.
*/
struct rte_mp_msg dummy;
memset(&dummy, 0, sizeof(dummy));
strlcpy(dummy.name, msg->name, sizeof(dummy.name));
mp_send(&dummy, s->sun_path, MP_IGN);
} else {
RTE_LOG(ERR, EAL, "Cannot find action: %s\n",
msg->name);
}
} else if (action(msg, s->sun_path) < 0) {
RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name);
}
}
static void *
mp_handle(void *arg __rte_unused)
{
struct mp_msg_internal msg;
struct sockaddr_un sa;
while (1) {
if (read_msg(&msg, &sa) == 0)
process_msg(&msg, &sa);
}
return NULL;
}
static int
timespec_cmp(const struct timespec *a, const struct timespec *b)
{
if (a->tv_sec < b->tv_sec)
return -1;
if (a->tv_sec > b->tv_sec)
return 1;
if (a->tv_nsec < b->tv_nsec)
return -1;
if (a->tv_nsec > b->tv_nsec)
return 1;
return 0;
}
enum async_action {
ACTION_FREE, /**< free the action entry, but don't trigger callback */
ACTION_TRIGGER /**< trigger callback, then free action entry */
};
static enum async_action
process_async_request(struct pending_request *sr, const struct timespec *now)
{
struct async_request_param *param;
struct rte_mp_reply *reply;
bool timeout, last_msg;
param = sr->async.param;
reply = &param->user_reply;
/* did we timeout? */
timeout = timespec_cmp(&param->end, now) <= 0;
/* if we received a response, adjust relevant data and copy mesasge. */
if (sr->reply_received == 1 && sr->reply) {
struct rte_mp_msg *msg, *user_msgs, *tmp;
msg = sr->reply;
user_msgs = reply->msgs;
tmp = realloc(user_msgs, sizeof(*msg) *
(reply->nb_received + 1));
if (!tmp) {
RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n",
sr->dst, sr->request->name);
/* this entry is going to be removed and its message
* dropped, but we don't want to leak memory, so
* continue.
*/
} else {
user_msgs = tmp;
reply->msgs = user_msgs;
memcpy(&user_msgs[reply->nb_received],
msg, sizeof(*msg));
reply->nb_received++;
}
/* mark this request as processed */
param->n_responses_processed++;
} else if (sr->reply_received == -1) {
/* we were asked to ignore this process */
reply->nb_sent--;
} else if (timeout) {
/* count it as processed response, but don't increment
* nb_received.
*/
param->n_responses_processed++;
}
free(sr->reply);
last_msg = param->n_responses_processed == reply->nb_sent;
return last_msg ? ACTION_TRIGGER : ACTION_FREE;
}
static void
trigger_async_action(struct pending_request *sr)
{
struct async_request_param *param;
struct rte_mp_reply *reply;
param = sr->async.param;
reply = &param->user_reply;
param->clb(sr->request, reply);
/* clean up */
free(sr->async.param->user_reply.msgs);
free(sr->async.param);
free(sr->request);
free(sr);
}
static struct pending_request *
async_reply_handle_thread_unsafe(void *arg)
{
struct pending_request *req = (struct pending_request *)arg;
enum async_action action;
struct timespec ts_now;
struct timeval now;
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Cannot get current time\n");
goto no_trigger;
}
ts_now.tv_nsec = now.tv_usec * 1000;
ts_now.tv_sec = now.tv_sec;
action = process_async_request(req, &ts_now);
TAILQ_REMOVE(&pending_requests.requests, req, next);
if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) {
/* if we failed to cancel the alarm because it's already in
* progress, don't proceed because otherwise we will end up
* handling the same message twice.
*/
if (rte_errno == EINPROGRESS) {
RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n");
goto no_trigger;
}
RTE_LOG(ERR, EAL, "Failed to cancel alarm\n");
}
if (action == ACTION_TRIGGER)
return req;
no_trigger:
free(req);
return NULL;
}
static void
async_reply_handle(void *arg)
{
struct pending_request *req;
pthread_mutex_lock(&pending_requests.lock);
req = async_reply_handle_thread_unsafe(arg);
pthread_mutex_unlock(&pending_requests.lock);
if (req != NULL)
trigger_async_action(req);
}
static int
open_socket_fd(void)
{
char peer_name[PATH_MAX] = {0};
struct sockaddr_un un;
if (rte_eal_process_type() == RTE_PROC_SECONDARY)
snprintf(peer_name, sizeof(peer_name),
"%d_%"PRIx64, getpid(), rte_rdtsc());
mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
if (mp_fd < 0) {
RTE_LOG(ERR, EAL, "failed to create unix socket\n");
return -1;
}
memset(&un, 0, sizeof(un));
un.sun_family = AF_UNIX;
create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
unlink(un.sun_path); /* May still exist since last run */
if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
un.sun_path, strerror(errno));
close(mp_fd);
return -1;
}
RTE_LOG(INFO, EAL, "Multi-process socket %s\n", un.sun_path);
return mp_fd;
}
int
rte_mp_channel_init(void)
{
char path[PATH_MAX];
int dir_fd;
pthread_t mp_handle_tid;
/* in no shared files mode, we do not have secondary processes support,
* so no need to initialize IPC.
*/
if (internal_config.no_shconf) {
RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n");
return 0;
}
/* create filter path */
create_socket_path("*", path, sizeof(path));
strlcpy(mp_filter, basename(path), sizeof(mp_filter));
/* path may have been modified, so recreate it */
create_socket_path("*", path, sizeof(path));
strlcpy(mp_dir_path, dirname(path), sizeof(mp_dir_path));
/* lock the directory */
dir_fd = open(mp_dir_path, O_RDONLY);
if (dir_fd < 0) {
RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
mp_dir_path, strerror(errno));
return -1;
}
if (flock(dir_fd, LOCK_EX)) {
RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
mp_dir_path, strerror(errno));
close(dir_fd);
return -1;
}
if (open_socket_fd() < 0) {
close(dir_fd);
return -1;
}
if (rte_ctrl_thread_create(&mp_handle_tid, "rte_mp_handle",
NULL, mp_handle, NULL) < 0) {
RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
strerror(errno));
close(mp_fd);
close(dir_fd);
mp_fd = -1;
return -1;
}
/* unlock the directory */
flock(dir_fd, LOCK_UN);
close(dir_fd);
return 0;
}
/**
* Return -1, as fail to send message and it's caused by the local side.
* Return 0, as fail to send message and it's caused by the remote side.
* Return 1, as succeed to send message.
*
*/
static int
send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
{
int snd;
struct iovec iov;
struct msghdr msgh;
struct cmsghdr *cmsg;
struct sockaddr_un dst;
struct mp_msg_internal m;
int fd_size = msg->num_fds * sizeof(int);
char control[CMSG_SPACE(fd_size)];
m.type = type;
memcpy(&m.msg, msg, sizeof(*msg));
memset(&dst, 0, sizeof(dst));
dst.sun_family = AF_UNIX;
strlcpy(dst.sun_path, dst_path, sizeof(dst.sun_path));
memset(&msgh, 0, sizeof(msgh));
memset(control, 0, sizeof(control));
iov.iov_base = &m;
iov.iov_len = sizeof(m) - sizeof(msg->fds);
msgh.msg_name = &dst;
msgh.msg_namelen = sizeof(dst);
msgh.msg_iov = &iov;
msgh.msg_iovlen = 1;
msgh.msg_control = control;
msgh.msg_controllen = sizeof(control);
cmsg = CMSG_FIRSTHDR(&msgh);
cmsg->cmsg_len = CMSG_LEN(fd_size);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
memcpy(CMSG_DATA(cmsg), msg->fds, fd_size);
do {
snd = sendmsg(mp_fd, &msgh, 0);
} while (snd < 0 && errno == EINTR);
if (snd < 0) {
rte_errno = errno;
/* Check if it caused by peer process exits */
if (errno == ECONNREFUSED &&
rte_eal_process_type() == RTE_PROC_PRIMARY) {
unlink(dst_path);
return 0;
}
if (errno == ENOBUFS) {
RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
dst_path);
return 0;
}
RTE_LOG(ERR, EAL, "failed to send to (%s) due to %s\n",
dst_path, strerror(errno));
return -1;
}
return 1;
}
static int
mp_send(struct rte_mp_msg *msg, const char *peer, int type)
{
int dir_fd, ret = 0;
DIR *mp_dir;
struct dirent *ent;
if (!peer && (rte_eal_process_type() == RTE_PROC_SECONDARY))
peer = eal_mp_socket_path();
if (peer) {
if (send_msg(peer, msg, type) < 0)
return -1;
else
return 0;
}
/* broadcast to all secondary processes */
mp_dir = opendir(mp_dir_path);
if (!mp_dir) {
RTE_LOG(ERR, EAL, "Unable to open directory %s\n",
mp_dir_path);
rte_errno = errno;
return -1;
}
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
rte_errno = errno;
closedir(mp_dir);
return -1;
}
while ((ent = readdir(mp_dir))) {
char path[PATH_MAX];
if (fnmatch(mp_filter, ent->d_name, 0) != 0)
continue;
snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
ent->d_name);
if (send_msg(path, msg, type) < 0)
ret = -1;
}
/* unlock the dir */
flock(dir_fd, LOCK_UN);
/* dir_fd automatically closed on closedir */
closedir(mp_dir);
return ret;
}
static bool
check_input(const struct rte_mp_msg *msg)
{
if (msg == NULL) {
RTE_LOG(ERR, EAL, "Msg cannot be NULL\n");
rte_errno = EINVAL;
return false;
}
if (validate_action_name(msg->name))
return false;
if (msg->len_param > RTE_MP_MAX_PARAM_LEN) {
RTE_LOG(ERR, EAL, "Message data is too long\n");
rte_errno = E2BIG;
return false;
}
if (msg->num_fds > RTE_MP_MAX_FD_NUM) {
RTE_LOG(ERR, EAL, "Cannot send more than %d FDs\n",
RTE_MP_MAX_FD_NUM);
rte_errno = E2BIG;
return false;
}
return true;
}
int __rte_experimental
rte_mp_sendmsg(struct rte_mp_msg *msg)
{
if (!check_input(msg))
return -1;
RTE_LOG(DEBUG, EAL, "sendmsg: %s\n", msg->name);
return mp_send(msg, NULL, MP_MSG);
}
static int
mp_request_async(const char *dst, struct rte_mp_msg *req,
struct async_request_param *param, const struct timespec *ts)
{
struct rte_mp_msg *reply_msg;
struct pending_request *pending_req, *exist;
int ret = -1;
pending_req = calloc(1, sizeof(*pending_req));
reply_msg = calloc(1, sizeof(*reply_msg));
if (pending_req == NULL || reply_msg == NULL) {
RTE_LOG(ERR, EAL, "Could not allocate space for sync request\n");
rte_errno = ENOMEM;
ret = -1;
goto fail;
}
pending_req->type = REQUEST_TYPE_ASYNC;
strlcpy(pending_req->dst, dst, sizeof(pending_req->dst));
pending_req->request = req;
pending_req->reply = reply_msg;
pending_req->async.param = param;
/* queue already locked by caller */
exist = find_pending_request(dst, req->name);
if (exist) {
RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name);
rte_errno = EEXIST;
ret = -1;
goto fail;
}
ret = send_msg(dst, req, MP_REQ);
if (ret < 0) {
RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n",
dst, req->name);
ret = -1;
goto fail;
} else if (ret == 0) {
ret = 0;
goto fail;
}
param->user_reply.nb_sent++;
/* if alarm set fails, we simply ignore the reply */
if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
async_reply_handle, pending_req) < 0) {
RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n",
dst, req->name);
ret = -1;
goto fail;
}
TAILQ_INSERT_TAIL(&pending_requests.requests, pending_req, next);
return 0;
fail:
free(pending_req);
free(reply_msg);
return ret;
}
static int
mp_request_sync(const char *dst, struct rte_mp_msg *req,
struct rte_mp_reply *reply, const struct timespec *ts)
{
int ret;
struct rte_mp_msg msg, *tmp;
struct pending_request pending_req, *exist;
pending_req.type = REQUEST_TYPE_SYNC;
pending_req.reply_received = 0;
strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
pending_req.request = req;
pending_req.reply = &msg;
pthread_cond_init(&pending_req.sync.cond, NULL);
exist = find_pending_request(dst, req->name);
if (exist) {
RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name);
rte_errno = EEXIST;
return -1;
}
ret = send_msg(dst, req, MP_REQ);
if (ret < 0) {
RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n",
dst, req->name);
return -1;
} else if (ret == 0)
return 0;
TAILQ_INSERT_TAIL(&pending_requests.requests, &pending_req, next);
reply->nb_sent++;
do {
ret = pthread_cond_timedwait(&pending_req.sync.cond,
&pending_requests.lock, ts);
} while (ret != 0 && ret != ETIMEDOUT);
TAILQ_REMOVE(&pending_requests.requests, &pending_req, next);
if (pending_req.reply_received == 0) {
RTE_LOG(ERR, EAL, "Fail to recv reply for request %s:%s\n",
dst, req->name);
rte_errno = ETIMEDOUT;
return -1;
}
if (pending_req.reply_received == -1) {
RTE_LOG(DEBUG, EAL, "Asked to ignore response\n");
/* not receiving this message is not an error, so decrement
* number of sent messages
*/
reply->nb_sent--;
return 0;
}
tmp = realloc(reply->msgs, sizeof(msg) * (reply->nb_received + 1));
if (!tmp) {
RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n",
dst, req->name);
rte_errno = ENOMEM;
return -1;
}
memcpy(&tmp[reply->nb_received], &msg, sizeof(msg));
reply->msgs = tmp;
reply->nb_received++;
return 0;
}
int __rte_experimental
rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
const struct timespec *ts)
{
int dir_fd, ret = 0;
DIR *mp_dir;
struct dirent *ent;
struct timeval now;
struct timespec end;
RTE_LOG(DEBUG, EAL, "request: %s\n", req->name);
if (check_input(req) == false)
return -1;
reply->nb_sent = 0;
reply->nb_received = 0;
reply->msgs = NULL;
if (internal_config.no_shconf) {
RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
return 0;
}
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Failed to get current time\n");
rte_errno = errno;
return -1;
}
end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
end.tv_sec = now.tv_sec + ts->tv_sec +
(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
/* for secondary process, send request to the primary process only */
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
pthread_mutex_lock(&pending_requests.lock);
ret = mp_request_sync(eal_mp_socket_path(), req, reply, &end);
pthread_mutex_unlock(&pending_requests.lock);
return ret;
}
/* for primary process, broadcast request, and collect reply 1 by 1 */
mp_dir = opendir(mp_dir_path);
if (!mp_dir) {
RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path);
rte_errno = errno;
return -1;
}
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
closedir(mp_dir);
rte_errno = errno;
return -1;
}
pthread_mutex_lock(&pending_requests.lock);
while ((ent = readdir(mp_dir))) {
char path[PATH_MAX];
if (fnmatch(mp_filter, ent->d_name, 0) != 0)
continue;
snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
ent->d_name);
/* unlocks the mutex while waiting for response,
* locks on receive
*/
if (mp_request_sync(path, req, reply, &end))
ret = -1;
}
pthread_mutex_unlock(&pending_requests.lock);
/* unlock the directory */
flock(dir_fd, LOCK_UN);
/* dir_fd automatically closed on closedir */
closedir(mp_dir);
return ret;
}
int __rte_experimental
rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
rte_mp_async_reply_t clb)
{
struct rte_mp_msg *copy;
struct pending_request *dummy;
struct async_request_param *param;
struct rte_mp_reply *reply;
int dir_fd, ret = 0;
DIR *mp_dir;
struct dirent *ent;
struct timeval now;
struct timespec *end;
bool dummy_used = false;
RTE_LOG(DEBUG, EAL, "request: %s\n", req->name);
if (check_input(req) == false)
return -1;
if (internal_config.no_shconf) {
RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
return 0;
}
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Faile to get current time\n");
rte_errno = errno;
return -1;
}
copy = calloc(1, sizeof(*copy));
dummy = calloc(1, sizeof(*dummy));
param = calloc(1, sizeof(*param));
if (copy == NULL || dummy == NULL || param == NULL) {
RTE_LOG(ERR, EAL, "Failed to allocate memory for async reply\n");
rte_errno = ENOMEM;
goto fail;
}
/* copy message */
memcpy(copy, req, sizeof(*copy));
param->n_responses_processed = 0;
param->clb = clb;
end = &param->end;
reply = &param->user_reply;
end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
end->tv_sec = now.tv_sec + ts->tv_sec +
(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
reply->nb_sent = 0;
reply->nb_received = 0;
reply->msgs = NULL;
/* we have to lock the request queue here, as we will be adding a bunch
* of requests to the queue at once, and some of the replies may arrive
* before we add all of the requests to the queue.
*/
pthread_mutex_lock(&pending_requests.lock);
/* we have to ensure that callback gets triggered even if we don't send
* anything, therefore earlier we have allocated a dummy request. fill
* it, and put it on the queue if we don't send any requests.
*/
dummy->type = REQUEST_TYPE_ASYNC;
dummy->request = copy;
dummy->reply = NULL;
dummy->async.param = param;
dummy->reply_received = 1; /* short-circuit the timeout */
/* for secondary process, send request to the primary process only */
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
ret = mp_request_async(eal_mp_socket_path(), copy, param, ts);
/* if we didn't send anything, put dummy request on the queue */
if (ret == 0 && reply->nb_sent == 0) {
TAILQ_INSERT_TAIL(&pending_requests.requests, dummy,
next);
dummy_used = true;
}
pthread_mutex_unlock(&pending_requests.lock);
/* if we couldn't send anything, clean up */
if (ret != 0)
goto fail;
return 0;
}
/* for primary process, broadcast request */
mp_dir = opendir(mp_dir_path);
if (!mp_dir) {
RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path);
rte_errno = errno;
goto unlock_fail;
}
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
rte_errno = errno;
goto closedir_fail;
}
while ((ent = readdir(mp_dir))) {
char path[PATH_MAX];
if (fnmatch(mp_filter, ent->d_name, 0) != 0)
continue;
snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
ent->d_name);
if (mp_request_async(path, copy, param, ts))
ret = -1;
}
/* if we didn't send anything, put dummy request on the queue */
if (ret == 0 && reply->nb_sent == 0) {
TAILQ_INSERT_HEAD(&pending_requests.requests, dummy, next);
dummy_used = true;
}
/* finally, unlock the queue */
pthread_mutex_unlock(&pending_requests.lock);
/* unlock the directory */
flock(dir_fd, LOCK_UN);
/* dir_fd automatically closed on closedir */
closedir(mp_dir);
/* if dummy was unused, free it */
if (!dummy_used)
free(dummy);
return ret;
closedir_fail:
closedir(mp_dir);
unlock_fail:
pthread_mutex_unlock(&pending_requests.lock);
fail:
free(dummy);
free(param);
free(copy);
return -1;
}
int __rte_experimental
rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
{
RTE_LOG(DEBUG, EAL, "reply: %s\n", msg->name);
if (check_input(msg) == false)
return -1;
if (peer == NULL) {
RTE_LOG(ERR, EAL, "peer is not specified\n");
rte_errno = EINVAL;
return -1;
}
if (internal_config.no_shconf) {
RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
return 0;
}
return mp_send(msg, peer, MP_REP);
}