Add implementations of sendmmsg(3) and recvmmsg(3) functions which

wrap sendmsg(2) and recvmsg(2) into batch send and receive operations.
The goal of this implementation is only to provide API compatibility
with Linux.

The cancellation behaviour of the functions is not quite right, but
due to the relatively rare use of cancellation it is considered acceptable
compared with the complexity of a correct implementation.  If the
functions are reimplemented as syscalls, the fix would become almost
trivial.  The direct use of the syscall trampolines instead of the libc
wrappers for sendmsg(2) and recvmsg(2) is to avoid data loss on
cancellation.

Submitted by:	Boris Astardzhiev <boris.astardzhiev@gmail.com>
Discussed with:	jilles (cancellation behaviour)
MFC after:	1 month
This commit is contained in:
Konstantin Belousov 2016-01-29 14:12:12 +00:00
parent 8a1867f4aa
commit bf420ace0a
9 changed files with 318 additions and 20 deletions

View File

@ -99,11 +99,13 @@ SRCS+= __getosreldate.c \
raise.c \
readdir.c \
readpassphrase.c \
recvmmsg.c \
rewinddir.c \
scandir.c \
seed48.c \
seekdir.c \
semctl.c \
sendmmsg.c \
setdomainname.c \
sethostname.c \
setjmperr.c \
@ -451,10 +453,12 @@ MLINKS+=rand48.3 _rand48.3 \
rand48.3 nrand48.3 \
rand48.3 seed48.3 \
rand48.3 srand48.3
MLINKS+=recv.2 recvmmsg.2
MLINKS+=scandir.3 alphasort.3
MLINKS+=sem_open.3 sem_close.3 \
sem_open.3 sem_unlink.3
MLINKS+=sem_wait.3 sem_trywait.3
MLINKS+=send.2 sendmmsg.2
MLINKS+=setjmp.3 _longjmp.3 \
setjmp.3 _setjmp.3 \
setjmp.3 longjmp.3 \

96
lib/libc/gen/recvmmsg.c Normal file
View File

@ -0,0 +1,96 @@
/*
* Copyright (c) 2016 Boris Astardzhiev, Smartcom-Bulgaria AD
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice(s), this list of conditions and the following disclaimer as
* the first lines of this file unmodified other than the possible
* addition of one or more copyright notices.
* 2. Redistributions in binary form must reproduce the above copyright
* notice(s), this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
#include <poll.h>
#include <stddef.h>
#include "libc_private.h"
/*
 * Receive up to vlen messages from socket s into the msgvec vector by
 * issuing one recvmsg(2) syscall per element.
 *
 * If timeout is not NULL, ppoll(2) is used to wait for the socket to
 * become readable before the first receive is attempted; the timeout is
 * not re-applied to the second and later messages.  If MSG_WAITFORONE is
 * set in flags, MSG_DONTWAIT is added for the second and later messages.
 *
 * On return, the msg_len field of every received element holds the byte
 * count reported by recvmsg(2) for that message.
 *
 * Returns the number of messages received; 0 if vlen is 0 or if the
 * timeout expired before any data arrived; -1 with errno set if the wait
 * or the first receive failed.
 */
ssize_t
recvmmsg(int s, struct mmsghdr *__restrict msgvec, size_t vlen, int flags,
    const struct timespec *__restrict timeout)
{
	struct pollfd pfd[1];
	size_t i, rcvd;
	ssize_t ret;
	int res;
	short ev;

	/* Nothing to receive into; do not touch msgvec[0]. */
	if (vlen == 0)
		return (0);

	if (timeout != NULL) {
		pfd[0].fd = s;
		pfd[0].revents = 0;
		pfd[0].events = ev = POLLIN | POLLRDNORM | POLLRDBAND |
		    POLLPRI;
		res = ppoll(&pfd[0], 1, timeout, NULL);
		/* Error (-1, errno set by ppoll) or timeout (0). */
		if (res == -1 || res == 0)
			return (res);
		if (pfd[0].revents & POLLNVAL) {
			errno = EBADF;
			return (-1);
		}
		/* Woken up, but not for any of the read events. */
		if ((pfd[0].revents & ev) == 0) {
			errno = ETIMEDOUT;
			return (-1);
		}
	}

	ret = __sys_recvmsg(s, &msgvec[0].msg_hdr, flags);
	if (ret == -1)
		return (ret);

	/* Save received bytes for the first message as well. */
	msgvec[0].msg_len = ret;

	/*
	 * Do non-blocking receive for second and later messages if
	 * WAITFORONE is set.
	 */
	if (flags & MSG_WAITFORONE)
		flags |= MSG_DONTWAIT;

	rcvd = 1;
	for (i = rcvd; i < vlen; i++, rcvd++) {
		ret = __sys_recvmsg(s, &msgvec[i].msg_hdr, flags);
		if (ret == -1) {
			/*
			 * We have received messages.  Let the caller
			 * know about the data received; the socket
			 * error is returned on the next invocation.
			 */
			return (rcvd);
		}

		/* Save received bytes. */
		msgvec[i].msg_len = ret;
	}

	return (rcvd);
}

64
lib/libc/gen/sendmmsg.c Normal file
View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2016 Boris Astardzhiev, Smartcom-Bulgaria AD
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice(s), this list of conditions and the following disclaimer as
* the first lines of this file unmodified other than the possible
* addition of one or more copyright notices.
* 2. Redistributions in binary form must reproduce the above copyright
* notice(s), this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/socket.h>
#include "libc_private.h"
/*
 * Transmit up to vlen messages from the msgvec vector on socket s, one
 * sendmsg(2) syscall per element, stopping at the first failure.
 *
 * The msg_len field of every transmitted element is set to the byte
 * count reported by sendmsg(2) for that message.
 *
 * Returns the number of messages sent.  If the very first send fails,
 * -1 is returned with errno left as set by the syscall.
 */
ssize_t
sendmmsg(int s, struct mmsghdr *__restrict msgvec, size_t vlen, int flags)
{
	size_t idx;
	ssize_t nbytes;

	for (idx = 0; idx < vlen; idx++) {
		nbytes = __sys_sendmsg(s, &msgvec[idx].msg_hdr, flags);
		if (nbytes == -1) {
			/*
			 * Report a partial batch as success; the socket
			 * error is expected to show up again on the
			 * caller's next invocation.  Only a failure on
			 * the very first message is reported as -1.
			 */
			return (idx == 0 ? -1 : (ssize_t)idx);
		}

		/* Record how many bytes went out for this element. */
		msgvec[idx].msg_len = nbytes;
	}

	return ((ssize_t)idx);
}

View File

@ -208,6 +208,7 @@
#define readv _readv
#define recvfrom _recvfrom
#define recvmsg _recvmsg
#define recvmmsg _recvmmsg
#define select _select
#define sem_close _sem_close
#define sem_destroy _sem_destroy
@ -220,6 +221,7 @@
#define sem_unlink _sem_unlink
#define sem_wait _sem_wait
#define sendmsg _sendmsg
#define sendmmsg _sendmmsg
#define sendto _sendto
#define setsockopt _setsockopt
/*#define sigaction _sigaction*/

View File

@ -189,6 +189,7 @@
#undef readv
#undef recvfrom
#undef recvmsg
#undef recvmmsg
#undef select
#undef sem_close
#undef sem_destroy
@ -201,6 +202,7 @@
#undef sem_unlink
#undef sem_wait
#undef sendmsg
#undef sendmmsg
#undef sendto
#undef setsockopt
#undef sigaction

View File

@ -399,6 +399,8 @@ FBSD_1.4 {
utimensat;
numa_setaffinity;
numa_getaffinity;
sendmmsg;
recvmmsg;
};
FBSDprivate_1.0 {

View File

@ -28,14 +28,15 @@
.\" @(#)recv.2 8.3 (Berkeley) 2/21/94
.\" $FreeBSD$
.\"
.Dd October 15, 2014
.Dd January 29, 2016
.Dt RECV 2
.Os
.Sh NAME
.Nm recv ,
.Nm recvfrom ,
.Nm recvmsg
.Nd receive a message from a socket
.Nm recvmsg ,
.Nm recvmmsg
.Nd receive message(s) from a socket
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
@ -47,11 +48,14 @@
.Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr * restrict from" "socklen_t * restrict fromlen"
.Ft ssize_t
.Fn recvmsg "int s" "struct msghdr *msg" "int flags"
.Ft ssize_t
.Fn recvmmsg "int s" "struct mmsghdr * restrict msgvec" "size_t vlen" "int flags" "const struct timespec * restrict timeout"
.Sh DESCRIPTION
The
.Fn recvfrom
.Fn recvfrom ,
.Fn recvmsg ,
and
.Fn recvmsg
.Fn recvmmsg
system calls
are used to receive messages from a socket,
and may be used to receive data on a socket whether or not
@ -84,8 +88,39 @@ null pointer passed as its
.Fa from
argument.
.Pp
All three routines return the length of the message on successful
completion.
The
.Fn recvmmsg
function is used to receive multiple
messages in a single call.
Their number is supplied by
.Fa vlen .
The messages are placed in the buffers described by
.Fa msgvec
vector, after reception.
The size of each received message is placed in the
.Fa msg_len
field of each element of the vector.
If
.Fa timeout
is NULL the call blocks until the data is available for each
supplied message buffer.
Otherwise it waits for data for the specified amount of time.
If the timeout expired and there is no data received,
a value 0 is returned.
The
.Xr ppoll 2
system call is used to implement the timeout mechanism,
before first receive is performed.
.Pp
The
.Fn recv ,
.Fn recvfrom
and
.Fn recvmsg
return the length of the message on successful
completion, whereas
.Fn recvmmsg
returns the number of received messages.
If a message is too long to fit in the supplied buffer,
excess bytes may be discarded depending on the type of socket
the message is received from (see
@ -100,7 +135,9 @@ in which case the value
.Va errno
is set to
.Er EAGAIN .
The receive calls normally return any data available,
The receive calls except
.Fn recvmmsg
normally return any data available,
up to the requested amount,
rather than waiting for receipt of the full amount requested;
this behavior is affected by the socket-level options
@ -109,6 +146,9 @@ and
.Dv SO_RCVTIMEO
described in
.Xr getsockopt 2 .
The
.Fn recvmmsg
function implements this behaviour for each message in the vector.
.Pp
The
.Xr select 2
@ -127,6 +167,10 @@ one or more of the values:
.It Dv MSG_WAITALL Ta wait for full request or error
.It Dv MSG_DONTWAIT Ta do not block
.It Dv MSG_CMSG_CLOEXEC Ta set received fds close-on-exec
.It Dv MSG_WAITFORONE Ta do not block after receiving the first message
(only for
.Fn recvmmsg
)
.El
.Pp
The
@ -158,6 +202,11 @@ is set to
This flag is not available in strict
.Tn ANSI
or C99 compilation mode.
The
.Dv MSG_WAITFORONE
flag sets
.Dv MSG_DONTWAIT
after the first message has been received.
This flag is only relevant for
.Fn recvmmsg .
.Pp
The
.Fn recvmsg
@ -290,9 +339,31 @@ control data were discarded due to lack of space in the buffer
for ancillary data.
.Dv MSG_OOB
is returned to indicate that expedited or out-of-band data were received.
.Pp
The
.Fn recvmmsg
system call uses the
.Fa mmsghdr
structure, defined as follows in the
.In sys/socket.h
header :
.Bd -literal
struct mmsghdr {
struct msghdr msg_hdr; /* message header */
ssize_t msg_len; /* message length */
};
.Ed
.Pp
On data reception the
.Fa msg_len
field is updated to the length of the received message.
.Sh RETURN VALUES
These calls return the number of bytes received, or -1
if an error occurred.
These calls except
.Fn recvmmsg
return the number of bytes received.
.Fn recvmmsg
returns the number of messages received.
A value of -1 is returned if an error occurred.
.Sh ERRORS
The calls fail if:
.Bl -tag -width Er
@ -347,3 +418,7 @@ The
.Fn recv
function appeared in
.Bx 4.2 .
The
.Fn recvmmsg
function appeared in
.Fx 11.0 .

View File

@ -28,14 +28,15 @@
.\" From: @(#)send.2 8.2 (Berkeley) 2/21/94
.\" $FreeBSD$
.\"
.Dd February 5, 2009
.Dd January 29, 2016
.Dt SEND 2
.Os
.Sh NAME
.Nm send ,
.Nm sendto ,
.Nm sendmsg
.Nd send a message from a socket
.Nm sendmsg ,
.Nm sendmmsg
.Nd send message(s) from a socket
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
@ -47,25 +48,33 @@
.Fn sendto "int s" "const void *msg" "size_t len" "int flags" "const struct sockaddr *to" "socklen_t tolen"
.Ft ssize_t
.Fn sendmsg "int s" "const struct msghdr *msg" "int flags"
.Ft ssize_t
.Fn sendmmsg "int s" "struct mmsghdr * restrict msgvec" "size_t vlen" "int flags"
.Sh DESCRIPTION
The
.Fn send
function,
and
.Fn sendmmsg
functions,
and
.Fn sendto
and
.Fn sendmsg
system calls
are used to transmit a message to another socket.
are used to transmit one or more messages (with the
.Fn sendmmsg
call) to
another socket.
The
.Fn send
function
may be used only when the socket is in a
.Em connected
state, while
.Fn sendto
and
.Fn sendto ,
.Fn sendmsg
and
.Fn sendmmsg
may be used at any time.
.Pp
The address of the target is given by
@ -81,6 +90,18 @@ underlying protocol, the error
is returned, and
the message is not transmitted.
.Pp
The
.Fn sendmmsg
function sends multiple messages in a single call.
They are given by the
.Fa msgvec
vector along with
.Fa vlen
specifying the vector size.
The number of octets sent for each message is placed in the
.Fa msg_len
field of each processed element of the vector after transmission.
.Pp
No indication of failure to deliver is implicit in a
.Fn send .
Locally detected errors are indicated by a return value of -1.
@ -138,14 +159,27 @@ See
.Xr recv 2
for a description of the
.Fa msghdr
structure and the
.Fa mmsghdr
structure.
.Sh RETURN VALUES
The call returns the number of characters sent, or -1
if an error occurred.
The
.Fn send ,
.Fn sendto
and
.Fn sendmsg
calls
return the number of octets sent.
The
.Fn sendmmsg
call returns the number of messages sent.
If an error occurred a value of -1 is returned.
.Sh ERRORS
The
.Fn send
function and
and
.Fn sendmmsg
functions and
.Fn sendto
and
.Fn sendmsg
@ -215,6 +249,10 @@ The
.Fn send
function appeared in
.Bx 4.2 .
The
.Fn sendmmsg
function appeared in
.Fx 11.0 .
.Sh BUGS
Because
.Fn sendmsg

View File

@ -431,6 +431,7 @@ struct msghdr {
#define MSG_NBIO 0x4000 /* FIONBIO mode, used by fifofs */
#define MSG_COMPAT 0x8000 /* used in sendit() */
#define MSG_CMSG_CLOEXEC 0x40000 /* make received fds close-on-exec */
#define MSG_WAITFORONE 0x80000 /* for recvmmsg() */
#endif
#ifdef _KERNEL
#define MSG_SOCALLBCK 0x10000 /* for use by socket callbacks - soreceive (TCP) */
@ -596,6 +597,14 @@ struct sf_hdtr {
#define SFK_COMPAT 0x00000001
#define SF_READAHEAD(flags) ((flags) >> 16)
#endif /* _KERNEL */
/*
 * Sendmmsg/recvmmsg specific structure(s)
 *
 * One element of the message vector passed to sendmmsg() and recvmmsg():
 * a regular message header paired with a per-message length result.
 */
struct mmsghdr {
struct msghdr msg_hdr; /* message header handed to sendmsg(2)/recvmsg(2) */
ssize_t msg_len; /* message length in bytes, filled in by sendmmsg()/recvmmsg() */
};
#endif /* __BSD_VISIBLE */
#ifndef _KERNEL
@ -618,12 +627,18 @@ int listen(int, int);
ssize_t recv(int, void *, size_t, int);
ssize_t recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict);
ssize_t recvmsg(int, struct msghdr *, int);
#if __BSD_VISIBLE
struct timespec;
ssize_t recvmmsg(int, struct mmsghdr * __restrict, size_t, int,
const struct timespec * __restrict);
#endif
ssize_t send(int, const void *, size_t, int);
ssize_t sendto(int, const void *,
size_t, int, const struct sockaddr *, socklen_t);
ssize_t sendmsg(int, const struct msghdr *, int);
#if __BSD_VISIBLE
int sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int);
ssize_t sendmmsg(int, struct mmsghdr * __restrict, size_t, int);
int setfib(int);
#endif
int setsockopt(int, int, int, const void *, socklen_t);