From bf420ace0a5ebf0e737125511bbd8f94a4d29c1a Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 29 Jan 2016 14:12:12 +0000 Subject: [PATCH] Add implementations of sendmmsg(3) and recvmmsg(3) functions which wraps sendmsg(2) and recvmsg(2) into batch send and receive operation. The goal of this implementation is only to provide API compatibility with Linux. The cancellation behaviour of the functions is not quite right, but due to relative rare use of cancellation it is considered acceptable comparing with the complexity of the correct implementation. If functions are reimplemented as syscalls, the fix would come almost trivial. The direct use of the syscall trampolines instead of libc wrappers for sendmsg(2) and recvmsg(2) is to avoid data loss on cancellation. Submitted by: Boris Astardzhiev Discussed with: jilles (cancellation behaviour) MFC after: 1 month --- lib/libc/gen/Makefile.inc | 4 ++ lib/libc/gen/recvmmsg.c | 96 +++++++++++++++++++++++++++++++++ lib/libc/gen/sendmmsg.c | 64 ++++++++++++++++++++++ lib/libc/include/namespace.h | 2 + lib/libc/include/un-namespace.h | 2 + lib/libc/sys/Symbol.map | 2 + lib/libc/sys/recv.2 | 95 ++++++++++++++++++++++++++++---- lib/libc/sys/send.2 | 58 ++++++++++++++++---- sys/sys/socket.h | 15 ++++++ 9 files changed, 318 insertions(+), 20 deletions(-) create mode 100644 lib/libc/gen/recvmmsg.c create mode 100644 lib/libc/gen/sendmmsg.c diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index b44846131f45..7de8ce33d647 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -99,11 +99,13 @@ SRCS+= __getosreldate.c \ raise.c \ readdir.c \ readpassphrase.c \ + recvmmsg.c \ rewinddir.c \ scandir.c \ seed48.c \ seekdir.c \ semctl.c \ + sendmmsg.c \ setdomainname.c \ sethostname.c \ setjmperr.c \ @@ -451,10 +453,12 @@ MLINKS+=rand48.3 _rand48.3 \ rand48.3 nrand48.3 \ rand48.3 seed48.3 \ rand48.3 srand48.3 +MLINKS+=recv.2 recvmmsg.2 MLINKS+=scandir.3 alphasort.3 MLINKS+=sem_open.3 sem_close.3 \ sem_open.3 sem_unlink.3 MLINKS+=sem_wait.3 sem_trywait.3 +MLINKS+=send.2 sendmmsg.2 MLINKS+=setjmp.3 _longjmp.3 \ setjmp.3 _setjmp.3 \ setjmp.3 longjmp.3 \ diff --git a/lib/libc/gen/recvmmsg.c b/lib/libc/gen/recvmmsg.c new file mode 100644 index 000000000000..0db9e1acab17 --- /dev/null +++ b/lib/libc/gen/recvmmsg.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016 Boris Astardzhiev, Smartcom-Bulgaria AD + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice(s), this list of conditions and the following disclaimer as + * the first lines of this file unmodified other than the possible + * addition of one or more copyright notices. + * 2. Redistributions in binary form must reproduce the above copyright + * notice(s), this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include "libc_private.h" + +ssize_t +recvmmsg(int s, struct mmsghdr *__restrict msgvec, size_t vlen, int flags, + const struct timespec *__restrict timeout) +{ + struct pollfd pfd[1]; + size_t i, rcvd; + ssize_t ret; + int res; + short ev; + + if (timeout != NULL) { + pfd[0].fd = s; + pfd[0].revents = 0; + pfd[0].events = ev = POLLIN | POLLRDNORM | POLLRDBAND | + POLLPRI; + res = ppoll(&pfd[0], 1, timeout, NULL); + if (res == -1 || res == 0) + return (res); + if (pfd[0].revents & POLLNVAL) { + errno = EBADF; + return (-1); + } + if ((pfd[0].revents & ev) == 0) { + errno = ETIMEDOUT; + return (-1); + } + } + + ret = __sys_recvmsg(s, &msgvec[0].msg_hdr, flags); + if (ret == -1) + return (ret); + + /* + * Do non-blocking receive for second and later messages if + * WAITFORONE is set. + */ + if (flags & MSG_WAITFORONE) + flags |= MSG_DONTWAIT; + + rcvd = 1; + for (i = rcvd; i < vlen; i++, rcvd++) { + ret = __sys_recvmsg(s, &msgvec[i].msg_hdr, flags); + if (ret == -1) { + /* We have received messages. Let caller know + * about the data received, socket + * error is returned on next + * invocation. + */ + return (rcvd); + } + + /* Save received bytes. */ + msgvec[i].msg_len = ret; + } + + return (rcvd); +} diff --git a/lib/libc/gen/sendmmsg.c b/lib/libc/gen/sendmmsg.c new file mode 100644 index 000000000000..7b3e8f357613 --- /dev/null +++ b/lib/libc/gen/sendmmsg.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2016 Boris Astardzhiev, Smartcom-Bulgaria AD + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice(s), this list of conditions and the following disclaimer as + * the first lines of this file unmodified other than the possible + * addition of one or more copyright notices. + * 2. Redistributions in binary form must reproduce the above copyright + * notice(s), this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include "libc_private.h" + +ssize_t +sendmmsg(int s, struct mmsghdr *__restrict msgvec, size_t vlen, int flags) +{ + size_t i, sent; + ssize_t ret; + + sent = 0; + for (i = 0; i < vlen; i++, sent++) { + ret = __sys_sendmsg(s, &msgvec[i].msg_hdr, flags); + if (ret == -1) { + if (sent != 0) { + /* + * We have sent messages. Let caller + * know about the data sent, socket + * error is returned on next + * invocation. + */ + return (sent); + } + return (ret); + } + + /* Save sent bytes. */ + msgvec[i].msg_len = ret; + } + + return (sent); +} diff --git a/lib/libc/include/namespace.h b/lib/libc/include/namespace.h index 739d7b112392..c95829e401f1 100644 --- a/lib/libc/include/namespace.h +++ b/lib/libc/include/namespace.h @@ -208,6 +208,7 @@ #define readv _readv #define recvfrom _recvfrom #define recvmsg _recvmsg +#define recvmmsg _recvmmsg #define select _select #define sem_close _sem_close #define sem_destroy _sem_destroy @@ -220,6 +221,7 @@ #define sem_unlink _sem_unlink #define sem_wait _sem_wait #define sendmsg _sendmsg +#define sendmmsg _sendmmsg #define sendto _sendto #define setsockopt _setsockopt /*#define sigaction _sigaction*/ diff --git a/lib/libc/include/un-namespace.h b/lib/libc/include/un-namespace.h index f31fa7ade9e7..023334858572 100644 --- a/lib/libc/include/un-namespace.h +++ b/lib/libc/include/un-namespace.h @@ -189,6 +189,7 @@ #undef readv #undef recvfrom #undef recvmsg +#undef recvmmsg #undef select #undef sem_close #undef sem_destroy @@ -201,6 +202,7 @@ #undef sem_unlink #undef sem_wait #undef sendmsg +#undef sendmmsg #undef sendto #undef setsockopt #undef sigaction diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index 7b3257c6a812..dc2ed0ed6e57 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -399,6 +399,8 @@ FBSD_1.4 { utimensat; numa_setaffinity; numa_getaffinity; + sendmmsg; + recvmmsg; }; FBSDprivate_1.0 { diff --git a/lib/libc/sys/recv.2 b/lib/libc/sys/recv.2 index 326e7ffd130a..4867ea77e3e5 100644 --- a/lib/libc/sys/recv.2 +++ b/lib/libc/sys/recv.2 @@ -28,14 +28,15 @@ .\" @(#)recv.2 8.3 (Berkeley) 2/21/94 .\" $FreeBSD$ .\" -.Dd October 15, 2014 +.Dd January 29, 2016 .Dt RECV 2 .Os .Sh NAME .Nm recv , .Nm recvfrom , -.Nm recvmsg -.Nd receive a message from a socket +.Nm recvmsg , +.Nm recvmmsg +.Nd receive message(s) from a socket .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -47,11 +48,14 @@ .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr * restrict from" "socklen_t * restrict fromlen" .Ft ssize_t .Fn recvmsg "int s" "struct msghdr *msg" "int flags" +.Ft ssize_t +.Fn recvmmsg "int s" "struct mmsghdr * restrict msgvec" "size_t vlen" "int flags" "const struct timespec * restrict timeout" .Sh DESCRIPTION The -.Fn recvfrom +.Fn recvfrom , +.Fn recvmsg , and -.Fn recvmsg +.Fn recvmmsg system calls are used to receive messages from a socket, and may be used to receive data on a socket whether or not @@ -84,8 +88,39 @@ null pointer passed as its .Fa from argument. .Pp -All three routines return the length of the message on successful -completion. +The +.Fn recvmmsg +function is used to receive multiple +messages at a call. +Their number is supplied by +.Fa vlen . +The messages are placed in the buffers described by +.Fa msgvec +vector, after reception. +The size of each received message is placed in the +.Fa msg_len +field of each element of the vector. +If +.Fa timeout +is NULL the call blocks until the data is available for each +supplied message buffer. +Otherwise it waits for data for the specified amount of time. +If the timeout expired and there is no data received, +a value 0 is returned. +The +.Xr ppoll 2 +system call is used to implement the timeout mechanism, +before first receive is performed. +.Pp +The +.Fn recv , +.Fn recvfrom +and +.Fn recvmsg +return the length of the message on successful +completion, whereas +.Fn recvmmsg +returns the number of received messages. If a message is too long to fit in the supplied buffer, excess bytes may be discarded depending on the type of socket the message is received from (see @@ -100,7 +135,9 @@ in which case the value .Va errno is set to .Er EAGAIN . -The receive calls normally return any data available, +The receive calls except +.Fn recvmmsg +normally return any data available, up to the requested amount, rather than waiting for receipt of the full amount requested; this behavior is affected by the socket-level options @@ -109,6 +146,9 @@ and .Dv SO_RCVTIMEO described in .Xr getsockopt 2 . +The +.Fn recvmmsg +function implements this behaviour for each message in the vector. .Pp The .Xr select 2 @@ -127,6 +167,10 @@ one or more of the values: .It Dv MSG_WAITALL Ta wait for full request or error .It Dv MSG_DONTWAIT Ta do not block .It Dv MSG_CMSG_CLOEXEC Ta set received fds close-on-exec +.It Dv MSG_WAITFORONE Ta do not block after receiving the first message +(only for +.Fn recvmmsg +) .El .Pp The @@ -158,6 +202,11 @@ is set to This flag is not available in strict .Tn ANSI or C99 compilation mode. +The +.Dv MSG_WAITFORONE +flag sets MSG_DONTWAIT after the first message has been received. +This flag is only relevant for +.Fn recvmmsg . .Pp The .Fn recvmsg @@ -290,9 +339,31 @@ control data were discarded due to lack of space in the buffer for ancillary data. .Dv MSG_OOB is returned to indicate that expedited or out-of-band data were received. +.Pp +The +.Fn recvmmsg +system call uses the +.Fa mmsghdr +structure, defined as follows in the +.In sys/socket.h +header : +.Bd -literal +struct mmsghdr { + struct msghdr msg_hdr; /* message header */ + ssize_t msg_len; /* message length */ +}; +.Ed +.Pp +On data reception the +.Fa msg_len +field is updated to the length of the received message. .Sh RETURN VALUES -These calls return the number of bytes received, or -1 -if an error occurred. +These calls except +.Fn recvmmsg +return the number of bytes received. +.Fn recvmmsg +returns the number of messages received. +A value of -1 is returned if an error occurred. .Sh ERRORS The calls fail if: .Bl -tag -width Er @@ -347,3 +418,7 @@ The .Fn recv function appeared in .Bx 4.2 . +The +.Fn recvmmsg +function appeared in +.Fx 11.0 . diff --git a/lib/libc/sys/send.2 b/lib/libc/sys/send.2 index 8fa2c64354d0..db2f4ee2a880 100644 --- a/lib/libc/sys/send.2 +++ b/lib/libc/sys/send.2 @@ -28,14 +28,15 @@ .\" From: @(#)send.2 8.2 (Berkeley) 2/21/94 .\" $FreeBSD$ .\" -.Dd February 5, 2009 +.Dd January 29, 2016 .Dt SEND 2 .Os .Sh NAME .Nm send , .Nm sendto , -.Nm sendmsg -.Nd send a message from a socket +.Nm sendmsg , +.Nm sendmmsg +.Nd send message(s) from a socket .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -47,25 +48,33 @@ .Fn sendto "int s" "const void *msg" "size_t len" "int flags" "const struct sockaddr *to" "socklen_t tolen" .Ft ssize_t .Fn sendmsg "int s" "const struct msghdr *msg" "int flags" +.Ft ssize_t +.Fn sendmmsg "int s" "struct mmsghdr * restrict msgvec" "size_t vlen" "int flags" .Sh DESCRIPTION The .Fn send -function, +and +.Fn sendmmsg +functions, and .Fn sendto and .Fn sendmsg system calls -are used to transmit a message to another socket. +are used to transmit one or more messages (with the +.Fn sendmmsg +call) to +another socket. The .Fn send function may be used only when the socket is in a .Em connected state, while -.Fn sendto -and +.Fn sendto , .Fn sendmsg +and +.Fn sendmmsg may be used at any time. .Pp The address of the target is given by @@ -81,6 +90,18 @@ underlying protocol, the error is returned, and the message is not transmitted. .Pp +The +.Fn sendmmsg +function sends multiple messages at a call. +They are given by the +.Fa msgvec +vector along with +.Fa vlen +specifying the vector size. +The number of octets sent per each message is placed in the +.Fa msg_len +field of each processed element of the vector after transmission. +.Pp No indication of failure to deliver is implicit in a .Fn send . Locally detected errors are indicated by a return value of -1. @@ -138,14 +159,27 @@ See .Xr recv 2 for a description of the .Fa msghdr +structure and the +.Fa mmsghdr structure. .Sh RETURN VALUES -The call returns the number of characters sent, or -1 -if an error occurred. +The +.Fn send , +.Fn sendto +and +.Fn sendmsg +calls +return the number of octets sent. +The +.Fn sendmmsg +call returns the number of messages sent. +If an error occurred a value of -1 is returned. .Sh ERRORS The .Fn send -function and +and +.Fn sendmmsg +functions and .Fn sendto and .Fn sendmsg @@ -215,6 +249,10 @@ The .Fn send function appeared in .Bx 4.2 . +The +.Fn sendmmsg +function appeared in +.Fx 11.0 . .Sh BUGS Because .Fn sendmsg diff --git a/sys/sys/socket.h b/sys/sys/socket.h index ee621d9dd1e8..c20b07522c97 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -431,6 +431,7 @@ struct msghdr { #define MSG_NBIO 0x4000 /* FIONBIO mode, used by fifofs */ #define MSG_COMPAT 0x8000 /* used in sendit() */ #define MSG_CMSG_CLOEXEC 0x40000 /* make received fds close-on-exec */ +#define MSG_WAITFORONE 0x80000 /* for recvmmsg() */ #endif #ifdef _KERNEL #define MSG_SOCALLBCK 0x10000 /* for use by socket callbacks - soreceive (TCP) */ @@ -596,6 +597,14 @@ struct sf_hdtr { #define SFK_COMPAT 0x00000001 #define SF_READAHEAD(flags) ((flags) >> 16) #endif /* _KERNEL */ + +/* + * Sendmmsg/recvmmsg specific structure(s) + */ +struct mmsghdr { + struct msghdr msg_hdr; /* message header */ + ssize_t msg_len; /* message length */ +}; #endif /* __BSD_VISIBLE */ #ifndef _KERNEL @@ -618,12 +627,18 @@ int listen(int, int); ssize_t recv(int, void *, size_t, int); ssize_t recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict); ssize_t recvmsg(int, struct msghdr *, int); +#if __BSD_VISIBLE +struct timespec; +ssize_t recvmmsg(int, struct mmsghdr * __restrict, size_t, int, + const struct timespec * __restrict); +#endif ssize_t send(int, const void *, size_t, int); ssize_t sendto(int, const void *, size_t, int, const struct sockaddr *, socklen_t); ssize_t sendmsg(int, const struct msghdr *, int); #if __BSD_VISIBLE int sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int); +ssize_t sendmmsg(int, struct mmsghdr * __restrict, size_t, int); int setfib(int); #endif int setsockopt(int, int, int, const void *, socklen_t);