From 3aaaa2efde896e19d229ee2cf09fe7e6ab0fbf6e Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Wed, 28 Apr 2021 21:31:38 +1200 Subject: [PATCH] poll(2): Add POLLRDHUP. Teach poll(2) to support Linux-style POLLRDHUP events for sockets, if requested. Triggered when the remote peer shuts down writing or closes its end. Reviewed by: kib MFC after: 1 month Differential Revision: https://reviews.freebsd.org/D29757 --- lib/libc/sys/poll.2 | 14 ++- sys/kern/uipc_socket.c | 4 +- sys/sys/poll.h | 1 + tests/sys/netinet/socket_afinet.c | 141 ++++++++++++++++++++++++++++++ usr.bin/truss/syscalls.c | 2 +- 5 files changed, 159 insertions(+), 3 deletions(-) diff --git a/lib/libc/sys/poll.2 b/lib/libc/sys/poll.2 index bea4aac82bd3..fec82db08944 100644 --- a/lib/libc/sys/poll.2 +++ b/lib/libc/sys/poll.2 @@ -28,7 +28,7 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" -.Dd February 27, 2019 +.Dd April 27, 2021 .Dt POLL 2 .Os .Sh NAME @@ -126,6 +126,15 @@ POLLOUT should never be present in the .Fa revents bitmask at the same time. +.It POLLRDHUP +Remote peer closed connection, or shut down writing. +Unlike +POLLHUP, +POLLRDHUP +must be present in the +.Fa events +bitmask to be reported. +Applies only to stream sockets. .It POLLNVAL The file descriptor is not open, or in capability mode the file descriptor has insufficient rights. @@ -261,6 +270,9 @@ function conforms to The .Fn ppoll is not specified by POSIX. +The +POLLRDHUP +flag is not specified by POSIX, but is compatible with Linux and illumos. .Sh HISTORY The .Fn poll diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 92a204aafef0..ae678136bade 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -3571,9 +3571,11 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred, revents |= POLLHUP; } } + if (so->so_rcv.sb_state & SBS_CANTRCVMORE) + revents |= events & POLLRDHUP; if (revents == 0) { if (events & - (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { + (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND | POLLRDHUP)) { selrecord(td, &so->so_rdsel); so->so_rcv.sb_flags |= SB_SEL; } diff --git a/sys/sys/poll.h b/sys/sys/poll.h index 6805704ca39f..02347ed09a13 100644 --- a/sys/sys/poll.h +++ b/sys/sys/poll.h @@ -71,6 +71,7 @@ struct pollfd { #if __BSD_VISIBLE /* General FreeBSD extension (currently only supported for sockets): */ #define POLLINIGNEOF 0x2000 /* like POLLIN, except ignore EOF */ +#define POLLRDHUP 0x4000 /* half shut down */ #endif /* diff --git a/tests/sys/netinet/socket_afinet.c b/tests/sys/netinet/socket_afinet.c index 54585086da23..985d67d83c99 100644 --- a/tests/sys/netinet/socket_afinet.c +++ b/tests/sys/netinet/socket_afinet.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include @@ -89,12 +90,152 @@ ATF_TC_BODY(socket_afinet_bind_ok, tc) close(sd); } +ATF_TC_WITHOUT_HEAD(socket_afinet_poll_no_rdhup); +ATF_TC_BODY(socket_afinet_poll_no_rdhup, tc) +{ + int ss, ss2, cs, rc; + struct sockaddr_in sin; + struct pollfd pfd; + int one = 1; + + /* Verify that we don't expose POLLRDHUP if not requested. */ + + /* Server setup. */ + ss = socket(PF_INET, SOCK_STREAM, 0); + ATF_CHECK(ss >= 0); + rc = setsockopt(ss, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + ATF_CHECK_EQ(0, rc); + bzero(&sin, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + sin.sin_port = htons(6666); + sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + rc = bind(ss, (struct sockaddr *)&sin, sizeof(sin)); + ATF_CHECK_EQ(0, rc); + rc = listen(ss, 1); + ATF_CHECK_EQ(0, rc); + + /* Client connects, server accepts. */ + cs = socket(PF_INET, SOCK_STREAM, 0); + ATF_CHECK(cs >= 0); + rc = connect(cs, (struct sockaddr *)&sin, sizeof(sin)); + ATF_CHECK_EQ(0, rc); + ss2 = accept(ss, NULL, NULL); + ATF_CHECK(ss2 >= 0); + + /* Server can write, sees only POLLOUT. */ + pfd.fd = ss2; + pfd.events = POLLIN | POLLOUT; + rc = poll(&pfd, 1, 0); + ATF_CHECK_EQ(1, rc); + ATF_CHECK_EQ(POLLOUT, pfd.revents); + + /* Client closes socket! */ + rc = close(cs); + ATF_CHECK_EQ(0, rc); + + /* + * Server now sees POLLIN, but not POLLRDHUP because we didn't ask. + * Need non-zero timeout to wait for the FIN to arrive and trigger the + * socket to become readable. + */ + pfd.fd = ss2; + pfd.events = POLLIN; + rc = poll(&pfd, 1, 60000); + ATF_CHECK_EQ(1, rc); + ATF_CHECK_EQ(POLLIN, pfd.revents); + + close(ss2); + close(ss); +} + +ATF_TC_WITHOUT_HEAD(socket_afinet_poll_rdhup); +ATF_TC_BODY(socket_afinet_poll_rdhup, tc) +{ + int ss, ss2, cs, rc; + struct sockaddr_in sin; + struct pollfd pfd; + char buffer; + int one = 1; + + /* Verify that server sees POLLRDHUP if it asks for it. */ + + /* Server setup. */ + ss = socket(PF_INET, SOCK_STREAM, 0); + ATF_CHECK(ss >= 0); + rc = setsockopt(ss, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + ATF_CHECK_EQ(0, rc); + bzero(&sin, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + sin.sin_port = htons(6666); + sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + rc = bind(ss, (struct sockaddr *)&sin, sizeof(sin)); + ATF_CHECK_EQ(0, rc); + rc = listen(ss, 1); + ATF_CHECK_EQ(0, rc); + + /* Client connects, server accepts. */ + cs = socket(PF_INET, SOCK_STREAM, 0); + ATF_CHECK(cs >= 0); + rc = connect(cs, (struct sockaddr *)&sin, sizeof(sin)); + ATF_CHECK_EQ(0, rc); + ss2 = accept(ss, NULL, NULL); + ATF_CHECK(ss2 >= 0); + + /* Server can write, so sees POLLOUT. */ + pfd.fd = ss2; + pfd.events = POLLIN | POLLOUT | POLLRDHUP; + rc = poll(&pfd, 1, 0); + ATF_CHECK_EQ(1, rc); + ATF_CHECK_EQ(POLLOUT, pfd.revents); + + /* Client writes two bytes, server reads only one of them. */ + rc = write(cs, "xx", 2); + ATF_CHECK_EQ(2, rc); + rc = read(ss2, &buffer, 1); + ATF_CHECK_EQ(1, rc); + + /* Server can read, so sees POLLIN. */ + pfd.fd = ss2; + pfd.events = POLLIN | POLLOUT | POLLRDHUP; + rc = poll(&pfd, 1, 0); + ATF_CHECK_EQ(1, rc); + ATF_CHECK_EQ(POLLIN | POLLOUT, pfd.revents); + + /* Client closes socket! */ + rc = close(cs); + ATF_CHECK_EQ(0, rc); + + /* + * Server sees Linux-style POLLRDHUP. Note that this is the case even + * though one byte of data remains unread. + * + * This races against the delivery of FIN caused by the close() above. + * Sometimes (more likely when run under truss or if another system + * call is added in between) it hits the path where sopoll_generic() + * immediately sees SBS_CANTRCVMORE, and sometimes it sleeps with flag + * SB_SEL so that it's woken up almost immediately and runs again, + * which is why we need a non-zero timeout here. + */ + pfd.fd = ss2; + pfd.events = POLLRDHUP; + rc = poll(&pfd, 1, 60000); + ATF_CHECK_EQ(1, rc); + ATF_CHECK_EQ(POLLRDHUP, pfd.revents); + + close(ss2); + close(ss); +} + ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, socket_afinet); ATF_TP_ADD_TC(tp, socket_afinet_bind_zero); ATF_TP_ADD_TC(tp, socket_afinet_bind_ok); + ATF_TP_ADD_TC(tp, socket_afinet_poll_no_rdhup); + ATF_TP_ADD_TC(tp, socket_afinet_poll_rdhup); return atf_no_error(); } diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index eaea3ad96765..798cd299582c 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -726,7 +726,7 @@ struct xlat { static struct xlat poll_flags[] = { X(POLLSTANDARD) X(POLLIN) X(POLLPRI) X(POLLOUT) X(POLLERR) X(POLLHUP) X(POLLNVAL) X(POLLRDNORM) X(POLLRDBAND) - X(POLLWRBAND) X(POLLINIGNEOF) XEND + X(POLLWRBAND) X(POLLINIGNEOF) X(POLLRDHUP) XEND }; static struct xlat sigaction_flags[] = {