From d47a1edbd9f55e958baf4618439633778ceb8ac8 Mon Sep 17 00:00:00 2001 From: markj Date: Wed, 21 Aug 2019 19:35:04 +0000 Subject: [PATCH] Modify pipe_poll() to properly check for pending direct writes. With r349546, it is a responsibility of the writer to clear PIPE_DIRECTW after pinned data has been read. In particular, once a reader has drained this data, there is a small window where the pipe is empty but PIPE_DIRECTW is set. pipe_poll() was using the presence of PIPE_DIRECTW to determine whether to return POLLIN, so in this window it would claim that data was available to read when this was not the case. Fix this by modifying several checks for PIPE_DIRECTW to instead look at the number of residual bytes in data pinned by a direct writer. In some cases we really do want to check for PIPE_DIRECTW, since the presence of this flag indicates that any attempt to write to the pipe will block on the existing direct writer. Bisected and test case provided by: mav Tested by: pho Reviewed by: kib MFC after: 3 days Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D21333 --- sys/kern/sys_pipe.c | 52 +++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index e516e277ee18..df0d027502c9 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -709,11 +709,9 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred, /* * Direct copy, bypassing a kernel buffer. */ - } else if ((size = rpipe->pipe_map.cnt) && - (rpipe->pipe_state & PIPE_DIRECTW)) { + } else if ((size = rpipe->pipe_map.cnt) != 0) { if (size > uio->uio_resid) size = (u_int) uio->uio_resid; - PIPE_UNLOCK(rpipe); error = uiomove_fromphys(rpipe->pipe_map.ms, rpipe->pipe_map.pos, size, uio); @@ -819,32 +817,33 @@ pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio) u_int size; int i; - PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); - KASSERT(wpipe->pipe_state & PIPE_DIRECTW, - ("Clone attempt on non-direct write pipe!")); + PIPE_LOCK_ASSERT(wpipe, MA_OWNED); + KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0, + ("%s: PIPE_DIRECTW set on %p", __func__, wpipe)); + KASSERT(wpipe->pipe_map.cnt == 0, + ("%s: pipe map for %p contains residual data", __func__, wpipe)); if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size) size = wpipe->pipe_buffer.size; else size = uio->uio_iov->iov_len; - if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, + wpipe->pipe_state |= PIPE_DIRECTW; + PIPE_UNLOCK(wpipe); + i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ, - wpipe->pipe_map.ms, PIPENPAGES)) < 0) + wpipe->pipe_map.ms, PIPENPAGES); + PIPE_LOCK(wpipe); + if (i < 0) { + wpipe->pipe_state &= ~PIPE_DIRECTW; return (EFAULT); + } -/* - * set up the control block - */ wpipe->pipe_map.npages = i; wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; wpipe->pipe_map.cnt = size; -/* - * and update the uio data - */ - uio->uio_iov->iov_len -= size; uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size; if (uio->uio_iov->iov_len == 0) @@ -864,6 +863,8 @@ pipe_destroy_write_buffer(struct pipe *wpipe) PIPE_LOCK_ASSERT(wpipe, MA_OWNED); KASSERT((wpipe->pipe_state & PIPE_DIRECTW) != 0, ("%s: PIPE_DIRECTW not set on %p", __func__, wpipe)); + KASSERT(wpipe->pipe_map.cnt == 0, + ("%s: pipe map for %p contains residual data", __func__, wpipe)); wpipe->pipe_state &= ~PIPE_DIRECTW; vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages); @@ -889,6 +890,7 @@ pipe_clone_write_buffer(struct pipe *wpipe) size = wpipe->pipe_map.cnt; pos = wpipe->pipe_map.pos; + wpipe->pipe_map.cnt = 0; wpipe->pipe_buffer.in = size; wpipe->pipe_buffer.out = 0; @@ -946,7 +948,6 @@ pipe_direct_write(struct pipe *wpipe, struct uio *uio) else goto retry; } - wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ if (wpipe->pipe_buffer.cnt > 0) { if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; @@ -963,19 +964,15 @@ pipe_direct_write(struct pipe *wpipe, struct uio *uio) goto retry; } - wpipe->pipe_state |= PIPE_DIRECTW; - - PIPE_UNLOCK(wpipe); error = pipe_build_write_buffer(wpipe, uio); - PIPE_LOCK(wpipe); if (error) { - wpipe->pipe_state &= ~PIPE_DIRECTW; pipeunlock(wpipe); goto error1; } while (wpipe->pipe_map.cnt != 0) { if (wpipe->pipe_state & PIPE_EOF) { + wpipe->pipe_map.cnt = 0; pipe_destroy_write_buffer(wpipe); pipeselwakeup(wpipe); pipeunlock(wpipe); @@ -1129,7 +1126,7 @@ pipe_write(struct file *fp, struct uio *uio, struct ucred *active_cred, * pipe buffer. We break out if a signal occurs or the * reader goes away. */ - if (wpipe->pipe_state & PIPE_DIRECTW) { + if (wpipe->pipe_map.cnt != 0) { if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); @@ -1347,7 +1344,7 @@ pipe_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, PIPE_UNLOCK(mpipe); return (0); } - if (mpipe->pipe_state & PIPE_DIRECTW) + if (mpipe->pipe_map.cnt != 0) *(int *)data = mpipe->pipe_map.cnt; else *(int *)data = mpipe->pipe_buffer.cnt; @@ -1403,14 +1400,13 @@ pipe_poll(struct file *fp, int events, struct ucred *active_cred, goto locked_error; #endif if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) - if ((rpipe->pipe_state & PIPE_DIRECTW) || - (rpipe->pipe_buffer.cnt > 0)) + if (rpipe->pipe_map.cnt > 0 || rpipe->pipe_buffer.cnt > 0) revents |= events & (POLLIN | POLLRDNORM); if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) if (wpipe->pipe_present != PIPE_ACTIVE || (wpipe->pipe_state & PIPE_EOF) || - (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && + ((wpipe->pipe_state & PIPE_DIRECTW) == 0 && ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF || wpipe->pipe_buffer.size == 0))) revents |= events & (POLLOUT | POLLWRNORM); @@ -1485,7 +1481,7 @@ pipe_stat(struct file *fp, struct stat *ub, struct ucred *active_cred, bzero(ub, sizeof(*ub)); ub->st_mode = S_IFIFO; ub->st_blksize = PAGE_SIZE; - if (pipe->pipe_state & PIPE_DIRECTW) + if (pipe->pipe_map.cnt != 0) ub->st_size = pipe->pipe_map.cnt; else ub->st_size = pipe->pipe_buffer.cnt; @@ -1727,7 +1723,7 @@ filt_piperead(struct knote *kn, long hint) PIPE_LOCK_ASSERT(rpipe, MA_OWNED); kn->kn_data = rpipe->pipe_buffer.cnt; - if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) + if (kn->kn_data == 0) kn->kn_data = rpipe->pipe_map.cnt; if ((rpipe->pipe_state & PIPE_EOF) ||