From b75a1171d8ba4b08c4249e3ab6ce5f0ea446fdf3 Mon Sep 17 00:00:00 2001 From: Poul-Henning Kamp Date: Sun, 3 Feb 2008 15:54:41 +0000 Subject: [PATCH] Give sendfile(2) a SF_SYNC flag which makes it wait until all mbufs referencing the file's VM pages are returned from the network stack, making changes to the file safe. This flag does not guarantee that the data has been transmitted to the other end. --- lib/libc/sys/sendfile.2 | 15 +++++++++++++- sys/kern/uipc_syscalls.c | 43 +++++++++++++++++++++++++++++++++++++++- sys/sys/socket.h | 1 + 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/lib/libc/sys/sendfile.2 b/lib/libc/sys/sendfile.2 index 93420f69e27f..322971f0080f 100644 --- a/lib/libc/sys/sendfile.2 +++ b/lib/libc/sys/sendfile.2 @@ -103,7 +103,9 @@ variable pointed to by .Pp The .Fa flags -argument has one possible value: +argument is a bitmap of these values: +.Bl -item -offset indent +.It .Dv SF_NODISKIO . This flag causes any .Fn sendfile @@ -112,6 +114,17 @@ return .Er EBUSY . Busy servers may benefit by transferring requests that would block to a separate I/O worker thread. +.It +.Dv SF_MNOWAIT . +(description missing) +.It +.Dv SF_SYNC , +.Nm +sleeps until the network stack no longer references the VM pages +of the file, making subsequent modifications to it safe. +Please note that this is not a guarantee that the data has actually +been sent. +.El .Pp When using a socket marked for non-blocking I/O, .Fn sendfile diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index beca0627736c..789276e11207 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -1660,6 +1660,14 @@ getsockaddr(namp, uaddr, len) return (error); } +#include <sys/condvar.h> + +struct sendfile_sync { + struct mtx mtx; + struct cv cv; + unsigned count; +}; + /* * Detach mapped page and release resources back to the system. 
*/ @@ -1667,6 +1675,7 @@ void sf_buf_mext(void *addr, void *args) { vm_page_t m; + struct sendfile_sync *sfs; m = sf_buf_page(args); sf_buf_free(args); @@ -1680,6 +1689,14 @@ sf_buf_mext(void *addr, void *args) if (m->wire_count == 0 && m->object == NULL) vm_page_free(m); vm_page_unlock_queues(); + if (addr == NULL) + return; + sfs = addr; + mtx_lock(&sfs->mtx); + KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); + if (--sfs->count == 0) + cv_signal(&sfs->cv); + mtx_unlock(&sfs->mtx); } /* @@ -1767,6 +1784,7 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; int error, hdrlen = 0, mnw = 0; int vfslocked; + struct sendfile_sync *sfs = NULL; /* * The file descriptor must be a regular file and have a @@ -1829,6 +1847,13 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, if (uap->flags & SF_MNOWAIT) mnw = 1; + if (uap->flags & SF_SYNC) { + sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK); + memset(sfs, 0, sizeof *sfs); + mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0); + cv_init(&sfs->cv, "sendfile"); + } + #ifdef MAC SOCK_LOCK(so); error = mac_socket_check_send(td->td_ucred, so); @@ -2099,7 +2124,7 @@ retry_space: break; } MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, - (void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF); + sfs, sf, M_RDONLY, EXT_SFBUF); m0->m_data = (char *)sf_buf_kva(sf) + pgoff; m0->m_len = xfsize; @@ -2112,6 +2137,12 @@ retry_space: /* Keep track of bits processed. */ loopbytes += xfsize; off += xfsize; + + if (sfs != NULL) { + mtx_lock(&sfs->mtx); + sfs->count++; + mtx_unlock(&sfs->mtx); + } } /* Add the buffer chain to the socket buffer. 
*/ @@ -2190,6 +2221,16 @@ out: if (m) m_freem(m); + if (sfs != NULL) { + mtx_lock(&sfs->mtx); + if (sfs->count != 0) + cv_wait(&sfs->cv, &sfs->mtx); + KASSERT(sfs->count == 0, ("sendfile sync still busy")); + cv_destroy(&sfs->cv); + mtx_destroy(&sfs->mtx); + free(sfs, M_TEMP); + } + if (error == ERESTART) error = EINTR; diff --git a/sys/sys/socket.h b/sys/sys/socket.h index f23ee0be247b..4d2b70717741 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -609,6 +609,7 @@ struct sf_hdtr { */ #define SF_NODISKIO 0x00000001 #define SF_MNOWAIT 0x00000002 +#define SF_SYNC 0x00000004 #endif #ifndef _KERNEL