Remove SF_KQUEUE code. This code was developed at Netflix, but was not

ever used.  It didn't go into stable/10, neither was documented.
It might be useful, but we collectively decided to remove it, rather
leave it abandoned and unmaintained.  It is removed in one single
commit, so restoring it should be easy, if anyone wants to reopen
this idea.

Sponsored by:	Netflix
This commit is contained in:
Gleb Smirnoff 2014-11-11 20:32:46 +00:00
parent 7bfc98355a
commit 0e87b36eaa
7 changed files with 71 additions and 840 deletions

View File

@ -83,10 +83,6 @@ __FBSDID("$FreeBSD$");
#include <sys/msg.h>
#include <sys/sem.h>
#include <sys/shm.h>
#include <sys/condvar.h>
#include <sys/sf_buf.h>
#include <sys/sf_sync.h>
#include <sys/sf_base.h>
#ifdef INET
#include <netinet/in.h>
@ -1567,26 +1563,16 @@ struct sf_hdtr32 {
int trl_cnt;
};
struct sf_hdtr_kq32 {
int kq_fd;
uint32_t kq_flags;
uint32_t kq_udata; /* 32-bit void ptr */
uint32_t kq_ident; /* 32-bit uintptr_t */
};
static int
freebsd32_do_sendfile(struct thread *td,
struct freebsd32_sendfile_args *uap, int compat)
{
struct sf_hdtr32 hdtr32;
struct sf_hdtr hdtr;
struct sf_hdtr_kq32 hdtr_kq32;
struct sf_hdtr_kq hdtr_kq;
struct uio *hdr_uio, *trl_uio;
struct iovec32 *iov32;
off_t offset;
off_t offset, sbytes;
int error;
off_t sbytes;
offset = PAIR32TO64(off_t, uap->offset);
if (offset < 0)
@ -1617,31 +1603,17 @@ freebsd32_do_sendfile(struct thread *td,
if (error)
goto out;
}
/*
* If SF_KQUEUE is set, then we need to also copy in
* the kqueue data after the normal hdtr set and set do_kqueue=1.
*/
if (uap->flags & SF_KQUEUE) {
error = copyin(((char *) uap->hdtr) + sizeof(hdtr32),
&hdtr_kq32,
sizeof(hdtr_kq32));
if (error != 0)
goto out;
/* 32->64 bit fields */
CP(hdtr_kq32, hdtr_kq, kq_fd);
CP(hdtr_kq32, hdtr_kq, kq_flags);
PTRIN_CP(hdtr_kq32, hdtr_kq, kq_udata);
CP(hdtr_kq32, hdtr_kq, kq_ident);
}
}
AUDIT_ARG_FD(uap->fd);
/* Call sendfile */
/* XXX stack depth! */
error = _do_sendfile(td, uap->fd, uap->s, uap->flags, compat,
offset, uap->nbytes, &sbytes, hdr_uio, trl_uio, &hdtr_kq);
if ((error = fget_read(td, uap->fd,
cap_rights_init(&rights, CAP_PREAD), &fp)) != 0)
goto out;
error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset,
uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td);
fdrop(fp, td);
if (uap->sbytes != NULL)
copyout(&sbytes, uap->sbytes, sizeof(off_t));

View File

@ -3684,7 +3684,7 @@ badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
static int
badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
int kflags, struct sendfile_sync *sfs, struct thread *td)
int kflags, struct thread *td)
{
return (EBADF);
@ -3770,7 +3770,7 @@ invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
int
invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
int kflags, struct sendfile_sync *sfs, struct thread *td)
int kflags, struct thread *td)
{
return (EINVAL);

View File

@ -63,8 +63,6 @@ __FBSDID("$FreeBSD$");
#include <sys/protosw.h>
#include <sys/rwlock.h>
#include <sys/sf_buf.h>
#include <sys/sf_sync.h>
#include <sys/sf_base.h>
#include <sys/sysent.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@ -115,10 +113,6 @@ static int getpeername1(struct thread *td, struct getpeername_args *uap,
counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
static int filt_sfsync_attach(struct knote *kn);
static void filt_sfsync_detach(struct knote *kn);
static int filt_sfsync(struct knote *kn, long hint);
/*
* sendfile(2)-related variables and associated sysctls
*/
@ -128,28 +122,6 @@ static int sfreadahead = 1;
SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW,
&sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks");
#ifdef SFSYNC_DEBUG
static int sf_sync_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, sf_sync_debug, CTLFLAG_RW,
&sf_sync_debug, 0, "Output debugging during sf_sync lifecycle");
#define SFSYNC_DPRINTF(s, ...) \
do { \
if (sf_sync_debug) \
printf((s), ##__VA_ARGS__); \
} while (0)
#else
#define SFSYNC_DPRINTF(c, ...)
#endif
static uma_zone_t zone_sfsync;
static struct filterops sendfile_filtops = {
.f_isfd = 0,
.f_attach = filt_sfsync_attach,
.f_detach = filt_sfsync_detach,
.f_event = filt_sfsync,
};
static void
sfstat_init(const void *unused)
{
@ -159,19 +131,6 @@ sfstat_init(const void *unused)
}
SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL);
static void
sf_sync_init(const void *unused)
{
zone_sfsync = uma_zcreate("sendfile_sync", sizeof(struct sendfile_sync),
NULL, NULL,
NULL, NULL,
UMA_ALIGN_CACHE,
0);
kqueue_add_filteropts(EVFILT_SENDFILE, &sendfile_filtops);
}
SYSINIT(sf_sync, SI_SUB_MBUF, SI_ORDER_FIRST, sf_sync_init, NULL);
static int
sfstat_sysctl(SYSCTL_HANDLER_ARGS)
{
@ -1864,116 +1823,11 @@ getsockaddr(namp, uaddr, len)
return (error);
}
static int
filt_sfsync_attach(struct knote *kn)
{
struct sendfile_sync *sfs = (struct sendfile_sync *) kn->kn_sdata;
struct knlist *knl = &sfs->klist;
SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs);
/*
* Validate that we actually received this via the kernel API.
*/
if ((kn->kn_flags & EV_FLAG1) == 0)
return (EPERM);
kn->kn_ptr.p_v = sfs;
kn->kn_flags &= ~EV_FLAG1;
knl->kl_lock(knl->kl_lockarg);
/*
* If we're in the "freeing" state,
* don't allow the add. That way we don't
* end up racing with some other thread that
* is trying to finish some setup.
*/
if (sfs->state == SF_STATE_FREEING) {
knl->kl_unlock(knl->kl_lockarg);
return (EINVAL);
}
knlist_add(&sfs->klist, kn, 1);
knl->kl_unlock(knl->kl_lockarg);
return (0);
}
/*
* Called when a knote is being detached.
*/
static void
filt_sfsync_detach(struct knote *kn)
{
struct knlist *knl;
struct sendfile_sync *sfs;
int do_free = 0;
sfs = kn->kn_ptr.p_v;
knl = &sfs->klist;
SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs);
knl->kl_lock(knl->kl_lockarg);
if (!knlist_empty(knl))
knlist_remove(knl, kn, 1);
/*
* If the list is empty _AND_ the refcount is 0
* _AND_ we've finished the setup phase and now
* we're in the running phase, we can free the
* underlying sendfile_sync.
*
* But we shouldn't do it before finishing the
* underlying divorce from the knote.
*
* So, we have the sfsync lock held; transition
* it to "freeing", then unlock, then free
* normally.
*/
if (knlist_empty(knl)) {
if (sfs->state == SF_STATE_COMPLETED && sfs->count == 0) {
SFSYNC_DPRINTF("%s: (%llu) sfs=%p; completed, "
"count==0, empty list: time to free!\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs);
sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
do_free = 1;
}
}
knl->kl_unlock(knl->kl_lockarg);
/*
* Only call free if we're the one who has transitioned things
* to free. Otherwise we could race with another thread that
* is currently tearing things down.
*/
if (do_free == 1) {
SFSYNC_DPRINTF("%s: (%llu) sfs=%p, %s:%d\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs,
__FILE__,
__LINE__);
sf_sync_free(sfs);
}
}
static int
filt_sfsync(struct knote *kn, long hint)
{
struct sendfile_sync *sfs = (struct sendfile_sync *) kn->kn_ptr.p_v;
int ret;
SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs);
/*
* XXX add a lock assertion here!
*/
ret = (sfs->count == 0 && sfs->state == SF_STATE_COMPLETED);
return (ret);
}
struct sendfile_sync {
struct mtx mtx;
struct cv cv;
unsigned count;
};
/*
* Add more references to a vm_page + sf_buf + sendfile_sync.
@ -2022,344 +1876,13 @@ sf_ext_free(void *arg1, void *arg2)
vm_page_free(pg);
vm_page_unlock(pg);
if (sfs != NULL)
sf_sync_deref(sfs);
}
/*
* Called to remove a reference to a sf_sync object.
*
* This is generally done during the mbuf free path to signify
* that one of the mbufs in the transaction has been completed.
*
* If we're doing SF_SYNC and the refcount is zero then we'll wake
* up any waiters.
*
* IF we're doing SF_KQUEUE and the refcount is zero then we'll
* fire off the knote.
*/
void
sf_sync_deref(struct sendfile_sync *sfs)
{
int do_free = 0;
if (sfs == NULL)
return;
mtx_lock(&sfs->mtx);
KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
sfs->count --;
/*
* Only fire off the wakeup / kqueue notification if
* we are in the running state.
*/
if (sfs->count == 0 && sfs->state == SF_STATE_COMPLETED) {
if (sfs->flags & SF_SYNC)
cv_signal(&sfs->cv);
if (sfs->flags & SF_KQUEUE) {
SFSYNC_DPRINTF("%s: (%llu) sfs=%p: knote!\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs);
KNOTE_LOCKED(&sfs->klist, 1);
}
/*
* If we're not waiting around for a sync,
* check if the knote list is empty.
* If it is, we transition to free.
*
* XXX I think it's about time I added some state
* or flag that says whether we're supposed to be
* waiting around until we've done a signal.
*
* XXX Ie, the reason that I don't free it here
* is because the caller will free the last reference,
* not us. That should be codified in some flag
* that indicates "self-free" rather than checking
* for SF_SYNC all the time.
*/
if ((sfs->flags & SF_SYNC) == 0 && knlist_empty(&sfs->klist)) {
SFSYNC_DPRINTF("%s: (%llu) sfs=%p; completed, "
"count==0, empty list: time to free!\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs);
sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
do_free = 1;
}
}
mtx_unlock(&sfs->mtx);
/*
* Attempt to do a free here.
*
* We do this outside of the lock because it may destroy the
* lock in question as it frees things. We can optimise this
* later.
*
* XXX yes, we should make it a requirement to hold the
* lock across sf_sync_free().
*/
if (do_free == 1) {
SFSYNC_DPRINTF("%s: (%llu) sfs=%p\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs);
sf_sync_free(sfs);
}
}
/*
* Allocate a sendfile_sync state structure.
*
* For now this only knows about the "sleep" sync, but later it will
* grow various other personalities.
*/
struct sendfile_sync *
sf_sync_alloc(uint32_t flags)
{
struct sendfile_sync *sfs;
sfs = uma_zalloc(zone_sfsync, M_WAITOK | M_ZERO);
mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
cv_init(&sfs->cv, "sendfile");
sfs->flags = flags;
sfs->state = SF_STATE_SETUP;
knlist_init_mtx(&sfs->klist, &sfs->mtx);
SFSYNC_DPRINTF("%s: sfs=%p, flags=0x%08x\n", __func__, sfs, sfs->flags);
return (sfs);
}
/*
* Take a reference to a sfsync instance.
*
* This has to map 1:1 to free calls coming in via sf_ext_free(),
* so typically this will be referenced once for each mbuf allocated.
*/
void
sf_sync_ref(struct sendfile_sync *sfs)
{
if (sfs == NULL)
return;
mtx_lock(&sfs->mtx);
sfs->count++;
mtx_unlock(&sfs->mtx);
}
void
sf_sync_syscall_wait(struct sendfile_sync *sfs)
{
if (sfs == NULL)
return;
KASSERT(mtx_owned(&sfs->mtx), ("%s: sfs=%p: not locked but should be!",
__func__,
sfs));
/*
* If we're not requested to wait during the syscall,
* don't bother waiting.
*/
if ((sfs->flags & SF_SYNC) == 0)
goto out;
/*
* This is a bit suboptimal and confusing, so bear with me.
*
* Ideally sf_sync_syscall_wait() will wait until
* all pending mbuf transmit operations are done.
* This means that when sendfile becomes async, it'll
* run in the background and will transition from
* RUNNING to COMPLETED when it's finished acquiring
* new things to send. Then, when the mbufs finish
* sending, COMPLETED + sfs->count == 0 is enough to
* know that no further work is being done.
*
* So, we will sleep on both RUNNING and COMPLETED.
* It's up to the (in progress) async sendfile loop
* to transition the sf_sync from RUNNING to
* COMPLETED so the wakeup above will actually
* do the cv_signal() call.
*/
if (sfs->state != SF_STATE_COMPLETED && sfs->state != SF_STATE_RUNNING)
goto out;
if (sfs->count != 0)
cv_wait(&sfs->cv, &sfs->mtx);
KASSERT(sfs->count == 0, ("sendfile sync still busy"));
out:
return;
}
/*
* Free an sf_sync if it's appropriate to.
*/
void
sf_sync_free(struct sendfile_sync *sfs)
{
if (sfs == NULL)
return;
SFSYNC_DPRINTF("%s: (%lld) sfs=%p; called; state=%d, flags=0x%08x "
"count=%d\n",
__func__,
(long long) curthread->td_tid,
sfs,
sfs->state,
sfs->flags,
sfs->count);
mtx_lock(&sfs->mtx);
/*
* We keep the sf_sync around if the state is active,
* we are doing kqueue notification and we have active
* knotes.
*
* If the caller wants to free us right this second it
* should transition this to the freeing state.
*
* So, complain loudly if they break this rule.
*/
if (sfs->state != SF_STATE_FREEING) {
printf("%s: (%llu) sfs=%p; not freeing; let's wait!\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs);
mtx_unlock(&sfs->mtx);
return;
}
KASSERT(sfs->count == 0, ("sendfile sync still busy"));
cv_destroy(&sfs->cv);
/*
* This doesn't call knlist_detach() on each knote; it just frees
* the entire list.
*/
knlist_delete(&sfs->klist, curthread, 1);
mtx_destroy(&sfs->mtx);
SFSYNC_DPRINTF("%s: (%llu) sfs=%p; freeing\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs);
uma_zfree(zone_sfsync, sfs);
}
/*
* Setup a sf_sync to post a kqueue notification when things are complete.
*/
int
sf_sync_kqueue_setup(struct sendfile_sync *sfs, struct sf_hdtr_kq *sfkq)
{
struct kevent kev;
int error;
sfs->flags |= SF_KQUEUE;
/* Check the flags are valid */
if ((sfkq->kq_flags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0)
return (EINVAL);
SFSYNC_DPRINTF("%s: sfs=%p: kqfd=%d, flags=0x%08x, ident=%p, udata=%p\n",
__func__,
sfs,
sfkq->kq_fd,
sfkq->kq_flags,
(void *) sfkq->kq_ident,
(void *) sfkq->kq_udata);
/* Setup and register a knote on the given kqfd. */
kev.ident = (uintptr_t) sfkq->kq_ident;
kev.filter = EVFILT_SENDFILE;
kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | sfkq->kq_flags;
kev.data = (intptr_t) sfs;
kev.udata = sfkq->kq_udata;
error = kqfd_register(sfkq->kq_fd, &kev, curthread, 1);
if (error != 0) {
SFSYNC_DPRINTF("%s: returned %d\n", __func__, error);
}
return (error);
}
void
sf_sync_set_state(struct sendfile_sync *sfs, sendfile_sync_state_t state,
int islocked)
{
sendfile_sync_state_t old_state;
if (! islocked)
if (sfs != NULL) {
mtx_lock(&sfs->mtx);
/*
* Update our current state.
*/
old_state = sfs->state;
sfs->state = state;
SFSYNC_DPRINTF("%s: (%llu) sfs=%p; going from %d to %d\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs,
old_state,
state);
/*
* If we're transitioning from RUNNING to COMPLETED and the count is
* zero, then post the knote. The caller may have completed the
* send before we updated the state to COMPLETED and we need to make
* sure this is communicated.
*/
if (old_state == SF_STATE_RUNNING
&& state == SF_STATE_COMPLETED
&& sfs->count == 0
&& sfs->flags & SF_KQUEUE) {
SFSYNC_DPRINTF("%s: (%llu) sfs=%p: triggering knote!\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs);
KNOTE_LOCKED(&sfs->klist, 1);
}
if (! islocked)
KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
if (--sfs->count == 0)
cv_signal(&sfs->cv);
mtx_unlock(&sfs->mtx);
}
/*
* Set the retval/errno for the given transaction.
*
* This will eventually/ideally be used when the KNOTE is fired off
* to signify the completion of this transaction.
*
* The sfsync lock should be held before entering this function.
*/
void
sf_sync_set_retval(struct sendfile_sync *sfs, off_t retval, int xerrno)
{
KASSERT(mtx_owned(&sfs->mtx), ("%s: sfs=%p: not locked but should be!",
__func__,
sfs));
SFSYNC_DPRINTF("%s: (%llu) sfs=%p: errno=%d, retval=%jd\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs,
xerrno,
(intmax_t) retval);
sfs->retval = retval;
sfs->xerrno = xerrno;
}
}
/*
@ -2380,174 +1903,15 @@ sys_sendfile(struct thread *td, struct sendfile_args *uap)
return (do_sendfile(td, uap, 0));
}
int
_do_sendfile(struct thread *td, int src_fd, int sock_fd, int flags,
int compat, off_t offset, size_t nbytes, off_t *sbytes,
struct uio *hdr_uio,
struct uio *trl_uio, struct sf_hdtr_kq *hdtr_kq)
{
cap_rights_t rights;
struct sendfile_sync *sfs = NULL;
struct file *fp;
int error;
int do_kqueue = 0;
int do_free = 0;
AUDIT_ARG_FD(src_fd);
if (hdtr_kq != NULL)
do_kqueue = 1;
/*
* sendfile(2) can start at any offset within a file so we require
* CAP_READ+CAP_SEEK = CAP_PREAD.
*/
if ((error = fget_read(td, src_fd,
cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) {
goto out;
}
/*
* IF SF_KQUEUE is set but we haven't copied in anything for
* kqueue data, error out.
*/
if (flags & SF_KQUEUE && do_kqueue == 0) {
SFSYNC_DPRINTF("%s: SF_KQUEUE but no KQUEUE data!\n", __func__);
goto out;
}
/*
* If we need to wait for completion, initialise the sfsync
* state here.
*/
if (flags & (SF_SYNC | SF_KQUEUE))
sfs = sf_sync_alloc(flags & (SF_SYNC | SF_KQUEUE));
if (flags & SF_KQUEUE) {
error = sf_sync_kqueue_setup(sfs, hdtr_kq);
if (error) {
SFSYNC_DPRINTF("%s: (%llu) error; sfs=%p\n",
__func__,
(unsigned long long) curthread->td_tid,
sfs);
sf_sync_set_state(sfs, SF_STATE_FREEING, 0);
sf_sync_free(sfs);
goto out;
}
}
/*
* Do the sendfile call.
*
* If this fails, it'll free the mbuf chain which will free up the
* sendfile_sync references.
*/
error = fo_sendfile(fp, sock_fd, hdr_uio, trl_uio, offset,
nbytes, sbytes, flags, compat ? SFK_COMPAT : 0, sfs, td);
/*
* If the sendfile call succeeded, transition the sf_sync state
* to RUNNING, then COMPLETED.
*
* If the sendfile call failed, then the sendfile call may have
* actually sent some data first - so we check to see whether
* any data was sent. If some data was queued (ie, count > 0)
* then we can't call free; we have to wait until the partial
* transaction completes before we continue along.
*
* This has the side effect of firing off the knote
* if the refcount has hit zero by the time we get here.
*/
if (sfs != NULL) {
mtx_lock(&sfs->mtx);
if (error == 0 || sfs->count > 0) {
/*
* When it's time to do async sendfile, the transition
* to RUNNING signifies that we're actually actively
* adding and completing mbufs. When the last disk
* buffer is read (ie, when we're not doing any
* further read IO and all subsequent stuff is mbuf
* transmissions) we'll transition to COMPLETED
* and when the final mbuf is freed, the completion
* will be signaled.
*/
sf_sync_set_state(sfs, SF_STATE_RUNNING, 1);
/*
* Set the retval before we signal completed.
* If we do it the other way around then transitioning to
* COMPLETED may post the knote before you set the return
* status!
*
* XXX for now, errno is always 0, as we don't post
* knotes if sendfile failed. Maybe that'll change later.
*/
sf_sync_set_retval(sfs, *sbytes, error);
/*
* And now transition to completed, which will kick off
* the knote if required.
*/
sf_sync_set_state(sfs, SF_STATE_COMPLETED, 1);
} else {
/*
* Error isn't zero, sfs_count is zero, so we
* won't have some other thing to wake things up.
* Thus free.
*/
sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
do_free = 1;
}
/*
* Next - wait if appropriate.
*/
sf_sync_syscall_wait(sfs);
/*
* If we're not doing kqueue notifications, we can
* transition this immediately to the freeing state.
*/
if ((sfs->flags & SF_KQUEUE) == 0) {
sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
do_free = 1;
}
mtx_unlock(&sfs->mtx);
}
/*
* If do_free is set, free here.
*
* If we're doing no-kqueue notification and it's just sleep notification,
* we also do free; it's the only chance we have.
*/
if (sfs != NULL && do_free == 1) {
sf_sync_free(sfs);
}
/*
* XXX Should we wait until the send has completed before freeing the source
* file handle? It's the previous behaviour, sure, but is it required?
* We've wired down the page references after all.
*/
fdrop(fp, td);
out:
/* Return error */
return (error);
}
static int
do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
{
struct sf_hdtr hdtr;
struct sf_hdtr_kq hdtr_kq;
struct uio *hdr_uio, *trl_uio;
int error;
struct file *fp;
cap_rights_t rights;
off_t sbytes;
int do_kqueue = 0;
int error;
/*
* File offset must be positive. If it goes beyond EOF
@ -2563,38 +1927,37 @@ do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
if (error != 0)
goto out;
if (hdtr.headers != NULL) {
error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
error = copyinuio(hdtr.headers, hdtr.hdr_cnt,
&hdr_uio);
if (error != 0)
goto out;
}
if (hdtr.trailers != NULL) {
error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
error = copyinuio(hdtr.trailers, hdtr.trl_cnt,
&trl_uio);
if (error != 0)
goto out;
}
/*
* If SF_KQUEUE is set, then we need to also copy in
* the kqueue data after the normal hdtr set and set
* do_kqueue=1.
*/
if (uap->flags & SF_KQUEUE) {
error = copyin(((char *) uap->hdtr) + sizeof(hdtr),
&hdtr_kq,
sizeof(hdtr_kq));
if (error != 0)
goto out;
do_kqueue = 1;
}
}
/* Call sendfile */
error = _do_sendfile(td, uap->fd, uap->s, uap->flags, compat,
uap->offset, uap->nbytes, &sbytes, hdr_uio, trl_uio, &hdtr_kq);
AUDIT_ARG_FD(src_fd);
if (uap->sbytes != NULL) {
/*
* sendfile(2) can start at any offset within a file so we require
* CAP_READ+CAP_SEEK = CAP_PREAD.
*/
if ((error = fget_read(td, uap->fd,
cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) {
goto out;
}
error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset,
uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td);
fdrop(fp, td);
if (uap->sbytes != NULL)
copyout(&sbytes, uap->sbytes, sizeof(off_t));
}
out:
free(hdr_uio, M_IOV);
free(trl_uio, M_IOV);
@ -2819,7 +2182,7 @@ kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp,
int
vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
int kflags, struct sendfile_sync *sfs, struct thread *td)
int kflags, struct thread *td)
{
struct file *sock_fp;
struct vnode *vp;
@ -2829,6 +2192,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
struct sf_buf *sf;
struct vm_page *pg;
struct shmfd *shmfd;
struct sendfile_sync *sfs;
struct vattr va;
off_t off, xfsize, fsbytes, sbytes, rem, obj_size;
int error, bsize, nd, hdrlen, mnw;
@ -2837,6 +2201,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
obj = NULL;
so = NULL;
m = NULL;
sfs = NULL;
fsbytes = sbytes = 0;
hdrlen = mnw = 0;
rem = nbytes;
@ -2860,6 +2225,12 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
if (flags & SF_MNOWAIT)
mnw = 1;
if (flags & SF_SYNC) {
sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
cv_init(&sfs->cv, "sendfile");
}
#ifdef MAC
error = mac_socket_check_send(td->td_ucred, so);
if (error != 0)
@ -3106,12 +2477,11 @@ retry_space:
loopbytes += xfsize;
off += xfsize;
/*
* XXX eventually this should be a sfsync
* method call!
*/
if (sfs != NULL)
sf_sync_ref(sfs);
if (sfs != NULL) {
mtx_lock(&sfs->mtx);
sfs->count++;
mtx_unlock(&sfs->mtx);
}
}
if (vp != NULL)
@ -3193,6 +2563,16 @@ out:
if (m)
m_freem(m);
if (sfs != NULL) {
mtx_lock(&sfs->mtx);
if (sfs->count != 0)
cv_wait(&sfs->cv, &sfs->mtx);
KASSERT(sfs->count == 0, ("sendfile sync still busy"));
cv_destroy(&sfs->cv);
mtx_destroy(&sfs->mtx);
free(sfs, M_TEMP);
}
if (error == ERESTART)
error = EINTR;

View File

@ -90,9 +90,6 @@ foffset_get(struct file *fp)
return (foffset_lock(fp, FOF_NOLOCK));
}
/* XXX pollution? */
struct sendfile_sync;
typedef int fo_rdwr_t(struct file *fp, struct uio *uio,
struct ucred *active_cred, int flags,
struct thread *td);
@ -112,8 +109,7 @@ typedef int fo_chown_t(struct file *fp, uid_t uid, gid_t gid,
struct ucred *active_cred, struct thread *td);
typedef int fo_sendfile_t(struct file *fp, int sockfd, struct uio *hdr_uio,
struct uio *trl_uio, off_t offset, size_t nbytes,
off_t *sent, int flags, int kflags,
struct sendfile_sync *sfs, struct thread *td);
off_t *sent, int flags, int kflags, struct thread *td);
typedef int fo_seek_t(struct file *fp, off_t offset, int whence,
struct thread *td);
typedef int fo_fill_kinfo_t(struct file *fp, struct kinfo_file *kif,
@ -371,11 +367,11 @@ fo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
static __inline int
fo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
int kflags, struct sendfile_sync *sfs, struct thread *td)
int kflags, struct thread *td)
{
return ((*fp->f_ops->fo_sendfile)(fp, sockfd, hdr_uio, trl_uio, offset,
nbytes, sent, flags, kflags, sfs, td));
nbytes, sent, flags, kflags, td));
}
static __inline int

View File

@ -1,37 +0,0 @@
/*-
* Copyright (c) 2013 Adrian Chadd <adrian@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_SF_BASE_H_
#define _SYS_SF_BASE_H_
extern int _do_sendfile(struct thread *, int src_fd, int sock_fd, int flags,
int compat, off_t offset, size_t nbytes, off_t *sbytes,
struct uio *hdr_uio, struct uio *trl_uio,
struct sf_hdtr_kq *hdtr_kq);
#endif /* _SYS_SF_BASE_H_ */

View File

@ -1,64 +0,0 @@
/*-
* Copyright (c) 2013 Adrian Chadd <adrian@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_SF_SYNC_H_
#define _SYS_SF_SYNC_H_
typedef enum {
SF_STATE_NONE,
SF_STATE_SETUP,
SF_STATE_RUNNING,
SF_STATE_COMPLETED,
SF_STATE_FREEING
} sendfile_sync_state_t;
struct sendfile_sync {
struct mtx mtx;
struct cv cv;
struct knlist klist;
uint32_t flags;
uint32_t count;
int32_t xerrno; /* Completion errno, if retval < 0 */
off_t retval; /* Completion retval (eg written bytes) */
sendfile_sync_state_t state;
};
/* XXX pollution */
struct sf_hdtr_kq;
extern struct sendfile_sync * sf_sync_alloc(uint32_t flags);
extern void sf_sync_syscall_wait(struct sendfile_sync *);
extern void sf_sync_free(struct sendfile_sync *);
extern void sf_sync_try_free(struct sendfile_sync *);
extern void sf_sync_ref(struct sendfile_sync *);
extern void sf_sync_deref(struct sendfile_sync *);
extern int sf_sync_kqueue_setup(struct sendfile_sync *, struct sf_hdtr_kq *);
extern void sf_sync_set_state(struct sendfile_sync *, sendfile_sync_state_t, int);
extern void sf_sync_set_retval(struct sendfile_sync *, off_t, int);
#endif /* !_SYS_SF_BUF_H_ */

View File

@ -583,28 +583,12 @@ struct sf_hdtr {
int trl_cnt; /* number of trailer iovec's */
};
/*
* sendfile(2) kqueue information
*/
struct sf_hdtr_kq {
uintptr_t kq_ident; /* ident (from userland?) */
void *kq_udata; /* user data pointer */
uint32_t kq_flags; /* extra flags to pass in */
int kq_fd; /* kq fd to post completion events on */
};
struct sf_hdtr_all {
struct sf_hdtr hdtr;
struct sf_hdtr_kq kq;
};
/*
* Sendfile-specific flag(s)
*/
#define SF_NODISKIO 0x00000001
#define SF_MNOWAIT 0x00000002
#define SF_SYNC 0x00000004
#define SF_KQUEUE 0x00000008
#ifdef _KERNEL
#define SFK_COMPAT 0x00000001