641e2829b6
of the counter, that may happen when too many sendfile(2) calls are being executed with this vnode [1]. To keep the size of the struct vm_page and offsets of the fields accessed by out-of-tree modules, swap the types and locations of the wire_count and cow fields. Add safety checks to detect cow overflow and force fallback to the normal copy code for zero-copy sockets. [2] Reported by: Anton Yuzhaninov <citrin citrin ru> [1] Suggested by: alc [2] Reviewed by: alc MFC after: 2 weeks
185 lines
4.7 KiB
C
185 lines
4.7 KiB
C
/*--
 * Copyright (c) 1997, Duke University
 * All rights reserved.
 *
 * Author:
 *         Andrew Gallatin <gallatin@cs.duke.edu>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of Duke University may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY DUKE UNIVERSITY ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DUKE UNIVERSITY BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
|
|
|
/*
 * This is a set of routines for enabling and disabling copy on write
 * protection for data written into sockets.
 */
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/mbuf.h>
|
|
#include <sys/sf_buf.h>
|
|
#include <sys/socketvar.h>
|
|
#include <sys/uio.h>
|
|
|
|
#include <vm/vm.h>
|
|
#include <vm/vm_extern.h>
|
|
#include <vm/vm_param.h>
|
|
#include <vm/pmap.h>
|
|
#include <vm/vm_map.h>
|
|
#include <vm/vm_page.h>
|
|
#include <vm/vm_object.h>
|
|
|
|
|
|
/*
 * Event counters for the copy-on-write socket send path in this file.
 * Each field is bumped with a plain increment at the matching point in
 * socow_setup() or socow_iodone(); nothing in this file reads them back.
 */
struct netsend_cow_stats {
	int attempted;		/* entries into socow_setup() */
	int fail_not_mapped;	/* pmap_extract_and_hold() returned NULL */
	int fail_sf_buf;	/* sf_buf_alloc() returned NULL */
	int success;		/* page successfully attached to an mbuf */
	int iodone;		/* completions of socow_iodone() */
};
|
|
|
|
/*
 * File-local counters, incremented by socow_setup() and socow_iodone().
 */
static struct netsend_cow_stats socow_stats;
|
|
|
|
/*
 * Forward declaration: socow_setup() passes this function to MEXTADD()
 * together with the sf_buf that it will later receive as `args'.
 */
static void socow_iodone(void *addr, void *args);
|
|
|
|
static void
|
|
socow_iodone(void *addr, void *args)
|
|
{
|
|
struct sf_buf *sf;
|
|
vm_page_t pp;
|
|
|
|
sf = args;
|
|
pp = sf_buf_page(sf);
|
|
sf_buf_free(sf);
|
|
/* remove COW mapping */
|
|
vm_page_lock_queues();
|
|
vm_page_cowclear(pp);
|
|
vm_page_unwire(pp, 0);
|
|
/*
|
|
* Check for the object going away on us. This can
|
|
* happen since we don't hold a reference to it.
|
|
* If so, we're responsible for freeing the page.
|
|
*/
|
|
if (pp->wire_count == 0 && pp->object == NULL)
|
|
vm_page_free(pp);
|
|
vm_page_unlock_queues();
|
|
socow_stats.iodone++;
|
|
}
|
|
|
|
int
|
|
socow_setup(struct mbuf *m0, struct uio *uio)
|
|
{
|
|
struct sf_buf *sf;
|
|
vm_page_t pp;
|
|
struct iovec *iov;
|
|
struct vmspace *vmspace;
|
|
struct vm_map *map;
|
|
vm_offset_t offset, uva;
|
|
|
|
socow_stats.attempted++;
|
|
vmspace = curproc->p_vmspace;
|
|
map = &vmspace->vm_map;
|
|
uva = (vm_offset_t) uio->uio_iov->iov_base;
|
|
offset = uva & PAGE_MASK;
|
|
|
|
/*
|
|
* Verify that access to the given address is allowed from user-space.
|
|
*/
|
|
if (vm_fault_quick((caddr_t)uva, VM_PROT_READ) < 0)
|
|
return (0);
|
|
|
|
/*
|
|
* verify page is mapped & not already wired for i/o
|
|
*/
|
|
pp = pmap_extract_and_hold(map->pmap, uva, VM_PROT_READ);
|
|
if (pp == NULL) {
|
|
socow_stats.fail_not_mapped++;
|
|
return(0);
|
|
}
|
|
|
|
/*
|
|
* set up COW
|
|
*/
|
|
vm_page_lock_queues();
|
|
if (vm_page_cowsetup(pp) != 0) {
|
|
vm_page_unhold(pp);
|
|
vm_page_unlock_queues();
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* wire the page for I/O
|
|
*/
|
|
vm_page_wire(pp);
|
|
vm_page_unhold(pp);
|
|
vm_page_unlock_queues();
|
|
|
|
/*
|
|
* Allocate an sf buf
|
|
*/
|
|
sf = sf_buf_alloc(pp, SFB_CATCH);
|
|
if (!sf) {
|
|
vm_page_lock_queues();
|
|
vm_page_cowclear(pp);
|
|
vm_page_unwire(pp, 0);
|
|
/*
|
|
* Check for the object going away on us. This can
|
|
* happen since we don't hold a reference to it.
|
|
* If so, we're responsible for freeing the page.
|
|
*/
|
|
if (pp->wire_count == 0 && pp->object == NULL)
|
|
vm_page_free(pp);
|
|
vm_page_unlock_queues();
|
|
socow_stats.fail_sf_buf++;
|
|
return(0);
|
|
}
|
|
/*
|
|
* attach to mbuf
|
|
*/
|
|
MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone,
|
|
(void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF);
|
|
m0->m_len = PAGE_SIZE - offset;
|
|
m0->m_data = (caddr_t)sf_buf_kva(sf) + offset;
|
|
socow_stats.success++;
|
|
|
|
iov = uio->uio_iov;
|
|
iov->iov_base = (char *)iov->iov_base + m0->m_len;
|
|
iov->iov_len -= m0->m_len;
|
|
uio->uio_resid -= m0->m_len;
|
|
uio->uio_offset += m0->m_len;
|
|
if (iov->iov_len == 0) {
|
|
uio->uio_iov++;
|
|
uio->uio_iovcnt--;
|
|
}
|
|
|
|
return(m0->m_len);
|
|
}
|