freebsd-skq/sys/kern/uipc_cow.c
kib ac1b596fda Extend the struct vm_page wire_count to u_int to avoid overflow
of the counter, which may happen when too many sendfile(2) calls are
being executed against the same vnode [1].

To keep the size of the struct vm_page and the offsets of the fields
accessed by out-of-tree modules unchanged, swap the types and locations
of the wire_count and cow fields. Add safety checks to detect cow
overflow and force fallback to the normal copy code for zero-copy
sockets. [2]

Reported by:	Anton Yuzhaninov <citrin citrin ru> [1]
Suggested by:	alc [2]
Reviewed by:	alc
MFC after:	2 weeks
2009-01-03 13:24:08 +00:00

185 lines
4.7 KiB
C

/*--
* Copyright (c) 1997, Duke University
* All rights reserved.
*
* Author:
* Andrew Gallatin <gallatin@cs.duke.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of Duke University may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY DUKE UNIVERSITY ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DUKE UNIVERSITY BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This is a set of routines for enabling and disabling copy on write
* protection for data written into sockets.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/mbuf.h>
#include <sys/sf_buf.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
/*
 * Event counters for the socket copy-on-write send path implemented in
 * this file.  Every field is bumped with a plain ++ by socow_setup() or
 * socow_iodone(); nothing in this file reads them back.
 */
struct netsend_cow_stats {
/* Number of times socow_setup() was entered. */
int attempted;
/* pmap_extract_and_hold() returned no page for the user address. */
int fail_not_mapped;
/* sf_buf_alloc() returned NULL for the wired page. */
int fail_sf_buf;
/* The page was attached to the mbuf as external storage. */
int success;
/* Number of times socow_iodone() ran to completion. */
int iodone;
};
/* The single, file-local instance of the counters above. */
static struct netsend_cow_stats socow_stats;
static void socow_iodone(void *addr, void *args);

/*
 * Tear down the copy-on-write state that socow_setup() established for
 * one page.  socow_setup() hands this routine to MEXTADD() together with
 * the sf_buf as the second argument; presumably the mbuf layer calls it
 * once the external storage is no longer referenced.
 *
 * addr - first MEXTADD() argument (the sf_buf KVA); unused here.
 * args - the struct sf_buf that maps the page.
 */
static void
socow_iodone(void *addr, void *args)
{
	struct sf_buf *sfb = args;
	/* Fetch the page first: the sf_buf is given back right after. */
	vm_page_t page = sf_buf_page(sfb);

	sf_buf_free(sfb);

	vm_page_lock_queues();
	/* Undo the COW marking and the wiring done in socow_setup(). */
	vm_page_cowclear(page);
	vm_page_unwire(page, 0);
	/*
	 * No reference is held on the backing object, so it may have
	 * gone away in the meantime.  If the page is now unwired and
	 * belongs to no object, freeing it falls to us.
	 */
	if (page->wire_count == 0 && page->object == NULL) {
		vm_page_free(page);
	}
	vm_page_unlock_queues();

	socow_stats.iodone++;
}
int
socow_setup(struct mbuf *m0, struct uio *uio)
{
struct sf_buf *sf;
vm_page_t pp;
struct iovec *iov;
struct vmspace *vmspace;
struct vm_map *map;
vm_offset_t offset, uva;
socow_stats.attempted++;
vmspace = curproc->p_vmspace;
map = &vmspace->vm_map;
uva = (vm_offset_t) uio->uio_iov->iov_base;
offset = uva & PAGE_MASK;
/*
* Verify that access to the given address is allowed from user-space.
*/
if (vm_fault_quick((caddr_t)uva, VM_PROT_READ) < 0)
return (0);
/*
* verify page is mapped & not already wired for i/o
*/
pp = pmap_extract_and_hold(map->pmap, uva, VM_PROT_READ);
if (pp == NULL) {
socow_stats.fail_not_mapped++;
return(0);
}
/*
* set up COW
*/
vm_page_lock_queues();
if (vm_page_cowsetup(pp) != 0) {
vm_page_unhold(pp);
vm_page_unlock_queues();
return (0);
}
/*
* wire the page for I/O
*/
vm_page_wire(pp);
vm_page_unhold(pp);
vm_page_unlock_queues();
/*
* Allocate an sf buf
*/
sf = sf_buf_alloc(pp, SFB_CATCH);
if (!sf) {
vm_page_lock_queues();
vm_page_cowclear(pp);
vm_page_unwire(pp, 0);
/*
* Check for the object going away on us. This can
* happen since we don't hold a reference to it.
* If so, we're responsible for freeing the page.
*/
if (pp->wire_count == 0 && pp->object == NULL)
vm_page_free(pp);
vm_page_unlock_queues();
socow_stats.fail_sf_buf++;
return(0);
}
/*
* attach to mbuf
*/
MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone,
(void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF);
m0->m_len = PAGE_SIZE - offset;
m0->m_data = (caddr_t)sf_buf_kva(sf) + offset;
socow_stats.success++;
iov = uio->uio_iov;
iov->iov_base = (char *)iov->iov_base + m0->m_len;
iov->iov_len -= m0->m_len;
uio->uio_resid -= m0->m_len;
uio->uio_offset += m0->m_len;
if (iov->iov_len == 0) {
uio->uio_iov++;
uio->uio_iovcnt--;
}
return(m0->m_len);
}