Remove zero-copy sockets code.  It only worked for anonymous memory,
and the equivalent functionality is now provided by sendfile(2) over a
POSIX shared memory file descriptor.

Remove the cow member of struct vm_page, and rearrange the remaining
members.  While there, make hold_count unsigned.

Requested and reviewed by:	alc
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
Approved by:	re (delphij)
parent 9867f4e99b
commit 6796656333
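For context, the replacement path named in the commit message can be sketched from userland roughly as follows. This is a minimal sketch, not code from this commit: the helper name shm_send is hypothetical, and sock is assumed to be a connected stream socket. An anonymous POSIX shared memory object gives sendfile(2) a descriptor whose pages the kernel can wire and transmit in place, which is what the removed SOCKET_SEND_COW path attempted for anonymous memory.

/*
 * Sketch only: send "len" bytes of anonymous memory using an anonymous
 * POSIX shm object (FreeBSD's SHM_ANON extension) and sendfile(2).
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/uio.h>

#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static ssize_t
shm_send(int sock, const void *buf, size_t len)	/* hypothetical helper */
{
	off_t sbytes;
	void *p;
	int fd;

	fd = shm_open(SHM_ANON, O_RDWR, 0600);	/* anonymous shm object */
	if (fd == -1)
		err(1, "shm_open");
	if (ftruncate(fd, (off_t)len) == -1)
		err(1, "ftruncate");
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	/* The memcpy() stands in for producing the data in the mapping. */
	memcpy(p, buf, len);
	munmap(p, len);

	/* The kernel wires the shm pages and transmits them without copying. */
	if (sendfile(fd, sock, 0, len, NULL, &sbytes, 0) == -1)
		err(1, "sendfile");
	close(fd);
	return ((ssize_t)sbytes);
}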
@@ -996,21 +996,6 @@ options TCP_SIGNATURE	#include support for RFC 2385
 # a smooth scheduling of the traffic.
 options 	DUMMYNET
 
-# "Zero copy" sockets support is split into the send and receive path
-# which operate very differently.
-# For the send path the VM page with the data is wired into the kernel
-# and marked as COW (copy-on-write). If the application touches the
-# data while it is still in the send socket buffer the page is copied
-# and divorced from its kernel wiring (no longer zero copy).
-# The receive side requires explicit NIC driver support to create
-# disposable pages which are flipped from kernel to user-space VM.
-# See zero_copy(9) for more details.
-# XXX: The COW based send mechanism is not safe and may result in
-# kernel crashes.
-# XXX: None of the current NIC drivers support disposable pages.
-options 	SOCKET_SEND_COW
-options 	SOCKET_RECV_PFLIP
-
 #####################################################################
 # FILESYSTEM OPTIONS
 
@@ -528,8 +528,6 @@ NGATM_CCATM	opt_netgraph.h
 # DRM options
 DRM_DEBUG	opt_drm.h
 
-SOCKET_SEND_COW	opt_zero.h
-SOCKET_RECV_PFLIP	opt_zero.h
 TI_SF_BUF_JUMBO	opt_ti.h
 TI_JUMBO_HDRSPLIT	opt_ti.h
 
@@ -37,8 +37,6 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include "opt_zero.h"
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -58,84 +56,12 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_map.h>
-#ifdef SOCKET_SEND_COW
-#include <vm/vm_object.h>
-#endif
 
 SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
     "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
 
 static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault);
 
-#ifdef SOCKET_SEND_COW
-/* Declared in uipc_socket.c */
-extern int so_zero_copy_receive;
-
-/*
- * Identify the physical page mapped at the given kernel virtual
- * address.  Insert this physical page into the given address space at
- * the given virtual address, replacing the physical page, if any,
- * that already exists there.
- */
-static int
-vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
-{
-	vm_map_t map = mapa;
-	vm_page_t kern_pg, user_pg;
-	vm_object_t uobject;
-	vm_map_entry_t entry;
-	vm_pindex_t upindex;
-	vm_prot_t prot;
-	boolean_t wired;
-
-	KASSERT((uaddr & PAGE_MASK) == 0,
-	    ("vm_pgmoveco: uaddr is not page aligned"));
-
-	/*
-	 * Herein the physical page is validated and dirtied.  It is
-	 * unwired in sf_buf_mext().
-	 */
-	kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
-	kern_pg->valid = VM_PAGE_BITS_ALL;
-	KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1,
-	    ("vm_pgmoveco: kern_pg is not correctly wired"));
-
-	if ((vm_map_lookup(&map, uaddr,
-	    VM_PROT_WRITE, &entry, &uobject,
-	    &upindex, &prot, &wired)) != KERN_SUCCESS) {
-		return(EFAULT);
-	}
-	VM_OBJECT_WLOCK(uobject);
-retry:
-	if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
-		if (vm_page_sleep_if_busy(user_pg, "vm_pgmoveco"))
-			goto retry;
-		vm_page_lock(user_pg);
-		pmap_remove_all(user_pg);
-		vm_page_free(user_pg);
-		vm_page_unlock(user_pg);
-	} else {
-		/*
-		 * Even if a physical page does not exist in the
-		 * object chain's first object, a physical page from a
-		 * backing object may be mapped read only.
-		 */
-		if (uobject->backing_object != NULL)
-			pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
-	}
-	if (vm_page_insert(kern_pg, uobject, upindex)) {
-		VM_OBJECT_WUNLOCK(uobject);
-		VM_WAIT;
-		VM_OBJECT_WLOCK(uobject);
-		goto retry;
-	}
-	vm_page_dirty(kern_pg);
-	VM_OBJECT_WUNLOCK(uobject);
-	vm_map_lookup_done(map, entry);
-	return(KERN_SUCCESS);
-}
-#endif /* SOCKET_SEND_COW */
-
 int
 copyin_nofault(const void *udaddr, void *kaddr, size_t len)
 {
@@ -313,103 +239,6 @@ uiomove_frombuf(void *buf, int buflen, struct uio *uio)
 	return (uiomove((char *)buf + offset, n, uio));
 }
 
-#ifdef SOCKET_RECV_PFLIP
-/*
- * Experimental support for zero-copy I/O
- */
-static int
-userspaceco(void *cp, u_int cnt, struct uio *uio, int disposable)
-{
-	struct iovec *iov;
-	int error;
-
-	iov = uio->uio_iov;
-	if (uio->uio_rw == UIO_READ) {
-		if ((so_zero_copy_receive != 0)
-		 && ((cnt & PAGE_MASK) == 0)
-		 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
-		 && ((uio->uio_offset & PAGE_MASK) == 0)
-		 && ((((intptr_t) cp) & PAGE_MASK) == 0)
-		 && (disposable != 0)) {
-			/* SOCKET: use page-trading */
-			/*
-			 * We only want to call vm_pgmoveco() on
-			 * disposeable pages, since it gives the
-			 * kernel page to the userland process.
-			 */
-			error = vm_pgmoveco(&curproc->p_vmspace->vm_map,
-			    (vm_offset_t)cp, (vm_offset_t)iov->iov_base);
-
-			/*
-			 * If we get an error back, attempt
-			 * to use copyout() instead.  The
-			 * disposable page should be freed
-			 * automatically if we weren't able to move
-			 * it into userland.
-			 */
-			if (error != 0)
-				error = copyout(cp, iov->iov_base, cnt);
-		} else {
-			error = copyout(cp, iov->iov_base, cnt);
-		}
-	} else {
-		error = copyin(iov->iov_base, cp, cnt);
-	}
-	return (error);
-}
-
-int
-uiomoveco(void *cp, int n, struct uio *uio, int disposable)
-{
-	struct iovec *iov;
-	u_int cnt;
-	int error;
-
-	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
-	    ("uiomoveco: mode"));
-	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
-	    ("uiomoveco proc"));
-
-	while (n > 0 && uio->uio_resid) {
-		iov = uio->uio_iov;
-		cnt = iov->iov_len;
-		if (cnt == 0) {
-			uio->uio_iov++;
-			uio->uio_iovcnt--;
-			continue;
-		}
-		if (cnt > n)
-			cnt = n;
-
-		switch (uio->uio_segflg) {
-
-		case UIO_USERSPACE:
-			maybe_yield();
-			error = userspaceco(cp, cnt, uio, disposable);
-			if (error)
-				return (error);
-			break;
-
-		case UIO_SYSSPACE:
-			if (uio->uio_rw == UIO_READ)
-				bcopy(cp, iov->iov_base, cnt);
-			else
-				bcopy(iov->iov_base, cp, cnt);
-			break;
-		case UIO_NOCOPY:
-			break;
-		}
-		iov->iov_base = (char *)iov->iov_base + cnt;
-		iov->iov_len -= cnt;
-		uio->uio_resid -= cnt;
-		uio->uio_offset += cnt;
-		cp = (char *)cp + cnt;
-		n -= cnt;
-	}
-	return (0);
-}
-#endif /* SOCKET_RECV_PFLIP */
-
 /*
  * Give next character to user as result of read.
  */
@@ -105,7 +105,6 @@ __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
-#include "opt_zero.h"
 #include "opt_compat.h"
 
 #include <sys/param.h>
@@ -221,21 +220,6 @@ static int numopensockets;
 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
     &numopensockets, 0, "Number of open sockets");
 
-#if defined(SOCKET_SEND_COW) || defined(SOCKET_RECV_PFLIP)
-SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
-    "Zero copy controls");
-#ifdef SOCKET_RECV_PFLIP
-int so_zero_copy_receive = 1;
-SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
-    &so_zero_copy_receive, 0, "Enable zero copy receive");
-#endif
-#ifdef SOCKET_SEND_COW
-int so_zero_copy_send = 1;
-SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
-    &so_zero_copy_send, 0, "Enable zero copy send");
-#endif /* SOCKET_SEND_COW */
-#endif /* SOCKET_SEND_COW || SOCKET_RECV_PFLIP */
-
 /*
  * accept_mtx locks down per-socket fields relating to accept queues.  See
  * socketvar.h for an annotation of the protected fields of struct socket.
@@ -978,113 +962,6 @@ sodisconnect(struct socket *so)
 	return (error);
 }
 
-#ifdef SOCKET_SEND_COW
-struct so_zerocopy_stats{
-	int size_ok;
-	int align_ok;
-	int found_ifp;
-};
-struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
-
-/*
- * sosend_copyin() is only used if zero copy sockets are enabled.  Otherwise
- * sosend_dgram() and sosend_generic() use m_uiotombuf().
- *
- * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or
- * all of the data referenced by the uio.  If desired, it uses zero-copy.
- * *space will be updated to reflect data copied in.
- *
- * NB: If atomic I/O is requested, the caller must already have checked that
- * space can hold resid bytes.
- *
- * NB: In the event of an error, the caller may need to free the partial
- * chain pointed to by *mpp.  The contents of both *uio and *space may be
- * modified even in the case of an error.
- */
-static int
-sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
-    int flags)
-{
-	struct mbuf *m, **mp, *top;
-	long len;
-	ssize_t resid;
-	int error;
-	int cow_send;
-
-	*retmp = top = NULL;
-	mp = &top;
-	len = 0;
-	resid = uio->uio_resid;
-	error = 0;
-	do {
-		cow_send = 0;
-		if (resid >= MINCLSIZE) {
-			if (top == NULL) {
-				m = m_gethdr(M_WAITOK, MT_DATA);
-				m->m_pkthdr.len = 0;
-				m->m_pkthdr.rcvif = NULL;
-			} else
-				m = m_get(M_WAITOK, MT_DATA);
-			if (so_zero_copy_send &&
-			    resid >= PAGE_SIZE &&
-			    *space >= PAGE_SIZE &&
-			    uio->uio_iov->iov_len >= PAGE_SIZE) {
-				so_zerocp_stats.size_ok++;
-				so_zerocp_stats.align_ok++;
-				cow_send = socow_setup(m, uio);
-				len = cow_send;
-			}
-			if (!cow_send) {
-				m_clget(m, M_WAITOK);
-				len = min(min(MCLBYTES, resid), *space);
-			}
-		} else {
-			if (top == NULL) {
-				m = m_gethdr(M_WAITOK, MT_DATA);
-				m->m_pkthdr.len = 0;
-				m->m_pkthdr.rcvif = NULL;
-
-				len = min(min(MHLEN, resid), *space);
-				/*
-				 * For datagram protocols, leave room
-				 * for protocol headers in first mbuf.
-				 */
-				if (atomic && m && len < MHLEN)
-					MH_ALIGN(m, len);
-			} else {
-				m = m_get(M_WAITOK, MT_DATA);
-				len = min(min(MLEN, resid), *space);
-			}
-		}
-		if (m == NULL) {
-			error = ENOBUFS;
-			goto out;
-		}
-
-		*space -= len;
-		if (cow_send)
-			error = 0;
-		else
-			error = uiomove(mtod(m, void *), (int)len, uio);
-		resid = uio->uio_resid;
-		m->m_len = len;
-		*mp = m;
-		top->m_pkthdr.len += len;
-		if (error)
-			goto out;
-		mp = &m->m_next;
-		if (resid <= 0) {
-			if (flags & MSG_EOR)
-				top->m_flags |= M_EOR;
-			break;
-		}
-	} while (*space > 0 && atomic);
-out:
-	*retmp = top;
-	return (error);
-}
-#endif /* SOCKET_SEND_COW */
-
 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
 
 int
@@ -1094,9 +971,6 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	long space;
 	ssize_t resid;
 	int clen = 0, error, dontroute;
-#ifdef SOCKET_SEND_COW
-	int atomic = sosendallatonce(so) || top;
-#endif
 
 	KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
 	KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
@@ -1179,11 +1053,6 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
 		if (flags & MSG_EOR)
 			top->m_flags |= M_EOR;
 	} else {
-#ifdef SOCKET_SEND_COW
-		error = sosend_copyin(uio, &top, atomic, &space, flags);
-		if (error)
-			goto out;
-#else
 		/*
 		 * Copy the data from userland into a mbuf chain.
 		 * If no data is to be copied in, a single empty mbuf
@@ -1196,7 +1065,6 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
 			goto out;
 		}
 		space -= resid - uio->uio_resid;
-#endif /* SOCKET_SEND_COW */
 		resid = uio->uio_resid;
 	}
 	KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
@@ -1368,12 +1236,6 @@ restart:
 			if (flags & MSG_EOR)
 				top->m_flags |= M_EOR;
 		} else {
-#ifdef SOCKET_SEND_COW
-			error = sosend_copyin(uio, &top, atomic,
-			    &space, flags);
-			if (error != 0)
-				goto release;
-#else
 			/*
 			 * Copy the data from userland into a mbuf
 			 * chain.  If no data is to be copied in,
@@ -1388,7 +1250,6 @@ restart:
 				goto release;
 			}
 			space -= resid - uio->uio_resid;
-#endif /* SOCKET_SEND_COW */
 			resid = uio->uio_resid;
 		}
 		if (dontroute) {
@@ -1480,20 +1341,6 @@ soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
 	if (error)
 		goto bad;
 	do {
-#ifdef SOCKET_RECV_PFLIP
-		if (so_zero_copy_receive) {
-			int disposable;
-
-			if ((m->m_flags & M_EXT)
-			 && (m->m_ext.ext_type == EXT_DISPOSABLE))
-				disposable = 1;
-			else
-				disposable = 0;
-
-			error = uiomoveco(mtod(m, void *),
-			    min(uio->uio_resid, m->m_len), uio, disposable);
-		} else
-#endif /* SOCKET_RECV_PFLIP */
 		error = uiomove(mtod(m, void *),
 		    (int) min(uio->uio_resid, m->m_len), uio);
 		m = m_free(m);
@@ -1816,20 +1663,6 @@ dontblock:
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			SOCKBUF_UNLOCK(&so->so_rcv);
-#ifdef SOCKET_RECV_PFLIP
-			if (so_zero_copy_receive) {
-				int disposable;
-
-				if ((m->m_flags & M_EXT)
-				 && (m->m_ext.ext_type == EXT_DISPOSABLE))
-					disposable = 1;
-				else
-					disposable = 0;
-
-				error = uiomoveco(mtod(m, char *) + moff,
-				    (int)len, uio, disposable);
-			} else
-#endif /* SOCKET_RECV_PFLIP */
 			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (error) {
@@ -10,7 +10,7 @@ SRCS= cxgb_mc5.c cxgb_vsc8211.c cxgb_ael1002.c cxgb_mv88e1xxx.c
 SRCS+= cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c cxgb_aq100x.c
 SRCS+= cxgb_sge.c cxgb_tn1010.c
 SRCS+= device_if.h bus_if.h pci_if.h
-SRCS+= opt_inet.h opt_inet6.h opt_zero.h opt_sched.h
+SRCS+= opt_inet.h opt_inet6.h opt_sched.h
 SRCS+= uipc_mvec.c
 
 CFLAGS+= -g -DDEFAULT_JUMBO -I${CXGB}
@@ -5,7 +5,7 @@ KMOD= sfxge
 SFXGE= ${.CURDIR}/../../dev/sfxge
 
 SRCS= device_if.h bus_if.h pci_if.h
-SRCS+= opt_inet.h opt_zero.h opt_sched.h
+SRCS+= opt_inet.h opt_sched.h
 
 .PATH: ${.CURDIR}/../../dev/sfxge
 SRCS+= sfxge.c sfxge_dma.c sfxge_ev.c
@@ -3,6 +3,6 @@
 .PATH: ${.CURDIR}/../../dev/ti
 
 KMOD= if_ti
-SRCS= if_ti.c device_if.h bus_if.h pci_if.h opt_ti.h opt_zero.h
+SRCS= if_ti.c device_if.h bus_if.h pci_if.h opt_ti.h
 
 .include <bsd.kmod.mk>
@@ -325,7 +325,6 @@ int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
 int	soconnectat(int fd, struct socket *so, struct sockaddr *nam,
 	    struct thread *td);
 int	soconnect2(struct socket *so1, struct socket *so2);
-int	socow_setup(struct mbuf *m0, struct uio *uio);
 int	socreate(int dom, struct socket **aso, int type, int proto,
 	    struct ucred *cred, struct thread *td);
 int	sodisconnect(struct socket *so);
@@ -104,7 +104,6 @@ int uiomove_fromphys(struct vm_page *ma[], vm_offset_t offset, int n,
 	    struct uio *uio);
 int	uiomove_nofault(void *cp, int n, struct uio *uio);
 int	uiomove_object(struct vm_object *obj, off_t obj_size, struct uio *uio);
-int	uiomoveco(void *cp, int n, struct uio *uio, int disposable);
 
 #else /* !_KERNEL */
 
@@ -333,24 +333,6 @@ RetryFault:;
 		 */
 		fs.m = vm_page_lookup(fs.object, fs.pindex);
 		if (fs.m != NULL) {
-			/*
-			 * check for page-based copy on write.
-			 * We check fs.object == fs.first_object so
-			 * as to ensure the legacy COW mechanism is
-			 * used when the page in question is part of
-			 * a shadow object.  Otherwise, vm_page_cowfault()
-			 * removes the page from the backing object,
-			 * which is not what we want.
-			 */
-			vm_page_lock(fs.m);
-			if ((fs.m->cow) &&
-			    (fault_type & VM_PROT_WRITE) &&
-			    (fs.object == fs.first_object)) {
-				vm_page_cowfault(fs.m);
-				unlock_and_deallocate(&fs);
-				goto RetryFault;
-			}
-
 			/*
 			 * Wait/Retry if the page is busy.  We have to do this
 			 * if the page is either exclusive or shared busy
@@ -374,7 +356,6 @@ RetryFault:;
 				 * likely to reclaim it.
 				 */
 				vm_page_aflag_set(fs.m, PGA_REFERENCED);
-				vm_page_unlock(fs.m);
 				if (fs.object != fs.first_object) {
 					if (!VM_OBJECT_TRYWLOCK(
 					    fs.first_object)) {
@@ -400,6 +381,7 @@ RetryFault:;
 				vm_object_deallocate(fs.first_object);
 				goto RetryFault;
 			}
+			vm_page_lock(fs.m);
 			vm_page_remque(fs.m);
 			vm_page_unlock(fs.m);
 
sys/vm/vm_page.c
@@ -674,8 +674,8 @@ vm_page_unhold(vm_page_t mem)
 {
 
 	vm_page_lock_assert(mem, MA_OWNED);
+	KASSERT(mem->hold_count >= 1, ("vm_page_unhold: hold count < 0!!!"));
 	--mem->hold_count;
-	KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!"));
 	if (mem->hold_count == 0 && (mem->flags & PG_UNHOLDFREE) != 0)
 		vm_page_free_toq(mem);
 }
@@ -3108,108 +3108,6 @@ vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line)
 }
 #endif
 
-int so_zerocp_fullpage = 0;
-
-/*
- * Replace the given page with a copy.  The copied page assumes
- * the portion of the given page's "wire_count" that is not the
- * responsibility of this copy-on-write mechanism.
- *
- * The object containing the given page must have a non-zero
- * paging-in-progress count and be locked.
- */
-void
-vm_page_cowfault(vm_page_t m)
-{
-	vm_page_t mnew;
-	vm_object_t object;
-	vm_pindex_t pindex;
-
-	vm_page_lock_assert(m, MA_OWNED);
-	object = m->object;
-	VM_OBJECT_ASSERT_WLOCKED(object);
-	KASSERT(object->paging_in_progress != 0,
-	    ("vm_page_cowfault: object %p's paging-in-progress count is zero.",
-	    object));
-	pindex = m->pindex;
-
-retry_alloc:
-	mnew = vm_page_alloc(NULL, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
-	if (mnew == NULL) {
-		vm_page_unlock(m);
-		VM_OBJECT_WUNLOCK(object);
-		VM_WAIT;
-		VM_OBJECT_WLOCK(object);
-		if (m == vm_page_lookup(object, pindex)) {
-			vm_page_lock(m);
-			goto retry_alloc;
-		} else {
-			/*
-			 * Page disappeared during the wait.
-			 */
-			return;
-		}
-	}
-
-	if (m->cow == 0) {
-		/*
-		 * check to see if we raced with an xmit complete when
-		 * waiting to allocate a page.  If so, put things back
-		 * the way they were
-		 */
-		vm_page_unlock(m);
-		vm_page_lock(mnew);
-		vm_page_free(mnew);
-		vm_page_unlock(mnew);
-	} else { /* clear COW & copy page */
-		pmap_remove_all(m);
-		mnew->object = object;
-		if (object->memattr != VM_MEMATTR_DEFAULT &&
-		    (object->flags & OBJ_FICTITIOUS) == 0)
-			pmap_page_set_memattr(mnew, object->memattr);
-		if (vm_page_replace(mnew, object, pindex) != m)
-			panic("vm_page_cowfault: invalid page replacement");
-		if (!so_zerocp_fullpage)
-			pmap_copy_page(m, mnew);
-		mnew->valid = VM_PAGE_BITS_ALL;
-		vm_page_dirty(mnew);
-		mnew->wire_count = m->wire_count - m->cow;
-		m->wire_count = m->cow;
-		vm_page_unlock(m);
-	}
-}
-
-void
-vm_page_cowclear(vm_page_t m)
-{
-
-	vm_page_lock_assert(m, MA_OWNED);
-	if (m->cow) {
-		m->cow--;
-		/*
-		 * let vm_fault add back write permission lazily
-		 */
-	}
-	/*
-	 * sf_buf_free() will free the page, so we needn't do it here
-	 */
-}
-
-int
-vm_page_cowsetup(vm_page_t m)
-{
-
-	vm_page_lock_assert(m, MA_OWNED);
-	if ((m->flags & PG_FICTITIOUS) != 0 ||
-	    (m->oflags & VPO_UNMANAGED) != 0 ||
-	    m->cow == USHRT_MAX - 1 || !VM_OBJECT_TRYWLOCK(m->object))
-		return (EBUSY);
-	m->cow++;
-	pmap_remove_write(m);
-	VM_OBJECT_WUNLOCK(m->object);
-	return (0);
-}
-
 #ifdef INVARIANTS
 void
 vm_page_object_lock_assert(vm_page_t m)
@@ -142,23 +142,21 @@ struct vm_page {
 	vm_pindex_t pindex;	/* offset into object (O,P) */
 	vm_paddr_t phys_addr;	/* physical address of page */
 	struct md_page md;	/* machine dependant stuff */
-	uint8_t queue;		/* page queue index (P,Q) */
-	int8_t segind;
-	short hold_count;	/* page hold count (P) */
-	uint8_t order;		/* index of the buddy queue */
-	uint8_t pool;
-	u_short cow;		/* page cow mapping count (P) */
 	u_int wire_count;	/* wired down maps refs (P) */
+	volatile u_int busy_lock; /* busy owners lock */
+	uint16_t hold_count;	/* page hold count (P) */
+	uint16_t flags;		/* page PG_* flags (P) */
 	uint8_t aflags;		/* access is atomic */
 	uint8_t oflags;		/* page VPO_* flags (O) */
-	uint16_t flags;		/* page PG_* flags (P) */
+	uint8_t queue;		/* page queue index (P,Q) */
+	int8_t segind;
+	uint8_t order;		/* index of the buddy queue */
+	uint8_t pool;
 	u_char act_count;	/* page usage count (P) */
-	u_char __pad0;		/* unused padding */
 	/* NOTE that these must support one bit per DEV_BSIZE in a page */
 	/* so, on normal X86 kernels, they must be at least 8 bits wide */
 	vm_page_bits_t valid;	/* map of valid DEV_BSIZE chunks (O) */
 	vm_page_bits_t dirty;	/* map of dirty DEV_BSIZE chunks (M) */
-	volatile u_int busy_lock; /* busy owners lock */
 };
 
 /*
@@ -482,9 +480,6 @@ vm_page_bits_t vm_page_bits(int base, int size);
 void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid);
 void vm_page_free_toq(vm_page_t m);
 void vm_page_zero_idle_wakeup(void);
-void vm_page_cowfault (vm_page_t);
-int vm_page_cowsetup(vm_page_t);
-void vm_page_cowclear (vm_page_t);
 
 void vm_page_dirty_KBI(vm_page_t m);
 void vm_page_lock_KBI(vm_page_t m, const char *file, int line);