The soft and hard busy mechanisms rely on the vm object lock to work.

Unify the two concepts into a real, minimal sxlock where the shared
acquisition represents the soft busy and the exclusive acquisition
represents the hard busy.
The old VPO_WANTED mechanism becomes the hard path for this new lock
and it is now per-page rather than per-object.
The vm_object lock becomes an interlock for this functionality:
it can be held in either read or write mode.
However, if the vm_object lock is held in read mode while acquiring
or releasing the busy state, the owning thread cannot make any
assumptions about the busy state unless it is also busying the page.
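
As a rough sketch (obj and m are just placeholders, and the page is
assumed not to be busied already), the exclusive acquisition replaces
the old VPO_BUSY dance:

    VM_OBJECT_WLOCK(obj);           /* the interlock */
    vm_page_xbusy(m);               /* old "hard" busy */
    VM_OBJECT_WUNLOCK(obj);
    /* ... page identity is stable without the object lock ... */
    VM_OBJECT_WLOCK(obj);
    vm_page_xunbusy(m);             /* wakes up any sleepers */
    VM_OBJECT_WUNLOCK(obj);

The vm_page_sbusy()/vm_page_sunbusy() pair is the shared counterpart
and takes over the old soft busy (m->busy) count.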

Also:
- Add a new flag to directly shared busy pages while vm_page_alloc
  and vm_page_grab are executed (see the example below).  This will
  be very helpful once these functions happen under a read object lock.
- Move the swapping sleep into its own per-object flag.
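
For instance (trimmed from the ZFS changes below), a page can now be
returned already shared busied instead of being allocated unbusied and
soft busied afterwards with vm_page_io_start():

    pp = vm_page_alloc(obj, OFF_TO_IDX(start),
        VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | VM_ALLOC_SBUSY);
    ...
    pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY |
        VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY);
    ...
    vm_page_sunbusy(pp);            /* drop the shared busy when done */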

The KPI is heavily changed, which is why the version is bumped.
It is very likely that some VM ports users will need to change
their own code.

Sponsored by:	EMC / Isilon storage division
Discussed with:	alc
Reviewed by:	jeff, kib
Tested by:	gavin, bapt (older version)
Tested by:	pho, scottl
Attilio Rao 2013-08-09 11:11:11 +00:00
parent 8ddc3590cc
commit c7aebda8a1
44 changed files with 876 additions and 729 deletions


@ -332,11 +332,8 @@ MAN= accept_filter.9 \
vm_page_grab.9 \
vm_page_hold.9 \
vm_page_insert.9 \
vm_page_io.9 \
vm_page_lookup.9 \
vm_page_rename.9 \
vm_page_sleep_if_busy.9 \
vm_page_wakeup.9 \
vm_page_wire.9 \
vm_set_page_size.9 \
vmem.9 \
@ -1465,6 +1462,21 @@ MLINKS+=vm_page_bits.9 vm_page_clear_dirty.9 \
vm_page_bits.9 vm_page_test_dirty.9 \
vm_page_bits.9 vm_page_undirty.9 \
vm_page_bits.9 vm_page_zero_invalid.9
MLINKS+=vm_page_busy.9 vm_page_busied.9 \
vm_page_busy.9 vm_page_busy_downgrade.9 \
vm_page_busy.9 vm_page_busy_sleep.9 \
vm_page_busy.9 vm_page_sbusied.9 \
vm_page_busy.9 vm_page_sbusy.9 \
vm_page_busy.9 vm_page_sleep_if_busy.9 \
vm_page_busy.9 vm_page_sunbusy.9 \
vm_page_busy.9 vm_page_trysbusy.9 \
vm_page_busy.9 vm_page_tryxbusy.9 \
vm_page_busy.9 vm_page_xbusied.9 \
vm_page_busy.9 vm_page_xbusy.9 \
vm_page_busy.9 vm_page_xunbusy.9 \
vm_page_busy.9 vm_page_assert_sbusied.9 \
vm_page_busy.9 vm_page_assert_unbusied.9 \
vm_page_busy.9 vm_page_assert_xbusied.9
MLINKS+=vm_page_aflag.9 vm_page_aflag_clear.9 \
vm_page_aflag.9 vm_page_aflag_set.9 \
vm_page_aflag.9 vm_page_reference.9
@ -1473,10 +1485,6 @@ MLINKS+=vm_page_free.9 vm_page_free_toq.9 \
vm_page_free.9 vm_page_try_to_free.9
MLINKS+=vm_page_hold.9 vm_page_unhold.9
MLINKS+=vm_page_insert.9 vm_page_remove.9
MLINKS+=vm_page_io.9 vm_page_io_finish.9 \
vm_page_io.9 vm_page_io_start.9
MLINKS+=vm_page_wakeup.9 vm_page_busy.9 \
vm_page_wakeup.9 vm_page_flash.9
MLINKS+=vm_page_wire.9 vm_page_unwire.9
MLINKS+=VOP_ACCESS.9 VOP_ACCESSX.9
MLINKS+=VOP_ATTRIB.9 VOP_GETATTR.9 \


@ -102,7 +102,7 @@ When the write completes, the completion callback should
call
.Xr vm_object_pip_wakeup 9
and
.Xr vm_page_io_finish 9
.Xr vm_page_sunbusy 9
to clear the busy flag and awaken any other threads waiting for this page,
in addition to calling
.Xr vm_page_undirty 9 .
@ -139,7 +139,7 @@ For example,
.Fn VOP_GETPAGES
may either activate a page (if its wanted bit is set)
or deactivate it (otherwise), and finally call
.Xr vm_page_wakeup 9
.Xr vm_page_xunbusy 9
to arouse any threads currently waiting for the page to be faulted in.
.Sh RETURN VALUES
If it successfully reads
@ -156,9 +156,9 @@ is
.Sh SEE ALSO
.Xr vm_object_pip_wakeup 9 ,
.Xr vm_page_free 9 ,
.Xr vm_page_io_finish 9 ,
.Xr vm_page_sunbusy 9 ,
.Xr vm_page_undirty 9 ,
.Xr vm_page_wakeup 9 ,
.Xr vm_page_xunbusy 9 ,
.Xr vnode 9
.Sh AUTHORS
This manual page was written by


@ -91,9 +91,7 @@ than zero.
The optional flags are:
.Bl -tag -width ".Dv VM_ALLOC_IFNOTCACHED"
.It Dv VM_ALLOC_NOBUSY
The returned page will not have the
.Dv VPO_BUSY
flag set.
The returned page will not be exclusive busy.
.It Dv VM_ALLOC_NODUMP
The returned page will not be included in any kernel core dumps
regardless of whether or not it is mapped in to KVA.
@ -112,6 +110,8 @@ Only allocate the page if it is not cached in the
If the page at the specified
.Fa pindex
is cached, NULL is returned instead.
.It Dv VM_ALLOC_SBUSY
The returned page will be shared busy.
.It Dv VM_ALLOC_WIRED
The returned page will be wired.
.It Dv VM_ALLOC_ZERO


@ -0,0 +1,216 @@
.\"
.\" Copyright (c) 2013 EMC Corp.
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
.\" DAMAGE.
.\"
.\" $FreeBSD$
.Dd August 7, 2013
.Dt VM_PAGE_BUSY 9
.Os
.Sh NAME
.Nm vm_page_busied ,
.Nm vm_page_busy_downgrade ,
.Nm vm_page_busy_sleep ,
.Nm vm_page_sbusied ,
.Nm vm_page_sbusy ,
.Nm vm_page_sleep_if_busy ,
.Nm vm_page_sunbusy ,
.Nm vm_page_trysbusy ,
.Nm vm_page_tryxbusy ,
.Nm vm_page_xbusied ,
.Nm vm_page_xbusy ,
.Nm vm_page_xunbusy ,
.Nm vm_page_assert_sbusied ,
.Nm vm_page_assert_unbusied ,
.Nm vm_page_assert_xbusied
.Nd protect page identity changes and page content references
.Sh SYNOPSIS
.In sys/param.h
.In vm/vm.h
.In vm/vm_page.h
.Ft int
.Fn vm_page_busied "vm_page_t m"
.Ft void
.Fn vm_page_busy_downgrade "vm_page_t m"
.Ft void
.Fn vm_page_busy_sleep "vm_page_t m" "const char *msg"
.Ft int
.Fn vm_page_sbusied "vm_page_t m"
.Ft void
.Fn vm_page_sbusy "vm_page_t m"
.Ft int
.Fn vm_page_sleep_if_busy "vm_page_t m" "const char *msg"
.Ft void
.Fn vm_page_sunbusy "vm_page_t m"
.Ft int
.Fn vm_page_trysbusy "vm_page_t m"
.Ft int
.Fn vm_page_tryxbusy "vm_page_t m"
.Ft int
.Fn vm_page_xbusied "vm_page_t m"
.Ft void
.Fn vm_page_xbusy "vm_page_t m"
.Ft void
.Fn vm_page_xunbusy "vm_page_t m"
.Pp
.Cd "options INVARIANTS"
.Cd "options INVARIANT_SUPPORT"
.Ft void
.Fn vm_page_assert_sbusied "vm_page_t m"
.Ft void
.Fn vm_page_assert_unbusied "vm_page_t m"
.Ft void
.Fn vm_page_assert_xbusied "vm_page_t m"
.Sh DESCRIPTION
Page identity is usually protected by higher level locks like vm_object
locks and vm page locks.
However, sometimes it is not possible to hold such locks for the time
necessary to complete the identity change.
In such a case, the page can be exclusively busied by a thread that needs
to own the identity for a certain amount of time.
.Pp
In other situations, threads do not need to change the identity of the
page but they want to prevent other threads from changing the identity
themselves.
For example, when a thread wants to access or update page contents
without a lock held, the page is shared busied.
.Pp
Before busying a page, the vm_object lock must be held.
The same rule applies when a page is unbusied.
This makes the vm_object lock a real busy interlock.
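.Pp
For example, a thread can keep the contents of a valid page stable
while dropping the object lock, as in the following simplified sketch
(it assumes the page exists and is not already exclusive busied):
.Bd -literal -offset indent
VM_OBJECT_WLOCK(object);
m = vm_page_lookup(object, pindex);
vm_page_sbusy(m);
VM_OBJECT_WUNLOCK(object);
/* Access the page contents; its identity cannot change. */
VM_OBJECT_WLOCK(object);
vm_page_sunbusy(m);
VM_OBJECT_WUNLOCK(object);
.Ed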
.Pp
The
.Fn vm_page_busied
function returns non-zero if
.Fa m
is busied in either exclusive or shared mode.
Returns zero otherwise.
.Pp
The
.Fn vm_page_busy_downgrade
function must be used to downgrade
.Fa m
from an exclusive busy state to a shared busy state.
.Pp
The
.Fn vm_page_busy_sleep
function puts the invoking thread to sleep using the appropriate
wait channels for the busy mechanism.
The parameter
.Fa msg
is a string describing the sleep condition for userland tools.
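.Pp
A typical caller, as in the following simplified sketch, holds the page
lock and drops the object lock before sleeping:
.Bd -literal -offset indent
if (vm_page_busied(m)) {
	vm_page_lock(m);
	VM_OBJECT_WUNLOCK(object);
	vm_page_busy_sleep(m, "mwait");
	VM_OBJECT_WLOCK(object);
	/* The page may have changed; look it up again and retry. */
}
.Ed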
.Pp
The
.Fn vm_page_sbusied
function returns non-zero if
.Fa m
is shared busied.
Returns zero otherwise.
.Pp
The
.Fn vm_page_sbusy
function shared busies
.Fa m .
.Pp
The
.Fn vm_page_sleep_if_busy
function puts the invoking thread to sleep, using the appropriate
wait channels for the busy mechanism, if
.Fa m
is busied in either exclusive or shared mode.
If the invoking thread slept, a non-zero value is returned; otherwise
0 is returned.
The parameter
.Fa msg
is a string describing the sleep condition for userland tools.
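.Pp
Since the object lock may be dropped while the thread sleeps, callers
typically look the page up again and retry, as in this simplified
sketch:
.Bd -literal -offset indent
VM_OBJECT_WLOCK(object);
retry:
m = vm_page_lookup(object, idx);
if (m != NULL) {
	if (vm_page_sleep_if_busy(m, "pwait"))
		goto retry;
	/* m is not busied here and the object lock was never dropped. */
}
.Ed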
.Pp
The
.Fn vm_page_sunbusy
function shared unbusies
.Fa m .
.Pp
The
.Fn vm_page_trysbusy
attempts to shared busy
.Fa m .
If the operation cannot immediately succeed
.Fn vm_page_trysbusy
returns 0, otherwise a non-zero value is returned.
.Pp
The
.Fn vm_page_tryxbusy
function attempts to exclusive busy
.Fa m .
If the operation cannot immediately succeed
.Fn vm_page_tryxbusy
returns 0, otherwise a non-zero value is returned.
.Pp
The
.Fn vm_page_xbusied
function returns non-zero if
.Fa m
is exclusive busied.
Returns zero otherwise.
.Pp
The
.Fn vm_page_xbusy
function exclusive busies
.Fa m .
.Pp
The
.Fn vm_page_xunbusy
function exclusive unbusies
.Fa m .
.Pp
Assertions on the busy state allow kernels compiled with
.Cd "options INVARIANTS"
and
.Cd "options INVARIANT_SUPPORT"
to panic if they are not respected.
.Pp
The
.Fn vm_page_assert_sbusied
function panics if
.Fa m
is not shared busied.
.Pp
The
.Fn vm_page_assert_unbusied
function panics if
.Fa m
is not unbusied.
.Pp
The
.Fn vm_page_assert_xbusied
function panics if
.Fa m
is not exclusive busied.
.Sh SEE ALSO
.Xr VOP_GETPAGES 9 ,
.Xr vm_page_aflag 9 ,
.Xr vm_page_alloc 9 ,
.Xr vm_page_deactivate 9 ,
.Xr vm_page_free 9 ,
.Xr vm_page_grab 9 ,
.Xr vm_page_insert 9 ,
.Xr vm_page_lookup 9 ,
.Xr vm_page_rename 9


@ -1,65 +0,0 @@
.\"
.\" Copyright (C) 2001 Chad David <davidc@acns.ab.ca>. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice(s), this list of conditions and the following disclaimer as
.\" the first lines of this file unmodified other than the possible
.\" addition of one or more copyright notices.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice(s), this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
.\" DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
.\" DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd July 17, 2001
.Dt VM_PAGE_IO_START 9
.Os
.Sh NAME
.Nm vm_page_io_start ,
.Nm vm_page_io_finish
.Nd "ready or unready a page for I/O"
.Sh SYNOPSIS
.In sys/param.h
.In vm/vm.h
.In vm/vm_page.h
.Ft void
.Fn vm_page_io_start "vm_page_t m"
.Ft void
.Fn vm_page_io_finish "vm_page_t m"
.Sh DESCRIPTION
The
.Fn vm_page_io_start
function prepares the page for I/O by incrementing its busy flag by one.
.Pp
The
.Fn vm_page_io_finish
function lowers the busy count on the page by one, if the resulting busy
count is zero, a
.Xr wakeup 9
will be issued if the page has been marked
.Dv VPO_WANTED .
A page is typically marked
.Dv VPO_WANTED
by a thread to register its interest in
the page to either complete I/O or becoming available for general use.
.Sh AUTHORS
.An -nosplit
This manual page was written by
.An Chad David Aq davidc@acns.ab.ca
and
.An Alfred Perlstein Aq alfred@FreeBSD.org .


@ -1,68 +0,0 @@
.\"
.\" Copyright (C) 2001 Chad David <davidc@acns.ab.ca>. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice(s), this list of conditions and the following disclaimer as
.\" the first lines of this file unmodified other than the possible
.\" addition of one or more copyright notices.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice(s), this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
.\" DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
.\" DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd July 13, 2001
.Dt VM_PAGE_SLEEP_IF_BUSY 9
.Os
.Sh NAME
.Nm vm_page_sleep_if_busy
.Nd "wait for a busy page to become unbusy"
.Sh SYNOPSIS
.In sys/param.h
.In vm/vm.h
.In vm/vm_page.h
.Ft int
.Fn vm_page_sleep_if_busy "vm_page_t m" "int also_m_busy" "const char *wmesg"
.Sh DESCRIPTION
The
.Fn vm_page_sleep_if_busy
function waits until the
.Dv VPO_BUSY
flag is cleared.
If
.Fa also_m_busy
is non-zero, it also waits for
.Fa m->busy
to become zero.
.Sh RETURN VALUES
If
.Fn vm_page_sleep_if_busy
finds the page busy it returns
.Dv TRUE .
If not, it returns
.Dv FALSE .
Returning
.Dv TRUE
does not necessary mean that
.Fn vm_page_sleep_if_busy
slept, but only that
.Fn splvm
was called.
.Sh AUTHORS
This manual page was written by
.An Chad David Aq davidc@acns.ab.ca .


@ -1,75 +0,0 @@
.\"
.\" Copyright (C) 2001 Chad David <davidc@acns.ab.ca>. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice(s), this list of conditions and the following disclaimer as
.\" the first lines of this file unmodified other than the possible
.\" addition of one or more copyright notices.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice(s), this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
.\" DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
.\" DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd July 14, 2001
.Dt VM_PAGE_BUSY 9
.Os
.Sh NAME
.Nm vm_page_busy ,
.Nm vm_page_flash ,
.Nm vm_page_wakeup
.Nd "handle the busying and unbusying of a page"
.Sh SYNOPSIS
.In sys/param.h
.In vm/vm.h
.In vm/vm_page.h
.Ft void
.Fn vm_page_busy "vm_page_t m"
.Ft void
.Fn vm_page_flash "vm_page_t m"
.Ft void
.Fn vm_page_wakeup "vm_page_t m"
.Sh DESCRIPTION
These functions handle the busying, unbusying and notification of the unbusying
of a page.
.Pp
.Fn vm_page_busy
sets the
.Dv VPO_BUSY
flag in the page.
.Pp
.Fn vm_page_flash
checks to see if there is anybody waiting on the page
.Dv ( VPO_WANTED
will be set), and if so, clears the
.Dv VPO_WANTED
flag and notifies whoever is waiting via
.Fn wakeup .
.Pp
.Fn vm_page_wakeup
clears the
.Dv VPO_BUSY
flag on the page, and calls
.Fn vm_page_flash
in case somebody has been waiting for it.
.Sh SEE ALSO
.Xr vm_page_sleep_if_busy 9 ,
.Xr wakeup 9
.Sh AUTHORS
This manual page was written by
.An Chad David Aq davidc@acns.ab.ca .


@ -3498,7 +3498,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
va >= kmi.clean_eva,
("pmap_enter: managed mapping within the clean submap"));
if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_WLOCKED(m->object);
pa = VM_PAGE_TO_PHYS(m);
newpte = (pt_entry_t)(pa | PG_A | PG_V);
@ -4609,13 +4609,12 @@ pmap_is_modified(vm_page_t m)
("pmap_is_modified: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can have PG_M set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (FALSE);
rw_wlock(&pvh_global_lock);
rv = pmap_is_modified_pvh(&m->md) ||
@ -4740,13 +4739,12 @@ pmap_remove_write(vm_page_t m)
("pmap_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
if ((m->flags & PG_FICTITIOUS) != 0)
@ -4924,13 +4922,13 @@ pmap_clear_modify(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("pmap_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("pmap_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
* If the object containing the page is locked and the page is not
* VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;


@ -2670,8 +2670,8 @@ pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
pa = systempage.pv_pa;
m = NULL;
} else {
KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
(flags & M_NOWAIT) != 0,
KASSERT((m->oflags & VPO_UNMANAGED) != 0 ||
vm_page_xbusied(m) || (flags & M_NOWAIT) != 0,
("pmap_enter_locked: page %p is not busy", m));
pa = VM_PAGE_TO_PHYS(m);
}
@ -3934,13 +3934,12 @@ pmap_is_modified(vm_page_t m)
("pmap_is_modified: page %p is not managed", m));
rv = FALSE;
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can have PG_M set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (rv);
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
@ -3968,13 +3967,13 @@ pmap_clear_modify(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("pmap_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("pmap_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no mappings can be modified.
* If the object containing the page is locked and the page is not
* VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
@ -4009,13 +4008,12 @@ pmap_remove_write(vm_page_t m)
("pmap_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) != 0 ||
(m->aflags & PGA_WRITEABLE) != 0)
if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0)
pmap_clearbit(m, PVF_WRITE);
}


@ -3319,8 +3319,8 @@ pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
pa = systempage.pv_pa;
m = NULL;
} else {
KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
(flags & M_NOWAIT) != 0,
KASSERT((m->oflags & VPO_UNMANAGED) != 0 ||
vm_page_xbusied(m) || (flags & M_NOWAIT) != 0,
("pmap_enter_locked: page %p is not busy", m));
pa = VM_PAGE_TO_PHYS(m);
}
@ -4555,13 +4555,13 @@ pmap_clear_modify(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("pmap_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("pmap_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no mappings can be modified.
* If the object containing the page is locked and the page is not
* VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
@ -4612,13 +4612,12 @@ pmap_remove_write(vm_page_t m)
("pmap_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) != 0 ||
(m->aflags & PGA_WRITEABLE) != 0)
if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0)
pmap_clearbit(m, PVF_WRITE);
}


@ -335,20 +335,24 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
for (;;) {
if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
pp->valid) {
if ((pp->oflags & VPO_BUSY) != 0) {
if (vm_page_xbusied(pp)) {
/*
* Reference the page before unlocking and
* sleeping so that the page daemon is less
* likely to reclaim it.
*/
vm_page_reference(pp);
vm_page_sleep(pp, "zfsmwb");
vm_page_lock(pp);
zfs_vmobject_wunlock(obj);
vm_page_busy_sleep(pp, "zfsmwb");
zfs_vmobject_wlock(obj);
continue;
}
vm_page_sbusy(pp);
} else if (pp == NULL) {
pp = vm_page_alloc(obj, OFF_TO_IDX(start),
VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
VM_ALLOC_NOBUSY);
VM_ALLOC_SBUSY);
} else {
ASSERT(pp != NULL && !pp->valid);
pp = NULL;
@ -357,7 +361,6 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
if (pp != NULL) {
ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
vm_object_pip_add(obj, 1);
vm_page_io_start(pp);
pmap_remove_write(pp);
vm_page_clear_dirty(pp, off, nbytes);
}
@ -370,7 +373,7 @@ static void
page_unbusy(vm_page_t pp)
{
vm_page_io_finish(pp);
vm_page_sunbusy(pp);
vm_object_pip_subtract(pp->object, 1);
}
@ -386,14 +389,17 @@ page_hold(vnode_t *vp, int64_t start)
for (;;) {
if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
pp->valid) {
if ((pp->oflags & VPO_BUSY) != 0) {
if (vm_page_xbusied(pp)) {
/*
* Reference the page before unlocking and
* sleeping so that the page daemon is less
* likely to reclaim it.
*/
vm_page_reference(pp);
vm_page_sleep(pp, "zfsmwb");
vm_page_lock(pp);
zfs_vmobject_wunlock(obj);
vm_page_busy_sleep(pp, "zfsmwb");
zfs_vmobject_wlock(obj);
continue;
}
@ -467,7 +473,7 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
("zfs update_pages: unaligned data in putpages case"));
KASSERT(pp->valid == VM_PAGE_BITS_ALL,
("zfs update_pages: invalid page in putpages case"));
KASSERT(pp->busy > 0,
KASSERT(vm_page_sbusied(pp),
("zfs update_pages: unbusy page in putpages case"));
KASSERT(!pmap_page_is_write_mapped(pp),
("zfs update_pages: writable page in putpages case"));
@ -503,7 +509,7 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
* ZFS to populate a range of page cache pages with data.
*
* NOTE: this function could be optimized to pre-allocate
* all pages in advance, drain VPO_BUSY on all of them,
* all pages in advance, drain exclusive busy on all of them,
* map them into contiguous KVA region and populate them
* in one single dmu_read() call.
*/
@ -531,10 +537,9 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
int bytes = MIN(PAGESIZE, len);
pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_NOBUSY |
pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY |
VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY);
if (pp->valid == 0) {
vm_page_io_start(pp);
zfs_vmobject_wunlock(obj);
va = zfs_map_page(pp, &sf);
error = dmu_read(os, zp->z_id, start, bytes, va,
@ -543,18 +548,19 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
bzero(va + bytes, PAGESIZE - bytes);
zfs_unmap_page(sf);
zfs_vmobject_wlock(obj);
vm_page_io_finish(pp);
vm_page_sunbusy(pp);
vm_page_lock(pp);
if (error) {
if (pp->wire_count == 0 && pp->valid == 0 &&
pp->busy == 0 && !(pp->oflags & VPO_BUSY))
!vm_page_busied(pp))
vm_page_free(pp);
} else {
pp->valid = VM_PAGE_BITS_ALL;
vm_page_activate(pp);
}
vm_page_unlock(pp);
}
} else
vm_page_sunbusy(pp);
if (error)
break;
uio->uio_resid -= bytes;


@ -600,7 +600,7 @@ agp_generic_bind_memory(device_t dev, struct agp_memory *mem,
goto bad;
}
}
vm_page_wakeup(m);
vm_page_xunbusy(m);
}
VM_OBJECT_WUNLOCK(mem->am_obj);
@ -627,7 +627,7 @@ agp_generic_bind_memory(device_t dev, struct agp_memory *mem,
for (k = 0; k < mem->am_size; k += PAGE_SIZE) {
m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k));
if (k >= i)
vm_page_wakeup(m);
vm_page_xunbusy(m);
vm_page_lock(m);
vm_page_unwire(m, 0);
vm_page_unlock(m);


@ -1356,9 +1356,8 @@ i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
*mres = NULL;
} else
oldm = NULL;
retry:
VM_OBJECT_WUNLOCK(vm_obj);
unlocked_vmobj:
retry:
cause = ret = 0;
m = NULL;
@ -1379,9 +1378,11 @@ i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
VM_OBJECT_WLOCK(vm_obj);
m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
if (m != NULL) {
if ((m->flags & VPO_BUSY) != 0) {
if (vm_page_busied(m)) {
DRM_UNLOCK(dev);
vm_page_sleep(m, "915pee");
vm_page_lock(m);
VM_OBJECT_WUNLOCK(vm_obj);
vm_page_busy_sleep(m, "915pee");
goto retry;
}
goto have_page;
@ -1435,16 +1436,18 @@ i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
("not fictitious %p", m));
KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
if ((m->flags & VPO_BUSY) != 0) {
if (vm_page_busied(m)) {
DRM_UNLOCK(dev);
vm_page_sleep(m, "915pbs");
vm_page_lock(m);
VM_OBJECT_WUNLOCK(vm_obj);
vm_page_busy_sleep(m, "915pbs");
goto retry;
}
m->valid = VM_PAGE_BITS_ALL;
vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
have_page:
*mres = m;
vm_page_busy(m);
vm_page_xbusy(m);
CTR4(KTR_DRM, "fault %p %jx %x phys %x", gem_obj, offset, prot,
m->phys_addr);
@ -1465,7 +1468,7 @@ i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
-ret, cause);
if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) {
kern_yield(PRI_USER);
goto unlocked_vmobj;
goto retry;
}
VM_OBJECT_WLOCK(vm_obj);
vm_object_pip_wakeup(vm_obj);
@ -2330,7 +2333,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
m = vm_page_lookup(devobj, i);
if (m == NULL)
continue;
if (vm_page_sleep_if_busy(m, true, "915unm"))
if (vm_page_sleep_if_busy(m, "915unm"))
goto retry;
cdev_pager_free_page(devobj, m);
}
@ -2504,10 +2507,8 @@ i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex)
int rv;
VM_OBJECT_ASSERT_WLOCKED(object);
m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
VM_ALLOC_RETRY);
m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (m->valid != VM_PAGE_BITS_ALL) {
vm_page_busy(m);
if (vm_pager_has_page(object, pindex, NULL, NULL)) {
rv = vm_pager_get_pages(object, &m, 1, 0);
m = vm_page_lookup(object, pindex);
@ -2524,11 +2525,11 @@ i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex)
m->valid = VM_PAGE_BITS_ALL;
m->dirty = 0;
}
vm_page_wakeup(m);
}
vm_page_lock(m);
vm_page_wire(m);
vm_page_unlock(m);
vm_page_xunbusy(m);
atomic_add_long(&i915_gem_wired_pages_cnt, 1);
return (m);
}


@ -212,8 +212,11 @@ ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset,
}
VM_OBJECT_WLOCK(vm_obj);
if ((m->flags & VPO_BUSY) != 0) {
vm_page_sleep(m, "ttmpbs");
if (vm_page_busied(m)) {
vm_page_lock(m);
VM_OBJECT_WUNLOCK(vm_obj);
vm_page_busy_sleep(m, "ttmpbs");
VM_OBJECT_WLOCK(vm_obj);
ttm_mem_io_unlock(man);
ttm_bo_unreserve(bo);
goto retry;
@ -228,7 +231,7 @@ ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset,
("inconsistent insert bo %p m %p m1 %p offset %jx",
bo, m, m1, (uintmax_t)offset));
}
vm_page_busy(m);
vm_page_xbusy(m);
if (oldm != NULL) {
vm_page_lock(oldm);


@ -288,10 +288,8 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
VM_OBJECT_WLOCK(obj);
vm_object_pip_add(obj, 1);
for (i = 0; i < ttm->num_pages; ++i) {
from_page = vm_page_grab(obj, i, VM_ALLOC_NOBUSY |
VM_ALLOC_RETRY);
from_page = vm_page_grab(obj, i, VM_ALLOC_RETRY);
if (from_page->valid != VM_PAGE_BITS_ALL) {
vm_page_busy(from_page);
if (vm_pager_has_page(obj, i, NULL, NULL)) {
rv = vm_pager_get_pages(obj, &from_page, 1, 0);
if (rv != VM_PAGER_OK) {
@ -303,8 +301,8 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
}
} else
vm_page_zero_invalid(from_page, TRUE);
vm_page_wakeup(from_page);
}
vm_page_xunbusy(from_page);
to_page = ttm->pages[i];
if (unlikely(to_page == NULL)) {
ret = -ENOMEM;
@ -357,7 +355,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, vm_object_t persistent_swap_storage)
pmap_copy_page(from_page, to_page);
vm_page_dirty(to_page);
to_page->valid = VM_PAGE_BITS_ALL;
vm_page_wakeup(to_page);
vm_page_xunbusy(to_page);
}
vm_object_pip_wakeup(obj);
VM_OBJECT_WUNLOCK(obj);


@ -834,7 +834,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
else
rv = vm_pager_get_pages(sc->object, &m, 1, 0);
if (rv == VM_PAGER_ERROR) {
vm_page_wakeup(m);
vm_page_xunbusy(m);
break;
} else if (rv == VM_PAGER_FAIL) {
/*
@ -859,7 +859,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
else
rv = VM_PAGER_OK;
if (rv == VM_PAGER_ERROR) {
vm_page_wakeup(m);
vm_page_xunbusy(m);
break;
}
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
@ -875,7 +875,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
else
rv = VM_PAGER_OK;
if (rv == VM_PAGER_ERROR) {
vm_page_wakeup(m);
vm_page_xunbusy(m);
break;
}
if (len != PAGE_SIZE) {
@ -885,7 +885,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
} else
vm_pager_page_unswapped(m);
}
vm_page_wakeup(m);
vm_page_xunbusy(m);
vm_page_lock(m);
if (bp->bio_cmd == BIO_DELETE && len == PAGE_SIZE)
vm_page_free(m);


@ -1854,36 +1854,8 @@ fuse_vnop_getpages(struct vop_getpages_args *ap)
*/
;
}
if (i != ap->a_reqpage) {
/*
* Whether or not to leave the page activated is up in
* the air, but we should put the page on a page queue
* somewhere (it already is in the object). Result:
* It appears that emperical results show that
* deactivating pages is best.
*/
/*
* Just in case someone was asking for this page we
* now tell them that it is ok to use.
*/
if (!error) {
if (m->oflags & VPO_WANTED) {
fuse_vm_page_lock(m);
vm_page_activate(m);
fuse_vm_page_unlock(m);
} else {
fuse_vm_page_lock(m);
vm_page_deactivate(m);
fuse_vm_page_unlock(m);
}
vm_page_wakeup(m);
} else {
fuse_vm_page_lock(m);
vm_page_free(m);
fuse_vm_page_unlock(m);
}
}
if (i != ap->a_reqpage)
vm_page_readahead_finish(m);
}
fuse_vm_page_unlock_queues();
VM_OBJECT_WUNLOCK(vp->v_object);


@ -1355,11 +1355,8 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
retry:
m = vm_page_lookup(uobj, idx);
if (m != NULL) {
if ((m->oflags & VPO_BUSY) != 0 ||
m->busy != 0) {
vm_page_sleep(m, "tmfssz");
if (vm_page_sleep_if_busy(m, "tmfssz"))
goto retry;
}
MPASS(m->valid == VM_PAGE_BITS_ALL);
} else if (vm_pager_has_page(uobj, idx, NULL, NULL)) {
m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL);
@ -1379,7 +1376,7 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
if (rv == VM_PAGER_OK) {
vm_page_deactivate(m);
vm_page_unlock(m);
vm_page_wakeup(m);
vm_page_xunbusy(m);
} else {
vm_page_free(m);
vm_page_unlock(m);


@ -449,7 +449,7 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
/*
* Parallel reads of the page content from disk are prevented
* by VPO_BUSY.
* by exclusive busy.
*
* Although the tmpfs vnode lock is held here, it is
* nonetheless safe to sleep waiting for a free page. The
@ -457,10 +457,8 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
* lock to page out tobj's pages because tobj is a OBJT_SWAP
* type object.
*/
m = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
VM_ALLOC_NOBUSY);
m = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (m->valid != VM_PAGE_BITS_ALL) {
vm_page_busy(m);
if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
rv = vm_pager_get_pages(tobj, &m, 1, 0);
m = vm_page_lookup(tobj, idx);
@ -483,8 +481,8 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
}
} else
vm_page_zero_invalid(m, TRUE);
vm_page_wakeup(m);
}
vm_page_xunbusy(m);
vm_page_lock(m);
vm_page_hold(m);
vm_page_unlock(m);
@ -574,10 +572,8 @@ tmpfs_mappedwrite(vm_object_t tobj, size_t len, struct uio *uio)
tlen = MIN(PAGE_SIZE - offset, len);
VM_OBJECT_WLOCK(tobj);
tpg = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
VM_ALLOC_RETRY);
tpg = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (tpg->valid != VM_PAGE_BITS_ALL) {
vm_page_busy(tpg);
if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
rv = vm_pager_get_pages(tobj, &tpg, 1, 0);
tpg = vm_page_lookup(tobj, idx);
@ -600,8 +596,8 @@ tmpfs_mappedwrite(vm_object_t tobj, size_t len, struct uio *uio)
}
} else
vm_page_zero_invalid(tpg, TRUE);
vm_page_wakeup(tpg);
}
vm_page_xunbusy(tpg);
vm_page_lock(tpg);
vm_page_hold(tpg);
vm_page_unlock(tpg);


@ -3459,7 +3459,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
va));
if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_WLOCKED(m->object);
mpte = NULL;
@ -4553,13 +4553,12 @@ pmap_is_modified(vm_page_t m)
("pmap_is_modified: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can have PG_M set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (FALSE);
rw_wlock(&pvh_global_lock);
rv = pmap_is_modified_pvh(&m->md) ||
@ -4688,13 +4687,12 @@ pmap_remove_write(vm_page_t m)
("pmap_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
sched_pin();
@ -4845,13 +4843,13 @@ pmap_clear_modify(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("pmap_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("pmap_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
* If the object containing the page is locked and the page is not
* VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;


@ -2665,7 +2665,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
va));
if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_WLOCKED(m->object);
mpte = NULL;
@ -3694,13 +3694,12 @@ pmap_is_modified(vm_page_t m)
rv = FALSE;
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can have PG_M set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (rv);
rw_wlock(&pvh_global_lock);
sched_pin();
@ -3825,13 +3824,12 @@ pmap_remove_write(vm_page_t m)
("pmap_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
sched_pin();
@ -3931,13 +3929,13 @@ pmap_clear_modify(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("pmap_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("pmap_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
* If the object containing the page is locked and the page is not
* VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;


@ -1677,7 +1677,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
va &= ~PAGE_MASK;
KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m),
("pmap_enter: page %p is not busy", m));
/*
@ -2234,13 +2234,12 @@ pmap_is_modified(vm_page_t m)
rv = FALSE;
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can be dirty.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (rv);
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
@ -2323,13 +2322,13 @@ pmap_clear_modify(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("pmap_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("pmap_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no PTEs can be modified.
* If the object containing the page is locked and the page is not
* VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
@ -2396,13 +2395,12 @@ pmap_remove_write(vm_page_t m)
("pmap_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {


@ -937,10 +937,8 @@ exec_map_first_page(imgp)
object->pg_color = 0;
}
#endif
ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
VM_ALLOC_RETRY);
ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (ma[0]->valid != VM_PAGE_BITS_ALL) {
vm_page_busy(ma[0]);
initial_pagein = VM_INITIAL_PAGEIN;
if (initial_pagein > object->size)
initial_pagein = object->size;
@ -948,9 +946,8 @@ exec_map_first_page(imgp)
if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) {
if (ma[i]->valid)
break;
if ((ma[i]->oflags & VPO_BUSY) || ma[i]->busy)
if (!vm_page_tryxbusy(ma[i]))
break;
vm_page_busy(ma[i]);
} else {
ma[i] = vm_page_alloc(object, i,
VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
@ -970,8 +967,8 @@ exec_map_first_page(imgp)
VM_OBJECT_WUNLOCK(object);
return (EIO);
}
vm_page_wakeup(ma[0]);
}
vm_page_xunbusy(ma[0]);
vm_page_lock(ma[0]);
vm_page_hold(ma[0]);
vm_page_unlock(ma[0]);


@ -107,7 +107,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
VM_OBJECT_WLOCK(uobject);
retry:
if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco"))
if (vm_page_sleep_if_busy(user_pg, "vm_pgmoveco"))
goto retry;
vm_page_lock(user_pg);
pmap_remove_all(user_pg);


@ -281,11 +281,8 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
retry:
m = vm_page_lookup(object, idx);
if (m != NULL) {
if ((m->oflags & VPO_BUSY) != 0 ||
m->busy != 0) {
vm_page_sleep(m, "shmtrc");
if (vm_page_sleep_if_busy(m, "shmtrc"))
goto retry;
}
} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
if (m == NULL) {
@ -305,7 +302,7 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
if (rv == VM_PAGER_OK) {
vm_page_deactivate(m);
vm_page_unlock(m);
vm_page_wakeup(m);
vm_page_xunbusy(m);
} else {
vm_page_free(m);
vm_page_unlock(m);


@ -2272,7 +2272,7 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
* then free it.
*/
if (pg->wire_count == 0 && pg->valid == 0 &&
pg->busy == 0 && !(pg->oflags & VPO_BUSY))
!vm_page_busied(pg))
vm_page_free(pg);
vm_page_unlock(pg);
VM_OBJECT_WUNLOCK(obj);


@ -584,7 +584,7 @@ vfs_buf_test_cache(struct buf *bp,
vm_page_t m)
{
VM_OBJECT_ASSERT_WLOCKED(m->object);
VM_OBJECT_ASSERT_LOCKED(m->object);
if (bp->b_flags & B_CACHE) {
int base = (foff + off) & PAGE_MASK;
if (vm_page_is_valid(m, base, size) == 0)
@ -1852,26 +1852,19 @@ vfs_vmio_release(struct buf *bp)
*/
vm_page_lock(m);
vm_page_unwire(m, 0);
/*
* We don't mess with busy pages, it is
* the responsibility of the process that
* busied the pages to deal with them.
* Might as well free the page if we can and it has
* no valid data. We also free the page if the
* buffer was used for direct I/O
*/
if ((m->oflags & VPO_BUSY) == 0 && m->busy == 0 &&
m->wire_count == 0) {
/*
* Might as well free the page if we can and it has
* no valid data. We also free the page if the
* buffer was used for direct I/O
*/
if ((bp->b_flags & B_ASYNC) == 0 && !m->valid) {
if ((bp->b_flags & B_ASYNC) == 0 && !m->valid) {
if (m->wire_count == 0 && !vm_page_busied(m))
vm_page_free(m);
} else if (bp->b_flags & B_DIRECT) {
vm_page_try_to_free(m);
} else if (buf_vm_page_count_severe()) {
vm_page_try_to_cache(m);
}
}
} else if (bp->b_flags & B_DIRECT)
vm_page_try_to_free(m);
else if (buf_vm_page_count_severe())
vm_page_try_to_cache(m);
vm_page_unlock(m);
}
VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
@ -3450,7 +3443,7 @@ allocbuf(struct buf *bp, int size)
m = bp->b_pages[i];
KASSERT(m != bogus_page,
("allocbuf: bogus page found"));
while (vm_page_sleep_if_busy(m, TRUE,
while (vm_page_sleep_if_busy(m,
"biodep"))
continue;
@ -3489,10 +3482,10 @@ allocbuf(struct buf *bp, int size)
* here could interfere with paging I/O, no
* matter which process we are.
*
* We can only test VPO_BUSY here. Blocking on
* m->busy might lead to a deadlock:
* vm_fault->getpages->cluster_read->allocbuf
* Thus, we specify VM_ALLOC_IGN_SBUSY.
* Only exclusive busy can be tested here.
* Blocking on shared busy might lead to
* deadlocks once allocbuf() is called after
* pages are vfs_busy_pages().
*/
m = vm_page_grab(obj, OFF_TO_IDX(bp->b_offset) +
bp->b_npages, VM_ALLOC_NOBUSY |
@ -3852,7 +3845,7 @@ bufdone_finish(struct buf *bp)
vfs_page_set_valid(bp, foff, m);
}
vm_page_io_finish(m);
vm_page_sunbusy(m);
vm_object_pip_subtract(obj, 1);
foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
iosize -= resid;
@ -3914,7 +3907,7 @@ vfs_unbusy_pages(struct buf *bp)
BUF_CHECK_UNMAPPED(bp);
}
vm_object_pip_subtract(obj, 1);
vm_page_io_finish(m);
vm_page_sunbusy(m);
}
vm_object_pip_wakeupn(obj, 0);
VM_OBJECT_WUNLOCK(obj);
@ -3987,8 +3980,8 @@ vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, vm_page_t m)
}
/*
* Ensure that all buffer pages are not busied by VPO_BUSY flag. If
* any page is busy, drain the flag.
* Ensure that all buffer pages are not exclusive busied. If any page is
* exclusive busy, drain it.
*/
static void
vfs_drain_busy_pages(struct buf *bp)
@ -4000,22 +3993,26 @@ vfs_drain_busy_pages(struct buf *bp)
last_busied = 0;
for (i = 0; i < bp->b_npages; i++) {
m = bp->b_pages[i];
if ((m->oflags & VPO_BUSY) != 0) {
if (vm_page_xbusied(m)) {
for (; last_busied < i; last_busied++)
vm_page_busy(bp->b_pages[last_busied]);
while ((m->oflags & VPO_BUSY) != 0)
vm_page_sleep(m, "vbpage");
vm_page_xbusy(bp->b_pages[last_busied]);
while (vm_page_xbusied(m)) {
vm_page_lock(m);
VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
vm_page_busy_sleep(m, "vbpage");
VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
}
}
}
for (i = 0; i < last_busied; i++)
vm_page_wakeup(bp->b_pages[i]);
vm_page_xunbusy(bp->b_pages[i]);
}
/*
* This routine is called before a device strategy routine.
* It is used to tell the VM system that paging I/O is in
* progress, and treat the pages associated with the buffer
* almost as being VPO_BUSY. Also the object paging_in_progress
* almost as being exclusive busy. Also the object paging_in_progress
* flag is handled to make sure that the object doesn't become
* inconsistant.
*
@ -4048,7 +4045,7 @@ vfs_busy_pages(struct buf *bp, int clear_modify)
if ((bp->b_flags & B_CLUSTER) == 0) {
vm_object_pip_add(obj, 1);
vm_page_io_start(m);
vm_page_sbusy(m);
}
/*
* When readying a buffer for a read ( i.e
@ -4268,7 +4265,7 @@ vm_hold_free_pages(struct buf *bp, int newbsize)
for (index = newnpages; index < bp->b_npages; index++) {
p = bp->b_pages[index];
bp->b_pages[index] = NULL;
if (p->busy != 0)
if (vm_page_sbusied(p))
printf("vm_hold_free_pages: blkno: %jd, lblkno: %jd\n",
(intmax_t)bp->b_blkno, (intmax_t)bp->b_lblkno);
p->wire_count--;


@ -466,7 +466,7 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
for (j = 0; j < tbp->b_npages; j += 1) {
vm_page_t m;
m = tbp->b_pages[j];
vm_page_io_start(m);
vm_page_sbusy(m);
vm_object_pip_add(m->object, 1);
if ((bp->b_npages == 0) ||
(bp->b_pages[bp->b_npages-1] != m)) {
@ -947,7 +947,7 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
if (i != 0) { /* if not first buffer */
for (j = 0; j < tbp->b_npages; j += 1) {
m = tbp->b_pages[j];
if (m->oflags & VPO_BUSY) {
if (vm_page_xbusied(m)) {
VM_OBJECT_WUNLOCK(
tbp->b_object);
bqrelse(tbp);
@ -957,7 +957,7 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
}
for (j = 0; j < tbp->b_npages; j += 1) {
m = tbp->b_pages[j];
vm_page_io_start(m);
vm_page_sbusy(m);
vm_object_pip_add(m->object, 1);
if ((bp->b_npages == 0) ||
(bp->b_pages[bp->b_npages - 1] != m)) {


@ -2014,7 +2014,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
va >= kmi.clean_eva,
("pmap_enter: managed mapping within the clean submap"));
KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m),
("pmap_enter: page %p is not busy", m));
pa = VM_PAGE_TO_PHYS(m);
newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, access, prot);
@ -2812,13 +2812,12 @@ pmap_remove_write(vm_page_t m)
("pmap_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
@ -2878,13 +2877,12 @@ pmap_is_modified(vm_page_t m)
("pmap_is_modified: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can have PTE_D set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (FALSE);
rw_wlock(&pvh_global_lock);
rv = pmap_testbit(m, PTE_D);
@ -2931,13 +2929,13 @@ pmap_clear_modify(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("pmap_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("pmap_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set.
* If the object containing the page is locked and the page is not
* VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
* write busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;


@ -1158,7 +1158,7 @@ moea_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if (pmap_bootstrapped)
rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_LOCKED(m->object);
/* XXX change the pvo head for fake pages */
@ -1326,13 +1326,12 @@ moea_is_modified(mmu_t mmu, vm_page_t m)
("moea_is_modified: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can have PTE_CHG set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (FALSE);
rw_wlock(&pvh_global_lock);
rv = moea_query_bit(m, PTE_CHG);
@ -1371,13 +1370,13 @@ moea_clear_modify(mmu_t mmu, vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("moea_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("moea_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("moea_clear_modify: page %p is exclusive busy", m));
/*
* If the page is not PGA_WRITEABLE, then no PTEs can have PTE_CHG
* set. If the object containing the page is locked and the page is
* not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
* not exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
@ -1401,13 +1400,12 @@ moea_remove_write(mmu_t mmu, vm_page_t m)
("moea_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
lo = moea_attr_fetch(m);


@ -1260,7 +1260,7 @@ moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
pvo_flags = PVO_MANAGED;
}
if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_LOCKED(m->object);
/* XXX change the pvo head for fake pages */
@ -1522,13 +1522,12 @@ moea64_is_modified(mmu_t mmu, vm_page_t m)
("moea64_is_modified: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can have LPTE_CHG set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (FALSE);
return (moea64_query_bit(mmu, m, LPTE_CHG));
}
@ -1562,13 +1561,13 @@ moea64_clear_modify(mmu_t mmu, vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("moea64_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("moea64_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("moea64_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG
* set. If the object containing the page is locked and the page is
* not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
* not exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
@ -1590,13 +1589,12 @@ moea64_remove_write(mmu_t mmu, vm_page_t m)
("moea64_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
powerpc_sync();
LOCK_TABLE_RD();

View File

@ -1563,7 +1563,7 @@ mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
KASSERT((va <= VM_MAXUSER_ADDRESS),
("mmu_booke_enter_locked: user pmap, non user va"));
}
if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_LOCKED(m->object);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@ -1959,13 +1959,12 @@ mmu_booke_remove_write(mmu_t mmu, vm_page_t m)
("mmu_booke_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
@ -2204,13 +2203,12 @@ mmu_booke_is_modified(mmu_t mmu, vm_page_t m)
rv = FALSE;
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can be modified.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (rv);
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
@ -2281,13 +2279,13 @@ mmu_booke_clear_modify(mmu_t mmu, vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("mmu_booke_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("mmu_booke_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("mmu_booke_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PG_AWRITEABLE, then no PTEs can be modified.
* If the object containing the page is locked and the page is not
* VPO_BUSY, then PG_AWRITEABLE cannot be concurrently set.
* exclusive busied, then PG_AWRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;

View File

@ -1492,7 +1492,7 @@ pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
rw_assert(&tte_list_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pm, MA_OWNED);
if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_LOCKED(m->object);
PMAP_STATS_INC(pmap_nenter);
pa = VM_PAGE_TO_PHYS(m);
@ -2066,13 +2066,12 @@ pmap_is_modified(vm_page_t m)
rv = FALSE;
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no TTEs can have TD_W set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (rv);
rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
@ -2139,13 +2138,13 @@ pmap_clear_modify(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("pmap_clear_modify: page %p is busy", m));
KASSERT(!vm_page_xbusied(m),
("pmap_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no TTEs can have TD_W set.
* If the object containing the page is locked and the page is not
* VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
@ -2189,13 +2188,12 @@ pmap_remove_write(vm_page_t m)
("pmap_remove_write: page %p is not managed", m));
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
* another thread while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no page table entries need updating.
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {

View File

@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 1000041 /* Master, propagated to newvers */
#define __FreeBSD_version 1000042 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,

View File

@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
@ -152,10 +153,12 @@ phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
KASSERT(m[i]->dirty == 0,
("phys_pager_getpages: dirty page %p", m[i]));
/* The requested page must remain busy, the others not. */
if (i == reqpage)
if (i == reqpage) {
vm_page_lock(m[i]);
vm_page_flash(m[i]);
else
vm_page_wakeup(m[i]);
vm_page_unlock(m[i]);
} else
vm_page_xunbusy(m[i]);
}
return (VM_PAGER_OK);
}

View File

@ -1219,9 +1219,10 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
*/
VM_OBJECT_WLOCK(object);
while ((mreq->oflags & VPO_SWAPINPROG) != 0) {
mreq->oflags |= VPO_WANTED;
mreq->oflags |= VPO_SWAPSLEEP;
PCPU_INC(cnt.v_intrans);
if (VM_OBJECT_SLEEP(object, mreq, PSWP, "swread", hz * 20)) {
if (VM_OBJECT_SLEEP(object, &object->paging_in_progress, PSWP,
"swread", hz * 20)) {
printf(
"swap_pager: indefinite wait buffer: bufobj: %p, blkno: %jd, size: %ld\n",
bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount);
@ -1459,12 +1460,6 @@ swap_pager_putpages(vm_object_t object, vm_page_t *m, int count,
* Completion routine for asynchronous reads and writes from/to swap.
* Also called manually by synchronous code to finish up a bp.
*
* For READ operations, the pages are VPO_BUSY'd. For WRITE operations,
* the pages are vm_page_t->busy'd. For READ operations, we VPO_BUSY
* unbusy all pages except the 'main' request page. For WRITE
* operations, we vm_page_t->busy'd unbusy all pages ( we can do this
* because we marked them all VM_PAGER_PEND on return from putpages ).
*
* This routine may not sleep.
*/
static void
@ -1514,6 +1509,10 @@ swp_pager_async_iodone(struct buf *bp)
vm_page_t m = bp->b_pages[i];
m->oflags &= ~VPO_SWAPINPROG;
if (m->oflags & VPO_SWAPSLEEP) {
m->oflags &= ~VPO_SWAPSLEEP;
wakeup(&object->paging_in_progress);
}
if (bp->b_ioflags & BIO_ERROR) {
/*
@ -1542,8 +1541,11 @@ swp_pager_async_iodone(struct buf *bp)
m->valid = 0;
if (i != bp->b_pager.pg_reqpage)
swp_pager_free_nrpage(m);
else
else {
vm_page_lock(m);
vm_page_flash(m);
vm_page_unlock(m);
}
/*
* If i == bp->b_pager.pg_reqpage, do not wake
* the page up. The caller needs to.
@ -1558,7 +1560,7 @@ swp_pager_async_iodone(struct buf *bp)
vm_page_lock(m);
vm_page_activate(m);
vm_page_unlock(m);
vm_page_io_finish(m);
vm_page_sunbusy(m);
}
} else if (bp->b_iocmd == BIO_READ) {
/*
@ -1575,7 +1577,7 @@ swp_pager_async_iodone(struct buf *bp)
* Note that the requested page, reqpage, is left
* busied, but we still have to wake it up. The
* other pages are released (unbusied) by
* vm_page_wakeup().
* vm_page_xunbusy().
*/
KASSERT(!pmap_page_is_mapped(m),
("swp_pager_async_iodone: page %p is mapped", m));
@ -1595,9 +1597,12 @@ swp_pager_async_iodone(struct buf *bp)
vm_page_lock(m);
vm_page_deactivate(m);
vm_page_unlock(m);
vm_page_wakeup(m);
} else
vm_page_xunbusy(m);
} else {
vm_page_lock(m);
vm_page_flash(m);
vm_page_unlock(m);
}
} else {
/*
* For write success, clear the dirty
@ -1608,7 +1613,7 @@ swp_pager_async_iodone(struct buf *bp)
("swp_pager_async_iodone: page %p is not write"
" protected", m));
vm_page_undirty(m);
vm_page_io_finish(m);
vm_page_sunbusy(m);
if (vm_page_count_severe()) {
vm_page_lock(m);
vm_page_try_to_cache(m);
@ -1706,19 +1711,18 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex)
vm_page_t m;
vm_object_pip_add(object, 1);
m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
VM_ALLOC_NOBUSY);
m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (m->valid == VM_PAGE_BITS_ALL) {
vm_object_pip_subtract(object, 1);
vm_page_dirty(m);
vm_page_lock(m);
vm_page_activate(m);
vm_page_unlock(m);
vm_page_xunbusy(m);
vm_pager_page_unswapped(m);
return;
}
vm_page_busy(m);
if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK)
panic("swap_pager_force_pagein: read from swap failed");/*XXX*/
vm_object_pip_subtract(object, 1);
@ -1726,7 +1730,7 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex)
vm_page_lock(m);
vm_page_deactivate(m);
vm_page_unlock(m);
vm_page_wakeup(m);
vm_page_xunbusy(m);
vm_pager_page_unswapped(m);
}

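A rough userland model of the new VPO_SWAPSLEEP handshake above, using pthreads and hypothetical names rather than the kernel sleep/wakeup primitives:

#include <pthread.h>
#include <stdbool.h>

struct fake_object {
	pthread_mutex_t	lock;	/* models the vm_object lock */
	pthread_cond_t	pip_cv;	/* models wakeup(&object->paging_in_progress) */
};

struct fake_page {
	bool	swapinprog;	/* models VPO_SWAPINPROG */
	bool	swapsleep;	/* models VPO_SWAPSLEEP */
};

/* Caller holds obj->lock; returns once swap I/O on the page has finished. */
static void
wait_for_swap(struct fake_object *obj, struct fake_page *m)
{
	while (m->swapinprog) {
		m->swapsleep = true;
		pthread_cond_wait(&obj->pip_cv, &obj->lock);
	}
}

/* I/O completion path: clear the in-progress state and wake any sleepers. */
static void
swap_done(struct fake_object *obj, struct fake_page *m)
{
	pthread_mutex_lock(&obj->lock);
	m->swapinprog = false;
	if (m->swapsleep) {
		m->swapsleep = false;
		pthread_cond_broadcast(&obj->pip_cv);
	}
	pthread_mutex_unlock(&obj->lock);
}

int
main(void)
{
	struct fake_object obj;
	struct fake_page m = { .swapinprog = false, .swapsleep = false };

	pthread_mutex_init(&obj.lock, NULL);
	pthread_cond_init(&obj.pip_cv, NULL);
	/* In the kernel, swap_done() would be called from the iodone path. */
	pthread_mutex_lock(&obj.lock);
	wait_for_swap(&obj, &m);	/* returns at once: no I/O pending */
	pthread_mutex_unlock(&obj.lock);
	swap_done(&obj, &m);
	return (0);
}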
View File

@ -141,7 +141,7 @@ static inline void
release_page(struct faultstate *fs)
{
vm_page_wakeup(fs->m);
vm_page_xunbusy(fs->m);
vm_page_lock(fs->m);
vm_page_deactivate(fs->m);
vm_page_unlock(fs->m);
@ -353,21 +353,21 @@ RetryFault:;
/*
* Wait/Retry if the page is busy. We have to do this
* if the page is busy via either VPO_BUSY or
* vm_page_t->busy because the vm_pager may be using
* vm_page_t->busy for pageouts ( and even pageins if
* it is the vnode pager ), and we could end up trying
* to pagein and pageout the same page simultaneously.
* if the page is either exclusive or shared busy
* because the vm_pager may be using read busy for
* pageouts (and even pageins if it is the vnode
* pager), and we could end up trying to pagein and
* pageout the same page simultaneously.
*
* We can theoretically allow the busy case on a read
* fault if the page is marked valid, but since such
* pages are typically already pmap'd, putting that
* special case in might be more effort then it is
* worth. We cannot under any circumstances mess
* around with a vm_page_t->busy page except, perhaps,
* around with a shared busied page except, perhaps,
* to pmap it.
*/
if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) {
if (vm_page_busied(fs.m)) {
/*
* Reference the page before unlocking and
* sleeping so that the page daemon is less
@ -392,8 +392,7 @@ RetryFault:;
unlock_map(&fs);
if (fs.m == vm_page_lookup(fs.object,
fs.pindex)) {
vm_page_sleep_if_busy(fs.m, TRUE,
"vmpfw");
vm_page_sleep_if_busy(fs.m, "vmpfw");
}
vm_object_pip_wakeup(fs.object);
VM_OBJECT_WUNLOCK(fs.object);
@ -410,7 +409,7 @@ RetryFault:;
* (readable), jump to readrest, else break-out ( we
* found the page ).
*/
vm_page_busy(fs.m);
vm_page_xbusy(fs.m);
if (fs.m->valid != VM_PAGE_BITS_ALL)
goto readrest;
break;
@ -516,7 +515,7 @@ RetryFault:;
/*
* Call the pager to retrieve the data, if any, after
* releasing the lock on the map. We hold a ref on
* fs.object and the pages are VPO_BUSY'd.
* fs.object and the pages are exclusive busied.
*/
unlock_map(&fs);
@ -565,7 +564,7 @@ RetryFault:;
* return value is the index into the marray for the
* vm_page_t passed to the routine.
*
* fs.m plus the additional pages are VPO_BUSY'd.
* fs.m plus the additional pages are exclusive busied.
*/
faultcount = vm_fault_additional_pages(
fs.m, behind, ahead, marray, &reqpage);
@ -691,8 +690,7 @@ RetryFault:;
}
}
KASSERT((fs.m->oflags & VPO_BUSY) != 0,
("vm_fault: not busy after main loop"));
vm_page_assert_xbusied(fs.m);
/*
* PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
@ -757,7 +755,7 @@ RetryFault:;
vm_page_lock(fs.m);
vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
vm_page_unlock(fs.m);
vm_page_busy(fs.m);
vm_page_xbusy(fs.m);
fs.first_m = fs.m;
fs.m = NULL;
PCPU_INC(cnt.v_cow_optim);
@ -905,11 +903,8 @@ RetryFault:;
}
}
/*
* Page had better still be busy
*/
KASSERT(fs.m->oflags & VPO_BUSY,
("vm_fault: page %p not busy!", fs.m));
vm_page_assert_xbusied(fs.m);
/*
* Page must be completely valid or it is not fit to
* map into user space. vm_pager_get_pages() ensures this.
@ -946,7 +941,7 @@ RetryFault:;
vm_page_hold(fs.m);
}
vm_page_unlock(fs.m);
vm_page_wakeup(fs.m);
vm_page_xunbusy(fs.m);
/*
* Unlock everything, and return
@ -991,13 +986,12 @@ vm_fault_cache_behind(const struct faultstate *fs, int distance)
if (pindex < OFF_TO_IDX(fs->entry->offset))
pindex = OFF_TO_IDX(fs->entry->offset);
m = first_object != object ? fs->first_m : fs->m;
KASSERT((m->oflags & VPO_BUSY) != 0,
("vm_fault_cache_behind: page %p is not busy", m));
vm_page_assert_xbusied(m);
m_prev = vm_page_prev(m);
while ((m = m_prev) != NULL && m->pindex >= pindex &&
m->valid == VM_PAGE_BITS_ALL) {
m_prev = vm_page_prev(m);
if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0)
if (vm_page_busied(m))
continue;
vm_page_lock(m);
if (m->hold_count == 0 && m->wire_count == 0) {
@ -1378,7 +1372,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
vm_page_activate(dst_m);
vm_page_unlock(dst_m);
}
vm_page_wakeup(dst_m);
vm_page_xunbusy(dst_m);
}
VM_OBJECT_WUNLOCK(dst_object);
if (upgrade) {

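A small userland model of the wait-and-retry shape used above (and repeated in the vm_object.c hunks below): take the page lock, drop the object lock, sleep on the page itself, then relock the object and redo the lookup because the page identity may have changed while sleeping. Pthreads and hypothetical names stand in for the real locks and vm_page_busy_sleep():

#include <pthread.h>
#include <stdbool.h>

struct fake_page {
	pthread_mutex_t	lock;	/* models the page lock */
	pthread_cond_t	cv;	/* models sleeping with the page as wchan */
	bool		busied;	/* models vm_page_busied(m) */
};

struct fake_object {
	pthread_mutex_t	lock;	/* models the vm_object lock */
	struct fake_page page;	/* a single-page object, for brevity */
};

/* Models vm_page_busy_sleep(): page lock held on entry, dropped on return. */
static void
page_busy_sleep(struct fake_page *m)
{
	while (m->busied)
		pthread_cond_wait(&m->cv, &m->lock);
	pthread_mutex_unlock(&m->lock);
}

/* Models an unbusy operation waking any sleepers. */
static void
page_unbusy(struct fake_page *m)
{
	pthread_mutex_lock(&m->lock);
	m->busied = false;
	pthread_cond_broadcast(&m->cv);
	pthread_mutex_unlock(&m->lock);
}

/* Models the retry loop: returns with the object lock held, page unbusied. */
static struct fake_page *
lookup_unbusied(struct fake_object *obj)
{
	struct fake_page *m;

	for (;;) {
		pthread_mutex_lock(&obj->lock);
		m = &obj->page;			/* models vm_page_lookup() */
		pthread_mutex_lock(&m->lock);
		if (!m->busied) {
			pthread_mutex_unlock(&m->lock);
			return (m);
		}
		pthread_mutex_unlock(&obj->lock);
		page_busy_sleep(m);		/* drops the page lock */
		/* Relock and retry; the page may have changed identity. */
	}
}

int
main(void)
{
	struct fake_object obj;

	pthread_mutex_init(&obj.lock, NULL);
	pthread_mutex_init(&obj.page.lock, NULL);
	pthread_cond_init(&obj.page.cv, NULL);
	obj.page.busied = true;
	page_unbusy(&obj.page);		/* another thread would do this */
	(void)lookup_unbusied(&obj);
	pthread_mutex_unlock(&obj.lock);
	return (0);
}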
View File

@ -233,10 +233,8 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
VM_OBJECT_WLOCK(object);
pindex = OFF_TO_IDX(offset);
m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
VM_ALLOC_NOBUSY);
m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (m->valid != VM_PAGE_BITS_ALL) {
vm_page_busy(m);
ma[0] = m;
rv = vm_pager_get_pages(object, ma, 1, 0);
m = vm_page_lookup(object, pindex);
@ -249,8 +247,8 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
m = NULL;
goto out;
}
vm_page_wakeup(m);
}
vm_page_xunbusy(m);
vm_page_lock(m);
vm_page_hold(m);
vm_page_unlock(m);
@ -533,13 +531,11 @@ vm_thread_swapin(struct thread *td)
VM_ALLOC_WIRED);
for (i = 0; i < pages; i++) {
if (ma[i]->valid != VM_PAGE_BITS_ALL) {
KASSERT(ma[i]->oflags & VPO_BUSY,
("lost busy 1"));
vm_page_assert_xbusied(ma[i]);
vm_object_pip_add(ksobj, 1);
for (j = i + 1; j < pages; j++) {
KASSERT(ma[j]->valid == VM_PAGE_BITS_ALL ||
(ma[j]->oflags & VPO_BUSY),
("lost busy 2"));
if (ma[j]->valid != VM_PAGE_BITS_ALL)
vm_page_assert_xbusied(ma[j]);
if (ma[j]->valid == VM_PAGE_BITS_ALL)
break;
}
@ -550,9 +546,9 @@ vm_thread_swapin(struct thread *td)
vm_object_pip_wakeup(ksobj);
for (k = i; k < j; k++)
ma[k] = vm_page_lookup(ksobj, k);
vm_page_wakeup(ma[i]);
} else if (ma[i]->oflags & VPO_BUSY)
vm_page_wakeup(ma[i]);
vm_page_xunbusy(ma[i]);
} else if (vm_page_xbusied(ma[i]))
vm_page_xunbusy(ma[i]);
}
VM_OBJECT_WUNLOCK(ksobj);
pmap_qenter(td->td_kstack, ma, pages);

View File

@ -744,8 +744,7 @@ vm_object_terminate(vm_object_t object)
* the object, the page and object are reset to any empty state.
*/
TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0,
("vm_object_terminate: freeing busy page %p", p));
vm_page_assert_unbusied(p);
vm_page_lock(p);
/*
* Optimize the page's removal from the object by resetting
@ -871,7 +870,7 @@ vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
np = TAILQ_NEXT(p, listq);
if (p->valid == 0)
continue;
if (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
if (vm_page_sleep_if_busy(p, "vpcwai")) {
if (object->generation != curgeneration) {
if ((flags & OBJPC_SYNC) != 0)
goto rescan;
@ -939,7 +938,7 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
for (tp = p; count < vm_pageout_page_count; count++) {
tp = vm_page_next(tp);
if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0)
if (tp == NULL || vm_page_busied(tp))
break;
if (!vm_object_page_remove_write(tp, flags, clearobjflags))
break;
@ -947,7 +946,7 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
for (p_first = p; count < vm_pageout_page_count; count++) {
tp = vm_page_prev(p_first);
if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0)
if (tp == NULL || vm_page_busied(tp))
break;
if (!vm_object_page_remove_write(tp, flags, clearobjflags))
break;
@ -1156,7 +1155,7 @@ vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end,
("vm_object_madvise: page %p is fictitious", m));
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("vm_object_madvise: page %p is not managed", m));
if ((m->oflags & VPO_BUSY) || m->busy) {
if (vm_page_busied(m)) {
if (advise == MADV_WILLNEED) {
/*
* Reference the page before unlocking and
@ -1165,11 +1164,10 @@ vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end,
*/
vm_page_aflag_set(m, PGA_REFERENCED);
}
vm_page_unlock(m);
if (object != tobject)
VM_OBJECT_WUNLOCK(object);
m->oflags |= VPO_WANTED;
VM_OBJECT_SLEEP(tobject, m, PDROP | PVM, "madvpo", 0);
VM_OBJECT_WUNLOCK(tobject);
vm_page_busy_sleep(m, "madvpo");
VM_OBJECT_WLOCK(object);
goto relookup;
}
@ -1344,10 +1342,12 @@ vm_object_split(vm_map_entry_t entry)
* We do not have to VM_PROT_NONE the page as mappings should
* not be changed by this operation.
*/
if ((m->oflags & VPO_BUSY) || m->busy) {
if (vm_page_busied(m)) {
VM_OBJECT_WUNLOCK(new_object);
m->oflags |= VPO_WANTED;
VM_OBJECT_SLEEP(orig_object, m, PVM, "spltwt", 0);
vm_page_lock(m);
VM_OBJECT_WUNLOCK(orig_object);
vm_page_busy_sleep(m, "spltwt");
VM_OBJECT_WLOCK(orig_object);
VM_OBJECT_WLOCK(new_object);
goto retry;
}
@ -1371,7 +1371,7 @@ vm_object_split(vm_map_entry_t entry)
vm_page_unlock(m);
/* page automatically made dirty by rename and cache handled */
if (orig_object->type == OBJT_SWAP)
vm_page_busy(m);
vm_page_xbusy(m);
}
if (orig_object->type == OBJT_SWAP) {
/*
@ -1380,7 +1380,7 @@ vm_object_split(vm_map_entry_t entry)
*/
swap_pager_copy(orig_object, new_object, offidxstart, 0);
TAILQ_FOREACH(m, &new_object->memq, listq)
vm_page_wakeup(m);
vm_page_xunbusy(m);
/*
* Transfer any cached pages from orig_object to new_object.
@ -1496,18 +1496,16 @@ vm_object_backing_scan(vm_object_t object, int op)
vm_page_t pp;
if (op & OBSC_COLLAPSE_NOWAIT) {
if ((p->oflags & VPO_BUSY) ||
!p->valid ||
p->busy) {
if (!p->valid || vm_page_busied(p)) {
p = next;
continue;
}
} else if (op & OBSC_COLLAPSE_WAIT) {
if ((p->oflags & VPO_BUSY) || p->busy) {
if (vm_page_busied(p)) {
VM_OBJECT_WUNLOCK(object);
p->oflags |= VPO_WANTED;
VM_OBJECT_SLEEP(backing_object, p,
PDROP | PVM, "vmocol", 0);
vm_page_lock(p);
VM_OBJECT_WUNLOCK(backing_object);
vm_page_busy_sleep(p, "vmocol");
VM_OBJECT_WLOCK(object);
VM_OBJECT_WLOCK(backing_object);
/*
@ -1905,8 +1903,12 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
}
goto next;
}
if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
if (vm_page_busied(p)) {
VM_OBJECT_WUNLOCK(object);
vm_page_busy_sleep(p, "vmopar");
VM_OBJECT_WLOCK(object);
goto again;
}
KASSERT((p->flags & PG_FICTITIOUS) == 0,
("vm_object_page_remove: page %p is fictitious", p));
if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) {
@ -2033,7 +2035,7 @@ vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
if (pindex > start) {
m = vm_page_lookup(object, start);
while (m != NULL && m->pindex < pindex) {
vm_page_wakeup(m);
vm_page_xunbusy(m);
m = TAILQ_NEXT(m, listq);
}
}

View File

@ -483,66 +483,170 @@ vm_page_reference(vm_page_t m)
vm_page_aflag_set(m, PGA_REFERENCED);
}
void
vm_page_busy(vm_page_t m)
{
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_BUSY) == 0,
("vm_page_busy: page already busy!!!"));
m->oflags |= VPO_BUSY;
}
/*
* vm_page_flash:
* vm_page_busy_downgrade:
*
* wakeup anyone waiting for the page.
* Downgrade an exclusive busy page into a single shared busy page.
*/
void
vm_page_flash(vm_page_t m)
vm_page_busy_downgrade(vm_page_t m)
{
u_int x;
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (m->oflags & VPO_WANTED) {
m->oflags &= ~VPO_WANTED;
wakeup(m);
vm_page_assert_xbusied(m);
for (;;) {
x = m->busy_lock;
x &= VPB_BIT_WAITERS;
if (atomic_cmpset_rel_int(&m->busy_lock,
VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1) | x))
break;
}
}
/*
* vm_page_wakeup:
* vm_page_sbusied:
*
* clear the VPO_BUSY flag and wakeup anyone waiting for the
* page.
* Return a positive value if the page is shared busied, 0 otherwise.
*/
int
vm_page_sbusied(vm_page_t m)
{
u_int x;
x = m->busy_lock;
return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED);
}
/*
* vm_page_sunbusy:
*
* Shared unbusy a page.
*/
void
vm_page_wakeup(vm_page_t m)
vm_page_sunbusy(vm_page_t m)
{
u_int x;
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT(m->oflags & VPO_BUSY, ("vm_page_wakeup: page not busy!!!"));
m->oflags &= ~VPO_BUSY;
vm_page_flash(m);
vm_page_assert_sbusied(m);
for (;;) {
x = m->busy_lock;
if (VPB_SHARERS(x) > 1) {
if (atomic_cmpset_int(&m->busy_lock, x,
x - VPB_ONE_SHARER))
break;
continue;
}
if ((x & VPB_BIT_WAITERS) == 0) {
KASSERT(x == VPB_SHARERS_WORD(1),
("vm_page_sunbusy: invalid lock state"));
if (atomic_cmpset_int(&m->busy_lock,
VPB_SHARERS_WORD(1), VPB_UNBUSIED))
break;
continue;
}
KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS),
("vm_page_sunbusy: invalid lock state for waiters"));
vm_page_lock(m);
if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) {
vm_page_unlock(m);
continue;
}
wakeup(m);
vm_page_unlock(m);
break;
}
}
/*
* vm_page_busy_sleep:
*
* Sleep and release the page lock, using the page pointer as wchan.
* This is used to implement the hard path of the busying mechanism.
*
* The given page must be locked.
*/
void
vm_page_io_start(vm_page_t m)
vm_page_busy_sleep(vm_page_t m, const char *wmesg)
{
u_int x;
VM_OBJECT_ASSERT_WLOCKED(m->object);
m->busy++;
vm_page_lock_assert(m, MA_OWNED);
x = m->busy_lock;
if (x == VPB_UNBUSIED) {
vm_page_unlock(m);
return;
}
if ((x & VPB_BIT_WAITERS) == 0 &&
!atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS)) {
vm_page_unlock(m);
return;
}
msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0);
}
/*
* vm_page_trysbusy:
*
* Try to shared busy a page.
* If the operation succeeds, 1 is returned; otherwise 0 is returned.
* The operation never sleeps.
*/
int
vm_page_trysbusy(vm_page_t m)
{
u_int x;
x = m->busy_lock;
return ((x & VPB_BIT_SHARED) != 0 &&
atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER));
}
/*
* vm_page_xunbusy_hard:
*
* Called after the first attempt to exclusive unbusy a page has failed.
* It is assumed that the waiters bit is on.
*/
void
vm_page_io_finish(vm_page_t m)
vm_page_xunbusy_hard(vm_page_t m)
{
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT(m->busy > 0, ("vm_page_io_finish: page %p is not busy", m));
m->busy--;
if (m->busy == 0)
vm_page_flash(m);
vm_page_assert_xbusied(m);
vm_page_lock(m);
atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
wakeup(m);
vm_page_unlock(m);
}
/*
* vm_page_flash:
*
* Wakeup anyone waiting for the page.
* The ownership bits do not change.
*
* The given page must be locked.
*/
void
vm_page_flash(vm_page_t m)
{
u_int x;
vm_page_lock_assert(m, MA_OWNED);
for (;;) {
x = m->busy_lock;
if ((x & VPB_BIT_WAITERS) == 0)
return;
if (atomic_cmpset_int(&m->busy_lock, x,
x & (~VPB_BIT_WAITERS)))
break;
}
wakeup(m);
}
/*
@ -657,7 +761,8 @@ vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
/* Fictitious pages don't use "segind". */
m->flags = PG_FICTITIOUS;
/* Fictitious pages don't use "order" or "pool". */
m->oflags = VPO_BUSY | VPO_UNMANAGED;
m->oflags = VPO_UNMANAGED;
m->busy_lock = VPB_SINGLE_EXCLUSIVER;
m->wire_count = 1;
pmap_page_init(m);
memattr:
@ -737,16 +842,13 @@ vm_page_readahead_finish(vm_page_t m)
* deactivating the page is usually the best choice,
* unless the page is wanted by another thread.
*/
if (m->oflags & VPO_WANTED) {
vm_page_lock(m);
vm_page_lock(m);
if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
vm_page_activate(m);
vm_page_unlock(m);
} else {
vm_page_lock(m);
else
vm_page_deactivate(m);
vm_page_unlock(m);
}
vm_page_wakeup(m);
vm_page_unlock(m);
vm_page_xunbusy(m);
} else {
/*
* Free the completely invalid page. Such page state
@ -761,29 +863,38 @@ vm_page_readahead_finish(vm_page_t m)
}
/*
* vm_page_sleep:
* vm_page_sleep_if_busy:
*
* Sleep and release the page lock.
* Sleep if the page is busied, temporarily releasing the object lock.
* Returns TRUE if the thread slept.
*
* The object containing the given page must be locked.
* The given page must be unlocked and the object containing it
* must be locked.
*/
void
vm_page_sleep(vm_page_t m, const char *msg)
int
vm_page_sleep_if_busy(vm_page_t m, const char *msg)
{
vm_object_t obj;
vm_page_lock_assert(m, MA_NOTOWNED);
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (mtx_owned(vm_page_lockptr(m)))
vm_page_unlock(m);
/*
* It's possible that while we sleep, the page will get
* unbusied and freed. If we are holding the object
* lock, we will assume we hold a reference to the object
* such that even if m->object changes, we can re-lock
* it.
*/
m->oflags |= VPO_WANTED;
VM_OBJECT_SLEEP(m->object, m, PVM, msg, 0);
if (vm_page_busied(m)) {
/*
* The page-specific object must be cached because page
* identity can change during the sleep, causing the
* re-lock of a different object.
* It is assumed that a reference to the object is already
* held by the callers.
*/
obj = m->object;
vm_page_lock(m);
VM_OBJECT_WUNLOCK(obj);
vm_page_busy_sleep(m, msg);
VM_OBJECT_WLOCK(obj);
return (TRUE);
}
return (FALSE);
}
/*
@ -908,15 +1019,24 @@ void
vm_page_remove(vm_page_t m)
{
vm_object_t object;
boolean_t lockacq;
if ((m->oflags & VPO_UNMANAGED) == 0)
vm_page_lock_assert(m, MA_OWNED);
if ((object = m->object) == NULL)
return;
VM_OBJECT_ASSERT_WLOCKED(object);
if (m->oflags & VPO_BUSY) {
m->oflags &= ~VPO_BUSY;
if (vm_page_xbusied(m)) {
lockacq = FALSE;
if ((m->oflags & VPO_UNMANAGED) != 0 &&
!mtx_owned(vm_page_lockptr(m))) {
lockacq = TRUE;
vm_page_lock(m);
}
vm_page_flash(m);
atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
if (lockacq)
vm_page_unlock(m);
}
/*
@ -1185,8 +1305,7 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
* vm_page_alloc:
*
* Allocate and return a page that is associated with the specified
* object and offset pair. By default, this page has the flag VPO_BUSY
* set.
* object and offset pair. By default, this page is exclusive busied.
*
* The caller must always specify an allocation class.
*
@ -1201,10 +1320,11 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
* VM_ALLOC_IFCACHED return page only if it is cached
* VM_ALLOC_IFNOTCACHED return NULL, do not reactivate if the page
* is cached
* VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page
* VM_ALLOC_NOBUSY do not exclusive busy the page
* VM_ALLOC_NODUMP do not include the page in a kernel core dump
* VM_ALLOC_NOOBJ page is not associated with an object and
* should not have the flag VPO_BUSY set
* should not be exclusive busy
* VM_ALLOC_SBUSY shared busy the allocated page
* VM_ALLOC_WIRED wire the allocated page
* VM_ALLOC_ZERO prefer a zeroed page
*
@ -1219,8 +1339,12 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
int flags, req_class;
mpred = 0; /* XXX: pacify gcc */
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0),
("vm_page_alloc: inconsistent object/req"));
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
(object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object,
req));
if (object != NULL)
VM_OBJECT_ASSERT_WLOCKED(object);
@ -1301,7 +1425,8 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
("vm_page_alloc: page %p has unexpected queue %d", m, m->queue));
KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m));
KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m));
KASSERT(m->busy == 0, ("vm_page_alloc: page %p is busy", m));
KASSERT(!vm_page_sbusied(m),
("vm_page_alloc: page %p is busy", m));
KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m));
KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
("vm_page_alloc: page %p has unexpected memattr %d", m,
@ -1345,8 +1470,11 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
m->aflags = 0;
m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
VPO_UNMANAGED : 0;
if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ)) == 0)
m->oflags |= VPO_BUSY;
m->busy_lock = VPB_UNBUSIED;
if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0)
m->busy_lock = VPB_SINGLE_EXCLUSIVER;
if ((req & VM_ALLOC_SBUSY) != 0)
m->busy_lock = VPB_SHARERS_WORD(1);
if (req & VM_ALLOC_WIRED) {
/*
* The page lock is not required for wiring a page until that
@ -1414,9 +1542,10 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
* VM_ALLOC_INTERRUPT interrupt time request
*
* optional allocation flags:
* VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page
* VM_ALLOC_NOBUSY do not exclusive busy the page
* VM_ALLOC_NOOBJ page is not associated with an object and
* should not have the flag VPO_BUSY set
* should not be exclusive busy
* VM_ALLOC_SBUSY shared busy the allocated page
* VM_ALLOC_WIRED wire the allocated page
* VM_ALLOC_ZERO prefer a zeroed page
*
@ -1432,8 +1561,12 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
u_int flags, oflags;
int req_class;
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0),
("vm_page_alloc_contig: inconsistent object/req"));
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
(object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
("vm_page_alloc_contig: inconsistent object(%p)/req(%x)", (void *)object,
req));
if (object != NULL) {
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(object->type == OBJT_PHYS,
@ -1509,8 +1642,6 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
atomic_add_int(&cnt.v_wire_count, npages);
oflags = VPO_UNMANAGED;
if (object != NULL) {
if ((req & VM_ALLOC_NOBUSY) == 0)
oflags |= VPO_BUSY;
if (object->memattr != VM_MEMATTR_DEFAULT &&
memattr == VM_MEMATTR_DEFAULT)
memattr = object->memattr;
@ -1518,6 +1649,13 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
for (m = m_ret; m < &m_ret[npages]; m++) {
m->aflags = 0;
m->flags = (m->flags | PG_NODUMP) & flags;
m->busy_lock = VPB_UNBUSIED;
if (object != NULL) {
if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
m->busy_lock = VPB_SINGLE_EXCLUSIVER;
if ((req & VM_ALLOC_SBUSY) != 0)
m->busy_lock = VPB_SHARERS_WORD(1);
}
if ((req & VM_ALLOC_WIRED) != 0)
m->wire_count = 1;
/* Unmanaged pages don't use "act_count". */
@ -1560,7 +1698,7 @@ vm_page_alloc_init(vm_page_t m)
("vm_page_alloc_init: page %p is wired", m));
KASSERT(m->hold_count == 0,
("vm_page_alloc_init: page %p is held", m));
KASSERT(m->busy == 0,
KASSERT(!vm_page_sbusied(m),
("vm_page_alloc_init: page %p is busy", m));
KASSERT(m->dirty == 0,
("vm_page_alloc_init: page %p is dirty", m));
@ -1926,7 +2064,7 @@ vm_page_free_toq(vm_page_t m)
if (VM_PAGE_IS_FREE(m))
panic("vm_page_free: freeing free page %p", m);
else if (m->busy != 0)
else if (vm_page_sbusied(m))
panic("vm_page_free: freeing busy page %p", m);
/*
@ -2137,8 +2275,8 @@ vm_page_try_to_cache(vm_page_t m)
vm_page_lock_assert(m, MA_OWNED);
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (m->dirty || m->hold_count || m->busy || m->wire_count ||
(m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0)
if (m->dirty || m->hold_count || m->wire_count ||
(m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m))
return (0);
pmap_remove_all(m);
if (m->dirty)
@ -2160,8 +2298,8 @@ vm_page_try_to_free(vm_page_t m)
vm_page_lock_assert(m, MA_OWNED);
if (m->object != NULL)
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (m->dirty || m->hold_count || m->busy || m->wire_count ||
(m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0)
if (m->dirty || m->hold_count || m->wire_count ||
(m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m))
return (0);
pmap_remove_all(m);
if (m->dirty)
@ -2186,7 +2324,7 @@ vm_page_cache(vm_page_t m)
vm_page_lock_assert(m, MA_OWNED);
object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) || m->busy ||
if (vm_page_busied(m) || (m->oflags & VPO_UNMANAGED) ||
m->hold_count || m->wire_count)
panic("vm_page_cache: attempting to cache busy page");
KASSERT(!pmap_page_is_mapped(m),
@ -2372,21 +2510,29 @@ vm_page_t
vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
{
vm_page_t m;
int sleep;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((allocflags & VM_ALLOC_RETRY) != 0,
("vm_page_grab: VM_ALLOC_RETRY is required"));
KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
(allocflags & VM_ALLOC_IGN_SBUSY) != 0,
("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
retrylookup:
if ((m = vm_page_lookup(object, pindex)) != NULL) {
if ((m->oflags & VPO_BUSY) != 0 ||
((allocflags & VM_ALLOC_IGN_SBUSY) == 0 && m->busy != 0)) {
sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ?
vm_page_xbusied(m) : vm_page_busied(m);
if (sleep) {
/*
* Reference the page before unlocking and
* sleeping so that the page daemon is less
* likely to reclaim it.
*/
vm_page_aflag_set(m, PGA_REFERENCED);
vm_page_sleep(m, "pgrbwt");
vm_page_lock(m);
VM_OBJECT_WUNLOCK(object);
vm_page_busy_sleep(m, "pgrbwt");
VM_OBJECT_WLOCK(object);
goto retrylookup;
} else {
if ((allocflags & VM_ALLOC_WIRED) != 0) {
@ -2394,8 +2540,11 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
vm_page_wire(m);
vm_page_unlock(m);
}
if ((allocflags & VM_ALLOC_NOBUSY) == 0)
vm_page_busy(m);
if ((allocflags &
(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
vm_page_xbusy(m);
if ((allocflags & VM_ALLOC_SBUSY) != 0)
vm_page_sbusy(m);
return (m);
}
}
@ -2503,12 +2652,12 @@ vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits)
#endif
/*
* If the object is locked and the page is neither VPO_BUSY nor
* If the object is locked and the page is neither exclusive busy nor
* write mapped, then the page's dirty field cannot possibly be
* set by a concurrent pmap operation.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if ((m->oflags & VPO_BUSY) == 0 && !pmap_page_is_write_mapped(m))
if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m))
m->dirty &= ~pagebits;
else {
/*
@ -2717,7 +2866,7 @@ vm_page_is_valid(vm_page_t m, int base, int size)
{
vm_page_bits_t bits;
VM_OBJECT_ASSERT_WLOCKED(m->object);
VM_OBJECT_ASSERT_LOCKED(m->object);
bits = vm_page_bits(base, size);
return (m->valid != 0 && (m->valid & bits) == bits);
}
@ -2877,12 +3026,11 @@ vm_page_object_lock_assert(vm_page_t m)
/*
* Certain of the page's fields may only be modified by the
* holder of the containing object's lock or the setter of the
* page's VPO_BUSY flag. Unfortunately, the setter of the
* VPO_BUSY flag is not recorded, and thus cannot be checked
* here.
* holder of the containing object's lock or the exclusive busy
* holder. Unfortunately, the holder of the write busy is
* not recorded, and thus cannot be checked here.
*/
if (m->object != NULL && (m->oflags & VPO_BUSY) == 0)
if (m->object != NULL && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_WLOCKED(m->object);
}
#endif
@ -2942,9 +3090,9 @@ DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo)
m = (vm_page_t)addr;
db_printf(
"page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n"
" af 0x%x of 0x%x f 0x%x act %d busy %d valid 0x%x dirty 0x%x\n",
" af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n",
m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr,
m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags,
m->flags, m->act_count, m->busy, m->valid, m->dirty);
m->flags, m->act_count, m->busy_lock, m->valid, m->dirty);
}
#endif /* DDB */

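A standalone sketch of the lock word manipulated by the primitives above, using C11 atomics, the same bit layout, and hypothetical names; the waiter bookkeeping and wakeup slow paths are deliberately left out:

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

#define	BIT_SHARED	0x01u
#define	BIT_EXCLUSIVE	0x02u
#define	SHARERS_SHIFT	3
#define	SHARERS_WORD(x)	((unsigned)(x) << SHARERS_SHIFT | BIT_SHARED)
#define	ONE_SHARER	(1u << SHARERS_SHIFT)
#define	UNBUSIED	SHARERS_WORD(0)
#define	SINGLE_EXCL	BIT_EXCLUSIVE

/* Models vm_page_trysbusy(): the unbusied word also has the shared bit set. */
static bool
try_sbusy(_Atomic unsigned *busy)
{
	unsigned x = atomic_load(busy);

	return ((x & BIT_SHARED) != 0 &&
	    atomic_compare_exchange_strong(busy, &x, x + ONE_SHARER));
}

/* Models vm_page_sunbusy() without the waiter wakeup branch. */
static void
sunbusy(_Atomic unsigned *busy)
{
	unsigned x;

	for (;;) {
		x = atomic_load(busy);
		assert((x & BIT_SHARED) != 0 && x != UNBUSIED);
		if (atomic_compare_exchange_strong(busy, &x,
		    x == SHARERS_WORD(1) ? UNBUSIED : x - ONE_SHARER))
			break;
	}
}

/* Models vm_page_tryxbusy(): exclusive busy only from the unbusied state. */
static bool
try_xbusy(_Atomic unsigned *busy)
{
	unsigned expected = UNBUSIED;

	return (atomic_compare_exchange_strong(busy, &expected, SINGLE_EXCL));
}

/* Models the vm_page_xunbusy() fast path; the real code has a hard path. */
static void
xunbusy(_Atomic unsigned *busy)
{
	unsigned expected = SINGLE_EXCL;

	if (!atomic_compare_exchange_strong(busy, &expected, UNBUSIED))
		assert(0 && "waiters present: hard path omitted");
}

int
main(void)
{
	_Atomic unsigned busy = UNBUSIED;

	assert(try_sbusy(&busy));	/* first sharer: word is now 0x9 */
	assert(try_sbusy(&busy));	/* second sharer: word is now 0x11 */
	assert(!try_xbusy(&busy));	/* exclusive must fail while shared */
	sunbusy(&busy);
	sunbusy(&busy);			/* back to UNBUSIED (0x1) */
	assert(try_xbusy(&busy));
	xunbusy(&busy);
	return (0);
}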
View File

@ -144,11 +144,12 @@ struct vm_page {
uint8_t oflags; /* page VPO_* flags (O) */
uint16_t flags; /* page PG_* flags (P) */
u_char act_count; /* page usage count (P) */
u_char busy; /* page busy count (O) */
u_char __pad0; /* unused padding */
/* NOTE that these must support one bit per DEV_BSIZE in a page!!! */
/* so, on normal X86 kernels, they must be at least 8 bits wide */
vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */
vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */
volatile u_int busy_lock; /* busy owners lock */
};
/*
@ -165,12 +166,35 @@ struct vm_page {
* mappings, and such pages are also not on any PQ queue.
*
*/
#define VPO_BUSY 0x01 /* page is in transit */
#define VPO_WANTED 0x02 /* someone is waiting for page */
#define VPO_UNUSED01 0x01 /* --available-- */
#define VPO_SWAPSLEEP 0x02 /* waiting for swap to finish */
#define VPO_UNMANAGED 0x04 /* no PV management for page */
#define VPO_SWAPINPROG 0x08 /* swap I/O in progress on page */
#define VPO_NOSYNC 0x10 /* do not collect for syncer */
/*
* Busy page implementation details.
* The algorithm is taken mostly from the rwlock(9) and sx(9) lock
* implementations, although support for owner identity has been removed
* because of size constraints. Checks on lock recursion are therefore not
* possible, and the effectiveness of the lock assertions is somewhat reduced.
*/
#define VPB_BIT_SHARED 0x01
#define VPB_BIT_EXCLUSIVE 0x02
#define VPB_BIT_WAITERS 0x04
#define VPB_BIT_FLAGMASK \
(VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS)
#define VPB_SHARERS_SHIFT 3
#define VPB_SHARERS(x) \
(((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT)
#define VPB_SHARERS_WORD(x) ((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED)
#define VPB_ONE_SHARER (1 << VPB_SHARERS_SHIFT)
#define VPB_SINGLE_EXCLUSIVER VPB_BIT_EXCLUSIVE
#define VPB_UNBUSIED VPB_SHARERS_WORD(0)
#define PQ_NONE 255
#define PQ_INACTIVE 0
#define PQ_ACTIVE 1
@ -274,8 +298,9 @@ extern struct mtx_padalign pa_lock[];
* directly set this flag. They should call vm_page_reference() instead.
*
* PGA_WRITEABLE is set exclusively on managed pages by pmap_enter(). When it
* does so, the page must be VPO_BUSY. The MI VM layer must never access this
* flag directly. Instead, it should call pmap_page_is_write_mapped().
* does so, the page must be exclusive busied. The MI VM layer must never
* access this flag directly. Instead, it should call
* pmap_page_is_write_mapped().
*
* PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has
* at least one executable mapping. It is not consumed by the MI VM layer.
@ -362,6 +387,7 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
#define VM_ALLOC_IFNOTCACHED 0x0800 /* Fail if the page is cached */
#define VM_ALLOC_IGN_SBUSY 0x1000 /* vm_page_grab() only */
#define VM_ALLOC_NODUMP 0x2000 /* don't include in dump */
#define VM_ALLOC_SBUSY 0x4000 /* Shared busy the page */
#define VM_ALLOC_COUNT_SHIFT 16
#define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT)
@ -385,15 +411,13 @@ malloc2vm_flags(int malloc_flags)
}
#endif
void vm_page_busy(vm_page_t m);
void vm_page_busy_downgrade(vm_page_t m);
void vm_page_busy_sleep(vm_page_t m, const char *msg);
void vm_page_flash(vm_page_t m);
void vm_page_io_start(vm_page_t m);
void vm_page_io_finish(vm_page_t m);
void vm_page_hold(vm_page_t mem);
void vm_page_unhold(vm_page_t mem);
void vm_page_free(vm_page_t m);
void vm_page_free_zero(vm_page_t m);
void vm_page_wakeup(vm_page_t m);
void vm_page_activate (vm_page_t);
void vm_page_advise(vm_page_t m, int advice);
@ -428,13 +452,17 @@ void vm_page_remove (vm_page_t);
void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
void vm_page_requeue(vm_page_t m);
void vm_page_requeue_locked(vm_page_t m);
int vm_page_sbusied(vm_page_t m);
void vm_page_set_valid_range(vm_page_t m, int base, int size);
void vm_page_sleep(vm_page_t m, const char *msg);
int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
void vm_page_sunbusy(vm_page_t m);
int vm_page_trysbusy(vm_page_t m);
void vm_page_unhold_pages(vm_page_t *ma, int count);
void vm_page_unwire (vm_page_t, int);
void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_wire (vm_page_t);
void vm_page_xunbusy_hard(vm_page_t m);
void vm_page_set_validclean (vm_page_t, int, int);
void vm_page_clear_dirty (vm_page_t, int, int);
void vm_page_set_invalid (vm_page_t, int, int);
@ -457,6 +485,48 @@ void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line);
void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line);
#endif
#define vm_page_assert_sbusied(m) \
KASSERT(vm_page_sbusied(m), \
("vm_page_assert_sbusied: page %p not shared busy @ %s:%d", \
(void *)m, __FILE__, __LINE__));
#define vm_page_assert_unbusied(m) \
KASSERT(!vm_page_busied(m), \
("vm_page_assert_unbusied: page %p busy @ %s:%d", \
(void *)m, __FILE__, __LINE__));
#define vm_page_assert_xbusied(m) \
KASSERT(vm_page_xbusied(m), \
("vm_page_assert_xbusied: page %p not exclusive busy @ %s:%d", \
(void *)m, __FILE__, __LINE__));
#define vm_page_busied(m) \
((m)->busy_lock != VPB_UNBUSIED)
#define vm_page_sbusy(m) do { \
if (!vm_page_trysbusy(m)) \
panic("%s: page %p failed shared busying", __func__, m); \
} while (0)
#define vm_page_tryxbusy(m) \
(atomic_cmpset_acq_int(&m->busy_lock, VPB_UNBUSIED, \
VPB_SINGLE_EXCLUSIVER))
#define vm_page_xbusied(m) \
((m->busy_lock & VPB_SINGLE_EXCLUSIVER) != 0)
#define vm_page_xbusy(m) do { \
if (!vm_page_tryxbusy(m)) \
panic("%s: page %p failed exclusive busying", __func__, \
m); \
} while (0)
#define vm_page_xunbusy(m) do { \
if (!atomic_cmpset_rel_int(&(m)->busy_lock, \
VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED)) \
vm_page_xunbusy_hard(m); \
} while (0)
#ifdef INVARIANTS
void vm_page_object_lock_assert(vm_page_t m);
#define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m)
@ -511,11 +581,11 @@ vm_page_aflag_set(vm_page_t m, uint8_t bits)
/*
* The PGA_WRITEABLE flag can only be set if the page is managed and
* VPO_BUSY. Currently, this flag is only set by pmap_enter().
* exclusive busied. Currently, this flag is only set by pmap_enter().
*/
KASSERT((bits & PGA_WRITEABLE) == 0 ||
(m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY,
("vm_page_aflag_set: PGA_WRITEABLE and !VPO_BUSY"));
((m->oflags & VPO_UNMANAGED) == 0 && vm_page_xbusied(m)),
("vm_page_aflag_set: PGA_WRITEABLE and not exclusive busy"));
/*
* Access the whole 32-bit word containing the aflags field with an
@ -570,27 +640,6 @@ vm_page_remque(vm_page_t m)
vm_page_dequeue(m);
}
/*
* vm_page_sleep_if_busy:
*
* Sleep and release the page queues lock if VPO_BUSY is set or,
* if also_m_busy is TRUE, busy is non-zero. Returns TRUE if the
* thread slept and the page queues lock was released.
* Otherwise, retains the page queues lock and returns FALSE.
*
* The object containing the given page must be locked.
*/
static __inline int
vm_page_sleep_if_busy(vm_page_t m, int also_m_busy, const char *msg)
{
if ((m->oflags & VPO_BUSY) || (also_m_busy && m->busy)) {
vm_page_sleep(m, msg);
return (TRUE);
}
return (FALSE);
}
/*
* vm_page_undirty:
*

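The DDB pginfo command above now prints busy_lock as a raw hex word. A small userland sketch of decoding such a value, with the constants below mirroring the VPB_* layout defined in this header rather than including the header itself:

#include <stdio.h>

#define	VPB_BIT_SHARED		0x01u
#define	VPB_BIT_EXCLUSIVE	0x02u
#define	VPB_BIT_WAITERS		0x04u
#define	VPB_BIT_FLAGMASK	\
	(VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS)
#define	VPB_SHARERS_SHIFT	3
#define	VPB_SHARERS(x)		(((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT)
#define	VPB_SHARERS_WORD(x)	((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED)
#define	VPB_UNBUSIED		VPB_SHARERS_WORD(0)

static void
decode(unsigned x)
{
	printf("0x%x: %s, sharers %u, waiters %s\n", x,
	    (x & VPB_BIT_EXCLUSIVE) ? "exclusive busy" :
	    (x != VPB_UNBUSIED) ? "shared busy" : "unbusied",
	    (x & VPB_BIT_SHARED) ? VPB_SHARERS(x) : 0,
	    (x & VPB_BIT_WAITERS) ? "yes" : "no");
}

int
main(void)
{
	decode(VPB_UNBUSIED);				/* 0x1 */
	decode(VPB_SHARERS_WORD(1));			/* 0x9 */
	decode(VPB_SHARERS_WORD(2) | VPB_BIT_WAITERS);	/* 0x15 */
	decode(VPB_BIT_EXCLUSIVE);			/* 0x2 */
	return (0);
}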
View File

@ -232,8 +232,8 @@ static void vm_pageout_page_stats(struct vm_domain *vmd);
/*
* Initialize a dummy page for marking the caller's place in the specified
* paging queue. In principle, this function only needs to set the flag
* PG_MARKER. Nonetheless, it sets the flag VPO_BUSY and initializes the hold
* count to one as safety precautions.
* PG_MARKER. Nonetheless, it write busies and initializes the hold count
* to one as safety precautions.
*/
static void
vm_pageout_init_marker(vm_page_t marker, u_short queue)
@ -241,7 +241,7 @@ vm_pageout_init_marker(vm_page_t marker, u_short queue)
bzero(marker, sizeof(*marker));
marker->flags = PG_MARKER;
marker->oflags = VPO_BUSY;
marker->busy_lock = VPB_SINGLE_EXCLUSIVER;
marker->queue = queue;
marker->hold_count = 1;
}
@ -361,8 +361,7 @@ vm_pageout_clean(vm_page_t m)
/*
* Can't clean the page if it's busy or held.
*/
KASSERT(m->busy == 0 && (m->oflags & VPO_BUSY) == 0,
("vm_pageout_clean: page %p is busy", m));
vm_page_assert_unbusied(m);
KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m));
vm_page_unlock(m);
@ -400,8 +399,7 @@ vm_pageout_clean(vm_page_t m)
break;
}
if ((p = vm_page_prev(pb)) == NULL ||
(p->oflags & VPO_BUSY) != 0 || p->busy != 0) {
if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) {
ib = 0;
break;
}
@ -430,8 +428,7 @@ vm_pageout_clean(vm_page_t m)
pindex + is < object->size) {
vm_page_t p;
if ((p = vm_page_next(ps)) == NULL ||
(p->oflags & VPO_BUSY) != 0 || p->busy != 0)
if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
break;
vm_page_lock(p);
vm_page_test_dirty(p);
@ -501,7 +498,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL,
("vm_pageout_flush: partially invalid page %p index %d/%d",
mc[i], i, count));
vm_page_io_start(mc[i]);
vm_page_sbusy(mc[i]);
pmap_remove_write(mc[i]);
}
vm_object_pip_add(object, count);
@ -557,7 +554,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
*/
if (pageout_status[i] != VM_PAGER_PEND) {
vm_object_pip_wakeup(object);
vm_page_io_finish(mt);
vm_page_sunbusy(mt);
if (vm_page_count_severe()) {
vm_page_lock(mt);
vm_page_try_to_cache(mt);
@ -594,8 +591,7 @@ vm_pageout_launder(struct vm_pagequeue *pq, int tries, vm_paddr_t low,
object = m->object;
if ((!VM_OBJECT_TRYWLOCK(object) &&
(!vm_pageout_fallback_object_lock(m, &next) ||
m->hold_count != 0)) || (m->oflags & VPO_BUSY) != 0 ||
m->busy != 0) {
m->hold_count != 0)) || vm_page_busied(m)) {
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
continue;
@ -767,7 +763,7 @@ vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
TAILQ_FOREACH(p, &object->memq, listq) {
if (pmap_resident_count(pmap) <= desired)
goto unlock_return;
if ((p->oflags & VPO_BUSY) != 0 || p->busy != 0)
if (vm_page_busied(p))
continue;
PCPU_INC(cnt.v_pdpages);
vm_page_lock(p);
@ -1005,7 +1001,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* pages, because they may leave the inactive queue
* shortly after page scan is finished.
*/
if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) {
if (vm_page_busied(m)) {
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
addl_page_shortage++;
@ -1224,7 +1220,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* page back onto the end of the queue so that
* statistics are more correct if we don't.
*/
if (m->busy || (m->oflags & VPO_BUSY)) {
if (vm_page_busied(m)) {
vm_page_unlock(m);
goto unlock_and_continue;
}
@ -1334,9 +1330,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
/*
* Don't deactivate pages that are busy.
*/
if ((m->busy != 0) ||
(m->oflags & VPO_BUSY) ||
(m->hold_count != 0)) {
if (vm_page_busied(m) || m->hold_count != 0) {
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
vm_page_requeue_locked(m);
@ -1641,9 +1635,7 @@ vm_pageout_page_stats(struct vm_domain *vmd)
/*
* Don't deactivate pages that are busy or held.
*/
if (m->busy != 0 ||
(m->oflags & VPO_BUSY) != 0 ||
m->hold_count != 0) {
if (vm_page_busied(m) || m->hold_count != 0) {
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
vm_page_requeue_locked(m);

View File

@ -568,7 +568,8 @@ vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
}
for (i = 0; i < page_count; i++) {
vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
fp[i].oflags &= ~VPO_UNMANAGED;
fp[i].busy_lock = VPB_UNBUSIED;
}
mtx_lock(&vm_phys_fictitious_reg_mtx);
for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {

View File

@ -1135,8 +1135,7 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
* pmap operation.
*/
m = ma[ncount - 1];
KASSERT(m->busy > 0,
("vnode_pager_generic_putpages: page %p is not busy", m));
vm_page_assert_sbusied(m);
KASSERT(!pmap_page_is_write_mapped(m),
("vnode_pager_generic_putpages: page %p is not read-only", m));
vm_page_clear_dirty(m, pgoff, PAGE_SIZE -