Centralize the logic in vfs_vmio_unwire() and sendfile_free_page().

Both of these functions atomically unwire a page, optionally attempt
to free the page, and enqueue or requeue the page.  Add functions
vm_page_release() and vm_page_release_locked() to perform the same task.
The latter must be called with the page's object lock held.

As a side effect of this refactoring, the buffer cache will no longer
attempt to free mapped pages when completing direct I/O.  This is
consistent with the handling of pages by sendfile(SF_NOCACHE).

Reviewed by:	alc, kib
MFC after:	2 weeks
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D20986
Author:	Mark Johnston
Date:	2019-07-29 22:01:28 +00:00
Commit:	98549e2dc6
Parent:	7244507616

4 changed files with 111 additions and 131 deletions
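For orientation, here is a minimal caller-side sketch (not part of the commit) of the new interface described in the message above: the old open-coded vm_page_lock()/vm_page_unwire_noq() sequence followed by manual queue selection collapses into a single call. The helper name example_io_done and its nocache argument are hypothetical; vm_page_release() and VPR_TRYFREE come from the diff below, and the page's object must be unlocked when it is called.

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_page.h>

/*
 * Hypothetical caller, for illustration only: drop the wiring taken for a
 * completed I/O and let the VM system requeue the page or, if requested,
 * opportunistically free it.
 */
static void
example_io_done(vm_page_t m, bool nocache)
{
	int flags;

	/* VPR_TRYFREE asks vm_page_release() to try freeing the page. */
	flags = nocache ? VPR_TRYFREE : 0;
	vm_page_release(m, flags);
}

In the commit itself, sendfile_free_mext() computes the same flags value from EXT_FLAG_NOCACHE, as shown in the first hunk.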


@@ -121,76 +121,22 @@ sfstat_sysctl(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
/*
* Detach mapped page and release resources back to the system. Called
* by mbuf(9) code when last reference to a page is freed.
*/
static void
sendfile_free_page(vm_page_t pg, bool nocache)
{
bool freed;
vm_page_lock(pg);
/*
* In either case check for the object going away on us. This can
* happen since we don't hold a reference to it. If so, we're
* responsible for freeing the page. In 'noncache' case try to free
* the page, but only if it is cheap to.
*/
if (vm_page_unwire_noq(pg)) {
vm_object_t obj;
if ((obj = pg->object) == NULL)
vm_page_free(pg);
else {
freed = false;
if (nocache && !vm_page_xbusied(pg) &&
VM_OBJECT_TRYWLOCK(obj)) {
/* Only free unmapped pages. */
if (obj->ref_count == 0 ||
!pmap_page_is_mapped(pg))
/*
* The busy test before the object is
* locked cannot be relied upon.
*/
freed = vm_page_try_to_free(pg);
VM_OBJECT_WUNLOCK(obj);
}
if (!freed) {
/*
* If we were asked to not cache the page, place
* it near the head of the inactive queue so
* that it is reclaimed sooner. Otherwise,
* maintain LRU.
*/
if (nocache)
vm_page_deactivate_noreuse(pg);
else if (vm_page_active(pg))
vm_page_reference(pg);
else
vm_page_deactivate(pg);
}
}
}
vm_page_unlock(pg);
}
static void
sendfile_free_mext(struct mbuf *m)
{
struct sf_buf *sf;
vm_page_t pg;
bool nocache;
int flags;
KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF,
("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m));
sf = m->m_ext.ext_arg1;
pg = sf_buf_page(sf);
nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE;
flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0;
sf_buf_free(sf);
sendfile_free_page(pg, nocache);
vm_page_release(pg, flags);
if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
struct sendfile_sync *sfs = m->m_ext.ext_arg2;
@@ -208,21 +154,21 @@ sendfile_free_mext_pg(struct mbuf *m)
{
struct mbuf_ext_pgs *ext_pgs;
vm_page_t pg;
int i;
bool nocache, cache_last;
int flags, i;
bool cache_last;
KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_PGS,
("%s: m %p !M_EXT or !EXT_PGS", __func__, m));
nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE;
cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST;
ext_pgs = m->m_ext.ext_pgs;
flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0;
for (i = 0; i < ext_pgs->npgs; i++) {
if (cache_last && i == ext_pgs->npgs - 1)
nocache = false;
flags = 0;
pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
sendfile_free_page(pg, nocache);
vm_page_release(pg, flags);
}
if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {


@@ -2894,47 +2894,6 @@ vfs_vmio_iodone(struct buf *bp)
}
}
/*
* Unwire a page held by a buf and either free it or update the page queues to
* reflect its recent use.
*/
static void
vfs_vmio_unwire(struct buf *bp, vm_page_t m)
{
bool freed;
vm_page_lock(m);
if (vm_page_unwire_noq(m)) {
if ((bp->b_flags & B_DIRECT) != 0)
freed = vm_page_try_to_free(m);
else
freed = false;
if (!freed) {
/*
* Use a racy check of the valid bits to determine
* whether we can accelerate reclamation of the page.
* The valid bits will be stable unless the page is
* being mapped or is referenced by multiple buffers,
* and in those cases we expect races to be rare. At
* worst we will either accelerate reclamation of a
* valid page and violate LRU, or unnecessarily defer
* reclamation of an invalid page.
*
* The B_NOREUSE flag marks data that is not expected to
* be reused, so accelerate reclamation in that case
* too. Otherwise, maintain LRU.
*/
if (m->valid == 0 || (bp->b_flags & B_NOREUSE) != 0)
vm_page_deactivate_noreuse(m);
else if (vm_page_active(m))
vm_page_reference(m);
else
vm_page_deactivate(m);
}
}
vm_page_unlock(m);
}
/*
* Perform page invalidation when a buffer is released. The fully invalid
* pages will be reclaimed later in vfs_vmio_truncate().
@@ -2944,7 +2903,7 @@ vfs_vmio_invalidate(struct buf *bp)
{
vm_object_t obj;
vm_page_t m;
int i, resid, poffset, presid;
int flags, i, resid, poffset, presid;
if (buf_mapped(bp)) {
BUF_CHECK_MAPPED(bp);
@@ -2963,6 +2922,7 @@ vfs_vmio_invalidate(struct buf *bp)
*
* See man buf(9) for more information
*/
flags = (bp->b_flags & B_NOREUSE) != 0 ? VPR_NOREUSE : 0;
obj = bp->b_bufobj->bo_object;
resid = bp->b_bufsize;
poffset = bp->b_offset & PAGE_MASK;
@@ -2984,7 +2944,7 @@ vfs_vmio_invalidate(struct buf *bp)
}
if (pmap_page_wired_mappings(m) == 0)
vm_page_set_invalid(m, poffset, presid);
vfs_vmio_unwire(bp, m);
vm_page_release_locked(m, flags);
resid -= presid;
poffset = 0;
}
@@ -3000,7 +2960,7 @@ vfs_vmio_truncate(struct buf *bp, int desiredpages)
{
vm_object_t obj;
vm_page_t m;
int i;
int flags, i;
if (bp->b_npages == desiredpages)
return;
@@ -3015,14 +2975,22 @@ vfs_vmio_truncate(struct buf *bp, int desiredpages)
/*
* The object lock is needed only if we will attempt to free pages.
*/
obj = (bp->b_flags & B_DIRECT) != 0 ? bp->b_bufobj->bo_object : NULL;
if (obj != NULL)
flags = (bp->b_flags & B_NOREUSE) != 0 ? VPR_NOREUSE : 0;
if ((bp->b_flags & B_DIRECT) != 0) {
flags |= VPR_TRYFREE;
obj = bp->b_bufobj->bo_object;
VM_OBJECT_WLOCK(obj);
} else {
obj = NULL;
}
for (i = desiredpages; i < bp->b_npages; i++) {
m = bp->b_pages[i];
KASSERT(m != bogus_page, ("allocbuf: bogus page found"));
bp->b_pages[i] = NULL;
vfs_vmio_unwire(bp, m);
if (obj != NULL)
vm_page_release_locked(m, flags);
else
vm_page_release(m, flags);
}
if (obj != NULL)
VM_OBJECT_WUNLOCK(obj);


@@ -3747,29 +3747,92 @@ vm_page_unswappable(vm_page_t m)
vm_page_enqueue(m, PQ_UNSWAPPABLE);
}
/*
* Attempt to free the page. If it cannot be freed, do nothing. Returns true
* if the page is freed and false otherwise.
*
* The page must be managed. The page and its containing object must be
* locked.
*/
bool
vm_page_try_to_free(vm_page_t m)
static void
vm_page_release_toq(vm_page_t m, int flags)
{
vm_page_assert_locked(m);
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m));
if (m->dirty != 0 || vm_page_wired(m) || vm_page_busied(m))
return (false);
if (m->object->ref_count != 0) {
pmap_remove_all(m);
if (m->dirty != 0)
return (false);
/*
* Use a check of the valid bits to determine whether we should
* accelerate reclamation of the page. The object lock might not be
* held here, in which case the check is racy. At worst we will either
* accelerate reclamation of a valid page and violate LRU, or
* unnecessarily defer reclamation of an invalid page.
*
* If we were asked to not cache the page, place it near the head of the
* inactive queue so that it is reclaimed sooner.
*/
if ((flags & (VPR_TRYFREE | VPR_NOREUSE)) != 0 || m->valid == 0)
vm_page_deactivate_noreuse(m);
else if (vm_page_active(m))
vm_page_reference(m);
else
vm_page_deactivate(m);
}
/*
* Unwire a page and either attempt to free it or re-add it to the page queues.
*/
void
vm_page_release(vm_page_t m, int flags)
{
vm_object_t object;
bool freed;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("vm_page_release: page %p is unmanaged", m));
vm_page_lock(m);
if (m->object != NULL)
VM_OBJECT_ASSERT_UNLOCKED(m->object);
if (vm_page_unwire_noq(m)) {
if ((object = m->object) == NULL) {
vm_page_free(m);
} else {
freed = false;
if ((flags & VPR_TRYFREE) != 0 && !vm_page_busied(m) &&
/* Depends on type stability. */
VM_OBJECT_TRYWLOCK(object)) {
/*
* Only free unmapped pages. The busy test from
* before the object was locked cannot be relied
* upon.
*/
if ((object->ref_count == 0 ||
!pmap_page_is_mapped(m)) && m->dirty == 0 &&
!vm_page_busied(m)) {
vm_page_free(m);
freed = true;
}
VM_OBJECT_WUNLOCK(object);
}
if (!freed)
vm_page_release_toq(m, flags);
}
}
vm_page_free(m);
return (true);
vm_page_unlock(m);
}
/* See vm_page_release(). */
void
vm_page_release_locked(vm_page_t m, int flags)
{
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("vm_page_release_locked: page %p is unmanaged", m));
vm_page_lock(m);
if (vm_page_unwire_noq(m)) {
if ((flags & VPR_TRYFREE) != 0 &&
(m->object->ref_count == 0 || !pmap_page_is_mapped(m)) &&
m->dirty == 0 && !vm_page_busied(m)) {
vm_page_free(m);
} else {
vm_page_release_toq(m, flags);
}
}
vm_page_unlock(m);
}
/*


@@ -562,8 +562,12 @@ bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
void vm_page_reference(vm_page_t m);
#define VPR_TRYFREE 0x01
#define VPR_NOREUSE 0x02
void vm_page_release(vm_page_t m, int flags);
void vm_page_release_locked(vm_page_t m, int flags);
bool vm_page_remove(vm_page_t);
int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t);
vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object,
vm_pindex_t pindex);
void vm_page_requeue(vm_page_t m);
@@ -574,7 +578,6 @@ void vm_page_set_valid_range(vm_page_t m, int base, int size);
int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
void vm_page_sunbusy(vm_page_t m);
bool vm_page_try_to_free(vm_page_t m);
int vm_page_trysbusy(vm_page_t m);
void vm_page_unhold_pages(vm_page_t *ma, int count);
void vm_page_unswappable(vm_page_t m);
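
To tie the new declarations back to their callers, the sketch below (illustrative only, not from the commit) mirrors the vfs_vmio_invalidate() pattern, where the caller already holds the object write lock and therefore uses vm_page_release_locked(). The helper name example_release_run and its arguments are made up for the example, and every page in ma[] is assumed to belong to obj.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

/*
 * Illustrative helper: release a run of wired pages while the object lock
 * is held, optionally hinting that the data will not be reused soon.
 */
static void
example_release_run(vm_object_t obj, vm_page_t *ma, int count, bool noreuse)
{
	int flags, i;

	/* VPR_NOREUSE places pages near the head of the inactive queue. */
	flags = noreuse ? VPR_NOREUSE : 0;
	VM_OBJECT_WLOCK(obj);
	for (i = 0; i < count; i++)
		vm_page_release_locked(ma[i], flags);
	VM_OBJECT_WUNLOCK(obj);
}

As the hunks above show, sendfile maps EXT_FLAG_NOCACHE to VPR_TRYFREE, while the buffer cache maps B_NOREUSE to VPR_NOREUSE and B_DIRECT to VPR_TRYFREE.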