Correct an error of omission in the implementation of the truncation
operation on POSIX shared memory objects and tmpfs.  Previously, neither of
these modules correctly handled the case in which the new size of the object
or file was not a multiple of the page size.  Specifically, they did not
handle partial page truncation of data stored on swap.  As a result, stale
data might later be returned to an application.
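To make the failure mode concrete, here is a minimal userland sketch.  It
is not part of this commit; the object name "/truncdemo" and the assumption
that the last page actually gets paged out to swap are purely illustrative:

#include <sys/mman.h>

#include <assert.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	const size_t len = 3 * 4096;	/* three pages, assuming 4K pages */
	const off_t cut = 10000;	/* deliberately not page-aligned */
	char *p;
	size_t i;
	int fd;

	fd = shm_open("/truncdemo", O_RDWR | O_CREAT, 0600);
	assert(fd != -1);
	assert(ftruncate(fd, len) == 0);

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	assert(p != MAP_FAILED);
	memset(p, 0xaa, len);		/* dirty every page */
	assert(munmap(p, len) == 0);

	/*
	 * Assume the last page is paged out to swap at this point, e.g.,
	 * under memory pressure.  Shrink the object to a non-page-multiple
	 * size, then grow it back.
	 */
	assert(ftruncate(fd, cut) == 0);
	assert(ftruncate(fd, len) == 0);

	p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
	assert(p != MAP_FAILED);
	/*
	 * Bytes beyond "cut" must read back as zero.  Before this fix,
	 * they could be the stale 0xaa bytes preserved on swap.
	 */
	for (i = cut; i < len; i++)
		assert(p[i] == 0);

	assert(munmap(p, len) == 0);
	assert(close(fd) == 0);
	assert(shm_unlink("/truncdemo") == 0);
	return (0);
}

Whether stale data was actually observed depended on the last page having
been written out to swap, which is why the omission could go unnoticed.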

Interestingly, a data inconsistency was less likely to occur under tmpfs
than under POSIX shared memory objects, because a different mistake in the
tmpfs truncation operation happened to mask the problem.  If the
data was still resident in memory in a PG_CACHED page, then the tmpfs
truncation operation would reactivate that page, zero the truncated portion,
and leave the page pinned in memory.  More precisely, the benevolent error
was that the truncation operation didn't add the reactivated page to any of
the paging queues, effectively pinning the page.  This page would remain
pinned until the file was destroyed or the page was read or written.  With
this change, the page is now added to the inactive queue.
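Both of the new code paths below derive the same two quantities from the
new length: the index of the last remaining page and the offset within that
page at which zeroing must begin.  The following standalone sketch of that
arithmetic assumes 4 KB pages; PAGE_SIZE, PAGE_MASK, and OFF_TO_IDX are
redefined here only for illustration:

#include <stdio.h>

#define PAGE_SIZE	4096		/* assumed page size */
#define PAGE_MASK	(PAGE_SIZE - 1)
#define OFF_TO_IDX(off)	((off) / PAGE_SIZE)

int
main(void)
{
	long newsize = 10000;			/* new length, not page-aligned */
	long base = newsize & PAGE_MASK;	/* first byte to zero */
	long idx = OFF_TO_IDX(newsize);		/* last remaining page */

	/*
	 * Bytes base..PAGE_SIZE-1 of page idx must be zeroed, which is
	 * what pmap_zero_page_area(m, base, PAGE_SIZE - base) does.
	 */
	printf("page %ld: zero bytes %ld..%d (%ld bytes)\n",
	    idx, base, PAGE_SIZE - 1, PAGE_SIZE - base);
	/* Output: page 2: zero bytes 1808..4095 (2288 bytes) */
	return (0);
}

When base is zero the new size is page-aligned, there is no partial page,
and both hunks below skip this work entirely (the "if (base != 0)" guard).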

Discussed with:	jhb
Reviewed by:	kib (an earlier version)
MFC after:	3 weeks
Author:	Alan Cox
Date:	2012-01-08 20:09:26 +00:00
Commit:	2971897d51 (parent: ca54e1aee3)

2 changed files with 113 additions and 66 deletions

--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c

@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/swap_pager.h>
 #include <vm/vm_extern.h>
@@ -886,10 +887,10 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize)
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 	vm_object_t uobj;
-	vm_page_t m;
-	vm_pindex_t newpages, oldpages;
+	vm_page_t m, ma[1];
+	vm_pindex_t idx, newpages, oldpages;
 	off_t oldsize;
-	size_t zerolen;
+	int base, rv;
 
 	MPASS(vp->v_type == VREG);
 	MPASS(newsize >= 0);
@@ -912,14 +913,56 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize)
 	    newpages - oldpages > TMPFS_PAGES_AVAIL(tmp))
 		return (ENOSPC);
 
-	TMPFS_LOCK(tmp);
-	tmp->tm_pages_used += (newpages - oldpages);
-	TMPFS_UNLOCK(tmp);
-
-	node->tn_size = newsize;
-	vnode_pager_setsize(vp, newsize);
 	VM_OBJECT_LOCK(uobj);
 	if (newsize < oldsize) {
+		/*
+		 * Zero the truncated part of the last page.
+		 */
+		base = newsize & PAGE_MASK;
+		if (base != 0) {
+			idx = OFF_TO_IDX(newsize);
+retry:
+			m = vm_page_lookup(uobj, idx);
+			if (m != NULL) {
+				if ((m->oflags & VPO_BUSY) != 0 ||
+				    m->busy != 0) {
+					vm_page_sleep(m, "tmfssz");
+					goto retry;
+				}
+			} else if (vm_pager_has_page(uobj, idx, NULL, NULL)) {
+				m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL);
+				if (m == NULL) {
+					VM_OBJECT_UNLOCK(uobj);
+					VM_WAIT;
+					VM_OBJECT_LOCK(uobj);
+					goto retry;
+				} else if (m->valid != VM_PAGE_BITS_ALL) {
+					ma[0] = m;
+					rv = vm_pager_get_pages(uobj, ma, 1, 0);
+					m = vm_page_lookup(uobj, idx);
+				} else
+					/* A cached page was reactivated. */
+					rv = VM_PAGER_OK;
+				vm_page_lock(m);
+				if (rv == VM_PAGER_OK) {
+					vm_page_deactivate(m);
+					vm_page_unlock(m);
+					vm_page_wakeup(m);
+				} else {
+					vm_page_free(m);
+					vm_page_unlock(m);
+					VM_OBJECT_UNLOCK(uobj);
+					return (EIO);
+				}
+			}
+			if (m != NULL) {
+				pmap_zero_page_area(m, base, PAGE_SIZE - base);
+				MPASS(m->valid == VM_PAGE_BITS_ALL);
+				vm_page_dirty(m);
+				vm_pager_page_unswapped(m);
+			}
+		}
+
 		/*
 		 * Release any swap space and free any whole pages.
 		 */
@@ -928,19 +971,16 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize)
 			    newpages);
 			vm_object_page_remove(uobj, newpages, 0, 0);
 		}
-
-		/*
-		 * Zero the truncated part of the last page.
-		 */
-		zerolen = round_page(newsize) - newsize;
-		if (zerolen > 0) {
-			m = vm_page_grab(uobj, OFF_TO_IDX(newsize),
-			    VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
-			pmap_zero_page_area(m, PAGE_SIZE - zerolen, zerolen);
-		}
 	}
 	uobj->size = newpages;
 	VM_OBJECT_UNLOCK(uobj);
 
+	TMPFS_LOCK(tmp);
+	tmp->tm_pages_used += (newpages - oldpages);
+	TMPFS_UNLOCK(tmp);
+
+	node->tn_size = newsize;
+	vnode_pager_setsize(vp, newsize);
+
 	return (0);
 }

--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c

@@ -39,13 +39,6 @@
  *
  * (3) Resource limits?  Does this need its own resource limits or are the
  *     existing limits in mmap(2) sufficient?
- *
- * (4) Partial page truncation.  vnode_pager_setsize() will zero any parts
- *     of a partially mapped page as a result of ftruncate(2)/truncate(2).
- *     We can do the same (with the same pmap evil), but do we need to
- *     worry about the bits on disk if the page is swapped out or will the
- *     swapper zero the parts of a page that are invalid if the page is
- *     swapped back in for us?
  */
 
 #include <sys/cdefs.h>
@@ -86,6 +79,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/swap_pager.h>
@@ -253,9 +247,10 @@ static int
 shm_dotruncate(struct shmfd *shmfd, off_t length)
 {
 	vm_object_t object;
-	vm_page_t m;
-	vm_pindex_t nobjsize;
+	vm_page_t m, ma[1];
+	vm_pindex_t idx, nobjsize;
 	vm_ooffset_t delta;
+	int base, rv;
 
 	object = shmfd->shm_object;
 	VM_OBJECT_LOCK(object);
@@ -275,6 +270,56 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
 			VM_OBJECT_UNLOCK(object);
 			return (EBUSY);
 		}
+
+		/*
+		 * Zero the truncated part of the last page.
+		 */
+		base = length & PAGE_MASK;
+		if (base != 0) {
+			idx = OFF_TO_IDX(length);
+retry:
+			m = vm_page_lookup(object, idx);
+			if (m != NULL) {
+				if ((m->oflags & VPO_BUSY) != 0 ||
+				    m->busy != 0) {
+					vm_page_sleep(m, "shmtrc");
+					goto retry;
+				}
+			} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
+				m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
+				if (m == NULL) {
+					VM_OBJECT_UNLOCK(object);
+					VM_WAIT;
+					VM_OBJECT_LOCK(object);
+					goto retry;
+				} else if (m->valid != VM_PAGE_BITS_ALL) {
+					ma[0] = m;
+					rv = vm_pager_get_pages(object, ma, 1,
+					    0);
+					m = vm_page_lookup(object, idx);
+				} else
+					/* A cached page was reactivated. */
+					rv = VM_PAGER_OK;
+				vm_page_lock(m);
+				if (rv == VM_PAGER_OK) {
+					vm_page_deactivate(m);
+					vm_page_unlock(m);
+					vm_page_wakeup(m);
+				} else {
+					vm_page_free(m);
+					vm_page_unlock(m);
+					VM_OBJECT_UNLOCK(object);
+					return (EIO);
+				}
+			}
+			if (m != NULL) {
+				pmap_zero_page_area(m, base, PAGE_SIZE - base);
+				KASSERT(m->valid == VM_PAGE_BITS_ALL,
+				    ("shm_dotruncate: page %p is invalid", m));
+				vm_page_dirty(m);
+				vm_pager_page_unswapped(m);
+			}
+		}
 		delta = ptoa(object->size - nobjsize);
 
 		/* Toss in memory pages. */
@@ -289,45 +334,7 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
 		/* Free the swap accounted for shm */
 		swap_release_by_cred(delta, object->cred);
 		object->charge -= delta;
-
-		/*
-		 * If the last page is partially mapped, then zero out
-		 * the garbage at the end of the page.  See comments
-		 * in vnode_pager_setsize() for more details.
-		 *
-		 * XXXJHB: This handles in memory pages, but what about
-		 * a page swapped out to disk?
-		 */
-		if ((length & PAGE_MASK) &&
-		    (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL &&
-		    m->valid != 0) {
-			int base = (int)length & PAGE_MASK;
-			int size = PAGE_SIZE - base;
-
-			pmap_zero_page_area(m, base, size);
-
-			/*
-			 * Update the valid bits to reflect the blocks that
-			 * have been zeroed.  Some of these valid bits may
-			 * have already been set.
-			 */
-			vm_page_set_valid_range(m, base, size);
-
-			/*
-			 * Round "base" to the next block boundary so that the
-			 * dirty bit for a partially zeroed block is not
-			 * cleared.
-			 */
-			base = roundup2(base, DEV_BSIZE);
-
-			vm_page_clear_dirty(m, base, PAGE_SIZE - base);
-		} else if ((length & PAGE_MASK) &&
-		    __predict_false(object->cache != NULL)) {
-			vm_page_cache_free(object, OFF_TO_IDX(length),
-			    nobjsize);
-		}
 	} else {
 		/* Attempt to reserve the swap */
 		delta = ptoa(nobjsize - object->size);
 		if (!swap_reserve_by_cred(delta, object->cred)) {