In vm_object_page_clean(), do not clear the OBJ_MIGHTBEDIRTY object flag

if the filesystem performed a short write and we are skipping the page
because of it.

Propagate write errors from the pager back to the callers of
vm_pageout_flush().  Report a failure to write a page from the
requested range as a FALSE return value from vm_object_page_clean(),
and propagate it back to msync(2) so that it returns EIO to usermode.

While there, convert the clearobjflags variable in
vm_object_page_clean() and the corresponding arguments of the helper
functions to boolean_t.

PR:	kern/165927
Reviewed by:	alc
MFC after:	2 weeks
This commit is contained in:
kib 2012-03-17 23:00:32 +00:00
parent 7af7d69c50
commit 2963c3c979
7 changed files with 60 additions and 26 deletions

View File

@ -137,7 +137,8 @@ vm_contig_launder_page(vm_page_t m, vm_page_t *next)
object->type == OBJT_DEFAULT) {
vm_page_unlock_queues();
m_tmp = m;
vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, 0, NULL);
vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, 0,
NULL, NULL);
VM_OBJECT_UNLOCK(object);
vm_page_lock_queues();
return (0);

View File

@ -2591,6 +2591,7 @@ vm_map_sync(
vm_object_t object;
vm_ooffset_t offset;
unsigned int last_timestamp;
boolean_t failed;
vm_map_lock_read(map);
VM_MAP_RANGE_CHECK(map, start, end);
@ -2620,6 +2621,7 @@ vm_map_sync(
if (invalidate)
pmap_remove(map->pmap, start, end);
failed = FALSE;
/*
* Make a second pass, cleaning/uncaching pages from the indicated
@ -2648,7 +2650,8 @@ vm_map_sync(
vm_object_reference(object);
last_timestamp = map->timestamp;
vm_map_unlock_read(map);
vm_object_sync(object, offset, size, syncio, invalidate);
if (!vm_object_sync(object, offset, size, syncio, invalidate))
failed = TRUE;
start += size;
vm_object_deallocate(object);
vm_map_lock_read(map);
@ -2658,7 +2661,7 @@ vm_map_sync(
}
vm_map_unlock_read(map);
return (KERN_SUCCESS);
return (failed ? KERN_FAILURE : KERN_SUCCESS);
}
/*

View File

@ -508,6 +508,8 @@ sys_msync(td, uap)
return (EINVAL); /* Sun returns ENOMEM? */
case KERN_INVALID_ARGUMENT:
return (EBUSY);
case KERN_FAILURE:
return (EIO);
default:
return (EINVAL);
}

View File

@ -101,9 +101,10 @@ SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
"Use old (insecure) msync behavior");
static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p,
int pagerflags, int flags, int *clearobjflags);
int pagerflags, int flags, boolean_t *clearobjflags,
boolean_t *eio);
static boolean_t vm_object_page_remove_write(vm_page_t p, int flags,
int *clearobjflags);
boolean_t *clearobjflags);
static void vm_object_qcollapse(vm_object_t object);
static void vm_object_vndeallocate(vm_object_t object);
@ -775,7 +776,7 @@ vm_object_terminate(vm_object_t object)
* page should be flushed, and FALSE otherwise.
*/
static boolean_t
vm_object_page_remove_write(vm_page_t p, int flags, int *clearobjflags)
vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *clearobjflags)
{
/*
@ -784,7 +785,7 @@ vm_object_page_remove_write(vm_page_t p, int flags, int *clearobjflags)
* cleared in this case so we do not have to set them.
*/
if ((flags & OBJPC_NOSYNC) != 0 && (p->oflags & VPO_NOSYNC) != 0) {
*clearobjflags = 0;
*clearobjflags = FALSE;
return (FALSE);
} else {
pmap_remove_write(p);
@ -806,21 +807,25 @@ vm_object_page_remove_write(vm_page_t p, int flags, int *clearobjflags)
* Odd semantics: if start == end, we clean everything.
*
* The object must be locked.
*
* Returns FALSE if some page from the range was not written, as
* reported by the pager, and TRUE otherwise.
*/
void
boolean_t
vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
int flags)
{
vm_page_t np, p;
vm_pindex_t pi, tend, tstart;
int clearobjflags, curgeneration, n, pagerflags;
int curgeneration, n, pagerflags;
boolean_t clearobjflags, eio, res;
mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED);
VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
KASSERT(object->type == OBJT_VNODE, ("Not a vnode object"));
if ((object->flags & OBJ_MIGHTBEDIRTY) == 0 ||
object->resident_page_count == 0)
return;
return (TRUE);
pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ?
VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
@ -829,6 +834,7 @@ vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
tstart = OFF_TO_IDX(start);
tend = (end == 0) ? object->size : OFF_TO_IDX(end + PAGE_MASK);
clearobjflags = tstart == 0 && tend >= object->size;
res = TRUE;
rescan:
curgeneration = object->generation;
@ -845,7 +851,7 @@ vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
if ((flags & OBJPC_SYNC) != 0)
goto rescan;
else
clearobjflags = 0;
clearobjflags = FALSE;
}
np = vm_page_find_least(object, pi);
continue;
@ -854,12 +860,16 @@ vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
continue;
n = vm_object_page_collect_flush(object, p, pagerflags,
flags, &clearobjflags);
flags, &clearobjflags, &eio);
if (eio) {
res = FALSE;
clearobjflags = FALSE;
}
if (object->generation != curgeneration) {
if ((flags & OBJPC_SYNC) != 0)
goto rescan;
else
clearobjflags = 0;
clearobjflags = FALSE;
}
/*
@ -874,8 +884,10 @@ vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
* behind, but there is not much we can do there if
* filesystem refuses to write it.
*/
if (n == 0)
if (n == 0) {
n = 1;
clearobjflags = FALSE;
}
np = vm_page_find_least(object, pi + n);
}
#if 0
@ -884,11 +896,12 @@ vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
if (clearobjflags)
vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY);
return (res);
}
static int
vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
int flags, int *clearobjflags)
int flags, boolean_t *clearobjflags, boolean_t *eio)
{
vm_page_t ma[vm_pageout_page_count], p_first, tp;
int count, i, mreq, runlen;
@ -921,7 +934,7 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++)
ma[i] = tp;
vm_pageout_flush(ma, count, pagerflags, mreq, &runlen);
vm_pageout_flush(ma, count, pagerflags, mreq, &runlen, eio);
return (runlen);
}
@ -939,17 +952,20 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
* Note: certain anonymous maps, such as MAP_NOSYNC maps,
* may start out with a NULL object.
*/
void
boolean_t
vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
boolean_t syncio, boolean_t invalidate)
{
vm_object_t backing_object;
struct vnode *vp;
struct mount *mp;
int flags, fsync_after;
int error, flags, fsync_after;
boolean_t res;
if (object == NULL)
return;
return (TRUE);
res = TRUE;
error = 0;
VM_OBJECT_LOCK(object);
while ((backing_object = object->backing_object) != NULL) {
VM_OBJECT_LOCK(backing_object);
@ -995,13 +1011,16 @@ vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
fsync_after = FALSE;
}
VM_OBJECT_LOCK(object);
vm_object_page_clean(object, offset, offset + size, flags);
res = vm_object_page_clean(object, offset, offset + size,
flags);
VM_OBJECT_UNLOCK(object);
if (fsync_after)
(void) VOP_FSYNC(vp, MNT_WAIT, curthread);
error = VOP_FSYNC(vp, MNT_WAIT, curthread);
VOP_UNLOCK(vp, 0);
VFS_UNLOCK_GIANT(vfslocked);
vn_finished_write(mp);
if (error != 0)
res = FALSE;
VM_OBJECT_LOCK(object);
}
if ((object->type == OBJT_VNODE ||
@ -1021,6 +1040,7 @@ vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
OFF_TO_IDX(offset + size + PAGE_MASK), flags);
}
VM_OBJECT_UNLOCK(object);
return (res);
}
/*

View File

@ -227,7 +227,7 @@ void vm_object_set_writeable_dirty (vm_object_t);
void vm_object_init (void);
void vm_object_page_cache(vm_object_t object, vm_pindex_t start,
vm_pindex_t end);
void vm_object_page_clean(vm_object_t object, vm_ooffset_t start,
boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start,
vm_ooffset_t end, int flags);
void vm_object_page_remove(vm_object_t object, vm_pindex_t start,
vm_pindex_t end, int options);
@ -238,7 +238,7 @@ void vm_object_reference_locked(vm_object_t);
int vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr);
void vm_object_shadow (vm_object_t *, vm_ooffset_t *, vm_size_t);
void vm_object_split(vm_map_entry_t);
void vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t, boolean_t,
boolean_t vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t, boolean_t,
boolean_t);
void vm_object_madvise (vm_object_t, vm_pindex_t, int, int);
#endif /* _KERNEL */

View File

@ -445,7 +445,8 @@ vm_pageout_clean(vm_page_t m)
/*
* we allow reads during pageouts...
*/
return (vm_pageout_flush(&mc[page_base], pageout_count, 0, 0, NULL));
return (vm_pageout_flush(&mc[page_base], pageout_count, 0, 0, NULL,
NULL));
}
/*
@ -459,9 +460,12 @@ vm_pageout_clean(vm_page_t m)
*
* Returned runlen is the count of pages between mreq and first
* page after mreq with status VM_PAGER_AGAIN.
* *eio is set to TRUE if pager returned VM_PAGER_ERROR or VM_PAGER_FAIL
* for any page in runlen set.
*/
int
vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen)
vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
boolean_t *eio)
{
vm_object_t object = mc[0]->object;
int pageout_status[count];
@ -493,6 +497,8 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen)
vm_pager_put_pages(object, mc, count, flags, pageout_status);
runlen = count - mreq;
if (eio != NULL)
*eio = FALSE;
for (i = 0; i < count; i++) {
vm_page_t mt = mc[i];
@ -522,6 +528,8 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen)
vm_page_lock(mt);
vm_page_activate(mt);
vm_page_unlock(mt);
if (eio != NULL && i >= mreq && i - mreq < runlen)
*eio = TRUE;
break;
case VM_PAGER_AGAIN:
if (i >= mreq && i - mreq < runlen)

View File

@ -102,7 +102,7 @@ extern void vm_waitpfault(void);
#ifdef _KERNEL
boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
int vm_pageout_flush(vm_page_t *, int, int, int, int *);
int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *);
void vm_pageout_oom(int shortage);
boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
void vm_contig_grow_cache(int, vm_paddr_t, vm_paddr_t);