Revert r253939:
We cannot busy a page before doing pagefaults. In fact, it can deadlock against the vnode lock, as it tries to vget(). Other functions currently have the opposite lock ordering, like vm_object_sync(), which acquires the vnode lock first and then sleeps on the busy mechanism. Before this patch is reinserted, we need to break this ordering.

Sponsored by:	EMC / Isilon storage division
Reported by:	kib
parent 05101f7501
commit 899ab64514
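For context, the inversion described in the message is a classic two-lock (ABBA) deadlock. The interleaving below is an illustrative sketch, not a reproduced trace; the call sites are simplified, but the function names match the ones discussed in the message and the diff:

/*
 * Thread A (pagefault path, r253939)      Thread B (e.g. vm_object_sync())
 * ----------------------------------      --------------------------------
 * vm_page_io_start(m);
 *     (page m is now busy)
 *                                         vn_lock(vp, ...);
 *                                             (vnode lock acquired first)
 * vget(vp, ...);
 *     blocks: wants the vnode lock
 *     that B already holds
 *                                         vm_object_page_clean(obj, ...);
 *                                             sleeps until page m is
 *                                             unbusied -- which A will
 *                                             never do.  Deadlock.
 */

Each thread holds the resource the other is waiting for, so r253939 is backed out until the busy-before-vnode ordering can be broken.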
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c

@@ -324,8 +324,7 @@ zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
 }
 
 static vm_page_t
-page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes,
-    boolean_t alloc)
+page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
 {
 	vm_object_t obj;
 	vm_page_t pp;
@@ -347,8 +346,6 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes,
 				continue;
 			}
 		} else if (pp == NULL) {
-			if (!alloc)
-				break;
 			pp = vm_page_alloc(obj, OFF_TO_IDX(start),
 			    VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
 			    VM_ALLOC_NOBUSY);
@@ -359,10 +356,8 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes,
 
 		if (pp != NULL) {
 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
-			vm_page_io_start(pp);
-			if (!alloc)
-				break;
 			vm_object_pip_add(obj, 1);
+			vm_page_io_start(pp);
 			pmap_remove_write(pp);
 			vm_page_clear_dirty(pp, off, nbytes);
 		}
@@ -372,12 +367,55 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes,
 }
 
 static void
-page_unbusy(vm_page_t pp, boolean_t unalloc)
+page_unbusy(vm_page_t pp)
 {
 
 	vm_page_io_finish(pp);
-	if (unalloc)
-		vm_object_pip_subtract(pp->object, 1);
+	vm_object_pip_subtract(pp->object, 1);
 }
+
+static vm_page_t
+page_hold(vnode_t *vp, int64_t start)
+{
+	vm_object_t obj;
+	vm_page_t pp;
+
+	obj = vp->v_object;
+	zfs_vmobject_assert_wlocked(obj);
+
+	for (;;) {
+		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
+		    pp->valid) {
+			if ((pp->oflags & VPO_BUSY) != 0) {
+				/*
+				 * Reference the page before unlocking and
+				 * sleeping so that the page daemon is less
+				 * likely to reclaim it.
+				 */
+				vm_page_reference(pp);
+				vm_page_sleep(pp, "zfsmwb");
+				continue;
+			}
+
+			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
+			vm_page_lock(pp);
+			vm_page_hold(pp);
+			vm_page_unlock(pp);
+
+		} else
+			pp = NULL;
+		break;
+	}
+	return (pp);
+}
+
+static void
+page_unhold(vm_page_t pp)
+{
+
+	vm_page_lock(pp);
+	vm_page_unhold(pp);
+	vm_page_unlock(pp);
+}
 
 static caddr_t
@@ -441,8 +479,7 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
 
 			zfs_vmobject_wlock(obj);
 			vm_page_undirty(pp);
-		} else if ((pp = page_busy(vp, start, off, nbytes,
-		    TRUE)) != NULL) {
+		} else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
 			zfs_vmobject_wunlock(obj);
 
 			va = zfs_map_page(pp, &sf);
@@ -451,7 +488,7 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
 			zfs_unmap_page(sf);
 
 			zfs_vmobject_wlock(obj);
-			page_unbusy(pp, TRUE);
+			page_unbusy(pp);
 		}
 		len -= nbytes;
 		off = 0;
@@ -561,7 +598,7 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
 		vm_page_t pp;
 		uint64_t bytes = MIN(PAGESIZE - off, len);
 
-		if (pp = page_busy(vp, start, 0, 0, FALSE)) {
+		if (pp = page_hold(vp, start)) {
 			struct sf_buf *sf;
 			caddr_t va;
 
@@ -570,7 +607,7 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
 			error = uiomove(va + off, bytes, UIO_READ, uio);
 			zfs_unmap_page(sf);
 			zfs_vmobject_wlock(obj);
-			page_unbusy(pp, FALSE);
+			page_unhold(pp);
 		} else {
 			zfs_vmobject_wunlock(obj);
 			error = dmu_read_uio(os, zp->z_id, uio, bytes);
sys/fs/tmpfs/tmpfs_vnops.c

@@ -485,13 +485,13 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
 		vm_page_zero_invalid(m, TRUE);
 		vm_page_wakeup(m);
 	}
-	vm_page_io_start(m);
+	vm_page_lock(m);
+	vm_page_hold(m);
+	vm_page_unlock(m);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&m, offset, tlen, uio);
-	VM_OBJECT_WLOCK(tobj);
-	vm_page_io_finish(m);
-	VM_OBJECT_WUNLOCK(tobj);
+	vm_page_lock(m);
+	vm_page_unhold(m);
 	if (m->queue == PQ_NONE) {
 		vm_page_deactivate(m);
 	} else {
@@ -602,14 +602,16 @@ tmpfs_mappedwrite(vm_object_t tobj, size_t len, struct uio *uio)
 		vm_page_zero_invalid(tpg, TRUE);
 		vm_page_wakeup(tpg);
 	}
-	vm_page_io_start(tpg);
+	vm_page_lock(tpg);
+	vm_page_hold(tpg);
+	vm_page_unlock(tpg);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&tpg, offset, tlen, uio);
 	VM_OBJECT_WLOCK(tobj);
-	vm_page_io_finish(tpg);
 	if (error == 0)
 		vm_page_dirty(tpg);
 	vm_page_lock(tpg);
+	vm_page_unhold(tpg);
 	if (tpg->queue == PQ_NONE) {
 		vm_page_deactivate(tpg);
 	} else {
sys/kern/imgact_elf.c

@@ -378,7 +378,7 @@ __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 		off = offset - trunc_page(offset);
 		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
 		    end - start);
-		vm_imgact_unmap_page(object, sf);
+		vm_imgact_unmap_page(sf);
 		if (error) {
 			return (KERN_FAILURE);
 		}
@@ -433,7 +433,7 @@ __elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 			sz = PAGE_SIZE - off;
 			error = copyout((caddr_t)sf_buf_kva(sf) + off,
 			    (caddr_t)start, sz);
-			vm_imgact_unmap_page(object, sf);
+			vm_imgact_unmap_page(sf);
 			if (error) {
 				return (KERN_FAILURE);
 			}
@@ -553,7 +553,7 @@ __elfN(load_section)(struct image_params *imgp, vm_offset_t offset,
 		    trunc_page(offset + filsz);
 		error = copyout((caddr_t)sf_buf_kva(sf) + off,
 		    (caddr_t)map_addr, copy_len);
-		vm_imgact_unmap_page(object, sf);
+		vm_imgact_unmap_page(sf);
 		if (error) {
 			return (error);
 		}
sys/kern/kern_exec.c

@@ -973,7 +973,7 @@ exec_map_first_page(imgp)
 		vm_page_wakeup(ma[0]);
 	}
 	vm_page_lock(ma[0]);
-	vm_page_wire(ma[0]);
+	vm_page_hold(ma[0]);
 	vm_page_unlock(ma[0]);
 	VM_OBJECT_WUNLOCK(object);
 
@@ -994,7 +994,7 @@ exec_unmap_first_page(imgp)
 		sf_buf_free(imgp->firstpage);
 		imgp->firstpage = NULL;
 		vm_page_lock(m);
-		vm_page_unwire(m, 0);
+		vm_page_unhold(m);
 		vm_page_unlock(m);
 	}
 }
sys/kern/sys_process.c

@@ -263,7 +263,6 @@ proc_rwmem(struct proc *p, struct uio *uio)
 	writing = uio->uio_rw == UIO_WRITE;
 	reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
 	fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
-	fault_flags |= VM_FAULT_IOBUSY;
 
 	/*
 	 * Only map in one page at a time.  We don't have to, but it
@@ -288,9 +287,9 @@ proc_rwmem(struct proc *p, struct uio *uio)
 		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
 
 		/*
-		 * Fault and busy the page on behalf of the process.
+		 * Fault and hold the page on behalf of the process.
 		 */
-		error = vm_fault_handle(map, pageno, reqprot, fault_flags, &m);
+		error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
 		if (error != KERN_SUCCESS) {
 			if (error == KERN_RESOURCE_SHORTAGE)
 				error = ENOMEM;
@@ -316,9 +315,9 @@ proc_rwmem(struct proc *p, struct uio *uio)
 		/*
 		 * Release the page.
 		 */
-		VM_OBJECT_WLOCK(m->object);
-		vm_page_io_finish(m);
-		VM_OBJECT_WUNLOCK(m->object);
+		vm_page_lock(m);
+		vm_page_unhold(m);
+		vm_page_unlock(m);
 
 	} while (error == 0 && uio->uio_resid > 0);
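The revert restores hold-based, rather than busy-based, short-term page pinning. As a rough usage sketch of the restored interface, modeled on the proc_rwmem() code above (the map and va variables and the data-copy step are placeholders; error handling is trimmed):

	vm_page_t m;

	/* Fault the page in and take a hold reference on it. */
	if (vm_fault_hold(map, trunc_page(va), VM_PROT_READ,
	    VM_FAULT_NORMAL, &m) != KERN_SUCCESS)
		return (EFAULT);

	/* ... copy data to/from the held page, e.g. via an sf_buf ... */

	/* Release the page: drop the hold under the page lock. */
	vm_page_lock(m);
	vm_page_unhold(m);
	vm_page_unlock(m);

A hold pins the page's identity without blocking other lockers, which is what lets the fault path avoid the busy-versus-vnode-lock ordering described in the commit message.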
sys/vm/vm_extern.h

@@ -63,7 +63,7 @@ void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
     vm_ooffset_t *);
 int vm_fault_disable_pagefaults(void);
 void vm_fault_enable_pagefaults(int save);
-int vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold);
 int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count);
@@ -87,7 +87,7 @@ void vnode_pager_setsize(struct vnode *, vm_ooffset_t);
 int vslock(void *, size_t);
 void vsunlock(void *, size_t);
 struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
-void vm_imgact_unmap_page(vm_object_t, struct sf_buf *sf);
+void vm_imgact_unmap_page(struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
 int vm_mlock(struct proc *, struct ucred *, const void *, size_t);
sys/vm/vm_fault.c

@@ -221,8 +221,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
 		ktrfault(vaddr, fault_type);
 #endif
-	result = vm_fault_handle(map, trunc_page(vaddr), fault_type,
-	    fault_flags, NULL);
+	result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
+	    NULL);
 #ifdef KTRACE
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
 		ktrfaultend(result);
@@ -231,7 +231,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
 }
 
 int
-vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
 	vm_prot_t prot;
@@ -943,10 +943,7 @@ RetryFault:;
 	vm_page_activate(fs.m);
 	if (m_hold != NULL) {
 		*m_hold = fs.m;
-		if (fault_flags & VM_FAULT_IOBUSY)
-			vm_page_io_start(fs.m);
-		else
-			vm_page_hold(fs.m);
+		vm_page_hold(fs.m);
 	}
 	vm_page_unlock(fs.m);
 	vm_page_wakeup(fs.m);
@@ -1148,7 +1145,7 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
 	 * and hold these pages.
 	 */
 	for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
-		if (*mp == NULL && vm_fault_handle(map, va, prot,
+		if (*mp == NULL && vm_fault_hold(map, va, prot,
 		    VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
 			goto error;
 }
sys/vm/vm_glue.c

@@ -223,7 +223,7 @@ vsunlock(void *addr, size_t len)
  * Return the pinned page if successful; otherwise, return NULL.
  */
 static vm_page_t
-vm_imgact_page_iostart(vm_object_t object, vm_ooffset_t offset)
+vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
 {
 	vm_page_t m, ma[1];
 	vm_pindex_t pindex;
@@ -249,7 +249,9 @@ vm_imgact_page_iostart(vm_object_t object, vm_ooffset_t offset)
 		}
 		vm_page_wakeup(m);
 	}
-	vm_page_io_start(m);
+	vm_page_lock(m);
+	vm_page_hold(m);
+	vm_page_unlock(m);
 out:
 	VM_OBJECT_WUNLOCK(object);
 	return (m);
@@ -264,7 +266,7 @@ vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset)
 {
 	vm_page_t m;
 
-	m = vm_imgact_page_iostart(object, offset);
+	m = vm_imgact_hold_page(object, offset);
 	if (m == NULL)
 		return (NULL);
 	sched_pin();
@@ -275,16 +277,16 @@ vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset)
  * Destroy the given CPU private mapping and unpin the page that it mapped.
  */
 void
-vm_imgact_unmap_page(vm_object_t object, struct sf_buf *sf)
+vm_imgact_unmap_page(struct sf_buf *sf)
 {
 	vm_page_t m;
 
 	m = sf_buf_page(sf);
 	sf_buf_free(sf);
 	sched_unpin();
-	VM_OBJECT_WLOCK(object);
-	vm_page_io_finish(m);
-	VM_OBJECT_WUNLOCK(object);
+	vm_page_lock(m);
+	vm_page_unhold(m);
+	vm_page_unlock(m);
 }
 
 void
sys/vm/vm_map.h

@@ -329,7 +329,6 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #define	VM_FAULT_NORMAL	0		/* Nothing special */
 #define	VM_FAULT_CHANGE_WIRING	1	/* Change the wiring as appropriate */
 #define	VM_FAULT_DIRTY	2		/* Dirty the page; use w/VM_PROT_COPY */
-#define	VM_FAULT_IOBUSY	4		/* Busy the faulted page */
 
 /*
  * Initially, mappings are slightly sequential.  The maximum window size must