Implement kern.maxvnodes: adjusting kern.maxvnodes now actually has a
real effect.

Optimize vfs_msync(). Avoid having to continually drop and re-obtain
mutexes when scanning the vnode list. Improves the looping case by 500%.

Optimize ffs_sync(). Avoid having to continually drop and re-obtain
mutexes when scanning the vnode list. This makes a couple of assumptions,
which I believe are reasonable, regarding vnode stability while the mount
list mutex is held. Improves the looping case by 500%.

(More optimization work is needed on top of these fixes.)

MFC after: 1 week
This commit is contained in:
parent 6b72138fae
commit aafb37b657
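Both sync-path optimizations in this commit use the same list-scan discipline: keep the mount's vnode-list mutex held across the walk, drop it only around the expensive per-vnode work, and after re-locking compare the saved successor against TAILQ_NEXT(); if the list changed underneath, restart a bounded number of times. The following is a minimal userland sketch of that pattern only, not the kernel code itself; the names (struct node, do_expensive_work, list_mtx) are illustrative.

    #include <sys/queue.h>
    #include <pthread.h>
    #include <stdio.h>

    struct node {
        int               dirty;
        TAILQ_ENTRY(node) entries;
    };

    static TAILQ_HEAD(nodelist, node) head = TAILQ_HEAD_INITIALIZER(head);
    static pthread_mutex_t list_mtx = PTHREAD_MUTEX_INITIALIZER;

    /* Stand-in for the expensive work (vm_object_page_clean(), VOP_FSYNC()). */
    static void
    do_expensive_work(struct node *np)
    {
        np->dirty = 0;
    }

    static void
    scan(void)
    {
        struct node *np, *nnp;
        int tries = 5;

        pthread_mutex_lock(&list_mtx);
    loop:
        for (np = TAILQ_FIRST(&head); np != NULL; np = nnp) {
            nnp = TAILQ_NEXT(np, entries);
            if (!np->dirty)                 /* cheap test under the lock */
                continue;
            pthread_mutex_unlock(&list_mtx);
            do_expensive_work(np);          /* must not hold the list lock */
            pthread_mutex_lock(&list_mtx);
            if (TAILQ_NEXT(np, entries) != nnp) {
                if (--tries > 0)            /* list changed while unlocked */
                    goto loop;
                break;
            }
        }
        pthread_mutex_unlock(&list_mtx);
    }

    int
    main(void)
    {
        struct node n = { .dirty = 1 };

        TAILQ_INSERT_TAIL(&head, &n, entries);
        scan();
        printf("dirty after scan: %d\n", n.dirty);
        return (0);
    }

The bounded tries counter is the safety valve: if the list keeps changing while the lock is dropped, the scan gives up instead of looping forever.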
@@ -518,6 +518,49 @@ vattr_null(vap)
    vap->va_vaflags = 0;
}

/*
 * This routine is called when we have too many vnodes.  It attempts
 * to free <count> vnodes and will potentially free vnodes that still
 * have VM backing store (VM backing store is typically the cause
 * of a vnode blowout so we want to do this).  Therefore, this operation
 * is not considered cheap.
 *
 * A number of conditions may prevent a vnode from being reclaimed.
 * The buffer cache may have references on the vnode, a directory
 * vnode may still have references due to the namei cache representing
 * underlying files, or the vnode may be in active use.  It is not
 * desirable to reuse such vnodes.  These conditions may cause the
 * number of vnodes to reach some minimum value regardless of what
 * you set kern.maxvnodes to.  Do not set kern.maxvnodes too low.
 */
static void
vlrureclaim(struct mount *mp, int count)
{
    struct vnode *vp;

    mtx_lock(&mntvnode_mtx);
    while (count && (vp = TAILQ_FIRST(&mp->mnt_nvnodelist)) != NULL) {
        TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
        TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);

        if (vp->v_type != VNON &&
            vp->v_type != VBAD &&
            VMIGHTFREE(vp) &&           /* critical path opt */
            mtx_trylock(&vp->v_interlock)
        ) {
            mtx_unlock(&mntvnode_mtx);
            if (VMIGHTFREE(vp)) {
                vgonel(vp, curthread);
            } else {
                mtx_unlock(&vp->v_interlock);
            }
            mtx_lock(&mntvnode_mtx);
        }
        --count;
    }
    mtx_unlock(&mntvnode_mtx);
}

/*
 * Routines having to do with the management of the vnode table.
 */
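The comment above warns against setting kern.maxvnodes too low. As a quick way to inspect or raise the limit from userland, here is a hedged sketch using the standard sysctlbyname(3) interface; it assumes the sysctl is an int (as it was at the time), the target value 131072 is arbitrary, and raising the limit requires sufficient privilege.

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
        int maxvnodes, newmax = 131072;
        size_t len = sizeof(maxvnodes);

        /* Read the current limit. */
        if (sysctlbyname("kern.maxvnodes", &maxvnodes, &len, NULL, 0) == -1) {
            perror("sysctlbyname(kern.maxvnodes)");
            return (1);
        }
        printf("kern.maxvnodes is currently %d\n", maxvnodes);

        /* Raising the limit only succeeds with sufficient privilege. */
        if (sysctlbyname("kern.maxvnodes", NULL, NULL, &newmax, sizeof(newmax)) == -1)
            perror("raising kern.maxvnodes");
        else
            printf("kern.maxvnodes raised to %d\n", newmax);
        return (0);
    }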
@@ -532,25 +575,33 @@ getnewvnode(tag, mp, vops, vpp)
    vop_t **vops;
    struct vnode **vpp;
{
    int s, count;
    int s;
    struct thread *td = curthread;  /* XXX */
    struct vnode *vp = NULL;
    struct mount *vnmp;
    vm_object_t object;

    s = splbio();
    /*
     * We take the least recently used vnode from the freelist
     * if we can get it and it has no cached pages, and no
     * namecache entries are relative to it.
     * Otherwise we allocate a new vnode
     * Try to reuse vnodes if we hit the max.  This situation only
     * occurs in certain large-memory (2G+) situations.  For the
     * algorithm to be stable we have to try to reuse at least 2.
     * No hysteresis should be necessary.
     */
    if (numvnodes - freevnodes > desiredvnodes)
        vlrureclaim(mp, 2);

    /*
     * Attempt to reuse a vnode already on the free list, allocating
     * a new vnode if we can't find one or if we have not reached a
     * good minimum for good LRU performance.
     */

    s = splbio();
    mtx_lock(&vnode_free_list_mtx);

    if (freevnodes < wantfreevnodes) {
        vp = NULL;
    } else if (numvnodes >= minvnodes) {
    if (freevnodes >= wantfreevnodes && numvnodes >= minvnodes) {
        int count;

        for (count = 0; count < freevnodes; count++) {
            vp = TAILQ_FIRST(&vnode_free_list);
            if (vp == NULL || vp->v_usecount)
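A small illustration of the reclaim trigger added above, with made-up numbers: reclamation starts only once the vnodes actually in use (numvnodes minus freevnodes) exceed the kern.maxvnodes target (desiredvnodes).

    #include <stdio.h>

    /* Hedged illustration of the trigger predicate; figures below are made up. */
    static int
    need_reclaim(int numvnodes, int freevnodes, int desiredvnodes)
    {
        return (numvnodes - freevnodes > desiredvnodes);
    }

    int
    main(void)
    {
        /* 100000 target, 100050 allocated, only 40 free: 100010 > 100000 -> reclaim */
        printf("reclaim? %d\n", need_reclaim(100050, 40, 100000));
        /* same totals but 5000 free: 95050 <= 100000 -> leave the list alone */
        printf("reclaim? %d\n", need_reclaim(100050, 5000, 100000));
        return (0);
    }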
@@ -2408,22 +2459,20 @@ vfs_msync(struct mount *mp, int flags)
{
    struct vnode *vp, *nvp;
    struct vm_object *obj;
    int anyio, tries;
    int tries;

    GIANT_REQUIRED;

    tries = 5;
loop:
    anyio = 0;
    mtx_lock(&mntvnode_mtx);
loop:
    for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {

        nvp = TAILQ_NEXT(vp, v_nmntvnodes);

        if (vp->v_mount != mp) {
            mtx_unlock(&mntvnode_mtx);
            goto loop;
            if (--tries > 0)
                goto loop;
            break;
        }
        nvp = TAILQ_NEXT(vp, v_nmntvnodes);

        if (vp->v_flag & VXLOCK)        /* XXX: what if MNT_WAIT? */
            continue;
@@ -2431,36 +2480,27 @@ vfs_msync(struct mount *mp, int flags)
        if (vp->v_flag & VNOSYNC)       /* unlinked, skip it */
            continue;

        if (flags != MNT_WAIT) {
            if (VOP_GETVOBJECT(vp, &obj) != 0 ||
                (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
                continue;
            if (VOP_ISLOCKED(vp, NULL))
                continue;
        }

        mtx_unlock(&mntvnode_mtx);
        mtx_lock(&vp->v_interlock);
        if (VOP_GETVOBJECT(vp, &obj) == 0 &&
            (obj->flags & OBJ_MIGHTBEDIRTY)) {
        if ((vp->v_flag & VOBJDIRTY) &&
            (flags == MNT_WAIT || VOP_ISLOCKED(vp, NULL) == 0)) {
            mtx_unlock(&mntvnode_mtx);
            if (!vget(vp,
                LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curthread)) {
                LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curthread)) {
                if (VOP_GETVOBJECT(vp, &obj) == 0) {
                    vm_object_page_clean(obj, 0, 0,
                        flags == MNT_WAIT ?
                        OBJPC_SYNC : OBJPC_NOSYNC);
                    anyio = 1;
                }
                vput(vp);
            }
        } else {
            mtx_unlock(&vp->v_interlock);
            mtx_lock(&mntvnode_mtx);
            if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp) {
                if (--tries > 0)
                    goto loop;
                break;
            }
        }
        mtx_lock(&mntvnode_mtx);
    }
    mtx_unlock(&mntvnode_mtx);
    if (anyio && (--tries > 0))
        goto loop;
}

/*
@@ -175,6 +175,7 @@ struct vnode {
                        /* open for business    0x100000 */
#define VONWORKLST      0x200000        /* On syncer work-list */
#define VMOUNT          0x400000        /* Mount in progress */
#define VOBJDIRTY       0x800000        /* object might be dirty */

/*
 * Vnode attributes.  A field value of VNOVAL represents a field whose value
@@ -311,6 +312,10 @@ extern void (*lease_updatetime) __P((int deltat));
    (!(vp)->v_object || \
     !((vp)->v_object->ref_count || (vp)->v_object->resident_page_count)))

#define VMIGHTFREE(vp) \
    (!((vp)->v_flag & (VFREE|VDOOMED)) && \
     !(vp)->v_holdcnt && !(vp)->v_usecount)

#define VSHOULDBUSY(vp) \
    (((vp)->v_flag & VFREE) && \
     ((vp)->v_holdcnt || (vp)->v_usecount))
@@ -1001,10 +1001,10 @@ ffs_sync(mp, waitfor, cred, td)
     * Write back each (modified) inode.
     */
    wait = 0;
    lockreq = LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK;
    lockreq = LK_EXCLUSIVE | LK_NOWAIT;
    if (waitfor == MNT_WAIT) {
        wait = 1;
        lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
        lockreq = LK_EXCLUSIVE;
    }
    mtx_lock(&mntvnode_mtx);
loop:
@@ -1015,34 +1015,40 @@ ffs_sync(mp, waitfor, cred, td)
         */
        if (vp->v_mount != mp)
            goto loop;
        nvp = TAILQ_NEXT(vp, v_nmntvnodes);

        mtx_unlock(&mntvnode_mtx);
        mtx_lock(&vp->v_interlock);
        /*
         * Depend on the mntvnode_slock to keep things stable enough
         * for a quick test.  Since there might be hundreds of
         * thousands of vnodes, we cannot afford even a subroutine
         * call unless there's a good chance that we have work to do.
         */
        nvp = TAILQ_NEXT(vp, v_nmntvnodes);
        ip = VTOI(vp);
        if (vp->v_type == VNON || ((ip->i_flag &
            (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
            TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
            mtx_unlock(&vp->v_interlock);
            mtx_lock(&mntvnode_mtx);
            (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
            TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
            continue;
        }
        if (vp->v_type != VCHR) {
            mtx_unlock(&mntvnode_mtx);
            if ((error = vget(vp, lockreq, td)) != 0) {
                mtx_lock(&mntvnode_mtx);
                if (error == ENOENT)
                    goto loop;
                continue;
            } else {
                if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
                    allerror = error;
                VOP_UNLOCK(vp, 0, td);
                vrele(vp);
                mtx_lock(&mntvnode_mtx);
            }
            if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
                allerror = error;
            VOP_UNLOCK(vp, 0, td);
            vrele(vp);
        } else {
            mtx_unlock(&vp->v_interlock);
            mtx_unlock(&mntvnode_mtx);
            UFS_UPDATE(vp, wait);
            mtx_lock(&mntvnode_mtx);
        }
        mtx_lock(&mntvnode_mtx);
        if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
            goto loop;
    }
    mtx_unlock(&mntvnode_mtx);
    /*
@@ -806,8 +806,7 @@ RetryFault:;

        if (prot & VM_PROT_WRITE) {
            vm_page_flag_set(fs.m, PG_WRITEABLE);
            vm_object_set_flag(fs.m->object,
                OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
            vm_object_set_writeable_dirty(fs.m->object);

            /*
             * If the fault is a write, we know that this page is being
@@ -321,8 +321,11 @@ vm_object_reference(vm_object_t object)
    if (object == NULL)
        return;

#if 0
    /* object can be re-referenced during final cleaning */
    KASSERT(!(object->flags & OBJ_DEAD),
        ("vm_object_reference: attempting to reference dead obj"));
#endif

    object->ref_count++;
    if (object->type == OBJT_VNODE) {
@@ -454,8 +457,13 @@ vm_object_deallocate(vm_object_t object)
                temp->generation++;
                object->backing_object = NULL;
            }
            vm_object_terminate(object);
            /* unlocks and deallocates object */
            /*
             * Don't double-terminate, we could be in a termination
             * recursion due to the terminate having to sync data
             * to disk.
             */
            if ((object->flags & OBJ_DEAD) == 0)
                vm_object_terminate(object);
            object = temp;
        }
    }
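The guard above avoids recursion: vm_object_terminate() may have to sync data to disk, which can re-enter the deallocation path. A hedged, self-contained sketch of that re-entrancy guard follows; the names and the flag value are illustrative, not the VM code itself.

    #include <stdio.h>

    #define OBJ_DEAD 0x0008     /* illustrative flag value */

    struct obj {
        int flags;
    };

    static void obj_deallocate(struct obj *o);

    static void
    obj_terminate(struct obj *o)
    {
        o->flags |= OBJ_DEAD;
        /* Syncing data here may drop locks and re-enter obj_deallocate(). */
        obj_deallocate(o);
    }

    static void
    obj_deallocate(struct obj *o)
    {
        if ((o->flags & OBJ_DEAD) == 0)     /* don't double-terminate */
            obj_terminate(o);
        else
            printf("already terminating, skipping\n");
    }

    int
    main(void)
    {
        struct obj o = { 0 };

        obj_deallocate(&o);
        return (0);
    }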
@@ -627,7 +635,17 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int
    }

    if (clearobjflags && (tstart == 0) && (tend == object->size)) {
        struct vnode *vp;

        vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
        if (object->type == OBJT_VNODE &&
            (vp = (struct vnode *)object->handle) != NULL) {
            if (vp->v_flag & VOBJDIRTY) {
                mtx_lock(&vp->v_interlock);
                vp->v_flag &= ~VOBJDIRTY;
                mtx_unlock(&vp->v_interlock);
            }
        }
    }

rescan:
@@ -1357,6 +1375,8 @@ vm_object_collapse(vm_object_t object)
             * and no object references within it, all that is
             * necessary is to dispose of it.
             */
            KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
            KASSERT(TAILQ_FIRST(&backing_object->memq) == NULL, ("backing_object %p somehow has left over pages during collapse!", backing_object));

            TAILQ_REMOVE(
                &vm_object_list,
@@ -1684,6 +1704,23 @@ vm_object_in_map(vm_object_t object)
    return 0;
}

void
vm_object_set_writeable_dirty(vm_object_t object)
{
    struct vnode *vp;

    vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
    if (object->type == OBJT_VNODE &&
        (vp = (struct vnode *)object->handle) != NULL) {
        if ((vp->v_flag & VOBJDIRTY) == 0) {
            mtx_lock(&vp->v_interlock);
            vp->v_flag |= VOBJDIRTY;
            mtx_unlock(&vp->v_interlock);
        }
    }
}


DB_SHOW_COMMAND(vmochk, vm_object_check)
{
    vm_object_t object;
@@ -184,6 +184,7 @@ void vm_object_collapse (vm_object_t);
void vm_object_deallocate (vm_object_t);
void vm_object_terminate (vm_object_t);
void vm_object_vndeallocate (vm_object_t);
void vm_object_set_writeable_dirty (vm_object_t);
void vm_object_init (void);
void vm_object_page_clean (vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t);
void vm_object_page_remove (vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t);
@@ -609,7 +609,7 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
     * update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags.
     */
    if (m->flags & PG_WRITEABLE)
        vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
        vm_object_set_writeable_dirty(object);
}

/*