From bb68298f623ded730e0dc981b2f072c542325a78 Mon Sep 17 00:00:00 2001 From: attilio Date: Sun, 4 May 2008 13:54:55 +0000 Subject: [PATCH] sync_vnode() has some messy code about locking in order to deal with mount fs needing Giant to be held when processing bufobjs. Use a different subqueue for pending workitems on filesystems requiring Giant. This simplifies the code notably and also reduces the number of Giant acquisitions (and the whole processing cost). Suggested by: jeff Reviewed by: kib Tested by: pho --- sys/kern/vfs_subr.c | 76 ++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 3116ca9c8ba2..128bca289eff 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -89,6 +89,9 @@ __FBSDID("$FreeBSD$"); #include #endif +#define WI_MPSAFEQ 0 +#define WI_GIANTQ 1 + static MALLOC_DEFINE(M_NETADDR, "subr_export_host", "Export host address structure"); static void delmntque(struct vnode *vp); @@ -221,7 +224,7 @@ int prtactive; static int syncer_delayno; static long syncer_mask; LIST_HEAD(synclist, bufobj); -static struct synclist *syncer_workitem_pending; +static struct synclist *syncer_workitem_pending[2]; /* * The sync_mtx protects: * bo->bo_synclist @@ -317,8 +320,10 @@ vntblinit(void *dummy __unused) /* * Initialize the filesystem syncer. */ - syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, - &syncer_mask); + syncer_workitem_pending[WI_MPSAFEQ] = hashinit(syncer_maxdelay, M_VNODE, + &syncer_mask); + syncer_workitem_pending[WI_GIANTQ] = hashinit(syncer_maxdelay, M_VNODE, + &syncer_mask); syncer_maxdelay = syncer_mask + 1; mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); } @@ -1568,7 +1573,7 @@ brelvp(struct buf *bp) static void vn_syncer_add_to_worklist(struct bufobj *bo, int delay) { - int slot; + int queue, slot; ASSERT_BO_LOCKED(bo); @@ -1584,7 +1589,10 @@ vn_syncer_add_to_worklist(struct bufobj *bo, int delay) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; - LIST_INSERT_HEAD(&syncer_workitem_pending[slot], bo, bo_synclist); + queue = VFS_NEEDSGIANT(bo->__bo_vnode->v_mount) ? WI_GIANTQ : + WI_MPSAFEQ; + LIST_INSERT_HEAD(&syncer_workitem_pending[queue][slot], bo, + bo_synclist); mtx_unlock(&sync_mtx); } @@ -1617,38 +1625,13 @@ sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) { struct vnode *vp; struct mount *mp; - int vfslocked; - vfslocked = 0; -restart: *bo = LIST_FIRST(slp); - if (*bo == NULL) { - VFS_UNLOCK_GIANT(vfslocked); + if (*bo == NULL) return (0); - } vp = (*bo)->__bo_vnode; /* XXX */ - if (VFS_NEEDSGIANT(vp->v_mount)) { - if (!vfslocked) { - vfslocked = 1; - if (mtx_trylock(&Giant) == 0) { - mtx_unlock(&sync_mtx); - mtx_lock(&Giant); - mtx_lock(&sync_mtx); - goto restart; - } - } - } else { - VFS_UNLOCK_GIANT(vfslocked); - vfslocked = 0; - } - if (VOP_ISLOCKED(vp) != 0) { - VFS_UNLOCK_GIANT(vfslocked); + if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0) return (1); - } - if (VI_TRYLOCK(vp) == 0) { - VFS_UNLOCK_GIANT(vfslocked); - return (1); - } /* * We use vhold in case the vnode does not * successfully sync. vhold prevents the vnode from @@ -1660,7 +1643,6 @@ restart: VI_UNLOCK(vp); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); - VFS_UNLOCK_GIANT(vfslocked); mtx_lock(&sync_mtx); return (*bo == LIST_FIRST(slp)); } @@ -1680,7 +1662,6 @@ restart: } BO_UNLOCK(*bo); vdrop(vp); - VFS_UNLOCK_GIANT(vfslocked); mtx_lock(&sync_mtx); return (0); } @@ -1691,8 +1672,8 @@ restart: static void sched_sync(void) { - struct synclist *next; - struct synclist *slp; + struct synclist *gnext, *next; + struct synclist *gslp, *slp; struct bufobj *bo; long starttime; struct thread *td = curthread; @@ -1739,11 +1720,13 @@ sched_sync(void) * Skip over empty worklist slots when shutting down. */ do { - slp = &syncer_workitem_pending[syncer_delayno]; + slp = &syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno]; + gslp = &syncer_workitem_pending[WI_GIANTQ][syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; - next = &syncer_workitem_pending[syncer_delayno]; + next = &syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno]; + gnext = &syncer_workitem_pending[WI_GIANTQ][syncer_delayno]; /* * If the worklist has wrapped since the * it was emptied of all but syncer vnodes, @@ -1757,7 +1740,7 @@ sched_sync(void) syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && - syncer_worklist_len > 0); + LIST_EMPTY(gslp) && syncer_worklist_len > 0); /* * Keep track of the last time there was anything @@ -1777,6 +1760,21 @@ sched_sync(void) continue; } } + if (!LIST_EMPTY(gslp)) { + mtx_unlock(&sync_mtx); + mtx_lock(&Giant); + mtx_lock(&sync_mtx); + while (!LIST_EMPTY(gslp)) { + error = sync_vnode(gslp, &bo, td); + if (error == 1) { + LIST_REMOVE(bo, bo_synclist); + LIST_INSERT_HEAD(gnext, bo, + bo_synclist); + continue; + } + } + mtx_unlock(&Giant); + } if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) syncer_final_iter--; /*