LFS stability patches. There is still a problem with directory update
ordering that can prove fatal during large batches of deletes, but this
is much better than it was.  I probably won't be putting much more time
into this until Seltzer releases her new version of LFS which has
fragment support.  This should be available just before USENIX.
This commit is contained in:
gibbs 1995-01-04 23:46:34 +00:00
parent 7ed45041db
commit 11cf69520c
5 changed files with 87 additions and 61 deletions

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)lfs_inode.c 8.5 (Berkeley) 12/30/93
* $Id: lfs_inode.c,v 1.3 1994/08/29 06:09:15 davidg Exp $
* $Id: lfs_inode.c,v 1.4 1994/10/10 01:04:50 phk Exp $
*/
#include <sys/param.h>
@ -89,13 +89,26 @@ lfs_update(ap)
{
struct vnode *vp = ap->a_vp;
struct inode *ip;
int error;
if (vp->v_mount->mnt_flag & MNT_RDONLY)
if (vp->v_mount->mnt_flag & MNT_RDONLY){
return (0);
}
ip = VTOI(vp);
if ((ip->i_flag &
(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0)
return (0);
/* XXX
* We used to just return here. Now we make sure to check if
* we were called by lfs_fsync, since in this case, the inode
* may have been written to disk without all buffers connected
* with the vnode being flushed. It seems really suspicious
* that this could happen since from what I understand of the
* intended semantics, one of these flags should be set if there
* are still dirty buffers. Compare to how ffs_fsync/ffs_update
* work together and you'll see what I mean.
*/
if (((ip->i_flag & (IN_ACCESS|IN_CHANGE|IN_MODIFIED|IN_UPDATE)) == 0)
&& (vp->v_dirtyblkhd.lh_first == NULL))
return(0);
if (ip->i_flag & IN_ACCESS)
ip->i_atime.ts_sec = ap->a_access->tv_sec;
if (ip->i_flag & IN_UPDATE) {
@ -111,7 +124,11 @@ lfs_update(ap)
ip->i_flag |= IN_MODIFIED;
/* If sync, push back the vnode and any dirty blocks it may have. */
return (ap->a_waitfor & LFS_SYNC ? lfs_vflush(vp) : 0);
error = (ap->a_waitfor & LFS_SYNC ? lfs_vflush(vp) : 0);
if(ap->a_waitfor & LFS_SYNC && vp->v_dirtyblkhd.lh_first != NULL)
panic("lfs_update: dirty bufs");
return( error );
}
/* Update segment usage information when removing a block. */

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)lfs_segment.c 8.5 (Berkeley) 1/4/94
* $Id: lfs_segment.c,v 1.4 1994/08/20 03:49:02 davidg Exp $
* $Id: lfs_segment.c,v 1.5 1994/11/17 01:30:49 gibbs Exp $
*/
#include <sys/param.h>
@ -96,9 +96,11 @@ lfs_reclaim_buffers() {
s = splhigh();
for(i=0;i<lfs_total_free_count;i++) {
reclaimed = 1;
splx(s);
free(lfs_freebufs[i].address, M_SEGMENT);
s = splhigh();
if( lfs_freebufs[i].address ){
splx(s);
free(lfs_freebufs[i].address, M_SEGMENT);
s = splhigh();
}
lfs_total_io_size -= lfs_freebufs[i].size;
lfs_total_io_count -= 1;
}
@ -116,7 +118,7 @@ lfs_alloc_buffer(int size) {
caddr_t rtval;
if( lfs_total_free_count)
lfs_reclaim_buffers();
s = splhigh();
s = splhigh(); /* XXX can't this just be splbio?? */
while( ((lfs_total_io_count+1) >= MAX_IO_BUFS) ||
(lfs_total_io_size >= MAX_IO_SIZE)) {
lfs_free_needed = 1;
@ -180,21 +182,37 @@ struct lfs_stats lfs_stats;
*/
int
lfs_vflush(vp)
struct vnode *vp;
{
struct inode *ip;
struct lfs *fs;
struct segment *sp;
int error;
fs = VFSTOUFS(vp->v_mount)->um_lfs;
if (fs->lfs_nactive > MAX_ACTIVE)
return(lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP));
/* XXX
* lfs_segwrite uses lfs_writevnodes to flush dirty vnodes.
* lfs_writevnodes (by way of a check with lfs_vref) passes over
* locked vnodes. Since we usually come here with vp locked, anytime
* we just happen to call lfs_vflush and we are past the "MAX_ACTIVE"
threshold, we used to call lfs_segwrite and assume it would take
care of the problem... but of course it didn't. Now the question
remains, is this the right thing to do, or should lfs_segwrite or
* lfs_writevnodes be fixed to handle locked vnodes??
*/
if (fs->lfs_nactive > MAX_ACTIVE){
error = lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP);
if(error)
return(error);
}
lfs_seglock(fs, SEGM_SYNC);
sp = fs->lfs_sp;
ip = VTOI(vp);
if (vp->v_dirtyblkhd.lh_first == NULL)
lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY);
@ -206,6 +224,9 @@ lfs_vflush(vp)
} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);
if (vp->v_dirtyblkhd.lh_first != NULL)
panic("lfs_vflush: dirty bufs!!!\n");
#ifdef DOSTATS
++lfs_stats.nwrites;
if (sp->seg_flags & SEGM_SYNC)
@ -267,7 +288,7 @@ lfs_writevnodes(fs, mp, sp, op)
(void) lfs_writeinode(fs, sp, ip);
}
vp->v_flag &= ~VDIROP;
lfs_vunref(vp);
lfs_vunref(vp);
}
}
@ -297,7 +318,7 @@ lfs_segwrite(mp, flags)
clean = cip->clean;
brelse(bp);
if (clean <= 2) {
printf("segs clean: %d\n", clean);
printf("lfs_segwrite: ran out of clean segments, waiting for cleaner\n");
wakeup(&lfs_allclean_wakeup);
if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
"lfs writer", 0))
@ -922,18 +943,14 @@ lfs_writeseg(fs, sp)
} else
bcopy(bp->b_data, p, bp->b_bcount);
p += bp->b_bcount;
if (bp->b_flags & B_LOCKED)
if (bp->b_flags & B_LOCKED)
--locked_queue_count;
bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
B_LOCKED | B_GATHERED);
B_LOCKED | B_GATHERED);
if (bp->b_flags & B_CALL) {
/* if B_CALL, it was created with newbuf */
if (!(bp->b_flags & B_INVAL))
/*
free(bp->b_data, M_SEGMENT);
*/
lfs_free_buffer( bp->b_data, roundup( bp->b_bufsize, DEV_BSIZE));
/* free(bp, M_SEGMENT); */
relpbuf(bp);
} else {
bremfree(bp);
@ -942,9 +959,9 @@ lfs_writeseg(fs, sp)
brelse(bp);
}
}
cbp->b_bcount = p - (char *)cbp->b_data;
++cbp->b_vp->v_numoutput;
splx(s);
cbp->b_bcount = p - (char *)cbp->b_data;
/*
* XXXX This is a gross and disgusting hack. Since these
* buffers are physically addressed, they hang off the
@ -1071,11 +1088,8 @@ lfs_newbuf(vp, daddr, size)
size_t nbytes;
nbytes = roundup(size, DEV_BSIZE);
/* bp = malloc(sizeof(struct buf), M_SEGMENT, M_WAITOK); */
bp = getpbuf();
/* bzero(bp, sizeof(struct buf)); */
if (nbytes)
/* bp->b_data = malloc(nbytes, M_SEGMENT, M_WAITOK); */
bp->b_data = lfs_alloc_buffer( nbytes);
bgetvp(vp, bp);
bp->b_bufsize = size;
@ -1095,7 +1109,7 @@ lfs_callback(bp)
{
struct lfs *fs;
fs = (struct lfs *)bp->b_saveaddr;
fs = (struct lfs *)bp->b_saveaddr;
#ifdef DIAGNOSTIC
if (fs->lfs_iocount == 0)
panic("lfs_callback: zero iocount\n");
@ -1103,12 +1117,9 @@ lfs_callback(bp)
if (--fs->lfs_iocount == 0)
wakeup(&fs->lfs_iocount);
/*
free(bp->b_data, M_SEGMENT);
free(bp, M_SEGMENT);
*/
lfs_free_buffer( bp->b_data, roundup( bp->b_bufsize, DEV_BSIZE));
relpbuf(bp);
}
void
@ -1118,10 +1129,6 @@ lfs_supercallback(bp)
if( bp->b_data)
lfs_free_buffer( bp->b_data, roundup( bp->b_bufsize, DEV_BSIZE));
relpbuf(bp);
/*
free(bp->b_data, M_SEGMENT);
free(bp, M_SEGMENT);
*/
}
/*
@ -1170,26 +1177,24 @@ int
lfs_vref(vp)
register struct vnode *vp;
{
if (vp->v_flag & VXLOCK)
return(1);
return (vget(vp, 0));
if ((vp->v_flag & VXLOCK) ||
(vp->v_usecount == 0 &&
vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb))
return(1);
return (vget(vp, 0));
}
void
lfs_vunref(vp)
register struct vnode *vp;
{
extern int lfs_no_inactive;
/*
* This is vrele except that we do not want to VOP_INACTIVE
* this vnode. Rather than inline vrele here, we use a global
* flag to tell lfs_inactive not to run. Yes, its gross.
*/
lfs_no_inactive = 1;
vrele(vp);
lfs_no_inactive = 0;
/*
* This is vrele except that we do not want to VOP_INACTIVE
* this vnode. Rather than inline vrele here, we flag the vnode
* to tell lfs_inactive not to run on this vnode. Not as gross as
* a global.
*/
vp->v_flag |= VNINACT;
vrele(vp);
vp->v_flag &= ~VNINACT;
}

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)lfs_subr.c 8.2 (Berkeley) 9/21/93
* $Id: lfs_subr.c,v 1.3 1994/08/02 07:54:37 davidg Exp $
* $Id: lfs_subr.c,v 1.4 1994/11/17 01:30:51 gibbs Exp $
*/
#include <sys/param.h>
@ -106,6 +106,7 @@ lfs_seglock(fs, flags)
(void)tsleep(&fs->lfs_seglock, PRIBIO + 1,
"lfs seglock", 0);
/* XXX RACE CONDITION????? */
fs->lfs_seglock = 1;
fs->lfs_lockpid = curproc->p_pid;

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)lfs_syscalls.c 8.5 (Berkeley) 4/20/94
* $Id: lfs_syscalls.c,v 1.3 1994/08/02 07:54:38 davidg Exp $
* $Id: lfs_syscalls.c,v 1.4 1994/11/17 01:30:52 gibbs Exp $
*/
#include <sys/param.h>
@ -475,7 +475,7 @@ lfs_fastvget(mp, ino, daddr, vpp, dinp)
++ump->um_lfs->lfs_uinodes;
ip->i_flag |= IN_MODIFIED;
}
ip->i_flag |= IN_MODIFIED;
ip->i_flag |= IN_MODIFIED; /* XXX why is this here? it's redundant */
return (0);
}

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)lfs_vnops.c 8.5 (Berkeley) 12/30/93
* $Id: lfs_vnops.c,v 1.4 1994/09/21 03:47:40 wollman Exp $
* $Id: lfs_vnops.c,v 1.5 1994/09/22 19:38:39 wollman Exp $
*/
#include <sys/param.h>
@ -229,10 +229,13 @@ lfs_fsync(ap)
} */ *ap;
{
struct timeval tv;
int error;
tv = time;
return (VOP_UPDATE(ap->a_vp, &tv, &tv,
error = (VOP_UPDATE(ap->a_vp, &tv, &tv,
ap->a_waitfor == MNT_WAIT ? LFS_SYNC : 0));
if(ap->a_waitfor == MNT_WAIT && ap->a_vp->v_dirtyblkhd.lh_first != NULL)
panic("lfs_fsync: dirty bufs");
return( error );
}
/*
@ -474,9 +477,8 @@ lfs_close(ap)
}
/*
* Stub inactive routine that avoid calling ufs_inactive in some cases.
* Stub inactive routine that avoids calling ufs_inactive in some cases.
*/
int lfs_no_inactive = 0;
int
lfs_inactive(ap)
@ -485,7 +487,8 @@ lfs_inactive(ap)
} */ *ap;
{
if (lfs_no_inactive)
return (0);
if (ap->a_vp->v_flag & VNINACT) {
return(0);
}
return (ufs_inactive(ap));
}