The ffs_balloc_ufs{1,2} functions call bdwrite() while having several
vnode buffers locked at once. In particular, there are indirect buffers
among the locked ones. The bdwrite() may start flushing to keep the dirty
buffer list within its bounds. If any buffer on the dirty list requires
translation from logical to physical block number, the code may end up
trying to lock an indirect buffer already locked in ffs_balloc_ufsX.

Prevent the bdflush() activity when several buffers are locked at once
by setting the TDP_INBDFLUSH flag for the problematic code blocks.

Reported and tested by:	pho, Josef Buchsteiner at Juniper
In collaboration with:	kan
MFC after:	1 month
This commit is contained in:
kib 2008-07-23 14:32:44 +00:00
parent 4f882c5332
commit 167593c766

View File

@ -104,6 +104,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
int unwindidx = -1; int unwindidx = -1;
int saved_inbdflush;
ip = VTOI(vp); ip = VTOI(vp);
dp = ip->i_din1; dp = ip->i_din1;
@ -225,6 +226,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
if (num < 1) if (num < 1)
panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif #endif
saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
TDP_INBDFLUSH);
curthread->td_pflags |= TDP_INBDFLUSH;
/* /*
* Fetch the first indirect block allocating if necessary. * Fetch the first indirect block allocating if necessary.
*/ */
@ -237,8 +241,10 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
UFS_LOCK(ump); UFS_LOCK(ump);
pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
cred, &newb)) != 0) cred, &newb)) != 0) {
curthread->td_pflags &= saved_inbdflush;
return (error); return (error);
}
nb = newb; nb = newb;
*allocblk++ = nb; *allocblk++ = nb;
*lbns_remfree++ = indirs[1].in_lbn; *lbns_remfree++ = indirs[1].in_lbn;
@ -329,6 +335,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
* If asked only for the indirect block, then return it. * If asked only for the indirect block, then return it.
*/ */
if (flags & BA_METAONLY) { if (flags & BA_METAONLY) {
curthread->td_pflags &= saved_inbdflush;
*bpp = bp; *bpp = bp;
return (0); return (0);
} }
@ -366,6 +373,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
bp->b_flags |= B_CLUSTEROK; bp->b_flags |= B_CLUSTEROK;
bdwrite(bp); bdwrite(bp);
} }
curthread->td_pflags &= saved_inbdflush;
*bpp = nbp; *bpp = nbp;
return (0); return (0);
} }
@ -387,9 +395,11 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
nbp->b_blkno = fsbtodb(fs, nb); nbp->b_blkno = fsbtodb(fs, nb);
} }
curthread->td_pflags &= saved_inbdflush;
*bpp = nbp; *bpp = nbp;
return (0); return (0);
fail: fail:
curthread->td_pflags &= saved_inbdflush;
/* /*
* If we have failed to allocate any blocks, simply return the error. * If we have failed to allocate any blocks, simply return the error.
* This is the usual case and avoids the need to fsync the file. * This is the usual case and avoids the need to fsync the file.
@ -489,6 +499,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
int deallocated, osize, nsize, num, i, error; int deallocated, osize, nsize, num, i, error;
int unwindidx = -1; int unwindidx = -1;
int saved_inbdflush;
ip = VTOI(vp); ip = VTOI(vp);
dp = ip->i_din2; dp = ip->i_din2;
@ -719,6 +730,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
if (num < 1) if (num < 1)
panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif #endif
saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
TDP_INBDFLUSH);
curthread->td_pflags |= TDP_INBDFLUSH;
/* /*
* Fetch the first indirect block allocating if necessary. * Fetch the first indirect block allocating if necessary.
*/ */
@ -731,8 +745,10 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
UFS_LOCK(ump); UFS_LOCK(ump);
pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
cred, &newb)) != 0) cred, &newb)) != 0) {
curthread->td_pflags &= saved_inbdflush;
return (error); return (error);
}
nb = newb; nb = newb;
*allocblk++ = nb; *allocblk++ = nb;
*lbns_remfree++ = indirs[1].in_lbn; *lbns_remfree++ = indirs[1].in_lbn;
@ -823,6 +839,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
* If asked only for the indirect block, then return it. * If asked only for the indirect block, then return it.
*/ */
if (flags & BA_METAONLY) { if (flags & BA_METAONLY) {
curthread->td_pflags &= saved_inbdflush;
*bpp = bp; *bpp = bp;
return (0); return (0);
} }
@ -860,6 +877,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
bp->b_flags |= B_CLUSTEROK; bp->b_flags |= B_CLUSTEROK;
bdwrite(bp); bdwrite(bp);
} }
curthread->td_pflags &= saved_inbdflush;
*bpp = nbp; *bpp = nbp;
return (0); return (0);
} }
@ -887,9 +905,11 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
nbp->b_blkno = fsbtodb(fs, nb); nbp->b_blkno = fsbtodb(fs, nb);
} }
curthread->td_pflags &= saved_inbdflush;
*bpp = nbp; *bpp = nbp;
return (0); return (0);
fail: fail:
curthread->td_pflags &= saved_inbdflush;
/* /*
* If we have failed to allocate any blocks, simply return the error. * If we have failed to allocate any blocks, simply return the error.
* This is the usual case and avoids the need to fsync the file. * This is the usual case and avoids the need to fsync the file.