This patch fixes a deadlock between the bufdaemon and a process taking

a snapshot. As part of taking a snapshot of a filesystem, the kernel
builds up a list of the filesystem metadata (such as the cylinder
group bitmaps) that are contained in the snapshot. When doing a
copy-on-write check, the list is first consulted. If the block being
written is found on the list, then the full snapshot lookup can be
avoided. Besides providing an important performance speedup this
check also avoids a potential deadlock between the code creating
the snapshot and the bufdaemon trying to cleanup snapshot related
buffers. This fix creates a temporary list containing the key
metadata blocks that can cause the deadlock. This temporary list
is used between the time that the snapshot is first enabled and the
time that the fully complete list is built.

Reported by:	Attila Nagy <bra@fsn.hu>
Sponsored by:   DARPA & NAI Labs.
This commit is contained in:
Kirk McKusick 2003-02-22 00:59:34 +00:00
parent 37e2ebfdba
commit 5bb651cb72

View File

@ -119,16 +119,15 @@ ffs_snapshot(mp, snapfile)
struct mount *mp;
char *snapfile;
{
ufs2_daddr_t numblks, blkno;
ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
int error, cg, snaploc;
int i, size, len, loc;
int flag = mp->mnt_flag;
struct timespec starttime = {0, 0}, endtime;
char saved_nice = 0;
long redo = 0, snaplistsize;
long redo = 0, snaplistsize = 0;
int32_t *lp;
void *space;
daddr_t *snapblklist;
struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs;
struct snaphead *snaphead;
struct thread *td = curthread;
@ -394,7 +393,11 @@ restart:
* spec_strategy about writing on a suspended filesystem.
* Note that we skip unlinked snapshot files as they will
* be handled separately below.
*
* We also calculate the needed size for the snapshot list.
*/
snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
mp->mnt_kern_flag &= ~MNTK_SUSPENDED;
mtx_lock(&mntvnode_mtx);
loop:
@ -438,6 +441,7 @@ loop:
DIP(xp, i_db[loc]) = 0;
}
}
snaplistsize += 1;
if (xp->i_ump->um_fstype == UFS1)
error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
BLK_NOCOPY);
@ -486,6 +490,36 @@ loop:
vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, td);
transferlockers(&vp->v_lock, vp->v_vnlock);
lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
/*
* If this is the first snapshot on this filesystem, then we need
* to allocate the space for the list of preallocated snapshot blocks.
* This list will be refined below, but this preliminary one will
* keep us out of deadlock until the full one is ready.
*/
if (xp == NULL) {
MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t),
M_UFSMNT, M_WAITOK);
blkp = &snapblklist[1];
*blkp++ = lblkno(fs, fs->fs_sblockloc);
blkno = fragstoblks(fs, fs->fs_csaddr);
for (cg = 0; cg < fs->fs_ncg; cg++) {
if (fragstoblks(fs, cgtod(fs, cg) > blkno))
break;
*blkp++ = fragstoblks(fs, cgtod(fs, cg));
}
len = howmany(fs->fs_cssize, fs->fs_bsize);
for (loc = 0; loc < len; loc++)
*blkp++ = blkno + loc;
for (; cg < fs->fs_ncg; cg++)
*blkp++ = fragstoblks(fs, cgtod(fs, cg));
snapblklist[0] = blkp - snapblklist;
VI_LOCK(devvp);
if (devvp->v_rdev->si_snapblklist != NULL)
panic("ffs_snapshot: non-empty list");
devvp->v_rdev->si_snapblklist = snapblklist;
devvp->v_rdev->si_snaplistsize = blkp - snapblklist;
VI_UNLOCK(devvp);
}
/*
* Record snapshot inode. Since this is the newest snapshot,
* it must be placed at the end of the list.
@ -535,10 +569,8 @@ out1:
}
}
/*
* Allocate the space for the list of preallocated snapshot blocks.
* Allocate space for the full list of preallocated snapshot blocks.
*/
snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t),
M_UFSMNT, M_WAITOK);
ip->i_snapblklist = &snapblklist[1];
@ -556,6 +588,8 @@ out1:
FREE(snapblklist, M_UFSMNT);
goto done;
}
if (snaplistsize < ip->i_snapblklist - snapblklist)
panic("ffs_snapshot: list too small");
snaplistsize = ip->i_snapblklist - snapblklist;
snapblklist[0] = snaplistsize;
ip->i_snapblklist = 0;
@ -600,9 +634,11 @@ out1:
* should replace the previous list.
*/
VI_LOCK(devvp);
FREE(devvp->v_rdev->si_snapblklist, M_UFSMNT);
space = devvp->v_rdev->si_snapblklist;
devvp->v_rdev->si_snapblklist = snapblklist;
devvp->v_rdev->si_snaplistsize = snaplistsize;
if (space != NULL)
FREE(space, M_UFSMNT);
VI_UNLOCK(devvp);
done:
free(copy_fs->fs_csp, M_UFSMNT);