This patch fixes a deadlock between the bufdaemon and a process taking
a snapshot. As part of taking a snapshot of a filesystem, the kernel builds up a list of the filesystem metadata (such as the cylinder group bitmaps) that are contained in the snapshot. When doing a copy-on-write check, the list is first consulted. If the block being written is found on the list, then the full snapshot lookup can be avoided. Besides providing an important performance speedup this check also avoids a potential deadlock between the code creating the snapshot and the bufdaemon trying to cleanup snapshot related buffers. This fix creates a temporary list containing the key metadata blocks that can cause the deadlock. This temporary list is used between the time that the snapshot is first enabled and the time that the fully complete list is built. Reported by: Attila Nagy <bra@fsn.hu> Sponsored by: DARPA & NAI Labs.
This commit is contained in:
parent
37e2ebfdba
commit
5bb651cb72
@ -119,16 +119,15 @@ ffs_snapshot(mp, snapfile)
|
||||
struct mount *mp;
|
||||
char *snapfile;
|
||||
{
|
||||
ufs2_daddr_t numblks, blkno;
|
||||
ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
|
||||
int error, cg, snaploc;
|
||||
int i, size, len, loc;
|
||||
int flag = mp->mnt_flag;
|
||||
struct timespec starttime = {0, 0}, endtime;
|
||||
char saved_nice = 0;
|
||||
long redo = 0, snaplistsize;
|
||||
long redo = 0, snaplistsize = 0;
|
||||
int32_t *lp;
|
||||
void *space;
|
||||
daddr_t *snapblklist;
|
||||
struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs;
|
||||
struct snaphead *snaphead;
|
||||
struct thread *td = curthread;
|
||||
@ -394,7 +393,11 @@ restart:
|
||||
* spec_strategy about writing on a suspended filesystem.
|
||||
* Note that we skip unlinked snapshot files as they will
|
||||
* be handled separately below.
|
||||
*
|
||||
* We also calculate the needed size for the snapshot list.
|
||||
*/
|
||||
snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
|
||||
FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
|
||||
mp->mnt_kern_flag &= ~MNTK_SUSPENDED;
|
||||
mtx_lock(&mntvnode_mtx);
|
||||
loop:
|
||||
@ -438,6 +441,7 @@ loop:
|
||||
DIP(xp, i_db[loc]) = 0;
|
||||
}
|
||||
}
|
||||
snaplistsize += 1;
|
||||
if (xp->i_ump->um_fstype == UFS1)
|
||||
error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
|
||||
BLK_NOCOPY);
|
||||
@ -486,6 +490,36 @@ loop:
|
||||
vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, td);
|
||||
transferlockers(&vp->v_lock, vp->v_vnlock);
|
||||
lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
|
||||
/*
|
||||
* If this is the first snapshot on this filesystem, then we need
|
||||
* to allocate the space for the list of preallocated snapshot blocks.
|
||||
* This list will be refined below, but this preliminary one will
|
||||
* keep us out of deadlock until the full one is ready.
|
||||
*/
|
||||
if (xp == NULL) {
|
||||
MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t),
|
||||
M_UFSMNT, M_WAITOK);
|
||||
blkp = &snapblklist[1];
|
||||
*blkp++ = lblkno(fs, fs->fs_sblockloc);
|
||||
blkno = fragstoblks(fs, fs->fs_csaddr);
|
||||
for (cg = 0; cg < fs->fs_ncg; cg++) {
|
||||
if (fragstoblks(fs, cgtod(fs, cg) > blkno))
|
||||
break;
|
||||
*blkp++ = fragstoblks(fs, cgtod(fs, cg));
|
||||
}
|
||||
len = howmany(fs->fs_cssize, fs->fs_bsize);
|
||||
for (loc = 0; loc < len; loc++)
|
||||
*blkp++ = blkno + loc;
|
||||
for (; cg < fs->fs_ncg; cg++)
|
||||
*blkp++ = fragstoblks(fs, cgtod(fs, cg));
|
||||
snapblklist[0] = blkp - snapblklist;
|
||||
VI_LOCK(devvp);
|
||||
if (devvp->v_rdev->si_snapblklist != NULL)
|
||||
panic("ffs_snapshot: non-empty list");
|
||||
devvp->v_rdev->si_snapblklist = snapblklist;
|
||||
devvp->v_rdev->si_snaplistsize = blkp - snapblklist;
|
||||
VI_UNLOCK(devvp);
|
||||
}
|
||||
/*
|
||||
* Record snapshot inode. Since this is the newest snapshot,
|
||||
* it must be placed at the end of the list.
|
||||
@ -535,10 +569,8 @@ out1:
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Allocate the space for the list of preallocated snapshot blocks.
|
||||
* Allocate space for the full list of preallocated snapshot blocks.
|
||||
*/
|
||||
snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
|
||||
FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
|
||||
MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t),
|
||||
M_UFSMNT, M_WAITOK);
|
||||
ip->i_snapblklist = &snapblklist[1];
|
||||
@ -556,6 +588,8 @@ out1:
|
||||
FREE(snapblklist, M_UFSMNT);
|
||||
goto done;
|
||||
}
|
||||
if (snaplistsize < ip->i_snapblklist - snapblklist)
|
||||
panic("ffs_snapshot: list too small");
|
||||
snaplistsize = ip->i_snapblklist - snapblklist;
|
||||
snapblklist[0] = snaplistsize;
|
||||
ip->i_snapblklist = 0;
|
||||
@ -600,9 +634,11 @@ out1:
|
||||
* should replace the previous list.
|
||||
*/
|
||||
VI_LOCK(devvp);
|
||||
FREE(devvp->v_rdev->si_snapblklist, M_UFSMNT);
|
||||
space = devvp->v_rdev->si_snapblklist;
|
||||
devvp->v_rdev->si_snapblklist = snapblklist;
|
||||
devvp->v_rdev->si_snaplistsize = snaplistsize;
|
||||
if (space != NULL)
|
||||
FREE(space, M_UFSMNT);
|
||||
VI_UNLOCK(devvp);
|
||||
done:
|
||||
free(copy_fs->fs_csp, M_UFSMNT);
|
||||
|
Loading…
x
Reference in New Issue
Block a user