This patch corrects the first round of panics and hangs reported

with the new snapshot code.

Update addaliasu to correctly implement the semantics of the old
checkalias function. When a device vnode first comes into existence,
check to see if an anonymous vnode for the same device was created
at boot time by bdevvp(). If so, adopt the bdevvp vnode rather than
creating a new vnode for the device. This corrects a problem which
caused the kernel to panic when taking a snapshot of the root
filesystem.

Change the calling convention of vn_write_suspend_wait() to be the
same as vn_start_write().

Split out softdep_flushworklist() from softdep_flushfiles() so that
it can be used to clear the work queue when suspending filesystem
operations.

Access to buffers becomes recursive so that snapshots can recursively
traverse their indirect blocks using ffs_copyonwrite() when checking
for the need for copy on write when flushing one of their own indirect
blocks. This eliminates a deadlock between the syncer daemon and a
process taking a snapshot.

Ensure that softdep_process_worklist() can never block because of a
snapshot being taken. This eliminates a problem with buffer starvation.

Cleanup change in ffs_sync() which did not synchronously wait when
MNT_WAIT was specified. The result was an unclean filesystem panic
when doing forcible unmount with heavy filesystem I/O in progress.

Return a zero'ed block when reading a block that was not in use at
the time that a snapshot was taken. Normally, these blocks should
never be read. However, the readahead code will occasionally read
them which can cause unexpected behavior.

Clean up the debugging code that ensures that no blocks be written
on a filesystem while it is suspended. Snapshots must explicitly
label the blocks that they are writing during the suspension so that
they do not cause a `write on suspended filesystem' panic.

Reorganize ffs_copyonwrite() to eliminate a deadlock and also to
prevent a race condition that would permit the same block to be
copied twice. This change eliminates an unexpected soft updates
inconsistency in fsck caused by the double allocation.

Use bqrelse rather than brelse for buffers that will be needed
soon again by the snapshot code. This improves snapshot performance.
This commit is contained in:
Kirk McKusick 2000-07-24 05:28:33 +00:00
parent 3adc8b3d1d
commit 9b97113391
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=63788
24 changed files with 298 additions and 103 deletions

View File

@ -855,7 +855,8 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir)
case VCHR:
case VBLK:
vp->v_op = cd9660_specop_p;
addaliasu(vp, ip->inode.iso_rdev);
vp = addaliasu(vp, ip->inode.iso_rdev);
ip->i_vnode = vp;
break;
default:
break;

View File

@ -421,9 +421,11 @@ spec_strategy(ap)
bp = ap->a_bp;
vp = ap->a_vp;
if ((bp->b_iocmd == BIO_WRITE)) {
if (vp->v_mount != NULL &&
(vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
(bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
panic("spec_strategy: bad I/O");
bp->b_flags &= ~B_VALIDSUSPWRT;
if (LIST_FIRST(&bp->b_dep) != NULL)
buf_start(bp);
if ((vp->v_flag & VCOPYONWRITE) &&

View File

@ -147,7 +147,18 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
num = *nump;
if (num == 0) {
*bnp = blkptrtodb(ump, ip->i_db[bn]);
if (*bnp == 0) {
/*
* Since this is FFS independent code, we are out of
* scope for the definitions of BLK_NOCOPY and
* BLK_SNAP, but we do know that they will fall in
* the range 1..um_seqinc, so we use that test and
* return a request for a zeroed out buffer if attempts
* are made to read a BLK_NOCOPY or BLK_SNAP block.
*/
if ((ip->i_flags & SF_SNAPSHOT) &&
ip->i_db[bn] > 0 && ip->i_db[bn] < ump->um_seqinc) {
*bnp = -1;
} else if (*bnp == 0) {
if (ip->i_flags & SF_SNAPSHOT)
*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
else
@ -230,6 +241,17 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
if (bp)
bqrelse(bp);
/*
* Since this is FFS independent code, we are out of scope for the
* definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
* will fall in the range 1..um_seqinc, so we use that test and
* return a request for a zeroed out buffer if attempts are made
* to read a BLK_NOCOPY or BLK_SNAP block.
*/
if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){
*bnp = -1;
return (0);
}
*bnp = blkptrtodb(ump, daddr);
if (*bnp == 0) {
if (ip->i_flags & SF_SNAPSHOT)

View File

@ -147,7 +147,18 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
num = *nump;
if (num == 0) {
*bnp = blkptrtodb(ump, ip->i_db[bn]);
if (*bnp == 0) {
/*
* Since this is FFS independent code, we are out of
* scope for the definitions of BLK_NOCOPY and
* BLK_SNAP, but we do know that they will fall in
* the range 1..um_seqinc, so we use that test and
* return a request for a zeroed out buffer if attempts
* are made to read a BLK_NOCOPY or BLK_SNAP block.
*/
if ((ip->i_flags & SF_SNAPSHOT) &&
ip->i_db[bn] > 0 && ip->i_db[bn] < ump->um_seqinc) {
*bnp = -1;
} else if (*bnp == 0) {
if (ip->i_flags & SF_SNAPSHOT)
*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
else
@ -230,6 +241,17 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
if (bp)
bqrelse(bp);
/*
* Since this is FFS independent code, we are out of scope for the
* definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
* will fall in the range 1..um_seqinc, so we use that test and
* return a request for a zeroed out buffer if attempts are made
* to read a BLK_NOCOPY or BLK_SNAP block.
*/
if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){
*bnp = -1;
return (0);
}
*bnp = blkptrtodb(ump, daddr);
if (*bnp == 0) {
if (ip->i_flags & SF_SNAPSHOT)

View File

@ -855,7 +855,8 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir)
case VCHR:
case VBLK:
vp->v_op = cd9660_specop_p;
addaliasu(vp, ip->inode.iso_rdev);
vp = addaliasu(vp, ip->inode.iso_rdev);
ip->i_vnode = vp;
break;
default:
break;

View File

@ -1296,15 +1296,45 @@ bdevvp(dev, vpp)
* how many users there are is inadequate; the v_usecount for
* the vnodes need to be accumulated. vcount() does that.
*/
void
struct vnode *
addaliasu(nvp, nvp_rdev)
struct vnode *nvp;
udev_t nvp_rdev;
{
struct vnode *ovp;
vop_t **ops;
dev_t dev;
if (nvp->v_type != VBLK && nvp->v_type != VCHR)
panic("addaliasu on non-special vnode");
addalias(nvp, udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0));
dev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0);
/*
* Check to see if we have a bdevvp vnode with no associated
* filesystem. If so, we want to associate the filesystem of
* the newly instigated vnode with the bdevvp vnode and
* discard the newly created vnode rather than leaving the
* bdevvp vnode lying around with no associated filesystem.
*/
if (vfinddev(dev, nvp->v_type, &ovp) == 0 || ovp->v_data != NULL) {
addalias(nvp, dev);
return (nvp);
}
/*
* Discard unneeded vnode, but save its node specific data.
* Note that if there is a lock, it is carried over in the
* node specific data to the replacement vnode.
*/
vref(ovp);
ovp->v_data = nvp->v_data;
ovp->v_tag = nvp->v_tag;
nvp->v_data = NULL;
ops = nvp->v_op;
nvp->v_op = ovp->v_op;
ovp->v_op = ops;
insmntque(ovp, nvp->v_mount);
vrele(nvp);
vgone(nvp);
return (ovp);
}
void
@ -1648,7 +1678,7 @@ vclean(vp, flags, p)
*/
if (flags & DOCLOSE) {
if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
(void) vn_write_suspend_wait(vp, V_WAIT);
(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
vinvalbuf(vp, 0, NOCRED, p, 0, 0);
}

View File

@ -1296,15 +1296,45 @@ bdevvp(dev, vpp)
* how many users there are is inadequate; the v_usecount for
* the vnodes need to be accumulated. vcount() does that.
*/
void
struct vnode *
addaliasu(nvp, nvp_rdev)
struct vnode *nvp;
udev_t nvp_rdev;
{
struct vnode *ovp;
vop_t **ops;
dev_t dev;
if (nvp->v_type != VBLK && nvp->v_type != VCHR)
panic("addaliasu on non-special vnode");
addalias(nvp, udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0));
dev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0);
/*
* Check to see if we have a bdevvp vnode with no associated
* filesystem. If so, we want to associate the filesystem of
* the newly instigated vnode with the bdevvp vnode and
* discard the newly created vnode rather than leaving the
* bdevvp vnode lying around with no associated filesystem.
*/
if (vfinddev(dev, nvp->v_type, &ovp) == 0 || ovp->v_data != NULL) {
addalias(nvp, dev);
return (nvp);
}
/*
* Discard unneeded vnode, but save its node specific data.
* Note that if there is a lock, it is carried over in the
* node specific data to the replacement vnode.
*/
vref(ovp);
ovp->v_data = nvp->v_data;
ovp->v_tag = nvp->v_tag;
nvp->v_data = NULL;
ops = nvp->v_op;
nvp->v_op = ovp->v_op;
ovp->v_op = ops;
insmntque(ovp, nvp->v_mount);
vrele(nvp);
vgone(nvp);
return (ovp);
}
void
@ -1648,7 +1678,7 @@ vclean(vp, flags, p)
*/
if (flags & DOCLOSE) {
if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
(void) vn_write_suspend_wait(vp, V_WAIT);
(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
vinvalbuf(vp, 0, NOCRED, p, 0, 0);
}

View File

@ -729,17 +729,19 @@ vn_start_write(vp, mpp, flags)
* time, these operations are halted until the suspension is over.
*/
int
vn_write_suspend_wait(vp, flags)
vn_write_suspend_wait(vp, mp, flags)
struct vnode *vp;
struct mount *mp;
int flags;
{
struct mount *mp;
int error;
if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
if (error != EOPNOTSUPP)
return (error);
return (0);
if (vp != NULL) {
if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
if (error != EOPNOTSUPP)
return (error);
return (0);
}
}
/*
* If we are not suspended or have not yet reached suspended

View File

@ -421,9 +421,11 @@ spec_strategy(ap)
bp = ap->a_bp;
vp = ap->a_vp;
if ((bp->b_iocmd == BIO_WRITE)) {
if (vp->v_mount != NULL &&
(vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
(bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
panic("spec_strategy: bad I/O");
bp->b_flags &= ~B_VALIDSUSPWRT;
if (LIST_FIRST(&bp->b_dep) != NULL)
buf_start(bp);
if ((vp->v_flag & VCOPYONWRITE) &&

View File

@ -1282,7 +1282,8 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
}
if (vp->v_type == VCHR || vp->v_type == VBLK) {
vp->v_op = spec_nfsv2nodeop_p;
addaliasu(vp, rdev);
vp = addaliasu(vp, rdev);
np->n_vnode = vp;
}
np->n_mtime = mtime.tv_sec;
}

View File

@ -1282,7 +1282,8 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
}
if (vp->v_type == VCHR || vp->v_type == VBLK) {
vp->v_op = spec_nfsv2nodeop_p;
addaliasu(vp, rdev);
vp = addaliasu(vp, rdev);
np->n_vnode = vp;
}
np->n_mtime = mtime.tv_sec;
}

View File

@ -1282,7 +1282,8 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
}
if (vp->v_type == VCHR || vp->v_type == VBLK) {
vp->v_op = spec_nfsv2nodeop_p;
addaliasu(vp, rdev);
vp = addaliasu(vp, rdev);
np->n_vnode = vp;
}
np->n_mtime = mtime.tv_sec;
}

View File

@ -1282,7 +1282,8 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
}
if (vp->v_type == VCHR || vp->v_type == VBLK) {
vp->v_op = spec_nfsv2nodeop_p;
addaliasu(vp, rdev);
vp = addaliasu(vp, rdev);
np->n_vnode = vp;
}
np->n_mtime = mtime.tv_sec;
}

View File

@ -183,7 +183,7 @@ struct buf {
#define B_UNUSED0 0x00000008 /* Old B_BAD */
#define B_DEFERRED 0x00000010 /* Skipped over for cleaning */
#define B_CACHE 0x00000020 /* Bread found us in the cache. */
#define B_UNUSED40 0x00000040 /* Old B_CALL */
#define B_VALIDSUSPWRT 0x00000040 /* Valid write during suspension. */
#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
#define B_DONE 0x00000200 /* I/O completed. */
#define B_EINTR 0x00000400 /* I/O was interrupted */
@ -237,7 +237,7 @@ extern char *buf_wmesg; /* Default buffer lock message */
* Initialize a lock.
*/
#define BUF_LOCKINIT(bp) \
lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, 0)
lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, LK_CANRECURSE)
/*
*
* Get a lock sleeping non-interruptably until it becomes available.
@ -467,6 +467,7 @@ buf_countdeps(struct buf *bp, int i)
#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */
#define B_SYNC 0x02 /* Do all allocations synchronously. */
#define B_METAONLY 0x04 /* Return indirect block buffer. */
#define B_NOWAIT 0x08 /* do not sleep to await lock */
#ifdef _KERNEL
extern int nbuf; /* The number of buffer headers */

View File

@ -537,7 +537,7 @@ struct vop_bwrite_args;
extern int (*lease_check_hook) __P((struct vop_lease_args *));
void addalias __P((struct vnode *vp, dev_t nvp_rdev));
void addaliasu __P((struct vnode *vp, udev_t nvp_rdev));
struct vnode *addaliasu __P((struct vnode *vp, udev_t nvp_rdev));
int bdevvp __P((dev_t dev, struct vnode **vpp));
/* cache_* may belong in namei.h. */
void cache_enter __P((struct vnode *dvp, struct vnode *vp,
@ -593,7 +593,8 @@ int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
int vn_start_write __P((struct vnode *vp, struct mount **mpp, int flags));
dev_t vn_todev __P((struct vnode *vp));
int vn_write_suspend_wait __P((struct vnode *vp, int flags));
int vn_write_suspend_wait __P((struct vnode *vp, struct mount *mp,
int flags));
int vn_writechk __P((struct vnode *vp));
int vfs_cache_lookup __P((struct vop_lookup_args *ap));
int vfs_object_create __P((struct vnode *vp, struct proc *p,

View File

@ -116,6 +116,7 @@ extern vop_t **ffs_fifoop_p;
void softdep_initialize __P((void));
int softdep_mount __P((struct vnode *, struct mount *, struct fs *,
struct ucred *));
int softdep_flushworklist __P((struct mount *, int *, struct proc *));
int softdep_flushfiles __P((struct mount *, int, struct proc *));
void softdep_update_inodeblock __P((struct inode *, struct buf *, int));
void softdep_load_inodeblock __P((struct inode *));

View File

@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ffs_snapshot.c 8.10 (McKusick) 7/11/00
* @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00
* $FreeBSD$
*/
@ -290,6 +290,7 @@ ffs_snapshot(mp, snapfile)
if (fs->fs_cgsize < fs->fs_bsize)
bzero(&nbp->b_data[fs->fs_cgsize],
fs->fs_bsize - fs->fs_cgsize);
nbp->b_flags |= B_VALIDSUSPWRT;
bawrite(nbp);
base = cg * fs->fs_fpg / fs->fs_frag;
if (base + len > numblks)
@ -311,6 +312,7 @@ ffs_snapshot(mp, snapfile)
indiroff = (base + loc - NDADDR) % NINDIR(fs);
for ( ; loc < len; loc++, indiroff++) {
if (indiroff >= NINDIR(fs)) {
ibp->b_flags |= B_VALIDSUSPWRT;
bawrite(ibp);
error = VOP_BALLOC(vp,
lblktosize(fs, (off_t)(base + loc)),
@ -325,7 +327,8 @@ ffs_snapshot(mp, snapfile)
continue;
((ufs_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
}
brelse(bp);
bqrelse(bp);
ibp->b_flags |= B_VALIDSUSPWRT;
bdwrite(ibp);
}
/*
@ -340,6 +343,7 @@ ffs_snapshot(mp, snapfile)
if (fs->fs_sbsize < fs->fs_bsize)
bzero(&nbp->b_data[fs->fs_sbsize],
fs->fs_bsize - fs->fs_sbsize);
nbp->b_flags |= B_VALIDSUSPWRT;
bawrite(nbp);
blkno = fragstoblks(fs, fs->fs_csaddr);
len = howmany(fs->fs_cssize, fs->fs_bsize) - 1;
@ -354,6 +358,7 @@ ffs_snapshot(mp, snapfile)
size = fs->fs_cssize % fs->fs_bsize;
}
bcopy(fs->fs_csp[loc], nbp->b_data, size);
nbp->b_flags |= B_VALIDSUSPWRT;
bawrite(nbp);
}
/*
@ -366,6 +371,7 @@ ffs_snapshot(mp, snapfile)
if (error)
goto out1;
readblock(nbp, inoblks[loc]);
nbp->b_flags |= B_VALIDSUSPWRT;
bdwrite(nbp);
}
/*
@ -410,6 +416,7 @@ ffs_snapshot(mp, snapfile)
dip->di_blocks = 0;
dip->di_flags &= ~(SF_IMMUTABLE | SF_SNAPSHOT);
bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs_daddr_t));
nbp->b_flags |= B_VALIDSUSPWRT;
bdwrite(nbp);
}
/*
@ -422,7 +429,7 @@ ffs_snapshot(mp, snapfile)
if (error)
goto out1;
copyblkno = fragstoblks(fs, dbtofsb(fs, ibp->b_blkno));
brelse(ibp);
bqrelse(ibp);
error = VOP_BALLOC(vp, lblktosize(fs, (off_t)copyblkno),
fs->fs_bsize, p->p_ucred, 0, &nbp);
if (error)
@ -434,7 +441,8 @@ ffs_snapshot(mp, snapfile)
goto out1;
}
bcopy(ibp->b_data, nbp->b_data, fs->fs_bsize);
brelse(ibp);
bqrelse(ibp);
nbp->b_flags |= B_VALIDSUSPWRT;
bawrite(nbp);
}
/*
@ -518,7 +526,7 @@ indiracct(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, blksperindir)
} else {
MALLOC(bap, ufs_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
brelse(bp);
bqrelse(bp);
}
error = snapacct(snapvp, &bap[0], &bap[last]);
if (error || level == 0)
@ -539,7 +547,7 @@ indiracct(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, blksperindir)
}
out:
if (snapvp != cancelvp)
brelse(bp);
bqrelse(bp);
else
FREE(bap, M_DEVBUF);
return (error);
@ -578,8 +586,10 @@ snapacct(vp, oldblkp, lastblkp)
if (*blkp != 0)
panic("snapacct: bad block");
*blkp = BLK_SNAP;
if (lbn >= NDADDR)
if (lbn >= NDADDR) {
ibp->b_flags |= B_VALIDSUSPWRT;
bdwrite(ibp);
}
}
return (0);
}
@ -732,7 +742,7 @@ ffs_snapblkfree(freeip, bno, size)
default:
case BLK_NOCOPY:
if (lbn >= NDADDR)
brelse(ibp);
bqrelse(ibp);
continue;
/*
* No previous snapshot claimed the block, so it will be
@ -787,7 +797,7 @@ ffs_snapblkfree(freeip, bno, size)
return (1);
}
if (lbn >= NDADDR)
brelse(ibp);
bqrelse(ibp);
/*
* Allocate the block into which to do the copy. Note that this
* allocation will never require any additional allocations for
@ -933,40 +943,57 @@ ffs_copyonwrite(ap)
if (bp->b_vp == vp)
continue;
/*
* Check to see if block needs to be copied.
* Check to see if block needs to be copied. We have to
* be able to do the VOP_BALLOC without blocking, otherwise
* we may get in a deadlock with another process also
* trying to allocate. If we find ourselves unable to
* get the buffer lock, we unlock the snapshot vnode,
* sleep briefly, and try again.
*/
retry:
vn_lock(vp, LK_SHARED | LK_RETRY, p);
if (lbn < NDADDR) {
blkno = ip->i_db[lbn];
} else {
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
p->p_flag |= P_COWINPROGRESS;
error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
fs->fs_bsize, KERNCRED, B_METAONLY | B_NOWAIT, &ibp);
p->p_flag &= ~P_COWINPROGRESS;
VOP_UNLOCK(vp, 0, p);
if (error)
break;
if (error) {
VOP_UNLOCK(vp, 0, p);
if (error != EWOULDBLOCK)
break;
tsleep(vp, p->p_usrpri, "nap", 1);
goto retry;
}
indiroff = (lbn - NDADDR) % NINDIR(fs);
blkno = ((ufs_daddr_t *)(ibp->b_data))[indiroff];
brelse(ibp);
bqrelse(ibp);
}
#ifdef DIAGNOSTIC
if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
panic("ffs_copyonwrite: bad copy block");
#endif
if (blkno != 0)
if (blkno != 0) {
VOP_UNLOCK(vp, 0, p);
continue;
}
/*
* Allocate the block into which to do the copy. Note that this
* allocation will never require any additional allocations for
* the snapshot inode.
*/
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
p->p_flag |= P_COWINPROGRESS;
error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
fs->fs_bsize, KERNCRED, 0, &cbp);
fs->fs_bsize, KERNCRED, B_NOWAIT, &cbp);
p->p_flag &= ~P_COWINPROGRESS;
VOP_UNLOCK(vp, 0, p);
if (error) {
if (error != EWOULDBLOCK)
break;
tsleep(vp, p->p_usrpri, "nap", 1);
goto retry;
}
#ifdef DEBUG
if (snapdebug) {
printf("Copyonwrite: snapino %d lbn %d for ",
@ -979,8 +1006,6 @@ ffs_copyonwrite(ap)
cbp->b_blkno);
}
#endif
if (error)
break;
/*
* If we have already read the old block contents, then
* simply copy them to the new block.

View File

@ -548,41 +548,45 @@ softdep_process_worklist(matchmnt)
case D_DIRREM:
/* removal of a directory entry */
mp = WK_DIRREM(wk)->dm_mnt;
if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
panic("%s: dirrem on suspended filesystem",
"softdep_process_worklist");
if (mp == matchmnt)
matchcnt += 1;
vn_start_write(NULL, &mp, V_WAIT);
handle_workitem_remove(WK_DIRREM(wk));
vn_finished_write(mp);
break;
case D_FREEBLKS:
/* releasing blocks and/or fragments from a file */
mp = WK_FREEBLKS(wk)->fb_mnt;
if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
panic("%s: freeblks on suspended filesystem",
"softdep_process_worklist");
if (mp == matchmnt)
matchcnt += 1;
vn_start_write(NULL, &mp, V_WAIT);
handle_workitem_freeblocks(WK_FREEBLKS(wk));
vn_finished_write(mp);
break;
case D_FREEFRAG:
/* releasing a fragment when replaced as a file grows */
mp = WK_FREEFRAG(wk)->ff_mnt;
if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
panic("%s: freefrag on suspended filesystem",
"softdep_process_worklist");
if (mp == matchmnt)
matchcnt += 1;
vn_start_write(NULL, &mp, V_WAIT);
handle_workitem_freefrag(WK_FREEFRAG(wk));
vn_finished_write(mp);
break;
case D_FREEFILE:
/* releasing an inode when its link count drops to 0 */
mp = WK_FREEFILE(wk)->fx_mnt;
if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
panic("%s: freefile on suspended filesystem",
"softdep_process_worklist");
if (mp == matchmnt)
matchcnt += 1;
vn_start_write(NULL, &mp, V_WAIT);
handle_workitem_freefile(WK_FREEFILE(wk));
vn_finished_write(mp);
break;
default:
@ -646,13 +650,13 @@ softdep_move_dependencies(oldbp, newbp)
* Purge the work list of all items associated with a particular mount point.
*/
int
softdep_flushfiles(oldmnt, flags, p)
softdep_flushworklist(oldmnt, countp, p)
struct mount *oldmnt;
int flags;
int *countp;
struct proc *p;
{
struct vnode *devvp;
int error, loopcnt;
int count, error = 0;
/*
* Await our turn to clear out the queue.
@ -660,32 +664,16 @@ softdep_flushfiles(oldmnt, flags, p)
while (softdep_worklist_busy)
tsleep(&lbolt, PRIBIO, "softflush", 0);
softdep_worklist_busy = 1;
if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) {
softdep_worklist_busy = 0;
return (error);
}
/*
* Alternately flush the block device associated with the mount
* point and process any dependencies that the flushing
* creates. In theory, this loop can happen at most twice,
* but we give it a few extra just to be sure.
* creates. We continue until no more worklist dependencies
* are found.
*/
*countp = 0;
devvp = VFSTOUFS(oldmnt)->um_devvp;
for (loopcnt = 10; loopcnt > 0; ) {
if (softdep_process_worklist(oldmnt) == 0) {
loopcnt--;
/*
* Do another flush in case any vnodes were brought in
* as part of the cleanup operations.
*/
if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
break;
/*
* If we still found nothing to do, we are really done.
*/
if (softdep_process_worklist(oldmnt) == 0)
break;
}
while ((count = softdep_process_worklist(oldmnt)) > 0) {
*countp += count;
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p);
VOP_UNLOCK(devvp, 0, p);
@ -693,6 +681,37 @@ softdep_flushfiles(oldmnt, flags, p)
break;
}
softdep_worklist_busy = 0;
return (error);
}
/*
* Flush all vnodes and worklist items associated with a specified mount point.
*/
int
softdep_flushfiles(oldmnt, flags, p)
struct mount *oldmnt;
int flags;
struct proc *p;
{
int error, count, loopcnt;
/*
* Alternately flush the vnodes associated with the mount
* point and process any dependencies that the flushing
* creates. In theory, this loop can happen at most twice,
* but we give it a few extra just to be sure.
*/
for (loopcnt = 10; loopcnt > 0; loopcnt--) {
/*
* Do another flush in case any vnodes were brought in
* as part of the cleanup operations.
*/
if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
break;
if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 ||
count == 0)
break;
}
/*
* If we are unmounting then it is an error to fail. If we
* are simply trying to downgrade to read-only, then filesystem
@ -4432,8 +4451,8 @@ clear_remove(p)
mp = pagedep->pd_mnt;
ino = pagedep->pd_ino;
FREE_LOCK(&lk);
if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0)
return;
if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
continue;
if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
softdep_error("clear_remove: vget", error);
vn_finished_write(mp);
@ -4503,8 +4522,8 @@ clear_inodedeps(p)
if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
continue;
FREE_LOCK(&lk);
if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0)
return;
if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
continue;
if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
softdep_error("clear_inodedeps: vget", error);
vn_finished_write(mp);

View File

@ -908,7 +908,7 @@ ffs_sync(mp, waitfor, cred, p)
struct inode *ip;
struct ufsmount *ump = VFSTOUFS(mp);
struct fs *fs;
int error, allerror = 0;
int error, count, wait, lockreq, allerror = 0;
fs = ump->um_fs;
if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
@ -918,6 +918,12 @@ ffs_sync(mp, waitfor, cred, p)
/*
* Write back each (modified) inode.
*/
wait = 0;
lockreq = LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK;
if (waitfor == MNT_WAIT) {
wait = 1;
lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
}
simple_lock(&mntvnode_slock);
loop:
for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
@ -938,9 +944,7 @@ ffs_sync(mp, waitfor, cred, p)
}
if (vp->v_type != VCHR) {
simple_unlock(&mntvnode_slock);
error =
vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
if (error) {
if ((error = vget(vp, lockreq, p)) != 0) {
simple_lock(&mntvnode_slock);
if (error == ENOENT)
goto loop;
@ -948,14 +952,12 @@ ffs_sync(mp, waitfor, cred, p)
}
if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
allerror = error;
VOP_UNLOCK(vp, 0, p);
vrele(vp);
vput(vp);
simple_lock(&mntvnode_slock);
} else {
simple_unlock(&mntvnode_slock);
simple_unlock(&vp->v_interlock);
/* UFS_UPDATE(vp, waitfor == MNT_WAIT); */
UFS_UPDATE(vp, 0);
UFS_UPDATE(vp, wait);
simple_lock(&mntvnode_slock);
}
}
@ -963,9 +965,16 @@ ffs_sync(mp, waitfor, cred, p)
/*
* Force stale file system control information to be flushed.
*/
if (waitfor != MNT_LAZY) {
if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
waitfor = MNT_NOWAIT;
if (waitfor == MNT_WAIT) {
if ((error = softdep_flushworklist(ump->um_mountp, &count, p)))
allerror = error;
/* Flushed work items may create new vnodes to clean */
if (count) {
simple_lock(&mntvnode_slock);
goto loop;
}
}
if (waitfor == MNT_NOWAIT) {
vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
allerror = error;

View File

@ -248,7 +248,7 @@ mfs_mount(mp, path, data, ndp, p)
/* It is not clear that these will get initialized otherwise */
dev->si_bsize_phys = DEV_BSIZE;
dev->si_iosize_max = DFLTPHYS;
addaliasu(devvp, makeudev(253, mfs_minor++));
devvp = addaliasu(devvp, makeudev(253, mfs_minor++));
devvp->v_data = mfsp;
mfsp->mfs_baseoff = args.base;
mfsp->mfs_size = args.size;

View File

@ -147,7 +147,18 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
num = *nump;
if (num == 0) {
*bnp = blkptrtodb(ump, ip->i_db[bn]);
if (*bnp == 0) {
/*
* Since this is FFS independent code, we are out of
* scope for the definitions of BLK_NOCOPY and
* BLK_SNAP, but we do know that they will fall in
* the range 1..um_seqinc, so we use that test and
* return a request for a zeroed out buffer if attempts
* are made to read a BLK_NOCOPY or BLK_SNAP block.
*/
if ((ip->i_flags & SF_SNAPSHOT) &&
ip->i_db[bn] > 0 && ip->i_db[bn] < ump->um_seqinc) {
*bnp = -1;
} else if (*bnp == 0) {
if (ip->i_flags & SF_SNAPSHOT)
*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
else
@ -230,6 +241,17 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
if (bp)
bqrelse(bp);
/*
* Since this is FFS independent code, we are out of scope for the
* definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
* will fall in the range 1..um_seqinc, so we use that test and
* return a request for a zeroed out buffer if attempts are made
* to read a BLK_NOCOPY or BLK_SNAP block.
*/
if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){
*bnp = -1;
return (0);
}
*bnp = blkptrtodb(ump, daddr);
if (*bnp == 0) {
if (ip->i_flags & SF_SNAPSHOT)

View File

@ -77,7 +77,7 @@ ufs_inactive(ap)
if (ip->i_mode == 0)
goto out;
if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
(void) vn_write_suspend_wait(vp, V_WAIT);
(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
#ifdef QUOTA
if (!getinoquota(ip))
(void)chkiq(ip, -1, NOCRED, 0);
@ -94,10 +94,10 @@ ufs_inactive(ap)
}
if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
if ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
vn_write_suspend_wait(vp, V_NOWAIT)) {
vn_write_suspend_wait(vp, NULL, V_NOWAIT)) {
ip->i_flag &= ~IN_ACCESS;
} else {
(void) vn_write_suspend_wait(vp, V_WAIT);
(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
UFS_UPDATE(vp, 0);
}
}

View File

@ -898,7 +898,7 @@ dqsync(vp, dq)
return (0);
if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP)
panic("dqsync: file");
(void) vn_write_suspend_wait(dqvp, V_WAIT);
(void) vn_write_suspend_wait(dqvp, NULL, V_WAIT);
if (vp != dqvp)
vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p);
while (dq->dq_flags & DQ_LOCK) {

View File

@ -2036,7 +2036,8 @@ ufs_vinit(mntp, specops, fifoops, vpp)
case VCHR:
case VBLK:
vp->v_op = specops;
addaliasu(vp, ip->i_rdev);
vp = addaliasu(vp, ip->i_rdev);
ip->i_vnode = vp;
break;
case VFIFO:
vp->v_op = fifoops;