This commit enables a UFS filesystem to do a forcible unmount when
the underlying media fails or becomes inaccessible, for example
when a USB flash memory card hosting a UFS filesystem is unplugged.

The strategy for handling disk I/O errors when soft updates are
enabled is to stop writing to the disk of the affected file system
but continue to accept I/O requests and report that all future
writes by the file system to that disk actually succeed. Then
initiate an asynchronous forced unmount of the affected file system.

There are two cases for disk I/O errors:

   - ENXIO, which means that this disk is gone and the lower layers
     of the storage stack already guarantee that no future I/O to
     this disk will succeed.

   - EIO (or most other errors), which means that this particular
     I/O request has failed but subsequent I/O requests to this
     disk might still succeed.

For ENXIO, we can just clear the error and continue, because we
know that the file system cannot affect the on-disk state after we
see this error. For EIO or other errors, we arrange for the geom_vfs
layer to reject all future I/O requests with ENXIO, just as is
done when the geom_vfs is orphaned. In both cases, the file system
code can just clear the error and proceed with the forcible unmount.

This new treatment of I/O errors is needed for writes of any buffer
that is involved in a dependency. Most dependencies are described
by a structure attached to the buffer's b_dep field. But some are
created and processed as a result of the completion of the dependencies
attached to the buffer.

Clearing some dependencies requires a read. For example, if there
is a dependency that requires an inode to be written, the disk block
containing that inode must be read, the updated inode copied into
place in that buffer, and the buffer then written back to disk.

Often the needed buffer is already in memory and can be used. But
if it needs to be read from the disk, the read will fail, so we
fabricate a buffer full of zeros and pretend that the read succeeded.
This zeroed buffer can be updated and written back to disk.

The only case where a buffer full of zeros causes the code to do
the wrong thing is when reading an inode buffer containing an inode
that still has an inode dependency in memory that will reinitialize
the effective link count (i_effnlink) based on the actual link count
(i_nlink) that we read. To handle this case we now store the i_nlink
value that we wrote in the inode dependency so that it can be
restored into the zeroed buffer, thus keeping the tracking of the
inode link count consistent.

Because applications depend on knowing when an attempt to write
their data to stable storage has failed, the fsync(2) and msync(2)
system calls need to return errors if data fails to be written to
stable storage. So these operations return ENXIO for every call
made on files in a file system where we have otherwise been ignoring
I/O errors.

Co-authored by: mckusick
Reviewed by:   kib
Tested by:     Peter Holm
Approved by:   mckusick (mentor)
Sponsored by:  Netflix
Differential Revision:  https://reviews.freebsd.org/D24088
This commit is contained in:
Chuck Silvers 2020-05-25 23:47:31 +00:00
parent b02676a2cb
commit d79ff54b5c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=361491
14 changed files with 333 additions and 73 deletions

View File

@ -55,6 +55,7 @@ struct g_vfs_softc {
struct bufobj *sc_bo;
int sc_active;
int sc_orphaned;
int sc_enxio_active;
};
static struct buf_ops __g_vfs_bufops = {
@ -139,9 +140,14 @@ g_vfs_done(struct bio *bip)
cp = bip->bio_from;
sc = cp->geom->softc;
if (bip->bio_error && bip->bio_error != EOPNOTSUPP)
if (bip->bio_error != 0 && bip->bio_error != EOPNOTSUPP) {
if ((bp->b_xflags & BX_CVTENXIO) != 0)
sc->sc_enxio_active = 1;
if (sc->sc_enxio_active)
bip->bio_error = ENXIO;
g_print_bio("g_vfs_done():", bip, "error = %d",
bip->bio_error);
}
bp->b_error = bip->bio_error;
bp->b_ioflags = bip->bio_flags;
if (bip->bio_error)
@ -172,7 +178,7 @@ g_vfs_strategy(struct bufobj *bo, struct buf *bp)
* If the provider has orphaned us, just return ENXIO.
*/
mtx_lock(&sc->sc_mtx);
if (sc->sc_orphaned) {
if (sc->sc_orphaned || sc->sc_enxio_active) {
mtx_unlock(&sc->sc_mtx);
bp->b_error = ENXIO;
bp->b_ioflags |= BIO_ERROR;

View File

@ -2176,6 +2176,8 @@ breadn_flags(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size,
bp->b_flags |= B_CKHASH;
bp->b_ckhashcalc = ckhashfunc;
}
if ((flags & GB_CVTENXIO) != 0)
bp->b_xflags |= BX_CVTENXIO;
bp->b_ioflags &= ~BIO_ERROR;
if (bp->b_rcred == NOCRED && cred != NOCRED)
bp->b_rcred = crhold(cred);
@ -2773,6 +2775,7 @@ brelse(struct buf *bp)
panic("brelse: not dirty");
bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_RELBUF | B_DIRECT);
bp->b_xflags &= ~(BX_CVTENXIO);
/* binsfree unlocks bp. */
binsfree(bp, qindex);
}
@ -2804,6 +2807,7 @@ bqrelse(struct buf *bp)
return;
}
bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
bp->b_xflags &= ~(BX_CVTENXIO);
if (bp->b_flags & B_MANAGED) {
if (bp->b_flags & B_REMFREE)

View File

@ -261,12 +261,14 @@ struct buf {
*/
#define BX_VNDIRTY 0x00000001 /* On vnode dirty list */
#define BX_VNCLEAN 0x00000002 /* On vnode clean list */
#define BX_CVTENXIO 0x00000004 /* Convert errors to ENXIO */
#define BX_BKGRDWRITE 0x00000010 /* Do writes in background */
#define BX_BKGRDMARKER 0x00000020 /* Mark buffer for splay tree */
#define BX_ALTDATA 0x00000040 /* Holds extended data */
#define BX_FSPRIV 0x00FF0000 /* Filesystem-specific flags mask */
#define PRINT_BUF_XFLAGS "\20\7altdata\6bkgrdmarker\5bkgrdwrite\2clean\1dirty"
#define PRINT_BUF_XFLAGS "\20\7altdata\6bkgrdmarker\5bkgrdwrite\3cvtenxio" \
"\2clean\1dirty"
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
@ -487,6 +489,7 @@ buf_track(struct buf *bp __unused, const char *location __unused)
#define GB_KVAALLOC 0x0010 /* But allocate KVA. */
#define GB_CKHASH 0x0020 /* If reading, calc checksum hash */
#define GB_NOSPARSE 0x0040 /* Do not instantiate holes */
#define GB_CVTENXIO 0x0080 /* Convert errors to ENXIO */
#ifdef _KERNEL
extern int nbuf; /* The number of buffer headers */

View File

@ -222,6 +222,10 @@ ffs_alloc(ip, lbn, bpref, size, flags, cred, bnp)
softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT);
goto retry;
}
if (ffs_fsfail_cleanup_locked(ump, 0)) {
UFS_UNLOCK(ump);
return (ENXIO);
}
if (reclaimed > 0 &&
ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
@ -447,6 +451,12 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp)
softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT);
goto retry;
}
if (bp)
brelse(bp);
if (ffs_fsfail_cleanup_locked(ump, 0)) {
UFS_UNLOCK(ump);
return (ENXIO);
}
if (reclaimed > 0 &&
ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
@ -456,8 +466,6 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp)
} else {
UFS_UNLOCK(ump);
}
if (bp)
brelse(bp);
return (ENOSPC);
}
@ -1102,7 +1110,7 @@ ffs_valloc(pvp, mode, cred, vpp)
struct ufsmount *ump;
ino_t ino, ipref;
u_int cg;
int error, error1, reclaimed;
int error, reclaimed;
*vpp = NULL;
pip = VTOI(pvp);
@ -1137,28 +1145,21 @@ ffs_valloc(pvp, mode, cred, vpp)
(allocfcn_t *)ffs_nodealloccg);
if (ino == 0)
goto noinodes;
/*
* Get rid of the cached old vnode, force allocation of a new vnode
* for this inode.
* for this inode. If this fails, release the allocated ino and
* return the error.
*/
error = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp, FFSV_REPLACE);
if (error) {
error1 = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp,
FFSV_FORCEINSMQ | FFSV_REPLACE);
if ((error = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp,
FFSV_FORCEINSMQ | FFSV_REPLACE)) != 0) {
ffs_vfree(pvp, ino, mode);
if (error1 == 0) {
ip = VTOI(*vpp);
if (ip->i_mode)
goto dup_alloc;
UFS_INODE_SET_FLAG(ip, IN_MODIFIED);
vput(*vpp);
}
return (error);
}
/*
* We got an inode, so check mode and panic if it is already allocated.
*/
ip = VTOI(*vpp);
if (ip->i_mode) {
dup_alloc:
printf("mode = 0%o, inum = %ju, fs = %s\n",
ip->i_mode, (uintmax_t)ip->i_number, fs->fs_fsmnt);
panic("ffs_valloc: dup alloc");
@ -1197,6 +1198,10 @@ ffs_valloc(pvp, mode, cred, vpp)
softdep_request_cleanup(fs, pvp, cred, FLUSH_INODES_WAIT);
goto retry;
}
if (ffs_fsfail_cleanup_locked(ump, 0)) {
UFS_UNLOCK(ump);
return (ENXIO);
}
if (ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
ffs_fserr(fs, pip->i_number, "out of inodes");
@ -2230,6 +2235,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd)
struct mount *mp;
struct cg *cgp;
struct buf *bp;
daddr_t dbn;
ufs1_daddr_t fragno, cgbno;
int i, blk, frags, bbase, error;
u_int cg;
@ -2262,8 +2268,23 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd)
ffs_fserr(fs, inum, "bad block");
return;
}
if ((error = ffs_getcg(fs, devvp, cg, 0, &bp, &cgp)) != 0)
if ((error = ffs_getcg(fs, devvp, cg, GB_CVTENXIO, &bp, &cgp)) != 0) {
if (!ffs_fsfail_cleanup(ump, error) ||
!MOUNTEDSOFTDEP(UFSTOVFS(ump)) || devvp->v_type != VCHR)
return;
if (devvp->v_type == VREG)
dbn = fragstoblks(fs, cgtod(fs, cg));
else
dbn = fsbtodb(fs, cgtod(fs, cg));
error = getblkx(devvp, dbn, dbn, fs->fs_cgsize, 0, 0, 0, &bp);
KASSERT(error == 0, ("getblkx failed"));
softdep_setup_blkfree(UFSTOVFS(ump), bp, bno,
numfrags(fs, size), dephd);
bp->b_flags |= B_RELBUF | B_NOCACHE;
bp->b_flags &= ~B_CACHE;
bawrite(bp);
return;
}
cgbno = dtogd(fs, bno);
blksfree = cg_blksfree(cgp);
UFS_LOCK(ump);
@ -2783,6 +2804,7 @@ ffs_freefile(ump, fs, devvp, ino, mode, wkhd)
{
struct cg *cgp;
struct buf *bp;
daddr_t dbn;
int error;
u_int cg;
u_int8_t *inosused;
@ -2804,8 +2826,22 @@ ffs_freefile(ump, fs, devvp, ino, mode, wkhd)
if (ino >= fs->fs_ipg * fs->fs_ncg)
panic("ffs_freefile: range: dev = %s, ino = %ju, fs = %s",
devtoname(dev), (uintmax_t)ino, fs->fs_fsmnt);
if ((error = ffs_getcg(fs, devvp, cg, 0, &bp, &cgp)) != 0)
if ((error = ffs_getcg(fs, devvp, cg, GB_CVTENXIO, &bp, &cgp)) != 0) {
if (!ffs_fsfail_cleanup(ump, error) ||
!MOUNTEDSOFTDEP(UFSTOVFS(ump)) || devvp->v_type != VCHR)
return (error);
if (devvp->v_type == VREG)
dbn = fragstoblks(fs, cgtod(fs, cg));
else
dbn = fsbtodb(fs, cgtod(fs, cg));
error = getblkx(devvp, dbn, dbn, fs->fs_cgsize, 0, 0, 0, &bp);
KASSERT(error == 0, ("getblkx failed"));
softdep_setup_inofree(UFSTOVFS(ump), bp, ino, wkhd);
bp->b_flags |= B_RELBUF | B_NOCACHE;
bp->b_flags &= ~B_CACHE;
bawrite(bp);
return (error);
}
inosused = cg_inosused(cgp);
cgino = ino % fs->fs_ipg;
if (isclr(inosused, cgino)) {

View File

@ -324,7 +324,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
UFS_UNLOCK(ump);
goto retry;
}
if (ppsratecheck(&ump->um_last_fullmsg,
if (!ffs_fsfail_cleanup_locked(ump, error) &&
ppsratecheck(&ump->um_last_fullmsg,
&ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
ffs_fserr(fs, ip->i_number, "filesystem full");
@ -407,7 +408,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
UFS_UNLOCK(ump);
goto retry;
}
if (ppsratecheck(&ump->um_last_fullmsg,
if (!ffs_fsfail_cleanup_locked(ump, error) &&
ppsratecheck(&ump->um_last_fullmsg,
&ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
ffs_fserr(fs, ip->i_number, "filesystem full");
@ -919,7 +921,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
UFS_UNLOCK(ump);
goto retry;
}
if (ppsratecheck(&ump->um_last_fullmsg,
if (!ffs_fsfail_cleanup_locked(ump, error) &&
ppsratecheck(&ump->um_last_fullmsg,
&ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
ffs_fserr(fs, ip->i_number, "filesystem full");
@ -1003,7 +1006,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
UFS_UNLOCK(ump);
goto retry;
}
if (ppsratecheck(&ump->um_last_fullmsg,
if (!ffs_fsfail_cleanup_locked(ump, error) &&
ppsratecheck(&ump->um_last_fullmsg,
&ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
ffs_fserr(fs, ip->i_number, "filesystem full");

View File

@ -116,6 +116,11 @@ int ffs_vfree(struct vnode *, ino_t, int);
vfs_vget_t ffs_vget;
int ffs_vgetf(struct mount *, ino_t, int, struct vnode **, int);
void process_deferred_inactive(struct mount *mp);
int ffs_fsfail_cleanup(struct ufsmount *, int);
int ffs_fsfail_cleanup_locked(struct ufsmount *, int);
int ffs_breadz(struct ufsmount *, struct vnode *, daddr_t, daddr_t, int,
daddr_t *, int *, int, struct ucred *, int, void (*)(struct buf *),
struct buf **);
/*
* Flags to ffs_vgetf
@ -162,6 +167,7 @@ void softdep_uninitialize(void);
int softdep_mount(struct vnode *, struct mount *, struct fs *,
struct ucred *);
void softdep_unmount(struct mount *);
void softdep_handle_error(struct buf *);
int softdep_move_dependencies(struct buf *, struct buf *);
int softdep_flushworklist(struct mount *, int *, struct thread *);
int softdep_flushfiles(struct mount *, int, struct thread *);

View File

@ -86,6 +86,7 @@ ffs_update(vp, waitfor)
struct fs *fs;
struct buf *bp;
struct inode *ip;
daddr_t bn;
int flags, error;
ASSERT_VOP_ELOCKED(vp, "ffs_update");
@ -112,9 +113,9 @@ ffs_update(vp, waitfor)
if (IS_SNAPSHOT(ip))
flags = GB_LOCK_NOWAIT;
loop:
error = bread_gb(ITODEVVP(ip),
fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
(int) fs->fs_bsize, NOCRED, flags, &bp);
bn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number));
error = ffs_breadz(VFSTOUFS(vp->v_mount), ITODEVVP(ip), bn, bn,
(int) fs->fs_bsize, NULL, NULL, 0, NOCRED, flags, NULL, &bp);
if (error != 0) {
if (error != EBUSY)
return (error);
@ -163,9 +164,11 @@ ffs_update(vp, waitfor)
*/
random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME);
}
if (waitfor)
if (waitfor) {
error = bwrite(bp);
else if (vm_page_count_severe() || buf_dirty_count_severe()) {
if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
error = 0;
} else if (vm_page_count_severe() || buf_dirty_count_severe()) {
bawrite(bp);
error = 0;
} else {
@ -684,7 +687,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
* of having bread() attempt to calculate it using VOP_BMAP().
*/
vp = ITOV(ip);
error = breadn_flags(vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0,
error = ffs_breadz(ump, vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0,
NOCRED, 0, NULL, &bp);
if (error) {
*countp = 0;

View File

@ -2271,6 +2271,7 @@ inodedep_lookup(mp, inum, flags, inodedeppp)
inodedep->id_ino = inum;
inodedep->id_state = ALLCOMPLETE;
inodedep->id_nlinkdelta = 0;
inodedep->id_nlinkwrote = -1;
inodedep->id_savedino1 = NULL;
inodedep->id_savedsize = -1;
inodedep->id_savedextsize = -1;
@ -3606,6 +3607,7 @@ softdep_process_journal(mp, needwk, flags)
jblocks->jb_needseg = 0;
WORKLIST_INSERT(&bp->b_dep, &jseg->js_list);
FREE_LOCK(ump);
bp->b_xflags |= BX_CVTENXIO;
pbgetvp(ump->um_devvp, bp);
/*
* We only do the blocking wait once we find the journal
@ -6334,7 +6336,7 @@ setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno)
* the on-disk address, so we just pass it to bread() instead of
* having bread() attempt to calculate it using VOP_BMAP().
*/
error = breadn_flags(ITOV(ip), lbn, blkptrtodb(ump, blkno),
error = ffs_breadz(ump, ITOV(ip), lbn, blkptrtodb(ump, blkno),
(int)mp->mnt_stat.f_iosize, NULL, NULL, 0, NOCRED, 0, NULL, &bp);
if (error)
return (error);
@ -6485,6 +6487,15 @@ complete_trunc_indir(freework)
else
WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd,
&freework->fw_list);
if (fwn == NULL) {
freework->fw_indir = (void *)0x0000deadbeef0000;
bp = indirdep->ir_savebp;
indirdep->ir_savebp = NULL;
free_indirdep(indirdep);
FREE_LOCK(ump);
brelse(bp);
ACQUIRE_LOCK(ump);
}
} else {
/* Complete when the real copy is written. */
WORKLIST_INSERT(&bp->b_dep, &freework->fw_list);
@ -6589,6 +6600,7 @@ softdep_journal_freeblocks(ip, cred, length, flags)
struct buf *bp;
struct vnode *vp;
struct mount *mp;
daddr_t dbn;
ufs2_daddr_t extblocks, datablocks;
ufs_lbn_t tmpval, lbn, lastlbn;
int frags, lastoff, iboff, allocblock, needj, error, i;
@ -6726,8 +6738,9 @@ softdep_journal_freeblocks(ip, cred, length, flags)
*/
ufs_itimes(vp);
ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
(int)fs->fs_bsize, cred, &bp);
dbn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number));
error = ffs_breadz(ump, ump->um_devvp, dbn, dbn, (int)fs->fs_bsize,
NULL, NULL, 0, cred, 0, NULL, &bp);
if (error) {
softdep_error("softdep_journal_freeblocks", error);
return;
@ -6828,13 +6841,13 @@ softdep_journal_freeblocks(ip, cred, length, flags)
*/
size = sblksize(fs, length, lastlbn);
error = bread(vp, lastlbn, size, cred, &bp);
if (error) {
if (error == 0) {
bzero((char *)bp->b_data + lastoff, size - lastoff);
bawrite(bp);
} else if (!ffs_fsfail_cleanup(ump, error)) {
softdep_error("softdep_journal_freeblks", error);
return;
}
bzero((char *)bp->b_data + lastoff, size - lastoff);
bawrite(bp);
}
ACQUIRE_LOCK(ump);
inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
@ -6945,8 +6958,8 @@ softdep_setup_freeblocks(ip, length, flags)
if ((error = bread(ump->um_devvp,
fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
(int)fs->fs_bsize, NOCRED, &bp)) != 0) {
brelse(bp);
softdep_error("softdep_setup_freeblocks", error);
if (!ffs_fsfail_cleanup(ump, error))
softdep_error("softdep_setup_freeblocks", error);
return;
}
freeblks = newfreeblks(mp, ip);
@ -8161,7 +8174,7 @@ indir_trunc(freework, dbn, lbn)
ufs_lbn_t lbnadd, nlbn;
u_long key;
int nblocks, ufs1fmt, freedblocks;
int goingaway, freedeps, needj, level, cnt, i;
int goingaway, freedeps, needj, level, cnt, i, error;
freeblks = freework->fw_freeblks;
mp = freeblks->fb_list.wk_mp;
@ -8199,10 +8212,11 @@ indir_trunc(freework, dbn, lbn)
if (indirdep == NULL || (indirdep->ir_state & GOINGAWAY) == 0)
panic("indir_trunc: Bad indirdep %p from buf %p",
indirdep, bp);
} else if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize,
NOCRED, &bp) != 0) {
brelse(bp);
return;
} else {
error = ffs_breadz(ump, freeblks->fb_devvp, dbn, dbn,
(int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL, &bp);
if (error)
return;
}
ACQUIRE_LOCK(ump);
/* Protects against a race with complete_trunc_indir(). */
@ -9700,6 +9714,7 @@ clear_unlinked_inodedep(inodedep)
struct inodedep *idn;
struct fs *fs, *bpfs;
struct buf *bp;
daddr_t dbn;
ino_t ino;
ino_t nino;
ino_t pino;
@ -9753,11 +9768,10 @@ clear_unlinked_inodedep(inodedep)
bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
(int)fs->fs_sbsize, 0, 0, 0);
} else {
error = bread(ump->um_devvp,
fsbtodb(fs, ino_to_fsba(fs, pino)),
(int)fs->fs_bsize, NOCRED, &bp);
if (error)
brelse(bp);
dbn = fsbtodb(fs, ino_to_fsba(fs, pino));
error = ffs_breadz(ump, ump->um_devvp, dbn, dbn,
(int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL,
&bp);
}
ACQUIRE_LOCK(ump);
if (error)
@ -10578,14 +10592,16 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
if ((adp->ad_state & ATTACHED) == 0)
panic("inodedep %p and adp %p not attached", inodedep, adp);
prevlbn = adp->ad_offset;
if (adp->ad_offset < UFS_NDADDR &&
if (!ffs_fsfail_cleanup(ump, 0) &&
adp->ad_offset < UFS_NDADDR &&
dp->di_db[adp->ad_offset] != adp->ad_newblkno)
panic("initiate_write_inodeblock_ufs2: "
"direct pointer #%jd mismatch %jd != %jd",
(intmax_t)adp->ad_offset,
(intmax_t)dp->di_db[adp->ad_offset],
(intmax_t)adp->ad_newblkno);
if (adp->ad_offset >= UFS_NDADDR &&
if (!ffs_fsfail_cleanup(ump, 0) &&
adp->ad_offset >= UFS_NDADDR &&
dp->di_ib[adp->ad_offset - UFS_NDADDR] != adp->ad_newblkno)
panic("initiate_write_inodeblock_ufs2: "
"indirect pointer #%jd mismatch %jd != %jd",
@ -10817,12 +10833,14 @@ softdep_setup_inofree(mp, bp, ino, wkhd)
("softdep_setup_inofree called on non-softdep filesystem"));
ump = VFSTOUFS(mp);
ACQUIRE_LOCK(ump);
fs = ump->um_fs;
cgp = (struct cg *)bp->b_data;
inosused = cg_inosused(cgp);
if (isset(inosused, ino % fs->fs_ipg))
panic("softdep_setup_inofree: inode %ju not freed.",
(uintmax_t)ino);
if (!ffs_fsfail_cleanup(ump, 0)) {
fs = ump->um_fs;
cgp = (struct cg *)bp->b_data;
inosused = cg_inosused(cgp);
if (isset(inosused, ino % fs->fs_ipg))
panic("softdep_setup_inofree: inode %ju not freed.",
(uintmax_t)ino);
}
if (inodedep_lookup(mp, ino, 0, &inodedep))
panic("softdep_setup_inofree: ino %ju has existing inodedep %p",
(uintmax_t)ino, inodedep);
@ -11091,6 +11109,26 @@ initiate_write_bmsafemap(bmsafemap, bp)
wk_list);
}
/*
* Handle a failed write on a buffer tracked by soft updates.
*
* Called from softdep_disk_write_complete() and ffs_backgroundwritedone()
* when the buffer has BIO_ERROR set. If ffs_fsfail_cleanup() reports that
* a failure cleanup is in progress for the buffer's filesystem, the error
* is cleared from the buffer; otherwise the buffer is left untouched.
*/
void
softdep_handle_error(struct buf *bp)
{
struct ufsmount *ump;
/* Nothing to do if the buffer cannot be mapped to a UFS mount. */
ump = softdep_bp_to_mp(bp);
if (ump == NULL)
return;
if (ffs_fsfail_cleanup(ump, bp->b_error)) {
/*
* No future writes will succeed, so the on-disk image is safe.
* Pretend that this write succeeded so that the softdep state
* will be cleaned up naturally.
*/
bp->b_ioflags &= ~BIO_ERROR;
bp->b_error = 0;
}
}
/*
* This routine is called during the completion interrupt
* service routine for a disk write (from the procedure called
@ -11117,6 +11155,8 @@ softdep_disk_write_complete(bp)
"with outstanding dependencies for buffer %p", bp));
if (ump == NULL)
return;
if ((bp->b_ioflags & BIO_ERROR) != 0)
softdep_handle_error(bp);
/*
* If an error occurred while doing the write, then the data
* has not hit the disk and the dependencies cannot be processed.
@ -12305,6 +12345,13 @@ softdep_load_inodeblock(ip)
FREE_LOCK(ump);
return;
}
if (ip->i_nlink != inodedep->id_nlinkwrote &&
inodedep->id_nlinkwrote != -1) {
KASSERT(ip->i_nlink == 0 &&
(ump->um_flags & UM_FSFAIL_CLEANUP) != 0,
("read bad i_nlink value"));
ip->i_effnlink = ip->i_nlink = inodedep->id_nlinkwrote;
}
ip->i_effnlink -= inodedep->id_nlinkdelta;
KASSERT(ip->i_effnlink >= 0,
("softdep_load_inodeblock: negative i_effnlink"));
@ -12367,6 +12414,11 @@ softdep_update_inodeblock(ip, bp, waitfor)
panic("softdep_update_inodeblock: bad link count");
return;
}
KASSERT(ip->i_nlink >= inodedep->id_nlinkdelta,
("softdep_update_inodeblock inconsistent ip %p i_nlink %d "
"inodedep %p id_nlinkdelta %jd",
ip, ip->i_nlink, inodedep, (intmax_t)inodedep->id_nlinkdelta));
inodedep->id_nlinkwrote = ip->i_nlink;
if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink)
panic("softdep_update_inodeblock: bad delta");
/*
@ -12642,7 +12694,7 @@ softdep_fsync(vp)
else
brelse(bp);
vput(pvp);
if (error != 0)
if (!ffs_fsfail_cleanup(ump, error))
return (error);
ACQUIRE_LOCK(ump);
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0)

View File

@ -67,6 +67,7 @@ struct malloc_type;
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/ucred.h>
#include <sys/taskqueue.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
@ -136,7 +137,8 @@ ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino)
return (0);
}
dip2 = ((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino));
if ((error = ffs_verify_dinode_ckhash(fs, dip2)) != 0) {
if ((error = ffs_verify_dinode_ckhash(fs, dip2)) != 0 &&
!ffs_fsfail_cleanup(ITOUMP(ip), error)) {
printf("%s: inode %jd: check-hash failed\n", fs->fs_fsmnt,
(intmax_t)ino);
return (error);
@ -202,6 +204,93 @@ ffs_check_blkno(struct mount *mp, ino_t inum, ufs2_daddr_t daddr, int blksize)
UFS_UNLOCK(ump);
return (EINTEGRITY);
}
/*
* Initiate a forcible unmount.
* Used to unmount filesystems whose underlying media has gone away.
*
* Task handler queued by ffs_fsfail_cleanup_locked(). "v" is the
* struct fsfail_task allocated in ffs_mountfs(); it is freed here after
* the unmount attempt. "pending" is not used.
*/
static void
ffs_fsfail_unmount(void *v, int pending)
{
struct fsfail_task *etp;
struct mount *mp;
etp = v;
/*
* Find our mount and get a ref on it, then try to unmount.
* The lookup uses the fsid saved in the task; if no mount is found
* there is nothing to unmount.
*/
mp = vfs_getvfs(&etp->fsid);
if (mp != NULL)
dounmount(mp, MNT_FORCE, curthread);
/* The task owns its own storage once queued; release it. */
free(etp, M_UFSMNT);
}
/*
* On first ENXIO error, start a task that forcibly unmounts the filesystem.
*
* Return true if a cleanup is in progress.
*
* This is the entry point for callers that do not hold the UFS lock: it
* takes the lock around a call to ffs_fsfail_cleanup_locked(), which does
* the actual work, and returns that function's result.
*/
int
ffs_fsfail_cleanup(struct ufsmount *ump, int error)
{
int retval;
UFS_LOCK(ump);
retval = ffs_fsfail_cleanup_locked(ump, error);
UFS_UNLOCK(ump);
return (retval);
}
/*
* Variant of ffs_fsfail_cleanup() for callers that already hold the UFS
* mutex (asserted below).
*
* The first time this is called with error == ENXIO, mark the mount with
* UM_FSFAIL_CLEANUP and queue the forced-unmount task. For any other
* error value (including 0) no state is changed and the call only reports
* whether a cleanup has already been started.
*
* Return nonzero if UM_FSFAIL_CLEANUP is set on the mount.
*/
int
ffs_fsfail_cleanup_locked(struct ufsmount *ump, int error)
{
struct fsfail_task *etp;
struct task *tp;
mtx_assert(UFS_MTX(ump), MA_OWNED);
if (error == ENXIO && (ump->um_flags & UM_FSFAIL_CLEANUP) == 0) {
ump->um_flags |= UM_FSFAIL_CLEANUP;
/*
* Queue an async forced unmount.
* The task is detached from the mount first; once queued it is
* freed by ffs_fsfail_unmount() rather than through the mount.
*/
etp = ump->um_fsfail_task;
ump->um_fsfail_task = NULL;
/* Nothing to queue if the mount has no task attached. */
if (etp != NULL) {
tp = &etp->task;
TASK_INIT(tp, 0, ffs_fsfail_unmount, etp);
taskqueue_enqueue(taskqueue_thread, tp);
printf("UFS: forcibly unmounting %s from %s\n",
ump->um_mountp->mnt_stat.f_mntfromname,
ump->um_mountp->mnt_stat.f_mntonname);
}
}
return ((ump->um_flags & UM_FSFAIL_CLEANUP) != 0);
}
/*
* Wrapper used during ENXIO cleanup to allocate empty buffers when
* the kernel is unable to read the real one. They are needed so that
* the soft updates code can use them to unwind its dependencies.
*
* The arguments are passed through to breadn_flags(), with GB_CVTENXIO
* always added to flags. If the read fails and ffs_fsfail_cleanup()
* reports that a cleanup is in progress, a buffer is obtained with
* getblkx() instead, zeroed over its first "size" bytes, and the result
* of getblkx() (asserted to be 0) is returned. Otherwise the result of
* breadn_flags() is returned unchanged.
*/
int
ffs_breadz(struct ufsmount *ump, struct vnode *vp, daddr_t lblkno,
daddr_t dblkno, int size, daddr_t *rablkno, int *rabsize, int cnt,
struct ucred *cred, int flags, void (*ckhashfunc)(struct buf *),
struct buf **bpp)
{
int error;
flags |= GB_CVTENXIO;
error = breadn_flags(vp, lblkno, dblkno, size, rablkno, rabsize, cnt,
cred, flags, ckhashfunc, bpp);
if (error != 0 && ffs_fsfail_cleanup(ump, error)) {
/* Substitute a zero-filled buffer for the unreadable block. */
error = getblkx(vp, lblkno, dblkno, size, 0, 0, flags, bpp);
KASSERT(error == 0, ("getblkx failed"));
vfs_bio_bzero_buf(*bpp, 0, size);
}
return (error);
}
#endif /* _KERNEL */
/*

View File

@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <security/mac/mac_framework.h>
@ -148,6 +149,12 @@ static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
"multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir",
"nosymfollow", "sync", "union", "userquota", "untrusted", NULL };
static int ffs_enxio_enable = 1;
SYSCTL_DECL(_vfs_ffs);
SYSCTL_INT(_vfs_ffs, OID_AUTO, enxio_enable, CTLFLAG_RWTUN,
&ffs_enxio_enable, 0,
"enable mapping of other disk I/O errors to ENXIO");
static int
ffs_mount(struct mount *mp)
{
@ -795,6 +802,7 @@ ffs_mountfs(odevvp, mp, td)
struct g_consumer *cp;
struct mount *nmp;
struct vnode *devvp;
struct fsfail_task *etp;
int candelete, canspeedup;
off_t loc;
@ -1085,6 +1093,9 @@ ffs_mountfs(odevvp, mp, td)
(void) ufs_extattr_autostart(mp, td);
#endif /* !UFS_EXTATTR_AUTOSTART */
#endif /* !UFS_EXTATTR */
etp = malloc(sizeof *ump->um_fsfail_task, M_UFSMNT, M_WAITOK | M_ZERO);
etp->fsid = mp->mnt_stat.f_fsid;
ump->um_fsfail_task = etp;
return (0);
out:
if (fs != NULL) {
@ -1134,7 +1145,6 @@ ffs_use_bread(void *devfd, off_t loc, void **bufp, int size)
return (0);
}
#include <sys/sysctl.h>
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
@ -1271,7 +1281,7 @@ ffs_unmount(mp, mntflags)
error = softdep_flushfiles(mp, flags, td);
else
error = ffs_flushfiles(mp, flags, td);
if (error != 0 && error != ENXIO)
if (error != 0 && !ffs_fsfail_cleanup(ump, error))
goto fail;
UFS_LOCK(ump);
@ -1288,7 +1298,9 @@ ffs_unmount(mp, mntflags)
if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) {
fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
error = ffs_sbupdate(ump, MNT_WAIT, 0);
if (error && error != ENXIO) {
if (ffs_fsfail_cleanup(ump, error))
error = 0;
if (error != 0 && !ffs_fsfail_cleanup(ump, error)) {
fs->fs_clean = 0;
goto fail;
}
@ -1326,6 +1338,8 @@ ffs_unmount(mp, mntflags)
}
free(fs->fs_csp, M_UFSMNT);
free(fs, M_UFSMNT);
if (ump->um_fsfail_task != NULL)
free(ump->um_fsfail_task, M_UFSMNT);
free(ump, M_UFSMNT);
mp->mnt_data = NULL;
MNT_ILOCK(mp);
@ -1640,6 +1654,8 @@ ffs_sync(mp, waitfor)
if (waitfor == MNT_WAIT || rebooting) {
if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
allerror = error;
if (ffs_fsfail_cleanup(ump, allerror))
allerror = 0;
/* Flushed work items may create new vnodes to clean */
if (allerror == 0 && count)
goto loop;
@ -1657,6 +1673,8 @@ ffs_sync(mp, waitfor)
error = ffs_sbupdate(ump, waitfor, 0);
if (error != 0)
allerror = error;
if (ffs_fsfail_cleanup(ump, allerror))
allerror = 0;
if (allerror == 0 && waitfor == MNT_WAIT)
goto loop;
} else if (suspend != 0) {
@ -1681,6 +1699,8 @@ ffs_sync(mp, waitfor)
if (fs->fs_fmod != 0 &&
(error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
allerror = error;
if (ffs_fsfail_cleanup(ump, allerror))
allerror = 0;
return (allerror);
}
@ -1707,6 +1727,7 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
struct ufsmount *ump;
struct buf *bp;
struct vnode *vp;
daddr_t dbn;
int error;
MPASS((ffs_flags & FFSV_REPLACE) == 0 || (flags & LK_EXCLUSIVE) != 0);
@ -1796,9 +1817,10 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
}
/* Read in the disk contents for the inode, copy into the inode. */
error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
(int)fs->fs_bsize, NOCRED, &bp);
if (error) {
dbn = fsbtodb(fs, ino_to_fsba(fs, ino));
error = ffs_breadz(ump, ump->um_devvp, dbn, dbn, (int)fs->fs_bsize,
NULL, NULL, 0, NOCRED, 0, NULL, &bp);
if (error != 0) {
/*
* The inode does not contain anything useful, so it would
* be misleading to leave it on its hash chain. With mode
@ -1957,6 +1979,7 @@ ffs_uninit(vfsp)
ret = ufs_uninit(vfsp);
softdep_uninitialize();
ffs_susp_uninitialize();
taskqueue_drain_all(taskqueue_thread);
return (ret);
}
@ -2039,6 +2062,8 @@ ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size)
* Writing the superblock itself. We need to do special checks for it.
*/
bp = devfdp->sbbp;
if (ffs_fsfail_cleanup(ump, devfdp->error))
devfdp->error = 0;
if (devfdp->error != 0) {
brelse(bp);
return (devfdp->error);
@ -2112,6 +2137,11 @@ ffs_backgroundwritedone(struct buf *bp)
struct bufobj *bufobj;
struct buf *origbp;
#ifdef SOFTUPDATES
if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) != 0)
softdep_handle_error(bp);
#endif
/*
* Find the original buffer that we are writing.
*/
@ -2122,7 +2152,7 @@ ffs_backgroundwritedone(struct buf *bp)
/*
* We should mark the cylinder group buffer origbp as
* dirty, to not loose the failed write.
* dirty, to not lose the failed write.
*/
if ((bp->b_ioflags & BIO_ERROR) != 0)
origbp->b_vflags |= BV_BKGRDERR;
@ -2393,6 +2423,8 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
break;
}
}
if (bp->b_iocmd != BIO_READ && ffs_enxio_enable)
bp->b_xflags |= BX_CVTENXIO;
g_vfs_strategy(bo, bp);
}

View File

@ -239,6 +239,8 @@ ffs_fsync(struct vop_fsync_args *ap)
}
BO_UNLOCK(bo);
}
if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), 0))
return (ENXIO);
return (0);
}
@ -247,6 +249,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
{
struct inode *ip;
struct bufobj *bo;
struct ufsmount *ump;
struct buf *bp, *nbp;
ufs_lbn_t lbn;
int error, passes;
@ -255,14 +258,18 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
ip = VTOI(vp);
ip->i_flag &= ~IN_NEEDSYNC;
bo = &vp->v_bufobj;
ump = VFSTOUFS(vp->v_mount);
/*
* When doing MNT_WAIT we must first flush all dependencies
* on the inode.
*/
if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
(error = softdep_sync_metadata(vp)) != 0)
(error = softdep_sync_metadata(vp)) != 0) {
if (ffs_fsfail_cleanup(ump, error))
error = 0;
return (error);
}
/*
* Flush all dirty buffers associated with a vnode.
@ -332,7 +339,10 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
}
if (wait) {
bremfree(bp);
if ((error = bwrite(bp)) != 0)
error = bwrite(bp);
if (ffs_fsfail_cleanup(ump, error))
error = 0;
if (error != 0)
return (error);
} else if ((bp->b_flags & B_CLUSTEROK)) {
(void) vfs_bio_awrite(bp);
@ -901,8 +911,11 @@ ffs_write(ap)
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
error = ffs_update(vp, 1);
if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
error = ENXIO;
}
return (error);
}

View File

@ -358,6 +358,7 @@ struct inodedep {
struct fs *id_fs; /* associated filesystem */
ino_t id_ino; /* dependent inode */
nlink_t id_nlinkdelta; /* saved effective link count */
nlink_t id_nlinkwrote; /* i_nlink that we wrote to disk */
nlink_t id_savednlink; /* Link saved during rollback */
LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */
struct bmsafemap *id_bmsafemap; /* related bmsafemap (if pending) */

View File

@ -1426,6 +1426,7 @@ ufs_rename(ap)
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(tip);
}
goto bad;
}
if (doingdirectory && !DOINGSOFTDEP(tvp)) {
/*
@ -1523,11 +1524,13 @@ ufs_rename(ap)
if (error == 0 && endoff != 0) {
error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL |
(DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred);
if (error != 0)
if (error != 0 && !ffs_fsfail_cleanup(VFSTOUFS(mp), error))
vn_printf(tdvp,
"ufs_rename: failed to truncate, error %d\n",
error);
#ifdef UFS_DIRHASH
if (error != 0)
ufsdirhash_free(tdp);
else if (tdp->i_dirhash != NULL)
ufsdirhash_dirtrunc(tdp, endoff);
#endif

View File

@ -45,6 +45,8 @@ struct ufs_args {
#ifdef _KERNEL
#include <sys/_task.h>
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_UFSMNT);
MALLOC_DECLARE(M_TRIM);
@ -65,6 +67,10 @@ struct inodedep;
TAILQ_HEAD(inodedeplst, inodedep);
LIST_HEAD(bmsafemaphd, bmsafemap);
LIST_HEAD(trimlist_hashhead, ffs_blkfree_trim_params);
/*
* State for the asynchronous forced unmount that is queued by
* ffs_fsfail_cleanup_locked() and run by ffs_fsfail_unmount().
*/
struct fsfail_task {
struct task task;	/* taskqueue entry for the unmount handler */
fsid_t fsid;	/* id used to look the mount up via vfs_getvfs() */
};
/*
* This structure describes the UFS specific mount structure data.
@ -112,6 +118,7 @@ struct ufsmount {
struct taskqueue *um_trim_tq; /* (c) trim request queue */
struct trimlist_hashhead *um_trimhash; /* (i) trimlist hash table */
u_long um_trimlisthashsize; /* (i) trim hash table size-1 */
struct fsfail_task *um_fsfail_task; /* (i) task for fsfail cleanup*/
/* (c) - below function ptrs */
int (*um_balloc)(struct vnode *, off_t, int, struct ucred *,
int, struct buf **);
@ -133,7 +140,8 @@ struct ufsmount {
#define UM_CANDELETE 0x00000001 /* devvp supports TRIM */
#define UM_WRITESUSPENDED 0x00000002 /* suspension in progress */
#define UM_CANSPEEDUP 0x00000004 /* devvp supports SPEEDUP */
#define UM_FSFAIL_CLEANUP 0x00000008 /* need cleanup after
unrecoverable error */
/*
* function prototypes
*/