Retire two unused background fsck sysctls.

These two sysctls were added to support UFS softupdates journalling
with snapshots.  However, the changes to fsck to use them were never
committed and there have never been any in-tree uses of these sysctls.

More details from Kirk:

When journalling got added to soft updates, its journal rollback freed
blocks that it thought were no longer in use. But it does not take
snapshots into account (i.e., if a snapshot is still using it, then it
cannot be freed). So I added the needed logic to fsck by having the
free go through the kernel's blkfree code so it could grab blocks that
were still needed by snapshots. That is done using the setbufoutput
hack. I never got that code working reliably, so it is still sitting
in my work directory. Which also explains why you still cannot take
snapshots on filesystems running with journalling...

In looking over my use of this feature, and in particular the troubles
I was having with it, I conclude that it may be better to extract the
code from the kernel that handles freeing blocks claimed by snapshots
and putting it into fsck directly. My original intent was that it is
complex and at the time changing, so only having to maintain it in one
place was appealing. But at this point it has not changed in years and
the hacks like setinode and setbufoutput to be able to use the kernel
code is sufficiently ugly, that I am leaning towards just extracting
it.

Reviewed by:	mckusick
MFC after:	1 week
Sponsored by:	DARPA
Differential Revision:	https://reviews.freebsd.org/D24484
This commit is contained in:
John Baldwin 2020-04-21 17:42:32 +00:00
parent 5e61abf740
commit f2620e9ceb
2 changed files with 3 additions and 186 deletions

View File

@ -3086,18 +3086,6 @@ ffs_fserr(fs, inum, cp)
* in the current directory is oldvalue then change it to newvalue.
* unlink(nameptr, oldvalue) - Verify that the inode number associated
* with nameptr in the current directory is oldvalue then unlink it.
*
* The following functions may only be used on a quiescent filesystem
* by the soft updates journal. They are not safe to be run on an active
* filesystem.
*
* setinode(inode, dip) - the specified disk inode is replaced with the
* contents pointed to by dip.
* setbufoutput(fd, flags) - output associated with the specified file
* descriptor (which must reference the character device supporting
* the filesystem) switches from using physio to running through the
* buffer cache when flags is set to 1. The descriptor reverts to
* physio for output when flags is set to zero.
*/
static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);
@ -3163,23 +3151,12 @@ static SYSCTL_NODE(_vfs_ffs, FFS_UNLINK, unlink,
CTLFLAG_WR | CTLFLAG_NEEDGIANT, sysctl_ffs_fsck,
"Unlink a Duplicate Name");
static SYSCTL_NODE(_vfs_ffs, FFS_SET_INODE, setinode,
CTLFLAG_WR | CTLFLAG_NEEDGIANT, sysctl_ffs_fsck,
"Update an On-Disk Inode");
static SYSCTL_NODE(_vfs_ffs, FFS_SET_BUFOUTPUT, setbufoutput,
CTLFLAG_WR | CTLFLAG_NEEDGIANT, sysctl_ffs_fsck,
"Set Buffered Writing for Descriptor");
#ifdef DIAGNOSTIC
static int fsckcmds = 0;
SYSCTL_INT(_debug, OID_AUTO, ffs_fsckcmds, CTLFLAG_RW, &fsckcmds, 0,
"print out fsck_ffs-based filesystem update commands");
#endif /* DIAGNOSTIC */
static int buffered_write(struct file *, struct uio *, struct ucred *,
int, struct thread *);
static int
sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
{
@ -3194,10 +3171,9 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
ufs2_daddr_t blkno;
long blkcnt, blksize;
u_long key;
struct file *fp, *vfp;
struct file *fp;
cap_rights_t rights;
int filetype, error;
static struct fileops *origops, bufferedops;
if (req->newlen > sizeof cmd)
return (EBADRPC);
@ -3490,76 +3466,6 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
0, (ino_t)cmd.size);
break;
case FFS_SET_INODE:
if (ump->um_fsckpid != td->td_proc->p_pid) {
error = EPERM;
break;
}
#ifdef DIAGNOSTIC
if (fsckcmds) {
printf("%s: update inode %jd\n",
mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
}
#endif /* DIAGNOSTIC */
if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
break;
AUDIT_ARG_VNODE1(vp);
ip = VTOI(vp);
if (I_IS_UFS1(ip))
error = copyin((void *)(intptr_t)cmd.size, ip->i_din1,
sizeof(struct ufs1_dinode));
else
error = copyin((void *)(intptr_t)cmd.size, ip->i_din2,
sizeof(struct ufs2_dinode));
if (error) {
vput(vp);
break;
}
UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_MODIFIED);
error = ffs_update(vp, 1);
vput(vp);
break;
case FFS_SET_BUFOUTPUT:
if (ump->um_fsckpid != td->td_proc->p_pid) {
error = EPERM;
break;
}
if (ITOUMP(VTOI(vp)) != ump) {
error = EINVAL;
break;
}
#ifdef DIAGNOSTIC
if (fsckcmds) {
printf("%s: %s buffered output for descriptor %jd\n",
mp->mnt_stat.f_mntonname,
cmd.size == 1 ? "enable" : "disable",
(intmax_t)cmd.value);
}
#endif /* DIAGNOSTIC */
if ((error = getvnode(td, cmd.value,
cap_rights_init(&rights, CAP_FSCK), &vfp)) != 0)
break;
if (vfp->f_vnode->v_type != VCHR) {
fdrop(vfp, td);
error = EINVAL;
break;
}
if (origops == NULL) {
origops = vfp->f_ops;
bcopy((void *)origops, (void *)&bufferedops,
sizeof(bufferedops));
bufferedops.fo_write = buffered_write;
}
if (cmd.size == 1)
atomic_store_rel_ptr((volatile uintptr_t *)&vfp->f_ops,
(uintptr_t)&bufferedops);
else
atomic_store_rel_ptr((volatile uintptr_t *)&vfp->f_ops,
(uintptr_t)origops);
fdrop(vfp, td);
break;
default:
#ifdef DIAGNOSTIC
if (fsckcmds) {
@ -3575,92 +3481,3 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
vn_finished_write(mp);
return (error);
}
/*
* Function to switch a descriptor to use the buffer cache to stage
* its I/O. This is needed so that writes to the filesystem device
* will give snapshots a chance to copy modified blocks for which it
* needs to retain copies.
*/
static int
buffered_write(fp, uio, active_cred, flags, td)
struct file *fp;
struct uio *uio;
struct ucred *active_cred;
int flags;
struct thread *td;
{
struct pwd *pwd;
struct vnode *devvp, *vp;
struct inode *ip;
struct buf *bp;
struct fs *fs;
struct ufsmount *ump;
struct filedesc *fdp;
int error;
daddr_t lbn;
/*
* The devvp is associated with the /dev filesystem. To discover
* the filesystem with which the device is associated, we depend
* on the application setting the current directory to a location
* within the filesystem being written. Yes, this is an ugly hack.
*/
devvp = fp->f_vnode;
if (!vn_isdisk(devvp, NULL))
return (EINVAL);
fdp = td->td_proc->p_fd;
FILEDESC_SLOCK(fdp);
pwd = FILEDESC_LOCKED_LOAD_PWD(fdp);
vp = pwd->pwd_cdir;
vref(vp);
FILEDESC_SUNLOCK(fdp);
vn_lock(vp, LK_SHARED | LK_RETRY);
/*
* Check that the current directory vnode indeed belongs to
* UFS before trying to dereference UFS-specific v_data fields.
*/
if (vp->v_op != &ffs_vnodeops1 && vp->v_op != &ffs_vnodeops2) {
vput(vp);
return (EINVAL);
}
ip = VTOI(vp);
ump = ip->i_ump;
if (ump->um_odevvp != devvp) {
vput(vp);
return (EINVAL);
}
devvp = ump->um_devvp;
fs = ITOFS(ip);
vput(vp);
foffset_lock_uio(fp, uio, flags);
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
#ifdef DIAGNOSTIC
if (fsckcmds) {
printf("%s: buffered write for block %jd\n",
fs->fs_fsmnt, (intmax_t)btodb(uio->uio_offset));
}
#endif /* DIAGNOSTIC */
/*
* All I/O must be contained within a filesystem block, start on
* a fragment boundary, and be a multiple of fragments in length.
*/
if (uio->uio_resid > fs->fs_bsize - (uio->uio_offset % fs->fs_bsize) ||
fragoff(fs, uio->uio_offset) != 0 ||
fragoff(fs, uio->uio_resid) != 0) {
error = EINVAL;
goto out;
}
lbn = numfrags(fs, uio->uio_offset);
bp = getblk(devvp, lbn, uio->uio_resid, 0, 0, 0);
bp->b_flags |= B_RELBUF;
if ((error = uiomove((char *)bp->b_data, uio->uio_resid, uio)) != 0) {
brelse(bp);
goto out;
}
error = bwrite(bp);
out:
VOP_UNLOCK(devvp);
foffset_unlock_uio(fp, uio, flags | FOF_NEXTOFF);
return (error);
}

View File

@ -219,8 +219,8 @@
#define FFS_SET_CWD 12 /* set current directory */
#define FFS_SET_DOTDOT 13 /* set inode number for ".." */
#define FFS_UNLINK 14 /* remove a name in the filesystem */
#define FFS_SET_INODE 15 /* update an on-disk inode */
#define FFS_SET_BUFOUTPUT 16 /* set buffered writing on descriptor */
/* Was FFS_SET_INODE 15 */
/* Was FFS_SET_BUFOUTPUT 16 */
#define FFS_SET_SIZE 17 /* set inode size */
#define FFS_MAXID 17 /* number of valid ffs ids */