/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
 *
 * Further information about snapshots can be obtained from:
 *
 *	Marshall Kirk McKusick		http://www.mckusick.com/softdep/
 *	1614 Oxford Street		mckusick@mckusick.com
 *	Berkeley, CA 94709-1608		+1-510-843-9542
 *	USA
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_snapshot.c	8.11 (McKusick) 7/23/00
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/gsb_crc32.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/fcntl.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/sched.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

#include <geom/geom.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#define KERNCRED thread0.td_ucred

#include "opt_ffs.h"

#ifdef NO_FFS_SNAPSHOT
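/*
 * When NO_FFS_SNAPSHOT is defined (see opt_ffs.h), snapshot support is
 * compiled out and the entry points below become stubs: calls that would
 * create or service a snapshot fail with EINVAL and the remaining hooks
 * are no-ops.
 */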
int
ffs_snapshot(mp, snapfile)
	struct mount *mp;
	char *snapfile;
{
	return (EINVAL);
}

int
ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd)
	struct fs *fs;
	struct vnode *devvp;
	ufs2_daddr_t bno;
	long size;
	ino_t inum;
	enum vtype vtype;
	struct workhead *wkhd;
{
	return (EINVAL);
}

void
ffs_snapremove(vp)
	struct vnode *vp;
{
}

void
ffs_snapshot_mount(mp)
	struct mount *mp;
{
}

void
ffs_snapshot_unmount(mp)
	struct mount *mp;
{
}

void
ffs_snapgone(ip)
	struct inode *ip;
{
}

int
ffs_copyonwrite(devvp, bp)
	struct vnode *devvp;
	struct buf *bp;
{
	return (EINVAL);
}

void
ffs_sync_snap(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
}

#else
FEATURE(ffs_snapshot, "FFS snapshot support");

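/*
 * All snapshots on a given device share one snapdata structure, which
 * supplies the shared snapshot lock and the list of snapshot inodes and
 * preallocated blocks. Released snapdata structures are kept on the
 * snapfree list below, protected by snapfree_lock, so that
 * ffs_snapdata_acquire() can reuse them.
 */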
LIST_HEAD(, snapdata) snapfree;
static struct mtx snapfree_lock;
MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF);

static int cgaccount(int, struct vnode *, struct buf *, int);
static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
    int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
    ufs_lbn_t, int), int, int);
static int indiracct_ufs1(struct vnode *, struct vnode *, int,
    ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
    int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
    int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
    ufs_lbn_t, int), int, int);
static int indiracct_ufs2(struct vnode *, struct vnode *, int,
    ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
    int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
    ufs_lbn_t, int), int);
static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
    struct fs *, ufs_lbn_t, int);
static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t);
static void try_free_snapdata(struct vnode *devvp);
static struct snapdata *ffs_snapdata_acquire(struct vnode *devvp);
static int ffs_bp_snapblk(struct vnode *, struct buf *);

/*
 * To ensure the consistency of snapshots across crashes, we must
 * synchronously write out copied blocks before allowing the
 * originals to be modified. Because of the rather severe speed
 * penalty that this imposes, the code normally only ensures
 * persistence for the filesystem metadata contained within a
 * snapshot. Setting the following flag allows this crash
 * persistence to be enabled for file contents.
 */
int dopersistence = 0;

#ifdef DIAGNOSTIC
#include <sys/sysctl.h>
SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, "");
static int snapdebug = 0;
SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, "");
int collectsnapstats = 0;
SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats,
    0, "");
#endif /* DIAGNOSTIC */

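/*
 * Outline of ffs_snapshot(): create and size the snapshot file, preallocate
 * the copies of the filesystem metadata it will need, suspend writes to the
 * filesystem, bring the copied cylinder group maps and superblock up to
 * date, expunge other snapshots and unlinked files from its view, record
 * the new snapshot, and then resume normal operation.
 */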
/*
 * Create a snapshot file and initialize it for the filesystem.
 */
int
ffs_snapshot(mp, snapfile)
	struct mount *mp;
	char *snapfile;
{
	ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
	int error, cg, snaploc;
	int i, size, len, loc;
	ufs2_daddr_t blockno;
	uint64_t flag;
	char saved_nice = 0;
	long redo = 0, snaplistsize = 0;
	int32_t *lp;
	void *space;
	struct fs *copy_fs = NULL, *fs;
	struct thread *td = curthread;
	struct inode *ip, *xp;
	struct buf *bp, *nbp, *ibp;
	struct nameidata nd;
	struct mount *wrtmp;
	struct vattr vat;
	struct vnode *vp, *xvp, *mvp, *devvp;
	struct uio auio;
	struct iovec aiov;
	struct snapdata *sn;
	struct ufsmount *ump;
#ifdef DIAGNOSTIC
	struct timespec starttime = {0, 0}, endtime;
#endif

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	sn = NULL;
	/*
	 * At the moment, journaled soft updates cannot support
	 * taking snapshots.
	 */
	if (MOUNTEDSUJ(mp)) {
		vfs_mount_error(mp, "%s: Snapshots are not yet supported when "
		    "running with journaled soft updates", fs->fs_fsmnt);
		return (EOPNOTSUPP);
	}
	MNT_ILOCK(mp);
	flag = mp->mnt_flag;
	MNT_IUNLOCK(mp);
	/*
	 * Need to serialize access to snapshot code per filesystem.
	 */
	/*
	 * Assign a snapshot slot in the superblock.
	 */
	UFS_LOCK(ump);
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
		if (fs->fs_snapinum[snaploc] == 0)
			break;
	UFS_UNLOCK(ump);
	if (snaploc == FSMAXSNAP)
		return (ENOSPC);
	/*
	 * Create the snapshot file.
	 */
restart:
	NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE, UIO_SYSSPACE,
	    snapfile, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		vput(nd.ni_vp);
		error = EEXIST;
	}
	if (nd.ni_dvp->v_mount != mp)
		error = EXDEV;
	if (error) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_dvp == nd.ni_vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		return (error);
	}
	VATTR_NULL(&vat);
	vat.va_type = VREG;
	vat.va_mode = S_IRUSR;
	vat.va_vaflags |= VA_EXCLUSIVE;
	if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp))
		wrtmp = NULL;
	if (wrtmp != mp)
		panic("ffs_snapshot: mount mismatch");
	vfs_rel(wrtmp);
	if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &wrtmp,
		    V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat);
	if (error) {
		VOP_VPUT_PAIR(nd.ni_dvp, NULL, true);
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vn_finished_write(wrtmp);
		if (error == ERELOOKUP)
			goto restart;
		return (error);
	}
	vp = nd.ni_vp;
	vref(nd.ni_dvp);
	VOP_VPUT_PAIR(nd.ni_dvp, &vp, false);
	if (VN_IS_DOOMED(vp)) {
		error = EBADF;
		goto out;
	}
	vnode_create_vobject(nd.ni_vp, fs->fs_size, td);
	vp->v_vflag |= VV_SYSTEM;
	ip = VTOI(vp);
	devvp = ITODEVVP(ip);
	/*
	 * Calculate the size of the filesystem then allocate the block
	 * immediately following the last block of the filesystem that
	 * will contain the snapshot list. This operation allows us to
	 * set the size of the snapshot.
	 */
	numblks = howmany(fs->fs_size, fs->fs_frag);
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)numblks),
	    fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
	if (error)
		goto out;
	bawrite(bp);
	ip->i_size = lblktosize(fs, (off_t)(numblks + 1));
	vnode_pager_setsize(vp, ip->i_size);
	DIP_SET(ip, i_size, ip->i_size);
	UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
	/*
	 * Preallocate critical data structures so that we can copy
	 * them in without further allocation after we suspend all
	 * operations on the filesystem. We would like to just release
	 * the allocated buffers without writing them since they will
	 * be filled in below once we are ready to go, but this upsets
	 * the soft update code, so we go ahead and write the new buffers.
	 *
	 * Allocate all indirect blocks and mark all of them as not
	 * needing to be copied.
	 */
	for (blkno = UFS_NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
		    fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp);
		if (error)
			goto out;
		bawrite(ibp);
	}
	/*
	 * Allocate copies for the superblock and its summary information.
	 */
	error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED,
	    0, &nbp);
	if (error)
		goto out;
	bawrite(nbp);
	blkno = fragstoblks(fs, fs->fs_csaddr);
	len = howmany(fs->fs_cssize, fs->fs_bsize);
	for (loc = 0; loc < len; loc++) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out;
		bawrite(nbp);
	}
	/*
	 * Allocate all cylinder group blocks.
	 */
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out;
		bawrite(nbp);
		if (cg % 10 == 0) {
			error = ffs_syncvnode(vp, MNT_WAIT, 0);
			/* vp possibly reclaimed if unlocked */
			if (error != 0)
				goto out;
		}
	}
	/*
	 * Copy all the cylinder group maps. Although the
	 * filesystem is still active, we hope that only a few
	 * cylinder groups will change between now and when we
	 * suspend operations. Thus, we will be able to quickly
	 * touch up the few cylinder groups that changed during
	 * the suspension period.
	 */
	len = roundup2(howmany(fs->fs_ncg, NBBY), sizeof(int));
	space = malloc(len, M_DEVBUF, M_WAITOK | M_ZERO);
	UFS_LOCK(ump);
	fs->fs_active = space;
	UFS_UNLOCK(ump);
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto out;
		error = cgaccount(cg, vp, nbp, 1);
		bawrite(nbp);
		if (cg % 10 == 0 && error == 0)
			error = ffs_syncvnode(vp, MNT_WAIT, 0);
		if (error)
			goto out;
	}
	/*
	 * Change inode to snapshot type file.
	 */
	ip->i_flags |= SF_SNAPSHOT;
	DIP_SET(ip, i_flags, ip->i_flags);
	UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
	/*
	 * Ensure that the snapshot is completely on disk.
	 * Since we have marked it as a snapshot it is safe to
	 * unlock it as no process will be allowed to write to it.
	 */
	if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
		goto out;
	VOP_UNLOCK(vp);
	/*
	 * All allocations are done, so we can now snapshot the system.
	 *
	 * Rescind nice scheduling while running with the filesystem suspended.
	 */
	if (td->td_proc->p_nice > 0) {
		struct proc *p;

		p = td->td_proc;
		PROC_LOCK(p);
		saved_nice = p->p_nice;
		sched_nice(p, 0);
		PROC_UNLOCK(p);
	}
	/*
	 * Suspend operation on filesystem.
	 */
	for (;;) {
		vn_finished_write(wrtmp);
		if ((error = vfs_write_suspend(vp->v_mount, 0)) != 0) {
			vn_start_write(NULL, &wrtmp, V_WAIT);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			goto out;
		}
		if (mp->mnt_kern_flag & MNTK_SUSPENDED)
			break;
		vn_start_write(NULL, &wrtmp, V_WAIT);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (ip->i_effnlink == 0) {
		error = ENOENT;		/* Snapshot file unlinked */
		goto resumefs;
	}
#ifdef DIAGNOSTIC
	if (collectsnapstats)
		nanotime(&starttime);
#endif

	/*
	 * First, copy all the cylinder group maps that have changed.
	 */
	for (cg = 0; cg < fs->fs_ncg; cg++) {
		if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0)
			continue;
		redo++;
		error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
		    fs->fs_bsize, KERNCRED, 0, &nbp);
		if (error)
			goto resumefs;
		error = cgaccount(cg, vp, nbp, 2);
		bawrite(nbp);
		if (error)
			goto resumefs;
	}
	/*
	 * Grab a copy of the superblock and its summary information.
	 * We delay writing it until the suspension is released below.
	 */
	copy_fs = malloc((u_long)fs->fs_bsize, M_UFSMNT, M_WAITOK);
	bcopy(fs, copy_fs, fs->fs_sbsize);
	copy_fs->fs_si = malloc(sizeof(struct fs_summary_info), M_UFSMNT,
	    M_ZERO | M_WAITOK);
	if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
		copy_fs->fs_clean = 1;
	size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE;
	if (fs->fs_sbsize < size)
		bzero(&((char *)copy_fs)[fs->fs_sbsize],
		    size - fs->fs_sbsize);
	size = blkroundup(fs, fs->fs_cssize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	copy_fs->fs_csp = space;
	bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize);
	space = (char *)space + fs->fs_cssize;
	loc = howmany(fs->fs_cssize, fs->fs_fsize);
	i = fs->fs_frag - loc % fs->fs_frag;
	len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize;
	if (len > 0) {
		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc),
		    len, KERNCRED, &bp)) != 0) {
			brelse(bp);
			goto resumefs;
		}
		bcopy(bp->b_data, space, (u_int)len);
		space = (char *)space + len;
		bp->b_flags |= B_INVAL | B_NOCACHE;
		brelse(bp);
	}
	if (fs->fs_contigsumsize > 0) {
		copy_fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}
	/*
	 * We must check for active files that have been unlinked
	 * (e.g., with a zero link count). We have to expunge all
	 * trace of these files from the snapshot so that they are
	 * not reclaimed prematurely by fsck or unnecessarily dumped.
	 * We turn off the MNTK_SUSPENDED flag to avoid a panic from
	 * spec_strategy about writing on a suspended filesystem.
	 * Note that we skip unlinked snapshot files as they will
	 * be handled separately below.
	 *
	 * We also calculate the size needed for the snapshot list.
	 * Initial number of entries is composed of:
	 * - one for each cylinder group map
	 * - one for each block used by superblock summary table
	 * - one for each snapshot inode block
	 * - one for the superblock
	 * - one for the snapshot list
	 * The direct block entries in the snapshot are always
	 * copied (see reason below). Note that the superblock and
	 * the first cylinder group will almost always be allocated
	 * in the direct blocks, but we add the slop for them in case
	 * they do not end up there. The snapshot list size may get
	 * expanded by one because of an update of an inode block for
	 * an unlinked but still open file when it is expunged.
	 *
	 * Because the direct block pointers are always copied, they
	 * are not added to the list. Instead ffs_copyonwrite()
	 * explicitly checks for them before checking the snapshot list.
	 */
	snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
	    FSMAXSNAP + /* superblock */ 1 + /* snaplist */ 1;
	MNT_ILOCK(mp);
	mp->mnt_kern_flag &= ~MNTK_SUSPENDED;
	MNT_IUNLOCK(mp);
loop:
	MNT_VNODE_FOREACH_ALL(xvp, mp, mvp) {
		if ((xvp->v_usecount == 0 &&
		    (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) ||
		    xvp->v_type == VNON ||
		    IS_SNAPSHOT(VTOI(xvp))) {
			VI_UNLOCK(xvp);
			continue;
		}
		/*
		 * We can skip parent directory vnode because it must have
		 * this snapshot file in it.
		 */
		if (xvp == nd.ni_dvp) {
			VI_UNLOCK(xvp);
			continue;
		}
		vholdl(xvp);
		if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK) != 0) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			vdrop(xvp);
			goto loop;
		}
		VI_LOCK(xvp);
		if (xvp->v_usecount == 0 &&
		    (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) {
			VI_UNLOCK(xvp);
			VOP_UNLOCK(xvp);
			vdrop(xvp);
			continue;
		}
		VI_UNLOCK(xvp);
#ifdef DIAGNOSTIC
		if (snapdebug)
			vn_printf(xvp, "ffs_snapshot: busy vnode ");
#endif
		if (VOP_GETATTR(xvp, &vat, td->td_ucred) == 0 &&
		    vat.va_nlink > 0) {
			VOP_UNLOCK(xvp);
			vdrop(xvp);
			continue;
		}
		xp = VTOI(xvp);
		if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) {
			VOP_UNLOCK(xvp);
			vdrop(xvp);
			continue;
		}
		/*
		 * If there is a fragment, clear it here.
		 */
		blkno = 0;
		loc = howmany(xp->i_size, fs->fs_bsize) - 1;
		if (loc < UFS_NDADDR) {
			len = fragroundup(fs, blkoff(fs, xp->i_size));
			if (len != 0 && len < fs->fs_bsize) {
				ffs_blkfree(ump, copy_fs, vp,
				    DIP(xp, i_db[loc]), len, xp->i_number,
				    xvp->v_type, NULL, SINGLETON_KEY);
				blkno = DIP(xp, i_db[loc]);
				DIP_SET(xp, i_db[loc], 0);
			}
		}
		snaplistsize += 1;
		if (I_IS_UFS1(xp))
			error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
			    BLK_NOCOPY, 1);
		else
			error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
			    BLK_NOCOPY, 1);
		if (blkno)
			DIP_SET(xp, i_db[loc], blkno);
		if (!error)
			error = ffs_freefile(ump, copy_fs, vp, xp->i_number,
			    xp->i_mode, NULL);
		VOP_UNLOCK(xvp);
		vdrop(xvp);
		if (error) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto resumefs;
		}
	}
	/*
	 * Erase the journal file from the snapshot.
	 */
	if (fs->fs_flags & FS_SUJ) {
		error = softdep_journal_lookup(mp, &xvp);
		if (error)
			goto resumefs;
		xp = VTOI(xvp);
		if (I_IS_UFS1(xp))
			error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
			    BLK_NOCOPY, 0);
		else
			error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
			    BLK_NOCOPY, 0);
		vput(xvp);
	}
	/*
	 * Acquire a lock on the snapdata structure, creating it if necessary.
	 */
	sn = ffs_snapdata_acquire(devvp);
	/*
	 * Change vnode to use shared snapshot lock instead of the original
	 * private lock.
	 */
	vp->v_vnlock = &sn->sn_lock;
	lockmgr(&vp->v_lock, LK_RELEASE, NULL);
	xp = TAILQ_FIRST(&sn->sn_head);
	/*
	 * If this is the first snapshot on this filesystem, then we need
	 * to allocate the space for the list of preallocated snapshot blocks.
	 * This list will be refined below, but this preliminary one will
	 * keep us out of deadlock until the full one is ready.
	 */
	if (xp == NULL) {
		snapblklist = malloc(snaplistsize * sizeof(daddr_t),
		    M_UFSMNT, M_WAITOK);
		blkp = &snapblklist[1];
		*blkp++ = lblkno(fs, fs->fs_sblockloc);
		blkno = fragstoblks(fs, fs->fs_csaddr);
		for (cg = 0; cg < fs->fs_ncg; cg++) {
			if (fragstoblks(fs, cgtod(fs, cg)) > blkno)
				break;
			*blkp++ = fragstoblks(fs, cgtod(fs, cg));
		}
		len = howmany(fs->fs_cssize, fs->fs_bsize);
		for (loc = 0; loc < len; loc++)
			*blkp++ = blkno + loc;
		for (; cg < fs->fs_ncg; cg++)
			*blkp++ = fragstoblks(fs, cgtod(fs, cg));
		snapblklist[0] = blkp - snapblklist;
		VI_LOCK(devvp);
		if (sn->sn_blklist != NULL)
			panic("ffs_snapshot: non-empty list");
		sn->sn_blklist = snapblklist;
		sn->sn_listsize = blkp - snapblklist;
		VI_UNLOCK(devvp);
	}
	/*
	 * Preallocate all the direct blocks in the snapshot inode so
	 * that we never have to write the inode itself to commit an
	 * update to the contents of the snapshot. Note that once
	 * created, the size of the snapshot will never change, so
	 * there will never be a need to write the inode except to
	 * update the non-integrity-critical time fields and
	 * allocated-block count.
	 */
	for (blockno = 0; blockno < UFS_NDADDR; blockno++) {
		if (DIP(ip, i_db[blockno]) != 0)
			continue;
		error = UFS_BALLOC(vp, lblktosize(fs, blockno),
		    fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
		if (error)
			goto resumefs;
		error = readblock(vp, bp, blockno);
		bawrite(bp);
		if (error != 0)
			goto resumefs;
	}
	/*
	 * Record snapshot inode. Since this is the newest snapshot,
	 * it must be placed at the end of the list.
	 */
	VI_LOCK(devvp);
	fs->fs_snapinum[snaploc] = ip->i_number;
	if (ip->i_nextsnap.tqe_prev != 0)
		panic("ffs_snapshot: %ju already on list",
		    (uintmax_t)ip->i_number);
	TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap);
	devvp->v_vflag |= VV_COPYONWRITE;
	VI_UNLOCK(devvp);
resumefs:
	ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp");
	if (error != 0 && copy_fs != NULL) {
		free(copy_fs->fs_csp, M_UFSMNT);
		free(copy_fs->fs_si, M_UFSMNT);
		free(copy_fs, M_UFSMNT);
		copy_fs = NULL;
	}
	KASSERT(error != 0 || (sn != NULL && copy_fs != NULL),
	    ("missing snapshot setup parameters"));
	/*
	 * Resume operation on filesystem.
	 */
	vfs_write_resume(vp->v_mount, VR_START_WRITE | VR_NO_SUSPCLR);
#ifdef DIAGNOSTIC
	if (collectsnapstats && starttime.tv_sec > 0) {
		nanotime(&endtime);
		timespecsub(&endtime, &starttime, &endtime);
		printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n",
		    vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec,
		    endtime.tv_nsec / 1000000, redo, fs->fs_ncg);
	}
#endif
	if (copy_fs == NULL)
		goto out;
	/*
	 * Copy allocation information from all the snapshots in
	 * this snapshot and then expunge them from its view.
	 */
	TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) {
		if (xp == ip)
			break;
		if (I_IS_UFS1(xp))
			error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
			    BLK_SNAP, 0);
		else
			error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
			    BLK_SNAP, 0);
		if (error == 0 && xp->i_effnlink == 0) {
			error = ffs_freefile(ump, copy_fs, vp, xp->i_number,
			    xp->i_mode, NULL);
		}
		if (error) {
			fs->fs_snapinum[snaploc] = 0;
			goto done;
		}
	}
	/*
	 * Allocate space for the full list of preallocated snapshot blocks.
	 */
	snapblklist = malloc(snaplistsize * sizeof(daddr_t),
	    M_UFSMNT, M_WAITOK);
	ip->i_snapblklist = &snapblklist[1];
	/*
	 * Expunge the blocks used by the snapshots from the set of
	 * blocks marked as used in the snapshot bitmaps. Also, collect
	 * the list of allocated blocks in i_snapblklist.
	 */
	if (I_IS_UFS1(ip))
		error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1,
		    BLK_SNAP, 0);
	else
		error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2,
		    BLK_SNAP, 0);
	if (error) {
		fs->fs_snapinum[snaploc] = 0;
		free(snapblklist, M_UFSMNT);
		goto done;
	}
	if (snaplistsize < ip->i_snapblklist - snapblklist)
		panic("ffs_snapshot: list too small");
	snaplistsize = ip->i_snapblklist - snapblklist;
	snapblklist[0] = snaplistsize;
	ip->i_snapblklist = 0;
	/*
	 * Write out the list of allocated blocks to the end of the snapshot.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = (void *)snapblklist;
	aiov.iov_len = snaplistsize * sizeof(daddr_t);
	auio.uio_resid = aiov.iov_len;
	auio.uio_offset = lblktosize(fs, (off_t)numblks);
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = td;
	if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
		fs->fs_snapinum[snaploc] = 0;
		free(snapblklist, M_UFSMNT);
		goto done;
	}
	/*
	 * Write the superblock and its summary information
	 * to the snapshot.
	 */
	blkno = fragstoblks(fs, fs->fs_csaddr);
	len = howmany(fs->fs_cssize, fs->fs_bsize);
	space = copy_fs->fs_csp;
	for (loc = 0; loc < len; loc++) {
		error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp);
		if (error) {
			fs->fs_snapinum[snaploc] = 0;
			free(snapblklist, M_UFSMNT);
			goto done;
		}
		bcopy(space, nbp->b_data, fs->fs_bsize);
		space = (char *)space + fs->fs_bsize;
		bawrite(nbp);
	}
	error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize,
	    KERNCRED, &nbp);
	if (error) {
		brelse(nbp);
	} else {
		loc = blkoff(fs, fs->fs_sblockloc);
		copy_fs->fs_fmod = 0;
		copy_fs->fs_ckhash = ffs_calc_sbhash(copy_fs);
		bcopy((char *)copy_fs, &nbp->b_data[loc], (u_int)fs->fs_sbsize);
		bawrite(nbp);
	}
	/*
	 * As this is the newest list, it is the most inclusive, so
	 * should replace the previous list.
	 */
	VI_LOCK(devvp);
	space = sn->sn_blklist;
	sn->sn_blklist = snapblklist;
	sn->sn_listsize = snaplistsize;
	VI_UNLOCK(devvp);
	if (space != NULL)
		free(space, M_UFSMNT);
done:
	free(copy_fs->fs_csp, M_UFSMNT);
	free(copy_fs->fs_si, M_UFSMNT);
	free(copy_fs, M_UFSMNT);
	copy_fs = NULL;
out:
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (saved_nice > 0) {
		struct proc *p;

		p = td->td_proc;
		PROC_LOCK(p);
		sched_nice(td->td_proc, saved_nice);
		PROC_UNLOCK(td->td_proc);
	}
	UFS_LOCK(ump);
	if (fs->fs_active != 0) {
		free(fs->fs_active, M_DEVBUF);
		fs->fs_active = 0;
	}
	UFS_UNLOCK(ump);
	MNT_ILOCK(mp);
	mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
	MNT_IUNLOCK(mp);
	if (error)
		(void) ffs_truncate(vp, (off_t)0, 0, NOCRED);
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	if (error)
		vput(vp);
	else
		VOP_UNLOCK(vp);
	vrele(nd.ni_dvp);
	vn_finished_write(wrtmp);
	process_deferred_inactive(mp);
	return (error);
}

/*
 * Copy a cylinder group map. All the unallocated blocks are marked
 * BLK_NOCOPY so that the snapshot knows that it need not copy them
 * if they are later written. If passno is one, then this is a first
 * pass, so only setting needs to be done. If passno is 2, then this
 * is a revision to a previous pass which must be undone as the
 * replacement pass is done.
 */
static int
cgaccount(cg, vp, nbp, passno)
	int cg;
	struct vnode *vp;
	struct buf *nbp;
	int passno;
{
	struct buf *bp, *ibp;
	struct inode *ip;
	struct cg *cgp;
	struct fs *fs;
	ufs2_daddr_t base, numblks;
	int error, len, loc, indiroff;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	if ((error = ffs_getcg(fs, ITODEVVP(ip), cg, 0, &bp, &cgp)) != 0)
		return (error);
	UFS_LOCK(ITOUMP(ip));
	ACTIVESET(fs, cg);
	/*
	 * Recomputation of summary information might not have been performed
	 * at mount time. Sync up summary information for current cylinder
	 * group while data is in memory to ensure that result of background
	 * fsck is slightly more consistent.
	 */
	fs->fs_cs(fs, cg) = cgp->cg_cs;
	UFS_UNLOCK(ITOUMP(ip));
	bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize);
	if (fs->fs_cgsize < fs->fs_bsize)
		bzero(&nbp->b_data[fs->fs_cgsize],
		    fs->fs_bsize - fs->fs_cgsize);
	cgp = (struct cg *)nbp->b_data;
	bqrelse(bp);
	if (passno == 2)
		nbp->b_flags |= B_VALIDSUSPWRT;
	numblks = howmany(fs->fs_size, fs->fs_frag);
	len = howmany(fs->fs_fpg, fs->fs_frag);
	base = cgbase(fs, cg) / fs->fs_frag;
	if (base + len >= numblks)
		len = numblks - base - 1;
	loc = 0;
	if (base < UFS_NDADDR) {
		for ( ; loc < UFS_NDADDR; loc++) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				DIP_SET(ip, i_db[loc], BLK_NOCOPY);
			else if (passno == 2 && DIP(ip, i_db[loc]) == BLK_NOCOPY)
				DIP_SET(ip, i_db[loc], 0);
			else if (passno == 1 && DIP(ip, i_db[loc]) == BLK_NOCOPY)
				panic("ffs_snapshot: lost direct block");
		}
	}
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
	    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
	if (error) {
		goto out;
	}
	indiroff = (base + loc - UFS_NDADDR) % NINDIR(fs);
	for ( ; loc < len; loc++, indiroff++) {
		if (indiroff >= NINDIR(fs)) {
			if (passno == 2)
				ibp->b_flags |= B_VALIDSUSPWRT;
			bawrite(ibp);
			error = UFS_BALLOC(vp,
			    lblktosize(fs, (off_t)(base + loc)),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error) {
				goto out;
			}
			indiroff = 0;
		}
		if (I_IS_UFS1(ip)) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
			else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0;
			else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				panic("ffs_snapshot: lost indirect block");
			continue;
		}
		if (ffs_isblock(fs, cg_blksfree(cgp), loc))
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
		else if (passno == 2 &&
		    ((ufs2_daddr_t *)(ibp->b_data))[indiroff] == BLK_NOCOPY)
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0;
		else if (passno == 1 &&
		    ((ufs2_daddr_t *)(ibp->b_data))[indiroff] == BLK_NOCOPY)
			panic("ffs_snapshot: lost indirect block");
	}
	if (passno == 2)
		ibp->b_flags |= B_VALIDSUSPWRT;
	bdwrite(ibp);
out:
	/*
	 * We have to calculate the crc32c here rather than just setting the
	 * BX_CYLGRP b_xflags because the allocation of the block for the
	 * cylinder group map will always be a full size block (fs_bsize)
	 * even though the cylinder group may be smaller (fs_cgsize). The
	 * crc32c must be computed only over fs_cgsize whereas the BX_CYLGRP
	 * flag causes it to be computed over the size of the buffer.
	 */
	if ((fs->fs_metackhash & CK_CYLGRP) != 0) {
		((struct cg *)nbp->b_data)->cg_ckhash = 0;
		((struct cg *)nbp->b_data)->cg_ckhash =
		    calculate_crc32c(~0L, nbp->b_data, fs->fs_cgsize);
	}
	return (error);
}

/*
|
|
|
|
* Before expunging a snapshot inode, note all the
|
|
|
|
* blocks that it claims with BLK_SNAP so that fsck will
|
|
|
|
* be able to account for those blocks properly and so
|
|
|
|
* that this snapshot knows that it need not copy them
|
2002-06-21 06:18:05 +00:00
|
|
|
* if the other snapshot holding them is freed. This code
|
|
|
|
* is reproduced once each for UFS1 and UFS2.
|
2001-05-04 05:49:28 +00:00
|
|
|
*/
|
|
|
|
static int
|
2010-04-24 07:05:35 +00:00
|
|
|
expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
|
2002-02-02 01:42:44 +00:00
|
|
|
struct vnode *snapvp;
|
|
|
|
struct inode *cancelip;
|
2001-05-04 05:49:28 +00:00
|
|
|
struct fs *fs;
|
2002-06-21 06:18:05 +00:00
|
|
|
int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
|
|
|
|
struct fs *, ufs_lbn_t, int);
|
2002-02-02 01:42:44 +00:00
|
|
|
int expungetype;
|
2010-04-24 07:05:35 +00:00
|
|
|
int clearmode;
|
2001-05-04 05:49:28 +00:00
|
|
|
{
|
2002-06-21 06:18:05 +00:00
|
|
|
int i, error, indiroff;
|
|
|
|
ufs_lbn_t lbn, rlbn;
|
|
|
|
ufs2_daddr_t len, blkno, numblks, blksperindir;
|
|
|
|
struct ufs1_dinode *dip;
|
2002-02-02 01:42:44 +00:00
|
|
|
struct thread *td = curthread;
|
2001-05-04 05:49:28 +00:00
|
|
|
struct buf *bp;
|
|
|
|
|
|
|
|
/*
|
2002-02-02 01:42:44 +00:00
|
|
|
* Prepare to expunge the inode. If its inode block has not
|
|
|
|
* yet been copied, then allocate and fill the copy.
|
2001-05-04 05:49:28 +00:00
|
|
|
*/
|
2002-02-02 01:42:44 +00:00
|
|
|
lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
|
|
|
|
blkno = 0;
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn < UFS_NDADDR) {
|
2002-12-03 18:19:27 +00:00
|
|
|
blkno = VTOI(snapvp)->i_din1->di_db[lbn];
|
2002-02-02 01:42:44 +00:00
|
|
|
} else {
|
2010-05-07 08:45:21 +00:00
|
|
|
if (DOINGSOFTDEP(snapvp))
|
|
|
|
softdep_prealloc(snapvp, MNT_WAIT);
|
2003-10-23 21:14:08 +00:00
|
|
|
td->td_pflags |= TDP_COWINPROGRESS;
|
2005-02-08 17:40:01 +00:00
|
|
|
error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn),
|
2002-06-23 06:12:22 +00:00
|
|
|
fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
|
2003-10-23 21:14:08 +00:00
|
|
|
td->td_pflags &= ~TDP_COWINPROGRESS;
|
2002-02-02 01:42:44 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
2017-02-15 19:50:26 +00:00
|
|
|
indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
|
2002-06-21 06:18:05 +00:00
|
|
|
blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
|
2002-02-02 01:42:44 +00:00
|
|
|
bqrelse(bp);
|
|
|
|
}
|
2002-12-03 18:19:27 +00:00
|
|
|
if (blkno != 0) {
|
|
|
|
if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp)))
|
|
|
|
return (error);
|
|
|
|
} else {
|
2005-02-08 17:40:01 +00:00
|
|
|
error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn),
|
2002-12-03 18:19:27 +00:00
|
|
|
fs->fs_bsize, KERNCRED, 0, &bp);
|
|
|
|
if (error)
|
|
|
|
return (error);
|
2004-09-13 07:29:45 +00:00
|
|
|
if ((error = readblock(snapvp, bp, lbn)) != 0)
|
2002-12-03 18:19:27 +00:00
|
|
|
return (error);
|
|
|
|
}
|
2002-02-02 01:42:44 +00:00
|
|
|
/*
|
|
|
|
* Set a snapshot inode to be a zero length file, regular files
|
2006-05-13 20:41:37 +00:00
|
|
|
* or unlinked snapshots to be completely unallocated.
|
2002-02-02 01:42:44 +00:00
|
|
|
*/
|
2002-06-21 06:18:05 +00:00
|
|
|
dip = (struct ufs1_dinode *)bp->b_data +
|
|
|
|
ino_to_fsbo(fs, cancelip->i_number);
|
2010-04-24 07:05:35 +00:00
|
|
|
if (clearmode || cancelip->i_effnlink == 0)
|
2002-02-02 01:42:44 +00:00
|
|
|
dip->di_mode = 0;
|
2001-05-04 05:49:28 +00:00
|
|
|
dip->di_size = 0;
|
|
|
|
dip->di_blocks = 0;
|
|
|
|
dip->di_flags &= ~SF_SNAPSHOT;
|
2017-02-15 19:50:26 +00:00
|
|
|
bzero(&dip->di_db[0], (UFS_NDADDR + UFS_NIADDR) * sizeof(ufs1_daddr_t));
|
2001-05-04 05:49:28 +00:00
|
|
|
bdwrite(bp);
|
2002-12-14 01:36:59 +00:00
|
|
|
/*
|
|
|
|
* Now go through and expunge all the blocks in the file
|
|
|
|
* using the function requested.
|
|
|
|
*/
|
|
|
|
numblks = howmany(cancelip->i_size, fs->fs_bsize);
|
|
|
|
if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0],
|
2017-02-15 19:50:26 +00:00
|
|
|
&cancelip->i_din1->di_db[UFS_NDADDR], fs, 0, expungetype)))
|
2002-12-14 01:36:59 +00:00
|
|
|
return (error);
|
|
|
|
if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0],
|
2017-02-15 19:50:26 +00:00
|
|
|
&cancelip->i_din1->di_ib[UFS_NIADDR], fs, -1, expungetype)))
|
2002-12-14 01:36:59 +00:00
|
|
|
return (error);
|
|
|
|
blksperindir = 1;
|
2017-02-15 19:50:26 +00:00
|
|
|
lbn = -UFS_NDADDR;
|
|
|
|
len = numblks - UFS_NDADDR;
|
|
|
|
rlbn = UFS_NDADDR;
|
|
|
|
for (i = 0; len > 0 && i < UFS_NIADDR; i++) {
|
2002-12-14 01:36:59 +00:00
|
|
|
error = indiracct_ufs1(snapvp, ITOV(cancelip), i,
|
|
|
|
cancelip->i_din1->di_ib[i], lbn, rlbn, len,
|
|
|
|
blksperindir, fs, acctfunc, expungetype);
|
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
blksperindir *= NINDIR(fs);
|
|
|
|
lbn -= blksperindir + 1;
|
|
|
|
len -= blksperindir;
|
|
|
|
rlbn += blksperindir;
|
|
|
|
}
|
2001-05-04 05:49:28 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2000-07-11 22:07:57 +00:00
|
|
|
/*
|
|
|
|
* Descend an indirect block chain for vnode cancelvp accounting for all
|
|
|
|
* its indirect blocks in snapvp.
|
|
|
|
*/
|
|
|
|
static int
|
2002-06-21 06:18:05 +00:00
|
|
|
indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
|
|
|
|
blksperindir, fs, acctfunc, expungetype)
|
2000-07-11 22:07:57 +00:00
|
|
|
struct vnode *snapvp;
|
|
|
|
struct vnode *cancelvp;
|
|
|
|
int level;
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs1_daddr_t blkno;
|
|
|
|
ufs_lbn_t lbn;
|
|
|
|
ufs_lbn_t rlbn;
|
|
|
|
ufs_lbn_t remblks;
|
|
|
|
ufs_lbn_t blksperindir;
|
2001-05-04 05:49:28 +00:00
|
|
|
struct fs *fs;
|
2002-06-21 06:18:05 +00:00
|
|
|
int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
|
|
|
|
struct fs *, ufs_lbn_t, int);
|
2002-02-02 01:42:44 +00:00
|
|
|
int expungetype;
|
2000-07-11 22:07:57 +00:00
|
|
|
{
|
2002-06-21 06:18:05 +00:00
|
|
|
int error, num, i;
|
|
|
|
ufs_lbn_t subblksperindir;
|
2017-02-15 19:50:26 +00:00
|
|
|
struct indir indirs[UFS_NIADDR + 2];
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs1_daddr_t last, *bap;
|
2000-07-11 22:07:57 +00:00
|
|
|
struct buf *bp;
|
|
|
|
|
2003-10-17 13:57:58 +00:00
|
|
|
if (blkno == 0) {
|
|
|
|
if (expungetype == BLK_NOCOPY)
|
|
|
|
return (0);
|
|
|
|
panic("indiracct_ufs1: missing indir");
|
|
|
|
}
|
2000-07-11 22:07:57 +00:00
|
|
|
if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
|
|
|
|
return (error);
|
2003-10-17 13:57:58 +00:00
|
|
|
if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
|
|
|
|
panic("indiracct_ufs1: botched params");
|
2000-07-11 22:07:57 +00:00
|
|
|
/*
|
|
|
|
* We have to expand bread here since it will deadlock looking
|
|
|
|
* up the block number for any blocks that are not in the cache.
|
|
|
|
*/
|
2003-03-04 00:04:44 +00:00
|
|
|
bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
|
2000-07-11 22:07:57 +00:00
|
|
|
bp->b_blkno = fsbtodb(fs, blkno);
|
|
|
|
if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
|
2004-09-13 07:29:45 +00:00
|
|
|
(error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) {
|
2000-07-11 22:07:57 +00:00
|
|
|
brelse(bp);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Account for the block pointers in this indirect block.
|
|
|
|
*/
|
|
|
|
last = howmany(remblks, blksperindir);
|
|
|
|
if (last > NINDIR(fs))
|
|
|
|
last = NINDIR(fs);
|
2008-10-23 15:53:51 +00:00
|
|
|
bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK);
|
2001-05-04 05:49:28 +00:00
|
|
|
bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
|
|
|
|
bqrelse(bp);
|
2002-12-14 01:36:59 +00:00
|
|
|
error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
|
|
|
|
level == 0 ? rlbn : -1, expungetype);
|
2000-07-11 22:07:57 +00:00
|
|
|
if (error || level == 0)
|
|
|
|
goto out;
|
|
|
|
/*
|
|
|
|
* Account for the block pointers in each of the indirect blocks
|
|
|
|
* in the levels below us.
|
|
|
|
*/
|
|
|
|
subblksperindir = blksperindir / NINDIR(fs);
|
|
|
|
for (lbn++, level--, i = 0; i < last; i++) {
|
2002-06-21 06:18:05 +00:00
|
|
|
error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn,
|
2002-02-02 01:42:44 +00:00
|
|
|
rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
|
2000-07-11 22:07:57 +00:00
|
|
|
if (error)
|
|
|
|
goto out;
|
|
|
|
rlbn += blksperindir;
|
|
|
|
lbn -= blksperindir;
|
|
|
|
remblks -= blksperindir;
|
|
|
|
}
|
|
|
|
out:
|
2008-10-23 15:53:51 +00:00
|
|
|
free(bap, M_DEVBUF);
|
2000-07-11 22:07:57 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2002-02-02 01:42:44 +00:00
|
|
|
/*
|
|
|
|
* Do both snap accounting and map accounting.
|
|
|
|
*/
|
|
|
|
static int
|
2002-06-21 06:18:05 +00:00
|
|
|
fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)
|
|
|
|
struct vnode *vp;
|
|
|
|
ufs1_daddr_t *oldblkp, *lastblkp;
|
|
|
|
struct fs *fs;
|
|
|
|
ufs_lbn_t lblkno;
|
|
|
|
int exptype; /* BLK_SNAP or BLK_NOCOPY */
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
|
|
|
|
return (error);
|
|
|
|
return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Identify a set of blocks allocated in a snapshot inode.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
|
2002-02-02 01:42:44 +00:00
|
|
|
struct vnode *vp;
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs1_daddr_t *oldblkp, *lastblkp;
|
2002-02-02 01:42:44 +00:00
|
|
|
struct fs *fs;
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs_lbn_t lblkno;
|
2002-02-02 01:42:44 +00:00
|
|
|
int expungetype; /* BLK_SNAP or BLK_NOCOPY */
|
2002-06-21 06:18:05 +00:00
|
|
|
{
|
|
|
|
struct inode *ip = VTOI(vp);
|
|
|
|
ufs1_daddr_t blkno, *blkp;
|
|
|
|
ufs_lbn_t lbn;
|
|
|
|
struct buf *ibp;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
for ( ; oldblkp < lastblkp; oldblkp++) {
|
|
|
|
blkno = *oldblkp;
|
|
|
|
if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
|
|
|
|
continue;
|
|
|
|
lbn = fragstoblks(fs, blkno);
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn < UFS_NDADDR) {
|
2002-06-21 06:18:05 +00:00
|
|
|
blkp = &ip->i_din1->di_db[lbn];
|
2020-01-13 02:31:51 +00:00
|
|
|
UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
|
2002-06-21 06:18:05 +00:00
|
|
|
} else {
|
2005-02-08 17:40:01 +00:00
|
|
|
error = ffs_balloc_ufs1(vp, lblktosize(fs, (off_t)lbn),
|
2002-06-23 06:12:22 +00:00
|
|
|
fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
|
2002-06-21 06:18:05 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
blkp = &((ufs1_daddr_t *)(ibp->b_data))
|
2017-02-15 19:50:26 +00:00
|
|
|
[(lbn - UFS_NDADDR) % NINDIR(fs)];
|
2002-06-21 06:18:05 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If we are expunging a snapshot vnode and we
|
|
|
|
* find a block marked BLK_NOCOPY, then it is
|
|
|
|
* one that has been allocated to this snapshot after
|
|
|
|
* we took our current snapshot and can be ignored.
|
|
|
|
*/
|
|
|
|
if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn >= UFS_NDADDR)
|
2002-06-21 06:18:05 +00:00
|
|
|
brelse(ibp);
|
|
|
|
} else {
|
|
|
|
if (*blkp != 0)
|
2003-10-17 13:57:58 +00:00
|
|
|
panic("snapacct_ufs1: bad block");
|
2002-06-21 06:18:05 +00:00
|
|
|
*blkp = expungetype;
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn >= UFS_NDADDR)
|
2002-06-21 06:18:05 +00:00
|
|
|
bdwrite(ibp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Account for a set of blocks allocated in a snapshot inode.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
|
|
|
|
struct vnode *vp;
|
|
|
|
ufs1_daddr_t *oldblkp, *lastblkp;
|
|
|
|
struct fs *fs;
|
|
|
|
ufs_lbn_t lblkno;
|
|
|
|
int expungetype;
|
|
|
|
{
|
|
|
|
ufs1_daddr_t blkno;
|
2002-10-09 06:13:48 +00:00
|
|
|
struct inode *ip;
|
2002-06-21 06:18:05 +00:00
|
|
|
ino_t inum;
|
2002-12-18 19:50:28 +00:00
|
|
|
int acctit;
|
2002-06-21 06:18:05 +00:00
|
|
|
|
2002-10-09 06:13:48 +00:00
|
|
|
ip = VTOI(vp);
|
|
|
|
inum = ip->i_number;
|
2002-12-18 19:50:28 +00:00
|
|
|
if (lblkno == -1)
|
|
|
|
acctit = 0;
|
|
|
|
else
|
|
|
|
acctit = 1;
|
2002-06-21 06:18:05 +00:00
|
|
|
for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
|
|
|
|
blkno = *oldblkp;
|
|
|
|
if (blkno == 0 || blkno == BLK_NOCOPY)
|
|
|
|
continue;
|
2002-12-18 19:50:28 +00:00
|
|
|
if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
|
2002-12-15 19:25:59 +00:00
|
|
|
*ip->i_snapblklist++ = lblkno;
|
2002-06-21 06:18:05 +00:00
|
|
|
if (blkno == BLK_SNAP)
|
|
|
|
blkno = blkstofrags(fs, lblkno);
|
2016-09-17 16:47:34 +00:00
|
|
|
ffs_blkfree(ITOUMP(ip), fs, vp, blkno, fs->fs_bsize, inum,
|
2018-08-18 22:21:59 +00:00
|
|
|
vp->v_type, NULL, SINGLETON_KEY);
|
2002-06-21 06:18:05 +00:00
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Before expunging a snapshot inode, note all the
|
|
|
|
* blocks that it claims with BLK_SNAP so that fsck will
|
|
|
|
* be able to account for those blocks properly and so
|
|
|
|
* that this snapshot knows that it need not copy them
|
|
|
|
* if the other snapshot holding them is freed. This code
|
|
|
|
* is reproduced once each for UFS1 and UFS2.
|
|
|
|
*/
|
|
|
|
static int
|
2010-04-24 07:05:35 +00:00
|
|
|
expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
|
2002-06-21 06:18:05 +00:00
|
|
|
struct vnode *snapvp;
|
|
|
|
struct inode *cancelip;
|
|
|
|
struct fs *fs;
|
|
|
|
int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
|
|
|
|
struct fs *, ufs_lbn_t, int);
|
|
|
|
int expungetype;
|
2010-04-24 07:05:35 +00:00
|
|
|
int clearmode;
|
2002-06-21 06:18:05 +00:00
|
|
|
{
|
|
|
|
int i, error, indiroff;
|
|
|
|
ufs_lbn_t lbn, rlbn;
|
|
|
|
ufs2_daddr_t len, blkno, numblks, blksperindir;
|
|
|
|
struct ufs2_dinode *dip;
|
|
|
|
struct thread *td = curthread;
|
|
|
|
struct buf *bp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prepare to expunge the inode. If its inode block has not
|
|
|
|
* yet been copied, then allocate and fill the copy.
|
|
|
|
*/
|
|
|
|
lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
|
|
|
|
blkno = 0;
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn < UFS_NDADDR) {
|
2002-12-03 18:19:27 +00:00
|
|
|
blkno = VTOI(snapvp)->i_din2->di_db[lbn];
|
2002-06-21 06:18:05 +00:00
|
|
|
} else {
|
2010-05-07 08:45:21 +00:00
|
|
|
if (DOINGSOFTDEP(snapvp))
|
|
|
|
softdep_prealloc(snapvp, MNT_WAIT);
|
2003-10-23 21:14:08 +00:00
|
|
|
td->td_pflags |= TDP_COWINPROGRESS;
|
2005-02-08 17:40:01 +00:00
|
|
|
error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn),
|
2002-06-23 06:12:22 +00:00
|
|
|
fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
|
2003-10-23 21:14:08 +00:00
|
|
|
td->td_pflags &= ~TDP_COWINPROGRESS;
|
2002-06-21 06:18:05 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
2017-02-15 19:50:26 +00:00
|
|
|
indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
|
2002-06-21 06:18:05 +00:00
|
|
|
blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
|
|
|
|
bqrelse(bp);
|
|
|
|
}
|
2002-12-03 18:19:27 +00:00
|
|
|
if (blkno != 0) {
|
|
|
|
if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp)))
|
|
|
|
return (error);
|
|
|
|
} else {
|
2005-02-08 17:40:01 +00:00
|
|
|
error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn),
|
2002-12-03 18:19:27 +00:00
|
|
|
fs->fs_bsize, KERNCRED, 0, &bp);
|
|
|
|
if (error)
|
|
|
|
return (error);
|
2004-09-13 07:29:45 +00:00
|
|
|
if ((error = readblock(snapvp, bp, lbn)) != 0)
|
2002-12-03 18:19:27 +00:00
|
|
|
return (error);
|
|
|
|
}
|
2002-06-21 06:18:05 +00:00
|
|
|
/*
|
|
|
|
* Set a snapshot inode to be a zero length file, regular files
|
|
|
|
* to be completely unallocated.
|
|
|
|
*/
|
|
|
|
dip = (struct ufs2_dinode *)bp->b_data +
|
|
|
|
ino_to_fsbo(fs, cancelip->i_number);
|
|
|
|
dip->di_size = 0;
|
|
|
|
dip->di_blocks = 0;
|
|
|
|
dip->di_flags &= ~SF_SNAPSHOT;
|
2017-02-15 19:50:26 +00:00
|
|
|
bzero(&dip->di_db[0], (UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t));
|
2018-11-13 21:40:56 +00:00
|
|
|
if (clearmode || cancelip->i_effnlink == 0)
|
|
|
|
dip->di_mode = 0;
|
2018-12-11 22:14:37 +00:00
|
|
|
else
|
|
|
|
ffs_update_dinode_ckhash(fs, dip);
|
2002-06-21 06:18:05 +00:00
|
|
|
bdwrite(bp);
|
2002-12-14 01:36:59 +00:00
|
|
|
/*
|
|
|
|
* Now go through and expunge all the blocks in the file
|
|
|
|
* using the function requested.
|
|
|
|
*/
|
|
|
|
numblks = howmany(cancelip->i_size, fs->fs_bsize);
|
|
|
|
if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0],
|
2017-02-15 19:50:26 +00:00
|
|
|
&cancelip->i_din2->di_db[UFS_NDADDR], fs, 0, expungetype)))
|
2002-12-14 01:36:59 +00:00
|
|
|
return (error);
|
|
|
|
if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0],
|
2017-02-15 19:50:26 +00:00
|
|
|
&cancelip->i_din2->di_ib[UFS_NIADDR], fs, -1, expungetype)))
|
2002-12-14 01:36:59 +00:00
|
|
|
return (error);
|
|
|
|
blksperindir = 1;
|
2017-02-15 19:50:26 +00:00
|
|
|
lbn = -UFS_NDADDR;
|
|
|
|
len = numblks - UFS_NDADDR;
|
|
|
|
rlbn = UFS_NDADDR;
|
|
|
|
for (i = 0; len > 0 && i < UFS_NIADDR; i++) {
|
2002-12-14 01:36:59 +00:00
|
|
|
error = indiracct_ufs2(snapvp, ITOV(cancelip), i,
|
|
|
|
cancelip->i_din2->di_ib[i], lbn, rlbn, len,
|
|
|
|
blksperindir, fs, acctfunc, expungetype);
|
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
blksperindir *= NINDIR(fs);
|
|
|
|
lbn -= blksperindir + 1;
|
|
|
|
len -= blksperindir;
|
|
|
|
rlbn += blksperindir;
|
|
|
|
}
|
2002-06-21 06:18:05 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Descend an indirect block chain for vnode cancelvp accounting for all
|
|
|
|
* its indirect blocks in snapvp.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
|
|
|
|
blksperindir, fs, acctfunc, expungetype)
|
|
|
|
struct vnode *snapvp;
|
|
|
|
struct vnode *cancelvp;
|
|
|
|
int level;
|
|
|
|
ufs2_daddr_t blkno;
|
|
|
|
ufs_lbn_t lbn;
|
|
|
|
ufs_lbn_t rlbn;
|
|
|
|
ufs_lbn_t remblks;
|
|
|
|
ufs_lbn_t blksperindir;
|
|
|
|
struct fs *fs;
|
|
|
|
int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
|
|
|
|
struct fs *, ufs_lbn_t, int);
|
|
|
|
int expungetype;
|
|
|
|
{
|
|
|
|
int error, num, i;
|
|
|
|
ufs_lbn_t subblksperindir;
|
2017-02-15 19:50:26 +00:00
|
|
|
struct indir indirs[UFS_NIADDR + 2];
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs2_daddr_t last, *bap;
|
|
|
|
struct buf *bp;
|
|
|
|
|
2003-10-17 13:57:58 +00:00
|
|
|
if (blkno == 0) {
|
|
|
|
if (expungetype == BLK_NOCOPY)
|
|
|
|
return (0);
|
|
|
|
panic("indiracct_ufs2: missing indir");
|
|
|
|
}
|
2002-06-21 06:18:05 +00:00
|
|
|
if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
|
|
|
|
return (error);
|
2003-10-17 13:57:58 +00:00
|
|
|
if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
|
|
|
|
panic("indiracct_ufs2: botched params");
|
2002-06-21 06:18:05 +00:00
|
|
|
/*
|
|
|
|
* We have to expand bread here since it will deadlock looking
|
|
|
|
* up the block number for any blocks that are not in the cache.
|
|
|
|
*/
|
2003-03-04 00:04:44 +00:00
|
|
|
bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
|
2002-06-21 06:18:05 +00:00
|
|
|
bp->b_blkno = fsbtodb(fs, blkno);
|
2017-07-31 20:41:45 +00:00
|
|
|
if ((bp->b_flags & B_CACHE) == 0 &&
|
2004-09-13 07:29:45 +00:00
|
|
|
(error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) {
|
2002-06-21 06:18:05 +00:00
|
|
|
brelse(bp);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Account for the block pointers in this indirect block.
|
|
|
|
*/
|
|
|
|
last = howmany(remblks, blksperindir);
|
|
|
|
if (last > NINDIR(fs))
|
|
|
|
last = NINDIR(fs);
|
2008-10-23 15:53:51 +00:00
|
|
|
bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK);
|
2002-06-21 06:18:05 +00:00
|
|
|
bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
|
|
|
|
bqrelse(bp);
|
2002-12-14 01:36:59 +00:00
|
|
|
error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
|
|
|
|
level == 0 ? rlbn : -1, expungetype);
|
2002-06-21 06:18:05 +00:00
|
|
|
if (error || level == 0)
|
|
|
|
goto out;
|
|
|
|
/*
|
|
|
|
* Account for the block pointers in each of the indirect blocks
|
|
|
|
* in the levels below us.
|
|
|
|
*/
|
|
|
|
subblksperindir = blksperindir / NINDIR(fs);
|
|
|
|
for (lbn++, level--, i = 0; i < last; i++) {
|
|
|
|
error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn,
|
|
|
|
rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
|
|
|
|
if (error)
|
|
|
|
goto out;
|
|
|
|
rlbn += blksperindir;
|
|
|
|
lbn -= blksperindir;
|
|
|
|
remblks -= blksperindir;
|
|
|
|
}
|
|
|
|
out:
|
2008-10-23 15:53:51 +00:00
|
|
|
free(bap, M_DEVBUF);
|
2002-06-21 06:18:05 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do both snap accounting and map accounting.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)
|
|
|
|
struct vnode *vp;
|
|
|
|
ufs2_daddr_t *oldblkp, *lastblkp;
|
|
|
|
struct fs *fs;
|
|
|
|
ufs_lbn_t lblkno;
|
|
|
|
int exptype; /* BLK_SNAP or BLK_NOCOPY */
|
2002-02-02 01:42:44 +00:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
2002-06-21 06:18:05 +00:00
|
|
|
if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
|
2002-02-02 01:42:44 +00:00
|
|
|
return (error);
|
2002-06-21 06:18:05 +00:00
|
|
|
return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype));
|
2002-02-02 01:42:44 +00:00
|
|
|
}
|
|
|
|
|
2000-07-11 22:07:57 +00:00
|
|
|
/*
|
2001-12-14 00:15:06 +00:00
|
|
|
* Identify a set of blocks allocated in a snapshot inode.
|
2000-07-11 22:07:57 +00:00
|
|
|
*/
|
|
|
|
static int
|
2002-06-21 06:18:05 +00:00
|
|
|
snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
|
2000-07-11 22:07:57 +00:00
|
|
|
struct vnode *vp;
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs2_daddr_t *oldblkp, *lastblkp;
|
2001-05-04 05:49:28 +00:00
|
|
|
struct fs *fs;
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs_lbn_t lblkno;
|
2002-02-02 01:42:44 +00:00
|
|
|
int expungetype; /* BLK_SNAP or BLK_NOCOPY */
|
2000-07-11 22:07:57 +00:00
|
|
|
{
|
|
|
|
struct inode *ip = VTOI(vp);
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs2_daddr_t blkno, *blkp;
|
|
|
|
ufs_lbn_t lbn;
|
2000-07-11 22:07:57 +00:00
|
|
|
struct buf *ibp;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
for ( ; oldblkp < lastblkp; oldblkp++) {
|
|
|
|
blkno = *oldblkp;
|
|
|
|
if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
|
|
|
|
continue;
|
|
|
|
lbn = fragstoblks(fs, blkno);
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn < UFS_NDADDR) {
|
2002-06-21 06:18:05 +00:00
|
|
|
blkp = &ip->i_din2->di_db[lbn];
|
2020-01-13 02:31:51 +00:00
|
|
|
UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
|
2000-07-11 22:07:57 +00:00
|
|
|
} else {
|
2005-02-08 17:40:01 +00:00
|
|
|
error = ffs_balloc_ufs2(vp, lblktosize(fs, (off_t)lbn),
|
2002-06-23 06:12:22 +00:00
|
|
|
fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
|
2000-07-11 22:07:57 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
2002-06-21 06:18:05 +00:00
|
|
|
blkp = &((ufs2_daddr_t *)(ibp->b_data))
|
2017-02-15 19:50:26 +00:00
|
|
|
[(lbn - UFS_NDADDR) % NINDIR(fs)];
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
2001-12-14 00:15:06 +00:00
|
|
|
/*
|
2002-02-02 01:42:44 +00:00
|
|
|
* If we are expunging a snapshot vnode and we
|
|
|
|
* find a block marked BLK_NOCOPY, then it is
|
2001-12-14 00:15:06 +00:00
|
|
|
* one that has been allocated to this snapshot after
|
|
|
|
* we took our current snapshot and can be ignored.
|
|
|
|
*/
|
2002-02-02 01:42:44 +00:00
|
|
|
if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn >= UFS_NDADDR)
|
2001-12-14 00:15:06 +00:00
|
|
|
brelse(ibp);
|
|
|
|
} else {
|
|
|
|
if (*blkp != 0)
|
2003-10-17 13:57:58 +00:00
|
|
|
panic("snapacct_ufs2: bad block");
|
2002-02-02 01:42:44 +00:00
|
|
|
*blkp = expungetype;
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn >= UFS_NDADDR)
|
2001-12-14 00:15:06 +00:00
|
|
|
bdwrite(ibp);
|
2000-07-24 05:28:33 +00:00
|
|
|
}
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2001-05-04 05:49:28 +00:00
|
|
|
/*
|
|
|
|
* Account for a set of blocks allocated in a snapshot inode.
|
|
|
|
*/
|
|
|
|
static int
|
2002-06-21 06:18:05 +00:00
|
|
|
mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
|
2001-05-04 05:49:28 +00:00
|
|
|
struct vnode *vp;
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs2_daddr_t *oldblkp, *lastblkp;
|
2001-05-04 05:49:28 +00:00
|
|
|
struct fs *fs;
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs_lbn_t lblkno;
|
2002-02-02 01:42:44 +00:00
|
|
|
int expungetype;
|
2001-05-04 05:49:28 +00:00
|
|
|
{
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs2_daddr_t blkno;
|
2002-10-09 06:13:48 +00:00
|
|
|
struct inode *ip;
|
2002-02-02 01:42:44 +00:00
|
|
|
ino_t inum;
|
2002-12-18 19:50:28 +00:00
|
|
|
int acctit;
|
2001-05-04 05:49:28 +00:00
|
|
|
|
2002-10-09 06:13:48 +00:00
|
|
|
ip = VTOI(vp);
|
|
|
|
inum = ip->i_number;
|
2002-12-18 19:50:28 +00:00
|
|
|
if (lblkno == -1)
|
|
|
|
acctit = 0;
|
|
|
|
else
|
|
|
|
acctit = 1;
|
2001-05-04 05:49:28 +00:00
|
|
|
for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
|
|
|
|
blkno = *oldblkp;
|
|
|
|
if (blkno == 0 || blkno == BLK_NOCOPY)
|
|
|
|
continue;
|
2021-02-11 21:31:16 -08:00
|
|
|
if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP &&
|
|
|
|
lblkno >= UFS_NDADDR)
|
2002-12-15 19:25:59 +00:00
|
|
|
*ip->i_snapblklist++ = lblkno;
|
2001-05-04 05:49:28 +00:00
|
|
|
if (blkno == BLK_SNAP)
|
|
|
|
blkno = blkstofrags(fs, lblkno);
|
2016-09-17 16:47:34 +00:00
|
|
|
ffs_blkfree(ITOUMP(ip), fs, vp, blkno, fs->fs_bsize, inum,
|
2018-08-18 22:21:59 +00:00
|
|
|
vp->v_type, NULL, SINGLETON_KEY);
|
2001-05-04 05:49:28 +00:00
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2000-12-19 04:41:09 +00:00
|
|
|
/*
|
|
|
|
* Decrement extra reference on snapshot when last name is removed.
|
|
|
|
* It will not be freed until the last open reference goes away.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ffs_snapgone(ip)
|
|
|
|
struct inode *ip;
|
|
|
|
{
|
|
|
|
struct inode *xp;
|
2001-03-21 04:05:20 +00:00
|
|
|
struct fs *fs;
|
|
|
|
int snaploc;
|
2004-09-13 07:29:45 +00:00
|
|
|
struct snapdata *sn;
|
2005-01-24 10:10:11 +00:00
|
|
|
struct ufsmount *ump;
|
2000-12-19 04:41:09 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Find snapshot in incore list.
|
|
|
|
*/
|
2004-09-13 07:29:45 +00:00
|
|
|
xp = NULL;
|
2016-09-17 16:47:34 +00:00
|
|
|
sn = ITODEVVP(ip)->v_rdev->si_snapdata;
|
2004-09-13 07:29:45 +00:00
|
|
|
if (sn != NULL)
|
|
|
|
TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap)
|
|
|
|
if (xp == ip)
|
|
|
|
break;
|
2002-12-14 01:36:59 +00:00
|
|
|
if (xp != NULL)
|
|
|
|
vrele(ITOV(ip));
|
2019-05-28 16:32:04 +00:00
|
|
|
#ifdef DIAGNOSTIC
|
2002-12-14 01:36:59 +00:00
|
|
|
else if (snapdebug)
|
2012-09-27 23:30:49 +00:00
|
|
|
printf("ffs_snapgone: lost snapshot vnode %ju\n",
|
|
|
|
(uintmax_t)ip->i_number);
|
2019-05-28 16:32:04 +00:00
|
|
|
#endif
|
2001-03-21 04:05:20 +00:00
|
|
|
/*
|
|
|
|
* Delete snapshot inode from superblock. Keep list dense.
|
|
|
|
*/
|
2016-09-17 16:47:34 +00:00
|
|
|
ump = ITOUMP(ip);
|
|
|
|
fs = ump->um_fs;
|
2005-01-24 10:10:11 +00:00
|
|
|
UFS_LOCK(ump);
|
2001-03-21 04:05:20 +00:00
|
|
|
for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
|
|
|
|
if (fs->fs_snapinum[snaploc] == ip->i_number)
|
|
|
|
break;
|
|
|
|
if (snaploc < FSMAXSNAP) {
|
|
|
|
for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
|
|
|
|
if (fs->fs_snapinum[snaploc] == 0)
|
|
|
|
break;
|
|
|
|
fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
|
|
|
|
}
|
|
|
|
fs->fs_snapinum[snaploc - 1] = 0;
|
|
|
|
}
|
2005-01-24 10:10:11 +00:00
|
|
|
UFS_UNLOCK(ump);
|
2000-12-19 04:41:09 +00:00
|
|
|
}
|
|
|
|
|
2000-07-11 22:07:57 +00:00
|
|
|
/*
|
|
|
|
* Prepare a snapshot file for being removed.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ffs_snapremove(vp)
|
|
|
|
struct vnode *vp;
|
|
|
|
{
|
2001-03-07 07:09:55 +00:00
|
|
|
struct inode *ip;
|
2000-07-11 22:07:57 +00:00
|
|
|
struct vnode *devvp;
|
|
|
|
struct buf *ibp;
|
|
|
|
struct fs *fs;
|
2006-05-02 23:52:43 +00:00
|
|
|
ufs2_daddr_t numblks, blkno, dblk;
|
2017-07-21 18:28:27 +00:00
|
|
|
int error, i, last, loc;
|
2004-09-13 07:29:45 +00:00
|
|
|
struct snapdata *sn;
|
2000-07-11 22:07:57 +00:00
|
|
|
|
|
|
|
ip = VTOI(vp);
|
2016-09-17 16:47:34 +00:00
|
|
|
fs = ITOFS(ip);
|
|
|
|
devvp = ITODEVVP(ip);
|
2000-07-11 22:07:57 +00:00
|
|
|
/*
|
2001-04-25 08:11:18 +00:00
|
|
|
* If active, delete from incore list (this snapshot may
|
|
|
|
* already have been in the process of being deleted, so
|
|
|
|
* would not have been active).
|
|
|
|
*
|
2000-07-11 22:07:57 +00:00
|
|
|
* Clear copy-on-write flag if last snapshot.
|
|
|
|
*/
|
2006-05-02 23:52:43 +00:00
|
|
|
VI_LOCK(devvp);
|
2001-04-25 08:11:18 +00:00
|
|
|
if (ip->i_nextsnap.tqe_prev != 0) {
|
2006-05-02 23:52:43 +00:00
|
|
|
sn = devvp->v_rdev->si_snapdata;
|
2004-09-13 07:29:45 +00:00
|
|
|
TAILQ_REMOVE(&sn->sn_head, ip, i_nextsnap);
|
2001-03-07 07:09:55 +00:00
|
|
|
ip->i_nextsnap.tqe_prev = 0;
|
2006-05-02 23:52:43 +00:00
|
|
|
VI_UNLOCK(devvp);
|
2008-01-24 12:34:30 +00:00
|
|
|
lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
|
2017-07-21 18:28:27 +00:00
|
|
|
for (i = 0; i < sn->sn_lock.lk_recurse; i++)
|
|
|
|
lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
|
2006-05-02 23:52:43 +00:00
|
|
|
KASSERT(vp->v_vnlock == &sn->sn_lock,
|
|
|
|
("ffs_snapremove: lost lock mutation"));
|
2002-11-30 19:00:51 +00:00
|
|
|
vp->v_vnlock = &vp->v_lock;
|
2006-05-02 23:52:43 +00:00
|
|
|
VI_LOCK(devvp);
|
2017-07-21 18:28:27 +00:00
|
|
|
while (sn->sn_lock.lk_recurse > 0)
|
|
|
|
lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
|
2008-01-24 12:34:30 +00:00
|
|
|
lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
|
2008-03-31 07:47:08 +00:00
|
|
|
try_free_snapdata(devvp);
|
2006-05-02 23:52:43 +00:00
|
|
|
} else
|
|
|
|
VI_UNLOCK(devvp);
|
2000-07-11 22:07:57 +00:00
|
|
|
/*
|
|
|
|
* Clear all BLK_NOCOPY fields. Pass any block claims to other
|
|
|
|
* snapshots that want them (see ffs_snapblkfree below).
|
|
|
|
*/
|
2017-02-15 19:50:26 +00:00
|
|
|
for (blkno = 1; blkno < UFS_NDADDR; blkno++) {
|
2002-06-21 06:18:05 +00:00
|
|
|
dblk = DIP(ip, i_db[blkno]);
|
2005-10-09 19:45:01 +00:00
|
|
|
if (dblk == 0)
|
|
|
|
continue;
|
2001-05-08 07:29:03 +00:00
|
|
|
if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
|
2004-07-28 06:41:27 +00:00
|
|
|
DIP_SET(ip, i_db[blkno], 0);
|
2001-05-08 07:29:03 +00:00
|
|
|
else if ((dblk == blkstofrags(fs, blkno) &&
|
2016-09-17 16:47:34 +00:00
|
|
|
ffs_snapblkfree(fs, ITODEVVP(ip), dblk, fs->fs_bsize,
|
2011-06-15 23:19:09 +00:00
|
|
|
ip->i_number, vp->v_type, NULL))) {
|
2004-07-28 06:41:27 +00:00
|
|
|
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) -
|
|
|
|
btodb(fs->fs_bsize));
|
|
|
|
DIP_SET(ip, i_db[blkno], 0);
|
2001-05-08 07:29:03 +00:00
|
|
|
}
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
2001-05-08 07:29:03 +00:00
|
|
|
numblks = howmany(ip->i_size, fs->fs_bsize);
|
2017-02-15 19:50:26 +00:00
|
|
|
for (blkno = UFS_NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
|
2001-04-29 12:36:52 +00:00
|
|
|
error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
|
2002-06-23 06:12:22 +00:00
|
|
|
fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
|
2000-07-11 22:07:57 +00:00
|
|
|
if (error)
|
|
|
|
continue;
|
2002-06-21 06:18:05 +00:00
|
|
|
if (fs->fs_size - blkno > NINDIR(fs))
|
2000-07-11 22:07:57 +00:00
|
|
|
last = NINDIR(fs);
|
2002-06-21 06:18:05 +00:00
|
|
|
else
|
|
|
|
last = fs->fs_size - blkno;
|
2000-07-11 22:07:57 +00:00
|
|
|
for (loc = 0; loc < last; loc++) {
|
2016-09-17 16:47:34 +00:00
|
|
|
if (I_IS_UFS1(ip)) {
|
2002-06-21 06:18:05 +00:00
|
|
|
dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc];
|
2005-10-09 19:45:01 +00:00
|
|
|
if (dblk == 0)
|
|
|
|
continue;
|
2002-06-21 06:18:05 +00:00
|
|
|
if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
|
|
|
|
((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
|
|
|
|
else if ((dblk == blkstofrags(fs, blkno) &&
|
2016-09-17 16:47:34 +00:00
|
|
|
ffs_snapblkfree(fs, ITODEVVP(ip), dblk,
|
2011-06-15 23:19:09 +00:00
|
|
|
fs->fs_bsize, ip->i_number, vp->v_type,
|
|
|
|
NULL))) {
|
2002-06-21 06:18:05 +00:00
|
|
|
ip->i_din1->di_blocks -=
|
|
|
|
btodb(fs->fs_bsize);
|
|
|
|
((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc];
|
2005-10-09 19:45:01 +00:00
|
|
|
if (dblk == 0)
|
|
|
|
continue;
|
2001-05-08 07:29:03 +00:00
|
|
|
if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
|
2002-06-21 06:18:05 +00:00
|
|
|
((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
|
2001-05-08 07:29:03 +00:00
|
|
|
else if ((dblk == blkstofrags(fs, blkno) &&
|
2016-09-17 16:47:34 +00:00
|
|
|
ffs_snapblkfree(fs, ITODEVVP(ip), dblk,
|
2011-06-15 23:19:09 +00:00
|
|
|
fs->fs_bsize, ip->i_number, vp->v_type, NULL))) {
|
2002-06-21 06:18:05 +00:00
|
|
|
ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
|
|
|
|
((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
|
2001-05-08 07:29:03 +00:00
|
|
|
}
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
|
|
|
bawrite(ibp);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Clear snapshot flag and drop reference.
|
|
|
|
*/
|
2000-07-26 23:07:01 +00:00
|
|
|
ip->i_flags &= ~SF_SNAPSHOT;
|
2004-07-28 06:41:27 +00:00
|
|
|
DIP_SET(ip, i_flags, ip->i_flags);
|
2020-01-13 02:31:51 +00:00
|
|
|
UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
|
2010-04-24 07:05:35 +00:00
|
|
|
/*
|
|
|
|
* The dirtied indirects must be written out before
|
|
|
|
* softdep_setup_freeblocks() is called. Otherwise indir_trunc()
|
|
|
|
* may find indirect pointers using the magic BLK_* values.
|
|
|
|
*/
|
|
|
|
if (DOINGSOFTDEP(vp))
|
2012-03-25 00:02:37 +00:00
|
|
|
ffs_syncvnode(vp, MNT_WAIT, 0);
|
2006-05-05 20:10:04 +00:00
|
|
|
#ifdef QUOTA
|
|
|
|
/*
|
|
|
|
* Reenable disk quotas for ex-snapshot file.
|
|
|
|
*/
|
|
|
|
if (!getinoquota(ip))
|
|
|
|
(void) chkdq(ip, DIP(ip, i_blocks), KERNCRED, FORCE);
|
|
|
|
#endif
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Notification that a block is being freed. Return zero if the free
|
|
|
|
* should be allowed to proceed. Return non-zero if the snapshot file
|
|
|
|
* wants to claim the block. The block will be claimed if it is an
|
|
|
|
* uncopied part of one of the snapshots. It will be freed if it is
|
|
|
|
* either a BLK_NOCOPY or has already been copied in all of the snapshots.
|
|
|
|
* If a fragment is being freed, then all snapshots that care about
|
|
|
|
* it must make a copy since a snapshot file can only claim full sized
|
|
|
|
* blocks. Note that if more than one snapshot file maps the block,
|
|
|
|
* we can pick one at random to claim it. Since none of the snapshots
|
|
|
|
* can change, we are assurred that they will all see the same unmodified
|
|
|
|
* image. When deleting a snapshot file (see ffs_snapremove above), we
|
|
|
|
* must push any of these claimed blocks to one of the other snapshots
|
|
|
|
* that maps it. These claimed blocks are easily identified as they will
|
|
|
|
* have a block number equal to their logical block number within the
|
|
|
|
* snapshot. A copied block can never have this property because they
|
|
|
|
* must always have been allocated from a BLK_NOCOPY location.
|
|
|
|
*/
|
|
|
|
int
|
2011-06-15 23:19:09 +00:00
|
|
|
ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd)
|
2002-02-02 01:42:44 +00:00
|
|
|
struct fs *fs;
|
|
|
|
struct vnode *devvp;
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs2_daddr_t bno;
|
2000-07-11 22:07:57 +00:00
|
|
|
long size;
|
2002-02-02 01:42:44 +00:00
|
|
|
ino_t inum;
|
2011-06-15 23:19:09 +00:00
|
|
|
enum vtype vtype;
|
2011-06-12 19:27:05 +00:00
|
|
|
struct workhead *wkhd;
|
2000-07-11 22:07:57 +00:00
|
|
|
{
|
2012-07-22 15:40:31 +00:00
|
|
|
struct buf *ibp, *cbp, *savedcbp = NULL;
|
2001-09-12 08:38:13 +00:00
|
|
|
struct thread *td = curthread;
|
2000-07-11 22:07:57 +00:00
|
|
|
struct inode *ip;
|
2002-11-30 19:00:51 +00:00
|
|
|
struct vnode *vp = NULL;
|
2002-06-21 06:18:05 +00:00
|
|
|
ufs_lbn_t lbn;
|
|
|
|
ufs2_daddr_t blkno;
|
2005-10-09 19:45:01 +00:00
|
|
|
int indiroff = 0, error = 0, claimedblk = 0;
|
2004-09-13 07:29:45 +00:00
|
|
|
struct snapdata *sn;
|
2000-07-11 22:07:57 +00:00
|
|
|
|
|
|
|
lbn = fragstoblks(fs, bno);
|
2002-11-30 19:00:51 +00:00
|
|
|
retry:
|
|
|
|
VI_LOCK(devvp);
|
2004-09-13 07:29:45 +00:00
|
|
|
sn = devvp->v_rdev->si_snapdata;
|
2004-09-16 17:28:56 +00:00
|
|
|
if (sn == NULL) {
|
|
|
|
VI_UNLOCK(devvp);
|
|
|
|
return (0);
|
|
|
|
}
|
2008-01-24 12:34:30 +00:00
|
|
|
if (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
|
|
|
|
VI_MTX(devvp)) != 0)
|
2005-10-09 19:45:01 +00:00
|
|
|
goto retry;
|
2004-09-13 07:29:45 +00:00
|
|
|
TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
|
2000-07-11 22:07:57 +00:00
|
|
|
vp = ITOV(ip);
|
2010-05-07 08:45:21 +00:00
|
|
|
if (DOINGSOFTDEP(vp))
|
|
|
|
softdep_prealloc(vp, MNT_WAIT);
|
2000-07-11 22:07:57 +00:00
|
|
|
/*
|
|
|
|
* Lookup block being written.
|
|
|
|
*/
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn < UFS_NDADDR) {
|
2002-06-21 06:18:05 +00:00
|
|
|
blkno = DIP(ip, i_db[lbn]);
|
2000-07-11 22:07:57 +00:00
|
|
|
} else {
|
2003-10-23 21:14:08 +00:00
|
|
|
td->td_pflags |= TDP_COWINPROGRESS;
|
2001-04-29 12:36:52 +00:00
|
|
|
error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
|
2002-06-23 06:12:22 +00:00
|
|
|
fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
|
2003-10-23 21:14:08 +00:00
|
|
|
td->td_pflags &= ~TDP_COWINPROGRESS;
|
2000-07-11 22:07:57 +00:00
|
|
|
if (error)
|
|
|
|
break;
|
2017-02-15 19:50:26 +00:00
|
|
|
indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
|
2016-09-17 16:47:34 +00:00
|
|
|
if (I_IS_UFS1(ip))
|
2002-06-21 06:18:05 +00:00
|
|
|
blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff];
|
|
|
|
else
|
|
|
|
blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff];
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Check to see if block needs to be copied.
|
|
|
|
*/
|
2002-06-21 06:18:05 +00:00
|
|
|
if (blkno == 0) {
|
|
|
|
/*
|
|
|
|
* A block that we map is being freed. If it has not
|
|
|
|
* been claimed yet, we will claim or copy it (below).
|
|
|
|
*/
|
|
|
|
claimedblk = 1;
|
|
|
|
} else if (blkno == BLK_SNAP) {
|
|
|
|
/*
|
|
|
|
* No previous snapshot claimed the block,
|
2002-11-30 19:00:51 +00:00
|
|
|
* so it will be freed and become a BLK_NOCOPY
|
2002-06-21 06:18:05 +00:00
|
|
|
* (don't care) for us.
|
|
|
|
*/
|
2000-07-11 22:07:57 +00:00
|
|
|
if (claimedblk)
|
|
|
|
panic("snapblkfree: inconsistent block type");
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn < UFS_NDADDR) {
|
2004-07-28 06:41:27 +00:00
|
|
|
DIP_SET(ip, i_db[lbn], BLK_NOCOPY);
|
2020-01-13 02:31:51 +00:00
|
|
|
UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
|
2016-09-17 16:47:34 +00:00
|
|
|
} else if (I_IS_UFS1(ip)) {
|
2002-06-21 06:18:05 +00:00
|
|
|
((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
|
|
|
|
BLK_NOCOPY;
|
|
|
|
bdwrite(ibp);
|
2000-07-11 22:07:57 +00:00
|
|
|
} else {
|
2002-06-21 06:18:05 +00:00
|
|
|
((ufs2_daddr_t *)(ibp->b_data))[indiroff] =
|
2000-07-11 22:07:57 +00:00
|
|
|
BLK_NOCOPY;
|
|
|
|
bdwrite(ibp);
|
|
|
|
}
|
|
|
|
continue;
|
2002-06-21 06:18:05 +00:00
|
|
|
} else /* BLK_NOCOPY or default */ {
|
|
|
|
/*
|
|
|
|
* If the snapshot has already copied the block
|
|
|
|
* (default), or does not care about the block,
|
|
|
|
* it is not needed.
|
|
|
|
*/
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn >= UFS_NDADDR)
|
2002-06-21 06:18:05 +00:00
|
|
|
bqrelse(ibp);
|
|
|
|
continue;
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If this is a full size block, we will just grab it
|
|
|
|
* and assign it to the snapshot inode. Otherwise we
|
|
|
|
* will proceed to copy it. See explanation for this
|
|
|
|
* routine as to why only a single snapshot needs to
|
|
|
|
* claim this block.
|
|
|
|
*/
|
|
|
|
if (size == fs->fs_bsize) {
|
2019-05-28 16:32:04 +00:00
|
|
|
#ifdef DIAGNOSTIC
|
2000-07-11 22:07:57 +00:00
|
|
|
if (snapdebug)
|
2012-09-27 23:30:49 +00:00
|
|
|
printf("%s %ju lbn %jd from inum %ju\n",
|
|
|
|
"Grabonremove: snapino",
|
|
|
|
(uintmax_t)ip->i_number,
|
|
|
|
(intmax_t)lbn, (uintmax_t)inum);
|
2000-07-11 22:07:57 +00:00
|
|
|
#endif
|
2011-06-12 19:27:05 +00:00
|
|
|
/*
|
|
|
|
* If journaling is tracking this write we must add
|
|
|
|
* the work to the inode or indirect being written.
|
|
|
|
*/
|
|
|
|
if (wkhd != NULL) {
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn < UFS_NDADDR)
|
2011-06-12 19:27:05 +00:00
|
|
|
softdep_inode_append(ip,
|
|
|
|
curthread->td_ucred, wkhd);
|
|
|
|
else
|
|
|
|
softdep_buf_append(ibp, wkhd);
|
|
|
|
}
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn < UFS_NDADDR) {
|
2004-07-28 06:41:27 +00:00
|
|
|
DIP_SET(ip, i_db[lbn], bno);
|
2016-09-17 16:47:34 +00:00
|
|
|
} else if (I_IS_UFS1(ip)) {
|
2002-06-21 06:18:05 +00:00
|
|
|
((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno;
|
|
|
|
bdwrite(ibp);
|
2000-07-11 22:07:57 +00:00
|
|
|
} else {
|
2002-06-21 06:18:05 +00:00
|
|
|
((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno;
|
2000-07-11 22:07:57 +00:00
|
|
|
bdwrite(ibp);
|
|
|
|
}
|
2004-07-28 06:41:27 +00:00
|
|
|
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size));
|
2020-01-13 02:31:51 +00:00
|
|
|
UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
|
2008-01-24 12:34:30 +00:00
|
|
|
lockmgr(vp->v_vnlock, LK_RELEASE, NULL);
|
2000-07-11 22:07:57 +00:00
|
|
|
return (1);
|
|
|
|
}
|
2017-02-15 19:50:26 +00:00
|
|
|
if (lbn >= UFS_NDADDR)
|
2000-07-24 05:28:33 +00:00
|
|
|
bqrelse(ibp);
|
2000-07-11 22:07:57 +00:00
|
|
|
/*
|
|
|
|
* Allocate the block into which to do the copy. Note that this
|
|
|
|
* allocation will never require any additional allocations for
|
|
|
|
* the snapshot inode.
|
|
|
|
*/
|
2003-10-23 21:14:08 +00:00
|
|
|
td->td_pflags |= TDP_COWINPROGRESS;
|
2001-04-29 12:36:52 +00:00
|
|
|
error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
|
2000-07-11 22:07:57 +00:00
|
|
|
fs->fs_bsize, KERNCRED, 0, &cbp);
|
2003-10-23 21:14:08 +00:00
|
|
|
td->td_pflags &= ~TDP_COWINPROGRESS;
|
2002-11-30 19:00:51 +00:00
|
|
|
if (error)
|
2000-07-11 22:07:57 +00:00
|
|
|
break;
|
2019-05-28 16:32:04 +00:00
|
|
|
#ifdef DIAGNOSTIC
|
2000-07-11 22:07:57 +00:00
|
|
|
if (snapdebug)
|
2012-09-27 23:30:49 +00:00
|
|
|
printf("%s%ju lbn %jd %s %ju size %ld to blkno %jd\n",
|
|
|
|
"Copyonremove: snapino ", (uintmax_t)ip->i_number,
|
|
|
|
(intmax_t)lbn, "for inum", (uintmax_t)inum, size,
|
2002-06-21 06:18:05 +00:00
|
|
|
(intmax_t)cbp->b_blkno);
|
2000-07-11 22:07:57 +00:00
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* If we have already read the old block contents, then
|
2001-04-25 08:11:18 +00:00
|
|
|
* simply copy them to the new block. Note that we need
|
|
|
|
* to synchronously write snapshots that have not been
|
|
|
|
* unlinked, and hence will be visible after a crash,
|
2011-06-15 23:19:09 +00:00
|
|
|
* to ensure their integrity. At a minimum we ensure the
|
|
|
|
* integrity of the filesystem metadata, but use the
|
|
|
|
* dopersistence sysctl-setable flag to decide on the
|
|
|
|
* persistence needed for file content data.
|
2000-07-11 22:07:57 +00:00
|
|
|
*/
|
2016-04-10 21:48:11 +00:00
|
|
|
if (savedcbp != NULL) {
|
2000-07-11 22:07:57 +00:00
|
|
|
bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
|
|
|
|
bawrite(cbp);
|
2011-06-15 23:19:09 +00:00
|
|
|
if ((vtype == VDIR || dopersistence) &&
|
|
|
|
ip->i_effnlink > 0)
|
2012-03-25 00:02:37 +00:00
|
|
|
(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
|
2000-07-11 22:07:57 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Otherwise, read the old block contents into the buffer.
|
|
|
|
*/
|
2004-09-13 07:29:45 +00:00
|
|
|
if ((error = readblock(vp, cbp, lbn)) != 0) {
|
2001-04-25 08:11:18 +00:00
|
|
|
bzero(cbp->b_data, fs->fs_bsize);
|
|
|
|
bawrite(cbp);
|
2011-06-15 23:19:09 +00:00
|
|
|
if ((vtype == VDIR || dopersistence) &&
|
|
|
|
ip->i_effnlink > 0)
|
2012-03-25 00:02:37 +00:00
|
|
|
(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
|
2000-07-11 22:07:57 +00:00
|
|
|
break;
|
2001-04-25 08:11:18 +00:00
|
|
|
}
|
2000-07-11 22:07:57 +00:00
|
|
|
savedcbp = cbp;
|
|
|
|
}
|
2001-04-25 08:11:18 +00:00
|
|
|
/*
|
|
|
|
* Note that we need to synchronously write snapshots that
|
|
|
|
* have not been unlinked, and hence will be visible after
|
2011-06-15 23:19:09 +00:00
|
|
|
* a crash, to ensure their integrity. At a minimum we
|
|
|
|
* ensure the integrity of the filesystem metadata, but
|
|
|
|
* use the dopersistence sysctl-setable flag to decide on
|
|
|
|
* the persistence needed for file content data.
|
2001-04-25 08:11:18 +00:00
|
|
|
*/
|
|
|
|
if (savedcbp) {
|
|
|
|
vp = savedcbp->b_vp;
|
2000-07-11 22:07:57 +00:00
|
|
|
bawrite(savedcbp);
|
2011-06-18 21:10:03 +00:00
|
|
|
if ((vtype == VDIR || dopersistence) &&
|
|
|
|
VTOI(vp)->i_effnlink > 0)
|
2012-03-25 00:02:37 +00:00
|
|
|
(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
|
2001-04-25 08:11:18 +00:00
|
|
|
}
|
2000-07-11 22:07:57 +00:00
|
|
|
/*
|
|
|
|
* If we have been unable to allocate a block in which to do
|
|
|
|
* the copy, then return non-zero so that the fragment will
|
|
|
|
* not be freed. Although space will be lost, the snapshot
|
|
|
|
* will stay consistent.
|
|
|
|
*/
|
2011-06-12 19:27:05 +00:00
|
|
|
if (error != 0 && wkhd != NULL)
|
|
|
|
softdep_freework(wkhd);
|
2017-07-21 18:36:17 +00:00
|
|
|
lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
|
2000-07-11 22:07:57 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Associate snapshot files when mounting.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ffs_snapshot_mount(mp)
|
|
|
|
struct mount *mp;
|
|
|
|
{
|
|
|
|
struct ufsmount *ump = VFSTOUFS(mp);
|
2002-11-30 19:00:51 +00:00
|
|
|
struct vnode *devvp = ump->um_devvp;
|
2000-07-11 22:07:57 +00:00
|
|
|
struct fs *fs = ump->um_fs;
|
2001-09-12 08:38:13 +00:00
|
|
|
struct thread *td = curthread;
|
2004-09-13 07:29:45 +00:00
|
|
|
struct snapdata *sn;
|
2000-07-11 22:07:57 +00:00
|
|
|
struct vnode *vp;
|
2006-05-16 00:14:20 +00:00
|
|
|
struct vnode *lastvp;
|
2004-09-16 15:58:18 +00:00
|
|
|
struct inode *ip;
|
2002-10-09 06:13:48 +00:00
|
|
|
struct uio auio;
|
|
|
|
struct iovec aiov;
|
2002-12-14 01:36:59 +00:00
|
|
|
void *snapblklist;
|
2002-10-09 06:13:48 +00:00
|
|
|
char *reason;
|
2002-12-14 01:36:59 +00:00
|
|
|
daddr_t snaplistsize;
|
2000-07-11 22:07:57 +00:00
|
|
|
int error, snaploc, loc;
|
|
|
|
|
2002-10-09 06:13:48 +00:00
|
|
|
/*
|
2005-02-08 17:40:01 +00:00
|
|
|
* XXX The following needs to be set before ffs_truncate or
|
2002-10-09 06:13:48 +00:00
|
|
|
* VOP_READ can be called.
|
|
|
|
*/
|
|
|
|
mp->mnt_stat.f_iosize = fs->fs_bsize;
|
|
|
|
/*
|
|
|
|
* Process each snapshot listed in the superblock.
|
|
|
|
*/
|
2002-12-14 01:36:59 +00:00
|
|
|
vp = NULL;
|
2006-05-16 00:14:20 +00:00
|
|
|
lastvp = NULL;
|
2008-03-31 07:47:08 +00:00
|
|
|
sn = NULL;
|
2000-07-11 22:07:57 +00:00
|
|
|
for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
|
|
|
|
if (fs->fs_snapinum[snaploc] == 0)
|
2002-12-14 01:36:59 +00:00
|
|
|
break;
|
2005-02-08 17:40:01 +00:00
|
|
|
if ((error = ffs_vget(mp, fs->fs_snapinum[snaploc],
|
2002-03-17 01:25:47 +00:00
|
|
|
LK_EXCLUSIVE, &vp)) != 0){
|
2000-07-11 22:07:57 +00:00
|
|
|
printf("ffs_snapshot_mount: vget failed %d\n", error);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
ip = VTOI(vp);
|
2018-12-27 07:18:53 +00:00
|
|
|
if (vp->v_type != VREG) {
|
|
|
|
reason = "non-file snapshot";
|
|
|
|
} else if (!IS_SNAPSHOT(ip)) {
|
|
|
|
reason = "non-snapshot";
|
|
|
|
} else if (ip->i_size ==
|
2002-10-09 06:13:48 +00:00
|
|
|
lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) {
|
2018-12-27 07:18:53 +00:00
|
|
|
reason = "old format snapshot";
|
|
|
|
(void)ffs_truncate(vp, (off_t)0, 0, NOCRED);
|
|
|
|
(void)ffs_syncvnode(vp, MNT_WAIT, 0);
|
|
|
|
} else {
|
|
|
|
reason = NULL;
|
|
|
|
}
|
|
|
|
if (reason != NULL) {
|
2002-10-09 06:13:48 +00:00
|
|
|
printf("ffs_snapshot_mount: %s inode %d\n",
|
|
|
|
reason, fs->fs_snapinum[snaploc]);
|
2000-07-11 22:07:57 +00:00
|
|
|
vput(vp);
|
2002-12-14 01:36:59 +00:00
|
|
|
vp = NULL;
|
2000-07-11 22:07:57 +00:00
|
|
|
for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
|
|
|
|
if (fs->fs_snapinum[loc] == 0)
|
|
|
|
break;
|
|
|
|
fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
|
|
|
|
}
|
|
|
|
fs->fs_snapinum[loc - 1] = 0;
|
|
|
|
snaploc--;
|
|
|
|
continue;
|
|
|
|
}
|
2002-10-16 00:19:23 +00:00
|
|
|
/*
|
2008-03-31 07:47:08 +00:00
|
|
|
* Acquire a lock on the snapdata structure, creating it if
|
|
|
|
* necessary.
|
2002-10-16 00:19:23 +00:00
|
|
|
*/
|
2008-03-31 07:47:08 +00:00
|
|
|
sn = ffs_snapdata_acquire(devvp);
|
|
|
|
/*
|
|
|
|
* Change vnode to use shared snapshot lock instead of the
|
|
|
|
* original private lock.
|
|
|
|
*/
|
|
|
|
vp->v_vnlock = &sn->sn_lock;
|
2008-01-24 12:34:30 +00:00
|
|
|
lockmgr(&vp->v_lock, LK_RELEASE, NULL);
|
2002-10-09 06:13:48 +00:00
|
|
|
/*
|
|
|
|
* Link it onto the active snapshot list.
|
|
|
|
*/
|
2002-11-30 19:00:51 +00:00
|
|
|
VI_LOCK(devvp);
|
2001-03-07 07:09:55 +00:00
|
|
|
if (ip->i_nextsnap.tqe_prev != 0)
|
2012-09-27 23:30:49 +00:00
|
|
|
panic("ffs_snapshot_mount: %ju already on list",
|
|
|
|
(uintmax_t)ip->i_number);
|
2001-03-07 07:09:55 +00:00
|
|
|
else
|
2004-09-13 07:29:45 +00:00
|
|
|
TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap);
|
2002-08-04 10:29:36 +00:00
|
|
|
vp->v_vflag |= VV_SYSTEM;
|
2002-11-30 19:00:51 +00:00
|
|
|
VI_UNLOCK(devvp);
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(vp);
|
2006-05-16 00:14:20 +00:00
|
|
|
lastvp = vp;
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
2006-05-16 00:14:20 +00:00
|
|
|
vp = lastvp;
|
2002-12-14 01:36:59 +00:00
|
|
|
/*
|
|
|
|
* No usable snapshots found.
|
|
|
|
*/
|
2008-03-31 07:47:08 +00:00
|
|
|
if (sn == NULL || vp == NULL)
|
2002-12-14 01:36:59 +00:00
|
|
|
return;
|
|
|
|
/*
|
|
|
|
* Allocate the space for the block hints list. We always want to
|
|
|
|
* use the list from the newest snapshot.
|
|
|
|
*/
|
|
|
|
auio.uio_iov = &aiov;
|
|
|
|
auio.uio_iovcnt = 1;
|
|
|
|
aiov.iov_base = (void *)&snaplistsize;
|
|
|
|
aiov.iov_len = sizeof(snaplistsize);
|
|
|
|
auio.uio_resid = aiov.iov_len;
|
|
|
|
auio.uio_offset =
|
|
|
|
lblktosize(fs, howmany(fs->fs_size, fs->fs_frag));
|
|
|
|
auio.uio_segflg = UIO_SYSSPACE;
|
|
|
|
auio.uio_rw = UIO_READ;
|
|
|
|
auio.uio_td = td;
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
2002-12-14 01:36:59 +00:00
|
|
|
if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
|
|
|
|
printf("ffs_snapshot_mount: read_1 failed %d\n", error);
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(vp);
|
2002-12-14 01:36:59 +00:00
|
|
|
return;
|
|
|
|
}
|
2008-10-23 15:53:51 +00:00
|
|
|
snapblklist = malloc(snaplistsize * sizeof(daddr_t),
|
2003-02-19 05:47:46 +00:00
|
|
|
M_UFSMNT, M_WAITOK);
|
2002-12-14 01:36:59 +00:00
|
|
|
auio.uio_iovcnt = 1;
|
|
|
|
aiov.iov_base = snapblklist;
|
|
|
|
aiov.iov_len = snaplistsize * sizeof (daddr_t);
|
|
|
|
auio.uio_resid = aiov.iov_len;
|
|
|
|
auio.uio_offset -= sizeof(snaplistsize);
|
|
|
|
if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
|
|
|
|
printf("ffs_snapshot_mount: read_2 failed %d\n", error);
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(vp);
|
2008-10-23 15:53:51 +00:00
|
|
|
free(snapblklist, M_UFSMNT);
|
2002-12-14 01:36:59 +00:00
|
|
|
return;
|
|
|
|
}
|
2020-01-03 22:29:58 +00:00
|
|
|
VOP_UNLOCK(vp);
|
2002-12-14 01:36:59 +00:00
|
|
|
VI_LOCK(devvp);
|
|
|
|
ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount");
|
2004-09-13 07:29:45 +00:00
|
|
|
sn->sn_listsize = snaplistsize;
|
|
|
|
sn->sn_blklist = (daddr_t *)snapblklist;
|
2002-12-14 01:36:59 +00:00
|
|
|
devvp->v_vflag |= VV_COPYONWRITE;
|
|
|
|
VI_UNLOCK(devvp);
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Disassociate snapshot files when unmounting.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ffs_snapshot_unmount(mp)
|
|
|
|
struct mount *mp;
|
|
|
|
{
|
2002-11-30 19:00:51 +00:00
|
|
|
struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
|
2004-09-13 07:29:45 +00:00
|
|
|
struct snapdata *sn;
|
2000-07-11 22:07:57 +00:00
|
|
|
struct inode *xp;
|
2002-10-16 00:19:23 +00:00
|
|
|
struct vnode *vp;
|
2000-07-11 22:07:57 +00:00
|
|
|
|
2002-11-30 19:00:51 +00:00
|
|
|
VI_LOCK(devvp);
|
2006-05-02 23:52:43 +00:00
|
|
|
sn = devvp->v_rdev->si_snapdata;
|
|
|
|
while (sn != NULL && (xp = TAILQ_FIRST(&sn->sn_head)) != NULL) {
|
2002-10-16 00:19:23 +00:00
|
|
|
vp = ITOV(xp);
|
2004-09-13 07:29:45 +00:00
|
|
|
TAILQ_REMOVE(&sn->sn_head, xp, i_nextsnap);
|
2001-03-07 07:09:55 +00:00
|
|
|
xp->i_nextsnap.tqe_prev = 0;
|
2008-01-24 12:34:30 +00:00
|
|
|
lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE,
|
|
|
|
VI_MTX(devvp));
|
2021-01-15 16:00:17 -08:00
|
|
|
/*
|
|
|
|
* Avoid LOR with above snapshot lock. The LK_NOWAIT should
|
|
|
|
* never fail as the lock is currently unused. Rather than
|
|
|
|
* panic, we recover by doing the blocking lock.
|
|
|
|
*/
|
|
|
|
if (lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) {
|
|
|
|
printf("ffs_snapshot_unmount: Unexpected LK_NOWAIT "
|
|
|
|
"failure\n");
|
|
|
|
lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
|
|
|
|
}
|
2006-05-02 23:52:43 +00:00
|
|
|
KASSERT(vp->v_vnlock == &sn->sn_lock,
|
|
|
|
("ffs_snapshot_unmount: lost lock mutation"));
|
|
|
|
vp->v_vnlock = &vp->v_lock;
|
2008-01-24 12:34:30 +00:00
|
|
|
lockmgr(&vp->v_lock, LK_RELEASE, NULL);
|
|
|
|
lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
|
2006-05-02 23:52:43 +00:00
|
|
|
if (xp->i_effnlink > 0)
|
2002-10-16 00:19:23 +00:00
|
|
|
vrele(vp);
|
2006-05-02 23:52:43 +00:00
|
|
|
VI_LOCK(devvp);
|
|
|
|
sn = devvp->v_rdev->si_snapdata;
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
2008-03-31 07:47:08 +00:00
|
|
|
try_free_snapdata(devvp);
|
2002-11-30 19:00:51 +00:00
|
|
|
ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount");
|
2000-07-11 22:07:57 +00:00
|
|
|
}
|
|
|
|
|
Cylinder group bitmaps and blocks containing inode for a snapshot
file are after snaplock, while other ffs device buffers are before
snaplock in global lock order. By itself, this could cause deadlock
when bdwrite() tries to flush dirty buffers on snapshotted ffs. If,
during the flush, COW activity for snapshot needs to allocate block
and ffs_alloccg() selects the cylinder group that is being written
by bdwrite(), then kernel would panic due to recursive buffer lock
acquision.
Avoid dealing with buffers in bdwrite() that are from other side of
snaplock divisor in the lock order then the buffer being written. Add
new BOP, bop_bdwrite(), to do dirty buffer flushing for same vnode in
the bdwrite(). Default implementation, bufbdflush(), refactors the code
from bdwrite(). For ffs device buffers, specialized implementation is
used.
Reviewed by: tegge, jeff, Russell Cattelan (cattelan xfs org, xfs changes)
Tested by: Peter Holm
X-MFC after: 3 weeks (if ever: it changes ABI)

/*
 * Check whether the buffer belongs to the set of device buffers that must
 * be locked after snaplk.  devvp must be locked on entry and is left
 * locked on exit.
 */
static int
ffs_bp_snapblk(devvp, bp)
	struct vnode *devvp;
	struct buf *bp;
{
	struct snapdata *sn;
	struct fs *fs;
	ufs2_daddr_t lbn, *snapblklist;
	int lower, upper, mid;

	ASSERT_VI_LOCKED(devvp, "ffs_bp_snapblk");
	KASSERT(devvp->v_type == VCHR, ("Not a device %p", devvp));
	sn = devvp->v_rdev->si_snapdata;
	if (sn == NULL || TAILQ_FIRST(&sn->sn_head) == NULL)
		return (0);
	fs = ITOFS(TAILQ_FIRST(&sn->sn_head));
	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
	snapblklist = sn->sn_blklist;
	upper = sn->sn_listsize - 1;
	lower = 1;
	while (lower <= upper) {
		mid = (lower + upper) / 2;
		if (snapblklist[mid] == lbn)
			break;
		if (snapblklist[mid] < lbn)
			lower = mid + 1;
		else
			upper = mid - 1;
	}
	if (lower <= upper)
		return (1);
	return (0);
}
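
/*
 * The preallocated list searched above is expected to have the layout
 * built by the snapshot creation code:
 *
 *	snapblklist[0]		length of the list (first real entry is at 1)
 *	snapblklist[1..n-1]	logical block numbers, sorted ascending
 *
 * which is why the binary search runs over indices 1 .. sn_listsize - 1.
 */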

void
ffs_bdflush(bo, bp)
	struct bufobj *bo;
	struct buf *bp;
{
	struct thread *td;
	struct vnode *vp, *devvp;
	struct buf *nbp;
	int bp_bdskip;

	if (bo->bo_dirty.bv_cnt <= dirtybufthresh)
		return;

	td = curthread;
	vp = bp->b_vp;
	devvp = bo2vnode(bo);
	KASSERT(vp == devvp, ("devvp != vp %p %p", bo, bp));

	VI_LOCK(devvp);
	bp_bdskip = ffs_bp_snapblk(devvp, bp);
	if (bp_bdskip)
		bdwriteskip++;
	VI_UNLOCK(devvp);
	if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10 && !bp_bdskip) {
		(void) VOP_FSYNC(vp, MNT_NOWAIT, td);
		altbufferflushes++;
	} else {
		BO_LOCK(bo);
		/*
		 * Try to find a buffer to flush.
		 */
		TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
			if ((nbp->b_vflags & BV_BKGRDINPROG) ||
			    BUF_LOCK(nbp,
			    LK_EXCLUSIVE | LK_NOWAIT, NULL))
				continue;
			if (bp == nbp)
				panic("bdwrite: found ourselves");
			BO_UNLOCK(bo);
			/*
			 * Don't countdeps with the bo lock
			 * held.
			 */
			if (buf_countdeps(nbp, 0)) {
				BO_LOCK(bo);
				BUF_UNLOCK(nbp);
				continue;
			}
			if (bp_bdskip) {
				VI_LOCK(devvp);
				if (!ffs_bp_snapblk(vp, nbp)) {
					VI_UNLOCK(devvp);
					BO_LOCK(bo);
					BUF_UNLOCK(nbp);
					continue;
				}
				VI_UNLOCK(devvp);
			}
			if (nbp->b_flags & B_CLUSTEROK) {
				vfs_bio_awrite(nbp);
			} else {
				bremfree(nbp);
				bawrite(nbp);
			}
			dirtybufferflushes++;
			break;
		}
		if (nbp == NULL)
			BO_UNLOCK(bo);
	}
}
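
/*
 * ffs_bdflush() is installed as the ffs device bufobj's dirty-buffer
 * flush hook (the generic counterpart is bufbdflush()).  When the buffer
 * being written lies on the snapshot side of the snaplk lock-order divide
 * (bp_bdskip, tallied in bdwriteskip) it will only push other buffers from
 * that same side, so that bdwrite() never ends up locking a cylinder group
 * buffer recursively while a copy-on-write is in progress.
 */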

/*
 * Check for need to copy block that is about to be written,
 * copying the block if necessary.
 */
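/*
 * Roughly: the block is first looked up in the preallocated list published
 * at snapshot-creation/mount time, which needs only the devvp interlock;
 * only on a miss is snaplk taken and each active snapshot examined, with a
 * copy allocated and the old contents written into every snapshot that has
 * not already claimed the block.
 */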
int
ffs_copyonwrite(devvp, bp)
	struct vnode *devvp;
	struct buf *bp;
{
	struct snapdata *sn;
	struct buf *ibp, *cbp, *savedcbp = NULL;
	struct thread *td = curthread;
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp = NULL;
	ufs2_daddr_t lbn, blkno, *snapblklist;
	int lower, upper, mid, indiroff, error = 0;
	int launched_async_io, prev_norunningbuf;
	long saved_runningbufspace;

	if (devvp != bp->b_vp && IS_SNAPSHOT(VTOI(bp->b_vp)))
		return (0);		/* Update on a snapshot file */
	if (td->td_pflags & TDP_COWINPROGRESS)
		panic("ffs_copyonwrite: recursive call");
	/*
	 * First check to see if it is in the preallocated list.
	 * By doing this check we avoid several potential deadlocks.
	 */
	VI_LOCK(devvp);
	sn = devvp->v_rdev->si_snapdata;
	if (sn == NULL ||
	    TAILQ_EMPTY(&sn->sn_head)) {
		VI_UNLOCK(devvp);
		return (0);		/* No snapshot */
	}
	ip = TAILQ_FIRST(&sn->sn_head);
	fs = ITOFS(ip);
	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
	if (lbn < UFS_NDADDR) {
		VI_UNLOCK(devvp);
		return (0);		/* Direct blocks are always copied */
	}
	snapblklist = sn->sn_blklist;
	upper = sn->sn_listsize - 1;
	lower = 1;
	while (lower <= upper) {
		mid = (lower + upper) / 2;
		if (snapblklist[mid] == lbn)
			break;
		if (snapblklist[mid] < lbn)
			lower = mid + 1;
		else
			upper = mid - 1;
	}
	if (lower <= upper) {
		VI_UNLOCK(devvp);
		return (0);
	}
	launched_async_io = 0;
	prev_norunningbuf = td->td_pflags & TDP_NORUNNINGBUF;
	/*
	 * Since I/O on bp isn't yet in progress and it may be blocked
	 * for a long time waiting on snaplk, back it out of
	 * runningbufspace, possibly waking other threads waiting for space.
	 */
	saved_runningbufspace = bp->b_runningbufspace;
	if (saved_runningbufspace != 0)
		runningbufwakeup(bp);
	/*
	 * Not in the precomputed list, so check the snapshots.
	 */
	while (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
	    VI_MTX(devvp)) != 0) {
		VI_LOCK(devvp);
		sn = devvp->v_rdev->si_snapdata;
		if (sn == NULL ||
		    TAILQ_EMPTY(&sn->sn_head)) {
			VI_UNLOCK(devvp);
			if (saved_runningbufspace != 0) {
				bp->b_runningbufspace = saved_runningbufspace;
				atomic_add_long(&runningbufspace,
				    bp->b_runningbufspace);
			}
			return (0);		/* Snapshot gone */
		}
	}
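	/*
	 * LK_SLEEPFAIL makes the lockmgr() call above return failure after
	 * sleeping instead of acquiring the lock, so each pass through the
	 * loop re-reads si_snapdata: the snapshot set may have changed, or
	 * disappeared entirely, while we slept.
	 */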
	TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
		vp = ITOV(ip);
		if (DOINGSOFTDEP(vp))
			softdep_prealloc(vp, MNT_WAIT);
		/*
		 * We ensure that everything of our own that needs to be
		 * copied will be done at the time that ffs_snapshot is
		 * called. Thus we can skip the check here which can
		 * deadlock in doing the lookup in UFS_BALLOC.
		 */
		if (bp->b_vp == vp)
			continue;
		/*
		 * Check to see if block needs to be copied. We do not have
		 * to hold the snapshot lock while doing this lookup as it
		 * will never require any additional allocations for the
		 * snapshot inode.
		 */
		if (lbn < UFS_NDADDR) {
			blkno = DIP(ip, i_db[lbn]);
		} else {
			td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF;
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			td->td_pflags &= ~TDP_COWINPROGRESS;
			if (error)
				break;
			indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
			if (I_IS_UFS1(ip))
				blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff];
			else
				blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff];
			bqrelse(ibp);
		}
#ifdef INVARIANTS
		if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
			panic("ffs_copyonwrite: bad copy block");
#endif
		if (blkno != 0)
			continue;
		/*
		 * Allocate the block into which to do the copy. Since
		 * multiple processes may all try to copy the same block,
		 * we have to recheck our need to do a copy if we sleep
		 * waiting for the lock.
		 *
		 * Because all snapshots on a filesystem share a single
		 * lock, we ensure that we will never be in competition
		 * with another process to allocate a block.
		 */
		td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF;
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &cbp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			break;
#ifdef DIAGNOSTIC
		if (snapdebug) {
			printf("Copyonwrite: snapino %ju lbn %jd for ",
			    (uintmax_t)ip->i_number, (intmax_t)lbn);
			if (bp->b_vp == devvp)
				printf("fs metadata");
			else
				printf("inum %ju",
				    (uintmax_t)VTOI(bp->b_vp)->i_number);
			printf(" lblkno %jd to blkno %jd\n",
			    (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno);
		}
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block. Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity. At a minimum we ensure the
		 * integrity of the filesystem metadata, but use the
		 * dopersistence sysctl-setable flag to decide on the
		 * persistence needed for file content data.
		 */
		if (savedcbp != NULL) {
			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
			    dopersistence) && ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
			else
				launched_async_io = 1;
			continue;
		}
		/*
		 * Otherwise, read the old block contents into the buffer.
		 */
		if ((error = readblock(vp, cbp, lbn)) != 0) {
			bzero(cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
			    dopersistence) && ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
			else
				launched_async_io = 1;
			break;
		}
		savedcbp = cbp;
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity. At a minimum we
	 * ensure the integrity of the filesystem metadata, but
	 * use the dopersistence sysctl-setable flag to decide on
	 * the persistence needed for file content data.
	 */
	if (savedcbp) {
		vp = savedcbp->b_vp;
		bawrite(savedcbp);
		if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
		    dopersistence) && VTOI(vp)->i_effnlink > 0)
			(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
		else
			launched_async_io = 1;
	}
	lockmgr(vp->v_vnlock, LK_RELEASE, NULL);
	td->td_pflags = (td->td_pflags & ~TDP_NORUNNINGBUF) |
	    prev_norunningbuf;
	if (launched_async_io && (td->td_pflags & TDP_NORUNNINGBUF) == 0)
		waitrunningbufspace();
	/*
	 * I/O on bp will now be started, so count it in runningbufspace.
	 */
	if (saved_runningbufspace != 0) {
		bp->b_runningbufspace = saved_runningbufspace;
		atomic_add_long(&runningbufspace, bp->b_runningbufspace);
	}
	return (error);
}

/*
 * Sync snapshots to force freework records waiting on snapshots to claim
 * blocks to free.
 */
void
ffs_sync_snap(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct snapdata *sn;
	struct vnode *devvp;
	struct vnode *vp;
	struct inode *ip;

	devvp = VFSTOUFS(mp)->um_devvp;
	if ((devvp->v_vflag & VV_COPYONWRITE) == 0)
		return;
	for (;;) {
		VI_LOCK(devvp);
		sn = devvp->v_rdev->si_snapdata;
		if (sn == NULL) {
			VI_UNLOCK(devvp);
			return;
		}
		if (lockmgr(&sn->sn_lock,
		    LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
		    VI_MTX(devvp)) == 0)
			break;
	}
	TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
		vp = ITOV(ip);
		ffs_syncvnode(vp, waitfor, NO_INO_UPDT);
	}
	lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
}

/*
 * Read the specified block into the given buffer.
 * Much of this boiler-plate comes from bwrite().
 */
static int
readblock(vp, bp, lbn)
	struct vnode *vp;
	struct buf *bp;
	ufs2_daddr_t lbn;
{
	struct inode *ip;
	struct bio *bip;
	struct fs *fs;

	ip = VTOI(vp);
	fs = ITOFS(ip);

	bip = g_alloc_bio();
	bip->bio_cmd = BIO_READ;
	bip->bio_offset = dbtob(fsbtodb(fs, blkstofrags(fs, lbn)));
	bip->bio_data = bp->b_data;
	bip->bio_length = bp->b_bcount;
	bip->bio_done = NULL;

	g_io_request(bip, ITODEVVP(ip)->v_bufobj.bo_private);
	bp->b_error = biowait(bip, "snaprdb");
	g_destroy_bio(bip);
	return (bp->b_error);
}
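
/*
 * readblock() issues the read as a raw GEOM bio via the device's GEOM
 * consumer rather than going through bread() on the device vnode,
 * presumably so that fetching the old contents cannot recurse back into
 * the buffer cache and the copy-on-write machinery while snaplk is held.
 */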

#endif

/*
 * Process file deletes that were deferred by ufs_inactive() due to
 * the file system being suspended. Transfer IN_LAZYACCESS into
 * IN_MODIFIED for vnodes that were accessed during suspension.
 */
void
process_deferred_inactive(struct mount *mp)
{
	struct vnode *vp, *mvp;
	struct inode *ip;
	int error;

	(void) vn_start_secondary_write(NULL, &mp, V_WAIT);
 loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		/*
		 * IN_LAZYACCESS is checked here without holding any
		 * vnode lock, but this flag is set only while holding
		 * vnode interlock.
		 */
		if (vp->v_type == VNON ||
		    ((VTOI(vp)->i_flag & IN_LAZYACCESS) == 0 &&
		    ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0))) {
			VI_UNLOCK(vp);
			continue;
		}
		vholdl(vp);
retry_vnode:
		error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK);
		if (error != 0) {
			vdrop(vp);
			if (error == ENOENT)
				continue;	/* vnode recycled */
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto loop;
		}
		ip = VTOI(vp);
		if ((ip->i_flag & IN_LAZYACCESS) != 0) {
			ip->i_flag &= ~IN_LAZYACCESS;
			UFS_INODE_SET_FLAG(ip, IN_MODIFIED);
		}
		VI_LOCK(vp);
		error = vinactive(vp);
		if (error == ERELOOKUP && vp->v_usecount == 0) {
			VI_UNLOCK(vp);
			VOP_UNLOCK(vp);
			goto retry_vnode;
		}
		VI_UNLOCK(vp);
		VOP_UNLOCK(vp);
		vdrop(vp);
	}
	vn_finished_secondary_write(mp);
}
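
/*
 * vinactive() may return ERELOOKUP when the inactivation needs to be
 * redone from scratch; the retry_vnode path above re-locks the vnode and
 * repeats the attempt (provided no new use reference appeared) so that a
 * deferred delete is not silently dropped.
 */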

#ifndef NO_FFS_SNAPSHOT

static struct snapdata *
ffs_snapdata_alloc(void)
{
	struct snapdata *sn;

	/*
	 * Fetch a snapdata from the free list if there is one available.
	 */
	mtx_lock(&snapfree_lock);
	sn = LIST_FIRST(&snapfree);
	if (sn != NULL)
		LIST_REMOVE(sn, sn_link);
	mtx_unlock(&snapfree_lock);
	if (sn != NULL)
		return (sn);
	/*
	 * If there were no free snapdatas, allocate one.
	 */
	sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO);
	TAILQ_INIT(&sn->sn_head);
	lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT,
	    LK_CANRECURSE | LK_NOSHARE);
	return (sn);
}

/*
 * The snapdata is never freed because we can not be certain that
 * there are no threads sleeping on the snap lock.  Persisting
 * them permanently avoids costly synchronization in ffs_lock().
 */
static void
ffs_snapdata_free(struct snapdata *sn)
{
	mtx_lock(&snapfree_lock);
	LIST_INSERT_HEAD(&snapfree, sn, sn_link);
	mtx_unlock(&snapfree_lock);
}

/* Try to free snapdata associated with devvp */
static void
try_free_snapdata(struct vnode *devvp)
{
	struct snapdata *sn;
	ufs2_daddr_t *snapblklist;

	ASSERT_VI_LOCKED(devvp, "try_free_snapdata");
	sn = devvp->v_rdev->si_snapdata;

	if (sn == NULL || TAILQ_FIRST(&sn->sn_head) != NULL ||
	    (devvp->v_vflag & VV_COPYONWRITE) == 0) {
		VI_UNLOCK(devvp);
		return;
	}

	devvp->v_rdev->si_snapdata = NULL;
	devvp->v_vflag &= ~VV_COPYONWRITE;
	lockmgr(&sn->sn_lock, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp));
	snapblklist = sn->sn_blklist;
	sn->sn_blklist = NULL;
	sn->sn_listsize = 0;
	lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
	if (snapblklist != NULL)
		free(snapblklist, M_UFSMNT);
	ffs_snapdata_free(sn);
}
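
/*
 * The LK_DRAIN acquisition above waits for current holders of and waiters
 * on sn_lock before the block list is torn down; even so, the snapdata is
 * only recycled through ffs_snapdata_free() rather than freed outright,
 * since (as noted above) we cannot be certain no thread is still about to
 * sleep on the old lock.
 */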

static struct snapdata *
ffs_snapdata_acquire(struct vnode *devvp)
{
	struct snapdata *nsn, *sn;
	int error;

	/*
	 * Allocate a free snapdata.  This is done before acquiring the
	 * devvp lock to avoid allocation while the devvp interlock is
	 * held.
	 */
	nsn = ffs_snapdata_alloc();

	for (;;) {
		VI_LOCK(devvp);
		sn = devvp->v_rdev->si_snapdata;
		if (sn == NULL) {
			/*
			 * This is the first snapshot on this
			 * filesystem and we use our pre-allocated
			 * snapdata.  Publish sn with the sn_lock
			 * owned by us, to avoid the race.
			 */
			error = lockmgr(&nsn->sn_lock, LK_EXCLUSIVE |
			    LK_NOWAIT, NULL);
			if (error != 0)
				panic("leaked sn, lockmgr error %d", error);
			sn = devvp->v_rdev->si_snapdata = nsn;
			VI_UNLOCK(devvp);
			nsn = NULL;
			break;
		}

		/*
		 * A snapshot already exists on this filesystem, so
		 * grab a reference to the common lock.
		 */
		error = lockmgr(&sn->sn_lock, LK_INTERLOCK |
		    LK_EXCLUSIVE | LK_SLEEPFAIL, VI_MTX(devvp));
		if (error == 0)
			break;
	}

	/*
	 * Free any unused snapdata.
	 */
	if (nsn != NULL)
		ffs_snapdata_free(nsn);

	return (sn);
}

#endif