freebsd-dev/module/zfs/zpl_ctldir.c
Brian Behlendorf 7b3e34ba5a Fix 'zfs rollback' on mounted file systems
Rolling back a mounted filesystem with open file handles and
cached dentries+inodes never worked properly in ZoL.  The
major issue was that Linux provides no easy mechanism for
modules to invalidate the inode cache for a file system.

Because of this it was possible that an inode from the previous
filesystem would not get properly dropped from the cache during
rolling back.  Then a new inode with the same inode number would
be created and collide with the existing cached inode.  Ideally
this would trigger a VERIFY() but in practice the error wasn't
handled and it would just NULL reference.

Luckily, this issue can be resolved by sprucing up the existing
Solaris zfs_rezget() functionality for the Linux VFS.

The way it works now is that when a file system is rolled back
all the cached inodes will be traversed and refetched from disk.
If a version of the cached inode exists on disk the in-core
copy will be updated accordingly.  If there is no match for that
object on disk it will be unhashed from the inode cache and
marked as stale.

This will effectively make the inode unfindable for lookups
allowing the inode number to be immediately recycled.  The inode
will then only be accessible from the cached dentries.  Subsequent
dentry lookups which reference a stale inode will result in the
dentry being invalidated.  Once invalidated the dentry will drop
its reference on the inode allowing it to be safely pruned from
the cache.

Special care is taken for negative dentries since they do not
reference any inode.  These dentries will be invalidated based
on when they were added to the dentry cache.  Entries added
before the last rollback will be invalidated to prevent them
from masking real files in the dataset.

Two nice side effects of this fix are:

* Removes the dependency on spl_invalidate_inodes(), it can now
  be safely removed from the SPL when we choose to do so.

* zfs_znode_alloc() no longer requires a dentry to be passed.
  This effectively reverts this portion of the code to its
  upstream counterpart.  The dentry is now instantiated more
  correctly in the Linux ZPL layer.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ned Bass <bass6@llnl.gov>
Closes #795
2013-01-17 09:51:20 -08:00

543 lines
12 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (C) 2011 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* LLNL-CODE-403049.
* Rewritten for Linux by:
* Rohan Puri <rohan.puri15@gmail.com>
* Brian Behlendorf <behlendorf1@llnl.gov>
*/
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_ctldir.h>
#include <sys/zpl.h>
/*
 * Shared open handler for the control directories.
 *
 * An open that requests write mode (FMODE_WRITE set in filp->f_mode) is
 * refused with -EACCES.  Every other open is handed to generic_file_open()
 * and its result is returned unchanged.
 */
/* ARGSUSED */
static int
zpl_common_open(struct inode *ip, struct file *filp)
{
	const int wants_write = (filp->f_mode & FMODE_WRITE) != 0;

	return (wants_write ? -EACCES : generic_file_open(ip, filp));
}
/*
 * Emit the "." and ".." entries of a directory.
 *
 * filp->f_pos is the index of the next entry to produce: 0 for ".",
 * 1 for "..".  It is advanced by one after each entry that filldir()
 * accepts.  Any other position produces nothing.
 *
 * Returns 0, or the non-zero value returned by filldir().
 */
static int
zpl_common_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dentry *dir = filp->f_path.dentry;
	struct inode *dir_ip = dir->d_inode;
	int rc = 0;

	if (filp->f_pos == 0) {
		rc = filldir(dirent, ".", 1, 0, dir_ip->i_ino, DT_DIR);
		if (rc != 0)
			return (rc);
		filp->f_pos++;
	}

	if (filp->f_pos == 1) {
		rc = filldir(dirent, "..", 2, 1, parent_ino(dir), DT_DIR);
		if (rc != 0)
			return (rc);
		filp->f_pos++;
	}

	return (rc);
}
/*
 * Get root directory contents.
 *
 * Produces four entries, in order: ".", "..", ZFS_SNAPDIR_NAME and
 * ZFS_SHAREDIR_NAME.  filp->f_pos (0 through 3) selects the next entry
 * to emit and is advanced after each entry that filldir() accepts, so a
 * later call resumes where the previous one stopped.
 *
 * Returns 0, or the non-zero value returned by filldir().
 */
static int
zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dentry *dir = filp->f_path.dentry;
	struct inode *dir_ip = dir->d_inode;
	zfs_sb_t *zsb = ITOZSB(dir_ip);
	int rc = 0;

	ZFS_ENTER(zsb);

	if (filp->f_pos == 0) {
		rc = filldir(dirent, ".", 1, 0, dir_ip->i_ino, DT_DIR);
		if (rc != 0)
			goto out;
		filp->f_pos++;
	}

	if (filp->f_pos == 1) {
		rc = filldir(dirent, "..", 2, 1, parent_ino(dir), DT_DIR);
		if (rc != 0)
			goto out;
		filp->f_pos++;
	}

	if (filp->f_pos == 2) {
		rc = filldir(dirent, ZFS_SNAPDIR_NAME,
		    strlen(ZFS_SNAPDIR_NAME), 2, ZFSCTL_INO_SNAPDIR, DT_DIR);
		if (rc != 0)
			goto out;
		filp->f_pos++;
	}

	if (filp->f_pos == 3) {
		rc = filldir(dirent, ZFS_SHAREDIR_NAME,
		    strlen(ZFS_SHAREDIR_NAME), 3, ZFSCTL_INO_SHARES, DT_DIR);
		if (rc != 0)
			goto out;
		filp->f_pos++;
	}

out:
	ZFS_EXIT(zsb);
	return (rc);
}
/*
 * Get root directory attributes.
 *
 * The attributes are filled in by simple_getattr(); the access time is
 * then overwritten with CURRENT_TIME.  The result of simple_getattr()
 * is returned.
 */
/* ARGSUSED */
static int
zpl_root_getattr(struct vfsmount *mnt, struct dentry *dentry,
    struct kstat *stat)
{
	int rc = simple_getattr(mnt, dentry, stat);

	stat->atime = CURRENT_TIME;

	return (rc);
}
static struct dentry *
#ifdef HAVE_LOOKUP_NAMEIDATA
zpl_root_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd)
#else
zpl_root_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags)
#endif
{
cred_t *cr = CRED();
struct inode *ip;
int error;
crhold(cr);
error = -zfsctl_root_lookup(dip, dname(dentry), &ip, 0, cr, NULL, NULL);
ASSERT3S(error, <=, 0);
crfree(cr);
if (error) {
if (error == -ENOENT)
return d_splice_alias(NULL, dentry);
else
return ERR_PTR(error);
}
return d_splice_alias(ip, dentry);
}
/*
 * File operations for the '.zfs' control directory.  Opens go through
 * zpl_common_open() and directory listing is done by zpl_root_readdir().
 */
const struct file_operations zpl_fops_root = {
	.readdir	= zpl_root_readdir,
	.read		= generic_read_dir,
	.llseek		= generic_file_llseek,
	.open		= zpl_common_open,
};
/*
 * Inode operations for the '.zfs' control directory: attribute
 * retrieval and name lookup.
 */
const struct inode_operations zpl_ops_root = {
	.getattr	= zpl_root_getattr,
	.lookup		= zpl_root_lookup,
};
#ifdef HAVE_AUTOMOUNT
/*
 * Automount callback for snapshot directory entries.
 *
 * Mounts the snapshot through zfsctl_mount_snapshot().  Returns an
 * ERR_PTR() if the mount failed and NULL otherwise; the new vfsmount is
 * deliberately never returned (see below).
 */
static struct vfsmount *
zpl_snapdir_automount(struct path *path)
{
	struct dentry *snap_dentry = path->dentry;
	int rc;

	/*
	 * Automounting has to be switched off on this dentry while the
	 * mount is in progress.  The user space mount utility performs its
	 * own lookup of this directory, which would otherwise re-enter
	 * zpl_snapdir_automount() over and over.  Once the mount attempt
	 * has finished the DCACHE_NEED_AUTOMOUNT flag is safe to restore.
	 */
	snap_dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
	rc = -zfsctl_mount_snapshot(path, 0);
	snap_dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;

	/*
	 * On success NULL is returned, signalling a mount collision, in
	 * place of the snapshot's new vfsmount.  The user space mount has
	 * already gone through do_add_mount() and placed the vfsmount in
	 * the name space; handing it back here would add it to the
	 * vfsmount list a second time and corrupt that list.
	 */
	return (rc ? ERR_PTR(rc) : NULL);
}
#endif /* HAVE_AUTOMOUNT */
/*
 * Revalidate callback for dentries in the snapshot directory.
 *
 * A snapshot with the same name may have been created or destroyed
 * since the dentry was cached, so cached entries are never trusted:
 * this callback unconditionally returns 0, forcing a fresh lookup.
 */
static int
#ifdef HAVE_D_REVALIDATE_NAMEIDATA
zpl_snapdir_revalidate(struct dentry *dentry, struct nameidata *i)
#else
zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
#endif
{
	return (0);
}
/*
 * Dentry operations attached to entries of the snapshot directory.
 *
 * Snapshot auto mounting is available only on 2.6.37 and newer
 * kernels.  Older kernels triggered the mount through the
 * ops->follow_link() callback as a hack and then grafted the resulting
 * vfsmount into the name space by hand.  Compatibility code for that
 * might be possible but would need considerable care, so on those
 * kernels only revalidation is provided.
 */
dentry_operations_t zpl_dops_snapdirs = {
	.d_revalidate	= zpl_snapdir_revalidate,
#ifdef HAVE_AUTOMOUNT
	.d_automount	= zpl_snapdir_automount,
#endif /* HAVE_AUTOMOUNT */
};
static struct dentry *
#ifdef HAVE_LOOKUP_NAMEIDATA
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
struct nameidata *nd)
#else
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
unsigned int flags)
#endif
{
cred_t *cr = CRED();
struct inode *ip = NULL;
int error;
crhold(cr);
error = -zfsctl_snapdir_lookup(dip, dname(dentry), &ip,
0, cr, NULL, NULL);
ASSERT3S(error, <=, 0);
crfree(cr);
if (error && error != -ENOENT)
return ERR_PTR(error);
ASSERT(error == 0 || ip == NULL);
d_set_d_op(dentry, &zpl_dops_snapdirs);
return d_splice_alias(ip, dentry);
}
/*
 * List the contents of the '.zfs/snapshot' directory.
 *
 * Positions 0 and 1 produce "." and "..".  After that the snapshot names
 * are fetched one at a time with dmu_snapshot_list_next(), whose cookie
 * is seeded from the file position on entry and stored back into
 * filp->f_pos after each entry that filldir() accepts.  Each snapshot is
 * reported with inode number ZFSCTL_INO_SHARES - id.
 *
 * A final status of -ENOENT is reported to the caller as 0 (presumably
 * this is how dmu_snapshot_list_next() signals the end of the list --
 * confirm against its definition).  Any other status is returned as is.
 */
/* ARGSUSED */
static int
zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dentry *dir = filp->f_path.dentry;
	struct inode *dir_ip = dir->d_inode;
	zfs_sb_t *zsb = ITOZSB(dir_ip);
	char name[MAXNAMELEN];
	uint64_t snap_id, cookie;
	boolean_t conflict;
	int rc = 0;

	ZFS_ENTER(zsb);

	cookie = filp->f_pos;

	if (filp->f_pos == 0) {
		rc = filldir(dirent, ".", 1, 0, dir_ip->i_ino, DT_DIR);
		if (rc != 0)
			goto out;
		filp->f_pos++;
	}

	if (filp->f_pos == 1) {
		rc = filldir(dirent, "..", 2, 1, parent_ino(dir), DT_DIR);
		if (rc != 0)
			goto out;
		filp->f_pos++;
	}

	/* Runs until either the listing or filldir() reports non-zero. */
	for (;;) {
		rc = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN,
		    name, &snap_id, &cookie, &conflict);
		if (rc != 0)
			break;

		rc = filldir(dirent, name, strlen(name),
		    filp->f_pos, ZFSCTL_INO_SHARES - snap_id, DT_DIR);
		if (rc != 0)
			break;

		filp->f_pos = cookie;
	}

out:
	ZFS_EXIT(zsb);

	return (rc == -ENOENT ? 0 : rc);
}
int
zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
struct inode *tdip, struct dentry *tdentry)
{
cred_t *cr = CRED();
int error;
crhold(cr);
error = -zfsctl_snapdir_rename(sdip, dname(sdentry),
tdip, dname(tdentry), cr, 0);
ASSERT3S(error, <=, 0);
crfree(cr);
return (error);
}
static int
zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry)
{
cred_t *cr = CRED();
int error;
crhold(cr);
error = -zfsctl_snapdir_remove(dip, dname(dentry), cr, 0);
ASSERT3S(error, <=, 0);
crfree(cr);
return (error);
}
/*
 * Create an entry in the '.zfs/snapshot' directory.
 *
 * A temporary vattr_t is initialized from the parent inode, the
 * requested mode (with S_IFDIR added) and the caller's credentials, and
 * passed to zfsctl_snapdir_mkdir().  On success zpl_dops_snapdirs is
 * installed on the dentry and the dentry is instantiated with the new
 * inode.  Returns 0 or a negative errno.
 */
static int
zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, zpl_umode_t mode)
{
	cred_t *cred = CRED();
	struct inode *new_ip;
	vattr_t *attrs;
	int error;

	crhold(cred);

	attrs = kmem_zalloc(sizeof (*attrs), KM_SLEEP);
	zpl_vap_init(attrs, dip, mode | S_IFDIR, cred);

	error = -zfsctl_snapdir_mkdir(dip, dname(dentry), attrs, &new_ip,
	    cred, 0);
	if (error == 0) {
		d_set_d_op(dentry, &zpl_dops_snapdirs);
		d_instantiate(dentry, new_ip);
	}

	kmem_free(attrs, sizeof (*attrs));
	ASSERT3S(error, <=, 0);
	crfree(cred);

	return (error);
}
/*
 * Get snapshot directory attributes.
 *
 * Starts from simple_getattr() and then overrides:
 *   - size and nlink: number of nodes in zsb->z_ctldir_snaps plus 2
 *   - mtime and ctime: dmu_objset_snap_cmtime() of the objset
 *   - atime: CURRENT_TIME
 *
 * Returns the result of simple_getattr().
 */
/* ARGSUSED */
static int
zpl_snapdir_getattr(struct vfsmount *mnt, struct dentry *dentry,
    struct kstat *stat)
{
	zfs_sb_t *zsb = ITOZSB(dentry->d_inode);
	int rc;

	ZFS_ENTER(zsb);

	rc = simple_getattr(mnt, dentry, stat);

	stat->size = avl_numnodes(&zsb->z_ctldir_snaps) + 2;
	stat->nlink = stat->size;

	stat->mtime = dmu_objset_snap_cmtime(zsb->z_os);
	stat->ctime = stat->mtime;

	stat->atime = CURRENT_TIME;

	ZFS_EXIT(zsb);

	return (rc);
}
/*
 * File operations for the '.zfs/snapshot' directory.  Their main job is
 * producing the list of available snapshots when the directory is
 * listed (e.g. by 'ls'); see zpl_snapdir_readdir().
 */
const struct file_operations zpl_fops_snapdir = {
	.readdir	= zpl_snapdir_readdir,
	.read		= generic_read_dir,
	.llseek		= generic_file_llseek,
	.open		= zpl_common_open,
};
/*
 * Inode operations for the '.zfs/snapshot' directory.  Their main job
 * is creating the inode for a snapshot directory and setting up what is
 * needed to automount the snapshot; see zpl_snapdir_lookup().
 */
const struct inode_operations zpl_ops_snapdir = {
	.mkdir		= zpl_snapdir_mkdir,
	.rmdir		= zpl_snapdir_rmdir,
	.rename		= zpl_snapdir_rename,
	.getattr	= zpl_snapdir_getattr,
	.lookup		= zpl_snapdir_lookup,
};
static struct dentry *
#ifdef HAVE_LOOKUP_NAMEIDATA
zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
struct nameidata *nd)
#else
zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
unsigned int flags)
#endif
{
cred_t *cr = CRED();
struct inode *ip = NULL;
int error;
crhold(cr);
error = -zfsctl_shares_lookup(dip, dname(dentry), &ip,
0, cr, NULL, NULL);
ASSERT3S(error, <=, 0);
crfree(cr);
if (error) {
if (error == -ENOENT)
return d_splice_alias(NULL, dentry);
else
return ERR_PTR(error);
}
return d_splice_alias(ip, dentry);
}
/*
 * List the contents of the '.zfs/shares' directory.
 *
 * When zsb->z_shares_dir is 0 only "." and ".." are produced, via
 * zpl_common_readdir().  Otherwise the znode for object z_shares_dir is
 * obtained with zfs_zget() and listed with zfs_readdir(); the inode
 * reference is dropped again before returning.
 *
 * Returns 0 or a negative errno (or filldir()'s value on the
 * zpl_common_readdir() path).
 */
/* ARGSUSED */
static int
zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	cred_t *cred = CRED();
	struct inode *dir_ip = filp->f_path.dentry->d_inode;
	zfs_sb_t *zsb = ITOZSB(dir_ip);
	znode_t *shares_zp;
	int error;

	ZFS_ENTER(zsb);

	if (zsb->z_shares_dir == 0) {
		error = zpl_common_readdir(filp, dirent, filldir);
		ZFS_EXIT(zsb);
		return (error);
	}

	error = -zfs_zget(zsb, zsb->z_shares_dir, &shares_zp);
	if (error != 0) {
		ZFS_EXIT(zsb);
		return (error);
	}

	crhold(cred);
	error = -zfs_readdir(ZTOI(shares_zp), dirent, filldir,
	    &filp->f_pos, cred);
	crfree(cred);

	iput(ZTOI(shares_zp));

	ZFS_EXIT(zsb);
	ASSERT3S(error, <=, 0);

	return (error);
}
/* ARGSUSED */
static int
zpl_shares_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
struct inode *ip = dentry->d_inode;
zfs_sb_t *zsb = ITOZSB(ip);
znode_t *dzp;
int error;
ZFS_ENTER(zsb);
if (zsb->z_shares_dir == 0) {
error = simple_getattr(mnt, dentry, stat);
stat->nlink = stat->size = 2;
stat->atime = CURRENT_TIME;
ZFS_EXIT(zsb);
return (error);
}
error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp);
if (error == 0)
error = -zfs_getattr_fast(dentry->d_inode, stat);
iput(ZTOI(dzp));
ZFS_EXIT(zsb);
ASSERT3S(error, <=, 0);
return (error);
}
/*
 * File operations for the '.zfs/shares' directory.  Opens go through
 * zpl_common_open() and directory listing is done by
 * zpl_shares_readdir().
 */
const struct file_operations zpl_fops_shares = {
	.readdir	= zpl_shares_readdir,
	.read		= generic_read_dir,
	.llseek		= generic_file_llseek,
	.open		= zpl_common_open,
};
/*
 * Inode operations for the '.zfs/shares' directory: attribute retrieval
 * and name lookup.
 */
const struct inode_operations zpl_ops_shares = {
	.getattr	= zpl_shares_getattr,
	.lookup		= zpl_shares_lookup,
};