freebsd-nq/module/zfs/zpl_ctldir.c

533 lines
12 KiB
C
Raw Normal View History

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (C) 2011 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* LLNL-CODE-403049.
* Rewritten for Linux by:
* Rohan Puri <rohan.puri15@gmail.com>
* Brian Behlendorf <behlendorf1@llnl.gov>
*/
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_ctldir.h>
#include <sys/zpl.h>
/*
 * Open handler shared by the control directory file_operations tables in
 * this file.  A request that includes FMODE_WRITE is refused with -EACCES;
 * every other request is handed to generic_file_open() unchanged.
 */
/* ARGSUSED */
static int
zpl_common_open(struct inode *ip, struct file *filp)
{
	int wants_write = (filp->f_mode & FMODE_WRITE) != 0;

	return (wants_write ? -EACCES : generic_file_open(ip, filp));
}
/*
 * List the contents of the '.zfs' root directory.
 *
 * After "." and ".." the directory holds exactly two fixed entries: the
 * snapshot directory at position 2 and the shares directory at position 3.
 * ctx->pos is only bumped once an entry has actually been emitted, so a
 * listing that stops early resumes at the entry that did not fit.
 *
 * No step in the body produces an error; the function returns 0.
 */
static int
zpl_root_iterate(struct file *filp, struct dir_context *ctx)
{
	zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);

	ZFS_ENTER(zsb);

	if (dir_emit_dots(filp, ctx)) {
		/*
		 * If the first emit fails the position stays at 2, which
		 * also keeps the second entry from being attempted.
		 */
		if (ctx->pos == 2 &&
		    dir_emit(ctx, ZFS_SNAPDIR_NAME, strlen(ZFS_SNAPDIR_NAME),
		    ZFSCTL_INO_SNAPDIR, DT_DIR))
			ctx->pos++;

		if (ctx->pos == 3 &&
		    dir_emit(ctx, ZFS_SHAREDIR_NAME, strlen(ZFS_SHAREDIR_NAME),
		    ZFSCTL_INO_SHARES, DT_DIR))
			ctx->pos++;
	}

	ZFS_EXIT(zsb);

	return (0);
}
#if !defined(HAVE_VFS_ITERATE)
/*
 * readdir() entry point used when HAVE_VFS_ITERATE is not defined.  It
 * wraps the caller's filldir callback and file position in a dir_context,
 * runs zpl_root_iterate(), and copies the resulting position back to filp.
 */
static int
zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
	int rc = zpl_root_iterate(filp, &ctx);

	filp->f_pos = ctx.pos;

	return (rc);
}
#endif /* HAVE_VFS_ITERATE */
/*
 * Attributes of the '.zfs' root directory: whatever simple_getattr()
 * reports, with the access time overwritten by the current time.  The
 * simple_getattr() result is returned as-is.
 */
/* ARGSUSED */
static int
zpl_root_getattr(struct vfsmount *mnt, struct dentry *dentry,
    struct kstat *stat)
{
	int rc = simple_getattr(mnt, dentry, stat);

	stat->atime = CURRENT_TIME;

	return (rc);
}
/*
 * Look up a name inside the '.zfs' root directory.
 *
 * The work is done by zfsctl_root_lookup(); its result is negated so that
 * errors are non-positive here.  On success the inode it produced is
 * spliced into the dentry.  -ENOENT splices a NULL inode instead, and any
 * other failure is returned as an ERR_PTR().
 */
static struct dentry *
#ifdef HAVE_LOOKUP_NAMEIDATA
zpl_root_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd)
#else
zpl_root_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags)
#endif
{
	cred_t *cred = CRED();
	struct inode *found;
	int rc;

	/* Hold the credentials across the lookup call. */
	crhold(cred);
	rc = -zfsctl_root_lookup(dip, dname(dentry), &found, 0, cred,
	    NULL, NULL);
	ASSERT3S(rc, <=, 0);
	crfree(cred);

	if (rc == 0)
		return (d_splice_alias(found, dentry));

	if (rc == -ENOENT)
		return (d_splice_alias(NULL, dentry));

	return (ERR_PTR(rc));
}
/*
 * The '.zfs' control directory file and inode operations.
 *
 * Directory listing is served by zpl_root_iterate(), either directly or
 * through the zpl_root_readdir() shim when HAVE_VFS_ITERATE is not defined.
 */
const struct file_operations zpl_fops_root = {
#ifdef HAVE_VFS_ITERATE
	.iterate	= zpl_root_iterate,
#else
	.readdir	= zpl_root_readdir,
#endif
	.read		= generic_read_dir,
	.llseek		= generic_file_llseek,
	.open		= zpl_common_open,
};
/* Inode operations for the '.zfs' control directory. */
const struct inode_operations zpl_ops_root = {
	.getattr	= zpl_root_getattr,
	.lookup		= zpl_root_lookup,
};
#ifdef HAVE_AUTOMOUNT
/*
 * d_automount handler for dentries under the snapshot directory.  Asks
 * zfsctl_mount_snapshot() to mount the snapshot for the given path.
 *
 * Returns an ERR_PTR() when the mount fails and NULL otherwise; see the
 * comments in the body for why NULL is the success value.
 */
static struct vfsmount *
zpl_snapdir_automount(struct path *path)
{
	struct dentry *snap_dentry = path->dentry;
	int rc;

	/*
	 * The user space mount utility performs its own lookup of this
	 * directory, which would land in zpl_snapdir_automount() again and
	 * again.  Automounting is therefore switched off for this dentry
	 * for the duration of the mount; DCACHE_NEED_AUTOMOUNT is put back
	 * as soon as the mount call has returned.
	 */
	snap_dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
	rc = -zfsctl_mount_snapshot(path, 0);
	snap_dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;

	/*
	 * On success NULL (a mount collision) is reported rather than the
	 * snapshot's new vfsmount.  The user space mount already went
	 * through do_add_mount(), which added the vfsmount to the name
	 * space; handing it back here would add it to the vfsmount list a
	 * second time and corrupt that list.
	 */
	return (rc ? ERR_PTR(rc) : NULL);
}
#endif /* HAVE_AUTOMOUNT */
/*
 * Revalidate any dentry in the snapshot directory on lookup, since a
 * snapshot having the same name may have been created or destroyed since
 * it was cached.
 *
 * The function ignores both of its arguments and unconditionally
 * returns 0.  NOTE(review): under the Linux d_revalidate contract a zero
 * return is understood to mark the cached dentry as no longer valid, so
 * every lookup should be repeated -- confirm against the VFS documentation
 * for the kernels being targeted.
 */
static int
#ifdef HAVE_D_REVALIDATE_NAMEIDATA
zpl_snapdir_revalidate(struct dentry *dentry, struct nameidata *i)
#else
zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
#endif
{
return (0);
}
/*
 * Dentry operations installed on entries of the snapshot directory by
 * zpl_snapdir_lookup() and zpl_snapdir_mkdir().
 */
dentry_operations_t zpl_dops_snapdirs = {
	.d_revalidate	= zpl_snapdir_revalidate,
/*
 * Snapshots can only be auto mounted on 2.6.37 and newer kernels.
 * Older kernels triggered the mount through the ops->follow_link()
 * callback as a hack and then explicitly grafted the resulting
 * vfsmount in to the name space.  Compatibility code for that might
 * be possible, but it would require considerable care.
 */
#ifdef HAVE_AUTOMOUNT
	.d_automount	= zpl_snapdir_automount,
#endif /* HAVE_AUTOMOUNT */
};
/*
 * Look up a name inside the snapshot directory.
 *
 * zfsctl_snapdir_lookup() does the work; its result is negated so errors
 * are non-positive here.  Any failure other than -ENOENT is returned as an
 * ERR_PTR().  For both a hit and an -ENOENT miss the dentry is switched to
 * zpl_dops_snapdirs before being spliced, so the snapshot dentry
 * operations apply in either case (the miss splices a NULL inode).
 */
static struct dentry *
#ifdef HAVE_LOOKUP_NAMEIDATA
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
    struct nameidata *nd)
#else
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
    unsigned int flags)
#endif
{
	cred_t *cred = CRED();
	struct inode *snap_ip = NULL;
	int rc;

	crhold(cred);
	rc = -zfsctl_snapdir_lookup(dip, dname(dentry), &snap_ip, 0, cred,
	    NULL, NULL);
	ASSERT3S(rc, <=, 0);
	crfree(cred);

	if (rc != 0 && rc != -ENOENT)
		return (ERR_PTR(rc));

	/* From here on rc is 0 or -ENOENT; a miss must not yield an inode. */
	ASSERT(rc == 0 || snap_ip == NULL);

	/* Replace whatever dentry operations were set with the snapdir ones. */
	d_clear_d_op(dentry);
	d_set_d_op(dentry, &zpl_dops_snapdirs);

	return (d_splice_alias(snap_ip, dentry));
}
/*
 * List the snapshot directory: "." and ".." followed by one DT_DIR entry
 * per snapshot returned by dmu_snapshot_list_next().  Each entry is
 * reported with inode number ZFSCTL_INO_SHARES - id.
 *
 * dmu_snapshot_list_next() advances the cursor it is given, so the cursor
 * is kept in a local and only written back to ctx->pos after the entry has
 * been handed to dir_emit() successfully.  If dir_emit() refuses the entry
 * (for example because the caller's buffer is full) ctx->pos still refers
 * to that entry and the next call emits it instead of skipping it.
 *
 * Returns 0 on success or when the end of the list (-ENOENT) is reached,
 * otherwise the negated error from dmu_snapshot_list_next().
 */
static int
zpl_snapdir_iterate(struct file *filp, struct dir_context *ctx)
{
	zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
	char snapname[MAXNAMELEN];
	boolean_t case_conflict;
	uint64_t id, pos;
	int error = 0;

	ZFS_ENTER(zsb);

	if (!dir_emit_dots(filp, ctx))
		goto out;

	/* Work on a private copy of the cursor; see the header comment. */
	pos = ctx->pos;
	while (error == 0) {
		/* The pool config is held only around the list call. */
		dsl_pool_config_enter(dmu_objset_pool(zsb->z_os), FTAG);
		error = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN,
		    snapname, &id, &pos, &case_conflict);
		dsl_pool_config_exit(dmu_objset_pool(zsb->z_os), FTAG);
		if (error)
			goto out;

		if (!dir_emit(ctx, snapname, strlen(snapname),
		    ZFSCTL_INO_SHARES - id, DT_DIR))
			goto out;

		/* Entry delivered: commit the advanced cursor. */
		ctx->pos = pos;
	}
out:
	ZFS_EXIT(zsb);

	/* Running off the end of the snapshot list is not an error. */
	if (error == -ENOENT)
		return (0);

	return (error);
}
#if !defined(HAVE_VFS_ITERATE)
/*
 * readdir() entry point used when HAVE_VFS_ITERATE is not defined.  It
 * wraps the caller's filldir callback and file position in a dir_context,
 * runs zpl_snapdir_iterate(), and copies the resulting position back to
 * filp.
 */
static int
zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
	int rc = zpl_snapdir_iterate(filp, &ctx);

	filp->f_pos = ctx.pos;

	return (rc);
}
#endif /* HAVE_VFS_ITERATE */
/*
 * rename handler for the snapshot directory.  Forwards the source and
 * target directory inodes and entry names to zfsctl_snapdir_rename() with
 * the caller's credentials held, and returns its result negated (zero or a
 * negative error).
 */
int
zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
    struct inode *tdip, struct dentry *tdentry)
{
	cred_t *cred = CRED();
	int rc;

	crhold(cred);
	rc = -zfsctl_snapdir_rename(sdip, dname(sdentry), tdip,
	    dname(tdentry), cred, 0);
	ASSERT3S(rc, <=, 0);
	crfree(cred);

	return (rc);
}
/*
 * rmdir handler for the snapshot directory.  Forwards the directory inode
 * and entry name to zfsctl_snapdir_remove() with the caller's credentials
 * held, and returns its result negated (zero or a negative error).
 */
static int
zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry)
{
	cred_t *cred = CRED();
	int rc;

	crhold(cred);
	rc = -zfsctl_snapdir_remove(dip, dname(dentry), cred, 0);
	ASSERT3S(rc, <=, 0);
	crfree(cred);

	return (rc);
}
/*
 * mkdir handler for the snapshot directory.
 *
 * Builds a temporary vattr_t describing a directory with the requested
 * mode and passes it, together with the entry name, to
 * zfsctl_snapdir_mkdir().  On success the dentry is given the snapshot
 * dentry operations and instantiated with the inode that call returned.
 *
 * Returns zero or a negative error (the zfsctl_snapdir_mkdir() result,
 * negated).
 */
static int
zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, zpl_umode_t mode)
{
	cred_t *cr = CRED();
	vattr_t *vap;
	struct inode *ip = NULL;
	int error;

	crhold(cr);
	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
	zpl_vap_init(vap, dip, mode | S_IFDIR, cr);

	error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
	if (error == 0) {
		/* Same dentry operations zpl_snapdir_lookup() installs. */
		d_clear_d_op(dentry);
		d_set_d_op(dentry, &zpl_dops_snapdirs);
		d_instantiate(dentry, ip);
	}

	kmem_free(vap, sizeof (vattr_t));
	ASSERT3S(error, <=, 0);
	crfree(cr);

	return (error);
}
/*
 * Attributes of the snapshot directory.
 *
 * Starts from simple_getattr() and then overrides:
 *   - size and link count: number of nodes in z_ctldir_snaps plus 2,
 *   - mtime and ctime: the value of dmu_objset_snap_cmtime(),
 *   - atime: the current time.
 * The simple_getattr() result is returned as-is.
 */
/* ARGSUSED */
static int
zpl_snapdir_getattr(struct vfsmount *mnt, struct dentry *dentry,
    struct kstat *stat)
{
	zfs_sb_t *zsb = ITOZSB(dentry->d_inode);
	int rc;

	ZFS_ENTER(zsb);

	rc = simple_getattr(mnt, dentry, stat);

	stat->size = avl_numnodes(&zsb->z_ctldir_snaps) + 2;
	stat->nlink = stat->size;

	stat->mtime = dmu_objset_snap_cmtime(zsb->z_os);
	stat->ctime = stat->mtime;

	stat->atime = CURRENT_TIME;

	ZFS_EXIT(zsb);

	return (rc);
}
/*
 * File operations for the '.zfs/snapshot' directory.  Their main job is
 * producing the list of available snapshots when the directory is read
 * (e.g. by 'ls').  See zpl_snapdir_iterate() and, when HAVE_VFS_ITERATE
 * is not defined, its zpl_snapdir_readdir() wrapper.
 */
const struct file_operations zpl_fops_snapdir = {
#ifdef HAVE_VFS_ITERATE
	.iterate	= zpl_snapdir_iterate,
#else
	.readdir	= zpl_snapdir_readdir,
#endif
	.read		= generic_read_dir,
	.llseek		= generic_file_llseek,
	.open		= zpl_common_open,
};
/*
 * Inode operations for the '.zfs/snapshot' directory.  Their main job is
 * creating an inode for a snapshot directory and setting up what is
 * needed to automount the snapshot.  See zpl_snapdir_lookup().
 */
const struct inode_operations zpl_ops_snapdir = {
	.mkdir		= zpl_snapdir_mkdir,
	.rmdir		= zpl_snapdir_rmdir,
	.rename		= zpl_snapdir_rename,
	.getattr	= zpl_snapdir_getattr,
	.lookup		= zpl_snapdir_lookup,
};
/*
 * Look up a name inside the shares directory.
 *
 * zfsctl_shares_lookup() does the work; its result is negated so errors
 * are non-positive here.  On success the inode it produced is spliced
 * into the dentry.  -ENOENT splices a NULL inode instead, and any other
 * failure is returned as an ERR_PTR().
 */
static struct dentry *
#ifdef HAVE_LOOKUP_NAMEIDATA
zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
    struct nameidata *nd)
#else
zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
    unsigned int flags)
#endif
{
	cred_t *cred = CRED();
	struct inode *found = NULL;
	int rc;

	crhold(cred);
	rc = -zfsctl_shares_lookup(dip, dname(dentry), &found, 0, cred,
	    NULL, NULL);
	ASSERT3S(rc, <=, 0);
	crfree(cred);

	if (rc == 0)
		return (d_splice_alias(found, dentry));

	if (rc == -ENOENT)
		return (d_splice_alias(NULL, dentry));

	return (ERR_PTR(rc));
}
/*
 * List the shares directory.
 *
 * When the file system has no shares directory object (z_shares_dir is 0)
 * only "." and ".." are emitted.  Otherwise the znode for z_shares_dir is
 * fetched with zfs_zget() and the listing is delegated to zfs_readdir()
 * on its inode; the reference taken by zfs_zget() is dropped afterwards.
 *
 * Returns zero or a negative error from zfs_zget()/zfs_readdir().
 */
static int
zpl_shares_iterate(struct file *filp, struct dir_context *ctx)
{
	cred_t *cred = CRED();
	zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
	znode_t *shares_zp;
	int rc = 0;

	ZFS_ENTER(zsb);

	if (zsb->z_shares_dir == 0) {
		/* The result is deliberately ignored, as rc stays 0. */
		(void) dir_emit_dots(filp, ctx);
	} else {
		rc = -zfs_zget(zsb, zsb->z_shares_dir, &shares_zp);
		if (rc == 0) {
			crhold(cred);
			rc = -zfs_readdir(ZTOI(shares_zp), ctx, cred);
			crfree(cred);

			iput(ZTOI(shares_zp));
		}
	}

	ZFS_EXIT(zsb);
	ASSERT3S(rc, <=, 0);

	return (rc);
}
#if !defined(HAVE_VFS_ITERATE)
/*
 * readdir() entry point used when HAVE_VFS_ITERATE is not defined.  It
 * wraps the caller's filldir callback and file position in a dir_context,
 * runs zpl_shares_iterate(), and copies the resulting position back to
 * filp.
 */
static int
zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
	int rc = zpl_shares_iterate(filp, &ctx);

	filp->f_pos = ctx.pos;

	return (rc);
}
#endif /* HAVE_VFS_ITERATE */
/*
 * Attributes of the shares directory.
 *
 * When a shares directory object exists (z_shares_dir is non-zero) its
 * znode is fetched with zfs_zget() and the attributes come from
 * zfs_getattr_fast() on that inode.  Otherwise the attributes are those of
 * simple_getattr() with size and link count forced to 2 and the access
 * time set to the current time.
 */
/* ARGSUSED */
static int
zpl_shares_getattr(struct vfsmount *mnt, struct dentry *dentry,
    struct kstat *stat)
{
	zfs_sb_t *zsb = ITOZSB(dentry->d_inode);
	znode_t *shares_zp;
	int rc;

	ZFS_ENTER(zsb);

	if (zsb->z_shares_dir != 0) {
		rc = -zfs_zget(zsb, zsb->z_shares_dir, &shares_zp);
		if (rc == 0) {
			rc = -zfs_getattr_fast(ZTOI(shares_zp), stat);
			/* Drop the reference taken by zfs_zget(). */
			iput(ZTOI(shares_zp));
		}

		ZFS_EXIT(zsb);
		ASSERT3S(rc, <=, 0);

		return (rc);
	}

	/* No shares object: report the attributes of an empty directory. */
	rc = simple_getattr(mnt, dentry, stat);
	stat->size = 2;
	stat->nlink = 2;
	stat->atime = CURRENT_TIME;

	ZFS_EXIT(zsb);

	return (rc);
}
/*
 * File operations for the '.zfs/shares' directory.  Directory listing is
 * served by zpl_shares_iterate(), either directly or through the
 * zpl_shares_readdir() shim when HAVE_VFS_ITERATE is not defined.
 */
const struct file_operations zpl_fops_shares = {
#ifdef HAVE_VFS_ITERATE
	.iterate	= zpl_shares_iterate,
#else
	.readdir	= zpl_shares_readdir,
#endif
	.read		= generic_read_dir,
	.llseek		= generic_file_llseek,
	.open		= zpl_common_open,
};
/*
 * Inode operations for the '.zfs/shares' directory.
 */
const struct inode_operations zpl_ops_shares = {
	.getattr	= zpl_shares_getattr,
	.lookup		= zpl_shares_lookup,
};