From 86db35c447aa3f4cc848497d78d54ec9c985d1ed Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Thu, 5 Dec 2019 01:52:27 +0100 Subject: [PATCH] Remove zpl_revalidate: fix snapshot rollback Open files, which aren't present in the snapshot, which is being roll-backed to, need to disappear from the visible VFS image of the dataset. Kernel provides d_drop function to drop invalid entry from the dcache, but inode can be referenced by dentry multiple dentries. The introduced zpl_d_drop_aliases function walks and invalidates all aliases of an inode. Reviewed-by: Ryan Moeller Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Pavel Snajdr Closes #9600 Closes #14070 --- config/kernel-dentry-alias.m4 | 30 +++++++++++++ config/kernel.m4 | 2 + include/os/linux/kernel/linux/dcache_compat.h | 21 +++++++++ include/os/linux/zfs/sys/trace_acl.h | 9 ++-- include/os/linux/zfs/sys/zpl.h | 3 +- include/sys/zfs_znode.h | 1 - module/os/linux/zfs/zfs_ctldir.c | 1 - module/os/linux/zfs/zfs_vfsops.c | 3 +- module/os/linux/zfs/zfs_znode.c | 1 - module/os/linux/zfs/zpl_inode.c | 44 ------------------- 10 files changed, 59 insertions(+), 56 deletions(-) create mode 100644 config/kernel-dentry-alias.m4 diff --git a/config/kernel-dentry-alias.m4 b/config/kernel-dentry-alias.m4 new file mode 100644 index 000000000000..f0ddb8d010b0 --- /dev/null +++ b/config/kernel-dentry-alias.m4 @@ -0,0 +1,30 @@ +dnl # +dnl # 3.18 API change +dnl # Dentry aliases are in d_u struct dentry member +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [ + ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [ + #include + #include + #include + ], [ + struct inode *inode __attribute__ ((unused)) = NULL; + struct dentry *dentry __attribute__ ((unused)) = NULL; + hlist_for_each_entry(dentry, &inode->i_dentry, + d_u.d_alias) { + d_drop(dentry); + } + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [ + AC_MSG_CHECKING([whether dentry aliases are in d_u member]) + ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DENTRY_D_U_ALIASES, 1, + [dentry aliases are in d_u member]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + diff --git a/config/kernel.m4 b/config/kernel.m4 index d4d13ddd1d1a..b573881c4400 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -96,6 +96,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED ZFS_AC_KERNEL_SRC_DENTRY + ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT @@ -217,6 +218,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED ZFS_AC_KERNEL_DENTRY + ZFS_AC_KERNEL_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT diff --git a/include/os/linux/kernel/linux/dcache_compat.h b/include/os/linux/kernel/linux/dcache_compat.h index 0fbd92458679..4de1118daafa 100644 --- a/include/os/linux/kernel/linux/dcache_compat.h +++ b/include/os/linux/kernel/linux/dcache_compat.h @@ -61,4 +61,25 @@ d_clear_d_op(struct dentry *dentry) DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE); } +/* + * Walk and invalidate all dentry aliases of an inode + * unless it's a mountpoint + */ +static inline void +zpl_d_drop_aliases(struct inode *inode) +{ + struct dentry *dentry; + spin_lock(&inode->i_lock); +#ifdef HAVE_DENTRY_D_U_ALIASES + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { +#else + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { +#endif + if (!IS_ROOT(dentry) && !d_mountpoint(dentry) && + (dentry->d_inode == inode)) { + d_drop(dentry); + } + } + spin_unlock(&inode->i_lock); +} #endif /* _ZFS_DCACHE_H */ diff --git a/include/os/linux/zfs/sys/trace_acl.h b/include/os/linux/zfs/sys/trace_acl.h index 6a73545fecda..2c734322267a 100644 --- a/include/os/linux/zfs/sys/trace_acl.h +++ b/include/os/linux/zfs/sys/trace_acl.h @@ -64,7 +64,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __field(boolean_t, z_is_sa) __field(boolean_t, z_is_mapped) __field(boolean_t, z_is_ctldir) - __field(boolean_t, z_is_stale) __field(uint32_t, i_uid) __field(uint32_t, i_gid) @@ -99,7 +98,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __entry->z_is_sa = zn->z_is_sa; __entry->z_is_mapped = zn->z_is_mapped; __entry->z_is_ctldir = zn->z_is_ctldir; - __entry->z_is_stale = zn->z_is_stale; __entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid); __entry->i_gid = KGID_TO_SGID(ZTOI(zn)->i_gid); @@ -121,9 +119,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class, "zn_prefetch %u blksz %u seq %u " "mapcnt %llu size %llu pflags %llu " "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u " - "mode 0x%x is_sa %d is_mapped %d " - "is_ctldir %d is_stale %d inode { " - "uid %u gid %u ino %lu nlink %u size %lli " + "mode 0x%x is_sa %d is_mapped %d is_ctldir %d " + "inode { uid %u gid %u ino %lu nlink %u size %lli " "blkbits %u bytes %u mode 0x%x generation %x } } " "ace { type %u flags %u access_mask %u } mask_matched %u", __entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty, @@ -132,7 +129,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __entry->z_pflags, __entry->z_sync_cnt, __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt, __entry->z_mode, __entry->z_is_sa, __entry->z_is_mapped, - __entry->z_is_ctldir, __entry->z_is_stale, __entry->i_uid, + __entry->z_is_ctldir, __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink, __entry->i_size, __entry->i_blkbits, __entry->i_bytes, __entry->i_mode, __entry->i_generation, diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h index 30d73db6b9e8..83416d64744c 100644 --- a/include/os/linux/zfs/sys/zpl.h +++ b/include/os/linux/zfs/sys/zpl.h @@ -45,7 +45,8 @@ extern const struct inode_operations zpl_inode_operations; extern const struct inode_operations zpl_dir_inode_operations; extern const struct inode_operations zpl_symlink_inode_operations; extern const struct inode_operations zpl_special_inode_operations; -extern dentry_operations_t zpl_dentry_operations; + +/* zpl_file.c */ extern const struct address_space_operations zpl_address_space_operations; extern const struct file_operations zpl_file_operations; extern const struct file_operations zpl_dir_file_operations; diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index 7c906050bc47..c8656b3f6162 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -190,7 +190,6 @@ typedef struct znode { boolean_t z_is_sa; /* are we native sa? */ boolean_t z_is_mapped; /* are we mmap'ed */ boolean_t z_is_ctldir; /* are we .zfs entry */ - boolean_t z_is_stale; /* are we stale due to rollback? */ boolean_t z_suspended; /* extra ref from a suspend? */ uint_t z_blksz; /* block size in bytes */ uint_t z_seq; /* modification sequence number */ diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c index 4ae0a65370e5..519f13212fac 100644 --- a/module/os/linux/zfs/zfs_ctldir.c +++ b/module/os/linux/zfs/zfs_ctldir.c @@ -487,7 +487,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, zp->z_is_sa = B_FALSE; zp->z_is_mapped = B_FALSE; zp->z_is_ctldir = B_TRUE; - zp->z_is_stale = B_FALSE; zp->z_sa_hdl = NULL; zp->z_blksz = 0; zp->z_seq = 0; diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index 7b682e49d84e..c921e587c75c 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1522,7 +1522,6 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) sb->s_op = &zpl_super_operations; sb->s_xattr = zpl_xattr_handlers; sb->s_export_op = &zpl_export_operations; - sb->s_d_op = &zpl_dentry_operations; /* Set features for file system. */ zfs_set_fuid_feature(zfsvfs); @@ -1881,8 +1880,8 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) zp = list_next(&zfsvfs->z_all_znodes, zp)) { err2 = zfs_rezget(zp); if (err2) { + zpl_d_drop_aliases(ZTOI(zp)); remove_inode_hash(ZTOI(zp)); - zp->z_is_stale = B_TRUE; } /* see comment in zfs_suspend_fs() */ diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index 9aeffba86150..3ded79a30a6f 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -552,7 +552,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, zp->z_atime_dirty = B_FALSE; zp->z_is_mapped = B_FALSE; zp->z_is_ctldir = B_FALSE; - zp->z_is_stale = B_FALSE; zp->z_suspended = B_FALSE; zp->z_sa_hdl = NULL; zp->z_mapcnt = 0; diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index 8d073ff8cbd3..9b702c535ea7 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -728,46 +728,6 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) return (error); } -static int -#ifdef HAVE_D_REVALIDATE_NAMEIDATA -zpl_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - unsigned int flags = (nd ? nd->flags : 0); -#else -zpl_revalidate(struct dentry *dentry, unsigned int flags) -{ -#endif /* HAVE_D_REVALIDATE_NAMEIDATA */ - /* CSTYLED */ - zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; - int error; - - if (flags & LOOKUP_RCU) - return (-ECHILD); - - /* - * After a rollback negative dentries created before the rollback - * time must be invalidated. Otherwise they can obscure files which - * are only present in the rolled back dataset. - */ - if (dentry->d_inode == NULL) { - spin_lock(&dentry->d_lock); - error = time_before(dentry->d_time, zfsvfs->z_rollback_time); - spin_unlock(&dentry->d_lock); - - if (error) - return (0); - } - - /* - * The dentry may reference a stale inode if a mounted file system - * was rolled back to a point in time where the object didn't exist. - */ - if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale) - return (0); - - return (1); -} - const struct inode_operations zpl_inode_operations = { .setattr = zpl_setattr, .getattr = zpl_getattr, @@ -856,7 +816,3 @@ const struct inode_operations zpl_special_inode_operations = { .get_acl = zpl_get_acl, #endif /* CONFIG_FS_POSIX_ACL */ }; - -dentry_operations_t zpl_dentry_operations = { - .d_revalidate = zpl_revalidate, -};