From 19d55079aecb5c022c1c09e0eace4f7da7381a62 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Mon, 7 Jul 2014 11:49:36 -0800 Subject: [PATCH] Illumos 4950 - files sometimes can't be removed from a full filesystem 4950 files sometimes can't be removed from a full filesystem Reviewed by: Adam Leventhal Reviewed by: George Wilson Reviewed by: Sebastien Roy Reviewed by: Boris Protopopov Approved by: Dan McDonald References: https://www.illumos.org/issues/4950 https://github.com/illumos/illumos-gate/commit/4bb7380 Porting notes: 1. ZoL currently does not log discards to zvols, so the portion of this patch that modifies the discard logging to mark it as freeing space has been discarded. 2. may_delete_now had been removed from zfs_remove() in ZoL. It has been reintroduced. 3. We do not try to emulate vnodes, so the following lines are not valid on Linux: mutex_enter(&vp->v_lock); may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); mutex_exit(&vp->v_lock); This has been replaced with: mutex_enter(&zp->z_lock); may_delete_now = atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped); mutex_exit(&zp->z_lock); Ported-by: Richard Yao Signed-off-by: Brian Behlendorf --- include/sys/dmu.h | 1 + module/zfs/dmu.c | 7 +++++++ module/zfs/dmu_tx.c | 26 ++++++++++++++++++++++++++ module/zfs/zfs_dir.c | 3 ++- module/zfs/zfs_vnops.c | 15 ++++++++++++++- module/zfs/zfs_znode.c | 11 ++++++----- module/zfs/zvol.c | 1 + 7 files changed, 57 insertions(+), 7 deletions(-) diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 0a5b2809b05a..ab6b92fb860b 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -665,6 +665,7 @@ void dmu_tx_abort(dmu_tx_t *tx); int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how); void dmu_tx_wait(dmu_tx_t *tx); void dmu_tx_commit(dmu_tx_t *tx); +void dmu_tx_mark_netfree(dmu_tx_t *tx); /* * To register a commit callback, dmu_tx_callback_register() must be called. 
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index f48c0e977db9..786287834183 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -687,6 +687,12 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, tx = dmu_tx_create(os); dmu_tx_hold_free(tx, dn->dn_object, chunk_begin, chunk_end - chunk_begin); + + /* + * Mark this transaction as typically resulting in a net + * reduction in space used. + */ + dmu_tx_mark_netfree(tx); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); @@ -738,6 +744,7 @@ dmu_free_long_object(objset_t *os, uint64_t object) tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, object); dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); + dmu_tx_mark_netfree(tx); err = dmu_tx_assign(tx, TXG_WAIT); if (err == 0) { err = dmu_object_free(os, object, tx); diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 016defe79db8..74e323dbdf65 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -610,6 +610,32 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) txh->txh_space_tounref += unref; } +/* + * This function marks the transaction as being a "net free". The end + * result is that refquotas will be disabled for this transaction, and + * this transaction will be able to use half of the pool space overhead + * (see dsl_pool_adjustedsize()). Therefore this function should only + * be called for transactions that we expect will not cause a net increase + * in the amount of space used (but it's OK if that is occasionally not true). + */ +void +dmu_tx_mark_netfree(dmu_tx_t *tx) +{ + dmu_tx_hold_t *txh; + + txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, + DMU_NEW_OBJECT, THT_FREE, 0, 0); + + /* + * Pretend that this operation will free 1GB of space. This + * should be large enough to cancel out the largest write. + * We don't want to use something like UINT64_MAX, because that would + * cause overflows when doing math with these values (e.g. in + * dmu_tx_try_assign()). 
+ */ + txh->txh_space_tofree = txh->txh_space_tounref = 1024 * 1024 * 1024; +} + void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len) { diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index c1eadd0dc250..951acb0d6e5b 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2014 by Delphix. All rights reserved. */ @@ -578,6 +578,7 @@ zfs_purgedir(znode_t *dzp) dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); /* Is this really needed ? */ zfs_sa_upgrade_txholds(tx, xzp); + dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 311613ae55c6..2f003de9f878 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2015 by Chunwei Chen. All rights reserved. */ @@ -1525,6 +1525,7 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr) uint64_t obj = 0; zfs_dirlock_t *dl; dmu_tx_t *tx; + boolean_t may_delete_now; boolean_t unlinked; uint64_t txtype; pathname_t *realnmp = NULL; @@ -1584,6 +1585,10 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr) dnlc_remove(dvp, name); #endif /* HAVE_DNLC */ + mutex_enter(&zp->z_lock); + may_delete_now = atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped); + mutex_exit(&zp->z_lock); + /* * We never delete the znode and always place it in the unlinked * set. 
The dentry cache will always hold the last reference and @@ -1609,6 +1614,14 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr) /* charge as an update -- would be nice not to charge at all */ dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); + /* + * Mark this transaction as typically resulting in a net free of + * space, unless object removal will be delayed indefinitely + * (due to active holds on the vnode due to the file being open). + */ + if (may_delete_now) + dmu_tx_mark_netfree(tx); + error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index e4bd62fd2713..7ebe543845ab 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ /* Portions Copyright 2007 Jeremy Teo */ @@ -1425,7 +1425,7 @@ zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) * IN: zp - znode of file to free data in. * end - new end-of-file * - * RETURN: 0 on success, error code on failure + * RETURN: 0 on success, error code on failure */ static int zfs_extend(znode_t *zp, uint64_t end) @@ -1545,7 +1545,7 @@ zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len) * off - start of section to free. * len - length of section to free. * - * RETURN: 0 on success, error code on failure + * RETURN: 0 on success, error code on failure */ static int zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) @@ -1624,7 +1624,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) * IN: zp - znode of file to free data in. * end - new end-of-file. 
* - * RETURN: 0 on success, error code on failure + * RETURN: 0 on success, error code on failure */ static int zfs_trunc(znode_t *zp, uint64_t end) @@ -1657,6 +1657,7 @@ zfs_trunc(znode_t *zp, uint64_t end) tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); + dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); @@ -1691,7 +1692,7 @@ zfs_trunc(znode_t *zp, uint64_t end) * flag - current file open mode flags. * log - TRUE if this action should be logged * - * RETURN: 0 on success, error code on failure + * RETURN: 0 on success, error code on failure */ int zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 9b7ab542d006..57d36468f7d0 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -277,6 +277,7 @@ zvol_update_volsize(uint64_t volsize, objset_t *os) tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); + dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx);