changeset 9653:a70048a304d1

6664765 Unable to remove files when using fat-zap and quota exceeded on ZFS filesystem
author Sanjeev Bagewadi <Sanjeev.Bagewadi@Sun.COM>
date Tue, 19 May 2009 11:19:21 +0530
parents 6b40e106879c
children 53ddae73bb20
files usr/src/uts/common/fs/zfs/dbuf.c usr/src/uts/common/fs/zfs/dmu_tx.c usr/src/uts/common/fs/zfs/dsl_dataset.c usr/src/uts/common/fs/zfs/sys/dmu.h usr/src/uts/common/fs/zfs/sys/dsl_dataset.h usr/src/uts/common/fs/zfs/sys/zap.h usr/src/uts/common/fs/zfs/sys/zap_impl.h usr/src/uts/common/fs/zfs/zap.c usr/src/uts/common/fs/zfs/zap_micro.c
diffstat 9 files changed, 166 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/dbuf.c	Mon May 18 20:11:17 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dbuf.c	Tue May 19 11:19:21 2009 +0530
@@ -1912,6 +1912,19 @@
 	return (db->db_user_ptr);
 }
 
+/* Is dbuf's block (if it has a blkptr) freeable in its dataset? */
+boolean_t
+dmu_buf_freeable(dmu_buf_t *dbuf)
+{
+	dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)dbuf;
+
+	/* Without a block pointer there is no birth txg to test. */
+	if (dbi->db_blkptr == NULL)
+		return (B_FALSE);
+	return (dsl_dataset_block_freeable(dbi->db_objset->os_dsl_dataset,
+	    dbi->db_blkptr->blk_birth));
+}
+
 static void
 dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
 {
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c	Mon May 18 20:11:17 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c	Tue May 19 11:19:21 2009 +0530
@@ -696,12 +696,9 @@
 		}
 	}
 
-	/*
-	 * 3 blocks overwritten: target leaf, ptrtbl block, header block
-	 * 3 new blocks written if adding: new split leaf, 2 grown ptrtbl blocks
-	 */
-	dmu_tx_count_write(txh, dn->dn_maxblkid * dn->dn_datablksz,
-	    (3 + (add ? 3 : 0)) * dn->dn_datablksz);
+	err = zap_count_write(&dn->dn_objset->os, dn->dn_object, name, add,
+	    &txh->txh_space_towrite, &txh->txh_space_tooverwrite,
+	    txh->txh_dnode->dn_datablkshift);
 
 	/*
 	 * If the modified blocks are scattered to the four winds,
@@ -709,7 +706,10 @@
 	 */
 	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 	for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
-		txh->txh_space_towrite += 3 << dn->dn_indblkshift;
+		if (dn->dn_objset->os_dsl_dataset->ds_phys->ds_prev_snap_obj)
+			txh->txh_space_towrite += 3 << dn->dn_indblkshift;
+		else
+			txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift;
 }
 
 void
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Mon May 18 20:11:17 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Tue May 19 11:19:21 2009 +0530
@@ -229,7 +229,7 @@
 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
 }
 
-int
+boolean_t
 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
 {
 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Mon May 18 20:11:17 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Tue May 19 11:19:21 2009 +0530
@@ -405,6 +405,11 @@
 void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
 
 /*
+ * Tells if the given dbuf is freeable.
+ */
+boolean_t dmu_buf_freeable(dmu_buf_t *);
+
+/*
  * You must create a transaction, then hold the objects which you will
  * (or might) modify as part of this transaction.  Then you must assign
  * the transaction to a transaction group.  Once the transaction has
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Mon May 18 20:11:17 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Tue May 19 11:19:21 2009 +0530
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -195,7 +195,7 @@
 void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
 int dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
     dmu_tx_t *tx);
-int dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
+boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
 uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
 
 void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
--- a/usr/src/uts/common/fs/zfs/sys/zap.h	Mon May 18 20:11:17 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap.h	Tue May 19 11:19:21 2009 +0530
@@ -181,6 +181,10 @@
     matchtype_t mt, char *realname, int rn_len,
     boolean_t *normalization_conflictp);
 
+int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
+    int add, uint64_t *towrite, uint64_t *tooverwrite,
+    uint64_t dn_datablkshift);
+
 /*
  * Create an attribute with the given name and value.
  *
--- a/usr/src/uts/common/fs/zfs/sys/zap_impl.h	Mon May 18 20:11:17 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap_impl.h	Tue May 19 11:19:21 2009 +0530
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_ZAP_IMPL_H
 #define	_SYS_ZAP_IMPL_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/zap.h>
 #include <sys/zfs_context.h>
 #include <sys/avl.h>
@@ -195,6 +193,8 @@
 int fzap_lookup(zap_name_t *zn,
     uint64_t integer_size, uint64_t num_integers, void *buf,
     char *realname, int rn_len, boolean_t *normalization_conflictp);
+int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
+    uint64_t *tooverwrite);
 int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
     const void *val, dmu_tx_t *tx);
 int fzap_update(zap_name_t *zn,
--- a/usr/src/uts/common/fs/zfs/zap.c	Mon May 18 20:11:17 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zap.c	Tue May 19 11:19:21 2009 +0530
@@ -1132,3 +1132,58 @@
 		}
 	}
 }
+
+/*
+ * Add to *towrite / *tooverwrite the bytes that changing entry zn of a
+ * fatzap may dirty; returns 0 or the error from zap_deref_leaf().
+ */
+int
+fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
+    uint64_t *tooverwrite)
+{
+	zap_t *zap = zn->zn_zap;
+	zap_leaf_t *l;
+	int err;
+
+	/*
+	 * Account for the header block of the fatzap.  The counters are
+	 * out-parameters: update the pointee, never the pointer itself.
+	 */
+	if (!add && dmu_buf_freeable(zap->zap_dbuf))
+		*tooverwrite += zap->zap_dbuf->db_size;
+	else
+		*towrite += zap->zap_dbuf->db_size;
+
+	/*
+	 * Account for the pointer table blocks.
+	 * If we are adding we need to account for the following cases:
+	 * - If the pointer table is embedded, this operation could force an
+	 *   external pointer table.
+	 * - If this already has an external pointer table this operation
+	 *   could extend the table.
+	 */
+	if (add) {
+		if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0)
+			*towrite += zap->zap_dbuf->db_size;
+		else
+			*towrite += (zap->zap_dbuf->db_size * 3);
+	}
+
+	/* Check whether the leaf's block is freeable; account accordingly. */
+	err = zap_deref_leaf(zap, zn->zn_hash, NULL, RW_READER, &l);
+	if (err != 0)
+		return (err);
+
+	if (!add && dmu_buf_freeable(l->l_dbuf)) {
+		*tooverwrite += l->l_dbuf->db_size;
+	} else {
+		/*
+		 * If this is an add operation, the leaf block could split.
+		 * Hence, we need to account for an additional leaf block.
+		 */
+		*towrite += (add ? 2 : 1) * l->l_dbuf->db_size;
+	}
+
+	zap_put_leaf(l);
+	return (0);
+}
--- a/usr/src/uts/common/fs/zfs/zap_micro.c	Mon May 18 20:11:17 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zap_micro.c	Tue May 19 11:19:21 2009 +0530
@@ -1065,3 +1065,79 @@
 	zap_unlockdir(zap);
 	return (0);
 }
+
+/*
+ * Estimate the space that adding (add != 0) or removing entry "name" of
+ * zapobj may dirty, accumulating it into *towrite / *tooverwrite.
+ * Returns 0, or the error from zap_lockdir() / fzap_count_write().
+ */
+int
+zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
+    uint64_t *towrite, uint64_t *tooverwrite, uint64_t dn_datablkshift)
+{
+	zap_t *zap;
+	int err = 0;
+
+	/*
+	 * Since, we don't have a name, we cannot figure out which blocks will
+	 * be affected in this operation. So, account for the worst case :
+	 * - 3 blocks overwritten: target leaf, ptrtbl block, header block
+	 * - 4 new blocks written if adding:
+	 * 	- 2 blocks for possibly split leaves,
+	 * 	- 2 grown ptrtbl blocks
+	 *
+	 * This also accommodates the case where an add operation to a fairly
+	 * large microzap results in a promotion to fatzap.
+	 */
+	if (name == NULL) {
+		*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
+		return (err);
+	}
+
+	/*
+	 * We lock the zap with adding == FALSE, because passing the actual
+	 * value of add could trigger a mzap_upgrade().  At present we are
+	 * only evaluating the possibility of this operation and hence we do
+	 * not want to trigger an upgrade.
+	 */
+	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
+	if (err)
+		return (err);
+
+	if (!zap->zap_ismicro) {
+		zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT);
+		if (zn) {
+			err = fzap_count_write(zn, add, towrite,
+			    tooverwrite);
+			zap_name_free(zn);
+		} else {
+			/*
+			 * We treat this case as similar to (name == NULL)
+			 */
+			*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
+		}
+	} else {
+		if (!add) {
+			if (dmu_buf_freeable(zap->zap_dbuf))
+				*tooverwrite += SPA_MAXBLOCKSIZE;
+			else
+				*towrite += SPA_MAXBLOCKSIZE;
+		} else {
+			/*
+			 * Adding to a microzap may promote it to a fatzap, so
+			 * assume the worst: the header block rewritten plus 4
+			 * new blocks (2 split leaves, 2 grown ptrtbl blocks).
+			 * NOTE(review): sized by the current block shift, not
+			 * SPA_MAXBLOCKSIZE as above -- confirm it is enough.
+			 */
+			if (dmu_buf_freeable(zap->zap_dbuf))
+				*tooverwrite += 1ULL << dn_datablkshift;
+			else
+				*towrite += 1ULL << dn_datablkshift;
+			*towrite += 4ULL << dn_datablkshift;
+		}
+	}
+
+	zap_unlockdir(zap);
+	return (err);
+}