changeset 9788:f660bc44f2e8

6843700 zfs_znode_move() does not ensure valid file system pointer
author Tom Erickson <Tom.Erickson@Sun.COM>
date Thu, 04 Jun 2009 13:01:01 -0700
parents 746c247ce3b2
children 33dc583075ed
files usr/src/uts/common/fs/zfs/zfs_vfsops.c usr/src/uts/common/fs/zfs/zfs_znode.c
diffstat 2 files changed, 40 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Thu Jun 04 15:16:57 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Thu Jun 04 13:01:01 2009 -0700
@@ -1037,6 +1037,16 @@
 zfsvfs_free(zfsvfs_t *zfsvfs)
 {
 	int i;
+	extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
+
+	/*
+	 * This is a barrier to prevent the filesystem from going away in
+	 * zfs_znode_move() until we can safely ensure that the filesystem is
+	 * not unmounted. We consider the filesystem valid before the barrier
+	 * and invalid after the barrier.
+	 */
+	rw_enter(&zfsvfs_lock, RW_READER);
+	rw_exit(&zfsvfs_lock);
 
 	zfs_fuid_destroy(zfsvfs);
 
--- a/usr/src/uts/common/fs/zfs/zfs_znode.c	Thu Jun 04 15:16:57 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c	Thu Jun 04 13:01:01 2009 -0700
@@ -87,6 +87,12 @@
  * (such as VFS logic) that will not compile easily in userland.
  */
 #ifdef _KERNEL
+/*
+ * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
+ * be freed before it can be safely accessed.
+ */
+krwlock_t zfsvfs_lock;
+
 static kmem_cache_t *znode_cache = NULL;
 
 /*ARGSUSED*/
@@ -154,8 +160,9 @@
 #ifdef	ZNODE_STATS
 static struct {
 	uint64_t zms_zfsvfs_invalid;
+	uint64_t zms_zfsvfs_recheck1;
 	uint64_t zms_zfsvfs_unmounted;
-	uint64_t zms_zfsvfs_recheck_invalid;
+	uint64_t zms_zfsvfs_recheck2;
 	uint64_t zms_obj_held;
 	uint64_t zms_vnode_locked;
 	uint64_t zms_not_only_dnlc;
@@ -229,15 +236,32 @@
 	}
 
 	/*
-	 * Ensure that the filesystem is not unmounted during the move.
-	 * This is the equivalent to ZFS_ENTER().
+	 * Close a small window in which it's possible that the filesystem could
+	 * be unmounted and freed, and zfsvfs, though valid in the previous
+	 * statement, could point to unrelated memory by the time we try to
+	 * prevent the filesystem from being unmounted.
+	 */
+	rw_enter(&zfsvfs_lock, RW_WRITER);
+	if (zfsvfs != ozp->z_zfsvfs) {
+		rw_exit(&zfsvfs_lock);
+		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
+		return (KMEM_CBRC_DONT_KNOW);
+	}
+
+	/*
+	 * If the znode is still valid, then so is the file system. We know that
+	 * no valid file system can be freed while we hold zfsvfs_lock, so we
+	 * can safely ensure that the filesystem is not and will not be
+	 * unmounted. The next statement is equivalent to ZFS_ENTER().
 	 */
 	rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
 	if (zfsvfs->z_unmounted) {
 		ZFS_EXIT(zfsvfs);
+		rw_exit(&zfsvfs_lock);
 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
 		return (KMEM_CBRC_DONT_KNOW);
 	}
+	rw_exit(&zfsvfs_lock);
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	/*
@@ -247,7 +271,7 @@
 	if (zfsvfs != ozp->z_zfsvfs) {
 		mutex_exit(&zfsvfs->z_znodes_lock);
 		ZFS_EXIT(zfsvfs);
-		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck_invalid);
+		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
 		return (KMEM_CBRC_DONT_KNOW);
 	}
 
@@ -303,6 +327,7 @@
 	/*
 	 * Initialize zcache
 	 */
+	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
 	ASSERT(znode_cache == NULL);
 	znode_cache = kmem_cache_create("zfs_znode_cache",
 	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
@@ -324,6 +349,7 @@
 	if (znode_cache)
 		kmem_cache_destroy(znode_cache);
 	znode_cache = NULL;
+	rw_destroy(&zfsvfs_lock);
 }
 
 struct vnodeops *zfs_dvnodeops;