changeset 13380:161b964a0e10

883 ZIL reuse during remount can lead to data corruption Reviewed by: Matt Ahrens <Matt.Ahrens@delphix.com> Reviewed by: Adam Leventhal <Adam.Leventhal@delphix.com> Reviewed by: Albert Lee <trisk@nexenta.com> Reviewed by: Gordon Ross <gwr@nexenta.com> Reviewed by: Garrett D'Amore <garrett@nexenta.com> Reivewed by: Dan McDonald <danmcd@nexenta.com> Approved by: Gordon Ross <gwr@nexenta.com>
author Eric Schrock <Eric.Schrock@delphix.com>
date Tue, 31 May 2011 13:48:11 -0700
parents 4df42cc92254
children baff3bb71074
files usr/src/cmd/ztest/ztest.c usr/src/uts/common/fs/zfs/zil.c
diffstat 2 files changed, 59 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/ztest/ztest.c	Sun May 29 06:08:44 2011 -0700
+++ b/usr/src/cmd/ztest/ztest.c	Tue May 31 13:48:11 2011 -0700
@@ -203,6 +203,7 @@
  */
 typedef struct ztest_ds {
 	objset_t	*zd_os;
+	rwlock_t	zd_zilog_lock;
 	zilog_t		*zd_zilog;
 	uint64_t	zd_seq;
 	ztest_od_t	*zd_od;		/* debugging aid */
@@ -236,6 +237,7 @@
 ztest_func_t ztest_zap;
 ztest_func_t ztest_zap_parallel;
 ztest_func_t ztest_zil_commit;
+ztest_func_t ztest_zil_remount;
 ztest_func_t ztest_dmu_read_write_zcopy;
 ztest_func_t ztest_dmu_objset_create_destroy;
 ztest_func_t ztest_dmu_prealloc;
@@ -271,6 +273,7 @@
 	{ ztest_zap_parallel,			100,	&zopt_always	},
 	{ ztest_split_pool,			1,	&zopt_always	},
 	{ ztest_zil_commit,			1,	&zopt_incessant	},
+	{ ztest_zil_remount,			1,	&zopt_sometimes	},
 	{ ztest_dmu_read_write_zcopy,		1,	&zopt_often	},
 	{ ztest_dmu_objset_create_destroy,	1,	&zopt_often	},
 	{ ztest_dsl_prop_get_set,		1,	&zopt_often	},
@@ -981,6 +984,7 @@
 	zd->zd_seq = 0;
 	dmu_objset_name(os, zd->zd_name);
 
+	VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0);
 	VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0);
 
 	for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
@@ -1960,6 +1964,8 @@
 	if (ztest_random(2) == 0)
 		io_type = ZTEST_IO_WRITE_TAG;
 
+	(void) rw_rdlock(&zd->zd_zilog_lock);
+
 	switch (io_type) {
 
 	case ZTEST_IO_WRITE_TAG:
@@ -1995,6 +2001,8 @@
 		break;
 	}
 
+	(void) rw_unlock(&zd->zd_zilog_lock);
+
 	umem_free(data, blocksize);
 }
 
@@ -2049,6 +2057,8 @@
 {
 	zilog_t *zilog = zd->zd_zilog;
 
+	(void) rw_rdlock(&zd->zd_zilog_lock);
+
 	zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
 
 	/*
@@ -2060,6 +2070,31 @@
 	ASSERT(zd->zd_seq <= zilog->zl_commit_lr_seq);
 	zd->zd_seq = zilog->zl_commit_lr_seq;
 	mutex_exit(&zilog->zl_lock);
+
+	(void) rw_unlock(&zd->zd_zilog_lock);
+}
+
+/*
+ * This function is designed to simulate the operations that occur during a
+ * mount/unmount operation.  We hold the dataset across these operations in an
+ * attempt to expose any implicit assumptions about ZIL management.
+ */
+/* ARGSUSED */
+void
+ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
+{
+	objset_t *os = zd->zd_os;
+
+	(void) rw_wrlock(&zd->zd_zilog_lock);
+
+	/* zfsvfs_teardown() */
+	zil_close(zd->zd_zilog);
+
+	/* zfsvfs_setup() */
+	VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
+	zil_replay(os, zd, ztest_replay_vector);
+
+	(void) rw_unlock(&zd->zd_zilog_lock);
 }
 
 /*
--- a/usr/src/uts/common/fs/zfs/zil.c	Sun May 29 06:08:44 2011 -0700
+++ b/usr/src/uts/common/fs/zfs/zil.c	Tue May 31 13:48:11 2011 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -560,7 +561,7 @@
 
 	if (!list_is_empty(&zilog->zl_lwb_list)) {
 		ASSERT(zh->zh_claim_txg == 0);
-		ASSERT(!keep_first);
+		VERIFY(!keep_first);
 		while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
 			list_remove(&zilog->zl_lwb_list, lwb);
 			if (lwb->lwb_buf != NULL)
@@ -1661,20 +1662,9 @@
 void
 zil_free(zilog_t *zilog)
 {
-	lwb_t *head_lwb;
-
 	zilog->zl_stop_sync = 1;
 
-	/*
-	 * After zil_close() there should only be one lwb with a buffer.
-	 */
-	head_lwb = list_head(&zilog->zl_lwb_list);
-	if (head_lwb) {
-		ASSERT(head_lwb == list_tail(&zilog->zl_lwb_list));
-		list_remove(&zilog->zl_lwb_list, head_lwb);
-		zio_buf_free(head_lwb->lwb_buf, head_lwb->lwb_sz);
-		kmem_cache_free(zil_lwb_cache, head_lwb);
-	}
+	ASSERT(list_is_empty(&zilog->zl_lwb_list));
 	list_destroy(&zilog->zl_lwb_list);
 
 	avl_destroy(&zilog->zl_vdev_tree);
@@ -1714,6 +1704,10 @@
 {
 	zilog_t *zilog = dmu_objset_zil(os);
 
+	ASSERT(zilog->zl_clean_taskq == NULL);
+	ASSERT(zilog->zl_get_data == NULL);
+	ASSERT(list_is_empty(&zilog->zl_lwb_list));
+
 	zilog->zl_get_data = get_data;
 	zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri,
 	    2, 2, TASKQ_PREPOPULATE);
@@ -1727,7 +1721,7 @@
 void
 zil_close(zilog_t *zilog)
 {
-	lwb_t *tail_lwb;
+	lwb_t *lwb;
 	uint64_t txg = 0;
 
 	zil_commit(zilog, 0); /* commit all itx */
@@ -1739,9 +1733,9 @@
 	 * destroy the zl_clean_taskq.
 	 */
 	mutex_enter(&zilog->zl_lock);
-	tail_lwb = list_tail(&zilog->zl_lwb_list);
-	if (tail_lwb != NULL)
-		txg = tail_lwb->lwb_max_txg;
+	lwb = list_tail(&zilog->zl_lwb_list);
+	if (lwb != NULL)
+		txg = lwb->lwb_max_txg;
 	mutex_exit(&zilog->zl_lock);
 	if (txg)
 		txg_wait_synced(zilog->zl_dmu_pool, txg);
@@ -1749,6 +1743,19 @@
 	taskq_destroy(zilog->zl_clean_taskq);
 	zilog->zl_clean_taskq = NULL;
 	zilog->zl_get_data = NULL;
+
+	/*
+	 * We should have only one LWB left on the list; remove it now.
+	 */
+	mutex_enter(&zilog->zl_lock);
+	lwb = list_head(&zilog->zl_lwb_list);
+	if (lwb != NULL) {
+		ASSERT(lwb == list_tail(&zilog->zl_lwb_list));
+		list_remove(&zilog->zl_lwb_list, lwb);
+		zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
+		kmem_cache_free(zil_lwb_cache, lwb);
+	}
+	mutex_exit(&zilog->zl_lock);
 }
 
 /*