diff usr/src/uts/common/fs/zfs/spa.c @ 1585:4ad213e858a9

6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
6395485 ensure the gu in vdev_guid
6395487 config cache can become stale relative to mosconfig
6395488 vdev addition must sync to config cache before allocation begins
author bonwick
date Thu, 09 Mar 2006 16:56:05 -0800
parents 938876158511
children 438b928f80c7
--- a/usr/src/uts/common/fs/zfs/spa.c	Thu Mar 09 16:31:45 2006 -0800
+++ b/usr/src/uts/common/fs/zfs/spa.c	Thu Mar 09 16:56:05 2006 -0800
@@ -252,10 +252,9 @@
 	/*
 	 * Close all vdevs.
 	 */
-	if (spa->spa_root_vdev) {
+	if (spa->spa_root_vdev)
 		vdev_free(spa->spa_root_vdev);
-		spa->spa_root_vdev = NULL;
-	}
+	ASSERT(spa->spa_root_vdev == NULL);
 
 	spa->spa_async_suspended = 0;
 }
@@ -268,6 +267,7 @@
 spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
 {
 	int error = 0;
+	uint64_t config_cache_txg = spa->spa_config_txg;
 	nvlist_t *nvroot = NULL;
 	vdev_t *rvd;
 	uberblock_t *ub = &spa->spa_uberblock;
@@ -303,7 +303,7 @@
 		goto out;
 	}
 
-	spa->spa_root_vdev = rvd;
+	ASSERT(spa->spa_root_vdev == rvd);
 	ASSERT(spa_guid(spa) == pool_guid);
 
 	/*
@@ -475,6 +475,7 @@
 	 * This must all happen in a single txg.
 	 */
 	if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) {
+		int c;
 		dmu_tx_t *tx = dmu_tx_create_assigned(spa_get_dsl(spa),
 		    spa_first_txg(spa));
 		dmu_objset_find(spa->spa_name, zil_claim, tx, 0);
@@ -488,6 +489,38 @@
 		 * Wait for all claims to sync.
 		 */
 		txg_wait_synced(spa->spa_dsl_pool, 0);
+
+		/*
+		 * If the config cache is stale relative to the mosconfig,
+		 * sync the config cache.
+		 */
+		if (config_cache_txg != spa->spa_config_txg)
+			spa_config_sync();
+
+		/*
+		 * If we have top-level vdevs that were added but have
+		 * not yet been prepared for allocation, do that now.
+		 * (It's safe now because the config cache is up to date,
+		 * so it will be able to translate the new DVAs.)
+		 * See comments in spa_vdev_add() for full details.
+		 */
+		for (c = 0; c < rvd->vdev_children; c++) {
+			vdev_t *tvd = rvd->vdev_child[c];
+			if (tvd->vdev_ms_array == 0) {
+				uint64_t txg = spa_last_synced_txg(spa) + 1;
+				ASSERT(tvd->vdev_ms_shift == 0);
+				spa_config_enter(spa, RW_WRITER, FTAG);
+				vdev_init(tvd, txg);
+				vdev_config_dirty(tvd);
+				spa_config_set(spa,
+				    spa_config_generate(spa, rvd, txg, 0));
+				spa_config_exit(spa, FTAG);
+				txg_wait_synced(spa->spa_dsl_pool, txg);
+				ASSERT(tvd->vdev_ms_shift != 0);
+				ASSERT(tvd->vdev_ms_array != 0);
+				spa_config_sync();
+			}
+		}
 	}
 
 	error = 0;
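
The new spa_load() logic above recovers from an interrupted spa_vdev_add(): a top-level vdev whose vdev_ms_array is still zero was added to the config but never had its metaslabs initialized, so the load path finishes the job and re-syncs the config cache. A toy standalone simulation of that detect-and-complete step (all names hypothetical, not the kernel code):

#include <assert.h>
#include <stdio.h>

/* Hypothetical miniature of the per-vdev state this hunk inspects. */
typedef struct tvd {
	unsigned long ms_array;	/* 0 => added but not yet initialized */
} tvd_t;

/* Phase 1 (spa_vdev_add): the vdev is in the config, metaslabs are not. */
static void
add_vdev(tvd_t *tvd)
{
	tvd->ms_array = 0;	/* nothing can be allocated from it yet */
}

/* Phase 2: initialize metaslabs, making the new space allocatable. */
static void
init_metaslabs(tvd_t *tvd)
{
	tvd->ms_array = 1;	/* stand-in for the real MOS object number */
}

/*
 * spa_load()'s recovery step, per the hunk above: any top-level vdev
 * with ms_array == 0 had its addition interrupted, so complete it.
 */
static void
recover(tvd_t *tvd)
{
	if (tvd->ms_array == 0)
		init_metaslabs(tvd);
}

int
main(void)
{
	tvd_t tvd;

	add_vdev(&tvd);
	/* Simulated power loss here: phase 2 never ran before reboot. */
	recover(&tvd);
	assert(tvd.ms_array != 0);
	(void) printf("completed an interrupted vdev addition\n");
	return (0);
}
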
@@ -1035,9 +1068,9 @@
 spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 {
 	uint64_t txg;
-	int c, error;
+	int c, c0, children, error;
 	vdev_t *rvd = spa->spa_root_vdev;
-	vdev_t *vd;
+	vdev_t *vd, *tvd;
 
 	txg = spa_vdev_enter(spa);
 
@@ -1046,32 +1079,61 @@
 	if (vd == NULL)
 		return (spa_vdev_exit(spa, vd, txg, EINVAL));
 
-	if (rvd == NULL)			/* spa_create() */
-		spa->spa_root_vdev = rvd = vd;
+	if (rvd == NULL) {			/* spa_create() */
+		rvd = vd;
+		c0 = 0;
+	} else {
+		c0 = rvd->vdev_children;
+	}
+
+	ASSERT(spa->spa_root_vdev == rvd);
 
 	if ((error = vdev_create(vd, txg)) != 0)
 		return (spa_vdev_exit(spa, vd, txg, error));
 
+	children = vd->vdev_children;
+
 	/*
-	 * Transfer each top-level vdev from the temporary root
-	 * to the spa's root and initialize its metaslabs.
+	 * Transfer each new top-level vdev from vd to rvd.
 	 */
-	for (c = 0; c < vd->vdev_children; c++) {
-		vdev_t *tvd = vd->vdev_child[c];
+	for (c = 0; c < children; c++) {
+		tvd = vd->vdev_child[c];
 		if (vd != rvd) {
 			vdev_remove_child(vd, tvd);
-			tvd->vdev_id = rvd->vdev_children;
+			tvd->vdev_id = c0 + c;
 			vdev_add_child(rvd, tvd);
 		}
-		if ((error = vdev_init(tvd, txg)) != 0)
-			return (spa_vdev_exit(spa, vd, txg, error));
 		vdev_config_dirty(tvd);
 	}
 
 	/*
-	 * Update the config based on the new in-core state.
+	 * We have to be careful when adding new vdevs to an existing pool.
+	 * If other threads start allocating from these vdevs before we
+	 * sync the config cache, and we lose power, then upon reboot we may
+	 * fail to open the pool because there are DVAs that the config cache
+	 * can't translate.  Therefore, we first add the vdevs without
+	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
+	 * initialize the metaslabs; and sync the config cache again.
+	 *
+	 * spa_load() checks for added-but-not-initialized vdevs, so that
+	 * if we lose power at any point in this sequence, the remaining
+	 * steps will be completed the next time we load the pool.
 	 */
-	spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0));
+	if (vd != rvd) {
+		(void) spa_vdev_exit(spa, vd, txg, 0);
+		txg = spa_vdev_enter(spa);
+		vd = NULL;
+	}
+
+	/*
+	 * Now that the config is safely on disk, we can use the new space.
+	 */
+	for (c = 0; c < children; c++) {
+		tvd = rvd->vdev_child[c0 + c];
+		ASSERT(tvd->vdev_ms_array == 0);
+		vdev_init(tvd, txg);
+		vdev_config_dirty(tvd);
+	}
 
 	return (spa_vdev_exit(spa, vd, txg, 0));
 }
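
The reworked spa_vdev_add() above commits in two synced steps: first the new vdevs enter the config and the config cache is synced (via the intermediate spa_vdev_exit()), and only then are metaslabs initialized so allocation can begin. The invariant is write-ahead in nature: no DVA may ever exist that the on-disk config cache cannot translate. A compact sketch of that ordering invariant, using hypothetical flags rather than the real pool state:

#include <assert.h>
#include <stdbool.h>

/* Hypothetical persistent state mirroring the comment in the hunk. */
static bool config_cache_has_vdev;	/* cache can translate new DVAs */
static bool vdev_has_allocations;	/* someone allocated from the vdev */

static void
check_invariant(void)
{
	/* Allocations must never outrun the synced config cache. */
	assert(!vdev_has_allocations || config_cache_has_vdev);
}

int
main(void)
{
	check_invariant();		/* initial state: nothing added */

	config_cache_has_vdev = true;	/* phase 1: spa_vdev_exit() syncs */
	check_invariant();		/* a crash here is recoverable */

	vdev_has_allocations = true;	/* phase 2: vdev_init() + use */
	check_invariant();
	return (0);
}
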
@@ -1105,6 +1167,9 @@
 	if (oldvd == NULL)
 		return (spa_vdev_exit(spa, NULL, txg, ENODEV));
 
+	if (!oldvd->vdev_ops->vdev_op_leaf)
+		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+
 	pvd = oldvd->vdev_parent;
 
 	/*
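
This guard, repeated below for detach and setpath, rejects requests whose target guid names an interior vdev (mirror, replacing, root): attach, detach, and path updates only make sense on leaves. A tiny sketch of the ops-table flag the guard consults (a hypothetical miniature of vdev_ops_t):

#include <assert.h>
#include <stdbool.h>

/* Hypothetical slice of the vdev ops table consulted by the guard. */
typedef struct vdev_ops {
	bool vdev_op_leaf;	/* true for disk/file, false for mirror etc. */
} vdev_ops_t;

typedef struct vdev {
	const vdev_ops_t *vdev_ops;
} vdev_t;

static const vdev_ops_t disk_ops = { true };
static const vdev_ops_t mirror_ops = { false };

int
main(void)
{
	vdev_t disk = { &disk_ops };
	vdev_t mirror = { &mirror_ops };

	/* The check the hunk adds: leaves pass, interior vdevs get ENOTSUP. */
	assert(disk.vdev_ops->vdev_op_leaf);
	assert(!mirror.vdev_ops->vdev_op_leaf);
	return (0);
}
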
@@ -1183,10 +1248,6 @@
 	ASSERT(pvd->vdev_top == tvd);
 	ASSERT(tvd->vdev_parent == rvd);
 
-	/*
-	 * Update the config based on the new in-core state.
-	 */
-	spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0));
 	vdev_config_dirty(tvd);
 
 	/*
@@ -1238,6 +1299,9 @@
 	if (vd == NULL)
 		return (spa_vdev_exit(spa, NULL, txg, ENODEV));
 
+	if (!vd->vdev_ops->vdev_op_leaf)
+		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+
 	pvd = vd->vdev_parent;
 
 	/*
@@ -1337,11 +1401,6 @@
 	 */
 	(void) vdev_metaslab_init(tvd, txg);
 
-	/*
-	 * Update the config based on the new in-core state.
-	 */
-	spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0));
-
 	vdev_config_dirty(tvd);
 
 	/*
@@ -1429,11 +1488,12 @@
 	if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL)
 		return (spa_vdev_exit(spa, NULL, txg, ENOENT));
 
+	if (!vd->vdev_ops->vdev_op_leaf)
+		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+
 	spa_strfree(vd->vdev_path);
 	vd->vdev_path = spa_strdup(newpath);
 
-	spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0));
-
 	vdev_config_dirty(vd->vdev_top);
 
 	return (spa_vdev_exit(spa, NULL, txg, 0));