Mercurial > illumos > illumos-gate
changeset 1585:4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
6395485 ensure that vdev guids are unique
6395487 config cache can become stale relative to mosconfig
6395488 vdev addition must sync to config cache before allocation begins
author | bonwick |
---|---|
date | Thu, 09 Mar 2006 16:56:05 -0800 |
parents | 889b0aa2153e |
children | 9657e3dadaf6 |
files | usr/src/cmd/ztest/ztest.c usr/src/uts/common/fs/zfs/spa.c usr/src/uts/common/fs/zfs/spa_misc.c usr/src/uts/common/fs/zfs/sys/vdev.h usr/src/uts/common/fs/zfs/vdev.c |
diffstat | 5 files changed, 169 insertions(+), 62 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/ztest/ztest.c Thu Mar 09 16:31:45 2006 -0800 +++ b/usr/src/cmd/ztest/ztest.c Thu Mar 09 16:56:05 2006 -0800 @@ -2038,6 +2038,9 @@ bcopy(&iobuf[blkoff], &rbt, sizeof (rbt)); + if (rbt.bt_objset == 0) /* concurrent free */ + continue; + ASSERT3U(rbt.bt_objset, ==, wbt.bt_objset); ASSERT3U(rbt.bt_object, ==, wbt.bt_object); ASSERT3U(rbt.bt_offset, ==, wbt.bt_offset); @@ -2970,9 +2973,11 @@ * Verify that we can export the pool and reimport it under a * different name. */ - (void) snprintf(name, 100, "%s_import", pool); - ztest_spa_import_export(pool, name); - ztest_spa_import_export(name, pool); + if (ztest_random(2) == 0) { + (void) snprintf(name, 100, "%s_import", pool); + ztest_spa_import_export(pool, name); + ztest_spa_import_export(name, pool); + } /* * Verify that we can loop over all pools.
--- a/usr/src/uts/common/fs/zfs/spa.c Thu Mar 09 16:31:45 2006 -0800 +++ b/usr/src/uts/common/fs/zfs/spa.c Thu Mar 09 16:56:05 2006 -0800 @@ -252,10 +252,9 @@ /* * Close all vdevs. */ - if (spa->spa_root_vdev) { + if (spa->spa_root_vdev) vdev_free(spa->spa_root_vdev); - spa->spa_root_vdev = NULL; - } + ASSERT(spa->spa_root_vdev == NULL); spa->spa_async_suspended = 0; } @@ -268,6 +267,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) { int error = 0; + uint64_t config_cache_txg = spa->spa_config_txg; nvlist_t *nvroot = NULL; vdev_t *rvd; uberblock_t *ub = &spa->spa_uberblock; @@ -303,7 +303,7 @@ goto out; } - spa->spa_root_vdev = rvd; + ASSERT(spa->spa_root_vdev == rvd); ASSERT(spa_guid(spa) == pool_guid); /* @@ -475,6 +475,7 @@ * This must all happen in a single txg. */ if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) { + int c; dmu_tx_t *tx = dmu_tx_create_assigned(spa_get_dsl(spa), spa_first_txg(spa)); dmu_objset_find(spa->spa_name, zil_claim, tx, 0); @@ -488,6 +489,38 @@ * Wait for all claims to sync. */ txg_wait_synced(spa->spa_dsl_pool, 0); + + /* + * If the config cache is stale relative to the mosconfig, + * sync the config cache. + */ + if (config_cache_txg != spa->spa_config_txg) + spa_config_sync(); + + /* + * If we have top-level vdevs that were added but have + * not yet been prepared for allocation, do that now. + * (It's safe now because the config cache is up to date, + * so it will be able to translate the new DVAs.) + * See comments in spa_vdev_add() for full details. 
+ */ + for (c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + if (tvd->vdev_ms_array == 0) { + uint64_t txg = spa_last_synced_txg(spa) + 1; + ASSERT(tvd->vdev_ms_shift == 0); + spa_config_enter(spa, RW_WRITER, FTAG); + vdev_init(tvd, txg); + vdev_config_dirty(tvd); + spa_config_set(spa, + spa_config_generate(spa, rvd, txg, 0)); + spa_config_exit(spa, FTAG); + txg_wait_synced(spa->spa_dsl_pool, txg); + ASSERT(tvd->vdev_ms_shift != 0); + ASSERT(tvd->vdev_ms_array != 0); + spa_config_sync(); + } + } } error = 0; @@ -1035,9 +1068,9 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot) { uint64_t txg; - int c, error; + int c, c0, children, error; vdev_t *rvd = spa->spa_root_vdev; - vdev_t *vd; + vdev_t *vd, *tvd; txg = spa_vdev_enter(spa); @@ -1046,32 +1079,61 @@ if (vd == NULL) return (spa_vdev_exit(spa, vd, txg, EINVAL)); - if (rvd == NULL) /* spa_create() */ - spa->spa_root_vdev = rvd = vd; + if (rvd == NULL) { /* spa_create() */ + rvd = vd; + c0 = 0; + } else { + c0 = rvd->vdev_children; + } + + ASSERT(spa->spa_root_vdev == rvd); if ((error = vdev_create(vd, txg)) != 0) return (spa_vdev_exit(spa, vd, txg, error)); + children = vd->vdev_children; + /* - * Transfer each top-level vdev from the temporary root - * to the spa's root and initialize its metaslabs. + * Transfer each new top-level vdev from vd to rvd. */ - for (c = 0; c < vd->vdev_children; c++) { - vdev_t *tvd = vd->vdev_child[c]; + for (c = 0; c < children; c++) { + tvd = vd->vdev_child[c]; if (vd != rvd) { vdev_remove_child(vd, tvd); - tvd->vdev_id = rvd->vdev_children; + tvd->vdev_id = c0 + c; vdev_add_child(rvd, tvd); } - if ((error = vdev_init(tvd, txg)) != 0) - return (spa_vdev_exit(spa, vd, txg, error)); vdev_config_dirty(tvd); } /* - * Update the config based on the new in-core state. + * We have to be careful when adding new vdevs to an existing pool. 
+ * If other threads start allocating from these vdevs before we + * sync the config cache, and we lose power, then upon reboot we may + * fail to open the pool because there are DVAs that the config cache + * can't translate. Therefore, we first add the vdevs without + * initializing metaslabs; sync the config cache (via spa_vdev_exit()); + * initialize the metaslabs; and sync the config cache again. + * + * spa_load() checks for added-but-not-initialized vdevs, so that + * if we lose power at any point in this sequence, the remaining + * steps will be completed the next time we load the pool. */ - spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); + if (vd != rvd) { + (void) spa_vdev_exit(spa, vd, txg, 0); + txg = spa_vdev_enter(spa); + vd = NULL; + } + + /* + * Now that the config is safely on disk, we can use the new space. + */ + for (c = 0; c < children; c++) { + tvd = rvd->vdev_child[c0 + c]; + ASSERT(tvd->vdev_ms_array == 0); + vdev_init(tvd, txg); + vdev_config_dirty(tvd); + } return (spa_vdev_exit(spa, vd, txg, 0)); } @@ -1105,6 +1167,9 @@ if (oldvd == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); + if (!oldvd->vdev_ops->vdev_op_leaf) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + pvd = oldvd->vdev_parent; /* @@ -1183,10 +1248,6 @@ ASSERT(pvd->vdev_top == tvd); ASSERT(tvd->vdev_parent == rvd); - /* - * Update the config based on the new in-core state. - */ - spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); vdev_config_dirty(tvd); /* @@ -1238,6 +1299,9 @@ if (vd == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); + if (!vd->vdev_ops->vdev_op_leaf) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + pvd = vd->vdev_parent; /* @@ -1337,11 +1401,6 @@ */ (void) vdev_metaslab_init(tvd, txg); - /* - * Update the config based on the new in-core state. 
- */ - spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); - vdev_config_dirty(tvd); /* @@ -1429,11 +1488,12 @@ if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) return (spa_vdev_exit(spa, NULL, txg, ENOENT)); + if (!vd->vdev_ops->vdev_op_leaf) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + spa_strfree(vd->vdev_path); vd->vdev_path = spa_strdup(newpath); - spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); - vdev_config_dirty(vd->vdev_top); return (spa_vdev_exit(spa, NULL, txg, 0));
--- a/usr/src/uts/common/fs/zfs/spa_misc.c Thu Mar 09 16:31:45 2006 -0800 +++ b/usr/src/uts/common/fs/zfs/spa_misc.c Thu Mar 09 16:56:05 2006 -0800 @@ -439,14 +439,29 @@ int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error) { - ASSERT(txg != 0); + vdev_t *rvd = spa->spa_root_vdev; + uint64_t next_txg = spa_last_synced_txg(spa) + 1; + int config_changed = B_FALSE; + + /* + * Usually txg == next_txg, but spa_vdev_attach() + * actually needs to wait for the open txg to sync. + */ + ASSERT(txg >= next_txg); /* - * Reassess the DTLs. spa_scrub() looks at the DTLs without - * taking the config lock at all, so keep it safe. + * Reassess the DTLs. */ - if (spa->spa_root_vdev) - vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE); + if (rvd != NULL) + vdev_dtl_reassess(rvd, 0, 0, B_FALSE); + + /* + * Update the in-core config if it changed. + */ + if (error == 0 && !list_is_empty(&spa->spa_dirty_list)) { + config_changed = B_TRUE; + spa_config_set(spa, spa_config_generate(spa, rvd, next_txg, 0)); + } spa_config_exit(spa, spa); @@ -457,7 +472,7 @@ spa_scrub_restart(spa, txg); spa_scrub_resume(spa); - if (vd == spa->spa_root_vdev) /* spa_create() */ + if (vd == rvd) /* spa_create() */ return (error); /* @@ -474,10 +489,9 @@ } /* - * If we're in the middle of export or destroy, don't sync the - * config -- it will do that anyway, and we deadlock if we try. + * If the config changed, update the config cache. */ - if (error == 0 && spa->spa_state == POOL_STATE_ACTIVE) + if (config_changed) spa_config_sync(); mutex_exit(&spa_namespace_lock);
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h Thu Mar 09 16:31:45 2006 -0800 +++ b/usr/src/uts/common/fs/zfs/sys/vdev.h Thu Mar 09 16:56:05 2006 -0800 @@ -60,7 +60,7 @@ extern int vdev_open(vdev_t *); extern void vdev_close(vdev_t *); extern int vdev_create(vdev_t *, uint64_t txg); -extern int vdev_init(vdev_t *, uint64_t txg); +extern void vdev_init(vdev_t *, uint64_t txg); extern void vdev_reopen(vdev_t *); extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
--- a/usr/src/uts/common/fs/zfs/vdev.c Thu Mar 09 16:31:45 2006 -0800 +++ b/usr/src/uts/common/fs/zfs/vdev.c Thu Mar 09 16:56:05 2006 -0800 @@ -143,7 +143,7 @@ int c; vdev_t *mvd; - if (vd->vdev_children == 0 && vd->vdev_guid == guid) + if (vd->vdev_guid == guid) return (vd); for (c = 0; c < vd->vdev_children; c++) @@ -266,10 +266,31 @@ { vdev_t *vd; - while (guid == 0) - guid = spa_get_random(-1ULL); + vd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); + + if (spa->spa_root_vdev == NULL) { + ASSERT(ops == &vdev_root_ops); + spa->spa_root_vdev = vd; + } - vd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); + if (guid == 0) { + if (spa->spa_root_vdev == vd) { + /* + * The root vdev's guid will also be the pool guid, + * which must be unique among all pools. + */ + while (guid == 0 || spa_guid_exists(guid, 0)) + guid = spa_get_random(-1ULL); + } else { + /* + * Any other vdev's guid must be unique within the pool. + */ + while (guid == 0 || + spa_guid_exists(spa_guid(spa), guid)) + guid = spa_get_random(-1ULL); + } + ASSERT(!spa_guid_exists(spa_guid(spa), guid)); + } vd->vdev_spa = spa; vd->vdev_id = id; @@ -297,6 +318,8 @@ static void vdev_free_common(vdev_t *vd) { + spa_t *spa = vd->vdev_spa; + if (vd->vdev_path) spa_strfree(vd->vdev_path); if (vd->vdev_devid) @@ -313,6 +336,9 @@ mutex_destroy(&vd->vdev_dtl_lock); mutex_destroy(&vd->vdev_dirty_lock); + if (vd == spa->spa_root_vdev) + spa->spa_root_vdev = NULL; + kmem_free(vd, sizeof (vdev_t)); } @@ -596,6 +622,9 @@ metaslab_t **mspp = vd->vdev_ms; int ret; + if (vd->vdev_ms_shift == 0) /* not being allocated from yet */ + return (0); + dprintf("%s oldc %llu newc %llu\n", vdev_description(vd), oldc, newc); ASSERT(oldc <= newc); @@ -925,7 +954,7 @@ * For creation, we want to try to create all vdevs at once and then undo it * if anything fails; this is much harder if we have pending transactions. 
*/ -int +void vdev_init(vdev_t *vd, uint64_t txg) { /* @@ -935,9 +964,10 @@ vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT); /* - * Initialize the vdev's metaslabs. + * Initialize the vdev's metaslabs. This can't fail because + * there's nothing to read when creating all new metaslabs. */ - return (vdev_metaslab_init(vd, txg)); + VERIFY(vdev_metaslab_init(vd, txg) == 0); } void @@ -1226,15 +1256,11 @@ } /* - * If this is a top-level vdev, make sure its allocation parameters - * exist and initialize its metaslabs. + * If this is a top-level vdev, initialize its metaslabs. */ if (vd == vd->vdev_top) { - if (vd->vdev_ms_array == 0 || - vd->vdev_ms_shift == 0 || - vd->vdev_ashift == 0 || - vd->vdev_asize == 0) { + if (vd->vdev_ashift == 0 || vd->vdev_asize == 0) { vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); return (0); @@ -1364,17 +1390,19 @@ txg = spa_vdev_enter(spa); rvd = spa->spa_root_vdev; + if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); + if (!vd->vdev_ops->vdev_op_leaf) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + dprintf("ONLINE: %s\n", vdev_description(vd)); vd->vdev_offline = B_FALSE; vd->vdev_tmpoffline = B_FALSE; vdev_reopen(vd->vdev_top); - spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); - vdev_config_dirty(vd->vdev_top); (void) spa_vdev_exit(spa, NULL, txg, 0); @@ -1393,9 +1421,13 @@ txg = spa_vdev_enter(spa); rvd = spa->spa_root_vdev; + if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); + if (!vd->vdev_ops->vdev_op_leaf) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + dprintf("OFFLINE: %s\n", vdev_description(vd)); /* vdev is already offlined, do nothing */ @@ -1426,12 +1458,8 @@ } vd->vdev_tmpoffline = istmp; - if (istmp) - return (spa_vdev_exit(spa, NULL, txg, 0)); - - spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); - - vdev_config_dirty(vd->vdev_top); + if (!istmp) + 
vdev_config_dirty(vd->vdev_top); return (spa_vdev_exit(spa, NULL, txg, 0)); }