Mercurial > illumos > illumos-gate
diff usr/src/uts/common/fs/zfs/spa.c @ 5329:33cb98223b2d
PSARC 2007/567 zpool failmode property
6322646 ZFS should gracefully handle all devices failing (when writing)
6413847 vdev label write failure should be handled more gracefully
6417772 need nicer message on write failure
6417779 ZFS: I/O failure (write on ...) -- need to reallocate writes
6467927 Node gets into a panic loop when devices are fenced off
6565042 ZFS should gracefully handle all devices failing (when reading)
6596239 Stop issuing IOs to a vdev that is going to be removed
author | gw25295 |
---|---|
date | Wed, 24 Oct 2007 20:00:39 -0700 |
parents | 71a3e95fb9e2 |
children | 36eeffc5336d |
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/spa.c Wed Oct 24 19:08:06 2007 -0700 +++ b/usr/src/uts/common/fs/zfs/spa.c Wed Oct 24 20:00:39 2007 -0700 @@ -362,6 +362,27 @@ dmu_objset_close(os); } break; + case ZPOOL_PROP_FAILUREMODE: + error = nvpair_value_uint64(elem, &intval); + if (!error && (intval < ZIO_FAILURE_MODE_WAIT || + intval > ZIO_FAILURE_MODE_PANIC)) + error = EINVAL; + + /* + * This is a special case which only occurs when + * the pool has completely failed. This allows + * the user to change the in-core failmode property + * without syncing it out to disk (I/Os might + * currently be blocked). We do this by returning + * EIO to the caller (spa_prop_set) to trick it + * into thinking we encountered a property validation + * error. + */ + if (!error && spa_state(spa) == POOL_STATE_IO_FAILURE) { + spa->spa_failmode = intval; + error = EIO; + } + break; } if (error) @@ -477,6 +498,8 @@ list_create(&spa->spa_dirty_list, sizeof (vdev_t), offsetof(vdev_t, vdev_dirty_node)); + list_create(&spa->spa_zio_list, sizeof (zio_t), + offsetof(zio_t, zio_link_node)); txg_list_create(&spa->spa_vdev_txg_list, offsetof(struct vdev, vdev_txg_node)); @@ -506,6 +529,7 @@ txg_list_destroy(&spa->spa_vdev_txg_list); list_destroy(&spa->spa_dirty_list); + list_destroy(&spa->spa_zio_list); for (t = 0; t < ZIO_TYPES; t++) { taskq_destroy(spa->spa_zio_issue_taskq[t]); @@ -1077,6 +1101,10 @@ spa->spa_pool_props_object, zpool_prop_to_name(ZPOOL_PROP_DELEGATION), sizeof (uint64_t), 1, &spa->spa_delegation); + (void) zap_lookup(spa->spa_meta_objset, + spa->spa_pool_props_object, + zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), + sizeof (uint64_t), 1, &spa->spa_failmode); } /* @@ -1618,6 +1646,7 @@ spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); spa->spa_temporary = zpool_prop_default_numeric(ZPOOL_PROP_TEMPORARY); + spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); if (props) spa_sync_props(spa, props, CRED(), tx); @@ -3091,7 +3120,7 @@ tvd->vdev_remove_wanted = 0; vdev_set_state(tvd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); - vdev_clear(spa, tvd); + vdev_clear(spa, tvd, B_TRUE); vdev_config_dirty(tvd->vdev_top); } spa_async_remove(spa, tvd); @@ -3122,8 +3151,14 @@ /* * See if any devices need to be marked REMOVED. + * + * XXX - We avoid doing this when we are in + * I/O failure state since spa_vdev_enter() grabs + * the namespace lock and would not be able to obtain + * the writer config lock. */ - if (tasks & SPA_ASYNC_REMOVE) { + if (tasks & SPA_ASYNC_REMOVE && + spa_state(spa) != POOL_STATE_IO_FAILURE) { txg = spa_vdev_enter(spa); spa_async_remove(spa, spa->spa_root_vdev); (void) spa_vdev_exit(spa, NULL, txg, 0); @@ -3379,7 +3414,6 @@ VERIFY(nvpair_value_uint64(elem, &intval) == 0); spa->spa_temporary = intval; break; - default: /* * Set pool property values in the poolprops mos object. @@ -3425,11 +3459,19 @@ ASSERT(0); /* not allowed */ } - if (prop == ZPOOL_PROP_DELEGATION) + switch (prop) { + case ZPOOL_PROP_DELEGATION: spa->spa_delegation = intval; - - if (prop == ZPOOL_PROP_BOOTFS) + break; + case ZPOOL_PROP_BOOTFS: spa->spa_bootfs = intval; + break; + case ZPOOL_PROP_FAILUREMODE: + spa->spa_failmode = intval; + break; + default: + break; + } } /* log internal history if this is not a zpool create */