annotate usr/src/uts/common/fs/zfs/spa.c @ 1544:938876158511
PSARC 2006/077 zpool clear
PSARC 2006/139 FMA for ZFS
6284889 arc should replace the znode cache
6333006 DMU & DSL should not panic upon I/O error
6333092 concurrent reads to a file not scaling with number of readers
6338081 ZFS/FMA phase 1
6338386 need persistent error log
6341326 i/o error causes arc buf hash table corruption
6341639 zfs backup/restore should compute/verify checksum of backup stream
6348002 out of space due to changing properties
6354724 inaccurate error message from zfs restore
6354872 dmu_sync() blows predictive accounting
6355416 zpool scrubbing consumes all memory, system hung
6363995 df should only load libzfs when it encounters a ZFS filesystem
6366320 zfs backup/restore doesn't like signals
6368892 mount -m support needed for legacy mounts
6368902 boot archive fstat support needed for ZFS Mountroot
6369424 BFU complains when bfu'ing a ZFS root filesystem
6374062 mountroot support needed for ZFS
6376356 dirtying dbuf obj=43 lvl=0 blkid=0 but not tx_held
6378391 unused members of dmu_objset_stats_t
6378392 clean up zfs_cmd_t structure
6378685 buf_init should allocate its hash table more carefully
6378976 ziltest should be a first class citizen
6381086 zdb segfaults if there is a spa deferred-free bplist
6381203 deadlock due to i/o while assigning (tc_lock held)
6381209 freed space is not immediately available
6381344 'zpool clear'
6381345 FAULTED devices should really be UNAVAIL
6381346 import should mark devices as persistently unavailable
6383272 recursive mutex_enter() during log replay with zfs root
6386326 origin property is not displayed
6386354 libzfs does too much in its _init section, calls exit(1)
6386624 zpool should not complain about non-existent devices from libdiskmgt
6386910 spa needs to be i/o error hardened
6387735 need a mechanism to inject faults into ZFS
6387736 internal ZFS utilities should be placed in an ON-private package
6389928 libzfs should ship a lint library
6390609 malformed vdev config panics on zpool_create()
6390677 version number checking makes upgrades challenging
6390713 ztest hangs in zil_suspend()
6391873 metadata compression should be turned back on
6392113 ztest sometimes reports leaked blocks because ZIL isn't resilvered
6393004 minor memory leak in unique_insert()
author | eschrock
---|---
date | Fri, 03 Mar 2006 20:08:16 -0800
parents | 81359ee1ee63
children | 4ad213e858a9

rev | line source
---|---
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1544 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
21 /* | |
1354 | 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 * Use is subject to license terms. |
24 */ | |
25 | |
26 #pragma ident "%Z%%M% %I% %E% SMI" | |
27 | |
28 /* | |
29 * This file contains all the routines used when modifying on-disk SPA state. | |
30 * This includes opening, importing, destroying, exporting a pool, and syncing a | |
31 * pool. | |
32 */ | |
33 | |
34 #include <sys/zfs_context.h> | |
1544 | 35 #include <sys/fm/fs/zfs.h> |
789 | 36 #include <sys/spa_impl.h> |
37 #include <sys/zio.h> | |
38 #include <sys/zio_checksum.h> | |
39 #include <sys/zio_compress.h> | |
40 #include <sys/dmu.h> | |
41 #include <sys/dmu_tx.h> | |
42 #include <sys/zap.h> | |
43 #include <sys/zil.h> | |
44 #include <sys/vdev_impl.h> | |
45 #include <sys/metaslab.h> | |
46 #include <sys/uberblock_impl.h> | |
47 #include <sys/txg.h> | |
48 #include <sys/avl.h> | |
49 #include <sys/dmu_traverse.h> | |
50 #include <sys/unique.h> | |
51 #include <sys/dsl_pool.h> | |
52 #include <sys/dsl_dir.h> | |
53 #include <sys/dsl_prop.h> | |
54 #include <sys/fs/zfs.h> | |
55 #include <sys/callb.h> | |
56 | |
57 static uint32_t spa_active_count; | |
58 | |
59 /* | |
60 * ========================================================================== | |
61 * SPA state manipulation (open/create/destroy/import/export) | |
62 * ========================================================================== | |
63 */ | |
64 | |
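/*
 * Comparison callback for the AVL trees that hold per-pool error-log
 * entries: entries are ordered by a byte-wise comparison of their
 * bookmarks, which is all the trees need to detect duplicates.
 */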
1544 | 65 static int |
66 spa_error_entry_compare(const void *a, const void *b) | |
67 { | |
68 spa_error_entry_t *sa = (spa_error_entry_t *)a; | |
69 spa_error_entry_t *sb = (spa_error_entry_t *)b; | |
70 int ret; | |
71 | |
72 ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, | |
73 sizeof (zbookmark_t)); | |
74 | |
75 if (ret < 0) | |
76 return (-1); | |
77 else if (ret > 0) | |
78 return (1); | |
79 else | |
80 return (0); | |
81 } | |
82 | |
83 /* | |
84 * Utility function which retrieves copies of the current logs and | |
85 * re-initializes them in the process. | |
86 */ | |
87 void | |
88 spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) | |
89 { | |
90 ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); | |
91 | |
92 bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); | |
93 bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); | |
94 | |
95 avl_create(&spa->spa_errlist_scrub, | |
96 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
97 offsetof(spa_error_entry_t, se_avl)); | |
98 avl_create(&spa->spa_errlist_last, | |
99 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
100 offsetof(spa_error_entry_t, se_avl)); | |
101 } | |
102 | |
789 | 103 /* |
104 * Activate an uninitialized pool. | |
105 */ | |
106 static void | |
107 spa_activate(spa_t *spa) | |
108 { | |
109 int t; | |
110 | |
111 ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); | |
112 | |
113 spa->spa_state = POOL_STATE_ACTIVE; | |
114 | |
115 spa->spa_normal_class = metaslab_class_create(); | |
116 | |
117 for (t = 0; t < ZIO_TYPES; t++) { | |
118 spa->spa_zio_issue_taskq[t] = taskq_create("spa_zio_issue", | |
119 8, maxclsyspri, 50, INT_MAX, | |
120 TASKQ_PREPOPULATE); | |
121 spa->spa_zio_intr_taskq[t] = taskq_create("spa_zio_intr", | |
122 8, maxclsyspri, 50, INT_MAX, | |
123 TASKQ_PREPOPULATE); | |
124 } | |
125 | |
126 rw_init(&spa->spa_traverse_lock, NULL, RW_DEFAULT, NULL); | |
127 | |
128 list_create(&spa->spa_dirty_list, sizeof (vdev_t), | |
129 offsetof(vdev_t, vdev_dirty_node)); | |
130 | |
131 txg_list_create(&spa->spa_vdev_txg_list, | |
132 offsetof(struct vdev, vdev_txg_node)); | |
1544 | 133 |
134 avl_create(&spa->spa_errlist_scrub, | |
135 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
136 offsetof(spa_error_entry_t, se_avl)); | |
137 avl_create(&spa->spa_errlist_last, | |
138 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
139 offsetof(spa_error_entry_t, se_avl)); | |
789 | 140 } |
141 | |
142 /* | |
143 * Opposite of spa_activate(). | |
144 */ | |
145 static void | |
146 spa_deactivate(spa_t *spa) | |
147 { | |
148 int t; | |
149 | |
150 ASSERT(spa->spa_sync_on == B_FALSE); | |
151 ASSERT(spa->spa_dsl_pool == NULL); | |
152 ASSERT(spa->spa_root_vdev == NULL); | |
153 | |
154 ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); | |
155 | |
156 txg_list_destroy(&spa->spa_vdev_txg_list); | |
157 | |
158 list_destroy(&spa->spa_dirty_list); | |
159 | |
160 rw_destroy(&spa->spa_traverse_lock); | |
161 | |
162 for (t = 0; t < ZIO_TYPES; t++) { | |
163 taskq_destroy(spa->spa_zio_issue_taskq[t]); | |
164 taskq_destroy(spa->spa_zio_intr_taskq[t]); | |
165 spa->spa_zio_issue_taskq[t] = NULL; | |
166 spa->spa_zio_intr_taskq[t] = NULL; | |
167 } | |
168 | |
169 metaslab_class_destroy(spa->spa_normal_class); | |
170 spa->spa_normal_class = NULL; | |
171 | |
1544 | 172 /* |
173 * If this was part of an import or the open otherwise failed, we may | |
174 * still have errors left in the queues. Empty them just in case. | |
175 */ | |
176 spa_errlog_drain(spa); | |
177 | |
178 avl_destroy(&spa->spa_errlist_scrub); | |
179 avl_destroy(&spa->spa_errlist_last); | |
180 | |
789 | 181 spa->spa_state = POOL_STATE_UNINITIALIZED; |
182 } | |
183 | |
184 /* | |
185 * Verify a pool configuration, and construct the vdev tree appropriately. This | |
186 * will create all the necessary vdevs in the appropriate layout, with each vdev | |
187 * in the CLOSED state. This will prep the pool before open/creation/import. | |
188 * All vdev validation is done by the vdev_alloc() routine. | |
189 */ | |
190 static vdev_t * | |
191 spa_config_parse(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int atype) | |
192 { | |
193 nvlist_t **child; | |
194 uint_t c, children; | |
195 vdev_t *vd; | |
196 | |
197 if ((vd = vdev_alloc(spa, nv, parent, id, atype)) == NULL) | |
198 return (NULL); | |
199 | |
200 if (vd->vdev_ops->vdev_op_leaf) | |
201 return (vd); | |
202 | |
203 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, | |
204 &child, &children) != 0) { | |
205 vdev_free(vd); | |
206 return (NULL); | |
207 } | |
208 | |
209 for (c = 0; c < children; c++) { | |
210 if (spa_config_parse(spa, child[c], vd, c, atype) == NULL) { | |
211 vdev_free(vd); | |
212 return (NULL); | |
213 } | |
214 } | |
215 | |
216 return (vd); | |
217 } | |
218 | |
219 /* | |
220 * Opposite of spa_load(). | |
221 */ | |
222 static void | |
223 spa_unload(spa_t *spa) | |
224 { | |
225 /* | |
1544 | 226 * Stop async tasks. |
227 */ | |
228 spa_async_suspend(spa); | |
229 | |
230 /* | |
789 | 231 * Stop syncing. |
232 */ | |
233 if (spa->spa_sync_on) { | |
234 txg_sync_stop(spa->spa_dsl_pool); | |
235 spa->spa_sync_on = B_FALSE; | |
236 } | |
237 | |
238 /* | |
239 * Wait for any outstanding prefetch I/O to complete. | |
240 */ | |
1544 | 241 spa_config_enter(spa, RW_WRITER, FTAG); |
242 spa_config_exit(spa, FTAG); | |
789 | 243 |
244 /* | |
245 * Close the dsl pool. | |
246 */ | |
247 if (spa->spa_dsl_pool) { | |
248 dsl_pool_close(spa->spa_dsl_pool); | |
249 spa->spa_dsl_pool = NULL; | |
250 } | |
251 | |
252 /* | |
253 * Close all vdevs. | |
254 */ | |
255 if (spa->spa_root_vdev) { | |
256 vdev_free(spa->spa_root_vdev); | |
257 spa->spa_root_vdev = NULL; | |
258 } | |
1544 | 259 |
260 spa->spa_async_suspended = 0; | |
789 | 261 } |
262 | |
263 /* | |
264 * Load an existing storage pool, using the pool's builtin spa_config as a | |
1544 | 265 * source of configuration information. |
789 | 266 */ |
267 static int | |
1544 | 268 spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) |
789 | 269 { |
270 int error = 0; | |
271 nvlist_t *nvroot = NULL; | |
272 vdev_t *rvd; | |
273 uberblock_t *ub = &spa->spa_uberblock; | |
274 uint64_t pool_guid; | |
275 zio_t *zio; | |
276 | |
1544 | 277 spa->spa_load_state = state; |
789 | 278 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || |
1544 | 279 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { |
280 error = EINVAL; | |
281 goto out; | |
282 } | |
789 | 283 |
284 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, | |
285 &spa->spa_config_txg); | |
286 | |
1544 | 287 if ((spa->spa_load_state == SPA_LOAD_IMPORT || |
288 spa->spa_load_state == SPA_LOAD_TRYIMPORT) && | |
289 spa_guid_exists(pool_guid, 0)) { | |
290 error = EEXIST; | |
291 goto out; | |
292 } | |
789 | 293 |
294 /* | |
295 * Parse the configuration into a vdev tree. | |
296 */ | |
1544 | 297 spa_config_enter(spa, RW_WRITER, FTAG); |
789 | 298 rvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_LOAD); |
1544 | 299 spa_config_exit(spa, FTAG); |
789 | 300 |
1544 | 301 if (rvd == NULL) { |
302 error = EINVAL; | |
303 goto out; | |
304 } | |
789 | 305 |
306 spa->spa_root_vdev = rvd; | |
307 ASSERT(spa_guid(spa) == pool_guid); | |
308 | |
309 /* | |
310 * Try to open all vdevs, loading each label in the process. | |
311 */ | |
1544 | 312 if (vdev_open(rvd) != 0) { |
313 error = ENXIO; | |
314 goto out; | |
315 } | |
789 | 316 |
317 /* | |
318 * Find the best uberblock. | |
319 */ | |
320 bzero(ub, sizeof (uberblock_t)); | |
321 | |
322 zio = zio_root(spa, NULL, NULL, | |
323 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); | |
324 vdev_uberblock_load(zio, rvd, ub); | |
325 error = zio_wait(zio); | |
326 | |
327 /* | |
328 * If we weren't able to find a single valid uberblock, return failure. | |
329 */ | |
330 if (ub->ub_txg == 0) { | |
1544 | 331 error = ENXIO; |
332 goto out; | |
333 } | |
334 | |
335 /* | |
336 * If the pool is newer than the code, we can't open it. | |
337 */ | |
338 if (ub->ub_version > UBERBLOCK_VERSION) { | |
339 error = ENOTSUP; | |
340 goto out; | |
789 | 341 } |
342 | |
343 /* | |
344 * If the vdev guid sum doesn't match the uberblock, we have an | |
345 * incomplete configuration. | |
346 */ | |
347 if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { | |
1544 | 348 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
349 VDEV_AUX_BAD_GUID_SUM); | |
350 error = ENXIO; | |
351 goto out; | |
789 | 352 } |
353 | |
354 /* | |
355 * Initialize internal SPA structures. | |
356 */ | |
357 spa->spa_state = POOL_STATE_ACTIVE; | |
358 spa->spa_ubsync = spa->spa_uberblock; | |
359 spa->spa_first_txg = spa_last_synced_txg(spa) + 1; | |
1544 | 360 error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); |
361 if (error) { | |
362 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
363 VDEV_AUX_CORRUPT_DATA); | |
364 goto out; | |
365 } | |
789 | 366 spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; |
367 | |
1544 | 368 if (zap_lookup(spa->spa_meta_objset, |
789 | 369 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, |
1544 | 370 sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { |
371 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
372 VDEV_AUX_CORRUPT_DATA); | |
373 error = EIO; | |
374 goto out; | |
375 } | |
789 | 376 |
377 if (!mosconfig) { | |
378 dmu_buf_t *db; | |
379 char *packed = NULL; | |
380 size_t nvsize = 0; | |
381 nvlist_t *newconfig = NULL; | |
382 | |
1544 | 383 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, |
384 spa->spa_config_object, FTAG, &db)); | |
789 | 385 nvsize = *(uint64_t *)db->db_data; |
1544 | 386 dmu_buf_rele(db, FTAG); |
789 | 387 |
388 packed = kmem_alloc(nvsize, KM_SLEEP); | |
1544 | 389 error = dmu_read(spa->spa_meta_objset, |
789 | 390 spa->spa_config_object, 0, nvsize, packed); |
391 if (error == 0) | |
392 error = nvlist_unpack(packed, nvsize, &newconfig, 0); | |
393 kmem_free(packed, nvsize); | |
394 | |
1544 | 395 if (error) { |
396 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
397 VDEV_AUX_CORRUPT_DATA); | |
398 error = EIO; | |
399 goto out; | |
400 } | |
789 | 401 |
402 spa_config_set(spa, newconfig); | |
403 | |
404 spa_unload(spa); | |
405 spa_deactivate(spa); | |
406 spa_activate(spa); | |
407 | |
1544 | 408 return (spa_load(spa, newconfig, state, B_TRUE)); |
409 } | |
410 | |
411 if (zap_lookup(spa->spa_meta_objset, | |
412 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, | |
413 sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { | |
414 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
415 VDEV_AUX_CORRUPT_DATA); | |
416 error = EIO; | |
417 goto out; | |
789 | 418 } |
419 | |
1544 | 420 /* |
421 * Load the persistent error log. If we have an older pool, this will | |
422 * not be present. | |
423 */ | |
424 error = zap_lookup(spa->spa_meta_objset, | |
425 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, | |
426 sizeof (uint64_t), 1, &spa->spa_errlog_last); | |
427 if (error != 0 && error != ENOENT) { | |
428 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
429 VDEV_AUX_CORRUPT_DATA); | |
430 error = EIO; | |
431 goto out; | |
432 } | |
433 | |
434 error = zap_lookup(spa->spa_meta_objset, | |
435 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, | |
436 sizeof (uint64_t), 1, &spa->spa_errlog_scrub); | |
437 if (error != 0 && error != ENOENT) { | |
438 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
439 VDEV_AUX_CORRUPT_DATA); | |
440 error = EIO; | |
441 goto out; | |
442 } | |
789 | 443 |
444 /* | |
1544 | 445 * Load the vdev state for all top level vdevs. We need to grab the |
446 * config lock because all label I/O is done with the | |
447 * ZIO_FLAG_CONFIG_HELD flag. | |
789 | 448 */ |
1544 | 449 spa_config_enter(spa, RW_READER, FTAG); |
450 if ((error = vdev_load(rvd)) != 0) { | |
451 spa_config_exit(spa, FTAG); | |
452 goto out; | |
453 } | |
454 spa_config_exit(spa, FTAG); | |
789 | 455 |
456 /* | |
457 * Propagate the leaf DTLs we just loaded all the way up the tree. | |
458 */ | |
1544 | 459 spa_config_enter(spa, RW_WRITER, FTAG); |
789 | 460 vdev_dtl_reassess(rvd, 0, 0, B_FALSE); |
1544 | 461 spa_config_exit(spa, FTAG); |
789 | 462 |
463 /* | |
464 * Check the state of the root vdev. If it can't be opened, it | |
465 * indicates one or more toplevel vdevs are faulted. | |
466 */ | |
1544 | 467 if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { |
468 error = ENXIO; | |
469 goto out; | |
470 } | |
789 | 471 |
472 /* | |
473 * Claim log blocks that haven't been committed yet, and update all | |
474 * top-level vdevs to sync any config changes found in vdev_load(). | |
475 * This must all happen in a single txg. | |
476 */ | |
1544 | 477 if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) { |
789 | 478 dmu_tx_t *tx = dmu_tx_create_assigned(spa_get_dsl(spa), |
479 spa_first_txg(spa)); | |
480 dmu_objset_find(spa->spa_name, zil_claim, tx, 0); | |
481 vdev_config_dirty(rvd); | |
482 dmu_tx_commit(tx); | |
483 | |
484 spa->spa_sync_on = B_TRUE; | |
485 txg_sync_start(spa->spa_dsl_pool); | |
486 | |
487 /* | |
488 * Wait for all claims to sync. | |
489 */ | |
490 txg_wait_synced(spa->spa_dsl_pool, 0); | |
491 } | |
492 | |
1544 | 493 error = 0; |
494 out: | |
495 if (error) | |
496 zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0); | |
497 spa->spa_load_state = SPA_LOAD_NONE; | |
498 spa->spa_ena = 0; | |
499 | |
500 return (error); | |
789 | 501 } |
502 | |
503 /* | |
504 * Pool Open/Import | |
505 * | |
506 * The import case is identical to an open except that the configuration is sent | |
507 * down from userland, instead of grabbed from the configuration cache. For the | |
508 * case of an open, the pool configuration will exist in the | |
509 * POOL_STATE_UNINITIALIZED state. | |
510 * | |
511 * The stats information (gen/count/ustats) is used to gather vdev statistics at | |
512 * the same time we open the pool, without having to keep around the spa_t in some | |
513 * ambiguous state. | |
514 */ | |
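/*
 * Minimal caller sketch (illustrative, not from this file): spa_open()
 * and spa_close() are paired on the same tag, e.g.
 *
 *	spa_t *spa;
 *	int error;
 *
 *	if ((error = spa_open(name, &spa, FTAG)) != 0)
 *		return (error);
 *	... operate on the pool ...
 *	spa_close(spa, FTAG);
 */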
515 static int | |
516 spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) | |
517 { | |
518 spa_t *spa; | |
519 int error; | |
520 int loaded = B_FALSE; | |
521 int locked = B_FALSE; | |
522 | |
523 *spapp = NULL; | |
524 | |
525 /* | |
526 * As disgusting as this is, we need to support recursive calls to this | |
527 * function because dsl_dir_open() is called during spa_load(), and ends | |
528 * up calling spa_open() again. The real fix is to figure out how to | |
529 * avoid dsl_dir_open() calling this in the first place. | |
530 */ | |
531 if (mutex_owner(&spa_namespace_lock) != curthread) { | |
532 mutex_enter(&spa_namespace_lock); | |
533 locked = B_TRUE; | |
534 } | |
535 | |
536 if ((spa = spa_lookup(pool)) == NULL) { | |
537 if (locked) | |
538 mutex_exit(&spa_namespace_lock); | |
539 return (ENOENT); | |
540 } | |
541 if (spa->spa_state == POOL_STATE_UNINITIALIZED) { | |
542 | |
543 spa_activate(spa); | |
544 | |
545 error = spa_load(spa, spa->spa_config, | |
1544 | 546 SPA_LOAD_OPEN, B_FALSE); |
789 | 547 |
548 if (error == EBADF) { | |
549 /* | |
550 * If vdev_load() returns EBADF, it indicates that one | |
551 * of the vdevs indicates that the pool has been | |
552 * exported or destroyed. If this is the case, the | |
553 * config cache is out of sync and we should remove the | |
554 * pool from the namespace. | |
555 */ | |
556 spa_unload(spa); | |
557 spa_deactivate(spa); | |
558 spa_remove(spa); | |
559 spa_config_sync(); | |
560 if (locked) | |
561 mutex_exit(&spa_namespace_lock); | |
562 return (ENOENT); | |
1544 | 563 } |
564 | |
565 if (error) { | |
789 | 566 /* |
567 * We can't open the pool, but we still have useful | |
568 * information: the state of each vdev after the | |
569 * attempted vdev_open(). Return this to the user. | |
570 */ | |
571 if (config != NULL && spa->spa_root_vdev != NULL) | |
572 *config = spa_config_generate(spa, NULL, -1ULL, | |
573 B_TRUE); | |
574 spa_unload(spa); | |
575 spa_deactivate(spa); | |
1544 | 576 spa->spa_last_open_failed = B_TRUE; |
789 | 577 if (locked) |
578 mutex_exit(&spa_namespace_lock); | |
579 *spapp = NULL; | |
580 return (error); | |
1544 | 581 } else { |
582 zfs_post_ok(spa, NULL); | |
583 spa->spa_last_open_failed = B_FALSE; | |
789 | 584 } |
585 | |
586 loaded = B_TRUE; | |
587 } | |
588 | |
589 spa_open_ref(spa, tag); | |
590 if (locked) | |
591 mutex_exit(&spa_namespace_lock); | |
592 | |
593 *spapp = spa; | |
594 | |
595 if (config != NULL) { | |
1544 | 596 spa_config_enter(spa, RW_READER, FTAG); |
789 | 597 *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); |
1544 | 598 spa_config_exit(spa, FTAG); |
789 | 599 } |
600 | |
601 /* | |
602 * If we just loaded the pool, resilver anything that's out of date. | |
603 */ | |
604 if (loaded && (spa_mode & FWRITE)) | |
605 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); | |
606 | |
607 return (0); | |
608 } | |
609 | |
610 int | |
611 spa_open(const char *name, spa_t **spapp, void *tag) | |
612 { | |
613 return (spa_open_common(name, spapp, tag, NULL)); | |
614 } | |
615 | |
1544 | 616 /* |
617 * Lookup the given spa_t, incrementing the inject count in the process, | |
618 * preventing it from being exported or destroyed. | |
619 */ | |
620 spa_t * | |
621 spa_inject_addref(char *name) | |
622 { | |
623 spa_t *spa; | |
624 | |
625 mutex_enter(&spa_namespace_lock); | |
626 if ((spa = spa_lookup(name)) == NULL) { | |
627 mutex_exit(&spa_namespace_lock); | |
628 return (NULL); | |
629 } | |
630 spa->spa_inject_ref++; | |
631 mutex_exit(&spa_namespace_lock); | |
632 | |
633 return (spa); | |
634 } | |
635 | |
636 void | |
637 spa_inject_delref(spa_t *spa) | |
638 { | |
639 mutex_enter(&spa_namespace_lock); | |
640 spa->spa_inject_ref--; | |
641 mutex_exit(&spa_namespace_lock); | |
642 } | |
643 | |
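/*
 * Fetch the named pool's configuration, annotated with its persistent
 * error count. The alternate root, if any, is copied into 'altroot'
 * even when the pool itself fails to open.
 */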
789 | 644 int |
1544 | 645 spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) |
789 | 646 { |
647 int error; | |
648 spa_t *spa; | |
649 | |
650 *config = NULL; | |
651 error = spa_open_common(name, &spa, FTAG, config); | |
652 | |
1544 | 653 if (spa && *config != NULL) |
654 VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, | |
655 spa_get_errlog_size(spa)) == 0); | |
656 | |
657 /* | |
658 * We want to get the alternate root even for faulted pools, so we cheat | |
659 * and call spa_lookup() directly. | |
660 */ | |
661 if (altroot) { | |
662 if (spa == NULL) { | |
663 mutex_enter(&spa_namespace_lock); | |
664 spa = spa_lookup(name); | |
665 if (spa) | |
666 spa_altroot(spa, altroot, buflen); | |
667 else | |
668 altroot[0] = '\0'; | |
669 spa = NULL; | |
670 mutex_exit(&spa_namespace_lock); | |
671 } else { | |
672 spa_altroot(spa, altroot, buflen); | |
673 } | |
674 } | |
675 | |
789 | 676 if (spa != NULL) |
677 spa_close(spa, FTAG); | |
678 | |
679 return (error); | |
680 } | |
681 | |
682 /* | |
683 * Pool Creation | |
684 */ | |
685 int | |
686 spa_create(const char *pool, nvlist_t *nvroot, char *altroot) | |
687 { | |
688 spa_t *spa; | |
689 dsl_pool_t *dp; | |
690 dmu_tx_t *tx; | |
691 int error; | |
692 uint64_t txg = TXG_INITIAL; | |
693 | |
694 /* | |
695 * If this pool already exists, return failure. | |
696 */ | |
697 mutex_enter(&spa_namespace_lock); | |
698 if (spa_lookup(pool) != NULL) { | |
699 mutex_exit(&spa_namespace_lock); | |
700 return (EEXIST); | |
701 } | |
702 spa = spa_add(pool); | |
703 | |
704 /* | |
705 * Allocate a new spa_t structure. | |
706 */ | |
707 spa_activate(spa); | |
708 | |
709 spa->spa_uberblock.ub_txg = txg - 1; | |
710 spa->spa_ubsync = spa->spa_uberblock; | |
711 | |
712 error = spa_vdev_add(spa, nvroot); | |
713 | |
714 if (error) { | |
715 spa_unload(spa); | |
716 spa_deactivate(spa); | |
717 spa_remove(spa); | |
718 mutex_exit(&spa_namespace_lock); | |
719 return (error); | |
720 } | |
721 | |
722 if (altroot != NULL) { | |
723 spa->spa_root = spa_strdup(altroot); | |
724 atomic_add_32(&spa_active_count, 1); | |
725 } | |
726 | |
727 spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg); | |
728 spa->spa_meta_objset = dp->dp_meta_objset; | |
729 | |
730 tx = dmu_tx_create_assigned(dp, txg); | |
731 | |
732 /* | |
733 * Create the pool config object. | |
734 */ | |
735 spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, | |
736 DMU_OT_PACKED_NVLIST, 1 << 14, | |
737 DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); | |
738 | |
1544 | 739 if (zap_add(spa->spa_meta_objset, |
789 | 740 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, |
1544 | 741 sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { |
742 cmn_err(CE_PANIC, "failed to add pool config"); | |
743 } | |
789 | 744 |
745 /* | |
746 * Create the deferred-free bplist object. Turn off compression | |
747 * because sync-to-convergence takes longer if the blocksize | |
748 * keeps changing. | |
749 */ | |
750 spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, | |
751 1 << 14, tx); | |
752 dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, | |
753 ZIO_COMPRESS_OFF, tx); | |
754 | |
1544 | 755 if (zap_add(spa->spa_meta_objset, |
789 | 756 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, |
1544 | 757 sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { |
758 cmn_err(CE_PANIC, "failed to add bplist"); | |
759 } | |
789 | 760 |
761 dmu_tx_commit(tx); | |
762 | |
763 spa->spa_sync_on = B_TRUE; | |
764 txg_sync_start(spa->spa_dsl_pool); | |
765 | |
766 /* | |
767 * We explicitly wait for the first transaction to complete so that our | |
768 * bean counters are appropriately updated. | |
769 */ | |
770 txg_wait_synced(spa->spa_dsl_pool, txg); | |
771 | |
772 spa_config_sync(); | |
773 | |
774 mutex_exit(&spa_namespace_lock); | |
775 | |
776 return (0); | |
777 } | |
778 | |
779 /* | |
780 * Import the given pool into the system. We set up the necessary spa_t and | |
781 * then call spa_load() to do the dirty work. | |
782 */ | |
783 int | |
784 spa_import(const char *pool, nvlist_t *config, char *altroot) | |
785 { | |
786 spa_t *spa; | |
787 int error; | |
788 | |
789 if (!(spa_mode & FWRITE)) | |
790 return (EROFS); | |
791 | |
792 /* | |
793 * If a pool with this name exists, return failure. | |
794 */ | |
795 mutex_enter(&spa_namespace_lock); | |
796 if (spa_lookup(pool) != NULL) { | |
797 mutex_exit(&spa_namespace_lock); | |
798 return (EEXIST); | |
799 } | |
800 | |
801 /* | |
802 * Create and initialize the spa structure | |
803 */ | |
804 spa = spa_add(pool); | |
805 spa_activate(spa); | |
806 | |
807 /* | |
808 * Pass off the heavy lifting to spa_load(). We pass TRUE for mosconfig | |
809 * so that we don't try to open the pool if the config is damaged. | |
810 */ | |
1544 | 811 error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE); |
789 | 812 |
813 if (error) { | |
814 spa_unload(spa); | |
815 spa_deactivate(spa); | |
816 spa_remove(spa); | |
817 mutex_exit(&spa_namespace_lock); | |
818 return (error); | |
819 } | |
820 | |
821 /* | |
822 * Set the alternate root, if there is one. | |
823 */ | |
824 if (altroot != NULL) { | |
825 atomic_add_32(&spa_active_count, 1); | |
826 spa->spa_root = spa_strdup(altroot); | |
827 } | |
828 | |
829 /* | |
830 * Initialize the config based on the in-core state. | |
831 */ | |
832 config = spa_config_generate(spa, NULL, spa_last_synced_txg(spa), 0); | |
833 | |
834 spa_config_set(spa, config); | |
835 | |
836 /* | |
837 * Sync the configuration cache. | |
838 */ | |
839 spa_config_sync(); | |
840 | |
841 mutex_exit(&spa_namespace_lock); | |
842 | |
843 /* | |
844 * Resilver anything that's out of date. | |
845 */ | |
846 if (spa_mode & FWRITE) | |
847 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); | |
848 | |
849 return (0); | |
850 } | |
851 | |
852 /* | |
853 * This (illegal) pool name is used when temporarily importing a spa_t in order | |
854 * to get the vdev stats associated with the imported devices. | |
855 */ | |
856 #define TRYIMPORT_NAME "$import" | |
857 | |
858 nvlist_t * | |
859 spa_tryimport(nvlist_t *tryconfig) | |
860 { | |
861 nvlist_t *config = NULL; | |
862 char *poolname; | |
863 spa_t *spa; | |
864 uint64_t state; | |
865 | |
866 if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) | |
867 return (NULL); | |
868 | |
869 if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) | |
870 return (NULL); | |
871 | |
872 mutex_enter(&spa_namespace_lock); | |
873 spa = spa_add(TRYIMPORT_NAME); | |
874 | |
875 ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); | |
876 | |
877 /* | |
878 * Initialize the spa_t structure. | |
879 */ | |
880 spa_activate(spa); | |
881 | |
882 /* | |
883 * Pass off the heavy lifting to spa_load(). We pass TRUE for mosconfig | |
884 * so we don't try to open the pool if the config is damaged. | |
885 */ | |
1544 | 886 (void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE); |
789 | 887 |
888 /* | |
889 * If 'tryconfig' was at least parsable, return the current config. | |
890 */ | |
891 if (spa->spa_root_vdev != NULL) { | |
892 config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); | |
893 VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, | |
894 poolname) == 0); | |
895 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, | |
896 state) == 0); | |
897 } | |
898 | |
899 spa_unload(spa); | |
900 spa_deactivate(spa); | |
901 spa_remove(spa); | |
902 mutex_exit(&spa_namespace_lock); | |
903 | |
904 return (config); | |
905 } | |
906 | |
907 /* | |
908 * Pool export/destroy | |
909 * | |
910 * The act of destroying or exporting a pool is very simple. We make sure there | |
911 * is no more pending I/O and any references to the pool are gone. Then, we | |
912 * update the pool state and sync all the labels to disk, removing the | |
913 * configuration from the cache afterwards. | |
914 */ | |
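/*
 * 'new_state' selects the flavor: POOL_STATE_DESTROYED for spa_destroy(),
 * POOL_STATE_EXPORTED for spa_export(), and POOL_STATE_UNINITIALIZED for
 * spa_reset(), which unloads the pool but leaves it in the namespace.
 */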
915 static int | |
916 spa_export_common(char *pool, int new_state) | |
917 { | |
918 spa_t *spa; | |
919 | |
920 if (!(spa_mode & FWRITE)) | |
921 return (EROFS); | |
922 | |
923 mutex_enter(&spa_namespace_lock); | |
924 if ((spa = spa_lookup(pool)) == NULL) { | |
925 mutex_exit(&spa_namespace_lock); | |
926 return (ENOENT); | |
927 } | |
928 | |
929 /* | |
1544 | 930 * Put a hold on the pool, drop the namespace lock, stop async tasks, |
931 * reacquire the namespace lock, and see if we can export. | |
932 */ | |
933 spa_open_ref(spa, FTAG); | |
934 mutex_exit(&spa_namespace_lock); | |
935 spa_async_suspend(spa); | |
936 mutex_enter(&spa_namespace_lock); | |
937 spa_close(spa, FTAG); | |
938 | |
939 /* | |
789 | 940 * The pool will be in core if it's openable, |
941 * in which case we can modify its state. | |
942 */ | |
943 if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { | |
944 /* | |
945 * Objsets may be open only because they're dirty, so we | |
946 * have to force it to sync before checking spa_refcnt. | |
947 */ | |
948 spa_scrub_suspend(spa); | |
949 txg_wait_synced(spa->spa_dsl_pool, 0); | |
950 | |
1544 | 951 /* |
952 * A pool cannot be exported or destroyed if there are active | |
953 * references. If we are resetting a pool, allow references by | |
954 * fault injection handlers. | |
955 */ | |
956 if (!spa_refcount_zero(spa) || | |
957 (spa->spa_inject_ref != 0 && | |
958 new_state != POOL_STATE_UNINITIALIZED)) { | |
789 | 959 spa_scrub_resume(spa); |
1544 | 960 spa_async_resume(spa); |
789 | 961 mutex_exit(&spa_namespace_lock); |
962 return (EBUSY); | |
963 } | |
964 | |
965 spa_scrub_resume(spa); | |
966 VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); | |
967 | |
968 if (spa->spa_root != NULL) | |
969 atomic_add_32(&spa_active_count, -1); | |
970 | |
971 /* | |
972 * We want this to be reflected on every label, | |
973 * so mark them all dirty. spa_unload() will do the | |
974 * final sync that pushes these changes out. | |
975 */ | |
1544 | 976 if (new_state != POOL_STATE_UNINITIALIZED) { |
977 spa->spa_state = new_state; | |
978 vdev_config_dirty(spa->spa_root_vdev); | |
979 } | |
789 | 980 } |
981 | |
982 if (spa->spa_state != POOL_STATE_UNINITIALIZED) { | |
983 spa_unload(spa); | |
984 spa_deactivate(spa); | |
985 } | |
986 | |
1544 | 987 if (new_state != POOL_STATE_UNINITIALIZED) { |
988 spa_remove(spa); | |
989 spa_config_sync(); | |
990 } | |
789 | 991 mutex_exit(&spa_namespace_lock); |
992 | |
993 return (0); | |
994 } | |
995 | |
996 /* | |
997 * Destroy a storage pool. | |
998 */ | |
999 int | |
1000 spa_destroy(char *pool) | |
1001 { | |
1002 return (spa_export_common(pool, POOL_STATE_DESTROYED)); | |
1003 } | |
1004 | |
1005 /* | |
1006 * Export a storage pool. | |
1007 */ | |
1008 int | |
1009 spa_export(char *pool) | |
1010 { | |
1011 return (spa_export_common(pool, POOL_STATE_EXPORTED)); | |
1012 } | |
1013 | |
1014 /* | |
1544 | 1015 * Similar to spa_export(), this unloads the spa_t without actually removing it |
1016 * from the namespace in any way. | |
1017 */ | |
1018 int | |
1019 spa_reset(char *pool) | |
1020 { | |
1021 return (spa_export_common(pool, POOL_STATE_UNINITIALIZED)); | |
1022 } | |
1023 | |
1024 | |
1025 /* | |
789 | 1026 * ========================================================================== |
1027 * Device manipulation | |
1028 * ========================================================================== | |
1029 */ | |
1030 | |
1031 /* | |
1032 * Add capacity to a storage pool. | |
1033 */ | |
1034 int | |
1035 spa_vdev_add(spa_t *spa, nvlist_t *nvroot) | |
1036 { | |
1037 uint64_t txg; | |
1038 int c, error; | |
1039 vdev_t *rvd = spa->spa_root_vdev; | |
1040 vdev_t *vd; | |
1041 | |
1042 txg = spa_vdev_enter(spa); | |
1043 | |
1044 vd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD); | |
1045 | |
1046 if (vd == NULL) | |
1047 return (spa_vdev_exit(spa, vd, txg, EINVAL)); | |
1048 | |
1049 if (rvd == NULL) /* spa_create() */ | |
1050 spa->spa_root_vdev = rvd = vd; | |
1051 | |
1052 if ((error = vdev_create(vd, txg)) != 0) | |
1053 return (spa_vdev_exit(spa, vd, txg, error)); | |
1054 | |
1055 /* | |
1056 * Transfer each top-level vdev from the temporary root | |
1057 * to the spa's root and initialize its metaslabs. | |
1058 */ | |
1059 for (c = 0; c < vd->vdev_children; c++) { | |
1060 vdev_t *tvd = vd->vdev_child[c]; | |
1061 if (vd != rvd) { | |
1062 vdev_remove_child(vd, tvd); | |
1063 tvd->vdev_id = rvd->vdev_children; | |
1064 vdev_add_child(rvd, tvd); | |
1065 } | |
1544 | 1066 if ((error = vdev_init(tvd, txg)) != 0) |
1067 return (spa_vdev_exit(spa, vd, txg, error)); | |
789 | 1068 vdev_config_dirty(tvd); |
1069 } | |
1070 | |
1071 /* | |
1072 * Update the config based on the new in-core state. | |
1073 */ | |
1074 spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); | |
1075 | |
1076 return (spa_vdev_exit(spa, vd, txg, 0)); | |
1077 } | |
1078 | |
1079 /* | |
1080 * Attach a device to a mirror. The arguments are the path to any device | |
1081 * in the mirror, and the nvroot for the new device. If the path specifies | |
1082 * a device that is not mirrored, we automatically insert the mirror vdev. | |
1083 * | |
1084 * If 'replacing' is specified, the new device is intended to replace the | |
1085 * existing device; in this case the two devices are made into their own | |
1086 * mirror using the 'replacing' vdev, which is functionally identical to | |
1087 * the mirror vdev (it actually reuses all the same ops) but has a few | |
1088 * extra rules: you can't attach to it after it's been created, and upon | |
1089 * completion of resilvering, the first disk (the one being replaced) | |
1090 * is automatically detached. | |
1091 */ | |
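/*
 * Illustrative sketch (device names hypothetical): replacing c0t0d0
 * with c0t1d0 in a two-way mirror yields
 *
 *	mirror
 *	    replacing
 *		c0t0d0		(detached once resilvering completes)
 *		c0t1d0
 *	    c0t2d0
 */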
1092 int | |
1544 | 1093 spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) |
789 | 1094 { |
1095 uint64_t txg, open_txg; | |
1096 int error; | |
1097 vdev_t *rvd = spa->spa_root_vdev; | |
1098 vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; | |
1099 vdev_ops_t *pvops = replacing ? &vdev_replacing_ops : &vdev_mirror_ops; | |
1100 | |
1101 txg = spa_vdev_enter(spa); | |
1102 | |
1544 | 1103 oldvd = vdev_lookup_by_guid(rvd, guid); |
789 | 1104 |
1105 if (oldvd == NULL) | |
1106 return (spa_vdev_exit(spa, NULL, txg, ENODEV)); | |
1107 | |
1108 pvd = oldvd->vdev_parent; | |
1109 | |
1110 /* | |
1111 * The parent must be a mirror or the root, unless we're replacing; | |
1112 * in that case, the parent can be anything but another replacing vdev. | |
1113 */ | |
1114 if (pvd->vdev_ops != &vdev_mirror_ops && | |
1115 pvd->vdev_ops != &vdev_root_ops && | |
1116 (!replacing || pvd->vdev_ops == &vdev_replacing_ops)) | |
1117 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); | |
1118 | |
1119 newrootvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD); | |
1120 | |
1121 if (newrootvd == NULL || newrootvd->vdev_children != 1) | |
1122 return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); | |
1123 | |
1124 newvd = newrootvd->vdev_child[0]; | |
1125 | |
1126 if (!newvd->vdev_ops->vdev_op_leaf) | |
1127 return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); | |
1128 | |
1129 if ((error = vdev_create(newrootvd, txg)) != 0) | |
1130 return (spa_vdev_exit(spa, newrootvd, txg, error)); | |
1131 | |
1175 | 1132 /* |
1133 * Compare the new device size with the replaceable/attachable | |
1134 * device size. | |
1135 */ | |
1136 if (newvd->vdev_psize < vdev_get_rsize(oldvd)) | |
789 | 1137 return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); |
1138 | |
1139 if (newvd->vdev_ashift != oldvd->vdev_ashift && oldvd->vdev_ashift != 0) | |
1140 return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); | |
1141 | |
1142 /* | |
1143 * If this is an in-place replacement, update oldvd's path and devid | |
1144 * to make it distinguishable from newvd, and unopenable from now on. | |
1145 */ | |
1146 if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { | |
1147 spa_strfree(oldvd->vdev_path); | |
1148 oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, | |
1149 KM_SLEEP); | |
1150 (void) sprintf(oldvd->vdev_path, "%s/%s", | |
1151 newvd->vdev_path, "old"); | |
1152 if (oldvd->vdev_devid != NULL) { | |
1153 spa_strfree(oldvd->vdev_devid); | |
1154 oldvd->vdev_devid = NULL; | |
1155 } | |
1156 } | |
1157 | |
1158 /* | |
1159 * If the parent is not a mirror, or if we're replacing, | |
1160 * insert the new mirror/replacing vdev above oldvd. | |
1161 */ | |
1162 if (pvd->vdev_ops != pvops) | |
1163 pvd = vdev_add_parent(oldvd, pvops); | |
1164 | |
1165 ASSERT(pvd->vdev_top->vdev_parent == rvd); | |
1166 ASSERT(pvd->vdev_ops == pvops); | |
1167 ASSERT(oldvd->vdev_parent == pvd); | |
1168 | |
1169 /* | |
1170 * Extract the new device from its root and add it to pvd. | |
1171 */ | |
1172 vdev_remove_child(newrootvd, newvd); | |
1173 newvd->vdev_id = pvd->vdev_children; | |
1174 vdev_add_child(pvd, newvd); | |
1175 | |
1544 | 1176 /* |
1177 * If newvd is smaller than oldvd, but larger than its rsize, | |
1178 * the addition of newvd may have decreased our parent's asize. | |
1179 */ | |
1180 pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize); | |
1181 | |
789 | 1182 tvd = newvd->vdev_top; |
1183 ASSERT(pvd->vdev_top == tvd); | |
1184 ASSERT(tvd->vdev_parent == rvd); | |
1185 | |
1186 /* | |
1187 * Update the config based on the new in-core state. | |
1188 */ | |
1189 spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); | |
1190 vdev_config_dirty(tvd); | |
1191 | |
1192 /* | |
1193 * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate | |
1194 * upward when spa_vdev_exit() calls vdev_dtl_reassess(). | |
1195 */ | |
1196 open_txg = txg + TXG_CONCURRENT_STATES - 1; | |
1197 | |
1198 mutex_enter(&newvd->vdev_dtl_lock); | |
1199 space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL, | |
1200 open_txg - TXG_INITIAL + 1); | |
1201 mutex_exit(&newvd->vdev_dtl_lock); | |
1202 | |
1544 | 1203 dprintf("attached %s in txg %llu\n", newvd->vdev_path, txg); |
1204 | |
789 | 1205 /* |
1206 * Mark newvd's DTL dirty in this txg. | |
1207 */ | |
1208 vdev_dirty(tvd, VDD_DTL, txg); | |
1209 (void) txg_list_add(&tvd->vdev_dtl_list, newvd, txg); | |
1210 | |
1211 (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); | |
1212 | |
1213 /* | |
1214 * Kick off a resilver to update newvd. | |
1215 */ | |
1216 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); | |
1217 | |
1218 return (0); | |
1219 } | |
1220 | |
1221 /* | |
1222 * Detach a device from a mirror or replacing vdev. | |
1223 * If 'replace_done' is specified, only detach if the parent | |
1224 * is a replacing vdev. | |
1225 */ | |
1226 int | |
1544 | 1227 spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) |
789 | 1228 { |
1229 uint64_t txg; | |
1230 int c, t, error; | |
1231 vdev_t *rvd = spa->spa_root_vdev; | |
1232 vdev_t *vd, *pvd, *cvd, *tvd; | |
1233 | |
1234 txg = spa_vdev_enter(spa); | |
1235 | |
1544 | 1236 vd = vdev_lookup_by_guid(rvd, guid); |
789 | 1237 |
1238 if (vd == NULL) | |
1239 return (spa_vdev_exit(spa, NULL, txg, ENODEV)); | |
1240 | |
1241 pvd = vd->vdev_parent; | |
1242 | |
1243 /* | |
1244 * If replace_done is specified, only remove this device if it's | |
1245 * the first child of a replacing vdev. | |
1246 */ | |
1247 if (replace_done && | |
1248 (vd->vdev_id != 0 || pvd->vdev_ops != &vdev_replacing_ops)) | |
1249 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); | |
1250 | |
1251 /* | |
1252 * Only mirror and replacing vdevs support detach. | |
1253 */ | |
1254 if (pvd->vdev_ops != &vdev_replacing_ops && | |
1255 pvd->vdev_ops != &vdev_mirror_ops) | |
1256 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); | |
1257 | |
1258 /* | |
1259 * If there's only one replica, you can't detach it. | |
1260 */ | |
1261 if (pvd->vdev_children <= 1) | |
1262 return (spa_vdev_exit(spa, NULL, txg, EBUSY)); | |
1263 | |
1264 /* | |
1265 * If all siblings have non-empty DTLs, this device may have the only | |
1266 * valid copy of the data, which means we cannot safely detach it. | |
1267 * | |
1268 * XXX -- as in the vdev_offline() case, we really want a more | |
1269 * precise DTL check. | |
1270 */ | |
1271 for (c = 0; c < pvd->vdev_children; c++) { | |
1272 uint64_t dirty; | |
1273 | |
1274 cvd = pvd->vdev_child[c]; | |
1275 if (cvd == vd) | |
1276 continue; | |
1277 if (vdev_is_dead(cvd)) | |
1278 continue; | |
1279 mutex_enter(&cvd->vdev_dtl_lock); | |
1280 dirty = cvd->vdev_dtl_map.sm_space | | |
1281 cvd->vdev_dtl_scrub.sm_space; | |
1282 mutex_exit(&cvd->vdev_dtl_lock); | |
1283 if (!dirty) | |
1284 break; | |
1285 } | |
1286 if (c == pvd->vdev_children) | |
1287 return (spa_vdev_exit(spa, NULL, txg, EBUSY)); | |
1288 | |
1289 /* | |
1290 * Erase the disk labels so the disk can be used for other things. | |
1291 * This must be done after all other error cases are handled, | |
1292 * but before we disembowel vd (so we can still do I/O to it). | |
1293 * But if we can't do it, don't treat the error as fatal -- | |
1294 * it may be that the unwritability of the disk is the reason | |
1295 * it's being detached! | |
1296 */ | |
1297 error = vdev_label_init(vd, 0); | |
1298 if (error) | |
1299 dprintf("unable to erase labels on %s\n", vdev_description(vd)); | |
1300 | |
1301 /* | |
1302 * Remove vd from its parent and compact the parent's children. | |
1303 */ | |
1304 vdev_remove_child(pvd, vd); | |
1305 vdev_compact_children(pvd); | |
1306 | |
1307 /* | |
1308 * Remember one of the remaining children so we can get tvd below. | |
1309 */ | |
1310 cvd = pvd->vdev_child[0]; | |
1311 | |
1312 /* | |
1313 * If the parent mirror/replacing vdev only has one child, | |
1314 * the parent is no longer needed. Remove it from the tree. | |
1315 */ | |
1316 if (pvd->vdev_children == 1) | |
1317 vdev_remove_parent(cvd); | |
1318 | |
1319 /* | |
1320 * We don't set tvd until now because the parent we just removed | |
1321 * may have been the previous top-level vdev. | |
1322 */ | |
1323 tvd = cvd->vdev_top; | |
1324 ASSERT(tvd->vdev_parent == rvd); | |
1325 | |
1326 /* | |
1327 * Reopen this top-level vdev to reassess health after detach. | |
1328 */ | |
1544 | 1329 vdev_reopen(tvd); |
789 | 1330 |
1331 /* | |
1332 * If the device we just detached was smaller than the others, | |
1544 | 1333 * it may be possible to add metaslabs (i.e. grow the pool). We ignore |
1334 * the error here because the detach still succeeded - we just weren't | |
1335 * able to reinitialize the metaslabs. This pool is in for a world of | |
1336 * hurt, in any case. | |
789 | 1337 */ |
1544 | 1338 (void) vdev_metaslab_init(tvd, txg); |
789 | 1339 |
1340 /* | |
1341 * Update the config based on the new in-core state. | |
1342 */ | |
1343 spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); | |
1344 | |
1345 vdev_config_dirty(tvd); | |
1346 | |
1347 /* | |
1348 * Mark vd's DTL as dirty in this txg. | |
1349 * vdev_dtl_sync() will see that vd->vdev_detached is set | |
1350 * and free vd's DTL object in syncing context. | |
1351 * But first make sure we're not on any *other* txg's DTL list, | |
1352 * to prevent vd from being accessed after it's freed. | |
1353 */ | |
1354 vdev_dirty(tvd, VDD_DTL, txg); | |
1355 vd->vdev_detached = B_TRUE; | |
1356 for (t = 0; t < TXG_SIZE; t++) | |
1357 (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); | |
1358 (void) txg_list_add(&tvd->vdev_dtl_list, vd, txg); | |
1359 | |
1544 | 1360 dprintf("detached %s in txg %llu\n", vd->vdev_path, txg); |
789 | 1361 |
1362 return (spa_vdev_exit(spa, vd, txg, 0)); | |
1363 } | |
1364 | |
1365 /* | |
1544 | 1366 * Find any device that's done replacing, so we can detach it. |
789 | 1367 */ |
1544 | 1368 static vdev_t * |
1369 spa_vdev_replace_done_hunt(vdev_t *vd) | |
789 | 1370 { |
1544 | 1371 vdev_t *newvd, *oldvd; |
789 | 1372 int c; |
1373 | |
1544 | 1374 for (c = 0; c < vd->vdev_children; c++) { |
1375 oldvd = spa_vdev_replace_done_hunt(vd->vdev_child[c]); | |
1376 if (oldvd != NULL) | |
1377 return (oldvd); | |
1378 } | |
789 | 1379 |
1380 if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { | |
1544 | 1381 oldvd = vd->vdev_child[0]; |
1382 newvd = vd->vdev_child[1]; | |
789 | 1383 |
1544 | 1384 mutex_enter(&newvd->vdev_dtl_lock); |
1385 if (newvd->vdev_dtl_map.sm_space == 0 && | |
1386 newvd->vdev_dtl_scrub.sm_space == 0) { | |
1387 mutex_exit(&newvd->vdev_dtl_lock); | |
1388 return (oldvd); | |
1389 } | |
1390 mutex_exit(&newvd->vdev_dtl_lock); | |
1391 } | |
789 | 1392 |
1544 | 1393 return (NULL); |
789 | 1394 } |
1395 | |
1544 | 1396 static void |
789 | 1397 spa_vdev_replace_done(spa_t *spa) |
1398 { | |
1544 | 1399 vdev_t *vd; |
1400 uint64_t guid; | |
789 | 1401 |
1544 | 1402 spa_config_enter(spa, RW_READER, FTAG); |
789 | 1403 |
1544 | 1404 while ((vd = spa_vdev_replace_done_hunt(spa->spa_root_vdev)) != NULL) { |
1405 guid = vd->vdev_guid; | |
1406 spa_config_exit(spa, FTAG); | |
1407 if (spa_vdev_detach(spa, guid, B_TRUE) != 0) | |
1408 return; | |
1409 spa_config_enter(spa, RW_READER, FTAG); | |
789 | 1410 } |
1411 | |
1544 | 1412 spa_config_exit(spa, FTAG); |
789 | 1413 } |
1414 | |
1415 /* | |
1354 | 1416 * Update the stored path for this vdev. Dirty the vdev configuration, relying |
1417 * on spa_vdev_enter/exit() to synchronize the labels and cache. | |
1418 */ | |
1419 int | |
1420 spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) | |
1421 { | |
1422 vdev_t *rvd, *vd; | |
1423 uint64_t txg; | |
1424 | |
1425 rvd = spa->spa_root_vdev; | |
1426 | |
1427 txg = spa_vdev_enter(spa); | |
1428 | |
1429 if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) | |
1430 return (spa_vdev_exit(spa, NULL, txg, ENOENT)); | |
1431 | |
1432 spa_strfree(vd->vdev_path); | |
1433 vd->vdev_path = spa_strdup(newpath); | |
1434 | |
1435 spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); | |
1436 | |
1437 vdev_config_dirty(vd->vdev_top); | |
1438 | |
1439 return (spa_vdev_exit(spa, NULL, txg, 0)); | |
1440 } | |
1441 | |
1442 /* | |
789 | 1443 * ========================================================================== |
1444 * SPA Scrubbing | |
1445 * ========================================================================== | |
1446 */ | |
1447 | |
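/*
 * Adjust the count of throttled scrub I/Os by 'direction'; when the
 * count drains back to zero, wake the waiter in spa_scrub_thread().
 */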
1544 | 1448 void |
1449 spa_scrub_throttle(spa_t *spa, int direction) | |
1450 { | |
1451 mutex_enter(&spa->spa_scrub_lock); | |
1452 spa->spa_scrub_throttled += direction; | |
1453 ASSERT(spa->spa_scrub_throttled >= 0); | |
1454 if (spa->spa_scrub_throttled == 0) | |
1455 cv_broadcast(&spa->spa_scrub_io_cv); | |
1456 mutex_exit(&spa->spa_scrub_lock); | |
1457 } | |
789 | 1458 |
1459 static void | |
1460 spa_scrub_io_done(zio_t *zio) | |
1461 { | |
1462 spa_t *spa = zio->io_spa; | |
1463 | |
1464 zio_buf_free(zio->io_data, zio->io_size); | |
1465 | |
1466 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 1467 if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { |
1468 vdev_t *vd = zio->io_vd; | |
789 | 1469 spa->spa_scrub_errors++; |
1470 mutex_enter(&vd->vdev_stat_lock); | |
1471 vd->vdev_stat.vs_scrub_errors++; | |
1472 mutex_exit(&vd->vdev_stat_lock); | |
1473 } | |
1544 | 1474 if (--spa->spa_scrub_inflight == 0) { |
1475 cv_broadcast(&spa->spa_scrub_io_cv); | |
1476 ASSERT(spa->spa_scrub_throttled == 0); | |
1477 } | |
1478 mutex_exit(&spa->spa_scrub_lock); | |
789 | 1479 } |
1480 | |
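/*
 * Issue an asynchronous read of 'bp' on behalf of a scrub or resilver.
 * Intent log blocks are marked ZIO_FLAG_SPECULATIVE so that read
 * failures against them are not counted as scrub errors (see
 * spa_scrub_io_done()).
 */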
1481 static void | |
1544 | 1482 spa_scrub_io_start(spa_t *spa, blkptr_t *bp, int priority, int flags, |
1483 zbookmark_t *zb) | |
789 | 1484 { |
1485 size_t size = BP_GET_LSIZE(bp); | |
1486 void *data = zio_buf_alloc(size); | |
1487 | |
1488 mutex_enter(&spa->spa_scrub_lock); | |
1489 spa->spa_scrub_inflight++; | |
1490 mutex_exit(&spa->spa_scrub_lock); | |
1491 | |
1544 | 1492 if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET) |
1493 flags |= ZIO_FLAG_SPECULATIVE; /* intent log block */ | |
1494 | |
1495 flags |= ZIO_FLAG_CANFAIL; | |
1496 | |
789 | 1497 zio_nowait(zio_read(NULL, spa, bp, data, size, |
1544 | 1498 spa_scrub_io_done, NULL, priority, flags, zb)); |
789 | 1499 } |
1500 | |
1501 /* ARGSUSED */ | |
1502 static int | |
1503 spa_scrub_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) | |
1504 { | |
1505 blkptr_t *bp = &bc->bc_blkptr; | |
1506 vdev_t *vd = vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[0])); | |
1507 | |
1508 if (bc->bc_errno || vd == NULL) { | |
1509 /* | |
1510 * We can't scrub this block, but we can continue to scrub | |
1511 * the rest of the pool. Note the error and move along. | |
1512 */ | |
1513 mutex_enter(&spa->spa_scrub_lock); | |
1514 spa->spa_scrub_errors++; | |
1515 mutex_exit(&spa->spa_scrub_lock); | |
1516 | |
1517 if (vd != NULL) { | |
1518 mutex_enter(&vd->vdev_stat_lock); | |
1519 vd->vdev_stat.vs_scrub_errors++; | |
1520 mutex_exit(&vd->vdev_stat_lock); | |
1521 } | |
1522 | |
1523 return (ERESTART); | |
1524 } | |
1525 | |
1526 ASSERT(bp->blk_birth < spa->spa_scrub_maxtxg); | |
1527 | |
1528 /* | |
1529 * Keep track of how much data we've examined so that | |
1530 * zpool(1M) status can make useful progress reports. | |
1531 */ | |
1532 mutex_enter(&vd->vdev_stat_lock); | |
1533 vd->vdev_stat.vs_scrub_examined += BP_GET_ASIZE(bp); | |
1534 mutex_exit(&vd->vdev_stat_lock); | |
1535 | |
1536 if (spa->spa_scrub_type == POOL_SCRUB_RESILVER) { | |
1537 if (DVA_GET_GANG(&bp->blk_dva[0])) { | |
1538 /* | |
1539 * Gang members may be spread across multiple vdevs, | |
1540 * so the best we can do is look at the pool-wide DTL. | |
1541 * XXX -- it would be better to change our allocation | |
1542 * policy to ensure that this can't happen. | |
1543 */ | |
1544 vd = spa->spa_root_vdev; | |
1545 } | |
1546 if (vdev_dtl_contains(&vd->vdev_dtl_map, bp->blk_birth, 1)) { | |
1547 spa_scrub_io_start(spa, bp, ZIO_PRIORITY_RESILVER, | |
1544 | 1548 ZIO_FLAG_RESILVER, &bc->bc_bookmark); |
789 | 1549 } |
1550 } else { | |
1551 spa_scrub_io_start(spa, bp, ZIO_PRIORITY_SCRUB, | |
1544 | 1552 ZIO_FLAG_SCRUB, &bc->bc_bookmark); |
789 | 1553 } |
1554 | |
1555 return (0); | |
1556 } | |
1557 | |
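/*
 * Main loop of the scrub/resilver thread: repeatedly call
 * traverse_more() to walk the pool, honoring suspend, stop, and
 * throttle requests between passes.
 */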
1558 static void | |
1559 spa_scrub_thread(spa_t *spa) | |
1560 { | |
1561 callb_cpr_t cprinfo; | |
1562 traverse_handle_t *th = spa->spa_scrub_th; | |
1563 vdev_t *rvd = spa->spa_root_vdev; | |
1564 pool_scrub_type_t scrub_type = spa->spa_scrub_type; | |
1565 int error = 0; | |
1566 boolean_t complete; | |
1567 | |
1568 CALLB_CPR_INIT(&cprinfo, &spa->spa_scrub_lock, callb_generic_cpr, FTAG); | |
1569 | |
797 | 1570 /* |
1571 * If we're restarting due to a snapshot create/delete, | |
1572 * wait for that to complete. | |
1573 */ | |
1574 txg_wait_synced(spa_get_dsl(spa), 0); | |
1575 | |
1544 | 1576 dprintf("start %s mintxg=%llu maxtxg=%llu\n", |
1577 scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", | |
1578 spa->spa_scrub_mintxg, spa->spa_scrub_maxtxg); | |
1579 | |
1580 spa_config_enter(spa, RW_WRITER, FTAG); | |
1581 vdev_reopen(rvd); /* purge all vdev caches */ | |
789 | 1582 vdev_config_dirty(rvd); /* rewrite all disk labels */ |
1583 vdev_scrub_stat_update(rvd, scrub_type, B_FALSE); | |
1544 | 1584 spa_config_exit(spa, FTAG); |
789 | 1585 |
1586 mutex_enter(&spa->spa_scrub_lock); | |
1587 spa->spa_scrub_errors = 0; | |
1588 spa->spa_scrub_active = 1; | |
1544 | 1589 ASSERT(spa->spa_scrub_inflight == 0); |
1590 ASSERT(spa->spa_scrub_throttled == 0); | |
789 | 1591 |
1592 while (!spa->spa_scrub_stop) { | |
1593 CALLB_CPR_SAFE_BEGIN(&cprinfo); | |
1544 | 1594 while (spa->spa_scrub_suspended) { |
789 | 1595 spa->spa_scrub_active = 0; |
1596 cv_broadcast(&spa->spa_scrub_cv); | |
1597 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
1598 spa->spa_scrub_active = 1; | |
1599 } | |
1600 CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_scrub_lock); | |
1601 | |
1602 if (spa->spa_scrub_restart_txg != 0) | |
1603 break; | |
1604 | |
1605 mutex_exit(&spa->spa_scrub_lock); | |
1606 error = traverse_more(th); | |
1607 mutex_enter(&spa->spa_scrub_lock); | |
1608 if (error != EAGAIN) | |
1609 break; | |
1544 | 1610 |
1611 while (spa->spa_scrub_throttled > 0) | |
1612 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); | |
789 | 1613 } |
1614 | |
1615 while (spa->spa_scrub_inflight) | |
1616 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); | |
1617 | |
1618 if (spa->spa_scrub_restart_txg != 0) | |
1619 error = ERESTART; | |
1620 | |
1544 | 1621 if (spa->spa_scrub_stop) |
1622 error = EINTR; | |
1623 | |
789 | 1624 spa->spa_scrub_active = 0; |
1625 cv_broadcast(&spa->spa_scrub_cv); | |
1626 | |
1627 /* | |
1544 | 1628 * Even if there were uncorrectable errors, we consider the scrub |
1629 * completed. The downside is that if there is a transient error during | |
1630 * a resilver, we won't resilver the data properly to the target. But | |
1631 * if the damage is permanent (more likely) we will resilver forever, | |
1632 * which isn't really acceptable. Since there is enough information for | |
1633 * the user to know what has failed and why, this seems like a more | |
1634 * tractable approach. | |
789 | 1635 */ |
1544 | 1636 complete = (error == 0); |
789 | 1637 |
1544 | 1638 dprintf("end %s to maxtxg=%llu %s, traverse=%d, %llu errors, stop=%u\n", |
1639 scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", | |
789 | 1640 spa->spa_scrub_maxtxg, complete ? "done" : "FAILED", |
1641 error, spa->spa_scrub_errors, spa->spa_scrub_stop); | |
1642 | |
1643 mutex_exit(&spa->spa_scrub_lock); | |
1644 | |
1645 /* | |
1646 * If the scrub/resilver completed, update all DTLs to reflect this. | |
1647 * Whether it succeeded or not, vacate all temporary scrub DTLs. | |
1648 */ | |
1544 | 1649 spa_config_enter(spa, RW_WRITER, FTAG); |
789 | 1650 vdev_dtl_reassess(rvd, spa_last_synced_txg(spa) + 1, |
1651 complete ? spa->spa_scrub_maxtxg : 0, B_TRUE); | |
1652 vdev_scrub_stat_update(rvd, POOL_SCRUB_NONE, complete); | |
1544 | 1653 spa_errlog_rotate(spa); |
1654 spa_config_exit(spa, FTAG); | |
789 | 1655 |
1656 mutex_enter(&spa->spa_scrub_lock); | |
1657 | |
1544 | 1658 /* |
1659 * We may have finished replacing a device. | |
1660 * Let the async thread assess this and handle the detach. | |
1661 */ | |
1662 spa_async_request(spa, SPA_ASYNC_REPLACE_DONE); | |
789 | 1663 |
1664 /* | |
1665 * If we were told to restart, our final act is to start a new scrub. | |
1666 */ | |
1667 if (error == ERESTART) | |
1544 | 1668 spa_async_request(spa, scrub_type == POOL_SCRUB_RESILVER ? |
1669 SPA_ASYNC_RESILVER : SPA_ASYNC_SCRUB); | |
789 | 1670 |
1544 | 1671 spa->spa_scrub_type = POOL_SCRUB_NONE; |
1672 spa->spa_scrub_active = 0; | |
1673 spa->spa_scrub_thread = NULL; | |
1674 cv_broadcast(&spa->spa_scrub_cv); | |
789 | 1675 CALLB_CPR_EXIT(&cprinfo); /* drops &spa->spa_scrub_lock */ |
1676 thread_exit(); | |
1677 } | |
1678 | |
1679 void | |
1680 spa_scrub_suspend(spa_t *spa) | |
1681 { | |
1682 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 1683 spa->spa_scrub_suspended++; |
789 | 1684 while (spa->spa_scrub_active) { |
1685 cv_broadcast(&spa->spa_scrub_cv); | |
1686 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
1687 } | |
1688 while (spa->spa_scrub_inflight) | |
1689 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); | |
1690 mutex_exit(&spa->spa_scrub_lock); | |
1691 } | |
1692 | |
1693 void | |
1694 spa_scrub_resume(spa_t *spa) | |
1695 { | |
1696 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 1697 ASSERT(spa->spa_scrub_suspended != 0); |
1698 if (--spa->spa_scrub_suspended == 0) | |
789 | 1699 cv_broadcast(&spa->spa_scrub_cv); |
1700 mutex_exit(&spa->spa_scrub_lock); | |
1701 } | |
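/*
 * Usage sketch: spa_scrub_suspend() and spa_scrub_resume() bracket code
 * that must not race with scrub I/O; suspend also drains in-flight reads.
 * spa_sync() below uses exactly this pairing to snapshot the uberblock:
 *
 *	spa_scrub_suspend(spa);		...quiesce and drain scrub I/O...
 *	rw_enter(&spa->spa_traverse_lock, RW_WRITER);
 *	spa->spa_ubsync = spa->spa_uberblock;
 *	rw_exit(&spa->spa_traverse_lock);
 *	spa_scrub_resume(spa);		...scrub continues with new ubsync...
 */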
1702 | |
1703 void | |
1704 spa_scrub_restart(spa_t *spa, uint64_t txg) | |
1705 { | |
1706 /* |
1707 * Something happened (e.g. snapshot create/delete) that means |
1708 * we must restart any in-progress scrubs.  The scrub thread |
1709 * checks spa_scrub_restart_txg and restarts the pass from there. |
1710 */ |
1711 mutex_enter(&spa->spa_scrub_lock); | |
1712 spa->spa_scrub_restart_txg = txg; | |
1713 mutex_exit(&spa->spa_scrub_lock); | |
1714 } | |
1715 | |
1544 | 1716 int |
1717 spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force) | |
789 | 1718 { |
1719 space_seg_t *ss; | |
1720 uint64_t mintxg, maxtxg; | |
1721 vdev_t *rvd = spa->spa_root_vdev; | |
1544 | 1722 int advance = ADVANCE_PRE | ADVANCE_ZIL; |
789 | 1723 |
1724 if ((uint_t)type >= POOL_SCRUB_TYPES) | |
1725 return (ENOTSUP); | |
1726 | |
1544 | 1727 mutex_enter(&spa->spa_scrub_lock); |
1728 | |
789 | 1729 /* |
1730 * If there's a scrub or resilver already in progress, stop it. | |
1731 */ | |
1732 while (spa->spa_scrub_thread != NULL) { | |
1733 /* | |
1734 * Don't stop a resilver unless forced. | |
1735 */ | |
1544 | 1736 if (spa->spa_scrub_type == POOL_SCRUB_RESILVER && !force) { |
1737 mutex_exit(&spa->spa_scrub_lock); | |
789 | 1738 return (EBUSY); |
1544 | 1739 } |
789 | 1740 spa->spa_scrub_stop = 1; |
1741 cv_broadcast(&spa->spa_scrub_cv); | |
1742 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
1743 } | |
1744 | |
1745 /* | |
1746 * Terminate the previous traverse. | |
1747 */ | |
1748 if (spa->spa_scrub_th != NULL) { | |
1749 traverse_fini(spa->spa_scrub_th); | |
1750 spa->spa_scrub_th = NULL; | |
1751 } | |
1752 | |
1544 | 1753 if (rvd == NULL) { |
1754 ASSERT(spa->spa_scrub_stop == 0); | |
1755 ASSERT(spa->spa_scrub_type == type); | |
1756 ASSERT(spa->spa_scrub_restart_txg == 0); | |
1757 mutex_exit(&spa->spa_scrub_lock); | |
1758 return (0); | |
1759 } | |
789 | 1760 |
1761 mintxg = TXG_INITIAL - 1; | |
1762 maxtxg = spa_last_synced_txg(spa) + 1; | |
1763 | |
1544 | 1764 mutex_enter(&rvd->vdev_dtl_lock); |
789 | 1765 |
1544 | 1766 if (rvd->vdev_dtl_map.sm_space == 0) { |
1767 /* | |
1768 * The pool-wide DTL is empty. | |
1769 * If this is a resilver, there's nothing to do. | |
1770 */ | |
1771 if (type == POOL_SCRUB_RESILVER) | |
1772 type = POOL_SCRUB_NONE; | |
1773 } else { | |
1774 /* | |
1775 * The pool-wide DTL is non-empty. | |
1776 * If this is a normal scrub, upgrade to a resilver instead. | |
1777 */ | |
1778 if (type == POOL_SCRUB_EVERYTHING) | |
1779 type = POOL_SCRUB_RESILVER; | |
1780 } | |
789 | 1781 |
1544 | 1782 if (type == POOL_SCRUB_RESILVER) { |
789 | 1783 /* |
1784 * Determine the resilvering boundaries. | |
1785 * | |
1786 * Note: (mintxg, maxtxg) is an open interval, | |
1787 * i.e. mintxg and maxtxg themselves are not included. | |
1788 * | |
1789 * Note: for maxtxg, we MIN with spa_last_synced_txg(spa) + 1 | |
1790 * so we don't claim to resilver a txg that's still changing. | |
1791 */ | |
1792 ss = avl_first(&rvd->vdev_dtl_map.sm_root); | |
1544 | 1793 mintxg = ss->ss_start - 1; |
789 | 1794 ss = avl_last(&rvd->vdev_dtl_map.sm_root); |
1544 | 1795 maxtxg = MIN(ss->ss_end, maxtxg); |
789 | 1796 |
1544 | 1797 advance |= ADVANCE_PRUNE; |
789 | 1798 } |
1799 | |
1544 | 1800 mutex_exit(&rvd->vdev_dtl_lock); |
1801 | |
1802 spa->spa_scrub_stop = 0; | |
1803 spa->spa_scrub_type = type; | |
1804 spa->spa_scrub_restart_txg = 0; | |
1805 | |
1806 if (type != POOL_SCRUB_NONE) { | |
1807 spa->spa_scrub_mintxg = mintxg; | |
789 | 1808 spa->spa_scrub_maxtxg = maxtxg; |
1809 spa->spa_scrub_th = traverse_init(spa, spa_scrub_cb, NULL, | |
1810 advance, ZIO_FLAG_CANFAIL); | |
1811 traverse_add_pool(spa->spa_scrub_th, mintxg, maxtxg); | |
1812 spa->spa_scrub_thread = thread_create(NULL, 0, | |
1813 spa_scrub_thread, spa, 0, &p0, TS_RUN, minclsyspri); | |
1814 } | |
1815 | |
1544 | 1816 mutex_exit(&spa->spa_scrub_lock); |
1817 | |
789 | 1818 return (0); |
1819 } | |
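/*
 * Caller's view, as a sketch: spa_scrub() is the single entry point for
 * starting, upgrading, or stopping a pass.  The async thread below drives
 * restarts with force set:
 *
 *	VERIFY(spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_TRUE) == 0);
 *
 * and spa_evict_all() uses POOL_SCRUB_NONE with force to tear down any
 * scrub in progress before unloading a pool.
 */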
1820 | |
1544 | 1821 /* |
1822 * ========================================================================== | |
1823 * SPA async task processing | |
1824 * ========================================================================== | |
1825 */ | |
1826 | |
1827 static void | |
1828 spa_async_reopen(spa_t *spa) | |
789 | 1829 { |
1544 | 1830 vdev_t *rvd = spa->spa_root_vdev; |
1831 vdev_t *tvd; | |
1832 int c; | |
1833 | |
1834 spa_config_enter(spa, RW_WRITER, FTAG); | |
1835 | |
1836 for (c = 0; c < rvd->vdev_children; c++) { | |
1837 tvd = rvd->vdev_child[c]; | |
1838 if (tvd->vdev_reopen_wanted) { | |
1839 tvd->vdev_reopen_wanted = 0; | |
1840 vdev_reopen(tvd); | |
1841 } | |
1842 } | |
789 | 1843 |
1544 | 1844 spa_config_exit(spa, FTAG); |
1845 } | |
1846 | |
1847 static void | |
1848 spa_async_thread(spa_t *spa) | |
1849 { | |
1850 int tasks; | |
1851 | |
1852 ASSERT(spa->spa_sync_on); | |
789 | 1853 |
1544 | 1854 mutex_enter(&spa->spa_async_lock); |
1855 tasks = spa->spa_async_tasks; | |
1856 spa->spa_async_tasks = 0; | |
1857 mutex_exit(&spa->spa_async_lock); | |
1858 | |
1859 /* | |
1860 * See if any devices need to be reopened. | |
1861 */ | |
1862 if (tasks & SPA_ASYNC_REOPEN) | |
1863 spa_async_reopen(spa); | |
1864 | |
1865 /* | |
1866 * If any devices are done replacing, detach them. | |
1867 */ | |
1868 if (tasks & SPA_ASYNC_REPLACE_DONE) | |
789 | 1869 spa_vdev_replace_done(spa); |
1870 | |
1544 | 1871 /* |
1872 * Kick off a scrub. | |
1873 */ | |
1874 if (tasks & SPA_ASYNC_SCRUB) | |
1875 VERIFY(spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_TRUE) == 0); | |
1876 | |
1877 /* | |
1878 * Kick off a resilver. | |
1879 */ | |
1880 if (tasks & SPA_ASYNC_RESILVER) | |
1881 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); | |
1882 | |
1883 /* | |
1884 * Let the world know that we're done. | |
1885 */ | |
1886 mutex_enter(&spa->spa_async_lock); | |
1887 spa->spa_async_thread = NULL; | |
1888 cv_broadcast(&spa->spa_async_cv); | |
1889 mutex_exit(&spa->spa_async_lock); | |
1890 thread_exit(); | |
1891 } | |
1892 | |
1893 void | |
1894 spa_async_suspend(spa_t *spa) | |
1895 { | |
1896 mutex_enter(&spa->spa_async_lock); | |
1897 spa->spa_async_suspended++; | |
1898 while (spa->spa_async_thread != NULL) | |
1899 cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); | |
1900 mutex_exit(&spa->spa_async_lock); | |
1901 } | |
1902 | |
1903 void | |
1904 spa_async_resume(spa_t *spa) | |
1905 { | |
1906 mutex_enter(&spa->spa_async_lock); | |
1907 ASSERT(spa->spa_async_suspended != 0); | |
1908 spa->spa_async_suspended--; | |
1909 mutex_exit(&spa->spa_async_lock); | |
1910 } | |
1911 | |
1912 static void | |
1913 spa_async_dispatch(spa_t *spa) | |
1914 { | |
1915 mutex_enter(&spa->spa_async_lock); | |
1916 if (spa->spa_async_tasks && !spa->spa_async_suspended && | |
1917 spa->spa_async_thread == NULL) | |
1918 spa->spa_async_thread = thread_create(NULL, 0, | |
1919 spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); | |
1920 mutex_exit(&spa->spa_async_lock); | |
1921 } | |
1922 | |
1923 void | |
1924 spa_async_request(spa_t *spa, int task) | |
1925 { | |
1926 mutex_enter(&spa->spa_async_lock); | |
1927 spa->spa_async_tasks |= task; | |
1928 mutex_exit(&spa->spa_async_lock); | |
789 | 1929 } |
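/*
 * Request/dispatch pattern, sketched: producers only set task bits via
 * spa_async_request(); nothing runs until spa_async_dispatch() (called at
 * the end of spa_sync()) creates a worker thread, and then only if tasks
 * are pending, no one has suspended async work, and no worker exists yet:
 *
 *	spa_async_request(spa, SPA_ASYNC_REPLACE_DONE);	...from scrub thread...
 *	...
 *	spa_async_dispatch(spa);			...from spa_sync()...
 */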
1930 | |
1931 /* | |
1932 * ========================================================================== | |
1933 * SPA syncing routines | |
1934 * ========================================================================== | |
1935 */ | |
1936 | |
1937 static void | |
1938 spa_sync_deferred_frees(spa_t *spa, uint64_t txg) | |
1939 { | |
1940 bplist_t *bpl = &spa->spa_sync_bplist; | |
1941 dmu_tx_t *tx; | |
1942 blkptr_t blk; | |
1943 uint64_t itor = 0; | |
1944 zio_t *zio; | |
1945 int error; | |
1946 uint8_t c = 1; | |
1947 | |
1948 zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CONFIG_HELD); | |
1949 | |
1950 while (bplist_iterate(bpl, &itor, &blk) == 0) | |
1951 zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL)); | |
1952 | |
1953 error = zio_wait(zio); | |
1954 ASSERT3U(error, ==, 0); | |
1955 | |
1956 tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); | |
1957 bplist_vacate(bpl, tx); | |
1958 | |
1959 /* | |
1960 * Pre-dirty the first block so we sync to convergence faster. | |
1961 * (Usually only the first block is needed.) | |
1962 */ | |
1963 dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); | |
1964 dmu_tx_commit(tx); | |
1965 } | |
1966 | |
1967 static void | |
1968 spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) | |
1969 { | |
1970 nvlist_t *config; | |
1971 char *packed = NULL; | |
1972 size_t nvsize = 0; | |
1973 dmu_buf_t *db; | |
1974 | |
1975 if (list_is_empty(&spa->spa_dirty_list)) | |
1976 return; | |
1977 | |
1978 config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE); | |
1979 | |
1980 spa_config_set(spa, config); | |
1981 | |
1982 VERIFY(nvlist_size(config, &nvsize, NV_ENCODE_XDR) == 0); | |
1983 | |
1984 packed = kmem_alloc(nvsize, KM_SLEEP); | |
1985 | |
1544 | 1986 VERIFY(nvlist_pack(config, &packed, &nvsize, NV_ENCODE_XDR, |
1987 KM_SLEEP) == 0); | |
789 | 1988 |
1989 dmu_write(spa->spa_meta_objset, spa->spa_config_object, 0, nvsize, | |
1990 packed, tx); | |
1991 | |
1992 kmem_free(packed, nvsize); | |
1993 | |
1544 | 1994 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, |
1995 spa->spa_config_object, FTAG, &db)); | |
789 | 1996 dmu_buf_will_dirty(db, tx); |
1997 *(uint64_t *)db->db_data = nvsize; | |
1544 | 1998 dmu_buf_rele(db, FTAG); |
789 | 1999 } |
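/*
 * On-disk shape implied above, for reference: the pool config lives in
 * spa_config_object as an XDR-packed nvlist, with the packed size kept in
 * the object's bonus buffer.  A reader would reverse the steps (sketch):
 *
 *	VERIFY(0 == dmu_bonus_hold(mos, spa->spa_config_object, FTAG, &db));
 *	nvsize = *(uint64_t *)db->db_data;
 *	dmu_buf_rele(db, FTAG);
 *	...read nvsize bytes, then nvlist_unpack() into an nvlist_t...
 */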
2000 | |
2001 /* | |
2002 * Sync the specified transaction group. New blocks may be dirtied as | |
2003 * part of the process, so we iterate until it converges. | |
2004 */ | |
2005 void | |
2006 spa_sync(spa_t *spa, uint64_t txg) | |
2007 { | |
2008 dsl_pool_t *dp = spa->spa_dsl_pool; | |
2009 objset_t *mos = spa->spa_meta_objset; | |
2010 bplist_t *bpl = &spa->spa_sync_bplist; | |
2011 vdev_t *vd; | |
2012 dmu_tx_t *tx; | |
2013 int dirty_vdevs; | |
2014 | |
2015 /* | |
2016 * Lock out configuration changes. | |
2017 */ | |
1544 | 2018 spa_config_enter(spa, RW_READER, FTAG); |
789 | 2019 |
2020 spa->spa_syncing_txg = txg; | |
2021 spa->spa_sync_pass = 0; | |
2022 | |
1544 | 2023 VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); |
789 | 2024 |
2025 /* | |
2026 * If anything has changed in this txg, push the deferred frees | |
2027 * from the previous txg. If not, leave them alone so that we | |
2028 * don't generate work on an otherwise idle system. | |
2029 */ | |
2030 if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || | |
2031 !txg_list_empty(&dp->dp_dirty_dirs, txg)) | |
2032 spa_sync_deferred_frees(spa, txg); | |
2033 | |
2034 /* | |
2035 * Iterate to convergence. | |
2036 */ | |
2037 do { | |
2038 spa->spa_sync_pass++; | |
2039 | |
2040 tx = dmu_tx_create_assigned(dp, txg); | |
2041 spa_sync_config_object(spa, tx); | |
2042 dmu_tx_commit(tx); | |
2043 | |
1544 | 2044 spa_errlog_sync(spa, txg); |
2045 | |
789 | 2046 dsl_pool_sync(dp, txg); |
2047 | |
2048 dirty_vdevs = 0; | |
2049 while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { | |
2050 vdev_sync(vd, txg); | |
2051 dirty_vdevs++; | |
2052 } | |
2053 | |
2054 tx = dmu_tx_create_assigned(dp, txg); | |
2055 bplist_sync(bpl, tx); | |
2056 dmu_tx_commit(tx); | |
2057 | |
2058 } while (dirty_vdevs); | |
2059 | |
2060 bplist_close(bpl); | |
2061 | |
2062 dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); | |
2063 | |
2064 /* | |
2065 * Rewrite the vdev configuration (which includes the uberblock) | |
2066 * to commit the transaction group. | |
2067 */ | |
1544 | 2068 VERIFY(0 == spa_sync_labels(spa, txg)); |
789 | 2069 |
2070 /* | |
2071 * Make a stable copy of the fully synced uberblock. | |
2072 * We use this as the root for pool traversals. | |
2073 */ | |
2074 spa->spa_traverse_wanted = 1; /* tells traverse_more() to stop */ | |
2075 | |
2076 spa_scrub_suspend(spa); /* stop scrubbing and finish I/Os */ | |
2077 | |
2078 rw_enter(&spa->spa_traverse_lock, RW_WRITER); | |
2079 spa->spa_traverse_wanted = 0; | |
2080 spa->spa_ubsync = spa->spa_uberblock; | |
2081 rw_exit(&spa->spa_traverse_lock); | |
2082 | |
2083 spa_scrub_resume(spa); /* resume scrub with new ubsync */ | |
2084 | |
2085 /* | |
2086 * Clean up the ZIL records for the synced txg. | |
2087 */ | |
2088 dsl_pool_zil_clean(dp); | |
2089 | |
2090 /* | |
2091 * Update usable space statistics. | |
2092 */ | |
2093 while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) | |
2094 vdev_sync_done(vd, txg); | |
2095 | |
2096 /* | |
2097 * It had better be the case that we didn't dirty anything | |
2098 * since spa_sync_labels(). | |
2099 */ | |
2100 ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); | |
2101 ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); | |
2102 ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); | |
2103 ASSERT(bpl->bpl_queue == NULL); | |
2104 | |
1544 | 2105 spa_config_exit(spa, FTAG); |
2106 | |
2107 /* | |
2108 * If any async tasks have been requested, kick them off. | |
2109 */ | |
2110 spa_async_dispatch(spa); | |
789 | 2111 } |
2112 | |
2113 /* | |
2114 * Sync all pools. We don't want to hold the namespace lock across these | |
2115 * operations, so we take a reference on the spa_t and drop the lock during the | |
2116 * sync. | |
2117 */ | |
2118 void | |
2119 spa_sync_allpools(void) | |
2120 { | |
2121 spa_t *spa = NULL; | |
2122 mutex_enter(&spa_namespace_lock); | |
2123 while ((spa = spa_next(spa)) != NULL) { | |
2124 if (spa_state(spa) != POOL_STATE_ACTIVE) | |
2125 continue; | |
2126 spa_open_ref(spa, FTAG); | |
2127 mutex_exit(&spa_namespace_lock); | |
2128 txg_wait_synced(spa_get_dsl(spa), 0); | |
2129 mutex_enter(&spa_namespace_lock); | |
2130 spa_close(spa, FTAG); | |
2131 } | |
2132 mutex_exit(&spa_namespace_lock); | |
2133 } | |
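/*
 * The hold/drop dance above is the general pattern (sketch) for any walk
 * of the pool namespace that must block per pool: take a reference first
 * so the spa_t cannot be removed while spa_namespace_lock is dropped.
 * spa_evict_all() below does the same around spa_scrub() and
 * spa_async_suspend().
 */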
2134 | |
2135 /* | |
2136 * ========================================================================== | |
2137 * Miscellaneous routines | |
2138 * ========================================================================== | |
2139 */ | |
2140 | |
2141 int | |
2142 spa_busy(void) | |
2143 { | |
2144 return (spa_active_count != 0); | |
2145 } | |
2146 | |
2147 /* | |
2148 * Remove all pools in the system. | |
2149 */ | |
2150 void | |
2151 spa_evict_all(void) | |
2152 { | |
2153 spa_t *spa; | |
2154 | |
2155 /* | |
2156 * Remove all cached state. All pools should be closed now, | |
2157 * so every spa in the AVL tree should be unreferenced. | |
2158 */ | |
2159 mutex_enter(&spa_namespace_lock); | |
2160 while ((spa = spa_next(NULL)) != NULL) { | |
2161 /* | |
1544 | 2162 * Stop async tasks. The async thread may need to detach |
2163 * a device that's been replaced, which requires grabbing | |
2164 * spa_namespace_lock, so we must drop it here. | |
789 | 2165 */ |
2166 spa_open_ref(spa, FTAG); | |
2167 mutex_exit(&spa_namespace_lock); | |
1544 | 2168 spa_async_suspend(spa); |
789 | 2169 VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); |
2170 mutex_enter(&spa_namespace_lock); | |
2171 spa_close(spa, FTAG); | |
2172 | |
2173 if (spa->spa_state != POOL_STATE_UNINITIALIZED) { | |
2174 spa_unload(spa); | |
2175 spa_deactivate(spa); | |
2176 } | |
2177 spa_remove(spa); | |
2178 } | |
2179 mutex_exit(&spa_namespace_lock); | |
2180 } | |
1544 | 2181 |
2182 vdev_t * | |
2183 spa_lookup_by_guid(spa_t *spa, uint64_t guid) | |
2184 { | |
2185 return (vdev_lookup_by_guid(spa->spa_root_vdev, guid)); | |
2186 } |