Mercurial > illumos > illumos-gate
annotate usr/src/uts/common/fs/zfs/spa.c @ 5094:71a3e95fb9e2
PSARC 2007/342 Enhanced ZFS Pool Properties
PSARC 2007/482 zpool upgrade -V
6565437 zpool property extensions
6561384 want 'zpool upgrade -V <version>' to upgrade to specific version number
6582755 zfs.h has some incorrect version macros
6595601 libzfs headers declare functions which don't exist
6603938 libzfs is using VERIFY() again
6538984 duplicated messages when get pool properties from an unsupported pool version
author | lling |
---|---|
date | Wed, 19 Sep 2007 10:32:40 -0700 |
parents | 41ec732c6d9f |
children | 33cb98223b2d |
rev | line source |
---|---|
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1544 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
2082 | 21 |
789 | 22 /* |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. |
789 | 24 * Use is subject to license terms. |
25 */ | |
26 | |
27 #pragma ident "%Z%%M% %I% %E% SMI" | |
28 | |
29 /* | |
30 * This file contains all the routines used when modifying on-disk SPA state. | |
31 * This includes opening, importing, destroying, exporting a pool, and syncing a | |
32 * pool. | |
33 */ | |
34 | |
35 #include <sys/zfs_context.h> | |
1544 | 36 #include <sys/fm/fs/zfs.h> |
789 | 37 #include <sys/spa_impl.h> |
38 #include <sys/zio.h> | |
39 #include <sys/zio_checksum.h> | |
40 #include <sys/zio_compress.h> | |
41 #include <sys/dmu.h> | |
42 #include <sys/dmu_tx.h> | |
43 #include <sys/zap.h> | |
44 #include <sys/zil.h> | |
45 #include <sys/vdev_impl.h> | |
46 #include <sys/metaslab.h> | |
47 #include <sys/uberblock_impl.h> | |
48 #include <sys/txg.h> | |
49 #include <sys/avl.h> | |
50 #include <sys/dmu_traverse.h> | |
3912 | 51 #include <sys/dmu_objset.h> |
789 | 52 #include <sys/unique.h> |
53 #include <sys/dsl_pool.h> | |
3912 | 54 #include <sys/dsl_dataset.h> |
789 | 55 #include <sys/dsl_dir.h> |
56 #include <sys/dsl_prop.h> | |
3912 | 57 #include <sys/dsl_synctask.h> |
789 | 58 #include <sys/fs/zfs.h> |
59 #include <sys/callb.h> | |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
60 #include <sys/systeminfo.h> |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
61 #include <sys/sunddi.h> |
789 | 62 |
5094 | 63 #include "zfs_prop.h" |
64 | |
2986 | 65 int zio_taskq_threads = 8; |
66 | |
5094 | 67 static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); |
68 | |
69 /* | |
70 * ========================================================================== | |
71 * SPA properties routines | |
72 * ========================================================================== | |
73 */ | |
74 | |
75 /* | |
76 * Add a (source=src, propname=propval) list to an nvlist. | |
77 */ | |
78 static int | |
79 spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, | |
80 uint64_t intval, zprop_source_t src) | |
81 { | |
82 const char *propname = zpool_prop_to_name(prop); | |
83 nvlist_t *propval; | |
84 int err = 0; | |
85 | |
86 if (err = nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP)) | |
87 return (err); | |
88 | |
89 if (err = nvlist_add_uint64(propval, ZPROP_SOURCE, src)) | |
90 goto out; | |
91 | |
92 if (strval != NULL) { | |
93 if (err = nvlist_add_string(propval, ZPROP_VALUE, strval)) | |
94 goto out; | |
95 } else { | |
96 if (err = nvlist_add_uint64(propval, ZPROP_VALUE, intval)) | |
97 goto out; | |
98 } | |
99 | |
100 err = nvlist_add_nvlist(nvl, propname, propval); | |
101 out: | |
102 nvlist_free(propval); | |
103 return (err); | |
104 } | |
105 | |
106 /* | |
107 * Get property values from the spa configuration. | |
108 */ | |
109 static int | |
110 spa_prop_get_config(spa_t *spa, nvlist_t **nvp) | |
111 { | |
112 uint64_t size = spa_get_space(spa); | |
113 uint64_t used = spa_get_alloc(spa); | |
114 uint64_t cap, version; | |
115 zprop_source_t src = ZPROP_SRC_NONE; | |
116 int err; | |
117 | |
118 /* | |
119 * readonly properties | |
120 */ | |
121 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa->spa_name, | |
122 0, src)) | |
123 return (err); | |
124 | |
125 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src)) | |
126 return (err); | |
127 | |
128 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src)) | |
129 return (err); | |
130 | |
131 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, | |
132 size - used, src)) | |
133 return (err); | |
134 | |
135 cap = (size == 0) ? 0 : (used * 100 / size); | |
136 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src)) | |
137 return (err); | |
138 | |
139 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, | |
140 spa_guid(spa), src)) | |
141 return (err); | |
142 | |
143 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, | |
144 spa->spa_root_vdev->vdev_state, src)) | |
145 return (err); | |
146 | |
147 /* | |
148 * settable properties that are not stored in the pool property object. | |
149 */ | |
150 version = spa_version(spa); | |
151 if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) | |
152 src = ZPROP_SRC_DEFAULT; | |
153 else | |
154 src = ZPROP_SRC_LOCAL; | |
155 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, | |
156 version, src)) | |
157 return (err); | |
158 | |
159 if (spa->spa_root != NULL) { | |
160 src = ZPROP_SRC_LOCAL; | |
161 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, | |
162 spa->spa_root, 0, src)) | |
163 return (err); | |
164 } | |
165 | |
166 if (spa->spa_temporary == | |
167 zpool_prop_default_numeric(ZPOOL_PROP_TEMPORARY)) | |
168 src = ZPROP_SRC_DEFAULT; | |
169 else | |
170 src = ZPROP_SRC_LOCAL; | |
171 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_TEMPORARY, NULL, | |
172 spa->spa_temporary, src)) | |
173 return (err); | |
174 | |
175 return (0); | |
176 } | |
177 | |
178 /* | |
179 * Get zpool property values. | |
180 */ | |
181 int | |
182 spa_prop_get(spa_t *spa, nvlist_t **nvp) | |
183 { | |
184 zap_cursor_t zc; | |
185 zap_attribute_t za; | |
186 objset_t *mos = spa->spa_meta_objset; | |
187 int err; | |
188 | |
189 if (err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)) | |
190 return (err); | |
191 | |
192 /* | |
193 * Get properties from the spa config. | |
194 */ | |
195 if (err = spa_prop_get_config(spa, nvp)) | |
196 goto out; | |
197 | |
198 mutex_enter(&spa->spa_props_lock); | |
199 /* If no pool property object, no more prop to get. */ | |
200 if (spa->spa_pool_props_object == 0) { | |
201 mutex_exit(&spa->spa_props_lock); | |
202 return (0); | |
203 } | |
204 | |
205 /* | |
206 * Get properties from the MOS pool property object. | |
207 */ | |
208 for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); | |
209 (err = zap_cursor_retrieve(&zc, &za)) == 0; | |
210 zap_cursor_advance(&zc)) { | |
211 uint64_t intval = 0; | |
212 char *strval = NULL; | |
213 zprop_source_t src = ZPROP_SRC_DEFAULT; | |
214 zpool_prop_t prop; | |
215 | |
216 if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) | |
217 continue; | |
218 | |
219 switch (za.za_integer_length) { | |
220 case 8: | |
221 /* integer property */ | |
222 if (za.za_first_integer != | |
223 zpool_prop_default_numeric(prop)) | |
224 src = ZPROP_SRC_LOCAL; | |
225 | |
226 if (prop == ZPOOL_PROP_BOOTFS) { | |
227 dsl_pool_t *dp; | |
228 dsl_dataset_t *ds = NULL; | |
229 | |
230 dp = spa_get_dsl(spa); | |
231 rw_enter(&dp->dp_config_rwlock, RW_READER); | |
232 if (err = dsl_dataset_open_obj(dp, | |
233 za.za_first_integer, NULL, DS_MODE_NONE, | |
234 FTAG, &ds)) { | |
235 rw_exit(&dp->dp_config_rwlock); | |
236 break; | |
237 } | |
238 | |
239 strval = kmem_alloc( | |
240 MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, | |
241 KM_SLEEP); | |
242 dsl_dataset_name(ds, strval); | |
243 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
244 rw_exit(&dp->dp_config_rwlock); | |
245 } else { | |
246 strval = NULL; | |
247 intval = za.za_first_integer; | |
248 } | |
249 | |
250 err = spa_prop_add_list(*nvp, prop, strval, | |
251 intval, src); | |
252 | |
253 if (strval != NULL) | |
254 kmem_free(strval, | |
255 MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); | |
256 | |
257 break; | |
258 | |
259 case 1: | |
260 /* string property */ | |
261 strval = kmem_alloc(za.za_num_integers, KM_SLEEP); | |
262 err = zap_lookup(mos, spa->spa_pool_props_object, | |
263 za.za_name, 1, za.za_num_integers, strval); | |
264 if (err) { | |
265 kmem_free(strval, za.za_num_integers); | |
266 break; | |
267 } | |
268 err = spa_prop_add_list(*nvp, prop, strval, 0, src); | |
269 kmem_free(strval, za.za_num_integers); | |
270 break; | |
271 | |
272 default: | |
273 break; | |
274 } | |
275 } | |
276 zap_cursor_fini(&zc); | |
277 mutex_exit(&spa->spa_props_lock); | |
278 out: | |
279 if (err && err != ENOENT) { | |
280 nvlist_free(*nvp); | |
281 return (err); | |
282 } | |
283 | |
284 return (0); | |
285 } | |
286 | |
287 /* | |
288 * Validate the given pool properties nvlist and modify the list | |
289 * for the property values to be set. | |
290 */ | |
291 static int | |
292 spa_prop_validate(spa_t *spa, nvlist_t *props) | |
293 { | |
294 nvpair_t *elem; | |
295 int error = 0, reset_bootfs = 0; | |
296 uint64_t objnum; | |
297 | |
298 elem = NULL; | |
299 while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { | |
300 zpool_prop_t prop; | |
301 char *propname, *strval; | |
302 uint64_t intval; | |
303 vdev_t *rvdev; | |
304 char *vdev_type; | |
305 objset_t *os; | |
306 | |
307 propname = nvpair_name(elem); | |
308 | |
309 if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) | |
310 return (EINVAL); | |
311 | |
312 switch (prop) { | |
313 case ZPOOL_PROP_VERSION: | |
314 error = nvpair_value_uint64(elem, &intval); | |
315 if (!error && | |
316 (intval < spa_version(spa) || intval > SPA_VERSION)) | |
317 error = EINVAL; | |
318 break; | |
319 | |
320 case ZPOOL_PROP_DELEGATION: | |
321 case ZPOOL_PROP_AUTOREPLACE: | |
322 error = nvpair_value_uint64(elem, &intval); | |
323 if (!error && intval > 1) | |
324 error = EINVAL; | |
325 break; | |
326 | |
327 case ZPOOL_PROP_BOOTFS: | |
328 if (spa_version(spa) < SPA_VERSION_BOOTFS) { | |
329 error = ENOTSUP; | |
330 break; | |
331 } | |
332 | |
333 /* | |
334 * A bootable filesystem can not be on a RAIDZ pool | |
335 * nor a striped pool with more than 1 device. | |
336 */ | |
337 rvdev = spa->spa_root_vdev; | |
338 vdev_type = | |
339 rvdev->vdev_child[0]->vdev_ops->vdev_op_type; | |
340 if (rvdev->vdev_children > 1 || | |
341 strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 || | |
342 strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) { | |
343 error = ENOTSUP; | |
344 break; | |
345 } | |
346 | |
347 reset_bootfs = 1; | |
348 | |
349 error = nvpair_value_string(elem, &strval); | |
350 | |
351 if (!error) { | |
352 if (strval == NULL || strval[0] == '\0') { | |
353 objnum = zpool_prop_default_numeric( | |
354 ZPOOL_PROP_BOOTFS); | |
355 break; | |
356 } | |
357 | |
358 if (error = dmu_objset_open(strval, DMU_OST_ZFS, | |
359 DS_MODE_STANDARD | DS_MODE_READONLY, &os)) | |
360 break; | |
361 objnum = dmu_objset_id(os); | |
362 dmu_objset_close(os); | |
363 } | |
364 break; | |
365 } | |
366 | |
367 if (error) | |
368 break; | |
369 } | |
370 | |
371 if (!error && reset_bootfs) { | |
372 error = nvlist_remove(props, | |
373 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); | |
374 | |
375 if (!error) { | |
376 error = nvlist_add_uint64(props, | |
377 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); | |
378 } | |
379 } | |
380 | |
381 return (error); | |
382 } | |
383 | |
384 int | |
385 spa_prop_set(spa_t *spa, nvlist_t *nvp) | |
386 { | |
387 int error; | |
388 | |
389 if ((error = spa_prop_validate(spa, nvp)) != 0) | |
390 return (error); | |
391 | |
392 return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, | |
393 spa, nvp, 3)); | |
394 } | |
395 | |
396 /* | |
397 * If the bootfs property value is dsobj, clear it. | |
398 */ | |
399 void | |
400 spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) | |
401 { | |
402 if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { | |
403 VERIFY(zap_remove(spa->spa_meta_objset, | |
404 spa->spa_pool_props_object, | |
405 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); | |
406 spa->spa_bootfs = 0; | |
407 } | |
408 } | |
409 | |
789 | 410 /* |
411 * ========================================================================== | |
412 * SPA state manipulation (open/create/destroy/import/export) | |
413 * ========================================================================== | |
414 */ | |
415 | |
1544 | 416 static int |
417 spa_error_entry_compare(const void *a, const void *b) | |
418 { | |
419 spa_error_entry_t *sa = (spa_error_entry_t *)a; | |
420 spa_error_entry_t *sb = (spa_error_entry_t *)b; | |
421 int ret; | |
422 | |
423 ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, | |
424 sizeof (zbookmark_t)); | |
425 | |
426 if (ret < 0) | |
427 return (-1); | |
428 else if (ret > 0) | |
429 return (1); | |
430 else | |
431 return (0); | |
432 } | |
433 | |
434 /* | |
435 * Utility function which retrieves copies of the current logs and | |
436 * re-initializes them in the process. | |
437 */ | |
438 void | |
439 spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) | |
440 { | |
441 ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); | |
442 | |
443 bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); | |
444 bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); | |
445 | |
446 avl_create(&spa->spa_errlist_scrub, | |
447 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
448 offsetof(spa_error_entry_t, se_avl)); | |
449 avl_create(&spa->spa_errlist_last, | |
450 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
451 offsetof(spa_error_entry_t, se_avl)); | |
452 } | |
453 | |
789 | 454 /* |
455 * Activate an uninitialized pool. | |
456 */ | |
457 static void | |
458 spa_activate(spa_t *spa) | |
459 { | |
460 int t; | |
461 | |
462 ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); | |
463 | |
464 spa->spa_state = POOL_STATE_ACTIVE; | |
465 | |
466 spa->spa_normal_class = metaslab_class_create(); | |
4527 | 467 spa->spa_log_class = metaslab_class_create(); |
789 | 468 |
469 for (t = 0; t < ZIO_TYPES; t++) { | |
470 spa->spa_zio_issue_taskq[t] = taskq_create("spa_zio_issue", | |
2986 | 471 zio_taskq_threads, maxclsyspri, 50, INT_MAX, |
789 | 472 TASKQ_PREPOPULATE); |
473 spa->spa_zio_intr_taskq[t] = taskq_create("spa_zio_intr", | |
2986 | 474 zio_taskq_threads, maxclsyspri, 50, INT_MAX, |
789 | 475 TASKQ_PREPOPULATE); |
476 } | |
477 | |
478 list_create(&spa->spa_dirty_list, sizeof (vdev_t), | |
479 offsetof(vdev_t, vdev_dirty_node)); | |
480 | |
481 txg_list_create(&spa->spa_vdev_txg_list, | |
482 offsetof(struct vdev, vdev_txg_node)); | |
1544 | 483 |
484 avl_create(&spa->spa_errlist_scrub, | |
485 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
486 offsetof(spa_error_entry_t, se_avl)); | |
487 avl_create(&spa->spa_errlist_last, | |
488 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
489 offsetof(spa_error_entry_t, se_avl)); | |
789 | 490 } |
491 | |
492 /* | |
493 * Opposite of spa_activate(). | |
494 */ | |
495 static void | |
496 spa_deactivate(spa_t *spa) | |
497 { | |
498 int t; | |
499 | |
500 ASSERT(spa->spa_sync_on == B_FALSE); | |
501 ASSERT(spa->spa_dsl_pool == NULL); | |
502 ASSERT(spa->spa_root_vdev == NULL); | |
503 | |
504 ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); | |
505 | |
506 txg_list_destroy(&spa->spa_vdev_txg_list); | |
507 | |
508 list_destroy(&spa->spa_dirty_list); | |
509 | |
510 for (t = 0; t < ZIO_TYPES; t++) { | |
511 taskq_destroy(spa->spa_zio_issue_taskq[t]); | |
512 taskq_destroy(spa->spa_zio_intr_taskq[t]); | |
513 spa->spa_zio_issue_taskq[t] = NULL; | |
514 spa->spa_zio_intr_taskq[t] = NULL; | |
515 } | |
516 | |
517 metaslab_class_destroy(spa->spa_normal_class); | |
518 spa->spa_normal_class = NULL; | |
519 | |
4527 | 520 metaslab_class_destroy(spa->spa_log_class); |
521 spa->spa_log_class = NULL; | |
522 | |
1544 | 523 /* |
524 * If this was part of an import or the open otherwise failed, we may | |
525 * still have errors left in the queues. Empty them just in case. | |
526 */ | |
527 spa_errlog_drain(spa); | |
528 | |
529 avl_destroy(&spa->spa_errlist_scrub); | |
530 avl_destroy(&spa->spa_errlist_last); | |
531 | |
789 | 532 spa->spa_state = POOL_STATE_UNINITIALIZED; |
533 } | |
534 | |
535 /* | |
536 * Verify a pool configuration, and construct the vdev tree appropriately. This | |
537 * will create all the necessary vdevs in the appropriate layout, with each vdev | |
538 * in the CLOSED state. This will prep the pool before open/creation/import. | |
539 * All vdev validation is done by the vdev_alloc() routine. | |
540 */ | |
2082 | 541 static int |
542 spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, | |
543 uint_t id, int atype) | |
789 | 544 { |
545 nvlist_t **child; | |
546 uint_t c, children; | |
2082 | 547 int error; |
548 | |
549 if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) | |
550 return (error); | |
551 | |
552 if ((*vdp)->vdev_ops->vdev_op_leaf) | |
553 return (0); | |
789 | 554 |
555 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, | |
556 &child, &children) != 0) { | |
2082 | 557 vdev_free(*vdp); |
558 *vdp = NULL; | |
559 return (EINVAL); | |
789 | 560 } |
561 | |
562 for (c = 0; c < children; c++) { | |
2082 | 563 vdev_t *vd; |
564 if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, | |
565 atype)) != 0) { | |
566 vdev_free(*vdp); | |
567 *vdp = NULL; | |
568 return (error); | |
789 | 569 } |
570 } | |
571 | |
2082 | 572 ASSERT(*vdp != NULL); |
573 | |
574 return (0); | |
789 | 575 } |
576 | |
577 /* | |
578 * Opposite of spa_load(). | |
579 */ | |
580 static void | |
581 spa_unload(spa_t *spa) | |
582 { | |
2082 | 583 int i; |
584 | |
789 | 585 /* |
1544 | 586 * Stop async tasks. |
587 */ | |
588 spa_async_suspend(spa); | |
589 | |
590 /* | |
789 | 591 * Stop syncing. |
592 */ | |
593 if (spa->spa_sync_on) { | |
594 txg_sync_stop(spa->spa_dsl_pool); | |
595 spa->spa_sync_on = B_FALSE; | |
596 } | |
597 | |
598 /* | |
599 * Wait for any outstanding prefetch I/O to complete. | |
600 */ | |
1544 | 601 spa_config_enter(spa, RW_WRITER, FTAG); |
602 spa_config_exit(spa, FTAG); | |
789 | 603 |
604 /* | |
605 * Close the dsl pool. | |
606 */ | |
607 if (spa->spa_dsl_pool) { | |
608 dsl_pool_close(spa->spa_dsl_pool); | |
609 spa->spa_dsl_pool = NULL; | |
610 } | |
611 | |
612 /* | |
613 * Close all vdevs. | |
614 */ | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
615 if (spa->spa_root_vdev) |
789 | 616 vdev_free(spa->spa_root_vdev); |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
617 ASSERT(spa->spa_root_vdev == NULL); |
1544 | 618 |
2082 | 619 for (i = 0; i < spa->spa_nspares; i++) |
620 vdev_free(spa->spa_spares[i]); | |
621 if (spa->spa_spares) { | |
622 kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *)); | |
623 spa->spa_spares = NULL; | |
624 } | |
625 if (spa->spa_sparelist) { | |
626 nvlist_free(spa->spa_sparelist); | |
627 spa->spa_sparelist = NULL; | |
628 } | |
629 | |
1544 | 630 spa->spa_async_suspended = 0; |
789 | 631 } |
632 | |
633 /* | |
2082 | 634 * Load (or re-load) the current list of vdevs describing the active spares for |
635 * this pool. When this is called, we have some form of basic information in | |
636 * 'spa_sparelist'. We parse this into vdevs, try to open them, and then | |
637 * re-generate a more complete list including status information. | |
638 */ | |
639 static void | |
640 spa_load_spares(spa_t *spa) | |
641 { | |
642 nvlist_t **spares; | |
643 uint_t nspares; | |
644 int i; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
645 vdev_t *vd, *tvd; |
2082 | 646 |
647 /* | |
648 * First, close and free any existing spare vdevs. | |
649 */ | |
650 for (i = 0; i < spa->spa_nspares; i++) { | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
651 vd = spa->spa_spares[i]; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
652 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
653 /* Undo the call to spa_activate() below */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
654 if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL && |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
655 tvd->vdev_isspare) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
656 spa_spare_remove(tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
657 vdev_close(vd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
658 vdev_free(vd); |
2082 | 659 } |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
660 |
2082 | 661 if (spa->spa_spares) |
662 kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *)); | |
663 | |
664 if (spa->spa_sparelist == NULL) | |
665 nspares = 0; | |
666 else | |
667 VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, | |
668 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); | |
669 | |
670 spa->spa_nspares = (int)nspares; | |
671 spa->spa_spares = NULL; | |
672 | |
673 if (nspares == 0) | |
674 return; | |
675 | |
676 /* | |
677 * Construct the array of vdevs, opening them to get status in the | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
678 * process. For each spare, there is potentially two different vdev_t |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
679 * structures associated with it: one in the list of spares (used only |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
680 * for basic validation purposes) and one in the active vdev |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
681 * configuration (if it's spared in). During this phase we open and |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
682 * validate each vdev on the spare list. If the vdev also exists in the |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
683 * active configuration, then we also mark this vdev as an active spare. |
2082 | 684 */ |
685 spa->spa_spares = kmem_alloc(nspares * sizeof (void *), KM_SLEEP); | |
686 for (i = 0; i < spa->spa_nspares; i++) { | |
687 VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, | |
688 VDEV_ALLOC_SPARE) == 0); | |
689 ASSERT(vd != NULL); | |
690 | |
691 spa->spa_spares[i] = vd; | |
692 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
693 if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
694 if (!tvd->vdev_isspare) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
695 spa_spare_add(tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
696 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
697 /* |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
698 * We only mark the spare active if we were successfully |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
699 * able to load the vdev. Otherwise, importing a pool |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
700 * with a bad active spare would result in strange |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
701 * behavior, because multiple pool would think the spare |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
702 * is actively in use. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
703 * |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
704 * There is a vulnerability here to an equally bizarre |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
705 * circumstance, where a dead active spare is later |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
706 * brought back to life (onlined or otherwise). Given |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
707 * the rarity of this scenario, and the extra complexity |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
708 * it adds, we ignore the possibility. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
709 */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
710 if (!vdev_is_dead(tvd)) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
711 spa_spare_activate(tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
712 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
713 |
2082 | 714 if (vdev_open(vd) != 0) |
715 continue; | |
716 | |
717 vd->vdev_top = vd; | |
718 (void) vdev_validate_spare(vd); | |
719 } | |
720 | |
721 /* | |
722 * Recompute the stashed list of spares, with status information | |
723 * this time. | |
724 */ | |
725 VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, | |
726 DATA_TYPE_NVLIST_ARRAY) == 0); | |
727 | |
728 spares = kmem_alloc(spa->spa_nspares * sizeof (void *), KM_SLEEP); | |
729 for (i = 0; i < spa->spa_nspares; i++) | |
730 spares[i] = vdev_config_generate(spa, spa->spa_spares[i], | |
731 B_TRUE, B_TRUE); | |
732 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, | |
733 spares, spa->spa_nspares) == 0); | |
734 for (i = 0; i < spa->spa_nspares; i++) | |
735 nvlist_free(spares[i]); | |
736 kmem_free(spares, spa->spa_nspares * sizeof (void *)); | |
737 } | |
738 | |
739 static int | |
740 load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) | |
741 { | |
742 dmu_buf_t *db; | |
743 char *packed = NULL; | |
744 size_t nvsize = 0; | |
745 int error; | |
746 *value = NULL; | |
747 | |
748 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); | |
749 nvsize = *(uint64_t *)db->db_data; | |
750 dmu_buf_rele(db, FTAG); | |
751 | |
752 packed = kmem_alloc(nvsize, KM_SLEEP); | |
753 error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed); | |
754 if (error == 0) | |
755 error = nvlist_unpack(packed, nvsize, value, 0); | |
756 kmem_free(packed, nvsize); | |
757 | |
758 return (error); | |
759 } | |
760 | |
761 /* | |
4451 | 762 * Checks to see if the given vdev could not be opened, in which case we post a |
763 * sysevent to notify the autoreplace code that the device has been removed. | |
764 */ | |
765 static void | |
766 spa_check_removed(vdev_t *vd) | |
767 { | |
768 int c; | |
769 | |
770 for (c = 0; c < vd->vdev_children; c++) | |
771 spa_check_removed(vd->vdev_child[c]); | |
772 | |
773 if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { | |
774 zfs_post_autoreplace(vd->vdev_spa, vd); | |
775 spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); | |
776 } | |
777 } | |
778 | |
779 /* | |
789 | 780 * Load an existing storage pool, using the pool's builtin spa_config as a |
1544 | 781 * source of configuration information. |
789 | 782 */ |
783 static int | |
1544 | 784 spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) |
789 | 785 { |
786 int error = 0; | |
787 nvlist_t *nvroot = NULL; | |
788 vdev_t *rvd; | |
789 uberblock_t *ub = &spa->spa_uberblock; | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
790 uint64_t config_cache_txg = spa->spa_config_txg; |
789 | 791 uint64_t pool_guid; |
2082 | 792 uint64_t version; |
789 | 793 zio_t *zio; |
4451 | 794 uint64_t autoreplace = 0; |
789 | 795 |
1544 | 796 spa->spa_load_state = state; |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
797 |
789 | 798 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || |
1733 | 799 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { |
1544 | 800 error = EINVAL; |
801 goto out; | |
802 } | |
789 | 803 |
2082 | 804 /* |
805 * Versioning wasn't explicitly added to the label until later, so if | |
806 * it's not present treat it as the initial version. | |
807 */ | |
808 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) | |
4577 | 809 version = SPA_VERSION_INITIAL; |
2082 | 810 |
1733 | 811 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, |
812 &spa->spa_config_txg); | |
813 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
814 if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && |
1544 | 815 spa_guid_exists(pool_guid, 0)) { |
816 error = EEXIST; | |
817 goto out; | |
818 } | |
789 | 819 |
2174
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
820 spa->spa_load_guid = pool_guid; |
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
821 |
789 | 822 /* |
2082 | 823 * Parse the configuration into a vdev tree. We explicitly set the |
824 * value that will be returned by spa_version() since parsing the | |
825 * configuration requires knowing the version number. | |
789 | 826 */ |
1544 | 827 spa_config_enter(spa, RW_WRITER, FTAG); |
2082 | 828 spa->spa_ubsync.ub_version = version; |
829 error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); | |
1544 | 830 spa_config_exit(spa, FTAG); |
789 | 831 |
2082 | 832 if (error != 0) |
1544 | 833 goto out; |
789 | 834 |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
835 ASSERT(spa->spa_root_vdev == rvd); |
789 | 836 ASSERT(spa_guid(spa) == pool_guid); |
837 | |
838 /* | |
839 * Try to open all vdevs, loading each label in the process. | |
840 */ | |
4070
4390ea390a1e
6386594 zdb message should be clearer when failing for lack of permissions
mc142369
parents:
3975
diff
changeset
|
841 error = vdev_open(rvd); |
4390ea390a1e
6386594 zdb message should be clearer when failing for lack of permissions
mc142369
parents:
3975
diff
changeset
|
842 if (error != 0) |
1544 | 843 goto out; |
789 | 844 |
845 /* | |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
846 * Validate the labels for all leaf vdevs. We need to grab the config |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
847 * lock because all label I/O is done with the ZIO_FLAG_CONFIG_HELD |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
848 * flag. |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
849 */ |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
850 spa_config_enter(spa, RW_READER, FTAG); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
851 error = vdev_validate(rvd); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
852 spa_config_exit(spa, FTAG); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
853 |
4070
4390ea390a1e
6386594 zdb message should be clearer when failing for lack of permissions
mc142369
parents:
3975
diff
changeset
|
854 if (error != 0) |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
855 goto out; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
856 |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
857 if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
858 error = ENXIO; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
859 goto out; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
860 } |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
861 |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
862 /* |
789 | 863 * Find the best uberblock. |
864 */ | |
865 bzero(ub, sizeof (uberblock_t)); | |
866 | |
867 zio = zio_root(spa, NULL, NULL, | |
868 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); | |
869 vdev_uberblock_load(zio, rvd, ub); | |
870 error = zio_wait(zio); | |
871 | |
872 /* | |
873 * If we weren't able to find a single valid uberblock, return failure. | |
874 */ | |
875 if (ub->ub_txg == 0) { | |
1760 | 876 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
877 VDEV_AUX_CORRUPT_DATA); | |
1544 | 878 error = ENXIO; |
879 goto out; | |
880 } | |
881 | |
882 /* | |
883 * If the pool is newer than the code, we can't open it. | |
884 */ | |
4577 | 885 if (ub->ub_version > SPA_VERSION) { |
1760 | 886 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
887 VDEV_AUX_VERSION_NEWER); | |
1544 | 888 error = ENOTSUP; |
889 goto out; | |
789 | 890 } |
891 | |
892 /* | |
893 * If the vdev guid sum doesn't match the uberblock, we have an | |
894 * incomplete configuration. | |
895 */ | |
1732 | 896 if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { |
1544 | 897 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
898 VDEV_AUX_BAD_GUID_SUM); | |
899 error = ENXIO; | |
900 goto out; | |
789 | 901 } |
902 | |
903 /* | |
904 * Initialize internal SPA structures. | |
905 */ | |
906 spa->spa_state = POOL_STATE_ACTIVE; | |
907 spa->spa_ubsync = spa->spa_uberblock; | |
908 spa->spa_first_txg = spa_last_synced_txg(spa) + 1; | |
1544 | 909 error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); |
910 if (error) { | |
911 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
912 VDEV_AUX_CORRUPT_DATA); | |
913 goto out; | |
914 } | |
789 | 915 spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; |
916 | |
1544 | 917 if (zap_lookup(spa->spa_meta_objset, |
789 | 918 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, |
1544 | 919 sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { |
920 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
921 VDEV_AUX_CORRUPT_DATA); | |
922 error = EIO; | |
923 goto out; | |
924 } | |
789 | 925 |
926 if (!mosconfig) { | |
2082 | 927 nvlist_t *newconfig; |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
928 uint64_t hostid; |
2082 | 929 |
930 if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { | |
1544 | 931 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
932 VDEV_AUX_CORRUPT_DATA); | |
933 error = EIO; | |
934 goto out; | |
935 } | |
789 | 936 |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
937 if (nvlist_lookup_uint64(newconfig, ZPOOL_CONFIG_HOSTID, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
938 &hostid) == 0) { |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
939 char *hostname; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
940 unsigned long myhostid = 0; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
941 |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
942 VERIFY(nvlist_lookup_string(newconfig, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
943 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
944 |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
945 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); |
4178
ad95fd86760b
6553537 zfs root fails to boot from a snv_63+zfsboot-pfinstall netinstall image
lling
parents:
4070
diff
changeset
|
946 if (hostid != 0 && myhostid != 0 && |
ad95fd86760b
6553537 zfs root fails to boot from a snv_63+zfsboot-pfinstall netinstall image
lling
parents:
4070
diff
changeset
|
947 (unsigned long)hostid != myhostid) { |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
948 cmn_err(CE_WARN, "pool '%s' could not be " |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
949 "loaded as it was last accessed by " |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
950 "another system (host: %s hostid: 0x%lx). " |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
951 "See: http://www.sun.com/msg/ZFS-8000-EY", |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
952 spa->spa_name, hostname, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
953 (unsigned long)hostid); |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
954 error = EBADF; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
955 goto out; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
956 } |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
957 } |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
958 |
789 | 959 spa_config_set(spa, newconfig); |
960 spa_unload(spa); | |
961 spa_deactivate(spa); | |
962 spa_activate(spa); | |
963 | |
1544 | 964 return (spa_load(spa, newconfig, state, B_TRUE)); |
965 } | |
966 | |
967 if (zap_lookup(spa->spa_meta_objset, | |
968 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, | |
969 sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { | |
970 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
971 VDEV_AUX_CORRUPT_DATA); | |
972 error = EIO; | |
973 goto out; | |
789 | 974 } |
975 | |
1544 | 976 /* |
2082 | 977 * Load the bit that tells us to use the new accounting function |
978 * (raid-z deflation). If we have an older pool, this will not | |
979 * be present. | |
980 */ | |
981 error = zap_lookup(spa->spa_meta_objset, | |
982 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, | |
983 sizeof (uint64_t), 1, &spa->spa_deflate); | |
984 if (error != 0 && error != ENOENT) { | |
985 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
986 VDEV_AUX_CORRUPT_DATA); | |
987 error = EIO; | |
988 goto out; | |
989 } | |
990 | |
991 /* | |
1544 | 992 * Load the persistent error log. If we have an older pool, this will |
993 * not be present. | |
994 */ | |
995 error = zap_lookup(spa->spa_meta_objset, | |
996 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, | |
997 sizeof (uint64_t), 1, &spa->spa_errlog_last); | |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
998 if (error != 0 && error != ENOENT) { |
1544 | 999 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
1000 VDEV_AUX_CORRUPT_DATA); | |
1001 error = EIO; | |
1002 goto out; | |
1003 } | |
1004 | |
1005 error = zap_lookup(spa->spa_meta_objset, | |
1006 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, | |
1007 sizeof (uint64_t), 1, &spa->spa_errlog_scrub); | |
1008 if (error != 0 && error != ENOENT) { | |
1009 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1010 VDEV_AUX_CORRUPT_DATA); | |
1011 error = EIO; | |
1012 goto out; | |
1013 } | |
789 | 1014 |
1015 /* | |
2926 | 1016 * Load the history object. If we have an older pool, this |
1017 * will not be present. | |
1018 */ | |
1019 error = zap_lookup(spa->spa_meta_objset, | |
1020 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, | |
1021 sizeof (uint64_t), 1, &spa->spa_history); | |
1022 if (error != 0 && error != ENOENT) { | |
1023 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1024 VDEV_AUX_CORRUPT_DATA); | |
1025 error = EIO; | |
1026 goto out; | |
1027 } | |
1028 | |
1029 /* | |
2082 | 1030 * Load any hot spares for this pool. |
1031 */ | |
1032 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, | |
1033 DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares_object); | |
1034 if (error != 0 && error != ENOENT) { | |
1035 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1036 VDEV_AUX_CORRUPT_DATA); | |
1037 error = EIO; | |
1038 goto out; | |
1039 } | |
1040 if (error == 0) { | |
4577 | 1041 ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); |
2082 | 1042 if (load_nvlist(spa, spa->spa_spares_object, |
1043 &spa->spa_sparelist) != 0) { | |
1044 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1045 VDEV_AUX_CORRUPT_DATA); | |
1046 error = EIO; | |
1047 goto out; | |
1048 } | |
1049 | |
1050 spa_config_enter(spa, RW_WRITER, FTAG); | |
1051 spa_load_spares(spa); | |
1052 spa_config_exit(spa, FTAG); | |
1053 } | |
1054 | |
5094 | 1055 spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); |
4543 | 1056 |
3912 | 1057 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, |
1058 DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); | |
1059 | |
1060 if (error && error != ENOENT) { | |
1061 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1062 VDEV_AUX_CORRUPT_DATA); | |
1063 error = EIO; | |
1064 goto out; | |
1065 } | |
1066 | |
1067 if (error == 0) { | |
1068 (void) zap_lookup(spa->spa_meta_objset, | |
1069 spa->spa_pool_props_object, | |
4451 | 1070 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), |
3912 | 1071 sizeof (uint64_t), 1, &spa->spa_bootfs); |
4451 | 1072 (void) zap_lookup(spa->spa_meta_objset, |
1073 spa->spa_pool_props_object, | |
1074 zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), | |
1075 sizeof (uint64_t), 1, &autoreplace); | |
4543 | 1076 (void) zap_lookup(spa->spa_meta_objset, |
1077 spa->spa_pool_props_object, | |
1078 zpool_prop_to_name(ZPOOL_PROP_DELEGATION), | |
1079 sizeof (uint64_t), 1, &spa->spa_delegation); | |
3912 | 1080 } |
1081 | |
2082 | 1082 /* |
4451 | 1083 * If the 'autoreplace' property is set, then post a resource notifying |
1084 * the ZFS DE that it should not issue any faults for unopenable | |
1085 * devices. We also iterate over the vdevs, and post a sysevent for any | |
1086 * unopenable vdevs so that the normal autoreplace handler can take | |
1087 * over. | |
1088 */ | |
1089 if (autoreplace) | |
1090 spa_check_removed(spa->spa_root_vdev); | |
1091 | |
1092 /* | |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1093 * Load the vdev state for all toplevel vdevs. |
789 | 1094 */ |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1095 vdev_load(rvd); |
789 | 1096 |
1097 /* | |
1098 * Propagate the leaf DTLs we just loaded all the way up the tree. | |
1099 */ | |
1544 | 1100 spa_config_enter(spa, RW_WRITER, FTAG); |
789 | 1101 vdev_dtl_reassess(rvd, 0, 0, B_FALSE); |
1544 | 1102 spa_config_exit(spa, FTAG); |
789 | 1103 |
1104 /* | |
1105 * Check the state of the root vdev. If it can't be opened, it | |
1106 * indicates one or more toplevel vdevs are faulted. | |
1107 */ | |
1544 | 1108 if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { |
1109 error = ENXIO; | |
1110 goto out; | |
1111 } | |
789 | 1112 |
1544 | 1113 if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) { |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1114 dmu_tx_t *tx; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1115 int need_update = B_FALSE; |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1116 int c; |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1117 |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1118 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1119 * Claim log blocks that haven't been committed yet. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1120 * This must all happen in a single txg. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1121 */ |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1122 tx = dmu_tx_create_assigned(spa_get_dsl(spa), |
789 | 1123 spa_first_txg(spa)); |
2417 | 1124 (void) dmu_objset_find(spa->spa_name, |
1125 zil_claim, tx, DS_FIND_CHILDREN); | |
789 | 1126 dmu_tx_commit(tx); |
1127 | |
1128 spa->spa_sync_on = B_TRUE; | |
1129 txg_sync_start(spa->spa_dsl_pool); | |
1130 | |
1131 /* | |
1132 * Wait for all claims to sync. | |
1133 */ | |
1134 txg_wait_synced(spa->spa_dsl_pool, 0); | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1135 |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1136 /* |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1137 * If the config cache is stale, or we have uninitialized |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1138 * metaslabs (see spa_vdev_add()), then update the config. |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1139 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1140 if (config_cache_txg != spa->spa_config_txg || |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1141 state == SPA_LOAD_IMPORT) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1142 need_update = B_TRUE; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1143 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1144 for (c = 0; c < rvd->vdev_children; c++) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1145 if (rvd->vdev_child[c]->vdev_ms_array == 0) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1146 need_update = B_TRUE; |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1147 |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1148 /* |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1149 * Update the config cache asychronously in case we're the |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1150 * root pool, in which case the config cache isn't writable yet. |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1151 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1152 if (need_update) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1153 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); |
789 | 1154 } |
1155 | |
1544 | 1156 error = 0; |
1157 out: | |
2082 | 1158 if (error && error != EBADF) |
1544 | 1159 zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0); |
1160 spa->spa_load_state = SPA_LOAD_NONE; | |
1161 spa->spa_ena = 0; | |
1162 | |
1163 return (error); | |
789 | 1164 } |
1165 | |
1166 /* | |
1167 * Pool Open/Import | |
1168 * | |
1169 * The import case is identical to an open except that the configuration is sent | |
1170 * down from userland, instead of grabbed from the configuration cache. For the | |
1171 * case of an open, the pool configuration will exist in the | |
4451 | 1172 * POOL_STATE_UNINITIALIZED state. |
789 | 1173 * |
1174 * The stats information (gen/count/ustats) is used to gather vdev statistics at | |
1175 * the same time open the pool, without having to keep around the spa_t in some | |
1176 * ambiguous state. | |
1177 */ | |
1178 static int | |
1179 spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) | |
1180 { | |
1181 spa_t *spa; | |
1182 int error; | |
1183 int loaded = B_FALSE; | |
1184 int locked = B_FALSE; | |
1185 | |
1186 *spapp = NULL; | |
1187 | |
1188 /* | |
1189 * As disgusting as this is, we need to support recursive calls to this | |
1190 * function because dsl_dir_open() is called during spa_load(), and ends | |
1191 * up calling spa_open() again. The real fix is to figure out how to | |
1192 * avoid dsl_dir_open() calling this in the first place. | |
1193 */ | |
1194 if (mutex_owner(&spa_namespace_lock) != curthread) { | |
1195 mutex_enter(&spa_namespace_lock); | |
1196 locked = B_TRUE; | |
1197 } | |
1198 | |
1199 if ((spa = spa_lookup(pool)) == NULL) { | |
1200 if (locked) | |
1201 mutex_exit(&spa_namespace_lock); | |
1202 return (ENOENT); | |
1203 } | |
1204 if (spa->spa_state == POOL_STATE_UNINITIALIZED) { | |
1205 | |
1206 spa_activate(spa); | |
1207 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1208 error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE); |
789 | 1209 |
1210 if (error == EBADF) { | |
1211 /* | |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1212 * If vdev_validate() returns failure (indicated by |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1213 * EBADF), it indicates that one of the vdevs indicates |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1214 * that the pool has been exported or destroyed. If |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1215 * this is the case, the config cache is out of sync and |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1216 * we should remove the pool from the namespace. |
789 | 1217 */ |
2082 | 1218 zfs_post_ok(spa, NULL); |
789 | 1219 spa_unload(spa); |
1220 spa_deactivate(spa); | |
1221 spa_remove(spa); | |
1222 spa_config_sync(); | |
1223 if (locked) | |
1224 mutex_exit(&spa_namespace_lock); | |
1225 return (ENOENT); | |
1544 | 1226 } |
1227 | |
1228 if (error) { | |
789 | 1229 /* |
1230 * We can't open the pool, but we still have useful | |
1231 * information: the state of each vdev after the | |
1232 * attempted vdev_open(). Return this to the user. | |
1233 */ | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1234 if (config != NULL && spa->spa_root_vdev != NULL) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1235 spa_config_enter(spa, RW_READER, FTAG); |
789 | 1236 *config = spa_config_generate(spa, NULL, -1ULL, |
1237 B_TRUE); | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1238 spa_config_exit(spa, FTAG); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1239 } |
789 | 1240 spa_unload(spa); |
1241 spa_deactivate(spa); | |
1544 | 1242 spa->spa_last_open_failed = B_TRUE; |
789 | 1243 if (locked) |
1244 mutex_exit(&spa_namespace_lock); | |
1245 *spapp = NULL; | |
1246 return (error); | |
1544 | 1247 } else { |
1248 zfs_post_ok(spa, NULL); | |
1249 spa->spa_last_open_failed = B_FALSE; | |
789 | 1250 } |
1251 | |
1252 loaded = B_TRUE; | |
1253 } | |
1254 | |
1255 spa_open_ref(spa, tag); | |
4451 | 1256 |
1257 /* | |
1258 * If we just loaded the pool, resilver anything that's out of date. | |
1259 */ | |
1260 if (loaded && (spa_mode & FWRITE)) | |
1261 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); | |
1262 | |
789 | 1263 if (locked) |
1264 mutex_exit(&spa_namespace_lock); | |
1265 | |
1266 *spapp = spa; | |
1267 | |
1268 if (config != NULL) { | |
1544 | 1269 spa_config_enter(spa, RW_READER, FTAG); |
789 | 1270 *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); |
1544 | 1271 spa_config_exit(spa, FTAG); |
789 | 1272 } |
1273 | |
1274 return (0); | |
1275 } | |
1276 | |
1277 int | |
1278 spa_open(const char *name, spa_t **spapp, void *tag) | |
1279 { | |
1280 return (spa_open_common(name, spapp, tag, NULL)); | |
1281 } | |
1282 | |
1544 | 1283 /* |
1284 * Lookup the given spa_t, incrementing the inject count in the process, | |
1285 * preventing it from being exported or destroyed. | |
1286 */ | |
1287 spa_t * | |
1288 spa_inject_addref(char *name) | |
1289 { | |
1290 spa_t *spa; | |
1291 | |
1292 mutex_enter(&spa_namespace_lock); | |
1293 if ((spa = spa_lookup(name)) == NULL) { | |
1294 mutex_exit(&spa_namespace_lock); | |
1295 return (NULL); | |
1296 } | |
1297 spa->spa_inject_ref++; | |
1298 mutex_exit(&spa_namespace_lock); | |
1299 | |
1300 return (spa); | |
1301 } | |
1302 | |
1303 void | |
1304 spa_inject_delref(spa_t *spa) | |
1305 { | |
1306 mutex_enter(&spa_namespace_lock); | |
1307 spa->spa_inject_ref--; | |
1308 mutex_exit(&spa_namespace_lock); | |
1309 } | |
1310 | |
2082 | 1311 static void |
1312 spa_add_spares(spa_t *spa, nvlist_t *config) | |
1313 { | |
1314 nvlist_t **spares; | |
1315 uint_t i, nspares; | |
1316 nvlist_t *nvroot; | |
1317 uint64_t guid; | |
1318 vdev_stat_t *vs; | |
1319 uint_t vsc; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1320 uint64_t pool; |
2082 | 1321 |
1322 if (spa->spa_nspares == 0) | |
1323 return; | |
1324 | |
1325 VERIFY(nvlist_lookup_nvlist(config, | |
1326 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); | |
1327 VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, | |
1328 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); | |
1329 if (nspares != 0) { | |
1330 VERIFY(nvlist_add_nvlist_array(nvroot, | |
1331 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); | |
1332 VERIFY(nvlist_lookup_nvlist_array(nvroot, | |
1333 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); | |
1334 | |
1335 /* | |
1336 * Go through and find any spares which have since been | |
1337 * repurposed as an active spare. If this is the case, update | |
1338 * their status appropriately. | |
1339 */ | |
1340 for (i = 0; i < nspares; i++) { | |
1341 VERIFY(nvlist_lookup_uint64(spares[i], | |
1342 ZPOOL_CONFIG_GUID, &guid) == 0); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1343 if (spa_spare_exists(guid, &pool) && pool != 0ULL) { |
2082 | 1344 VERIFY(nvlist_lookup_uint64_array( |
1345 spares[i], ZPOOL_CONFIG_STATS, | |
1346 (uint64_t **)&vs, &vsc) == 0); | |
1347 vs->vs_state = VDEV_STATE_CANT_OPEN; | |
1348 vs->vs_aux = VDEV_AUX_SPARED; | |
1349 } | |
1350 } | |
1351 } | |
1352 } | |
1353 | |
789 | 1354 int |
1544 | 1355 spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) |
789 | 1356 { |
1357 int error; | |
1358 spa_t *spa; | |
1359 | |
1360 *config = NULL; | |
1361 error = spa_open_common(name, &spa, FTAG, config); | |
1362 | |
2082 | 1363 if (spa && *config != NULL) { |
1544 | 1364 VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, |
1365 spa_get_errlog_size(spa)) == 0); | |
1366 | |
2082 | 1367 spa_add_spares(spa, *config); |
1368 } | |
1369 | |
1544 | 1370 /* |
1371 * We want to get the alternate root even for faulted pools, so we cheat | |
1372 * and call spa_lookup() directly. | |
1373 */ | |
1374 if (altroot) { | |
1375 if (spa == NULL) { | |
1376 mutex_enter(&spa_namespace_lock); | |
1377 spa = spa_lookup(name); | |
1378 if (spa) | |
1379 spa_altroot(spa, altroot, buflen); | |
1380 else | |
1381 altroot[0] = '\0'; | |
1382 spa = NULL; | |
1383 mutex_exit(&spa_namespace_lock); | |
1384 } else { | |
1385 spa_altroot(spa, altroot, buflen); | |
1386 } | |
1387 } | |
1388 | |
789 | 1389 if (spa != NULL) |
1390 spa_close(spa, FTAG); | |
1391 | |
1392 return (error); | |
1393 } | |
1394 | |
1395 /* | |
2082 | 1396 * Validate that the 'spares' array is well formed. We must have an array of |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1397 * nvlists, each of which describes a valid leaf vdev. If this is an import (mode |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1398 * is VDEV_ALLOC_SPARE), then we allow corrupted spares to be specified, as long |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1399 * as they are well-formed. |
2082 | 1400 */ |
1401 static int | |
1402 spa_validate_spares(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) | |
1403 { | |
1404 nvlist_t **spares; | |
1405 uint_t i, nspares; | |
1406 vdev_t *vd; | |
1407 int error; | |
1408 | |
1409 /* | |
1410 * It's acceptable to have no spares specified. | |
1411 */ | |
1412 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
1413 &spares, &nspares) != 0) | |
1414 return (0); | |
1415 | |
1416 if (nspares == 0) | |
1417 return (EINVAL); | |
1418 | |
1419 /* | |
1420 * Make sure the pool is formatted with a version that supports hot | |
1421 * spares. | |
1422 */ | |
4577 | 1423 if (spa_version(spa) < SPA_VERSION_SPARES) |
2082 | 1424 return (ENOTSUP); |
1425 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1426 /* |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1427 * Set the pending spare list so we correctly handle device in-use |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1428 * checking. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1429 */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1430 spa->spa_pending_spares = spares; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1431 spa->spa_pending_nspares = nspares; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1432 |
2082 | 1433 for (i = 0; i < nspares; i++) { |
1434 if ((error = spa_config_parse(spa, &vd, spares[i], NULL, 0, | |
1435 mode)) != 0) | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1436 goto out; |
2082 | 1437 |
1438 if (!vd->vdev_ops->vdev_op_leaf) { | |
1439 vdev_free(vd); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1440 error = EINVAL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1441 goto out; |
2082 | 1442 } |
1443 | |
1444 vd->vdev_top = vd; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1445 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1446 if ((error = vdev_open(vd)) == 0 && |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1447 (error = vdev_label_init(vd, crtxg, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1448 VDEV_LABEL_SPARE)) == 0) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1449 VERIFY(nvlist_add_uint64(spares[i], ZPOOL_CONFIG_GUID, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1450 vd->vdev_guid) == 0); |
2082 | 1451 } |
1452 | |
1453 vdev_free(vd); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1454 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1455 if (error && mode != VDEV_ALLOC_SPARE) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1456 goto out; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1457 else |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1458 error = 0; |
2082 | 1459 } |
1460 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1461 out: |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1462 spa->spa_pending_spares = NULL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1463 spa->spa_pending_nspares = 0; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1464 return (error); |
2082 | 1465 } |
1466 | |
1467 /* | |
789 | 1468 * Pool Creation |
1469 */ | |
1470 int | |
5094 | 1471 spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, |
4715
e8d212dda064
6535695 Panic: shpp->sh_eof == shpp->sh_pool_create_len, file: ../../common/fs/zfs/spa_history.c, line: 235
ek110237
parents:
4627
diff
changeset
|
1472 const char *history_str) |
789 | 1473 { |
1474 spa_t *spa; | |
5094 | 1475 char *altroot = NULL; |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1476 vdev_t *rvd; |
789 | 1477 dsl_pool_t *dp; |
1478 dmu_tx_t *tx; | |
2082 | 1479 int c, error = 0; |
789 | 1480 uint64_t txg = TXG_INITIAL; |
2082 | 1481 nvlist_t **spares; |
1482 uint_t nspares; | |
5094 | 1483 uint64_t version; |
789 | 1484 |
1485 /* | |
1486 * If this pool already exists, return failure. | |
1487 */ | |
1488 mutex_enter(&spa_namespace_lock); | |
1489 if (spa_lookup(pool) != NULL) { | |
1490 mutex_exit(&spa_namespace_lock); | |
1491 return (EEXIST); | |
1492 } | |
1493 | |
1494 /* | |
1495 * Allocate a new spa_t structure. | |
1496 */ | |
5094 | 1497 (void) nvlist_lookup_string(props, |
1498 zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1499 spa = spa_add(pool, altroot); |
789 | 1500 spa_activate(spa); |
1501 | |
1502 spa->spa_uberblock.ub_txg = txg - 1; | |
5094 | 1503 |
1504 if (props && (error = spa_prop_validate(spa, props))) { | |
1505 spa_unload(spa); | |
1506 spa_deactivate(spa); | |
1507 spa_remove(spa); | |
1508 return (error); | |
1509 } | |
1510 | |
1511 if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), | |
1512 &version) != 0) | |
1513 version = SPA_VERSION; | |
1514 ASSERT(version <= SPA_VERSION); | |
1515 spa->spa_uberblock.ub_version = version; | |
789 | 1516 spa->spa_ubsync = spa->spa_uberblock; |
1517 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1518 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1519 * Create the root vdev. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1520 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1521 spa_config_enter(spa, RW_WRITER, FTAG); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1522 |
2082 | 1523 error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); |
1524 | |
1525 ASSERT(error != 0 || rvd != NULL); | |
1526 ASSERT(error != 0 || spa->spa_root_vdev == rvd); | |
1527 | |
1528 if (error == 0 && rvd->vdev_children == 0) | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1529 error = EINVAL; |
2082 | 1530 |
1531 if (error == 0 && | |
1532 (error = vdev_create(rvd, txg, B_FALSE)) == 0 && | |
1533 (error = spa_validate_spares(spa, nvroot, txg, | |
1534 VDEV_ALLOC_ADD)) == 0) { | |
1535 for (c = 0; c < rvd->vdev_children; c++) | |
1536 vdev_init(rvd->vdev_child[c], txg); | |
1537 vdev_config_dirty(rvd); | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1538 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1539 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1540 spa_config_exit(spa, FTAG); |
789 | 1541 |
2082 | 1542 if (error != 0) { |
789 | 1543 spa_unload(spa); |
1544 spa_deactivate(spa); | |
1545 spa_remove(spa); | |
1546 mutex_exit(&spa_namespace_lock); | |
1547 return (error); | |
1548 } | |
1549 | |
2082 | 1550 /* |
1551 * Get the list of spares, if specified. | |
1552 */ | |
1553 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
1554 &spares, &nspares) == 0) { | |
1555 VERIFY(nvlist_alloc(&spa->spa_sparelist, NV_UNIQUE_NAME, | |
1556 KM_SLEEP) == 0); | |
1557 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, | |
1558 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); | |
1559 spa_config_enter(spa, RW_WRITER, FTAG); | |
1560 spa_load_spares(spa); | |
1561 spa_config_exit(spa, FTAG); | |
1562 spa->spa_sync_spares = B_TRUE; | |
1563 } | |
1564 | |
789 | 1565 spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg); |
1566 spa->spa_meta_objset = dp->dp_meta_objset; | |
1567 | |
1568 tx = dmu_tx_create_assigned(dp, txg); | |
1569 | |
1570 /* | |
1571 * Create the pool config object. | |
1572 */ | |
1573 spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, | |
1574 DMU_OT_PACKED_NVLIST, 1 << 14, | |
1575 DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); | |
1576 | |
1544 | 1577 if (zap_add(spa->spa_meta_objset, |
789 | 1578 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, |
1544 | 1579 sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { |
1580 cmn_err(CE_PANIC, "failed to add pool config"); | |
1581 } | |
789 | 1582 |
5094 | 1583 /* Newly created pools with the right version are always deflated. */ |
1584 if (version >= SPA_VERSION_RAIDZ_DEFLATE) { | |
1585 spa->spa_deflate = TRUE; | |
1586 if (zap_add(spa->spa_meta_objset, | |
1587 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, | |
1588 sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { | |
1589 cmn_err(CE_PANIC, "failed to add deflate"); | |
1590 } | |
2082 | 1591 } |
1592 | |
789 | 1593 /* |
1594 * Create the deferred-free bplist object. Turn off compression | |
1595 * because sync-to-convergence takes longer if the blocksize | |
1596 * keeps changing. | |
1597 */ | |
1598 spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, | |
1599 1 << 14, tx); | |
1600 dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, | |
1601 ZIO_COMPRESS_OFF, tx); | |
1602 | |
1544 | 1603 if (zap_add(spa->spa_meta_objset, |
789 | 1604 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, |
1544 | 1605 sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { |
1606 cmn_err(CE_PANIC, "failed to add bplist"); | |
1607 } | |
789 | 1608 |
2926 | 1609 /* |
1610 * Create the pool's history object. | |
1611 */ | |
5094 | 1612 if (version >= SPA_VERSION_ZPOOL_HISTORY) |
1613 spa_history_create_obj(spa, tx); | |
1614 | |
1615 /* | |
1616 * Set pool properties. | |
1617 */ | |
1618 spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); | |
1619 spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); | |
1620 spa->spa_temporary = zpool_prop_default_numeric(ZPOOL_PROP_TEMPORARY); | |
1621 if (props) | |
1622 spa_sync_props(spa, props, CRED(), tx); | |
2926 | 1623 |
789 | 1624 dmu_tx_commit(tx); |
1625 | |
1626 spa->spa_sync_on = B_TRUE; | |
1627 txg_sync_start(spa->spa_dsl_pool); | |
1628 | |
1629 /* | |
1630 * We explicitly wait for the first transaction to complete so that our | |
1631 * bean counters are appropriately updated. | |
1632 */ | |
1633 txg_wait_synced(spa->spa_dsl_pool, txg); | |
1634 | |
1635 spa_config_sync(); | |
1636 | |
5094 | 1637 if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) |
4715
e8d212dda064
6535695 Panic: shpp->sh_eof == shpp->sh_pool_create_len, file: ../../common/fs/zfs/spa_history.c, line: 235
ek110237
parents:
4627
diff
changeset
|
1638 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); |
e8d212dda064
6535695 Panic: shpp->sh_eof == shpp->sh_pool_create_len, file: ../../common/fs/zfs/spa_history.c, line: 235
ek110237
parents:
4627
diff
changeset
|
1639 |
789 | 1640 mutex_exit(&spa_namespace_lock); |
1641 | |
1642 return (0); | |
1643 } | |
1644 | |
1645 /* | |
1646 * Import the given pool into the system. We set up the necessary spa_t and | |
1647 * then call spa_load() to do the dirty work. | |
1648 */ | |
1649 int | |
5094 | 1650 spa_import(const char *pool, nvlist_t *config, nvlist_t *props) |
789 | 1651 { |
1652 spa_t *spa; | |
5094 | 1653 char *altroot = NULL; |
789 | 1654 int error; |
2082 | 1655 nvlist_t *nvroot; |
1656 nvlist_t **spares; | |
1657 uint_t nspares; | |
789 | 1658 |
1659 /* | |
1660 * If a pool with this name exists, return failure. | |
1661 */ | |
1662 mutex_enter(&spa_namespace_lock); | |
1663 if (spa_lookup(pool) != NULL) { | |
1664 mutex_exit(&spa_namespace_lock); | |
1665 return (EEXIST); | |
1666 } | |
1667 | |
1668 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1669 * Create and initialize the spa structure. |
789 | 1670 */ |
5094 | 1671 (void) nvlist_lookup_string(props, |
1672 zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1673 spa = spa_add(pool, altroot); |
789 | 1674 spa_activate(spa); |
1675 | |
1676 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1677 * Pass off the heavy lifting to spa_load(). |
1732 | 1678 * Pass TRUE for mosconfig because the user-supplied config |
1679 * is actually the one to trust when doing an import. | |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1680 */ |
1732 | 1681 error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE); |
789 | 1682 |
2082 | 1683 spa_config_enter(spa, RW_WRITER, FTAG); |
1684 /* | |
1685 * Toss any existing sparelist, as it doesn't have any validity anymore, | |
1686 * and conflicts with spa_has_spare(). | |
1687 */ | |
1688 if (spa->spa_sparelist) { | |
1689 nvlist_free(spa->spa_sparelist); | |
1690 spa->spa_sparelist = NULL; | |
1691 spa_load_spares(spa); | |
1692 } | |
1693 | |
1694 VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, | |
1695 &nvroot) == 0); | |
5094 | 1696 if (error == 0) { |
2082 | 1697 error = spa_validate_spares(spa, nvroot, -1ULL, |
1698 VDEV_ALLOC_SPARE); | |
5094 | 1699 } |
2082 | 1700 spa_config_exit(spa, FTAG); |
1701 | |
5094 | 1702 if (error != 0 || (props && (error = spa_prop_set(spa, props)))) { |
789 | 1703 spa_unload(spa); |
1704 spa_deactivate(spa); | |
1705 spa_remove(spa); | |
1706 mutex_exit(&spa_namespace_lock); | |
1707 return (error); | |
1708 } | |
1709 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1710 /* |
2082 | 1711 * Override any spares as specified by the user, as these may have |
1712 * correct device names/devids, etc. | |
1713 */ | |
1714 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
1715 &spares, &nspares) == 0) { | |
1716 if (spa->spa_sparelist) | |
1717 VERIFY(nvlist_remove(spa->spa_sparelist, | |
1718 ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); | |
1719 else | |
1720 VERIFY(nvlist_alloc(&spa->spa_sparelist, | |
1721 NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
1722 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, | |
1723 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); | |
1724 spa_config_enter(spa, RW_WRITER, FTAG); | |
1725 spa_load_spares(spa); | |
1726 spa_config_exit(spa, FTAG); | |
1727 spa->spa_sync_spares = B_TRUE; | |
1728 } | |
1729 | |
1730 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1731 * Update the config cache to include the newly-imported pool. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1732 */ |
4627
c85631613c19
6509812 it would be useful if zdb(1M) could run on exported pools
ck153898
parents:
4577
diff
changeset
|
1733 if (spa_mode & FWRITE) |
c85631613c19
6509812 it would be useful if zdb(1M) could run on exported pools
ck153898
parents:
4577
diff
changeset
|
1734 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1735 |
789 | 1736 /* |
1737 * Resilver anything that's out of date. | |
1738 */ | |
1739 if (spa_mode & FWRITE) | |
1740 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); | |
1741 | |
4451 | 1742 mutex_exit(&spa_namespace_lock); |
1743 | |
789 | 1744 return (0); |
1745 } | |
1746 | |
1747 /* | |
1748 * This (illegal) pool name is used when temporarily importing a spa_t in order | |
1749 * to get the vdev stats associated with the imported devices. | |
1750 */ | |
1751 #define TRYIMPORT_NAME "$import" | |
1752 | |
1753 nvlist_t * | |
1754 spa_tryimport(nvlist_t *tryconfig) | |
1755 { | |
1756 nvlist_t *config = NULL; | |
1757 char *poolname; | |
1758 spa_t *spa; | |
1759 uint64_t state; | |
1760 | |
1761 if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) | |
1762 return (NULL); | |
1763 | |
1764 if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) | |
1765 return (NULL); | |
1766 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1767 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1768 * Create and initialize the spa structure. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1769 */ |
789 | 1770 mutex_enter(&spa_namespace_lock); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1771 spa = spa_add(TRYIMPORT_NAME, NULL); |
789 | 1772 spa_activate(spa); |
1773 | |
1774 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1775 * Pass off the heavy lifting to spa_load(). |
1732 | 1776 * Pass TRUE for mosconfig because the user-supplied config |
1777 * is actually the one to trust when doing an import. | |
789 | 1778 */ |
1732 | 1779 (void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE); |
789 | 1780 |
1781 /* | |
1782 * If 'tryconfig' was at least parsable, return the current config. | |
1783 */ | |
1784 if (spa->spa_root_vdev != NULL) { | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1785 spa_config_enter(spa, RW_READER, FTAG); |
789 | 1786 config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1787 spa_config_exit(spa, FTAG); |
789 | 1788 VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, |
1789 poolname) == 0); | |
1790 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, | |
1791 state) == 0); | |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1792 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1793 spa->spa_uberblock.ub_timestamp) == 0); |
2082 | 1794 |
1795 /* | |
1796 * Add the list of hot spares. | |
1797 */ | |
1798 spa_add_spares(spa, config); | |
789 | 1799 } |
1800 | |
1801 spa_unload(spa); | |
1802 spa_deactivate(spa); | |
1803 spa_remove(spa); | |
1804 mutex_exit(&spa_namespace_lock); | |
1805 | |
1806 return (config); | |
1807 } | |
1808 | |
1809 /* | |
1810 * Pool export/destroy | |
1811 * | |
1812 * The act of destroying or exporting a pool is very simple. We make sure there | |
1813 * is no more pending I/O and any references to the pool are gone. Then, we | |
1814 * update the pool state and sync all the labels to disk, removing the | |
1815 * configuration from the cache afterwards. | |
1816 */ | |
1817 static int | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1818 spa_export_common(char *pool, int new_state, nvlist_t **oldconfig) |
789 | 1819 { |
1820 spa_t *spa; | |
1821 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1822 if (oldconfig) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1823 *oldconfig = NULL; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1824 |
789 | 1825 if (!(spa_mode & FWRITE)) |
1826 return (EROFS); | |
1827 | |
1828 mutex_enter(&spa_namespace_lock); | |
1829 if ((spa = spa_lookup(pool)) == NULL) { | |
1830 mutex_exit(&spa_namespace_lock); | |
1831 return (ENOENT); | |
1832 } | |
1833 | |
1834 /* | |
1544 | 1835 * Put a hold on the pool, drop the namespace lock, stop async tasks, |
1836 * reacquire the namespace lock, and see if we can export. | |
1837 */ | |
1838 spa_open_ref(spa, FTAG); | |
1839 mutex_exit(&spa_namespace_lock); | |
1840 spa_async_suspend(spa); | |
1841 mutex_enter(&spa_namespace_lock); | |
1842 spa_close(spa, FTAG); | |
1843 | |
1844 /* | |
789 | 1845 * The pool will be in core if it's openable, |
1846 * in which case we can modify its state. | |
1847 */ | |
1848 if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { | |
1849 /* | |
1850 * Objsets may be open only because they're dirty, so we | |
1851 * have to force it to sync before checking spa_refcnt. | |
1852 */ | |
1853 spa_scrub_suspend(spa); | |
1854 txg_wait_synced(spa->spa_dsl_pool, 0); | |
1855 | |
1544 | 1856 /* |
1857 * A pool cannot be exported or destroyed if there are active | |
1858 * references. If we are resetting a pool, allow references by | |
1859 * fault injection handlers. | |
1860 */ | |
1861 if (!spa_refcount_zero(spa) || | |
1862 (spa->spa_inject_ref != 0 && | |
1863 new_state != POOL_STATE_UNINITIALIZED)) { | |
789 | 1864 spa_scrub_resume(spa); |
1544 | 1865 spa_async_resume(spa); |
789 | 1866 mutex_exit(&spa_namespace_lock); |
1867 return (EBUSY); | |
1868 } | |
1869 | |
1870 spa_scrub_resume(spa); | |
1871 VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); | |
1872 | |
1873 /* | |
1874 * We want this to be reflected on every label, | |
1875 * so mark them all dirty. spa_unload() will do the | |
1876 * final sync that pushes these changes out. | |
1877 */ | |
1544 | 1878 if (new_state != POOL_STATE_UNINITIALIZED) { |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1879 spa_config_enter(spa, RW_WRITER, FTAG); |
1544 | 1880 spa->spa_state = new_state; |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1881 spa->spa_final_txg = spa_last_synced_txg(spa) + 1; |
1544 | 1882 vdev_config_dirty(spa->spa_root_vdev); |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1883 spa_config_exit(spa, FTAG); |
1544 | 1884 } |
789 | 1885 } |
1886 | |
4451 | 1887 spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); |
1888 | |
789 | 1889 if (spa->spa_state != POOL_STATE_UNINITIALIZED) { |
1890 spa_unload(spa); | |
1891 spa_deactivate(spa); | |
1892 } | |
1893 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1894 if (oldconfig && spa->spa_config) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1895 VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1896 |
1544 | 1897 if (new_state != POOL_STATE_UNINITIALIZED) { |
1898 spa_remove(spa); | |
1899 spa_config_sync(); | |
1900 } | |
789 | 1901 mutex_exit(&spa_namespace_lock); |
1902 | |
1903 return (0); | |
1904 } | |
1905 | |
1906 /* | |
1907 * Destroy a storage pool. | |
1908 */ | |
1909 int | |
1910 spa_destroy(char *pool) | |
1911 { | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1912 return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL)); |
789 | 1913 } |
1914 | |
1915 /* | |
1916 * Export a storage pool. | |
1917 */ | |
1918 int | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1919 spa_export(char *pool, nvlist_t **oldconfig) |
789 | 1920 { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1921 return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig)); |
789 | 1922 } |
1923 | |
1924 /* | |
1544 | 1925 * Similar to spa_export(), this unloads the spa_t without actually removing it |
1926 * from the namespace in any way. | |
1927 */ | |
1928 int | |
1929 spa_reset(char *pool) | |
1930 { | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1931 return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL)); |
1544 | 1932 } |
1933 | |
1934 | |
1935 /* | |
789 | 1936 * ========================================================================== |
1937 * Device manipulation | |
1938 * ========================================================================== | |
1939 */ | |
1940 | |
1941 /* | |
4527 | 1942 * Add a device to a storage pool. |
789 | 1943 */ |
1944 int | |
1945 spa_vdev_add(spa_t *spa, nvlist_t *nvroot) | |
1946 { | |
1947 uint64_t txg; | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1948 int c, error; |
789 | 1949 vdev_t *rvd = spa->spa_root_vdev; |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1950 vdev_t *vd, *tvd; |
2082 | 1951 nvlist_t **spares; |
1952 uint_t i, nspares; | |
789 | 1953 |
1954 txg = spa_vdev_enter(spa); | |
1955 | |
2082 | 1956 if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, |
1957 VDEV_ALLOC_ADD)) != 0) | |
1958 return (spa_vdev_exit(spa, NULL, txg, error)); | |
1959 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1960 spa->spa_pending_vdev = vd; |
789 | 1961 |
2082 | 1962 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, |
1963 &spares, &nspares) != 0) | |
1964 nspares = 0; | |
1965 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1966 if (vd->vdev_children == 0 && nspares == 0) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1967 spa->spa_pending_vdev = NULL; |
2082 | 1968 return (spa_vdev_exit(spa, vd, txg, EINVAL)); |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1969 } |
2082 | 1970 |
1971 if (vd->vdev_children != 0) { | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1972 if ((error = vdev_create(vd, txg, B_FALSE)) != 0) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1973 spa->spa_pending_vdev = NULL; |
2082 | 1974 return (spa_vdev_exit(spa, vd, txg, error)); |
1975 } | |
1976 } | |
1977 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1978 /* |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1979 * We must validate the spares after checking the children. Otherwise, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1980 * vdev_inuse() will blindly overwrite the spare. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1981 */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1982 if ((error = spa_validate_spares(spa, nvroot, txg, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1983 VDEV_ALLOC_ADD)) != 0) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1984 spa->spa_pending_vdev = NULL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1985 return (spa_vdev_exit(spa, vd, txg, error)); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1986 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1987 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1988 spa->spa_pending_vdev = NULL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1989 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1990 /* |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1991 * Transfer each new top-level vdev from vd to rvd. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1992 */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1993 for (c = 0; c < vd->vdev_children; c++) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1994 tvd = vd->vdev_child[c]; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1995 vdev_remove_child(vd, tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1996 tvd->vdev_id = rvd->vdev_children; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1997 vdev_add_child(rvd, tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1998 vdev_config_dirty(tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1999 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2000 |
2082 | 2001 if (nspares != 0) { |
2002 if (spa->spa_sparelist != NULL) { | |
2003 nvlist_t **oldspares; | |
2004 uint_t oldnspares; | |
2005 nvlist_t **newspares; | |
2006 | |
2007 VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, | |
2008 ZPOOL_CONFIG_SPARES, &oldspares, &oldnspares) == 0); | |
2009 | |
2010 newspares = kmem_alloc(sizeof (void *) * | |
2011 (nspares + oldnspares), KM_SLEEP); | |
2012 for (i = 0; i < oldnspares; i++) | |
2013 VERIFY(nvlist_dup(oldspares[i], | |
2014 &newspares[i], KM_SLEEP) == 0); | |
2015 for (i = 0; i < nspares; i++) | |
2016 VERIFY(nvlist_dup(spares[i], | |
2017 &newspares[i + oldnspares], | |
2018 KM_SLEEP) == 0); | |
2019 | |
2020 VERIFY(nvlist_remove(spa->spa_sparelist, | |
2021 ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); | |
2022 | |
2023 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, | |
2024 ZPOOL_CONFIG_SPARES, newspares, | |
2025 nspares + oldnspares) == 0); | |
2026 for (i = 0; i < oldnspares + nspares; i++) | |
2027 nvlist_free(newspares[i]); | |
2028 kmem_free(newspares, (oldnspares + nspares) * | |
2029 sizeof (void *)); | |
2030 } else { | |
2031 VERIFY(nvlist_alloc(&spa->spa_sparelist, | |
2032 NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
2033 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, | |
2034 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); | |
2035 } | |
2036 | |
2037 spa_load_spares(spa); | |
2038 spa->spa_sync_spares = B_TRUE; | |
789 | 2039 } |
2040 | |
2041 /* | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2042 * We have to be careful when adding new vdevs to an existing pool. |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2043 * If other threads start allocating from these vdevs before we |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2044 * sync the config cache, and we lose power, then upon reboot we may |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2045 * fail to open the pool because there are DVAs that the config cache |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2046 * can't translate. Therefore, we first add the vdevs without |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2047 * initializing metaslabs; sync the config cache (via spa_vdev_exit()); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2048 * and then let spa_config_update() initialize the new metaslabs. |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2049 * |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2050 * spa_load() checks for added-but-not-initialized vdevs, so that |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2051 * if we lose power at any point in this sequence, the remaining |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2052 * steps will be completed the next time we load the pool. |
789 | 2053 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2054 (void) spa_vdev_exit(spa, vd, txg, 0); |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2055 |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2056 mutex_enter(&spa_namespace_lock); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2057 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2058 mutex_exit(&spa_namespace_lock); |
789 | 2059 |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2060 return (0); |
789 | 2061 } |
2062 | |
2063 /* | |
2064 * Attach a device to a mirror. The arguments are the path to any device | |
2065 * in the mirror, and the nvroot for the new device. If the path specifies | |
2066 * a device that is not mirrored, we automatically insert the mirror vdev. | |
2067 * | |
2068 * If 'replacing' is specified, the new device is intended to replace the | |
2069 * existing device; in this case the two devices are made into their own | |
4451 | 2070 * mirror using the 'replacing' vdev, which is functionally identical to |
789 | 2071 * the mirror vdev (it actually reuses all the same ops) but has a few |
2072 * extra rules: you can't attach to it after it's been created, and upon | |
2073 * completion of resilvering, the first disk (the one being replaced) | |
2074 * is automatically detached. | |
2075 */ | |
2076 int | |
1544 | 2077 spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) |
789 | 2078 { |
2079 uint64_t txg, open_txg; | |
2080 int error; | |
2081 vdev_t *rvd = spa->spa_root_vdev; | |
2082 vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; | |
2082 | 2083 vdev_ops_t *pvops; |
4527 | 2084 int is_log; |
789 | 2085 |
2086 txg = spa_vdev_enter(spa); | |
2087 | |
1544 | 2088 oldvd = vdev_lookup_by_guid(rvd, guid); |
789 | 2089 |
2090 if (oldvd == NULL) | |
2091 return (spa_vdev_exit(spa, NULL, txg, ENODEV)); | |
2092 | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2093 if (!oldvd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2094 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2095 |
789 | 2096 pvd = oldvd->vdev_parent; |
2097 | |
2082 | 2098 if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, |
4451 | 2099 VDEV_ALLOC_ADD)) != 0) |
2100 return (spa_vdev_exit(spa, NULL, txg, EINVAL)); | |
2101 | |
2102 if (newrootvd->vdev_children != 1) | |
789 | 2103 return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); |
2104 | |
2105 newvd = newrootvd->vdev_child[0]; | |
2106 | |
2107 if (!newvd->vdev_ops->vdev_op_leaf) | |
2108 return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); | |
2109 | |
2082 | 2110 if ((error = vdev_create(newrootvd, txg, replacing)) != 0) |
789 | 2111 return (spa_vdev_exit(spa, newrootvd, txg, error)); |
2112 | |
4527 | 2113 /* |
2114 * Spares can't replace logs | |
2115 */ | |
2116 is_log = oldvd->vdev_islog; | |
2117 if (is_log && newvd->vdev_isspare) | |
2118 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
2119 | |
2082 | 2120 if (!replacing) { |
2121 /* | |
2122 * For attach, the only allowable parent is a mirror or the root | |
2123 * vdev. | |
2124 */ | |
2125 if (pvd->vdev_ops != &vdev_mirror_ops && | |
2126 pvd->vdev_ops != &vdev_root_ops) | |
2127 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
2128 | |
2129 pvops = &vdev_mirror_ops; | |
2130 } else { | |
2131 /* | |
2132 * Active hot spares can only be replaced by inactive hot | |
2133 * spares. | |
2134 */ | |
2135 if (pvd->vdev_ops == &vdev_spare_ops && | |
2136 pvd->vdev_child[1] == oldvd && | |
2137 !spa_has_spare(spa, newvd->vdev_guid)) | |
2138 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
2139 | |
2140 /* | |
2141 * If the source is a hot spare, and the parent isn't already a | |
2142 * spare, then we want to create a new hot spare. Otherwise, we | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2143 * want to create a replacing vdev. The user is not allowed to |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2144 * attach to a spared vdev child unless the 'isspare' state is |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2145 * the same (spare replaces spare, non-spare replaces |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2146 * non-spare). |
2082 | 2147 */ |
2148 if (pvd->vdev_ops == &vdev_replacing_ops) | |
2149 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2150 else if (pvd->vdev_ops == &vdev_spare_ops && |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2151 newvd->vdev_isspare != oldvd->vdev_isspare) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2152 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); |
2082 | 2153 else if (pvd->vdev_ops != &vdev_spare_ops && |
2154 newvd->vdev_isspare) | |
2155 pvops = &vdev_spare_ops; | |
2156 else | |
2157 pvops = &vdev_replacing_ops; | |
2158 } | |
2159 | |
1175
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
2160 /* |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
2161 * Compare the new device size with the replaceable/attachable |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
2162 * device size. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
2163 */ |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
2164 if (newvd->vdev_psize < vdev_get_rsize(oldvd)) |
789 | 2165 return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); |
2166 | |
1732 | 2167 /* |
2168 * The new device cannot have a higher alignment requirement | |
2169 * than the top-level vdev. | |
2170 */ | |
2171 if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) | |
789 | 2172 return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); |
2173 | |
2174 /* | |
2175 * If this is an in-place replacement, update oldvd's path and devid | |
2176 * to make it distinguishable from newvd, and unopenable from now on. | |
2177 */ | |
2178 if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { | |
2179 spa_strfree(oldvd->vdev_path); | |
2180 oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, | |
2181 KM_SLEEP); | |
2182 (void) sprintf(oldvd->vdev_path, "%s/%s", | |
2183 newvd->vdev_path, "old"); | |
2184 if (oldvd->vdev_devid != NULL) { | |
2185 spa_strfree(oldvd->vdev_devid); | |
2186 oldvd->vdev_devid = NULL; | |
2187 } | |
2188 } | |
2189 | |
2190 /* | |
2082 | 2191 * If the parent is not a mirror, or if we're replacing, insert the new |
2192 * mirror/replacing/spare vdev above oldvd. | |
789 | 2193 */ |
2194 if (pvd->vdev_ops != pvops) | |
2195 pvd = vdev_add_parent(oldvd, pvops); | |
2196 | |
2197 ASSERT(pvd->vdev_top->vdev_parent == rvd); | |
2198 ASSERT(pvd->vdev_ops == pvops); | |
2199 ASSERT(oldvd->vdev_parent == pvd); | |
2200 | |
2201 /* | |
2202 * Extract the new device from its root and add it to pvd. | |
2203 */ | |
2204 vdev_remove_child(newrootvd, newvd); | |
2205 newvd->vdev_id = pvd->vdev_children; | |
2206 vdev_add_child(pvd, newvd); | |
2207 | |
1544 | 2208 /* |
2209 * If newvd is smaller than oldvd, but larger than its rsize, | |
2210 * the addition of newvd may have decreased our parent's asize. | |
2211 */ | |
2212 pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize); | |
2213 | |
789 | 2214 tvd = newvd->vdev_top; |
2215 ASSERT(pvd->vdev_top == tvd); | |
2216 ASSERT(tvd->vdev_parent == rvd); | |
2217 | |
2218 vdev_config_dirty(tvd); | |
2219 | |
2220 /* | |
2221 * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate | |
2222 * upward when spa_vdev_exit() calls vdev_dtl_reassess(). | |
2223 */ | |
2224 open_txg = txg + TXG_CONCURRENT_STATES - 1; | |
2225 | |
2226 mutex_enter(&newvd->vdev_dtl_lock); | |
2227 space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL, | |
2228 open_txg - TXG_INITIAL + 1); | |
2229 mutex_exit(&newvd->vdev_dtl_lock); | |
2230 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2231 if (newvd->vdev_isspare) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2232 spa_spare_activate(newvd); |
1544 | 2233 |
789 | 2234 /* |
2235 * Mark newvd's DTL dirty in this txg. | |
2236 */ | |
1732 | 2237 vdev_dirty(tvd, VDD_DTL, newvd, txg); |
789 | 2238 |
2239 (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); | |
2240 | |
2241 /* | |
4451 | 2242 * Kick off a resilver to update newvd. We need to grab the namespace |
2243 * lock because spa_scrub() needs to post a sysevent with the pool name. | |
789 | 2244 */ |
4451 | 2245 mutex_enter(&spa_namespace_lock); |
789 | 2246 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); |
4451 | 2247 mutex_exit(&spa_namespace_lock); |
789 | 2248 |
2249 return (0); | |
2250 } | |
2251 | |
2252 /* | |
2253 * Detach a device from a mirror or replacing vdev. | |
2254 * If 'replace_done' is specified, only detach if the parent | |
2255 * is a replacing vdev. | |
2256 */ | |
2257 int | |
1544 | 2258 spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) |
789 | 2259 { |
2260 uint64_t txg; | |
2261 int c, t, error; | |
2262 vdev_t *rvd = spa->spa_root_vdev; | |
2263 vdev_t *vd, *pvd, *cvd, *tvd; | |
2082 | 2264 boolean_t unspare = B_FALSE; |
2265 uint64_t unspare_guid; | |
789 | 2266 |
2267 txg = spa_vdev_enter(spa); | |
2268 | |
1544 | 2269 vd = vdev_lookup_by_guid(rvd, guid); |
789 | 2270 |
2271 if (vd == NULL) | |
2272 return (spa_vdev_exit(spa, NULL, txg, ENODEV)); | |
2273 | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2274 if (!vd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2275 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2276 |
789 | 2277 pvd = vd->vdev_parent; |
2278 | |
2279 /* | |
2280 * If replace_done is specified, only remove this device if it's | |
2082 | 2281 * the first child of a replacing vdev. For the 'spare' vdev, either |
2282 * disk can be removed. | |
789 | 2283 */ |
2082 | 2284 if (replace_done) { |
2285 if (pvd->vdev_ops == &vdev_replacing_ops) { | |
2286 if (vd->vdev_id != 0) | |
2287 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); | |
2288 } else if (pvd->vdev_ops != &vdev_spare_ops) { | |
2289 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); | |
2290 } | |
2291 } | |
2292 | |
2293 ASSERT(pvd->vdev_ops != &vdev_spare_ops || | |
4577 | 2294 spa_version(spa) >= SPA_VERSION_SPARES); |
789 | 2295 |
2296 /* | |
2082 | 2297 * Only mirror, replacing, and spare vdevs support detach. |
789 | 2298 */ |
2299 if (pvd->vdev_ops != &vdev_replacing_ops && | |
2082 | 2300 pvd->vdev_ops != &vdev_mirror_ops && |
2301 pvd->vdev_ops != &vdev_spare_ops) | |
789 | 2302 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
2303 | |
2304 /* | |
2305 * If there's only one replica, you can't detach it. | |
2306 */ | |
2307 if (pvd->vdev_children <= 1) | |
2308 return (spa_vdev_exit(spa, NULL, txg, EBUSY)); | |
2309 | |
2310 /* | |
2311 * If all siblings have non-empty DTLs, this device may have the only | |
2312 * valid copy of the data, which means we cannot safely detach it. | |
2313 * | |
2314 * XXX -- as in the vdev_offline() case, we really want a more | |
2315 * precise DTL check. | |
2316 */ | |
2317 for (c = 0; c < pvd->vdev_children; c++) { | |
2318 uint64_t dirty; | |
2319 | |
2320 cvd = pvd->vdev_child[c]; | |
2321 if (cvd == vd) | |
2322 continue; | |
2323 if (vdev_is_dead(cvd)) | |
2324 continue; | |
2325 mutex_enter(&cvd->vdev_dtl_lock); | |
2326 dirty = cvd->vdev_dtl_map.sm_space | | |
2327 cvd->vdev_dtl_scrub.sm_space; | |
2328 mutex_exit(&cvd->vdev_dtl_lock); | |
2329 if (!dirty) | |
2330 break; | |
2331 } | |
2082 | 2332 |
2333 /* | |
2334 * If we are a replacing or spare vdev, then we can always detach the | |
2335 * latter child, as that is how one cancels the operation. | |
2336 */ | |
2337 if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) && | |
2338 c == pvd->vdev_children) | |
789 | 2339 return (spa_vdev_exit(spa, NULL, txg, EBUSY)); |
2340 | |
2341 /* | |
2082 | 2342 * If we are detaching the original disk from a spare, then it implies |
2343 * that the spare should become a real disk, and be removed from the | |
2344 * active spare list for the pool. | |
2345 */ | |
2346 if (pvd->vdev_ops == &vdev_spare_ops && | |
2347 vd->vdev_id == 0) | |
2348 unspare = B_TRUE; | |
2349 | |
2350 /* | |
789 | 2351 * Erase the disk labels so the disk can be used for other things. |
2352 * This must be done after all other error cases are handled, | |
2353 * but before we disembowel vd (so we can still do I/O to it). | |
2354 * But if we can't do it, don't treat the error as fatal -- | |
2355 * it may be that the unwritability of the disk is the reason | |
2356 * it's being detached! | |
2357 */ | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2358 error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); |
789 | 2359 |
2360 /* | |
2361 * Remove vd from its parent and compact the parent's children. | |
2362 */ | |
2363 vdev_remove_child(pvd, vd); | |
2364 vdev_compact_children(pvd); | |
2365 | |
2366 /* | |
2367 * Remember one of the remaining children so we can get tvd below. | |
2368 */ | |
2369 cvd = pvd->vdev_child[0]; | |
2370 | |
2371 /* | |
2082 | 2372 * If we need to remove the remaining child from the list of hot spares, |
2373 * do it now, marking the vdev as no longer a spare in the process. We | |
2374 * must do this before vdev_remove_parent(), because that can change the | |
2375 * GUID if it creates a new toplevel GUID. | |
2376 */ | |
2377 if (unspare) { | |
2378 ASSERT(cvd->vdev_isspare); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2379 spa_spare_remove(cvd); |
2082 | 2380 unspare_guid = cvd->vdev_guid; |
2381 } | |
2382 | |
2383 /* | |
789 | 2384 * If the parent mirror/replacing vdev only has one child, |
2385 * the parent is no longer needed. Remove it from the tree. | |
2386 */ | |
2387 if (pvd->vdev_children == 1) | |
2388 vdev_remove_parent(cvd); | |
2389 | |
2390 /* | |
2391 * We don't set tvd until now because the parent we just removed | |
2392 * may have been the previous top-level vdev. | |
2393 */ | |
2394 tvd = cvd->vdev_top; | |
2395 ASSERT(tvd->vdev_parent == rvd); | |
2396 | |
2397 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2398 * Reevaluate the parent vdev state. |
789 | 2399 */ |
4451 | 2400 vdev_propagate_state(cvd); |
789 | 2401 |
2402 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2403 * If the device we just detached was smaller than the others, it may be |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2404 * possible to add metaslabs (i.e. grow the pool). vdev_metaslab_init() |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2405 * can't fail because the existing metaslabs are already in core, so |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2406 * there's nothing to read from disk. |
789 | 2407 */ |
1732 | 2408 VERIFY(vdev_metaslab_init(tvd, txg) == 0); |
789 | 2409 |
2410 vdev_config_dirty(tvd); | |
2411 | |
2412 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2413 * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2414 * vd->vdev_detached is set and free vd's DTL object in syncing context. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2415 * But first make sure we're not on any *other* txg's DTL list, to |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2416 * prevent vd from being accessed after it's freed. |
789 | 2417 */ |
2418 for (t = 0; t < TXG_SIZE; t++) | |
2419 (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); | |
1732 | 2420 vd->vdev_detached = B_TRUE; |
2421 vdev_dirty(tvd, VDD_DTL, vd, txg); | |
789 | 2422 |
4451 | 2423 spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); |
2424 | |
2082 | 2425 error = spa_vdev_exit(spa, vd, txg, 0); |
2426 | |
2427 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2428 * If this was the removal of the original device in a hot spare vdev, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2429 * then we want to go through and remove the device from the hot spare |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2430 * list of every other pool. |
2082 | 2431 */ |
2432 if (unspare) { | |
2433 spa = NULL; | |
2434 mutex_enter(&spa_namespace_lock); | |
2435 while ((spa = spa_next(spa)) != NULL) { | |
2436 if (spa->spa_state != POOL_STATE_ACTIVE) | |
2437 continue; | |
2438 | |
2439 (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); | |
2440 } | |
2441 mutex_exit(&spa_namespace_lock); | |
2442 } | |
2443 | |
2444 return (error); | |
2445 } | |
2446 | |
2447 /* | |
2448 * Remove a device from the pool. Currently, this supports removing only hot | |
2449 * spares. | |
2450 */ | |
2451 int | |
2452 spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) | |
2453 { | |
2454 vdev_t *vd; | |
2455 nvlist_t **spares, *nv, **newspares; | |
2456 uint_t i, j, nspares; | |
2457 int ret = 0; | |
2458 | |
2459 spa_config_enter(spa, RW_WRITER, FTAG); | |
2460 | |
2461 vd = spa_lookup_by_guid(spa, guid); | |
2462 | |
2463 nv = NULL; | |
2464 if (spa->spa_spares != NULL && | |
2465 nvlist_lookup_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, | |
2466 &spares, &nspares) == 0) { | |
2467 for (i = 0; i < nspares; i++) { | |
2468 uint64_t theguid; | |
2469 | |
2470 VERIFY(nvlist_lookup_uint64(spares[i], | |
2471 ZPOOL_CONFIG_GUID, &theguid) == 0); | |
2472 if (theguid == guid) { | |
2473 nv = spares[i]; | |
2474 break; | |
2475 } | |
2476 } | |
2477 } | |
2478 | |
2479 /* | |
2480 * We only support removing a hot spare, and only if it's not currently | |
2481 * in use in this pool. | |
2482 */ | |
2483 if (nv == NULL && vd == NULL) { | |
2484 ret = ENOENT; | |
2485 goto out; | |
2486 } | |
2487 | |
2488 if (nv == NULL && vd != NULL) { | |
2489 ret = ENOTSUP; | |
2490 goto out; | |
2491 } | |
2492 | |
2493 if (!unspare && nv != NULL && vd != NULL) { | |
2494 ret = EBUSY; | |
2495 goto out; | |
2496 } | |
2497 | |
2498 if (nspares == 1) { | |
2499 newspares = NULL; | |
2500 } else { | |
2501 newspares = kmem_alloc((nspares - 1) * sizeof (void *), | |
2502 KM_SLEEP); | |
2503 for (i = 0, j = 0; i < nspares; i++) { | |
2504 if (spares[i] != nv) | |
2505 VERIFY(nvlist_dup(spares[i], | |
2506 &newspares[j++], KM_SLEEP) == 0); | |
2507 } | |
2508 } | |
2509 | |
2510 VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, | |
2511 DATA_TYPE_NVLIST_ARRAY) == 0); | |
2512 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, | |
2513 newspares, nspares - 1) == 0); | |
2514 for (i = 0; i < nspares - 1; i++) | |
2515 nvlist_free(newspares[i]); | |
2516 kmem_free(newspares, (nspares - 1) * sizeof (void *)); | |
2517 spa_load_spares(spa); | |
2518 spa->spa_sync_spares = B_TRUE; | |
2519 | |
2520 out: | |
2521 spa_config_exit(spa, FTAG); | |
2522 | |
2523 return (ret); | |
789 | 2524 } |
2525 | |
2526 /* | |
4451 | 2527 * Find any device that's done replacing, or a vdev marked 'unspare' that's |
2528 * current spared, so we can detach it. | |
789 | 2529 */ |
1544 | 2530 static vdev_t * |
4451 | 2531 spa_vdev_resilver_done_hunt(vdev_t *vd) |
789 | 2532 { |
1544 | 2533 vdev_t *newvd, *oldvd; |
789 | 2534 int c; |
2535 | |
1544 | 2536 for (c = 0; c < vd->vdev_children; c++) { |
4451 | 2537 oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); |
1544 | 2538 if (oldvd != NULL) |
2539 return (oldvd); | |
2540 } | |
789 | 2541 |
4451 | 2542 /* |
2543 * Check for a completed replacement. | |
2544 */ | |
789 | 2545 if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { |
1544 | 2546 oldvd = vd->vdev_child[0]; |
2547 newvd = vd->vdev_child[1]; | |
789 | 2548 |
1544 | 2549 mutex_enter(&newvd->vdev_dtl_lock); |
2550 if (newvd->vdev_dtl_map.sm_space == 0 && | |
2551 newvd->vdev_dtl_scrub.sm_space == 0) { | |
2552 mutex_exit(&newvd->vdev_dtl_lock); | |
2553 return (oldvd); | |
2554 } | |
2555 mutex_exit(&newvd->vdev_dtl_lock); | |
2556 } | |
789 | 2557 |
4451 | 2558 /* |
2559 * Check for a completed resilver with the 'unspare' flag set. | |
2560 */ | |
2561 if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { | |
2562 newvd = vd->vdev_child[0]; | |
2563 oldvd = vd->vdev_child[1]; | |
2564 | |
2565 mutex_enter(&newvd->vdev_dtl_lock); | |
2566 if (newvd->vdev_unspare && | |
2567 newvd->vdev_dtl_map.sm_space == 0 && | |
2568 newvd->vdev_dtl_scrub.sm_space == 0) { | |
2569 newvd->vdev_unspare = 0; | |
2570 mutex_exit(&newvd->vdev_dtl_lock); | |
2571 return (oldvd); | |
2572 } | |
2573 mutex_exit(&newvd->vdev_dtl_lock); | |
2574 } | |
2575 | |
1544 | 2576 return (NULL); |
789 | 2577 } |
2578 | |
1544 | 2579 static void |
4451 | 2580 spa_vdev_resilver_done(spa_t *spa) |
789 | 2581 { |
1544 | 2582 vdev_t *vd; |
2082 | 2583 vdev_t *pvd; |
1544 | 2584 uint64_t guid; |
2082 | 2585 uint64_t pguid = 0; |
789 | 2586 |
1544 | 2587 spa_config_enter(spa, RW_READER, FTAG); |
789 | 2588 |
4451 | 2589 while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { |
1544 | 2590 guid = vd->vdev_guid; |
2082 | 2591 /* |
2592 * If we have just finished replacing a hot spared device, then | |
2593 * we need to detach the parent's first child (the original hot | |
2594 * spare) as well. | |
2595 */ | |
2596 pvd = vd->vdev_parent; | |
2597 if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops && | |
2598 pvd->vdev_id == 0) { | |
2599 ASSERT(pvd->vdev_ops == &vdev_replacing_ops); | |
2600 ASSERT(pvd->vdev_parent->vdev_children == 2); | |
2601 pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid; | |
2602 } | |
1544 | 2603 spa_config_exit(spa, FTAG); |
2604 if (spa_vdev_detach(spa, guid, B_TRUE) != 0) | |
2605 return; | |
2082 | 2606 if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0) |
2607 return; | |
1544 | 2608 spa_config_enter(spa, RW_READER, FTAG); |
789 | 2609 } |
2610 | |
1544 | 2611 spa_config_exit(spa, FTAG); |
789 | 2612 } |
2613 | |
2614 /* | |
1354
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2615 * Update the stored path for this vdev. Dirty the vdev configuration, relying |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2616 * on spa_vdev_enter/exit() to synchronize the labels and cache. |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2617 */ |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2618 int |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2619 spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2620 { |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2621 vdev_t *rvd, *vd; |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2622 uint64_t txg; |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2623 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2624 rvd = spa->spa_root_vdev; |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2625 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2626 txg = spa_vdev_enter(spa); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2627 |
2082 | 2628 if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) { |
2629 /* | |
2630 * Determine if this is a reference to a hot spare. In that | |
2631 * case, update the path as stored in the spare list. | |
2632 */ | |
2633 nvlist_t **spares; | |
2634 uint_t i, nspares; | |
2635 if (spa->spa_sparelist != NULL) { | |
2636 VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, | |
2637 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); | |
2638 for (i = 0; i < nspares; i++) { | |
2639 uint64_t theguid; | |
2640 VERIFY(nvlist_lookup_uint64(spares[i], | |
2641 ZPOOL_CONFIG_GUID, &theguid) == 0); | |
2642 if (theguid == guid) | |
2643 break; | |
2644 } | |
2645 | |
2646 if (i == nspares) | |
2647 return (spa_vdev_exit(spa, NULL, txg, ENOENT)); | |
2648 | |
2649 VERIFY(nvlist_add_string(spares[i], | |
2650 ZPOOL_CONFIG_PATH, newpath) == 0); | |
2651 spa_load_spares(spa); | |
2652 spa->spa_sync_spares = B_TRUE; | |
2653 return (spa_vdev_exit(spa, NULL, txg, 0)); | |
2654 } else { | |
2655 return (spa_vdev_exit(spa, NULL, txg, ENOENT)); | |
2656 } | |
2657 } | |
1354
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2658 |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2659 if (!vd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2660 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2661 |
1354
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2662 spa_strfree(vd->vdev_path); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2663 vd->vdev_path = spa_strdup(newpath); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2664 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2665 vdev_config_dirty(vd->vdev_top); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2666 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2667 return (spa_vdev_exit(spa, NULL, txg, 0)); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2668 } |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2669 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2670 /* |
789 | 2671 * ========================================================================== |
2672 * SPA Scrubbing | |
2673 * ========================================================================== | |
2674 */ | |
2675 | |
2676 static void | |
2677 spa_scrub_io_done(zio_t *zio) | |
2678 { | |
2679 spa_t *spa = zio->io_spa; | |
2680 | |
4309
3dfde0f4662d
6542676 ARC needs to track meta-data memory overhead
maybee
parents:
4178
diff
changeset
|
2681 arc_data_buf_free(zio->io_data, zio->io_size); |
789 | 2682 |
2683 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 2684 if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2685 vdev_t *vd = zio->io_vd ? zio->io_vd : spa->spa_root_vdev; |
789 | 2686 spa->spa_scrub_errors++; |
2687 mutex_enter(&vd->vdev_stat_lock); | |
2688 vd->vdev_stat.vs_scrub_errors++; | |
2689 mutex_exit(&vd->vdev_stat_lock); | |
2690 } | |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2691 |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2692 if (--spa->spa_scrub_inflight < spa->spa_scrub_maxinflight) |
1544 | 2693 cv_broadcast(&spa->spa_scrub_io_cv); |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2694 |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2695 ASSERT(spa->spa_scrub_inflight >= 0); |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2696 |
1544 | 2697 mutex_exit(&spa->spa_scrub_lock); |
789 | 2698 } |
2699 | |
2700 static void | |
1544 | 2701 spa_scrub_io_start(spa_t *spa, blkptr_t *bp, int priority, int flags, |
2702 zbookmark_t *zb) | |
789 | 2703 { |
2704 size_t size = BP_GET_LSIZE(bp); | |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2705 void *data; |
789 | 2706 |
2707 mutex_enter(&spa->spa_scrub_lock); | |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2708 /* |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2709 * Do not give too much work to vdev(s). |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2710 */ |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2711 while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight) { |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2712 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2713 } |
789 | 2714 spa->spa_scrub_inflight++; |
2715 mutex_exit(&spa->spa_scrub_lock); | |
2716 | |
4309
3dfde0f4662d
6542676 ARC needs to track meta-data memory overhead
maybee
parents:
4178
diff
changeset
|
2717 data = arc_data_buf_alloc(size); |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2718 |
1544 | 2719 if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET) |
2720 flags |= ZIO_FLAG_SPECULATIVE; /* intent log block */ | |
2721 | |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
2722 flags |= ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL; |
1544 | 2723 |
789 | 2724 zio_nowait(zio_read(NULL, spa, bp, data, size, |
1544 | 2725 spa_scrub_io_done, NULL, priority, flags, zb)); |
789 | 2726 } |
2727 | |
2728 /* ARGSUSED */ | |
2729 static int | |
2730 spa_scrub_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) | |
2731 { | |
2732 blkptr_t *bp = &bc->bc_blkptr; | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2733 vdev_t *vd = spa->spa_root_vdev; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2734 dva_t *dva = bp->blk_dva; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2735 int needs_resilver = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2736 int d; |
789 | 2737 |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2738 if (bc->bc_errno) { |
789 | 2739 /* |
2740 * We can't scrub this block, but we can continue to scrub | |
2741 * the rest of the pool. Note the error and move along. | |
2742 */ | |
2743 mutex_enter(&spa->spa_scrub_lock); | |
2744 spa->spa_scrub_errors++; | |
2745 mutex_exit(&spa->spa_scrub_lock); | |
2746 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2747 mutex_enter(&vd->vdev_stat_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2748 vd->vdev_stat.vs_scrub_errors++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2749 mutex_exit(&vd->vdev_stat_lock); |
789 | 2750 |
2751 return (ERESTART); | |
2752 } | |
2753 | |
2754 ASSERT(bp->blk_birth < spa->spa_scrub_maxtxg); | |
2755 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2756 for (d = 0; d < BP_GET_NDVAS(bp); d++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2757 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d])); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2758 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2759 ASSERT(vd != NULL); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2760 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2761 /* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2762 * Keep track of how much data we've examined so that |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2763 * zpool(1M) status can make useful progress reports. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2764 */ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2765 mutex_enter(&vd->vdev_stat_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2766 vd->vdev_stat.vs_scrub_examined += DVA_GET_ASIZE(&dva[d]); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2767 mutex_exit(&vd->vdev_stat_lock); |
789 | 2768 |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2769 if (spa->spa_scrub_type == POOL_SCRUB_RESILVER) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2770 if (DVA_GET_GANG(&dva[d])) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2771 /* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2772 * Gang members may be spread across multiple |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2773 * vdevs, so the best we can do is look at the |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2774 * pool-wide DTL. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2775 * XXX -- it would be better to change our |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2776 * allocation policy to ensure that this can't |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2777 * happen. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2778 */ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2779 vd = spa->spa_root_vdev; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2780 } |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2781 if (vdev_dtl_contains(&vd->vdev_dtl_map, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2782 bp->blk_birth, 1)) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2783 needs_resilver = B_TRUE; |
789 | 2784 } |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2785 } |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2786 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2787 if (spa->spa_scrub_type == POOL_SCRUB_EVERYTHING) |
789 | 2788 spa_scrub_io_start(spa, bp, ZIO_PRIORITY_SCRUB, |
1544 | 2789 ZIO_FLAG_SCRUB, &bc->bc_bookmark); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2790 else if (needs_resilver) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2791 spa_scrub_io_start(spa, bp, ZIO_PRIORITY_RESILVER, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2792 ZIO_FLAG_RESILVER, &bc->bc_bookmark); |
789 | 2793 |
2794 return (0); | |
2795 } | |
2796 | |
2797 static void | |
2798 spa_scrub_thread(spa_t *spa) | |
2799 { | |
2800 callb_cpr_t cprinfo; | |
2801 traverse_handle_t *th = spa->spa_scrub_th; | |
2802 vdev_t *rvd = spa->spa_root_vdev; | |
2803 pool_scrub_type_t scrub_type = spa->spa_scrub_type; | |
2804 int error = 0; | |
2805 boolean_t complete; | |
2806 | |
2807 CALLB_CPR_INIT(&cprinfo, &spa->spa_scrub_lock, callb_generic_cpr, FTAG); | |
2808 | |
797
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2809 /* |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2810 * If we're restarting due to a snapshot create/delete, |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2811 * wait for that to complete. |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2812 */ |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2813 txg_wait_synced(spa_get_dsl(spa), 0); |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2814 |
1544 | 2815 dprintf("start %s mintxg=%llu maxtxg=%llu\n", |
2816 scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", | |
2817 spa->spa_scrub_mintxg, spa->spa_scrub_maxtxg); | |
2818 | |
2819 spa_config_enter(spa, RW_WRITER, FTAG); | |
2820 vdev_reopen(rvd); /* purge all vdev caches */ | |
789 | 2821 vdev_config_dirty(rvd); /* rewrite all disk labels */ |
2822 vdev_scrub_stat_update(rvd, scrub_type, B_FALSE); | |
1544 | 2823 spa_config_exit(spa, FTAG); |
789 | 2824 |
2825 mutex_enter(&spa->spa_scrub_lock); | |
2826 spa->spa_scrub_errors = 0; | |
2827 spa->spa_scrub_active = 1; | |
1544 | 2828 ASSERT(spa->spa_scrub_inflight == 0); |
789 | 2829 |
2830 while (!spa->spa_scrub_stop) { | |
2831 CALLB_CPR_SAFE_BEGIN(&cprinfo); | |
1544 | 2832 while (spa->spa_scrub_suspended) { |
789 | 2833 spa->spa_scrub_active = 0; |
2834 cv_broadcast(&spa->spa_scrub_cv); | |
2835 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
2836 spa->spa_scrub_active = 1; | |
2837 } | |
2838 CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_scrub_lock); | |
2839 | |
2840 if (spa->spa_scrub_restart_txg != 0) | |
2841 break; | |
2842 | |
2843 mutex_exit(&spa->spa_scrub_lock); | |
2844 error = traverse_more(th); | |
2845 mutex_enter(&spa->spa_scrub_lock); | |
2846 if (error != EAGAIN) | |
2847 break; | |
2848 } | |
2849 | |
2850 while (spa->spa_scrub_inflight) | |
2851 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); | |
2852 | |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2853 spa->spa_scrub_active = 0; |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2854 cv_broadcast(&spa->spa_scrub_cv); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2855 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2856 mutex_exit(&spa->spa_scrub_lock); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2857 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2858 spa_config_enter(spa, RW_WRITER, FTAG); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2859 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2860 mutex_enter(&spa->spa_scrub_lock); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2861 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2862 /* |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2863 * Note: we check spa_scrub_restart_txg under both spa_scrub_lock |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2864 * AND the spa config lock to synchronize with any config changes |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2865 * that revise the DTLs under spa_vdev_enter() / spa_vdev_exit(). |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2866 */ |
789 | 2867 if (spa->spa_scrub_restart_txg != 0) |
2868 error = ERESTART; | |
2869 | |
1544 | 2870 if (spa->spa_scrub_stop) |
2871 error = EINTR; | |
2872 | |
789 | 2873 /* |
1544 | 2874 * Even if there were uncorrectable errors, we consider the scrub |
2875 * completed. The downside is that if there is a transient error during | |
2876 * a resilver, we won't resilver the data properly to the target. But | |
2877 * if the damage is permanent (more likely) we will resilver forever, | |
2878 * which isn't really acceptable. Since there is enough information for | |
2879 * the user to know what has failed and why, this seems like a more | |
2880 * tractable approach. | |
789 | 2881 */ |
1544 | 2882 complete = (error == 0); |
789 | 2883 |
1544 | 2884 dprintf("end %s to maxtxg=%llu %s, traverse=%d, %llu errors, stop=%u\n", |
2885 scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", | |
789 | 2886 spa->spa_scrub_maxtxg, complete ? "done" : "FAILED", |
2887 error, spa->spa_scrub_errors, spa->spa_scrub_stop); | |
2888 | |
2889 mutex_exit(&spa->spa_scrub_lock); | |
2890 | |
2891 /* | |
2892 * If the scrub/resilver completed, update all DTLs to reflect this. | |
2893 * Whether it succeeded or not, vacate all temporary scrub DTLs. | |
2894 */ | |
2895 vdev_dtl_reassess(rvd, spa_last_synced_txg(spa) + 1, | |
2896 complete ? spa->spa_scrub_maxtxg : 0, B_TRUE); | |
2897 vdev_scrub_stat_update(rvd, POOL_SCRUB_NONE, complete); | |
1544 | 2898 spa_errlog_rotate(spa); |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2899 |
4451 | 2900 if (scrub_type == POOL_SCRUB_RESILVER && complete) |
2901 spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_FINISH); | |
2902 | |
1544 | 2903 spa_config_exit(spa, FTAG); |
789 | 2904 |
2905 mutex_enter(&spa->spa_scrub_lock); | |
2906 | |
1544 | 2907 /* |
2908 * We may have finished replacing a device. | |
2909 * Let the async thread assess this and handle the detach. | |
2910 */ | |
4451 | 2911 spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); |
789 | 2912 |
2913 /* | |
2914 * If we were told to restart, our final act is to start a new scrub. | |
2915 */ | |
2916 if (error == ERESTART) | |
1544 | 2917 spa_async_request(spa, scrub_type == POOL_SCRUB_RESILVER ? |
2918 SPA_ASYNC_RESILVER : SPA_ASYNC_SCRUB); | |
789 | 2919 |
1544 | 2920 spa->spa_scrub_type = POOL_SCRUB_NONE; |
2921 spa->spa_scrub_active = 0; | |
2922 spa->spa_scrub_thread = NULL; | |
2923 cv_broadcast(&spa->spa_scrub_cv); | |
789 | 2924 CALLB_CPR_EXIT(&cprinfo); /* drops &spa->spa_scrub_lock */ |
2925 thread_exit(); | |
2926 } | |
2927 | |
2928 void | |
2929 spa_scrub_suspend(spa_t *spa) | |
2930 { | |
2931 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 2932 spa->spa_scrub_suspended++; |
789 | 2933 while (spa->spa_scrub_active) { |
2934 cv_broadcast(&spa->spa_scrub_cv); | |
2935 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
2936 } | |
2937 while (spa->spa_scrub_inflight) | |
2938 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); | |
2939 mutex_exit(&spa->spa_scrub_lock); | |
2940 } | |
2941 | |
2942 void | |
2943 spa_scrub_resume(spa_t *spa) | |
2944 { | |
2945 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 2946 ASSERT(spa->spa_scrub_suspended != 0); |
2947 if (--spa->spa_scrub_suspended == 0) | |
789 | 2948 cv_broadcast(&spa->spa_scrub_cv); |
2949 mutex_exit(&spa->spa_scrub_lock); | |
2950 } | |
2951 | |
2952 void | |
2953 spa_scrub_restart(spa_t *spa, uint64_t txg) | |
2954 { | |
2955 /* | |
2956 * Something happened (e.g. snapshot create/delete) that means | |
2957 * we must restart any in-progress scrubs. The itinerary will | |
2958 * fix this properly. | |
2959 */ | |
2960 mutex_enter(&spa->spa_scrub_lock); | |
2961 spa->spa_scrub_restart_txg = txg; | |
2962 mutex_exit(&spa->spa_scrub_lock); | |
2963 } | |
2964 | |
1544 | 2965 int |
2966 spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force) | |
789 | 2967 { |
2968 space_seg_t *ss; | |
2969 uint64_t mintxg, maxtxg; | |
2970 vdev_t *rvd = spa->spa_root_vdev; | |
2971 | |
4808 | 2972 ASSERT(MUTEX_HELD(&spa_namespace_lock)); |
2973 ASSERT(!spa_config_held(spa, RW_WRITER)); | |
2974 | |
789 | 2975 if ((uint_t)type >= POOL_SCRUB_TYPES) |
2976 return (ENOTSUP); | |
2977 | |
1544 | 2978 mutex_enter(&spa->spa_scrub_lock); |
2979 | |
789 | 2980 /* |
2981 * If there's a scrub or resilver already in progress, stop it. | |
2982 */ | |
2983 while (spa->spa_scrub_thread != NULL) { | |
2984 /* | |
2985 * Don't stop a resilver unless forced. | |
2986 */ | |
1544 | 2987 if (spa->spa_scrub_type == POOL_SCRUB_RESILVER && !force) { |
2988 mutex_exit(&spa->spa_scrub_lock); | |
789 | 2989 return (EBUSY); |
1544 | 2990 } |
789 | 2991 spa->spa_scrub_stop = 1; |
2992 cv_broadcast(&spa->spa_scrub_cv); | |
2993 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
2994 } | |
2995 | |
2996 /* | |
2997 * Terminate the previous traverse. | |
2998 */ | |
2999 if (spa->spa_scrub_th != NULL) { | |
3000 traverse_fini(spa->spa_scrub_th); | |
3001 spa->spa_scrub_th = NULL; | |
3002 } | |
3003 | |
1544 | 3004 if (rvd == NULL) { |
3005 ASSERT(spa->spa_scrub_stop == 0); | |
3006 ASSERT(spa->spa_scrub_type == type); | |
3007 ASSERT(spa->spa_scrub_restart_txg == 0); | |
3008 mutex_exit(&spa->spa_scrub_lock); | |
3009 return (0); | |
3010 } | |
789 | 3011 |
3012 mintxg = TXG_INITIAL - 1; | |
3013 maxtxg = spa_last_synced_txg(spa) + 1; | |
3014 | |
1544 | 3015 mutex_enter(&rvd->vdev_dtl_lock); |
789 | 3016 |
1544 | 3017 if (rvd->vdev_dtl_map.sm_space == 0) { |
3018 /* | |
3019 * The pool-wide DTL is empty. | |
1732 | 3020 * If this is a resilver, there's nothing to do except |
3021 * check whether any in-progress replacements have completed. | |
1544 | 3022 */ |
1732 | 3023 if (type == POOL_SCRUB_RESILVER) { |
1544 | 3024 type = POOL_SCRUB_NONE; |
4451 | 3025 spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); |
1732 | 3026 } |
1544 | 3027 } else { |
3028 /* | |
3029 * The pool-wide DTL is non-empty. | |
3030 * If this is a normal scrub, upgrade to a resilver instead. | |
3031 */ | |
3032 if (type == POOL_SCRUB_EVERYTHING) | |
3033 type = POOL_SCRUB_RESILVER; | |
3034 } | |
789 | 3035 |
1544 | 3036 if (type == POOL_SCRUB_RESILVER) { |
789 | 3037 /* |
3038 * Determine the resilvering boundaries. | |
3039 * | |
3040 * Note: (mintxg, maxtxg) is an open interval, | |
3041 * i.e. mintxg and maxtxg themselves are not included. | |
3042 * | |
3043 * Note: for maxtxg, we MIN with spa_last_synced_txg(spa) + 1 | |
3044 * so we don't claim to resilver a txg that's still changing. | |
3045 */ | |
3046 ss = avl_first(&rvd->vdev_dtl_map.sm_root); | |
1544 | 3047 mintxg = ss->ss_start - 1; |
789 | 3048 ss = avl_last(&rvd->vdev_dtl_map.sm_root); |
1544 | 3049 maxtxg = MIN(ss->ss_end, maxtxg); |
4451 | 3050 |
3051 spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); | |
789 | 3052 } |
3053 | |
1544 | 3054 mutex_exit(&rvd->vdev_dtl_lock); |
3055 | |
3056 spa->spa_scrub_stop = 0; | |
3057 spa->spa_scrub_type = type; | |
3058 spa->spa_scrub_restart_txg = 0; | |
3059 | |
3060 if (type != POOL_SCRUB_NONE) { | |
3061 spa->spa_scrub_mintxg = mintxg; | |
789 | 3062 spa->spa_scrub_maxtxg = maxtxg; |
3063 spa->spa_scrub_th = traverse_init(spa, spa_scrub_cb, NULL, | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3064 ADVANCE_PRE | ADVANCE_PRUNE | ADVANCE_ZIL, |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3065 ZIO_FLAG_CANFAIL); |
789 | 3066 traverse_add_pool(spa->spa_scrub_th, mintxg, maxtxg); |
3067 spa->spa_scrub_thread = thread_create(NULL, 0, | |
3068 spa_scrub_thread, spa, 0, &p0, TS_RUN, minclsyspri); | |
3069 } | |
3070 | |
1544 | 3071 mutex_exit(&spa->spa_scrub_lock); |
3072 | |
789 | 3073 return (0); |
3074 } | |
3075 | |
1544 | 3076 /* |
3077 * ========================================================================== | |
3078 * SPA async task processing | |
3079 * ========================================================================== | |
3080 */ | |
3081 | |
3082 static void | |
4451 | 3083 spa_async_remove(spa_t *spa, vdev_t *vd) |
789 | 3084 { |
1544 | 3085 vdev_t *tvd; |
3086 int c; | |
3087 | |
4451 | 3088 for (c = 0; c < vd->vdev_children; c++) { |
3089 tvd = vd->vdev_child[c]; | |
3090 if (tvd->vdev_remove_wanted) { | |
3091 tvd->vdev_remove_wanted = 0; | |
3092 vdev_set_state(tvd, B_FALSE, VDEV_STATE_REMOVED, | |
3093 VDEV_AUX_NONE); | |
3094 vdev_clear(spa, tvd); | |
3095 vdev_config_dirty(tvd->vdev_top); | |
1544 | 3096 } |
4451 | 3097 spa_async_remove(spa, tvd); |
1544 | 3098 } |
3099 } | |
3100 | |
3101 static void | |
3102 spa_async_thread(spa_t *spa) | |
3103 { | |
3104 int tasks; | |
4451 | 3105 uint64_t txg; |
1544 | 3106 |
3107 ASSERT(spa->spa_sync_on); | |
789 | 3108 |
1544 | 3109 mutex_enter(&spa->spa_async_lock); |
3110 tasks = spa->spa_async_tasks; | |
3111 spa->spa_async_tasks = 0; | |
3112 mutex_exit(&spa->spa_async_lock); | |
3113 | |
3114 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3115 * See if the config needs to be updated. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3116 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3117 if (tasks & SPA_ASYNC_CONFIG_UPDATE) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3118 mutex_enter(&spa_namespace_lock); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3119 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3120 mutex_exit(&spa_namespace_lock); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3121 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3122 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3123 /* |
4451 | 3124 * See if any devices need to be marked REMOVED. |
1544 | 3125 */ |
4451 | 3126 if (tasks & SPA_ASYNC_REMOVE) { |
3127 txg = spa_vdev_enter(spa); | |
3128 spa_async_remove(spa, spa->spa_root_vdev); | |
3129 (void) spa_vdev_exit(spa, NULL, txg, 0); | |
3130 } | |
1544 | 3131 |
3132 /* | |
3133 * If any devices are done replacing, detach them. | |
3134 */ | |
4451 | 3135 if (tasks & SPA_ASYNC_RESILVER_DONE) |
3136 spa_vdev_resilver_done(spa); | |
789 | 3137 |
1544 | 3138 /* |
4451 | 3139 * Kick off a scrub. When starting a RESILVER scrub (or an EVERYTHING |
3140 * scrub which can become a resilver), we need to hold | |
3141 * spa_namespace_lock() because the sysevent we post via | |
3142 * spa_event_notify() needs to get the name of the pool. | |
1544 | 3143 */ |
4451 | 3144 if (tasks & SPA_ASYNC_SCRUB) { |
3145 mutex_enter(&spa_namespace_lock); | |
1544 | 3146 VERIFY(spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_TRUE) == 0); |
4451 | 3147 mutex_exit(&spa_namespace_lock); |
3148 } | |
1544 | 3149 |
3150 /* | |
3151 * Kick off a resilver. | |
3152 */ | |
4451 | 3153 if (tasks & SPA_ASYNC_RESILVER) { |
3154 mutex_enter(&spa_namespace_lock); | |
1544 | 3155 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); |
4451 | 3156 mutex_exit(&spa_namespace_lock); |
3157 } | |
1544 | 3158 |
3159 /* | |
3160 * Let the world know that we're done. | |
3161 */ | |
3162 mutex_enter(&spa->spa_async_lock); | |
3163 spa->spa_async_thread = NULL; | |
3164 cv_broadcast(&spa->spa_async_cv); | |
3165 mutex_exit(&spa->spa_async_lock); | |
3166 thread_exit(); | |
3167 } | |
3168 | |
3169 void | |
3170 spa_async_suspend(spa_t *spa) | |
3171 { | |
3172 mutex_enter(&spa->spa_async_lock); | |
3173 spa->spa_async_suspended++; | |
3174 while (spa->spa_async_thread != NULL) | |
3175 cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); | |
3176 mutex_exit(&spa->spa_async_lock); | |
3177 } | |
3178 | |
3179 void | |
3180 spa_async_resume(spa_t *spa) | |
3181 { | |
3182 mutex_enter(&spa->spa_async_lock); | |
3183 ASSERT(spa->spa_async_suspended != 0); | |
3184 spa->spa_async_suspended--; | |
3185 mutex_exit(&spa->spa_async_lock); | |
3186 } | |
3187 | |
3188 static void | |
3189 spa_async_dispatch(spa_t *spa) | |
3190 { | |
3191 mutex_enter(&spa->spa_async_lock); | |
3192 if (spa->spa_async_tasks && !spa->spa_async_suspended && | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3193 spa->spa_async_thread == NULL && |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3194 rootdir != NULL && !vn_is_readonly(rootdir)) |
1544 | 3195 spa->spa_async_thread = thread_create(NULL, 0, |
3196 spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); | |
3197 mutex_exit(&spa->spa_async_lock); | |
3198 } | |
3199 | |
3200 void | |
3201 spa_async_request(spa_t *spa, int task) | |
3202 { | |
3203 mutex_enter(&spa->spa_async_lock); | |
3204 spa->spa_async_tasks |= task; | |
3205 mutex_exit(&spa->spa_async_lock); | |
789 | 3206 } |
3207 | |
3208 /* | |
3209 * ========================================================================== | |
3210 * SPA syncing routines | |
3211 * ========================================================================== | |
3212 */ | |
3213 | |
3214 static void | |
3215 spa_sync_deferred_frees(spa_t *spa, uint64_t txg) | |
3216 { | |
3217 bplist_t *bpl = &spa->spa_sync_bplist; | |
3218 dmu_tx_t *tx; | |
3219 blkptr_t blk; | |
3220 uint64_t itor = 0; | |
3221 zio_t *zio; | |
3222 int error; | |
3223 uint8_t c = 1; | |
3224 | |
3225 zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CONFIG_HELD); | |
3226 | |
3227 while (bplist_iterate(bpl, &itor, &blk) == 0) | |
3228 zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL)); | |
3229 | |
3230 error = zio_wait(zio); | |
3231 ASSERT3U(error, ==, 0); | |
3232 | |
3233 tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); | |
3234 bplist_vacate(bpl, tx); | |
3235 | |
3236 /* | |
3237 * Pre-dirty the first block so we sync to convergence faster. | |
3238 * (Usually only the first block is needed.) | |
3239 */ | |
3240 dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); | |
3241 dmu_tx_commit(tx); | |
3242 } | |
3243 | |
3244 static void | |
2082 | 3245 spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) |
3246 { | |
3247 char *packed = NULL; | |
3248 size_t nvsize = 0; | |
3249 dmu_buf_t *db; | |
3250 | |
3251 VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); | |
3252 | |
3253 packed = kmem_alloc(nvsize, KM_SLEEP); | |
3254 | |
3255 VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, | |
3256 KM_SLEEP) == 0); | |
3257 | |
3258 dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx); | |
3259 | |
3260 kmem_free(packed, nvsize); | |
3261 | |
3262 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); | |
3263 dmu_buf_will_dirty(db, tx); | |
3264 *(uint64_t *)db->db_data = nvsize; | |
3265 dmu_buf_rele(db, FTAG); | |
3266 } | |
3267 | |
3268 static void | |
3269 spa_sync_spares(spa_t *spa, dmu_tx_t *tx) | |
3270 { | |
3271 nvlist_t *nvroot; | |
3272 nvlist_t **spares; | |
3273 int i; | |
3274 | |
3275 if (!spa->spa_sync_spares) | |
3276 return; | |
3277 | |
3278 /* | |
3279 * Update the MOS nvlist describing the list of available spares. | |
3280 * spa_validate_spares() will have already made sure this nvlist is | |
4451 | 3281 * valid and the vdevs are labeled appropriately. |
2082 | 3282 */ |
3283 if (spa->spa_spares_object == 0) { | |
3284 spa->spa_spares_object = dmu_object_alloc(spa->spa_meta_objset, | |
3285 DMU_OT_PACKED_NVLIST, 1 << 14, | |
3286 DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); | |
3287 VERIFY(zap_update(spa->spa_meta_objset, | |
3288 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SPARES, | |
3289 sizeof (uint64_t), 1, &spa->spa_spares_object, tx) == 0); | |
3290 } | |
3291 | |
3292 VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
3293 if (spa->spa_nspares == 0) { | |
3294 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
3295 NULL, 0) == 0); | |
3296 } else { | |
3297 spares = kmem_alloc(spa->spa_nspares * sizeof (void *), | |
3298 KM_SLEEP); | |
3299 for (i = 0; i < spa->spa_nspares; i++) | |
3300 spares[i] = vdev_config_generate(spa, | |
3301 spa->spa_spares[i], B_FALSE, B_TRUE); | |
3302 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
3303 spares, spa->spa_nspares) == 0); | |
3304 for (i = 0; i < spa->spa_nspares; i++) | |
3305 nvlist_free(spares[i]); | |
3306 kmem_free(spares, spa->spa_nspares * sizeof (void *)); | |
3307 } | |
3308 | |
3309 spa_sync_nvlist(spa, spa->spa_spares_object, nvroot, tx); | |
2926 | 3310 nvlist_free(nvroot); |
2082 | 3311 |
3312 spa->spa_sync_spares = B_FALSE; | |
3313 } | |
3314 | |
3315 static void | |
789 | 3316 spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) |
3317 { | |
3318 nvlist_t *config; | |
3319 | |
3320 if (list_is_empty(&spa->spa_dirty_list)) | |
3321 return; | |
3322 | |
3323 config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE); | |
3324 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3325 if (spa->spa_config_syncing) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3326 nvlist_free(spa->spa_config_syncing); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3327 spa->spa_config_syncing = config; |
789 | 3328 |
2082 | 3329 spa_sync_nvlist(spa, spa->spa_config_object, config, tx); |
789 | 3330 } |
3331 | |
5094 | 3332 /* |
3333 * Set zpool properties. | |
3334 */ | |
3912 | 3335 static void |
4543 | 3336 spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) |
3912 | 3337 { |
3338 spa_t *spa = arg1; | |
5094 | 3339 objset_t *mos = spa->spa_meta_objset; |
3912 | 3340 nvlist_t *nvp = arg2; |
5094 | 3341 nvpair_t *elem; |
4451 | 3342 uint64_t intval; |
5094 | 3343 char *strval; |
3344 zpool_prop_t prop; | |
3345 const char *propname; | |
3346 zprop_type_t proptype; | |
3347 | |
3348 elem = NULL; | |
3349 while ((elem = nvlist_next_nvpair(nvp, elem))) { | |
3350 switch (prop = zpool_name_to_prop(nvpair_name(elem))) { | |
3351 case ZPOOL_PROP_VERSION: | |
3352 /* | |
3353 * Only set version for non-zpool-creation cases | |
3354 * (set/import). spa_create() needs special care | |
3355 * for version setting. | |
3356 */ | |
3357 if (tx->tx_txg != TXG_INITIAL) { | |
3358 VERIFY(nvpair_value_uint64(elem, | |
3359 &intval) == 0); | |
3360 ASSERT(intval <= SPA_VERSION); | |
3361 ASSERT(intval >= spa_version(spa)); | |
3362 spa->spa_uberblock.ub_version = intval; | |
3363 vdev_config_dirty(spa->spa_root_vdev); | |
3364 } | |
3365 break; | |
3366 | |
3367 case ZPOOL_PROP_ALTROOT: | |
3368 /* | |
3369 * 'altroot' is a non-persistent property. It should | |
3370 * have been set temporarily at creation or import time. | |
3371 */ | |
3372 ASSERT(spa->spa_root != NULL); | |
3373 break; | |
3374 | |
3375 case ZPOOL_PROP_TEMPORARY: | |
3376 /* | |
3377 * 'temporary' is a non-persistant property. | |
3378 */ | |
3379 VERIFY(nvpair_value_uint64(elem, &intval) == 0); | |
3380 spa->spa_temporary = intval; | |
4543 | 3381 break; |
5094 | 3382 |
3383 default: | |
3384 /* | |
3385 * Set pool property values in the poolprops mos object. | |
3386 */ | |
3387 mutex_enter(&spa->spa_props_lock); | |
3388 if (spa->spa_pool_props_object == 0) { | |
3389 objset_t *mos = spa->spa_meta_objset; | |
3390 | |
3391 VERIFY((spa->spa_pool_props_object = | |
3392 zap_create(mos, DMU_OT_POOL_PROPS, | |
3393 DMU_OT_NONE, 0, tx)) > 0); | |
3394 | |
3395 VERIFY(zap_update(mos, | |
3396 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, | |
3397 8, 1, &spa->spa_pool_props_object, tx) | |
3398 == 0); | |
3399 } | |
3400 mutex_exit(&spa->spa_props_lock); | |
3401 | |
3402 /* normalize the property name */ | |
3403 propname = zpool_prop_to_name(prop); | |
3404 proptype = zpool_prop_get_type(prop); | |
3405 | |
3406 if (nvpair_type(elem) == DATA_TYPE_STRING) { | |
3407 ASSERT(proptype == PROP_TYPE_STRING); | |
3408 VERIFY(nvpair_value_string(elem, &strval) == 0); | |
3409 VERIFY(zap_update(mos, | |
3410 spa->spa_pool_props_object, propname, | |
3411 1, strlen(strval) + 1, strval, tx) == 0); | |
3412 | |
3413 } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { | |
3414 VERIFY(nvpair_value_uint64(elem, &intval) == 0); | |
3415 | |
3416 if (proptype == PROP_TYPE_INDEX) { | |
3417 const char *unused; | |
3418 VERIFY(zpool_prop_index_to_string( | |
3419 prop, intval, &unused) == 0); | |
3420 } | |
3421 VERIFY(zap_update(mos, | |
3422 spa->spa_pool_props_object, propname, | |
3423 8, 1, &intval, tx) == 0); | |
3424 } else { | |
3425 ASSERT(0); /* not allowed */ | |
3426 } | |
3427 | |
3428 if (prop == ZPOOL_PROP_DELEGATION) | |
3429 spa->spa_delegation = intval; | |
3430 | |
3431 if (prop == ZPOOL_PROP_BOOTFS) | |
3432 spa->spa_bootfs = intval; | |
3912 | 3433 } |
5094 | 3434 |
3435 /* log internal history if this is not a zpool create */ | |
3436 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && | |
3437 tx->tx_txg != TXG_INITIAL) { | |
3438 spa_history_internal_log(LOG_POOL_PROPSET, | |
3439 spa, tx, cr, "%s %lld %s", | |
3440 nvpair_name(elem), intval, spa->spa_name); | |
3441 } | |
3912 | 3442 } |
3443 } | |
3444 | |
789 | 3445 /* |
3446 * Sync the specified transaction group. New blocks may be dirtied as | |
3447 * part of the process, so we iterate until it converges. | |
3448 */ | |
3449 void | |
3450 spa_sync(spa_t *spa, uint64_t txg) | |
3451 { | |
3452 dsl_pool_t *dp = spa->spa_dsl_pool; | |
3453 objset_t *mos = spa->spa_meta_objset; | |
3454 bplist_t *bpl = &spa->spa_sync_bplist; | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3455 vdev_t *rvd = spa->spa_root_vdev; |
789 | 3456 vdev_t *vd; |
3457 dmu_tx_t *tx; | |
3458 int dirty_vdevs; | |
3459 | |
3460 /* | |
3461 * Lock out configuration changes. | |
3462 */ | |
1544 | 3463 spa_config_enter(spa, RW_READER, FTAG); |
789 | 3464 |
3465 spa->spa_syncing_txg = txg; | |
3466 spa->spa_sync_pass = 0; | |
3467 | |
1544 | 3468 VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); |
789 | 3469 |
2082 | 3470 tx = dmu_tx_create_assigned(dp, txg); |
3471 | |
3472 /* | |
4577 | 3473 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, |
2082 | 3474 * set spa_deflate if we have no raid-z vdevs. |
3475 */ | |
4577 | 3476 if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && |
3477 spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { | |
2082 | 3478 int i; |
3479 | |
3480 for (i = 0; i < rvd->vdev_children; i++) { | |
3481 vd = rvd->vdev_child[i]; | |
3482 if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) | |
3483 break; | |
3484 } | |
3485 if (i == rvd->vdev_children) { | |
3486 spa->spa_deflate = TRUE; | |
3487 VERIFY(0 == zap_add(spa->spa_meta_objset, | |
3488 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, | |
3489 sizeof (uint64_t), 1, &spa->spa_deflate, tx)); | |
3490 } | |
3491 } | |
3492 | |
789 | 3493 /* |
3494 * If anything has changed in this txg, push the deferred frees | |
3495 * from the previous txg. If not, leave them alone so that we | |
3496 * don't generate work on an otherwise idle system. | |
3497 */ | |
3498 if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || | |
2329
e640bebc73b3
6446569 deferred list is hooked on flintstone vitamins
ek110237
parents:
2199
diff
changeset
|
3499 !txg_list_empty(&dp->dp_dirty_dirs, txg) || |
e640bebc73b3
6446569 deferred list is hooked on flintstone vitamins
ek110237
parents:
2199
diff
changeset
|
3500 !txg_list_empty(&dp->dp_sync_tasks, txg)) |
789 | 3501 spa_sync_deferred_frees(spa, txg); |
3502 | |
3503 /* | |
3504 * Iterate to convergence. | |
3505 */ | |
3506 do { | |
3507 spa->spa_sync_pass++; | |
3508 | |
3509 spa_sync_config_object(spa, tx); | |
2082 | 3510 spa_sync_spares(spa, tx); |
1544 | 3511 spa_errlog_sync(spa, txg); |
789 | 3512 dsl_pool_sync(dp, txg); |
3513 | |
3514 dirty_vdevs = 0; | |
3515 while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { | |
3516 vdev_sync(vd, txg); | |
3517 dirty_vdevs++; | |
3518 } | |
3519 | |
3520 bplist_sync(bpl, tx); | |
3521 } while (dirty_vdevs); | |
3522 | |
3523 bplist_close(bpl); | |
3524 | |
3525 dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); | |
3526 | |
3527 /* | |
3528 * Rewrite the vdev configuration (which includes the uberblock) | |
3529 * to commit the transaction group. | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3530 * |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3531 * If there are any dirty vdevs, sync the uberblock to all vdevs. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3532 * Otherwise, pick a random top-level vdev that's known to be |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3533 * visible in the config cache (see spa_vdev_add() for details). |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3534 * If the write fails, try the next vdev until we're tried them all. |
789 | 3535 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3536 if (!list_is_empty(&spa->spa_dirty_list)) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3537 VERIFY(vdev_config_sync(rvd, txg) == 0); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3538 } else { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3539 int children = rvd->vdev_children; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3540 int c0 = spa_get_random(children); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3541 int c; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3542 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3543 for (c = 0; c < children; c++) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3544 vd = rvd->vdev_child[(c0 + c) % children]; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3545 if (vd->vdev_ms_array == 0) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3546 continue; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3547 if (vdev_config_sync(vd, txg) == 0) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3548 break; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3549 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3550 if (c == children) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3551 VERIFY(vdev_config_sync(rvd, txg) == 0); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3552 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3553 |
2082 | 3554 dmu_tx_commit(tx); |
3555 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3556 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3557 * Clear the dirty config list. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3558 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3559 while ((vd = list_head(&spa->spa_dirty_list)) != NULL) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3560 vdev_config_clean(vd); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3561 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3562 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3563 * Now that the new config has synced transactionally, |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3564 * let it become visible to the config cache. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3565 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3566 if (spa->spa_config_syncing != NULL) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3567 spa_config_set(spa, spa->spa_config_syncing); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3568 spa->spa_config_txg = txg; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3569 spa->spa_config_syncing = NULL; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3570 } |
789 | 3571 |
3572 /* | |
3573 * Make a stable copy of the fully synced uberblock. | |
3574 * We use this as the root for pool traversals. | |
3575 */ | |
3576 spa->spa_traverse_wanted = 1; /* tells traverse_more() to stop */ | |
3577 | |
3578 spa_scrub_suspend(spa); /* stop scrubbing and finish I/Os */ | |
3579 | |
3580 rw_enter(&spa->spa_traverse_lock, RW_WRITER); | |
3581 spa->spa_traverse_wanted = 0; | |
3582 spa->spa_ubsync = spa->spa_uberblock; | |
3583 rw_exit(&spa->spa_traverse_lock); | |
3584 | |
3585 spa_scrub_resume(spa); /* resume scrub with new ubsync */ | |
3586 | |
3587 /* | |
3588 * Clean up the ZIL records for the synced txg. | |
3589 */ | |
3590 dsl_pool_zil_clean(dp); | |
3591 | |
3592 /* | |
3593 * Update usable space statistics. | |
3594 */ | |
3595 while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) | |
3596 vdev_sync_done(vd, txg); | |
3597 | |
3598 /* | |
3599 * It had better be the case that we didn't dirty anything | |
2082 | 3600 * since vdev_config_sync(). |
789 | 3601 */ |
3602 ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); | |
3603 ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); | |
3604 ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); | |
3605 ASSERT(bpl->bpl_queue == NULL); | |
3606 | |
1544 | 3607 spa_config_exit(spa, FTAG); |
3608 | |
3609 /* | |
3610 * If any async tasks have been requested, kick them off. | |
3611 */ | |
3612 spa_async_dispatch(spa); | |
789 | 3613 } |
3614 | |
3615 /* | |
3616 * Sync all pools. We don't want to hold the namespace lock across these | |
3617 * operations, so we take a reference on the spa_t and drop the lock during the | |
3618 * sync. | |
3619 */ | |
3620 void | |
3621 spa_sync_allpools(void) | |
3622 { | |
3623 spa_t *spa = NULL; | |
3624 mutex_enter(&spa_namespace_lock); | |
3625 while ((spa = spa_next(spa)) != NULL) { | |
3626 if (spa_state(spa) != POOL_STATE_ACTIVE) | |
3627 continue; | |
3628 spa_open_ref(spa, FTAG); | |
3629 mutex_exit(&spa_namespace_lock); | |
3630 txg_wait_synced(spa_get_dsl(spa), 0); | |
3631 mutex_enter(&spa_namespace_lock); | |
3632 spa_close(spa, FTAG); | |
3633 } | |
3634 mutex_exit(&spa_namespace_lock); | |
3635 } | |
3636 | |
3637 /* | |
3638 * ========================================================================== | |
3639 * Miscellaneous routines | |
3640 * ========================================================================== | |
3641 */ | |
3642 | |
3643 /* | |
3644 * Remove all pools in the system. | |
3645 */ | |
3646 void | |
3647 spa_evict_all(void) | |
3648 { | |
3649 spa_t *spa; | |
3650 | |
3651 /* | |
3652 * Remove all cached state. All pools should be closed now, | |
3653 * so every spa in the AVL tree should be unreferenced. | |
3654 */ | |
3655 mutex_enter(&spa_namespace_lock); | |
3656 while ((spa = spa_next(NULL)) != NULL) { | |
3657 /* | |
1544 | 3658 * Stop async tasks. The async thread may need to detach |
3659 * a device that's been replaced, which requires grabbing | |
3660 * spa_namespace_lock, so we must drop it here. | |
789 | 3661 */ |
3662 spa_open_ref(spa, FTAG); | |
3663 mutex_exit(&spa_namespace_lock); | |
1544 | 3664 spa_async_suspend(spa); |
4808 | 3665 mutex_enter(&spa_namespace_lock); |
789 | 3666 VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); |
3667 spa_close(spa, FTAG); | |
3668 | |
3669 if (spa->spa_state != POOL_STATE_UNINITIALIZED) { | |
3670 spa_unload(spa); | |
3671 spa_deactivate(spa); | |
3672 } | |
3673 spa_remove(spa); | |
3674 } | |
3675 mutex_exit(&spa_namespace_lock); | |
3676 } | |
1544 | 3677 |
3678 vdev_t * | |
3679 spa_lookup_by_guid(spa_t *spa, uint64_t guid) | |
3680 { | |
3681 return (vdev_lookup_by_guid(spa->spa_root_vdev, guid)); | |
3682 } | |
1760 | 3683 |
3684 void | |
5094 | 3685 spa_upgrade(spa_t *spa, uint64_t version) |
1760 | 3686 { |
3687 spa_config_enter(spa, RW_WRITER, FTAG); | |
3688 | |
3689 /* | |
3690 * This should only be called for a non-faulted pool, and since a | |
3691 * future version would result in an unopenable pool, this shouldn't be | |
3692 * possible. | |
3693 */ | |
4577 | 3694 ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); |
5094 | 3695 ASSERT(version >= spa->spa_uberblock.ub_version); |
3696 | |
3697 spa->spa_uberblock.ub_version = version; | |
1760 | 3698 vdev_config_dirty(spa->spa_root_vdev); |
3699 | |
3700 spa_config_exit(spa, FTAG); | |
2082 | 3701 |
3702 txg_wait_synced(spa_get_dsl(spa), 0); | |
1760 | 3703 } |
2082 | 3704 |
3705 boolean_t | |
3706 spa_has_spare(spa_t *spa, uint64_t guid) | |
3707 { | |
3708 int i; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3709 uint64_t spareguid; |
2082 | 3710 |
3711 for (i = 0; i < spa->spa_nspares; i++) | |
3712 if (spa->spa_spares[i]->vdev_guid == guid) | |
3713 return (B_TRUE); | |
3714 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3715 for (i = 0; i < spa->spa_pending_nspares; i++) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3716 if (nvlist_lookup_uint64(spa->spa_pending_spares[i], |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3717 ZPOOL_CONFIG_GUID, &spareguid) == 0 && |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3718 spareguid == guid) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3719 return (B_TRUE); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3720 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3721 |
2082 | 3722 return (B_FALSE); |
3723 } | |
3912 | 3724 |
4451 | 3725 /* |
3726 * Post a sysevent corresponding to the given event. The 'name' must be one of | |
3727 * the event definitions in sys/sysevent/eventdefs.h. The payload will be | |
3728 * filled in from the spa and (optionally) the vdev. This doesn't do anything | |
3729 * in the userland libzpool, as we don't want consumers to misinterpret ztest | |
3730 * or zdb as real changes. | |
3731 */ | |
3732 void | |
3733 spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) | |
3734 { | |
3735 #ifdef _KERNEL | |
3736 sysevent_t *ev; | |
3737 sysevent_attr_list_t *attr = NULL; | |
3738 sysevent_value_t value; | |
3739 sysevent_id_t eid; | |
3740 | |
3741 ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", | |
3742 SE_SLEEP); | |
3743 | |
3744 value.value_type = SE_DATA_TYPE_STRING; | |
3745 value.value.sv_string = spa_name(spa); | |
3746 if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) | |
3747 goto done; | |
3748 | |
3749 value.value_type = SE_DATA_TYPE_UINT64; | |
3750 value.value.sv_uint64 = spa_guid(spa); | |
3751 if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) | |
3752 goto done; | |
3753 | |
3754 if (vd) { | |
3755 value.value_type = SE_DATA_TYPE_UINT64; | |
3756 value.value.sv_uint64 = vd->vdev_guid; | |
3757 if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, | |
3758 SE_SLEEP) != 0) | |
3759 goto done; | |
3760 | |
3761 if (vd->vdev_path) { | |
3762 value.value_type = SE_DATA_TYPE_STRING; | |
3763 value.value.sv_string = vd->vdev_path; | |
3764 if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, | |
3765 &value, SE_SLEEP) != 0) | |
3766 goto done; | |
3767 } | |
3768 } | |
3769 | |
3770 (void) log_sysevent(ev, SE_SLEEP, &eid); | |
3771 | |
3772 done: | |
3773 if (attr) | |
3774 sysevent_free_attr(attr); | |
3775 sysevent_free(ev); | |
3776 #endif | |
3777 } |