Mercurial > illumos > illumos-gate
annotate usr/src/uts/common/fs/zfs/spa.c @ 5621:cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
6635464 Unable to put zfs cachefile in root directory
6637499 bitfield declaration macros would be useful
6637506 sunoem IPMI LED command structure should be private to libipmi
author | eschrock |
---|---|
date | Thu, 06 Dec 2007 13:27:42 -0800 |
parents | b25030891c44 |
children | c0b02c8fd2c0 |
rev | line source |
---|---|
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1544 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
2082 | 21 |
789 | 22 /* |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. |
789 | 24 * Use is subject to license terms. |
25 */ | |
26 | |
27 #pragma ident "%Z%%M% %I% %E% SMI" | |
28 | |
29 /* | |
30 * This file contains all the routines used when modifying on-disk SPA state. | |
31 * This includes opening, importing, destroying, exporting a pool, and syncing a | |
32 * pool. | |
33 */ | |
34 | |
35 #include <sys/zfs_context.h> | |
1544 | 36 #include <sys/fm/fs/zfs.h> |
789 | 37 #include <sys/spa_impl.h> |
38 #include <sys/zio.h> | |
39 #include <sys/zio_checksum.h> | |
40 #include <sys/zio_compress.h> | |
41 #include <sys/dmu.h> | |
42 #include <sys/dmu_tx.h> | |
43 #include <sys/zap.h> | |
44 #include <sys/zil.h> | |
45 #include <sys/vdev_impl.h> | |
46 #include <sys/metaslab.h> | |
47 #include <sys/uberblock_impl.h> | |
48 #include <sys/txg.h> | |
49 #include <sys/avl.h> | |
50 #include <sys/dmu_traverse.h> | |
3912 | 51 #include <sys/dmu_objset.h> |
789 | 52 #include <sys/unique.h> |
53 #include <sys/dsl_pool.h> | |
3912 | 54 #include <sys/dsl_dataset.h> |
789 | 55 #include <sys/dsl_dir.h> |
56 #include <sys/dsl_prop.h> | |
3912 | 57 #include <sys/dsl_synctask.h> |
789 | 58 #include <sys/fs/zfs.h> |
5450 | 59 #include <sys/arc.h> |
789 | 60 #include <sys/callb.h> |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
61 #include <sys/systeminfo.h> |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
62 #include <sys/sunddi.h> |
789 | 63 |
5094 | 64 #include "zfs_prop.h" |
65 | |
2986 | 66 int zio_taskq_threads = 8; |
67 | |
5094 | 68 static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); |
69 | |
70 /* | |
71 * ========================================================================== | |
72 * SPA properties routines | |
73 * ========================================================================== | |
74 */ | |
75 | |
76 /* | |
77 * Add a (source=src, propname=propval) list to an nvlist. | |
78 */ | |
79 static int | |
80 spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, | |
81 uint64_t intval, zprop_source_t src) | |
82 { | |
83 const char *propname = zpool_prop_to_name(prop); | |
84 nvlist_t *propval; | |
85 int err = 0; | |
86 | |
87 if (err = nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP)) | |
88 return (err); | |
89 | |
90 if (err = nvlist_add_uint64(propval, ZPROP_SOURCE, src)) | |
91 goto out; | |
92 | |
93 if (strval != NULL) { | |
94 if (err = nvlist_add_string(propval, ZPROP_VALUE, strval)) | |
95 goto out; | |
96 } else { | |
97 if (err = nvlist_add_uint64(propval, ZPROP_VALUE, intval)) | |
98 goto out; | |
99 } | |
100 | |
101 err = nvlist_add_nvlist(nvl, propname, propval); | |
102 out: | |
103 nvlist_free(propval); | |
104 return (err); | |
105 } | |
106 | |
107 /* | |
108 * Get property values from the spa configuration. | |
109 */ | |
110 static int | |
111 spa_prop_get_config(spa_t *spa, nvlist_t **nvp) | |
112 { | |
113 uint64_t size = spa_get_space(spa); | |
114 uint64_t used = spa_get_alloc(spa); | |
115 uint64_t cap, version; | |
116 zprop_source_t src = ZPROP_SRC_NONE; | |
117 int err; | |
5363 | 118 char *cachefile; |
119 size_t len; | |
5094 | 120 |
121 /* | |
122 * readonly properties | |
123 */ | |
124 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa->spa_name, | |
125 0, src)) | |
126 return (err); | |
127 | |
128 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src)) | |
129 return (err); | |
130 | |
131 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src)) | |
132 return (err); | |
133 | |
134 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, | |
135 size - used, src)) | |
136 return (err); | |
137 | |
138 cap = (size == 0) ? 0 : (used * 100 / size); | |
139 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src)) | |
140 return (err); | |
141 | |
142 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, | |
143 spa_guid(spa), src)) | |
144 return (err); | |
145 | |
146 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, | |
147 spa->spa_root_vdev->vdev_state, src)) | |
148 return (err); | |
149 | |
150 /* | |
151 * settable properties that are not stored in the pool property object. | |
152 */ | |
153 version = spa_version(spa); | |
154 if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) | |
155 src = ZPROP_SRC_DEFAULT; | |
156 else | |
157 src = ZPROP_SRC_LOCAL; | |
158 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, | |
159 version, src)) | |
160 return (err); | |
161 | |
162 if (spa->spa_root != NULL) { | |
163 src = ZPROP_SRC_LOCAL; | |
164 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, | |
165 spa->spa_root, 0, src)) | |
166 return (err); | |
167 } | |
168 | |
5363 | 169 if (spa->spa_config_dir != NULL) { |
170 if (strcmp(spa->spa_config_dir, "none") == 0) { | |
171 err = spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, | |
172 spa->spa_config_dir, 0, ZPROP_SRC_LOCAL); | |
173 } else { | |
174 len = strlen(spa->spa_config_dir) + | |
175 strlen(spa->spa_config_file) + 2; | |
176 cachefile = kmem_alloc(len, KM_SLEEP); | |
177 (void) snprintf(cachefile, len, "%s/%s", | |
178 spa->spa_config_dir, spa->spa_config_file); | |
179 err = spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, | |
180 cachefile, 0, ZPROP_SRC_LOCAL); | |
181 kmem_free(cachefile, len); | |
182 } | |
183 | |
184 if (err) | |
185 return (err); | |
186 } | |
5094 | 187 |
188 return (0); | |
189 } | |
190 | |
191 /* | |
192 * Get zpool property values. | |
193 */ | |
194 int | |
195 spa_prop_get(spa_t *spa, nvlist_t **nvp) | |
196 { | |
197 zap_cursor_t zc; | |
198 zap_attribute_t za; | |
199 objset_t *mos = spa->spa_meta_objset; | |
200 int err; | |
201 | |
202 if (err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)) | |
203 return (err); | |
204 | |
205 /* | |
206 * Get properties from the spa config. | |
207 */ | |
208 if (err = spa_prop_get_config(spa, nvp)) | |
209 goto out; | |
210 | |
211 mutex_enter(&spa->spa_props_lock); | |
212 /* If no pool property object, no more prop to get. */ | |
213 if (spa->spa_pool_props_object == 0) { | |
214 mutex_exit(&spa->spa_props_lock); | |
215 return (0); | |
216 } | |
217 | |
218 /* | |
219 * Get properties from the MOS pool property object. | |
220 */ | |
221 for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); | |
222 (err = zap_cursor_retrieve(&zc, &za)) == 0; | |
223 zap_cursor_advance(&zc)) { | |
224 uint64_t intval = 0; | |
225 char *strval = NULL; | |
226 zprop_source_t src = ZPROP_SRC_DEFAULT; | |
227 zpool_prop_t prop; | |
228 | |
229 if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) | |
230 continue; | |
231 | |
232 switch (za.za_integer_length) { | |
233 case 8: | |
234 /* integer property */ | |
235 if (za.za_first_integer != | |
236 zpool_prop_default_numeric(prop)) | |
237 src = ZPROP_SRC_LOCAL; | |
238 | |
239 if (prop == ZPOOL_PROP_BOOTFS) { | |
240 dsl_pool_t *dp; | |
241 dsl_dataset_t *ds = NULL; | |
242 | |
243 dp = spa_get_dsl(spa); | |
244 rw_enter(&dp->dp_config_rwlock, RW_READER); | |
245 if (err = dsl_dataset_open_obj(dp, | |
246 za.za_first_integer, NULL, DS_MODE_NONE, | |
247 FTAG, &ds)) { | |
248 rw_exit(&dp->dp_config_rwlock); | |
249 break; | |
250 } | |
251 | |
252 strval = kmem_alloc( | |
253 MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, | |
254 KM_SLEEP); | |
255 dsl_dataset_name(ds, strval); | |
256 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
257 rw_exit(&dp->dp_config_rwlock); | |
258 } else { | |
259 strval = NULL; | |
260 intval = za.za_first_integer; | |
261 } | |
262 | |
263 err = spa_prop_add_list(*nvp, prop, strval, | |
264 intval, src); | |
265 | |
266 if (strval != NULL) | |
267 kmem_free(strval, | |
268 MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); | |
269 | |
270 break; | |
271 | |
272 case 1: | |
273 /* string property */ | |
274 strval = kmem_alloc(za.za_num_integers, KM_SLEEP); | |
275 err = zap_lookup(mos, spa->spa_pool_props_object, | |
276 za.za_name, 1, za.za_num_integers, strval); | |
277 if (err) { | |
278 kmem_free(strval, za.za_num_integers); | |
279 break; | |
280 } | |
281 err = spa_prop_add_list(*nvp, prop, strval, 0, src); | |
282 kmem_free(strval, za.za_num_integers); | |
283 break; | |
284 | |
285 default: | |
286 break; | |
287 } | |
288 } | |
289 zap_cursor_fini(&zc); | |
290 mutex_exit(&spa->spa_props_lock); | |
291 out: | |
292 if (err && err != ENOENT) { | |
293 nvlist_free(*nvp); | |
294 return (err); | |
295 } | |
296 | |
297 return (0); | |
298 } | |
299 | |
300 /* | |
301 * Validate the given pool properties nvlist and modify the list | |
302 * for the property values to be set. | |
303 */ | |
304 static int | |
305 spa_prop_validate(spa_t *spa, nvlist_t *props) | |
306 { | |
307 nvpair_t *elem; | |
308 int error = 0, reset_bootfs = 0; | |
309 uint64_t objnum; | |
310 | |
311 elem = NULL; | |
312 while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { | |
313 zpool_prop_t prop; | |
314 char *propname, *strval; | |
315 uint64_t intval; | |
316 vdev_t *rvdev; | |
317 char *vdev_type; | |
318 objset_t *os; | |
5363 | 319 char *slash; |
5094 | 320 |
321 propname = nvpair_name(elem); | |
322 | |
323 if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) | |
324 return (EINVAL); | |
325 | |
326 switch (prop) { | |
327 case ZPOOL_PROP_VERSION: | |
328 error = nvpair_value_uint64(elem, &intval); | |
329 if (!error && | |
330 (intval < spa_version(spa) || intval > SPA_VERSION)) | |
331 error = EINVAL; | |
332 break; | |
333 | |
334 case ZPOOL_PROP_DELEGATION: | |
335 case ZPOOL_PROP_AUTOREPLACE: | |
336 error = nvpair_value_uint64(elem, &intval); | |
337 if (!error && intval > 1) | |
338 error = EINVAL; | |
339 break; | |
340 | |
341 case ZPOOL_PROP_BOOTFS: | |
342 if (spa_version(spa) < SPA_VERSION_BOOTFS) { | |
343 error = ENOTSUP; | |
344 break; | |
345 } | |
346 | |
347 /* | |
348 * A bootable filesystem can not be on a RAIDZ pool | |
349 * nor a striped pool with more than 1 device. | |
350 */ | |
351 rvdev = spa->spa_root_vdev; | |
352 vdev_type = | |
353 rvdev->vdev_child[0]->vdev_ops->vdev_op_type; | |
354 if (rvdev->vdev_children > 1 || | |
355 strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 || | |
356 strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) { | |
357 error = ENOTSUP; | |
358 break; | |
359 } | |
360 | |
361 reset_bootfs = 1; | |
362 | |
363 error = nvpair_value_string(elem, &strval); | |
364 | |
365 if (!error) { | |
366 if (strval == NULL || strval[0] == '\0') { | |
367 objnum = zpool_prop_default_numeric( | |
368 ZPOOL_PROP_BOOTFS); | |
369 break; | |
370 } | |
371 | |
372 if (error = dmu_objset_open(strval, DMU_OST_ZFS, | |
373 DS_MODE_STANDARD | DS_MODE_READONLY, &os)) | |
374 break; | |
375 objnum = dmu_objset_id(os); | |
376 dmu_objset_close(os); | |
377 } | |
378 break; | |
5329 | 379 case ZPOOL_PROP_FAILUREMODE: |
380 error = nvpair_value_uint64(elem, &intval); | |
381 if (!error && (intval < ZIO_FAILURE_MODE_WAIT || | |
382 intval > ZIO_FAILURE_MODE_PANIC)) | |
383 error = EINVAL; | |
384 | |
385 /* | |
386 * This is a special case which only occurs when | |
387 * the pool has completely failed. This allows | |
388 * the user to change the in-core failmode property | |
389 * without syncing it out to disk (I/Os might | |
390 * currently be blocked). We do this by returning | |
391 * EIO to the caller (spa_prop_set) to trick it | |
392 * into thinking we encountered a property validation | |
393 * error. | |
394 */ | |
395 if (!error && spa_state(spa) == POOL_STATE_IO_FAILURE) { | |
396 spa->spa_failmode = intval; | |
397 error = EIO; | |
398 } | |
399 break; | |
5363 | 400 |
401 case ZPOOL_PROP_CACHEFILE: | |
402 if ((error = nvpair_value_string(elem, &strval)) != 0) | |
403 break; | |
404 | |
405 if (strval[0] == '\0') | |
406 break; | |
407 | |
408 if (strcmp(strval, "none") == 0) | |
409 break; | |
410 | |
411 if (strval[0] != '/') { | |
412 error = EINVAL; | |
413 break; | |
414 } | |
415 | |
416 slash = strrchr(strval, '/'); | |
417 ASSERT(slash != NULL); | |
418 | |
419 if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || | |
420 strcmp(slash, "/..") == 0) | |
421 error = EINVAL; | |
422 break; | |
5094 | 423 } |
424 | |
425 if (error) | |
426 break; | |
427 } | |
428 | |
429 if (!error && reset_bootfs) { | |
430 error = nvlist_remove(props, | |
431 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); | |
432 | |
433 if (!error) { | |
434 error = nvlist_add_uint64(props, | |
435 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); | |
436 } | |
437 } | |
438 | |
439 return (error); | |
440 } | |
441 | |
442 int | |
443 spa_prop_set(spa_t *spa, nvlist_t *nvp) | |
444 { | |
445 int error; | |
446 | |
447 if ((error = spa_prop_validate(spa, nvp)) != 0) | |
448 return (error); | |
449 | |
450 return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, | |
451 spa, nvp, 3)); | |
452 } | |
453 | |
454 /* | |
455 * If the bootfs property value is dsobj, clear it. | |
456 */ | |
457 void | |
458 spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) | |
459 { | |
460 if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { | |
461 VERIFY(zap_remove(spa->spa_meta_objset, | |
462 spa->spa_pool_props_object, | |
463 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); | |
464 spa->spa_bootfs = 0; | |
465 } | |
466 } | |
467 | |
789 | 468 /* |
469 * ========================================================================== | |
470 * SPA state manipulation (open/create/destroy/import/export) | |
471 * ========================================================================== | |
472 */ | |
473 | |
1544 | 474 static int |
475 spa_error_entry_compare(const void *a, const void *b) | |
476 { | |
477 spa_error_entry_t *sa = (spa_error_entry_t *)a; | |
478 spa_error_entry_t *sb = (spa_error_entry_t *)b; | |
479 int ret; | |
480 | |
481 ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, | |
482 sizeof (zbookmark_t)); | |
483 | |
484 if (ret < 0) | |
485 return (-1); | |
486 else if (ret > 0) | |
487 return (1); | |
488 else | |
489 return (0); | |
490 } | |
491 | |
492 /* | |
493 * Utility function which retrieves copies of the current logs and | |
494 * re-initializes them in the process. | |
495 */ | |
496 void | |
497 spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) | |
498 { | |
499 ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); | |
500 | |
501 bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); | |
502 bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); | |
503 | |
504 avl_create(&spa->spa_errlist_scrub, | |
505 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
506 offsetof(spa_error_entry_t, se_avl)); | |
507 avl_create(&spa->spa_errlist_last, | |
508 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
509 offsetof(spa_error_entry_t, se_avl)); | |
510 } | |
511 | |
789 | 512 /* |
513 * Activate an uninitialized pool. | |
514 */ | |
515 static void | |
516 spa_activate(spa_t *spa) | |
517 { | |
518 int t; | |
519 | |
520 ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); | |
521 | |
522 spa->spa_state = POOL_STATE_ACTIVE; | |
523 | |
524 spa->spa_normal_class = metaslab_class_create(); | |
4527 | 525 spa->spa_log_class = metaslab_class_create(); |
789 | 526 |
527 for (t = 0; t < ZIO_TYPES; t++) { | |
528 spa->spa_zio_issue_taskq[t] = taskq_create("spa_zio_issue", | |
2986 | 529 zio_taskq_threads, maxclsyspri, 50, INT_MAX, |
789 | 530 TASKQ_PREPOPULATE); |
531 spa->spa_zio_intr_taskq[t] = taskq_create("spa_zio_intr", | |
2986 | 532 zio_taskq_threads, maxclsyspri, 50, INT_MAX, |
789 | 533 TASKQ_PREPOPULATE); |
534 } | |
535 | |
536 list_create(&spa->spa_dirty_list, sizeof (vdev_t), | |
537 offsetof(vdev_t, vdev_dirty_node)); | |
5329 | 538 list_create(&spa->spa_zio_list, sizeof (zio_t), |
539 offsetof(zio_t, zio_link_node)); | |
789 | 540 |
541 txg_list_create(&spa->spa_vdev_txg_list, | |
542 offsetof(struct vdev, vdev_txg_node)); | |
1544 | 543 |
544 avl_create(&spa->spa_errlist_scrub, | |
545 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
546 offsetof(spa_error_entry_t, se_avl)); | |
547 avl_create(&spa->spa_errlist_last, | |
548 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
549 offsetof(spa_error_entry_t, se_avl)); | |
789 | 550 } |
551 | |
552 /* | |
553 * Opposite of spa_activate(). | |
554 */ | |
555 static void | |
556 spa_deactivate(spa_t *spa) | |
557 { | |
558 int t; | |
559 | |
560 ASSERT(spa->spa_sync_on == B_FALSE); | |
561 ASSERT(spa->spa_dsl_pool == NULL); | |
562 ASSERT(spa->spa_root_vdev == NULL); | |
563 | |
564 ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); | |
565 | |
566 txg_list_destroy(&spa->spa_vdev_txg_list); | |
567 | |
568 list_destroy(&spa->spa_dirty_list); | |
5329 | 569 list_destroy(&spa->spa_zio_list); |
789 | 570 |
571 for (t = 0; t < ZIO_TYPES; t++) { | |
572 taskq_destroy(spa->spa_zio_issue_taskq[t]); | |
573 taskq_destroy(spa->spa_zio_intr_taskq[t]); | |
574 spa->spa_zio_issue_taskq[t] = NULL; | |
575 spa->spa_zio_intr_taskq[t] = NULL; | |
576 } | |
577 | |
578 metaslab_class_destroy(spa->spa_normal_class); | |
579 spa->spa_normal_class = NULL; | |
580 | |
4527 | 581 metaslab_class_destroy(spa->spa_log_class); |
582 spa->spa_log_class = NULL; | |
583 | |
1544 | 584 /* |
585 * If this was part of an import or the open otherwise failed, we may | |
586 * still have errors left in the queues. Empty them just in case. | |
587 */ | |
588 spa_errlog_drain(spa); | |
589 | |
590 avl_destroy(&spa->spa_errlist_scrub); | |
591 avl_destroy(&spa->spa_errlist_last); | |
592 | |
789 | 593 spa->spa_state = POOL_STATE_UNINITIALIZED; |
594 } | |
595 | |
596 /* | |
597 * Verify a pool configuration, and construct the vdev tree appropriately. This | |
598 * will create all the necessary vdevs in the appropriate layout, with each vdev | |
599 * in the CLOSED state. This will prep the pool before open/creation/import. | |
600 * All vdev validation is done by the vdev_alloc() routine. | |
601 */ | |
2082 | 602 static int |
603 spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, | |
604 uint_t id, int atype) | |
789 | 605 { |
606 nvlist_t **child; | |
607 uint_t c, children; | |
2082 | 608 int error; |
609 | |
610 if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) | |
611 return (error); | |
612 | |
613 if ((*vdp)->vdev_ops->vdev_op_leaf) | |
614 return (0); | |
789 | 615 |
616 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, | |
617 &child, &children) != 0) { | |
2082 | 618 vdev_free(*vdp); |
619 *vdp = NULL; | |
620 return (EINVAL); | |
789 | 621 } |
622 | |
623 for (c = 0; c < children; c++) { | |
2082 | 624 vdev_t *vd; |
625 if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, | |
626 atype)) != 0) { | |
627 vdev_free(*vdp); | |
628 *vdp = NULL; | |
629 return (error); | |
789 | 630 } |
631 } | |
632 | |
2082 | 633 ASSERT(*vdp != NULL); |
634 | |
635 return (0); | |
789 | 636 } |
637 | |
638 /* | |
639 * Opposite of spa_load(). | |
640 */ | |
641 static void | |
642 spa_unload(spa_t *spa) | |
643 { | |
2082 | 644 int i; |
645 | |
789 | 646 /* |
1544 | 647 * Stop async tasks. |
648 */ | |
649 spa_async_suspend(spa); | |
650 | |
651 /* | |
789 | 652 * Stop syncing. |
653 */ | |
654 if (spa->spa_sync_on) { | |
655 txg_sync_stop(spa->spa_dsl_pool); | |
656 spa->spa_sync_on = B_FALSE; | |
657 } | |
658 | |
659 /* | |
660 * Wait for any outstanding prefetch I/O to complete. | |
661 */ | |
1544 | 662 spa_config_enter(spa, RW_WRITER, FTAG); |
663 spa_config_exit(spa, FTAG); | |
789 | 664 |
665 /* | |
5450 | 666 * Drop and purge level 2 cache |
667 */ | |
668 spa_l2cache_drop(spa); | |
669 | |
670 /* | |
789 | 671 * Close the dsl pool. |
672 */ | |
673 if (spa->spa_dsl_pool) { | |
674 dsl_pool_close(spa->spa_dsl_pool); | |
675 spa->spa_dsl_pool = NULL; | |
676 } | |
677 | |
678 /* | |
679 * Close all vdevs. | |
680 */ | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
681 if (spa->spa_root_vdev) |
789 | 682 vdev_free(spa->spa_root_vdev); |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
683 ASSERT(spa->spa_root_vdev == NULL); |
1544 | 684 |
5450 | 685 for (i = 0; i < spa->spa_spares.sav_count; i++) |
686 vdev_free(spa->spa_spares.sav_vdevs[i]); | |
687 if (spa->spa_spares.sav_vdevs) { | |
688 kmem_free(spa->spa_spares.sav_vdevs, | |
689 spa->spa_spares.sav_count * sizeof (void *)); | |
690 spa->spa_spares.sav_vdevs = NULL; | |
691 } | |
692 if (spa->spa_spares.sav_config) { | |
693 nvlist_free(spa->spa_spares.sav_config); | |
694 spa->spa_spares.sav_config = NULL; | |
2082 | 695 } |
5450 | 696 |
697 for (i = 0; i < spa->spa_l2cache.sav_count; i++) | |
698 vdev_free(spa->spa_l2cache.sav_vdevs[i]); | |
699 if (spa->spa_l2cache.sav_vdevs) { | |
700 kmem_free(spa->spa_l2cache.sav_vdevs, | |
701 spa->spa_l2cache.sav_count * sizeof (void *)); | |
702 spa->spa_l2cache.sav_vdevs = NULL; | |
703 } | |
704 if (spa->spa_l2cache.sav_config) { | |
705 nvlist_free(spa->spa_l2cache.sav_config); | |
706 spa->spa_l2cache.sav_config = NULL; | |
2082 | 707 } |
708 | |
1544 | 709 spa->spa_async_suspended = 0; |
789 | 710 } |
711 | |
712 /* | |
2082 | 713 * Load (or re-load) the current list of vdevs describing the active spares for |
714 * this pool. When this is called, we have some form of basic information in | |
5450 | 715 * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and |
716 * then re-generate a more complete list including status information. | |
2082 | 717 */ |
718 static void | |
719 spa_load_spares(spa_t *spa) | |
720 { | |
721 nvlist_t **spares; | |
722 uint_t nspares; | |
723 int i; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
724 vdev_t *vd, *tvd; |
2082 | 725 |
726 /* | |
727 * First, close and free any existing spare vdevs. | |
728 */ | |
5450 | 729 for (i = 0; i < spa->spa_spares.sav_count; i++) { |
730 vd = spa->spa_spares.sav_vdevs[i]; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
731 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
732 /* Undo the call to spa_activate() below */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
733 if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL && |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
734 tvd->vdev_isspare) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
735 spa_spare_remove(tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
736 vdev_close(vd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
737 vdev_free(vd); |
2082 | 738 } |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
739 |
5450 | 740 if (spa->spa_spares.sav_vdevs) |
741 kmem_free(spa->spa_spares.sav_vdevs, | |
742 spa->spa_spares.sav_count * sizeof (void *)); | |
743 | |
744 if (spa->spa_spares.sav_config == NULL) | |
2082 | 745 nspares = 0; |
746 else | |
5450 | 747 VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, |
2082 | 748 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); |
749 | |
5450 | 750 spa->spa_spares.sav_count = (int)nspares; |
751 spa->spa_spares.sav_vdevs = NULL; | |
2082 | 752 |
753 if (nspares == 0) | |
754 return; | |
755 | |
756 /* | |
757 * Construct the array of vdevs, opening them to get status in the | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
758 * process. For each spare, there is potentially two different vdev_t |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
759 * structures associated with it: one in the list of spares (used only |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
760 * for basic validation purposes) and one in the active vdev |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
761 * configuration (if it's spared in). During this phase we open and |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
762 * validate each vdev on the spare list. If the vdev also exists in the |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
763 * active configuration, then we also mark this vdev as an active spare. |
2082 | 764 */ |
5450 | 765 spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), |
766 KM_SLEEP); | |
767 for (i = 0; i < spa->spa_spares.sav_count; i++) { | |
2082 | 768 VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, |
769 VDEV_ALLOC_SPARE) == 0); | |
770 ASSERT(vd != NULL); | |
771 | |
5450 | 772 spa->spa_spares.sav_vdevs[i] = vd; |
2082 | 773 |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
774 if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
775 if (!tvd->vdev_isspare) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
776 spa_spare_add(tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
777 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
778 /* |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
779 * We only mark the spare active if we were successfully |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
780 * able to load the vdev. Otherwise, importing a pool |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
781 * with a bad active spare would result in strange |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
782 * behavior, because multiple pool would think the spare |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
783 * is actively in use. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
784 * |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
785 * There is a vulnerability here to an equally bizarre |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
786 * circumstance, where a dead active spare is later |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
787 * brought back to life (onlined or otherwise). Given |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
788 * the rarity of this scenario, and the extra complexity |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
789 * it adds, we ignore the possibility. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
790 */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
791 if (!vdev_is_dead(tvd)) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
792 spa_spare_activate(tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
793 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
794 |
2082 | 795 if (vdev_open(vd) != 0) |
796 continue; | |
797 | |
798 vd->vdev_top = vd; | |
5450 | 799 if (vdev_validate_aux(vd) == 0) |
800 spa_spare_add(vd); | |
2082 | 801 } |
802 | |
803 /* | |
804 * Recompute the stashed list of spares, with status information | |
805 * this time. | |
806 */ | |
5450 | 807 VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, |
2082 | 808 DATA_TYPE_NVLIST_ARRAY) == 0); |
809 | |
5450 | 810 spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), |
811 KM_SLEEP); | |
812 for (i = 0; i < spa->spa_spares.sav_count; i++) | |
813 spares[i] = vdev_config_generate(spa, | |
814 spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); | |
815 VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, | |
816 ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); | |
817 for (i = 0; i < spa->spa_spares.sav_count; i++) | |
2082 | 818 nvlist_free(spares[i]); |
5450 | 819 kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); |
820 } | |
821 | |
822 /* | |
823 * Load (or re-load) the current list of vdevs describing the active l2cache for | |
824 * this pool. When this is called, we have some form of basic information in | |
825 * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and | |
826 * then re-generate a more complete list including status information. | |
827 * Devices which are already active have their details maintained, and are | |
828 * not re-opened. | |
829 */ | |
830 static void | |
831 spa_load_l2cache(spa_t *spa) | |
832 { | |
833 nvlist_t **l2cache; | |
834 uint_t nl2cache; | |
835 int i, j, oldnvdevs; | |
836 uint64_t guid; | |
837 vdev_t *vd, **oldvdevs, **newvdevs; | |
838 spa_aux_vdev_t *sav = &spa->spa_l2cache; | |
839 | |
840 if (sav->sav_config != NULL) { | |
841 VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, | |
842 ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); | |
843 newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); | |
844 } else { | |
845 nl2cache = 0; | |
846 } | |
847 | |
848 oldvdevs = sav->sav_vdevs; | |
849 oldnvdevs = sav->sav_count; | |
850 sav->sav_vdevs = NULL; | |
851 sav->sav_count = 0; | |
852 | |
853 /* | |
854 * Process new nvlist of vdevs. | |
855 */ | |
856 for (i = 0; i < nl2cache; i++) { | |
857 VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, | |
858 &guid) == 0); | |
859 | |
860 newvdevs[i] = NULL; | |
861 for (j = 0; j < oldnvdevs; j++) { | |
862 vd = oldvdevs[j]; | |
863 if (vd != NULL && guid == vd->vdev_guid) { | |
864 /* | |
865 * Retain previous vdev for add/remove ops. | |
866 */ | |
867 newvdevs[i] = vd; | |
868 oldvdevs[j] = NULL; | |
869 break; | |
870 } | |
871 } | |
872 | |
873 if (newvdevs[i] == NULL) { | |
874 /* | |
875 * Create new vdev | |
876 */ | |
877 VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, | |
878 VDEV_ALLOC_L2CACHE) == 0); | |
879 ASSERT(vd != NULL); | |
880 newvdevs[i] = vd; | |
881 | |
882 /* | |
883 * Commit this vdev as an l2cache device, | |
884 * even if it fails to open. | |
885 */ | |
886 spa_l2cache_add(vd); | |
887 | |
888 if (vdev_open(vd) != 0) | |
889 continue; | |
890 | |
891 vd->vdev_top = vd; | |
892 (void) vdev_validate_aux(vd); | |
893 | |
894 if (!vdev_is_dead(vd)) { | |
895 uint64_t size; | |
896 size = vdev_get_rsize(vd); | |
897 ASSERT3U(size, >, 0); | |
898 if (spa_mode & FWRITE) { | |
899 l2arc_add_vdev(spa, vd, | |
900 VDEV_LABEL_START_SIZE, | |
901 size - VDEV_LABEL_START_SIZE); | |
902 } | |
903 spa_l2cache_activate(vd); | |
904 } | |
905 } | |
906 } | |
907 | |
908 /* | |
909 * Purge vdevs that were dropped | |
910 */ | |
911 for (i = 0; i < oldnvdevs; i++) { | |
912 uint64_t pool; | |
913 | |
914 vd = oldvdevs[i]; | |
915 if (vd != NULL) { | |
916 if (spa_mode & FWRITE && | |
917 spa_l2cache_exists(vd->vdev_guid, &pool) && | |
918 pool != 0ULL) { | |
919 l2arc_remove_vdev(vd); | |
920 } | |
921 (void) vdev_close(vd); | |
922 spa_l2cache_remove(vd); | |
923 } | |
924 } | |
925 | |
926 if (oldvdevs) | |
927 kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); | |
928 | |
929 if (sav->sav_config == NULL) | |
930 goto out; | |
931 | |
932 sav->sav_vdevs = newvdevs; | |
933 sav->sav_count = (int)nl2cache; | |
934 | |
935 /* | |
936 * Recompute the stashed list of l2cache devices, with status | |
937 * information this time. | |
938 */ | |
939 VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, | |
940 DATA_TYPE_NVLIST_ARRAY) == 0); | |
941 | |
942 l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); | |
943 for (i = 0; i < sav->sav_count; i++) | |
944 l2cache[i] = vdev_config_generate(spa, | |
945 sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); | |
946 VERIFY(nvlist_add_nvlist_array(sav->sav_config, | |
947 ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); | |
948 out: | |
949 for (i = 0; i < sav->sav_count; i++) | |
950 nvlist_free(l2cache[i]); | |
951 if (sav->sav_count) | |
952 kmem_free(l2cache, sav->sav_count * sizeof (void *)); | |
2082 | 953 } |
954 | |
955 static int | |
956 load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) | |
957 { | |
958 dmu_buf_t *db; | |
959 char *packed = NULL; | |
960 size_t nvsize = 0; | |
961 int error; | |
962 *value = NULL; | |
963 | |
964 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); | |
965 nvsize = *(uint64_t *)db->db_data; | |
966 dmu_buf_rele(db, FTAG); | |
967 | |
968 packed = kmem_alloc(nvsize, KM_SLEEP); | |
969 error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed); | |
970 if (error == 0) | |
971 error = nvlist_unpack(packed, nvsize, value, 0); | |
972 kmem_free(packed, nvsize); | |
973 | |
974 return (error); | |
975 } | |
976 | |
977 /* | |
4451 | 978 * Checks to see if the given vdev could not be opened, in which case we post a |
979 * sysevent to notify the autoreplace code that the device has been removed. | |
980 */ | |
981 static void | |
982 spa_check_removed(vdev_t *vd) | |
983 { | |
984 int c; | |
985 | |
986 for (c = 0; c < vd->vdev_children; c++) | |
987 spa_check_removed(vd->vdev_child[c]); | |
988 | |
989 if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { | |
990 zfs_post_autoreplace(vd->vdev_spa, vd); | |
991 spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); | |
992 } | |
993 } | |
994 | |
995 /* | |
789 | 996 * Load an existing storage pool, using the pool's builtin spa_config as a |
1544 | 997 * source of configuration information. |
789 | 998 */ |
999 static int | |
1544 | 1000 spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) |
789 | 1001 { |
1002 int error = 0; | |
1003 nvlist_t *nvroot = NULL; | |
1004 vdev_t *rvd; | |
1005 uberblock_t *ub = &spa->spa_uberblock; | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1006 uint64_t config_cache_txg = spa->spa_config_txg; |
789 | 1007 uint64_t pool_guid; |
2082 | 1008 uint64_t version; |
789 | 1009 zio_t *zio; |
4451 | 1010 uint64_t autoreplace = 0; |
789 | 1011 |
1544 | 1012 spa->spa_load_state = state; |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1013 |
789 | 1014 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || |
1733 | 1015 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { |
1544 | 1016 error = EINVAL; |
1017 goto out; | |
1018 } | |
789 | 1019 |
2082 | 1020 /* |
1021 * Versioning wasn't explicitly added to the label until later, so if | |
1022 * it's not present treat it as the initial version. | |
1023 */ | |
1024 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) | |
4577 | 1025 version = SPA_VERSION_INITIAL; |
2082 | 1026 |
1733 | 1027 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, |
1028 &spa->spa_config_txg); | |
1029 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1030 if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && |
1544 | 1031 spa_guid_exists(pool_guid, 0)) { |
1032 error = EEXIST; | |
1033 goto out; | |
1034 } | |
789 | 1035 |
2174
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
1036 spa->spa_load_guid = pool_guid; |
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
1037 |
789 | 1038 /* |
2082 | 1039 * Parse the configuration into a vdev tree. We explicitly set the |
1040 * value that will be returned by spa_version() since parsing the | |
1041 * configuration requires knowing the version number. | |
789 | 1042 */ |
1544 | 1043 spa_config_enter(spa, RW_WRITER, FTAG); |
2082 | 1044 spa->spa_ubsync.ub_version = version; |
1045 error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); | |
1544 | 1046 spa_config_exit(spa, FTAG); |
789 | 1047 |
2082 | 1048 if (error != 0) |
1544 | 1049 goto out; |
789 | 1050 |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1051 ASSERT(spa->spa_root_vdev == rvd); |
789 | 1052 ASSERT(spa_guid(spa) == pool_guid); |
1053 | |
1054 /* | |
1055 * Try to open all vdevs, loading each label in the process. | |
1056 */ | |
4070
4390ea390a1e
6386594 zdb message should be clearer when failing for lack of permissions
mc142369
parents:
3975
diff
changeset
|
1057 error = vdev_open(rvd); |
4390ea390a1e
6386594 zdb message should be clearer when failing for lack of permissions
mc142369
parents:
3975
diff
changeset
|
1058 if (error != 0) |
1544 | 1059 goto out; |
789 | 1060 |
1061 /* | |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1062 * Validate the labels for all leaf vdevs. We need to grab the config |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1063 * lock because all label I/O is done with the ZIO_FLAG_CONFIG_HELD |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1064 * flag. |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1065 */ |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1066 spa_config_enter(spa, RW_READER, FTAG); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1067 error = vdev_validate(rvd); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1068 spa_config_exit(spa, FTAG); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1069 |
4070
4390ea390a1e
6386594 zdb message should be clearer when failing for lack of permissions
mc142369
parents:
3975
diff
changeset
|
1070 if (error != 0) |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1071 goto out; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1072 |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1073 if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1074 error = ENXIO; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1075 goto out; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1076 } |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1077 |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1078 /* |
789 | 1079 * Find the best uberblock. |
1080 */ | |
1081 bzero(ub, sizeof (uberblock_t)); | |
1082 | |
1083 zio = zio_root(spa, NULL, NULL, | |
1084 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); | |
1085 vdev_uberblock_load(zio, rvd, ub); | |
1086 error = zio_wait(zio); | |
1087 | |
1088 /* | |
1089 * If we weren't able to find a single valid uberblock, return failure. | |
1090 */ | |
1091 if (ub->ub_txg == 0) { | |
1760 | 1092 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
1093 VDEV_AUX_CORRUPT_DATA); | |
1544 | 1094 error = ENXIO; |
1095 goto out; | |
1096 } | |
1097 | |
1098 /* | |
1099 * If the pool is newer than the code, we can't open it. | |
1100 */ | |
4577 | 1101 if (ub->ub_version > SPA_VERSION) { |
1760 | 1102 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
1103 VDEV_AUX_VERSION_NEWER); | |
1544 | 1104 error = ENOTSUP; |
1105 goto out; | |
789 | 1106 } |
1107 | |
1108 /* | |
1109 * If the vdev guid sum doesn't match the uberblock, we have an | |
1110 * incomplete configuration. | |
1111 */ | |
1732 | 1112 if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { |
1544 | 1113 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
1114 VDEV_AUX_BAD_GUID_SUM); | |
1115 error = ENXIO; | |
1116 goto out; | |
789 | 1117 } |
1118 | |
1119 /* | |
1120 * Initialize internal SPA structures. | |
1121 */ | |
1122 spa->spa_state = POOL_STATE_ACTIVE; | |
1123 spa->spa_ubsync = spa->spa_uberblock; | |
1124 spa->spa_first_txg = spa_last_synced_txg(spa) + 1; | |
1544 | 1125 error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); |
1126 if (error) { | |
1127 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1128 VDEV_AUX_CORRUPT_DATA); | |
1129 goto out; | |
1130 } | |
789 | 1131 spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; |
1132 | |
1544 | 1133 if (zap_lookup(spa->spa_meta_objset, |
789 | 1134 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, |
1544 | 1135 sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { |
1136 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1137 VDEV_AUX_CORRUPT_DATA); | |
1138 error = EIO; | |
1139 goto out; | |
1140 } | |
789 | 1141 |
1142 if (!mosconfig) { | |
2082 | 1143 nvlist_t *newconfig; |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1144 uint64_t hostid; |
2082 | 1145 |
1146 if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { | |
1544 | 1147 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
1148 VDEV_AUX_CORRUPT_DATA); | |
1149 error = EIO; | |
1150 goto out; | |
1151 } | |
789 | 1152 |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1153 if (nvlist_lookup_uint64(newconfig, ZPOOL_CONFIG_HOSTID, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1154 &hostid) == 0) { |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1155 char *hostname; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1156 unsigned long myhostid = 0; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1157 |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1158 VERIFY(nvlist_lookup_string(newconfig, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1159 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1160 |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1161 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); |
4178
ad95fd86760b
6553537 zfs root fails to boot from a snv_63+zfsboot-pfinstall netinstall image
lling
parents:
4070
diff
changeset
|
1162 if (hostid != 0 && myhostid != 0 && |
ad95fd86760b
6553537 zfs root fails to boot from a snv_63+zfsboot-pfinstall netinstall image
lling
parents:
4070
diff
changeset
|
1163 (unsigned long)hostid != myhostid) { |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1164 cmn_err(CE_WARN, "pool '%s' could not be " |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1165 "loaded as it was last accessed by " |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1166 "another system (host: %s hostid: 0x%lx). " |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1167 "See: http://www.sun.com/msg/ZFS-8000-EY", |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1168 spa->spa_name, hostname, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1169 (unsigned long)hostid); |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1170 error = EBADF; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1171 goto out; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1172 } |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1173 } |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1174 |
789 | 1175 spa_config_set(spa, newconfig); |
1176 spa_unload(spa); | |
1177 spa_deactivate(spa); | |
1178 spa_activate(spa); | |
1179 | |
1544 | 1180 return (spa_load(spa, newconfig, state, B_TRUE)); |
1181 } | |
1182 | |
1183 if (zap_lookup(spa->spa_meta_objset, | |
1184 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, | |
1185 sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { | |
1186 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1187 VDEV_AUX_CORRUPT_DATA); | |
1188 error = EIO; | |
1189 goto out; | |
789 | 1190 } |
1191 | |
1544 | 1192 /* |
2082 | 1193 * Load the bit that tells us to use the new accounting function |
1194 * (raid-z deflation). If we have an older pool, this will not | |
1195 * be present. | |
1196 */ | |
1197 error = zap_lookup(spa->spa_meta_objset, | |
1198 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, | |
1199 sizeof (uint64_t), 1, &spa->spa_deflate); | |
1200 if (error != 0 && error != ENOENT) { | |
1201 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1202 VDEV_AUX_CORRUPT_DATA); | |
1203 error = EIO; | |
1204 goto out; | |
1205 } | |
1206 | |
1207 /* | |
1544 | 1208 * Load the persistent error log. If we have an older pool, this will |
1209 * not be present. | |
1210 */ | |
1211 error = zap_lookup(spa->spa_meta_objset, | |
1212 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, | |
1213 sizeof (uint64_t), 1, &spa->spa_errlog_last); | |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1214 if (error != 0 && error != ENOENT) { |
1544 | 1215 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
1216 VDEV_AUX_CORRUPT_DATA); | |
1217 error = EIO; | |
1218 goto out; | |
1219 } | |
1220 | |
1221 error = zap_lookup(spa->spa_meta_objset, | |
1222 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, | |
1223 sizeof (uint64_t), 1, &spa->spa_errlog_scrub); | |
1224 if (error != 0 && error != ENOENT) { | |
1225 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1226 VDEV_AUX_CORRUPT_DATA); | |
1227 error = EIO; | |
1228 goto out; | |
1229 } | |
789 | 1230 |
1231 /* | |
2926 | 1232 * Load the history object. If we have an older pool, this |
1233 * will not be present. | |
1234 */ | |
1235 error = zap_lookup(spa->spa_meta_objset, | |
1236 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, | |
1237 sizeof (uint64_t), 1, &spa->spa_history); | |
1238 if (error != 0 && error != ENOENT) { | |
1239 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1240 VDEV_AUX_CORRUPT_DATA); | |
1241 error = EIO; | |
1242 goto out; | |
1243 } | |
1244 | |
1245 /* | |
2082 | 1246 * Load any hot spares for this pool. |
1247 */ | |
1248 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, | |
5450 | 1249 DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); |
2082 | 1250 if (error != 0 && error != ENOENT) { |
1251 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1252 VDEV_AUX_CORRUPT_DATA); | |
1253 error = EIO; | |
1254 goto out; | |
1255 } | |
1256 if (error == 0) { | |
4577 | 1257 ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); |
5450 | 1258 if (load_nvlist(spa, spa->spa_spares.sav_object, |
1259 &spa->spa_spares.sav_config) != 0) { | |
2082 | 1260 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
1261 VDEV_AUX_CORRUPT_DATA); | |
1262 error = EIO; | |
1263 goto out; | |
1264 } | |
1265 | |
1266 spa_config_enter(spa, RW_WRITER, FTAG); | |
1267 spa_load_spares(spa); | |
1268 spa_config_exit(spa, FTAG); | |
1269 } | |
1270 | |
5450 | 1271 /* |
1272 * Load any level 2 ARC devices for this pool. | |
1273 */ | |
1274 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, | |
1275 DMU_POOL_L2CACHE, sizeof (uint64_t), 1, | |
1276 &spa->spa_l2cache.sav_object); | |
1277 if (error != 0 && error != ENOENT) { | |
1278 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1279 VDEV_AUX_CORRUPT_DATA); | |
1280 error = EIO; | |
1281 goto out; | |
1282 } | |
1283 if (error == 0) { | |
1284 ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); | |
1285 if (load_nvlist(spa, spa->spa_l2cache.sav_object, | |
1286 &spa->spa_l2cache.sav_config) != 0) { | |
1287 vdev_set_state(rvd, B_TRUE, | |
1288 VDEV_STATE_CANT_OPEN, | |
1289 VDEV_AUX_CORRUPT_DATA); | |
1290 error = EIO; | |
1291 goto out; | |
1292 } | |
1293 | |
1294 spa_config_enter(spa, RW_WRITER, FTAG); | |
1295 spa_load_l2cache(spa); | |
1296 spa_config_exit(spa, FTAG); | |
1297 } | |
1298 | |
5094 | 1299 spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); |
4543 | 1300 |
3912 | 1301 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, |
1302 DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); | |
1303 | |
1304 if (error && error != ENOENT) { | |
1305 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
1306 VDEV_AUX_CORRUPT_DATA); | |
1307 error = EIO; | |
1308 goto out; | |
1309 } | |
1310 | |
1311 if (error == 0) { | |
1312 (void) zap_lookup(spa->spa_meta_objset, | |
1313 spa->spa_pool_props_object, | |
4451 | 1314 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), |
3912 | 1315 sizeof (uint64_t), 1, &spa->spa_bootfs); |
4451 | 1316 (void) zap_lookup(spa->spa_meta_objset, |
1317 spa->spa_pool_props_object, | |
1318 zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), | |
1319 sizeof (uint64_t), 1, &autoreplace); | |
4543 | 1320 (void) zap_lookup(spa->spa_meta_objset, |
1321 spa->spa_pool_props_object, | |
1322 zpool_prop_to_name(ZPOOL_PROP_DELEGATION), | |
1323 sizeof (uint64_t), 1, &spa->spa_delegation); | |
5329 | 1324 (void) zap_lookup(spa->spa_meta_objset, |
1325 spa->spa_pool_props_object, | |
1326 zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), | |
1327 sizeof (uint64_t), 1, &spa->spa_failmode); | |
3912 | 1328 } |
1329 | |
2082 | 1330 /* |
4451 | 1331 * If the 'autoreplace' property is set, then post a resource notifying |
1332 * the ZFS DE that it should not issue any faults for unopenable | |
1333 * devices. We also iterate over the vdevs, and post a sysevent for any | |
1334 * unopenable vdevs so that the normal autoreplace handler can take | |
1335 * over. | |
1336 */ | |
1337 if (autoreplace) | |
1338 spa_check_removed(spa->spa_root_vdev); | |
1339 | |
1340 /* | |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1341 * Load the vdev state for all toplevel vdevs. |
789 | 1342 */ |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1343 vdev_load(rvd); |
789 | 1344 |
1345 /* | |
1346 * Propagate the leaf DTLs we just loaded all the way up the tree. | |
1347 */ | |
1544 | 1348 spa_config_enter(spa, RW_WRITER, FTAG); |
789 | 1349 vdev_dtl_reassess(rvd, 0, 0, B_FALSE); |
1544 | 1350 spa_config_exit(spa, FTAG); |
789 | 1351 |
1352 /* | |
1353 * Check the state of the root vdev. If it can't be opened, it | |
1354 * indicates one or more toplevel vdevs are faulted. | |
1355 */ | |
1544 | 1356 if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { |
1357 error = ENXIO; | |
1358 goto out; | |
1359 } | |
789 | 1360 |
1544 | 1361 if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) { |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1362 dmu_tx_t *tx; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1363 int need_update = B_FALSE; |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1364 int c; |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1365 |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1366 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1367 * Claim log blocks that haven't been committed yet. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1368 * This must all happen in a single txg. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1369 */ |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1370 tx = dmu_tx_create_assigned(spa_get_dsl(spa), |
789 | 1371 spa_first_txg(spa)); |
2417 | 1372 (void) dmu_objset_find(spa->spa_name, |
1373 zil_claim, tx, DS_FIND_CHILDREN); | |
789 | 1374 dmu_tx_commit(tx); |
1375 | |
1376 spa->spa_sync_on = B_TRUE; | |
1377 txg_sync_start(spa->spa_dsl_pool); | |
1378 | |
1379 /* | |
1380 * Wait for all claims to sync. | |
1381 */ | |
1382 txg_wait_synced(spa->spa_dsl_pool, 0); | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1383 |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1384 /* |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1385 * If the config cache is stale, or we have uninitialized |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1386 * metaslabs (see spa_vdev_add()), then update the config. |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1387 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1388 if (config_cache_txg != spa->spa_config_txg || |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1389 state == SPA_LOAD_IMPORT) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1390 need_update = B_TRUE; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1391 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1392 for (c = 0; c < rvd->vdev_children; c++) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1393 if (rvd->vdev_child[c]->vdev_ms_array == 0) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1394 need_update = B_TRUE; |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1395 |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1396 /* |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1397 * Update the config cache asychronously in case we're the |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1398 * root pool, in which case the config cache isn't writable yet. |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1399 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1400 if (need_update) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1401 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); |
789 | 1402 } |
1403 | |
1544 | 1404 error = 0; |
1405 out: | |
2082 | 1406 if (error && error != EBADF) |
1544 | 1407 zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0); |
1408 spa->spa_load_state = SPA_LOAD_NONE; | |
1409 spa->spa_ena = 0; | |
1410 | |
1411 return (error); | |
789 | 1412 } |
1413 | |
1414 /* | |
1415 * Pool Open/Import | |
1416 * | |
1417 * The import case is identical to an open except that the configuration is sent | |
1418 * down from userland, instead of grabbed from the configuration cache. For the | |
1419 * case of an open, the pool configuration will exist in the | |
4451 | 1420 * POOL_STATE_UNINITIALIZED state. |
789 | 1421 * |
1422 * The stats information (gen/count/ustats) is used to gather vdev statistics at | |
1423 * the same time open the pool, without having to keep around the spa_t in some | |
1424 * ambiguous state. | |
1425 */ | |
1426 static int | |
1427 spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) | |
1428 { | |
1429 spa_t *spa; | |
1430 int error; | |
1431 int loaded = B_FALSE; | |
1432 int locked = B_FALSE; | |
1433 | |
1434 *spapp = NULL; | |
1435 | |
1436 /* | |
1437 * As disgusting as this is, we need to support recursive calls to this | |
1438 * function because dsl_dir_open() is called during spa_load(), and ends | |
1439 * up calling spa_open() again. The real fix is to figure out how to | |
1440 * avoid dsl_dir_open() calling this in the first place. | |
1441 */ | |
1442 if (mutex_owner(&spa_namespace_lock) != curthread) { | |
1443 mutex_enter(&spa_namespace_lock); | |
1444 locked = B_TRUE; | |
1445 } | |
1446 | |
1447 if ((spa = spa_lookup(pool)) == NULL) { | |
1448 if (locked) | |
1449 mutex_exit(&spa_namespace_lock); | |
1450 return (ENOENT); | |
1451 } | |
1452 if (spa->spa_state == POOL_STATE_UNINITIALIZED) { | |
1453 | |
1454 spa_activate(spa); | |
1455 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1456 error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE); |
789 | 1457 |
1458 if (error == EBADF) { | |
1459 /* | |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1460 * If vdev_validate() returns failure (indicated by |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1461 * EBADF), it indicates that one of the vdevs indicates |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1462 * that the pool has been exported or destroyed. If |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1463 * this is the case, the config cache is out of sync and |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1464 * we should remove the pool from the namespace. |
789 | 1465 */ |
2082 | 1466 zfs_post_ok(spa, NULL); |
789 | 1467 spa_unload(spa); |
1468 spa_deactivate(spa); | |
1469 spa_remove(spa); | |
1470 spa_config_sync(); | |
1471 if (locked) | |
1472 mutex_exit(&spa_namespace_lock); | |
1473 return (ENOENT); | |
1544 | 1474 } |
1475 | |
1476 if (error) { | |
789 | 1477 /* |
1478 * We can't open the pool, but we still have useful | |
1479 * information: the state of each vdev after the | |
1480 * attempted vdev_open(). Return this to the user. | |
1481 */ | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1482 if (config != NULL && spa->spa_root_vdev != NULL) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1483 spa_config_enter(spa, RW_READER, FTAG); |
789 | 1484 *config = spa_config_generate(spa, NULL, -1ULL, |
1485 B_TRUE); | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1486 spa_config_exit(spa, FTAG); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1487 } |
789 | 1488 spa_unload(spa); |
1489 spa_deactivate(spa); | |
1544 | 1490 spa->spa_last_open_failed = B_TRUE; |
789 | 1491 if (locked) |
1492 mutex_exit(&spa_namespace_lock); | |
1493 *spapp = NULL; | |
1494 return (error); | |
1544 | 1495 } else { |
1496 zfs_post_ok(spa, NULL); | |
1497 spa->spa_last_open_failed = B_FALSE; | |
789 | 1498 } |
1499 | |
1500 loaded = B_TRUE; | |
1501 } | |
1502 | |
1503 spa_open_ref(spa, tag); | |
4451 | 1504 |
1505 /* | |
1506 * If we just loaded the pool, resilver anything that's out of date. | |
1507 */ | |
1508 if (loaded && (spa_mode & FWRITE)) | |
1509 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); | |
1510 | |
789 | 1511 if (locked) |
1512 mutex_exit(&spa_namespace_lock); | |
1513 | |
1514 *spapp = spa; | |
1515 | |
1516 if (config != NULL) { | |
1544 | 1517 spa_config_enter(spa, RW_READER, FTAG); |
789 | 1518 *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); |
1544 | 1519 spa_config_exit(spa, FTAG); |
789 | 1520 } |
1521 | |
1522 return (0); | |
1523 } | |
1524 | |
1525 int | |
1526 spa_open(const char *name, spa_t **spapp, void *tag) | |
1527 { | |
1528 return (spa_open_common(name, spapp, tag, NULL)); | |
1529 } | |
1530 | |
1544 | 1531 /* |
1532 * Lookup the given spa_t, incrementing the inject count in the process, | |
1533 * preventing it from being exported or destroyed. | |
1534 */ | |
1535 spa_t * | |
1536 spa_inject_addref(char *name) | |
1537 { | |
1538 spa_t *spa; | |
1539 | |
1540 mutex_enter(&spa_namespace_lock); | |
1541 if ((spa = spa_lookup(name)) == NULL) { | |
1542 mutex_exit(&spa_namespace_lock); | |
1543 return (NULL); | |
1544 } | |
1545 spa->spa_inject_ref++; | |
1546 mutex_exit(&spa_namespace_lock); | |
1547 | |
1548 return (spa); | |
1549 } | |
1550 | |
1551 void | |
1552 spa_inject_delref(spa_t *spa) | |
1553 { | |
1554 mutex_enter(&spa_namespace_lock); | |
1555 spa->spa_inject_ref--; | |
1556 mutex_exit(&spa_namespace_lock); | |
1557 } | |
1558 | |
5450 | 1559 /* |
1560 * Add spares device information to the nvlist. | |
1561 */ | |
2082 | 1562 static void |
1563 spa_add_spares(spa_t *spa, nvlist_t *config) | |
1564 { | |
1565 nvlist_t **spares; | |
1566 uint_t i, nspares; | |
1567 nvlist_t *nvroot; | |
1568 uint64_t guid; | |
1569 vdev_stat_t *vs; | |
1570 uint_t vsc; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1571 uint64_t pool; |
2082 | 1572 |
5450 | 1573 if (spa->spa_spares.sav_count == 0) |
2082 | 1574 return; |
1575 | |
1576 VERIFY(nvlist_lookup_nvlist(config, | |
1577 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); | |
5450 | 1578 VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, |
2082 | 1579 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); |
1580 if (nspares != 0) { | |
1581 VERIFY(nvlist_add_nvlist_array(nvroot, | |
1582 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); | |
1583 VERIFY(nvlist_lookup_nvlist_array(nvroot, | |
1584 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); | |
1585 | |
1586 /* | |
1587 * Go through and find any spares which have since been | |
1588 * repurposed as an active spare. If this is the case, update | |
1589 * their status appropriately. | |
1590 */ | |
1591 for (i = 0; i < nspares; i++) { | |
1592 VERIFY(nvlist_lookup_uint64(spares[i], | |
1593 ZPOOL_CONFIG_GUID, &guid) == 0); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1594 if (spa_spare_exists(guid, &pool) && pool != 0ULL) { |
2082 | 1595 VERIFY(nvlist_lookup_uint64_array( |
1596 spares[i], ZPOOL_CONFIG_STATS, | |
1597 (uint64_t **)&vs, &vsc) == 0); | |
1598 vs->vs_state = VDEV_STATE_CANT_OPEN; | |
1599 vs->vs_aux = VDEV_AUX_SPARED; | |
1600 } | |
1601 } | |
1602 } | |
1603 } | |
1604 | |
5450 | 1605 /* |
1606 * Add l2cache device information to the nvlist, including vdev stats. | |
1607 */ | |
1608 static void | |
1609 spa_add_l2cache(spa_t *spa, nvlist_t *config) | |
1610 { | |
1611 nvlist_t **l2cache; | |
1612 uint_t i, j, nl2cache; | |
1613 nvlist_t *nvroot; | |
1614 uint64_t guid; | |
1615 vdev_t *vd; | |
1616 vdev_stat_t *vs; | |
1617 uint_t vsc; | |
1618 | |
1619 if (spa->spa_l2cache.sav_count == 0) | |
1620 return; | |
1621 | |
1622 spa_config_enter(spa, RW_READER, FTAG); | |
1623 | |
1624 VERIFY(nvlist_lookup_nvlist(config, | |
1625 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); | |
1626 VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, | |
1627 ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); | |
1628 if (nl2cache != 0) { | |
1629 VERIFY(nvlist_add_nvlist_array(nvroot, | |
1630 ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); | |
1631 VERIFY(nvlist_lookup_nvlist_array(nvroot, | |
1632 ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); | |
1633 | |
1634 /* | |
1635 * Update level 2 cache device stats. | |
1636 */ | |
1637 | |
1638 for (i = 0; i < nl2cache; i++) { | |
1639 VERIFY(nvlist_lookup_uint64(l2cache[i], | |
1640 ZPOOL_CONFIG_GUID, &guid) == 0); | |
1641 | |
1642 vd = NULL; | |
1643 for (j = 0; j < spa->spa_l2cache.sav_count; j++) { | |
1644 if (guid == | |
1645 spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { | |
1646 vd = spa->spa_l2cache.sav_vdevs[j]; | |
1647 break; | |
1648 } | |
1649 } | |
1650 ASSERT(vd != NULL); | |
1651 | |
1652 VERIFY(nvlist_lookup_uint64_array(l2cache[i], | |
1653 ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); | |
1654 vdev_get_stats(vd, vs); | |
1655 } | |
1656 } | |
1657 | |
1658 spa_config_exit(spa, FTAG); | |
1659 } | |
1660 | |
789 | 1661 int |
1544 | 1662 spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) |
789 | 1663 { |
1664 int error; | |
1665 spa_t *spa; | |
1666 | |
1667 *config = NULL; | |
1668 error = spa_open_common(name, &spa, FTAG, config); | |
1669 | |
2082 | 1670 if (spa && *config != NULL) { |
1544 | 1671 VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, |
1672 spa_get_errlog_size(spa)) == 0); | |
1673 | |
2082 | 1674 spa_add_spares(spa, *config); |
5450 | 1675 spa_add_l2cache(spa, *config); |
2082 | 1676 } |
1677 | |
1544 | 1678 /* |
1679 * We want to get the alternate root even for faulted pools, so we cheat | |
1680 * and call spa_lookup() directly. | |
1681 */ | |
1682 if (altroot) { | |
1683 if (spa == NULL) { | |
1684 mutex_enter(&spa_namespace_lock); | |
1685 spa = spa_lookup(name); | |
1686 if (spa) | |
1687 spa_altroot(spa, altroot, buflen); | |
1688 else | |
1689 altroot[0] = '\0'; | |
1690 spa = NULL; | |
1691 mutex_exit(&spa_namespace_lock); | |
1692 } else { | |
1693 spa_altroot(spa, altroot, buflen); | |
1694 } | |
1695 } | |
1696 | |
789 | 1697 if (spa != NULL) |
1698 spa_close(spa, FTAG); | |
1699 | |
1700 return (error); | |
1701 } | |
1702 | |
1703 /* | |
5450 | 1704 * Validate that the auxiliary device array is well formed. We must have an |
1705 * array of nvlists, each which describes a valid leaf vdev. If this is an | |
1706 * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be | |
1707 * specified, as long as they are well-formed. | |
2082 | 1708 */ |
1709 static int | |
5450 | 1710 spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, |
1711 spa_aux_vdev_t *sav, const char *config, uint64_t version, | |
1712 vdev_labeltype_t label) | |
2082 | 1713 { |
5450 | 1714 nvlist_t **dev; |
1715 uint_t i, ndev; | |
2082 | 1716 vdev_t *vd; |
1717 int error; | |
1718 | |
1719 /* | |
5450 | 1720 * It's acceptable to have no devs specified. |
2082 | 1721 */ |
5450 | 1722 if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) |
2082 | 1723 return (0); |
1724 | |
5450 | 1725 if (ndev == 0) |
2082 | 1726 return (EINVAL); |
1727 | |
1728 /* | |
5450 | 1729 * Make sure the pool is formatted with a version that supports this |
1730 * device type. | |
2082 | 1731 */ |
5450 | 1732 if (spa_version(spa) < version) |
2082 | 1733 return (ENOTSUP); |
1734 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1735 /* |
5450 | 1736 * Set the pending device list so we correctly handle device in-use |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1737 * checking. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1738 */ |
5450 | 1739 sav->sav_pending = dev; |
1740 sav->sav_npending = ndev; | |
1741 | |
1742 for (i = 0; i < ndev; i++) { | |
1743 if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, | |
2082 | 1744 mode)) != 0) |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1745 goto out; |
2082 | 1746 |
1747 if (!vd->vdev_ops->vdev_op_leaf) { | |
1748 vdev_free(vd); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1749 error = EINVAL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1750 goto out; |
2082 | 1751 } |
1752 | |
5450 | 1753 /* |
1754 * The L2ARC currently only supports disk devices. | |
1755 */ | |
1756 if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && | |
1757 strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { | |
1758 error = ENOTBLK; | |
1759 goto out; | |
1760 } | |
1761 | |
2082 | 1762 vd->vdev_top = vd; |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1763 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1764 if ((error = vdev_open(vd)) == 0 && |
5450 | 1765 (error = vdev_label_init(vd, crtxg, label)) == 0) { |
1766 VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1767 vd->vdev_guid) == 0); |
2082 | 1768 } |
1769 | |
1770 vdev_free(vd); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1771 |
5450 | 1772 if (error && |
1773 (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1774 goto out; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1775 else |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1776 error = 0; |
2082 | 1777 } |
1778 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1779 out: |
5450 | 1780 sav->sav_pending = NULL; |
1781 sav->sav_npending = 0; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1782 return (error); |
2082 | 1783 } |
1784 | |
5450 | 1785 static int |
1786 spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) | |
1787 { | |
1788 int error; | |
1789 | |
1790 if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, | |
1791 &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, | |
1792 VDEV_LABEL_SPARE)) != 0) { | |
1793 return (error); | |
1794 } | |
1795 | |
1796 return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, | |
1797 &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, | |
1798 VDEV_LABEL_L2CACHE)); | |
1799 } | |
1800 | |
1801 static void | |
1802 spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, | |
1803 const char *config) | |
1804 { | |
1805 int i; | |
1806 | |
1807 if (sav->sav_config != NULL) { | |
1808 nvlist_t **olddevs; | |
1809 uint_t oldndevs; | |
1810 nvlist_t **newdevs; | |
1811 | |
1812 /* | |
1813 * Generate new dev list by concatentating with the | |
1814 * current dev list. | |
1815 */ | |
1816 VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, | |
1817 &olddevs, &oldndevs) == 0); | |
1818 | |
1819 newdevs = kmem_alloc(sizeof (void *) * | |
1820 (ndevs + oldndevs), KM_SLEEP); | |
1821 for (i = 0; i < oldndevs; i++) | |
1822 VERIFY(nvlist_dup(olddevs[i], &newdevs[i], | |
1823 KM_SLEEP) == 0); | |
1824 for (i = 0; i < ndevs; i++) | |
1825 VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], | |
1826 KM_SLEEP) == 0); | |
1827 | |
1828 VERIFY(nvlist_remove(sav->sav_config, config, | |
1829 DATA_TYPE_NVLIST_ARRAY) == 0); | |
1830 | |
1831 VERIFY(nvlist_add_nvlist_array(sav->sav_config, | |
1832 config, newdevs, ndevs + oldndevs) == 0); | |
1833 for (i = 0; i < oldndevs + ndevs; i++) | |
1834 nvlist_free(newdevs[i]); | |
1835 kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); | |
1836 } else { | |
1837 /* | |
1838 * Generate a new dev list. | |
1839 */ | |
1840 VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, | |
1841 KM_SLEEP) == 0); | |
1842 VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, | |
1843 devs, ndevs) == 0); | |
1844 } | |
1845 } | |
1846 | |
1847 /* | |
1848 * Stop and drop level 2 ARC devices | |
1849 */ | |
1850 void | |
1851 spa_l2cache_drop(spa_t *spa) | |
1852 { | |
1853 vdev_t *vd; | |
1854 int i; | |
1855 spa_aux_vdev_t *sav = &spa->spa_l2cache; | |
1856 | |
1857 for (i = 0; i < sav->sav_count; i++) { | |
1858 uint64_t pool; | |
1859 | |
1860 vd = sav->sav_vdevs[i]; | |
1861 ASSERT(vd != NULL); | |
1862 | |
1863 if (spa_mode & FWRITE && | |
1864 spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL) { | |
1865 l2arc_remove_vdev(vd); | |
1866 } | |
1867 if (vd->vdev_isl2cache) | |
1868 spa_l2cache_remove(vd); | |
1869 vdev_clear_stats(vd); | |
1870 (void) vdev_close(vd); | |
1871 } | |
1872 } | |
1873 | |
2082 | 1874 /* |
789 | 1875 * Pool Creation |
1876 */ | |
1877 int | |
5094 | 1878 spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, |
4715
e8d212dda064
6535695 Panic: shpp->sh_eof == shpp->sh_pool_create_len, file: ../../common/fs/zfs/spa_history.c, line: 235
ek110237
parents:
4627
diff
changeset
|
1879 const char *history_str) |
789 | 1880 { |
1881 spa_t *spa; | |
5094 | 1882 char *altroot = NULL; |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1883 vdev_t *rvd; |
789 | 1884 dsl_pool_t *dp; |
1885 dmu_tx_t *tx; | |
2082 | 1886 int c, error = 0; |
789 | 1887 uint64_t txg = TXG_INITIAL; |
5450 | 1888 nvlist_t **spares, **l2cache; |
1889 uint_t nspares, nl2cache; | |
5094 | 1890 uint64_t version; |
789 | 1891 |
1892 /* | |
1893 * If this pool already exists, return failure. | |
1894 */ | |
1895 mutex_enter(&spa_namespace_lock); | |
1896 if (spa_lookup(pool) != NULL) { | |
1897 mutex_exit(&spa_namespace_lock); | |
1898 return (EEXIST); | |
1899 } | |
1900 | |
1901 /* | |
1902 * Allocate a new spa_t structure. | |
1903 */ | |
5094 | 1904 (void) nvlist_lookup_string(props, |
1905 zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1906 spa = spa_add(pool, altroot); |
789 | 1907 spa_activate(spa); |
1908 | |
1909 spa->spa_uberblock.ub_txg = txg - 1; | |
5094 | 1910 |
1911 if (props && (error = spa_prop_validate(spa, props))) { | |
1912 spa_unload(spa); | |
1913 spa_deactivate(spa); | |
1914 spa_remove(spa); | |
1915 return (error); | |
1916 } | |
1917 | |
1918 if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), | |
1919 &version) != 0) | |
1920 version = SPA_VERSION; | |
1921 ASSERT(version <= SPA_VERSION); | |
1922 spa->spa_uberblock.ub_version = version; | |
789 | 1923 spa->spa_ubsync = spa->spa_uberblock; |
1924 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1925 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1926 * Create the root vdev. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1927 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1928 spa_config_enter(spa, RW_WRITER, FTAG); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1929 |
2082 | 1930 error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); |
1931 | |
1932 ASSERT(error != 0 || rvd != NULL); | |
1933 ASSERT(error != 0 || spa->spa_root_vdev == rvd); | |
1934 | |
1935 if (error == 0 && rvd->vdev_children == 0) | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1936 error = EINVAL; |
2082 | 1937 |
1938 if (error == 0 && | |
1939 (error = vdev_create(rvd, txg, B_FALSE)) == 0 && | |
5450 | 1940 (error = spa_validate_aux(spa, nvroot, txg, |
2082 | 1941 VDEV_ALLOC_ADD)) == 0) { |
1942 for (c = 0; c < rvd->vdev_children; c++) | |
1943 vdev_init(rvd->vdev_child[c], txg); | |
1944 vdev_config_dirty(rvd); | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1945 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1946 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1947 spa_config_exit(spa, FTAG); |
789 | 1948 |
2082 | 1949 if (error != 0) { |
789 | 1950 spa_unload(spa); |
1951 spa_deactivate(spa); | |
1952 spa_remove(spa); | |
1953 mutex_exit(&spa_namespace_lock); | |
1954 return (error); | |
1955 } | |
1956 | |
2082 | 1957 /* |
1958 * Get the list of spares, if specified. | |
1959 */ | |
1960 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
1961 &spares, &nspares) == 0) { | |
5450 | 1962 VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, |
2082 | 1963 KM_SLEEP) == 0); |
5450 | 1964 VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, |
2082 | 1965 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); |
1966 spa_config_enter(spa, RW_WRITER, FTAG); | |
1967 spa_load_spares(spa); | |
1968 spa_config_exit(spa, FTAG); | |
5450 | 1969 spa->spa_spares.sav_sync = B_TRUE; |
1970 } | |
1971 | |
1972 /* | |
1973 * Get the list of level 2 cache devices, if specified. | |
1974 */ | |
1975 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, | |
1976 &l2cache, &nl2cache) == 0) { | |
1977 VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, | |
1978 NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
1979 VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, | |
1980 ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); | |
1981 spa_config_enter(spa, RW_WRITER, FTAG); | |
1982 spa_load_l2cache(spa); | |
1983 spa_config_exit(spa, FTAG); | |
1984 spa->spa_l2cache.sav_sync = B_TRUE; | |
2082 | 1985 } |
1986 | |
789 | 1987 spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg); |
1988 spa->spa_meta_objset = dp->dp_meta_objset; | |
1989 | |
1990 tx = dmu_tx_create_assigned(dp, txg); | |
1991 | |
1992 /* | |
1993 * Create the pool config object. | |
1994 */ | |
1995 spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, | |
1996 DMU_OT_PACKED_NVLIST, 1 << 14, | |
1997 DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); | |
1998 | |
1544 | 1999 if (zap_add(spa->spa_meta_objset, |
789 | 2000 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, |
1544 | 2001 sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { |
2002 cmn_err(CE_PANIC, "failed to add pool config"); | |
2003 } | |
789 | 2004 |
5094 | 2005 /* Newly created pools with the right version are always deflated. */ |
2006 if (version >= SPA_VERSION_RAIDZ_DEFLATE) { | |
2007 spa->spa_deflate = TRUE; | |
2008 if (zap_add(spa->spa_meta_objset, | |
2009 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, | |
2010 sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { | |
2011 cmn_err(CE_PANIC, "failed to add deflate"); | |
2012 } | |
2082 | 2013 } |
2014 | |
789 | 2015 /* |
2016 * Create the deferred-free bplist object. Turn off compression | |
2017 * because sync-to-convergence takes longer if the blocksize | |
2018 * keeps changing. | |
2019 */ | |
2020 spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, | |
2021 1 << 14, tx); | |
2022 dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, | |
2023 ZIO_COMPRESS_OFF, tx); | |
2024 | |
1544 | 2025 if (zap_add(spa->spa_meta_objset, |
789 | 2026 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, |
1544 | 2027 sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { |
2028 cmn_err(CE_PANIC, "failed to add bplist"); | |
2029 } | |
789 | 2030 |
2926 | 2031 /* |
2032 * Create the pool's history object. | |
2033 */ | |
5094 | 2034 if (version >= SPA_VERSION_ZPOOL_HISTORY) |
2035 spa_history_create_obj(spa, tx); | |
2036 | |
2037 /* | |
2038 * Set pool properties. | |
2039 */ | |
2040 spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); | |
2041 spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); | |
5329 | 2042 spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); |
5094 | 2043 if (props) |
2044 spa_sync_props(spa, props, CRED(), tx); | |
2926 | 2045 |
789 | 2046 dmu_tx_commit(tx); |
2047 | |
2048 spa->spa_sync_on = B_TRUE; | |
2049 txg_sync_start(spa->spa_dsl_pool); | |
2050 | |
2051 /* | |
2052 * We explicitly wait for the first transaction to complete so that our | |
2053 * bean counters are appropriately updated. | |
2054 */ | |
2055 txg_wait_synced(spa->spa_dsl_pool, txg); | |
2056 | |
2057 spa_config_sync(); | |
2058 | |
5094 | 2059 if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) |
4715
e8d212dda064
6535695 Panic: shpp->sh_eof == shpp->sh_pool_create_len, file: ../../common/fs/zfs/spa_history.c, line: 235
ek110237
parents:
4627
diff
changeset
|
2060 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); |
e8d212dda064
6535695 Panic: shpp->sh_eof == shpp->sh_pool_create_len, file: ../../common/fs/zfs/spa_history.c, line: 235
ek110237
parents:
4627
diff
changeset
|
2061 |
789 | 2062 mutex_exit(&spa_namespace_lock); |
2063 | |
2064 return (0); | |
2065 } | |
2066 | |
2067 /* | |
2068 * Import the given pool into the system. We set up the necessary spa_t and | |
2069 * then call spa_load() to do the dirty work. | |
2070 */ | |
2071 int | |
5094 | 2072 spa_import(const char *pool, nvlist_t *config, nvlist_t *props) |
789 | 2073 { |
2074 spa_t *spa; | |
5094 | 2075 char *altroot = NULL; |
789 | 2076 int error; |
2082 | 2077 nvlist_t *nvroot; |
5450 | 2078 nvlist_t **spares, **l2cache; |
2079 uint_t nspares, nl2cache; | |
789 | 2080 |
2081 /* | |
2082 * If a pool with this name exists, return failure. | |
2083 */ | |
2084 mutex_enter(&spa_namespace_lock); | |
2085 if (spa_lookup(pool) != NULL) { | |
2086 mutex_exit(&spa_namespace_lock); | |
2087 return (EEXIST); | |
2088 } | |
2089 | |
2090 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2091 * Create and initialize the spa structure. |
789 | 2092 */ |
5094 | 2093 (void) nvlist_lookup_string(props, |
2094 zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2095 spa = spa_add(pool, altroot); |
789 | 2096 spa_activate(spa); |
2097 | |
2098 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2099 * Pass off the heavy lifting to spa_load(). |
1732 | 2100 * Pass TRUE for mosconfig because the user-supplied config |
2101 * is actually the one to trust when doing an import. | |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2102 */ |
1732 | 2103 error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE); |
789 | 2104 |
2082 | 2105 spa_config_enter(spa, RW_WRITER, FTAG); |
2106 /* | |
2107 * Toss any existing sparelist, as it doesn't have any validity anymore, | |
2108 * and conflicts with spa_has_spare(). | |
2109 */ | |
5450 | 2110 if (spa->spa_spares.sav_config) { |
2111 nvlist_free(spa->spa_spares.sav_config); | |
2112 spa->spa_spares.sav_config = NULL; | |
2082 | 2113 spa_load_spares(spa); |
2114 } | |
5450 | 2115 if (spa->spa_l2cache.sav_config) { |
2116 nvlist_free(spa->spa_l2cache.sav_config); | |
2117 spa->spa_l2cache.sav_config = NULL; | |
2118 spa_load_l2cache(spa); | |
2119 } | |
2082 | 2120 |
2121 VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, | |
2122 &nvroot) == 0); | |
5450 | 2123 if (error == 0) |
2124 error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_SPARE); | |
2125 if (error == 0) | |
2126 error = spa_validate_aux(spa, nvroot, -1ULL, | |
2127 VDEV_ALLOC_L2CACHE); | |
2082 | 2128 spa_config_exit(spa, FTAG); |
2129 | |
5094 | 2130 if (error != 0 || (props && (error = spa_prop_set(spa, props)))) { |
789 | 2131 spa_unload(spa); |
2132 spa_deactivate(spa); | |
2133 spa_remove(spa); | |
2134 mutex_exit(&spa_namespace_lock); | |
2135 return (error); | |
2136 } | |
2137 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2138 /* |
5450 | 2139 * Override any spares and level 2 cache devices as specified by |
2140 * the user, as these may have correct device names/devids, etc. | |
2082 | 2141 */ |
2142 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
2143 &spares, &nspares) == 0) { | |
5450 | 2144 if (spa->spa_spares.sav_config) |
2145 VERIFY(nvlist_remove(spa->spa_spares.sav_config, | |
2082 | 2146 ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); |
2147 else | |
5450 | 2148 VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, |
2082 | 2149 NV_UNIQUE_NAME, KM_SLEEP) == 0); |
5450 | 2150 VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, |
2082 | 2151 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); |
2152 spa_config_enter(spa, RW_WRITER, FTAG); | |
2153 spa_load_spares(spa); | |
2154 spa_config_exit(spa, FTAG); | |
5450 | 2155 spa->spa_spares.sav_sync = B_TRUE; |
2156 } | |
2157 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, | |
2158 &l2cache, &nl2cache) == 0) { | |
2159 if (spa->spa_l2cache.sav_config) | |
2160 VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, | |
2161 ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); | |
2162 else | |
2163 VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, | |
2164 NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
2165 VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, | |
2166 ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); | |
2167 spa_config_enter(spa, RW_WRITER, FTAG); | |
2168 spa_load_l2cache(spa); | |
2169 spa_config_exit(spa, FTAG); | |
2170 spa->spa_l2cache.sav_sync = B_TRUE; | |
2082 | 2171 } |
2172 | |
2173 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2174 * Update the config cache to include the newly-imported pool. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2175 */ |
4627
c85631613c19
6509812 it would be useful if zdb(1M) could run on exported pools
ck153898
parents:
4577
diff
changeset
|
2176 if (spa_mode & FWRITE) |
c85631613c19
6509812 it would be useful if zdb(1M) could run on exported pools
ck153898
parents:
4577
diff
changeset
|
2177 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2178 |
789 | 2179 /* |
2180 * Resilver anything that's out of date. | |
2181 */ | |
2182 if (spa_mode & FWRITE) | |
2183 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); | |
2184 | |
4451 | 2185 mutex_exit(&spa_namespace_lock); |
2186 | |
789 | 2187 return (0); |
2188 } | |
2189 | |
2190 /* | |
2191 * This (illegal) pool name is used when temporarily importing a spa_t in order | |
2192 * to get the vdev stats associated with the imported devices. | |
2193 */ | |
2194 #define TRYIMPORT_NAME "$import" | |
2195 | |
2196 nvlist_t * | |
2197 spa_tryimport(nvlist_t *tryconfig) | |
2198 { | |
2199 nvlist_t *config = NULL; | |
2200 char *poolname; | |
2201 spa_t *spa; | |
2202 uint64_t state; | |
2203 | |
2204 if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) | |
2205 return (NULL); | |
2206 | |
2207 if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) | |
2208 return (NULL); | |
2209 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2210 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2211 * Create and initialize the spa structure. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2212 */ |
789 | 2213 mutex_enter(&spa_namespace_lock); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2214 spa = spa_add(TRYIMPORT_NAME, NULL); |
789 | 2215 spa_activate(spa); |
2216 | |
2217 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2218 * Pass off the heavy lifting to spa_load(). |
1732 | 2219 * Pass TRUE for mosconfig because the user-supplied config |
2220 * is actually the one to trust when doing an import. | |
789 | 2221 */ |
1732 | 2222 (void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE); |
789 | 2223 |
2224 /* | |
2225 * If 'tryconfig' was at least parsable, return the current config. | |
2226 */ | |
2227 if (spa->spa_root_vdev != NULL) { | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2228 spa_config_enter(spa, RW_READER, FTAG); |
789 | 2229 config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2230 spa_config_exit(spa, FTAG); |
789 | 2231 VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, |
2232 poolname) == 0); | |
2233 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, | |
2234 state) == 0); | |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
2235 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
2236 spa->spa_uberblock.ub_timestamp) == 0); |
2082 | 2237 |
2238 /* | |
5450 | 2239 * Add the list of hot spares and level 2 cache devices. |
2082 | 2240 */ |
2241 spa_add_spares(spa, config); | |
5450 | 2242 spa_add_l2cache(spa, config); |
789 | 2243 } |
2244 | |
2245 spa_unload(spa); | |
2246 spa_deactivate(spa); | |
2247 spa_remove(spa); | |
2248 mutex_exit(&spa_namespace_lock); | |
2249 | |
2250 return (config); | |
2251 } | |
2252 | |
2253 /* | |
2254 * Pool export/destroy | |
2255 * | |
2256 * The act of destroying or exporting a pool is very simple. We make sure there | |
2257 * is no more pending I/O and any references to the pool are gone. Then, we | |
2258 * update the pool state and sync all the labels to disk, removing the | |
2259 * configuration from the cache afterwards. | |
2260 */ | |
2261 static int | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2262 spa_export_common(char *pool, int new_state, nvlist_t **oldconfig) |
789 | 2263 { |
2264 spa_t *spa; | |
2265 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2266 if (oldconfig) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2267 *oldconfig = NULL; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2268 |
789 | 2269 if (!(spa_mode & FWRITE)) |
2270 return (EROFS); | |
2271 | |
2272 mutex_enter(&spa_namespace_lock); | |
2273 if ((spa = spa_lookup(pool)) == NULL) { | |
2274 mutex_exit(&spa_namespace_lock); | |
2275 return (ENOENT); | |
2276 } | |
2277 | |
2278 /* | |
1544 | 2279 * Put a hold on the pool, drop the namespace lock, stop async tasks, |
2280 * reacquire the namespace lock, and see if we can export. | |
2281 */ | |
2282 spa_open_ref(spa, FTAG); | |
2283 mutex_exit(&spa_namespace_lock); | |
2284 spa_async_suspend(spa); | |
2285 mutex_enter(&spa_namespace_lock); | |
2286 spa_close(spa, FTAG); | |
2287 | |
2288 /* | |
789 | 2289 * The pool will be in core if it's openable, |
2290 * in which case we can modify its state. | |
2291 */ | |
2292 if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { | |
2293 /* | |
2294 * Objsets may be open only because they're dirty, so we | |
2295 * have to force it to sync before checking spa_refcnt. | |
2296 */ | |
2297 spa_scrub_suspend(spa); | |
2298 txg_wait_synced(spa->spa_dsl_pool, 0); | |
2299 | |
1544 | 2300 /* |
2301 * A pool cannot be exported or destroyed if there are active | |
2302 * references. If we are resetting a pool, allow references by | |
2303 * fault injection handlers. | |
2304 */ | |
2305 if (!spa_refcount_zero(spa) || | |
2306 (spa->spa_inject_ref != 0 && | |
2307 new_state != POOL_STATE_UNINITIALIZED)) { | |
789 | 2308 spa_scrub_resume(spa); |
1544 | 2309 spa_async_resume(spa); |
789 | 2310 mutex_exit(&spa_namespace_lock); |
2311 return (EBUSY); | |
2312 } | |
2313 | |
2314 spa_scrub_resume(spa); | |
2315 VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); | |
2316 | |
2317 /* | |
2318 * We want this to be reflected on every label, | |
2319 * so mark them all dirty. spa_unload() will do the | |
2320 * final sync that pushes these changes out. | |
2321 */ | |
1544 | 2322 if (new_state != POOL_STATE_UNINITIALIZED) { |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2323 spa_config_enter(spa, RW_WRITER, FTAG); |
1544 | 2324 spa->spa_state = new_state; |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2325 spa->spa_final_txg = spa_last_synced_txg(spa) + 1; |
1544 | 2326 vdev_config_dirty(spa->spa_root_vdev); |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2327 spa_config_exit(spa, FTAG); |
1544 | 2328 } |
789 | 2329 } |
2330 | |
4451 | 2331 spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); |
2332 | |
789 | 2333 if (spa->spa_state != POOL_STATE_UNINITIALIZED) { |
2334 spa_unload(spa); | |
2335 spa_deactivate(spa); | |
2336 } | |
2337 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2338 if (oldconfig && spa->spa_config) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2339 VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2340 |
1544 | 2341 if (new_state != POOL_STATE_UNINITIALIZED) { |
5363 | 2342 spa_config_check(spa->spa_config_dir, |
2343 spa->spa_config_file); | |
1544 | 2344 spa_remove(spa); |
2345 spa_config_sync(); | |
2346 } | |
789 | 2347 mutex_exit(&spa_namespace_lock); |
2348 | |
2349 return (0); | |
2350 } | |
2351 | |
2352 /* | |
2353 * Destroy a storage pool. | |
2354 */ | |
2355 int | |
2356 spa_destroy(char *pool) | |
2357 { | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2358 return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL)); |
789 | 2359 } |
2360 | |
2361 /* | |
2362 * Export a storage pool. | |
2363 */ | |
2364 int | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2365 spa_export(char *pool, nvlist_t **oldconfig) |
789 | 2366 { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2367 return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig)); |
789 | 2368 } |
2369 | |
2370 /* | |
1544 | 2371 * Similar to spa_export(), this unloads the spa_t without actually removing it |
2372 * from the namespace in any way. | |
2373 */ | |
2374 int | |
2375 spa_reset(char *pool) | |
2376 { | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2377 return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL)); |
1544 | 2378 } |
2379 | |
2380 | |
2381 /* | |
789 | 2382 * ========================================================================== |
2383 * Device manipulation | |
2384 * ========================================================================== | |
2385 */ | |
2386 | |
2387 /* | |
4527 | 2388 * Add a device to a storage pool. |
789 | 2389 */ |
2390 int | |
2391 spa_vdev_add(spa_t *spa, nvlist_t *nvroot) | |
2392 { | |
2393 uint64_t txg; | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2394 int c, error; |
789 | 2395 vdev_t *rvd = spa->spa_root_vdev; |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2396 vdev_t *vd, *tvd; |
5450 | 2397 nvlist_t **spares, **l2cache; |
2398 uint_t nspares, nl2cache; | |
789 | 2399 |
2400 txg = spa_vdev_enter(spa); | |
2401 | |
2082 | 2402 if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, |
2403 VDEV_ALLOC_ADD)) != 0) | |
2404 return (spa_vdev_exit(spa, NULL, txg, error)); | |
2405 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2406 spa->spa_pending_vdev = vd; |
789 | 2407 |
5450 | 2408 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, |
2409 &nspares) != 0) | |
2082 | 2410 nspares = 0; |
2411 | |
5450 | 2412 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, |
2413 &nl2cache) != 0) | |
2414 nl2cache = 0; | |
2415 | |
2416 if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) { | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2417 spa->spa_pending_vdev = NULL; |
2082 | 2418 return (spa_vdev_exit(spa, vd, txg, EINVAL)); |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2419 } |
2082 | 2420 |
2421 if (vd->vdev_children != 0) { | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2422 if ((error = vdev_create(vd, txg, B_FALSE)) != 0) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2423 spa->spa_pending_vdev = NULL; |
2082 | 2424 return (spa_vdev_exit(spa, vd, txg, error)); |
2425 } | |
2426 } | |
2427 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2428 /* |
5450 | 2429 * We must validate the spares and l2cache devices after checking the |
2430 * children. Otherwise, vdev_inuse() will blindly overwrite the spare. | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2431 */ |
5450 | 2432 if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) { |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2433 spa->spa_pending_vdev = NULL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2434 return (spa_vdev_exit(spa, vd, txg, error)); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2435 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2436 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2437 spa->spa_pending_vdev = NULL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2438 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2439 /* |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2440 * Transfer each new top-level vdev from vd to rvd. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2441 */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2442 for (c = 0; c < vd->vdev_children; c++) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2443 tvd = vd->vdev_child[c]; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2444 vdev_remove_child(vd, tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2445 tvd->vdev_id = rvd->vdev_children; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2446 vdev_add_child(rvd, tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2447 vdev_config_dirty(tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2448 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2449 |
2082 | 2450 if (nspares != 0) { |
5450 | 2451 spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, |
2452 ZPOOL_CONFIG_SPARES); | |
2082 | 2453 spa_load_spares(spa); |
5450 | 2454 spa->spa_spares.sav_sync = B_TRUE; |
2455 } | |
2456 | |
2457 if (nl2cache != 0) { | |
2458 spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, | |
2459 ZPOOL_CONFIG_L2CACHE); | |
2460 spa_load_l2cache(spa); | |
2461 spa->spa_l2cache.sav_sync = B_TRUE; | |
789 | 2462 } |
2463 | |
2464 /* | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2465 * We have to be careful when adding new vdevs to an existing pool. |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2466 * If other threads start allocating from these vdevs before we |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2467 * sync the config cache, and we lose power, then upon reboot we may |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2468 * fail to open the pool because there are DVAs that the config cache |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2469 * can't translate. Therefore, we first add the vdevs without |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2470 * initializing metaslabs; sync the config cache (via spa_vdev_exit()); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2471 * and then let spa_config_update() initialize the new metaslabs. |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2472 * |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2473 * spa_load() checks for added-but-not-initialized vdevs, so that |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2474 * if we lose power at any point in this sequence, the remaining |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2475 * steps will be completed the next time we load the pool. |
789 | 2476 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2477 (void) spa_vdev_exit(spa, vd, txg, 0); |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2478 |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2479 mutex_enter(&spa_namespace_lock); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2480 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2481 mutex_exit(&spa_namespace_lock); |
789 | 2482 |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2483 return (0); |
789 | 2484 } |
2485 | |
2486 /* | |
2487 * Attach a device to a mirror. The arguments are the path to any device | |
2488 * in the mirror, and the nvroot for the new device. If the path specifies | |
2489 * a device that is not mirrored, we automatically insert the mirror vdev. | |
2490 * | |
2491 * If 'replacing' is specified, the new device is intended to replace the | |
2492 * existing device; in this case the two devices are made into their own | |
4451 | 2493 * mirror using the 'replacing' vdev, which is functionally identical to |
789 | 2494 * the mirror vdev (it actually reuses all the same ops) but has a few |
2495 * extra rules: you can't attach to it after it's been created, and upon | |
2496 * completion of resilvering, the first disk (the one being replaced) | |
2497 * is automatically detached. | |
2498 */ | |
2499 int | |
1544 | 2500 spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) |
789 | 2501 { |
2502 uint64_t txg, open_txg; | |
2503 int error; | |
2504 vdev_t *rvd = spa->spa_root_vdev; | |
2505 vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; | |
2082 | 2506 vdev_ops_t *pvops; |
4527 | 2507 int is_log; |
789 | 2508 |
2509 txg = spa_vdev_enter(spa); | |
2510 | |
1544 | 2511 oldvd = vdev_lookup_by_guid(rvd, guid); |
789 | 2512 |
2513 if (oldvd == NULL) | |
2514 return (spa_vdev_exit(spa, NULL, txg, ENODEV)); | |
2515 | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2516 if (!oldvd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2517 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2518 |
789 | 2519 pvd = oldvd->vdev_parent; |
2520 | |
2082 | 2521 if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, |
4451 | 2522 VDEV_ALLOC_ADD)) != 0) |
2523 return (spa_vdev_exit(spa, NULL, txg, EINVAL)); | |
2524 | |
2525 if (newrootvd->vdev_children != 1) | |
789 | 2526 return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); |
2527 | |
2528 newvd = newrootvd->vdev_child[0]; | |
2529 | |
2530 if (!newvd->vdev_ops->vdev_op_leaf) | |
2531 return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); | |
2532 | |
2082 | 2533 if ((error = vdev_create(newrootvd, txg, replacing)) != 0) |
789 | 2534 return (spa_vdev_exit(spa, newrootvd, txg, error)); |
2535 | |
4527 | 2536 /* |
2537 * Spares can't replace logs | |
2538 */ | |
2539 is_log = oldvd->vdev_islog; | |
2540 if (is_log && newvd->vdev_isspare) | |
2541 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
2542 | |
2082 | 2543 if (!replacing) { |
2544 /* | |
2545 * For attach, the only allowable parent is a mirror or the root | |
2546 * vdev. | |
2547 */ | |
2548 if (pvd->vdev_ops != &vdev_mirror_ops && | |
2549 pvd->vdev_ops != &vdev_root_ops) | |
2550 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
2551 | |
2552 pvops = &vdev_mirror_ops; | |
2553 } else { | |
2554 /* | |
2555 * Active hot spares can only be replaced by inactive hot | |
2556 * spares. | |
2557 */ | |
2558 if (pvd->vdev_ops == &vdev_spare_ops && | |
2559 pvd->vdev_child[1] == oldvd && | |
2560 !spa_has_spare(spa, newvd->vdev_guid)) | |
2561 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
2562 | |
2563 /* | |
2564 * If the source is a hot spare, and the parent isn't already a | |
2565 * spare, then we want to create a new hot spare. Otherwise, we | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2566 * want to create a replacing vdev. The user is not allowed to |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2567 * attach to a spared vdev child unless the 'isspare' state is |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2568 * the same (spare replaces spare, non-spare replaces |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2569 * non-spare). |
2082 | 2570 */ |
2571 if (pvd->vdev_ops == &vdev_replacing_ops) | |
2572 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2573 else if (pvd->vdev_ops == &vdev_spare_ops && |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2574 newvd->vdev_isspare != oldvd->vdev_isspare) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2575 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); |
2082 | 2576 else if (pvd->vdev_ops != &vdev_spare_ops && |
2577 newvd->vdev_isspare) | |
2578 pvops = &vdev_spare_ops; | |
2579 else | |
2580 pvops = &vdev_replacing_ops; | |
2581 } | |
2582 | |
1175
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
2583 /* |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
2584 * Compare the new device size with the replaceable/attachable |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
2585 * device size. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
2586 */ |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
2587 if (newvd->vdev_psize < vdev_get_rsize(oldvd)) |
789 | 2588 return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); |
2589 | |
1732 | 2590 /* |
2591 * The new device cannot have a higher alignment requirement | |
2592 * than the top-level vdev. | |
2593 */ | |
2594 if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) | |
789 | 2595 return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); |
2596 | |
2597 /* | |
2598 * If this is an in-place replacement, update oldvd's path and devid | |
2599 * to make it distinguishable from newvd, and unopenable from now on. | |
2600 */ | |
2601 if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { | |
2602 spa_strfree(oldvd->vdev_path); | |
2603 oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, | |
2604 KM_SLEEP); | |
2605 (void) sprintf(oldvd->vdev_path, "%s/%s", | |
2606 newvd->vdev_path, "old"); | |
2607 if (oldvd->vdev_devid != NULL) { | |
2608 spa_strfree(oldvd->vdev_devid); | |
2609 oldvd->vdev_devid = NULL; | |
2610 } | |
2611 } | |
2612 | |
2613 /* | |
2082 | 2614 * If the parent is not a mirror, or if we're replacing, insert the new |
2615 * mirror/replacing/spare vdev above oldvd. | |
789 | 2616 */ |
2617 if (pvd->vdev_ops != pvops) | |
2618 pvd = vdev_add_parent(oldvd, pvops); | |
2619 | |
2620 ASSERT(pvd->vdev_top->vdev_parent == rvd); | |
2621 ASSERT(pvd->vdev_ops == pvops); | |
2622 ASSERT(oldvd->vdev_parent == pvd); | |
2623 | |
2624 /* | |
2625 * Extract the new device from its root and add it to pvd. | |
2626 */ | |
2627 vdev_remove_child(newrootvd, newvd); | |
2628 newvd->vdev_id = pvd->vdev_children; | |
2629 vdev_add_child(pvd, newvd); | |
2630 | |
1544 | 2631 /* |
2632 * If newvd is smaller than oldvd, but larger than its rsize, | |
2633 * the addition of newvd may have decreased our parent's asize. | |
2634 */ | |
2635 pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize); | |
2636 | |
789 | 2637 tvd = newvd->vdev_top; |
2638 ASSERT(pvd->vdev_top == tvd); | |
2639 ASSERT(tvd->vdev_parent == rvd); | |
2640 | |
2641 vdev_config_dirty(tvd); | |
2642 | |
2643 /* | |
2644 * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate | |
2645 * upward when spa_vdev_exit() calls vdev_dtl_reassess(). | |
2646 */ | |
2647 open_txg = txg + TXG_CONCURRENT_STATES - 1; | |
2648 | |
2649 mutex_enter(&newvd->vdev_dtl_lock); | |
2650 space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL, | |
2651 open_txg - TXG_INITIAL + 1); | |
2652 mutex_exit(&newvd->vdev_dtl_lock); | |
2653 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2654 if (newvd->vdev_isspare) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2655 spa_spare_activate(newvd); |
1544 | 2656 |
789 | 2657 /* |
2658 * Mark newvd's DTL dirty in this txg. | |
2659 */ | |
1732 | 2660 vdev_dirty(tvd, VDD_DTL, newvd, txg); |
789 | 2661 |
2662 (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); | |
2663 | |
2664 /* | |
4451 | 2665 * Kick off a resilver to update newvd. We need to grab the namespace |
2666 * lock because spa_scrub() needs to post a sysevent with the pool name. | |
789 | 2667 */ |
4451 | 2668 mutex_enter(&spa_namespace_lock); |
789 | 2669 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); |
4451 | 2670 mutex_exit(&spa_namespace_lock); |
789 | 2671 |
2672 return (0); | |
2673 } | |
2674 | |
2675 /* | |
2676 * Detach a device from a mirror or replacing vdev. | |
2677 * If 'replace_done' is specified, only detach if the parent | |
2678 * is a replacing vdev. | |
2679 */ | |
2680 int | |
1544 | 2681 spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) |
789 | 2682 { |
2683 uint64_t txg; | |
2684 int c, t, error; | |
2685 vdev_t *rvd = spa->spa_root_vdev; | |
2686 vdev_t *vd, *pvd, *cvd, *tvd; | |
2082 | 2687 boolean_t unspare = B_FALSE; |
2688 uint64_t unspare_guid; | |
789 | 2689 |
2690 txg = spa_vdev_enter(spa); | |
2691 | |
1544 | 2692 vd = vdev_lookup_by_guid(rvd, guid); |
789 | 2693 |
2694 if (vd == NULL) | |
2695 return (spa_vdev_exit(spa, NULL, txg, ENODEV)); | |
2696 | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2697 if (!vd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2698 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2699 |
789 | 2700 pvd = vd->vdev_parent; |
2701 | |
2702 /* | |
2703 * If replace_done is specified, only remove this device if it's | |
2082 | 2704 * the first child of a replacing vdev. For the 'spare' vdev, either |
2705 * disk can be removed. | |
789 | 2706 */ |
2082 | 2707 if (replace_done) { |
2708 if (pvd->vdev_ops == &vdev_replacing_ops) { | |
2709 if (vd->vdev_id != 0) | |
2710 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); | |
2711 } else if (pvd->vdev_ops != &vdev_spare_ops) { | |
2712 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); | |
2713 } | |
2714 } | |
2715 | |
2716 ASSERT(pvd->vdev_ops != &vdev_spare_ops || | |
4577 | 2717 spa_version(spa) >= SPA_VERSION_SPARES); |
789 | 2718 |
2719 /* | |
2082 | 2720 * Only mirror, replacing, and spare vdevs support detach. |
789 | 2721 */ |
2722 if (pvd->vdev_ops != &vdev_replacing_ops && | |
2082 | 2723 pvd->vdev_ops != &vdev_mirror_ops && |
2724 pvd->vdev_ops != &vdev_spare_ops) | |
789 | 2725 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
2726 | |
2727 /* | |
2728 * If there's only one replica, you can't detach it. | |
2729 */ | |
2730 if (pvd->vdev_children <= 1) | |
2731 return (spa_vdev_exit(spa, NULL, txg, EBUSY)); | |
2732 | |
2733 /* | |
2734 * If all siblings have non-empty DTLs, this device may have the only | |
2735 * valid copy of the data, which means we cannot safely detach it. | |
2736 * | |
2737 * XXX -- as in the vdev_offline() case, we really want a more | |
2738 * precise DTL check. | |
2739 */ | |
2740 for (c = 0; c < pvd->vdev_children; c++) { | |
2741 uint64_t dirty; | |
2742 | |
2743 cvd = pvd->vdev_child[c]; | |
2744 if (cvd == vd) | |
2745 continue; | |
2746 if (vdev_is_dead(cvd)) | |
2747 continue; | |
2748 mutex_enter(&cvd->vdev_dtl_lock); | |
2749 dirty = cvd->vdev_dtl_map.sm_space | | |
2750 cvd->vdev_dtl_scrub.sm_space; | |
2751 mutex_exit(&cvd->vdev_dtl_lock); | |
2752 if (!dirty) | |
2753 break; | |
2754 } | |
2082 | 2755 |
2756 /* | |
2757 * If we are a replacing or spare vdev, then we can always detach the | |
2758 * latter child, as that is how one cancels the operation. | |
2759 */ | |
2760 if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) && | |
2761 c == pvd->vdev_children) | |
789 | 2762 return (spa_vdev_exit(spa, NULL, txg, EBUSY)); |
2763 | |
2764 /* | |
2082 | 2765 * If we are detaching the original disk from a spare, then it implies |
2766 * that the spare should become a real disk, and be removed from the | |
2767 * active spare list for the pool. | |
2768 */ | |
2769 if (pvd->vdev_ops == &vdev_spare_ops && | |
2770 vd->vdev_id == 0) | |
2771 unspare = B_TRUE; | |
2772 | |
2773 /* | |
789 | 2774 * Erase the disk labels so the disk can be used for other things. |
2775 * This must be done after all other error cases are handled, | |
2776 * but before we disembowel vd (so we can still do I/O to it). | |
2777 * But if we can't do it, don't treat the error as fatal -- | |
2778 * it may be that the unwritability of the disk is the reason | |
2779 * it's being detached! | |
2780 */ | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2781 error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); |
789 | 2782 |
2783 /* | |
2784 * Remove vd from its parent and compact the parent's children. | |
2785 */ | |
2786 vdev_remove_child(pvd, vd); | |
2787 vdev_compact_children(pvd); | |
2788 | |
2789 /* | |
2790 * Remember one of the remaining children so we can get tvd below. | |
2791 */ | |
2792 cvd = pvd->vdev_child[0]; | |
2793 | |
2794 /* | |
2082 | 2795 * If we need to remove the remaining child from the list of hot spares, |
2796 * do it now, marking the vdev as no longer a spare in the process. We | |
2797 * must do this before vdev_remove_parent(), because that can change the | |
2798 * GUID if it creates a new toplevel GUID. | |
2799 */ | |
2800 if (unspare) { | |
2801 ASSERT(cvd->vdev_isspare); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2802 spa_spare_remove(cvd); |
2082 | 2803 unspare_guid = cvd->vdev_guid; |
2804 } | |
2805 | |
2806 /* | |
789 | 2807 * If the parent mirror/replacing vdev only has one child, |
2808 * the parent is no longer needed. Remove it from the tree. | |
2809 */ | |
2810 if (pvd->vdev_children == 1) | |
2811 vdev_remove_parent(cvd); | |
2812 | |
2813 /* | |
2814 * We don't set tvd until now because the parent we just removed | |
2815 * may have been the previous top-level vdev. | |
2816 */ | |
2817 tvd = cvd->vdev_top; | |
2818 ASSERT(tvd->vdev_parent == rvd); | |
2819 | |
2820 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2821 * Reevaluate the parent vdev state. |
789 | 2822 */ |
4451 | 2823 vdev_propagate_state(cvd); |
789 | 2824 |
2825 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2826 * If the device we just detached was smaller than the others, it may be |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2827 * possible to add metaslabs (i.e. grow the pool). vdev_metaslab_init() |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2828 * can't fail because the existing metaslabs are already in core, so |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2829 * there's nothing to read from disk. |
789 | 2830 */ |
1732 | 2831 VERIFY(vdev_metaslab_init(tvd, txg) == 0); |
789 | 2832 |
2833 vdev_config_dirty(tvd); | |
2834 | |
2835 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2836 * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2837 * vd->vdev_detached is set and free vd's DTL object in syncing context. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2838 * But first make sure we're not on any *other* txg's DTL list, to |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2839 * prevent vd from being accessed after it's freed. |
789 | 2840 */ |
2841 for (t = 0; t < TXG_SIZE; t++) | |
2842 (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); | |
1732 | 2843 vd->vdev_detached = B_TRUE; |
2844 vdev_dirty(tvd, VDD_DTL, vd, txg); | |
789 | 2845 |
4451 | 2846 spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); |
2847 | |
2082 | 2848 error = spa_vdev_exit(spa, vd, txg, 0); |
2849 | |
2850 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2851 * If this was the removal of the original device in a hot spare vdev, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2852 * then we want to go through and remove the device from the hot spare |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2853 * list of every other pool. |
2082 | 2854 */ |
2855 if (unspare) { | |
2856 spa = NULL; | |
2857 mutex_enter(&spa_namespace_lock); | |
2858 while ((spa = spa_next(spa)) != NULL) { | |
2859 if (spa->spa_state != POOL_STATE_ACTIVE) | |
2860 continue; | |
2861 | |
2862 (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); | |
2863 } | |
2864 mutex_exit(&spa_namespace_lock); | |
2865 } | |
2866 | |
2867 return (error); | |
2868 } | |
2869 | |
2870 /* | |
5450 | 2871 * Remove a spares vdev from the nvlist config. |
2082 | 2872 */ |
5450 | 2873 static int |
2874 spa_remove_spares(spa_aux_vdev_t *sav, uint64_t guid, boolean_t unspare, | |
2875 nvlist_t **spares, int nspares, vdev_t *vd) | |
2082 | 2876 { |
5450 | 2877 nvlist_t *nv, **newspares; |
2878 int i, j; | |
2082 | 2879 |
2880 nv = NULL; | |
5450 | 2881 for (i = 0; i < nspares; i++) { |
2882 uint64_t theguid; | |
2883 | |
2884 VERIFY(nvlist_lookup_uint64(spares[i], | |
2885 ZPOOL_CONFIG_GUID, &theguid) == 0); | |
2886 if (theguid == guid) { | |
2887 nv = spares[i]; | |
2888 break; | |
2082 | 2889 } |
2890 } | |
2891 | |
2892 /* | |
5450 | 2893 * Only remove the hot spare if it's not currently in use in this pool. |
2082 | 2894 */ |
5450 | 2895 if (nv == NULL && vd == NULL) |
2896 return (ENOENT); | |
2897 | |
2898 if (nv == NULL && vd != NULL) | |
2899 return (ENOTSUP); | |
2900 | |
2901 if (!unspare && nv != NULL && vd != NULL) | |
2902 return (EBUSY); | |
2082 | 2903 |
2904 if (nspares == 1) { | |
2905 newspares = NULL; | |
2906 } else { | |
2907 newspares = kmem_alloc((nspares - 1) * sizeof (void *), | |
2908 KM_SLEEP); | |
2909 for (i = 0, j = 0; i < nspares; i++) { | |
2910 if (spares[i] != nv) | |
2911 VERIFY(nvlist_dup(spares[i], | |
2912 &newspares[j++], KM_SLEEP) == 0); | |
2913 } | |
2914 } | |
2915 | |
5450 | 2916 VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_SPARES, |
2082 | 2917 DATA_TYPE_NVLIST_ARRAY) == 0); |
5450 | 2918 VERIFY(nvlist_add_nvlist_array(sav->sav_config, |
2919 ZPOOL_CONFIG_SPARES, newspares, nspares - 1) == 0); | |
2082 | 2920 for (i = 0; i < nspares - 1; i++) |
2921 nvlist_free(newspares[i]); | |
2922 kmem_free(newspares, (nspares - 1) * sizeof (void *)); | |
5450 | 2923 |
2924 return (0); | |
2925 } | |
2926 | |
2927 /* | |
2928 * Remove an l2cache vdev from the nvlist config. | |
2929 */ | |
2930 static int | |
2931 spa_remove_l2cache(spa_aux_vdev_t *sav, uint64_t guid, nvlist_t **l2cache, | |
2932 int nl2cache, vdev_t *vd) | |
2933 { | |
2934 nvlist_t *nv, **newl2cache; | |
2935 int i, j; | |
2936 | |
2937 nv = NULL; | |
2938 for (i = 0; i < nl2cache; i++) { | |
2939 uint64_t theguid; | |
2940 | |
2941 VERIFY(nvlist_lookup_uint64(l2cache[i], | |
2942 ZPOOL_CONFIG_GUID, &theguid) == 0); | |
2943 if (theguid == guid) { | |
2944 nv = l2cache[i]; | |
2945 break; | |
2946 } | |
2947 } | |
2948 | |
2949 if (vd == NULL) { | |
2950 for (i = 0; i < nl2cache; i++) { | |
2951 if (sav->sav_vdevs[i]->vdev_guid == guid) { | |
2952 vd = sav->sav_vdevs[i]; | |
2953 break; | |
2954 } | |
2955 } | |
2956 } | |
2957 | |
2958 if (nv == NULL && vd == NULL) | |
2959 return (ENOENT); | |
2960 | |
2961 if (nv == NULL && vd != NULL) | |
2962 return (ENOTSUP); | |
2963 | |
2964 if (nl2cache == 1) { | |
2965 newl2cache = NULL; | |
2966 } else { | |
2967 newl2cache = kmem_alloc((nl2cache - 1) * sizeof (void *), | |
2968 KM_SLEEP); | |
2969 for (i = 0, j = 0; i < nl2cache; i++) { | |
2970 if (l2cache[i] != nv) | |
2971 VERIFY(nvlist_dup(l2cache[i], | |
2972 &newl2cache[j++], KM_SLEEP) == 0); | |
2973 } | |
2974 } | |
2975 | |
2976 VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, | |
2977 DATA_TYPE_NVLIST_ARRAY) == 0); | |
2978 VERIFY(nvlist_add_nvlist_array(sav->sav_config, | |
2979 ZPOOL_CONFIG_L2CACHE, newl2cache, nl2cache - 1) == 0); | |
2980 for (i = 0; i < nl2cache - 1; i++) | |
2981 nvlist_free(newl2cache[i]); | |
2982 kmem_free(newl2cache, (nl2cache - 1) * sizeof (void *)); | |
2983 | |
2984 return (0); | |
2985 } | |
2986 | |
2987 /* | |
2988 * Remove a device from the pool. Currently, this supports removing only hot | |
2989 * spares and level 2 ARC devices. | |
2990 */ | |
2991 int | |
2992 spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) | |
2993 { | |
2994 vdev_t *vd; | |
2995 nvlist_t **spares, **l2cache; | |
2996 uint_t nspares, nl2cache; | |
2997 int error = 0; | |
2998 | |
2999 spa_config_enter(spa, RW_WRITER, FTAG); | |
3000 | |
3001 vd = spa_lookup_by_guid(spa, guid); | |
3002 | |
3003 if (spa->spa_spares.sav_vdevs != NULL && | |
3004 spa_spare_exists(guid, NULL) && | |
3005 nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, | |
3006 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { | |
3007 if ((error = spa_remove_spares(&spa->spa_spares, guid, unspare, | |
3008 spares, nspares, vd)) != 0) | |
3009 goto out; | |
3010 spa_load_spares(spa); | |
3011 spa->spa_spares.sav_sync = B_TRUE; | |
3012 goto out; | |
3013 } | |
3014 | |
3015 if (spa->spa_l2cache.sav_vdevs != NULL && | |
3016 spa_l2cache_exists(guid, NULL) && | |
3017 nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, | |
3018 ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { | |
3019 if ((error = spa_remove_l2cache(&spa->spa_l2cache, guid, | |
3020 l2cache, nl2cache, vd)) != 0) | |
3021 goto out; | |
3022 spa_load_l2cache(spa); | |
3023 spa->spa_l2cache.sav_sync = B_TRUE; | |
3024 } | |
2082 | 3025 |
3026 out: | |
3027 spa_config_exit(spa, FTAG); | |
5450 | 3028 return (error); |
789 | 3029 } |
3030 | |
3031 /* | |
4451 | 3032 * Find any device that's done replacing, or a vdev marked 'unspare' that's |
3033 * current spared, so we can detach it. | |
789 | 3034 */ |
1544 | 3035 static vdev_t * |
4451 | 3036 spa_vdev_resilver_done_hunt(vdev_t *vd) |
789 | 3037 { |
1544 | 3038 vdev_t *newvd, *oldvd; |
789 | 3039 int c; |
3040 | |
1544 | 3041 for (c = 0; c < vd->vdev_children; c++) { |
4451 | 3042 oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); |
1544 | 3043 if (oldvd != NULL) |
3044 return (oldvd); | |
3045 } | |
789 | 3046 |
4451 | 3047 /* |
3048 * Check for a completed replacement. | |
3049 */ | |
789 | 3050 if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { |
1544 | 3051 oldvd = vd->vdev_child[0]; |
3052 newvd = vd->vdev_child[1]; | |
789 | 3053 |
1544 | 3054 mutex_enter(&newvd->vdev_dtl_lock); |
3055 if (newvd->vdev_dtl_map.sm_space == 0 && | |
3056 newvd->vdev_dtl_scrub.sm_space == 0) { | |
3057 mutex_exit(&newvd->vdev_dtl_lock); | |
3058 return (oldvd); | |
3059 } | |
3060 mutex_exit(&newvd->vdev_dtl_lock); | |
3061 } | |
789 | 3062 |
4451 | 3063 /* |
3064 * Check for a completed resilver with the 'unspare' flag set. | |
3065 */ | |
3066 if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { | |
3067 newvd = vd->vdev_child[0]; | |
3068 oldvd = vd->vdev_child[1]; | |
3069 | |
3070 mutex_enter(&newvd->vdev_dtl_lock); | |
3071 if (newvd->vdev_unspare && | |
3072 newvd->vdev_dtl_map.sm_space == 0 && | |
3073 newvd->vdev_dtl_scrub.sm_space == 0) { | |
3074 newvd->vdev_unspare = 0; | |
3075 mutex_exit(&newvd->vdev_dtl_lock); | |
3076 return (oldvd); | |
3077 } | |
3078 mutex_exit(&newvd->vdev_dtl_lock); | |
3079 } | |
3080 | |
1544 | 3081 return (NULL); |
789 | 3082 } |
3083 | |
1544 | 3084 static void |
4451 | 3085 spa_vdev_resilver_done(spa_t *spa) |
789 | 3086 { |
1544 | 3087 vdev_t *vd; |
2082 | 3088 vdev_t *pvd; |
1544 | 3089 uint64_t guid; |
2082 | 3090 uint64_t pguid = 0; |
789 | 3091 |
1544 | 3092 spa_config_enter(spa, RW_READER, FTAG); |
789 | 3093 |
4451 | 3094 while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { |
1544 | 3095 guid = vd->vdev_guid; |
2082 | 3096 /* |
3097 * If we have just finished replacing a hot spared device, then | |
3098 * we need to detach the parent's first child (the original hot | |
3099 * spare) as well. | |
3100 */ | |
3101 pvd = vd->vdev_parent; | |
3102 if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops && | |
3103 pvd->vdev_id == 0) { | |
3104 ASSERT(pvd->vdev_ops == &vdev_replacing_ops); | |
3105 ASSERT(pvd->vdev_parent->vdev_children == 2); | |
3106 pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid; | |
3107 } | |
1544 | 3108 spa_config_exit(spa, FTAG); |
3109 if (spa_vdev_detach(spa, guid, B_TRUE) != 0) | |
3110 return; | |
2082 | 3111 if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0) |
3112 return; | |
1544 | 3113 spa_config_enter(spa, RW_READER, FTAG); |
789 | 3114 } |
3115 | |
1544 | 3116 spa_config_exit(spa, FTAG); |
789 | 3117 } |
3118 | |
3119 /* | |
1354
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3120 * Update the stored path for this vdev. Dirty the vdev configuration, relying |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3121 * on spa_vdev_enter/exit() to synchronize the labels and cache. |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3122 */ |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3123 int |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3124 spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3125 { |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3126 vdev_t *rvd, *vd; |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3127 uint64_t txg; |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3128 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3129 rvd = spa->spa_root_vdev; |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3130 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3131 txg = spa_vdev_enter(spa); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3132 |
2082 | 3133 if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) { |
3134 /* | |
5450 | 3135 * Determine if this is a reference to a hot spare or l2cache |
3136 * device. If it is, update the path as stored in their | |
3137 * device list. | |
2082 | 3138 */ |
5450 | 3139 nvlist_t **spares, **l2cache; |
3140 uint_t i, nspares, nl2cache; | |
3141 | |
3142 if (spa->spa_spares.sav_config != NULL) { | |
3143 VERIFY(nvlist_lookup_nvlist_array( | |
3144 spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, | |
3145 &spares, &nspares) == 0); | |
2082 | 3146 for (i = 0; i < nspares; i++) { |
3147 uint64_t theguid; | |
3148 VERIFY(nvlist_lookup_uint64(spares[i], | |
3149 ZPOOL_CONFIG_GUID, &theguid) == 0); | |
5450 | 3150 if (theguid == guid) { |
3151 VERIFY(nvlist_add_string(spares[i], | |
3152 ZPOOL_CONFIG_PATH, newpath) == 0); | |
3153 spa_load_spares(spa); | |
3154 spa->spa_spares.sav_sync = B_TRUE; | |
3155 return (spa_vdev_exit(spa, NULL, txg, | |
3156 0)); | |
3157 } | |
2082 | 3158 } |
3159 } | |
5450 | 3160 |
3161 if (spa->spa_l2cache.sav_config != NULL) { | |
3162 VERIFY(nvlist_lookup_nvlist_array( | |
3163 spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, | |
3164 &l2cache, &nl2cache) == 0); | |
3165 for (i = 0; i < nl2cache; i++) { | |
3166 uint64_t theguid; | |
3167 VERIFY(nvlist_lookup_uint64(l2cache[i], | |
3168 ZPOOL_CONFIG_GUID, &theguid) == 0); | |
3169 if (theguid == guid) { | |
3170 VERIFY(nvlist_add_string(l2cache[i], | |
3171 ZPOOL_CONFIG_PATH, newpath) == 0); | |
3172 spa_load_l2cache(spa); | |
3173 spa->spa_l2cache.sav_sync = B_TRUE; | |
3174 return (spa_vdev_exit(spa, NULL, txg, | |
3175 0)); | |
3176 } | |
3177 } | |
3178 } | |
3179 | |
3180 return (spa_vdev_exit(spa, NULL, txg, ENOENT)); | |
2082 | 3181 } |
1354
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3182 |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
3183 if (!vd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
3184 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
3185 |
1354
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3186 spa_strfree(vd->vdev_path); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3187 vd->vdev_path = spa_strdup(newpath); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3188 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3189 vdev_config_dirty(vd->vdev_top); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3190 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3191 return (spa_vdev_exit(spa, NULL, txg, 0)); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3192 } |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3193 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
3194 /* |
789 | 3195 * ========================================================================== |
3196 * SPA Scrubbing | |
3197 * ========================================================================== | |
3198 */ | |
3199 | |
3200 static void | |
3201 spa_scrub_io_done(zio_t *zio) | |
3202 { | |
3203 spa_t *spa = zio->io_spa; | |
3204 | |
4309
3dfde0f4662d
6542676 ARC needs to track meta-data memory overhead
maybee
parents:
4178
diff
changeset
|
3205 arc_data_buf_free(zio->io_data, zio->io_size); |
789 | 3206 |
3207 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 3208 if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3209 vdev_t *vd = zio->io_vd ? zio->io_vd : spa->spa_root_vdev; |
789 | 3210 spa->spa_scrub_errors++; |
3211 mutex_enter(&vd->vdev_stat_lock); | |
3212 vd->vdev_stat.vs_scrub_errors++; | |
3213 mutex_exit(&vd->vdev_stat_lock); | |
3214 } | |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3215 |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3216 if (--spa->spa_scrub_inflight < spa->spa_scrub_maxinflight) |
1544 | 3217 cv_broadcast(&spa->spa_scrub_io_cv); |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3218 |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3219 ASSERT(spa->spa_scrub_inflight >= 0); |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3220 |
1544 | 3221 mutex_exit(&spa->spa_scrub_lock); |
789 | 3222 } |
3223 | |
3224 static void | |
1544 | 3225 spa_scrub_io_start(spa_t *spa, blkptr_t *bp, int priority, int flags, |
3226 zbookmark_t *zb) | |
789 | 3227 { |
3228 size_t size = BP_GET_LSIZE(bp); | |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3229 void *data; |
789 | 3230 |
3231 mutex_enter(&spa->spa_scrub_lock); | |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3232 /* |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3233 * Do not give too much work to vdev(s). |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3234 */ |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3235 while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight) { |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3236 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3237 } |
789 | 3238 spa->spa_scrub_inflight++; |
3239 mutex_exit(&spa->spa_scrub_lock); | |
3240 | |
4309
3dfde0f4662d
6542676 ARC needs to track meta-data memory overhead
maybee
parents:
4178
diff
changeset
|
3241 data = arc_data_buf_alloc(size); |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
3242 |
1544 | 3243 if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET) |
3244 flags |= ZIO_FLAG_SPECULATIVE; /* intent log block */ | |
3245 | |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
3246 flags |= ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL; |
1544 | 3247 |
789 | 3248 zio_nowait(zio_read(NULL, spa, bp, data, size, |
1544 | 3249 spa_scrub_io_done, NULL, priority, flags, zb)); |
789 | 3250 } |
3251 | |
3252 /* ARGSUSED */ | |
3253 static int | |
3254 spa_scrub_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) | |
3255 { | |
3256 blkptr_t *bp = &bc->bc_blkptr; | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3257 vdev_t *vd = spa->spa_root_vdev; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3258 dva_t *dva = bp->blk_dva; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3259 int needs_resilver = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3260 int d; |
789 | 3261 |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3262 if (bc->bc_errno) { |
789 | 3263 /* |
3264 * We can't scrub this block, but we can continue to scrub | |
3265 * the rest of the pool. Note the error and move along. | |
3266 */ | |
3267 mutex_enter(&spa->spa_scrub_lock); | |
3268 spa->spa_scrub_errors++; | |
3269 mutex_exit(&spa->spa_scrub_lock); | |
3270 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3271 mutex_enter(&vd->vdev_stat_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3272 vd->vdev_stat.vs_scrub_errors++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3273 mutex_exit(&vd->vdev_stat_lock); |
789 | 3274 |
3275 return (ERESTART); | |
3276 } | |
3277 | |
3278 ASSERT(bp->blk_birth < spa->spa_scrub_maxtxg); | |
3279 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3280 for (d = 0; d < BP_GET_NDVAS(bp); d++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3281 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d])); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3282 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3283 ASSERT(vd != NULL); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3284 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3285 /* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3286 * Keep track of how much data we've examined so that |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3287 * zpool(1M) status can make useful progress reports. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3288 */ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3289 mutex_enter(&vd->vdev_stat_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3290 vd->vdev_stat.vs_scrub_examined += DVA_GET_ASIZE(&dva[d]); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3291 mutex_exit(&vd->vdev_stat_lock); |
789 | 3292 |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3293 if (spa->spa_scrub_type == POOL_SCRUB_RESILVER) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3294 if (DVA_GET_GANG(&dva[d])) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3295 /* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3296 * Gang members may be spread across multiple |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3297 * vdevs, so the best we can do is look at the |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3298 * pool-wide DTL. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3299 * XXX -- it would be better to change our |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3300 * allocation policy to ensure that this can't |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3301 * happen. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3302 */ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3303 vd = spa->spa_root_vdev; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3304 } |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3305 if (vdev_dtl_contains(&vd->vdev_dtl_map, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3306 bp->blk_birth, 1)) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3307 needs_resilver = B_TRUE; |
789 | 3308 } |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3309 } |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3310 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3311 if (spa->spa_scrub_type == POOL_SCRUB_EVERYTHING) |
789 | 3312 spa_scrub_io_start(spa, bp, ZIO_PRIORITY_SCRUB, |
1544 | 3313 ZIO_FLAG_SCRUB, &bc->bc_bookmark); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3314 else if (needs_resilver) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3315 spa_scrub_io_start(spa, bp, ZIO_PRIORITY_RESILVER, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
3316 ZIO_FLAG_RESILVER, &bc->bc_bookmark); |
789 | 3317 |
3318 return (0); | |
3319 } | |
3320 | |
3321 static void | |
3322 spa_scrub_thread(spa_t *spa) | |
3323 { | |
3324 callb_cpr_t cprinfo; | |
3325 traverse_handle_t *th = spa->spa_scrub_th; | |
3326 vdev_t *rvd = spa->spa_root_vdev; | |
3327 pool_scrub_type_t scrub_type = spa->spa_scrub_type; | |
3328 int error = 0; | |
3329 boolean_t complete; | |
3330 | |
3331 CALLB_CPR_INIT(&cprinfo, &spa->spa_scrub_lock, callb_generic_cpr, FTAG); | |
3332 | |
797
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
3333 /* |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
3334 * If we're restarting due to a snapshot create/delete, |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
3335 * wait for that to complete. |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
3336 */ |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
3337 txg_wait_synced(spa_get_dsl(spa), 0); |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
3338 |
1544 | 3339 dprintf("start %s mintxg=%llu maxtxg=%llu\n", |
3340 scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", | |
3341 spa->spa_scrub_mintxg, spa->spa_scrub_maxtxg); | |
3342 | |
3343 spa_config_enter(spa, RW_WRITER, FTAG); | |
3344 vdev_reopen(rvd); /* purge all vdev caches */ | |
789 | 3345 vdev_config_dirty(rvd); /* rewrite all disk labels */ |
3346 vdev_scrub_stat_update(rvd, scrub_type, B_FALSE); | |
1544 | 3347 spa_config_exit(spa, FTAG); |
789 | 3348 |
3349 mutex_enter(&spa->spa_scrub_lock); | |
3350 spa->spa_scrub_errors = 0; | |
3351 spa->spa_scrub_active = 1; | |
1544 | 3352 ASSERT(spa->spa_scrub_inflight == 0); |
789 | 3353 |
3354 while (!spa->spa_scrub_stop) { | |
3355 CALLB_CPR_SAFE_BEGIN(&cprinfo); | |
1544 | 3356 while (spa->spa_scrub_suspended) { |
789 | 3357 spa->spa_scrub_active = 0; |
3358 cv_broadcast(&spa->spa_scrub_cv); | |
3359 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
3360 spa->spa_scrub_active = 1; | |
3361 } | |
3362 CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_scrub_lock); | |
3363 | |
3364 if (spa->spa_scrub_restart_txg != 0) | |
3365 break; | |
3366 | |
3367 mutex_exit(&spa->spa_scrub_lock); | |
3368 error = traverse_more(th); | |
3369 mutex_enter(&spa->spa_scrub_lock); | |
3370 if (error != EAGAIN) | |
3371 break; | |
3372 } | |
3373 | |
3374 while (spa->spa_scrub_inflight) | |
3375 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); | |
3376 | |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3377 spa->spa_scrub_active = 0; |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3378 cv_broadcast(&spa->spa_scrub_cv); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3379 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3380 mutex_exit(&spa->spa_scrub_lock); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3381 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3382 spa_config_enter(spa, RW_WRITER, FTAG); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3383 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3384 mutex_enter(&spa->spa_scrub_lock); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3385 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3386 /* |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3387 * Note: we check spa_scrub_restart_txg under both spa_scrub_lock |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3388 * AND the spa config lock to synchronize with any config changes |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3389 * that revise the DTLs under spa_vdev_enter() / spa_vdev_exit(). |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3390 */ |
789 | 3391 if (spa->spa_scrub_restart_txg != 0) |
3392 error = ERESTART; | |
3393 | |
1544 | 3394 if (spa->spa_scrub_stop) |
3395 error = EINTR; | |
3396 | |
789 | 3397 /* |
1544 | 3398 * Even if there were uncorrectable errors, we consider the scrub |
3399 * completed. The downside is that if there is a transient error during | |
3400 * a resilver, we won't resilver the data properly to the target. But | |
3401 * if the damage is permanent (more likely) we will resilver forever, | |
3402 * which isn't really acceptable. Since there is enough information for | |
3403 * the user to know what has failed and why, this seems like a more | |
3404 * tractable approach. | |
789 | 3405 */ |
1544 | 3406 complete = (error == 0); |
789 | 3407 |
1544 | 3408 dprintf("end %s to maxtxg=%llu %s, traverse=%d, %llu errors, stop=%u\n", |
3409 scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", | |
789 | 3410 spa->spa_scrub_maxtxg, complete ? "done" : "FAILED", |
3411 error, spa->spa_scrub_errors, spa->spa_scrub_stop); | |
3412 | |
3413 mutex_exit(&spa->spa_scrub_lock); | |
3414 | |
3415 /* | |
3416 * If the scrub/resilver completed, update all DTLs to reflect this. | |
3417 * Whether it succeeded or not, vacate all temporary scrub DTLs. | |
3418 */ | |
3419 vdev_dtl_reassess(rvd, spa_last_synced_txg(spa) + 1, | |
3420 complete ? spa->spa_scrub_maxtxg : 0, B_TRUE); | |
3421 vdev_scrub_stat_update(rvd, POOL_SCRUB_NONE, complete); | |
1544 | 3422 spa_errlog_rotate(spa); |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
3423 |
4451 | 3424 if (scrub_type == POOL_SCRUB_RESILVER && complete) |
3425 spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_FINISH); | |
3426 | |
1544 | 3427 spa_config_exit(spa, FTAG); |
789 | 3428 |
3429 mutex_enter(&spa->spa_scrub_lock); | |
3430 | |
1544 | 3431 /* |
3432 * We may have finished replacing a device. | |
3433 * Let the async thread assess this and handle the detach. | |
3434 */ | |
4451 | 3435 spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); |
789 | 3436 |
3437 /* | |
3438 * If we were told to restart, our final act is to start a new scrub. | |
3439 */ | |
3440 if (error == ERESTART) | |
1544 | 3441 spa_async_request(spa, scrub_type == POOL_SCRUB_RESILVER ? |
3442 SPA_ASYNC_RESILVER : SPA_ASYNC_SCRUB); | |
789 | 3443 |
1544 | 3444 spa->spa_scrub_type = POOL_SCRUB_NONE; |
3445 spa->spa_scrub_active = 0; | |
3446 spa->spa_scrub_thread = NULL; | |
3447 cv_broadcast(&spa->spa_scrub_cv); | |
789 | 3448 CALLB_CPR_EXIT(&cprinfo); /* drops &spa->spa_scrub_lock */ |
3449 thread_exit(); | |
3450 } | |
3451 | |
3452 void | |
3453 spa_scrub_suspend(spa_t *spa) | |
3454 { | |
3455 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 3456 spa->spa_scrub_suspended++; |
789 | 3457 while (spa->spa_scrub_active) { |
3458 cv_broadcast(&spa->spa_scrub_cv); | |
3459 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
3460 } | |
3461 while (spa->spa_scrub_inflight) | |
3462 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); | |
3463 mutex_exit(&spa->spa_scrub_lock); | |
3464 } | |
3465 | |
3466 void | |
3467 spa_scrub_resume(spa_t *spa) | |
3468 { | |
3469 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 3470 ASSERT(spa->spa_scrub_suspended != 0); |
3471 if (--spa->spa_scrub_suspended == 0) | |
789 | 3472 cv_broadcast(&spa->spa_scrub_cv); |
3473 mutex_exit(&spa->spa_scrub_lock); | |
3474 } | |
3475 | |
3476 void | |
3477 spa_scrub_restart(spa_t *spa, uint64_t txg) | |
3478 { | |
3479 /* | |
3480 * Something happened (e.g. snapshot create/delete) that means | |
3481 * we must restart any in-progress scrubs. The itinerary will | |
3482 * fix this properly. | |
3483 */ | |
3484 mutex_enter(&spa->spa_scrub_lock); | |
3485 spa->spa_scrub_restart_txg = txg; | |
3486 mutex_exit(&spa->spa_scrub_lock); | |
3487 } | |
3488 | |
1544 | 3489 int |
3490 spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force) | |
789 | 3491 { |
3492 space_seg_t *ss; | |
3493 uint64_t mintxg, maxtxg; | |
3494 vdev_t *rvd = spa->spa_root_vdev; | |
3495 | |
4808 | 3496 ASSERT(MUTEX_HELD(&spa_namespace_lock)); |
3497 ASSERT(!spa_config_held(spa, RW_WRITER)); | |
3498 | |
789 | 3499 if ((uint_t)type >= POOL_SCRUB_TYPES) |
3500 return (ENOTSUP); | |
3501 | |
1544 | 3502 mutex_enter(&spa->spa_scrub_lock); |
3503 | |
789 | 3504 /* |
3505 * If there's a scrub or resilver already in progress, stop it. | |
3506 */ | |
3507 while (spa->spa_scrub_thread != NULL) { | |
3508 /* | |
3509 * Don't stop a resilver unless forced. | |
3510 */ | |
1544 | 3511 if (spa->spa_scrub_type == POOL_SCRUB_RESILVER && !force) { |
3512 mutex_exit(&spa->spa_scrub_lock); | |
789 | 3513 return (EBUSY); |
1544 | 3514 } |
789 | 3515 spa->spa_scrub_stop = 1; |
3516 cv_broadcast(&spa->spa_scrub_cv); | |
3517 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
3518 } | |
3519 | |
3520 /* | |
3521 * Terminate the previous traverse. | |
3522 */ | |
3523 if (spa->spa_scrub_th != NULL) { | |
3524 traverse_fini(spa->spa_scrub_th); | |
3525 spa->spa_scrub_th = NULL; | |
3526 } | |
3527 | |
1544 | 3528 if (rvd == NULL) { |
3529 ASSERT(spa->spa_scrub_stop == 0); | |
3530 ASSERT(spa->spa_scrub_type == type); | |
3531 ASSERT(spa->spa_scrub_restart_txg == 0); | |
3532 mutex_exit(&spa->spa_scrub_lock); | |
3533 return (0); | |
3534 } | |
789 | 3535 |
3536 mintxg = TXG_INITIAL - 1; | |
3537 maxtxg = spa_last_synced_txg(spa) + 1; | |
3538 | |
1544 | 3539 mutex_enter(&rvd->vdev_dtl_lock); |
789 | 3540 |
1544 | 3541 if (rvd->vdev_dtl_map.sm_space == 0) { |
3542 /* | |
3543 * The pool-wide DTL is empty. | |
1732 | 3544 * If this is a resilver, there's nothing to do except |
3545 * check whether any in-progress replacements have completed. | |
1544 | 3546 */ |
1732 | 3547 if (type == POOL_SCRUB_RESILVER) { |
1544 | 3548 type = POOL_SCRUB_NONE; |
4451 | 3549 spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); |
1732 | 3550 } |
1544 | 3551 } else { |
3552 /* | |
3553 * The pool-wide DTL is non-empty. | |
3554 * If this is a normal scrub, upgrade to a resilver instead. | |
3555 */ | |
3556 if (type == POOL_SCRUB_EVERYTHING) | |
3557 type = POOL_SCRUB_RESILVER; | |
3558 } | |
789 | 3559 |
1544 | 3560 if (type == POOL_SCRUB_RESILVER) { |
789 | 3561 /* |
3562 * Determine the resilvering boundaries. | |
3563 * | |
3564 * Note: (mintxg, maxtxg) is an open interval, | |
3565 * i.e. mintxg and maxtxg themselves are not included. | |
3566 * | |
3567 * Note: for maxtxg, we MIN with spa_last_synced_txg(spa) + 1 | |
3568 * so we don't claim to resilver a txg that's still changing. | |
3569 */ | |
3570 ss = avl_first(&rvd->vdev_dtl_map.sm_root); | |
1544 | 3571 mintxg = ss->ss_start - 1; |
789 | 3572 ss = avl_last(&rvd->vdev_dtl_map.sm_root); |
1544 | 3573 maxtxg = MIN(ss->ss_end, maxtxg); |
4451 | 3574 |
3575 spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); | |
789 | 3576 } |
3577 | |
1544 | 3578 mutex_exit(&rvd->vdev_dtl_lock); |
3579 | |
3580 spa->spa_scrub_stop = 0; | |
3581 spa->spa_scrub_type = type; | |
3582 spa->spa_scrub_restart_txg = 0; | |
3583 | |
3584 if (type != POOL_SCRUB_NONE) { | |
3585 spa->spa_scrub_mintxg = mintxg; | |
789 | 3586 spa->spa_scrub_maxtxg = maxtxg; |
3587 spa->spa_scrub_th = traverse_init(spa, spa_scrub_cb, NULL, | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3588 ADVANCE_PRE | ADVANCE_PRUNE | ADVANCE_ZIL, |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3589 ZIO_FLAG_CANFAIL); |
789 | 3590 traverse_add_pool(spa->spa_scrub_th, mintxg, maxtxg); |
3591 spa->spa_scrub_thread = thread_create(NULL, 0, | |
3592 spa_scrub_thread, spa, 0, &p0, TS_RUN, minclsyspri); | |
3593 } | |
3594 | |
1544 | 3595 mutex_exit(&spa->spa_scrub_lock); |
3596 | |
789 | 3597 return (0); |
3598 } | |
3599 | |
1544 | 3600 /* |
3601 * ========================================================================== | |
3602 * SPA async task processing | |
3603 * ========================================================================== | |
3604 */ | |
3605 | |
3606 static void | |
4451 | 3607 spa_async_remove(spa_t *spa, vdev_t *vd) |
789 | 3608 { |
1544 | 3609 vdev_t *tvd; |
3610 int c; | |
3611 | |
4451 | 3612 for (c = 0; c < vd->vdev_children; c++) { |
3613 tvd = vd->vdev_child[c]; | |
3614 if (tvd->vdev_remove_wanted) { | |
3615 tvd->vdev_remove_wanted = 0; | |
3616 vdev_set_state(tvd, B_FALSE, VDEV_STATE_REMOVED, | |
3617 VDEV_AUX_NONE); | |
5329 | 3618 vdev_clear(spa, tvd, B_TRUE); |
4451 | 3619 vdev_config_dirty(tvd->vdev_top); |
1544 | 3620 } |
4451 | 3621 spa_async_remove(spa, tvd); |
1544 | 3622 } |
3623 } | |
3624 | |
3625 static void | |
3626 spa_async_thread(spa_t *spa) | |
3627 { | |
3628 int tasks; | |
4451 | 3629 uint64_t txg; |
1544 | 3630 |
3631 ASSERT(spa->spa_sync_on); | |
789 | 3632 |
1544 | 3633 mutex_enter(&spa->spa_async_lock); |
3634 tasks = spa->spa_async_tasks; | |
3635 spa->spa_async_tasks = 0; | |
3636 mutex_exit(&spa->spa_async_lock); | |
3637 | |
3638 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3639 * See if the config needs to be updated. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3640 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3641 if (tasks & SPA_ASYNC_CONFIG_UPDATE) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3642 mutex_enter(&spa_namespace_lock); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3643 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3644 mutex_exit(&spa_namespace_lock); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3645 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3646 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3647 /* |
4451 | 3648 * See if any devices need to be marked REMOVED. |
5329 | 3649 * |
3650 * XXX - We avoid doing this when we are in | |
3651 * I/O failure state since spa_vdev_enter() grabs | |
3652 * the namespace lock and would not be able to obtain | |
3653 * the writer config lock. | |
1544 | 3654 */ |
5329 | 3655 if (tasks & SPA_ASYNC_REMOVE && |
3656 spa_state(spa) != POOL_STATE_IO_FAILURE) { | |
4451 | 3657 txg = spa_vdev_enter(spa); |
3658 spa_async_remove(spa, spa->spa_root_vdev); | |
3659 (void) spa_vdev_exit(spa, NULL, txg, 0); | |
3660 } | |
1544 | 3661 |
3662 /* | |
3663 * If any devices are done replacing, detach them. | |
3664 */ | |
4451 | 3665 if (tasks & SPA_ASYNC_RESILVER_DONE) |
3666 spa_vdev_resilver_done(spa); | |
789 | 3667 |
1544 | 3668 /* |
4451 | 3669 * Kick off a scrub. When starting a RESILVER scrub (or an EVERYTHING |
3670 * scrub which can become a resilver), we need to hold | |
3671 * spa_namespace_lock because the sysevent we post via | |
3672 * spa_event_notify() needs to get the name of the pool. | |
1544 | 3673 */ |
4451 | 3674 if (tasks & SPA_ASYNC_SCRUB) { |
3675 mutex_enter(&spa_namespace_lock); | |
1544 | 3676 VERIFY(spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_TRUE) == 0); |
4451 | 3677 mutex_exit(&spa_namespace_lock); |
3678 } | |
1544 | 3679 |
3680 /* | |
3681 * Kick off a resilver. | |
3682 */ | |
4451 | 3683 if (tasks & SPA_ASYNC_RESILVER) { |
3684 mutex_enter(&spa_namespace_lock); | |
1544 | 3685 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); |
4451 | 3686 mutex_exit(&spa_namespace_lock); |
3687 } | |
1544 | 3688 |
3689 /* | |
3690 * Let the world know that we're done. | |
3691 */ | |
3692 mutex_enter(&spa->spa_async_lock); | |
3693 spa->spa_async_thread = NULL; | |
3694 cv_broadcast(&spa->spa_async_cv); | |
3695 mutex_exit(&spa->spa_async_lock); | |
3696 thread_exit(); | |
3697 } | |
3698 | |
3699 void | |
3700 spa_async_suspend(spa_t *spa) | |
3701 { | |
3702 mutex_enter(&spa->spa_async_lock); | |
3703 spa->spa_async_suspended++; | |
3704 while (spa->spa_async_thread != NULL) | |
3705 cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); | |
3706 mutex_exit(&spa->spa_async_lock); | |
3707 } | |
3708 | |
3709 void | |
3710 spa_async_resume(spa_t *spa) | |
3711 { | |
3712 mutex_enter(&spa->spa_async_lock); | |
3713 ASSERT(spa->spa_async_suspended != 0); | |
3714 spa->spa_async_suspended--; | |
3715 mutex_exit(&spa->spa_async_lock); | |
3716 } | |
3717 | |
3718 static void | |
3719 spa_async_dispatch(spa_t *spa) | |
3720 { | |
3721 mutex_enter(&spa->spa_async_lock); | |
3722 if (spa->spa_async_tasks && !spa->spa_async_suspended && | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3723 spa->spa_async_thread == NULL && |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3724 rootdir != NULL && !vn_is_readonly(rootdir)) |
1544 | 3725 spa->spa_async_thread = thread_create(NULL, 0, |
3726 spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); | |
3727 mutex_exit(&spa->spa_async_lock); | |
3728 } | |
3729 | |
3730 void | |
3731 spa_async_request(spa_t *spa, int task) | |
3732 { | |
3733 mutex_enter(&spa->spa_async_lock); | |
3734 spa->spa_async_tasks |= task; | |
3735 mutex_exit(&spa->spa_async_lock); | |
789 | 3736 } |
3737 | |
3738 /* | |
3739 * ========================================================================== | |
3740 * SPA syncing routines | |
3741 * ========================================================================== | |
3742 */ | |
3743 | |
3744 static void | |
3745 spa_sync_deferred_frees(spa_t *spa, uint64_t txg) | |
3746 { | |
3747 bplist_t *bpl = &spa->spa_sync_bplist; | |
3748 dmu_tx_t *tx; | |
3749 blkptr_t blk; | |
3750 uint64_t itor = 0; | |
3751 zio_t *zio; | |
3752 int error; | |
3753 uint8_t c = 1; | |
3754 | |
3755 zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CONFIG_HELD); | |
3756 | |
3757 while (bplist_iterate(bpl, &itor, &blk) == 0) | |
3758 zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL)); | |
3759 | |
3760 error = zio_wait(zio); | |
3761 ASSERT3U(error, ==, 0); | |
3762 | |
3763 tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); | |
3764 bplist_vacate(bpl, tx); | |
3765 | |
3766 /* | |
3767 * Pre-dirty the first block so we sync to convergence faster. | |
3768 * (Usually only the first block is needed.) | |
3769 */ | |
3770 dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); | |
3771 dmu_tx_commit(tx); | |
3772 } | |
3773 | |
3774 static void | |
2082 | 3775 spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) |
3776 { | |
3777 char *packed = NULL; | |
3778 size_t nvsize = 0; | |
3779 dmu_buf_t *db; | |
3780 | |
3781 VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); | |
3782 | |
3783 packed = kmem_alloc(nvsize, KM_SLEEP); | |
3784 | |
3785 VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, | |
3786 KM_SLEEP) == 0); | |
3787 | |
3788 dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx); | |
3789 | |
3790 kmem_free(packed, nvsize); | |
3791 | |
3792 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); | |
3793 dmu_buf_will_dirty(db, tx); | |
3794 *(uint64_t *)db->db_data = nvsize; | |
3795 dmu_buf_rele(db, FTAG); | |
3796 } | |
3797 | |
3798 static void | |
5450 | 3799 spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, |
3800 const char *config, const char *entry) | |
2082 | 3801 { |
3802 nvlist_t *nvroot; | |
5450 | 3803 nvlist_t **list; |
2082 | 3804 int i; |
3805 | |
5450 | 3806 if (!sav->sav_sync) |
2082 | 3807 return; |
3808 | |
3809 /* | |
5450 | 3810 * Update the MOS nvlist describing the list of available devices. |
3811 * spa_validate_aux() will have already made sure this nvlist is | |
4451 | 3812 * valid and the vdevs are labeled appropriately. |
2082 | 3813 */ |
5450 | 3814 if (sav->sav_object == 0) { |
3815 sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, | |
3816 DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, | |
3817 sizeof (uint64_t), tx); | |
2082 | 3818 VERIFY(zap_update(spa->spa_meta_objset, |
5450 | 3819 DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, |
3820 &sav->sav_object, tx) == 0); | |
2082 | 3821 } |
3822 | |
3823 VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
5450 | 3824 if (sav->sav_count == 0) { |
3825 VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); | |
2082 | 3826 } else { |
5450 | 3827 list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); |
3828 for (i = 0; i < sav->sav_count; i++) | |
3829 list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], | |
3830 B_FALSE, B_FALSE, B_TRUE); | |
3831 VERIFY(nvlist_add_nvlist_array(nvroot, config, list, | |
3832 sav->sav_count) == 0); | |
3833 for (i = 0; i < sav->sav_count; i++) | |
3834 nvlist_free(list[i]); | |
3835 kmem_free(list, sav->sav_count * sizeof (void *)); | |
2082 | 3836 } |
3837 | |
5450 | 3838 spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); |
2926 | 3839 nvlist_free(nvroot); |
2082 | 3840 |
5450 | 3841 sav->sav_sync = B_FALSE; |
2082 | 3842 } |
3843 | |
3844 static void | |
789 | 3845 spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) |
3846 { | |
3847 nvlist_t *config; | |
3848 | |
3849 if (list_is_empty(&spa->spa_dirty_list)) | |
3850 return; | |
3851 | |
3852 config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE); | |
3853 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3854 if (spa->spa_config_syncing) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3855 nvlist_free(spa->spa_config_syncing); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3856 spa->spa_config_syncing = config; |
789 | 3857 |
2082 | 3858 spa_sync_nvlist(spa, spa->spa_config_object, config, tx); |
789 | 3859 } |
3860 | |
5094 | 3861 /* |
3862 * Set zpool properties. | |
3863 */ | |
3912 | 3864 static void |
4543 | 3865 spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) |
3912 | 3866 { |
3867 spa_t *spa = arg1; | |
5094 | 3868 objset_t *mos = spa->spa_meta_objset; |
3912 | 3869 nvlist_t *nvp = arg2; |
5094 | 3870 nvpair_t *elem; |
4451 | 3871 uint64_t intval; |
5363 | 3872 char *strval, *slash; |
5094 | 3873 zpool_prop_t prop; |
3874 const char *propname; | |
3875 zprop_type_t proptype; | |
3876 | |
3877 elem = NULL; | |
3878 while ((elem = nvlist_next_nvpair(nvp, elem))) { | |
3879 switch (prop = zpool_name_to_prop(nvpair_name(elem))) { | |
3880 case ZPOOL_PROP_VERSION: | |
3881 /* | |
3882 * Only set version for non-zpool-creation cases | |
3883 * (set/import). spa_create() needs special care | |
3884 * for version setting. | |
3885 */ | |
3886 if (tx->tx_txg != TXG_INITIAL) { | |
3887 VERIFY(nvpair_value_uint64(elem, | |
3888 &intval) == 0); | |
3889 ASSERT(intval <= SPA_VERSION); | |
3890 ASSERT(intval >= spa_version(spa)); | |
3891 spa->spa_uberblock.ub_version = intval; | |
3892 vdev_config_dirty(spa->spa_root_vdev); | |
3893 } | |
3894 break; | |
3895 | |
3896 case ZPOOL_PROP_ALTROOT: | |
3897 /* | |
3898 * 'altroot' is a non-persistent property. It should | |
3899 * have been set temporarily at creation or import time. | |
3900 */ | |
3901 ASSERT(spa->spa_root != NULL); | |
3902 break; | |
3903 | |
5363 | 3904 case ZPOOL_PROP_CACHEFILE: |
5094 | 3905 /* |
5363 | 3906 * 'cachefile' is a non-persistent property, but note |
3907 * an async request that the config cache needs to be | |
3908 * updated. | |
5094 | 3909 */ |
5363 | 3910 VERIFY(nvpair_value_string(elem, &strval) == 0); |
3911 if (spa->spa_config_dir) | |
3912 spa_strfree(spa->spa_config_dir); | |
3913 if (spa->spa_config_file) | |
3914 spa_strfree(spa->spa_config_file); | |
3915 | |
3916 if (strval[0] == '\0') { | |
3917 spa->spa_config_dir = NULL; | |
3918 spa->spa_config_file = NULL; | |
3919 } else if (strcmp(strval, "none") == 0) { | |
3920 spa->spa_config_dir = spa_strdup(strval); | |
3921 spa->spa_config_file = NULL; | |
3922 } else { | |
5621
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3923 /* |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3924 * If the cachefile is in the root directory, |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3925 * we will end up with an empty string for |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3926 * spa_config_dir. This value is only ever |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3927 * used when concatenated with '/', so an empty |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3928 * string still behaves correctly and keeps the |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3929 * rest of the code simple. |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3930 */ |
5363 | 3931 slash = strrchr(strval, '/'); |
3932 ASSERT(slash != NULL); | |
3933 *slash = '\0'; | |
5621
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3934 if (strcmp(strval, spa_config_dir) == 0 && |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3935 strcmp(slash + 1, ZPOOL_CACHE_FILE) == 0) { |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3936 spa->spa_config_dir = NULL; |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3937 spa->spa_config_file = NULL; |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3938 } else { |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3939 spa->spa_config_dir = |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3940 spa_strdup(strval); |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3941 spa->spa_config_file = |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3942 spa_strdup(slash + 1); |
cd0984d5b1c1
6627006 setting cachefile to default value confuses ZFS
eschrock
parents:
5450
diff
changeset
|
3943 } |
5363 | 3944 } |
3945 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); | |
4543 | 3946 break; |
5094 | 3947 default: |
3948 /* | |
3949 * Set pool property values in the poolprops mos object. | |
3950 */ | |
3951 mutex_enter(&spa->spa_props_lock); | |
3952 if (spa->spa_pool_props_object == 0) { | |
3953 objset_t *mos = spa->spa_meta_objset; | |
3954 | |
3955 VERIFY((spa->spa_pool_props_object = | |
3956 zap_create(mos, DMU_OT_POOL_PROPS, | |
3957 DMU_OT_NONE, 0, tx)) > 0); | |
3958 | |
3959 VERIFY(zap_update(mos, | |
3960 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, | |
3961 8, 1, &spa->spa_pool_props_object, tx) | |
3962 == 0); | |
3963 } | |
3964 mutex_exit(&spa->spa_props_lock); | |
3965 | |
3966 /* normalize the property name */ | |
3967 propname = zpool_prop_to_name(prop); | |
3968 proptype = zpool_prop_get_type(prop); | |
3969 | |
3970 if (nvpair_type(elem) == DATA_TYPE_STRING) { | |
3971 ASSERT(proptype == PROP_TYPE_STRING); | |
3972 VERIFY(nvpair_value_string(elem, &strval) == 0); | |
3973 VERIFY(zap_update(mos, | |
3974 spa->spa_pool_props_object, propname, | |
3975 1, strlen(strval) + 1, strval, tx) == 0); | |
3976 | |
3977 } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { | |
3978 VERIFY(nvpair_value_uint64(elem, &intval) == 0); | |
3979 | |
3980 if (proptype == PROP_TYPE_INDEX) { | |
3981 const char *unused; | |
3982 VERIFY(zpool_prop_index_to_string( | |
3983 prop, intval, &unused) == 0); | |
3984 } | |
3985 VERIFY(zap_update(mos, | |
3986 spa->spa_pool_props_object, propname, | |
3987 8, 1, &intval, tx) == 0); | |
3988 } else { | |
3989 ASSERT(0); /* not allowed */ | |
3990 } | |
3991 | |
5329 | 3992 switch (prop) { |
3993 case ZPOOL_PROP_DELEGATION: | |
5094 | 3994 spa->spa_delegation = intval; |
5329 | 3995 break; |
3996 case ZPOOL_PROP_BOOTFS: | |
5094 | 3997 spa->spa_bootfs = intval; |
5329 | 3998 break; |
3999 case ZPOOL_PROP_FAILUREMODE: | |
4000 spa->spa_failmode = intval; | |
4001 break; | |
4002 default: | |
4003 break; | |
4004 } | |
3912 | 4005 } |
5094 | 4006 |
4007 /* log internal history if this is not a zpool create */ | |
4008 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && | |
4009 tx->tx_txg != TXG_INITIAL) { | |
4010 spa_history_internal_log(LOG_POOL_PROPSET, | |
4011 spa, tx, cr, "%s %lld %s", | |
4012 nvpair_name(elem), intval, spa->spa_name); | |
4013 } | |
3912 | 4014 } |
4015 } | |
4016 | |
789 | 4017 /* |
4018 * Sync the specified transaction group. New blocks may be dirtied as | |
4019 * part of the process, so we iterate until it converges. | |
4020 */ | |
4021 void | |
4022 spa_sync(spa_t *spa, uint64_t txg) | |
4023 { | |
4024 dsl_pool_t *dp = spa->spa_dsl_pool; | |
4025 objset_t *mos = spa->spa_meta_objset; | |
4026 bplist_t *bpl = &spa->spa_sync_bplist; | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4027 vdev_t *rvd = spa->spa_root_vdev; |
789 | 4028 vdev_t *vd; |
4029 dmu_tx_t *tx; | |
4030 int dirty_vdevs; | |
4031 | |
4032 /* | |
4033 * Lock out configuration changes. | |
4034 */ | |
1544 | 4035 spa_config_enter(spa, RW_READER, FTAG); |
789 | 4036 |
4037 spa->spa_syncing_txg = txg; | |
4038 spa->spa_sync_pass = 0; | |
4039 | |
1544 | 4040 VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); |
789 | 4041 |
2082 | 4042 tx = dmu_tx_create_assigned(dp, txg); |
4043 | |
4044 /* | |
4577 | 4045 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, |
2082 | 4046 * set spa_deflate if we have no raid-z vdevs. |
4047 */ | |
4577 | 4048 if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && |
4049 spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { | |
2082 | 4050 int i; |
4051 | |
4052 for (i = 0; i < rvd->vdev_children; i++) { | |
4053 vd = rvd->vdev_child[i]; | |
4054 if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) | |
4055 break; | |
4056 } | |
4057 if (i == rvd->vdev_children) { | |
4058 spa->spa_deflate = TRUE; | |
4059 VERIFY(0 == zap_add(spa->spa_meta_objset, | |
4060 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, | |
4061 sizeof (uint64_t), 1, &spa->spa_deflate, tx)); | |
4062 } | |
4063 } | |
4064 | |
789 | 4065 /* |
4066 * If anything has changed in this txg, push the deferred frees | |
4067 * from the previous txg. If not, leave them alone so that we | |
4068 * don't generate work on an otherwise idle system. | |
4069 */ | |
4070 if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || | |
2329
e640bebc73b3
6446569 deferred list is hooked on flintstone vitamins
ek110237
parents:
2199
diff
changeset
|
4071 !txg_list_empty(&dp->dp_dirty_dirs, txg) || |
e640bebc73b3
6446569 deferred list is hooked on flintstone vitamins
ek110237
parents:
2199
diff
changeset
|
4072 !txg_list_empty(&dp->dp_sync_tasks, txg)) |
789 | 4073 spa_sync_deferred_frees(spa, txg); |
4074 | |
4075 /* | |
4076 * Iterate to convergence. | |
4077 */ | |
4078 do { | |
4079 spa->spa_sync_pass++; | |
4080 | |
4081 spa_sync_config_object(spa, tx); | |
5450 | 4082 spa_sync_aux_dev(spa, &spa->spa_spares, tx, |
4083 ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); | |
4084 spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, | |
4085 ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); | |
1544 | 4086 spa_errlog_sync(spa, txg); |
789 | 4087 dsl_pool_sync(dp, txg); |
4088 | |
4089 dirty_vdevs = 0; | |
4090 while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { | |
4091 vdev_sync(vd, txg); | |
4092 dirty_vdevs++; | |
4093 } | |
4094 | |
4095 bplist_sync(bpl, tx); | |
4096 } while (dirty_vdevs); | |
4097 | |
4098 bplist_close(bpl); | |
4099 | |
4100 dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); | |
4101 | |
4102 /* | |
4103 * Rewrite the vdev configuration (which includes the uberblock) | |
4104 * to commit the transaction group. | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4105 * |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4106 * If there are any dirty vdevs, sync the uberblock to all vdevs. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4107 * Otherwise, pick a random top-level vdev that's known to be |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4108 * visible in the config cache (see spa_vdev_add() for details). |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4109 * If the write fails, try the next vdev until we've tried them all. |
789 | 4110 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4111 if (!list_is_empty(&spa->spa_dirty_list)) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4112 VERIFY(vdev_config_sync(rvd, txg) == 0); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4113 } else { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4114 int children = rvd->vdev_children; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4115 int c0 = spa_get_random(children); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4116 int c; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4117 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4118 for (c = 0; c < children; c++) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4119 vd = rvd->vdev_child[(c0 + c) % children]; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4120 if (vd->vdev_ms_array == 0) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4121 continue; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4122 if (vdev_config_sync(vd, txg) == 0) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4123 break; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4124 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4125 if (c == children) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4126 VERIFY(vdev_config_sync(rvd, txg) == 0); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4127 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4128 |
2082 | 4129 dmu_tx_commit(tx); |
4130 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4131 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4132 * Clear the dirty config list. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4133 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4134 while ((vd = list_head(&spa->spa_dirty_list)) != NULL) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4135 vdev_config_clean(vd); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4136 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4137 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4138 * Now that the new config has synced transactionally, |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4139 * let it become visible to the config cache. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4140 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4141 if (spa->spa_config_syncing != NULL) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4142 spa_config_set(spa, spa->spa_config_syncing); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4143 spa->spa_config_txg = txg; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4144 spa->spa_config_syncing = NULL; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
4145 } |
789 | 4146 |
4147 /* | |
4148 * Make a stable copy of the fully synced uberblock. | |
4149 * We use this as the root for pool traversals. | |
4150 */ | |
4151 spa->spa_traverse_wanted = 1; /* tells traverse_more() to stop */ | |
4152 | |
4153 spa_scrub_suspend(spa); /* stop scrubbing and finish I/Os */ | |
4154 | |
4155 rw_enter(&spa->spa_traverse_lock, RW_WRITER); | |
4156 spa->spa_traverse_wanted = 0; | |
4157 spa->spa_ubsync = spa->spa_uberblock; | |
4158 rw_exit(&spa->spa_traverse_lock); | |
4159 | |
4160 spa_scrub_resume(spa); /* resume scrub with new ubsync */ | |
4161 | |
4162 /* | |
4163 * Clean up the ZIL records for the synced txg. | |
4164 */ | |
4165 dsl_pool_zil_clean(dp); | |
4166 | |
4167 /* | |
4168 * Update usable space statistics. | |
4169 */ | |
4170 while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) | |
4171 vdev_sync_done(vd, txg); | |
4172 | |
4173 /* | |
4174 * It had better be the case that we didn't dirty anything | |
2082 | 4175 * since vdev_config_sync(). |
789 | 4176 */ |
4177 ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); | |
4178 ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); | |
4179 ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); | |
4180 ASSERT(bpl->bpl_queue == NULL); | |
4181 | |
1544 | 4182 spa_config_exit(spa, FTAG); |
4183 | |
4184 /* | |
4185 * If any async tasks have been requested, kick them off. | |
4186 */ | |
4187 spa_async_dispatch(spa); | |
789 | 4188 } |
4189 | |
4190 /* | |
4191 * Sync all pools. We don't want to hold the namespace lock across these | |
4192 * operations, so we take a reference on the spa_t and drop the lock during the | |
4193 * sync. | |
4194 */ | |
4195 void | |
4196 spa_sync_allpools(void) | |
4197 { | |
4198 spa_t *spa = NULL; | |
4199 mutex_enter(&spa_namespace_lock); | |
4200 while ((spa = spa_next(spa)) != NULL) { | |
4201 if (spa_state(spa) != POOL_STATE_ACTIVE) | |
4202 continue; | |
4203 spa_open_ref(spa, FTAG); | |
4204 mutex_exit(&spa_namespace_lock); | |
4205 txg_wait_synced(spa_get_dsl(spa), 0); | |
4206 mutex_enter(&spa_namespace_lock); | |
4207 spa_close(spa, FTAG); | |
4208 } | |
4209 mutex_exit(&spa_namespace_lock); | |
4210 } | |
4211 | |
4212 /* | |
4213 * ========================================================================== | |
4214 * Miscellaneous routines | |
4215 * ========================================================================== | |
4216 */ | |
4217 | |
4218 /* | |
4219 * Remove all pools in the system. | |
4220 */ | |
4221 void | |
4222 spa_evict_all(void) | |
4223 { | |
4224 spa_t *spa; | |
4225 | |
4226 /* | |
4227 * Remove all cached state. All pools should be closed now, | |
4228 * so every spa in the AVL tree should be unreferenced. | |
4229 */ | |
4230 mutex_enter(&spa_namespace_lock); | |
4231 while ((spa = spa_next(NULL)) != NULL) { | |
4232 /* | |
1544 | 4233 * Stop async tasks. The async thread may need to detach |
4234 * a device that's been replaced, which requires grabbing | |
4235 * spa_namespace_lock, so we must drop it here. | |
789 | 4236 */ |
4237 spa_open_ref(spa, FTAG); | |
4238 mutex_exit(&spa_namespace_lock); | |
1544 | 4239 spa_async_suspend(spa); |
4808 | 4240 mutex_enter(&spa_namespace_lock); |
789 | 4241 VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); |
4242 spa_close(spa, FTAG); | |
4243 | |
4244 if (spa->spa_state != POOL_STATE_UNINITIALIZED) { | |
4245 spa_unload(spa); | |
4246 spa_deactivate(spa); | |
4247 } | |
4248 spa_remove(spa); | |
4249 } | |
4250 mutex_exit(&spa_namespace_lock); | |
4251 } | |
1544 | 4252 |
4253 vdev_t * | |
4254 spa_lookup_by_guid(spa_t *spa, uint64_t guid) | |
4255 { | |
4256 return (vdev_lookup_by_guid(spa->spa_root_vdev, guid)); | |
4257 } | |
1760 | 4258 |
4259 void | |
5094 | 4260 spa_upgrade(spa_t *spa, uint64_t version) |
1760 | 4261 { |
4262 spa_config_enter(spa, RW_WRITER, FTAG); | |
4263 | |
4264 /* | |
4265 * This should only be called for a non-faulted pool, and since a | |
4266 * future version would result in an unopenable pool, this shouldn't be | |
4267 * possible. | |
4268 */ | |
4577 | 4269 ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); |
5094 | 4270 ASSERT(version >= spa->spa_uberblock.ub_version); |
4271 | |
4272 spa->spa_uberblock.ub_version = version; | |
1760 | 4273 vdev_config_dirty(spa->spa_root_vdev); |
4274 | |
4275 spa_config_exit(spa, FTAG); | |
2082 | 4276 |
4277 txg_wait_synced(spa_get_dsl(spa), 0); | |
1760 | 4278 } |
2082 | 4279 |
4280 boolean_t | |
4281 spa_has_spare(spa_t *spa, uint64_t guid) | |
4282 { | |
4283 int i; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
4284 uint64_t spareguid; |
5450 | 4285 spa_aux_vdev_t *sav = &spa->spa_spares; |
4286 | |
4287 for (i = 0; i < sav->sav_count; i++) | |
4288 if (sav->sav_vdevs[i]->vdev_guid == guid) | |
2082 | 4289 return (B_TRUE); |
4290 | |
5450 | 4291 for (i = 0; i < sav->sav_npending; i++) { |
4292 if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, | |
4293 &spareguid) == 0 && spareguid == guid) | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
4294 return (B_TRUE); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
4295 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
4296 |
2082 | 4297 return (B_FALSE); |
4298 } | |
3912 | 4299 |
4451 | 4300 /* |
4301 * Post a sysevent corresponding to the given event. The 'name' must be one of | |
4302 * the event definitions in sys/sysevent/eventdefs.h. The payload will be | |
4303 * filled in from the spa and (optionally) the vdev. This doesn't do anything | |
4304 * in the userland libzpool, as we don't want consumers to misinterpret ztest | |
4305 * or zdb as real changes. | |
4306 */ | |
4307 void | |
4308 spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) | |
4309 { | |
4310 #ifdef _KERNEL | |
4311 sysevent_t *ev; | |
4312 sysevent_attr_list_t *attr = NULL; | |
4313 sysevent_value_t value; | |
4314 sysevent_id_t eid; | |
4315 | |
4316 ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", | |
4317 SE_SLEEP); | |
4318 | |
4319 value.value_type = SE_DATA_TYPE_STRING; | |
4320 value.value.sv_string = spa_name(spa); | |
4321 if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) | |
4322 goto done; | |
4323 | |
4324 value.value_type = SE_DATA_TYPE_UINT64; | |
4325 value.value.sv_uint64 = spa_guid(spa); | |
4326 if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) | |
4327 goto done; | |
4328 | |
4329 if (vd) { | |
4330 value.value_type = SE_DATA_TYPE_UINT64; | |
4331 value.value.sv_uint64 = vd->vdev_guid; | |
4332 if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, | |
4333 SE_SLEEP) != 0) | |
4334 goto done; | |
4335 | |
4336 if (vd->vdev_path) { | |
4337 value.value_type = SE_DATA_TYPE_STRING; | |
4338 value.value.sv_string = vd->vdev_path; | |
4339 if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, | |
4340 &value, SE_SLEEP) != 0) | |
4341 goto done; | |
4342 } | |
4343 } | |
4344 | |
4345 (void) log_sysevent(ev, SE_SLEEP, &eid); | |
4346 | |
4347 done: | |
4348 if (attr) | |
4349 sysevent_free_attr(attr); | |
4350 sysevent_free(ev); | |
4351 #endif | |
4352 } |