Mercurial > illumos > illumos-gate
annotate usr/src/uts/common/fs/zfs/spa.c @ 4577:ed36b0e652bc
PSARC/2007/328 zfs upgrade
6552536 'zpool status -v' doesn't work with new (type-bearing) directory entries
6559635 can not import pool whose front labels are gone
6572636 need "zfs upgrade" to change ZPL version number
6572637 store object type in directory entries.
6572648 ZPL's delete queue should not be processed if the filesystem is mounted read-only
6572650 ZFS_VERSION should be SPA_VERSION for readability
author | ahrens |
---|---|
date | Fri, 29 Jun 2007 16:23:19 -0700 |
parents | 12bb2876a62e |
children | c85631613c19 |
rev | line source |
---|---|
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1544 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
2082 | 21 |
789 | 22 /* |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. |
789 | 24 * Use is subject to license terms. |
25 */ | |
26 | |
27 #pragma ident "%Z%%M% %I% %E% SMI" | |
28 | |
29 /* | |
30 * This file contains all the routines used when modifying on-disk SPA state. | |
31 * This includes opening, importing, destroying, exporting a pool, and syncing a | |
32 * pool. | |
33 */ | |
34 | |
35 #include <sys/zfs_context.h> | |
1544 | 36 #include <sys/fm/fs/zfs.h> |
789 | 37 #include <sys/spa_impl.h> |
38 #include <sys/zio.h> | |
39 #include <sys/zio_checksum.h> | |
40 #include <sys/zio_compress.h> | |
41 #include <sys/dmu.h> | |
42 #include <sys/dmu_tx.h> | |
43 #include <sys/zap.h> | |
44 #include <sys/zil.h> | |
45 #include <sys/vdev_impl.h> | |
46 #include <sys/metaslab.h> | |
47 #include <sys/uberblock_impl.h> | |
48 #include <sys/txg.h> | |
49 #include <sys/avl.h> | |
50 #include <sys/dmu_traverse.h> | |
3912 | 51 #include <sys/dmu_objset.h> |
789 | 52 #include <sys/unique.h> |
53 #include <sys/dsl_pool.h> | |
3912 | 54 #include <sys/dsl_dataset.h> |
789 | 55 #include <sys/dsl_dir.h> |
56 #include <sys/dsl_prop.h> | |
3912 | 57 #include <sys/dsl_synctask.h> |
789 | 58 #include <sys/fs/zfs.h> |
59 #include <sys/callb.h> | |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
60 #include <sys/systeminfo.h> |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
61 #include <sys/sunddi.h> |
789 | 62 |
2986 | 63 int zio_taskq_threads = 8; |
64 | |
789 | 65 /* |
66 * ========================================================================== | |
67 * SPA state manipulation (open/create/destroy/import/export) | |
68 * ========================================================================== | |
69 */ | |
70 | |
1544 | 71 static int |
72 spa_error_entry_compare(const void *a, const void *b) | |
73 { | |
74 spa_error_entry_t *sa = (spa_error_entry_t *)a; | |
75 spa_error_entry_t *sb = (spa_error_entry_t *)b; | |
76 int ret; | |
77 | |
78 ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, | |
79 sizeof (zbookmark_t)); | |
80 | |
81 if (ret < 0) | |
82 return (-1); | |
83 else if (ret > 0) | |
84 return (1); | |
85 else | |
86 return (0); | |
87 } | |
88 | |
89 /* | |
90 * Utility function which retrieves copies of the current logs and | |
91 * re-initializes them in the process. | |
92 */ | |
93 void | |
94 spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) | |
95 { | |
96 ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); | |
97 | |
98 bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); | |
99 bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); | |
100 | |
101 avl_create(&spa->spa_errlist_scrub, | |
102 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
103 offsetof(spa_error_entry_t, se_avl)); | |
104 avl_create(&spa->spa_errlist_last, | |
105 spa_error_entry_compare, sizeof (spa_error_entry_t), | |
106 offsetof(spa_error_entry_t, se_avl)); | |
107 } | |
108 | |
/*
 * Activate an uninitialized pool: allocate its metaslab classes and I/O
 * taskqs, and initialize all the locks, lists, and error trees that the
 * rest of the SPA relies on.  The pool must currently be in the
 * POOL_STATE_UNINITIALIZED state; on return it is POOL_STATE_ACTIVE.
 * Undone by spa_deactivate().
 */
static void
spa_activate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;

	/* Separate allocation classes for normal data and the intent log. */
	spa->spa_normal_class = metaslab_class_create();
	spa->spa_log_class = metaslab_class_create();

	/*
	 * One issue taskq and one interrupt taskq per zio type; the
	 * per-taskq thread count is controlled by the zio_taskq_threads
	 * tunable above.
	 */
	for (t = 0; t < ZIO_TYPES; t++) {
		spa->spa_zio_issue_taskq[t] = taskq_create("spa_zio_issue",
		    zio_taskq_threads, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
		spa->spa_zio_intr_taskq[t] = taskq_create("spa_zio_intr",
		    zio_taskq_threads, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
	}

	rw_init(&spa->spa_traverse_lock, NULL, RW_DEFAULT, NULL);

	mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_config_cache_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_config_lock.scl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_sync_bplist.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);

	/* List of vdevs with dirty per-txg state. */
	list_create(&spa->spa_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_dirty_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	/* Persistent error logs, drained/swapped via spa_get_errlists(). */
	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}
158 | |
/*
 * Opposite of spa_activate(): tear down the taskqs, lists, metaslab
 * classes, and error trees, returning the pool to
 * POOL_STATE_UNINITIALIZED.  The caller must already have stopped
 * syncing and unloaded the pool (spa_sync_on clear, dsl pool and root
 * vdev gone).
 *
 * NOTE(review): the mutexes and rwlock initialized in spa_activate()
 * are not destroyed here — presumably that happens elsewhere (e.g. at
 * spa_t destruction); confirm against the rest of the file.
 */
static void
spa_deactivate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);

	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_dirty_list);

	rw_destroy(&spa->spa_traverse_lock);

	/* Destroy both per-zio-type taskqs created in spa_activate(). */
	for (t = 0; t < ZIO_TYPES; t++) {
		taskq_destroy(spa->spa_zio_issue_taskq[t]);
		taskq_destroy(spa->spa_zio_intr_taskq[t]);
		spa->spa_zio_issue_taskq[t] = NULL;
		spa->spa_zio_intr_taskq[t] = NULL;
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	metaslab_class_destroy(spa->spa_log_class);
	spa->spa_log_class = NULL;

	/*
	 * If this was part of an import or the open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;
}
203 | |
204 /* | |
205 * Verify a pool configuration, and construct the vdev tree appropriately. This | |
206 * will create all the necessary vdevs in the appropriate layout, with each vdev | |
207 * in the CLOSED state. This will prep the pool before open/creation/import. | |
208 * All vdev validation is done by the vdev_alloc() routine. | |
209 */ | |
2082 | 210 static int |
211 spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, | |
212 uint_t id, int atype) | |
789 | 213 { |
214 nvlist_t **child; | |
215 uint_t c, children; | |
2082 | 216 int error; |
217 | |
218 if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) | |
219 return (error); | |
220 | |
221 if ((*vdp)->vdev_ops->vdev_op_leaf) | |
222 return (0); | |
789 | 223 |
224 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, | |
225 &child, &children) != 0) { | |
2082 | 226 vdev_free(*vdp); |
227 *vdp = NULL; | |
228 return (EINVAL); | |
789 | 229 } |
230 | |
231 for (c = 0; c < children; c++) { | |
2082 | 232 vdev_t *vd; |
233 if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, | |
234 atype)) != 0) { | |
235 vdev_free(*vdp); | |
236 *vdp = NULL; | |
237 return (error); | |
789 | 238 } |
239 } | |
240 | |
2082 | 241 ASSERT(*vdp != NULL); |
242 | |
243 return (0); | |
789 | 244 } |
245 | |
/*
 * Opposite of spa_load(): tear down the in-core state built by a
 * successful (or partially successful) load.  The sequencing matters:
 * async tasks are suspended first, then the syncing thread is stopped,
 * outstanding prefetch I/O is drained, the DSL pool is closed, and only
 * then are the vdev trees (root and spares) freed.
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding prefetch I/O to complete.  Taking and
	 * dropping the config lock as WRITER drains all readers (in-flight
	 * I/O holds it as reader).
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	spa_config_exit(spa, FTAG);

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	/*
	 * Close all vdevs.  vdev_free() of the root clears
	 * spa_root_vdev as a side effect, hence the ASSERT below.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	/* Free the spare vdevs, their pointer array, and the spare config. */
	for (i = 0; i < spa->spa_nspares; i++)
		vdev_free(spa->spa_spares[i]);
	if (spa->spa_spares) {
		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
		spa->spa_spares = NULL;
	}
	if (spa->spa_sparelist) {
		nvlist_free(spa->spa_sparelist);
		spa->spa_sparelist = NULL;
	}

	/* Allow async tasks again for a subsequent load. */
	spa->spa_async_suspended = 0;
}
301 | |
/*
 * Load (or re-load) the current list of vdevs describing the active spares
 * for this pool.  When this is called, we have some form of basic
 * information in 'spa_sparelist'.  We parse this into vdevs, try to open
 * them, and then re-generate a more complete list including status
 * information.
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_nspares; i++) {
		vd = spa->spa_spares[i];

		/* Undo the call to spa_activate() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL &&
		    tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares)
		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));

	/* An absent sparelist simply means the pool has no spares. */
	if (spa->spa_sparelist == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_nspares = (int)nspares;
	spa->spa_spares = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process.  For each spare, there is potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in).  During this phase we open and
	 * validate each vdev on the spare list.  If the vdev also exists in
	 * the active configuration, then we also mark this vdev as an active
	 * spare.
	 */
	spa->spa_spares = kmem_alloc(nspares * sizeof (void *), KM_SLEEP);
	for (i = 0; i < spa->spa_nspares; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares[i] = vd;

		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were successfully
			 * able to load the vdev.  Otherwise, importing a pool
			 * with a bad active spare would result in strange
			 * behavior, because multiple pool would think the spare
			 * is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise).  Given
			 * the rarity of this scenario, and the extra complexity
			 * it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		/* A spare that fails to open is kept but not validated. */
		if (vdev_open(vd) != 0)
			continue;

		vd->vdev_top = vd;
		(void) vdev_validate_spare(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_nspares * sizeof (void *), KM_SLEEP);
	for (i = 0; i < spa->spa_nspares; i++)
		spares[i] = vdev_config_generate(spa, spa->spa_spares[i],
		    B_TRUE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
	    spares, spa->spa_nspares) == 0);
	/* nvlist_add_nvlist_array copies; free our temporary nvlists. */
	for (i = 0; i < spa->spa_nspares; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_nspares * sizeof (void *));
}
407 | |
408 static int | |
409 load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) | |
410 { | |
411 dmu_buf_t *db; | |
412 char *packed = NULL; | |
413 size_t nvsize = 0; | |
414 int error; | |
415 *value = NULL; | |
416 | |
417 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); | |
418 nvsize = *(uint64_t *)db->db_data; | |
419 dmu_buf_rele(db, FTAG); | |
420 | |
421 packed = kmem_alloc(nvsize, KM_SLEEP); | |
422 error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed); | |
423 if (error == 0) | |
424 error = nvlist_unpack(packed, nvsize, value, 0); | |
425 kmem_free(packed, nvsize); | |
426 | |
427 return (error); | |
428 } | |
429 | |
430 /* | |
4451 | 431 * Checks to see if the given vdev could not be opened, in which case we post a |
432 * sysevent to notify the autoreplace code that the device has been removed. | |
433 */ | |
434 static void | |
435 spa_check_removed(vdev_t *vd) | |
436 { | |
437 int c; | |
438 | |
439 for (c = 0; c < vd->vdev_children; c++) | |
440 spa_check_removed(vd->vdev_child[c]); | |
441 | |
442 if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { | |
443 zfs_post_autoreplace(vd->vdev_spa, vd); | |
444 spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); | |
445 } | |
446 } | |
447 | |
448 /* | |
789 | 449 * Load an existing storage pool, using the pool's builtin spa_config as a |
1544 | 450 * source of configuration information. |
789 | 451 */ |
452 static int | |
1544 | 453 spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) |
789 | 454 { |
455 int error = 0; | |
456 nvlist_t *nvroot = NULL; | |
457 vdev_t *rvd; | |
458 uberblock_t *ub = &spa->spa_uberblock; | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
459 uint64_t config_cache_txg = spa->spa_config_txg; |
789 | 460 uint64_t pool_guid; |
2082 | 461 uint64_t version; |
789 | 462 zio_t *zio; |
4451 | 463 uint64_t autoreplace = 0; |
789 | 464 |
1544 | 465 spa->spa_load_state = state; |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
466 |
789 | 467 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || |
1733 | 468 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { |
1544 | 469 error = EINVAL; |
470 goto out; | |
471 } | |
789 | 472 |
2082 | 473 /* |
474 * Versioning wasn't explicitly added to the label until later, so if | |
475 * it's not present treat it as the initial version. | |
476 */ | |
477 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) | |
4577 | 478 version = SPA_VERSION_INITIAL; |
2082 | 479 |
1733 | 480 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, |
481 &spa->spa_config_txg); | |
482 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
483 if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && |
1544 | 484 spa_guid_exists(pool_guid, 0)) { |
485 error = EEXIST; | |
486 goto out; | |
487 } | |
789 | 488 |
2174
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
489 spa->spa_load_guid = pool_guid; |
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
490 |
789 | 491 /* |
2082 | 492 * Parse the configuration into a vdev tree. We explicitly set the |
493 * value that will be returned by spa_version() since parsing the | |
494 * configuration requires knowing the version number. | |
789 | 495 */ |
1544 | 496 spa_config_enter(spa, RW_WRITER, FTAG); |
2082 | 497 spa->spa_ubsync.ub_version = version; |
498 error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); | |
1544 | 499 spa_config_exit(spa, FTAG); |
789 | 500 |
2082 | 501 if (error != 0) |
1544 | 502 goto out; |
789 | 503 |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
504 ASSERT(spa->spa_root_vdev == rvd); |
789 | 505 ASSERT(spa_guid(spa) == pool_guid); |
506 | |
507 /* | |
508 * Try to open all vdevs, loading each label in the process. | |
509 */ | |
4070
4390ea390a1e
6386594 zdb message should be clearer when failing for lack of permissions
mc142369
parents:
3975
diff
changeset
|
510 error = vdev_open(rvd); |
4390ea390a1e
6386594 zdb message should be clearer when failing for lack of permissions
mc142369
parents:
3975
diff
changeset
|
511 if (error != 0) |
1544 | 512 goto out; |
789 | 513 |
514 /* | |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
515 * Validate the labels for all leaf vdevs. We need to grab the config |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
516 * lock because all label I/O is done with the ZIO_FLAG_CONFIG_HELD |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
517 * flag. |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
518 */ |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
519 spa_config_enter(spa, RW_READER, FTAG); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
520 error = vdev_validate(rvd); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
521 spa_config_exit(spa, FTAG); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
522 |
4070
4390ea390a1e
6386594 zdb message should be clearer when failing for lack of permissions
mc142369
parents:
3975
diff
changeset
|
523 if (error != 0) |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
524 goto out; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
525 |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
526 if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
527 error = ENXIO; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
528 goto out; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
529 } |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
530 |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
531 /* |
789 | 532 * Find the best uberblock. |
533 */ | |
534 bzero(ub, sizeof (uberblock_t)); | |
535 | |
536 zio = zio_root(spa, NULL, NULL, | |
537 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); | |
538 vdev_uberblock_load(zio, rvd, ub); | |
539 error = zio_wait(zio); | |
540 | |
541 /* | |
542 * If we weren't able to find a single valid uberblock, return failure. | |
543 */ | |
544 if (ub->ub_txg == 0) { | |
1760 | 545 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
546 VDEV_AUX_CORRUPT_DATA); | |
1544 | 547 error = ENXIO; |
548 goto out; | |
549 } | |
550 | |
551 /* | |
552 * If the pool is newer than the code, we can't open it. | |
553 */ | |
4577 | 554 if (ub->ub_version > SPA_VERSION) { |
1760 | 555 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
556 VDEV_AUX_VERSION_NEWER); | |
1544 | 557 error = ENOTSUP; |
558 goto out; | |
789 | 559 } |
560 | |
561 /* | |
562 * If the vdev guid sum doesn't match the uberblock, we have an | |
563 * incomplete configuration. | |
564 */ | |
1732 | 565 if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { |
1544 | 566 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
567 VDEV_AUX_BAD_GUID_SUM); | |
568 error = ENXIO; | |
569 goto out; | |
789 | 570 } |
571 | |
572 /* | |
573 * Initialize internal SPA structures. | |
574 */ | |
575 spa->spa_state = POOL_STATE_ACTIVE; | |
576 spa->spa_ubsync = spa->spa_uberblock; | |
577 spa->spa_first_txg = spa_last_synced_txg(spa) + 1; | |
1544 | 578 error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); |
579 if (error) { | |
580 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
581 VDEV_AUX_CORRUPT_DATA); | |
582 goto out; | |
583 } | |
789 | 584 spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; |
585 | |
1544 | 586 if (zap_lookup(spa->spa_meta_objset, |
789 | 587 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, |
1544 | 588 sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { |
589 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
590 VDEV_AUX_CORRUPT_DATA); | |
591 error = EIO; | |
592 goto out; | |
593 } | |
789 | 594 |
595 if (!mosconfig) { | |
2082 | 596 nvlist_t *newconfig; |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
597 uint64_t hostid; |
2082 | 598 |
599 if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { | |
1544 | 600 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
601 VDEV_AUX_CORRUPT_DATA); | |
602 error = EIO; | |
603 goto out; | |
604 } | |
789 | 605 |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
606 if (nvlist_lookup_uint64(newconfig, ZPOOL_CONFIG_HOSTID, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
607 &hostid) == 0) { |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
608 char *hostname; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
609 unsigned long myhostid = 0; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
610 |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
611 VERIFY(nvlist_lookup_string(newconfig, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
612 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
613 |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
614 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); |
4178
ad95fd86760b
6553537 zfs root fails to boot from a snv_63+zfsboot-pfinstall netinstall image
lling
parents:
4070
diff
changeset
|
615 if (hostid != 0 && myhostid != 0 && |
ad95fd86760b
6553537 zfs root fails to boot from a snv_63+zfsboot-pfinstall netinstall image
lling
parents:
4070
diff
changeset
|
616 (unsigned long)hostid != myhostid) { |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
617 cmn_err(CE_WARN, "pool '%s' could not be " |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
618 "loaded as it was last accessed by " |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
619 "another system (host: %s hostid: 0x%lx). " |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
620 "See: http://www.sun.com/msg/ZFS-8000-EY", |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
621 spa->spa_name, hostname, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
622 (unsigned long)hostid); |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
623 error = EBADF; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
624 goto out; |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
625 } |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
626 } |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
627 |
789 | 628 spa_config_set(spa, newconfig); |
629 spa_unload(spa); | |
630 spa_deactivate(spa); | |
631 spa_activate(spa); | |
632 | |
1544 | 633 return (spa_load(spa, newconfig, state, B_TRUE)); |
634 } | |
635 | |
636 if (zap_lookup(spa->spa_meta_objset, | |
637 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, | |
638 sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { | |
639 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
640 VDEV_AUX_CORRUPT_DATA); | |
641 error = EIO; | |
642 goto out; | |
789 | 643 } |
644 | |
1544 | 645 /* |
2082 | 646 * Load the bit that tells us to use the new accounting function |
647 * (raid-z deflation). If we have an older pool, this will not | |
648 * be present. | |
649 */ | |
650 error = zap_lookup(spa->spa_meta_objset, | |
651 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, | |
652 sizeof (uint64_t), 1, &spa->spa_deflate); | |
653 if (error != 0 && error != ENOENT) { | |
654 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
655 VDEV_AUX_CORRUPT_DATA); | |
656 error = EIO; | |
657 goto out; | |
658 } | |
659 | |
660 /* | |
1544 | 661 * Load the persistent error log. If we have an older pool, this will |
662 * not be present. | |
663 */ | |
664 error = zap_lookup(spa->spa_meta_objset, | |
665 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, | |
666 sizeof (uint64_t), 1, &spa->spa_errlog_last); | |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
667 if (error != 0 && error != ENOENT) { |
1544 | 668 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, |
669 VDEV_AUX_CORRUPT_DATA); | |
670 error = EIO; | |
671 goto out; | |
672 } | |
673 | |
674 error = zap_lookup(spa->spa_meta_objset, | |
675 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, | |
676 sizeof (uint64_t), 1, &spa->spa_errlog_scrub); | |
677 if (error != 0 && error != ENOENT) { | |
678 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
679 VDEV_AUX_CORRUPT_DATA); | |
680 error = EIO; | |
681 goto out; | |
682 } | |
789 | 683 |
684 /* | |
2926 | 685 * Load the history object. If we have an older pool, this |
686 * will not be present. | |
687 */ | |
688 error = zap_lookup(spa->spa_meta_objset, | |
689 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, | |
690 sizeof (uint64_t), 1, &spa->spa_history); | |
691 if (error != 0 && error != ENOENT) { | |
692 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
693 VDEV_AUX_CORRUPT_DATA); | |
694 error = EIO; | |
695 goto out; | |
696 } | |
697 | |
698 /* | |
2082 | 699 * Load any hot spares for this pool. |
700 */ | |
701 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, | |
702 DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares_object); | |
703 if (error != 0 && error != ENOENT) { | |
704 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
705 VDEV_AUX_CORRUPT_DATA); | |
706 error = EIO; | |
707 goto out; | |
708 } | |
709 if (error == 0) { | |
4577 | 710 ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); |
2082 | 711 if (load_nvlist(spa, spa->spa_spares_object, |
712 &spa->spa_sparelist) != 0) { | |
713 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
714 VDEV_AUX_CORRUPT_DATA); | |
715 error = EIO; | |
716 goto out; | |
717 } | |
718 | |
719 spa_config_enter(spa, RW_WRITER, FTAG); | |
720 spa_load_spares(spa); | |
721 spa_config_exit(spa, FTAG); | |
722 } | |
723 | |
4543 | 724 spa->spa_delegation = zfs_prop_default_numeric(ZPOOL_PROP_DELEGATION); |
725 | |
3912 | 726 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, |
727 DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); | |
728 | |
729 if (error && error != ENOENT) { | |
730 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, | |
731 VDEV_AUX_CORRUPT_DATA); | |
732 error = EIO; | |
733 goto out; | |
734 } | |
735 | |
736 if (error == 0) { | |
737 (void) zap_lookup(spa->spa_meta_objset, | |
738 spa->spa_pool_props_object, | |
4451 | 739 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), |
3912 | 740 sizeof (uint64_t), 1, &spa->spa_bootfs); |
4451 | 741 (void) zap_lookup(spa->spa_meta_objset, |
742 spa->spa_pool_props_object, | |
743 zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), | |
744 sizeof (uint64_t), 1, &autoreplace); | |
4543 | 745 (void) zap_lookup(spa->spa_meta_objset, |
746 spa->spa_pool_props_object, | |
747 zpool_prop_to_name(ZPOOL_PROP_DELEGATION), | |
748 sizeof (uint64_t), 1, &spa->spa_delegation); | |
3912 | 749 } |
750 | |
2082 | 751 /* |
4451 | 752 * If the 'autoreplace' property is set, then post a resource notifying |
753 * the ZFS DE that it should not issue any faults for unopenable | |
754 * devices. We also iterate over the vdevs, and post a sysevent for any | |
755 * unopenable vdevs so that the normal autoreplace handler can take | |
756 * over. | |
757 */ | |
758 if (autoreplace) | |
759 spa_check_removed(spa->spa_root_vdev); | |
760 | |
761 /* | |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
762 * Load the vdev state for all toplevel vdevs. |
789 | 763 */ |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
764 vdev_load(rvd); |
789 | 765 |
766 /* | |
767 * Propagate the leaf DTLs we just loaded all the way up the tree. | |
768 */ | |
1544 | 769 spa_config_enter(spa, RW_WRITER, FTAG); |
789 | 770 vdev_dtl_reassess(rvd, 0, 0, B_FALSE); |
1544 | 771 spa_config_exit(spa, FTAG); |
789 | 772 |
773 /* | |
774 * Check the state of the root vdev. If it can't be opened, it | |
775 * indicates one or more toplevel vdevs are faulted. | |
776 */ | |
1544 | 777 if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { |
778 error = ENXIO; | |
779 goto out; | |
780 } | |
789 | 781 |
1544 | 782 if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) { |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
783 dmu_tx_t *tx; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
784 int need_update = B_FALSE; |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
785 int c; |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
786 |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
787 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
788 * Claim log blocks that haven't been committed yet. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
789 * This must all happen in a single txg. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
790 */ |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
791 tx = dmu_tx_create_assigned(spa_get_dsl(spa), |
789 | 792 spa_first_txg(spa)); |
2417 | 793 (void) dmu_objset_find(spa->spa_name, |
794 zil_claim, tx, DS_FIND_CHILDREN); | |
789 | 795 dmu_tx_commit(tx); |
796 | |
797 spa->spa_sync_on = B_TRUE; | |
798 txg_sync_start(spa->spa_dsl_pool); | |
799 | |
800 /* | |
801 * Wait for all claims to sync. | |
802 */ | |
803 txg_wait_synced(spa->spa_dsl_pool, 0); | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
804 |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
805 /* |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
806 * If the config cache is stale, or we have uninitialized |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
807 * metaslabs (see spa_vdev_add()), then update the config. |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
808 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
809 if (config_cache_txg != spa->spa_config_txg || |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
810 state == SPA_LOAD_IMPORT) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
811 need_update = B_TRUE; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
812 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
813 for (c = 0; c < rvd->vdev_children; c++) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
814 if (rvd->vdev_child[c]->vdev_ms_array == 0) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
815 need_update = B_TRUE; |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
816 |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
817 /* |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
818 * Update the config cache asychronously in case we're the |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
819 * root pool, in which case the config cache isn't writable yet. |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
820 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
821 if (need_update) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
822 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); |
789 | 823 } |
824 | |
1544 | 825 error = 0; |
826 out: | |
2082 | 827 if (error && error != EBADF) |
1544 | 828 zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0); |
829 spa->spa_load_state = SPA_LOAD_NONE; | |
830 spa->spa_ena = 0; | |
831 | |
832 return (error); | |
789 | 833 } |
834 | |
835 /* | |
836 * Pool Open/Import | |
837 * | |
838 * The import case is identical to an open except that the configuration is sent | |
839 * down from userland, instead of grabbed from the configuration cache. For the | |
840 * case of an open, the pool configuration will exist in the | |
4451 | 841 * POOL_STATE_UNINITIALIZED state. |
789 | 842 * |
843 * The stats information (gen/count/ustats) is used to gather vdev statistics at | |
844 * the same time open the pool, without having to keep around the spa_t in some | |
845 * ambiguous state. | |
846 */ | |
847 static int | |
848 spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) | |
849 { | |
850 spa_t *spa; | |
851 int error; | |
852 int loaded = B_FALSE; | |
853 int locked = B_FALSE; | |
854 | |
855 *spapp = NULL; | |
856 | |
857 /* | |
858 * As disgusting as this is, we need to support recursive calls to this | |
859 * function because dsl_dir_open() is called during spa_load(), and ends | |
860 * up calling spa_open() again. The real fix is to figure out how to | |
861 * avoid dsl_dir_open() calling this in the first place. | |
862 */ | |
863 if (mutex_owner(&spa_namespace_lock) != curthread) { | |
864 mutex_enter(&spa_namespace_lock); | |
865 locked = B_TRUE; | |
866 } | |
867 | |
868 if ((spa = spa_lookup(pool)) == NULL) { | |
869 if (locked) | |
870 mutex_exit(&spa_namespace_lock); | |
871 return (ENOENT); | |
872 } | |
873 if (spa->spa_state == POOL_STATE_UNINITIALIZED) { | |
874 | |
875 spa_activate(spa); | |
876 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
877 error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE); |
789 | 878 |
879 if (error == EBADF) { | |
880 /* | |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
881 * If vdev_validate() returns failure (indicated by |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
882 * EBADF), it indicates that one of the vdevs indicates |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
883 * that the pool has been exported or destroyed. If |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
884 * this is the case, the config cache is out of sync and |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
885 * we should remove the pool from the namespace. |
789 | 886 */ |
2082 | 887 zfs_post_ok(spa, NULL); |
789 | 888 spa_unload(spa); |
889 spa_deactivate(spa); | |
890 spa_remove(spa); | |
891 spa_config_sync(); | |
892 if (locked) | |
893 mutex_exit(&spa_namespace_lock); | |
894 return (ENOENT); | |
1544 | 895 } |
896 | |
897 if (error) { | |
789 | 898 /* |
899 * We can't open the pool, but we still have useful | |
900 * information: the state of each vdev after the | |
901 * attempted vdev_open(). Return this to the user. | |
902 */ | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
903 if (config != NULL && spa->spa_root_vdev != NULL) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
904 spa_config_enter(spa, RW_READER, FTAG); |
789 | 905 *config = spa_config_generate(spa, NULL, -1ULL, |
906 B_TRUE); | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
907 spa_config_exit(spa, FTAG); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
908 } |
789 | 909 spa_unload(spa); |
910 spa_deactivate(spa); | |
1544 | 911 spa->spa_last_open_failed = B_TRUE; |
789 | 912 if (locked) |
913 mutex_exit(&spa_namespace_lock); | |
914 *spapp = NULL; | |
915 return (error); | |
1544 | 916 } else { |
917 zfs_post_ok(spa, NULL); | |
918 spa->spa_last_open_failed = B_FALSE; | |
789 | 919 } |
920 | |
921 loaded = B_TRUE; | |
922 } | |
923 | |
924 spa_open_ref(spa, tag); | |
4451 | 925 |
926 /* | |
927 * If we just loaded the pool, resilver anything that's out of date. | |
928 */ | |
929 if (loaded && (spa_mode & FWRITE)) | |
930 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); | |
931 | |
789 | 932 if (locked) |
933 mutex_exit(&spa_namespace_lock); | |
934 | |
935 *spapp = spa; | |
936 | |
937 if (config != NULL) { | |
1544 | 938 spa_config_enter(spa, RW_READER, FTAG); |
789 | 939 *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); |
1544 | 940 spa_config_exit(spa, FTAG); |
789 | 941 } |
942 | |
943 return (0); | |
944 } | |
945 | |
946 int | |
947 spa_open(const char *name, spa_t **spapp, void *tag) | |
948 { | |
949 return (spa_open_common(name, spapp, tag, NULL)); | |
950 } | |
951 | |
1544 | 952 /* |
953 * Lookup the given spa_t, incrementing the inject count in the process, | |
954 * preventing it from being exported or destroyed. | |
955 */ | |
956 spa_t * | |
957 spa_inject_addref(char *name) | |
958 { | |
959 spa_t *spa; | |
960 | |
961 mutex_enter(&spa_namespace_lock); | |
962 if ((spa = spa_lookup(name)) == NULL) { | |
963 mutex_exit(&spa_namespace_lock); | |
964 return (NULL); | |
965 } | |
966 spa->spa_inject_ref++; | |
967 mutex_exit(&spa_namespace_lock); | |
968 | |
969 return (spa); | |
970 } | |
971 | |
972 void | |
973 spa_inject_delref(spa_t *spa) | |
974 { | |
975 mutex_enter(&spa_namespace_lock); | |
976 spa->spa_inject_ref--; | |
977 mutex_exit(&spa_namespace_lock); | |
978 } | |
979 | |
2082 | 980 static void |
981 spa_add_spares(spa_t *spa, nvlist_t *config) | |
982 { | |
983 nvlist_t **spares; | |
984 uint_t i, nspares; | |
985 nvlist_t *nvroot; | |
986 uint64_t guid; | |
987 vdev_stat_t *vs; | |
988 uint_t vsc; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
989 uint64_t pool; |
2082 | 990 |
991 if (spa->spa_nspares == 0) | |
992 return; | |
993 | |
994 VERIFY(nvlist_lookup_nvlist(config, | |
995 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); | |
996 VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, | |
997 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); | |
998 if (nspares != 0) { | |
999 VERIFY(nvlist_add_nvlist_array(nvroot, | |
1000 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); | |
1001 VERIFY(nvlist_lookup_nvlist_array(nvroot, | |
1002 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); | |
1003 | |
1004 /* | |
1005 * Go through and find any spares which have since been | |
1006 * repurposed as an active spare. If this is the case, update | |
1007 * their status appropriately. | |
1008 */ | |
1009 for (i = 0; i < nspares; i++) { | |
1010 VERIFY(nvlist_lookup_uint64(spares[i], | |
1011 ZPOOL_CONFIG_GUID, &guid) == 0); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1012 if (spa_spare_exists(guid, &pool) && pool != 0ULL) { |
2082 | 1013 VERIFY(nvlist_lookup_uint64_array( |
1014 spares[i], ZPOOL_CONFIG_STATS, | |
1015 (uint64_t **)&vs, &vsc) == 0); | |
1016 vs->vs_state = VDEV_STATE_CANT_OPEN; | |
1017 vs->vs_aux = VDEV_AUX_SPARED; | |
1018 } | |
1019 } | |
1020 } | |
1021 } | |
1022 | |
789 | 1023 int |
1544 | 1024 spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) |
789 | 1025 { |
1026 int error; | |
1027 spa_t *spa; | |
1028 | |
1029 *config = NULL; | |
1030 error = spa_open_common(name, &spa, FTAG, config); | |
1031 | |
2082 | 1032 if (spa && *config != NULL) { |
1544 | 1033 VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, |
1034 spa_get_errlog_size(spa)) == 0); | |
1035 | |
2082 | 1036 spa_add_spares(spa, *config); |
1037 } | |
1038 | |
1544 | 1039 /* |
1040 * We want to get the alternate root even for faulted pools, so we cheat | |
1041 * and call spa_lookup() directly. | |
1042 */ | |
1043 if (altroot) { | |
1044 if (spa == NULL) { | |
1045 mutex_enter(&spa_namespace_lock); | |
1046 spa = spa_lookup(name); | |
1047 if (spa) | |
1048 spa_altroot(spa, altroot, buflen); | |
1049 else | |
1050 altroot[0] = '\0'; | |
1051 spa = NULL; | |
1052 mutex_exit(&spa_namespace_lock); | |
1053 } else { | |
1054 spa_altroot(spa, altroot, buflen); | |
1055 } | |
1056 } | |
1057 | |
789 | 1058 if (spa != NULL) |
1059 spa_close(spa, FTAG); | |
1060 | |
1061 return (error); | |
1062 } | |
1063 | |
1064 /* | |
2082 | 1065 * Validate that the 'spares' array is well formed. We must have an array of |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1066 * nvlists, each which describes a valid leaf vdev. If this is an import (mode |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1067 * is VDEV_ALLOC_SPARE), then we allow corrupted spares to be specified, as long |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1068 * as they are well-formed. |
2082 | 1069 */ |
1070 static int | |
1071 spa_validate_spares(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) | |
1072 { | |
1073 nvlist_t **spares; | |
1074 uint_t i, nspares; | |
1075 vdev_t *vd; | |
1076 int error; | |
1077 | |
1078 /* | |
1079 * It's acceptable to have no spares specified. | |
1080 */ | |
1081 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
1082 &spares, &nspares) != 0) | |
1083 return (0); | |
1084 | |
1085 if (nspares == 0) | |
1086 return (EINVAL); | |
1087 | |
1088 /* | |
1089 * Make sure the pool is formatted with a version that supports hot | |
1090 * spares. | |
1091 */ | |
4577 | 1092 if (spa_version(spa) < SPA_VERSION_SPARES) |
2082 | 1093 return (ENOTSUP); |
1094 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1095 /* |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1096 * Set the pending spare list so we correctly handle device in-use |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1097 * checking. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1098 */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1099 spa->spa_pending_spares = spares; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1100 spa->spa_pending_nspares = nspares; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1101 |
2082 | 1102 for (i = 0; i < nspares; i++) { |
1103 if ((error = spa_config_parse(spa, &vd, spares[i], NULL, 0, | |
1104 mode)) != 0) | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1105 goto out; |
2082 | 1106 |
1107 if (!vd->vdev_ops->vdev_op_leaf) { | |
1108 vdev_free(vd); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1109 error = EINVAL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1110 goto out; |
2082 | 1111 } |
1112 | |
1113 vd->vdev_top = vd; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1114 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1115 if ((error = vdev_open(vd)) == 0 && |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1116 (error = vdev_label_init(vd, crtxg, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1117 VDEV_LABEL_SPARE)) == 0) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1118 VERIFY(nvlist_add_uint64(spares[i], ZPOOL_CONFIG_GUID, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1119 vd->vdev_guid) == 0); |
2082 | 1120 } |
1121 | |
1122 vdev_free(vd); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1123 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1124 if (error && mode != VDEV_ALLOC_SPARE) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1125 goto out; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1126 else |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1127 error = 0; |
2082 | 1128 } |
1129 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1130 out: |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1131 spa->spa_pending_spares = NULL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1132 spa->spa_pending_nspares = 0; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1133 return (error); |
2082 | 1134 } |
1135 | |
1136 /* | |
789 | 1137 * Pool Creation |
1138 */ | |
1139 int | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1140 spa_create(const char *pool, nvlist_t *nvroot, const char *altroot) |
789 | 1141 { |
1142 spa_t *spa; | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1143 vdev_t *rvd; |
789 | 1144 dsl_pool_t *dp; |
1145 dmu_tx_t *tx; | |
2082 | 1146 int c, error = 0; |
789 | 1147 uint64_t txg = TXG_INITIAL; |
2082 | 1148 nvlist_t **spares; |
1149 uint_t nspares; | |
789 | 1150 |
1151 /* | |
1152 * If this pool already exists, return failure. | |
1153 */ | |
1154 mutex_enter(&spa_namespace_lock); | |
1155 if (spa_lookup(pool) != NULL) { | |
1156 mutex_exit(&spa_namespace_lock); | |
1157 return (EEXIST); | |
1158 } | |
1159 | |
1160 /* | |
1161 * Allocate a new spa_t structure. | |
1162 */ | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1163 spa = spa_add(pool, altroot); |
789 | 1164 spa_activate(spa); |
1165 | |
1166 spa->spa_uberblock.ub_txg = txg - 1; | |
4577 | 1167 spa->spa_uberblock.ub_version = SPA_VERSION; |
789 | 1168 spa->spa_ubsync = spa->spa_uberblock; |
1169 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1170 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1171 * Create the root vdev. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1172 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1173 spa_config_enter(spa, RW_WRITER, FTAG); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1174 |
2082 | 1175 error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); |
1176 | |
1177 ASSERT(error != 0 || rvd != NULL); | |
1178 ASSERT(error != 0 || spa->spa_root_vdev == rvd); | |
1179 | |
1180 if (error == 0 && rvd->vdev_children == 0) | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1181 error = EINVAL; |
2082 | 1182 |
1183 if (error == 0 && | |
1184 (error = vdev_create(rvd, txg, B_FALSE)) == 0 && | |
1185 (error = spa_validate_spares(spa, nvroot, txg, | |
1186 VDEV_ALLOC_ADD)) == 0) { | |
1187 for (c = 0; c < rvd->vdev_children; c++) | |
1188 vdev_init(rvd->vdev_child[c], txg); | |
1189 vdev_config_dirty(rvd); | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1190 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1191 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1192 spa_config_exit(spa, FTAG); |
789 | 1193 |
2082 | 1194 if (error != 0) { |
789 | 1195 spa_unload(spa); |
1196 spa_deactivate(spa); | |
1197 spa_remove(spa); | |
1198 mutex_exit(&spa_namespace_lock); | |
1199 return (error); | |
1200 } | |
1201 | |
2082 | 1202 /* |
1203 * Get the list of spares, if specified. | |
1204 */ | |
1205 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
1206 &spares, &nspares) == 0) { | |
1207 VERIFY(nvlist_alloc(&spa->spa_sparelist, NV_UNIQUE_NAME, | |
1208 KM_SLEEP) == 0); | |
1209 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, | |
1210 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); | |
1211 spa_config_enter(spa, RW_WRITER, FTAG); | |
1212 spa_load_spares(spa); | |
1213 spa_config_exit(spa, FTAG); | |
1214 spa->spa_sync_spares = B_TRUE; | |
1215 } | |
1216 | |
789 | 1217 spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg); |
1218 spa->spa_meta_objset = dp->dp_meta_objset; | |
1219 | |
1220 tx = dmu_tx_create_assigned(dp, txg); | |
1221 | |
1222 /* | |
1223 * Create the pool config object. | |
1224 */ | |
1225 spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, | |
1226 DMU_OT_PACKED_NVLIST, 1 << 14, | |
1227 DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); | |
1228 | |
1544 | 1229 if (zap_add(spa->spa_meta_objset, |
789 | 1230 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, |
1544 | 1231 sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { |
1232 cmn_err(CE_PANIC, "failed to add pool config"); | |
1233 } | |
789 | 1234 |
2082 | 1235 /* Newly created pools are always deflated. */ |
1236 spa->spa_deflate = TRUE; | |
1237 if (zap_add(spa->spa_meta_objset, | |
1238 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, | |
1239 sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { | |
1240 cmn_err(CE_PANIC, "failed to add deflate"); | |
1241 } | |
1242 | |
789 | 1243 /* |
1244 * Create the deferred-free bplist object. Turn off compression | |
1245 * because sync-to-convergence takes longer if the blocksize | |
1246 * keeps changing. | |
1247 */ | |
1248 spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, | |
1249 1 << 14, tx); | |
1250 dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, | |
1251 ZIO_COMPRESS_OFF, tx); | |
1252 | |
1544 | 1253 if (zap_add(spa->spa_meta_objset, |
789 | 1254 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, |
1544 | 1255 sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { |
1256 cmn_err(CE_PANIC, "failed to add bplist"); | |
1257 } | |
789 | 1258 |
2926 | 1259 /* |
1260 * Create the pool's history object. | |
1261 */ | |
1262 spa_history_create_obj(spa, tx); | |
1263 | |
789 | 1264 dmu_tx_commit(tx); |
1265 | |
4451 | 1266 spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); |
4543 | 1267 spa->spa_delegation = zfs_prop_default_numeric(ZPOOL_PROP_DELEGATION); |
789 | 1268 spa->spa_sync_on = B_TRUE; |
1269 txg_sync_start(spa->spa_dsl_pool); | |
1270 | |
1271 /* | |
1272 * We explicitly wait for the first transaction to complete so that our | |
1273 * bean counters are appropriately updated. | |
1274 */ | |
1275 txg_wait_synced(spa->spa_dsl_pool, txg); | |
1276 | |
1277 spa_config_sync(); | |
1278 | |
1279 mutex_exit(&spa_namespace_lock); | |
1280 | |
1281 return (0); | |
1282 } | |
1283 | |
1284 /* | |
1285 * Import the given pool into the system. We set up the necessary spa_t and | |
1286 * then call spa_load() to do the dirty work. | |
1287 */ | |
1288 int | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1289 spa_import(const char *pool, nvlist_t *config, const char *altroot) |
789 | 1290 { |
1291 spa_t *spa; | |
1292 int error; | |
2082 | 1293 nvlist_t *nvroot; |
1294 nvlist_t **spares; | |
1295 uint_t nspares; | |
789 | 1296 |
1297 if (!(spa_mode & FWRITE)) | |
1298 return (EROFS); | |
1299 | |
1300 /* | |
1301 * If a pool with this name exists, return failure. | |
1302 */ | |
1303 mutex_enter(&spa_namespace_lock); | |
1304 if (spa_lookup(pool) != NULL) { | |
1305 mutex_exit(&spa_namespace_lock); | |
1306 return (EEXIST); | |
1307 } | |
1308 | |
1309 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1310 * Create and initialize the spa structure. |
789 | 1311 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1312 spa = spa_add(pool, altroot); |
789 | 1313 spa_activate(spa); |
1314 | |
1315 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1316 * Pass off the heavy lifting to spa_load(). |
1732 | 1317 * Pass TRUE for mosconfig because the user-supplied config |
1318 * is actually the one to trust when doing an import. | |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1319 */ |
1732 | 1320 error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE); |
789 | 1321 |
2082 | 1322 spa_config_enter(spa, RW_WRITER, FTAG); |
1323 /* | |
1324 * Toss any existing sparelist, as it doesn't have any validity anymore, | |
1325 * and conflicts with spa_has_spare(). | |
1326 */ | |
1327 if (spa->spa_sparelist) { | |
1328 nvlist_free(spa->spa_sparelist); | |
1329 spa->spa_sparelist = NULL; | |
1330 spa_load_spares(spa); | |
1331 } | |
1332 | |
1333 VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, | |
1334 &nvroot) == 0); | |
1335 if (error == 0) | |
1336 error = spa_validate_spares(spa, nvroot, -1ULL, | |
1337 VDEV_ALLOC_SPARE); | |
1338 spa_config_exit(spa, FTAG); | |
1339 | |
1340 if (error != 0) { | |
789 | 1341 spa_unload(spa); |
1342 spa_deactivate(spa); | |
1343 spa_remove(spa); | |
1344 mutex_exit(&spa_namespace_lock); | |
1345 return (error); | |
1346 } | |
1347 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1348 /* |
2082 | 1349 * Override any spares as specified by the user, as these may have |
1350 * correct device names/devids, etc. | |
1351 */ | |
1352 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
1353 &spares, &nspares) == 0) { | |
1354 if (spa->spa_sparelist) | |
1355 VERIFY(nvlist_remove(spa->spa_sparelist, | |
1356 ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); | |
1357 else | |
1358 VERIFY(nvlist_alloc(&spa->spa_sparelist, | |
1359 NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
1360 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, | |
1361 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); | |
1362 spa_config_enter(spa, RW_WRITER, FTAG); | |
1363 spa_load_spares(spa); | |
1364 spa_config_exit(spa, FTAG); | |
1365 spa->spa_sync_spares = B_TRUE; | |
1366 } | |
1367 | |
1368 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1369 * Update the config cache to include the newly-imported pool. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1370 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1371 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1372 |
789 | 1373 /* |
1374 * Resilver anything that's out of date. | |
1375 */ | |
1376 if (spa_mode & FWRITE) | |
1377 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); | |
1378 | |
4451 | 1379 mutex_exit(&spa_namespace_lock); |
1380 | |
789 | 1381 return (0); |
1382 } | |
1383 | |
1384 /* | |
1385 * This (illegal) pool name is used when temporarily importing a spa_t in order | |
1386 * to get the vdev stats associated with the imported devices. | |
1387 */ | |
1388 #define TRYIMPORT_NAME "$import" | |
1389 | |
1390 nvlist_t * | |
1391 spa_tryimport(nvlist_t *tryconfig) | |
1392 { | |
1393 nvlist_t *config = NULL; | |
1394 char *poolname; | |
1395 spa_t *spa; | |
1396 uint64_t state; | |
1397 | |
1398 if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) | |
1399 return (NULL); | |
1400 | |
1401 if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) | |
1402 return (NULL); | |
1403 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1404 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1405 * Create and initialize the spa structure. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1406 */ |
789 | 1407 mutex_enter(&spa_namespace_lock); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1408 spa = spa_add(TRYIMPORT_NAME, NULL); |
789 | 1409 spa_activate(spa); |
1410 | |
1411 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1412 * Pass off the heavy lifting to spa_load(). |
1732 | 1413 * Pass TRUE for mosconfig because the user-supplied config |
1414 * is actually the one to trust when doing an import. | |
789 | 1415 */ |
1732 | 1416 (void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE); |
789 | 1417 |
1418 /* | |
1419 * If 'tryconfig' was at least parsable, return the current config. | |
1420 */ | |
1421 if (spa->spa_root_vdev != NULL) { | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1422 spa_config_enter(spa, RW_READER, FTAG); |
789 | 1423 config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1424 spa_config_exit(spa, FTAG); |
789 | 1425 VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, |
1426 poolname) == 0); | |
1427 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, | |
1428 state) == 0); | |
3975
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1429 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, |
6674f5d79069
6282725 hostname/hostid should be stored in the label
ek110237
parents:
3912
diff
changeset
|
1430 spa->spa_uberblock.ub_timestamp) == 0); |
2082 | 1431 |
1432 /* | |
1433 * Add the list of hot spares. | |
1434 */ | |
1435 spa_add_spares(spa, config); | |
789 | 1436 } |
1437 | |
1438 spa_unload(spa); | |
1439 spa_deactivate(spa); | |
1440 spa_remove(spa); | |
1441 mutex_exit(&spa_namespace_lock); | |
1442 | |
1443 return (config); | |
1444 } | |
1445 | |
1446 /* | |
1447 * Pool export/destroy | |
1448 * | |
1449 * The act of destroying or exporting a pool is very simple. We make sure there | |
1450 * is no more pending I/O and any references to the pool are gone. Then, we | |
1451 * update the pool state and sync all the labels to disk, removing the | |
1452 * configuration from the cache afterwards. | |
1453 */ | |
1454 static int | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1455 spa_export_common(char *pool, int new_state, nvlist_t **oldconfig) |
789 | 1456 { |
1457 spa_t *spa; | |
1458 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1459 if (oldconfig) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1460 *oldconfig = NULL; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1461 |
789 | 1462 if (!(spa_mode & FWRITE)) |
1463 return (EROFS); | |
1464 | |
1465 mutex_enter(&spa_namespace_lock); | |
1466 if ((spa = spa_lookup(pool)) == NULL) { | |
1467 mutex_exit(&spa_namespace_lock); | |
1468 return (ENOENT); | |
1469 } | |
1470 | |
1471 /* | |
1544 | 1472 * Put a hold on the pool, drop the namespace lock, stop async tasks, |
1473 * reacquire the namespace lock, and see if we can export. | |
1474 */ | |
1475 spa_open_ref(spa, FTAG); | |
1476 mutex_exit(&spa_namespace_lock); | |
1477 spa_async_suspend(spa); | |
1478 mutex_enter(&spa_namespace_lock); | |
1479 spa_close(spa, FTAG); | |
1480 | |
1481 /* | |
789 | 1482 * The pool will be in core if it's openable, |
1483 * in which case we can modify its state. | |
1484 */ | |
1485 if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { | |
1486 /* | |
1487 * Objsets may be open only because they're dirty, so we | |
1488 * have to force it to sync before checking spa_refcnt. | |
1489 */ | |
1490 spa_scrub_suspend(spa); | |
1491 txg_wait_synced(spa->spa_dsl_pool, 0); | |
1492 | |
1544 | 1493 /* |
1494 * A pool cannot be exported or destroyed if there are active | |
1495 * references. If we are resetting a pool, allow references by | |
1496 * fault injection handlers. | |
1497 */ | |
1498 if (!spa_refcount_zero(spa) || | |
1499 (spa->spa_inject_ref != 0 && | |
1500 new_state != POOL_STATE_UNINITIALIZED)) { | |
789 | 1501 spa_scrub_resume(spa); |
1544 | 1502 spa_async_resume(spa); |
789 | 1503 mutex_exit(&spa_namespace_lock); |
1504 return (EBUSY); | |
1505 } | |
1506 | |
1507 spa_scrub_resume(spa); | |
1508 VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); | |
1509 | |
1510 /* | |
1511 * We want this to be reflected on every label, | |
1512 * so mark them all dirty. spa_unload() will do the | |
1513 * final sync that pushes these changes out. | |
1514 */ | |
1544 | 1515 if (new_state != POOL_STATE_UNINITIALIZED) { |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1516 spa_config_enter(spa, RW_WRITER, FTAG); |
1544 | 1517 spa->spa_state = new_state; |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1518 spa->spa_final_txg = spa_last_synced_txg(spa) + 1; |
1544 | 1519 vdev_config_dirty(spa->spa_root_vdev); |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1520 spa_config_exit(spa, FTAG); |
1544 | 1521 } |
789 | 1522 } |
1523 | |
4451 | 1524 spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); |
1525 | |
789 | 1526 if (spa->spa_state != POOL_STATE_UNINITIALIZED) { |
1527 spa_unload(spa); | |
1528 spa_deactivate(spa); | |
1529 } | |
1530 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1531 if (oldconfig && spa->spa_config) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1532 VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1533 |
1544 | 1534 if (new_state != POOL_STATE_UNINITIALIZED) { |
1535 spa_remove(spa); | |
1536 spa_config_sync(); | |
1537 } | |
789 | 1538 mutex_exit(&spa_namespace_lock); |
1539 | |
1540 return (0); | |
1541 } | |
1542 | |
1543 /* | |
1544 * Destroy a storage pool. | |
1545 */ | |
1546 int | |
1547 spa_destroy(char *pool) | |
1548 { | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1549 return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL)); |
789 | 1550 } |
1551 | |
1552 /* | |
1553 * Export a storage pool. | |
1554 */ | |
1555 int | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1556 spa_export(char *pool, nvlist_t **oldconfig) |
789 | 1557 { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1558 return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig)); |
789 | 1559 } |
1560 | |
1561 /* | |
1544 | 1562 * Similar to spa_export(), this unloads the spa_t without actually removing it |
1563 * from the namespace in any way. | |
1564 */ | |
1565 int | |
1566 spa_reset(char *pool) | |
1567 { | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
1568 return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL)); |
1544 | 1569 } |
1570 | |
1571 | |
/*
 * ==========================================================================
 * Device manipulation
 * ==========================================================================
 */
1577 | |
1578 /* | |
4527 | 1579 * Add a device to a storage pool. |
789 | 1580 */ |
1581 int | |
1582 spa_vdev_add(spa_t *spa, nvlist_t *nvroot) | |
1583 { | |
1584 uint64_t txg; | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1585 int c, error; |
789 | 1586 vdev_t *rvd = spa->spa_root_vdev; |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1587 vdev_t *vd, *tvd; |
2082 | 1588 nvlist_t **spares; |
1589 uint_t i, nspares; | |
789 | 1590 |
1591 txg = spa_vdev_enter(spa); | |
1592 | |
2082 | 1593 if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, |
1594 VDEV_ALLOC_ADD)) != 0) | |
1595 return (spa_vdev_exit(spa, NULL, txg, error)); | |
1596 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1597 spa->spa_pending_vdev = vd; |
789 | 1598 |
2082 | 1599 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, |
1600 &spares, &nspares) != 0) | |
1601 nspares = 0; | |
1602 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1603 if (vd->vdev_children == 0 && nspares == 0) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1604 spa->spa_pending_vdev = NULL; |
2082 | 1605 return (spa_vdev_exit(spa, vd, txg, EINVAL)); |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1606 } |
2082 | 1607 |
1608 if (vd->vdev_children != 0) { | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1609 if ((error = vdev_create(vd, txg, B_FALSE)) != 0) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1610 spa->spa_pending_vdev = NULL; |
2082 | 1611 return (spa_vdev_exit(spa, vd, txg, error)); |
1612 } | |
1613 } | |
1614 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1615 /* |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1616 * We must validate the spares after checking the children. Otherwise, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1617 * vdev_inuse() will blindly overwrite the spare. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1618 */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1619 if ((error = spa_validate_spares(spa, nvroot, txg, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1620 VDEV_ALLOC_ADD)) != 0) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1621 spa->spa_pending_vdev = NULL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1622 return (spa_vdev_exit(spa, vd, txg, error)); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1623 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1624 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1625 spa->spa_pending_vdev = NULL; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1626 |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1627 /* |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1628 * Transfer each new top-level vdev from vd to rvd. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1629 */ |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1630 for (c = 0; c < vd->vdev_children; c++) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1631 tvd = vd->vdev_child[c]; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1632 vdev_remove_child(vd, tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1633 tvd->vdev_id = rvd->vdev_children; |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1634 vdev_add_child(rvd, tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1635 vdev_config_dirty(tvd); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1636 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1637 |
2082 | 1638 if (nspares != 0) { |
1639 if (spa->spa_sparelist != NULL) { | |
1640 nvlist_t **oldspares; | |
1641 uint_t oldnspares; | |
1642 nvlist_t **newspares; | |
1643 | |
1644 VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, | |
1645 ZPOOL_CONFIG_SPARES, &oldspares, &oldnspares) == 0); | |
1646 | |
1647 newspares = kmem_alloc(sizeof (void *) * | |
1648 (nspares + oldnspares), KM_SLEEP); | |
1649 for (i = 0; i < oldnspares; i++) | |
1650 VERIFY(nvlist_dup(oldspares[i], | |
1651 &newspares[i], KM_SLEEP) == 0); | |
1652 for (i = 0; i < nspares; i++) | |
1653 VERIFY(nvlist_dup(spares[i], | |
1654 &newspares[i + oldnspares], | |
1655 KM_SLEEP) == 0); | |
1656 | |
1657 VERIFY(nvlist_remove(spa->spa_sparelist, | |
1658 ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); | |
1659 | |
1660 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, | |
1661 ZPOOL_CONFIG_SPARES, newspares, | |
1662 nspares + oldnspares) == 0); | |
1663 for (i = 0; i < oldnspares + nspares; i++) | |
1664 nvlist_free(newspares[i]); | |
1665 kmem_free(newspares, (oldnspares + nspares) * | |
1666 sizeof (void *)); | |
1667 } else { | |
1668 VERIFY(nvlist_alloc(&spa->spa_sparelist, | |
1669 NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
1670 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, | |
1671 ZPOOL_CONFIG_SPARES, spares, nspares) == 0); | |
1672 } | |
1673 | |
1674 spa_load_spares(spa); | |
1675 spa->spa_sync_spares = B_TRUE; | |
789 | 1676 } |
1677 | |
1678 /* | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1679 * We have to be careful when adding new vdevs to an existing pool. |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1680 * If other threads start allocating from these vdevs before we |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1681 * sync the config cache, and we lose power, then upon reboot we may |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1682 * fail to open the pool because there are DVAs that the config cache |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1683 * can't translate. Therefore, we first add the vdevs without |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1684 * initializing metaslabs; sync the config cache (via spa_vdev_exit()); |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1685 * and then let spa_config_update() initialize the new metaslabs. |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1686 * |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1687 * spa_load() checks for added-but-not-initialized vdevs, so that |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1688 * if we lose power at any point in this sequence, the remaining |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1689 * steps will be completed the next time we load the pool. |
789 | 1690 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1691 (void) spa_vdev_exit(spa, vd, txg, 0); |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1692 |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1693 mutex_enter(&spa_namespace_lock); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1694 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1695 mutex_exit(&spa_namespace_lock); |
789 | 1696 |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
1697 return (0); |
789 | 1698 } |
1699 | |
1700 /* | |
1701 * Attach a device to a mirror. The arguments are the path to any device | |
1702 * in the mirror, and the nvroot for the new device. If the path specifies | |
1703 * a device that is not mirrored, we automatically insert the mirror vdev. | |
1704 * | |
1705 * If 'replacing' is specified, the new device is intended to replace the | |
1706 * existing device; in this case the two devices are made into their own | |
4451 | 1707 * mirror using the 'replacing' vdev, which is functionally identical to |
789 | 1708 * the mirror vdev (it actually reuses all the same ops) but has a few |
1709 * extra rules: you can't attach to it after it's been created, and upon | |
1710 * completion of resilvering, the first disk (the one being replaced) | |
1711 * is automatically detached. | |
1712 */ | |
1713 int | |
1544 | 1714 spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) |
789 | 1715 { |
1716 uint64_t txg, open_txg; | |
1717 int error; | |
1718 vdev_t *rvd = spa->spa_root_vdev; | |
1719 vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; | |
2082 | 1720 vdev_ops_t *pvops; |
4527 | 1721 int is_log; |
789 | 1722 |
1723 txg = spa_vdev_enter(spa); | |
1724 | |
1544 | 1725 oldvd = vdev_lookup_by_guid(rvd, guid); |
789 | 1726 |
1727 if (oldvd == NULL) | |
1728 return (spa_vdev_exit(spa, NULL, txg, ENODEV)); | |
1729 | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1730 if (!oldvd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1731 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1732 |
789 | 1733 pvd = oldvd->vdev_parent; |
1734 | |
2082 | 1735 if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, |
4451 | 1736 VDEV_ALLOC_ADD)) != 0) |
1737 return (spa_vdev_exit(spa, NULL, txg, EINVAL)); | |
1738 | |
1739 if (newrootvd->vdev_children != 1) | |
789 | 1740 return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); |
1741 | |
1742 newvd = newrootvd->vdev_child[0]; | |
1743 | |
1744 if (!newvd->vdev_ops->vdev_op_leaf) | |
1745 return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); | |
1746 | |
2082 | 1747 if ((error = vdev_create(newrootvd, txg, replacing)) != 0) |
789 | 1748 return (spa_vdev_exit(spa, newrootvd, txg, error)); |
1749 | |
4527 | 1750 /* |
1751 * Spares can't replace logs | |
1752 */ | |
1753 is_log = oldvd->vdev_islog; | |
1754 if (is_log && newvd->vdev_isspare) | |
1755 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
1756 | |
2082 | 1757 if (!replacing) { |
1758 /* | |
1759 * For attach, the only allowable parent is a mirror or the root | |
1760 * vdev. | |
1761 */ | |
1762 if (pvd->vdev_ops != &vdev_mirror_ops && | |
1763 pvd->vdev_ops != &vdev_root_ops) | |
1764 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
1765 | |
1766 pvops = &vdev_mirror_ops; | |
1767 } else { | |
1768 /* | |
1769 * Active hot spares can only be replaced by inactive hot | |
1770 * spares. | |
1771 */ | |
1772 if (pvd->vdev_ops == &vdev_spare_ops && | |
1773 pvd->vdev_child[1] == oldvd && | |
1774 !spa_has_spare(spa, newvd->vdev_guid)) | |
1775 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
1776 | |
1777 /* | |
1778 * If the source is a hot spare, and the parent isn't already a | |
1779 * spare, then we want to create a new hot spare. Otherwise, we | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1780 * want to create a replacing vdev. The user is not allowed to |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1781 * attach to a spared vdev child unless the 'isspare' state is |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1782 * the same (spare replaces spare, non-spare replaces |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1783 * non-spare). |
2082 | 1784 */ |
1785 if (pvd->vdev_ops == &vdev_replacing_ops) | |
1786 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1787 else if (pvd->vdev_ops == &vdev_spare_ops && |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1788 newvd->vdev_isspare != oldvd->vdev_isspare) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1789 return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); |
2082 | 1790 else if (pvd->vdev_ops != &vdev_spare_ops && |
1791 newvd->vdev_isspare) | |
1792 pvops = &vdev_spare_ops; | |
1793 else | |
1794 pvops = &vdev_replacing_ops; | |
1795 } | |
1796 | |
1175
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
1797 /* |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
1798 * Compare the new device size with the replaceable/attachable |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
1799 * device size. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
1800 */ |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
797
diff
changeset
|
1801 if (newvd->vdev_psize < vdev_get_rsize(oldvd)) |
789 | 1802 return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); |
1803 | |
1732 | 1804 /* |
1805 * The new device cannot have a higher alignment requirement | |
1806 * than the top-level vdev. | |
1807 */ | |
1808 if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) | |
789 | 1809 return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); |
1810 | |
1811 /* | |
1812 * If this is an in-place replacement, update oldvd's path and devid | |
1813 * to make it distinguishable from newvd, and unopenable from now on. | |
1814 */ | |
1815 if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { | |
1816 spa_strfree(oldvd->vdev_path); | |
1817 oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, | |
1818 KM_SLEEP); | |
1819 (void) sprintf(oldvd->vdev_path, "%s/%s", | |
1820 newvd->vdev_path, "old"); | |
1821 if (oldvd->vdev_devid != NULL) { | |
1822 spa_strfree(oldvd->vdev_devid); | |
1823 oldvd->vdev_devid = NULL; | |
1824 } | |
1825 } | |
1826 | |
1827 /* | |
2082 | 1828 * If the parent is not a mirror, or if we're replacing, insert the new |
1829 * mirror/replacing/spare vdev above oldvd. | |
789 | 1830 */ |
1831 if (pvd->vdev_ops != pvops) | |
1832 pvd = vdev_add_parent(oldvd, pvops); | |
1833 | |
1834 ASSERT(pvd->vdev_top->vdev_parent == rvd); | |
1835 ASSERT(pvd->vdev_ops == pvops); | |
1836 ASSERT(oldvd->vdev_parent == pvd); | |
1837 | |
1838 /* | |
1839 * Extract the new device from its root and add it to pvd. | |
1840 */ | |
1841 vdev_remove_child(newrootvd, newvd); | |
1842 newvd->vdev_id = pvd->vdev_children; | |
1843 vdev_add_child(pvd, newvd); | |
1844 | |
1544 | 1845 /* |
1846 * If newvd is smaller than oldvd, but larger than its rsize, | |
1847 * the addition of newvd may have decreased our parent's asize. | |
1848 */ | |
1849 pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize); | |
1850 | |
789 | 1851 tvd = newvd->vdev_top; |
1852 ASSERT(pvd->vdev_top == tvd); | |
1853 ASSERT(tvd->vdev_parent == rvd); | |
1854 | |
1855 vdev_config_dirty(tvd); | |
1856 | |
1857 /* | |
1858 * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate | |
1859 * upward when spa_vdev_exit() calls vdev_dtl_reassess(). | |
1860 */ | |
1861 open_txg = txg + TXG_CONCURRENT_STATES - 1; | |
1862 | |
1863 mutex_enter(&newvd->vdev_dtl_lock); | |
1864 space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL, | |
1865 open_txg - TXG_INITIAL + 1); | |
1866 mutex_exit(&newvd->vdev_dtl_lock); | |
1867 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1868 if (newvd->vdev_isspare) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1869 spa_spare_activate(newvd); |
1544 | 1870 |
789 | 1871 /* |
1872 * Mark newvd's DTL dirty in this txg. | |
1873 */ | |
1732 | 1874 vdev_dirty(tvd, VDD_DTL, newvd, txg); |
789 | 1875 |
1876 (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); | |
1877 | |
1878 /* | |
4451 | 1879 * Kick off a resilver to update newvd. We need to grab the namespace |
1880 * lock because spa_scrub() needs to post a sysevent with the pool name. | |
789 | 1881 */ |
4451 | 1882 mutex_enter(&spa_namespace_lock); |
789 | 1883 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); |
4451 | 1884 mutex_exit(&spa_namespace_lock); |
789 | 1885 |
1886 return (0); | |
1887 } | |
1888 | |
1889 /* | |
1890 * Detach a device from a mirror or replacing vdev. | |
1891 * If 'replace_done' is specified, only detach if the parent | |
1892 * is a replacing vdev. | |
1893 */ | |
1894 int | |
1544 | 1895 spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) |
789 | 1896 { |
1897 uint64_t txg; | |
1898 int c, t, error; | |
1899 vdev_t *rvd = spa->spa_root_vdev; | |
1900 vdev_t *vd, *pvd, *cvd, *tvd; | |
2082 | 1901 boolean_t unspare = B_FALSE; |
1902 uint64_t unspare_guid; | |
789 | 1903 |
1904 txg = spa_vdev_enter(spa); | |
1905 | |
1544 | 1906 vd = vdev_lookup_by_guid(rvd, guid); |
789 | 1907 |
1908 if (vd == NULL) | |
1909 return (spa_vdev_exit(spa, NULL, txg, ENODEV)); | |
1910 | |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1911 if (!vd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1912 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1913 |
789 | 1914 pvd = vd->vdev_parent; |
1915 | |
1916 /* | |
1917 * If replace_done is specified, only remove this device if it's | |
2082 | 1918 * the first child of a replacing vdev. For the 'spare' vdev, either |
1919 * disk can be removed. | |
789 | 1920 */ |
2082 | 1921 if (replace_done) { |
1922 if (pvd->vdev_ops == &vdev_replacing_ops) { | |
1923 if (vd->vdev_id != 0) | |
1924 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); | |
1925 } else if (pvd->vdev_ops != &vdev_spare_ops) { | |
1926 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); | |
1927 } | |
1928 } | |
1929 | |
1930 ASSERT(pvd->vdev_ops != &vdev_spare_ops || | |
4577 | 1931 spa_version(spa) >= SPA_VERSION_SPARES); |
789 | 1932 |
1933 /* | |
2082 | 1934 * Only mirror, replacing, and spare vdevs support detach. |
789 | 1935 */ |
1936 if (pvd->vdev_ops != &vdev_replacing_ops && | |
2082 | 1937 pvd->vdev_ops != &vdev_mirror_ops && |
1938 pvd->vdev_ops != &vdev_spare_ops) | |
789 | 1939 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
1940 | |
1941 /* | |
1942 * If there's only one replica, you can't detach it. | |
1943 */ | |
1944 if (pvd->vdev_children <= 1) | |
1945 return (spa_vdev_exit(spa, NULL, txg, EBUSY)); | |
1946 | |
1947 /* | |
1948 * If all siblings have non-empty DTLs, this device may have the only | |
1949 * valid copy of the data, which means we cannot safely detach it. | |
1950 * | |
1951 * XXX -- as in the vdev_offline() case, we really want a more | |
1952 * precise DTL check. | |
1953 */ | |
1954 for (c = 0; c < pvd->vdev_children; c++) { | |
1955 uint64_t dirty; | |
1956 | |
1957 cvd = pvd->vdev_child[c]; | |
1958 if (cvd == vd) | |
1959 continue; | |
1960 if (vdev_is_dead(cvd)) | |
1961 continue; | |
1962 mutex_enter(&cvd->vdev_dtl_lock); | |
1963 dirty = cvd->vdev_dtl_map.sm_space | | |
1964 cvd->vdev_dtl_scrub.sm_space; | |
1965 mutex_exit(&cvd->vdev_dtl_lock); | |
1966 if (!dirty) | |
1967 break; | |
1968 } | |
2082 | 1969 |
1970 /* | |
1971 * If we are a replacing or spare vdev, then we can always detach the | |
1972 * latter child, as that is how one cancels the operation. | |
1973 */ | |
1974 if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) && | |
1975 c == pvd->vdev_children) | |
789 | 1976 return (spa_vdev_exit(spa, NULL, txg, EBUSY)); |
1977 | |
1978 /* | |
2082 | 1979 * If we are detaching the original disk from a spare, then it implies |
1980 * that the spare should become a real disk, and be removed from the | |
1981 * active spare list for the pool. | |
1982 */ | |
1983 if (pvd->vdev_ops == &vdev_spare_ops && | |
1984 vd->vdev_id == 0) | |
1985 unspare = B_TRUE; | |
1986 | |
1987 /* | |
789 | 1988 * Erase the disk labels so the disk can be used for other things. |
1989 * This must be done after all other error cases are handled, | |
1990 * but before we disembowel vd (so we can still do I/O to it). | |
1991 * But if we can't do it, don't treat the error as fatal -- | |
1992 * it may be that the unwritability of the disk is the reason | |
1993 * it's being detached! | |
1994 */ | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
1995 error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); |
789 | 1996 |
1997 /* | |
1998 * Remove vd from its parent and compact the parent's children. | |
1999 */ | |
2000 vdev_remove_child(pvd, vd); | |
2001 vdev_compact_children(pvd); | |
2002 | |
2003 /* | |
2004 * Remember one of the remaining children so we can get tvd below. | |
2005 */ | |
2006 cvd = pvd->vdev_child[0]; | |
2007 | |
2008 /* | |
2082 | 2009 * If we need to remove the remaining child from the list of hot spares, |
2010 * do it now, marking the vdev as no longer a spare in the process. We | |
2011 * must do this before vdev_remove_parent(), because that can change the | |
2012 * GUID if it creates a new toplevel GUID. | |
2013 */ | |
2014 if (unspare) { | |
2015 ASSERT(cvd->vdev_isspare); | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2016 spa_spare_remove(cvd); |
2082 | 2017 unspare_guid = cvd->vdev_guid; |
2018 } | |
2019 | |
2020 /* | |
789 | 2021 * If the parent mirror/replacing vdev only has one child, |
2022 * the parent is no longer needed. Remove it from the tree. | |
2023 */ | |
2024 if (pvd->vdev_children == 1) | |
2025 vdev_remove_parent(cvd); | |
2026 | |
2027 /* | |
2028 * We don't set tvd until now because the parent we just removed | |
2029 * may have been the previous top-level vdev. | |
2030 */ | |
2031 tvd = cvd->vdev_top; | |
2032 ASSERT(tvd->vdev_parent == rvd); | |
2033 | |
2034 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2035 * Reevaluate the parent vdev state. |
789 | 2036 */ |
4451 | 2037 vdev_propagate_state(cvd); |
789 | 2038 |
2039 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2040 * If the device we just detached was smaller than the others, it may be |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2041 * possible to add metaslabs (i.e. grow the pool). vdev_metaslab_init() |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2042 * can't fail because the existing metaslabs are already in core, so |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2043 * there's nothing to read from disk. |
789 | 2044 */ |
1732 | 2045 VERIFY(vdev_metaslab_init(tvd, txg) == 0); |
789 | 2046 |
2047 vdev_config_dirty(tvd); | |
2048 | |
2049 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2050 * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2051 * vd->vdev_detached is set and free vd's DTL object in syncing context. |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2052 * But first make sure we're not on any *other* txg's DTL list, to |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2053 * prevent vd from being accessed after it's freed. |
789 | 2054 */ |
2055 for (t = 0; t < TXG_SIZE; t++) | |
2056 (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); | |
1732 | 2057 vd->vdev_detached = B_TRUE; |
2058 vdev_dirty(tvd, VDD_DTL, vd, txg); | |
789 | 2059 |
4451 | 2060 spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); |
2061 | |
2082 | 2062 error = spa_vdev_exit(spa, vd, txg, 0); |
2063 | |
2064 /* | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2065 * If this was the removal of the original device in a hot spare vdev, |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2066 * then we want to go through and remove the device from the hot spare |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
2067 * list of every other pool. |
2082 | 2068 */ |
2069 if (unspare) { | |
2070 spa = NULL; | |
2071 mutex_enter(&spa_namespace_lock); | |
2072 while ((spa = spa_next(spa)) != NULL) { | |
2073 if (spa->spa_state != POOL_STATE_ACTIVE) | |
2074 continue; | |
2075 | |
2076 (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); | |
2077 } | |
2078 mutex_exit(&spa_namespace_lock); | |
2079 } | |
2080 | |
2081 return (error); | |
2082 } | |
2083 | |
2084 /* | |
2085 * Remove a device from the pool. Currently, this supports removing only hot | |
2086 * spares. | |
2087 */ | |
2088 int | |
2089 spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) | |
2090 { | |
2091 vdev_t *vd; | |
2092 nvlist_t **spares, *nv, **newspares; | |
2093 uint_t i, j, nspares; | |
2094 int ret = 0; | |
2095 | |
2096 spa_config_enter(spa, RW_WRITER, FTAG); | |
2097 | |
2098 vd = spa_lookup_by_guid(spa, guid); | |
2099 | |
2100 nv = NULL; | |
2101 if (spa->spa_spares != NULL && | |
2102 nvlist_lookup_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, | |
2103 &spares, &nspares) == 0) { | |
2104 for (i = 0; i < nspares; i++) { | |
2105 uint64_t theguid; | |
2106 | |
2107 VERIFY(nvlist_lookup_uint64(spares[i], | |
2108 ZPOOL_CONFIG_GUID, &theguid) == 0); | |
2109 if (theguid == guid) { | |
2110 nv = spares[i]; | |
2111 break; | |
2112 } | |
2113 } | |
2114 } | |
2115 | |
2116 /* | |
2117 * We only support removing a hot spare, and only if it's not currently | |
2118 * in use in this pool. | |
2119 */ | |
2120 if (nv == NULL && vd == NULL) { | |
2121 ret = ENOENT; | |
2122 goto out; | |
2123 } | |
2124 | |
2125 if (nv == NULL && vd != NULL) { | |
2126 ret = ENOTSUP; | |
2127 goto out; | |
2128 } | |
2129 | |
2130 if (!unspare && nv != NULL && vd != NULL) { | |
2131 ret = EBUSY; | |
2132 goto out; | |
2133 } | |
2134 | |
2135 if (nspares == 1) { | |
2136 newspares = NULL; | |
2137 } else { | |
2138 newspares = kmem_alloc((nspares - 1) * sizeof (void *), | |
2139 KM_SLEEP); | |
2140 for (i = 0, j = 0; i < nspares; i++) { | |
2141 if (spares[i] != nv) | |
2142 VERIFY(nvlist_dup(spares[i], | |
2143 &newspares[j++], KM_SLEEP) == 0); | |
2144 } | |
2145 } | |
2146 | |
2147 VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, | |
2148 DATA_TYPE_NVLIST_ARRAY) == 0); | |
2149 VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, | |
2150 newspares, nspares - 1) == 0); | |
2151 for (i = 0; i < nspares - 1; i++) | |
2152 nvlist_free(newspares[i]); | |
2153 kmem_free(newspares, (nspares - 1) * sizeof (void *)); | |
2154 spa_load_spares(spa); | |
2155 spa->spa_sync_spares = B_TRUE; | |
2156 | |
2157 out: | |
2158 spa_config_exit(spa, FTAG); | |
2159 | |
2160 return (ret); | |
789 | 2161 } |
2162 | |
2163 /* | |
4451 | 2164 * Find any device that's done replacing, or a vdev marked 'unspare' that's |
2165 * current spared, so we can detach it. | |
789 | 2166 */ |
1544 | 2167 static vdev_t * |
4451 | 2168 spa_vdev_resilver_done_hunt(vdev_t *vd) |
789 | 2169 { |
1544 | 2170 vdev_t *newvd, *oldvd; |
789 | 2171 int c; |
2172 | |
1544 | 2173 for (c = 0; c < vd->vdev_children; c++) { |
4451 | 2174 oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); |
1544 | 2175 if (oldvd != NULL) |
2176 return (oldvd); | |
2177 } | |
789 | 2178 |
4451 | 2179 /* |
2180 * Check for a completed replacement. | |
2181 */ | |
789 | 2182 if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { |
1544 | 2183 oldvd = vd->vdev_child[0]; |
2184 newvd = vd->vdev_child[1]; | |
789 | 2185 |
1544 | 2186 mutex_enter(&newvd->vdev_dtl_lock); |
2187 if (newvd->vdev_dtl_map.sm_space == 0 && | |
2188 newvd->vdev_dtl_scrub.sm_space == 0) { | |
2189 mutex_exit(&newvd->vdev_dtl_lock); | |
2190 return (oldvd); | |
2191 } | |
2192 mutex_exit(&newvd->vdev_dtl_lock); | |
2193 } | |
789 | 2194 |
4451 | 2195 /* |
2196 * Check for a completed resilver with the 'unspare' flag set. | |
2197 */ | |
2198 if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { | |
2199 newvd = vd->vdev_child[0]; | |
2200 oldvd = vd->vdev_child[1]; | |
2201 | |
2202 mutex_enter(&newvd->vdev_dtl_lock); | |
2203 if (newvd->vdev_unspare && | |
2204 newvd->vdev_dtl_map.sm_space == 0 && | |
2205 newvd->vdev_dtl_scrub.sm_space == 0) { | |
2206 newvd->vdev_unspare = 0; | |
2207 mutex_exit(&newvd->vdev_dtl_lock); | |
2208 return (oldvd); | |
2209 } | |
2210 mutex_exit(&newvd->vdev_dtl_lock); | |
2211 } | |
2212 | |
1544 | 2213 return (NULL); |
789 | 2214 } |
2215 | |
1544 | 2216 static void |
4451 | 2217 spa_vdev_resilver_done(spa_t *spa) |
789 | 2218 { |
1544 | 2219 vdev_t *vd; |
2082 | 2220 vdev_t *pvd; |
1544 | 2221 uint64_t guid; |
2082 | 2222 uint64_t pguid = 0; |
789 | 2223 |
1544 | 2224 spa_config_enter(spa, RW_READER, FTAG); |
789 | 2225 |
4451 | 2226 while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { |
1544 | 2227 guid = vd->vdev_guid; |
2082 | 2228 /* |
2229 * If we have just finished replacing a hot spared device, then | |
2230 * we need to detach the parent's first child (the original hot | |
2231 * spare) as well. | |
2232 */ | |
2233 pvd = vd->vdev_parent; | |
2234 if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops && | |
2235 pvd->vdev_id == 0) { | |
2236 ASSERT(pvd->vdev_ops == &vdev_replacing_ops); | |
2237 ASSERT(pvd->vdev_parent->vdev_children == 2); | |
2238 pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid; | |
2239 } | |
1544 | 2240 spa_config_exit(spa, FTAG); |
2241 if (spa_vdev_detach(spa, guid, B_TRUE) != 0) | |
2242 return; | |
2082 | 2243 if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0) |
2244 return; | |
1544 | 2245 spa_config_enter(spa, RW_READER, FTAG); |
789 | 2246 } |
2247 | |
1544 | 2248 spa_config_exit(spa, FTAG); |
789 | 2249 } |
2250 | |
2251 /* | |
1354
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2252 * Update the stored path for this vdev. Dirty the vdev configuration, relying |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2253 * on spa_vdev_enter/exit() to synchronize the labels and cache. |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2254 */ |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2255 int |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2256 spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2257 { |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2258 vdev_t *rvd, *vd; |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2259 uint64_t txg; |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2260 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2261 rvd = spa->spa_root_vdev; |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2262 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2263 txg = spa_vdev_enter(spa); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2264 |
2082 | 2265 if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) { |
2266 /* | |
2267 * Determine if this is a reference to a hot spare. In that | |
2268 * case, update the path as stored in the spare list. | |
2269 */ | |
2270 nvlist_t **spares; | |
2271 uint_t i, nspares; | |
2272 if (spa->spa_sparelist != NULL) { | |
2273 VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, | |
2274 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); | |
2275 for (i = 0; i < nspares; i++) { | |
2276 uint64_t theguid; | |
2277 VERIFY(nvlist_lookup_uint64(spares[i], | |
2278 ZPOOL_CONFIG_GUID, &theguid) == 0); | |
2279 if (theguid == guid) | |
2280 break; | |
2281 } | |
2282 | |
2283 if (i == nspares) | |
2284 return (spa_vdev_exit(spa, NULL, txg, ENOENT)); | |
2285 | |
2286 VERIFY(nvlist_add_string(spares[i], | |
2287 ZPOOL_CONFIG_PATH, newpath) == 0); | |
2288 spa_load_spares(spa); | |
2289 spa->spa_sync_spares = B_TRUE; | |
2290 return (spa_vdev_exit(spa, NULL, txg, 0)); | |
2291 } else { | |
2292 return (spa_vdev_exit(spa, NULL, txg, ENOENT)); | |
2293 } | |
2294 } | |
1354
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2295 |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2296 if (!vd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2297 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
2298 |
1354
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2299 spa_strfree(vd->vdev_path); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2300 vd->vdev_path = spa_strdup(newpath); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2301 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2302 vdev_config_dirty(vd->vdev_top); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2303 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2304 return (spa_vdev_exit(spa, NULL, txg, 0)); |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2305 } |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2306 |
81359ee1ee63
6362672 import gets confused about overlapping slices
eschrock
parents:
1175
diff
changeset
|
2307 /* |
789 | 2308 * ========================================================================== |
2309 * SPA Scrubbing | |
2310 * ========================================================================== | |
2311 */ | |
2312 | |
2313 static void | |
2314 spa_scrub_io_done(zio_t *zio) | |
2315 { | |
2316 spa_t *spa = zio->io_spa; | |
2317 | |
4309
3dfde0f4662d
6542676 ARC needs to track meta-data memory overhead
maybee
parents:
4178
diff
changeset
|
2318 arc_data_buf_free(zio->io_data, zio->io_size); |
789 | 2319 |
2320 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 2321 if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2322 vdev_t *vd = zio->io_vd ? zio->io_vd : spa->spa_root_vdev; |
789 | 2323 spa->spa_scrub_errors++; |
2324 mutex_enter(&vd->vdev_stat_lock); | |
2325 vd->vdev_stat.vs_scrub_errors++; | |
2326 mutex_exit(&vd->vdev_stat_lock); | |
2327 } | |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2328 |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2329 if (--spa->spa_scrub_inflight < spa->spa_scrub_maxinflight) |
1544 | 2330 cv_broadcast(&spa->spa_scrub_io_cv); |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2331 |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2332 ASSERT(spa->spa_scrub_inflight >= 0); |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2333 |
1544 | 2334 mutex_exit(&spa->spa_scrub_lock); |
789 | 2335 } |
2336 | |
2337 static void | |
1544 | 2338 spa_scrub_io_start(spa_t *spa, blkptr_t *bp, int priority, int flags, |
2339 zbookmark_t *zb) | |
789 | 2340 { |
2341 size_t size = BP_GET_LSIZE(bp); | |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2342 void *data; |
789 | 2343 |
2344 mutex_enter(&spa->spa_scrub_lock); | |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2345 /* |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2346 * Do not give too much work to vdev(s). |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2347 */ |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2348 while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight) { |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2349 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2350 } |
789 | 2351 spa->spa_scrub_inflight++; |
2352 mutex_exit(&spa->spa_scrub_lock); | |
2353 | |
4309
3dfde0f4662d
6542676 ARC needs to track meta-data memory overhead
maybee
parents:
4178
diff
changeset
|
2354 data = arc_data_buf_alloc(size); |
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
2355 |
1544 | 2356 if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET) |
2357 flags |= ZIO_FLAG_SPECULATIVE; /* intent log block */ | |
2358 | |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
2359 flags |= ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL; |
1544 | 2360 |
789 | 2361 zio_nowait(zio_read(NULL, spa, bp, data, size, |
1544 | 2362 spa_scrub_io_done, NULL, priority, flags, zb)); |
789 | 2363 } |
2364 | |
2365 /* ARGSUSED */ | |
2366 static int | |
2367 spa_scrub_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) | |
2368 { | |
2369 blkptr_t *bp = &bc->bc_blkptr; | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2370 vdev_t *vd = spa->spa_root_vdev; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2371 dva_t *dva = bp->blk_dva; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2372 int needs_resilver = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2373 int d; |
789 | 2374 |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2375 if (bc->bc_errno) { |
789 | 2376 /* |
2377 * We can't scrub this block, but we can continue to scrub | |
2378 * the rest of the pool. Note the error and move along. | |
2379 */ | |
2380 mutex_enter(&spa->spa_scrub_lock); | |
2381 spa->spa_scrub_errors++; | |
2382 mutex_exit(&spa->spa_scrub_lock); | |
2383 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2384 mutex_enter(&vd->vdev_stat_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2385 vd->vdev_stat.vs_scrub_errors++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2386 mutex_exit(&vd->vdev_stat_lock); |
789 | 2387 |
2388 return (ERESTART); | |
2389 } | |
2390 | |
2391 ASSERT(bp->blk_birth < spa->spa_scrub_maxtxg); | |
2392 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2393 for (d = 0; d < BP_GET_NDVAS(bp); d++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2394 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d])); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2395 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2396 ASSERT(vd != NULL); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2397 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2398 /* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2399 * Keep track of how much data we've examined so that |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2400 * zpool(1M) status can make useful progress reports. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2401 */ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2402 mutex_enter(&vd->vdev_stat_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2403 vd->vdev_stat.vs_scrub_examined += DVA_GET_ASIZE(&dva[d]); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2404 mutex_exit(&vd->vdev_stat_lock); |
789 | 2405 |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2406 if (spa->spa_scrub_type == POOL_SCRUB_RESILVER) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2407 if (DVA_GET_GANG(&dva[d])) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2408 /* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2409 * Gang members may be spread across multiple |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2410 * vdevs, so the best we can do is look at the |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2411 * pool-wide DTL. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2412 * XXX -- it would be better to change our |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2413 * allocation policy to ensure that this can't |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2414 * happen. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2415 */ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2416 vd = spa->spa_root_vdev; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2417 } |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2418 if (vdev_dtl_contains(&vd->vdev_dtl_map, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2419 bp->blk_birth, 1)) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2420 needs_resilver = B_TRUE; |
789 | 2421 } |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2422 } |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2423 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2424 if (spa->spa_scrub_type == POOL_SCRUB_EVERYTHING) |
789 | 2425 spa_scrub_io_start(spa, bp, ZIO_PRIORITY_SCRUB, |
1544 | 2426 ZIO_FLAG_SCRUB, &bc->bc_bookmark); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2427 else if (needs_resilver) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2428 spa_scrub_io_start(spa, bp, ZIO_PRIORITY_RESILVER, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1760
diff
changeset
|
2429 ZIO_FLAG_RESILVER, &bc->bc_bookmark); |
789 | 2430 |
2431 return (0); | |
2432 } | |
2433 | |
2434 static void | |
2435 spa_scrub_thread(spa_t *spa) | |
2436 { | |
2437 callb_cpr_t cprinfo; | |
2438 traverse_handle_t *th = spa->spa_scrub_th; | |
2439 vdev_t *rvd = spa->spa_root_vdev; | |
2440 pool_scrub_type_t scrub_type = spa->spa_scrub_type; | |
2441 int error = 0; | |
2442 boolean_t complete; | |
2443 | |
2444 CALLB_CPR_INIT(&cprinfo, &spa->spa_scrub_lock, callb_generic_cpr, FTAG); | |
2445 | |
797
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2446 /* |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2447 * If we're restarting due to a snapshot create/delete, |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2448 * wait for that to complete. |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2449 */ |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2450 txg_wait_synced(spa_get_dsl(spa), 0); |
af56ba8b7e41
6344108 snapshot create/delete interlock with scrub/resilver must sync txg
bonwick
parents:
789
diff
changeset
|
2451 |
1544 | 2452 dprintf("start %s mintxg=%llu maxtxg=%llu\n", |
2453 scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", | |
2454 spa->spa_scrub_mintxg, spa->spa_scrub_maxtxg); | |
2455 | |
2456 spa_config_enter(spa, RW_WRITER, FTAG); | |
2457 vdev_reopen(rvd); /* purge all vdev caches */ | |
789 | 2458 vdev_config_dirty(rvd); /* rewrite all disk labels */ |
2459 vdev_scrub_stat_update(rvd, scrub_type, B_FALSE); | |
1544 | 2460 spa_config_exit(spa, FTAG); |
789 | 2461 |
2462 mutex_enter(&spa->spa_scrub_lock); | |
2463 spa->spa_scrub_errors = 0; | |
2464 spa->spa_scrub_active = 1; | |
1544 | 2465 ASSERT(spa->spa_scrub_inflight == 0); |
789 | 2466 |
2467 while (!spa->spa_scrub_stop) { | |
2468 CALLB_CPR_SAFE_BEGIN(&cprinfo); | |
1544 | 2469 while (spa->spa_scrub_suspended) { |
789 | 2470 spa->spa_scrub_active = 0; |
2471 cv_broadcast(&spa->spa_scrub_cv); | |
2472 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
2473 spa->spa_scrub_active = 1; | |
2474 } | |
2475 CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_scrub_lock); | |
2476 | |
2477 if (spa->spa_scrub_restart_txg != 0) | |
2478 break; | |
2479 | |
2480 mutex_exit(&spa->spa_scrub_lock); | |
2481 error = traverse_more(th); | |
2482 mutex_enter(&spa->spa_scrub_lock); | |
2483 if (error != EAGAIN) | |
2484 break; | |
2485 } | |
2486 | |
2487 while (spa->spa_scrub_inflight) | |
2488 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); | |
2489 | |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2490 spa->spa_scrub_active = 0; |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2491 cv_broadcast(&spa->spa_scrub_cv); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2492 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2493 mutex_exit(&spa->spa_scrub_lock); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2494 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2495 spa_config_enter(spa, RW_WRITER, FTAG); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2496 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2497 mutex_enter(&spa->spa_scrub_lock); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2498 |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2499 /* |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2500 * Note: we check spa_scrub_restart_txg under both spa_scrub_lock |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2501 * AND the spa config lock to synchronize with any config changes |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2502 * that revise the DTLs under spa_vdev_enter() / spa_vdev_exit(). |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2503 */ |
789 | 2504 if (spa->spa_scrub_restart_txg != 0) |
2505 error = ERESTART; | |
2506 | |
1544 | 2507 if (spa->spa_scrub_stop) |
2508 error = EINTR; | |
2509 | |
789 | 2510 /* |
1544 | 2511 * Even if there were uncorrectable errors, we consider the scrub |
2512 * completed. The downside is that if there is a transient error during | |
2513 * a resilver, we won't resilver the data properly to the target. But | |
2514 * if the damage is permanent (more likely) we will resilver forever, | |
2515 * which isn't really acceptable. Since there is enough information for | |
2516 * the user to know what has failed and why, this seems like a more | |
2517 * tractable approach. | |
789 | 2518 */ |
1544 | 2519 complete = (error == 0); |
789 | 2520 |
1544 | 2521 dprintf("end %s to maxtxg=%llu %s, traverse=%d, %llu errors, stop=%u\n", |
2522 scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub", | |
789 | 2523 spa->spa_scrub_maxtxg, complete ? "done" : "FAILED", |
2524 error, spa->spa_scrub_errors, spa->spa_scrub_stop); | |
2525 | |
2526 mutex_exit(&spa->spa_scrub_lock); | |
2527 | |
2528 /* | |
2529 * If the scrub/resilver completed, update all DTLs to reflect this. | |
2530 * Whether it succeeded or not, vacate all temporary scrub DTLs. | |
2531 */ | |
2532 vdev_dtl_reassess(rvd, spa_last_synced_txg(spa) + 1, | |
2533 complete ? spa->spa_scrub_maxtxg : 0, B_TRUE); | |
2534 vdev_scrub_stat_update(rvd, POOL_SCRUB_NONE, complete); | |
1544 | 2535 spa_errlog_rotate(spa); |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2536 |
4451 | 2537 if (scrub_type == POOL_SCRUB_RESILVER && complete) |
2538 spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_FINISH); | |
2539 | |
1544 | 2540 spa_config_exit(spa, FTAG); |
789 | 2541 |
2542 mutex_enter(&spa->spa_scrub_lock); | |
2543 | |
1544 | 2544 /* |
2545 * We may have finished replacing a device. | |
2546 * Let the async thread assess this and handle the detach. | |
2547 */ | |
4451 | 2548 spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); |
789 | 2549 |
2550 /* | |
2551 * If we were told to restart, our final act is to start a new scrub. | |
2552 */ | |
2553 if (error == ERESTART) | |
1544 | 2554 spa_async_request(spa, scrub_type == POOL_SCRUB_RESILVER ? |
2555 SPA_ASYNC_RESILVER : SPA_ASYNC_SCRUB); | |
789 | 2556 |
1544 | 2557 spa->spa_scrub_type = POOL_SCRUB_NONE; |
2558 spa->spa_scrub_active = 0; | |
2559 spa->spa_scrub_thread = NULL; | |
2560 cv_broadcast(&spa->spa_scrub_cv); | |
789 | 2561 CALLB_CPR_EXIT(&cprinfo); /* drops &spa->spa_scrub_lock */ |
2562 thread_exit(); | |
2563 } | |
2564 | |
2565 void | |
2566 spa_scrub_suspend(spa_t *spa) | |
2567 { | |
2568 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 2569 spa->spa_scrub_suspended++; |
789 | 2570 while (spa->spa_scrub_active) { |
2571 cv_broadcast(&spa->spa_scrub_cv); | |
2572 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
2573 } | |
2574 while (spa->spa_scrub_inflight) | |
2575 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); | |
2576 mutex_exit(&spa->spa_scrub_lock); | |
2577 } | |
2578 | |
2579 void | |
2580 spa_scrub_resume(spa_t *spa) | |
2581 { | |
2582 mutex_enter(&spa->spa_scrub_lock); | |
1544 | 2583 ASSERT(spa->spa_scrub_suspended != 0); |
2584 if (--spa->spa_scrub_suspended == 0) | |
789 | 2585 cv_broadcast(&spa->spa_scrub_cv); |
2586 mutex_exit(&spa->spa_scrub_lock); | |
2587 } | |
2588 | |
2589 void | |
2590 spa_scrub_restart(spa_t *spa, uint64_t txg) | |
2591 { | |
2592 /* | |
2593 * Something happened (e.g. snapshot create/delete) that means | |
2594 * we must restart any in-progress scrubs. The itinerary will | |
2595 * fix this properly. | |
2596 */ | |
2597 mutex_enter(&spa->spa_scrub_lock); | |
2598 spa->spa_scrub_restart_txg = txg; | |
2599 mutex_exit(&spa->spa_scrub_lock); | |
2600 } | |
2601 | |
1544 | 2602 int |
2603 spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force) | |
789 | 2604 { |
2605 space_seg_t *ss; | |
2606 uint64_t mintxg, maxtxg; | |
2607 vdev_t *rvd = spa->spa_root_vdev; | |
2608 | |
2609 if ((uint_t)type >= POOL_SCRUB_TYPES) | |
2610 return (ENOTSUP); | |
2611 | |
1544 | 2612 mutex_enter(&spa->spa_scrub_lock); |
2613 | |
789 | 2614 /* |
2615 * If there's a scrub or resilver already in progress, stop it. | |
2616 */ | |
2617 while (spa->spa_scrub_thread != NULL) { | |
2618 /* | |
2619 * Don't stop a resilver unless forced. | |
2620 */ | |
1544 | 2621 if (spa->spa_scrub_type == POOL_SCRUB_RESILVER && !force) { |
2622 mutex_exit(&spa->spa_scrub_lock); | |
789 | 2623 return (EBUSY); |
1544 | 2624 } |
789 | 2625 spa->spa_scrub_stop = 1; |
2626 cv_broadcast(&spa->spa_scrub_cv); | |
2627 cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock); | |
2628 } | |
2629 | |
2630 /* | |
2631 * Terminate the previous traverse. | |
2632 */ | |
2633 if (spa->spa_scrub_th != NULL) { | |
2634 traverse_fini(spa->spa_scrub_th); | |
2635 spa->spa_scrub_th = NULL; | |
2636 } | |
2637 | |
1544 | 2638 if (rvd == NULL) { |
2639 ASSERT(spa->spa_scrub_stop == 0); | |
2640 ASSERT(spa->spa_scrub_type == type); | |
2641 ASSERT(spa->spa_scrub_restart_txg == 0); | |
2642 mutex_exit(&spa->spa_scrub_lock); | |
2643 return (0); | |
2644 } | |
789 | 2645 |
2646 mintxg = TXG_INITIAL - 1; | |
2647 maxtxg = spa_last_synced_txg(spa) + 1; | |
2648 | |
1544 | 2649 mutex_enter(&rvd->vdev_dtl_lock); |
789 | 2650 |
1544 | 2651 if (rvd->vdev_dtl_map.sm_space == 0) { |
2652 /* | |
2653 * The pool-wide DTL is empty. | |
1732 | 2654 * If this is a resilver, there's nothing to do except |
2655 * check whether any in-progress replacements have completed. | |
1544 | 2656 */ |
1732 | 2657 if (type == POOL_SCRUB_RESILVER) { |
1544 | 2658 type = POOL_SCRUB_NONE; |
4451 | 2659 spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); |
1732 | 2660 } |
1544 | 2661 } else { |
2662 /* | |
2663 * The pool-wide DTL is non-empty. | |
2664 * If this is a normal scrub, upgrade to a resilver instead. | |
2665 */ | |
2666 if (type == POOL_SCRUB_EVERYTHING) | |
2667 type = POOL_SCRUB_RESILVER; | |
2668 } | |
789 | 2669 |
1544 | 2670 if (type == POOL_SCRUB_RESILVER) { |
789 | 2671 /* |
2672 * Determine the resilvering boundaries. | |
2673 * | |
2674 * Note: (mintxg, maxtxg) is an open interval, | |
2675 * i.e. mintxg and maxtxg themselves are not included. | |
2676 * | |
2677 * Note: for maxtxg, we MIN with spa_last_synced_txg(spa) + 1 | |
2678 * so we don't claim to resilver a txg that's still changing. | |
2679 */ | |
2680 ss = avl_first(&rvd->vdev_dtl_map.sm_root); | |
1544 | 2681 mintxg = ss->ss_start - 1; |
789 | 2682 ss = avl_last(&rvd->vdev_dtl_map.sm_root); |
1544 | 2683 maxtxg = MIN(ss->ss_end, maxtxg); |
4451 | 2684 |
2685 spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); | |
789 | 2686 } |
2687 | |
1544 | 2688 mutex_exit(&rvd->vdev_dtl_lock); |
2689 | |
2690 spa->spa_scrub_stop = 0; | |
2691 spa->spa_scrub_type = type; | |
2692 spa->spa_scrub_restart_txg = 0; | |
2693 | |
2694 if (type != POOL_SCRUB_NONE) { | |
2695 spa->spa_scrub_mintxg = mintxg; | |
789 | 2696 spa->spa_scrub_maxtxg = maxtxg; |
2697 spa->spa_scrub_th = traverse_init(spa, spa_scrub_cb, NULL, | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2698 ADVANCE_PRE | ADVANCE_PRUNE | ADVANCE_ZIL, |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2699 ZIO_FLAG_CANFAIL); |
789 | 2700 traverse_add_pool(spa->spa_scrub_th, mintxg, maxtxg); |
2701 spa->spa_scrub_thread = thread_create(NULL, 0, | |
2702 spa_scrub_thread, spa, 0, &p0, TS_RUN, minclsyspri); | |
2703 } | |
2704 | |
1544 | 2705 mutex_exit(&spa->spa_scrub_lock); |
2706 | |
789 | 2707 return (0); |
2708 } | |
2709 | |
1544 | 2710 /* |
2711 * ========================================================================== | |
2712 * SPA async task processing | |
2713 * ========================================================================== | |
2714 */ | |
2715 | |
2716 static void | |
4451 | 2717 spa_async_remove(spa_t *spa, vdev_t *vd) |
789 | 2718 { |
1544 | 2719 vdev_t *tvd; |
2720 int c; | |
2721 | |
4451 | 2722 for (c = 0; c < vd->vdev_children; c++) { |
2723 tvd = vd->vdev_child[c]; | |
2724 if (tvd->vdev_remove_wanted) { | |
2725 tvd->vdev_remove_wanted = 0; | |
2726 vdev_set_state(tvd, B_FALSE, VDEV_STATE_REMOVED, | |
2727 VDEV_AUX_NONE); | |
2728 vdev_clear(spa, tvd); | |
2729 vdev_config_dirty(tvd->vdev_top); | |
1544 | 2730 } |
4451 | 2731 spa_async_remove(spa, tvd); |
1544 | 2732 } |
2733 } | |
2734 | |
2735 static void | |
2736 spa_async_thread(spa_t *spa) | |
2737 { | |
2738 int tasks; | |
4451 | 2739 uint64_t txg; |
1544 | 2740 |
2741 ASSERT(spa->spa_sync_on); | |
789 | 2742 |
1544 | 2743 mutex_enter(&spa->spa_async_lock); |
2744 tasks = spa->spa_async_tasks; | |
2745 spa->spa_async_tasks = 0; | |
2746 mutex_exit(&spa->spa_async_lock); | |
2747 | |
2748 /* | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2749 * See if the config needs to be updated. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2750 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2751 if (tasks & SPA_ASYNC_CONFIG_UPDATE) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2752 mutex_enter(&spa_namespace_lock); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2753 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2754 mutex_exit(&spa_namespace_lock); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2755 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2756 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2757 /* |
4451 | 2758 * See if any devices need to be marked REMOVED. |
1544 | 2759 */ |
4451 | 2760 if (tasks & SPA_ASYNC_REMOVE) { |
2761 txg = spa_vdev_enter(spa); | |
2762 spa_async_remove(spa, spa->spa_root_vdev); | |
2763 (void) spa_vdev_exit(spa, NULL, txg, 0); | |
2764 } | |
1544 | 2765 |
2766 /* | |
2767 * If any devices are done replacing, detach them. | |
2768 */ | |
4451 | 2769 if (tasks & SPA_ASYNC_RESILVER_DONE) |
2770 spa_vdev_resilver_done(spa); | |
789 | 2771 |
1544 | 2772 /* |
4451 | 2773 * Kick off a scrub. When starting a RESILVER scrub (or an EVERYTHING |
2774 * scrub which can become a resilver), we need to hold | |
2775 * spa_namespace_lock() because the sysevent we post via | |
2776 * spa_event_notify() needs to get the name of the pool. | |
1544 | 2777 */ |
4451 | 2778 if (tasks & SPA_ASYNC_SCRUB) { |
2779 mutex_enter(&spa_namespace_lock); | |
1544 | 2780 VERIFY(spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_TRUE) == 0); |
4451 | 2781 mutex_exit(&spa_namespace_lock); |
2782 } | |
1544 | 2783 |
2784 /* | |
2785 * Kick off a resilver. | |
2786 */ | |
4451 | 2787 if (tasks & SPA_ASYNC_RESILVER) { |
2788 mutex_enter(&spa_namespace_lock); | |
1544 | 2789 VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); |
4451 | 2790 mutex_exit(&spa_namespace_lock); |
2791 } | |
1544 | 2792 |
2793 /* | |
2794 * Let the world know that we're done. | |
2795 */ | |
2796 mutex_enter(&spa->spa_async_lock); | |
2797 spa->spa_async_thread = NULL; | |
2798 cv_broadcast(&spa->spa_async_cv); | |
2799 mutex_exit(&spa->spa_async_lock); | |
2800 thread_exit(); | |
2801 } | |
2802 | |
2803 void | |
2804 spa_async_suspend(spa_t *spa) | |
2805 { | |
2806 mutex_enter(&spa->spa_async_lock); | |
2807 spa->spa_async_suspended++; | |
2808 while (spa->spa_async_thread != NULL) | |
2809 cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); | |
2810 mutex_exit(&spa->spa_async_lock); | |
2811 } | |
2812 | |
2813 void | |
2814 spa_async_resume(spa_t *spa) | |
2815 { | |
2816 mutex_enter(&spa->spa_async_lock); | |
2817 ASSERT(spa->spa_async_suspended != 0); | |
2818 spa->spa_async_suspended--; | |
2819 mutex_exit(&spa->spa_async_lock); | |
2820 } | |
2821 | |
2822 static void | |
2823 spa_async_dispatch(spa_t *spa) | |
2824 { | |
2825 mutex_enter(&spa->spa_async_lock); | |
2826 if (spa->spa_async_tasks && !spa->spa_async_suspended && | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2827 spa->spa_async_thread == NULL && |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2828 rootdir != NULL && !vn_is_readonly(rootdir)) |
1544 | 2829 spa->spa_async_thread = thread_create(NULL, 0, |
2830 spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); | |
2831 mutex_exit(&spa->spa_async_lock); | |
2832 } | |
2833 | |
2834 void | |
2835 spa_async_request(spa_t *spa, int task) | |
2836 { | |
2837 mutex_enter(&spa->spa_async_lock); | |
2838 spa->spa_async_tasks |= task; | |
2839 mutex_exit(&spa->spa_async_lock); | |
789 | 2840 } |
2841 | |
2842 /* | |
2843 * ========================================================================== | |
2844 * SPA syncing routines | |
2845 * ========================================================================== | |
2846 */ | |
2847 | |
2848 static void | |
2849 spa_sync_deferred_frees(spa_t *spa, uint64_t txg) | |
2850 { | |
2851 bplist_t *bpl = &spa->spa_sync_bplist; | |
2852 dmu_tx_t *tx; | |
2853 blkptr_t blk; | |
2854 uint64_t itor = 0; | |
2855 zio_t *zio; | |
2856 int error; | |
2857 uint8_t c = 1; | |
2858 | |
2859 zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CONFIG_HELD); | |
2860 | |
2861 while (bplist_iterate(bpl, &itor, &blk) == 0) | |
2862 zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL)); | |
2863 | |
2864 error = zio_wait(zio); | |
2865 ASSERT3U(error, ==, 0); | |
2866 | |
2867 tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); | |
2868 bplist_vacate(bpl, tx); | |
2869 | |
2870 /* | |
2871 * Pre-dirty the first block so we sync to convergence faster. | |
2872 * (Usually only the first block is needed.) | |
2873 */ | |
2874 dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); | |
2875 dmu_tx_commit(tx); | |
2876 } | |
2877 | |
2878 static void | |
2082 | 2879 spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) |
2880 { | |
2881 char *packed = NULL; | |
2882 size_t nvsize = 0; | |
2883 dmu_buf_t *db; | |
2884 | |
2885 VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); | |
2886 | |
2887 packed = kmem_alloc(nvsize, KM_SLEEP); | |
2888 | |
2889 VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, | |
2890 KM_SLEEP) == 0); | |
2891 | |
2892 dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx); | |
2893 | |
2894 kmem_free(packed, nvsize); | |
2895 | |
2896 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); | |
2897 dmu_buf_will_dirty(db, tx); | |
2898 *(uint64_t *)db->db_data = nvsize; | |
2899 dmu_buf_rele(db, FTAG); | |
2900 } | |
2901 | |
2902 static void | |
2903 spa_sync_spares(spa_t *spa, dmu_tx_t *tx) | |
2904 { | |
2905 nvlist_t *nvroot; | |
2906 nvlist_t **spares; | |
2907 int i; | |
2908 | |
2909 if (!spa->spa_sync_spares) | |
2910 return; | |
2911 | |
2912 /* | |
2913 * Update the MOS nvlist describing the list of available spares. | |
2914 * spa_validate_spares() will have already made sure this nvlist is | |
4451 | 2915 * valid and the vdevs are labeled appropriately. |
2082 | 2916 */ |
2917 if (spa->spa_spares_object == 0) { | |
2918 spa->spa_spares_object = dmu_object_alloc(spa->spa_meta_objset, | |
2919 DMU_OT_PACKED_NVLIST, 1 << 14, | |
2920 DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); | |
2921 VERIFY(zap_update(spa->spa_meta_objset, | |
2922 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SPARES, | |
2923 sizeof (uint64_t), 1, &spa->spa_spares_object, tx) == 0); | |
2924 } | |
2925 | |
2926 VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
2927 if (spa->spa_nspares == 0) { | |
2928 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
2929 NULL, 0) == 0); | |
2930 } else { | |
2931 spares = kmem_alloc(spa->spa_nspares * sizeof (void *), | |
2932 KM_SLEEP); | |
2933 for (i = 0; i < spa->spa_nspares; i++) | |
2934 spares[i] = vdev_config_generate(spa, | |
2935 spa->spa_spares[i], B_FALSE, B_TRUE); | |
2936 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, | |
2937 spares, spa->spa_nspares) == 0); | |
2938 for (i = 0; i < spa->spa_nspares; i++) | |
2939 nvlist_free(spares[i]); | |
2940 kmem_free(spares, spa->spa_nspares * sizeof (void *)); | |
2941 } | |
2942 | |
2943 spa_sync_nvlist(spa, spa->spa_spares_object, nvroot, tx); | |
2926 | 2944 nvlist_free(nvroot); |
2082 | 2945 |
2946 spa->spa_sync_spares = B_FALSE; | |
2947 } | |
2948 | |
2949 static void | |
789 | 2950 spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) |
2951 { | |
2952 nvlist_t *config; | |
2953 | |
2954 if (list_is_empty(&spa->spa_dirty_list)) | |
2955 return; | |
2956 | |
2957 config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE); | |
2958 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2959 if (spa->spa_config_syncing) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2960 nvlist_free(spa->spa_config_syncing); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
2961 spa->spa_config_syncing = config; |
789 | 2962 |
2082 | 2963 spa_sync_nvlist(spa, spa->spa_config_object, config, tx); |
789 | 2964 } |
2965 | |
3912 | 2966 static void |
4543 | 2967 spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) |
3912 | 2968 { |
2969 spa_t *spa = arg1; | |
2970 nvlist_t *nvp = arg2; | |
2971 nvpair_t *nvpair; | |
2972 objset_t *mos = spa->spa_meta_objset; | |
2973 uint64_t zapobj; | |
4451 | 2974 uint64_t intval; |
3912 | 2975 |
2976 mutex_enter(&spa->spa_props_lock); | |
2977 if (spa->spa_pool_props_object == 0) { | |
2978 zapobj = zap_create(mos, DMU_OT_POOL_PROPS, DMU_OT_NONE, 0, tx); | |
2979 VERIFY(zapobj > 0); | |
2980 | |
2981 spa->spa_pool_props_object = zapobj; | |
2982 | |
2983 VERIFY(zap_update(mos, DMU_POOL_DIRECTORY_OBJECT, | |
2984 DMU_POOL_PROPS, 8, 1, | |
2985 &spa->spa_pool_props_object, tx) == 0); | |
2986 } | |
2987 mutex_exit(&spa->spa_props_lock); | |
2988 | |
2989 nvpair = NULL; | |
2990 while ((nvpair = nvlist_next_nvpair(nvp, nvpair))) { | |
2991 switch (zpool_name_to_prop(nvpair_name(nvpair))) { | |
4543 | 2992 case ZPOOL_PROP_DELEGATION: |
2993 VERIFY(nvlist_lookup_uint64(nvp, | |
2994 nvpair_name(nvpair), &intval) == 0); | |
2995 VERIFY(zap_update(mos, | |
2996 spa->spa_pool_props_object, | |
2997 nvpair_name(nvpair), 8, 1, | |
2998 &intval, tx) == 0); | |
2999 spa->spa_delegation = intval; | |
3000 break; | |
4451 | 3001 case ZPOOL_PROP_BOOTFS: |
3912 | 3002 VERIFY(nvlist_lookup_uint64(nvp, |
3003 nvpair_name(nvpair), &spa->spa_bootfs) == 0); | |
4543 | 3004 intval = spa->spa_bootfs; |
3912 | 3005 VERIFY(zap_update(mos, |
3006 spa->spa_pool_props_object, | |
4451 | 3007 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 8, 1, |
4543 | 3008 &intval, tx) == 0); |
3912 | 3009 break; |
4451 | 3010 |
3011 case ZPOOL_PROP_AUTOREPLACE: | |
3012 VERIFY(nvlist_lookup_uint64(nvp, | |
3013 nvpair_name(nvpair), &intval) == 0); | |
3014 VERIFY(zap_update(mos, | |
3015 spa->spa_pool_props_object, | |
3016 zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 8, 1, | |
3017 &intval, tx) == 0); | |
3018 break; | |
3912 | 3019 } |
4543 | 3020 spa_history_internal_log(LOG_POOL_PROPSET, |
3021 spa, tx, cr, "%s %lld %s", | |
3022 nvpair_name(nvpair), intval, | |
3023 spa->spa_name); | |
3912 | 3024 } |
3025 } | |
3026 | |
789 | 3027 /* |
3028 * Sync the specified transaction group. New blocks may be dirtied as | |
3029 * part of the process, so we iterate until it converges. | |
3030 */ | |
3031 void | |
3032 spa_sync(spa_t *spa, uint64_t txg) | |
3033 { | |
3034 dsl_pool_t *dp = spa->spa_dsl_pool; | |
3035 objset_t *mos = spa->spa_meta_objset; | |
3036 bplist_t *bpl = &spa->spa_sync_bplist; | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3037 vdev_t *rvd = spa->spa_root_vdev; |
789 | 3038 vdev_t *vd; |
3039 dmu_tx_t *tx; | |
3040 int dirty_vdevs; | |
3041 | |
3042 /* | |
3043 * Lock out configuration changes. | |
3044 */ | |
1544 | 3045 spa_config_enter(spa, RW_READER, FTAG); |
789 | 3046 |
3047 spa->spa_syncing_txg = txg; | |
3048 spa->spa_sync_pass = 0; | |
3049 | |
1544 | 3050 VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); |
789 | 3051 |
2082 | 3052 tx = dmu_tx_create_assigned(dp, txg); |
3053 | |
3054 /* | |
4577 | 3055 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, |
2082 | 3056 * set spa_deflate if we have no raid-z vdevs. |
3057 */ | |
4577 | 3058 if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && |
3059 spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { | |
2082 | 3060 int i; |
3061 | |
3062 for (i = 0; i < rvd->vdev_children; i++) { | |
3063 vd = rvd->vdev_child[i]; | |
3064 if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) | |
3065 break; | |
3066 } | |
3067 if (i == rvd->vdev_children) { | |
3068 spa->spa_deflate = TRUE; | |
3069 VERIFY(0 == zap_add(spa->spa_meta_objset, | |
3070 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, | |
3071 sizeof (uint64_t), 1, &spa->spa_deflate, tx)); | |
3072 } | |
3073 } | |
3074 | |
789 | 3075 /* |
3076 * If anything has changed in this txg, push the deferred frees | |
3077 * from the previous txg. If not, leave them alone so that we | |
3078 * don't generate work on an otherwise idle system. | |
3079 */ | |
3080 if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || | |
2329
e640bebc73b3
6446569 deferred list is hooked on flintstone vitamins
ek110237
parents:
2199
diff
changeset
|
3081 !txg_list_empty(&dp->dp_dirty_dirs, txg) || |
e640bebc73b3
6446569 deferred list is hooked on flintstone vitamins
ek110237
parents:
2199
diff
changeset
|
3082 !txg_list_empty(&dp->dp_sync_tasks, txg)) |
789 | 3083 spa_sync_deferred_frees(spa, txg); |
3084 | |
3085 /* | |
3086 * Iterate to convergence. | |
3087 */ | |
3088 do { | |
3089 spa->spa_sync_pass++; | |
3090 | |
3091 spa_sync_config_object(spa, tx); | |
2082 | 3092 spa_sync_spares(spa, tx); |
1544 | 3093 spa_errlog_sync(spa, txg); |
789 | 3094 dsl_pool_sync(dp, txg); |
3095 | |
3096 dirty_vdevs = 0; | |
3097 while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { | |
3098 vdev_sync(vd, txg); | |
3099 dirty_vdevs++; | |
3100 } | |
3101 | |
3102 bplist_sync(bpl, tx); | |
3103 } while (dirty_vdevs); | |
3104 | |
3105 bplist_close(bpl); | |
3106 | |
3107 dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); | |
3108 | |
3109 /* | |
3110 * Rewrite the vdev configuration (which includes the uberblock) | |
3111 * to commit the transaction group. | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3112 * |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3113 * If there are any dirty vdevs, sync the uberblock to all vdevs. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3114 * Otherwise, pick a random top-level vdev that's known to be |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3115 * visible in the config cache (see spa_vdev_add() for details). |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3116 * If the write fails, try the next vdev until we're tried them all. |
789 | 3117 */ |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3118 if (!list_is_empty(&spa->spa_dirty_list)) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3119 VERIFY(vdev_config_sync(rvd, txg) == 0); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3120 } else { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3121 int children = rvd->vdev_children; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3122 int c0 = spa_get_random(children); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3123 int c; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3124 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3125 for (c = 0; c < children; c++) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3126 vd = rvd->vdev_child[(c0 + c) % children]; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3127 if (vd->vdev_ms_array == 0) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3128 continue; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3129 if (vdev_config_sync(vd, txg) == 0) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3130 break; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3131 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3132 if (c == children) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3133 VERIFY(vdev_config_sync(rvd, txg) == 0); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3134 } |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3135 |
2082 | 3136 dmu_tx_commit(tx); |
3137 | |
1635
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3138 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3139 * Clear the dirty config list. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3140 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3141 while ((vd = list_head(&spa->spa_dirty_list)) != NULL) |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3142 vdev_config_clean(vd); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3143 |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3144 /* |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3145 * Now that the new config has synced transactionally, |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3146 * let it become visible to the config cache. |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3147 */ |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3148 if (spa->spa_config_syncing != NULL) { |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3149 spa_config_set(spa, spa->spa_config_syncing); |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3150 spa->spa_config_txg = txg; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3151 spa->spa_config_syncing = NULL; |
0ab1193d47cb
6398664 zpool detach: missing argument to error message causes core dump
bonwick
parents:
1601
diff
changeset
|
3152 } |
789 | 3153 |
3154 /* | |
3155 * Make a stable copy of the fully synced uberblock. | |
3156 * We use this as the root for pool traversals. | |
3157 */ | |
3158 spa->spa_traverse_wanted = 1; /* tells traverse_more() to stop */ | |
3159 | |
3160 spa_scrub_suspend(spa); /* stop scrubbing and finish I/Os */ | |
3161 | |
3162 rw_enter(&spa->spa_traverse_lock, RW_WRITER); | |
3163 spa->spa_traverse_wanted = 0; | |
3164 spa->spa_ubsync = spa->spa_uberblock; | |
3165 rw_exit(&spa->spa_traverse_lock); | |
3166 | |
3167 spa_scrub_resume(spa); /* resume scrub with new ubsync */ | |
3168 | |
3169 /* | |
3170 * Clean up the ZIL records for the synced txg. | |
3171 */ | |
3172 dsl_pool_zil_clean(dp); | |
3173 | |
3174 /* | |
3175 * Update usable space statistics. | |
3176 */ | |
3177 while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) | |
3178 vdev_sync_done(vd, txg); | |
3179 | |
3180 /* | |
3181 * It had better be the case that we didn't dirty anything | |
2082 | 3182 * since vdev_config_sync(). |
789 | 3183 */ |
3184 ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); | |
3185 ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); | |
3186 ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); | |
3187 ASSERT(bpl->bpl_queue == NULL); | |
3188 | |
1544 | 3189 spa_config_exit(spa, FTAG); |
3190 | |
3191 /* | |
3192 * If any async tasks have been requested, kick them off. | |
3193 */ | |
3194 spa_async_dispatch(spa); | |
789 | 3195 } |
3196 | |
3197 /* | |
3198 * Sync all pools. We don't want to hold the namespace lock across these | |
3199 * operations, so we take a reference on the spa_t and drop the lock during the | |
3200 * sync. | |
3201 */ | |
3202 void | |
3203 spa_sync_allpools(void) | |
3204 { | |
3205 spa_t *spa = NULL; | |
3206 mutex_enter(&spa_namespace_lock); | |
3207 while ((spa = spa_next(spa)) != NULL) { | |
3208 if (spa_state(spa) != POOL_STATE_ACTIVE) | |
3209 continue; | |
3210 spa_open_ref(spa, FTAG); | |
3211 mutex_exit(&spa_namespace_lock); | |
3212 txg_wait_synced(spa_get_dsl(spa), 0); | |
3213 mutex_enter(&spa_namespace_lock); | |
3214 spa_close(spa, FTAG); | |
3215 } | |
3216 mutex_exit(&spa_namespace_lock); | |
3217 } | |
3218 | |
3219 /* | |
3220 * ========================================================================== | |
3221 * Miscellaneous routines | |
3222 * ========================================================================== | |
3223 */ | |
3224 | |
3225 /* | |
3226 * Remove all pools in the system. | |
3227 */ | |
3228 void | |
3229 spa_evict_all(void) | |
3230 { | |
3231 spa_t *spa; | |
3232 | |
3233 /* | |
3234 * Remove all cached state. All pools should be closed now, | |
3235 * so every spa in the AVL tree should be unreferenced. | |
3236 */ | |
3237 mutex_enter(&spa_namespace_lock); | |
3238 while ((spa = spa_next(NULL)) != NULL) { | |
3239 /* | |
1544 | 3240 * Stop async tasks. The async thread may need to detach |
3241 * a device that's been replaced, which requires grabbing | |
3242 * spa_namespace_lock, so we must drop it here. | |
789 | 3243 */ |
3244 spa_open_ref(spa, FTAG); | |
3245 mutex_exit(&spa_namespace_lock); | |
1544 | 3246 spa_async_suspend(spa); |
789 | 3247 VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0); |
3248 mutex_enter(&spa_namespace_lock); | |
3249 spa_close(spa, FTAG); | |
3250 | |
3251 if (spa->spa_state != POOL_STATE_UNINITIALIZED) { | |
3252 spa_unload(spa); | |
3253 spa_deactivate(spa); | |
3254 } | |
3255 spa_remove(spa); | |
3256 } | |
3257 mutex_exit(&spa_namespace_lock); | |
3258 } | |
1544 | 3259 |
3260 vdev_t * | |
3261 spa_lookup_by_guid(spa_t *spa, uint64_t guid) | |
3262 { | |
3263 return (vdev_lookup_by_guid(spa->spa_root_vdev, guid)); | |
3264 } | |
1760 | 3265 |
3266 void | |
3267 spa_upgrade(spa_t *spa) | |
3268 { | |
3269 spa_config_enter(spa, RW_WRITER, FTAG); | |
3270 | |
3271 /* | |
3272 * This should only be called for a non-faulted pool, and since a | |
3273 * future version would result in an unopenable pool, this shouldn't be | |
3274 * possible. | |
3275 */ | |
4577 | 3276 ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); |
3277 | |
3278 spa->spa_uberblock.ub_version = SPA_VERSION; | |
1760 | 3279 vdev_config_dirty(spa->spa_root_vdev); |
3280 | |
3281 spa_config_exit(spa, FTAG); | |
2082 | 3282 |
3283 txg_wait_synced(spa_get_dsl(spa), 0); | |
1760 | 3284 } |
2082 | 3285 |
3286 boolean_t | |
3287 spa_has_spare(spa_t *spa, uint64_t guid) | |
3288 { | |
3289 int i; | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3290 uint64_t spareguid; |
2082 | 3291 |
3292 for (i = 0; i < spa->spa_nspares; i++) | |
3293 if (spa->spa_spares[i]->vdev_guid == guid) | |
3294 return (B_TRUE); | |
3295 | |
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3296 for (i = 0; i < spa->spa_pending_nspares; i++) { |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3297 if (nvlist_lookup_uint64(spa->spa_pending_spares[i], |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3298 ZPOOL_CONFIG_GUID, &spareguid) == 0 && |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3299 spareguid == guid) |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3300 return (B_TRUE); |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3301 } |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3290
diff
changeset
|
3302 |
2082 | 3303 return (B_FALSE); |
3304 } | |
3912 | 3305 |
3306 int | |
3307 spa_set_props(spa_t *spa, nvlist_t *nvp) | |
3308 { | |
3309 return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, | |
3310 spa, nvp, 3)); | |
3311 } | |
3312 | |
3313 int | |
3314 spa_get_props(spa_t *spa, nvlist_t **nvp) | |
3315 { | |
3316 zap_cursor_t zc; | |
3317 zap_attribute_t za; | |
3318 objset_t *mos = spa->spa_meta_objset; | |
3319 zfs_source_t src; | |
4451 | 3320 zpool_prop_t prop; |
3912 | 3321 nvlist_t *propval; |
3322 uint64_t value; | |
3323 int err; | |
3324 | |
3325 VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
3326 | |
3327 mutex_enter(&spa->spa_props_lock); | |
3328 /* If no props object, then just return empty nvlist */ | |
3329 if (spa->spa_pool_props_object == 0) { | |
3330 mutex_exit(&spa->spa_props_lock); | |
3331 return (0); | |
3332 } | |
3333 | |
3334 for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); | |
3335 (err = zap_cursor_retrieve(&zc, &za)) == 0; | |
3336 zap_cursor_advance(&zc)) { | |
3337 | |
3338 if ((prop = zpool_name_to_prop(za.za_name)) == ZFS_PROP_INVAL) | |
3339 continue; | |
3340 | |
3341 VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
3342 switch (za.za_integer_length) { | |
3343 case 8: | |
4451 | 3344 if (zpool_prop_default_numeric(prop) == |
3912 | 3345 za.za_first_integer) |
3346 src = ZFS_SRC_DEFAULT; | |
3347 else | |
3348 src = ZFS_SRC_LOCAL; | |
3349 value = za.za_first_integer; | |
3350 | |
4451 | 3351 if (prop == ZPOOL_PROP_BOOTFS) { |
3912 | 3352 dsl_pool_t *dp; |
3353 dsl_dataset_t *ds = NULL; | |
3354 char strval[MAXPATHLEN]; | |
3355 | |
3356 dp = spa_get_dsl(spa); | |
3357 rw_enter(&dp->dp_config_rwlock, RW_READER); | |
3358 if ((err = dsl_dataset_open_obj(dp, | |
3359 za.za_first_integer, NULL, DS_MODE_NONE, | |
3360 FTAG, &ds)) != 0) { | |
3361 rw_exit(&dp->dp_config_rwlock); | |
3362 break; | |
3363 } | |
3364 dsl_dataset_name(ds, strval); | |
3365 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
3366 rw_exit(&dp->dp_config_rwlock); | |
3367 | |
3368 VERIFY(nvlist_add_uint64(propval, | |
3369 ZFS_PROP_SOURCE, src) == 0); | |
3370 VERIFY(nvlist_add_string(propval, | |
3371 ZFS_PROP_VALUE, strval) == 0); | |
3372 } else { | |
3373 VERIFY(nvlist_add_uint64(propval, | |
3374 ZFS_PROP_SOURCE, src) == 0); | |
3375 VERIFY(nvlist_add_uint64(propval, | |
3376 ZFS_PROP_VALUE, value) == 0); | |
3377 } | |
3378 VERIFY(nvlist_add_nvlist(*nvp, za.za_name, | |
3379 propval) == 0); | |
3380 break; | |
3381 } | |
3382 nvlist_free(propval); | |
3383 } | |
3384 zap_cursor_fini(&zc); | |
3385 mutex_exit(&spa->spa_props_lock); | |
3386 if (err && err != ENOENT) { | |
3387 nvlist_free(*nvp); | |
3388 return (err); | |
3389 } | |
3390 | |
3391 return (0); | |
3392 } | |
3393 | |
3394 /* | |
3395 * If the bootfs property value is dsobj, clear it. | |
3396 */ | |
3397 void | |
3398 spa_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) | |
3399 { | |
3400 if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { | |
3401 VERIFY(zap_remove(spa->spa_meta_objset, | |
3402 spa->spa_pool_props_object, | |
4451 | 3403 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); |
3912 | 3404 spa->spa_bootfs = 0; |
3405 } | |
3406 } | |
4451 | 3407 |
3408 /* | |
3409 * Post a sysevent corresponding to the given event. The 'name' must be one of | |
3410 * the event definitions in sys/sysevent/eventdefs.h. The payload will be | |
3411 * filled in from the spa and (optionally) the vdev. This doesn't do anything | |
3412 * in the userland libzpool, as we don't want consumers to misinterpret ztest | |
3413 * or zdb as real changes. | |
3414 */ | |
3415 void | |
3416 spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) | |
3417 { | |
3418 #ifdef _KERNEL | |
3419 sysevent_t *ev; | |
3420 sysevent_attr_list_t *attr = NULL; | |
3421 sysevent_value_t value; | |
3422 sysevent_id_t eid; | |
3423 | |
3424 ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", | |
3425 SE_SLEEP); | |
3426 | |
3427 value.value_type = SE_DATA_TYPE_STRING; | |
3428 value.value.sv_string = spa_name(spa); | |
3429 if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) | |
3430 goto done; | |
3431 | |
3432 value.value_type = SE_DATA_TYPE_UINT64; | |
3433 value.value.sv_uint64 = spa_guid(spa); | |
3434 if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) | |
3435 goto done; | |
3436 | |
3437 if (vd) { | |
3438 value.value_type = SE_DATA_TYPE_UINT64; | |
3439 value.value.sv_uint64 = vd->vdev_guid; | |
3440 if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, | |
3441 SE_SLEEP) != 0) | |
3442 goto done; | |
3443 | |
3444 if (vd->vdev_path) { | |
3445 value.value_type = SE_DATA_TYPE_STRING; | |
3446 value.value.sv_string = vd->vdev_path; | |
3447 if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, | |
3448 &value, SE_SLEEP) != 0) | |
3449 goto done; | |
3450 } | |
3451 } | |
3452 | |
3453 (void) log_sysevent(ev, SE_SLEEP, &eid); | |
3454 | |
3455 done: | |
3456 if (attr) | |
3457 sysevent_free_attr(attr); | |
3458 sysevent_free(ev); | |
3459 #endif | |
3460 } |