Mercurial > illumos > illumos-gate
annotate usr/src/uts/common/fs/zfs/dsl_dataset.c @ 1758:d0750a16db04
6397267 assertion failed: (link->list_next == 0) == (link->list_prev == 0)
6402388 unclean reboot during 'zfs restore' results in maintenance mode
author | ahrens |
---|---|
date | Thu, 06 Apr 2006 18:51:58 -0700 |
parents | 1efa8b3d1296 |
children | 76b439ec3ac1 |
rev | line source |
---|---|
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1544 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
21 /* | |
1544 | 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 * Use is subject to license terms. |
24 */ | |
25 | |
26 #pragma ident "%Z%%M% %I% %E% SMI" | |
27 | |
28 #include <sys/dmu_objset.h> | |
29 #include <sys/dsl_dataset.h> | |
30 #include <sys/dsl_dir.h> | |
31 #include <sys/dmu_traverse.h> | |
32 #include <sys/dmu_tx.h> | |
33 #include <sys/arc.h> | |
34 #include <sys/zio.h> | |
35 #include <sys/zap.h> | |
36 #include <sys/unique.h> | |
37 #include <sys/zfs_context.h> | |
38 | |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
39 static int dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
40 void *arg, dmu_tx_t *tx); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
41 |
/*
 * Upper bound on a dataset's summed open weights, and the weight of the
 * heaviest entry in ds_refcnt_weight[].
 */
#define	DOS_REF_MAX	(1ULL << 62)

/* Block size handed to bplist_create() when a dataset's deadlist is made. */
#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * Size to account as "uncompressed" for a block pointer: the physical
 * size for indirect blocks (level > 0) and for metadata object types,
 * the logical size otherwise.
 *
 * The expansion is a plain parenthesized expression with no trailing
 * semicolon, so the macro can be used inside larger expressions and does
 * not leave a stray empty statement behind at each use.  Note that 'bp'
 * is evaluated more than once.
 */
#define	BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
49 | |
50 /* | |
51 * We use weighted reference counts to express the various forms of exclusion | |
52 * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open | |
53 * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. | |
54 * This makes the exclusion logic simple: the total refcnt for all opens cannot | |
55 * exceed DOS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their | |
56 * weight (DOS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume | |
57 * just over half of the refcnt space, so there can't be more than one, but it | |
58 * can peacefully coexist with any number of STANDARD opens. | |
59 */ | |
60 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { | |
61 0, /* DOS_MODE_NONE - invalid */ | |
62 1, /* DOS_MODE_STANDARD - unlimited number */ | |
63 (DOS_REF_MAX >> 1) + 1, /* DOS_MODE_PRIMARY - only one of these */ | |
64 DOS_REF_MAX /* DOS_MODE_EXCLUSIVE - no other opens */ | |
65 }; | |
66 | |
67 | |
68 void | |
69 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
70 { | |
71 int used = BP_GET_ASIZE(bp); | |
72 int compressed = BP_GET_PSIZE(bp); | |
73 int uncompressed = BP_GET_UCSIZE(bp); | |
74 | |
75 dprintf_bp(bp, "born, ds=%p\n", ds); | |
76 | |
77 ASSERT(dmu_tx_is_syncing(tx)); | |
78 /* It could have been compressed away to nothing */ | |
79 if (BP_IS_HOLE(bp)) | |
80 return; | |
81 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); | |
82 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); | |
83 if (ds == NULL) { | |
84 /* | |
85 * Account for the meta-objset space in its placeholder | |
86 * dsl_dir. | |
87 */ | |
88 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ | |
89 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, | |
90 used, compressed, uncompressed, tx); | |
91 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); | |
92 return; | |
93 } | |
94 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
95 mutex_enter(&ds->ds_lock); | |
96 ds->ds_phys->ds_used_bytes += used; | |
97 ds->ds_phys->ds_compressed_bytes += compressed; | |
98 ds->ds_phys->ds_uncompressed_bytes += uncompressed; | |
99 ds->ds_phys->ds_unique_bytes += used; | |
100 mutex_exit(&ds->ds_lock); | |
101 dsl_dir_diduse_space(ds->ds_dir, | |
102 used, compressed, uncompressed, tx); | |
103 } | |
104 | |
105 void | |
106 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
107 { | |
108 int used = BP_GET_ASIZE(bp); | |
109 int compressed = BP_GET_PSIZE(bp); | |
110 int uncompressed = BP_GET_UCSIZE(bp); | |
111 | |
112 ASSERT(dmu_tx_is_syncing(tx)); | |
113 if (BP_IS_HOLE(bp)) | |
114 return; | |
115 | |
116 ASSERT(used > 0); | |
117 if (ds == NULL) { | |
118 /* | |
119 * Account for the meta-objset space in its placeholder | |
120 * dataset. | |
121 */ | |
122 /* XXX this can fail, what do we do when it does? */ | |
123 (void) arc_free(NULL, tx->tx_pool->dp_spa, | |
124 tx->tx_txg, bp, NULL, NULL, ARC_WAIT); | |
125 bzero(bp, sizeof (blkptr_t)); | |
126 | |
127 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, | |
128 -used, -compressed, -uncompressed, tx); | |
129 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); | |
130 return; | |
131 } | |
132 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); | |
133 | |
134 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
135 | |
136 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { | |
137 dprintf_bp(bp, "freeing: %s", ""); | |
138 /* XXX check return code? */ | |
139 (void) arc_free(NULL, tx->tx_pool->dp_spa, | |
140 tx->tx_txg, bp, NULL, NULL, ARC_WAIT); | |
141 | |
142 mutex_enter(&ds->ds_lock); | |
143 /* XXX unique_bytes is not accurate for head datasets */ | |
144 /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */ | |
145 ds->ds_phys->ds_unique_bytes -= used; | |
146 mutex_exit(&ds->ds_lock); | |
147 dsl_dir_diduse_space(ds->ds_dir, | |
148 -used, -compressed, -uncompressed, tx); | |
149 } else { | |
150 dprintf_bp(bp, "putting on dead list: %s", ""); | |
1544 | 151 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); |
789 | 152 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ |
153 if (ds->ds_phys->ds_prev_snap_obj != 0) { | |
154 ASSERT3U(ds->ds_prev->ds_object, ==, | |
155 ds->ds_phys->ds_prev_snap_obj); | |
156 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); | |
157 if (ds->ds_prev->ds_phys->ds_next_snap_obj == | |
158 ds->ds_object && | |
159 bp->blk_birth > | |
160 ds->ds_prev->ds_phys->ds_prev_snap_txg) { | |
161 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
162 mutex_enter(&ds->ds_prev->ds_lock); | |
163 ds->ds_prev->ds_phys->ds_unique_bytes += | |
164 used; | |
165 mutex_exit(&ds->ds_prev->ds_lock); | |
166 } | |
167 } | |
168 } | |
169 bzero(bp, sizeof (blkptr_t)); | |
170 mutex_enter(&ds->ds_lock); | |
171 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); | |
172 ds->ds_phys->ds_used_bytes -= used; | |
173 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); | |
174 ds->ds_phys->ds_compressed_bytes -= compressed; | |
175 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); | |
176 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; | |
177 mutex_exit(&ds->ds_lock); | |
178 } | |
179 | |
1544 | 180 uint64_t |
181 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) | |
789 | 182 { |
1544 | 183 uint64_t txg; |
789 | 184 dsl_dir_t *dd; |
1544 | 185 |
789 | 186 if (ds == NULL) |
1544 | 187 return (0); |
789 | 188 /* |
189 * The snapshot creation could fail, but that would cause an | |
190 * incorrect FALSE return, which would only result in an | |
191 * overestimation of the amount of space that an operation would | |
192 * consume, which is OK. | |
193 * | |
194 * There's also a small window where we could miss a pending | |
195 * snapshot, because we could set the sync task in the quiescing | |
196 * phase. So this should only be used as a guess. | |
197 */ | |
198 dd = ds->ds_dir; | |
199 mutex_enter(&dd->dd_lock); | |
1544 | 200 if (dd->dd_sync_func == dsl_dataset_snapshot_sync) |
201 txg = dd->dd_sync_txg; | |
789 | 202 else |
1544 | 203 txg = ds->ds_phys->ds_prev_snap_txg; |
789 | 204 mutex_exit(&dd->dd_lock); |
1544 | 205 |
206 return (txg); | |
207 } | |
208 | |
209 int | |
210 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) | |
211 { | |
212 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); | |
789 | 213 } |
214 | |
215 /* ARGSUSED */ | |
216 static void | |
217 dsl_dataset_evict(dmu_buf_t *db, void *dsv) | |
218 { | |
219 dsl_dataset_t *ds = dsv; | |
220 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
221 | |
222 /* open_refcount == DOS_REF_MAX when deleting */ | |
223 ASSERT(ds->ds_open_refcount == 0 || | |
224 ds->ds_open_refcount == DOS_REF_MAX); | |
225 | |
226 dprintf_ds(ds, "evicting %s\n", ""); | |
227 | |
228 unique_remove(ds->ds_phys->ds_fsid_guid); | |
229 | |
230 if (ds->ds_user_ptr != NULL) | |
231 ds->ds_user_evict_func(ds, ds->ds_user_ptr); | |
232 | |
233 if (ds->ds_prev) { | |
234 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); | |
235 ds->ds_prev = NULL; | |
236 } | |
237 | |
238 bplist_close(&ds->ds_deadlist); | |
239 dsl_dir_close(ds->ds_dir, ds); | |
240 | |
241 if (list_link_active(&ds->ds_synced_link)) | |
242 list_remove(&dp->dp_synced_objsets, ds); | |
243 | |
244 kmem_free(ds, sizeof (dsl_dataset_t)); | |
245 } | |
246 | |
1544 | 247 static int |
789 | 248 dsl_dataset_get_snapname(dsl_dataset_t *ds) |
249 { | |
250 dsl_dataset_phys_t *headphys; | |
251 int err; | |
252 dmu_buf_t *headdbuf; | |
253 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
254 objset_t *mos = dp->dp_meta_objset; | |
255 | |
256 if (ds->ds_snapname[0]) | |
1544 | 257 return (0); |
789 | 258 if (ds->ds_phys->ds_next_snap_obj == 0) |
1544 | 259 return (0); |
789 | 260 |
1544 | 261 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, |
262 FTAG, &headdbuf); | |
263 if (err) | |
264 return (err); | |
789 | 265 headphys = headdbuf->db_data; |
266 err = zap_value_search(dp->dp_meta_objset, | |
267 headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname); | |
1544 | 268 dmu_buf_rele(headdbuf, FTAG); |
269 return (err); | |
789 | 270 } |
271 | |
1544 | 272 int |
789 | 273 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, |
1544 | 274 int mode, void *tag, dsl_dataset_t **dsp) |
789 | 275 { |
276 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; | |
277 objset_t *mos = dp->dp_meta_objset; | |
278 dmu_buf_t *dbuf; | |
279 dsl_dataset_t *ds; | |
1544 | 280 int err; |
789 | 281 |
282 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || | |
283 dsl_pool_sync_context(dp)); | |
284 | |
1544 | 285 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); |
286 if (err) | |
287 return (err); | |
789 | 288 ds = dmu_buf_get_user(dbuf); |
289 if (ds == NULL) { | |
290 dsl_dataset_t *winner; | |
291 | |
292 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); | |
293 ds->ds_dbuf = dbuf; | |
294 ds->ds_object = dsobj; | |
295 ds->ds_phys = dbuf->db_data; | |
296 | |
1544 | 297 err = bplist_open(&ds->ds_deadlist, |
789 | 298 mos, ds->ds_phys->ds_deadlist_obj); |
1544 | 299 if (err == 0) { |
300 err = dsl_dir_open_obj(dp, | |
301 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); | |
302 } | |
303 if (err) { | |
304 /* | |
305 * we don't really need to close the blist if we | |
306 * just opened it. | |
307 */ | |
308 kmem_free(ds, sizeof (dsl_dataset_t)); | |
309 dmu_buf_rele(dbuf, tag); | |
310 return (err); | |
311 } | |
789 | 312 |
313 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { | |
314 ds->ds_snapname[0] = '\0'; | |
315 if (ds->ds_phys->ds_prev_snap_obj) { | |
1544 | 316 err = dsl_dataset_open_obj(dp, |
789 | 317 ds->ds_phys->ds_prev_snap_obj, NULL, |
1544 | 318 DS_MODE_NONE, ds, &ds->ds_prev); |
789 | 319 } |
320 } else { | |
321 if (snapname) { | |
322 #ifdef ZFS_DEBUG | |
323 dsl_dataset_phys_t *headphys; | |
1544 | 324 dmu_buf_t *headdbuf; |
325 err = dmu_bonus_hold(mos, | |
326 ds->ds_dir->dd_phys->dd_head_dataset_obj, | |
327 FTAG, &headdbuf); | |
328 if (err == 0) { | |
329 headphys = headdbuf->db_data; | |
330 uint64_t foundobj; | |
331 err = zap_lookup(dp->dp_meta_objset, | |
332 headphys->ds_snapnames_zapobj, | |
333 snapname, sizeof (foundobj), 1, | |
334 &foundobj); | |
335 ASSERT3U(foundobj, ==, dsobj); | |
336 dmu_buf_rele(headdbuf, FTAG); | |
337 } | |
789 | 338 #endif |
339 (void) strcat(ds->ds_snapname, snapname); | |
340 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { | |
1544 | 341 err = dsl_dataset_get_snapname(ds); |
789 | 342 } |
343 } | |
344 | |
1544 | 345 if (err == 0) { |
346 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, | |
347 dsl_dataset_evict); | |
348 } | |
349 if (err || winner) { | |
789 | 350 bplist_close(&ds->ds_deadlist); |
351 if (ds->ds_prev) { | |
352 dsl_dataset_close(ds->ds_prev, | |
353 DS_MODE_NONE, ds); | |
354 } | |
355 dsl_dir_close(ds->ds_dir, ds); | |
356 kmem_free(ds, sizeof (dsl_dataset_t)); | |
1544 | 357 if (err) { |
358 dmu_buf_rele(dbuf, tag); | |
359 return (err); | |
360 } | |
789 | 361 ds = winner; |
362 } else { | |
363 uint64_t new = | |
364 unique_insert(ds->ds_phys->ds_fsid_guid); | |
365 if (new != ds->ds_phys->ds_fsid_guid) { | |
366 /* XXX it won't necessarily be synced... */ | |
367 ds->ds_phys->ds_fsid_guid = new; | |
368 } | |
369 } | |
370 } | |
371 ASSERT3P(ds->ds_dbuf, ==, dbuf); | |
372 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); | |
373 | |
374 mutex_enter(&ds->ds_lock); | |
375 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && | |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
376 ds->ds_phys->ds_inconsistent && !DS_MODE_IS_INCONSISTENT(mode)) || |
789 | 377 (ds->ds_open_refcount + weight > DOS_REF_MAX)) { |
378 mutex_exit(&ds->ds_lock); | |
379 dsl_dataset_close(ds, DS_MODE_NONE, tag); | |
1544 | 380 return (EBUSY); |
789 | 381 } |
382 ds->ds_open_refcount += weight; | |
383 mutex_exit(&ds->ds_lock); | |
384 | |
1544 | 385 *dsp = ds; |
386 return (0); | |
789 | 387 } |
388 | |
389 int | |
390 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, | |
391 void *tag, dsl_dataset_t **dsp) | |
392 { | |
393 dsl_dir_t *dd; | |
394 dsl_pool_t *dp; | |
395 const char *tail; | |
396 uint64_t obj; | |
397 dsl_dataset_t *ds = NULL; | |
398 int err = 0; | |
399 | |
1544 | 400 err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); |
401 if (err) | |
402 return (err); | |
789 | 403 |
404 dp = dd->dd_pool; | |
405 obj = dd->dd_phys->dd_head_dataset_obj; | |
406 rw_enter(&dp->dp_config_rwlock, RW_READER); | |
407 if (obj == 0) { | |
408 /* A dataset with no associated objset */ | |
409 err = ENOENT; | |
410 goto out; | |
411 } | |
412 | |
413 if (tail != NULL) { | |
414 objset_t *mos = dp->dp_meta_objset; | |
415 | |
1544 | 416 err = dsl_dataset_open_obj(dp, obj, NULL, |
417 DS_MODE_NONE, tag, &ds); | |
418 if (err) | |
419 goto out; | |
789 | 420 obj = ds->ds_phys->ds_snapnames_zapobj; |
421 dsl_dataset_close(ds, DS_MODE_NONE, tag); | |
422 ds = NULL; | |
423 | |
424 if (tail[0] != '@') { | |
425 err = ENOENT; | |
426 goto out; | |
427 } | |
428 tail++; | |
429 | |
430 /* Look for a snapshot */ | |
431 if (!DS_MODE_IS_READONLY(mode)) { | |
432 err = EROFS; | |
433 goto out; | |
434 } | |
435 dprintf("looking for snapshot '%s'\n", tail); | |
436 err = zap_lookup(mos, obj, tail, 8, 1, &obj); | |
437 if (err) | |
438 goto out; | |
439 } | |
1544 | 440 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); |
789 | 441 |
442 out: | |
443 rw_exit(&dp->dp_config_rwlock); | |
444 dsl_dir_close(dd, FTAG); | |
445 | |
446 ASSERT3U((err == 0), ==, (ds != NULL)); | |
447 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ | |
448 | |
449 *dsp = ds; | |
450 return (err); | |
451 } | |
452 | |
453 int | |
454 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) | |
455 { | |
456 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); | |
457 } | |
458 | |
459 void | |
460 dsl_dataset_name(dsl_dataset_t *ds, char *name) | |
461 { | |
462 if (ds == NULL) { | |
463 (void) strcpy(name, "mos"); | |
464 } else { | |
465 dsl_dir_name(ds->ds_dir, name); | |
1544 | 466 VERIFY(0 == dsl_dataset_get_snapname(ds)); |
789 | 467 if (ds->ds_snapname[0]) { |
468 (void) strcat(name, "@"); | |
469 if (!MUTEX_HELD(&ds->ds_lock)) { | |
470 /* | |
471 * We use a "recursive" mutex so that we | |
472 * can call dprintf_ds() with ds_lock held. | |
473 */ | |
474 mutex_enter(&ds->ds_lock); | |
475 (void) strcat(name, ds->ds_snapname); | |
476 mutex_exit(&ds->ds_lock); | |
477 } else { | |
478 (void) strcat(name, ds->ds_snapname); | |
479 } | |
480 } | |
481 } | |
482 } | |
483 | |
484 void | |
485 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) | |
486 { | |
487 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; | |
488 mutex_enter(&ds->ds_lock); | |
489 ASSERT3U(ds->ds_open_refcount, >=, weight); | |
490 ds->ds_open_refcount -= weight; | |
491 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", | |
492 mode, ds->ds_open_refcount); | |
493 mutex_exit(&ds->ds_lock); | |
494 | |
1544 | 495 dmu_buf_rele(ds->ds_dbuf, tag); |
789 | 496 } |
497 | |
498 void | |
499 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) | |
500 { | |
501 objset_t *mos = dp->dp_meta_objset; | |
502 dmu_buf_t *dbuf; | |
503 dsl_dataset_phys_t *dsphys; | |
504 dsl_dataset_t *ds; | |
505 uint64_t dsobj; | |
506 dsl_dir_t *dd; | |
507 | |
508 dsl_dir_create_root(mos, ddobjp, tx); | |
1544 | 509 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); |
789 | 510 |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
511 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
512 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 513 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 514 dmu_buf_will_dirty(dbuf, tx); |
515 dsphys = dbuf->db_data; | |
516 dsphys->ds_dir_obj = dd->dd_object; | |
517 dsphys->ds_fsid_guid = unique_create(); | |
1544 | 518 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ |
789 | 519 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, |
520 sizeof (dsphys->ds_guid)); | |
521 dsphys->ds_snapnames_zapobj = | |
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
522 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); |
789 | 523 dsphys->ds_creation_time = gethrestime_sec(); |
524 dsphys->ds_creation_txg = tx->tx_txg; | |
525 dsphys->ds_deadlist_obj = | |
526 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1544 | 527 dmu_buf_rele(dbuf, FTAG); |
789 | 528 |
529 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
530 dd->dd_phys->dd_head_dataset_obj = dsobj; | |
531 dsl_dir_close(dd, FTAG); | |
532 | |
1544 | 533 VERIFY(0 == |
534 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); | |
789 | 535 (void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx); |
536 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
537 } | |
538 | |
539 int | |
540 dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname, | |
541 const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) | |
542 { | |
543 int err; | |
544 dsl_pool_t *dp = pds->dd_pool; | |
545 dmu_buf_t *dbuf; | |
546 dsl_dataset_phys_t *dsphys; | |
547 uint64_t dsobj; | |
548 objset_t *mos = dp->dp_meta_objset; | |
549 dsl_dir_t *dd; | |
550 | |
551 if (clone_parent != NULL) { | |
552 /* | |
553 * You can't clone across pools. | |
554 */ | |
555 if (clone_parent->ds_dir->dd_pool != dp) | |
556 return (EXDEV); | |
557 | |
558 /* | |
559 * You can only clone snapshots, not the head datasets. | |
560 */ | |
561 if (clone_parent->ds_phys->ds_num_children == 0) | |
562 return (EINVAL); | |
563 } | |
564 | |
565 ASSERT(lastname[0] != '@'); | |
566 ASSERT(dmu_tx_is_syncing(tx)); | |
567 | |
568 err = dsl_dir_create_sync(pds, lastname, tx); | |
569 if (err) | |
570 return (err); | |
1544 | 571 VERIFY(0 == dsl_dir_open_spa(dp->dp_spa, fullname, FTAG, &dd, NULL)); |
789 | 572 |
573 /* This is the point of no (unsuccessful) return */ | |
574 | |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
575 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
576 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 577 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 578 dmu_buf_will_dirty(dbuf, tx); |
579 dsphys = dbuf->db_data; | |
580 dsphys->ds_dir_obj = dd->dd_object; | |
581 dsphys->ds_fsid_guid = unique_create(); | |
582 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ | |
583 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, | |
584 sizeof (dsphys->ds_guid)); | |
585 dsphys->ds_snapnames_zapobj = | |
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
586 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); |
789 | 587 dsphys->ds_creation_time = gethrestime_sec(); |
588 dsphys->ds_creation_txg = tx->tx_txg; | |
589 dsphys->ds_deadlist_obj = | |
590 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
591 if (clone_parent) { | |
592 dsphys->ds_prev_snap_obj = clone_parent->ds_object; | |
593 dsphys->ds_prev_snap_txg = | |
594 clone_parent->ds_phys->ds_creation_txg; | |
595 dsphys->ds_used_bytes = | |
596 clone_parent->ds_phys->ds_used_bytes; | |
597 dsphys->ds_compressed_bytes = | |
598 clone_parent->ds_phys->ds_compressed_bytes; | |
599 dsphys->ds_uncompressed_bytes = | |
600 clone_parent->ds_phys->ds_uncompressed_bytes; | |
601 dsphys->ds_bp = clone_parent->ds_phys->ds_bp; | |
602 | |
603 dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); | |
604 clone_parent->ds_phys->ds_num_children++; | |
605 | |
606 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
607 dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; | |
608 } | |
1544 | 609 dmu_buf_rele(dbuf, FTAG); |
789 | 610 |
611 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
612 dd->dd_phys->dd_head_dataset_obj = dsobj; | |
613 dsl_dir_close(dd, FTAG); | |
614 | |
615 return (0); | |
616 } | |
617 | |
618 int | |
619 dsl_dataset_destroy(const char *name) | |
620 { | |
621 int err; | |
622 dsl_pool_t *dp; | |
623 dsl_dir_t *dd; | |
624 const char *tail; | |
625 | |
1544 | 626 err = dsl_dir_open(name, FTAG, &dd, &tail); |
627 if (err) | |
628 return (err); | |
789 | 629 |
630 dp = dd->dd_pool; | |
631 if (tail != NULL) { | |
632 if (tail[0] != '@') { | |
633 dsl_dir_close(dd, FTAG); | |
634 return (ENOENT); | |
635 } | |
636 tail++; | |
637 /* Just blow away the snapshot */ | |
638 do { | |
639 txg_wait_synced(dp, 0); | |
640 err = dsl_dir_sync_task(dd, | |
641 dsl_dataset_destroy_sync, (void*)tail, 0); | |
642 } while (err == EAGAIN); | |
643 dsl_dir_close(dd, FTAG); | |
644 } else { | |
645 char buf[MAXNAMELEN]; | |
646 char *cp; | |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
647 objset_t *os; |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
648 uint64_t obj; |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
649 dsl_dir_t *pds; |
789 | 650 |
651 if (dd->dd_phys->dd_parent_obj == 0) { | |
652 dsl_dir_close(dd, FTAG); | |
653 return (EINVAL); | |
654 } | |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
655 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
656 err = dmu_objset_open(name, DMU_OST_ANY, |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
657 DS_MODE_PRIMARY | DS_MODE_INCONSISTENT, &os); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
658 if (err) { |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
659 dsl_dir_close(dd, FTAG); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
660 return (err); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
661 } |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
662 |
789 | 663 /* |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
664 * Check for errors and mark this ds as inconsistent, in |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
665 * case we crash while freeing the objects. |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
666 */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
667 err = dsl_dir_sync_task(os->os->os_dsl_dataset->ds_dir, |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
668 dsl_dataset_destroy_begin_sync, os->os->os_dsl_dataset, 0); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
669 if (err) { |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
670 dmu_objset_close(os); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
671 dsl_dir_close(dd, FTAG); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
672 return (err); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
673 } |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
674 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
675 /* |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
676 * remove the objects in open context, so that we won't |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
677 * have too much to do in syncing context. |
789 | 678 */ |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
679 for (obj = 0; err == 0; |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
680 err = dmu_object_next(os, &obj, FALSE)) { |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
681 dmu_tx_t *tx = dmu_tx_create(os); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
682 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
683 dmu_tx_hold_bonus(tx, obj); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
684 err = dmu_tx_assign(tx, TXG_WAIT); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
685 if (err) { |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
686 /* |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
687 * Perhaps there is not enough disk |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
688 * space. Just deal with it from |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
689 * dsl_dataset_destroy_sync(). |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
690 */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
691 dmu_tx_abort(tx); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
692 continue; |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
693 } |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
694 VERIFY(0 == dmu_object_free(os, obj, tx)); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
695 dmu_tx_commit(tx); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
696 } |
1758
d0750a16db04
6397267 assertion failed: (link->list_next == 0) == (link->list_prev == 0)
ahrens
parents:
1731
diff
changeset
|
697 /* Make sure it's not dirty before we finish destroying it. */ |
d0750a16db04
6397267 assertion failed: (link->list_next == 0) == (link->list_prev == 0)
ahrens
parents:
1731
diff
changeset
|
698 txg_wait_synced(dd->dd_pool, 0); |
d0750a16db04
6397267 assertion failed: (link->list_next == 0) == (link->list_prev == 0)
ahrens
parents:
1731
diff
changeset
|
699 |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
700 dmu_objset_close(os); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
701 if (err != ESRCH) { |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
702 dsl_dir_close(dd, FTAG); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
703 return (err); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
704 } |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
705 |
789 | 706 /* |
707 * Blow away the dsl_dir + head dataset. | |
708 * dsl_dir_destroy_sync() will call | |
709 * dsl_dataset_destroy_sync() to destroy the head dataset. | |
710 */ | |
711 rw_enter(&dp->dp_config_rwlock, RW_READER); | |
1544 | 712 err = dsl_dir_open_obj(dd->dd_pool, |
713 dd->dd_phys->dd_parent_obj, NULL, FTAG, &pds); | |
789 | 714 dsl_dir_close(dd, FTAG); |
715 rw_exit(&dp->dp_config_rwlock); | |
1544 | 716 if (err) |
717 return (err); | |
789 | 718 |
719 (void) strcpy(buf, name); | |
720 cp = strrchr(buf, '/') + 1; | |
721 ASSERT(cp[0] != '\0'); | |
722 do { | |
723 txg_wait_synced(dp, 0); | |
724 err = dsl_dir_sync_task(pds, | |
725 dsl_dir_destroy_sync, cp, 0); | |
726 } while (err == EAGAIN); | |
727 dsl_dir_close(pds, FTAG); | |
728 } | |
729 | |
730 return (err); | |
731 } | |
732 | |
733 int | |
734 dsl_dataset_rollback(const char *name) | |
735 { | |
736 int err; | |
737 dsl_dir_t *dd; | |
738 const char *tail; | |
739 | |
1544 | 740 err = dsl_dir_open(name, FTAG, &dd, &tail); |
741 if (err) | |
742 return (err); | |
789 | 743 |
744 if (tail != NULL) { | |
745 dsl_dir_close(dd, FTAG); | |
746 return (EINVAL); | |
747 } | |
748 do { | |
749 txg_wait_synced(dd->dd_pool, 0); | |
750 err = dsl_dir_sync_task(dd, | |
751 dsl_dataset_rollback_sync, NULL, 0); | |
752 } while (err == EAGAIN); | |
753 dsl_dir_close(dd, FTAG); | |
754 | |
755 return (err); | |
756 } | |
757 | |
758 void * | |
759 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, | |
760 void *p, dsl_dataset_evict_func_t func) | |
761 { | |
762 void *old; | |
763 | |
764 mutex_enter(&ds->ds_lock); | |
765 old = ds->ds_user_ptr; | |
766 if (old == NULL) { | |
767 ds->ds_user_ptr = p; | |
768 ds->ds_user_evict_func = func; | |
769 } | |
770 mutex_exit(&ds->ds_lock); | |
771 return (old); | |
772 } | |
773 | |
774 void * | |
775 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) | |
776 { | |
777 return (ds->ds_user_ptr); | |
778 } | |
779 | |
780 | |
781 void | |
782 dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp) | |
783 { | |
784 *bp = ds->ds_phys->ds_bp; | |
785 } | |
786 | |
787 void | |
788 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
789 { | |
790 ASSERT(dmu_tx_is_syncing(tx)); | |
791 /* If it's the meta-objset, set dp_meta_rootbp */ | |
792 if (ds == NULL) { | |
793 tx->tx_pool->dp_meta_rootbp = *bp; | |
794 } else { | |
795 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
796 ds->ds_phys->ds_bp = *bp; | |
797 } | |
798 } | |
799 | |
800 spa_t * | |
801 dsl_dataset_get_spa(dsl_dataset_t *ds) | |
802 { | |
803 return (ds->ds_dir->dd_pool->dp_spa); | |
804 } | |
805 | |
806 void | |
807 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) | |
808 { | |
809 dsl_pool_t *dp; | |
810 | |
811 if (ds == NULL) /* this is the meta-objset */ | |
812 return; | |
813 | |
814 ASSERT(ds->ds_user_ptr != NULL); | |
815 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); | |
816 | |
817 dp = ds->ds_dir->dd_pool; | |
818 | |
819 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { | |
820 /* up the hold count until we can be written out */ | |
821 dmu_buf_add_ref(ds->ds_dbuf, ds); | |
822 } | |
823 } | |
824 | |
/*
 * Argument bundle handed to kill_blkptr() through the traversal code.
 * The three counters are accumulated by kill_blkptr() for every block
 * pointer it frees.
 */
struct killarg {
	uint64_t *usedp;		/* += BP_GET_ASIZE() of each freed bp */
	uint64_t *compressedp;		/* += BP_GET_PSIZE() of each freed bp */
	uint64_t *uncompressedp;	/* += BP_GET_UCSIZE() of each freed bp */
	zio_t *zio;			/* zio passed to arc_free() */
	dmu_tx_t *tx;			/* supplies the txg passed to arc_free() */
};
832 | |
833 static int | |
834 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) | |
835 { | |
836 struct killarg *ka = arg; | |
837 blkptr_t *bp = &bc->bc_blkptr; | |
838 | |
839 ASSERT3U(bc->bc_errno, ==, 0); | |
840 | |
841 /* | |
842 * Since this callback is not called concurrently, no lock is | |
843 * needed on the accounting values. | |
844 */ | |
845 *ka->usedp += BP_GET_ASIZE(bp); | |
846 *ka->compressedp += BP_GET_PSIZE(bp); | |
847 *ka->uncompressedp += BP_GET_UCSIZE(bp); | |
848 /* XXX check for EIO? */ | |
849 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, | |
850 ARC_NOWAIT); | |
851 return (0); | |
852 } | |
853 | |
/*
 * Sync task that rolls dd's head dataset back to its previous snapshot.
 * 'arg' is unused.
 *
 * Returns:
 *	EINVAL	dd has no head dataset, or the head has no previous snapshot
 *	EBUSY	the head dataset's open refcount is non-zero
 *	EAGAIN	the head's root block was born in this txg (or later); the
 *		caller is expected to wait for a sync and retry
 *	0	success
 *	other	error from dsl_dataset_open_obj()
 *
 * On success the head's deadlist is replaced with a fresh empty one, every
 * block born after the previous snapshot is freed, and the head's block
 * pointer and space accounting are copied from the previous snapshot.
 */
/* ARGSUSED */
int
dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
{
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dsl_dataset_t *ds;
	int err;

	if (dd->dd_phys->dd_head_dataset_obj == 0)
		return (EINVAL);
	err = dsl_dataset_open_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &ds);
	if (err)
		return (err);

	if (ds->ds_phys->ds_prev_snap_txg == 0) {
		/*
		 * There's no previous snapshot.  I suppose we could
		 * roll it back to being empty (and re-initialize the
		 * upper (ZPL) layer).  But for now there's no way to do
		 * this via the user interface.
		 */
		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
		return (EINVAL);
	}

	/* Refuse to roll back while anybody else has the dataset open. */
	mutex_enter(&ds->ds_lock);
	if (ds->ds_open_refcount > 0) {
		mutex_exit(&ds->ds_lock);
		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
		return (EBUSY);
	}

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
		mutex_exit(&ds->ds_lock);
		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
		return (EAGAIN);
	}

	/* THE POINT OF NO (unsuccessful) RETURN */
	/*
	 * Pin the refcount at DOS_REF_MAX for the duration of the rollback;
	 * it is reset to 0 just before the final close below.
	 */
	ds->ds_open_refcount = DOS_REF_MAX;
	mutex_exit(&ds->ds_lock);

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/* Zero out the deadlist. */
	dprintf("old deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));
	dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);

	{
		/* Free blkptrs that we gave birth to */
		zio_t *zio;
		uint64_t used = 0, compressed = 0, uncompressed = 0;
		struct killarg ka;

		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		/* The traversal and zio_wait() results are deliberately ignored. */
		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);

		/* Give the space freed by kill_blkptr() back to the dsl_dir. */
		dsl_dir_diduse_space(dd,
		    -used, -compressed, -uncompressed, tx);
	}

	/* Change our contents to that of the prev snapshot (finally!) */
	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
	ds->ds_phys->ds_compressed_bytes =
	    ds->ds_prev->ds_phys->ds_compressed_bytes;
	ds->ds_phys->ds_uncompressed_bytes =
	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
	ds->ds_phys->ds_inconsistent = ds->ds_prev->ds_phys->ds_inconsistent;
	ds->ds_phys->ds_unique_bytes = 0;

	/* The previous snapshot's unique-bytes count is reset to zero too. */
	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
	ds->ds_prev->ds_phys->ds_unique_bytes = 0;

	dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
	ds->ds_open_refcount = 0;
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);

	return (0);
}
954 | |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
955 /* ARGSUSED */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
956 static int |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
957 dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
958 { |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
959 dsl_dataset_t *ds = arg; |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
960 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
961 /* |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
962 * Can't delete a head dataset if there are snapshots of it. |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
963 * (Except if the only snapshots are from the branch we cloned |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
964 * from.) |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
965 */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
966 if (ds->ds_prev != NULL && |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
967 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
968 return (EINVAL); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
969 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
970 /* Mark it as inconsistent on-disk, in case we crash */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
971 dmu_buf_will_dirty(ds->ds_dbuf, tx); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
972 ds->ds_phys->ds_inconsistent = TRUE; |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
973 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
974 return (0); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
975 } |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
976 |
789 | 977 int |
978 dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) | |
979 { | |
980 const char *snapname = arg; | |
981 uint64_t used = 0, compressed = 0, uncompressed = 0; | |
982 blkptr_t bp; | |
983 zio_t *zio; | |
984 int err; | |
985 int after_branch_point = FALSE; | |
986 int drop_lock = FALSE; | |
987 dsl_pool_t *dp = dd->dd_pool; | |
988 objset_t *mos = dp->dp_meta_objset; | |
989 dsl_dataset_t *ds, *ds_prev = NULL; | |
990 uint64_t obj; | |
991 | |
992 if (dd->dd_phys->dd_head_dataset_obj == 0) | |
993 return (EINVAL); | |
994 | |
995 if (!RW_WRITE_HELD(&dp->dp_config_rwlock)) { | |
996 rw_enter(&dp->dp_config_rwlock, RW_WRITER); | |
997 drop_lock = TRUE; | |
998 } | |
999 | |
1544 | 1000 err = dsl_dataset_open_obj(dd->dd_pool, |
789 | 1001 dd->dd_phys->dd_head_dataset_obj, NULL, |
1544 | 1002 snapname ? DS_MODE_NONE : DS_MODE_EXCLUSIVE, FTAG, &ds); |
789 | 1003 |
1544 | 1004 if (err == 0 && snapname) { |
789 | 1005 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, |
1006 snapname, 8, 1, &obj); | |
1007 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
1544 | 1008 if (err == 0) { |
1009 err = dsl_dataset_open_obj(dd->dd_pool, obj, NULL, | |
1010 DS_MODE_EXCLUSIVE, FTAG, &ds); | |
789 | 1011 } |
1012 } | |
1544 | 1013 if (err) { |
789 | 1014 if (drop_lock) |
1015 rw_exit(&dp->dp_config_rwlock); | |
1544 | 1016 return (err); |
789 | 1017 } |
1018 | |
1019 obj = ds->ds_object; | |
1020 | |
1021 /* Can't delete a branch point. */ | |
1022 if (ds->ds_phys->ds_num_children > 1) { | |
1023 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1024 if (drop_lock) | |
1025 rw_exit(&dp->dp_config_rwlock); | |
1026 return (EINVAL); | |
1027 } | |
1028 | |
1029 /* | |
1030 * Can't delete a head dataset if there are snapshots of it. | |
1031 * (Except if the only snapshots are from the branch we cloned | |
1032 * from.) | |
1033 */ | |
1034 if (ds->ds_prev != NULL && | |
1035 ds->ds_prev->ds_phys->ds_next_snap_obj == obj) { | |
1036 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1037 if (drop_lock) | |
1038 rw_exit(&dp->dp_config_rwlock); | |
1039 return (EINVAL); | |
1040 } | |
1041 | |
1042 /* | |
1043 * If we made changes this txg, traverse_dsl_dataset won't find | |
1044 * them. Try again. | |
1045 */ | |
1046 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) { | |
1047 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
1048 if (drop_lock) |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
1049 rw_exit(&dp->dp_config_rwlock); |
789 | 1050 return (EAGAIN); |
1051 } | |
1052 | |
1053 if (ds->ds_phys->ds_prev_snap_obj != 0) { | |
1054 if (ds->ds_prev) { | |
1055 ds_prev = ds->ds_prev; | |
1056 } else { | |
1544 | 1057 err = dsl_dataset_open_obj(dd->dd_pool, |
789 | 1058 ds->ds_phys->ds_prev_snap_obj, NULL, |
1544 | 1059 DS_MODE_NONE, FTAG, &ds_prev); |
1060 if (err) { | |
1061 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
1062 if (drop_lock) | |
1063 rw_exit(&dp->dp_config_rwlock); | |
1064 return (err); | |
1065 } | |
789 | 1066 } |
1067 after_branch_point = | |
1068 (ds_prev->ds_phys->ds_next_snap_obj != obj); | |
1069 | |
1070 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); | |
1071 if (after_branch_point && | |
1072 ds->ds_phys->ds_next_snap_obj == 0) { | |
1073 /* This clone is toast. */ | |
1074 ASSERT(ds_prev->ds_phys->ds_num_children > 1); | |
1075 ds_prev->ds_phys->ds_num_children--; | |
1076 } else if (!after_branch_point) { | |
1077 ds_prev->ds_phys->ds_next_snap_obj = | |
1078 ds->ds_phys->ds_next_snap_obj; | |
1079 } | |
1080 } | |
1081 | |
1544 | 1082 /* THE POINT OF NO (unsuccessful) RETURN */ |
1083 | |
789 | 1084 ASSERT3P(tx->tx_pool, ==, dd->dd_pool); |
1085 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); | |
1086 | |
1087 if (ds->ds_phys->ds_next_snap_obj != 0) { | |
1088 dsl_dataset_t *ds_next; | |
1089 uint64_t itor = 0; | |
1090 | |
1091 spa_scrub_restart(dp->dp_spa, tx->tx_txg); | |
1092 | |
1544 | 1093 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, |
1094 ds->ds_phys->ds_next_snap_obj, NULL, | |
1095 DS_MODE_NONE, FTAG, &ds_next)); | |
789 | 1096 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); |
1097 | |
1098 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); | |
1099 ds_next->ds_phys->ds_prev_snap_obj = | |
1100 ds->ds_phys->ds_prev_snap_obj; | |
1101 ds_next->ds_phys->ds_prev_snap_txg = | |
1102 ds->ds_phys->ds_prev_snap_txg; | |
1103 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, | |
1104 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); | |
1105 | |
1106 /* | |
1107 * Transfer to our deadlist (which will become next's | |
1108 * new deadlist) any entries from next's current | |
1109 * deadlist which were born before prev, and free the | |
1110 * other entries. | |
1111 * | |
1112 * XXX we're doing this long task with the config lock held | |
1113 */ | |
1114 while (bplist_iterate(&ds_next->ds_deadlist, &itor, | |
1115 &bp) == 0) { | |
1116 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { | |
1544 | 1117 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, |
1118 &bp, tx)); | |
789 | 1119 if (ds_prev && !after_branch_point && |
1120 bp.blk_birth > | |
1121 ds_prev->ds_phys->ds_prev_snap_txg) { | |
1122 ds_prev->ds_phys->ds_unique_bytes += | |
1123 BP_GET_ASIZE(&bp); | |
1124 } | |
1125 } else { | |
1126 used += BP_GET_ASIZE(&bp); | |
1127 compressed += BP_GET_PSIZE(&bp); | |
1128 uncompressed += BP_GET_UCSIZE(&bp); | |
1129 /* XXX check return value? */ | |
1130 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, | |
1131 &bp, NULL, NULL, ARC_NOWAIT); | |
1132 } | |
1133 } | |
1134 | |
1135 /* free next's deadlist */ | |
1136 bplist_close(&ds_next->ds_deadlist); | |
1137 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); | |
1138 | |
1139 /* set next's deadlist to our deadlist */ | |
1140 ds_next->ds_phys->ds_deadlist_obj = | |
1141 ds->ds_phys->ds_deadlist_obj; | |
1544 | 1142 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, |
1143 ds_next->ds_phys->ds_deadlist_obj)); | |
789 | 1144 ds->ds_phys->ds_deadlist_obj = 0; |
1145 | |
1146 if (ds_next->ds_phys->ds_next_snap_obj != 0) { | |
1147 /* | |
1148 * Update next's unique to include blocks which | |
1149 * were previously shared by only this snapshot | |
1150 * and it. Those blocks will be born after the | |
1151 * prev snap and before this snap, and will have | |
1152 * died after the next snap and before the one | |
1153 * after that (ie. be on the snap after next's | |
1154 * deadlist). | |
1155 * | |
1156 * XXX we're doing this long task with the | |
1157 * config lock held | |
1158 */ | |
1159 dsl_dataset_t *ds_after_next; | |
1160 | |
1544 | 1161 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, |
789 | 1162 ds_next->ds_phys->ds_next_snap_obj, NULL, |
1544 | 1163 DS_MODE_NONE, FTAG, &ds_after_next)); |
789 | 1164 itor = 0; |
1165 while (bplist_iterate(&ds_after_next->ds_deadlist, | |
1166 &itor, &bp) == 0) { | |
1167 if (bp.blk_birth > | |
1168 ds->ds_phys->ds_prev_snap_txg && | |
1169 bp.blk_birth <= | |
1170 ds->ds_phys->ds_creation_txg) { | |
1171 ds_next->ds_phys->ds_unique_bytes += | |
1172 BP_GET_ASIZE(&bp); | |
1173 } | |
1174 } | |
1175 | |
1176 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); | |
1177 ASSERT3P(ds_next->ds_prev, ==, NULL); | |
1178 } else { | |
1179 /* | |
1180 * It would be nice to update the head dataset's | |
1181 * unique. To do so we would have to traverse | |
1182 * it for blocks born after ds_prev, which is | |
1183 * pretty expensive just to maintain something | |
1184 * for debugging purposes. | |
1185 */ | |
1186 ASSERT3P(ds_next->ds_prev, ==, ds); | |
1187 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, | |
1188 ds_next); | |
1189 if (ds_prev) { | |
1544 | 1190 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, |
1191 ds->ds_phys->ds_prev_snap_obj, NULL, | |
1192 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); | |
789 | 1193 } else { |
1194 ds_next->ds_prev = NULL; | |
1195 } | |
1196 } | |
1197 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); | |
1198 | |
1199 /* | |
1200 * NB: unique_bytes is not accurate for head objsets | |
1201 * because we don't update it when we delete the most | |
1202 * recent snapshot -- see above comment. | |
1203 */ | |
1204 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); | |
1205 } else { | |
1206 /* | |
1207 * There's no next snapshot, so this is a head dataset. | |
1208 * Destroy the deadlist. Unless it's a clone, the | |
1209 * deadlist should be empty. (If it's a clone, it's | |
1210 * safe to ignore the deadlist contents.) | |
1211 */ | |
1212 struct killarg ka; | |
1213 | |
1214 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); | |
1215 bplist_close(&ds->ds_deadlist); | |
1216 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); | |
1217 ds->ds_phys->ds_deadlist_obj = 0; | |
1218 | |
1219 /* | |
1220 * Free everything that we point to (that's born after | |
1221 * the previous snapshot, if we are a clone) | |
1222 * | |
1223 * XXX we're doing this long task with the config lock held | |
1224 */ | |
1225 ka.usedp = &used; | |
1226 ka.compressedp = &compressed; | |
1227 ka.uncompressedp = &uncompressed; | |
1228 ka.zio = zio; | |
1229 ka.tx = tx; | |
1230 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, | |
1231 ADVANCE_POST, kill_blkptr, &ka); | |
1232 ASSERT3U(err, ==, 0); | |
1233 } | |
1234 | |
1235 err = zio_wait(zio); | |
1236 ASSERT3U(err, ==, 0); | |
1237 | |
1238 dsl_dir_diduse_space(dd, -used, -compressed, -uncompressed, tx); | |
1239 | |
1240 if (ds->ds_phys->ds_snapnames_zapobj) { | |
1241 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); | |
1242 ASSERT(err == 0); | |
1243 } | |
1244 | |
1245 if (dd->dd_phys->dd_head_dataset_obj == ds->ds_object) { | |
1246 /* Erase the link in the dataset */ | |
1247 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
1248 dd->dd_phys->dd_head_dataset_obj = 0; | |
1249 /* | |
1250 * dsl_dir_sync_destroy() called us, they'll destroy | |
1251 * the dataset. | |
1252 */ | |
1253 } else { | |
1254 /* remove from snapshot namespace */ | |
1255 dsl_dataset_t *ds_head; | |
1544 | 1256 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, |
1257 dd->dd_phys->dd_head_dataset_obj, NULL, | |
1258 DS_MODE_NONE, FTAG, &ds_head)); | |
789 | 1259 #ifdef ZFS_DEBUG |
1260 { | |
1261 uint64_t val; | |
1262 err = zap_lookup(mos, | |
1263 ds_head->ds_phys->ds_snapnames_zapobj, | |
1264 snapname, 8, 1, &val); | |
1265 ASSERT3U(err, ==, 0); | |
1266 ASSERT3U(val, ==, obj); | |
1267 } | |
1268 #endif | |
1269 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, | |
1270 snapname, tx); | |
1271 ASSERT(err == 0); | |
1272 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); | |
1273 } | |
1274 | |
1275 if (ds_prev && ds->ds_prev != ds_prev) | |
1276 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); | |
1277 | |
1278 err = dmu_object_free(mos, obj, tx); | |
1279 ASSERT(err == 0); | |
1280 | |
1281 /* | |
1282 * Close the objset with mode NONE, thus leaving it with | |
1283 * DOS_REF_MAX set, so that noone can access it. | |
1284 */ | |
1285 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
1286 | |
1287 if (drop_lock) | |
1288 rw_exit(&dp->dp_config_rwlock); | |
1289 return (0); | |
1290 } | |
1291 | |
/*
 * Sync task that creates a snapshot named 'arg' of dd's head dataset.
 * Must be called with a syncing tx.
 *
 * Returns:
 *	EINVAL	dd has no head dataset
 *	EEXIST	a snapshot with that name already exists
 *	0	success
 *	other	error from dsl_dataset_open_obj()
 *
 * A new dataset object is allocated and filled in from the head's current
 * state (block pointer, space accounting, deadlist), linked in between the
 * head and the head's former previous snapshot, and entered in the head's
 * snapshot-name zap.  The head gets a new, empty deadlist.
 */
int
dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
{
	const char *snapname = arg;
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, value;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds;
	int err;

	ASSERT(dmu_tx_is_syncing(tx));

	if (dd->dd_phys->dd_head_dataset_obj == 0)
		return (EINVAL);
	err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_head_dataset_obj, NULL,
	    DS_MODE_NONE, FTAG, &ds);
	if (err)
		return (err);

	/* The snapshot name must not be taken yet. */
	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &value);
	if (err == 0) {
		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
		return (EEXIST);
	}
	/*
	 * NOTE(review): any lookup failure other than ENOENT is only caught
	 * by this assertion; confirm that is acceptable in non-debug builds.
	 */
	ASSERT(err == ENOENT);

	/* The point of no (unsuccessful) return */

	dprintf_dd(dd, "taking snapshot %s in txg %llu\n",
	    snapname, tx->tx_txg);

	spa_scrub_restart(dp->dp_spa, tx->tx_txg);

	rw_enter(&dp->dp_config_rwlock, RW_WRITER);

	/* Allocate the snapshot's dataset object and fill in its bonus. */
	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	/* The snapshot slots in between the head's old prev and the head. */
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	/* The snapshot inherits the head's deadlist and accounting. */
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_inconsistent = ds->ds_phys->ds_inconsistent;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	/*
	 * NOTE(review): dsphys is still read further down, after this
	 * release; presumably safe because the buffer is dirty in this
	 * txg -- confirm.
	 */
	dmu_buf_rele(dbuf, FTAG);

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		dsl_dataset_t *ds_prev;

		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_phys->ds_prev_snap_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_prev));
		ASSERT(ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds_prev->ds_phys->ds_num_children > 1);
		/*
		 * Only repoint prev's forward link if it pointed at the
		 * head; otherwise prev is a branch point we cloned from.
		 */
		if (ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds_prev->ds_phys->ds_creation_txg);
			ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		}
		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
	} else {
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 0);
	}

	/* Point the head at the new snapshot and give it a fresh deadlist. */
	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
	ds->ds_phys->ds_unique_bytes = 0;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	/* Replace the head's in-core ds_prev hold with the new snapshot. */
	if (ds->ds_prev)
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
	VERIFY(0 == dsl_dataset_open_obj(dp,
	    ds->ds_phys->ds_prev_snap_obj, snapname,
	    DS_MODE_NONE, ds, &ds->ds_prev));

	rw_exit(&dp->dp_config_rwlock);
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);

	return (0);
}
1401 | |
1402 void | |
1403 dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx) | |
1404 { | |
1405 ASSERT(dmu_tx_is_syncing(tx)); | |
1406 ASSERT(ds->ds_user_ptr != NULL); | |
1407 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); | |
1408 | |
1409 dmu_objset_sync(ds->ds_user_ptr, tx); | |
1410 dsl_dir_dirty(ds->ds_dir, tx); | |
1411 bplist_close(&ds->ds_deadlist); | |
1412 | |
1544 | 1413 dmu_buf_rele(ds->ds_dbuf, ds); |
789 | 1414 } |
1415 | |
1416 void | |
1417 dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds) | |
1418 { | |
1419 /* fill in properties crap */ | |
1420 dsl_dir_stats(ds->ds_dir, dds); | |
1421 | |
1422 if (ds->ds_phys->ds_num_children != 0) { | |
1423 dds->dds_is_snapshot = TRUE; | |
1424 dds->dds_num_clones = ds->ds_phys->ds_num_children - 1; | |
1425 } | |
1426 | |
1758
d0750a16db04
6397267 assertion failed: (link->list_next == 0) == (link->list_prev == 0)
ahrens
parents:
1731
diff
changeset
|
1427 dds->dds_inconsistent = ds->ds_phys->ds_inconsistent; |
789 | 1428 dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth; |
1429 | |
1430 dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill; | |
1431 dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used; | |
1432 | |
1433 /* We override the dataset's creation time... they should be the same */ | |
1434 dds->dds_creation_time = ds->ds_phys->ds_creation_time; | |
1435 dds->dds_creation_txg = ds->ds_phys->ds_creation_txg; | |
1436 dds->dds_space_refd = ds->ds_phys->ds_used_bytes; | |
1437 dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid; | |
1438 | |
1439 if (ds->ds_phys->ds_next_snap_obj) { | |
1440 /* | |
1441 * This is a snapshot; override the dd's space used with | |
1442 * our unique space | |
1443 */ | |
1444 dds->dds_space_used = ds->ds_phys->ds_unique_bytes; | |
1445 dds->dds_compressed_bytes = | |
1446 ds->ds_phys->ds_compressed_bytes; | |
1447 dds->dds_uncompressed_bytes = | |
1448 ds->ds_phys->ds_uncompressed_bytes; | |
1449 } | |
1450 } | |
1451 | |
1452 dsl_pool_t * | |
1453 dsl_dataset_pool(dsl_dataset_t *ds) | |
1454 { | |
1455 return (ds->ds_dir->dd_pool); | |
1456 } | |
1457 | |
/*
 * Argument bundle passed from dsl_dataset_rename() to
 * dsl_dataset_snapshot_rename_sync().
 */
struct osrenamearg {
	const char *oldname;	/* current full name of the snapshot */
	const char *newname;	/* requested full name, incl. "@snap" part */
};
1462 | |
1463 static int | |
1464 dsl_dataset_snapshot_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) | |
1465 { | |
1466 struct osrenamearg *ora = arg; | |
1467 objset_t *mos = dd->dd_pool->dp_meta_objset; | |
1468 dsl_dir_t *nds; | |
1469 const char *tail; | |
1470 int err; | |
1471 dsl_dataset_t *snds, *fsds; | |
1472 uint64_t val; | |
1473 | |
1474 err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, ora->oldname, | |
1475 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &snds); | |
1476 if (err) | |
1477 return (err); | |
1478 | |
1479 if (snds->ds_dir != dd) { | |
1480 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); | |
1481 return (EINVAL); | |
1482 } | |
1483 | |
1484 /* better be changing a snapshot */ | |
1485 if (snds->ds_phys->ds_next_snap_obj == 0) { | |
1486 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); | |
1487 return (EINVAL); | |
1488 } | |
1489 | |
1490 /* new fs better exist */ | |
1544 | 1491 err = dsl_dir_open_spa(dd->dd_pool->dp_spa, ora->newname, |
1492 FTAG, &nds, &tail); | |
1493 if (err) { | |
789 | 1494 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); |
1544 | 1495 return (err); |
789 | 1496 } |
1497 | |
1498 dsl_dir_close(nds, FTAG); | |
1499 | |
1500 /* new name better be in same fs */ | |
1501 if (nds != dd) { | |
1502 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); | |
1503 return (EINVAL); | |
1504 } | |
1505 | |
1506 /* new name better be a snapshot */ | |
1507 if (tail == NULL || tail[0] != '@') { | |
1508 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); | |
1509 return (EINVAL); | |
1510 } | |
1511 | |
1512 tail++; | |
1513 | |
1544 | 1514 err = dsl_dataset_open_obj(dd->dd_pool, |
1515 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &fsds); | |
1516 if (err) { | |
1517 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); | |
1518 return (err); | |
1519 } | |
789 | 1520 |
1521 /* new name better not be in use */ | |
1522 err = zap_lookup(mos, fsds->ds_phys->ds_snapnames_zapobj, | |
1523 tail, 8, 1, &val); | |
1524 if (err != ENOENT) { | |
1525 if (err == 0) | |
1526 err = EEXIST; | |
1527 dsl_dataset_close(fsds, DS_MODE_NONE, FTAG); | |
1528 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); | |
1529 return (EEXIST); | |
1530 } | |
1531 | |
1532 /* The point of no (unsuccessful) return */ | |
1533 | |
1534 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER); | |
1544 | 1535 VERIFY(0 == dsl_dataset_get_snapname(snds)); |
789 | 1536 err = zap_remove(mos, fsds->ds_phys->ds_snapnames_zapobj, |
1537 snds->ds_snapname, tx); | |
1538 ASSERT3U(err, ==, 0); | |
1539 mutex_enter(&snds->ds_lock); | |
1540 (void) strcpy(snds->ds_snapname, tail); | |
1541 mutex_exit(&snds->ds_lock); | |
1542 err = zap_add(mos, fsds->ds_phys->ds_snapnames_zapobj, | |
1543 snds->ds_snapname, 8, 1, &snds->ds_object, tx); | |
1544 ASSERT3U(err, ==, 0); | |
1545 rw_exit(&dd->dd_pool->dp_config_rwlock); | |
1546 | |
1547 dsl_dataset_close(fsds, DS_MODE_NONE, FTAG); | |
1548 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); | |
1549 return (0); | |
1550 } | |
1551 | |
1552 #pragma weak dmu_objset_rename = dsl_dataset_rename | |
1553 int | |
1554 dsl_dataset_rename(const char *osname, const char *newname) | |
1555 { | |
1556 dsl_dir_t *dd; | |
1557 const char *tail; | |
1558 struct osrenamearg ora; | |
1559 int err; | |
1560 | |
1544 | 1561 err = dsl_dir_open(osname, FTAG, &dd, &tail); |
1562 if (err) | |
1563 return (err); | |
789 | 1564 if (tail == NULL) { |
1565 err = dsl_dir_sync_task(dd, | |
1566 dsl_dir_rename_sync, (void*)newname, 1<<12); | |
1567 dsl_dir_close(dd, FTAG); | |
1568 return (err); | |
1569 } | |
1570 if (tail[0] != '@') { | |
1571 /* the name ended in a nonexistant component */ | |
1572 dsl_dir_close(dd, FTAG); | |
1573 return (ENOENT); | |
1574 } | |
1575 | |
1576 ora.oldname = osname; | |
1577 ora.newname = newname; | |
1578 | |
1579 err = dsl_dir_sync_task(dd, | |
1580 dsl_dataset_snapshot_rename_sync, &ora, 1<<12); | |
1581 dsl_dir_close(dd, FTAG); | |
1582 return (err); | |
1583 } |