Mercurial > illumos > illumos-gate
annotate usr/src/uts/common/fs/zfs/dsl_dataset.c @ 2856:6f4d5ee1906a
6463348 ZFS code could be more portable
author | nd150628 |
---|---|
date | Tue, 03 Oct 2006 15:01:10 -0700 |
parents | 752725c22841 |
children | c0259887ebbc |
rev | line source |
---|---|
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1544 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
21 /* | |
1544 | 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 * Use is subject to license terms. |
24 */ | |
25 | |
26 #pragma ident "%Z%%M% %I% %E% SMI" | |
27 | |
28 #include <sys/dmu_objset.h> | |
29 #include <sys/dsl_dataset.h> | |
30 #include <sys/dsl_dir.h> | |
2082 | 31 #include <sys/dsl_prop.h> |
2199 | 32 #include <sys/dsl_synctask.h> |
789 | 33 #include <sys/dmu_traverse.h> |
34 #include <sys/dmu_tx.h> | |
35 #include <sys/arc.h> | |
36 #include <sys/zio.h> | |
37 #include <sys/zap.h> | |
38 #include <sys/unique.h> | |
39 #include <sys/zfs_context.h> | |
40 | |
2199 | 41 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; |
42 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; | |
43 static dsl_checkfunc_t dsl_dataset_rollback_check; | |
44 static dsl_syncfunc_t dsl_dataset_rollback_sync; | |
45 static dsl_checkfunc_t dsl_dataset_destroy_check; | |
46 static dsl_syncfunc_t dsl_dataset_destroy_sync; | |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
47 |
789 | 48 #define DOS_REF_MAX (1ULL << 62) |
49 | |
50 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE | |
51 | |
52 /* | |
53 * We use weighted reference counts to express the various forms of exclusion | |
54 * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open | |
55 * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. | |
56 * This makes the exclusion logic simple: the total refcnt for all opens cannot | |
57 * exceed DOS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their | |
58 * weight (DOS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume | |
59 * just over half of the refcnt space, so there can't be more than one, but it | |
60 * can peacefully coexist with any number of STANDARD opens. | |
61 */ | |
62 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { | |
63 0, /* DOS_MODE_NONE - invalid */ | |
64 1, /* DOS_MODE_STANDARD - unlimited number */ | |
65 (DOS_REF_MAX >> 1) + 1, /* DOS_MODE_PRIMARY - only one of these */ | |
66 DOS_REF_MAX /* DOS_MODE_EXCLUSIVE - no other opens */ | |
67 }; | |
68 | |
69 | |
70 void | |
71 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
72 { | |
2082 | 73 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); |
789 | 74 int compressed = BP_GET_PSIZE(bp); |
75 int uncompressed = BP_GET_UCSIZE(bp); | |
76 | |
77 dprintf_bp(bp, "born, ds=%p\n", ds); | |
78 | |
79 ASSERT(dmu_tx_is_syncing(tx)); | |
80 /* It could have been compressed away to nothing */ | |
81 if (BP_IS_HOLE(bp)) | |
82 return; | |
83 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); | |
84 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); | |
85 if (ds == NULL) { | |
86 /* | |
87 * Account for the meta-objset space in its placeholder | |
88 * dsl_dir. | |
89 */ | |
90 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ | |
91 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, | |
92 used, compressed, uncompressed, tx); | |
93 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); | |
94 return; | |
95 } | |
96 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
97 mutex_enter(&ds->ds_lock); | |
98 ds->ds_phys->ds_used_bytes += used; | |
99 ds->ds_phys->ds_compressed_bytes += compressed; | |
100 ds->ds_phys->ds_uncompressed_bytes += uncompressed; | |
101 ds->ds_phys->ds_unique_bytes += used; | |
102 mutex_exit(&ds->ds_lock); | |
103 dsl_dir_diduse_space(ds->ds_dir, | |
104 used, compressed, uncompressed, tx); | |
105 } | |
106 | |
107 void | |
108 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
109 { | |
2082 | 110 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); |
789 | 111 int compressed = BP_GET_PSIZE(bp); |
112 int uncompressed = BP_GET_UCSIZE(bp); | |
113 | |
114 ASSERT(dmu_tx_is_syncing(tx)); | |
115 if (BP_IS_HOLE(bp)) | |
116 return; | |
117 | |
118 ASSERT(used > 0); | |
119 if (ds == NULL) { | |
120 /* | |
121 * Account for the meta-objset space in its placeholder | |
122 * dataset. | |
123 */ | |
124 /* XXX this can fail, what do we do when it does? */ | |
125 (void) arc_free(NULL, tx->tx_pool->dp_spa, | |
126 tx->tx_txg, bp, NULL, NULL, ARC_WAIT); | |
127 bzero(bp, sizeof (blkptr_t)); | |
128 | |
129 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, | |
130 -used, -compressed, -uncompressed, tx); | |
131 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); | |
132 return; | |
133 } | |
134 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); | |
135 | |
136 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
137 | |
138 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { | |
139 dprintf_bp(bp, "freeing: %s", ""); | |
140 /* XXX check return code? */ | |
141 (void) arc_free(NULL, tx->tx_pool->dp_spa, | |
142 tx->tx_txg, bp, NULL, NULL, ARC_WAIT); | |
143 | |
144 mutex_enter(&ds->ds_lock); | |
145 /* XXX unique_bytes is not accurate for head datasets */ | |
146 /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */ | |
147 ds->ds_phys->ds_unique_bytes -= used; | |
148 mutex_exit(&ds->ds_lock); | |
149 dsl_dir_diduse_space(ds->ds_dir, | |
150 -used, -compressed, -uncompressed, tx); | |
151 } else { | |
152 dprintf_bp(bp, "putting on dead list: %s", ""); | |
1544 | 153 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); |
789 | 154 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ |
155 if (ds->ds_phys->ds_prev_snap_obj != 0) { | |
156 ASSERT3U(ds->ds_prev->ds_object, ==, | |
157 ds->ds_phys->ds_prev_snap_obj); | |
158 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); | |
159 if (ds->ds_prev->ds_phys->ds_next_snap_obj == | |
2082 | 160 ds->ds_object && bp->blk_birth > |
789 | 161 ds->ds_prev->ds_phys->ds_prev_snap_txg) { |
162 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
163 mutex_enter(&ds->ds_prev->ds_lock); | |
164 ds->ds_prev->ds_phys->ds_unique_bytes += | |
165 used; | |
166 mutex_exit(&ds->ds_prev->ds_lock); | |
167 } | |
168 } | |
169 } | |
170 bzero(bp, sizeof (blkptr_t)); | |
171 mutex_enter(&ds->ds_lock); | |
172 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); | |
173 ds->ds_phys->ds_used_bytes -= used; | |
174 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); | |
175 ds->ds_phys->ds_compressed_bytes -= compressed; | |
176 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); | |
177 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; | |
178 mutex_exit(&ds->ds_lock); | |
179 } | |
180 | |
1544 | 181 uint64_t |
182 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) | |
789 | 183 { |
184 if (ds == NULL) | |
1544 | 185 return (0); |
789 | 186 /* |
187 * The snapshot creation could fail, but that would cause an | |
188 * incorrect FALSE return, which would only result in an | |
189 * overestimation of the amount of space that an operation would | |
190 * consume, which is OK. | |
191 * | |
192 * There's also a small window where we could miss a pending | |
193 * snapshot, because we could set the sync task in the quiescing | |
194 * phase. So this should only be used as a guess. | |
195 */ | |
2199 | 196 return (MAX(ds->ds_phys->ds_prev_snap_txg, ds->ds_trysnap_txg)); |
1544 | 197 } |
198 | |
199 int | |
200 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) | |
201 { | |
202 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); | |
789 | 203 } |
204 | |
205 /* ARGSUSED */ | |
206 static void | |
207 dsl_dataset_evict(dmu_buf_t *db, void *dsv) | |
208 { | |
209 dsl_dataset_t *ds = dsv; | |
210 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
211 | |
212 /* open_refcount == DOS_REF_MAX when deleting */ | |
213 ASSERT(ds->ds_open_refcount == 0 || | |
214 ds->ds_open_refcount == DOS_REF_MAX); | |
215 | |
216 dprintf_ds(ds, "evicting %s\n", ""); | |
217 | |
218 unique_remove(ds->ds_phys->ds_fsid_guid); | |
219 | |
220 if (ds->ds_user_ptr != NULL) | |
221 ds->ds_user_evict_func(ds, ds->ds_user_ptr); | |
222 | |
223 if (ds->ds_prev) { | |
224 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); | |
225 ds->ds_prev = NULL; | |
226 } | |
227 | |
228 bplist_close(&ds->ds_deadlist); | |
229 dsl_dir_close(ds->ds_dir, ds); | |
230 | |
231 if (list_link_active(&ds->ds_synced_link)) | |
232 list_remove(&dp->dp_synced_objsets, ds); | |
233 | |
2856 | 234 mutex_destroy(&ds->ds_lock); |
235 mutex_destroy(&ds->ds_deadlist.bpl_lock); | |
236 | |
789 | 237 kmem_free(ds, sizeof (dsl_dataset_t)); |
238 } | |
239 | |
1544 | 240 static int |
789 | 241 dsl_dataset_get_snapname(dsl_dataset_t *ds) |
242 { | |
243 dsl_dataset_phys_t *headphys; | |
244 int err; | |
245 dmu_buf_t *headdbuf; | |
246 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
247 objset_t *mos = dp->dp_meta_objset; | |
248 | |
249 if (ds->ds_snapname[0]) | |
1544 | 250 return (0); |
789 | 251 if (ds->ds_phys->ds_next_snap_obj == 0) |
1544 | 252 return (0); |
789 | 253 |
1544 | 254 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, |
255 FTAG, &headdbuf); | |
256 if (err) | |
257 return (err); | |
789 | 258 headphys = headdbuf->db_data; |
259 err = zap_value_search(dp->dp_meta_objset, | |
260 headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname); | |
1544 | 261 dmu_buf_rele(headdbuf, FTAG); |
262 return (err); | |
789 | 263 } |
264 | |
1544 | 265 int |
789 | 266 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, |
1544 | 267 int mode, void *tag, dsl_dataset_t **dsp) |
789 | 268 { |
269 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; | |
270 objset_t *mos = dp->dp_meta_objset; | |
271 dmu_buf_t *dbuf; | |
272 dsl_dataset_t *ds; | |
1544 | 273 int err; |
789 | 274 |
275 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || | |
276 dsl_pool_sync_context(dp)); | |
277 | |
1544 | 278 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); |
279 if (err) | |
280 return (err); | |
789 | 281 ds = dmu_buf_get_user(dbuf); |
282 if (ds == NULL) { | |
283 dsl_dataset_t *winner; | |
284 | |
285 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); | |
286 ds->ds_dbuf = dbuf; | |
287 ds->ds_object = dsobj; | |
288 ds->ds_phys = dbuf->db_data; | |
289 | |
2856 | 290 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); |
291 mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, | |
292 NULL); | |
293 | |
1544 | 294 err = bplist_open(&ds->ds_deadlist, |
789 | 295 mos, ds->ds_phys->ds_deadlist_obj); |
1544 | 296 if (err == 0) { |
297 err = dsl_dir_open_obj(dp, | |
298 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); | |
299 } | |
300 if (err) { | |
301 /* | |
302 * we don't really need to close the blist if we | |
303 * just opened it. | |
304 */ | |
2856 | 305 mutex_destroy(&ds->ds_lock); |
306 mutex_destroy(&ds->ds_deadlist.bpl_lock); | |
1544 | 307 kmem_free(ds, sizeof (dsl_dataset_t)); |
308 dmu_buf_rele(dbuf, tag); | |
309 return (err); | |
310 } | |
789 | 311 |
312 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { | |
313 ds->ds_snapname[0] = '\0'; | |
314 if (ds->ds_phys->ds_prev_snap_obj) { | |
1544 | 315 err = dsl_dataset_open_obj(dp, |
789 | 316 ds->ds_phys->ds_prev_snap_obj, NULL, |
1544 | 317 DS_MODE_NONE, ds, &ds->ds_prev); |
789 | 318 } |
319 } else { | |
320 if (snapname) { | |
321 #ifdef ZFS_DEBUG | |
322 dsl_dataset_phys_t *headphys; | |
1544 | 323 dmu_buf_t *headdbuf; |
324 err = dmu_bonus_hold(mos, | |
325 ds->ds_dir->dd_phys->dd_head_dataset_obj, | |
326 FTAG, &headdbuf); | |
327 if (err == 0) { | |
328 headphys = headdbuf->db_data; | |
329 uint64_t foundobj; | |
330 err = zap_lookup(dp->dp_meta_objset, | |
331 headphys->ds_snapnames_zapobj, | |
332 snapname, sizeof (foundobj), 1, | |
333 &foundobj); | |
334 ASSERT3U(foundobj, ==, dsobj); | |
335 dmu_buf_rele(headdbuf, FTAG); | |
336 } | |
789 | 337 #endif |
338 (void) strcat(ds->ds_snapname, snapname); | |
339 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { | |
1544 | 340 err = dsl_dataset_get_snapname(ds); |
789 | 341 } |
342 } | |
343 | |
1544 | 344 if (err == 0) { |
345 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, | |
346 dsl_dataset_evict); | |
347 } | |
348 if (err || winner) { | |
789 | 349 bplist_close(&ds->ds_deadlist); |
350 if (ds->ds_prev) { | |
351 dsl_dataset_close(ds->ds_prev, | |
352 DS_MODE_NONE, ds); | |
353 } | |
354 dsl_dir_close(ds->ds_dir, ds); | |
2856 | 355 mutex_destroy(&ds->ds_lock); |
356 mutex_destroy(&ds->ds_deadlist.bpl_lock); | |
789 | 357 kmem_free(ds, sizeof (dsl_dataset_t)); |
1544 | 358 if (err) { |
359 dmu_buf_rele(dbuf, tag); | |
360 return (err); | |
361 } | |
789 | 362 ds = winner; |
363 } else { | |
364 uint64_t new = | |
365 unique_insert(ds->ds_phys->ds_fsid_guid); | |
366 if (new != ds->ds_phys->ds_fsid_guid) { | |
367 /* XXX it won't necessarily be synced... */ | |
368 ds->ds_phys->ds_fsid_guid = new; | |
369 } | |
370 } | |
371 } | |
372 ASSERT3P(ds->ds_dbuf, ==, dbuf); | |
373 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); | |
374 | |
375 mutex_enter(&ds->ds_lock); | |
376 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && | |
2082 | 377 (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && |
378 !DS_MODE_IS_INCONSISTENT(mode)) || | |
789 | 379 (ds->ds_open_refcount + weight > DOS_REF_MAX)) { |
380 mutex_exit(&ds->ds_lock); | |
381 dsl_dataset_close(ds, DS_MODE_NONE, tag); | |
1544 | 382 return (EBUSY); |
789 | 383 } |
384 ds->ds_open_refcount += weight; | |
385 mutex_exit(&ds->ds_lock); | |
386 | |
1544 | 387 *dsp = ds; |
388 return (0); | |
789 | 389 } |
390 | |
391 int | |
392 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, | |
393 void *tag, dsl_dataset_t **dsp) | |
394 { | |
395 dsl_dir_t *dd; | |
396 dsl_pool_t *dp; | |
397 const char *tail; | |
398 uint64_t obj; | |
399 dsl_dataset_t *ds = NULL; | |
400 int err = 0; | |
401 | |
1544 | 402 err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); |
403 if (err) | |
404 return (err); | |
789 | 405 |
406 dp = dd->dd_pool; | |
407 obj = dd->dd_phys->dd_head_dataset_obj; | |
408 rw_enter(&dp->dp_config_rwlock, RW_READER); | |
409 if (obj == 0) { | |
410 /* A dataset with no associated objset */ | |
411 err = ENOENT; | |
412 goto out; | |
413 } | |
414 | |
415 if (tail != NULL) { | |
416 objset_t *mos = dp->dp_meta_objset; | |
417 | |
1544 | 418 err = dsl_dataset_open_obj(dp, obj, NULL, |
419 DS_MODE_NONE, tag, &ds); | |
420 if (err) | |
421 goto out; | |
789 | 422 obj = ds->ds_phys->ds_snapnames_zapobj; |
423 dsl_dataset_close(ds, DS_MODE_NONE, tag); | |
424 ds = NULL; | |
425 | |
426 if (tail[0] != '@') { | |
427 err = ENOENT; | |
428 goto out; | |
429 } | |
430 tail++; | |
431 | |
432 /* Look for a snapshot */ | |
433 if (!DS_MODE_IS_READONLY(mode)) { | |
434 err = EROFS; | |
435 goto out; | |
436 } | |
437 dprintf("looking for snapshot '%s'\n", tail); | |
438 err = zap_lookup(mos, obj, tail, 8, 1, &obj); | |
439 if (err) | |
440 goto out; | |
441 } | |
1544 | 442 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); |
789 | 443 |
444 out: | |
445 rw_exit(&dp->dp_config_rwlock); | |
446 dsl_dir_close(dd, FTAG); | |
447 | |
448 ASSERT3U((err == 0), ==, (ds != NULL)); | |
449 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ | |
450 | |
451 *dsp = ds; | |
452 return (err); | |
453 } | |
454 | |
455 int | |
456 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) | |
457 { | |
458 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); | |
459 } | |
460 | |
461 void | |
462 dsl_dataset_name(dsl_dataset_t *ds, char *name) | |
463 { | |
464 if (ds == NULL) { | |
465 (void) strcpy(name, "mos"); | |
466 } else { | |
467 dsl_dir_name(ds->ds_dir, name); | |
1544 | 468 VERIFY(0 == dsl_dataset_get_snapname(ds)); |
789 | 469 if (ds->ds_snapname[0]) { |
470 (void) strcat(name, "@"); | |
471 if (!MUTEX_HELD(&ds->ds_lock)) { | |
472 /* | |
473 * We use a "recursive" mutex so that we | |
474 * can call dprintf_ds() with ds_lock held. | |
475 */ | |
476 mutex_enter(&ds->ds_lock); | |
477 (void) strcat(name, ds->ds_snapname); | |
478 mutex_exit(&ds->ds_lock); | |
479 } else { | |
480 (void) strcat(name, ds->ds_snapname); | |
481 } | |
482 } | |
483 } | |
484 } | |
485 | |
486 void | |
487 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) | |
488 { | |
489 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; | |
490 mutex_enter(&ds->ds_lock); | |
491 ASSERT3U(ds->ds_open_refcount, >=, weight); | |
492 ds->ds_open_refcount -= weight; | |
493 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", | |
494 mode, ds->ds_open_refcount); | |
495 mutex_exit(&ds->ds_lock); | |
496 | |
1544 | 497 dmu_buf_rele(ds->ds_dbuf, tag); |
789 | 498 } |
499 | |
500 void | |
501 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) | |
502 { | |
503 objset_t *mos = dp->dp_meta_objset; | |
504 dmu_buf_t *dbuf; | |
505 dsl_dataset_phys_t *dsphys; | |
506 dsl_dataset_t *ds; | |
507 uint64_t dsobj; | |
508 dsl_dir_t *dd; | |
509 | |
510 dsl_dir_create_root(mos, ddobjp, tx); | |
1544 | 511 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); |
789 | 512 |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
513 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
514 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 515 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 516 dmu_buf_will_dirty(dbuf, tx); |
517 dsphys = dbuf->db_data; | |
518 dsphys->ds_dir_obj = dd->dd_object; | |
519 dsphys->ds_fsid_guid = unique_create(); | |
1544 | 520 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ |
789 | 521 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, |
522 sizeof (dsphys->ds_guid)); | |
523 dsphys->ds_snapnames_zapobj = | |
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
524 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); |
789 | 525 dsphys->ds_creation_time = gethrestime_sec(); |
526 dsphys->ds_creation_txg = tx->tx_txg; | |
527 dsphys->ds_deadlist_obj = | |
528 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1544 | 529 dmu_buf_rele(dbuf, FTAG); |
789 | 530 |
531 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
532 dd->dd_phys->dd_head_dataset_obj = dsobj; | |
533 dsl_dir_close(dd, FTAG); | |
534 | |
1544 | 535 VERIFY(0 == |
536 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); | |
789 | 537 (void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx); |
538 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
539 } | |
540 | |
2199 | 541 uint64_t |
542 dsl_dataset_create_sync(dsl_dir_t *pdd, | |
789 | 543 const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) |
544 { | |
2199 | 545 dsl_pool_t *dp = pdd->dd_pool; |
789 | 546 dmu_buf_t *dbuf; |
547 dsl_dataset_phys_t *dsphys; | |
2199 | 548 uint64_t dsobj, ddobj; |
789 | 549 objset_t *mos = dp->dp_meta_objset; |
550 dsl_dir_t *dd; | |
551 | |
2199 | 552 ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp); |
553 ASSERT(clone_parent == NULL || | |
554 clone_parent->ds_phys->ds_num_children > 0); | |
789 | 555 ASSERT(lastname[0] != '@'); |
556 ASSERT(dmu_tx_is_syncing(tx)); | |
557 | |
2199 | 558 ddobj = dsl_dir_create_sync(pdd, lastname, tx); |
559 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); | |
789 | 560 |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
561 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
562 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 563 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 564 dmu_buf_will_dirty(dbuf, tx); |
565 dsphys = dbuf->db_data; | |
566 dsphys->ds_dir_obj = dd->dd_object; | |
567 dsphys->ds_fsid_guid = unique_create(); | |
568 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ | |
569 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, | |
570 sizeof (dsphys->ds_guid)); | |
571 dsphys->ds_snapnames_zapobj = | |
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
572 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); |
789 | 573 dsphys->ds_creation_time = gethrestime_sec(); |
574 dsphys->ds_creation_txg = tx->tx_txg; | |
575 dsphys->ds_deadlist_obj = | |
576 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
577 if (clone_parent) { | |
578 dsphys->ds_prev_snap_obj = clone_parent->ds_object; | |
579 dsphys->ds_prev_snap_txg = | |
580 clone_parent->ds_phys->ds_creation_txg; | |
581 dsphys->ds_used_bytes = | |
582 clone_parent->ds_phys->ds_used_bytes; | |
583 dsphys->ds_compressed_bytes = | |
584 clone_parent->ds_phys->ds_compressed_bytes; | |
585 dsphys->ds_uncompressed_bytes = | |
586 clone_parent->ds_phys->ds_uncompressed_bytes; | |
587 dsphys->ds_bp = clone_parent->ds_phys->ds_bp; | |
588 | |
589 dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); | |
590 clone_parent->ds_phys->ds_num_children++; | |
591 | |
592 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
593 dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; | |
594 } | |
1544 | 595 dmu_buf_rele(dbuf, FTAG); |
789 | 596 |
597 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
598 dd->dd_phys->dd_head_dataset_obj = dsobj; | |
599 dsl_dir_close(dd, FTAG); | |
600 | |
2199 | 601 return (dsobj); |
602 } | |
603 | |
604 struct destroyarg { | |
605 dsl_sync_task_group_t *dstg; | |
606 char *snapname; | |
607 void *tag; | |
608 char *failed; | |
609 }; | |
610 | |
611 static int | |
612 dsl_snapshot_destroy_one(char *name, void *arg) | |
613 { | |
614 struct destroyarg *da = arg; | |
615 dsl_dataset_t *ds; | |
616 char *cp; | |
617 int err; | |
618 | |
619 (void) strcat(name, "@"); | |
620 (void) strcat(name, da->snapname); | |
621 err = dsl_dataset_open(name, | |
622 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, | |
623 da->tag, &ds); | |
624 cp = strchr(name, '@'); | |
625 *cp = '\0'; | |
626 if (err == ENOENT) | |
627 return (0); | |
628 if (err) { | |
629 (void) strcpy(da->failed, name); | |
630 return (err); | |
631 } | |
632 | |
633 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, | |
634 dsl_dataset_destroy_sync, ds, da->tag, 0); | |
789 | 635 return (0); |
636 } | |
637 | |
2199 | 638 /* |
639 * Destroy 'snapname' in all descendants of 'fsname'. | |
640 */ | |
641 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy | |
642 int | |
643 dsl_snapshots_destroy(char *fsname, char *snapname) | |
644 { | |
645 int err; | |
646 struct destroyarg da; | |
647 dsl_sync_task_t *dst; | |
648 spa_t *spa; | |
649 char *cp; | |
650 | |
651 cp = strchr(fsname, '/'); | |
652 if (cp) { | |
653 *cp = '\0'; | |
654 err = spa_open(fsname, &spa, FTAG); | |
655 *cp = '/'; | |
656 } else { | |
657 err = spa_open(fsname, &spa, FTAG); | |
658 } | |
659 if (err) | |
660 return (err); | |
661 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); | |
662 da.snapname = snapname; | |
663 da.tag = FTAG; | |
664 da.failed = fsname; | |
665 | |
666 err = dmu_objset_find(fsname, | |
2417 | 667 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); |
2199 | 668 |
669 if (err == 0) | |
670 err = dsl_sync_task_group_wait(da.dstg); | |
671 | |
672 for (dst = list_head(&da.dstg->dstg_tasks); dst; | |
673 dst = list_next(&da.dstg->dstg_tasks, dst)) { | |
674 dsl_dataset_t *ds = dst->dst_arg1; | |
675 if (dst->dst_err) { | |
676 dsl_dataset_name(ds, fsname); | |
677 cp = strchr(fsname, '@'); | |
678 *cp = '\0'; | |
679 } | |
680 /* | |
681 * If it was successful, destroy_sync would have | |
682 * closed the ds | |
683 */ | |
684 if (err) | |
685 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
686 } | |
687 | |
688 dsl_sync_task_group_destroy(da.dstg); | |
689 spa_close(spa, FTAG); | |
690 return (err); | |
691 } | |
692 | |
789 | 693 int |
694 dsl_dataset_destroy(const char *name) | |
695 { | |
696 int err; | |
2199 | 697 dsl_sync_task_group_t *dstg; |
698 objset_t *os; | |
699 dsl_dataset_t *ds; | |
789 | 700 dsl_dir_t *dd; |
2199 | 701 uint64_t obj; |
702 | |
703 if (strchr(name, '@')) { | |
704 /* Destroying a snapshot is simpler */ | |
705 err = dsl_dataset_open(name, | |
706 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, | |
707 FTAG, &ds); | |
708 if (err) | |
709 return (err); | |
710 err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
711 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, | |
712 ds, FTAG, 0); | |
713 if (err) | |
714 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
715 return (err); | |
716 } | |
717 | |
718 err = dmu_objset_open(name, DMU_OST_ANY, | |
719 DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); | |
720 if (err) | |
721 return (err); | |
722 ds = os->os->os_dsl_dataset; | |
723 dd = ds->ds_dir; | |
789 | 724 |
2199 | 725 /* |
726 * Check for errors and mark this ds as inconsistent, in | |
727 * case we crash while freeing the objects. | |
728 */ | |
729 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, | |
730 dsl_dataset_destroy_begin_sync, ds, NULL, 0); | |
731 if (err) { | |
732 dmu_objset_close(os); | |
733 return (err); | |
734 } | |
735 | |
736 /* | |
737 * remove the objects in open context, so that we won't | |
738 * have too much to do in syncing context. | |
739 */ | |
740 for (obj = 0; err == 0; | |
741 err = dmu_object_next(os, &obj, FALSE)) { | |
742 dmu_tx_t *tx = dmu_tx_create(os); | |
743 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); | |
744 dmu_tx_hold_bonus(tx, obj); | |
745 err = dmu_tx_assign(tx, TXG_WAIT); | |
746 if (err) { | |
747 /* | |
748 * Perhaps there is not enough disk | |
749 * space. Just deal with it from | |
750 * dsl_dataset_destroy_sync(). | |
751 */ | |
752 dmu_tx_abort(tx); | |
753 continue; | |
754 } | |
755 VERIFY(0 == dmu_object_free(os, obj, tx)); | |
756 dmu_tx_commit(tx); | |
757 } | |
758 /* Make sure it's not dirty before we finish destroying it. */ | |
759 txg_wait_synced(dd->dd_pool, 0); | |
760 | |
761 dmu_objset_close(os); | |
762 if (err != ESRCH) | |
763 return (err); | |
764 | |
765 err = dsl_dataset_open(name, | |
766 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, | |
767 FTAG, &ds); | |
1544 | 768 if (err) |
769 return (err); | |
789 | 770 |
2199 | 771 err = dsl_dir_open(name, FTAG, &dd, NULL); |
772 if (err) { | |
773 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
774 return (err); | |
789 | 775 } |
776 | |
2199 | 777 /* |
778 * Blow away the dsl_dir + head dataset. | |
779 */ | |
780 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); | |
781 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, | |
782 dsl_dataset_destroy_sync, ds, FTAG, 0); | |
783 dsl_sync_task_create(dstg, dsl_dir_destroy_check, | |
784 dsl_dir_destroy_sync, dd, FTAG, 0); | |
785 err = dsl_sync_task_group_wait(dstg); | |
786 dsl_sync_task_group_destroy(dstg); | |
787 /* if it is successful, *destroy_sync will close the ds+dd */ | |
788 if (err) { | |
789 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
790 dsl_dir_close(dd, FTAG); | |
791 } | |
789 | 792 return (err); |
793 } | |
794 | |
795 int | |
2199 | 796 dsl_dataset_rollback(dsl_dataset_t *ds) |
789 | 797 { |
2199 | 798 ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX); |
799 return (dsl_sync_task_do(ds->ds_dir->dd_pool, | |
800 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, | |
801 ds, NULL, 0)); | |
789 | 802 } |
803 | |
804 void * | |
805 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, | |
806 void *p, dsl_dataset_evict_func_t func) | |
807 { | |
808 void *old; | |
809 | |
810 mutex_enter(&ds->ds_lock); | |
811 old = ds->ds_user_ptr; | |
812 if (old == NULL) { | |
813 ds->ds_user_ptr = p; | |
814 ds->ds_user_evict_func = func; | |
815 } | |
816 mutex_exit(&ds->ds_lock); | |
817 return (old); | |
818 } | |
819 | |
820 void * | |
821 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) | |
822 { | |
823 return (ds->ds_user_ptr); | |
824 } | |
825 | |
826 | |
827 void | |
828 dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp) | |
829 { | |
830 *bp = ds->ds_phys->ds_bp; | |
831 } | |
832 | |
833 void | |
834 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
835 { | |
836 ASSERT(dmu_tx_is_syncing(tx)); | |
837 /* If it's the meta-objset, set dp_meta_rootbp */ | |
838 if (ds == NULL) { | |
839 tx->tx_pool->dp_meta_rootbp = *bp; | |
840 } else { | |
841 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
842 ds->ds_phys->ds_bp = *bp; | |
843 } | |
844 } | |
845 | |
846 spa_t * | |
847 dsl_dataset_get_spa(dsl_dataset_t *ds) | |
848 { | |
849 return (ds->ds_dir->dd_pool->dp_spa); | |
850 } | |
851 | |
852 void | |
853 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) | |
854 { | |
855 dsl_pool_t *dp; | |
856 | |
857 if (ds == NULL) /* this is the meta-objset */ | |
858 return; | |
859 | |
860 ASSERT(ds->ds_user_ptr != NULL); | |
861 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); | |
862 | |
863 dp = ds->ds_dir->dd_pool; | |
864 | |
865 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { | |
866 /* up the hold count until we can be written out */ | |
867 dmu_buf_add_ref(ds->ds_dbuf, ds); | |
868 } | |
869 } | |
870 | |
871 struct killarg { | |
872 uint64_t *usedp; | |
873 uint64_t *compressedp; | |
874 uint64_t *uncompressedp; | |
875 zio_t *zio; | |
876 dmu_tx_t *tx; | |
877 }; | |
878 | |
879 static int | |
880 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) | |
881 { | |
882 struct killarg *ka = arg; | |
883 blkptr_t *bp = &bc->bc_blkptr; | |
884 | |
885 ASSERT3U(bc->bc_errno, ==, 0); | |
886 | |
887 /* | |
888 * Since this callback is not called concurrently, no lock is | |
889 * needed on the accounting values. | |
890 */ | |
2082 | 891 *ka->usedp += bp_get_dasize(spa, bp); |
789 | 892 *ka->compressedp += BP_GET_PSIZE(bp); |
893 *ka->uncompressedp += BP_GET_UCSIZE(bp); | |
894 /* XXX check for EIO? */ | |
895 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, | |
896 ARC_NOWAIT); | |
897 return (0); | |
898 } | |
899 | |
900 /* ARGSUSED */ | |
2199 | 901 static int |
902 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
789 | 903 { |
2199 | 904 dsl_dataset_t *ds = arg1; |
789 | 905 |
2199 | 906 /* |
907 * There must be a previous snapshot. I suppose we could roll | |
908 * it back to being empty (and re-initialize the upper (ZPL) | |
909 * layer). But for now there's no way to do this via the user | |
910 * interface. | |
911 */ | |
912 if (ds->ds_phys->ds_prev_snap_txg == 0) | |
789 | 913 return (EINVAL); |
914 | |
2199 | 915 /* |
916 * This must not be a snapshot. | |
917 */ | |
918 if (ds->ds_phys->ds_next_snap_obj != 0) | |
919 return (EINVAL); | |
789 | 920 |
921 /* | |
922 * If we made changes this txg, traverse_dsl_dataset won't find | |
923 * them. Try again. | |
924 */ | |
2199 | 925 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) |
789 | 926 return (EAGAIN); |
2199 | 927 |
928 return (0); | |
929 } | |
789 | 930 |
2199 | 931 /* ARGSUSED */ |
932 static void | |
933 dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
934 { | |
935 dsl_dataset_t *ds = arg1; | |
936 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; | |
789 | 937 |
938 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
939 | |
940 /* Zero out the deadlist. */ | |
941 bplist_close(&ds->ds_deadlist); | |
942 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); | |
943 ds->ds_phys->ds_deadlist_obj = | |
944 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1544 | 945 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, |
946 ds->ds_phys->ds_deadlist_obj)); | |
789 | 947 |
948 { | |
949 /* Free blkptrs that we gave birth to */ | |
950 zio_t *zio; | |
951 uint64_t used = 0, compressed = 0, uncompressed = 0; | |
952 struct killarg ka; | |
953 | |
954 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, | |
955 ZIO_FLAG_MUSTSUCCEED); | |
956 ka.usedp = &used; | |
957 ka.compressedp = &compressed; | |
958 ka.uncompressedp = &uncompressed; | |
959 ka.zio = zio; | |
960 ka.tx = tx; | |
961 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, | |
962 ADVANCE_POST, kill_blkptr, &ka); | |
963 (void) zio_wait(zio); | |
964 | |
2199 | 965 dsl_dir_diduse_space(ds->ds_dir, |
789 | 966 -used, -compressed, -uncompressed, tx); |
967 } | |
968 | |
2199 | 969 /* Change our contents to that of the prev snapshot */ |
789 | 970 ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); |
971 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; | |
972 ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; | |
973 ds->ds_phys->ds_compressed_bytes = | |
974 ds->ds_prev->ds_phys->ds_compressed_bytes; | |
975 ds->ds_phys->ds_uncompressed_bytes = | |
976 ds->ds_prev->ds_phys->ds_uncompressed_bytes; | |
2082 | 977 ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; |
789 | 978 ds->ds_phys->ds_unique_bytes = 0; |
979 | |
2532
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
980 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { |
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
981 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); |
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
982 ds->ds_prev->ds_phys->ds_unique_bytes = 0; |
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
983 } |
789 | 984 } |
985 | |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
986 /* ARGSUSED */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
987 static int |
2199 | 988 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
989 { |
2199 | 990 dsl_dataset_t *ds = arg1; |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
991 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
992 /* |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
993 * Can't delete a head dataset if there are snapshots of it. |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
994 * (Except if the only snapshots are from the branch we cloned |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
995 * from.) |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
996 */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
997 if (ds->ds_prev != NULL && |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
998 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
999 return (EINVAL); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1000 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1001 return (0); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1002 } |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1003 |
2199 | 1004 /* ARGSUSED */ |
1005 static void | |
1006 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
789 | 1007 { |
2199 | 1008 dsl_dataset_t *ds = arg1; |
789 | 1009 |
2199 | 1010 /* Mark it as inconsistent on-disk, in case we crash */ |
1011 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1012 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; | |
1013 } | |
789 | 1014 |
2199 | 1015 /* ARGSUSED */ |
1016 static int | |
1017 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1018 { | |
1019 dsl_dataset_t *ds = arg1; | |
789 | 1020 |
1021 /* Can't delete a branch point. */ | |
2199 | 1022 if (ds->ds_phys->ds_num_children > 1) |
1023 return (EEXIST); | |
789 | 1024 |
1025 /* | |
1026 * Can't delete a head dataset if there are snapshots of it. | |
1027 * (Except if the only snapshots are from the branch we cloned | |
1028 * from.) | |
1029 */ | |
1030 if (ds->ds_prev != NULL && | |
2199 | 1031 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) |
789 | 1032 return (EINVAL); |
1033 | |
1034 /* | |
1035 * If we made changes this txg, traverse_dsl_dataset won't find | |
1036 * them. Try again. | |
1037 */ | |
2199 | 1038 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) |
789 | 1039 return (EAGAIN); |
2199 | 1040 |
1041 /* XXX we should do some i/o error checking... */ | |
1042 return (0); | |
1043 } | |
1044 | |
1045 static void | |
1046 dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) | |
1047 { | |
1048 dsl_dataset_t *ds = arg1; | |
1049 uint64_t used = 0, compressed = 0, uncompressed = 0; | |
1050 zio_t *zio; | |
1051 int err; | |
1052 int after_branch_point = FALSE; | |
1053 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
1054 objset_t *mos = dp->dp_meta_objset; | |
1055 dsl_dataset_t *ds_prev = NULL; | |
1056 uint64_t obj; | |
1057 | |
1058 ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX); | |
1059 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); | |
1060 ASSERT(ds->ds_prev == NULL || | |
1061 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); | |
1062 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); | |
1063 | |
1064 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); | |
1065 | |
1066 obj = ds->ds_object; | |
789 | 1067 |
1068 if (ds->ds_phys->ds_prev_snap_obj != 0) { | |
1069 if (ds->ds_prev) { | |
1070 ds_prev = ds->ds_prev; | |
1071 } else { | |
2199 | 1072 VERIFY(0 == dsl_dataset_open_obj(dp, |
789 | 1073 ds->ds_phys->ds_prev_snap_obj, NULL, |
2199 | 1074 DS_MODE_NONE, FTAG, &ds_prev)); |
789 | 1075 } |
1076 after_branch_point = | |
1077 (ds_prev->ds_phys->ds_next_snap_obj != obj); | |
1078 | |
1079 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); | |
1080 if (after_branch_point && | |
1081 ds->ds_phys->ds_next_snap_obj == 0) { | |
1082 /* This clone is toast. */ | |
1083 ASSERT(ds_prev->ds_phys->ds_num_children > 1); | |
1084 ds_prev->ds_phys->ds_num_children--; | |
1085 } else if (!after_branch_point) { | |
1086 ds_prev->ds_phys->ds_next_snap_obj = | |
1087 ds->ds_phys->ds_next_snap_obj; | |
1088 } | |
1089 } | |
1090 | |
1091 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); | |
1092 | |
1093 if (ds->ds_phys->ds_next_snap_obj != 0) { | |
2199 | 1094 blkptr_t bp; |
789 | 1095 dsl_dataset_t *ds_next; |
1096 uint64_t itor = 0; | |
1097 | |
1098 spa_scrub_restart(dp->dp_spa, tx->tx_txg); | |
1099 | |
2199 | 1100 VERIFY(0 == dsl_dataset_open_obj(dp, |
1544 | 1101 ds->ds_phys->ds_next_snap_obj, NULL, |
1102 DS_MODE_NONE, FTAG, &ds_next)); | |
789 | 1103 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); |
1104 | |
1105 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); | |
1106 ds_next->ds_phys->ds_prev_snap_obj = | |
1107 ds->ds_phys->ds_prev_snap_obj; | |
1108 ds_next->ds_phys->ds_prev_snap_txg = | |
1109 ds->ds_phys->ds_prev_snap_txg; | |
1110 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, | |
1111 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); | |
1112 | |
1113 /* | |
1114 * Transfer to our deadlist (which will become next's | |
1115 * new deadlist) any entries from next's current | |
1116 * deadlist which were born before prev, and free the | |
1117 * other entries. | |
1118 * | |
1119 * XXX we're doing this long task with the config lock held | |
1120 */ | |
1121 while (bplist_iterate(&ds_next->ds_deadlist, &itor, | |
1122 &bp) == 0) { | |
1123 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { | |
1544 | 1124 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, |
1125 &bp, tx)); | |
789 | 1126 if (ds_prev && !after_branch_point && |
1127 bp.blk_birth > | |
1128 ds_prev->ds_phys->ds_prev_snap_txg) { | |
1129 ds_prev->ds_phys->ds_unique_bytes += | |
2082 | 1130 bp_get_dasize(dp->dp_spa, &bp); |
789 | 1131 } |
1132 } else { | |
2082 | 1133 used += bp_get_dasize(dp->dp_spa, &bp); |
789 | 1134 compressed += BP_GET_PSIZE(&bp); |
1135 uncompressed += BP_GET_UCSIZE(&bp); | |
1136 /* XXX check return value? */ | |
1137 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, | |
1138 &bp, NULL, NULL, ARC_NOWAIT); | |
1139 } | |
1140 } | |
1141 | |
1142 /* free next's deadlist */ | |
1143 bplist_close(&ds_next->ds_deadlist); | |
1144 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); | |
1145 | |
1146 /* set next's deadlist to our deadlist */ | |
1147 ds_next->ds_phys->ds_deadlist_obj = | |
1148 ds->ds_phys->ds_deadlist_obj; | |
1544 | 1149 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, |
1150 ds_next->ds_phys->ds_deadlist_obj)); | |
789 | 1151 ds->ds_phys->ds_deadlist_obj = 0; |
1152 | |
1153 if (ds_next->ds_phys->ds_next_snap_obj != 0) { | |
1154 /* | |
1155 * Update next's unique to include blocks which | |
1156 * were previously shared by only this snapshot | |
1157 * and it. Those blocks will be born after the | |
1158 * prev snap and before this snap, and will have | |
1159 * died after the next snap and before the one | |
1160 * after that (ie. be on the snap after next's | |
1161 * deadlist). | |
1162 * | |
1163 * XXX we're doing this long task with the | |
1164 * config lock held | |
1165 */ | |
1166 dsl_dataset_t *ds_after_next; | |
1167 | |
2199 | 1168 VERIFY(0 == dsl_dataset_open_obj(dp, |
789 | 1169 ds_next->ds_phys->ds_next_snap_obj, NULL, |
1544 | 1170 DS_MODE_NONE, FTAG, &ds_after_next)); |
789 | 1171 itor = 0; |
1172 while (bplist_iterate(&ds_after_next->ds_deadlist, | |
1173 &itor, &bp) == 0) { | |
1174 if (bp.blk_birth > | |
1175 ds->ds_phys->ds_prev_snap_txg && | |
1176 bp.blk_birth <= | |
1177 ds->ds_phys->ds_creation_txg) { | |
1178 ds_next->ds_phys->ds_unique_bytes += | |
2082 | 1179 bp_get_dasize(dp->dp_spa, &bp); |
789 | 1180 } |
1181 } | |
1182 | |
1183 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); | |
1184 ASSERT3P(ds_next->ds_prev, ==, NULL); | |
1185 } else { | |
1186 /* | |
1187 * It would be nice to update the head dataset's | |
1188 * unique. To do so we would have to traverse | |
1189 * it for blocks born after ds_prev, which is | |
1190 * pretty expensive just to maintain something | |
1191 * for debugging purposes. | |
1192 */ | |
1193 ASSERT3P(ds_next->ds_prev, ==, ds); | |
1194 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, | |
1195 ds_next); | |
1196 if (ds_prev) { | |
2199 | 1197 VERIFY(0 == dsl_dataset_open_obj(dp, |
1544 | 1198 ds->ds_phys->ds_prev_snap_obj, NULL, |
1199 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); | |
789 | 1200 } else { |
1201 ds_next->ds_prev = NULL; | |
1202 } | |
1203 } | |
1204 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); | |
1205 | |
1206 /* | |
1207 * NB: unique_bytes is not accurate for head objsets | |
1208 * because we don't update it when we delete the most | |
1209 * recent snapshot -- see above comment. | |
1210 */ | |
1211 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); | |
1212 } else { | |
1213 /* | |
1214 * There's no next snapshot, so this is a head dataset. | |
1215 * Destroy the deadlist. Unless it's a clone, the | |
1216 * deadlist should be empty. (If it's a clone, it's | |
1217 * safe to ignore the deadlist contents.) | |
1218 */ | |
1219 struct killarg ka; | |
1220 | |
1221 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); | |
1222 bplist_close(&ds->ds_deadlist); | |
1223 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); | |
1224 ds->ds_phys->ds_deadlist_obj = 0; | |
1225 | |
1226 /* | |
1227 * Free everything that we point to (that's born after | |
1228 * the previous snapshot, if we are a clone) | |
1229 * | |
1230 * XXX we're doing this long task with the config lock held | |
1231 */ | |
1232 ka.usedp = &used; | |
1233 ka.compressedp = &compressed; | |
1234 ka.uncompressedp = &uncompressed; | |
1235 ka.zio = zio; | |
1236 ka.tx = tx; | |
1237 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, | |
1238 ADVANCE_POST, kill_blkptr, &ka); | |
1239 ASSERT3U(err, ==, 0); | |
1240 } | |
1241 | |
1242 err = zio_wait(zio); | |
1243 ASSERT3U(err, ==, 0); | |
1244 | |
2199 | 1245 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); |
789 | 1246 |
1247 if (ds->ds_phys->ds_snapnames_zapobj) { | |
1248 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); | |
1249 ASSERT(err == 0); | |
1250 } | |
1251 | |
2199 | 1252 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { |
789 | 1253 /* Erase the link in the dataset */ |
2199 | 1254 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); |
1255 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; | |
789 | 1256 /* |
1257 * dsl_dir_sync_destroy() called us, they'll destroy | |
1258 * the dataset. | |
1259 */ | |
1260 } else { | |
1261 /* remove from snapshot namespace */ | |
1262 dsl_dataset_t *ds_head; | |
2199 | 1263 VERIFY(0 == dsl_dataset_open_obj(dp, |
1264 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, | |
1544 | 1265 DS_MODE_NONE, FTAG, &ds_head)); |
2207
47efcb3433a7
6439370 assertion failures possible in dsl_dataset_destroy_sync()
ahrens
parents:
2199
diff
changeset
|
1266 VERIFY(0 == dsl_dataset_get_snapname(ds)); |
789 | 1267 #ifdef ZFS_DEBUG |
1268 { | |
1269 uint64_t val; | |
1270 err = zap_lookup(mos, | |
1271 ds_head->ds_phys->ds_snapnames_zapobj, | |
2199 | 1272 ds->ds_snapname, 8, 1, &val); |
789 | 1273 ASSERT3U(err, ==, 0); |
1274 ASSERT3U(val, ==, obj); | |
1275 } | |
1276 #endif | |
1277 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, | |
2199 | 1278 ds->ds_snapname, tx); |
789 | 1279 ASSERT(err == 0); |
1280 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); | |
1281 } | |
1282 | |
1283 if (ds_prev && ds->ds_prev != ds_prev) | |
1284 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); | |
1285 | |
2199 | 1286 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); |
1287 VERIFY(0 == dmu_object_free(mos, obj, tx)); | |
1288 } | |
1289 | |
1290 /* ARGSUSED */ | |
1291 int | |
1292 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1293 { | |
1294 objset_t *os = arg1; | |
1295 dsl_dataset_t *ds = os->os->os_dsl_dataset; | |
1296 const char *snapname = arg2; | |
1297 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; | |
1298 int err; | |
1299 uint64_t value; | |
789 | 1300 |
1301 /* | |
2199 | 1302 * We don't allow multiple snapshots of the same txg. If there |
1303 * is already one, try again. | |
1304 */ | |
1305 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) | |
1306 return (EAGAIN); | |
1307 | |
1308 /* | |
1309 * Check for conflicting name snapshot name. | |
789 | 1310 */ |
2199 | 1311 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, |
1312 snapname, 8, 1, &value); | |
1313 if (err == 0) | |
1314 return (EEXIST); | |
1315 if (err != ENOENT) | |
1316 return (err); | |
789 | 1317 |
2199 | 1318 ds->ds_trysnap_txg = tx->tx_txg; |
789 | 1319 return (0); |
1320 } | |
1321 | |
2199 | 1322 void |
1323 dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
789 | 1324 { |
2199 | 1325 objset_t *os = arg1; |
1326 dsl_dataset_t *ds = os->os->os_dsl_dataset; | |
1327 const char *snapname = arg2; | |
1328 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
789 | 1329 dmu_buf_t *dbuf; |
1330 dsl_dataset_phys_t *dsphys; | |
2199 | 1331 uint64_t dsobj; |
789 | 1332 objset_t *mos = dp->dp_meta_objset; |
1333 int err; | |
1334 | |
1335 spa_scrub_restart(dp->dp_spa, tx->tx_txg); | |
2199 | 1336 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); |
789 | 1337 |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
1338 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
1339 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 1340 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 1341 dmu_buf_will_dirty(dbuf, tx); |
1342 dsphys = dbuf->db_data; | |
2199 | 1343 dsphys->ds_dir_obj = ds->ds_dir->dd_object; |
789 | 1344 dsphys->ds_fsid_guid = unique_create(); |
1345 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ | |
1346 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, | |
1347 sizeof (dsphys->ds_guid)); | |
1348 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; | |
1349 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; | |
1350 dsphys->ds_next_snap_obj = ds->ds_object; | |
1351 dsphys->ds_num_children = 1; | |
1352 dsphys->ds_creation_time = gethrestime_sec(); | |
1353 dsphys->ds_creation_txg = tx->tx_txg; | |
1354 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; | |
1355 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; | |
1356 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; | |
1357 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; | |
2082 | 1358 dsphys->ds_flags = ds->ds_phys->ds_flags; |
789 | 1359 dsphys->ds_bp = ds->ds_phys->ds_bp; |
1544 | 1360 dmu_buf_rele(dbuf, FTAG); |
789 | 1361 |
2199 | 1362 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); |
1363 if (ds->ds_prev) { | |
1364 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == | |
789 | 1365 ds->ds_object || |
2199 | 1366 ds->ds_prev->ds_phys->ds_num_children > 1); |
1367 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { | |
1368 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
789 | 1369 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, |
2199 | 1370 ds->ds_prev->ds_phys->ds_creation_txg); |
1371 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; | |
789 | 1372 } |
1373 } | |
1374 | |
1375 bplist_close(&ds->ds_deadlist); | |
1376 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1377 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); | |
1378 ds->ds_phys->ds_prev_snap_obj = dsobj; | |
1379 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; | |
1380 ds->ds_phys->ds_unique_bytes = 0; | |
1381 ds->ds_phys->ds_deadlist_obj = | |
1382 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1544 | 1383 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, |
1384 ds->ds_phys->ds_deadlist_obj)); | |
789 | 1385 |
1386 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); | |
1387 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, | |
1388 snapname, 8, 1, &dsobj, tx); | |
1389 ASSERT(err == 0); | |
1390 | |
1391 if (ds->ds_prev) | |
1392 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); | |
1544 | 1393 VERIFY(0 == dsl_dataset_open_obj(dp, |
1394 ds->ds_phys->ds_prev_snap_obj, snapname, | |
1395 DS_MODE_NONE, ds, &ds->ds_prev)); | |
789 | 1396 } |
1397 | |
1398 void | |
1399 dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx) | |
1400 { | |
1401 ASSERT(dmu_tx_is_syncing(tx)); | |
1402 ASSERT(ds->ds_user_ptr != NULL); | |
1403 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); | |
1404 | |
1405 dmu_objset_sync(ds->ds_user_ptr, tx); | |
1406 dsl_dir_dirty(ds->ds_dir, tx); | |
1407 bplist_close(&ds->ds_deadlist); | |
1408 | |
1544 | 1409 dmu_buf_rele(ds->ds_dbuf, ds); |
789 | 1410 } |
1411 | |
1412 void | |
1413 dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds) | |
1414 { | |
1415 /* fill in properties crap */ | |
1416 dsl_dir_stats(ds->ds_dir, dds); | |
1417 | |
1418 if (ds->ds_phys->ds_num_children != 0) { | |
1419 dds->dds_is_snapshot = TRUE; | |
1420 dds->dds_num_clones = ds->ds_phys->ds_num_children - 1; | |
1421 } | |
1422 | |
2082 | 1423 dds->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; |
789 | 1424 dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth; |
1425 | |
1426 dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill; | |
1427 dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used; | |
1428 | |
1429 /* We override the dataset's creation time... they should be the same */ | |
1430 dds->dds_creation_time = ds->ds_phys->ds_creation_time; | |
1431 dds->dds_creation_txg = ds->ds_phys->ds_creation_txg; | |
1432 dds->dds_space_refd = ds->ds_phys->ds_used_bytes; | |
1433 dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid; | |
1434 | |
1435 if (ds->ds_phys->ds_next_snap_obj) { | |
1436 /* | |
1437 * This is a snapshot; override the dd's space used with | |
1438 * our unique space | |
1439 */ | |
1440 dds->dds_space_used = ds->ds_phys->ds_unique_bytes; | |
1441 dds->dds_compressed_bytes = | |
1442 ds->ds_phys->ds_compressed_bytes; | |
1443 dds->dds_uncompressed_bytes = | |
1444 ds->ds_phys->ds_uncompressed_bytes; | |
1445 } | |
1446 } | |
1447 | |
1448 dsl_pool_t * | |
1449 dsl_dataset_pool(dsl_dataset_t *ds) | |
1450 { | |
1451 return (ds->ds_dir->dd_pool); | |
1452 } | |
1453 | |
2199 | 1454 /* ARGSUSED */ |
789 | 1455 static int |
2199 | 1456 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) |
789 | 1457 { |
2199 | 1458 dsl_dataset_t *ds = arg1; |
1459 char *newsnapname = arg2; | |
1460 dsl_dir_t *dd = ds->ds_dir; | |
789 | 1461 objset_t *mos = dd->dd_pool->dp_meta_objset; |
2199 | 1462 dsl_dataset_t *hds; |
1463 uint64_t val; | |
789 | 1464 int err; |
1465 | |
2199 | 1466 err = dsl_dataset_open_obj(dd->dd_pool, |
1467 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); | |
789 | 1468 if (err) |
1469 return (err); | |
1470 | |
2199 | 1471 /* new name better not be in use */ |
1472 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, | |
1473 newsnapname, 8, 1, &val); | |
1474 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); | |
789 | 1475 |
2199 | 1476 if (err == 0) |
1477 err = EEXIST; | |
1478 else if (err == ENOENT) | |
1479 err = 0; | |
1480 return (err); | |
1481 } | |
789 | 1482 |
2199 | 1483 static void |
1484 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
1485 { | |
1486 dsl_dataset_t *ds = arg1; | |
1487 char *newsnapname = arg2; | |
1488 dsl_dir_t *dd = ds->ds_dir; | |
1489 objset_t *mos = dd->dd_pool->dp_meta_objset; | |
1490 dsl_dataset_t *hds; | |
1491 int err; | |
789 | 1492 |
2199 | 1493 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); |
789 | 1494 |
2199 | 1495 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, |
1496 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); | |
789 | 1497 |
2199 | 1498 VERIFY(0 == dsl_dataset_get_snapname(ds)); |
1499 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, | |
1500 ds->ds_snapname, tx); | |
789 | 1501 ASSERT3U(err, ==, 0); |
2199 | 1502 mutex_enter(&ds->ds_lock); |
1503 (void) strcpy(ds->ds_snapname, newsnapname); | |
1504 mutex_exit(&ds->ds_lock); | |
1505 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, | |
1506 ds->ds_snapname, 8, 1, &ds->ds_object, tx); | |
789 | 1507 ASSERT3U(err, ==, 0); |
1508 | |
2199 | 1509 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); |
789 | 1510 } |
1511 | |
1512 #pragma weak dmu_objset_rename = dsl_dataset_rename | |
1513 int | |
2199 | 1514 dsl_dataset_rename(const char *oldname, const char *newname) |
789 | 1515 { |
1516 dsl_dir_t *dd; | |
2199 | 1517 dsl_dataset_t *ds; |
789 | 1518 const char *tail; |
1519 int err; | |
1520 | |
2199 | 1521 err = dsl_dir_open(oldname, FTAG, &dd, &tail); |
1544 | 1522 if (err) |
1523 return (err); | |
789 | 1524 if (tail == NULL) { |
2199 | 1525 err = dsl_dir_rename(dd, newname); |
789 | 1526 dsl_dir_close(dd, FTAG); |
1527 return (err); | |
1528 } | |
1529 if (tail[0] != '@') { | |
1530 /* the name ended in a nonexistant component */ | |
1531 dsl_dir_close(dd, FTAG); | |
1532 return (ENOENT); | |
1533 } | |
1534 | |
2199 | 1535 dsl_dir_close(dd, FTAG); |
1536 | |
1537 /* new name must be snapshot in same filesystem */ | |
1538 tail = strchr(newname, '@'); | |
1539 if (tail == NULL) | |
1540 return (EINVAL); | |
1541 tail++; | |
1542 if (strncmp(oldname, newname, tail - newname) != 0) | |
1543 return (EXDEV); | |
789 | 1544 |
2199 | 1545 err = dsl_dataset_open(oldname, |
1546 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); | |
1547 if (err) | |
1548 return (err); | |
1549 | |
1550 err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
1551 dsl_dataset_snapshot_rename_check, | |
1552 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); | |
1553 | |
1554 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); | |
1555 | |
789 | 1556 return (err); |
1557 } | |
2082 | 1558 |
/*
 * Results computed by dsl_dataset_promote_check() and consumed by
 * dsl_dataset_promote_sync().
 */
struct promotearg {
	uint64_t used;		/* used bytes of the snapshots being moved */
	uint64_t comp;		/* compressed bytes of those snapshots */
	uint64_t uncomp;	/* uncompressed bytes of those snapshots */
	uint64_t unique;	/* pivot point's new unique space */
	uint64_t newnext_obj;	/* object of the pivot point's new next ds */
	uint64_t snapnames_obj;	/* snapnames ZAP of the origin's head ds */
};
1563 | |
2082 | 1564 static int |
2199 | 1565 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) |
2082 | 1566 { |
2199 | 1567 dsl_dataset_t *hds = arg1; |
1568 struct promotearg *pa = arg2; | |
1569 dsl_dir_t *dd = hds->ds_dir; | |
1570 dsl_pool_t *dp = hds->ds_dir->dd_pool; | |
2082 | 1571 dsl_dir_t *pdd = NULL; |
1572 dsl_dataset_t *ds = NULL; | |
1573 dsl_dataset_t *pivot_ds = NULL; | |
1574 dsl_dataset_t *newnext_ds = NULL; | |
1575 int err; | |
1576 char *name = NULL; | |
2199 | 1577 uint64_t itor = 0; |
2082 | 1578 blkptr_t bp; |
1579 | |
2199 | 1580 bzero(pa, sizeof (*pa)); |
1581 | |
2082 | 1582 /* Check that it is a clone */ |
1583 if (dd->dd_phys->dd_clone_parent_obj == 0) | |
1584 return (EINVAL); | |
1585 | |
2199 | 1586 /* Since this is so expensive, don't do the preliminary check */ |
1587 if (!dmu_tx_is_syncing(tx)) | |
1588 return (0); | |
1589 | |
1590 if (err = dsl_dataset_open_obj(dp, | |
2082 | 1591 dd->dd_phys->dd_clone_parent_obj, |
1592 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) | |
1593 goto out; | |
1594 pdd = pivot_ds->ds_dir; | |
2199 | 1595 |
1596 { | |
1597 dsl_dataset_t *phds; | |
1598 if (err = dsl_dataset_open_obj(dd->dd_pool, | |
1599 pdd->dd_phys->dd_head_dataset_obj, | |
1600 NULL, DS_MODE_NONE, FTAG, &phds)) | |
1601 goto out; | |
1602 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; | |
1603 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); | |
1604 } | |
2082 | 1605 |
1606 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { | |
1607 err = EXDEV; | |
1608 goto out; | |
1609 } | |
1610 | |
1611 /* find pivot point's new next ds */ | |
1612 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, | |
1613 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); | |
1614 while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { | |
1615 dsl_dataset_t *prev; | |
1616 | |
1617 if (err = dsl_dataset_open_obj(dd->dd_pool, | |
2199 | 1618 newnext_ds->ds_phys->ds_prev_snap_obj, |
1619 NULL, DS_MODE_NONE, FTAG, &prev)) | |
2082 | 1620 goto out; |
1621 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); | |
1622 newnext_ds = prev; | |
1623 } | |
2199 | 1624 pa->newnext_obj = newnext_ds->ds_object; |
2082 | 1625 |
1626 /* compute pivot point's new unique space */ | |
1627 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, | |
1628 &itor, &bp)) == 0) { | |
1629 if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) | |
2199 | 1630 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); |
2082 | 1631 } |
1632 if (err != ENOENT) | |
1633 goto out; | |
1634 | |
1635 /* Walk the snapshots that we are moving */ | |
1636 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); | |
1637 ds = pivot_ds; | |
1638 /* CONSTCOND */ | |
1639 while (TRUE) { | |
1640 uint64_t val, dlused, dlcomp, dluncomp; | |
1641 dsl_dataset_t *prev; | |
1642 | |
1643 /* Check that the snapshot name does not conflict */ | |
1644 dsl_dataset_name(ds, name); | |
1645 err = zap_lookup(dd->dd_pool->dp_meta_objset, | |
1646 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, | |
1647 8, 1, &val); | |
1648 if (err != ENOENT) { | |
1649 if (err == 0) | |
1650 err = EEXIST; | |
1651 goto out; | |
1652 } | |
1653 | |
1654 /* | |
1655 * compute space to transfer. Each snapshot gave birth to: | |
1656 * (my used) - (prev's used) + (deadlist's used) | |
1657 */ | |
2199 | 1658 pa->used += ds->ds_phys->ds_used_bytes; |
1659 pa->comp += ds->ds_phys->ds_compressed_bytes; | |
1660 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; | |
2082 | 1661 |
1662 /* If we reach the first snapshot, we're done. */ | |
1663 if (ds->ds_phys->ds_prev_snap_obj == 0) | |
1664 break; | |
1665 | |
1666 if (err = bplist_space(&ds->ds_deadlist, | |
1667 &dlused, &dlcomp, &dluncomp)) | |
1668 goto out; | |
1669 if (err = dsl_dataset_open_obj(dd->dd_pool, | |
1670 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, | |
1671 FTAG, &prev)) | |
1672 goto out; | |
2199 | 1673 pa->used += dlused - prev->ds_phys->ds_used_bytes; |
1674 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; | |
1675 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; | |
2082 | 1676 |
1677 /* | |
1678 * We could be a clone of a clone. If we reach our | |
1679 * parent's branch point, we're done. | |
1680 */ | |
1681 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { | |
1682 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); | |
1683 break; | |
1684 } | |
1685 if (ds != pivot_ds) | |
1686 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1687 ds = prev; | |
1688 } | |
1689 | |
1690 /* Check that there is enough space here */ | |
2199 | 1691 err = dsl_dir_transfer_possible(pdd, dd, pa->used); |
1692 | |
1693 out: | |
1694 if (ds && ds != pivot_ds) | |
1695 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1696 if (pivot_ds) | |
1697 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); | |
1698 if (newnext_ds) | |
1699 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); | |
1700 if (name) | |
1701 kmem_free(name, MAXPATHLEN); | |
1702 return (err); | |
1703 } | |
2082 | 1704 |
2199 | 1705 static void |
1706 dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
1707 { | |
1708 dsl_dataset_t *hds = arg1; | |
1709 struct promotearg *pa = arg2; | |
1710 dsl_dir_t *dd = hds->ds_dir; | |
1711 dsl_pool_t *dp = hds->ds_dir->dd_pool; | |
1712 dsl_dir_t *pdd = NULL; | |
1713 dsl_dataset_t *ds, *pivot_ds; | |
1714 char *name; | |
1715 | |
1716 ASSERT(dd->dd_phys->dd_clone_parent_obj != 0); | |
1717 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); | |
1718 | |
1719 VERIFY(0 == dsl_dataset_open_obj(dp, | |
1720 dd->dd_phys->dd_clone_parent_obj, | |
1721 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)); | |
2417 | 1722 /* |
1723 * We need to explicitly open pdd, since pivot_ds's pdd will be | |
1724 * changing. | |
1725 */ | |
1726 VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object, | |
1727 NULL, FTAG, &pdd)); | |
2082 | 1728 |
1729 /* move snapshots to this dir */ | |
2199 | 1730 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); |
2082 | 1731 ds = pivot_ds; |
1732 /* CONSTCOND */ | |
1733 while (TRUE) { | |
1734 dsl_dataset_t *prev; | |
1735 | |
1736 /* move snap name entry */ | |
1737 dsl_dataset_name(ds, name); | |
2199 | 1738 VERIFY(0 == zap_remove(dp->dp_meta_objset, |
1739 pa->snapnames_obj, ds->ds_snapname, tx)); | |
1740 VERIFY(0 == zap_add(dp->dp_meta_objset, | |
2082 | 1741 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, |
1742 8, 1, &ds->ds_object, tx)); | |
1743 | |
1744 /* change containing dsl_dir */ | |
1745 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1746 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); | |
1747 ds->ds_phys->ds_dir_obj = dd->dd_object; | |
1748 ASSERT3P(ds->ds_dir, ==, pdd); | |
1749 dsl_dir_close(ds->ds_dir, ds); | |
2199 | 1750 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, |
2082 | 1751 NULL, ds, &ds->ds_dir)); |
1752 | |
1753 ASSERT3U(dsl_prop_numcb(ds), ==, 0); | |
1754 | |
1755 if (ds->ds_phys->ds_prev_snap_obj == 0) | |
1756 break; | |
1757 | |
2199 | 1758 VERIFY(0 == dsl_dataset_open_obj(dp, |
2082 | 1759 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, |
1760 FTAG, &prev)); | |
1761 | |
1762 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { | |
1763 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); | |
1764 break; | |
1765 } | |
1766 if (ds != pivot_ds) | |
1767 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1768 ds = prev; | |
1769 } | |
2199 | 1770 if (ds != pivot_ds) |
1771 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
2082 | 1772 |
1773 /* change pivot point's next snap */ | |
1774 dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); | |
2199 | 1775 pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; |
2082 | 1776 |
1777 /* change clone_parent-age */ | |
1778 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
1779 ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); | |
1780 dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; | |
1781 dmu_buf_will_dirty(pdd->dd_dbuf, tx); | |
1782 pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; | |
1783 | |
1784 /* change space accounting */ | |
2199 | 1785 dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx); |
1786 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); | |
1787 pivot_ds->ds_phys->ds_unique_bytes = pa->unique; | |
2082 | 1788 |
2417 | 1789 dsl_dir_close(pdd, FTAG); |
2199 | 1790 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); |
1791 kmem_free(name, MAXPATHLEN); | |
2082 | 1792 } |
1793 | |
1794 int | |
1795 dsl_dataset_promote(const char *name) | |
1796 { | |
1797 dsl_dataset_t *ds; | |
1798 int err; | |
1799 dmu_object_info_t doi; | |
2199 | 1800 struct promotearg pa; |
2082 | 1801 |
1802 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); | |
1803 if (err) | |
1804 return (err); | |
1805 | |
1806 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, | |
1807 ds->ds_phys->ds_snapnames_zapobj, &doi); | |
1808 if (err) { | |
1809 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
1810 return (err); | |
1811 } | |
1812 | |
1813 /* | |
1814 * Add in 128x the snapnames zapobj size, since we will be moving | |
1815 * a bunch of snapnames to the promoted ds, and dirtying their | |
1816 * bonus buffers. | |
1817 */ | |
2199 | 1818 err = dsl_sync_task_do(ds->ds_dir->dd_pool, |
1819 dsl_dataset_promote_check, | |
1820 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); | |
2082 | 1821 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); |
1822 return (err); | |
1823 } |