Mercurial > illumos > illumos-gate
annotate usr/src/uts/common/fs/zfs/dsl_dataset.c @ 2885:c0259887ebbc
6460059 zfs destroy <snapshot> leaves behind kruft
6463788 'zfs recv -d' fails if some ancestors already exist
6464897 assertion failed: "BP_GET_COMPRESS(bp) == compress" zio.c, line:897
6472843 panic when write to zvol snapshot
6475506 panic in dmu_recvbackup due to NULL pointer dereference
6475942 need more assertions in dnode_destroy()
6477102 recvbackup ioctl does not advance file offset
6477103 read-only properties should be passed as nvlist
6477900 want more /etc/system tunables for ZFS performance analysis
6479497 ::abuf_find is broken
author | ahrens |
---|---|
date | Mon, 09 Oct 2006 10:56:01 -0700 |
parents | 6f4d5ee1906a |
children | 4e5ee8301d84 |
rev | line source |
---|---|
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1544 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
21 /* | |
1544 | 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 * Use is subject to license terms. |
24 */ | |
25 | |
26 #pragma ident "%Z%%M% %I% %E% SMI" | |
27 | |
28 #include <sys/dmu_objset.h> | |
29 #include <sys/dsl_dataset.h> | |
30 #include <sys/dsl_dir.h> | |
2082 | 31 #include <sys/dsl_prop.h> |
2199 | 32 #include <sys/dsl_synctask.h> |
789 | 33 #include <sys/dmu_traverse.h> |
34 #include <sys/dmu_tx.h> | |
35 #include <sys/arc.h> | |
36 #include <sys/zio.h> | |
37 #include <sys/zap.h> | |
38 #include <sys/unique.h> | |
39 #include <sys/zfs_context.h> | |
40 | |
2199 | 41 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; |
42 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; | |
43 static dsl_checkfunc_t dsl_dataset_rollback_check; | |
44 static dsl_syncfunc_t dsl_dataset_rollback_sync; | |
45 static dsl_checkfunc_t dsl_dataset_destroy_check; | |
46 static dsl_syncfunc_t dsl_dataset_destroy_sync; | |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
47 |
789 | 48 #define DOS_REF_MAX (1ULL << 62) |
49 | |
50 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE | |
51 | |
52 /* | |
53 * We use weighted reference counts to express the various forms of exclusion | |
54 * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open | |
55 * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. | |
56 * This makes the exclusion logic simple: the total refcnt for all opens cannot | |
57 * exceed DOS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their | |
58 * weight (DOS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume | |
59 * just over half of the refcnt space, so there can't be more than one, but it | |
60 * can peacefully coexist with any number of STANDARD opens. | |
61 */ | |
62 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { | |
63 0, /* DOS_MODE_NONE - invalid */ | |
64 1, /* DOS_MODE_STANDARD - unlimited number */ | |
65 (DOS_REF_MAX >> 1) + 1, /* DOS_MODE_PRIMARY - only one of these */ | |
66 DOS_REF_MAX /* DOS_MODE_EXCLUSIVE - no other opens */ | |
67 }; | |
68 | |
69 | |
70 void | |
71 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
72 { | |
2082 | 73 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); |
789 | 74 int compressed = BP_GET_PSIZE(bp); |
75 int uncompressed = BP_GET_UCSIZE(bp); | |
76 | |
77 dprintf_bp(bp, "born, ds=%p\n", ds); | |
78 | |
79 ASSERT(dmu_tx_is_syncing(tx)); | |
80 /* It could have been compressed away to nothing */ | |
81 if (BP_IS_HOLE(bp)) | |
82 return; | |
83 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); | |
84 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); | |
85 if (ds == NULL) { | |
86 /* | |
87 * Account for the meta-objset space in its placeholder | |
88 * dsl_dir. | |
89 */ | |
90 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ | |
91 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, | |
92 used, compressed, uncompressed, tx); | |
93 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); | |
94 return; | |
95 } | |
96 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
97 mutex_enter(&ds->ds_lock); | |
98 ds->ds_phys->ds_used_bytes += used; | |
99 ds->ds_phys->ds_compressed_bytes += compressed; | |
100 ds->ds_phys->ds_uncompressed_bytes += uncompressed; | |
101 ds->ds_phys->ds_unique_bytes += used; | |
102 mutex_exit(&ds->ds_lock); | |
103 dsl_dir_diduse_space(ds->ds_dir, | |
104 used, compressed, uncompressed, tx); | |
105 } | |
106 | |
107 void | |
108 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
109 { | |
2082 | 110 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); |
789 | 111 int compressed = BP_GET_PSIZE(bp); |
112 int uncompressed = BP_GET_UCSIZE(bp); | |
113 | |
114 ASSERT(dmu_tx_is_syncing(tx)); | |
115 if (BP_IS_HOLE(bp)) | |
116 return; | |
117 | |
118 ASSERT(used > 0); | |
119 if (ds == NULL) { | |
120 /* | |
121 * Account for the meta-objset space in its placeholder | |
122 * dataset. | |
123 */ | |
124 /* XXX this can fail, what do we do when it does? */ | |
125 (void) arc_free(NULL, tx->tx_pool->dp_spa, | |
126 tx->tx_txg, bp, NULL, NULL, ARC_WAIT); | |
127 bzero(bp, sizeof (blkptr_t)); | |
128 | |
129 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, | |
130 -used, -compressed, -uncompressed, tx); | |
131 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); | |
132 return; | |
133 } | |
134 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); | |
135 | |
136 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
137 | |
138 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { | |
139 dprintf_bp(bp, "freeing: %s", ""); | |
140 /* XXX check return code? */ | |
141 (void) arc_free(NULL, tx->tx_pool->dp_spa, | |
142 tx->tx_txg, bp, NULL, NULL, ARC_WAIT); | |
143 | |
144 mutex_enter(&ds->ds_lock); | |
145 /* XXX unique_bytes is not accurate for head datasets */ | |
146 /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */ | |
147 ds->ds_phys->ds_unique_bytes -= used; | |
148 mutex_exit(&ds->ds_lock); | |
149 dsl_dir_diduse_space(ds->ds_dir, | |
150 -used, -compressed, -uncompressed, tx); | |
151 } else { | |
152 dprintf_bp(bp, "putting on dead list: %s", ""); | |
1544 | 153 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); |
789 | 154 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ |
155 if (ds->ds_phys->ds_prev_snap_obj != 0) { | |
156 ASSERT3U(ds->ds_prev->ds_object, ==, | |
157 ds->ds_phys->ds_prev_snap_obj); | |
158 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); | |
159 if (ds->ds_prev->ds_phys->ds_next_snap_obj == | |
2082 | 160 ds->ds_object && bp->blk_birth > |
789 | 161 ds->ds_prev->ds_phys->ds_prev_snap_txg) { |
162 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
163 mutex_enter(&ds->ds_prev->ds_lock); | |
164 ds->ds_prev->ds_phys->ds_unique_bytes += | |
165 used; | |
166 mutex_exit(&ds->ds_prev->ds_lock); | |
167 } | |
168 } | |
169 } | |
170 bzero(bp, sizeof (blkptr_t)); | |
171 mutex_enter(&ds->ds_lock); | |
172 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); | |
173 ds->ds_phys->ds_used_bytes -= used; | |
174 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); | |
175 ds->ds_phys->ds_compressed_bytes -= compressed; | |
176 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); | |
177 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; | |
178 mutex_exit(&ds->ds_lock); | |
179 } | |
180 | |
1544 | 181 uint64_t |
182 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) | |
789 | 183 { |
2885 | 184 uint64_t trysnap = 0; |
185 | |
789 | 186 if (ds == NULL) |
1544 | 187 return (0); |
789 | 188 /* |
189 * The snapshot creation could fail, but that would cause an | |
190 * incorrect FALSE return, which would only result in an | |
191 * overestimation of the amount of space that an operation would | |
192 * consume, which is OK. | |
193 * | |
194 * There's also a small window where we could miss a pending | |
195 * snapshot, because we could set the sync task in the quiescing | |
196 * phase. So this should only be used as a guess. | |
197 */ | |
2885 | 198 if (ds->ds_trysnap_txg > |
199 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) | |
200 trysnap = ds->ds_trysnap_txg; | |
201 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); | |
1544 | 202 } |
203 | |
204 int | |
205 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) | |
206 { | |
207 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); | |
789 | 208 } |
209 | |
210 /* ARGSUSED */ | |
211 static void | |
212 dsl_dataset_evict(dmu_buf_t *db, void *dsv) | |
213 { | |
214 dsl_dataset_t *ds = dsv; | |
215 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
216 | |
217 /* open_refcount == DOS_REF_MAX when deleting */ | |
218 ASSERT(ds->ds_open_refcount == 0 || | |
219 ds->ds_open_refcount == DOS_REF_MAX); | |
220 | |
221 dprintf_ds(ds, "evicting %s\n", ""); | |
222 | |
223 unique_remove(ds->ds_phys->ds_fsid_guid); | |
224 | |
225 if (ds->ds_user_ptr != NULL) | |
226 ds->ds_user_evict_func(ds, ds->ds_user_ptr); | |
227 | |
228 if (ds->ds_prev) { | |
229 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); | |
230 ds->ds_prev = NULL; | |
231 } | |
232 | |
233 bplist_close(&ds->ds_deadlist); | |
234 dsl_dir_close(ds->ds_dir, ds); | |
235 | |
236 if (list_link_active(&ds->ds_synced_link)) | |
237 list_remove(&dp->dp_synced_objsets, ds); | |
238 | |
2856 | 239 mutex_destroy(&ds->ds_lock); |
240 mutex_destroy(&ds->ds_deadlist.bpl_lock); | |
241 | |
789 | 242 kmem_free(ds, sizeof (dsl_dataset_t)); |
243 } | |
244 | |
1544 | 245 static int |
789 | 246 dsl_dataset_get_snapname(dsl_dataset_t *ds) |
247 { | |
248 dsl_dataset_phys_t *headphys; | |
249 int err; | |
250 dmu_buf_t *headdbuf; | |
251 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
252 objset_t *mos = dp->dp_meta_objset; | |
253 | |
254 if (ds->ds_snapname[0]) | |
1544 | 255 return (0); |
789 | 256 if (ds->ds_phys->ds_next_snap_obj == 0) |
1544 | 257 return (0); |
789 | 258 |
1544 | 259 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, |
260 FTAG, &headdbuf); | |
261 if (err) | |
262 return (err); | |
789 | 263 headphys = headdbuf->db_data; |
264 err = zap_value_search(dp->dp_meta_objset, | |
265 headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname); | |
1544 | 266 dmu_buf_rele(headdbuf, FTAG); |
267 return (err); | |
789 | 268 } |
269 | |
1544 | 270 int |
789 | 271 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, |
1544 | 272 int mode, void *tag, dsl_dataset_t **dsp) |
789 | 273 { |
274 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; | |
275 objset_t *mos = dp->dp_meta_objset; | |
276 dmu_buf_t *dbuf; | |
277 dsl_dataset_t *ds; | |
1544 | 278 int err; |
789 | 279 |
280 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || | |
281 dsl_pool_sync_context(dp)); | |
282 | |
1544 | 283 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); |
284 if (err) | |
285 return (err); | |
789 | 286 ds = dmu_buf_get_user(dbuf); |
287 if (ds == NULL) { | |
288 dsl_dataset_t *winner; | |
289 | |
290 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); | |
291 ds->ds_dbuf = dbuf; | |
292 ds->ds_object = dsobj; | |
293 ds->ds_phys = dbuf->db_data; | |
294 | |
2856 | 295 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); |
296 mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, | |
297 NULL); | |
298 | |
1544 | 299 err = bplist_open(&ds->ds_deadlist, |
789 | 300 mos, ds->ds_phys->ds_deadlist_obj); |
1544 | 301 if (err == 0) { |
302 err = dsl_dir_open_obj(dp, | |
303 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); | |
304 } | |
305 if (err) { | |
306 /* | |
307 * we don't really need to close the blist if we | |
308 * just opened it. | |
309 */ | |
2856 | 310 mutex_destroy(&ds->ds_lock); |
311 mutex_destroy(&ds->ds_deadlist.bpl_lock); | |
1544 | 312 kmem_free(ds, sizeof (dsl_dataset_t)); |
313 dmu_buf_rele(dbuf, tag); | |
314 return (err); | |
315 } | |
789 | 316 |
317 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { | |
318 ds->ds_snapname[0] = '\0'; | |
319 if (ds->ds_phys->ds_prev_snap_obj) { | |
1544 | 320 err = dsl_dataset_open_obj(dp, |
789 | 321 ds->ds_phys->ds_prev_snap_obj, NULL, |
1544 | 322 DS_MODE_NONE, ds, &ds->ds_prev); |
789 | 323 } |
324 } else { | |
325 if (snapname) { | |
326 #ifdef ZFS_DEBUG | |
327 dsl_dataset_phys_t *headphys; | |
1544 | 328 dmu_buf_t *headdbuf; |
329 err = dmu_bonus_hold(mos, | |
330 ds->ds_dir->dd_phys->dd_head_dataset_obj, | |
331 FTAG, &headdbuf); | |
332 if (err == 0) { | |
333 headphys = headdbuf->db_data; | |
334 uint64_t foundobj; | |
335 err = zap_lookup(dp->dp_meta_objset, | |
336 headphys->ds_snapnames_zapobj, | |
337 snapname, sizeof (foundobj), 1, | |
338 &foundobj); | |
339 ASSERT3U(foundobj, ==, dsobj); | |
340 dmu_buf_rele(headdbuf, FTAG); | |
341 } | |
789 | 342 #endif |
343 (void) strcat(ds->ds_snapname, snapname); | |
344 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { | |
1544 | 345 err = dsl_dataset_get_snapname(ds); |
789 | 346 } |
347 } | |
348 | |
1544 | 349 if (err == 0) { |
350 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, | |
351 dsl_dataset_evict); | |
352 } | |
353 if (err || winner) { | |
789 | 354 bplist_close(&ds->ds_deadlist); |
355 if (ds->ds_prev) { | |
356 dsl_dataset_close(ds->ds_prev, | |
357 DS_MODE_NONE, ds); | |
358 } | |
359 dsl_dir_close(ds->ds_dir, ds); | |
2856 | 360 mutex_destroy(&ds->ds_lock); |
361 mutex_destroy(&ds->ds_deadlist.bpl_lock); | |
789 | 362 kmem_free(ds, sizeof (dsl_dataset_t)); |
1544 | 363 if (err) { |
364 dmu_buf_rele(dbuf, tag); | |
365 return (err); | |
366 } | |
789 | 367 ds = winner; |
368 } else { | |
369 uint64_t new = | |
370 unique_insert(ds->ds_phys->ds_fsid_guid); | |
371 if (new != ds->ds_phys->ds_fsid_guid) { | |
372 /* XXX it won't necessarily be synced... */ | |
373 ds->ds_phys->ds_fsid_guid = new; | |
374 } | |
375 } | |
376 } | |
377 ASSERT3P(ds->ds_dbuf, ==, dbuf); | |
378 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); | |
379 | |
380 mutex_enter(&ds->ds_lock); | |
381 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && | |
2082 | 382 (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && |
383 !DS_MODE_IS_INCONSISTENT(mode)) || | |
789 | 384 (ds->ds_open_refcount + weight > DOS_REF_MAX)) { |
385 mutex_exit(&ds->ds_lock); | |
386 dsl_dataset_close(ds, DS_MODE_NONE, tag); | |
1544 | 387 return (EBUSY); |
789 | 388 } |
389 ds->ds_open_refcount += weight; | |
390 mutex_exit(&ds->ds_lock); | |
391 | |
1544 | 392 *dsp = ds; |
393 return (0); | |
789 | 394 } |
395 | |
396 int | |
397 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, | |
398 void *tag, dsl_dataset_t **dsp) | |
399 { | |
400 dsl_dir_t *dd; | |
401 dsl_pool_t *dp; | |
402 const char *tail; | |
403 uint64_t obj; | |
404 dsl_dataset_t *ds = NULL; | |
405 int err = 0; | |
406 | |
1544 | 407 err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); |
408 if (err) | |
409 return (err); | |
789 | 410 |
411 dp = dd->dd_pool; | |
412 obj = dd->dd_phys->dd_head_dataset_obj; | |
413 rw_enter(&dp->dp_config_rwlock, RW_READER); | |
414 if (obj == 0) { | |
415 /* A dataset with no associated objset */ | |
416 err = ENOENT; | |
417 goto out; | |
418 } | |
419 | |
420 if (tail != NULL) { | |
421 objset_t *mos = dp->dp_meta_objset; | |
422 | |
1544 | 423 err = dsl_dataset_open_obj(dp, obj, NULL, |
424 DS_MODE_NONE, tag, &ds); | |
425 if (err) | |
426 goto out; | |
789 | 427 obj = ds->ds_phys->ds_snapnames_zapobj; |
428 dsl_dataset_close(ds, DS_MODE_NONE, tag); | |
429 ds = NULL; | |
430 | |
431 if (tail[0] != '@') { | |
432 err = ENOENT; | |
433 goto out; | |
434 } | |
435 tail++; | |
436 | |
437 /* Look for a snapshot */ | |
438 if (!DS_MODE_IS_READONLY(mode)) { | |
439 err = EROFS; | |
440 goto out; | |
441 } | |
442 dprintf("looking for snapshot '%s'\n", tail); | |
443 err = zap_lookup(mos, obj, tail, 8, 1, &obj); | |
444 if (err) | |
445 goto out; | |
446 } | |
1544 | 447 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); |
789 | 448 |
449 out: | |
450 rw_exit(&dp->dp_config_rwlock); | |
451 dsl_dir_close(dd, FTAG); | |
452 | |
453 ASSERT3U((err == 0), ==, (ds != NULL)); | |
454 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ | |
455 | |
456 *dsp = ds; | |
457 return (err); | |
458 } | |
459 | |
460 int | |
461 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) | |
462 { | |
463 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); | |
464 } | |
465 | |
466 void | |
467 dsl_dataset_name(dsl_dataset_t *ds, char *name) | |
468 { | |
469 if (ds == NULL) { | |
470 (void) strcpy(name, "mos"); | |
471 } else { | |
472 dsl_dir_name(ds->ds_dir, name); | |
1544 | 473 VERIFY(0 == dsl_dataset_get_snapname(ds)); |
789 | 474 if (ds->ds_snapname[0]) { |
475 (void) strcat(name, "@"); | |
476 if (!MUTEX_HELD(&ds->ds_lock)) { | |
477 /* | |
478 * We use a "recursive" mutex so that we | |
479 * can call dprintf_ds() with ds_lock held. | |
480 */ | |
481 mutex_enter(&ds->ds_lock); | |
482 (void) strcat(name, ds->ds_snapname); | |
483 mutex_exit(&ds->ds_lock); | |
484 } else { | |
485 (void) strcat(name, ds->ds_snapname); | |
486 } | |
487 } | |
488 } | |
489 } | |
490 | |
491 void | |
492 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) | |
493 { | |
494 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; | |
495 mutex_enter(&ds->ds_lock); | |
496 ASSERT3U(ds->ds_open_refcount, >=, weight); | |
497 ds->ds_open_refcount -= weight; | |
498 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", | |
499 mode, ds->ds_open_refcount); | |
500 mutex_exit(&ds->ds_lock); | |
501 | |
1544 | 502 dmu_buf_rele(ds->ds_dbuf, tag); |
789 | 503 } |
504 | |
505 void | |
506 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) | |
507 { | |
508 objset_t *mos = dp->dp_meta_objset; | |
509 dmu_buf_t *dbuf; | |
510 dsl_dataset_phys_t *dsphys; | |
511 dsl_dataset_t *ds; | |
512 uint64_t dsobj; | |
513 dsl_dir_t *dd; | |
514 | |
515 dsl_dir_create_root(mos, ddobjp, tx); | |
1544 | 516 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); |
789 | 517 |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
518 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
519 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 520 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 521 dmu_buf_will_dirty(dbuf, tx); |
522 dsphys = dbuf->db_data; | |
523 dsphys->ds_dir_obj = dd->dd_object; | |
524 dsphys->ds_fsid_guid = unique_create(); | |
1544 | 525 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ |
789 | 526 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, |
527 sizeof (dsphys->ds_guid)); | |
528 dsphys->ds_snapnames_zapobj = | |
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
529 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); |
789 | 530 dsphys->ds_creation_time = gethrestime_sec(); |
531 dsphys->ds_creation_txg = tx->tx_txg; | |
532 dsphys->ds_deadlist_obj = | |
533 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1544 | 534 dmu_buf_rele(dbuf, FTAG); |
789 | 535 |
536 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
537 dd->dd_phys->dd_head_dataset_obj = dsobj; | |
538 dsl_dir_close(dd, FTAG); | |
539 | |
1544 | 540 VERIFY(0 == |
541 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); | |
789 | 542 (void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx); |
543 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
544 } | |
545 | |
2199 | 546 uint64_t |
547 dsl_dataset_create_sync(dsl_dir_t *pdd, | |
789 | 548 const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) |
549 { | |
2199 | 550 dsl_pool_t *dp = pdd->dd_pool; |
789 | 551 dmu_buf_t *dbuf; |
552 dsl_dataset_phys_t *dsphys; | |
2199 | 553 uint64_t dsobj, ddobj; |
789 | 554 objset_t *mos = dp->dp_meta_objset; |
555 dsl_dir_t *dd; | |
556 | |
2199 | 557 ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp); |
558 ASSERT(clone_parent == NULL || | |
559 clone_parent->ds_phys->ds_num_children > 0); | |
789 | 560 ASSERT(lastname[0] != '@'); |
561 ASSERT(dmu_tx_is_syncing(tx)); | |
562 | |
2199 | 563 ddobj = dsl_dir_create_sync(pdd, lastname, tx); |
564 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); | |
789 | 565 |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
566 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
567 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 568 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 569 dmu_buf_will_dirty(dbuf, tx); |
570 dsphys = dbuf->db_data; | |
571 dsphys->ds_dir_obj = dd->dd_object; | |
572 dsphys->ds_fsid_guid = unique_create(); | |
573 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ | |
574 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, | |
575 sizeof (dsphys->ds_guid)); | |
576 dsphys->ds_snapnames_zapobj = | |
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
577 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); |
789 | 578 dsphys->ds_creation_time = gethrestime_sec(); |
579 dsphys->ds_creation_txg = tx->tx_txg; | |
580 dsphys->ds_deadlist_obj = | |
581 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
582 if (clone_parent) { | |
583 dsphys->ds_prev_snap_obj = clone_parent->ds_object; | |
584 dsphys->ds_prev_snap_txg = | |
585 clone_parent->ds_phys->ds_creation_txg; | |
586 dsphys->ds_used_bytes = | |
587 clone_parent->ds_phys->ds_used_bytes; | |
588 dsphys->ds_compressed_bytes = | |
589 clone_parent->ds_phys->ds_compressed_bytes; | |
590 dsphys->ds_uncompressed_bytes = | |
591 clone_parent->ds_phys->ds_uncompressed_bytes; | |
592 dsphys->ds_bp = clone_parent->ds_phys->ds_bp; | |
593 | |
594 dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); | |
595 clone_parent->ds_phys->ds_num_children++; | |
596 | |
597 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
598 dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; | |
599 } | |
1544 | 600 dmu_buf_rele(dbuf, FTAG); |
789 | 601 |
602 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
603 dd->dd_phys->dd_head_dataset_obj = dsobj; | |
604 dsl_dir_close(dd, FTAG); | |
605 | |
2199 | 606 return (dsobj); |
607 } | |
608 | |
609 struct destroyarg { | |
610 dsl_sync_task_group_t *dstg; | |
611 char *snapname; | |
612 void *tag; | |
613 char *failed; | |
614 }; | |
615 | |
616 static int | |
617 dsl_snapshot_destroy_one(char *name, void *arg) | |
618 { | |
619 struct destroyarg *da = arg; | |
620 dsl_dataset_t *ds; | |
621 char *cp; | |
622 int err; | |
623 | |
624 (void) strcat(name, "@"); | |
625 (void) strcat(name, da->snapname); | |
626 err = dsl_dataset_open(name, | |
627 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, | |
628 da->tag, &ds); | |
629 cp = strchr(name, '@'); | |
630 *cp = '\0'; | |
631 if (err == ENOENT) | |
632 return (0); | |
633 if (err) { | |
634 (void) strcpy(da->failed, name); | |
635 return (err); | |
636 } | |
637 | |
638 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, | |
639 dsl_dataset_destroy_sync, ds, da->tag, 0); | |
789 | 640 return (0); |
641 } | |
642 | |
2199 | 643 /* |
644 * Destroy 'snapname' in all descendants of 'fsname'. | |
645 */ | |
646 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy | |
647 int | |
648 dsl_snapshots_destroy(char *fsname, char *snapname) | |
649 { | |
650 int err; | |
651 struct destroyarg da; | |
652 dsl_sync_task_t *dst; | |
653 spa_t *spa; | |
654 char *cp; | |
655 | |
656 cp = strchr(fsname, '/'); | |
657 if (cp) { | |
658 *cp = '\0'; | |
659 err = spa_open(fsname, &spa, FTAG); | |
660 *cp = '/'; | |
661 } else { | |
662 err = spa_open(fsname, &spa, FTAG); | |
663 } | |
664 if (err) | |
665 return (err); | |
666 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); | |
667 da.snapname = snapname; | |
668 da.tag = FTAG; | |
669 da.failed = fsname; | |
670 | |
671 err = dmu_objset_find(fsname, | |
2417 | 672 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); |
2199 | 673 |
674 if (err == 0) | |
675 err = dsl_sync_task_group_wait(da.dstg); | |
676 | |
677 for (dst = list_head(&da.dstg->dstg_tasks); dst; | |
678 dst = list_next(&da.dstg->dstg_tasks, dst)) { | |
679 dsl_dataset_t *ds = dst->dst_arg1; | |
680 if (dst->dst_err) { | |
681 dsl_dataset_name(ds, fsname); | |
682 cp = strchr(fsname, '@'); | |
683 *cp = '\0'; | |
684 } | |
685 /* | |
686 * If it was successful, destroy_sync would have | |
687 * closed the ds | |
688 */ | |
689 if (err) | |
690 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
691 } | |
692 | |
693 dsl_sync_task_group_destroy(da.dstg); | |
694 spa_close(spa, FTAG); | |
695 return (err); | |
696 } | |
697 | |
789 | 698 int |
699 dsl_dataset_destroy(const char *name) | |
700 { | |
701 int err; | |
2199 | 702 dsl_sync_task_group_t *dstg; |
703 objset_t *os; | |
704 dsl_dataset_t *ds; | |
789 | 705 dsl_dir_t *dd; |
2199 | 706 uint64_t obj; |
707 | |
708 if (strchr(name, '@')) { | |
709 /* Destroying a snapshot is simpler */ | |
710 err = dsl_dataset_open(name, | |
711 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, | |
712 FTAG, &ds); | |
713 if (err) | |
714 return (err); | |
715 err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
716 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, | |
717 ds, FTAG, 0); | |
718 if (err) | |
719 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
720 return (err); | |
721 } | |
722 | |
723 err = dmu_objset_open(name, DMU_OST_ANY, | |
724 DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); | |
725 if (err) | |
726 return (err); | |
727 ds = os->os->os_dsl_dataset; | |
728 dd = ds->ds_dir; | |
789 | 729 |
2199 | 730 /* |
731 * Check for errors and mark this ds as inconsistent, in | |
732 * case we crash while freeing the objects. | |
733 */ | |
734 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, | |
735 dsl_dataset_destroy_begin_sync, ds, NULL, 0); | |
736 if (err) { | |
737 dmu_objset_close(os); | |
738 return (err); | |
739 } | |
740 | |
741 /* | |
742 * remove the objects in open context, so that we won't | |
743 * have too much to do in syncing context. | |
744 */ | |
745 for (obj = 0; err == 0; | |
746 err = dmu_object_next(os, &obj, FALSE)) { | |
747 dmu_tx_t *tx = dmu_tx_create(os); | |
748 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); | |
749 dmu_tx_hold_bonus(tx, obj); | |
750 err = dmu_tx_assign(tx, TXG_WAIT); | |
751 if (err) { | |
752 /* | |
753 * Perhaps there is not enough disk | |
754 * space. Just deal with it from | |
755 * dsl_dataset_destroy_sync(). | |
756 */ | |
757 dmu_tx_abort(tx); | |
758 continue; | |
759 } | |
760 VERIFY(0 == dmu_object_free(os, obj, tx)); | |
761 dmu_tx_commit(tx); | |
762 } | |
763 /* Make sure it's not dirty before we finish destroying it. */ | |
764 txg_wait_synced(dd->dd_pool, 0); | |
765 | |
766 dmu_objset_close(os); | |
767 if (err != ESRCH) | |
768 return (err); | |
769 | |
770 err = dsl_dataset_open(name, | |
771 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, | |
772 FTAG, &ds); | |
1544 | 773 if (err) |
774 return (err); | |
789 | 775 |
2199 | 776 err = dsl_dir_open(name, FTAG, &dd, NULL); |
777 if (err) { | |
778 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
779 return (err); | |
789 | 780 } |
781 | |
2199 | 782 /* |
783 * Blow away the dsl_dir + head dataset. | |
784 */ | |
785 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); | |
786 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, | |
787 dsl_dataset_destroy_sync, ds, FTAG, 0); | |
788 dsl_sync_task_create(dstg, dsl_dir_destroy_check, | |
789 dsl_dir_destroy_sync, dd, FTAG, 0); | |
790 err = dsl_sync_task_group_wait(dstg); | |
791 dsl_sync_task_group_destroy(dstg); | |
792 /* if it is successful, *destroy_sync will close the ds+dd */ | |
793 if (err) { | |
794 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
795 dsl_dir_close(dd, FTAG); | |
796 } | |
789 | 797 return (err); |
798 } | |
799 | |
800 int | |
2199 | 801 dsl_dataset_rollback(dsl_dataset_t *ds) |
789 | 802 { |
2199 | 803 ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX); |
804 return (dsl_sync_task_do(ds->ds_dir->dd_pool, | |
805 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, | |
806 ds, NULL, 0)); | |
789 | 807 } |
808 | |
809 void * | |
810 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, | |
811 void *p, dsl_dataset_evict_func_t func) | |
812 { | |
813 void *old; | |
814 | |
815 mutex_enter(&ds->ds_lock); | |
816 old = ds->ds_user_ptr; | |
817 if (old == NULL) { | |
818 ds->ds_user_ptr = p; | |
819 ds->ds_user_evict_func = func; | |
820 } | |
821 mutex_exit(&ds->ds_lock); | |
822 return (old); | |
823 } | |
824 | |
825 void * | |
826 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) | |
827 { | |
828 return (ds->ds_user_ptr); | |
829 } | |
830 | |
831 | |
832 void | |
833 dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp) | |
834 { | |
835 *bp = ds->ds_phys->ds_bp; | |
836 } | |
837 | |
838 void | |
839 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
840 { | |
841 ASSERT(dmu_tx_is_syncing(tx)); | |
842 /* If it's the meta-objset, set dp_meta_rootbp */ | |
843 if (ds == NULL) { | |
844 tx->tx_pool->dp_meta_rootbp = *bp; | |
845 } else { | |
846 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
847 ds->ds_phys->ds_bp = *bp; | |
848 } | |
849 } | |
850 | |
851 spa_t * | |
852 dsl_dataset_get_spa(dsl_dataset_t *ds) | |
853 { | |
854 return (ds->ds_dir->dd_pool->dp_spa); | |
855 } | |
856 | |
857 void | |
858 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) | |
859 { | |
860 dsl_pool_t *dp; | |
861 | |
862 if (ds == NULL) /* this is the meta-objset */ | |
863 return; | |
864 | |
865 ASSERT(ds->ds_user_ptr != NULL); | |
2885 | 866 |
867 if (ds->ds_phys->ds_next_snap_obj != 0) | |
868 panic("dirtying snapshot!"); | |
789 | 869 |
870 dp = ds->ds_dir->dd_pool; | |
871 | |
872 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { | |
873 /* up the hold count until we can be written out */ | |
874 dmu_buf_add_ref(ds->ds_dbuf, ds); | |
875 } | |
876 } | |
877 | |
878 struct killarg { | |
879 uint64_t *usedp; | |
880 uint64_t *compressedp; | |
881 uint64_t *uncompressedp; | |
882 zio_t *zio; | |
883 dmu_tx_t *tx; | |
884 }; | |
885 | |
886 static int | |
887 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) | |
888 { | |
889 struct killarg *ka = arg; | |
890 blkptr_t *bp = &bc->bc_blkptr; | |
891 | |
892 ASSERT3U(bc->bc_errno, ==, 0); | |
893 | |
894 /* | |
895 * Since this callback is not called concurrently, no lock is | |
896 * needed on the accounting values. | |
897 */ | |
2082 | 898 *ka->usedp += bp_get_dasize(spa, bp); |
789 | 899 *ka->compressedp += BP_GET_PSIZE(bp); |
900 *ka->uncompressedp += BP_GET_UCSIZE(bp); | |
901 /* XXX check for EIO? */ | |
902 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, | |
903 ARC_NOWAIT); | |
904 return (0); | |
905 } | |
906 | |
907 /* ARGSUSED */ | |
2199 | 908 static int |
909 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
789 | 910 { |
2199 | 911 dsl_dataset_t *ds = arg1; |
789 | 912 |
2199 | 913 /* |
914 * There must be a previous snapshot. I suppose we could roll | |
915 * it back to being empty (and re-initialize the upper (ZPL) | |
916 * layer). But for now there's no way to do this via the user | |
917 * interface. | |
918 */ | |
919 if (ds->ds_phys->ds_prev_snap_txg == 0) | |
789 | 920 return (EINVAL); |
921 | |
2199 | 922 /* |
923 * This must not be a snapshot. | |
924 */ | |
925 if (ds->ds_phys->ds_next_snap_obj != 0) | |
926 return (EINVAL); | |
789 | 927 |
928 /* | |
929 * If we made changes this txg, traverse_dsl_dataset won't find | |
930 * them. Try again. | |
931 */ | |
2199 | 932 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) |
789 | 933 return (EAGAIN); |
2199 | 934 |
935 return (0); | |
936 } | |
789 | 937 |
2199 | 938 /* ARGSUSED */ |
939 static void | |
940 dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
941 { | |
942 dsl_dataset_t *ds = arg1; | |
943 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; | |
789 | 944 |
945 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
946 | |
947 /* Zero out the deadlist. */ | |
948 bplist_close(&ds->ds_deadlist); | |
949 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); | |
950 ds->ds_phys->ds_deadlist_obj = | |
951 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1544 | 952 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, |
953 ds->ds_phys->ds_deadlist_obj)); | |
789 | 954 |
955 { | |
956 /* Free blkptrs that we gave birth to */ | |
957 zio_t *zio; | |
958 uint64_t used = 0, compressed = 0, uncompressed = 0; | |
959 struct killarg ka; | |
960 | |
961 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, | |
962 ZIO_FLAG_MUSTSUCCEED); | |
963 ka.usedp = &used; | |
964 ka.compressedp = &compressed; | |
965 ka.uncompressedp = &uncompressed; | |
966 ka.zio = zio; | |
967 ka.tx = tx; | |
968 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, | |
969 ADVANCE_POST, kill_blkptr, &ka); | |
970 (void) zio_wait(zio); | |
971 | |
2199 | 972 dsl_dir_diduse_space(ds->ds_dir, |
789 | 973 -used, -compressed, -uncompressed, tx); |
974 } | |
975 | |
2199 | 976 /* Change our contents to that of the prev snapshot */ |
789 | 977 ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); |
978 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; | |
979 ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; | |
980 ds->ds_phys->ds_compressed_bytes = | |
981 ds->ds_prev->ds_phys->ds_compressed_bytes; | |
982 ds->ds_phys->ds_uncompressed_bytes = | |
983 ds->ds_prev->ds_phys->ds_uncompressed_bytes; | |
2082 | 984 ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; |
789 | 985 ds->ds_phys->ds_unique_bytes = 0; |
986 | |
2532
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
987 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { |
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
988 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); |
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
989 ds->ds_prev->ds_phys->ds_unique_bytes = 0; |
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
990 } |
789 | 991 } |
992 | |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
993 /* ARGSUSED */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
994 static int |
2199 | 995 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
996 { |
2199 | 997 dsl_dataset_t *ds = arg1; |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
998 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
999 /* |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1000 * Can't delete a head dataset if there are snapshots of it. |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1001 * (Except if the only snapshots are from the branch we cloned |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1002 * from.) |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1003 */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1004 if (ds->ds_prev != NULL && |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1005 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1006 return (EINVAL); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1007 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1008 return (0); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1009 } |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
1010 |
2199 | 1011 /* ARGSUSED */ |
1012 static void | |
1013 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
789 | 1014 { |
2199 | 1015 dsl_dataset_t *ds = arg1; |
789 | 1016 |
2199 | 1017 /* Mark it as inconsistent on-disk, in case we crash */ |
1018 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1019 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; | |
1020 } | |
789 | 1021 |
2199 | 1022 /* ARGSUSED */ |
1023 static int | |
1024 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1025 { | |
1026 dsl_dataset_t *ds = arg1; | |
789 | 1027 |
1028 /* Can't delete a branch point. */ | |
2199 | 1029 if (ds->ds_phys->ds_num_children > 1) |
1030 return (EEXIST); | |
789 | 1031 |
1032 /* | |
1033 * Can't delete a head dataset if there are snapshots of it. | |
1034 * (Except if the only snapshots are from the branch we cloned | |
1035 * from.) | |
1036 */ | |
1037 if (ds->ds_prev != NULL && | |
2199 | 1038 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) |
789 | 1039 return (EINVAL); |
1040 | |
1041 /* | |
1042 * If we made changes this txg, traverse_dsl_dataset won't find | |
1043 * them. Try again. | |
1044 */ | |
2199 | 1045 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) |
789 | 1046 return (EAGAIN); |
2199 | 1047 |
1048 /* XXX we should do some i/o error checking... */ | |
1049 return (0); | |
1050 } | |
1051 | |
1052 static void | |
1053 dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) | |
1054 { | |
1055 dsl_dataset_t *ds = arg1; | |
1056 uint64_t used = 0, compressed = 0, uncompressed = 0; | |
1057 zio_t *zio; | |
1058 int err; | |
1059 int after_branch_point = FALSE; | |
1060 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
1061 objset_t *mos = dp->dp_meta_objset; | |
1062 dsl_dataset_t *ds_prev = NULL; | |
1063 uint64_t obj; | |
1064 | |
1065 ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX); | |
1066 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); | |
1067 ASSERT(ds->ds_prev == NULL || | |
1068 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); | |
1069 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); | |
1070 | |
1071 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); | |
1072 | |
1073 obj = ds->ds_object; | |
789 | 1074 |
1075 if (ds->ds_phys->ds_prev_snap_obj != 0) { | |
1076 if (ds->ds_prev) { | |
1077 ds_prev = ds->ds_prev; | |
1078 } else { | |
2199 | 1079 VERIFY(0 == dsl_dataset_open_obj(dp, |
789 | 1080 ds->ds_phys->ds_prev_snap_obj, NULL, |
2199 | 1081 DS_MODE_NONE, FTAG, &ds_prev)); |
789 | 1082 } |
1083 after_branch_point = | |
1084 (ds_prev->ds_phys->ds_next_snap_obj != obj); | |
1085 | |
1086 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); | |
1087 if (after_branch_point && | |
1088 ds->ds_phys->ds_next_snap_obj == 0) { | |
1089 /* This clone is toast. */ | |
1090 ASSERT(ds_prev->ds_phys->ds_num_children > 1); | |
1091 ds_prev->ds_phys->ds_num_children--; | |
1092 } else if (!after_branch_point) { | |
1093 ds_prev->ds_phys->ds_next_snap_obj = | |
1094 ds->ds_phys->ds_next_snap_obj; | |
1095 } | |
1096 } | |
1097 | |
1098 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); | |
1099 | |
1100 if (ds->ds_phys->ds_next_snap_obj != 0) { | |
2199 | 1101 blkptr_t bp; |
789 | 1102 dsl_dataset_t *ds_next; |
1103 uint64_t itor = 0; | |
1104 | |
1105 spa_scrub_restart(dp->dp_spa, tx->tx_txg); | |
1106 | |
2199 | 1107 VERIFY(0 == dsl_dataset_open_obj(dp, |
1544 | 1108 ds->ds_phys->ds_next_snap_obj, NULL, |
1109 DS_MODE_NONE, FTAG, &ds_next)); | |
789 | 1110 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); |
1111 | |
1112 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); | |
1113 ds_next->ds_phys->ds_prev_snap_obj = | |
1114 ds->ds_phys->ds_prev_snap_obj; | |
1115 ds_next->ds_phys->ds_prev_snap_txg = | |
1116 ds->ds_phys->ds_prev_snap_txg; | |
1117 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, | |
1118 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); | |
1119 | |
1120 /* | |
1121 * Transfer to our deadlist (which will become next's | |
1122 * new deadlist) any entries from next's current | |
1123 * deadlist which were born before prev, and free the | |
1124 * other entries. | |
1125 * | |
1126 * XXX we're doing this long task with the config lock held | |
1127 */ | |
1128 while (bplist_iterate(&ds_next->ds_deadlist, &itor, | |
1129 &bp) == 0) { | |
1130 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { | |
1544 | 1131 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, |
1132 &bp, tx)); | |
789 | 1133 if (ds_prev && !after_branch_point && |
1134 bp.blk_birth > | |
1135 ds_prev->ds_phys->ds_prev_snap_txg) { | |
1136 ds_prev->ds_phys->ds_unique_bytes += | |
2082 | 1137 bp_get_dasize(dp->dp_spa, &bp); |
789 | 1138 } |
1139 } else { | |
2082 | 1140 used += bp_get_dasize(dp->dp_spa, &bp); |
789 | 1141 compressed += BP_GET_PSIZE(&bp); |
1142 uncompressed += BP_GET_UCSIZE(&bp); | |
1143 /* XXX check return value? */ | |
1144 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, | |
1145 &bp, NULL, NULL, ARC_NOWAIT); | |
1146 } | |
1147 } | |
1148 | |
1149 /* free next's deadlist */ | |
1150 bplist_close(&ds_next->ds_deadlist); | |
1151 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); | |
1152 | |
1153 /* set next's deadlist to our deadlist */ | |
1154 ds_next->ds_phys->ds_deadlist_obj = | |
1155 ds->ds_phys->ds_deadlist_obj; | |
1544 | 1156 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, |
1157 ds_next->ds_phys->ds_deadlist_obj)); | |
789 | 1158 ds->ds_phys->ds_deadlist_obj = 0; |
1159 | |
1160 if (ds_next->ds_phys->ds_next_snap_obj != 0) { | |
1161 /* | |
1162 * Update next's unique to include blocks which | |
1163 * were previously shared by only this snapshot | |
1164 * and it. Those blocks will be born after the | |
1165 * prev snap and before this snap, and will have | |
1166 * died after the next snap and before the one | |
1167 * after that (ie. be on the snap after next's | |
1168 * deadlist). | |
1169 * | |
1170 * XXX we're doing this long task with the | |
1171 * config lock held | |
1172 */ | |
1173 dsl_dataset_t *ds_after_next; | |
1174 | |
2199 | 1175 VERIFY(0 == dsl_dataset_open_obj(dp, |
789 | 1176 ds_next->ds_phys->ds_next_snap_obj, NULL, |
1544 | 1177 DS_MODE_NONE, FTAG, &ds_after_next)); |
789 | 1178 itor = 0; |
1179 while (bplist_iterate(&ds_after_next->ds_deadlist, | |
1180 &itor, &bp) == 0) { | |
1181 if (bp.blk_birth > | |
1182 ds->ds_phys->ds_prev_snap_txg && | |
1183 bp.blk_birth <= | |
1184 ds->ds_phys->ds_creation_txg) { | |
1185 ds_next->ds_phys->ds_unique_bytes += | |
2082 | 1186 bp_get_dasize(dp->dp_spa, &bp); |
789 | 1187 } |
1188 } | |
1189 | |
1190 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); | |
1191 ASSERT3P(ds_next->ds_prev, ==, NULL); | |
1192 } else { | |
1193 /* | |
1194 * It would be nice to update the head dataset's | |
1195 * unique. To do so we would have to traverse | |
1196 * it for blocks born after ds_prev, which is | |
1197 * pretty expensive just to maintain something | |
1198 * for debugging purposes. | |
1199 */ | |
1200 ASSERT3P(ds_next->ds_prev, ==, ds); | |
1201 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, | |
1202 ds_next); | |
1203 if (ds_prev) { | |
2199 | 1204 VERIFY(0 == dsl_dataset_open_obj(dp, |
1544 | 1205 ds->ds_phys->ds_prev_snap_obj, NULL, |
1206 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); | |
789 | 1207 } else { |
1208 ds_next->ds_prev = NULL; | |
1209 } | |
1210 } | |
1211 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); | |
1212 | |
1213 /* | |
1214 * NB: unique_bytes is not accurate for head objsets | |
1215 * because we don't update it when we delete the most | |
1216 * recent snapshot -- see above comment. | |
1217 */ | |
1218 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); | |
1219 } else { | |
1220 /* | |
1221 * There's no next snapshot, so this is a head dataset. | |
1222 * Destroy the deadlist. Unless it's a clone, the | |
1223 * deadlist should be empty. (If it's a clone, it's | |
1224 * safe to ignore the deadlist contents.) | |
1225 */ | |
1226 struct killarg ka; | |
1227 | |
1228 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); | |
1229 bplist_close(&ds->ds_deadlist); | |
1230 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); | |
1231 ds->ds_phys->ds_deadlist_obj = 0; | |
1232 | |
1233 /* | |
1234 * Free everything that we point to (that's born after | |
1235 * the previous snapshot, if we are a clone) | |
1236 * | |
1237 * XXX we're doing this long task with the config lock held | |
1238 */ | |
1239 ka.usedp = &used; | |
1240 ka.compressedp = &compressed; | |
1241 ka.uncompressedp = &uncompressed; | |
1242 ka.zio = zio; | |
1243 ka.tx = tx; | |
1244 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, | |
1245 ADVANCE_POST, kill_blkptr, &ka); | |
1246 ASSERT3U(err, ==, 0); | |
1247 } | |
1248 | |
1249 err = zio_wait(zio); | |
1250 ASSERT3U(err, ==, 0); | |
1251 | |
2199 | 1252 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); |
789 | 1253 |
1254 if (ds->ds_phys->ds_snapnames_zapobj) { | |
1255 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); | |
1256 ASSERT(err == 0); | |
1257 } | |
1258 | |
2199 | 1259 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { |
789 | 1260 /* Erase the link in the dataset */ |
2199 | 1261 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); |
1262 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; | |
789 | 1263 /* |
1264 * dsl_dir_sync_destroy() called us, they'll destroy | |
1265 * the dataset. | |
1266 */ | |
1267 } else { | |
1268 /* remove from snapshot namespace */ | |
1269 dsl_dataset_t *ds_head; | |
2199 | 1270 VERIFY(0 == dsl_dataset_open_obj(dp, |
1271 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, | |
1544 | 1272 DS_MODE_NONE, FTAG, &ds_head)); |
2207
47efcb3433a7
6439370 assertion failures possible in dsl_dataset_destroy_sync()
ahrens
parents:
2199
diff
changeset
|
1273 VERIFY(0 == dsl_dataset_get_snapname(ds)); |
789 | 1274 #ifdef ZFS_DEBUG |
1275 { | |
1276 uint64_t val; | |
1277 err = zap_lookup(mos, | |
1278 ds_head->ds_phys->ds_snapnames_zapobj, | |
2199 | 1279 ds->ds_snapname, 8, 1, &val); |
789 | 1280 ASSERT3U(err, ==, 0); |
1281 ASSERT3U(val, ==, obj); | |
1282 } | |
1283 #endif | |
1284 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, | |
2199 | 1285 ds->ds_snapname, tx); |
789 | 1286 ASSERT(err == 0); |
1287 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); | |
1288 } | |
1289 | |
1290 if (ds_prev && ds->ds_prev != ds_prev) | |
1291 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); | |
1292 | |
2199 | 1293 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); |
1294 VERIFY(0 == dmu_object_free(mos, obj, tx)); | |
1295 } | |
1296 | |
1297 /* ARGSUSED */ | |
1298 int | |
1299 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1300 { | |
1301 objset_t *os = arg1; | |
1302 dsl_dataset_t *ds = os->os->os_dsl_dataset; | |
1303 const char *snapname = arg2; | |
1304 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; | |
1305 int err; | |
1306 uint64_t value; | |
789 | 1307 |
1308 /* | |
2199 | 1309 * We don't allow multiple snapshots of the same txg. If there |
1310 * is already one, try again. | |
1311 */ | |
1312 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) | |
1313 return (EAGAIN); | |
1314 | |
1315 /* | |
1316 * Check for conflicting name snapshot name. | |
789 | 1317 */ |
2199 | 1318 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, |
1319 snapname, 8, 1, &value); | |
1320 if (err == 0) | |
1321 return (EEXIST); | |
1322 if (err != ENOENT) | |
1323 return (err); | |
789 | 1324 |
2199 | 1325 ds->ds_trysnap_txg = tx->tx_txg; |
789 | 1326 return (0); |
1327 } | |
1328 | |
2199 | 1329 void |
1330 dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
789 | 1331 { |
2199 | 1332 objset_t *os = arg1; |
1333 dsl_dataset_t *ds = os->os->os_dsl_dataset; | |
1334 const char *snapname = arg2; | |
1335 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
789 | 1336 dmu_buf_t *dbuf; |
1337 dsl_dataset_phys_t *dsphys; | |
2199 | 1338 uint64_t dsobj; |
789 | 1339 objset_t *mos = dp->dp_meta_objset; |
1340 int err; | |
1341 | |
1342 spa_scrub_restart(dp->dp_spa, tx->tx_txg); | |
2199 | 1343 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); |
789 | 1344 |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
1345 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
1346 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 1347 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 1348 dmu_buf_will_dirty(dbuf, tx); |
1349 dsphys = dbuf->db_data; | |
2199 | 1350 dsphys->ds_dir_obj = ds->ds_dir->dd_object; |
789 | 1351 dsphys->ds_fsid_guid = unique_create(); |
1352 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ | |
1353 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, | |
1354 sizeof (dsphys->ds_guid)); | |
1355 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; | |
1356 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; | |
1357 dsphys->ds_next_snap_obj = ds->ds_object; | |
1358 dsphys->ds_num_children = 1; | |
1359 dsphys->ds_creation_time = gethrestime_sec(); | |
1360 dsphys->ds_creation_txg = tx->tx_txg; | |
1361 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; | |
1362 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; | |
1363 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; | |
1364 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; | |
2082 | 1365 dsphys->ds_flags = ds->ds_phys->ds_flags; |
789 | 1366 dsphys->ds_bp = ds->ds_phys->ds_bp; |
1544 | 1367 dmu_buf_rele(dbuf, FTAG); |
789 | 1368 |
2199 | 1369 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); |
1370 if (ds->ds_prev) { | |
1371 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == | |
789 | 1372 ds->ds_object || |
2199 | 1373 ds->ds_prev->ds_phys->ds_num_children > 1); |
1374 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { | |
1375 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
789 | 1376 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, |
2199 | 1377 ds->ds_prev->ds_phys->ds_creation_txg); |
1378 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; | |
789 | 1379 } |
1380 } | |
1381 | |
1382 bplist_close(&ds->ds_deadlist); | |
1383 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1384 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); | |
1385 ds->ds_phys->ds_prev_snap_obj = dsobj; | |
1386 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; | |
1387 ds->ds_phys->ds_unique_bytes = 0; | |
1388 ds->ds_phys->ds_deadlist_obj = | |
1389 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1544 | 1390 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, |
1391 ds->ds_phys->ds_deadlist_obj)); | |
789 | 1392 |
1393 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); | |
1394 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, | |
1395 snapname, 8, 1, &dsobj, tx); | |
1396 ASSERT(err == 0); | |
1397 | |
1398 if (ds->ds_prev) | |
1399 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); | |
1544 | 1400 VERIFY(0 == dsl_dataset_open_obj(dp, |
1401 ds->ds_phys->ds_prev_snap_obj, snapname, | |
1402 DS_MODE_NONE, ds, &ds->ds_prev)); | |
789 | 1403 } |
1404 | |
1405 void | |
1406 dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx) | |
1407 { | |
1408 ASSERT(dmu_tx_is_syncing(tx)); | |
1409 ASSERT(ds->ds_user_ptr != NULL); | |
1410 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); | |
1411 | |
1412 dmu_objset_sync(ds->ds_user_ptr, tx); | |
1413 dsl_dir_dirty(ds->ds_dir, tx); | |
1414 bplist_close(&ds->ds_deadlist); | |
1415 | |
1544 | 1416 dmu_buf_rele(ds->ds_dbuf, ds); |
789 | 1417 } |
1418 | |
1419 void | |
2885 | 1420 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) |
789 | 1421 { |
2885 | 1422 dsl_dir_stats(ds->ds_dir, nv); |
789 | 1423 |
2885 | 1424 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, |
1425 ds->ds_phys->ds_creation_time); | |
1426 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, | |
1427 ds->ds_phys->ds_creation_txg); | |
1428 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, | |
1429 ds->ds_phys->ds_used_bytes); | |
789 | 1430 |
1431 if (ds->ds_phys->ds_next_snap_obj) { | |
1432 /* | |
1433 * This is a snapshot; override the dd's space used with | |
2885 | 1434 * our unique space and compression ratio. |
789 | 1435 */ |
2885 | 1436 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, |
1437 ds->ds_phys->ds_unique_bytes); | |
1438 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, | |
1439 ds->ds_phys->ds_compressed_bytes == 0 ? 100 : | |
1440 (ds->ds_phys->ds_uncompressed_bytes * 100 / | |
1441 ds->ds_phys->ds_compressed_bytes)); | |
789 | 1442 } |
1443 } | |
1444 | |
2885 | 1445 void |
1446 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) | |
789 | 1447 { |
2885 | 1448 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; |
1449 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; | |
1450 if (ds->ds_phys->ds_next_snap_obj) { | |
1451 stat->dds_is_snapshot = B_TRUE; | |
1452 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; | |
1453 } | |
1454 | |
1455 /* clone origin is really a dsl_dir thing... */ | |
1456 if (ds->ds_dir->dd_phys->dd_clone_parent_obj) { | |
1457 dsl_dataset_t *ods; | |
1458 | |
1459 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); | |
1460 VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool, | |
1461 ds->ds_dir->dd_phys->dd_clone_parent_obj, | |
1462 NULL, DS_MODE_NONE, FTAG, &ods)); | |
1463 dsl_dataset_name(ods, stat->dds_clone_of); | |
1464 dsl_dataset_close(ods, DS_MODE_NONE, FTAG); | |
1465 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); | |
1466 } | |
1467 } | |
1468 | |
1469 uint64_t | |
1470 dsl_dataset_fsid_guid(dsl_dataset_t *ds) | |
1471 { | |
1472 return (ds->ds_phys->ds_fsid_guid); | |
1473 } | |
1474 | |
1475 void | |
1476 dsl_dataset_space(dsl_dataset_t *ds, | |
1477 uint64_t *refdbytesp, uint64_t *availbytesp, | |
1478 uint64_t *usedobjsp, uint64_t *availobjsp) | |
1479 { | |
1480 *refdbytesp = ds->ds_phys->ds_used_bytes; | |
1481 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); | |
1482 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; | |
1483 *availobjsp = DN_MAX_OBJECT - *usedobjsp; | |
789 | 1484 } |
1485 | |
2199 | 1486 /* ARGSUSED */ |
789 | 1487 static int |
2199 | 1488 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) |
789 | 1489 { |
2199 | 1490 dsl_dataset_t *ds = arg1; |
1491 char *newsnapname = arg2; | |
1492 dsl_dir_t *dd = ds->ds_dir; | |
789 | 1493 objset_t *mos = dd->dd_pool->dp_meta_objset; |
2199 | 1494 dsl_dataset_t *hds; |
1495 uint64_t val; | |
789 | 1496 int err; |
1497 | |
2199 | 1498 err = dsl_dataset_open_obj(dd->dd_pool, |
1499 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); | |
789 | 1500 if (err) |
1501 return (err); | |
1502 | |
2199 | 1503 /* new name better not be in use */ |
1504 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, | |
1505 newsnapname, 8, 1, &val); | |
1506 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); | |
789 | 1507 |
2199 | 1508 if (err == 0) |
1509 err = EEXIST; | |
1510 else if (err == ENOENT) | |
1511 err = 0; | |
1512 return (err); | |
1513 } | |
789 | 1514 |
2199 | 1515 static void |
1516 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
1517 { | |
1518 dsl_dataset_t *ds = arg1; | |
1519 char *newsnapname = arg2; | |
1520 dsl_dir_t *dd = ds->ds_dir; | |
1521 objset_t *mos = dd->dd_pool->dp_meta_objset; | |
1522 dsl_dataset_t *hds; | |
1523 int err; | |
789 | 1524 |
2199 | 1525 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); |
789 | 1526 |
2199 | 1527 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, |
1528 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); | |
789 | 1529 |
2199 | 1530 VERIFY(0 == dsl_dataset_get_snapname(ds)); |
1531 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, | |
1532 ds->ds_snapname, tx); | |
789 | 1533 ASSERT3U(err, ==, 0); |
2199 | 1534 mutex_enter(&ds->ds_lock); |
1535 (void) strcpy(ds->ds_snapname, newsnapname); | |
1536 mutex_exit(&ds->ds_lock); | |
1537 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, | |
1538 ds->ds_snapname, 8, 1, &ds->ds_object, tx); | |
789 | 1539 ASSERT3U(err, ==, 0); |
1540 | |
2199 | 1541 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); |
789 | 1542 } |
1543 | |
1544 #pragma weak dmu_objset_rename = dsl_dataset_rename | |
1545 int | |
2199 | 1546 dsl_dataset_rename(const char *oldname, const char *newname) |
789 | 1547 { |
1548 dsl_dir_t *dd; | |
2199 | 1549 dsl_dataset_t *ds; |
789 | 1550 const char *tail; |
1551 int err; | |
1552 | |
2199 | 1553 err = dsl_dir_open(oldname, FTAG, &dd, &tail); |
1544 | 1554 if (err) |
1555 return (err); | |
789 | 1556 if (tail == NULL) { |
2199 | 1557 err = dsl_dir_rename(dd, newname); |
789 | 1558 dsl_dir_close(dd, FTAG); |
1559 return (err); | |
1560 } | |
1561 if (tail[0] != '@') { | |
1562 /* the name ended in a nonexistant component */ | |
1563 dsl_dir_close(dd, FTAG); | |
1564 return (ENOENT); | |
1565 } | |
1566 | |
2199 | 1567 dsl_dir_close(dd, FTAG); |
1568 | |
1569 /* new name must be snapshot in same filesystem */ | |
1570 tail = strchr(newname, '@'); | |
1571 if (tail == NULL) | |
1572 return (EINVAL); | |
1573 tail++; | |
1574 if (strncmp(oldname, newname, tail - newname) != 0) | |
1575 return (EXDEV); | |
789 | 1576 |
2199 | 1577 err = dsl_dataset_open(oldname, |
1578 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); | |
1579 if (err) | |
1580 return (err); | |
1581 | |
1582 err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
1583 dsl_dataset_snapshot_rename_check, | |
1584 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); | |
1585 | |
1586 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); | |
1587 | |
789 | 1588 return (err); |
1589 } | |
2082 | 1590 |
2199 | 1591 struct promotearg { |
1592 uint64_t used, comp, uncomp, unique; | |
1593 uint64_t newnext_obj, snapnames_obj; | |
1594 }; | |
1595 | |
2082 | 1596 static int |
2199 | 1597 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) |
2082 | 1598 { |
2199 | 1599 dsl_dataset_t *hds = arg1; |
1600 struct promotearg *pa = arg2; | |
1601 dsl_dir_t *dd = hds->ds_dir; | |
1602 dsl_pool_t *dp = hds->ds_dir->dd_pool; | |
2082 | 1603 dsl_dir_t *pdd = NULL; |
1604 dsl_dataset_t *ds = NULL; | |
1605 dsl_dataset_t *pivot_ds = NULL; | |
1606 dsl_dataset_t *newnext_ds = NULL; | |
1607 int err; | |
1608 char *name = NULL; | |
2199 | 1609 uint64_t itor = 0; |
2082 | 1610 blkptr_t bp; |
1611 | |
2199 | 1612 bzero(pa, sizeof (*pa)); |
1613 | |
2082 | 1614 /* Check that it is a clone */ |
1615 if (dd->dd_phys->dd_clone_parent_obj == 0) | |
1616 return (EINVAL); | |
1617 | |
2199 | 1618 /* Since this is so expensive, don't do the preliminary check */ |
1619 if (!dmu_tx_is_syncing(tx)) | |
1620 return (0); | |
1621 | |
1622 if (err = dsl_dataset_open_obj(dp, | |
2082 | 1623 dd->dd_phys->dd_clone_parent_obj, |
1624 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) | |
1625 goto out; | |
1626 pdd = pivot_ds->ds_dir; | |
2199 | 1627 |
1628 { | |
1629 dsl_dataset_t *phds; | |
1630 if (err = dsl_dataset_open_obj(dd->dd_pool, | |
1631 pdd->dd_phys->dd_head_dataset_obj, | |
1632 NULL, DS_MODE_NONE, FTAG, &phds)) | |
1633 goto out; | |
1634 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; | |
1635 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); | |
1636 } | |
2082 | 1637 |
1638 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { | |
1639 err = EXDEV; | |
1640 goto out; | |
1641 } | |
1642 | |
1643 /* find pivot point's new next ds */ | |
1644 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, | |
1645 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); | |
1646 while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { | |
1647 dsl_dataset_t *prev; | |
1648 | |
1649 if (err = dsl_dataset_open_obj(dd->dd_pool, | |
2199 | 1650 newnext_ds->ds_phys->ds_prev_snap_obj, |
1651 NULL, DS_MODE_NONE, FTAG, &prev)) | |
2082 | 1652 goto out; |
1653 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); | |
1654 newnext_ds = prev; | |
1655 } | |
2199 | 1656 pa->newnext_obj = newnext_ds->ds_object; |
2082 | 1657 |
1658 /* compute pivot point's new unique space */ | |
1659 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, | |
1660 &itor, &bp)) == 0) { | |
1661 if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) | |
2199 | 1662 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); |
2082 | 1663 } |
1664 if (err != ENOENT) | |
1665 goto out; | |
1666 | |
1667 /* Walk the snapshots that we are moving */ | |
1668 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); | |
1669 ds = pivot_ds; | |
1670 /* CONSTCOND */ | |
1671 while (TRUE) { | |
1672 uint64_t val, dlused, dlcomp, dluncomp; | |
1673 dsl_dataset_t *prev; | |
1674 | |
1675 /* Check that the snapshot name does not conflict */ | |
1676 dsl_dataset_name(ds, name); | |
1677 err = zap_lookup(dd->dd_pool->dp_meta_objset, | |
1678 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, | |
1679 8, 1, &val); | |
1680 if (err != ENOENT) { | |
1681 if (err == 0) | |
1682 err = EEXIST; | |
1683 goto out; | |
1684 } | |
1685 | |
1686 /* | |
1687 * compute space to transfer. Each snapshot gave birth to: | |
1688 * (my used) - (prev's used) + (deadlist's used) | |
1689 */ | |
2199 | 1690 pa->used += ds->ds_phys->ds_used_bytes; |
1691 pa->comp += ds->ds_phys->ds_compressed_bytes; | |
1692 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; | |
2082 | 1693 |
1694 /* If we reach the first snapshot, we're done. */ | |
1695 if (ds->ds_phys->ds_prev_snap_obj == 0) | |
1696 break; | |
1697 | |
1698 if (err = bplist_space(&ds->ds_deadlist, | |
1699 &dlused, &dlcomp, &dluncomp)) | |
1700 goto out; | |
1701 if (err = dsl_dataset_open_obj(dd->dd_pool, | |
1702 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, | |
1703 FTAG, &prev)) | |
1704 goto out; | |
2199 | 1705 pa->used += dlused - prev->ds_phys->ds_used_bytes; |
1706 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; | |
1707 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; | |
2082 | 1708 |
1709 /* | |
1710 * We could be a clone of a clone. If we reach our | |
1711 * parent's branch point, we're done. | |
1712 */ | |
1713 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { | |
1714 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); | |
1715 break; | |
1716 } | |
1717 if (ds != pivot_ds) | |
1718 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1719 ds = prev; | |
1720 } | |
1721 | |
1722 /* Check that there is enough space here */ | |
2199 | 1723 err = dsl_dir_transfer_possible(pdd, dd, pa->used); |
1724 | |
1725 out: | |
1726 if (ds && ds != pivot_ds) | |
1727 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1728 if (pivot_ds) | |
1729 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); | |
1730 if (newnext_ds) | |
1731 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); | |
1732 if (name) | |
1733 kmem_free(name, MAXPATHLEN); | |
1734 return (err); | |
1735 } | |
2082 | 1736 |
2199 | 1737 static void |
1738 dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
1739 { | |
1740 dsl_dataset_t *hds = arg1; | |
1741 struct promotearg *pa = arg2; | |
1742 dsl_dir_t *dd = hds->ds_dir; | |
1743 dsl_pool_t *dp = hds->ds_dir->dd_pool; | |
1744 dsl_dir_t *pdd = NULL; | |
1745 dsl_dataset_t *ds, *pivot_ds; | |
1746 char *name; | |
1747 | |
1748 ASSERT(dd->dd_phys->dd_clone_parent_obj != 0); | |
1749 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); | |
1750 | |
1751 VERIFY(0 == dsl_dataset_open_obj(dp, | |
1752 dd->dd_phys->dd_clone_parent_obj, | |
1753 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)); | |
2417 | 1754 /* |
1755 * We need to explicitly open pdd, since pivot_ds's pdd will be | |
1756 * changing. | |
1757 */ | |
1758 VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object, | |
1759 NULL, FTAG, &pdd)); | |
2082 | 1760 |
1761 /* move snapshots to this dir */ | |
2199 | 1762 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); |
2082 | 1763 ds = pivot_ds; |
1764 /* CONSTCOND */ | |
1765 while (TRUE) { | |
1766 dsl_dataset_t *prev; | |
1767 | |
1768 /* move snap name entry */ | |
1769 dsl_dataset_name(ds, name); | |
2199 | 1770 VERIFY(0 == zap_remove(dp->dp_meta_objset, |
1771 pa->snapnames_obj, ds->ds_snapname, tx)); | |
1772 VERIFY(0 == zap_add(dp->dp_meta_objset, | |
2082 | 1773 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, |
1774 8, 1, &ds->ds_object, tx)); | |
1775 | |
1776 /* change containing dsl_dir */ | |
1777 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1778 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); | |
1779 ds->ds_phys->ds_dir_obj = dd->dd_object; | |
1780 ASSERT3P(ds->ds_dir, ==, pdd); | |
1781 dsl_dir_close(ds->ds_dir, ds); | |
2199 | 1782 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, |
2082 | 1783 NULL, ds, &ds->ds_dir)); |
1784 | |
1785 ASSERT3U(dsl_prop_numcb(ds), ==, 0); | |
1786 | |
1787 if (ds->ds_phys->ds_prev_snap_obj == 0) | |
1788 break; | |
1789 | |
2199 | 1790 VERIFY(0 == dsl_dataset_open_obj(dp, |
2082 | 1791 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, |
1792 FTAG, &prev)); | |
1793 | |
1794 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { | |
1795 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); | |
1796 break; | |
1797 } | |
1798 if (ds != pivot_ds) | |
1799 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1800 ds = prev; | |
1801 } | |
2199 | 1802 if (ds != pivot_ds) |
1803 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
2082 | 1804 |
1805 /* change pivot point's next snap */ | |
1806 dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); | |
2199 | 1807 pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; |
2082 | 1808 |
1809 /* change clone_parent-age */ | |
1810 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
1811 ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); | |
1812 dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; | |
1813 dmu_buf_will_dirty(pdd->dd_dbuf, tx); | |
1814 pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; | |
1815 | |
1816 /* change space accounting */ | |
2199 | 1817 dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx); |
1818 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); | |
1819 pivot_ds->ds_phys->ds_unique_bytes = pa->unique; | |
2082 | 1820 |
2417 | 1821 dsl_dir_close(pdd, FTAG); |
2199 | 1822 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); |
1823 kmem_free(name, MAXPATHLEN); | |
2082 | 1824 } |
1825 | |
1826 int | |
1827 dsl_dataset_promote(const char *name) | |
1828 { | |
1829 dsl_dataset_t *ds; | |
1830 int err; | |
1831 dmu_object_info_t doi; | |
2199 | 1832 struct promotearg pa; |
2082 | 1833 |
1834 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); | |
1835 if (err) | |
1836 return (err); | |
1837 | |
1838 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, | |
1839 ds->ds_phys->ds_snapnames_zapobj, &doi); | |
1840 if (err) { | |
1841 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
1842 return (err); | |
1843 } | |
1844 | |
1845 /* | |
1846 * Add in 128x the snapnames zapobj size, since we will be moving | |
1847 * a bunch of snapnames to the promoted ds, and dirtying their | |
1848 * bonus buffers. | |
1849 */ | |
2199 | 1850 err = dsl_sync_task_do(ds->ds_dir->dd_pool, |
1851 dsl_dataset_promote_check, | |
1852 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); | |
2082 | 1853 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); |
1854 return (err); | |
1855 } |