Mercurial > illumos > illumos-gate
annotate usr/src/uts/common/fs/zfs/dsl_dataset.c @ 2532:752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
6458781 random spurious ENOSPC failures
author | ahrens |
---|---|
date | Thu, 10 Aug 2006 14:05:42 -0700 |
parents | 694d5de97348 |
children | 6f4d5ee1906a |
rev | line source |
---|---|
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1544 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
21 /* | |
1544 | 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 * Use is subject to license terms. |
24 */ | |
25 | |
26 #pragma ident "%Z%%M% %I% %E% SMI" | |
27 | |
28 #include <sys/dmu_objset.h> | |
29 #include <sys/dsl_dataset.h> | |
30 #include <sys/dsl_dir.h> | |
2082 | 31 #include <sys/dsl_prop.h> |
2199 | 32 #include <sys/dsl_synctask.h> |
789 | 33 #include <sys/dmu_traverse.h> |
34 #include <sys/dmu_tx.h> | |
35 #include <sys/arc.h> | |
36 #include <sys/zio.h> | |
37 #include <sys/zap.h> | |
38 #include <sys/unique.h> | |
39 #include <sys/zfs_context.h> | |
40 | |
/*
 * Forward declarations of the check/sync callback pairs that this file
 * hands to dsl_sync_task_do() and dsl_sync_task_create().
 */
static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_checkfunc_t dsl_dataset_rollback_check;
static dsl_syncfunc_t dsl_dataset_rollback_sync;
static dsl_checkfunc_t dsl_dataset_destroy_check;
static dsl_syncfunc_t dsl_dataset_destroy_sync;
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
47 |
/*
 * Ceiling for a dataset's weighted open refcount (ds_open_refcount).  An
 * open is refused if adding its weight would push the total past this.
 */
#define	DOS_REF_MAX	(1ULL << 62)

/* Block size handed to bplist_create() for each dataset's deadlist. */
#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
51 | |
52 /* | |
53 * We use weighted reference counts to express the various forms of exclusion | |
54 * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open | |
55 * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. | |
56 * This makes the exclusion logic simple: the total refcnt for all opens cannot | |
57 * exceed DOS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their | |
58 * weight (DOS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume | |
59 * just over half of the refcnt space, so there can't be more than one, but it | |
60 * can peacefully coexist with any number of STANDARD opens. | |
61 */ | |
62 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { | |
63 0, /* DOS_MODE_NONE - invalid */ | |
64 1, /* DOS_MODE_STANDARD - unlimited number */ | |
65 (DOS_REF_MAX >> 1) + 1, /* DOS_MODE_PRIMARY - only one of these */ | |
66 DOS_REF_MAX /* DOS_MODE_EXCLUSIVE - no other opens */ | |
67 }; | |
68 | |
69 | |
70 void | |
71 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
72 { | |
2082 | 73 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); |
789 | 74 int compressed = BP_GET_PSIZE(bp); |
75 int uncompressed = BP_GET_UCSIZE(bp); | |
76 | |
77 dprintf_bp(bp, "born, ds=%p\n", ds); | |
78 | |
79 ASSERT(dmu_tx_is_syncing(tx)); | |
80 /* It could have been compressed away to nothing */ | |
81 if (BP_IS_HOLE(bp)) | |
82 return; | |
83 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); | |
84 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); | |
85 if (ds == NULL) { | |
86 /* | |
87 * Account for the meta-objset space in its placeholder | |
88 * dsl_dir. | |
89 */ | |
90 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ | |
91 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, | |
92 used, compressed, uncompressed, tx); | |
93 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); | |
94 return; | |
95 } | |
96 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
97 mutex_enter(&ds->ds_lock); | |
98 ds->ds_phys->ds_used_bytes += used; | |
99 ds->ds_phys->ds_compressed_bytes += compressed; | |
100 ds->ds_phys->ds_uncompressed_bytes += uncompressed; | |
101 ds->ds_phys->ds_unique_bytes += used; | |
102 mutex_exit(&ds->ds_lock); | |
103 dsl_dir_diduse_space(ds->ds_dir, | |
104 used, compressed, uncompressed, tx); | |
105 } | |
106 | |
107 void | |
108 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
109 { | |
2082 | 110 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); |
789 | 111 int compressed = BP_GET_PSIZE(bp); |
112 int uncompressed = BP_GET_UCSIZE(bp); | |
113 | |
114 ASSERT(dmu_tx_is_syncing(tx)); | |
115 if (BP_IS_HOLE(bp)) | |
116 return; | |
117 | |
118 ASSERT(used > 0); | |
119 if (ds == NULL) { | |
120 /* | |
121 * Account for the meta-objset space in its placeholder | |
122 * dataset. | |
123 */ | |
124 /* XXX this can fail, what do we do when it does? */ | |
125 (void) arc_free(NULL, tx->tx_pool->dp_spa, | |
126 tx->tx_txg, bp, NULL, NULL, ARC_WAIT); | |
127 bzero(bp, sizeof (blkptr_t)); | |
128 | |
129 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, | |
130 -used, -compressed, -uncompressed, tx); | |
131 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); | |
132 return; | |
133 } | |
134 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); | |
135 | |
136 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
137 | |
138 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { | |
139 dprintf_bp(bp, "freeing: %s", ""); | |
140 /* XXX check return code? */ | |
141 (void) arc_free(NULL, tx->tx_pool->dp_spa, | |
142 tx->tx_txg, bp, NULL, NULL, ARC_WAIT); | |
143 | |
144 mutex_enter(&ds->ds_lock); | |
145 /* XXX unique_bytes is not accurate for head datasets */ | |
146 /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */ | |
147 ds->ds_phys->ds_unique_bytes -= used; | |
148 mutex_exit(&ds->ds_lock); | |
149 dsl_dir_diduse_space(ds->ds_dir, | |
150 -used, -compressed, -uncompressed, tx); | |
151 } else { | |
152 dprintf_bp(bp, "putting on dead list: %s", ""); | |
1544 | 153 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); |
789 | 154 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ |
155 if (ds->ds_phys->ds_prev_snap_obj != 0) { | |
156 ASSERT3U(ds->ds_prev->ds_object, ==, | |
157 ds->ds_phys->ds_prev_snap_obj); | |
158 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); | |
159 if (ds->ds_prev->ds_phys->ds_next_snap_obj == | |
2082 | 160 ds->ds_object && bp->blk_birth > |
789 | 161 ds->ds_prev->ds_phys->ds_prev_snap_txg) { |
162 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
163 mutex_enter(&ds->ds_prev->ds_lock); | |
164 ds->ds_prev->ds_phys->ds_unique_bytes += | |
165 used; | |
166 mutex_exit(&ds->ds_prev->ds_lock); | |
167 } | |
168 } | |
169 } | |
170 bzero(bp, sizeof (blkptr_t)); | |
171 mutex_enter(&ds->ds_lock); | |
172 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); | |
173 ds->ds_phys->ds_used_bytes -= used; | |
174 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); | |
175 ds->ds_phys->ds_compressed_bytes -= compressed; | |
176 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); | |
177 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; | |
178 mutex_exit(&ds->ds_lock); | |
179 } | |
180 | |
1544 | 181 uint64_t |
182 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) | |
789 | 183 { |
184 if (ds == NULL) | |
1544 | 185 return (0); |
789 | 186 /* |
187 * The snapshot creation could fail, but that would cause an | |
188 * incorrect FALSE return, which would only result in an | |
189 * overestimation of the amount of space that an operation would | |
190 * consume, which is OK. | |
191 * | |
192 * There's also a small window where we could miss a pending | |
193 * snapshot, because we could set the sync task in the quiescing | |
194 * phase. So this should only be used as a guess. | |
195 */ | |
2199 | 196 return (MAX(ds->ds_phys->ds_prev_snap_txg, ds->ds_trysnap_txg)); |
1544 | 197 } |
198 | |
199 int | |
200 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) | |
201 { | |
202 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); | |
789 | 203 } |
204 | |
205 /* ARGSUSED */ | |
206 static void | |
207 dsl_dataset_evict(dmu_buf_t *db, void *dsv) | |
208 { | |
209 dsl_dataset_t *ds = dsv; | |
210 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
211 | |
212 /* open_refcount == DOS_REF_MAX when deleting */ | |
213 ASSERT(ds->ds_open_refcount == 0 || | |
214 ds->ds_open_refcount == DOS_REF_MAX); | |
215 | |
216 dprintf_ds(ds, "evicting %s\n", ""); | |
217 | |
218 unique_remove(ds->ds_phys->ds_fsid_guid); | |
219 | |
220 if (ds->ds_user_ptr != NULL) | |
221 ds->ds_user_evict_func(ds, ds->ds_user_ptr); | |
222 | |
223 if (ds->ds_prev) { | |
224 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); | |
225 ds->ds_prev = NULL; | |
226 } | |
227 | |
228 bplist_close(&ds->ds_deadlist); | |
229 dsl_dir_close(ds->ds_dir, ds); | |
230 | |
231 if (list_link_active(&ds->ds_synced_link)) | |
232 list_remove(&dp->dp_synced_objsets, ds); | |
233 | |
234 kmem_free(ds, sizeof (dsl_dataset_t)); | |
235 } | |
236 | |
1544 | 237 static int |
789 | 238 dsl_dataset_get_snapname(dsl_dataset_t *ds) |
239 { | |
240 dsl_dataset_phys_t *headphys; | |
241 int err; | |
242 dmu_buf_t *headdbuf; | |
243 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
244 objset_t *mos = dp->dp_meta_objset; | |
245 | |
246 if (ds->ds_snapname[0]) | |
1544 | 247 return (0); |
789 | 248 if (ds->ds_phys->ds_next_snap_obj == 0) |
1544 | 249 return (0); |
789 | 250 |
1544 | 251 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, |
252 FTAG, &headdbuf); | |
253 if (err) | |
254 return (err); | |
789 | 255 headphys = headdbuf->db_data; |
256 err = zap_value_search(dp->dp_meta_objset, | |
257 headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname); | |
1544 | 258 dmu_buf_rele(headdbuf, FTAG); |
259 return (err); | |
789 | 260 } |
261 | |
1544 | 262 int |
789 | 263 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, |
1544 | 264 int mode, void *tag, dsl_dataset_t **dsp) |
789 | 265 { |
266 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; | |
267 objset_t *mos = dp->dp_meta_objset; | |
268 dmu_buf_t *dbuf; | |
269 dsl_dataset_t *ds; | |
1544 | 270 int err; |
789 | 271 |
272 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || | |
273 dsl_pool_sync_context(dp)); | |
274 | |
1544 | 275 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); |
276 if (err) | |
277 return (err); | |
789 | 278 ds = dmu_buf_get_user(dbuf); |
279 if (ds == NULL) { | |
280 dsl_dataset_t *winner; | |
281 | |
282 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); | |
283 ds->ds_dbuf = dbuf; | |
284 ds->ds_object = dsobj; | |
285 ds->ds_phys = dbuf->db_data; | |
286 | |
1544 | 287 err = bplist_open(&ds->ds_deadlist, |
789 | 288 mos, ds->ds_phys->ds_deadlist_obj); |
1544 | 289 if (err == 0) { |
290 err = dsl_dir_open_obj(dp, | |
291 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); | |
292 } | |
293 if (err) { | |
294 /* | |
295 * we don't really need to close the blist if we | |
296 * just opened it. | |
297 */ | |
298 kmem_free(ds, sizeof (dsl_dataset_t)); | |
299 dmu_buf_rele(dbuf, tag); | |
300 return (err); | |
301 } | |
789 | 302 |
303 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { | |
304 ds->ds_snapname[0] = '\0'; | |
305 if (ds->ds_phys->ds_prev_snap_obj) { | |
1544 | 306 err = dsl_dataset_open_obj(dp, |
789 | 307 ds->ds_phys->ds_prev_snap_obj, NULL, |
1544 | 308 DS_MODE_NONE, ds, &ds->ds_prev); |
789 | 309 } |
310 } else { | |
311 if (snapname) { | |
312 #ifdef ZFS_DEBUG | |
313 dsl_dataset_phys_t *headphys; | |
1544 | 314 dmu_buf_t *headdbuf; |
315 err = dmu_bonus_hold(mos, | |
316 ds->ds_dir->dd_phys->dd_head_dataset_obj, | |
317 FTAG, &headdbuf); | |
318 if (err == 0) { | |
319 headphys = headdbuf->db_data; | |
320 uint64_t foundobj; | |
321 err = zap_lookup(dp->dp_meta_objset, | |
322 headphys->ds_snapnames_zapobj, | |
323 snapname, sizeof (foundobj), 1, | |
324 &foundobj); | |
325 ASSERT3U(foundobj, ==, dsobj); | |
326 dmu_buf_rele(headdbuf, FTAG); | |
327 } | |
789 | 328 #endif |
329 (void) strcat(ds->ds_snapname, snapname); | |
330 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { | |
1544 | 331 err = dsl_dataset_get_snapname(ds); |
789 | 332 } |
333 } | |
334 | |
1544 | 335 if (err == 0) { |
336 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, | |
337 dsl_dataset_evict); | |
338 } | |
339 if (err || winner) { | |
789 | 340 bplist_close(&ds->ds_deadlist); |
341 if (ds->ds_prev) { | |
342 dsl_dataset_close(ds->ds_prev, | |
343 DS_MODE_NONE, ds); | |
344 } | |
345 dsl_dir_close(ds->ds_dir, ds); | |
346 kmem_free(ds, sizeof (dsl_dataset_t)); | |
1544 | 347 if (err) { |
348 dmu_buf_rele(dbuf, tag); | |
349 return (err); | |
350 } | |
789 | 351 ds = winner; |
352 } else { | |
353 uint64_t new = | |
354 unique_insert(ds->ds_phys->ds_fsid_guid); | |
355 if (new != ds->ds_phys->ds_fsid_guid) { | |
356 /* XXX it won't necessarily be synced... */ | |
357 ds->ds_phys->ds_fsid_guid = new; | |
358 } | |
359 } | |
360 } | |
361 ASSERT3P(ds->ds_dbuf, ==, dbuf); | |
362 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); | |
363 | |
364 mutex_enter(&ds->ds_lock); | |
365 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && | |
2082 | 366 (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && |
367 !DS_MODE_IS_INCONSISTENT(mode)) || | |
789 | 368 (ds->ds_open_refcount + weight > DOS_REF_MAX)) { |
369 mutex_exit(&ds->ds_lock); | |
370 dsl_dataset_close(ds, DS_MODE_NONE, tag); | |
1544 | 371 return (EBUSY); |
789 | 372 } |
373 ds->ds_open_refcount += weight; | |
374 mutex_exit(&ds->ds_lock); | |
375 | |
1544 | 376 *dsp = ds; |
377 return (0); | |
789 | 378 } |
379 | |
380 int | |
381 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, | |
382 void *tag, dsl_dataset_t **dsp) | |
383 { | |
384 dsl_dir_t *dd; | |
385 dsl_pool_t *dp; | |
386 const char *tail; | |
387 uint64_t obj; | |
388 dsl_dataset_t *ds = NULL; | |
389 int err = 0; | |
390 | |
1544 | 391 err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); |
392 if (err) | |
393 return (err); | |
789 | 394 |
395 dp = dd->dd_pool; | |
396 obj = dd->dd_phys->dd_head_dataset_obj; | |
397 rw_enter(&dp->dp_config_rwlock, RW_READER); | |
398 if (obj == 0) { | |
399 /* A dataset with no associated objset */ | |
400 err = ENOENT; | |
401 goto out; | |
402 } | |
403 | |
404 if (tail != NULL) { | |
405 objset_t *mos = dp->dp_meta_objset; | |
406 | |
1544 | 407 err = dsl_dataset_open_obj(dp, obj, NULL, |
408 DS_MODE_NONE, tag, &ds); | |
409 if (err) | |
410 goto out; | |
789 | 411 obj = ds->ds_phys->ds_snapnames_zapobj; |
412 dsl_dataset_close(ds, DS_MODE_NONE, tag); | |
413 ds = NULL; | |
414 | |
415 if (tail[0] != '@') { | |
416 err = ENOENT; | |
417 goto out; | |
418 } | |
419 tail++; | |
420 | |
421 /* Look for a snapshot */ | |
422 if (!DS_MODE_IS_READONLY(mode)) { | |
423 err = EROFS; | |
424 goto out; | |
425 } | |
426 dprintf("looking for snapshot '%s'\n", tail); | |
427 err = zap_lookup(mos, obj, tail, 8, 1, &obj); | |
428 if (err) | |
429 goto out; | |
430 } | |
1544 | 431 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); |
789 | 432 |
433 out: | |
434 rw_exit(&dp->dp_config_rwlock); | |
435 dsl_dir_close(dd, FTAG); | |
436 | |
437 ASSERT3U((err == 0), ==, (ds != NULL)); | |
438 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ | |
439 | |
440 *dsp = ds; | |
441 return (err); | |
442 } | |
443 | |
444 int | |
445 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) | |
446 { | |
447 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); | |
448 } | |
449 | |
450 void | |
451 dsl_dataset_name(dsl_dataset_t *ds, char *name) | |
452 { | |
453 if (ds == NULL) { | |
454 (void) strcpy(name, "mos"); | |
455 } else { | |
456 dsl_dir_name(ds->ds_dir, name); | |
1544 | 457 VERIFY(0 == dsl_dataset_get_snapname(ds)); |
789 | 458 if (ds->ds_snapname[0]) { |
459 (void) strcat(name, "@"); | |
460 if (!MUTEX_HELD(&ds->ds_lock)) { | |
461 /* | |
462 * We use a "recursive" mutex so that we | |
463 * can call dprintf_ds() with ds_lock held. | |
464 */ | |
465 mutex_enter(&ds->ds_lock); | |
466 (void) strcat(name, ds->ds_snapname); | |
467 mutex_exit(&ds->ds_lock); | |
468 } else { | |
469 (void) strcat(name, ds->ds_snapname); | |
470 } | |
471 } | |
472 } | |
473 } | |
474 | |
475 void | |
476 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) | |
477 { | |
478 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; | |
479 mutex_enter(&ds->ds_lock); | |
480 ASSERT3U(ds->ds_open_refcount, >=, weight); | |
481 ds->ds_open_refcount -= weight; | |
482 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", | |
483 mode, ds->ds_open_refcount); | |
484 mutex_exit(&ds->ds_lock); | |
485 | |
1544 | 486 dmu_buf_rele(ds->ds_dbuf, tag); |
789 | 487 } |
488 | |
489 void | |
490 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) | |
491 { | |
492 objset_t *mos = dp->dp_meta_objset; | |
493 dmu_buf_t *dbuf; | |
494 dsl_dataset_phys_t *dsphys; | |
495 dsl_dataset_t *ds; | |
496 uint64_t dsobj; | |
497 dsl_dir_t *dd; | |
498 | |
499 dsl_dir_create_root(mos, ddobjp, tx); | |
1544 | 500 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); |
789 | 501 |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
502 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
503 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 504 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 505 dmu_buf_will_dirty(dbuf, tx); |
506 dsphys = dbuf->db_data; | |
507 dsphys->ds_dir_obj = dd->dd_object; | |
508 dsphys->ds_fsid_guid = unique_create(); | |
1544 | 509 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ |
789 | 510 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, |
511 sizeof (dsphys->ds_guid)); | |
512 dsphys->ds_snapnames_zapobj = | |
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
513 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); |
789 | 514 dsphys->ds_creation_time = gethrestime_sec(); |
515 dsphys->ds_creation_txg = tx->tx_txg; | |
516 dsphys->ds_deadlist_obj = | |
517 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1544 | 518 dmu_buf_rele(dbuf, FTAG); |
789 | 519 |
520 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
521 dd->dd_phys->dd_head_dataset_obj = dsobj; | |
522 dsl_dir_close(dd, FTAG); | |
523 | |
1544 | 524 VERIFY(0 == |
525 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); | |
789 | 526 (void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx); |
527 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
528 } | |
529 | |
2199 | 530 uint64_t |
531 dsl_dataset_create_sync(dsl_dir_t *pdd, | |
789 | 532 const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) |
533 { | |
2199 | 534 dsl_pool_t *dp = pdd->dd_pool; |
789 | 535 dmu_buf_t *dbuf; |
536 dsl_dataset_phys_t *dsphys; | |
2199 | 537 uint64_t dsobj, ddobj; |
789 | 538 objset_t *mos = dp->dp_meta_objset; |
539 dsl_dir_t *dd; | |
540 | |
2199 | 541 ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp); |
542 ASSERT(clone_parent == NULL || | |
543 clone_parent->ds_phys->ds_num_children > 0); | |
789 | 544 ASSERT(lastname[0] != '@'); |
545 ASSERT(dmu_tx_is_syncing(tx)); | |
546 | |
2199 | 547 ddobj = dsl_dir_create_sync(pdd, lastname, tx); |
548 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); | |
789 | 549 |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
550 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
551 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 552 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 553 dmu_buf_will_dirty(dbuf, tx); |
554 dsphys = dbuf->db_data; | |
555 dsphys->ds_dir_obj = dd->dd_object; | |
556 dsphys->ds_fsid_guid = unique_create(); | |
557 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ | |
558 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, | |
559 sizeof (dsphys->ds_guid)); | |
560 dsphys->ds_snapnames_zapobj = | |
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
561 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); |
789 | 562 dsphys->ds_creation_time = gethrestime_sec(); |
563 dsphys->ds_creation_txg = tx->tx_txg; | |
564 dsphys->ds_deadlist_obj = | |
565 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
566 if (clone_parent) { | |
567 dsphys->ds_prev_snap_obj = clone_parent->ds_object; | |
568 dsphys->ds_prev_snap_txg = | |
569 clone_parent->ds_phys->ds_creation_txg; | |
570 dsphys->ds_used_bytes = | |
571 clone_parent->ds_phys->ds_used_bytes; | |
572 dsphys->ds_compressed_bytes = | |
573 clone_parent->ds_phys->ds_compressed_bytes; | |
574 dsphys->ds_uncompressed_bytes = | |
575 clone_parent->ds_phys->ds_uncompressed_bytes; | |
576 dsphys->ds_bp = clone_parent->ds_phys->ds_bp; | |
577 | |
578 dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); | |
579 clone_parent->ds_phys->ds_num_children++; | |
580 | |
581 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
582 dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; | |
583 } | |
1544 | 584 dmu_buf_rele(dbuf, FTAG); |
789 | 585 |
586 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
587 dd->dd_phys->dd_head_dataset_obj = dsobj; | |
588 dsl_dir_close(dd, FTAG); | |
589 | |
2199 | 590 return (dsobj); |
591 } | |
592 | |
/*
 * Argument bundle handed to dsl_snapshot_destroy_one() for each filesystem
 * visited by dsl_snapshots_destroy().
 */
struct destroyarg {
	dsl_sync_task_group_t *dstg;	/* group collecting destroy tasks */
	char *snapname;			/* snapshot name, without the '@' */
	void *tag;			/* tag used to open each snapshot */
	char *failed;			/* gets the fs name if an open fails */
};
599 | |
600 static int | |
601 dsl_snapshot_destroy_one(char *name, void *arg) | |
602 { | |
603 struct destroyarg *da = arg; | |
604 dsl_dataset_t *ds; | |
605 char *cp; | |
606 int err; | |
607 | |
608 (void) strcat(name, "@"); | |
609 (void) strcat(name, da->snapname); | |
610 err = dsl_dataset_open(name, | |
611 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, | |
612 da->tag, &ds); | |
613 cp = strchr(name, '@'); | |
614 *cp = '\0'; | |
615 if (err == ENOENT) | |
616 return (0); | |
617 if (err) { | |
618 (void) strcpy(da->failed, name); | |
619 return (err); | |
620 } | |
621 | |
622 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, | |
623 dsl_dataset_destroy_sync, ds, da->tag, 0); | |
789 | 624 return (0); |
625 } | |
626 | |
2199 | 627 /* |
628 * Destroy 'snapname' in all descendants of 'fsname'. | |
629 */ | |
630 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy | |
631 int | |
632 dsl_snapshots_destroy(char *fsname, char *snapname) | |
633 { | |
634 int err; | |
635 struct destroyarg da; | |
636 dsl_sync_task_t *dst; | |
637 spa_t *spa; | |
638 char *cp; | |
639 | |
640 cp = strchr(fsname, '/'); | |
641 if (cp) { | |
642 *cp = '\0'; | |
643 err = spa_open(fsname, &spa, FTAG); | |
644 *cp = '/'; | |
645 } else { | |
646 err = spa_open(fsname, &spa, FTAG); | |
647 } | |
648 if (err) | |
649 return (err); | |
650 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); | |
651 da.snapname = snapname; | |
652 da.tag = FTAG; | |
653 da.failed = fsname; | |
654 | |
655 err = dmu_objset_find(fsname, | |
2417 | 656 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); |
2199 | 657 |
658 if (err == 0) | |
659 err = dsl_sync_task_group_wait(da.dstg); | |
660 | |
661 for (dst = list_head(&da.dstg->dstg_tasks); dst; | |
662 dst = list_next(&da.dstg->dstg_tasks, dst)) { | |
663 dsl_dataset_t *ds = dst->dst_arg1; | |
664 if (dst->dst_err) { | |
665 dsl_dataset_name(ds, fsname); | |
666 cp = strchr(fsname, '@'); | |
667 *cp = '\0'; | |
668 } | |
669 /* | |
670 * If it was successful, destroy_sync would have | |
671 * closed the ds | |
672 */ | |
673 if (err) | |
674 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
675 } | |
676 | |
677 dsl_sync_task_group_destroy(da.dstg); | |
678 spa_close(spa, FTAG); | |
679 return (err); | |
680 } | |
681 | |
789 | 682 int |
683 dsl_dataset_destroy(const char *name) | |
684 { | |
685 int err; | |
2199 | 686 dsl_sync_task_group_t *dstg; |
687 objset_t *os; | |
688 dsl_dataset_t *ds; | |
789 | 689 dsl_dir_t *dd; |
2199 | 690 uint64_t obj; |
691 | |
692 if (strchr(name, '@')) { | |
693 /* Destroying a snapshot is simpler */ | |
694 err = dsl_dataset_open(name, | |
695 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, | |
696 FTAG, &ds); | |
697 if (err) | |
698 return (err); | |
699 err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
700 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, | |
701 ds, FTAG, 0); | |
702 if (err) | |
703 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
704 return (err); | |
705 } | |
706 | |
707 err = dmu_objset_open(name, DMU_OST_ANY, | |
708 DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); | |
709 if (err) | |
710 return (err); | |
711 ds = os->os->os_dsl_dataset; | |
712 dd = ds->ds_dir; | |
789 | 713 |
2199 | 714 /* |
715 * Check for errors and mark this ds as inconsistent, in | |
716 * case we crash while freeing the objects. | |
717 */ | |
718 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, | |
719 dsl_dataset_destroy_begin_sync, ds, NULL, 0); | |
720 if (err) { | |
721 dmu_objset_close(os); | |
722 return (err); | |
723 } | |
724 | |
725 /* | |
726 * remove the objects in open context, so that we won't | |
727 * have too much to do in syncing context. | |
728 */ | |
729 for (obj = 0; err == 0; | |
730 err = dmu_object_next(os, &obj, FALSE)) { | |
731 dmu_tx_t *tx = dmu_tx_create(os); | |
732 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); | |
733 dmu_tx_hold_bonus(tx, obj); | |
734 err = dmu_tx_assign(tx, TXG_WAIT); | |
735 if (err) { | |
736 /* | |
737 * Perhaps there is not enough disk | |
738 * space. Just deal with it from | |
739 * dsl_dataset_destroy_sync(). | |
740 */ | |
741 dmu_tx_abort(tx); | |
742 continue; | |
743 } | |
744 VERIFY(0 == dmu_object_free(os, obj, tx)); | |
745 dmu_tx_commit(tx); | |
746 } | |
747 /* Make sure it's not dirty before we finish destroying it. */ | |
748 txg_wait_synced(dd->dd_pool, 0); | |
749 | |
750 dmu_objset_close(os); | |
751 if (err != ESRCH) | |
752 return (err); | |
753 | |
754 err = dsl_dataset_open(name, | |
755 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, | |
756 FTAG, &ds); | |
1544 | 757 if (err) |
758 return (err); | |
789 | 759 |
2199 | 760 err = dsl_dir_open(name, FTAG, &dd, NULL); |
761 if (err) { | |
762 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
763 return (err); | |
789 | 764 } |
765 | |
2199 | 766 /* |
767 * Blow away the dsl_dir + head dataset. | |
768 */ | |
769 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); | |
770 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, | |
771 dsl_dataset_destroy_sync, ds, FTAG, 0); | |
772 dsl_sync_task_create(dstg, dsl_dir_destroy_check, | |
773 dsl_dir_destroy_sync, dd, FTAG, 0); | |
774 err = dsl_sync_task_group_wait(dstg); | |
775 dsl_sync_task_group_destroy(dstg); | |
776 /* if it is successful, *destroy_sync will close the ds+dd */ | |
777 if (err) { | |
778 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
779 dsl_dir_close(dd, FTAG); | |
780 } | |
789 | 781 return (err); |
782 } | |
783 | |
784 int | |
2199 | 785 dsl_dataset_rollback(dsl_dataset_t *ds) |
789 | 786 { |
2199 | 787 ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX); |
788 return (dsl_sync_task_do(ds->ds_dir->dd_pool, | |
789 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, | |
790 ds, NULL, 0)); | |
789 | 791 } |
792 | |
793 void * | |
794 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, | |
795 void *p, dsl_dataset_evict_func_t func) | |
796 { | |
797 void *old; | |
798 | |
799 mutex_enter(&ds->ds_lock); | |
800 old = ds->ds_user_ptr; | |
801 if (old == NULL) { | |
802 ds->ds_user_ptr = p; | |
803 ds->ds_user_evict_func = func; | |
804 } | |
805 mutex_exit(&ds->ds_lock); | |
806 return (old); | |
807 } | |
808 | |
809 void * | |
810 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) | |
811 { | |
812 return (ds->ds_user_ptr); | |
813 } | |
814 | |
815 | |
816 void | |
817 dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp) | |
818 { | |
819 *bp = ds->ds_phys->ds_bp; | |
820 } | |
821 | |
822 void | |
823 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
824 { | |
825 ASSERT(dmu_tx_is_syncing(tx)); | |
826 /* If it's the meta-objset, set dp_meta_rootbp */ | |
827 if (ds == NULL) { | |
828 tx->tx_pool->dp_meta_rootbp = *bp; | |
829 } else { | |
830 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
831 ds->ds_phys->ds_bp = *bp; | |
832 } | |
833 } | |
834 | |
835 spa_t * | |
836 dsl_dataset_get_spa(dsl_dataset_t *ds) | |
837 { | |
838 return (ds->ds_dir->dd_pool->dp_spa); | |
839 } | |
840 | |
841 void | |
842 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) | |
843 { | |
844 dsl_pool_t *dp; | |
845 | |
846 if (ds == NULL) /* this is the meta-objset */ | |
847 return; | |
848 | |
849 ASSERT(ds->ds_user_ptr != NULL); | |
850 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); | |
851 | |
852 dp = ds->ds_dir->dd_pool; | |
853 | |
854 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { | |
855 /* up the hold count until we can be written out */ | |
856 dmu_buf_add_ref(ds->ds_dbuf, ds); | |
857 } | |
858 } | |
859 | |
/*
 * State shared with kill_blkptr() while a traversal frees blocks.
 */
struct killarg {
	uint64_t *usedp;		/* running total of bp_get_dasize() */
	uint64_t *compressedp;		/* running total of BP_GET_PSIZE() */
	uint64_t *uncompressedp;	/* running total of BP_GET_UCSIZE() */
	zio_t *zio;			/* zio passed to each arc_free() */
	dmu_tx_t *tx;			/* supplies the txg for arc_free() */
};
867 | |
868 static int | |
869 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) | |
870 { | |
871 struct killarg *ka = arg; | |
872 blkptr_t *bp = &bc->bc_blkptr; | |
873 | |
874 ASSERT3U(bc->bc_errno, ==, 0); | |
875 | |
876 /* | |
877 * Since this callback is not called concurrently, no lock is | |
878 * needed on the accounting values. | |
879 */ | |
2082 | 880 *ka->usedp += bp_get_dasize(spa, bp); |
789 | 881 *ka->compressedp += BP_GET_PSIZE(bp); |
882 *ka->uncompressedp += BP_GET_UCSIZE(bp); | |
883 /* XXX check for EIO? */ | |
884 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, | |
885 ARC_NOWAIT); | |
886 return (0); | |
887 } | |
888 | |
889 /* ARGSUSED */ | |
2199 | 890 static int |
891 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
789 | 892 { |
2199 | 893 dsl_dataset_t *ds = arg1; |
789 | 894 |
2199 | 895 /* |
896 * There must be a previous snapshot. I suppose we could roll | |
897 * it back to being empty (and re-initialize the upper (ZPL) | |
898 * layer). But for now there's no way to do this via the user | |
899 * interface. | |
900 */ | |
901 if (ds->ds_phys->ds_prev_snap_txg == 0) | |
789 | 902 return (EINVAL); |
903 | |
2199 | 904 /* |
905 * This must not be a snapshot. | |
906 */ | |
907 if (ds->ds_phys->ds_next_snap_obj != 0) | |
908 return (EINVAL); | |
789 | 909 |
910 /* | |
911 * If we made changes this txg, traverse_dsl_dataset won't find | |
912 * them. Try again. | |
913 */ | |
2199 | 914 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) |
789 | 915 return (EAGAIN); |
2199 | 916 |
917 return (0); | |
918 } | |
789 | 919 |
2199 | 920 /* ARGSUSED */ |
921 static void | |
922 dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
923 { | |
924 dsl_dataset_t *ds = arg1; | |
925 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; | |
789 | 926 |
927 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
928 | |
929 /* Zero out the deadlist. */ | |
930 bplist_close(&ds->ds_deadlist); | |
931 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); | |
932 ds->ds_phys->ds_deadlist_obj = | |
933 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1544 | 934 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, |
935 ds->ds_phys->ds_deadlist_obj)); | |
789 | 936 |
937 { | |
938 /* Free blkptrs that we gave birth to */ | |
939 zio_t *zio; | |
940 uint64_t used = 0, compressed = 0, uncompressed = 0; | |
941 struct killarg ka; | |
942 | |
943 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, | |
944 ZIO_FLAG_MUSTSUCCEED); | |
945 ka.usedp = &used; | |
946 ka.compressedp = &compressed; | |
947 ka.uncompressedp = &uncompressed; | |
948 ka.zio = zio; | |
949 ka.tx = tx; | |
950 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, | |
951 ADVANCE_POST, kill_blkptr, &ka); | |
952 (void) zio_wait(zio); | |
953 | |
2199 | 954 dsl_dir_diduse_space(ds->ds_dir, |
789 | 955 -used, -compressed, -uncompressed, tx); |
956 } | |
957 | |
2199 | 958 /* Change our contents to that of the prev snapshot */ |
789 | 959 ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); |
960 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; | |
961 ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; | |
962 ds->ds_phys->ds_compressed_bytes = | |
963 ds->ds_prev->ds_phys->ds_compressed_bytes; | |
964 ds->ds_phys->ds_uncompressed_bytes = | |
965 ds->ds_prev->ds_phys->ds_uncompressed_bytes; | |
2082 | 966 ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; |
789 | 967 ds->ds_phys->ds_unique_bytes = 0; |
968 | |
2532
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
969 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { |
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
970 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); |
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
971 ds->ds_prev->ds_phys->ds_unique_bytes = 0; |
752725c22841
6448999 panic: used == ds->ds_phys->ds_unique_bytes
ahrens
parents:
2417
diff
changeset
|
972 } |
789 | 973 } |
974 | |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
975 /* ARGSUSED */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
976 static int |
2199 | 977 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
978 { |
2199 | 979 dsl_dataset_t *ds = arg1; |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
980 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
981 /* |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
982 * Can't delete a head dataset if there are snapshots of it. |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
983 * (Except if the only snapshots are from the branch we cloned |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
984 * from.) |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
985 */ |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
986 if (ds->ds_prev != NULL && |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
987 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
988 return (EINVAL); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
989 |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
990 return (0); |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
991 } |
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1544
diff
changeset
|
992 |
2199 | 993 /* ARGSUSED */ |
994 static void | |
995 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
789 | 996 { |
2199 | 997 dsl_dataset_t *ds = arg1; |
789 | 998 |
2199 | 999 /* Mark it as inconsistent on-disk, in case we crash */ |
1000 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1001 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; | |
1002 } | |
789 | 1003 |
2199 | 1004 /* ARGSUSED */ |
1005 static int | |
1006 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1007 { | |
1008 dsl_dataset_t *ds = arg1; | |
789 | 1009 |
1010 /* Can't delete a branch point. */ | |
2199 | 1011 if (ds->ds_phys->ds_num_children > 1) |
1012 return (EEXIST); | |
789 | 1013 |
1014 /* | |
1015 * Can't delete a head dataset if there are snapshots of it. | |
1016 * (Except if the only snapshots are from the branch we cloned | |
1017 * from.) | |
1018 */ | |
1019 if (ds->ds_prev != NULL && | |
2199 | 1020 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) |
789 | 1021 return (EINVAL); |
1022 | |
1023 /* | |
1024 * If we made changes this txg, traverse_dsl_dataset won't find | |
1025 * them. Try again. | |
1026 */ | |
2199 | 1027 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) |
789 | 1028 return (EAGAIN); |
2199 | 1029 |
1030 /* XXX we should do some i/o error checking... */ | |
1031 return (0); | |
1032 } | |
1033 | |
1034 static void | |
1035 dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) | |
1036 { | |
1037 dsl_dataset_t *ds = arg1; | |
1038 uint64_t used = 0, compressed = 0, uncompressed = 0; | |
1039 zio_t *zio; | |
1040 int err; | |
1041 int after_branch_point = FALSE; | |
1042 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
1043 objset_t *mos = dp->dp_meta_objset; | |
1044 dsl_dataset_t *ds_prev = NULL; | |
1045 uint64_t obj; | |
1046 | |
1047 ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX); | |
1048 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); | |
1049 ASSERT(ds->ds_prev == NULL || | |
1050 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); | |
1051 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); | |
1052 | |
1053 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); | |
1054 | |
1055 obj = ds->ds_object; | |
789 | 1056 |
1057 if (ds->ds_phys->ds_prev_snap_obj != 0) { | |
1058 if (ds->ds_prev) { | |
1059 ds_prev = ds->ds_prev; | |
1060 } else { | |
2199 | 1061 VERIFY(0 == dsl_dataset_open_obj(dp, |
789 | 1062 ds->ds_phys->ds_prev_snap_obj, NULL, |
2199 | 1063 DS_MODE_NONE, FTAG, &ds_prev)); |
789 | 1064 } |
1065 after_branch_point = | |
1066 (ds_prev->ds_phys->ds_next_snap_obj != obj); | |
1067 | |
1068 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); | |
1069 if (after_branch_point && | |
1070 ds->ds_phys->ds_next_snap_obj == 0) { | |
1071 /* This clone is toast. */ | |
1072 ASSERT(ds_prev->ds_phys->ds_num_children > 1); | |
1073 ds_prev->ds_phys->ds_num_children--; | |
1074 } else if (!after_branch_point) { | |
1075 ds_prev->ds_phys->ds_next_snap_obj = | |
1076 ds->ds_phys->ds_next_snap_obj; | |
1077 } | |
1078 } | |
1079 | |
1080 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); | |
1081 | |
1082 if (ds->ds_phys->ds_next_snap_obj != 0) { | |
2199 | 1083 blkptr_t bp; |
789 | 1084 dsl_dataset_t *ds_next; |
1085 uint64_t itor = 0; | |
1086 | |
1087 spa_scrub_restart(dp->dp_spa, tx->tx_txg); | |
1088 | |
2199 | 1089 VERIFY(0 == dsl_dataset_open_obj(dp, |
1544 | 1090 ds->ds_phys->ds_next_snap_obj, NULL, |
1091 DS_MODE_NONE, FTAG, &ds_next)); | |
789 | 1092 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); |
1093 | |
1094 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); | |
1095 ds_next->ds_phys->ds_prev_snap_obj = | |
1096 ds->ds_phys->ds_prev_snap_obj; | |
1097 ds_next->ds_phys->ds_prev_snap_txg = | |
1098 ds->ds_phys->ds_prev_snap_txg; | |
1099 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, | |
1100 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); | |
1101 | |
1102 /* | |
1103 * Transfer to our deadlist (which will become next's | |
1104 * new deadlist) any entries from next's current | |
1105 * deadlist which were born before prev, and free the | |
1106 * other entries. | |
1107 * | |
1108 * XXX we're doing this long task with the config lock held | |
1109 */ | |
1110 while (bplist_iterate(&ds_next->ds_deadlist, &itor, | |
1111 &bp) == 0) { | |
1112 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { | |
1544 | 1113 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, |
1114 &bp, tx)); | |
789 | 1115 if (ds_prev && !after_branch_point && |
1116 bp.blk_birth > | |
1117 ds_prev->ds_phys->ds_prev_snap_txg) { | |
1118 ds_prev->ds_phys->ds_unique_bytes += | |
2082 | 1119 bp_get_dasize(dp->dp_spa, &bp); |
789 | 1120 } |
1121 } else { | |
2082 | 1122 used += bp_get_dasize(dp->dp_spa, &bp); |
789 | 1123 compressed += BP_GET_PSIZE(&bp); |
1124 uncompressed += BP_GET_UCSIZE(&bp); | |
1125 /* XXX check return value? */ | |
1126 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, | |
1127 &bp, NULL, NULL, ARC_NOWAIT); | |
1128 } | |
1129 } | |
1130 | |
1131 /* free next's deadlist */ | |
1132 bplist_close(&ds_next->ds_deadlist); | |
1133 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); | |
1134 | |
1135 /* set next's deadlist to our deadlist */ | |
1136 ds_next->ds_phys->ds_deadlist_obj = | |
1137 ds->ds_phys->ds_deadlist_obj; | |
1544 | 1138 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, |
1139 ds_next->ds_phys->ds_deadlist_obj)); | |
789 | 1140 ds->ds_phys->ds_deadlist_obj = 0; |
1141 | |
1142 if (ds_next->ds_phys->ds_next_snap_obj != 0) { | |
1143 /* | |
1144 * Update next's unique to include blocks which | |
1145 * were previously shared by only this snapshot | |
1146 * and it. Those blocks will be born after the | |
1147 * prev snap and before this snap, and will have | |
1148 * died after the next snap and before the one | |
1149 * after that (ie. be on the snap after next's | |
1150 * deadlist). | |
1151 * | |
1152 * XXX we're doing this long task with the | |
1153 * config lock held | |
1154 */ | |
1155 dsl_dataset_t *ds_after_next; | |
1156 | |
2199 | 1157 VERIFY(0 == dsl_dataset_open_obj(dp, |
789 | 1158 ds_next->ds_phys->ds_next_snap_obj, NULL, |
1544 | 1159 DS_MODE_NONE, FTAG, &ds_after_next)); |
789 | 1160 itor = 0; |
1161 while (bplist_iterate(&ds_after_next->ds_deadlist, | |
1162 &itor, &bp) == 0) { | |
1163 if (bp.blk_birth > | |
1164 ds->ds_phys->ds_prev_snap_txg && | |
1165 bp.blk_birth <= | |
1166 ds->ds_phys->ds_creation_txg) { | |
1167 ds_next->ds_phys->ds_unique_bytes += | |
2082 | 1168 bp_get_dasize(dp->dp_spa, &bp); |
789 | 1169 } |
1170 } | |
1171 | |
1172 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); | |
1173 ASSERT3P(ds_next->ds_prev, ==, NULL); | |
1174 } else { | |
1175 /* | |
1176 * It would be nice to update the head dataset's | |
1177 * unique. To do so we would have to traverse | |
1178 * it for blocks born after ds_prev, which is | |
1179 * pretty expensive just to maintain something | |
1180 * for debugging purposes. | |
1181 */ | |
1182 ASSERT3P(ds_next->ds_prev, ==, ds); | |
1183 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, | |
1184 ds_next); | |
1185 if (ds_prev) { | |
2199 | 1186 VERIFY(0 == dsl_dataset_open_obj(dp, |
1544 | 1187 ds->ds_phys->ds_prev_snap_obj, NULL, |
1188 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); | |
789 | 1189 } else { |
1190 ds_next->ds_prev = NULL; | |
1191 } | |
1192 } | |
1193 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); | |
1194 | |
1195 /* | |
1196 * NB: unique_bytes is not accurate for head objsets | |
1197 * because we don't update it when we delete the most | |
1198 * recent snapshot -- see above comment. | |
1199 */ | |
1200 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); | |
1201 } else { | |
1202 /* | |
1203 * There's no next snapshot, so this is a head dataset. | |
1204 * Destroy the deadlist. Unless it's a clone, the | |
1205 * deadlist should be empty. (If it's a clone, it's | |
1206 * safe to ignore the deadlist contents.) | |
1207 */ | |
1208 struct killarg ka; | |
1209 | |
1210 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); | |
1211 bplist_close(&ds->ds_deadlist); | |
1212 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); | |
1213 ds->ds_phys->ds_deadlist_obj = 0; | |
1214 | |
1215 /* | |
1216 * Free everything that we point to (that's born after | |
1217 * the previous snapshot, if we are a clone) | |
1218 * | |
1219 * XXX we're doing this long task with the config lock held | |
1220 */ | |
1221 ka.usedp = &used; | |
1222 ka.compressedp = &compressed; | |
1223 ka.uncompressedp = &uncompressed; | |
1224 ka.zio = zio; | |
1225 ka.tx = tx; | |
1226 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, | |
1227 ADVANCE_POST, kill_blkptr, &ka); | |
1228 ASSERT3U(err, ==, 0); | |
1229 } | |
1230 | |
1231 err = zio_wait(zio); | |
1232 ASSERT3U(err, ==, 0); | |
1233 | |
2199 | 1234 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); |
789 | 1235 |
1236 if (ds->ds_phys->ds_snapnames_zapobj) { | |
1237 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); | |
1238 ASSERT(err == 0); | |
1239 } | |
1240 | |
2199 | 1241 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { |
789 | 1242 /* Erase the link in the dataset */ |
2199 | 1243 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); |
1244 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; | |
789 | 1245 /* |
1246 * dsl_dir_sync_destroy() called us, they'll destroy | |
1247 * the dataset. | |
1248 */ | |
1249 } else { | |
1250 /* remove from snapshot namespace */ | |
1251 dsl_dataset_t *ds_head; | |
2199 | 1252 VERIFY(0 == dsl_dataset_open_obj(dp, |
1253 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, | |
1544 | 1254 DS_MODE_NONE, FTAG, &ds_head)); |
2207
47efcb3433a7
6439370 assertion failures possible in dsl_dataset_destroy_sync()
ahrens
parents:
2199
diff
changeset
|
1255 VERIFY(0 == dsl_dataset_get_snapname(ds)); |
789 | 1256 #ifdef ZFS_DEBUG |
1257 { | |
1258 uint64_t val; | |
1259 err = zap_lookup(mos, | |
1260 ds_head->ds_phys->ds_snapnames_zapobj, | |
2199 | 1261 ds->ds_snapname, 8, 1, &val); |
789 | 1262 ASSERT3U(err, ==, 0); |
1263 ASSERT3U(val, ==, obj); | |
1264 } | |
1265 #endif | |
1266 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, | |
2199 | 1267 ds->ds_snapname, tx); |
789 | 1268 ASSERT(err == 0); |
1269 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); | |
1270 } | |
1271 | |
1272 if (ds_prev && ds->ds_prev != ds_prev) | |
1273 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); | |
1274 | |
2199 | 1275 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); |
1276 VERIFY(0 == dmu_object_free(mos, obj, tx)); | |
1277 } | |
1278 | |
1279 /* ARGSUSED */ | |
1280 int | |
1281 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1282 { | |
1283 objset_t *os = arg1; | |
1284 dsl_dataset_t *ds = os->os->os_dsl_dataset; | |
1285 const char *snapname = arg2; | |
1286 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; | |
1287 int err; | |
1288 uint64_t value; | |
789 | 1289 |
1290 /* | |
2199 | 1291 * We don't allow multiple snapshots of the same txg. If there |
1292 * is already one, try again. | |
1293 */ | |
1294 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) | |
1295 return (EAGAIN); | |
1296 | |
1297 /* | |
1298 * Check for conflicting name snapshot name. | |
789 | 1299 */ |
2199 | 1300 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, |
1301 snapname, 8, 1, &value); | |
1302 if (err == 0) | |
1303 return (EEXIST); | |
1304 if (err != ENOENT) | |
1305 return (err); | |
789 | 1306 |
2199 | 1307 ds->ds_trysnap_txg = tx->tx_txg; |
789 | 1308 return (0); |
1309 } | |
1310 | |
2199 | 1311 void |
1312 dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
789 | 1313 { |
2199 | 1314 objset_t *os = arg1; |
1315 dsl_dataset_t *ds = os->os->os_dsl_dataset; | |
1316 const char *snapname = arg2; | |
1317 dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
789 | 1318 dmu_buf_t *dbuf; |
1319 dsl_dataset_phys_t *dsphys; | |
2199 | 1320 uint64_t dsobj; |
789 | 1321 objset_t *mos = dp->dp_meta_objset; |
1322 int err; | |
1323 | |
1324 spa_scrub_restart(dp->dp_spa, tx->tx_txg); | |
2199 | 1325 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); |
789 | 1326 |
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
1327 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
1328 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); |
1544 | 1329 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); |
789 | 1330 dmu_buf_will_dirty(dbuf, tx); |
1331 dsphys = dbuf->db_data; | |
2199 | 1332 dsphys->ds_dir_obj = ds->ds_dir->dd_object; |
789 | 1333 dsphys->ds_fsid_guid = unique_create(); |
1334 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ | |
1335 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, | |
1336 sizeof (dsphys->ds_guid)); | |
1337 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; | |
1338 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; | |
1339 dsphys->ds_next_snap_obj = ds->ds_object; | |
1340 dsphys->ds_num_children = 1; | |
1341 dsphys->ds_creation_time = gethrestime_sec(); | |
1342 dsphys->ds_creation_txg = tx->tx_txg; | |
1343 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; | |
1344 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; | |
1345 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; | |
1346 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; | |
2082 | 1347 dsphys->ds_flags = ds->ds_phys->ds_flags; |
789 | 1348 dsphys->ds_bp = ds->ds_phys->ds_bp; |
1544 | 1349 dmu_buf_rele(dbuf, FTAG); |
789 | 1350 |
2199 | 1351 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); |
1352 if (ds->ds_prev) { | |
1353 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == | |
789 | 1354 ds->ds_object || |
2199 | 1355 ds->ds_prev->ds_phys->ds_num_children > 1); |
1356 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { | |
1357 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
789 | 1358 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, |
2199 | 1359 ds->ds_prev->ds_phys->ds_creation_txg); |
1360 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; | |
789 | 1361 } |
1362 } | |
1363 | |
1364 bplist_close(&ds->ds_deadlist); | |
1365 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1366 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); | |
1367 ds->ds_phys->ds_prev_snap_obj = dsobj; | |
1368 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; | |
1369 ds->ds_phys->ds_unique_bytes = 0; | |
1370 ds->ds_phys->ds_deadlist_obj = | |
1371 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1544 | 1372 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, |
1373 ds->ds_phys->ds_deadlist_obj)); | |
789 | 1374 |
1375 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); | |
1376 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, | |
1377 snapname, 8, 1, &dsobj, tx); | |
1378 ASSERT(err == 0); | |
1379 | |
1380 if (ds->ds_prev) | |
1381 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); | |
1544 | 1382 VERIFY(0 == dsl_dataset_open_obj(dp, |
1383 ds->ds_phys->ds_prev_snap_obj, snapname, | |
1384 DS_MODE_NONE, ds, &ds->ds_prev)); | |
789 | 1385 } |
1386 | |
1387 void | |
1388 dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx) | |
1389 { | |
1390 ASSERT(dmu_tx_is_syncing(tx)); | |
1391 ASSERT(ds->ds_user_ptr != NULL); | |
1392 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); | |
1393 | |
1394 dmu_objset_sync(ds->ds_user_ptr, tx); | |
1395 dsl_dir_dirty(ds->ds_dir, tx); | |
1396 bplist_close(&ds->ds_deadlist); | |
1397 | |
1544 | 1398 dmu_buf_rele(ds->ds_dbuf, ds); |
789 | 1399 } |
1400 | |
1401 void | |
1402 dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds) | |
1403 { | |
1404 /* fill in properties crap */ | |
1405 dsl_dir_stats(ds->ds_dir, dds); | |
1406 | |
1407 if (ds->ds_phys->ds_num_children != 0) { | |
1408 dds->dds_is_snapshot = TRUE; | |
1409 dds->dds_num_clones = ds->ds_phys->ds_num_children - 1; | |
1410 } | |
1411 | |
2082 | 1412 dds->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; |
789 | 1413 dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth; |
1414 | |
1415 dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill; | |
1416 dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used; | |
1417 | |
1418 /* We override the dataset's creation time... they should be the same */ | |
1419 dds->dds_creation_time = ds->ds_phys->ds_creation_time; | |
1420 dds->dds_creation_txg = ds->ds_phys->ds_creation_txg; | |
1421 dds->dds_space_refd = ds->ds_phys->ds_used_bytes; | |
1422 dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid; | |
1423 | |
1424 if (ds->ds_phys->ds_next_snap_obj) { | |
1425 /* | |
1426 * This is a snapshot; override the dd's space used with | |
1427 * our unique space | |
1428 */ | |
1429 dds->dds_space_used = ds->ds_phys->ds_unique_bytes; | |
1430 dds->dds_compressed_bytes = | |
1431 ds->ds_phys->ds_compressed_bytes; | |
1432 dds->dds_uncompressed_bytes = | |
1433 ds->ds_phys->ds_uncompressed_bytes; | |
1434 } | |
1435 } | |
1436 | |
1437 dsl_pool_t * | |
1438 dsl_dataset_pool(dsl_dataset_t *ds) | |
1439 { | |
1440 return (ds->ds_dir->dd_pool); | |
1441 } | |
1442 | |
2199 | 1443 /* ARGSUSED */ |
789 | 1444 static int |
2199 | 1445 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) |
789 | 1446 { |
2199 | 1447 dsl_dataset_t *ds = arg1; |
1448 char *newsnapname = arg2; | |
1449 dsl_dir_t *dd = ds->ds_dir; | |
789 | 1450 objset_t *mos = dd->dd_pool->dp_meta_objset; |
2199 | 1451 dsl_dataset_t *hds; |
1452 uint64_t val; | |
789 | 1453 int err; |
1454 | |
2199 | 1455 err = dsl_dataset_open_obj(dd->dd_pool, |
1456 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); | |
789 | 1457 if (err) |
1458 return (err); | |
1459 | |
2199 | 1460 /* new name better not be in use */ |
1461 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, | |
1462 newsnapname, 8, 1, &val); | |
1463 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); | |
789 | 1464 |
2199 | 1465 if (err == 0) |
1466 err = EEXIST; | |
1467 else if (err == ENOENT) | |
1468 err = 0; | |
1469 return (err); | |
1470 } | |
789 | 1471 |
2199 | 1472 static void |
1473 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
1474 { | |
1475 dsl_dataset_t *ds = arg1; | |
1476 char *newsnapname = arg2; | |
1477 dsl_dir_t *dd = ds->ds_dir; | |
1478 objset_t *mos = dd->dd_pool->dp_meta_objset; | |
1479 dsl_dataset_t *hds; | |
1480 int err; | |
789 | 1481 |
2199 | 1482 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); |
789 | 1483 |
2199 | 1484 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, |
1485 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); | |
789 | 1486 |
2199 | 1487 VERIFY(0 == dsl_dataset_get_snapname(ds)); |
1488 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, | |
1489 ds->ds_snapname, tx); | |
789 | 1490 ASSERT3U(err, ==, 0); |
2199 | 1491 mutex_enter(&ds->ds_lock); |
1492 (void) strcpy(ds->ds_snapname, newsnapname); | |
1493 mutex_exit(&ds->ds_lock); | |
1494 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, | |
1495 ds->ds_snapname, 8, 1, &ds->ds_object, tx); | |
789 | 1496 ASSERT3U(err, ==, 0); |
1497 | |
2199 | 1498 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); |
789 | 1499 } |
1500 | |
1501 #pragma weak dmu_objset_rename = dsl_dataset_rename | |
1502 int | |
2199 | 1503 dsl_dataset_rename(const char *oldname, const char *newname) |
789 | 1504 { |
1505 dsl_dir_t *dd; | |
2199 | 1506 dsl_dataset_t *ds; |
789 | 1507 const char *tail; |
1508 int err; | |
1509 | |
2199 | 1510 err = dsl_dir_open(oldname, FTAG, &dd, &tail); |
1544 | 1511 if (err) |
1512 return (err); | |
789 | 1513 if (tail == NULL) { |
2199 | 1514 err = dsl_dir_rename(dd, newname); |
789 | 1515 dsl_dir_close(dd, FTAG); |
1516 return (err); | |
1517 } | |
1518 if (tail[0] != '@') { | |
1519 /* the name ended in a nonexistant component */ | |
1520 dsl_dir_close(dd, FTAG); | |
1521 return (ENOENT); | |
1522 } | |
1523 | |
2199 | 1524 dsl_dir_close(dd, FTAG); |
1525 | |
1526 /* new name must be snapshot in same filesystem */ | |
1527 tail = strchr(newname, '@'); | |
1528 if (tail == NULL) | |
1529 return (EINVAL); | |
1530 tail++; | |
1531 if (strncmp(oldname, newname, tail - newname) != 0) | |
1532 return (EXDEV); | |
789 | 1533 |
2199 | 1534 err = dsl_dataset_open(oldname, |
1535 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); | |
1536 if (err) | |
1537 return (err); | |
1538 | |
1539 err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
1540 dsl_dataset_snapshot_rename_check, | |
1541 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); | |
1542 | |
1543 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); | |
1544 | |
789 | 1545 return (err); |
1546 } | |
2082 | 1547 |
/*
 * Values computed by dsl_dataset_promote_check() and consumed by
 * dsl_dataset_promote_sync().
 */
struct promotearg {
	uint64_t used;		/* space to transfer between the dirs */
	uint64_t comp;		/* compressed bytes to transfer */
	uint64_t uncomp;	/* uncompressed bytes to transfer */
	uint64_t unique;	/* pivot snapshot's new unique bytes */
	uint64_t newnext_obj;	/* pivot snapshot's new next snapshot */
	uint64_t snapnames_obj;	/* origin head's snapshot-name zap */
};
1552 | |
2082 | 1553 static int |
2199 | 1554 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) |
2082 | 1555 { |
2199 | 1556 dsl_dataset_t *hds = arg1; |
1557 struct promotearg *pa = arg2; | |
1558 dsl_dir_t *dd = hds->ds_dir; | |
1559 dsl_pool_t *dp = hds->ds_dir->dd_pool; | |
2082 | 1560 dsl_dir_t *pdd = NULL; |
1561 dsl_dataset_t *ds = NULL; | |
1562 dsl_dataset_t *pivot_ds = NULL; | |
1563 dsl_dataset_t *newnext_ds = NULL; | |
1564 int err; | |
1565 char *name = NULL; | |
2199 | 1566 uint64_t itor = 0; |
2082 | 1567 blkptr_t bp; |
1568 | |
2199 | 1569 bzero(pa, sizeof (*pa)); |
1570 | |
2082 | 1571 /* Check that it is a clone */ |
1572 if (dd->dd_phys->dd_clone_parent_obj == 0) | |
1573 return (EINVAL); | |
1574 | |
2199 | 1575 /* Since this is so expensive, don't do the preliminary check */ |
1576 if (!dmu_tx_is_syncing(tx)) | |
1577 return (0); | |
1578 | |
1579 if (err = dsl_dataset_open_obj(dp, | |
2082 | 1580 dd->dd_phys->dd_clone_parent_obj, |
1581 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) | |
1582 goto out; | |
1583 pdd = pivot_ds->ds_dir; | |
2199 | 1584 |
1585 { | |
1586 dsl_dataset_t *phds; | |
1587 if (err = dsl_dataset_open_obj(dd->dd_pool, | |
1588 pdd->dd_phys->dd_head_dataset_obj, | |
1589 NULL, DS_MODE_NONE, FTAG, &phds)) | |
1590 goto out; | |
1591 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; | |
1592 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); | |
1593 } | |
2082 | 1594 |
1595 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { | |
1596 err = EXDEV; | |
1597 goto out; | |
1598 } | |
1599 | |
1600 /* find pivot point's new next ds */ | |
1601 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, | |
1602 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); | |
1603 while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { | |
1604 dsl_dataset_t *prev; | |
1605 | |
1606 if (err = dsl_dataset_open_obj(dd->dd_pool, | |
2199 | 1607 newnext_ds->ds_phys->ds_prev_snap_obj, |
1608 NULL, DS_MODE_NONE, FTAG, &prev)) | |
2082 | 1609 goto out; |
1610 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); | |
1611 newnext_ds = prev; | |
1612 } | |
2199 | 1613 pa->newnext_obj = newnext_ds->ds_object; |
2082 | 1614 |
1615 /* compute pivot point's new unique space */ | |
1616 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, | |
1617 &itor, &bp)) == 0) { | |
1618 if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) | |
2199 | 1619 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); |
2082 | 1620 } |
1621 if (err != ENOENT) | |
1622 goto out; | |
1623 | |
1624 /* Walk the snapshots that we are moving */ | |
1625 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); | |
1626 ds = pivot_ds; | |
1627 /* CONSTCOND */ | |
1628 while (TRUE) { | |
1629 uint64_t val, dlused, dlcomp, dluncomp; | |
1630 dsl_dataset_t *prev; | |
1631 | |
1632 /* Check that the snapshot name does not conflict */ | |
1633 dsl_dataset_name(ds, name); | |
1634 err = zap_lookup(dd->dd_pool->dp_meta_objset, | |
1635 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, | |
1636 8, 1, &val); | |
1637 if (err != ENOENT) { | |
1638 if (err == 0) | |
1639 err = EEXIST; | |
1640 goto out; | |
1641 } | |
1642 | |
1643 /* | |
1644 * compute space to transfer. Each snapshot gave birth to: | |
1645 * (my used) - (prev's used) + (deadlist's used) | |
1646 */ | |
2199 | 1647 pa->used += ds->ds_phys->ds_used_bytes; |
1648 pa->comp += ds->ds_phys->ds_compressed_bytes; | |
1649 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; | |
2082 | 1650 |
1651 /* If we reach the first snapshot, we're done. */ | |
1652 if (ds->ds_phys->ds_prev_snap_obj == 0) | |
1653 break; | |
1654 | |
1655 if (err = bplist_space(&ds->ds_deadlist, | |
1656 &dlused, &dlcomp, &dluncomp)) | |
1657 goto out; | |
1658 if (err = dsl_dataset_open_obj(dd->dd_pool, | |
1659 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, | |
1660 FTAG, &prev)) | |
1661 goto out; | |
2199 | 1662 pa->used += dlused - prev->ds_phys->ds_used_bytes; |
1663 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; | |
1664 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; | |
2082 | 1665 |
1666 /* | |
1667 * We could be a clone of a clone. If we reach our | |
1668 * parent's branch point, we're done. | |
1669 */ | |
1670 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { | |
1671 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); | |
1672 break; | |
1673 } | |
1674 if (ds != pivot_ds) | |
1675 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1676 ds = prev; | |
1677 } | |
1678 | |
1679 /* Check that there is enough space here */ | |
2199 | 1680 err = dsl_dir_transfer_possible(pdd, dd, pa->used); |
1681 | |
1682 out: | |
1683 if (ds && ds != pivot_ds) | |
1684 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1685 if (pivot_ds) | |
1686 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); | |
1687 if (newnext_ds) | |
1688 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); | |
1689 if (name) | |
1690 kmem_free(name, MAXPATHLEN); | |
1691 return (err); | |
1692 } | |
2082 | 1693 |
2199 | 1694 static void |
1695 dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) | |
1696 { | |
1697 dsl_dataset_t *hds = arg1; | |
1698 struct promotearg *pa = arg2; | |
1699 dsl_dir_t *dd = hds->ds_dir; | |
1700 dsl_pool_t *dp = hds->ds_dir->dd_pool; | |
1701 dsl_dir_t *pdd = NULL; | |
1702 dsl_dataset_t *ds, *pivot_ds; | |
1703 char *name; | |
1704 | |
1705 ASSERT(dd->dd_phys->dd_clone_parent_obj != 0); | |
1706 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); | |
1707 | |
1708 VERIFY(0 == dsl_dataset_open_obj(dp, | |
1709 dd->dd_phys->dd_clone_parent_obj, | |
1710 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)); | |
2417 | 1711 /* |
1712 * We need to explicitly open pdd, since pivot_ds's pdd will be | |
1713 * changing. | |
1714 */ | |
1715 VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object, | |
1716 NULL, FTAG, &pdd)); | |
2082 | 1717 |
1718 /* move snapshots to this dir */ | |
2199 | 1719 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); |
2082 | 1720 ds = pivot_ds; |
1721 /* CONSTCOND */ | |
1722 while (TRUE) { | |
1723 dsl_dataset_t *prev; | |
1724 | |
1725 /* move snap name entry */ | |
1726 dsl_dataset_name(ds, name); | |
2199 | 1727 VERIFY(0 == zap_remove(dp->dp_meta_objset, |
1728 pa->snapnames_obj, ds->ds_snapname, tx)); | |
1729 VERIFY(0 == zap_add(dp->dp_meta_objset, | |
2082 | 1730 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, |
1731 8, 1, &ds->ds_object, tx)); | |
1732 | |
1733 /* change containing dsl_dir */ | |
1734 dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1735 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); | |
1736 ds->ds_phys->ds_dir_obj = dd->dd_object; | |
1737 ASSERT3P(ds->ds_dir, ==, pdd); | |
1738 dsl_dir_close(ds->ds_dir, ds); | |
2199 | 1739 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, |
2082 | 1740 NULL, ds, &ds->ds_dir)); |
1741 | |
1742 ASSERT3U(dsl_prop_numcb(ds), ==, 0); | |
1743 | |
1744 if (ds->ds_phys->ds_prev_snap_obj == 0) | |
1745 break; | |
1746 | |
2199 | 1747 VERIFY(0 == dsl_dataset_open_obj(dp, |
2082 | 1748 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, |
1749 FTAG, &prev)); | |
1750 | |
1751 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { | |
1752 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); | |
1753 break; | |
1754 } | |
1755 if (ds != pivot_ds) | |
1756 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
1757 ds = prev; | |
1758 } | |
2199 | 1759 if (ds != pivot_ds) |
1760 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
2082 | 1761 |
1762 /* change pivot point's next snap */ | |
1763 dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); | |
2199 | 1764 pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; |
2082 | 1765 |
1766 /* change clone_parent-age */ | |
1767 dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
1768 ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); | |
1769 dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; | |
1770 dmu_buf_will_dirty(pdd->dd_dbuf, tx); | |
1771 pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; | |
1772 | |
1773 /* change space accounting */ | |
2199 | 1774 dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx); |
1775 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); | |
1776 pivot_ds->ds_phys->ds_unique_bytes = pa->unique; | |
2082 | 1777 |
2417 | 1778 dsl_dir_close(pdd, FTAG); |
2199 | 1779 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); |
1780 kmem_free(name, MAXPATHLEN); | |
2082 | 1781 } |
1782 | |
1783 int | |
1784 dsl_dataset_promote(const char *name) | |
1785 { | |
1786 dsl_dataset_t *ds; | |
1787 int err; | |
1788 dmu_object_info_t doi; | |
2199 | 1789 struct promotearg pa; |
2082 | 1790 |
1791 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); | |
1792 if (err) | |
1793 return (err); | |
1794 | |
1795 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, | |
1796 ds->ds_phys->ds_snapnames_zapobj, &doi); | |
1797 if (err) { | |
1798 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
1799 return (err); | |
1800 } | |
1801 | |
1802 /* | |
1803 * Add in 128x the snapnames zapobj size, since we will be moving | |
1804 * a bunch of snapnames to the promoted ds, and dirtying their | |
1805 * bonus buffers. | |
1806 */ | |
2199 | 1807 err = dsl_sync_task_do(ds->ds_dir->dd_pool, |
1808 dsl_dataset_promote_check, | |
1809 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); | |
2082 | 1810 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); |
1811 return (err); | |
1812 } |