Mercurial > illumos > illumos-gate
diff usr/src/uts/common/fs/zfs/dsl_dataset.c @ 12470:54258108784b
6948890 snapshot deletion can induce pathologically long spa_sync() times
author | Matthew Ahrens <Matthew.Ahrens@Sun.COM> |
---|---|
date | Fri, 21 May 2010 17:29:22 -0700 |
parents | c77e20e4e046 |
children | 693dd2cad55f |
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c Fri May 21 15:05:41 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c Fri May 21 17:29:22 2010 -0700 @@ -39,6 +39,7 @@ #include <sys/zfs_znode.h> #include <sys/zvol.h> #include <sys/dsl_scan.h> +#include <sys/dsl_deadlist.h> /* * Enable/disable prefetching of dedup-ed blocks which are going to be freed. @@ -51,6 +52,13 @@ static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; static dsl_syncfunc_t dsl_dataset_set_reservation_sync; +#define SWITCH64(x, y) \ + { \ + uint64_t __tmp = (x); \ + (x) = (y); \ + (y) = __tmp; \ + } + #define DS_REF_MAX (1ULL << 62) #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE @@ -178,13 +186,13 @@ /* * We are here as part of zio's write done callback, * which means we're a zio interrupt thread. We can't - * call bplist_enqueue() now because it may block + * call dsl_deadlist_insert() now because it may block * waiting for I/O. Instead, put bp on the deferred * queue and let dsl_pool_sync() finish the job. */ - bplist_enqueue_deferred(&ds->ds_deadlist, bp); + bplist_append(&ds->ds_pending_deadlist, bp); } else { - VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); + dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); } ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); @@ -269,7 +277,13 @@ ds->ds_prev = NULL; } - bplist_close(&ds->ds_deadlist); + bplist_destroy(&ds->ds_pending_deadlist); + if (db != NULL) { + dsl_deadlist_close(&ds->ds_deadlist); + } else { + ASSERT(ds->ds_deadlist.dl_dbuf == NULL); + ASSERT(!ds->ds_deadlist.dl_oldfmt); + } if (ds->ds_dir) dsl_dir_close(ds->ds_dir, ds); @@ -280,7 +294,6 @@ mutex_destroy(&ds->ds_opening_lock); rw_destroy(&ds->ds_rwlock); cv_destroy(&ds->ds_exclusive_cv); - bplist_fini(&ds->ds_deadlist); kmem_free(ds, sizeof (dsl_dataset_t)); } @@ -380,25 +393,23 @@ mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); rw_init(&ds->ds_rwlock, 0, 0, 0); cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); - bplist_init(&ds->ds_deadlist); - - err = bplist_open(&ds->ds_deadlist, + + bplist_create(&ds->ds_pending_deadlist); + dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + if (err == 0) { err = dsl_dir_open_obj(dp, ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); } if (err) { - /* - * we don't really need to close the blist if we - * just opened it. - */ mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); rw_destroy(&ds->ds_rwlock); cv_destroy(&ds->ds_exclusive_cv); - bplist_fini(&ds->ds_deadlist); + bplist_destroy(&ds->ds_pending_deadlist); + dsl_deadlist_close(&ds->ds_deadlist); kmem_free(ds, sizeof (dsl_dataset_t)); dmu_buf_rele(dbuf, tag); return (err); @@ -455,7 +466,8 @@ dsl_dataset_evict); } if (err || winner) { - bplist_close(&ds->ds_deadlist); + bplist_destroy(&ds->ds_pending_deadlist); + dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_prev) dsl_dataset_drop_ref(ds->ds_prev, ds); dsl_dir_close(ds->ds_dir, ds); @@ -464,7 +476,6 @@ mutex_destroy(&ds->ds_opening_lock); rw_destroy(&ds->ds_rwlock); cv_destroy(&ds->ds_exclusive_cv); - bplist_fini(&ds->ds_deadlist); kmem_free(ds, sizeof (dsl_dataset_t)); if (err) { dmu_buf_rele(dbuf, tag); @@ -726,7 +737,7 @@ if (ds->ds_dbuf) dsl_dataset_drop_ref(ds, tag); else - dsl_dataset_evict(ds->ds_dbuf, ds); + dsl_dataset_evict(NULL, ds); } boolean_t @@ -788,10 +799,12 @@ DMU_OT_NONE, 0, tx); dsphys->ds_creation_time = gethrestime_sec(); dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; - dsphys->ds_deadlist_obj = - bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); - - if (origin) { + + if (origin == NULL) { + dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); + } else { + dsl_dataset_t *ohds; + dsphys->ds_prev_snap_obj = origin->ds_object; dsphys->ds_prev_snap_txg = origin->ds_phys->ds_creation_txg; @@ -807,6 +820,12 @@ dmu_buf_will_dirty(origin->ds_dbuf, tx); origin->ds_phys->ds_num_children++; + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); + dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, + dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); + dsl_dataset_rele(ohds, FTAG); + if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { if (origin->ds_phys->ds_next_clones_obj == 0) { origin->ds_phys->ds_next_clones_obj = @@ -820,6 +839,16 @@ dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_origin_obj = origin->ds_object; + if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { + if (origin->ds_dir->dd_phys->dd_clones == 0) { + dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); + origin->ds_dir->dd_phys->dd_clones = + zap_create(mos, + DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); + } + VERIFY3U(0, ==, zap_add_int(mos, + origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); + } } if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) @@ -1201,8 +1230,7 @@ else mrs_used = 0; - VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp, - &dluncomp)); + dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); ASSERT3U(dlused, <=, mrs_used); ds->ds_phys->ds_unique_bytes = @@ -1462,6 +1490,103 @@ ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); } +static void +dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) +{ + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + zap_cursor_t zc; + zap_attribute_t za; + + /* + * If it is the old version, dd_clones doesn't exist so we can't + * find the clones, but deadlist_remove_key() is a no-op so it + * doesn't matter. + */ + if (ds->ds_dir->dd_phys->dd_clones == 0) + return; + + for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + dsl_dataset_t *clone; + + VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool, + za.za_first_integer, FTAG, &clone)); + if (clone->ds_dir->dd_origin_txg > mintxg) { + dsl_deadlist_remove_key(&clone->ds_deadlist, + mintxg, tx); + dsl_dataset_remove_clones_key(clone, mintxg, tx); + } + dsl_dataset_rele(clone, FTAG); + } + zap_cursor_fini(&zc); +} + +struct process_old_arg { + dsl_dataset_t *ds; + dsl_dataset_t *ds_prev; + boolean_t after_branch_point; + zio_t *pio; + uint64_t used, comp, uncomp; +}; + +static int +process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + struct process_old_arg *poa = arg; + dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; + + if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { + dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); + if (poa->ds_prev && !poa->after_branch_point && + bp->blk_birth > + poa->ds_prev->ds_phys->ds_prev_snap_txg) { + poa->ds_prev->ds_phys->ds_unique_bytes += + bp_get_dsize_sync(dp->dp_spa, bp); + } + } else { + poa->used += bp_get_dsize_sync(dp->dp_spa, bp); + poa->comp += BP_GET_PSIZE(bp); + poa->uncomp += BP_GET_UCSIZE(bp); + dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); + } + return (0); +} + +static void +process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, + dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) +{ + struct process_old_arg poa = { 0 }; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + + ASSERT(ds->ds_deadlist.dl_oldfmt); + ASSERT(ds_next->ds_deadlist.dl_oldfmt); + + poa.ds = ds; + poa.ds_prev = ds_prev; + poa.after_branch_point = after_branch_point; + poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, + process_old_cb, &poa, tx)); + VERIFY3U(zio_wait(poa.pio), ==, 0); + ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); + + /* change snapused */ + dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, + -poa.used, -poa.comp, -poa.uncomp, tx); + + /* swap next's deadlist to our deadlist */ + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_close(&ds_next->ds_deadlist); + SWITCH64(ds_next->ds_phys->ds_deadlist_obj, + ds->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&ds_next->ds_deadlist, mos, + ds_next->ds_phys->ds_deadlist_obj); +} + void dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) { @@ -1566,12 +1691,9 @@ } if (dsl_dataset_is_snapshot(ds)) { - blkptr_t bp; - zio_t *pio; dsl_dataset_t *ds_next; - uint64_t itor = 0; uint64_t old_unique; - int64_t used = 0, compressed = 0, uncompressed = 0; + uint64_t used = 0, comp = 0, uncomp = 0; VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); @@ -1587,53 +1709,49 @@ ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); - pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); - - /* - * Transfer to our deadlist (which will become next's - * new deadlist) any entries from next's current - * deadlist which were born before prev, and free the - * other entries. - * - * XXX we're doing this long task with the config lock held - */ - while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) { - if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { - VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, - &bp, tx)); - if (ds_prev && !after_branch_point && - bp.blk_birth > - ds_prev->ds_phys->ds_prev_snap_txg) { - ds_prev->ds_phys->ds_unique_bytes += - bp_get_dsize_sync(dp->dp_spa, &bp); - } - } else { - used += bp_get_dsize_sync(dp->dp_spa, &bp); - compressed += BP_GET_PSIZE(&bp); - uncompressed += BP_GET_UCSIZE(&bp); - dsl_free_sync(pio, dp, tx->tx_txg, &bp); + + if (ds_next->ds_deadlist.dl_oldfmt) { + process_old_deadlist(ds, ds_prev, ds_next, + after_branch_point, tx); + } else { + /* Adjust prev's unique space. */ + if (ds_prev && !after_branch_point) { + dsl_deadlist_space_range(&ds_next->ds_deadlist, + ds_prev->ds_phys->ds_prev_snap_txg, + ds->ds_phys->ds_prev_snap_txg, + &used, &comp, &uncomp); + ds_prev->ds_phys->ds_unique_bytes += used; } + + /* Adjust snapused. */ + dsl_deadlist_space_range(&ds_next->ds_deadlist, + ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &used, &comp, &uncomp); + dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, + -used, -comp, -uncomp, tx); + + /* Move blocks to be freed to pool's free list. */ + dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, + &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, + tx); + dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, + DD_USED_HEAD, used, comp, uncomp, tx); + dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx); + + /* Merge our deadlist into next's and free it. */ + dsl_deadlist_merge(&ds_next->ds_deadlist, + ds->ds_phys->ds_deadlist_obj, tx); } - VERIFY3U(zio_wait(pio), ==, 0); - ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); - - /* change snapused */ - dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, - -used, -compressed, -uncompressed, tx); - - /* free next's deadlist */ - bplist_close(&ds_next->ds_deadlist); - bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); - - /* set next's deadlist to our deadlist */ - bplist_close(&ds->ds_deadlist); - ds_next->ds_phys->ds_deadlist_obj = - ds->ds_phys->ds_deadlist_obj; - VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, - ds_next->ds_phys->ds_deadlist_obj)); - ds->ds_phys->ds_deadlist_obj = 0; + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); + + /* Collapse range in clone heads */ + dsl_dataset_remove_clones_key(ds, + ds->ds_phys->ds_creation_txg, tx); if (dsl_dataset_is_snapshot(ds_next)) { + dsl_dataset_t *ds_nextnext; + /* * Update next's unique to include blocks which * were previously shared by only this snapshot @@ -1642,25 +1760,27 @@ * died after the next snap and before the one * after that (ie. be on the snap after next's * deadlist). - * - * XXX we're doing this long task with the - * config lock held */ - dsl_dataset_t *ds_after_next; - uint64_t space; - VERIFY(0 == dsl_dataset_hold_obj(dp, ds_next->ds_phys->ds_next_snap_obj, - FTAG, &ds_after_next)); - - VERIFY(0 == - bplist_space_birthrange(&ds_after_next->ds_deadlist, + FTAG, &ds_nextnext)); + dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, ds->ds_phys->ds_prev_snap_txg, - ds->ds_phys->ds_creation_txg, &space)); - ds_next->ds_phys->ds_unique_bytes += space; - - dsl_dataset_rele(ds_after_next, FTAG); + ds->ds_phys->ds_creation_txg, + &used, &comp, &uncomp); + ds_next->ds_phys->ds_unique_bytes += used; + dsl_dataset_rele(ds_nextnext, FTAG); ASSERT3P(ds_next->ds_prev, ==, NULL); + + /* Collapse range in this head. */ + dsl_dataset_t *hds; + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_head_dataset_obj, + FTAG, &hds)); + dsl_deadlist_remove_key(&hds->ds_deadlist, + ds->ds_phys->ds_creation_txg, tx); + dsl_dataset_rele(hds, FTAG); + } else { ASSERT3P(ds_next->ds_prev, ==, ds); dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); @@ -1700,9 +1820,8 @@ */ struct killarg ka; - ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); - bplist_close(&ds->ds_deadlist); - bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); ds->ds_phys->ds_deadlist_obj = 0; /* @@ -1721,6 +1840,11 @@ ds->ds_phys->ds_unique_bytes == 0); if (ds->ds_prev != NULL) { + if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { + VERIFY3U(0, ==, zap_remove_int(mos, + ds->ds_prev->ds_dir->dd_phys->dd_clones, + ds->ds_object, tx)); + } dsl_dataset_rele(ds->ds_prev, ds); ds->ds_prev = ds_prev = NULL; } @@ -1935,20 +2059,24 @@ delta, 0, 0, tx); } - bplist_close(&ds->ds_deadlist); dmu_buf_will_dirty(ds->ds_dbuf, tx); + zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu", + ds->ds_dir->dd_myname, snapname, dsobj, + ds->ds_phys->ds_prev_snap_txg); + ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, + UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + dsl_deadlist_add_key(&ds->ds_deadlist, + ds->ds_phys->ds_prev_snap_txg, tx); + ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); ds->ds_phys->ds_prev_snap_obj = dsobj; ds->ds_phys->ds_prev_snap_txg = crtxg; ds->ds_phys->ds_unique_bytes = 0; if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; - ds->ds_phys->ds_deadlist_obj = - bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); - VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, - ds->ds_phys->ds_deadlist_obj)); - - dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); + err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &dsobj, tx); ASSERT(err == 0); @@ -2357,6 +2485,7 @@ struct promotenode *snap = list_head(&pa->shared_snaps); dsl_dataset_t *origin_ds = snap->ds; int err; + uint64_t unused; /* Check that it is a real clone */ if (!dsl_dir_is_clone(hds->ds_dir)) @@ -2372,10 +2501,9 @@ /* compute origin's new unique space */ snap = list_tail(&pa->clone_snaps); ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); - err = bplist_space_birthrange(&snap->ds->ds_deadlist, - origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique); - if (err) - return (err); + dsl_deadlist_space_range(&snap->ds->ds_deadlist, + origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &pa->unique, &unused, &unused); /* * Walk the snapshots that we are moving @@ -2414,9 +2542,8 @@ if (ds->ds_phys->ds_prev_snap_obj == 0) continue; - if (err = bplist_space(&ds->ds_deadlist, - &dlused, &dlcomp, &dluncomp)) - goto out; + dsl_deadlist_space(&ds->ds_deadlist, + &dlused, &dlcomp, &dluncomp); pa->used += dlused; pa->comp += dlcomp; pa->uncomp += dluncomp; @@ -2450,7 +2577,7 @@ /* * Note, typically this will not be a clone of a clone, * so dd_origin_txg will be < TXG_INITIAL, so - * these snaplist_space() -> bplist_space_birthrange() + * these snaplist_space() -> dsl_deadlist_space_range() * calls will be fast because they do not have to * iterate over all bps. */ @@ -2530,6 +2657,26 @@ origin_head->ds_dir->dd_origin_txg = origin_ds->ds_phys->ds_creation_txg; + /* change dd_clone entries */ + if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { + VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + odd->dd_phys->dd_clones, hds->ds_object, tx)); + VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, + pa->origin_origin->ds_dir->dd_phys->dd_clones, + hds->ds_object, tx)); + + VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + pa->origin_origin->ds_dir->dd_phys->dd_clones, + origin_head->ds_object, tx)); + if (dd->dd_phys->dd_clones == 0) { + dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, + DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); + } + VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, + dd->dd_phys->dd_clones, origin_head->ds_object, tx)); + + } + /* move snapshots to this dir */ for (snap = list_head(&pa->shared_snaps); snap; snap = list_next(&pa->shared_snaps, snap)) { @@ -2547,6 +2694,7 @@ VERIFY(0 == zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); + /* change containing dsl_dir */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); @@ -2556,6 +2704,40 @@ VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, NULL, ds, &ds->ds_dir)); + /* move any clone references */ + if (ds->ds_phys->ds_next_clones_obj && + spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { + zap_cursor_t zc; + zap_attribute_t za; + + for (zap_cursor_init(&zc, dp->dp_meta_objset, + ds->ds_phys->ds_next_clones_obj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + dsl_dataset_t *cnds; + uint64_t o; + + if (za.za_first_integer == oldnext_obj) { + /* + * We've already moved the + * origin's reference. + */ + continue; + } + + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + za.za_first_integer, FTAG, &cnds)); + o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; + + VERIFY3U(zap_remove_int(dp->dp_meta_objset, + odd->dd_phys->dd_clones, o, tx), ==, 0); + VERIFY3U(zap_add_int(dp->dp_meta_objset, + dd->dd_phys->dd_clones, o, tx), ==, 0); + dsl_dataset_rele(cnds, FTAG); + } + zap_cursor_fini(&zc); + } + ASSERT3U(dsl_prop_numcb(ds), ==, 0); } @@ -2651,11 +2833,9 @@ *spacep = 0; for (snap = list_head(l); snap; snap = list_next(l, snap)) { - uint64_t used; - int err = bplist_space_birthrange(&snap->ds->ds_deadlist, - mintxg, UINT64_MAX, &used); - if (err) - return (err); + uint64_t used, comp, uncomp; + dsl_deadlist_space_range(&snap->ds->ds_deadlist, + mintxg, UINT64_MAX, &used, &comp, &uncomp); *spacep += used; } return (0); @@ -2742,10 +2922,10 @@ if (err != 0) goto out; - if (dsl_dir_is_clone(snap->ds->ds_dir)) { - err = dsl_dataset_own_obj(dp, + if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { + err = dsl_dataset_hold_obj(dp, snap->ds->ds_dir->dd_phys->dd_origin_obj, - 0, FTAG, &pa.origin_origin); + FTAG, &pa.origin_origin); if (err != 0) goto out; } @@ -2770,7 +2950,7 @@ snaplist_destroy(&pa.clone_snaps, B_FALSE); snaplist_destroy(&pa.origin_snaps, B_FALSE); if (pa.origin_origin) - dsl_dataset_disown(pa.origin_origin, FTAG); + dsl_dataset_rele(pa.origin_origin, FTAG); dsl_dataset_rele(ds, FTAG); return (err); } @@ -2860,10 +3040,12 @@ */ if (csa->cds->ds_prev) { dsl_dataset_t *origin = csa->cds->ds_prev; + uint64_t comp, uncomp; + dmu_buf_will_dirty(origin->ds_dbuf, tx); - VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, + dsl_deadlist_space_range(&csa->cds->ds_deadlist, origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, - &origin->ds_phys->ds_unique_bytes)); + &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); } /* swap blkptrs */ @@ -2883,10 +3065,10 @@ ASSERT3U(csa->cds->ds_dir->dd_phys-> dd_used_breakdown[DD_USED_SNAP], ==, 0); - VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, - &cdl_comp, &cdl_uncomp)); - VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, - &odl_comp, &odl_uncomp)); + dsl_deadlist_space(&csa->cds->ds_deadlist, + &cdl_used, &cdl_comp, &cdl_uncomp); + dsl_deadlist_space(&csa->ohds->ds_deadlist, + &odl_used, &odl_comp, &odl_uncomp); dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - (csa->ohds->ds_phys->ds_used_bytes + odl_used); @@ -2907,21 +3089,16 @@ * deadlist (since that's the only thing that's * changing that affects the snapused). */ - VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, - csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, &cdl_used)); - VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist, - csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, &odl_used)); + dsl_deadlist_space_range(&csa->cds->ds_deadlist, + csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, + &cdl_used, &cdl_comp, &cdl_uncomp); + dsl_deadlist_space_range(&csa->ohds->ds_deadlist, + csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, + &odl_used, &odl_comp, &odl_uncomp); dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, DD_USED_HEAD, DD_USED_SNAP, tx); } -#define SWITCH64(x, y) \ - { \ - uint64_t __tmp = (x); \ - (x) = (y); \ - (y) = __tmp; \ - } - /* swap ds_*_bytes */ SWITCH64(csa->ohds->ds_phys->ds_used_bytes, csa->cds->ds_phys->ds_used_bytes); @@ -2936,15 +3113,17 @@ dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, csa->unused_refres_delta, 0, 0, tx); - /* swap deadlists */ - bplist_close(&csa->cds->ds_deadlist); - bplist_close(&csa->ohds->ds_deadlist); + /* + * Swap deadlists. + */ + dsl_deadlist_close(&csa->cds->ds_deadlist); + dsl_deadlist_close(&csa->ohds->ds_deadlist); SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, csa->cds->ds_phys->ds_deadlist_obj); - VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, - csa->cds->ds_phys->ds_deadlist_obj)); - VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, - csa->ohds->ds_phys->ds_deadlist_obj)); + dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, + csa->cds->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, + csa->ohds->ds_phys->ds_deadlist_obj); dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); }