Mercurial > illumos > illumos-gate
changeset 12798:f6c8601080b4
6938335 zfs send -R can still miss renamed snapshots
6955879 panic in dmu_objset_stats while running nfs IOs.
6928104 zfs send/rename race can leak snapshot holds
6953835 mem leak in dsl_dataset_user_release_tmp()
6915117 zfs_iter_snapshots() should deal with midstream snapshot renames
author | Chris Kirby <Chris.Kirby@oracle.com> |
---|---|
date | Wed, 07 Jul 2010 15:04:13 -0600 |
parents | 7119a7ce586b |
children | 45ed97ad3d9f |
files | usr/src/cmd/zfs/zfs_main.c usr/src/lib/libzfs/common/libzfs.h usr/src/lib/libzfs/common/libzfs_dataset.c usr/src/lib/libzfs/common/libzfs_sendrecv.c usr/src/lib/libzfs/common/mapfile-vers usr/src/lib/libzpool/common/kernel.c usr/src/uts/common/fs/zfs/dmu_send.c usr/src/uts/common/fs/zfs/dsl_dataset.c usr/src/uts/common/fs/zfs/dsl_deleg.c usr/src/uts/common/fs/zfs/dsl_pool.c usr/src/uts/common/fs/zfs/sys/dsl_dataset.h usr/src/uts/common/fs/zfs/sys/dsl_deleg.h usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h usr/src/uts/common/fs/zfs/sys/zfs_onexit.h usr/src/uts/common/fs/zfs/zfs_ioctl.c usr/src/uts/common/fs/zfs/zfs_onexit.c |
diffstat | 16 files changed, 564 insertions(+), 334 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/zfs/zfs_main.c Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/cmd/zfs/zfs_main.c Wed Jul 07 15:04:13 2010 -0600 @@ -2888,7 +2888,7 @@ } if (holding) { if (zfs_hold(zhp, delim+1, tag, recursive, - temphold, B_FALSE, -1) != 0) + temphold, B_FALSE, -1, 0, 0) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0)
--- a/usr/src/lib/libzfs/common/libzfs.h Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/lib/libzfs/common/libzfs.h Wed Jul 07 15:04:13 2010 -0600 @@ -533,12 +533,8 @@ extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, - boolean_t, boolean_t, int); -extern int zfs_hold_range(zfs_handle_t *, const char *, const char *, - const char *, boolean_t, boolean_t, snapfilter_cb_t, void *, int); + boolean_t, boolean_t, int, uint64_t, uint64_t); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); -extern int zfs_release_range(zfs_handle_t *, const char *, const char *, - const char *, boolean_t); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c Wed Jul 07 15:04:13 2010 -0600 @@ -3927,11 +3927,13 @@ int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive, boolean_t temphold, boolean_t enoent_ok, - int cleanup_fd) + int cleanup_fd, uint64_t dsobj, uint64_t createtxg) { zfs_cmd_t zc = { 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; + ASSERT(!recursive || dsobj == 0); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) @@ -3940,6 +3942,8 @@ zc.zc_cookie = recursive; zc.zc_temphold = temphold; zc.zc_cleanup_fd = cleanup_fd; + zc.zc_sendobj = dsobj; + zc.zc_createtxg = createtxg; if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) { char errbuf[ZFS_MAXNAMELEN+32]; @@ -3969,7 +3973,7 @@ return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf)); case ENOENT: if (enoent_ok) - return (0); + return (ENOENT); /* FALLTHROUGH */ default: return (zfs_standard_error_fmt(hdl, errno, errbuf)); @@ -3979,107 +3983,6 @@ return (0); } -struct hold_range_arg { - zfs_handle_t *origin; - const char *fromsnap; - const char *tosnap; - char lastsnapheld[ZFS_MAXNAMELEN]; - const char *tag; - boolean_t temphold; - boolean_t seento; - boolean_t seenfrom; - boolean_t holding; - boolean_t recursive; - snapfilter_cb_t *filter_cb; - void *filter_cb_arg; - int cleanup_fd; -}; - -static int -zfs_hold_range_one(zfs_handle_t *zhp, void *arg) -{ - struct hold_range_arg *hra = arg; - const char *thissnap; - int error; - - thissnap = strchr(zfs_get_name(zhp), '@') + 1; - - if (hra->fromsnap && !hra->seenfrom && - strcmp(hra->fromsnap, thissnap) == 0) - hra->seenfrom = B_TRUE; - - /* snap is older or newer than the desired range, ignore it */ - if (hra->seento || !hra->seenfrom) { - zfs_close(zhp); - return (0); - } - - if (!hra->seento && strcmp(hra->tosnap, thissnap) == 0) - hra->seento = B_TRUE; - - if (hra->filter_cb != NULL && - hra->filter_cb(zhp, hra->filter_cb_arg) == B_FALSE) { - zfs_close(zhp); - return (0); - } - - if (hra->holding) { - /* We could be racing with destroy, so ignore ENOENT. */ - error = zfs_hold(hra->origin, thissnap, hra->tag, - hra->recursive, hra->temphold, B_TRUE, hra->cleanup_fd); - if (error == 0) { - (void) strlcpy(hra->lastsnapheld, zfs_get_name(zhp), - sizeof (hra->lastsnapheld)); - } - } else { - error = zfs_release(hra->origin, thissnap, hra->tag, - hra->recursive); - } - - zfs_close(zhp); - return (error); -} - -/* - * Add a user hold on the set of snapshots starting with fromsnap up to - * and including tosnap. If we're unable to to acquire a particular hold, - * undo any holds up to that point. - */ -int -zfs_hold_range(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, - const char *tag, boolean_t recursive, boolean_t temphold, - snapfilter_cb_t filter_cb, void *cbarg, int cleanup_fd) -{ - struct hold_range_arg arg = { 0 }; - int error; - - arg.origin = zhp; - arg.fromsnap = fromsnap; - arg.tosnap = tosnap; - arg.tag = tag; - arg.temphold = temphold; - arg.holding = B_TRUE; - arg.recursive = recursive; - arg.seenfrom = (fromsnap == NULL); - arg.filter_cb = filter_cb; - arg.filter_cb_arg = cbarg; - arg.cleanup_fd = cleanup_fd; - - error = zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg); - - /* - * Make sure we either hold the entire range or none. If we're - * using cleanup-on-exit, we'll let the closing of the cleanup_fd - * do the work for us. - */ - if (error && arg.lastsnapheld[0] != '\0' && - (cleanup_fd == -1 || !temphold)) { - (void) zfs_release_range(zhp, fromsnap, - (const char *)arg.lastsnapheld, tag, recursive); - } - return (error); -} - int zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive) @@ -4121,27 +4024,6 @@ return (0); } -/* - * Release a user hold from the set of snapshots starting with fromsnap - * up to and including tosnap. - */ -int -zfs_release_range(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, - const char *tag, boolean_t recursive) -{ - struct hold_range_arg arg = { 0 }; - - arg.origin = zhp; - arg.fromsnap = fromsnap; - arg.tosnap = tosnap; - arg.tag = tag; - arg.recursive = recursive; - arg.seenfrom = (fromsnap == NULL); - arg.cleanup_fd = -1; - - return (zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg)); -} - uint64_t zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) {
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c Wed Jul 07 15:04:13 2010 -0600 @@ -782,14 +782,30 @@ zfs_sort_snaps(zfs_handle_t *zhp, void *data) { avl_tree_t *avl = data; - zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); - + zfs_node_t *node; + zfs_node_t search; + + search.zn_handle = zhp; + node = avl_find(avl, &search, NULL); + if (node) { + /* + * If this snapshot was renamed while we were creating the + * AVL tree, it's possible that we already inserted it under + * its old name. Remove the old handle before adding the new + * one. + */ + zfs_close(node->zn_handle); + avl_remove(avl, node); + free(node); + } + + node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); node->zn_handle = zhp; avl_add(avl, node); + return (0); } -/* ARGSUSED */ static int zfs_snapshot_compare(const void *larg, const void *rarg) { @@ -844,6 +860,7 @@ const char *fromsnap; const char *tosnap; char prevsnap[ZFS_MAXNAMELEN]; + uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t verbose; int outfd; @@ -853,6 +870,8 @@ snapfilter_cb_t *filter_cb; void *filter_cb_arg; nvlist_t *debugnv; + char holdtag[ZFS_MAXNAMELEN]; + int cleanup_fd; } send_dump_data_t; /* @@ -860,23 +879,21 @@ * NULL) to the file descriptor specified by outfd. */ static int -dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin, - int outfd, boolean_t enoent_ok, boolean_t *got_enoent, nvlist_t *debugnv) +dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, + boolean_t fromorigin, int outfd, nvlist_t *debugnv) { zfs_cmd_t zc = { 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *thisdbg; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin); + assert(fromsnap_obj == 0 || !fromorigin); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - if (fromsnap) - (void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value)); zc.zc_cookie = outfd; zc.zc_obj = fromorigin; - - *got_enoent = B_FALSE; + zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); + zc.zc_fromobj = fromsnap_obj; VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0)); if (fromsnap && fromsnap[0] != '\0') { @@ -904,10 +921,6 @@ return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); case ENOENT: - if (enoent_ok) { - *got_enoent = B_TRUE; - return (0); - } if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -943,12 +956,47 @@ } static int +hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) +{ + zfs_handle_t *pzhp; + int error = 0; + char *thissnap; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + + /* + * zfs_send() only opens a cleanup_fd for sends that need it, + * e.g. replication and doall. + */ + if (sdd->cleanup_fd == -1) + return (0); + + thissnap = strchr(zhp->zfs_name, '@') + 1; + *(thissnap - 1) = '\0'; + pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET); + *(thissnap - 1) = '@'; + + /* + * It's OK if the parent no longer exists. The send code will + * handle that error. + */ + if (pzhp) { + error = zfs_hold(pzhp, thissnap, sdd->holdtag, + B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd, + zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID), + zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG)); + zfs_close(pzhp); + } + + return (error); +} + +static int dump_snapshot(zfs_handle_t *zhp, void *arg) { send_dump_data_t *sdd = arg; - const char *thissnap; + char *thissnap; int err; - boolean_t got_enoent; boolean_t isfromsnap, istosnap; boolean_t exclude = B_FALSE; @@ -957,10 +1005,17 @@ strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { - sdd->seenfrom = B_TRUE; - (void) strcpy(sdd->prevsnap, thissnap); + err = hold_for_send(zhp, sdd); + if (err == 0) { + sdd->seenfrom = B_TRUE; + (void) strcpy(sdd->prevsnap, thissnap); + sdd->prevsnap_obj = zfs_prop_get_int(zhp, + ZFS_PROP_OBJSETID); + } else if (err == ENOENT) { + err = 0; + } zfs_close(zhp); - return (0); + return (err); } if (sdd->seento || !sdd->seenfrom) { @@ -1001,7 +1056,7 @@ sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) { /* * This snapshot is filtered out. Don't send it, and don't - * set prevsnap, so it will be as if this snapshot didn't + * set prevsnap_obj, so it will be as if this snapshot didn't * exist, and the next accepted snapshot will be sent as * an incremental from the last accepted one, or as the * first (and full) snapshot in the case of a replication, @@ -1011,20 +1066,26 @@ return (0); } + err = hold_for_send(zhp, sdd); + if (err) { + if (err == ENOENT) + err = 0; + zfs_close(zhp); + return (err); + } + /* send it */ if (sdd->verbose) { (void) fprintf(stderr, "sending from @%s to %s\n", sdd->prevsnap, zhp->zfs_name); } - err = dump_ioctl(zhp, sdd->prevsnap, + err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate), - sdd->outfd, B_TRUE, &got_enoent, sdd->debugnv); - - if (got_enoent) - err = 0; - else - (void) strcpy(sdd->prevsnap, thissnap); + sdd->outfd, sdd->debugnv); + + (void) strcpy(sdd->prevsnap, thissnap); + sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); return (err); } @@ -1064,6 +1125,7 @@ } sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0; + sdd->prevsnap_obj = 0; if (sdd->fromsnap == NULL || missingfrom) sdd->seenfrom = B_TRUE; @@ -1202,7 +1264,6 @@ int err; nvlist_t *fss = NULL; avl_tree_t *fsavl = NULL; - char holdtag[128]; static uint64_t holdseq; int spa_version; boolean_t holdsnaps = B_FALSE; @@ -1210,15 +1271,6 @@ int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; - int cleanup_fd = -1; - - if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) { - uint64_t version; - version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); - if (version >= ZPL_VERSION_SA) { - featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; - } - } (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot send '%s'"), zhp->zfs_name); @@ -1229,8 +1281,17 @@ return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); } + if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) { + uint64_t version; + version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); + if (version >= ZPL_VERSION_SA) { + featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; + } + } + if (zfs_spa_version(zhp, &spa_version) == 0 && - spa_version >= SPA_VERSION_USERREFS) + spa_version >= SPA_VERSION_USERREFS && + (flags.doall || flags.replicate)) holdsnaps = B_TRUE; if (flags.dedup) { @@ -1259,22 +1320,6 @@ size_t buflen = 0; zio_cksum_t zc = { 0 }; - if (holdsnaps) { - ++holdseq; - (void) snprintf(holdtag, sizeof (holdtag), - ".send-%d-%llu", getpid(), (u_longlong_t)holdseq); - cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); - if (cleanup_fd < 0) { - err = errno; - goto stderr_out; - } - err = zfs_hold_range(zhp, fromsnap, tosnap, - holdtag, flags.replicate, B_TRUE, filter_func, - cb_arg, cleanup_fd); - if (err) - goto err_out; - } - if (flags.replicate || flags.props) { nvlist_t *hdrnv; @@ -1364,6 +1409,18 @@ sdd.filter_cb_arg = cb_arg; if (debugnvp) sdd.debugnv = *debugnvp; + if (holdsnaps) { + ++holdseq; + (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag), + ".send-%d-%llu", getpid(), (u_longlong_t)holdseq); + sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); + if (sdd.cleanup_fd < 0) { + err = errno; + goto stderr_out; + } + } else { + sdd.cleanup_fd = -1; + } err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); @@ -1373,9 +1430,9 @@ (void) pthread_join(tid, NULL); } - if (cleanup_fd != -1) { - VERIFY(0 == close(cleanup_fd)); - cleanup_fd = -1; + if (sdd.cleanup_fd != -1) { + VERIFY(0 == close(sdd.cleanup_fd)); + sdd.cleanup_fd = -1; } if (flags.replicate || flags.doall || flags.props) { @@ -1397,8 +1454,8 @@ stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: - if (cleanup_fd != -1) - VERIFY(0 == close(cleanup_fd)); + if (sdd.cleanup_fd != -1) + VERIFY(0 == close(sdd.cleanup_fd)); if (flags.dedup) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL);
--- a/usr/src/lib/libzfs/common/mapfile-vers Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/lib/libzfs/common/mapfile-vers Wed Jul 07 15:04:13 2010 -0600 @@ -75,7 +75,6 @@ zfs_get_type; zfs_history_event_names; zfs_hold; - zfs_hold_range; zfs_is_mounted; zfs_is_shared; zfs_is_shared_nfs; @@ -121,7 +120,6 @@ zfs_receive; zfs_refresh_properties; zfs_release; - zfs_release_range; zfs_rename; zfs_rollback; zfs_send;
--- a/usr/src/lib/libzpool/common/kernel.c Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/lib/libzpool/common/kernel.c Wed Jul 07 15:04:13 2010 -0600 @@ -946,7 +946,21 @@ /* ARGSUSED */ int -zfs_onexit_add_cb(int fd, void (*func)(void *), void *data, +zfs_onexit_fd_hold(int fd, minor_t *minorp) +{ + *minorp = 0; + return (0); +} + +/* ARGSUSED */ +void +zfs_onexit_fd_rele(int fd) +{ +} + +/* ARGSUSED */ +int +zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, uint64_t *action_handle) { return (0); @@ -954,14 +968,14 @@ /* ARGSUSED */ int -zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire) +zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) { return (0); } /* ARGSUSED */ int -zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data) +zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) { return (0); }
--- a/usr/src/uts/common/fs/zfs/dmu_send.c Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/dmu_send.c Wed Jul 07 15:04:13 2010 -0600 @@ -1351,10 +1351,18 @@ /* if this stream is dedup'ed, set up the avl tree for guid mapping */ if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { + minor_t minor; + if (cleanup_fd == -1) { ra.err = EBADF; goto out; } + ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); + if (ra.err) { + cleanup_fd = -1; + goto out; + } + if (*action_handlep == 0) { ra.guid_to_ds_map = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); @@ -1364,13 +1372,13 @@ (void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid, (void *)ra.guid_to_ds_map, DS_FIND_CHILDREN); - ra.err = zfs_onexit_add_cb(cleanup_fd, + ra.err = zfs_onexit_add_cb(minor, free_guid_map_onexit, ra.guid_to_ds_map, action_handlep); if (ra.err) goto out; } else { - ra.err = zfs_onexit_cb_data(cleanup_fd, *action_handlep, + ra.err = zfs_onexit_cb_data(minor, *action_handlep, (void **)&ra.guid_to_ds_map); if (ra.err) goto out; @@ -1456,6 +1464,9 @@ ASSERT(ra.err != 0); out: + if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) + zfs_onexit_fd_rele(cleanup_fd); + if (ra.err != 0) { /* * destroy what we created, so we don't leave it in the
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c Wed Jul 07 15:04:13 2010 -0600 @@ -367,6 +367,7 @@ dmu_buf_t *dbuf; dsl_dataset_t *ds; int err; + dmu_object_info_t doi; ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || dsl_pool_sync_context(dp)); @@ -374,6 +375,12 @@ err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); if (err) return (err); + + /* Make sure dsobj has the correct object type. */ + dmu_object_info_from_db(dbuf, &doi); + if (doi.doi_type != DMU_OT_DSL_DATASET) + return (EINVAL); + ds = dmu_buf_get_user(dbuf); if (ds == NULL) { dsl_dataset_t *winner; @@ -3422,10 +3429,9 @@ }; typedef struct zfs_hold_cleanup_arg { - char dsname[MAXNAMELEN]; - char snapname[MAXNAMELEN]; + dsl_pool_t *dp; + uint64_t dsobj; char htag[MAXNAMELEN]; - boolean_t recursive; } zfs_hold_cleanup_arg_t; static void @@ -3433,11 +3439,25 @@ { zfs_hold_cleanup_arg_t *ca = arg; - (void) dsl_dataset_user_release(ca->dsname, ca->snapname, - ca->htag, ca->recursive); + (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag, + B_TRUE); kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); } +void +dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, + minor_t minor) +{ + zfs_hold_cleanup_arg_t *ca; + + ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); + ca->dp = ds->ds_dir->dd_pool; + ca->dsobj = ds->ds_object; + (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); + VERIFY3U(0, ==, zfs_onexit_add_cb(minor, + dsl_dataset_user_release_onexit, ca, NULL)); +} + /* * The max length of a temporary tag prefix is the number of hex digits * required to express UINT64_MAX plus one for the hyphen. @@ -3541,6 +3561,24 @@ } int +dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, + boolean_t temphold) +{ + struct dsl_ds_holdarg *ha; + int error; + + ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); + ha->htag = htag; + ha->temphold = temphold; + error = dsl_sync_task_do(ds->ds_dir->dd_pool, + dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, + ds, ha, 0); + kmem_free(ha, sizeof (struct dsl_ds_holdarg)); + + return (error); +} + +int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, boolean_t recursive, boolean_t temphold, int cleanup_fd) { @@ -3548,6 +3586,16 @@ dsl_sync_task_t *dst; spa_t *spa; int error; + minor_t minor = 0; + + if (cleanup_fd != -1) { + /* Currently we only support cleanup-on-exit of tempholds. */ + if (!temphold) + return (EINVAL); + error = zfs_onexit_fd_hold(cleanup_fd, &minor); + if (error) + return (error); + } ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); @@ -3556,6 +3604,8 @@ error = spa_open(dsname, &spa, FTAG); if (error) { kmem_free(ha, sizeof (struct dsl_ds_holdarg)); + if (cleanup_fd != -1) + zfs_onexit_fd_rele(cleanup_fd); return (error); } @@ -3581,6 +3631,12 @@ if (dst->dst_err) { dsl_dataset_name(ds, ha->failed); *strchr(ha->failed, '@') = '\0'; + } else if (error == 0 && minor != 0 && temphold) { + /* + * If this hold is to be released upon process exit, + * register that action now. + */ + dsl_register_onexit_hold_cleanup(ds, htag, minor); } dsl_dataset_rele(ds, ha->dstg); } @@ -3593,25 +3649,10 @@ dsl_sync_task_group_destroy(ha->dstg); - /* - * If this set of temporary holds is to be removed upon process exit, - * register that action now. - */ - if (error == 0 && cleanup_fd != -1 && temphold) { - zfs_hold_cleanup_arg_t *ca; - uint64_t action_handle; - - ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); - (void) strlcpy(ca->dsname, dsname, sizeof (ca->dsname)); - (void) strlcpy(ca->snapname, snapname, sizeof (ca->snapname)); - (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); - ca->recursive = recursive; - (void) zfs_onexit_add_cb(cleanup_fd, - dsl_dataset_user_release_onexit, ca, &action_handle); - } - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); spa_close(spa, FTAG); + if (cleanup_fd != -1) + zfs_onexit_fd_rele(cleanup_fd); return (error); } @@ -3703,11 +3744,6 @@ uint64_t refs; int error; - if (ds->ds_objset) { - dmu_objset_evict(ds->ds_objset); - ds->ds_objset = NULL; - } - mutex_enter(&ds->ds_lock); ds->ds_userrefs--; refs = ds->ds_userrefs; @@ -3867,10 +3903,12 @@ } /* - * Called at spa_load time to release a stale temporary user hold. + * Called at spa_load time (with retry == B_FALSE) to release a stale + * temporary user hold. Also called by the onexit code (with retry == B_TRUE). */ int -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag) +dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag, + boolean_t retry) { dsl_dataset_t *ds; char *snap; @@ -3878,20 +3916,36 @@ int namelen; int error; - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - if (error) - return (error); - namelen = dsl_dataset_namelen(ds)+1; - name = kmem_alloc(namelen, KM_SLEEP); - dsl_dataset_name(ds, name); - dsl_dataset_rele(ds, FTAG); - - snap = strchr(name, '@'); - *snap = '\0'; - ++snap; - return (dsl_dataset_user_release(name, snap, htag, B_FALSE)); + do { + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + if (error) + return (error); + namelen = dsl_dataset_namelen(ds)+1; + name = kmem_alloc(namelen, KM_SLEEP); + dsl_dataset_name(ds, name); + dsl_dataset_rele(ds, FTAG); + + snap = strchr(name, '@'); + *snap = '\0'; + ++snap; + error = dsl_dataset_user_release(name, snap, htag, B_FALSE); + kmem_free(name, namelen); + + /* + * The object can't have been destroyed because we have a hold, + * but it might have been renamed, resulting in ENOENT. Retry + * if we've been requested to do so. + * + * It would be nice if we could use the dsobj all the way + * through and avoid ENOENT entirely. But we might need to + * unmount the snapshot, and there's currently no way to lookup + * a vfsp using a ZFS object id. + */ + } while ((error == ENOENT) && retry); + + return (error); } int
--- a/usr/src/uts/common/fs/zfs/dsl_deleg.c Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c Wed Jul 07 15:04:13 2010 -0600 @@ -528,9 +528,8 @@ * Check if user has requested permission. */ int -dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr) +dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr) { - dsl_dataset_t *ds; dsl_dir_t *dd; dsl_pool_t *dp; void *cookie; @@ -540,23 +539,15 @@ avl_tree_t permsets; perm_set_t *setnode; - error = dsl_dataset_hold(dsname, FTAG, &ds); - if (error) - return (error); - dp = ds->ds_dir->dd_pool; mos = dp->dp_meta_objset; - if (dsl_delegation_on(mos) == B_FALSE) { - dsl_dataset_rele(ds, FTAG); + if (dsl_delegation_on(mos) == B_FALSE) return (ECANCELED); - } if (spa_version(dmu_objset_spa(dp->dp_meta_objset)) < - SPA_VERSION_DELEGATED_PERMS) { - dsl_dataset_rele(ds, FTAG); + SPA_VERSION_DELEGATED_PERMS) return (EPERM); - } if (dsl_dataset_is_snapshot(ds)) { /* @@ -633,7 +624,6 @@ error = EPERM; success: rw_exit(&dp->dp_config_rwlock); - dsl_dataset_rele(ds, FTAG); cookie = NULL; while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL) @@ -642,6 +632,22 @@ return (error); } +int +dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr) +{ + dsl_dataset_t *ds; + int error; + + error = dsl_dataset_hold(dsname, FTAG, &ds); + if (error) + return (error); + + error = dsl_deleg_access_impl(ds, perm, cr); + dsl_dataset_rele(ds, FTAG); + + return (error); +} + /* * Other routines. */
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/dsl_pool.c Wed Jul 07 15:04:13 2010 -0600 @@ -768,7 +768,7 @@ *htag = '\0'; ++htag; dsobj = strtonum(za.za_name, NULL); - (void) dsl_dataset_user_release_tmp(dp, dsobj, htag); + (void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE); } zap_cursor_fini(&zc); }
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Wed Jul 07 15:04:13 2010 -0600 @@ -182,6 +182,8 @@ boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag); void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag); +void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, + minor_t minor); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, @@ -198,10 +200,12 @@ boolean_t force); int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, boolean_t recursive, boolean_t temphold, int cleanup_fd); +int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, + boolean_t temphold); int dsl_dataset_user_release(char *dsname, char *snapname, char *htag, boolean_t recursive); int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, - char *htag); + char *htag, boolean_t retry); int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp); blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
--- a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h Wed Jul 07 15:04:13 2010 -0600 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_DSL_DELEG_H @@ -64,6 +63,7 @@ int dsl_deleg_get(const char *ddname, nvlist_t **nvp); int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset); int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr); +int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr); void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr); int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr); int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h Wed Jul 07 15:04:13 2010 -0600 @@ -267,6 +267,9 @@ uint64_t zc_action_handle; int zc_cleanup_fd; uint8_t zc_pad[4]; + uint64_t zc_sendobj; + uint64_t zc_fromobj; + uint64_t zc_createtxg; } zfs_cmd_t; typedef struct zfs_useracct {
--- a/usr/src/uts/common/fs/zfs/sys/zfs_onexit.h Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/zfs_onexit.h Wed Jul 07 15:04:13 2010 -0600 @@ -50,10 +50,14 @@ #endif -extern int zfs_onexit_add_cb(int fd, void (*func)(void *), void *data, +extern int zfs_onexit_fd_hold(int fd, minor_t *minorp); +extern void zfs_onexit_fd_rele(int fd); +extern int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, uint64_t *action_handle); -extern int zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire); -extern int zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data); +extern int zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, + boolean_t fire); +extern int zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, + void **data); #ifdef __cplusplus }
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c Wed Jul 07 15:04:13 2010 -0600 @@ -282,9 +282,8 @@ } static int -zfs_dozonecheck(const char *dataset, cred_t *cr) +zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr) { - uint64_t zoned; int writable = 1; /* @@ -295,9 +294,6 @@ !zone_dataset_visible(dataset, &writable)) return (ENOENT); - if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL)) - return (ENOENT); - if (INGLOBALZONE(curproc)) { /* * If the fs is zoned, only root can access it from the @@ -319,6 +315,32 @@ return (0); } +static int +zfs_dozonecheck(const char *dataset, cred_t *cr) +{ + uint64_t zoned; + + if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL)) + return (ENOENT); + + return (zfs_dozonecheck_impl(dataset, zoned, cr)); +} + +static int +zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) +{ + uint64_t zoned; + + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) { + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + return (ENOENT); + } + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + + return (zfs_dozonecheck_impl(dataset, zoned, cr)); +} + int zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { @@ -333,6 +355,21 @@ return (error); } +int +zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, + const char *perm, cred_t *cr) +{ + int error; + + error = zfs_dozonecheck_ds(name, ds, cr); + if (error == 0) { + error = secpolicy_zfs(cr); + if (error) + error = dsl_deleg_access_impl(ds, perm, cr); + } + return (error); +} + /* * Policy for setting the security label property. * @@ -508,8 +545,38 @@ int zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) { - return (zfs_secpolicy_write_perms(zc->zc_name, - ZFS_DELEG_PERM_SEND, cr)); + spa_t *spa; + dsl_pool_t *dp; + dsl_dataset_t *ds; + char *cp; + int error; + + /* + * Generate the current snapshot name from the given objsetid, then + * use that name for the secpolicy/zone checks. + */ + cp = strchr(zc->zc_name, '@'); + if (cp == NULL) + return (EINVAL); + error = spa_open(zc->zc_name, &spa, FTAG); + if (error) + return (error); + + dp = spa_get_dsl(spa); + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + spa_close(spa, FTAG); + if (error) + return (error); + + dsl_dataset_name(ds, zc->zc_name); + + error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, + ZFS_DELEG_PERM_SEND, cr); + dsl_dataset_rele(ds, FTAG); + + return (error); } static int @@ -1579,26 +1646,12 @@ return (error); } -/* - * inputs: - * zc_name name of filesystem - * zc_nvlist_dst_size size of buffer for property nvlist - * - * outputs: - * zc_objset_stats stats - * zc_nvlist_dst property nvlist - * zc_nvlist_dst_size size of property nvlist - */ static int -zfs_ioc_objset_stats(zfs_cmd_t *zc) +zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) { - objset_t *os = NULL; - int error; + int error = 0; nvlist_t *nv; - if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) - return (error); - dmu_objset_fast_stat(os, &zc->zc_objset_stats); if (zc->zc_nvlist_dst != 0 && @@ -1619,7 +1672,32 @@ nvlist_free(nv); } + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_nvlist_dst_size size of buffer for property nvlist + * + * outputs: + * zc_objset_stats stats + * zc_nvlist_dst property nvlist + * zc_nvlist_dst_size size of property nvlist + */ +static int +zfs_ioc_objset_stats(zfs_cmd_t *zc) +{ + objset_t *os = NULL; + int error; + + if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) + return (error); + + error = zfs_ioc_objset_stats_impl(zc, os); + dmu_objset_rele(os, FTAG); + return (error); } @@ -1852,19 +1930,43 @@ error = dmu_snapshot_list_next(os, sizeof (zc->zc_name) - strlen(zc->zc_name), - zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL); - dmu_objset_rele(os, FTAG); + zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie, + NULL); + if (error == 0) { - error = zfs_ioc_objset_stats(zc); /* fill in the stats */ - if (error == ENOENT) { - /* We lost a race with destroy, get the next one. */ - *strchr(zc->zc_name, '@') = '\0'; - goto top; + dsl_dataset_t *ds; + dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; + + /* + * Since we probably don't have a hold on this snapshot, + * it's possible that the objsetid could have been destroyed + * and reused for a new objset. It's OK if this happens during + * a zfs send operation, since the new createtxg will be + * beyond the range we're interested in. + */ + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + if (error) { + if (error == ENOENT) { + /* Racing with destroy, get the next one. */ + *strchr(zc->zc_name, '@') = '\0'; + dmu_objset_rele(os, FTAG); + goto top; + } + } else { + objset_t *ossnap; + + error = dmu_objset_from_ds(ds, &ossnap); + if (error == 0) + error = zfs_ioc_objset_stats_impl(zc, ossnap); + dsl_dataset_rele(ds, FTAG); } } else if (error == ENOENT) { error = ESRCH; } + dmu_objset_rele(os, FTAG); /* if we failed, undo the @ that we tacked on to zc_name */ if (error) *strchr(zc->zc_name, '@') = '\0'; @@ -3573,9 +3675,10 @@ /* * inputs: * zc_name name of snapshot to send - * zc_value short name of incremental fromsnap (may be empty) * zc_cookie file descriptor to send stream to - * zc_obj fromorigin flag (mutually exclusive with zc_value) + * zc_obj fromorigin flag (mutually exclusive with zc_fromobj) + * zc_sendobj objsetid of snapshot to send + * zc_fromobj objsetid of incremental fromsnap (may be zero) * * outputs: none */ @@ -3587,34 +3690,55 @@ file_t *fp; int error; offset_t off; - - error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap); + dsl_dataset_t *ds; + dsl_dataset_t *dsfrom = NULL; + spa_t *spa; + dsl_pool_t *dp; + + error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); - if (zc->zc_value[0] != '\0') { - char *buf; - char *cp; - - buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); - (void) strncpy(buf, zc->zc_name, MAXPATHLEN); - cp = strchr(buf, '@'); - if (cp) - *(cp+1) = 0; - (void) strncat(buf, zc->zc_value, MAXPATHLEN); - error = dmu_objset_hold(buf, FTAG, &fromsnap); - kmem_free(buf, MAXPATHLEN); + dp = spa_get_dsl(spa); + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + if (error) { + spa_close(spa, FTAG); + return (error); + } + + error = dmu_objset_from_ds(ds, &tosnap); + if (error) { + dsl_dataset_rele(ds, FTAG); + spa_close(spa, FTAG); + return (error); + } + + if (zc->zc_fromobj != 0) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); + rw_exit(&dp->dp_config_rwlock); + spa_close(spa, FTAG); if (error) { - dmu_objset_rele(tosnap, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } + error = dmu_objset_from_ds(dsfrom, &fromsnap); + if (error) { + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (error); + } + } else { + spa_close(spa, FTAG); } fp = getf(zc->zc_cookie); if (fp == NULL) { - dmu_objset_rele(tosnap, FTAG); - if (fromsnap) - dmu_objset_rele(fromsnap, FTAG); + dsl_dataset_rele(ds, FTAG); + if (dsfrom) + dsl_dataset_rele(dsfrom, FTAG); return (EBADF); } @@ -3624,9 +3748,9 @@ if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; releasef(zc->zc_cookie); - if (fromsnap) - dmu_objset_rele(fromsnap, FTAG); - dmu_objset_rele(tosnap, FTAG); + if (dsfrom) + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } @@ -4194,6 +4318,8 @@ * zc_cookie recursive flag * zc_temphold set if hold is temporary * zc_cleanup_fd cleanup-on-exit file descriptor for calling process + * zc_sendobj if non-zero, the objid for zc_name@zc_value + * zc_createtxg if zc_sendobj is non-zero, snap must have zc_createtxg * * outputs: none */ @@ -4201,12 +4327,66 @@ zfs_ioc_hold(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie; + spa_t *spa; + dsl_pool_t *dp; + dsl_dataset_t *ds; + int error; + minor_t minor = 0; if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); - return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value, - zc->zc_string, recursive, zc->zc_temphold, zc->zc_cleanup_fd)); + if (zc->zc_sendobj == 0) { + return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value, + zc->zc_string, recursive, zc->zc_temphold, + zc->zc_cleanup_fd)); + } + + if (recursive) + return (EINVAL); + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error) + return (error); + + dp = spa_get_dsl(spa); + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + spa_close(spa, FTAG); + if (error) + return (error); + + /* + * Until we have a hold on this snapshot, it's possible that + * zc_sendobj could've been destroyed and reused as part + * of a later txg. Make sure we're looking at the right object. + */ + if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) { + dsl_dataset_rele(ds, FTAG); + return (ENOENT); + } + + if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) { + error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); + if (error) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + } + + error = dsl_dataset_user_hold_for_send(ds, zc->zc_string, + zc->zc_temphold); + if (minor != 0) { + if (error == 0) { + dsl_register_onexit_hold_cleanup(ds, zc->zc_string, + minor); + } + zfs_onexit_fd_rele(zc->zc_cleanup_fd); + } + dsl_dataset_rele(ds, FTAG); + + return (error); } /*
--- a/usr/src/uts/common/fs/zfs/zfs_onexit.c Wed Jul 07 02:21:35 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/zfs_onexit.c Wed Jul 07 15:04:13 2010 -0600 @@ -46,6 +46,12 @@ * clone-open, generating a unique minor number. The process then passes * along that file descriptor to each ioctl that might have a cleanup operation. * + * Consumers of the onexit routines should call zfs_onexit_fd_hold() early + * on to validate the given fd and add a reference to its file table entry. + * This allows the consumer to do its work and then add a callback, knowing + * that zfs_onexit_add_cb() won't fail with EBADF. When finished, consumers + * should call zfs_onexit_fd_rele(). + * * A simple example is zfs_ioc_recv(), where we might create an AVL tree * with dataset/GUID mappings and then reuse that tree on subsequent * zfs_ioc_recv() calls. @@ -57,7 +63,8 @@ * * The action handle is then passed from user space to subsequent * zfs_ioc_recv() calls, so that dmu_recv_stream() can fetch its AVL tree - * by calling zfs_onexit_cb_data() with the cleanup fd and action handle. + * by calling zfs_onexit_cb_data() with the device minor number and + * action handle. * * If the user process exits abnormally, the callback is invoked implicitly * as part of the driver close operation. Once the user space process is @@ -97,37 +104,53 @@ } static int -zfs_onexit_fd_to_state(int fd, zfs_onexit_t **zo) +zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo) +{ + *zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV); + if (*zo == NULL) + return (EBADF); + + return (0); +} + +/* + * Consumers might need to operate by minor number instead of fd, since + * they might be running in another thread (e.g. txg_sync_thread). Callers + * of this function must call zfs_onexit_fd_rele() when they're finished + * using the minor number. + */ +int +zfs_onexit_fd_hold(int fd, minor_t *minorp) { file_t *fp; - dev_t rdev; + zfs_onexit_t *zo; fp = getf(fd); if (fp == NULL) return (EBADF); - rdev = fp->f_vnode->v_rdev; - *zo = zfsdev_get_soft_state(getminor(rdev), ZSST_CTLDEV); - if (*zo == NULL) { - releasef(fd); - return (EBADF); - } + *minorp = getminor(fp->f_vnode->v_rdev); + return (zfs_onexit_minor_to_state(*minorp, &zo)); +} - return (0); +void +zfs_onexit_fd_rele(int fd) +{ + releasef(fd); } /* * Add a callback to be invoked when the calling process exits. */ int -zfs_onexit_add_cb(int fd, void (*func)(void *), void *data, +zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, uint64_t *action_handle) { zfs_onexit_t *zo; zfs_onexit_action_node_t *ap; int error; - error = zfs_onexit_fd_to_state(fd, &zo); + error = zfs_onexit_minor_to_state(minor, &zo); if (error) return (error); @@ -139,8 +162,8 @@ mutex_enter(&zo->zo_lock); list_insert_tail(&zo->zo_actions, ap); mutex_exit(&zo->zo_lock); - *action_handle = (uint64_t)(uintptr_t)ap; - releasef(fd); + if (action_handle) + *action_handle = (uint64_t)(uintptr_t)ap; return (0); } @@ -167,13 +190,13 @@ * Delete the callback, triggering it first if 'fire' is set. */ int -zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire) +zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) { zfs_onexit_t *zo; zfs_onexit_action_node_t *ap; int error; - error = zfs_onexit_fd_to_state(fd, &zo); + error = zfs_onexit_minor_to_state(minor, &zo); if (error) return (error); @@ -189,7 +212,6 @@ mutex_exit(&zo->zo_lock); error = ENOENT; } - releasef(fd); return (error); } @@ -200,7 +222,7 @@ * calls, knowing that it will be cleaned up if the calling process exits. */ int -zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data) +zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) { zfs_onexit_t *zo; zfs_onexit_action_node_t *ap; @@ -208,7 +230,7 @@ *data = NULL; - error = zfs_onexit_fd_to_state(fd, &zo); + error = zfs_onexit_minor_to_state(minor, &zo); if (error) return (error); @@ -219,7 +241,6 @@ else error = ENOENT; mutex_exit(&zo->zo_lock); - releasef(fd); return (error); }