changeset 12798:f6c8601080b4

6938335 zfs send -R can still miss renamed snapshots
6955879 panic in dmu_objset_stats while running nfs IOs.
6928104 zfs send/rename race can leak snapshot holds
6953835 mem leak in dsl_dataset_user_release_tmp()
6915117 zfs_iter_snapshots() should deal with midstream snapshot renames
author Chris Kirby <Chris.Kirby@oracle.com>
date Wed, 07 Jul 2010 15:04:13 -0600
parents 7119a7ce586b
children 45ed97ad3d9f
files usr/src/cmd/zfs/zfs_main.c usr/src/lib/libzfs/common/libzfs.h usr/src/lib/libzfs/common/libzfs_dataset.c usr/src/lib/libzfs/common/libzfs_sendrecv.c usr/src/lib/libzfs/common/mapfile-vers usr/src/lib/libzpool/common/kernel.c usr/src/uts/common/fs/zfs/dmu_send.c usr/src/uts/common/fs/zfs/dsl_dataset.c usr/src/uts/common/fs/zfs/dsl_deleg.c usr/src/uts/common/fs/zfs/dsl_pool.c usr/src/uts/common/fs/zfs/sys/dsl_dataset.h usr/src/uts/common/fs/zfs/sys/dsl_deleg.h usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h usr/src/uts/common/fs/zfs/sys/zfs_onexit.h usr/src/uts/common/fs/zfs/zfs_ioctl.c usr/src/uts/common/fs/zfs/zfs_onexit.c
diffstat 16 files changed, 564 insertions(+), 334 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/zfs/zfs_main.c	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/cmd/zfs/zfs_main.c	Wed Jul 07 15:04:13 2010 -0600
@@ -2888,7 +2888,7 @@
 		}
 		if (holding) {
 			if (zfs_hold(zhp, delim+1, tag, recursive,
-			    temphold, B_FALSE, -1) != 0)
+			    temphold, B_FALSE, -1, 0, 0) != 0)
 				++errors;
 		} else {
 			if (zfs_release(zhp, delim+1, tag, recursive) != 0)
--- a/usr/src/lib/libzfs/common/libzfs.h	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/lib/libzfs/common/libzfs.h	Wed Jul 07 15:04:13 2010 -0600
@@ -533,12 +533,8 @@
 
 extern int zfs_promote(zfs_handle_t *);
 extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
-    boolean_t, boolean_t, int);
-extern int zfs_hold_range(zfs_handle_t *, const char *, const char *,
-    const char *, boolean_t, boolean_t, snapfilter_cb_t, void *, int);
+    boolean_t, boolean_t, int, uint64_t, uint64_t);
 extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
-extern int zfs_release_range(zfs_handle_t *, const char *, const char *,
-    const char *, boolean_t);
 extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);
 
 typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c	Wed Jul 07 15:04:13 2010 -0600
@@ -3927,11 +3927,13 @@
 int
 zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
     boolean_t recursive, boolean_t temphold, boolean_t enoent_ok,
-    int cleanup_fd)
+    int cleanup_fd, uint64_t dsobj, uint64_t createtxg)
 {
 	zfs_cmd_t zc = { 0 };
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 
+	ASSERT(!recursive || dsobj == 0);
+
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
 	if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
@@ -3940,6 +3942,8 @@
 	zc.zc_cookie = recursive;
 	zc.zc_temphold = temphold;
 	zc.zc_cleanup_fd = cleanup_fd;
+	zc.zc_sendobj = dsobj;
+	zc.zc_createtxg = createtxg;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) {
 		char errbuf[ZFS_MAXNAMELEN+32];
@@ -3969,7 +3973,7 @@
 			return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf));
 		case ENOENT:
 			if (enoent_ok)
-				return (0);
+				return (ENOENT);
 			/* FALLTHROUGH */
 		default:
 			return (zfs_standard_error_fmt(hdl, errno, errbuf));
@@ -3979,107 +3983,6 @@
 	return (0);
 }
 
-struct hold_range_arg {
-	zfs_handle_t	*origin;
-	const char	*fromsnap;
-	const char	*tosnap;
-	char		lastsnapheld[ZFS_MAXNAMELEN];
-	const char	*tag;
-	boolean_t	temphold;
-	boolean_t	seento;
-	boolean_t	seenfrom;
-	boolean_t	holding;
-	boolean_t	recursive;
-	snapfilter_cb_t	*filter_cb;
-	void		*filter_cb_arg;
-	int		cleanup_fd;
-};
-
-static int
-zfs_hold_range_one(zfs_handle_t *zhp, void *arg)
-{
-	struct hold_range_arg *hra = arg;
-	const char *thissnap;
-	int error;
-
-	thissnap = strchr(zfs_get_name(zhp), '@') + 1;
-
-	if (hra->fromsnap && !hra->seenfrom &&
-	    strcmp(hra->fromsnap, thissnap) == 0)
-		hra->seenfrom = B_TRUE;
-
-	/* snap is older or newer than the desired range, ignore it */
-	if (hra->seento || !hra->seenfrom) {
-		zfs_close(zhp);
-		return (0);
-	}
-
-	if (!hra->seento && strcmp(hra->tosnap, thissnap) == 0)
-		hra->seento = B_TRUE;
-
-	if (hra->filter_cb != NULL &&
-	    hra->filter_cb(zhp, hra->filter_cb_arg) == B_FALSE) {
-		zfs_close(zhp);
-		return (0);
-	}
-
-	if (hra->holding) {
-		/* We could be racing with destroy, so ignore ENOENT. */
-		error = zfs_hold(hra->origin, thissnap, hra->tag,
-		    hra->recursive, hra->temphold, B_TRUE, hra->cleanup_fd);
-		if (error == 0) {
-			(void) strlcpy(hra->lastsnapheld, zfs_get_name(zhp),
-			    sizeof (hra->lastsnapheld));
-		}
-	} else {
-		error = zfs_release(hra->origin, thissnap, hra->tag,
-		    hra->recursive);
-	}
-
-	zfs_close(zhp);
-	return (error);
-}
-
-/*
- * Add a user hold on the set of snapshots starting with fromsnap up to
- * and including tosnap. If we're unable to to acquire a particular hold,
- * undo any holds up to that point.
- */
-int
-zfs_hold_range(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
-    const char *tag, boolean_t recursive, boolean_t temphold,
-    snapfilter_cb_t filter_cb, void *cbarg, int cleanup_fd)
-{
-	struct hold_range_arg arg = { 0 };
-	int error;
-
-	arg.origin = zhp;
-	arg.fromsnap = fromsnap;
-	arg.tosnap = tosnap;
-	arg.tag = tag;
-	arg.temphold = temphold;
-	arg.holding = B_TRUE;
-	arg.recursive = recursive;
-	arg.seenfrom = (fromsnap == NULL);
-	arg.filter_cb = filter_cb;
-	arg.filter_cb_arg = cbarg;
-	arg.cleanup_fd = cleanup_fd;
-
-	error = zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg);
-
-	/*
-	 * Make sure we either hold the entire range or none. If we're
-	 * using cleanup-on-exit, we'll let the closing of the cleanup_fd
-	 * do the work for us.
-	 */
-	if (error && arg.lastsnapheld[0] != '\0' &&
-	    (cleanup_fd == -1 || !temphold)) {
-		(void) zfs_release_range(zhp, fromsnap,
-		    (const char *)arg.lastsnapheld, tag, recursive);
-	}
-	return (error);
-}
-
 int
 zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
     boolean_t recursive)
@@ -4121,27 +4024,6 @@
 	return (0);
 }
 
-/*
- * Release a user hold from the set of snapshots starting with fromsnap
- * up to and including tosnap.
- */
-int
-zfs_release_range(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
-    const char *tag, boolean_t recursive)
-{
-	struct hold_range_arg arg = { 0 };
-
-	arg.origin = zhp;
-	arg.fromsnap = fromsnap;
-	arg.tosnap = tosnap;
-	arg.tag = tag;
-	arg.recursive = recursive;
-	arg.seenfrom = (fromsnap == NULL);
-	arg.cleanup_fd = -1;
-
-	return (zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg));
-}
-
 uint64_t
 zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props)
 {
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Wed Jul 07 15:04:13 2010 -0600
@@ -782,14 +782,30 @@
 zfs_sort_snaps(zfs_handle_t *zhp, void *data)
 {
 	avl_tree_t *avl = data;
-	zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
-
+	zfs_node_t *node;
+	zfs_node_t search;
+
+	search.zn_handle = zhp;
+	node = avl_find(avl, &search, NULL);
+	if (node) {
+		/*
+		 * If this snapshot was renamed while we were creating the
+		 * AVL tree, it's possible that we already inserted it under
+		 * its old name. Remove the old handle before adding the new
+		 * one.
+		 */
+		zfs_close(node->zn_handle);
+		avl_remove(avl, node);
+		free(node);
+	}
+
+	node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
 	node->zn_handle = zhp;
 	avl_add(avl, node);
+
 	return (0);
 }
 
-/* ARGSUSED */
 static int
 zfs_snapshot_compare(const void *larg, const void *rarg)
 {
@@ -844,6 +860,7 @@
 	const char *fromsnap;
 	const char *tosnap;
 	char prevsnap[ZFS_MAXNAMELEN];
+	uint64_t prevsnap_obj;
 	boolean_t seenfrom, seento, replicate, doall, fromorigin;
 	boolean_t verbose;
 	int outfd;
@@ -853,6 +870,8 @@
 	snapfilter_cb_t *filter_cb;
 	void *filter_cb_arg;
 	nvlist_t *debugnv;
+	char holdtag[ZFS_MAXNAMELEN];
+	int cleanup_fd;
 } send_dump_data_t;
 
 /*
@@ -860,23 +879,21 @@
  * NULL) to the file descriptor specified by outfd.
  */
 static int
-dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
-    int outfd, boolean_t enoent_ok, boolean_t *got_enoent, nvlist_t *debugnv)
+dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
+    boolean_t fromorigin, int outfd, nvlist_t *debugnv)
 {
 	zfs_cmd_t zc = { 0 };
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	nvlist_t *thisdbg;
 
 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
-	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
+	assert(fromsnap_obj == 0 || !fromorigin);
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	if (fromsnap)
-		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
 	zc.zc_cookie = outfd;
 	zc.zc_obj = fromorigin;
-
-	*got_enoent = B_FALSE;
+	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
+	zc.zc_fromobj = fromsnap_obj;
 
 	VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
 	if (fromsnap && fromsnap[0] != '\0') {
@@ -904,10 +921,6 @@
 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
 
 		case ENOENT:
-			if (enoent_ok) {
-				*got_enoent = B_TRUE;
-				return (0);
-			}
 			if (zfs_dataset_exists(hdl, zc.zc_name,
 			    ZFS_TYPE_SNAPSHOT)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
@@ -943,12 +956,47 @@
 }
 
 static int
+hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
+{
+	zfs_handle_t *pzhp;
+	int error = 0;
+	char *thissnap;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+
+	/*
+	 * zfs_send() only opens a cleanup_fd for sends that need it,
+	 * e.g. replication and doall.
+	 */
+	if (sdd->cleanup_fd == -1)
+		return (0);
+
+	thissnap = strchr(zhp->zfs_name, '@') + 1;
+	*(thissnap - 1) = '\0';
+	pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
+	*(thissnap - 1) = '@';
+
+	/*
+	 * It's OK if the parent no longer exists.  The send code will
+	 * handle that error.
+	 */
+	if (pzhp) {
+		error = zfs_hold(pzhp, thissnap, sdd->holdtag,
+		    B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd,
+		    zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID),
+		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG));
+		zfs_close(pzhp);
+	}
+
+	return (error);
+}
+
+static int
 dump_snapshot(zfs_handle_t *zhp, void *arg)
 {
 	send_dump_data_t *sdd = arg;
-	const char *thissnap;
+	char *thissnap;
 	int err;
-	boolean_t got_enoent;
 	boolean_t isfromsnap, istosnap;
 	boolean_t exclude = B_FALSE;
 
@@ -957,10 +1005,17 @@
 	    strcmp(sdd->fromsnap, thissnap) == 0);
 
 	if (!sdd->seenfrom && isfromsnap) {
-		sdd->seenfrom = B_TRUE;
-		(void) strcpy(sdd->prevsnap, thissnap);
+		err = hold_for_send(zhp, sdd);
+		if (err == 0) {
+			sdd->seenfrom = B_TRUE;
+			(void) strcpy(sdd->prevsnap, thissnap);
+			sdd->prevsnap_obj = zfs_prop_get_int(zhp,
+			    ZFS_PROP_OBJSETID);
+		} else if (err == ENOENT) {
+			err = 0;
+		}
 		zfs_close(zhp);
-		return (0);
+		return (err);
 	}
 
 	if (sdd->seento || !sdd->seenfrom) {
@@ -1001,7 +1056,7 @@
 	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
 		/*
 		 * This snapshot is filtered out.  Don't send it, and don't
-		 * set prevsnap, so it will be as if this snapshot didn't
+		 * set prevsnap_obj, so it will be as if this snapshot didn't
 		 * exist, and the next accepted snapshot will be sent as
 		 * an incremental from the last accepted one, or as the
 		 * first (and full) snapshot in the case of a replication,
@@ -1011,20 +1066,26 @@
 		return (0);
 	}
 
+	err = hold_for_send(zhp, sdd);
+	if (err) {
+		if (err == ENOENT)
+			err = 0;
+		zfs_close(zhp);
+		return (err);
+	}
+
 	/* send it */
 	if (sdd->verbose) {
 		(void) fprintf(stderr, "sending from @%s to %s\n",
 		    sdd->prevsnap, zhp->zfs_name);
 	}
 
-	err = dump_ioctl(zhp, sdd->prevsnap,
+	err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
 	    sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
-	    sdd->outfd, B_TRUE, &got_enoent, sdd->debugnv);
-
-	if (got_enoent)
-		err = 0;
-	else
-		(void) strcpy(sdd->prevsnap, thissnap);
+	    sdd->outfd, sdd->debugnv);
+
+	(void) strcpy(sdd->prevsnap, thissnap);
+	sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
 	zfs_close(zhp);
 	return (err);
 }
@@ -1064,6 +1125,7 @@
 	}
 
 	sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
+	sdd->prevsnap_obj = 0;
 	if (sdd->fromsnap == NULL || missingfrom)
 		sdd->seenfrom = B_TRUE;
 
@@ -1202,7 +1264,6 @@
 	int err;
 	nvlist_t *fss = NULL;
 	avl_tree_t *fsavl = NULL;
-	char holdtag[128];
 	static uint64_t holdseq;
 	int spa_version;
 	boolean_t holdsnaps = B_FALSE;
@@ -1210,15 +1271,6 @@
 	int pipefd[2];
 	dedup_arg_t dda = { 0 };
 	int featureflags = 0;
-	int cleanup_fd = -1;
-
-	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
-		uint64_t version;
-		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
-		if (version >= ZPL_VERSION_SA) {
-			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
-		}
-	}
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot send '%s'"), zhp->zfs_name);
@@ -1229,8 +1281,17 @@
 		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
 	}
 
+	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
+		uint64_t version;
+		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
+		if (version >= ZPL_VERSION_SA) {
+			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
+		}
+	}
+
 	if (zfs_spa_version(zhp, &spa_version) == 0 &&
-	    spa_version >= SPA_VERSION_USERREFS)
+	    spa_version >= SPA_VERSION_USERREFS &&
+	    (flags.doall || flags.replicate))
 		holdsnaps = B_TRUE;
 
 	if (flags.dedup) {
@@ -1259,22 +1320,6 @@
 		size_t buflen = 0;
 		zio_cksum_t zc = { 0 };
 
-		if (holdsnaps) {
-			++holdseq;
-			(void) snprintf(holdtag, sizeof (holdtag),
-			    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
-			cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
-			if (cleanup_fd < 0) {
-				err = errno;
-				goto stderr_out;
-			}
-			err = zfs_hold_range(zhp, fromsnap, tosnap,
-			    holdtag, flags.replicate, B_TRUE, filter_func,
-			    cb_arg, cleanup_fd);
-			if (err)
-				goto err_out;
-		}
-
 		if (flags.replicate || flags.props) {
 			nvlist_t *hdrnv;
 
@@ -1364,6 +1409,18 @@
 	sdd.filter_cb_arg = cb_arg;
 	if (debugnvp)
 		sdd.debugnv = *debugnvp;
+	if (holdsnaps) {
+		++holdseq;
+		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
+		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
+		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
+		if (sdd.cleanup_fd < 0) {
+			err = errno;
+			goto stderr_out;
+		}
+	} else {
+		sdd.cleanup_fd = -1;
+	}
 	err = dump_filesystems(zhp, &sdd);
 	fsavl_destroy(fsavl);
 	nvlist_free(fss);
@@ -1373,9 +1430,9 @@
 		(void) pthread_join(tid, NULL);
 	}
 
-	if (cleanup_fd != -1) {
-		VERIFY(0 == close(cleanup_fd));
-		cleanup_fd = -1;
+	if (sdd.cleanup_fd != -1) {
+		VERIFY(0 == close(sdd.cleanup_fd));
+		sdd.cleanup_fd = -1;
 	}
 
 	if (flags.replicate || flags.doall || flags.props) {
@@ -1397,8 +1454,8 @@
 stderr_out:
 	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
 err_out:
-	if (cleanup_fd != -1)
-		VERIFY(0 == close(cleanup_fd));
+	if (sdd.cleanup_fd != -1)
+		VERIFY(0 == close(sdd.cleanup_fd));
 	if (flags.dedup) {
 		(void) pthread_cancel(tid);
 		(void) pthread_join(tid, NULL);
--- a/usr/src/lib/libzfs/common/mapfile-vers	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/lib/libzfs/common/mapfile-vers	Wed Jul 07 15:04:13 2010 -0600
@@ -75,7 +75,6 @@
 	zfs_get_type;
 	zfs_history_event_names;
 	zfs_hold;
-	zfs_hold_range;
 	zfs_is_mounted;
 	zfs_is_shared;
 	zfs_is_shared_nfs;
@@ -121,7 +120,6 @@
 	zfs_receive;
 	zfs_refresh_properties;
 	zfs_release;
-	zfs_release_range;
 	zfs_rename;
 	zfs_rollback;
 	zfs_send;
--- a/usr/src/lib/libzpool/common/kernel.c	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/lib/libzpool/common/kernel.c	Wed Jul 07 15:04:13 2010 -0600
@@ -946,7 +946,21 @@
 
 /* ARGSUSED */
 int
-zfs_onexit_add_cb(int fd, void (*func)(void *), void *data,
+zfs_onexit_fd_hold(int fd, minor_t *minorp)
+{
+	*minorp = 0;
+	return (0);
+}
+
+/* ARGSUSED */
+void
+zfs_onexit_fd_rele(int fd)
+{
+}
+
+/* ARGSUSED */
+int
+zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
     uint64_t *action_handle)
 {
 	return (0);
@@ -954,14 +968,14 @@
 
 /* ARGSUSED */
 int
-zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire)
+zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
 {
 	return (0);
 }
 
 /* ARGSUSED */
 int
-zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data)
+zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
 {
 	return (0);
 }
--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Wed Jul 07 15:04:13 2010 -0600
@@ -1351,10 +1351,18 @@
 
 	/* if this stream is dedup'ed, set up the avl tree for guid mapping */
 	if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
+		minor_t minor;
+
 		if (cleanup_fd == -1) {
 			ra.err = EBADF;
 			goto out;
 		}
+		ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor);
+		if (ra.err) {
+			cleanup_fd = -1;
+			goto out;
+		}
+
 		if (*action_handlep == 0) {
 			ra.guid_to_ds_map =
 			    kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
@@ -1364,13 +1372,13 @@
 			(void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid,
 			    (void *)ra.guid_to_ds_map,
 			    DS_FIND_CHILDREN);
-			ra.err = zfs_onexit_add_cb(cleanup_fd,
+			ra.err = zfs_onexit_add_cb(minor,
 			    free_guid_map_onexit, ra.guid_to_ds_map,
 			    action_handlep);
 			if (ra.err)
 				goto out;
 		} else {
-			ra.err = zfs_onexit_cb_data(cleanup_fd, *action_handlep,
+			ra.err = zfs_onexit_cb_data(minor, *action_handlep,
 			    (void **)&ra.guid_to_ds_map);
 			if (ra.err)
 				goto out;
@@ -1456,6 +1464,9 @@
 	ASSERT(ra.err != 0);
 
 out:
+	if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
+		zfs_onexit_fd_rele(cleanup_fd);
+
 	if (ra.err != 0) {
 		/*
 		 * destroy what we created, so we don't leave it in the
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Wed Jul 07 15:04:13 2010 -0600
@@ -367,6 +367,7 @@
 	dmu_buf_t *dbuf;
 	dsl_dataset_t *ds;
 	int err;
+	dmu_object_info_t doi;
 
 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
 	    dsl_pool_sync_context(dp));
@@ -374,6 +375,12 @@
 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
 	if (err)
 		return (err);
+
+	/* Make sure dsobj has the correct object type. */
+	dmu_object_info_from_db(dbuf, &doi);
+	if (doi.doi_type != DMU_OT_DSL_DATASET)
+		return (EINVAL);
+
 	ds = dmu_buf_get_user(dbuf);
 	if (ds == NULL) {
 		dsl_dataset_t *winner;
@@ -3422,10 +3429,9 @@
 };
 
 typedef struct zfs_hold_cleanup_arg {
-	char dsname[MAXNAMELEN];
-	char snapname[MAXNAMELEN];
+	dsl_pool_t *dp;
+	uint64_t dsobj;
 	char htag[MAXNAMELEN];
-	boolean_t recursive;
 } zfs_hold_cleanup_arg_t;
 
 static void
@@ -3433,11 +3439,25 @@
 {
 	zfs_hold_cleanup_arg_t *ca = arg;
 
-	(void) dsl_dataset_user_release(ca->dsname, ca->snapname,
-	    ca->htag, ca->recursive);
+	(void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag,
+	    B_TRUE);
 	kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 }
 
+void
+dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
+    minor_t minor)
+{
+	zfs_hold_cleanup_arg_t *ca;
+
+	ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
+	ca->dp = ds->ds_dir->dd_pool;
+	ca->dsobj = ds->ds_object;
+	(void) strlcpy(ca->htag, htag, sizeof (ca->htag));
+	VERIFY3U(0, ==, zfs_onexit_add_cb(minor,
+	    dsl_dataset_user_release_onexit, ca, NULL));
+}
+
 /*
  * The max length of a temporary tag prefix is the number of hex digits
  * required to express UINT64_MAX plus one for the hyphen.
@@ -3541,6 +3561,24 @@
 }
 
 int
+dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
+    boolean_t temphold)
+{
+	struct dsl_ds_holdarg *ha;
+	int error;
+
+	ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
+	ha->htag = htag;
+	ha->temphold = temphold;
+	error = dsl_sync_task_do(ds->ds_dir->dd_pool,
+	    dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync,
+	    ds, ha, 0);
+	kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+
+	return (error);
+}
+
+int
 dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
     boolean_t recursive, boolean_t temphold, int cleanup_fd)
 {
@@ -3548,6 +3586,16 @@
 	dsl_sync_task_t *dst;
 	spa_t *spa;
 	int error;
+	minor_t minor = 0;
+
+	if (cleanup_fd != -1) {
+		/* Currently we only support cleanup-on-exit of tempholds. */
+		if (!temphold)
+			return (EINVAL);
+		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
+		if (error)
+			return (error);
+	}
 
 	ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
 
@@ -3556,6 +3604,8 @@
 	error = spa_open(dsname, &spa, FTAG);
 	if (error) {
 		kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+		if (cleanup_fd != -1)
+			zfs_onexit_fd_rele(cleanup_fd);
 		return (error);
 	}
 
@@ -3581,6 +3631,12 @@
 		if (dst->dst_err) {
 			dsl_dataset_name(ds, ha->failed);
 			*strchr(ha->failed, '@') = '\0';
+		} else if (error == 0 && minor != 0 && temphold) {
+			/*
+			 * If this hold is to be released upon process exit,
+			 * register that action now.
+			 */
+			dsl_register_onexit_hold_cleanup(ds, htag, minor);
 		}
 		dsl_dataset_rele(ds, ha->dstg);
 	}
@@ -3593,25 +3649,10 @@
 
 	dsl_sync_task_group_destroy(ha->dstg);
 
-	/*
-	 * If this set of temporary holds is to be removed upon process exit,
-	 * register that action now.
-	 */
-	if (error == 0 && cleanup_fd != -1 && temphold) {
-		zfs_hold_cleanup_arg_t *ca;
-		uint64_t action_handle;
-
-		ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
-		(void) strlcpy(ca->dsname, dsname, sizeof (ca->dsname));
-		(void) strlcpy(ca->snapname, snapname, sizeof (ca->snapname));
-		(void) strlcpy(ca->htag, htag, sizeof (ca->htag));
-		ca->recursive = recursive;
-		(void) zfs_onexit_add_cb(cleanup_fd,
-		    dsl_dataset_user_release_onexit, ca, &action_handle);
-	}
-
 	kmem_free(ha, sizeof (struct dsl_ds_holdarg));
 	spa_close(spa, FTAG);
+	if (cleanup_fd != -1)
+		zfs_onexit_fd_rele(cleanup_fd);
 	return (error);
 }
 
@@ -3703,11 +3744,6 @@
 	uint64_t refs;
 	int error;
 
-	if (ds->ds_objset) {
-		dmu_objset_evict(ds->ds_objset);
-		ds->ds_objset = NULL;
-	}
-
 	mutex_enter(&ds->ds_lock);
 	ds->ds_userrefs--;
 	refs = ds->ds_userrefs;
@@ -3867,10 +3903,12 @@
 }
 
 /*
- * Called at spa_load time to release a stale temporary user hold.
+ * Called at spa_load time (with retry == B_FALSE) to release a stale
+ * temporary user hold. Also called by the onexit code (with retry == B_TRUE).
  */
 int
-dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag)
+dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag,
+    boolean_t retry)
 {
 	dsl_dataset_t *ds;
 	char *snap;
@@ -3878,20 +3916,36 @@
 	int namelen;
 	int error;
 
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
-	error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
-	rw_exit(&dp->dp_config_rwlock);
-	if (error)
-		return (error);
-	namelen = dsl_dataset_namelen(ds)+1;
-	name = kmem_alloc(namelen, KM_SLEEP);
-	dsl_dataset_name(ds, name);
-	dsl_dataset_rele(ds, FTAG);
-
-	snap = strchr(name, '@');
-	*snap = '\0';
-	++snap;
-	return (dsl_dataset_user_release(name, snap, htag, B_FALSE));
+	do {
+		rw_enter(&dp->dp_config_rwlock, RW_READER);
+		error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+		rw_exit(&dp->dp_config_rwlock);
+		if (error)
+			return (error);
+		namelen = dsl_dataset_namelen(ds)+1;
+		name = kmem_alloc(namelen, KM_SLEEP);
+		dsl_dataset_name(ds, name);
+		dsl_dataset_rele(ds, FTAG);
+
+		snap = strchr(name, '@');
+		*snap = '\0';
+		++snap;
+		error = dsl_dataset_user_release(name, snap, htag, B_FALSE);
+		kmem_free(name, namelen);
+
+		/*
+		 * The object can't have been destroyed because we have a hold,
+		 * but it might have been renamed, resulting in ENOENT.  Retry
+		 * if we've been requested to do so.
+		 *
+		 * It would be nice if we could use the dsobj all the way
+		 * through and avoid ENOENT entirely.  But we might need to
+		 * unmount the snapshot, and there's currently no way to lookup
+		 * a vfsp using a ZFS object id.
+		 */
+	} while ((error == ENOENT) && retry);
+
+	return (error);
 }
 
 int
--- a/usr/src/uts/common/fs/zfs/dsl_deleg.c	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c	Wed Jul 07 15:04:13 2010 -0600
@@ -528,9 +528,8 @@
  * Check if user has requested permission.
  */
 int
-dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
+dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
 {
-	dsl_dataset_t *ds;
 	dsl_dir_t *dd;
 	dsl_pool_t *dp;
 	void *cookie;
@@ -540,23 +539,15 @@
 	avl_tree_t permsets;
 	perm_set_t *setnode;
 
-	error = dsl_dataset_hold(dsname, FTAG, &ds);
-	if (error)
-		return (error);
-
 	dp = ds->ds_dir->dd_pool;
 	mos = dp->dp_meta_objset;
 
-	if (dsl_delegation_on(mos) == B_FALSE) {
-		dsl_dataset_rele(ds, FTAG);
+	if (dsl_delegation_on(mos) == B_FALSE)
 		return (ECANCELED);
-	}
 
 	if (spa_version(dmu_objset_spa(dp->dp_meta_objset)) <
-	    SPA_VERSION_DELEGATED_PERMS) {
-		dsl_dataset_rele(ds, FTAG);
+	    SPA_VERSION_DELEGATED_PERMS)
 		return (EPERM);
-	}
 
 	if (dsl_dataset_is_snapshot(ds)) {
 		/*
@@ -633,7 +624,6 @@
 	error = EPERM;
 success:
 	rw_exit(&dp->dp_config_rwlock);
-	dsl_dataset_rele(ds, FTAG);
 
 	cookie = NULL;
 	while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL)
@@ -642,6 +632,22 @@
 	return (error);
 }
 
+int
+dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
+{
+	dsl_dataset_t *ds;
+	int error;
+
+	error = dsl_dataset_hold(dsname, FTAG, &ds);
+	if (error)
+		return (error);
+
+	error = dsl_deleg_access_impl(ds, perm, cr);
+	dsl_dataset_rele(ds, FTAG);
+
+	return (error);
+}
+
 /*
  * Other routines.
  */
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c	Wed Jul 07 15:04:13 2010 -0600
@@ -768,7 +768,7 @@
 		*htag = '\0';
 		++htag;
 		dsobj = strtonum(za.za_name, NULL);
-		(void) dsl_dataset_user_release_tmp(dp, dsobj, htag);
+		(void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE);
 	}
 	zap_cursor_fini(&zc);
 }
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Wed Jul 07 15:04:13 2010 -0600
@@ -182,6 +182,8 @@
 boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
     void *tag);
 void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag);
+void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
+    minor_t minor);
 uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
     dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
 uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
@@ -198,10 +200,12 @@
     boolean_t force);
 int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
     boolean_t recursive, boolean_t temphold, int cleanup_fd);
+int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
+    boolean_t temphold);
 int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
     boolean_t recursive);
 int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
-    char *htag);
+    char *htag, boolean_t retry);
 int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
 
 blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
--- a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h	Wed Jul 07 15:04:13 2010 -0600
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_DELEG_H
@@ -64,6 +63,7 @@
 int dsl_deleg_get(const char *ddname, nvlist_t **nvp);
 int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);
 int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr);
+int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr);
 void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);
 int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);
 int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Wed Jul 07 15:04:13 2010 -0600
@@ -267,6 +267,9 @@
 	uint64_t	zc_action_handle;
 	int		zc_cleanup_fd;
 	uint8_t		zc_pad[4];
+	uint64_t	zc_sendobj;
+	uint64_t	zc_fromobj;
+	uint64_t	zc_createtxg;
 } zfs_cmd_t;
 
 typedef struct zfs_useracct {
--- a/usr/src/uts/common/fs/zfs/sys/zfs_onexit.h	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_onexit.h	Wed Jul 07 15:04:13 2010 -0600
@@ -50,10 +50,14 @@
 
 #endif
 
-extern int zfs_onexit_add_cb(int fd, void (*func)(void *), void *data,
+extern int zfs_onexit_fd_hold(int fd, minor_t *minorp);
+extern void zfs_onexit_fd_rele(int fd);
+extern int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
     uint64_t *action_handle);
-extern int zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire);
-extern int zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data);
+extern int zfs_onexit_del_cb(minor_t minor, uint64_t action_handle,
+    boolean_t fire);
+extern int zfs_onexit_cb_data(minor_t minor, uint64_t action_handle,
+    void **data);
 
 #ifdef	__cplusplus
 }
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Wed Jul 07 15:04:13 2010 -0600
@@ -282,9 +282,8 @@
 }
 
 static int
-zfs_dozonecheck(const char *dataset, cred_t *cr)
+zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
 {
-	uint64_t zoned;
 	int writable = 1;
 
 	/*
@@ -295,9 +294,6 @@
 	    !zone_dataset_visible(dataset, &writable))
 		return (ENOENT);
 
-	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
-		return (ENOENT);
-
 	if (INGLOBALZONE(curproc)) {
 		/*
 		 * If the fs is zoned, only root can access it from the
@@ -319,6 +315,32 @@
 	return (0);
 }
 
+static int
+zfs_dozonecheck(const char *dataset, cred_t *cr)
+{
+	uint64_t zoned;
+
+	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
+		return (ENOENT);
+
+	return (zfs_dozonecheck_impl(dataset, zoned, cr));
+}
+
+static int
+zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
+{
+	uint64_t zoned;
+
+	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
+	if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) {
+		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
+		return (ENOENT);
+	}
+	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
+
+	return (zfs_dozonecheck_impl(dataset, zoned, cr));
+}
+
 int
 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 {
@@ -333,6 +355,21 @@
 	return (error);
 }
 
+int
+zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
+    const char *perm, cred_t *cr)
+{
+	int error;
+
+	error = zfs_dozonecheck_ds(name, ds, cr);
+	if (error == 0) {
+		error = secpolicy_zfs(cr);
+		if (error)
+			error = dsl_deleg_access_impl(ds, perm, cr);
+	}
+	return (error);
+}
+
 /*
  * Policy for setting the security label property.
  *
@@ -508,8 +545,38 @@
 int
 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
 {
-	return (zfs_secpolicy_write_perms(zc->zc_name,
-	    ZFS_DELEG_PERM_SEND, cr));
+	dsl_dataset_t *snap;
+	dsl_pool_t *pool;
+	spa_t *spa;
+	int err;
+
+	/* zc_name must name a snapshot, i.e. contain an '@'. */
+	if (strchr(zc->zc_name, '@') == NULL)
+		return (EINVAL);
+	if ((err = spa_open(zc->zc_name, &spa, FTAG)) != 0)
+		return (err);
+
+	/*
+	 * Hold the snapshot by objsetid instead of by name; the spa
+	 * reference is dropped as soon as the lookup has finished.
+	 */
+	pool = spa_get_dsl(spa);
+	rw_enter(&pool->dp_config_rwlock, RW_READER);
+	err = dsl_dataset_hold_obj(pool, zc->zc_sendobj, FTAG, &snap);
+	rw_exit(&pool->dp_config_rwlock);
+	spa_close(spa, FTAG);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * Generate the current snapshot name from the held dataset, then
+	 * use that name for the secpolicy/zone checks.
+	 */
+	dsl_dataset_name(snap, zc->zc_name);
+	err = zfs_secpolicy_write_perms_ds(zc->zc_name, snap,
+	    ZFS_DELEG_PERM_SEND, cr);
+	dsl_dataset_rele(snap, FTAG);
+	return (err);
 }
 
 static int
@@ -1579,26 +1646,12 @@
 	return (error);
 }
 
-/*
- * inputs:
- * zc_name		name of filesystem
- * zc_nvlist_dst_size	size of buffer for property nvlist
- *
- * outputs:
- * zc_objset_stats	stats
- * zc_nvlist_dst	property nvlist
- * zc_nvlist_dst_size	size of property nvlist
- */
 static int
-zfs_ioc_objset_stats(zfs_cmd_t *zc)
+zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
 {
-	objset_t *os = NULL;
-	int error;
+	int error = 0;
 	nvlist_t *nv;
 
-	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
-		return (error);
-
 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
 
 	if (zc->zc_nvlist_dst != 0 &&
@@ -1619,7 +1672,32 @@
 		nvlist_free(nv);
 	}
 
+	return (error);
+}
+
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ * zc_nvlist_dst_size	size of buffer for property nvlist
+ *
+ * outputs:
+ * zc_objset_stats	stats
+ * zc_nvlist_dst	property nvlist
+ * zc_nvlist_dst_size	size of property nvlist
+ */
+static int
+zfs_ioc_objset_stats(zfs_cmd_t *zc)
+{
+	objset_t *os = NULL;
+	int error;
+
+	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
+	if (error != 0)
+		return (error);
+
+	/* Gather the stats while the named objset is held. */
+	error = zfs_ioc_objset_stats_impl(zc, os);
 	dmu_objset_rele(os, FTAG);
 	return (error);
 }
 
@@ -1852,19 +1930,43 @@
 
 	error = dmu_snapshot_list_next(os,
 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
-	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL);
-	dmu_objset_rele(os, FTAG);
+	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
+	    NULL);
+
 	if (error == 0) {
-		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
-		if (error == ENOENT)  {
-			/* We lost a race with destroy, get the next one. */
-			*strchr(zc->zc_name, '@') = '\0';
-			goto top;
+		dsl_dataset_t *ds;
+		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
+
+		/*
+		 * Since we probably don't have a hold on this snapshot,
+		 * it's possible that the objsetid could have been destroyed
+		 * and reused for a new objset. It's OK if this happens during
+		 * a zfs send operation, since the new createtxg will be
+		 * beyond the range we're interested in.
+		 */
+		rw_enter(&dp->dp_config_rwlock, RW_READER);
+		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
+		rw_exit(&dp->dp_config_rwlock);
+		if (error) {
+			if (error == ENOENT) {
+				/* Racing with destroy, get the next one. */
+				*strchr(zc->zc_name, '@') = '\0';
+				dmu_objset_rele(os, FTAG);
+				goto top;
+			}
+		} else {
+			objset_t *ossnap;
+
+			error = dmu_objset_from_ds(ds, &ossnap);
+			if (error == 0)
+				error = zfs_ioc_objset_stats_impl(zc, ossnap);
+			dsl_dataset_rele(ds, FTAG);
 		}
 	} else if (error == ENOENT) {
 		error = ESRCH;
 	}
 
+	dmu_objset_rele(os, FTAG);
 	/* if we failed, undo the @ that we tacked on to zc_name */
 	if (error)
 		*strchr(zc->zc_name, '@') = '\0';
@@ -3573,9 +3675,10 @@
 /*
  * inputs:
  * zc_name	name of snapshot to send
- * zc_value	short name of incremental fromsnap (may be empty)
  * zc_cookie	file descriptor to send stream to
- * zc_obj	fromorigin flag (mutually exclusive with zc_value)
+ * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
+ * zc_sendobj	objsetid of snapshot to send
+ * zc_fromobj	objsetid of incremental fromsnap (may be zero)
  *
  * outputs: none
  */
@@ -3587,34 +3690,55 @@
 	file_t *fp;
 	int error;
 	offset_t off;
-
-	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
+	dsl_dataset_t *ds;
+	dsl_dataset_t *dsfrom = NULL;
+	spa_t *spa;
+	dsl_pool_t *dp;
+
+	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error)
 		return (error);
 
-	if (zc->zc_value[0] != '\0') {
-		char *buf;
-		char *cp;
-
-		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-		(void) strncpy(buf, zc->zc_name, MAXPATHLEN);
-		cp = strchr(buf, '@');
-		if (cp)
-			*(cp+1) = 0;
-		(void) strncat(buf, zc->zc_value, MAXPATHLEN);
-		error = dmu_objset_hold(buf, FTAG, &fromsnap);
-		kmem_free(buf, MAXPATHLEN);
+	dp = spa_get_dsl(spa);
+	rw_enter(&dp->dp_config_rwlock, RW_READER);
+	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
+	rw_exit(&dp->dp_config_rwlock);
+	if (error) {
+		spa_close(spa, FTAG);
+		return (error);
+	}
+
+	error = dmu_objset_from_ds(ds, &tosnap);
+	if (error) {
+		dsl_dataset_rele(ds, FTAG);
+		spa_close(spa, FTAG);
+		return (error);
+	}
+
+	if (zc->zc_fromobj != 0) {
+		rw_enter(&dp->dp_config_rwlock, RW_READER);
+		error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
+		rw_exit(&dp->dp_config_rwlock);
+		spa_close(spa, FTAG);
 		if (error) {
-			dmu_objset_rele(tosnap, FTAG);
+			dsl_dataset_rele(ds, FTAG);
 			return (error);
 		}
+		error = dmu_objset_from_ds(dsfrom, &fromsnap);
+		if (error) {
+			dsl_dataset_rele(dsfrom, FTAG);
+			dsl_dataset_rele(ds, FTAG);
+			return (error);
+		}
+	} else {
+		spa_close(spa, FTAG);
 	}
 
 	fp = getf(zc->zc_cookie);
 	if (fp == NULL) {
-		dmu_objset_rele(tosnap, FTAG);
-		if (fromsnap)
-			dmu_objset_rele(fromsnap, FTAG);
+		dsl_dataset_rele(ds, FTAG);
+		if (dsfrom)
+			dsl_dataset_rele(dsfrom, FTAG);
 		return (EBADF);
 	}
 
@@ -3624,9 +3748,9 @@
 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
 		fp->f_offset = off;
 	releasef(zc->zc_cookie);
-	if (fromsnap)
-		dmu_objset_rele(fromsnap, FTAG);
-	dmu_objset_rele(tosnap, FTAG);
+	if (dsfrom)
+		dsl_dataset_rele(dsfrom, FTAG);
+	dsl_dataset_rele(ds, FTAG);
 	return (error);
 }
 
@@ -4194,6 +4318,8 @@
  * zc_cookie		recursive flag
  * zc_temphold		set if hold is temporary
  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
+ * zc_sendobj		if non-zero, the objid for zc_name@zc_value
+ * zc_createtxg		if zc_sendobj is non-zero, snap must have zc_createtxg
  *
  * outputs:		none
  */
@@ -4201,12 +4327,66 @@
 zfs_ioc_hold(zfs_cmd_t *zc)
 {
 	boolean_t recursive = zc->zc_cookie;
+	spa_t *spa;
+	dsl_pool_t *dp;
+	dsl_dataset_t *ds;
+	int error;
+	minor_t minor = 0;
 
 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
 		return (EINVAL);
 
-	return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
-	    zc->zc_string, recursive, zc->zc_temphold, zc->zc_cleanup_fd));
+	if (zc->zc_sendobj == 0) {
+		return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
+		    zc->zc_string, recursive, zc->zc_temphold,
+		    zc->zc_cleanup_fd));
+	}
+
+	if (recursive)
+		return (EINVAL);
+
+	error = spa_open(zc->zc_name, &spa, FTAG);
+	if (error)
+		return (error);
+
+	dp = spa_get_dsl(spa);
+	rw_enter(&dp->dp_config_rwlock, RW_READER);
+	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
+	rw_exit(&dp->dp_config_rwlock);
+	spa_close(spa, FTAG);
+	if (error)
+		return (error);
+
+	/*
+	 * Until we have a hold on this snapshot, it's possible that
+	 * zc_sendobj could've been destroyed and reused as part
+	 * of a later txg.  Make sure we're looking at the right object.
+	 */
+	if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
+		dsl_dataset_rele(ds, FTAG);
+		return (ENOENT);
+	}
+
+	if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
+		error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
+		if (error) {
+			dsl_dataset_rele(ds, FTAG);
+			return (error);
+		}
+	}
+
+	error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
+	    zc->zc_temphold);
+	if (minor != 0) {
+		if (error == 0) {
+			dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
+			    minor);
+		}
+		zfs_onexit_fd_rele(zc->zc_cleanup_fd);
+	}
+	dsl_dataset_rele(ds, FTAG);
+
+	return (error);
 }
 
 /*
--- a/usr/src/uts/common/fs/zfs/zfs_onexit.c	Wed Jul 07 02:21:35 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_onexit.c	Wed Jul 07 15:04:13 2010 -0600
@@ -46,6 +46,12 @@
  * clone-open, generating a unique minor number. The process then passes
  * along that file descriptor to each ioctl that might have a cleanup operation.
  *
+ * Consumers of the onexit routines should call zfs_onexit_fd_hold() early
+ * on to validate the given fd and add a reference to its file table entry.
+ * This allows the consumer to do its work and then add a callback, knowing
+ * that zfs_onexit_add_cb() won't fail with EBADF.  When finished, consumers
+ * should call zfs_onexit_fd_rele().
+ *
  * A simple example is zfs_ioc_recv(), where we might create an AVL tree
  * with dataset/GUID mappings and then reuse that tree on subsequent
  * zfs_ioc_recv() calls.
@@ -57,7 +63,8 @@
  *
  * The action handle is then passed from user space to subsequent
  * zfs_ioc_recv() calls, so that dmu_recv_stream() can fetch its AVL tree
- * by calling zfs_onexit_cb_data() with the cleanup fd and action handle.
+ * by calling zfs_onexit_cb_data() with the device minor number and
+ * action handle.
  *
  * If the user process exits abnormally, the callback is invoked implicitly
  * as part of the driver close operation.  Once the user space process is
@@ -97,37 +104,53 @@
 }
 
 static int
-zfs_onexit_fd_to_state(int fd, zfs_onexit_t **zo)
+zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo)
+{
+	zfs_onexit_t *state = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
+
+	/* A minor with no ZSST_CTLDEV soft state is reported as EBADF. */
+	*zo = state;
+	return (state == NULL ? EBADF : 0);
+}
+
+/*
+ * Consumers might need to operate by minor number instead of fd, since
+ * they might be running in another thread (e.g. txg_sync_thread).
+ *
+ * On success, *minorp is the minor number behind fd and the fd's file
+ * table entry stays held; the caller must call zfs_onexit_fd_rele() when
+ * finished using the minor number.  On failure (EBADF) no hold is left
+ * behind, so the caller must not call zfs_onexit_fd_rele().
+ */
+int
+zfs_onexit_fd_hold(int fd, minor_t *minorp)
 {
 	file_t *fp;
-	dev_t rdev;
+	zfs_onexit_t *zo;
+	int error;
 
 	fp = getf(fd);
 	if (fp == NULL)
 		return (EBADF);
 
-	rdev = fp->f_vnode->v_rdev;
-	*zo = zfsdev_get_soft_state(getminor(rdev), ZSST_CTLDEV);
-	if (*zo == NULL) {
-		releasef(fd);
-		return (EBADF);
-	}
+	*minorp = getminor(fp->f_vnode->v_rdev);
+	error = zfs_onexit_minor_to_state(*minorp, &zo);
+	if (error != 0) {
+		/*
+		 * Callers bail out on error without releasing the fd, so
+		 * the getf() reference has to be dropped here.
+		 */
+		releasef(fd);
+	}
+	return (error);
+}
 
-	return (0);
+/*
+ * Drop the file table reference on fd that zfs_onexit_fd_hold() took
+ * via getf().
+ */
+void
+zfs_onexit_fd_rele(int fd)
+{
+	releasef(fd);
 }
 
 /*
  * Add a callback to be invoked when the calling process exits.
  */
 int
-zfs_onexit_add_cb(int fd, void (*func)(void *), void *data,
+zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
     uint64_t *action_handle)
 {
 	zfs_onexit_t *zo;
 	zfs_onexit_action_node_t *ap;
 	int error;
 
-	error = zfs_onexit_fd_to_state(fd, &zo);
+	error = zfs_onexit_minor_to_state(minor, &zo);
 	if (error)
 		return (error);
 
@@ -139,8 +162,8 @@
 	mutex_enter(&zo->zo_lock);
 	list_insert_tail(&zo->zo_actions, ap);
 	mutex_exit(&zo->zo_lock);
-	*action_handle = (uint64_t)(uintptr_t)ap;
-	releasef(fd);
+	if (action_handle)
+		*action_handle = (uint64_t)(uintptr_t)ap;
 
 	return (0);
 }
@@ -167,13 +190,13 @@
  * Delete the callback, triggering it first if 'fire' is set.
  */
 int
-zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire)
+zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
 {
 	zfs_onexit_t *zo;
 	zfs_onexit_action_node_t *ap;
 	int error;
 
-	error = zfs_onexit_fd_to_state(fd, &zo);
+	error = zfs_onexit_minor_to_state(minor, &zo);
 	if (error)
 		return (error);
 
@@ -189,7 +212,6 @@
 		mutex_exit(&zo->zo_lock);
 		error = ENOENT;
 	}
-	releasef(fd);
 
 	return (error);
 }
@@ -200,7 +222,7 @@
  * calls, knowing that it will be cleaned up if the calling process exits.
  */
 int
-zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data)
+zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
 {
 	zfs_onexit_t *zo;
 	zfs_onexit_action_node_t *ap;
@@ -208,7 +230,7 @@
 
 	*data = NULL;
 
-	error = zfs_onexit_fd_to_state(fd, &zo);
+	error = zfs_onexit_minor_to_state(minor, &zo);
 	if (error)
 		return (error);
 
@@ -219,7 +241,6 @@
 	else
 		error = ENOENT;
 	mutex_exit(&zo->zo_lock);
-	releasef(fd);
 
 	return (error);
 }