Mercurial > illumos > illumos-gate
changeset 12527:693dd2cad55f
6884007 zfs_send() can leave temporary holds around
6954429 ZFS_IOC_VDEV_SPLIT missing from truss/codes.c
author | Chris Kirby <Chris.Kirby@oracle.com> |
---|---|
date | Tue, 01 Jun 2010 17:04:42 -0600 |
parents | 6f48102ad665 |
children | 13ec26f2b29c |
files | usr/src/cmd/truss/codes.c usr/src/cmd/zfs/zfs_main.c usr/src/cmd/ztest/ztest.c usr/src/lib/libzfs/common/libzfs.h usr/src/lib/libzfs/common/libzfs_dataset.c usr/src/lib/libzfs/common/libzfs_sendrecv.c usr/src/lib/libzpool/common/kernel.c usr/src/uts/common/Makefile.files usr/src/uts/common/fs/zfs/dmu_send.c usr/src/uts/common/fs/zfs/dsl_dataset.c usr/src/uts/common/fs/zfs/sys/dmu.h usr/src/uts/common/fs/zfs/sys/dsl_dataset.h usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h usr/src/uts/common/fs/zfs/sys/zfs_onexit.h usr/src/uts/common/fs/zfs/zfs_ioctl.c usr/src/uts/common/fs/zfs/zfs_onexit.c usr/src/uts/common/fs/zfs/zvol.c |
diffstat | 17 files changed, 699 insertions(+), 193 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/truss/codes.c Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/cmd/truss/codes.c Tue Jun 01 17:04:42 2010 -0600 @@ -1237,6 +1237,8 @@ "zfs_cmd_t" }, { (uint_t)ZFS_IOC_OBJSET_RECVD_PROPS, "ZFS_IOC_OBJSET_RECVD_PROPS", "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_VDEV_SPLIT, "ZFS_IOC_VDEV_SPLIT", + "zfs_cmd_t" }, /* kssl ioctls */ { (uint_t)KSSL_ADD_ENTRY, "KSSL_ADD_ENTRY",
--- a/usr/src/cmd/zfs/zfs_main.c Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/cmd/zfs/zfs_main.c Tue Jun 01 17:04:42 2010 -0600 @@ -2888,7 +2888,7 @@ } if (holding) { if (zfs_hold(zhp, delim+1, tag, recursive, - temphold, B_FALSE) != 0) + temphold, B_FALSE, -1) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0)
--- a/usr/src/cmd/ztest/ztest.c Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/cmd/ztest/ztest.c Tue Jun 01 17:04:42 2010 -0600 @@ -4355,7 +4355,8 @@ fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); } - error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE, B_TRUE); + error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE, + B_TRUE, -1); if (error) fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag);
--- a/usr/src/lib/libzfs/common/libzfs.h Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/lib/libzfs/common/libzfs.h Tue Jun 01 17:04:42 2010 -0600 @@ -533,9 +533,9 @@ extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, - boolean_t, boolean_t); + boolean_t, boolean_t, int); extern int zfs_hold_range(zfs_handle_t *, const char *, const char *, - const char *, boolean_t, boolean_t, snapfilter_cb_t, void *); + const char *, boolean_t, boolean_t, snapfilter_cb_t, void *, int); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_release_range(zfs_handle_t *, const char *, const char *, const char *, boolean_t);
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c Tue Jun 01 17:04:42 2010 -0600 @@ -3926,7 +3926,8 @@ int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, - boolean_t recursive, boolean_t temphold, boolean_t enoent_ok) + boolean_t recursive, boolean_t temphold, boolean_t enoent_ok, + int cleanup_fd) { zfs_cmd_t zc = { 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; @@ -3938,6 +3939,7 @@ return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); zc.zc_cookie = recursive; zc.zc_temphold = temphold; + zc.zc_cleanup_fd = cleanup_fd; if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) { char errbuf[ZFS_MAXNAMELEN+32]; @@ -3990,6 +3992,7 @@ boolean_t recursive; snapfilter_cb_t *filter_cb; void *filter_cb_arg; + int cleanup_fd; }; static int @@ -4023,7 +4026,7 @@ if (hra->holding) { /* We could be racing with destroy, so ignore ENOENT. */ error = zfs_hold(hra->origin, thissnap, hra->tag, - hra->recursive, hra->temphold, B_TRUE); + hra->recursive, hra->temphold, B_TRUE, hra->cleanup_fd); if (error == 0) { (void) strlcpy(hra->lastsnapheld, zfs_get_name(zhp), sizeof (hra->lastsnapheld)); @@ -4045,7 +4048,7 @@ int zfs_hold_range(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, const char *tag, boolean_t recursive, boolean_t temphold, - snapfilter_cb_t filter_cb, void *cbarg) + snapfilter_cb_t filter_cb, void *cbarg, int cleanup_fd) { struct hold_range_arg arg = { 0 }; int error; @@ -4060,13 +4063,17 @@ arg.seenfrom = (fromsnap == NULL); arg.filter_cb = filter_cb; arg.filter_cb_arg = cbarg; + arg.cleanup_fd = cleanup_fd; error = zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg); /* - * Make sure we either hold the entire range or none. + * Make sure we either hold the entire range or none. If we're + * using cleanup-on-exit, we'll let the closing of the cleanup_fd + * do the work for us. 
*/ - if (error && arg.lastsnapheld[0] != '\0') { + if (error && arg.lastsnapheld[0] != '\0' && + (cleanup_fd == -1 || !temphold)) { (void) zfs_release_range(zhp, fromsnap, (const char *)arg.lastsnapheld, tag, recursive); } @@ -4130,6 +4137,7 @@ arg.tag = tag; arg.recursive = recursive; arg.seenfrom = (fromsnap == NULL); + arg.cleanup_fd = -1; return (zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg)); }
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c Tue Jun 01 17:04:42 2010 -0600 @@ -51,7 +51,7 @@ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t, - int, const char *, nvlist_t *, avl_tree_t *, char **); + int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *); static const zio_cksum_t zero_cksum = { 0 }; @@ -1210,6 +1210,7 @@ int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; + int cleanup_fd = -1; if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) { uint64_t version; @@ -1259,12 +1260,17 @@ zio_cksum_t zc = { 0 }; if (holdsnaps) { + ++holdseq; (void) snprintf(holdtag, sizeof (holdtag), ".send-%d-%llu", getpid(), (u_longlong_t)holdseq); - ++holdseq; + cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); + if (cleanup_fd < 0) { + err = errno; + goto stderr_out; + } err = zfs_hold_range(zhp, fromsnap, tosnap, holdtag, flags.replicate, B_TRUE, filter_func, - cb_arg); + cb_arg, cleanup_fd); if (err) goto err_out; } @@ -1285,13 +1291,8 @@ err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, fromsnap, tosnap, flags.replicate, &fss, &fsavl); - if (err) { - if (holdsnaps) { - (void) zfs_release_range(zhp, fromsnap, - tosnap, holdtag, flags.replicate); - } + if (err) goto err_out; - } VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); err = nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR, 0); @@ -1302,10 +1303,6 @@ if (err) { fsavl_destroy(fsavl); nvlist_free(fss); - if (holdsnaps) { - (void) zfs_release_range(zhp, fromsnap, - tosnap, holdtag, flags.replicate); - } goto stderr_out; } } @@ -1331,10 +1328,6 @@ if (err == -1) { fsavl_destroy(fsavl); nvlist_free(fss); - if (holdsnaps) { - (void) zfs_release_range(zhp, fromsnap, tosnap, - holdtag, flags.replicate); - } err = errno; goto stderr_out; } @@ -1349,10 +1342,6 @@ fsavl_destroy(fsavl); nvlist_free(fss); err = errno; - if (holdsnaps) 
{ - (void) zfs_release_range(zhp, fromsnap, - tosnap, holdtag, flags.replicate); - } goto stderr_out; } } @@ -1384,6 +1373,11 @@ (void) pthread_join(tid, NULL); } + if (cleanup_fd != -1) { + VERIFY(0 == close(cleanup_fd)); + cleanup_fd = -1; + } + if (flags.replicate || flags.doall || flags.props) { /* * write final end record. NB: want to do this even if @@ -1392,10 +1386,6 @@ */ dmu_replay_record_t drr = { 0 }; drr.drr_type = DRR_END; - if (holdsnaps) { - (void) zfs_release_range(zhp, fromsnap, tosnap, - holdtag, flags.replicate); - } if (write(outfd, &drr, sizeof (drr)) == -1) { return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); @@ -1407,6 +1397,8 @@ stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: + if (cleanup_fd != -1) + VERIFY(0 == close(cleanup_fd)); if (flags.dedup) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); @@ -1992,7 +1984,7 @@ static int zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc, - char **top_zfs) + char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { nvlist_t *stream_nv = NULL; avl_tree_t *stream_avl = NULL; @@ -2158,7 +2150,8 @@ * recv_skip() and return 0). 
*/ error = zfs_receive_impl(hdl, destname, flags, fd, - sendfs, stream_nv, stream_avl, top_zfs); + sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, + action_handlep); if (error == ENODATA) { error = 0; break; @@ -2281,7 +2274,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, recvflags_t flags, dmu_replay_record_t *drr, dmu_replay_record_t *drr_noswap, const char *sendfs, - nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs) + nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, + uint64_t *action_handlep) { zfs_cmd_t zc = { 0 }; time_t begin_time; @@ -2609,6 +2603,8 @@ zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; zc.zc_nvlist_dst_size = sizeof (prop_errbuf); + zc.zc_cleanup_fd = cleanup_fd; + zc.zc_action_handle = *action_handlep; err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc); ioctl_errno = errno; @@ -2796,6 +2792,8 @@ if (err || ioctl_err) return (-1); + *action_handlep = zc.zc_action_handle; + if (flags.verbose) { char buf1[64]; char buf2[64]; @@ -2816,7 +2814,7 @@ static int zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, - char **top_zfs) + char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { int err; dmu_replay_record_t drr, drr_noswap; @@ -2909,12 +2907,12 @@ } return (zfs_receive_one(hdl, infd, tosnap, flags, &drr, &drr_noswap, sendfs, stream_nv, stream_avl, - top_zfs)); + top_zfs, cleanup_fd, action_handlep)); } else { assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM); return (zfs_receive_package(hdl, infd, tosnap, flags, - &drr, &zcksum, top_zfs)); + &drr, &zcksum, top_zfs, cleanup_fd, action_handlep)); } } @@ -2930,9 +2928,16 @@ { char *top_zfs = NULL; int err; + int cleanup_fd; + uint64_t action_handle = 0; + + cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); + VERIFY(cleanup_fd >= 0); err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL, - 
stream_avl, &top_zfs); + stream_avl, &top_zfs, cleanup_fd, &action_handle); + + VERIFY(0 == close(cleanup_fd)); if (err == 0 && !flags.nomount && top_zfs) { zfs_handle_t *zhp;
--- a/usr/src/lib/libzpool/common/kernel.c Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/lib/libzpool/common/kernel.c Tue Jun 01 17:04:42 2010 -0600 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <assert.h> @@ -944,3 +943,25 @@ return (buf); } + +/* ARGSUSED */ +int +zfs_onexit_add_cb(int fd, void (*func)(void *), void *data, + uint64_t *action_handle) +{ + return (0); +} + +/* ARGSUSED */ +int +zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire) +{ + return (0); +} + +/* ARGSUSED */ +int +zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data) +{ + return (0); +}
--- a/usr/src/uts/common/Makefile.files Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/uts/common/Makefile.files Tue Jun 01 17:04:42 2010 -0600 @@ -1404,6 +1404,7 @@ zfs_dir.o \ zfs_ioctl.o \ zfs_log.o \ + zfs_onexit.o \ zfs_replay.o \ zfs_rlock.o \ rrwlock.o \
--- a/usr/src/uts/common/fs/zfs/dmu_send.c Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/uts/common/fs/zfs/dmu_send.c Tue Jun 01 17:04:42 2010 -0600 @@ -42,6 +42,7 @@ #include <zfs_fletcher.h> #include <sys/avl.h> #include <sys/ddt.h> +#include <sys/zfs_onexit.h> static char *dmu_recv_tag = "dmu_recv_tag"; @@ -810,7 +811,7 @@ uint64_t voff; int bufsize; /* amount of memory allocated for buf */ zio_cksum_t cksum; - avl_tree_t guid_to_ds_map; + avl_tree_t *guid_to_ds_map; }; typedef struct guid_map_entry { @@ -887,6 +888,21 @@ return (0); } +static void +free_guid_map_onexit(void *arg) +{ + avl_tree_t *ca = arg; + void *cookie = NULL; + guid_map_entry_t *gmep; + + while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { + dsl_dataset_rele(gmep->gme_ds, ca); + kmem_free(gmep, sizeof (guid_map_entry_t)); + } + avl_destroy(ca); + kmem_free(ca, sizeof (avl_tree_t)); +} + static void * restore_read(struct restorearg *ra, int len) { @@ -1173,7 +1189,7 @@ */ if (drrwbr->drr_toguid != drrwbr->drr_refguid) { gmesrch.guid = drrwbr->drr_refguid; - if ((gmep = avl_find(&ra->guid_to_ds_map, &gmesrch, + if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, &where)) == NULL) { return (EINVAL); } @@ -1276,13 +1292,13 @@ * NB: callers *must* call dmu_recv_end() if this succeeds. 
*/ int -dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp) +dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, + int cleanup_fd, uint64_t *action_handlep) { struct restorearg ra = { 0 }; dmu_replay_record_t *drr; objset_t *os; zio_cksum_t pcksum; - guid_map_entry_t *gmep; int featureflags; if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) @@ -1336,12 +1352,30 @@ /* if this stream is dedup'ed, set up the avl tree for guid mapping */ if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { - avl_create(&ra.guid_to_ds_map, guid_compare, - sizeof (guid_map_entry_t), - offsetof(guid_map_entry_t, avlnode)); - (void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid, - (void *)&ra.guid_to_ds_map, - DS_FIND_CHILDREN); + if (cleanup_fd == -1) { + ra.err = EBADF; + goto out; + } + if (*action_handlep == 0) { + ra.guid_to_ds_map = + kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); + avl_create(ra.guid_to_ds_map, guid_compare, + sizeof (guid_map_entry_t), + offsetof(guid_map_entry_t, avlnode)); + (void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid, + (void *)ra.guid_to_ds_map, + DS_FIND_CHILDREN); + ra.err = zfs_onexit_add_cb(cleanup_fd, + free_guid_map_onexit, ra.guid_to_ds_map, + action_handlep); + if (ra.err) + goto out; + } else { + ra.err = zfs_onexit_cb_data(cleanup_fd, *action_handlep, + (void **)&ra.guid_to_ds_map); + if (ra.err) + goto out; + } } /* @@ -1438,16 +1472,6 @@ } } - if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { - void *cookie = NULL; - - while (gmep = avl_destroy_nodes(&ra.guid_to_ds_map, &cookie)) { - dsl_dataset_rele(gmep->gme_ds, &ra.guid_to_ds_map); - kmem_free(gmep, sizeof (guid_map_entry_t)); - } - avl_destroy(&ra.guid_to_ds_map); - } - kmem_free(ra.buf, ra.bufsize); *voffp = ra.voff; return (ra.err);
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c Tue Jun 01 17:04:42 2010 -0600 @@ -37,6 +37,7 @@ #include <sys/zfs_ioctl.h> #include <sys/spa.h> #include <sys/zfs_znode.h> +#include <sys/zfs_onexit.h> #include <sys/zvol.h> #include <sys/dsl_scan.h> #include <sys/dsl_deadlist.h> @@ -3421,6 +3422,23 @@ char failed[MAXPATHLEN]; }; +typedef struct zfs_hold_cleanup_arg { + char dsname[MAXNAMELEN]; + char snapname[MAXNAMELEN]; + char htag[MAXNAMELEN]; + boolean_t recursive; +} zfs_hold_cleanup_arg_t; + +static void +dsl_dataset_user_release_onexit(void *arg) +{ + zfs_hold_cleanup_arg_t *ca = arg; + + (void) dsl_dataset_user_release(ca->dsname, ca->snapname, + ca->htag, ca->recursive); + kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); +} + /* * The max length of a temporary tag prefix is the number of hex digits * required to express UINT64_MAX plus one for the hyphen. @@ -3525,7 +3543,7 @@ int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, - boolean_t recursive, boolean_t temphold) + boolean_t recursive, boolean_t temphold, int cleanup_fd) { struct dsl_ds_holdarg *ha; dsl_sync_task_t *dst; @@ -3547,6 +3565,7 @@ ha->snapname = snapname; ha->recursive = recursive; ha->temphold = temphold; + if (recursive) { error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, ha, DS_FIND_CHILDREN); @@ -3574,6 +3593,32 @@ (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); dsl_sync_task_group_destroy(ha->dstg); + + /* + * If this set of temporary holds is to be removed upon process exit, + * register that action now. The call must not sit inside an ASSERT: 
+ * non-DEBUG kernels compile ASSERTs out, so it would never run. If + * registration fails (e.g. cleanup_fd is not a ZFS control device), + * drop the holds we just placed rather than leaving them behind. + */ + if (error == 0 && cleanup_fd != -1 && temphold) { + zfs_hold_cleanup_arg_t *ca; + uint64_t action_handle; + + ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); + (void) strlcpy(ca->dsname, dsname, sizeof (ca->dsname)); + (void) strlcpy(ca->snapname, snapname, sizeof (ca->snapname)); + (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); + ca->recursive = recursive; + error = zfs_onexit_add_cb(cleanup_fd, + dsl_dataset_user_release_onexit, ca, &action_handle); + if (error != 0) { + kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); + (void) dsl_dataset_user_release(dsname, snapname, + htag, recursive); + } + } + kmem_free(ha, sizeof (struct dsl_ds_holdarg)); spa_close(spa, FTAG); return (error);
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h Tue Jun 01 17:04:42 2010 -0600 @@ -721,7 +721,8 @@ int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *, boolean_t force, objset_t *origin, dmu_recv_cookie_t *); -int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp); +int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, + int cleanup_fd, uint64_t *action_handlep); int dmu_recv_end(dmu_recv_cookie_t *drc); /* CRC64 table */
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Tue Jun 01 17:04:42 2010 -0600 @@ -197,7 +197,7 @@ int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, boolean_t force); int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, - boolean_t recursive, boolean_t temphold); + boolean_t recursive, boolean_t temphold, int cleanup_fd); int dsl_dataset_user_release(char *dsname, char *snapname, char *htag, boolean_t recursive); int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h Tue Jun 01 17:04:42 2010 -0600 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_ZFS_IOCTL_H @@ -265,6 +264,9 @@ zinject_record_t zc_inject_record; boolean_t zc_defer_destroy; boolean_t zc_temphold; + uint64_t zc_action_handle; + int zc_cleanup_fd; + uint8_t zc_pad[4]; } zfs_cmd_t; typedef struct zfs_useracct { @@ -274,8 +276,8 @@ uint64_t zu_space; } zfs_useracct_t; -#define ZVOL_MAX_MINOR (1 << 16) -#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1) +#define ZFSDEV_MAX_MINOR (1 << 16) +#define ZFS_MIN_MINOR (ZFSDEV_MAX_MINOR + 1) #define ZPOOL_EXPORT_AFTER_SPLIT 0x1 @@ -295,6 +297,28 @@ extern int zfs_busy(void); extern int zfs_unmount_snap(const char *, void *); +/* + * ZFS minor numbers can refer to either a control device instance or + * a zvol. Depending on the value of zss_type, zss_data points to either + * a zvol_state_t or a zfs_onexit_t. + */ +enum zfs_soft_state_type { + ZSST_ZVOL, + ZSST_CTLDEV +}; + +typedef struct zfs_soft_state { + enum zfs_soft_state_type zss_type; + void *zss_data; +} zfs_soft_state_t; + +extern void *zfsdev_get_soft_state(minor_t minor, + enum zfs_soft_state_type which); +extern minor_t zfsdev_minor_alloc(void); + +extern void *zfsdev_state; +extern kmutex_t zfsdev_state_lock; + #endif /* _KERNEL */ #ifdef __cplusplus
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/fs/zfs/sys/zfs_onexit.h Tue Jun 01 17:04:42 2010 -0600 @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_ZFS_ONEXIT_H +#define _SYS_ZFS_ONEXIT_H + +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +typedef struct zfs_onexit { + kmutex_t zo_lock; + list_t zo_actions; +} zfs_onexit_t; + +typedef struct zfs_onexit_action_node { + list_node_t za_link; + void (*za_func)(void *); + void *za_data; +} zfs_onexit_action_node_t; + +extern void zfs_onexit_init(zfs_onexit_t **zo); +extern void zfs_onexit_destroy(zfs_onexit_t *zo); + +#endif + +extern int zfs_onexit_add_cb(int fd, void (*func)(void *), void *data, + uint64_t *action_handle); +extern int zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire); +extern int zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFS_ONEXIT_H */
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c Tue Jun 01 17:04:42 2010 -0600 @@ -60,6 +60,7 @@ #include <sys/fs/zfs.h> #include <sys/zfs_ctldir.h> #include <sys/zfs_dir.h> +#include <sys/zfs_onexit.h> #include <sys/zvol.h> #include <sys/dsl_scan.h> #include <sharefs/share.h> @@ -3342,11 +3343,14 @@ * zc_cookie file descriptor to recv from * zc_begin_record the BEGIN record of the stream (not byteswapped) * zc_guid force flag + * zc_cleanup_fd cleanup-on-exit file descriptor + * zc_action_handle handle for this guid/ds mapping (or zero on first call) * * outputs: * zc_cookie number of bytes read * zc_nvlist_dst{_size} error for each unapplied received property * zc_obj zprop_errflags_t + * zc_action_handle handle for this guid/ds mapping */ static int zfs_ioc_recv(zfs_cmd_t *zc) @@ -3475,7 +3479,8 @@ } off = fp->f_offset; - error = dmu_recv_stream(&drc, fp->f_vnode, &off); + error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd, + &zc->zc_action_handle); if (error == 0) { zfsvfs_t *zfsvfs = NULL; @@ -4182,11 +4187,12 @@ /* * inputs: - * zc_name name of filesystem - * zc_value short name of snap - * zc_string user-supplied tag for this reference - * zc_cookie recursive flag - * zc_temphold set if hold is temporary + * zc_name name of filesystem + * zc_value short name of snap + * zc_string user-supplied tag for this hold + * zc_cookie recursive flag + * zc_temphold set if hold is temporary + * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * * outputs: none */ @@ -4199,17 +4205,17 @@ return (EINVAL); return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value, - zc->zc_string, recursive, zc->zc_temphold)); + zc->zc_string, recursive, zc->zc_temphold, zc->zc_cleanup_fd)); } /* * inputs: - * zc_name name of dataset from which we're releasing a user reference + * zc_name name of dataset from which we're releasing a user hold * zc_value short name of snap - * zc_string 
user-supplied tag for this reference + * zc_string user-supplied tag for this hold * zc_cookie recursive flag * - * outputs: none + * outputs: none */ static int zfs_ioc_release(zfs_cmd_t *zc) @@ -4369,14 +4375,124 @@ return (error); } +/* + * Find a free minor number. Every minor in [1, ZFSDEV_MAX_MINOR] is probed + * at most once per call; 0 is returned when all of them are in use. + */ +minor_t +zfsdev_minor_alloc(void) +{ + static minor_t last_minor = ZFSDEV_MAX_MINOR; + minor_t m = last_minor; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + + do { + if (++m > ZFSDEV_MAX_MINOR) + m = 1; + if (ddi_get_soft_state(zfsdev_state, m) == NULL) { + last_minor = m; + return (m); + } + } while (m != last_minor); + return (0); +} + +static int +zfs_ctldev_init(dev_t *devp) +{ + minor_t minor; + zfs_soft_state_t *zs; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + ASSERT(getminor(*devp) == 0); + + minor = zfsdev_minor_alloc(); + if (minor == 0) + return (ENXIO); + + if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) + return (EAGAIN); + + *devp = makedevice(getemajor(*devp), minor); + + zs = ddi_get_soft_state(zfsdev_state, minor); + zs->zss_type = ZSST_CTLDEV; + zfs_onexit_init((zfs_onexit_t **)&zs->zss_data); + + return (0); +} + +static void +zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor) +{ + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + + zfs_onexit_destroy(zo); + ddi_soft_state_free(zfsdev_state, minor); +} + +void * +zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which) +{ + zfs_soft_state_t *zp; + + zp = ddi_get_soft_state(zfsdev_state, minor); + if (zp == NULL || zp->zss_type != which) + return (NULL); + + return (zp->zss_data); +} + +static int +zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr) +{ + int error = 0; + + if (getminor(*devp) != 0) + return (zvol_open(devp, flag, otyp, cr)); + + /* This is the control device. Allocate a new minor if requested. 
*/ + if (flag & FEXCL) { + mutex_enter(&zfsdev_state_lock); + error = zfs_ctldev_init(devp); + mutex_exit(&zfsdev_state_lock); + } + + return (error); +} + +static int +zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr) +{ + zfs_onexit_t *zo; + minor_t minor = getminor(dev); + + if (minor == 0) + return (0); + + mutex_enter(&zfsdev_state_lock); + zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV); + if (zo == NULL) { + mutex_exit(&zfsdev_state_lock); + return (zvol_close(dev, flag, otyp, cr)); + } + zfs_ctldev_destroy(zo, minor); + mutex_exit(&zfsdev_state_lock); + + return (0); +} + static int zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) { zfs_cmd_t *zc; uint_t vec; int error, rc; - - if (getminor(dev) != 0) + minor_t minor = getminor(dev); + + if (minor != 0 && + zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL) return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp)); vec = cmd - ZFS_IOC; @@ -4499,8 +4615,8 @@ * so most of the standard driver entry points are in zvol.c. */ static struct cb_ops zfs_cb_ops = { - zvol_open, /* open */ - zvol_close, /* close */ + zfsdev_open, /* open */ + zfsdev_close, /* close */ zvol_strategy, /* strategy */ nodev, /* print */ zvol_dump, /* dump */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/fs/zfs/zfs_onexit.c Tue Jun 01 17:04:42 2010 -0600 @@ -0,0 +1,225 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/open.h> +#include <sys/kmem.h> +#include <sys/conf.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/zfs_ioctl.h> +#include <sys/mkdev.h> +#include <sys/zfs_onexit.h> +#include <sys/zvol.h> + +/* + * ZFS kernel routines may add/delete callback routines to be invoked + * upon process exit (triggered via the close operation from the /dev/zfs + * driver). + * + * These cleanup callbacks are intended to allow for the accumulation + * of kernel state across multiple ioctls. User processes participate + * by opening ZFS_DEV with O_EXCL. This causes the ZFS driver to do a + * clone-open, generating a unique minor number. The process then passes + * along that file descriptor to each ioctl that might have a cleanup operation. 
+ *
+ * A simple example is zfs_ioc_recv(), where we might create an AVL tree
+ * with dataset/GUID mappings and then reuse that tree on subsequent
+ * zfs_ioc_recv() calls.
+ *
+ * On the first zfs_ioc_recv() call, dmu_recv_stream() will kmem_alloc()
+ * the AVL tree and pass it along with a callback function to
+ * zfs_onexit_add_cb(). The zfs_onexit_add_cb() routine will register the
+ * callback and return an action handle.
+ *
+ * The action handle is then passed from user space to subsequent
+ * zfs_ioc_recv() calls, so that dmu_recv_stream() can fetch its AVL tree
+ * by calling zfs_onexit_cb_data() with the cleanup fd and action handle.
+ *
+ * If the user process exits abnormally, the callback is invoked implicitly
+ * as part of the driver close operation. Once the user space process is
+ * finished with the accumulated kernel state, it can also just call close(2)
+ * on the cleanup fd to trigger the cleanup callback.
+ */
+
+/*
+ * Allocate a zeroed zfs_onexit_t, initialize its lock and its (empty) list
+ * of registered actions, and hand it back through *zop.  The allocation
+ * uses KM_SLEEP, so this routine does not report failure.
+ */
+void
+zfs_onexit_init(zfs_onexit_t **zop)
+{
+	zfs_onexit_t *zo;
+
+	zo = *zop = kmem_zalloc(sizeof (zfs_onexit_t), KM_SLEEP);
+	mutex_init(&zo->zo_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&zo->zo_actions, sizeof (zfs_onexit_action_node_t),
+	    offsetof(zfs_onexit_action_node_t, za_link));
+}
+
+/*
+ * Run every callback still registered on 'zo', then tear down and free the
+ * state itself.  Actions are taken from the head of the list one at a time;
+ * zo_lock is dropped around each callback invocation so that the callback
+ * is never called with the lock held.
+ */
+void
+zfs_onexit_destroy(zfs_onexit_t *zo)
+{
+	zfs_onexit_action_node_t *ap;
+
+	mutex_enter(&zo->zo_lock);
+	while ((ap = list_head(&zo->zo_actions)) != NULL) {
+		/* Unlink first so nobody else can find (and free) the node. */
+		list_remove(&zo->zo_actions, ap);
+		mutex_exit(&zo->zo_lock);
+		ap->za_func(ap->za_data);
+		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
+		mutex_enter(&zo->zo_lock);
+	}
+	mutex_exit(&zo->zo_lock);
+
+	list_destroy(&zo->zo_actions);
+	mutex_destroy(&zo->zo_lock);
+	kmem_free(zo, sizeof (zfs_onexit_t));
+}
+
+/*
+ * Translate a cleanup fd into its zfs_onexit_t.
+ *
+ * Returns 0 and sets *zo on success; in that case the hold taken by getf()
+ * is still in place and the caller must drop it with releasef(fd).
+ * Returns EBADF if 'fd' is not an open file or if its minor number has no
+ * ZSST_CTLDEV soft state; no hold is left behind on failure.
+ *
+ * NOTE(review): only the minor number of the vnode's v_rdev is consulted
+ * here; whether the fd is otherwise verified to refer to the ZFS control
+ * device is not visible from this file -- confirm.
+ */
+static int
+zfs_onexit_fd_to_state(int fd, zfs_onexit_t **zo)
+{
+	file_t *fp;
+	dev_t rdev;
+
+	fp = getf(fd);
+	if (fp == NULL)
+		return (EBADF);
+
+	rdev = fp->f_vnode->v_rdev;
+	*zo = zfsdev_get_soft_state(getminor(rdev), ZSST_CTLDEV);
+	if (*zo == NULL) {
+		releasef(fd);
+		return (EBADF);
+	}
+
+	return (0);
+}
+
+/*
+ * Add a callback to be invoked when the calling process exits.
+ *
+ * 'func' will be called with 'data' when the state behind 'fd' is destroyed,
+ * or earlier if the action is fired via zfs_onexit_del_cb().  If
+ * 'action_handle' is non-NULL it receives an opaque handle identifying the
+ * new action; callers that never need to look the action up again may pass
+ * NULL.
+ *
+ * Returns 0 on success or EBADF if 'fd' cannot be mapped to onexit state.
+ */
+int
+zfs_onexit_add_cb(int fd, void (*func)(void *), void *data,
+    uint64_t *action_handle)
+{
+	zfs_onexit_t *zo;
+	zfs_onexit_action_node_t *ap;
+	int error;
+
+	error = zfs_onexit_fd_to_state(fd, &zo);
+	if (error)
+		return (error);
+
+	ap = kmem_alloc(sizeof (zfs_onexit_action_node_t), KM_SLEEP);
+	list_link_init(&ap->za_link);
+	ap->za_func = func;
+	ap->za_data = data;
+
+	mutex_enter(&zo->zo_lock);
+	list_insert_tail(&zo->zo_actions, ap);
+	mutex_exit(&zo->zo_lock);
+
+	/* The handle is optional; don't store through a NULL pointer. */
+	if (action_handle != NULL)
+		*action_handle = (uint64_t)(uintptr_t)ap;
+	releasef(fd);
+
+	return (0);
+}
+
+/*
+ * Look up the action identified by 'action_handle' on zo's list.
+ *
+ * The handle is only ever compared against the addresses of nodes that are
+ * actually on the list; it is not dereferenced here, so a stale or bogus
+ * handle simply fails to match.  Returns the matching node, or NULL if there
+ * is none.  The caller must hold zo_lock.
+ */
+static zfs_onexit_action_node_t *
+zfs_onexit_find_cb(zfs_onexit_t *zo, uint64_t action_handle)
+{
+	zfs_onexit_action_node_t *match;
+	zfs_onexit_action_node_t *ap;
+	list_t *l;
+
+	ASSERT(MUTEX_HELD(&zo->zo_lock));
+
+	match = (zfs_onexit_action_node_t *)(uintptr_t)action_handle;
+	l = &zo->zo_actions;
+	for (ap = list_head(l); ap != NULL; ap = list_next(l, ap)) {
+		if (match == ap)
+			break;
+	}
+	/* ap is NULL here if the loop ran off the end of the list. */
+	return (ap);
+}
+
+/*
+ * Delete the callback, triggering it first if 'fire' is set.
+ *
+ * The node is unlinked under zo_lock, but the callback itself (if fired) is
+ * invoked after the lock has been dropped.  Returns 0 on success, EBADF if
+ * 'fd' cannot be mapped to onexit state, or ENOENT if 'action_handle' does
+ * not name an action registered on that state.
+ */
+int
+zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire)
+{
+	zfs_onexit_t *zo;
+	zfs_onexit_action_node_t *ap;
+	int error;
+
+	error = zfs_onexit_fd_to_state(fd, &zo);
+	if (error)
+		return (error);
+
+	mutex_enter(&zo->zo_lock);
+	ap = zfs_onexit_find_cb(zo, action_handle);
+	if (ap != NULL) {
+		list_remove(&zo->zo_actions, ap);
+		mutex_exit(&zo->zo_lock);
+		if (fire)
+			ap->za_func(ap->za_data);
+		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
+	} else {
+		mutex_exit(&zo->zo_lock);
+		error = ENOENT;
+	}
+	releasef(fd);
+
+	return (error);
+}
+
+/*
+ * Return the data associated with this callback. This allows consumers
+ * of the cleanup-on-exit interfaces to stash kernel data across system
+ * calls, knowing that it will be cleaned up if the calling process exits.
+ *
+ * *data is set to NULL up front, so it is NULL on every error return.
+ * Returns 0 on success, EBADF if 'fd' cannot be mapped to onexit state, or
+ * ENOENT if 'action_handle' does not name a registered action.
+ */
+int
+zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data)
+{
+	zfs_onexit_t *zo;
+	zfs_onexit_action_node_t *ap;
+	int error;
+
+	*data = NULL;
+
+	error = zfs_onexit_fd_to_state(fd, &zo);
+	if (error)
+		return (error);
+
+	mutex_enter(&zo->zo_lock);
+	ap = zfs_onexit_find_cb(zo, action_handle);
+	if (ap != NULL)
+		*data = ap->za_data;
+	else
+		error = ENOENT;
+	mutex_exit(&zo->zo_lock);
+	releasef(fd);
+
+	return (error);
+}
--- a/usr/src/uts/common/fs/zfs/zvol.c Tue Jun 01 17:33:59 2010 -0400 +++ b/usr/src/uts/common/fs/zfs/zvol.c Tue Jun 01 17:04:42 2010 -0600 @@ -80,18 +80,18 @@ #include "zfs_namecheck.h" -static void *zvol_state; +void *zfsdev_state; static char *zvol_tag = "zvol_tag"; #define ZVOL_DUMPSIZE "dumpsize" /* - * This lock protects the zvol_state structure from being modified + * This lock protects the zfsdev_state structure from being modified * while it's being used, e.g. an open that comes in before a create * finishes. It also protects temporary opens of the dataset so that, * e.g., an open doesn't get a spurious EBUSY. */ -static kmutex_t zvol_state_lock; +kmutex_t zfsdev_state_lock; static uint32_t zvol_minors; typedef struct zvol_extent { @@ -205,33 +205,16 @@ return (error); } -/* - * Find a free minor number. - */ -static minor_t -zvol_minor_alloc(void) -{ - minor_t minor; - - ASSERT(MUTEX_HELD(&zvol_state_lock)); - - for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) - if (ddi_get_soft_state(zvol_state, minor) == NULL) - return (minor); - - return (0); -} - static zvol_state_t * zvol_minor_lookup(const char *name) { minor_t minor; zvol_state_t *zv; - ASSERT(MUTEX_HELD(&zvol_state_lock)); + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); - for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) { - zv = ddi_get_soft_state(zvol_state, minor); + for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) { + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); if (zv == NULL) continue; if (strcmp(zv->zv_name, name) == 0) @@ -438,11 +421,11 @@ { zvol_state_t *zv; - mutex_enter(&zvol_state_lock); + mutex_enter(&zfsdev_state_lock); zv = zvol_minor_lookup(name); if (minor && zv) *minor = zv->zv_minor; - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (zv ? 
0 : -1); } @@ -452,6 +435,7 @@ int zvol_create_minor(const char *name) { + zfs_soft_state_t *zs; zvol_state_t *zv; objset_t *os; dmu_object_info_t doi; @@ -459,10 +443,10 @@ char chrbuf[30], blkbuf[30]; int error; - mutex_enter(&zvol_state_lock); + mutex_enter(&zfsdev_state_lock); if (zvol_minor_lookup(name) != NULL) { - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (EEXIST); } @@ -470,19 +454,19 @@ error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); if (error) { - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (error); } - if ((minor = zvol_minor_alloc()) == 0) { + if ((minor = zfsdev_minor_alloc()) == 0) { dmu_objset_disown(os, zvol_tag); - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (ENXIO); } - if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) { + if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { dmu_objset_disown(os, zvol_tag); - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (EAGAIN); } (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, @@ -492,9 +476,9 @@ if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { - ddi_soft_state_free(zvol_state, minor); + ddi_soft_state_free(zfsdev_state, minor); dmu_objset_disown(os, zvol_tag); - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (EAGAIN); } @@ -503,14 +487,15 @@ if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { ddi_remove_minor_node(zfs_dip, chrbuf); - ddi_soft_state_free(zvol_state, minor); + ddi_soft_state_free(zfsdev_state, minor); dmu_objset_disown(os, zvol_tag); - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (EAGAIN); } - zv = ddi_get_soft_state(zvol_state, minor); - + zs = ddi_get_soft_state(zfsdev_state, minor); + zs->zss_type = ZSST_ZVOL; + zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); (void) 
strlcpy(zv->zv_name, name, MAXPATHLEN); zv->zv_min_bs = DEV_BSHIFT; zv->zv_minor = minor; @@ -536,7 +521,7 @@ zvol_minors++; - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (0); } @@ -548,21 +533,24 @@ zvol_remove_zv(zvol_state_t *zv) { char nmbuf[20]; + minor_t minor = zv->zv_minor; - ASSERT(MUTEX_HELD(&zvol_state_lock)); + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); if (zv->zv_total_opens != 0) return (EBUSY); - (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", zv->zv_minor); + (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor); ddi_remove_minor_node(zfs_dip, nmbuf); - (void) snprintf(nmbuf, sizeof (nmbuf), "%u", zv->zv_minor); + (void) snprintf(nmbuf, sizeof (nmbuf), "%u", minor); ddi_remove_minor_node(zfs_dip, nmbuf); avl_destroy(&zv->zv_znode.z_range_avl); mutex_destroy(&zv->zv_znode.z_range_lock); - ddi_soft_state_free(zvol_state, zv->zv_minor); + kmem_free(zv, sizeof (zvol_state_t)); + + ddi_soft_state_free(zfsdev_state, minor); zvol_minors--; return (0); @@ -574,13 +562,13 @@ zvol_state_t *zv; int rc; - mutex_enter(&zvol_state_lock); + mutex_enter(&zfsdev_state_lock); if ((zv = zvol_minor_lookup(name)) == NULL) { - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (ENXIO); } rc = zvol_remove_zv(zv); - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (rc); } @@ -680,7 +668,7 @@ dmu_tx_t *tx; int error; - ASSERT(MUTEX_HELD(&zvol_state_lock)); + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); @@ -710,10 +698,10 @@ namebuf = kmem_zalloc(strlen(name) + 2, KM_SLEEP); (void) strncpy(namebuf, name, strlen(name)); (void) strcat(namebuf, "/"); - mutex_enter(&zvol_state_lock); - for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) { + mutex_enter(&zfsdev_state_lock); + for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) { - zv = ddi_get_soft_state(zvol_state, minor); + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); if (zv == NULL) continue; if 
(strncmp(namebuf, zv->zv_name, strlen(namebuf)) == 0) @@ -721,7 +709,7 @@ } kmem_free(namebuf, strlen(name) + 2); - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); } int @@ -734,10 +722,10 @@ uint64_t old_volsize = 0ULL; uint64_t readonly; - mutex_enter(&zvol_state_lock); + mutex_enter(&zfsdev_state_lock); zv = zvol_minor_lookup(name); if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (error); } @@ -800,7 +788,7 @@ out: dmu_objset_rele(os, FTAG); - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (error); } @@ -809,25 +797,21 @@ int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr) { - minor_t minor = getminor(*devp); zvol_state_t *zv; int err = 0; - if (minor == 0) /* This is the control device */ - return (0); + mutex_enter(&zfsdev_state_lock); - mutex_enter(&zvol_state_lock); - - zv = ddi_get_soft_state(zvol_state, minor); + zv = zfsdev_get_soft_state(getminor(*devp), ZSST_ZVOL); if (zv == NULL) { - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (ENXIO); } if (zv->zv_total_opens == 0) err = zvol_first_open(zv); if (err) { - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (err); } if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { @@ -850,13 +834,13 @@ zv->zv_open_count[otyp]++; zv->zv_total_opens++; } - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (err); out: if (zv->zv_total_opens == 0) zvol_last_close(zv); - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (err); } @@ -868,14 +852,11 @@ zvol_state_t *zv; int error = 0; - if (minor == 0) /* This is the control device */ - return (0); + mutex_enter(&zfsdev_state_lock); - mutex_enter(&zvol_state_lock); - - zv = ddi_get_soft_state(zvol_state, minor); + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); if (zv == NULL) { - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (ENXIO); } 
@@ -900,7 +881,7 @@ if (zv->zv_total_opens == 0) zvol_last_close(zv); - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (error); } @@ -1138,7 +1119,8 @@ int zvol_strategy(buf_t *bp) { - zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev)); + zfs_soft_state_t *zs = NULL; + zvol_state_t *zv; uint64_t off, volsize; size_t resid; char *addr; @@ -1149,17 +1131,23 @@ boolean_t is_dump; boolean_t sync; - if (zv == NULL) { - bioerror(bp, ENXIO); + if (getminor(bp->b_edev) == 0) { + error = EINVAL; + } else { + zs = ddi_get_soft_state(zfsdev_state, getminor(bp->b_edev)); + if (zs == NULL) + error = ENXIO; + else if (zs->zss_type != ZSST_ZVOL) + error = EINVAL; + } + + if (error) { + bioerror(bp, error); biodone(bp); return (0); } - if (getminor(bp->b_edev) == 0) { - bioerror(bp, EINVAL); - biodone(bp); - return (0); - } + zv = zs->zss_data; if (!(bp->b_flags & B_READ) && (zv->zv_flags & ZVOL_RDONLY)) { bioerror(bp, EROFS); @@ -1264,10 +1252,7 @@ uint64_t boff; uint64_t resid; - if (minor == 0) /* This is the control device */ - return (ENXIO); - - zv = ddi_get_soft_state(zvol_state, minor); + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); if (zv == NULL) return (ENXIO); @@ -1299,10 +1284,7 @@ rl_t *rl; int error = 0; - if (minor == 0) /* This is the control device */ - return (ENXIO); - - zv = ddi_get_soft_state(zvol_state, minor); + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); if (zv == NULL) return (ENXIO); @@ -1349,10 +1331,7 @@ int error = 0; boolean_t sync; - if (minor == 0) /* This is the control device */ - return (ENXIO); - - zv = ddi_get_soft_state(zvol_state, minor); + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); if (zv == NULL) return (ENXIO); @@ -1471,9 +1450,8 @@ { zvol_state_t *zv; - if (minor == 0) - return (ENXIO); - if ((zv = ddi_get_soft_state(zvol_state, minor)) == NULL) + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); + if (zv == NULL) return (ENXIO); if (zv->zv_flags & ZVOL_DUMPIFIED) return (ENXIO); @@ -1544,12 
+1522,12 @@ int error = 0; rl_t *rl; - mutex_enter(&zvol_state_lock); + mutex_enter(&zfsdev_state_lock); - zv = ddi_get_soft_state(zvol_state, getminor(dev)); + zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL); if (zv == NULL) { - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (ENXIO); } ASSERT(zv->zv_total_opens > 0); @@ -1563,7 +1541,7 @@ dki.dki_ctype = DKC_UNKNOWN; dki.dki_unit = getminor(dev); dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) error = EFAULT; return (error); @@ -1573,7 +1551,7 @@ dkm.dki_lbsize = 1U << zv->zv_min_bs; dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; dkm.dki_media_type = DK_UNKNOWN; - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) error = EFAULT; return (error); @@ -1583,14 +1561,14 @@ uint64_t vs = zv->zv_volsize; uint8_t bs = zv->zv_min_bs; - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); error = zvol_getefi((void *)arg, flag, vs, bs); return (error); } case DKIOCFLUSHWRITECACHE: dkc = (struct dk_callback *)arg; - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) { (*dkc->dkc_callback)(dkc->dkc_cookie, error); @@ -1616,10 +1594,10 @@ } if (wce) { zv->zv_flags |= ZVOL_WCE; - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); } else { zv->zv_flags &= ~ZVOL_WCE; - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); } return (0); @@ -1655,7 +1633,7 @@ break; } - mutex_exit(&zvol_state_lock); + mutex_exit(&zfsdev_state_lock); return (error); } @@ -1668,15 +1646,16 @@ void zvol_init(void) { - VERIFY(ddi_soft_state_init(&zvol_state, sizeof (zvol_state_t), 1) == 0); - 
mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL); + VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t), + 1) == 0); + mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL); } void zvol_fini(void) { - mutex_destroy(&zvol_state_lock); - ddi_soft_state_fini(&zvol_state); + mutex_destroy(&zfsdev_state_lock); + ddi_soft_state_fini(&zfsdev_state); } static int @@ -1688,7 +1667,7 @@ nvlist_t *nv = NULL; uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset)); - ASSERT(MUTEX_HELD(&zvol_state_lock)); + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0, DMU_OBJECT_END); /* wait for dmu_free_long_range to actually free the blocks */