changeset 22369:5653fa724c62

Merge branch 'upstream_gate' into upstream_merge/2019111401

commit 22f89f96cd
    11922 ipmi_open looks for wrong return value
commit 327c8d1665
    11792 ibtl: cast between incompatible function types
commit 1c085a54d0
    3334 zonestat missing man page
commit 4c05824a49
    11924 infinite loop in mdb ::load
commit 249622b3e0
    11929 mac_minor_hold() gets id_alloc_nosleep() wrong
commit ad3e6d4dd8
    11880 changing encryption key on dataset with unencrypted children triggers VERIFY
commit 42cd19316c
    11859 need swapgs mitigation
commit 985366be3b
    11818 IPMI topo plugin shouldn't return data from unavailable sensors
commit 9f62da4d4c
    11906 Add line drawing characters to ptree(1)
commit fec6629365
    11923 loader: zfs.c cstyle cleanup
commit 2c6de39661
    11934 One more typo in zone_sun.tab
commit ad234cdc80
    11951 smatch sometimes flags problems with ipmp_snap_take()
commit d8849d7dee
    11943 Fix out-of-order ZIL txtype lost on hardlinked files
    11942 Panic on zil/slog replay when TX_REMOVE followed by TX_CREATE
commit 3b4422300b
    11946 clean up improper use of mdb_getopts
commit 8d91e49dd9
    11825 PKCS#11 CKM_AES_CBC_PAD decryption can fail
commit 10b633f40f
    11952 large USB hard disks experience I/O failures
commit 4941d7e28c
    11892 ahciem doesn't properly check if enclosure services are present
commit 0dd73f5050
    11944 mdb ::refcount dcmd no longer works
commit 28df1ae01e
    11947 zfs diff on encrypted dataset leaks key reference
commit 7dcf02b394
    11930 loader: factor out label and uberblock load from vdev_probe, add MMP checks
commit af1d63aba5
    11918 metaslab improvements
commit 2c465844f6
    11793 mac: cast between incompatible function types
commit 3fcf12aa85
    11794 usba: cast between incompatible function types
commit 7fd1b424e2
    11799 rootnex: cast between incompatible function types
commit 158d5b49eb
    11803 evtchn: cast between incompatible function types
commit 4224cf3543
    11804 xpv_psm: cast between incompatible function types
commit 5328fc53d1
    11805 generic_cpu: cast between incompatible function types
commit 4ad35fa311
    11038 SMB2 server should require signed Validate Negotiate requests
commit dafb549fce
    11039 All zfs/nfs/smb threads in door calls to idle idmap
commit 06721c885c
    11852 SMB should explicitly fail deletion of mountpoints
commit 0292c176d8
    11773 Need ways to override Domain Admins' full control
commit 3c1aa8841c
    11853 Administrators should have Backup and Restore privileges by default
commit 48f31329f5
    11854 Domain Admins shouldn't always be Administrators
commit 4b6bffb4c4
    11928 rpcmod's clnt_cots can do zero-length kmem allocations
commit 67806cd738
    11933 loader: we can not read log device but we need to know about
commit f0c1c263e9
    9601 Divide by zero in i40e_get_available_resources()
commit f67d64d998
    11954 rpcmod: Possible memory leak in connmgr_get()
    11955 clnt_cots: kmem_free(NULL, 0) is legal
commit cb09bd3c63
    11935 loader: fix memory corruption bug in vdev_read
commit f21abddf56
    11963 Allow ptree(1) to wrap output
commit e89be50a40
    11961 add DDI UFM support to the nvme driver

Conflicts:
    exception_lists/wscheck
    usr/src/cmd/mdb/common/modules/genunix/memory.c
    usr/src/cmd/mdb/common/modules/qlc/qlc.c
    usr/src/cmd/ptools/ptree/ptree.c
    usr/src/man/man1/Makefile
    usr/src/pkg/manifests/system-zones.mf
    usr/src/uts/common/fs/zfs/metaslab.c
    usr/src/uts/common/fs/zfs/spa_log_spacemap.c
    usr/src/uts/common/fs/zfs/sys/metaslab_impl.h
    usr/src/uts/common/fs/zfs/vdev_initialize.c
    usr/src/uts/common/io/sata/adapters/ahci/ahci.c
author Andy Fiddaman <omnios@citrus-it.co.uk>
date Thu, 14 Nov 2019 23:30:04 +0000
parents 1d88dd61f7a7 (current diff) da4207e17ba1 (diff)
children 7528c68d2f08
files exception_lists/copyright exception_lists/wscheck usr/src/boot/lib/libstand/zfs/zfs.c usr/src/boot/lib/libstand/zfs/zfsimpl.c usr/src/cmd/mdb/common/modules/genunix/memory.c usr/src/cmd/mdb/common/modules/libumem/umem.c usr/src/cmd/mdb/common/modules/qlc/qlc.c usr/src/cmd/ptools/Makefile.bld usr/src/cmd/ptools/ptree/ptree.c usr/src/cmd/zdb/zdb.c usr/src/data/zoneinfo/zone_sun.tab usr/src/man/man1/Makefile usr/src/man/man1/ptree.1 usr/src/man/man1m/Makefile usr/src/pkg/manifests/system-test-zfstest.mf usr/src/pkg/manifests/system-zones.mf usr/src/test/crypto-tests/tests/modes/aes/Makefile usr/src/test/crypto-tests/tests/modes/aes/cbc_pad/Makefile usr/src/test/crypto-tests/tests/modes/aes/cbc_pad/aes_cbc_pad.c usr/src/test/zfs-tests/runfiles/delphix.run usr/src/test/zfs-tests/runfiles/omnios.run usr/src/test/zfs-tests/runfiles/openindiana.run usr/src/test/zfs-tests/runfiles/smartos.run usr/src/uts/common/fs/smbsrv/smb2_negotiate.c usr/src/uts/common/fs/zfs/arc.c usr/src/uts/common/fs/zfs/metaslab.c usr/src/uts/common/fs/zfs/spa.c usr/src/uts/common/fs/zfs/spa_log_spacemap.c usr/src/uts/common/fs/zfs/sys/metaslab.h usr/src/uts/common/fs/zfs/sys/metaslab_impl.h usr/src/uts/common/fs/zfs/vdev_initialize.c usr/src/uts/common/fs/zfs/vdev_trim.c usr/src/uts/common/fs/zfs/zfs_ioctl.c usr/src/uts/common/fs/zfs/zfs_vnops.c usr/src/uts/common/io/mac/mac.c usr/src/uts/common/io/mac/mac_datapath_setup.c usr/src/uts/common/io/mac/mac_soft_ring.c usr/src/uts/common/io/nvme/nvme.c usr/src/uts/common/io/sata/adapters/ahci/ahci.c usr/src/uts/i86pc/ml/kpti_trampolines.s usr/src/uts/i86pc/os/cpuid.c usr/src/uts/i86xpv/io/psm/xpv_psm.c usr/src/uts/intel/kdi/kdi_asm.s usr/src/uts/intel/sys/segments.h
diffstat 144 files changed, 4108 insertions(+), 1209 deletions(-)
--- a/exception_lists/copyright	Fri Nov 08 17:08:44 2019 +0100
+++ b/exception_lists/copyright	Thu Nov 14 23:30:04 2019 +0000
@@ -435,6 +435,7 @@
 usr/src/uts/common/gssapi/mechs/krb5/mech/util_seqnum.c
 usr/src/uts/common/gssapi/mechs/krb5/mech/val_cred.c
 usr/src/uts/common/io/cxgbe/*
+usr/src/uts/common/io/i40e/core/*
 usr/src/uts/common/io/iwn/THIRDPARTYLICENSE
 usr/src/uts/common/io/iwn/THIRDPARTYLICENSE.descrip
 usr/src/uts/common/io/iwn/fw-iw/THIRDPARTYLICENSE
--- a/exception_lists/wscheck	Fri Nov 08 17:08:44 2019 +0100
+++ b/exception_lists/wscheck	Thu Nov 14 23:30:04 2019 +0000
@@ -12,12 +12,13 @@
 #
 syntax: glob
 
-usr/src/uts/common/io/qede/*
-usr/src/tools/smatch/src/*
+usr/src/cmd/smbsrv/testoplock/case*.ref
 usr/src/data/hwdata/pci.ids
 usr/src/data/hwdata/usb.ids
 usr/src/data/perfmon/readme.txt
-usr/src/cmd/smbsrv/testoplock/case*.ref
+usr/src/tools/smatch/src/*
+usr/src/uts/common/io/qede/*
+usr/src/uts/common/io/i40e/core/*
 
 usr/src/data/ucode/amd/*
 usr/src/data/ucode/intel/*
--- a/usr/src/boot/Makefile.version	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/boot/Makefile.version	Thu Nov 14 23:30:04 2019 +0000
@@ -33,4 +33,4 @@
 # Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes.
 # The version is processed from left to right, the version number can only
 # be increased.
-BOOT_VERSION = $(LOADER_VERSION)-2019.11.04.1
+BOOT_VERSION = $(LOADER_VERSION)-2019.11.06.1
--- a/usr/src/boot/lib/libstand/zfs/zfs.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/boot/lib/libstand/zfs/zfs.c	Thu Nov 14 23:30:04 2019 +0000
@@ -51,12 +51,12 @@
 #define		ZFS_BE_FIRST	4
 #define		ZFS_BE_LAST	8
 
-static int	zfs_open(const char *path, struct open_file *f);
-static int	zfs_close(struct open_file *f);
-static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
-static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
-static int	zfs_stat(struct open_file *f, struct stat *sb);
-static int	zfs_readdir(struct open_file *f, struct dirent *d);
+static int	zfs_open(const char *, struct open_file *);
+static int	zfs_close(struct open_file *);
+static int	zfs_read(struct open_file *, void *, size_t, size_t *);
+static off_t	zfs_seek(struct open_file *, off_t, int);
+static int	zfs_stat(struct open_file *, struct stat *);
+static int	zfs_readdir(struct open_file *, struct dirent *);
 
 struct devsw zfs_dev;
 
@@ -82,12 +82,8 @@
 	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
 };
 
-#ifdef __FreeBSD__
-static int	zfs_env_index;
-static int	zfs_env_count;
-#endif
-
-SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
+SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head =
+    SLIST_HEAD_INITIALIZER(zfs_be_head);
 struct zfs_be_list *zfs_be_headp;
 struct zfs_be_entry {
 	const char *name;
@@ -139,7 +135,7 @@
  * Cross block boundaries when necessary.
  */
 static int
-zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
+zfs_read(struct open_file *f, void *start, size_t size, size_t *resid)
 {
 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
 	struct file *fp = (struct file *)f->f_fsdata;
@@ -158,11 +154,6 @@
 	if (rc)
 		return (rc);
 
-	if (0) {
-	    int i;
-	    for (i = 0; i < n; i++)
-		putchar(((char*) start)[i]);
-	}
 	fp->f_seekp += n;
 	if (resid)
 		*resid = size - n;
@@ -174,6 +165,9 @@
 zfs_seek(struct open_file *f, off_t offset, int where)
 {
 	struct file *fp = (struct file *)f->f_fsdata;
+	struct stat sb;
+	int error;
+
 
 	switch (where) {
 	case SEEK_SET:
@@ -183,10 +177,6 @@
 		fp->f_seekp += offset;
 		break;
 	case SEEK_END:
-	    {
-		struct stat sb;
-		int error;
-
 		error = zfs_stat(f, &sb);
 		if (error != 0) {
 			errno = error;
@@ -194,7 +184,6 @@
 		}
 		fp->f_seekp = sb.st_size - offset;
 		break;
-	    }
 	default:
 		errno = EINVAL;
 		return (-1);
@@ -231,8 +220,8 @@
 	 * If this is the first read, get the zap type.
 	 */
 	if (fp->f_seekp == 0) {
-		rc = dnode_read(spa, &fp->f_dnode,
-				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
+		rc = dnode_read(spa, &fp->f_dnode, 0, &fp->f_zap_type,
+		    sizeof (fp->f_zap_type));
 		if (rc)
 			return (rc);
 
@@ -240,9 +229,8 @@
 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
 		} else {
 			rc = dnode_read(spa, &fp->f_dnode,
-					offsetof(zap_phys_t, zap_num_leafs),
-					&fp->f_num_leafs,
-					sizeof(fp->f_num_leafs));
+			    offsetof(zap_phys_t, zap_num_leafs),
+			    &fp->f_num_leafs, sizeof (fp->f_num_leafs));
 			if (rc)
 				return (rc);
 
@@ -250,10 +238,8 @@
 			fp->f_zap_leaf = malloc(bsize);
 			if (fp->f_zap_leaf == NULL)
 				return (ENOMEM);
-			rc = dnode_read(spa, &fp->f_dnode,
-					fp->f_seekp,
-					fp->f_zap_leaf,
-					bsize);
+			rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp,
+			    fp->f_zap_leaf, bsize);
 			if (rc)
 				return (rc);
 		}
@@ -264,11 +250,11 @@
 		if (fp->f_seekp >= bsize)
 			return (ENOENT);
 
-		rc = dnode_read(spa, &fp->f_dnode,
-				fp->f_seekp, &mze, sizeof(mze));
+		rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, &mze,
+		    sizeof (mze));
 		if (rc)
 			return (rc);
-		fp->f_seekp += sizeof(mze);
+		fp->f_seekp += sizeof (mze);
 
 		if (!mze.mze_name[0])
 			goto mzap_next;
@@ -310,10 +296,8 @@
 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
 				return (ENOENT);
 
-			rc = dnode_read(spa, &fp->f_dnode,
-					fp->f_seekp,
-					fp->f_zap_leaf,
-					bsize);
+			rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp,
+			    fp->f_zap_leaf, bsize);
 			if (rc)
 				return (rc);
 		}
@@ -324,8 +308,8 @@
 			goto fzap_next;
 
 		namelen = zc->l_entry.le_name_numints;
-		if (namelen > sizeof(d->d_name))
-			namelen = sizeof(d->d_name);
+		if (namelen > sizeof (d->d_name))
+			namelen = sizeof (d->d_name);
 
 		/*
 		 * Paste the name back together.
@@ -342,7 +326,7 @@
 			namelen -= len;
 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
 		}
-		d->d_name[sizeof(d->d_name) - 1] = 0;
+		d->d_name[sizeof (d->d_name) - 1] = 0;
 
 		/*
 		 * Assume the first eight bytes of the value are
@@ -363,57 +347,107 @@
     size_t bytes)
 {
 	int fd, ret;
-	size_t res, size, remainder, rb_size, blksz;
-	unsigned secsz;
-	off_t off;
-	char *bouncebuf, *rb_buf;
+	size_t res, head, tail, total_size, full_sec_size;
+	unsigned secsz, do_tail_read;
+	off_t start_sec;
+	char *outbuf, *bouncebuf;
 
-	fd = (uintptr_t) priv;
+	fd = (uintptr_t)priv;
+	outbuf = (char *)buf;
 	bouncebuf = NULL;
 
 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
 	if (ret != 0)
 		return (ret);
 
-	off = offset / secsz;
-	remainder = offset % secsz;
-	if (lseek(fd, off * secsz, SEEK_SET) == -1)
-		return (errno);
+	/*
+	 * Handling reads of arbitrary offset and size - multi-sector case
+	 * and single-sector case.
+	 *
+	 *                        Multi-sector Case
+	 *                (do_tail_read = true if tail > 0)
+	 *
+	 *   |<----------------------total_size--------------------->|
+	 *   |                                                       |
+	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
+	 *   |          |                                 |          |
+	 *   |          |       |<~full_sec_size~>|       |          |
+	 *   +------------------+                 +------------------+
+	 *   |          |0101010|     .  .  .     |0101011|          |
+	 *   +------------------+                 +------------------+
+	 *         start_sec                         start_sec + n
+	 *
+	 *
+	 *                      Single-sector Case
+	 *                    (do_tail_read = false)
+	 *
+	 *              |<------total_size = secsz----->|
+	 *              |                               |
+	 *              |<-head->|<---bytes--->|<-tail->|
+	 *              +-------------------------------+
+	 *              |        |0101010101010|        |
+	 *              +-------------------------------+
+	 *                          start_sec
+	 */
+	start_sec = offset / secsz;
+	head = offset % secsz;
+	total_size = roundup2(head + bytes, secsz);
+	tail = total_size - (head + bytes);
+	do_tail_read = ((tail > 0) && (head + bytes > secsz));
+	full_sec_size = total_size;
+	if (head > 0)
+		full_sec_size -= secsz;
+	if (do_tail_read)
+		full_sec_size -= secsz;
 
-	rb_buf = buf;
-	rb_size = bytes;
-	size = roundup2(bytes + remainder, secsz);
-	blksz = size;
-	if (remainder != 0 || size != bytes) {
-		bouncebuf = zfs_alloc(secsz);
+	/* Return of partial sector data requires a bounce buffer. */
+	if ((head > 0) || do_tail_read) {
+		bouncebuf = malloc(secsz);
 		if (bouncebuf == NULL) {
 			printf("vdev_read: out of memory\n");
 			return (ENOMEM);
 		}
-		rb_buf = bouncebuf;
-		blksz = rb_size - remainder;
 	}
 
-	while (bytes > 0) {
-		res = read(fd, rb_buf, rb_size);
-		if (res != rb_size) {
+	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
+		ret = errno;
+		goto error;
+	}
+
+	/* Partial data return from first sector */
+	if (head > 0) {
+		res = read(fd, bouncebuf, secsz);
+		if (res != secsz) {
 			ret = EIO;
 			goto error;
 		}
-		if (bytes < blksz)
-			blksz = bytes;
-		if (bouncebuf != NULL)
-			memcpy(buf, rb_buf + remainder, blksz);
-		buf = (void *)((uintptr_t)buf + blksz);
-		bytes -= blksz;
-		remainder = 0;
-		blksz = rb_size;
+		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
+		outbuf += min(secsz - head, bytes);
+	}
+
+	/* Full data return from read sectors */
+	if (full_sec_size > 0) {
+		res = read(fd, outbuf, full_sec_size);
+		if (res != full_sec_size) {
+			ret = EIO;
+			goto error;
+		}
+		outbuf += full_sec_size;
+	}
+
+	/* Partial data return from last sector */
+	if (do_tail_read) {
+		res = read(fd, bouncebuf, secsz);
+		if (res != secsz) {
+			ret = EIO;
+			goto error;
+		}
+		memcpy(outbuf, bouncebuf, secsz - tail);
 	}
 
 	ret = 0;
 error:
-	if (bouncebuf != NULL)
-		zfs_free(bouncebuf, secsz);
+	free(bouncebuf);
 	return (ret);
 }
 
@@ -449,7 +483,7 @@
 	int		fd;
 	const char	*devname;
 	uint64_t	*pool_guid;
-	u_int		secsz;
+	unsigned	secsz;
 };
 
 static int
@@ -762,7 +796,7 @@
 	 * the environment and we can stop caring about old kernels,
 	 * we can remove this part.
 	 */
-	snprintf(buf, sizeof(buf), "zfs-bootfs=%s/%" PRIu64, spa->spa_name,
+	snprintf(buf, sizeof (buf), "zfs-bootfs=%s/%" PRIu64, spa->spa_name,
 	    objnum);
 	n = strlen(buf);
 	if (spa->spa_boot_vdev->v_phys_path != NULL) {
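
The rewritten vdev_read() above splits an arbitrary (offset, bytes) request
into an unaligned head, a run of whole sectors, and an unaligned tail, as the
new block comment's diagram shows. A standalone sketch of the same arithmetic,
traced for a hypothetical request (secsz = 512, offset = 700, bytes = 1000;
all values made up):

	#include <stdio.h>
	#include <stddef.h>

	/* y must be a power of two, as with the loader's roundup2() */
	#define	ROUNDUP2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))

	int
	main(void)
	{
		size_t secsz = 512, offset = 700, bytes = 1000;

		size_t start_sec = offset / secsz;		/* 1 */
		size_t head = offset % secsz;			/* 188 */
		size_t total_size = ROUNDUP2(head + bytes, secsz); /* 1536 */
		size_t tail = total_size - (head + bytes);	/* 348 */
		int do_tail_read = (tail > 0) && (head + bytes > secsz);
		size_t full_sec_size = total_size;

		if (head > 0)
			full_sec_size -= secsz;
		if (do_tail_read)
			full_sec_size -= secsz;			/* 512 */

		/* head copy (324) + full read (512) + tail copy (164) = 1000 */
		printf("start_sec=%zu head=%zu full=%zu tail=%zu\n",
		    start_sec, head, full_sec_size, tail);
		return (0);
	}

Only the head and tail sectors go through the bounce buffer; the whole sectors
in between are read straight into the caller's buffer, which is what lets the
new code drop the old per-iteration copy loop (the corruption fixed by 11935).
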
--- a/usr/src/boot/lib/libstand/zfs/zfsimpl.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/boot/lib/libstand/zfs/zfsimpl.c	Thu Nov 14 23:30:04 2019 +0000
@@ -1106,6 +1106,7 @@
 	const unsigned char *kids;
 	int nkids, i, is_new;
 	uint64_t is_offline, is_faulted, is_degraded, is_removed, isnt_present;
+	uint64_t is_log;
 
 	if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
 	    NULL, &guid) ||
@@ -1129,6 +1130,7 @@
 	}
 
 	is_offline = is_removed = is_faulted = is_degraded = isnt_present = 0;
+	is_log = 0;
 
 	nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, NULL,
 	    &is_offline);
@@ -1140,6 +1142,8 @@
 	    &is_degraded);
 	nvlist_find(nvlist, ZPOOL_CONFIG_NOT_PRESENT, DATA_TYPE_UINT64, NULL,
 	    &isnt_present);
+	nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64, NULL,
+	    &is_log);
 
 	vdev = vdev_find(guid);
 	if (!vdev) {
@@ -1226,6 +1230,7 @@
 				return (ENOMEM);
 			vdev->v_name = name;
 		}
+		vdev->v_islog = is_log == 1;
 	} else {
 		is_new = 0;
 	}
@@ -1429,6 +1434,12 @@
 {
 	vdev_t *kid;
 	int ret;
+
+	if (vdev->v_islog) {
+		(void)pager_output("        logs\n");
+		indent++;
+	}
+
 	ret = print_state(indent, vdev->v_name, vdev->v_state);
 	if (ret != 0)
 		return (ret);
@@ -1534,32 +1545,171 @@
 }
 
 static int
+vdev_uberblock_compare(const uberblock_t *ub1, const uberblock_t *ub2)
+{
+	unsigned int seq1 = 0;
+	unsigned int seq2 = 0;
+	int cmp = AVL_CMP(ub1->ub_txg, ub2->ub_txg);
+
+	if (cmp != 0)
+		return (cmp);
+
+	cmp = AVL_CMP(ub1->ub_timestamp, ub2->ub_timestamp);
+	if (cmp != 0)
+		return (cmp);
+
+	if (MMP_VALID(ub1) && MMP_SEQ_VALID(ub1))
+		seq1 = MMP_SEQ(ub1);
+
+	if (MMP_VALID(ub2) && MMP_SEQ_VALID(ub2))
+		seq2 = MMP_SEQ(ub2);
+
+	return (AVL_CMP(seq1, seq2));
+}
+
+static int
+uberblock_verify(uberblock_t *ub)
+{
+	if (ub->ub_magic == BSWAP_64((uint64_t)UBERBLOCK_MAGIC)) {
+		byteswap_uint64_array(ub, sizeof (uberblock_t));
+	}
+
+	if (ub->ub_magic != UBERBLOCK_MAGIC ||
+	    !SPA_VERSION_IS_SUPPORTED(ub->ub_version))
+		return (EINVAL);
+
+	return (0);
+}
+
+static int
+vdev_label_read(vdev_t *vd, int l, void *buf, uint64_t offset,
+    size_t size)
+{
+	blkptr_t bp;
+	off_t off;
+
+	off = vdev_label_offset(vd->v_psize, l, offset);
+
+	BP_ZERO(&bp);
+	BP_SET_LSIZE(&bp, size);
+	BP_SET_PSIZE(&bp, size);
+	BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
+	BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
+	DVA_SET_OFFSET(BP_IDENTITY(&bp), off);
+	ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
+
+	return (vdev_read_phys(vd, &bp, buf, off, size));
+}
+
+static unsigned char *
+vdev_label_read_config(vdev_t *vd, uint64_t txg)
+{
+	vdev_phys_t *label;
+	uint64_t best_txg = 0;
+	uint64_t label_txg = 0;
+	uint64_t asize;
+	unsigned char *nvl;
+	size_t nvl_size;
+	int error;
+
+	label = malloc(sizeof (vdev_phys_t));
+	if (label == NULL)
+		return (NULL);
+
+	nvl_size = VDEV_PHYS_SIZE - sizeof (zio_eck_t) - 4;
+	nvl = malloc(nvl_size);
+	if (nvl == NULL)
+		goto done;
+
+	for (int l = 0; l < VDEV_LABELS; l++) {
+		const unsigned char *nvlist;
+
+		if (vdev_label_read(vd, l, label,
+		    offsetof(vdev_label_t, vl_vdev_phys),
+		    sizeof (vdev_phys_t)))
+			continue;
+
+		if (label->vp_nvlist[0] != NV_ENCODE_XDR)
+			continue;
+
+		nvlist = (const unsigned char *) label->vp_nvlist + 4;
+		error = nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG,
+		    DATA_TYPE_UINT64, NULL, &label_txg);
+		if (error != 0 || label_txg == 0) {
+			memcpy(nvl, nvlist, nvl_size);
+			goto done;
+		}
+
+		if (label_txg <= txg && label_txg > best_txg) {
+			best_txg = label_txg;
+			memcpy(nvl, nvlist, nvl_size);
+
+			/*
+			 * Use asize from pool config. We need this
+			 * because we can get bad value from BIOS.
+			 */
+			if (nvlist_find(nvlist, ZPOOL_CONFIG_ASIZE,
+			    DATA_TYPE_UINT64, NULL, &asize) == 0) {
+				vd->v_psize = asize +
+				    VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
+			}
+		}
+	}
+
+	if (best_txg == 0) {
+		free(nvl);
+		nvl = NULL;
+	}
+done:
+	free(label);
+	return (nvl);
+}
+
+static void
+vdev_uberblock_load(vdev_t *vd, uberblock_t *ub)
+{
+	uberblock_t *buf;
+
+	buf = malloc(VDEV_UBERBLOCK_SIZE(vd));
+	if (buf == NULL)
+		return;
+
+	for (int l = 0; l < VDEV_LABELS; l++) {
+		for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
+			if (vdev_label_read(vd, l, buf,
+			    VDEV_UBERBLOCK_OFFSET(vd, n),
+			    VDEV_UBERBLOCK_SIZE(vd)))
+				continue;
+			if (uberblock_verify(buf) != 0)
+				continue;
+
+			if (vdev_uberblock_compare(buf, ub) > 0)
+				*ub = *buf;
+		}
+	}
+	free(buf);
+}
+
+static int
 vdev_probe(vdev_phys_read_t *phys_read, void *read_priv, spa_t **spap)
 {
 	vdev_t vtmp;
-	vdev_phys_t *vdev_label = (vdev_phys_t *) zap_scratch;
-	vdev_phys_t *tmp_label;
 	spa_t *spa;
 	vdev_t *vdev, *top_vdev, *pool_vdev;
-	off_t off;
-	blkptr_t bp;
-	const unsigned char *nvlist = NULL;
+	unsigned char *nvlist;
 	uint64_t val;
 	uint64_t guid;
-	uint64_t best_txg = 0;
 	uint64_t pool_txg, pool_guid;
 	const char *pool_name;
 	const unsigned char *vdevs;
 	const unsigned char *features;
-	int i, l, rc, is_newer;
-	char *upbuf;
-	const struct uberblock *up;
+	int rc, is_newer;
 
 	/*
 	 * Load the vdev label and figure out which
 	 * uberblock is most current.
 	 */
-	memset(&vtmp, 0, sizeof(vtmp));
+	memset(&vtmp, 0, sizeof (vtmp));
 	vtmp.v_phys_read = phys_read;
 	vtmp.v_read_priv = read_priv;
 	vtmp.v_psize = P2ALIGN(ldi_get_size(read_priv),
@@ -1569,67 +1719,20 @@
 	if (vtmp.v_psize < SPA_MINDEVSIZE)
 		return (EIO);
 
-	tmp_label = zfs_alloc(sizeof (vdev_phys_t));
-
-	for (l = 0; l < VDEV_LABELS; l++) {
-		off = vdev_label_offset(vtmp.v_psize, l,
-		    offsetof(vdev_label_t, vl_vdev_phys));
-
-		BP_ZERO(&bp);
-		BP_SET_LSIZE(&bp, sizeof(vdev_phys_t));
-		BP_SET_PSIZE(&bp, sizeof(vdev_phys_t));
-		BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
-		BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
-		DVA_SET_OFFSET(BP_IDENTITY(&bp), off);
-		ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
-
-		if (vdev_read_phys(&vtmp, &bp, tmp_label, off, 0))
-			continue;
-
-		if (tmp_label->vp_nvlist[0] != NV_ENCODE_XDR)
-			continue;
-
-		nvlist = (const unsigned char *) tmp_label->vp_nvlist + 4;
-		if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG,
-		    DATA_TYPE_UINT64, NULL, &pool_txg) != 0)
-			continue;
-
-		if (best_txg <= pool_txg) {
-			uint64_t asize;
-
-			best_txg = pool_txg;
-			memcpy(vdev_label, tmp_label, sizeof (vdev_phys_t));
-
-			/*
-			 * Use asize from pool config. We need this
-			 * because we can get bad value from BIOS.
-			 */
-			if (nvlist_find(nvlist, ZPOOL_CONFIG_ASIZE,
-			    DATA_TYPE_UINT64, NULL, &asize) == 0) {
-				vtmp.v_psize = asize +
-				    VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
-			}
-		}
-	}
-
-	zfs_free(tmp_label, sizeof (vdev_phys_t));
-
-	if (best_txg == 0)
+	nvlist = vdev_label_read_config(&vtmp, UINT64_MAX);
+	if (nvlist == NULL)
 		return (EIO);
 
-	if (vdev_label->vp_nvlist[0] != NV_ENCODE_XDR)
-		return (EIO);
-
-	nvlist = (const unsigned char *) vdev_label->vp_nvlist + 4;
-
 	if (nvlist_find(nvlist, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64,
 	    NULL, &val) != 0) {
+		free(nvlist);
 		return (EIO);
 	}
 
 	if (!SPA_VERSION_IS_SUPPORTED(val)) {
 		printf("ZFS: unsupported ZFS version %u (should be %u)\n",
 		    (unsigned) val, (unsigned) SPA_VERSION);
+		free(nvlist);
 		return (EIO);
 	}
 
@@ -1637,16 +1740,19 @@
 	if (nvlist_find(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ,
 	    DATA_TYPE_NVLIST, NULL, &features) == 0 &&
 	    nvlist_check_features_for_read(features) != 0) {
+		free(nvlist);
 		return (EIO);
 	}
 
 	if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64,
 	    NULL, &val) != 0) {
+		free(nvlist);
 		return (EIO);
 	}
 
 	if (val == POOL_STATE_DESTROYED) {
 		/* We don't boot only from destroyed pools. */
+		free(nvlist);
 		return (EIO);
 	}
 
@@ -1660,12 +1766,7 @@
 		 * Cache and spare devices end up here - just ignore
 		 * them.
 		 */
-		/*printf("ZFS: can't find pool details\n");*/
-		return (EIO);
-	}
-
-	if (nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64,
-	    NULL, &val) == 0 && val != 0) {
+		free(nvlist);
 		return (EIO);
 	}
 
@@ -1675,8 +1776,10 @@
 	spa = spa_find_by_guid(pool_guid);
 	if (spa == NULL) {
 		spa = spa_create(pool_guid, pool_name);
-		if (spa == NULL)
+		if (spa == NULL) {
+			free(nvlist);
 			return (ENOMEM);
+		}
 	}
 	if (pool_txg > spa->spa_txg) {
 		spa->spa_txg = pool_txg;
@@ -1693,18 +1796,24 @@
 	 */
 	if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
 	    NULL, &guid) != 0) {
+		free(nvlist);
 		return (EIO);
 	}
 	vdev = vdev_find(guid);
-	if (vdev && vdev->v_phys_read)	/* Has this vdev already been inited? */
+	/* Has this vdev already been inited? */
+	if (vdev && vdev->v_phys_read) {
+		free(nvlist);
 		return (EIO);
+	}
 
 	if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
 	    NULL, &vdevs)) {
+		free(nvlist);
 		return (EIO);
 	}
 
 	rc = vdev_init_from_nvlist(vdevs, NULL, &top_vdev, is_newer);
+	free(nvlist);
 	if (rc != 0)
 		return (rc);
 
@@ -1714,6 +1823,7 @@
 	STAILQ_FOREACH(pool_vdev, &spa->spa_vdevs, v_childlink)
 		if (top_vdev == pool_vdev)
 			break;
+
 	if (!pool_vdev && top_vdev) {
 		top_vdev->spa = spa;
 		STAILQ_INSERT_TAIL(&spa->spa_vdevs, top_vdev, v_childlink);
@@ -1734,6 +1844,9 @@
 		return (EIO);
 	}
 
+	if (vdev->v_islog)
+		spa->spa_with_log = vdev->v_islog;
+
 	/* Record boot vdev for spa. */
 	if (is_newer == 1)
 		spa->spa_boot_vdev = vdev;
@@ -1748,36 +1861,7 @@
 	 * the best uberblock and then we can actually access
 	 * the contents of the pool.
 	 */
-	upbuf = zfs_alloc(VDEV_UBERBLOCK_SIZE(vdev));
-	up = (const struct uberblock *)upbuf;
-	for (l = 0; l < VDEV_LABELS; l++) {
-		for (i = 0; i < VDEV_UBERBLOCK_COUNT(vdev); i++) {
-			off = vdev_label_offset(vdev->v_psize, l,
-			    VDEV_UBERBLOCK_OFFSET(vdev, i));
-			BP_ZERO(&bp);
-			DVA_SET_OFFSET(&bp.blk_dva[0], off);
-			BP_SET_LSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev));
-			BP_SET_PSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev));
-			BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
-			BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
-			ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
-
-			if (vdev_read_phys(vdev, &bp, upbuf, off, 0) != 0)
-				continue;
-
-			if (up->ub_magic != UBERBLOCK_MAGIC)
-				continue;
-			if (up->ub_txg < spa->spa_txg)
-				continue;
-			if (up->ub_txg > spa->spa_uberblock.ub_txg ||
-			    (up->ub_txg == spa->spa_uberblock.ub_txg &&
-			    up->ub_timestamp >
-			    spa->spa_uberblock.ub_timestamp)) {
-				spa->spa_uberblock = *up;
-			}
-		}
-	}
-	zfs_free(upbuf, VDEV_UBERBLOCK_SIZE(vdev));
+	vdev_uberblock_load(vdev, &spa->spa_uberblock);
 
 	vdev->spa = spa;
 	if (spap != NULL)
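
vdev_uberblock_load() above runs every readable uberblock through
vdev_uberblock_compare(), which orders candidates by txg, then timestamp, then
MMP sequence number; the old open-coded loop in vdev_probe() compared only txg
and timestamp. A standalone sketch of that precedence, with simplified types
and made-up values (not the loader's real structs):

	#include <stdio.h>
	#include <stdint.h>

	#define	AVL_CMP(a, b)	(((a) > (b)) - ((a) < (b)))

	struct ub {
		uint64_t txg;		/* txg of last sync */
		uint64_t timestamp;	/* UTC time of last sync */
		uint64_t mmp_seq;	/* 0 unless MMP_SEQ_VALID() */
	};

	static int
	ub_compare(const struct ub *u1, const struct ub *u2)
	{
		int cmp = AVL_CMP(u1->txg, u2->txg);

		if (cmp != 0)
			return (cmp);
		cmp = AVL_CMP(u1->timestamp, u2->timestamp);
		if (cmp != 0)
			return (cmp);
		return (AVL_CMP(u1->mmp_seq, u2->mmp_seq));
	}

	int
	main(void)
	{
		struct ub a = { 100, 5000, 0 };
		struct ub b = { 101, 4000, 0 };	/* newer txg, older clock */
		struct ub c = { 101, 4000, 7 };	/* MMP seq breaks the tie */

		/* prints "-1 -1": b beats a on txg, c beats b on MMP seq */
		printf("%d %d\n", ub_compare(&a, &b), ub_compare(&b, &c));
		return (0);
	}
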
--- a/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h	Thu Nov 14 23:30:04 2019 +0000
@@ -66,6 +66,14 @@
 
 #define _NOTE(s)
 
+/*
+ * AVL comparator helpers
+ */
+#define	AVL_ISIGN(a)	(((a) > 0) - ((a) < 0))
+#define	AVL_CMP(a, b)	(((a) > (b)) - ((a) < (b)))
+#define	AVL_PCMP(a, b)	\
+	(((uintptr_t)(a) > (uintptr_t)(b)) - ((uintptr_t)(a) < (uintptr_t)(b)))
+
 /* CRC64 table */
 #define	ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */
 
@@ -492,8 +500,16 @@
 #define	VDEV_PHYS_SIZE		(112 << 10)
 #define	VDEV_UBERBLOCK_RING	(128 << 10)
 
+/*
+ * MMP blocks occupy the last MMP_BLOCKS_PER_LABEL slots in the uberblock
+ * ring when MMP is enabled.
+ */
+#define	MMP_BLOCKS_PER_LABEL	1
+
+/* The largest uberblock we support is 8k. */
+#define	MAX_UBERBLOCK_SHIFT	(13)
 #define	VDEV_UBERBLOCK_SHIFT(vd)	\
-	MAX((vd)->v_top->v_ashift, UBERBLOCK_SHIFT)
+	MIN(MAX((vd)->v_top->v_ashift, UBERBLOCK_SHIFT), MAX_UBERBLOCK_SHIFT)
 #define	VDEV_UBERBLOCK_COUNT(vd)	\
 	(VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
 #define	VDEV_UBERBLOCK_OFFSET(vd, n)	\
@@ -843,15 +859,88 @@
  */
 #define	UBERBLOCK_MAGIC		0x00bab10c		/* oo-ba-bloc!	*/
 #define	UBERBLOCK_SHIFT		10			/* up to 1K	*/
+#define	MMP_MAGIC		0xa11cea11		/* all-see-all  */
 
-struct uberblock {
+#define	MMP_INTERVAL_VALID_BIT	0x01
+#define	MMP_SEQ_VALID_BIT	0x02
+#define	MMP_FAIL_INT_VALID_BIT	0x04
+
+#define	MMP_VALID(ubp)		(ubp->ub_magic == UBERBLOCK_MAGIC && \
+				    ubp->ub_mmp_magic == MMP_MAGIC)
+#define	MMP_INTERVAL_VALID(ubp)	(MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+				    MMP_INTERVAL_VALID_BIT))
+#define	MMP_SEQ_VALID(ubp)	(MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+				    MMP_SEQ_VALID_BIT))
+#define	MMP_FAIL_INT_VALID(ubp)	(MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+				    MMP_FAIL_INT_VALID_BIT))
+
+#define	MMP_INTERVAL(ubp)	((ubp->ub_mmp_config & 0x00000000FFFFFF00) \
+				    >> 8)
+#define	MMP_SEQ(ubp)		((ubp->ub_mmp_config & 0x0000FFFF00000000) \
+				    >> 32)
+#define	MMP_FAIL_INT(ubp)	((ubp->ub_mmp_config & 0xFFFF000000000000) \
+				    >> 48)
+
+typedef struct uberblock {
 	uint64_t	ub_magic;	/* UBERBLOCK_MAGIC		*/
 	uint64_t	ub_version;	/* SPA_VERSION			*/
 	uint64_t	ub_txg;		/* txg of last sync		*/
 	uint64_t	ub_guid_sum;	/* sum of all vdev guids	*/
 	uint64_t	ub_timestamp;	/* UTC time of last sync	*/
 	blkptr_t	ub_rootbp;	/* MOS objset_phys_t		*/
-};
+	/* highest SPA_VERSION supported by software that wrote this txg */
+	uint64_t	ub_software_version;
+	/* Maybe missing in uberblocks we read, but always written */
+	uint64_t	ub_mmp_magic;
+	/*
+	 * If ub_mmp_delay == 0 and ub_mmp_magic is valid, MMP is off.
+	 * Otherwise, nanosec since last MMP write.
+	 */
+	uint64_t	ub_mmp_delay;
+
+	/*
+	 * The ub_mmp_config contains the multihost write interval, multihost
+	 * fail intervals, sequence number for sub-second granularity, and
+	 * valid bit mask.  This layout is as follows:
+	 *
+	 *   64      56      48      40      32      24      16      8       0
+	 *   +-------+-------+-------+-------+-------+-------+-------+-------+
+	 * 0 | Fail Intervals|      Seq      |   Write Interval (ms) | VALID |
+	 *   +-------+-------+-------+-------+-------+-------+-------+-------+
+	 *
+	 * This allows a write_interval of (2^24/1000)s, over 4.5 hours
+	 *
+	 * VALID Bits:
+	 * - 0x01 - Write Interval (ms)
+	 * - 0x02 - Sequence number exists
+	 * - 0x04 - Fail Intervals
+	 * - 0xf8 - Reserved
+	 */
+	uint64_t	ub_mmp_config;
+
+	/*
+	 * ub_checkpoint_txg indicates two things about the current uberblock:
+	 *
+	 * 1] If it is not zero then this uberblock is a checkpoint. If it is
+	 *    zero, then this uberblock is not a checkpoint.
+	 *
+	 * 2] On checkpointed uberblocks, the value of ub_checkpoint_txg is
+	 *    the ub_txg that the uberblock had at the time we moved it to
+	 *    the MOS config.
+	 *
+	 * The field is set when we checkpoint the uberblock and continues to
+	 * hold that value even after we've rewound (unlike the ub_txg that
+	 * is reset to a higher value).
+	 *
+	 * Besides checks used to determine whether we are reopening the
+	 * pool from a checkpointed uberblock [see spa_ld_select_uberblock()],
+	 * the value of the field is used to determine which ZIL blocks have
+	 * been allocated according to the ms_sm when we are rewinding to a
+	 * checkpoint. Specifically, if blk_birth > ub_checkpoint_txg, then
+	 * the ZIL block is not allocated [see uses of spa_min_claim_txg()].
+	 */
+	uint64_t	ub_checkpoint_txg;
+} uberblock_t;
 
 /*
  * Flags.
@@ -1681,6 +1770,7 @@
 	vdev_phys_read_t *v_phys_read;	/* read from raw leaf vdev */
 	vdev_read_t	*v_read;	/* read from vdev */
 	void		*v_read_priv;	/* private data for read function */
+	boolean_t	v_islog;
 	struct spa	*spa;		/* link to spa */
 	/*
 	 * Values stored in the config for an indirect or removing vdev.
@@ -1706,6 +1796,7 @@
 	void		*spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS];
 	int		spa_inited;	/* initialized */
 	vdev_t		*spa_boot_vdev;	/* boot device for kernel */
+	boolean_t	spa_with_log;	/* this pool has log */
 } spa_t;
 
 /* IO related arguments. */
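
The ub_mmp_config comment above documents the field layout: valid bits in bits
[7:0], the write interval in milliseconds in [31:8], the sequence number in
[47:32], and the fail intervals in [63:48]. A standalone sketch that packs and
unpacks a made-up value using the same masks as the MMP_INTERVAL(), MMP_SEQ()
and MMP_FAIL_INT() macros:

	#include <stdio.h>
	#include <stdint.h>

	#define	MMP_INTERVAL_VALID_BIT	0x01
	#define	MMP_SEQ_VALID_BIT	0x02
	#define	MMP_FAIL_INT_VALID_BIT	0x04

	int
	main(void)
	{
		/* fail intervals 3, seq 42, write interval 1000 ms */
		uint64_t cfg = (3ULL << 48) | (42ULL << 32) |
		    (1000ULL << 8) | MMP_FAIL_INT_VALID_BIT |
		    MMP_SEQ_VALID_BIT | MMP_INTERVAL_VALID_BIT;

		/* prints "interval=1000 ms seq=42 fail=3" */
		printf("interval=%llu ms seq=%llu fail=%llu\n",
		    (unsigned long long)((cfg & 0x00000000FFFFFF00ULL) >> 8),
		    (unsigned long long)((cfg & 0x0000FFFF00000000ULL) >> 32),
		    (unsigned long long)((cfg & 0xFFFF000000000000ULL) >> 48));
		return (0);
	}

Separately, the new MAX_UBERBLOCK_SHIFT caps VDEV_UBERBLOCK_SHIFT() at 8K, so
a large-ashift vdev still fits multiple uberblocks in the 128K ring.
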
--- a/usr/src/cmd/fm/modules/common/eversholt/eft_mdb.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/fm/modules/common/eversholt/eft_mdb.c	Thu Nov 14 23:30:04 2019 +0000
@@ -22,6 +22,8 @@
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <sys/mdb_modapi.h>
@@ -316,8 +318,8 @@
 	if (argc) {
 		if (mdb_getopts(argc, argv,
 		    'l', MDB_OPT_UINT64, &ull,
-		    'p', MDB_OPT_SETBITS, TRUE, &opt_p,
-		    MDB_OPT_UINT64) != argc) {
+		    'p', MDB_OPT_SETBITS, TRUE, &opt_p, MDB_OPT_UINT64,
+		    NULL) != argc) {
 			return (DCMD_USAGE);
 		}
 	}
--- a/usr/src/cmd/idmap/idmapd/idmap_lsa.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/idmap/idmapd/idmap_lsa.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  */
 
 /*
@@ -83,9 +83,9 @@
 
 	(void) snprintf(sid, sizeof (sid), "%s-%u", sidprefix, rid);
 
-	rc = smb_lookup_sid(sid, &acct);
+	rc = smb_lookup_lsid(sid, &acct);
 	if (rc != 0) {
-		idmapdlog(LOG_ERR, "Error:  smb_lookup_sid failed.");
+		idmapdlog(LOG_ERR, "Error: SMB lookup SID failed.");
 		idmapdlog(LOG_ERR,
 		    "Check SMB service (svc:/network/smb/server).");
 		idmapdlog(LOG_ERR,
@@ -100,7 +100,7 @@
 	}
 	if (acct.a_status != NT_STATUS_SUCCESS) {
 		idmapdlog(LOG_WARNING,
-		    "Warning:  smb_lookup_sid(%s) failed (0x%x)",
+		    "Warning:  SMB lookup SID(%s) failed (0x%x)",
 		    sid, acct.a_status);
 		/* Fail soft */
 		ret = IDMAP_ERR_NOTFOUND;
@@ -167,9 +167,9 @@
 		goto out;
 	}
 
-	rc = smb_lookup_name(namedom, SidTypeUnknown, &acct);
+	rc = smb_lookup_lname(namedom, SidTypeUnknown, &acct);
 	if (rc != 0) {
-		idmapdlog(LOG_ERR, "Error:  smb_lookup_name failed.");
+		idmapdlog(LOG_ERR, "Error: SMB lookup name failed.");
 		idmapdlog(LOG_ERR,
 		    "Check SMB service (svc:/network/smb/server).");
 		idmapdlog(LOG_ERR,
@@ -183,7 +183,7 @@
 	}
 	if (acct.a_status != NT_STATUS_SUCCESS) {
 		idmapdlog(LOG_WARNING,
-		    "Warning:  smb_lookup_name(%s) failed (0x%x)",
+		    "Warning: SMB lookup name(%s) failed (0x%x)",
 		    namedom, acct.a_status);
 		/* Fail soft */
 		ret = IDMAP_ERR_NOTFOUND;
--- a/usr/src/cmd/mdb/common/mdb/mdb_modapi.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/mdb/mdb_modapi.h	Thu Nov 14 23:30:04 2019 +0000
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #ifndef	_MDB_MODAPI_H
@@ -245,7 +245,7 @@
 #define	MDB_OPT_UINT64	5			/* uint64_t argument */
 #define	MDB_OPT_UINTPTR_SET	6		/* boolean_t+uintptr_t args */
 
-extern int mdb_getopts(int, const mdb_arg_t *, ...);
+extern int mdb_getopts(int, const mdb_arg_t *, ...) __sentinel(0);
 
 extern u_longlong_t mdb_strtoull(const char *);
 
--- a/usr/src/cmd/mdb/common/mdb/mdb_module_load.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/mdb/mdb_module_load.c	Thu Nov 14 23:30:04 2019 +0000
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <sys/param.h>
@@ -60,21 +60,22 @@
 		/*
 		 * Remove any .so(.[0-9]+)? suffix
 		 */
-		while ((p = strrchr(buf, '.')) != NULL) {
+		if ((p = strrchr(buf, '.')) != NULL) {
 			for (q = p + 1; isdigit(*q); q++)
 				;
 
 			if (*q == '\0') {
-				/* found digits to remove */
-				*p = '\0';
-				continue;
-			}
+				if (q > p + 1) {
 
-			if (strcmp(p, ".so") == 0) {
-				*p = '\0';
-				break;
+					/* found digits to remove */
+					*p = '\0';
+				}
 			}
-
+			if ((p = strrchr(buf, '.')) != NULL) {
+				if (strcmp(p, ".so") == 0) {
+					*p = '\0';
+				}
+			}
 		}
 		fullname = name;
 		name = buf;
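
The rewritten suffix handling above fixes 11924: in the old code, if the final
'.' in a module name was followed by neither digits nor "so" (say "foo.bar"),
the while loop stripped nothing and retried the same strrchr() forever,
hanging ::load. The new code makes at most two passes, first dropping a
trailing .[0-9]+ and then a trailing .so. A standalone restatement with sample
inputs (the helper name is made up):

	#include <ctype.h>
	#include <stdio.h>
	#include <string.h>

	static void
	strip_so_suffix(char *buf)
	{
		char *p, *q;

		if ((p = strrchr(buf, '.')) != NULL) {
			for (q = p + 1; isdigit(*q); q++)
				;
			if (*q == '\0' && q > p + 1)
				*p = '\0';	/* drop trailing .[0-9]+ */
			if ((p = strrchr(buf, '.')) != NULL &&
			    strcmp(p, ".so") == 0)
				*p = '\0';	/* drop trailing .so */
		}
	}

	int
	main(void)
	{
		char a[] = "qlc.so.1", b[] = "qlc.so", c[] = "foo.bar";

		strip_so_suffix(a);
		strip_so_suffix(b);
		strip_so_suffix(c);
		/* prints "qlc qlc foo.bar" */
		printf("%s %s %s\n", a, b, c);
		return (0);
	}
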
--- a/usr/src/cmd/mdb/common/modules/cpc/cpc.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/cpc/cpc.c	Thu Nov 14 23:30:04 2019 +0000
@@ -22,6 +22,8 @@
 /*
  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <sys/mdb_modapi.h>
@@ -57,7 +59,8 @@
 	int		j;
 	uint_t		opt_v = FALSE;
 
-	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &opt_v) != argc)
+	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &opt_v, NULL) !=
+	    argc)
 		return (DCMD_USAGE);
 
 	if ((flags & DCMD_ADDRSPEC) == 0) {
--- a/usr/src/cmd/mdb/common/modules/fctl/fctl.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/fctl/fctl.c	Thu Nov 14 23:30:04 2019 +0000
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <sys/mdb_modapi.h>
@@ -134,7 +134,7 @@
 	}
 
 	if (mdb_getopts(argc, argv,
-	    'l', MDB_OPT_SETBITS, TRUE, &longlist) != argc) {
+	    'l', MDB_OPT_SETBITS, TRUE, &longlist, NULL) != argc) {
 		return (DCMD_USAGE);
 	}
 
@@ -1104,7 +1104,8 @@
 
 	if (mdb_getopts(argc, argv,
 	    's', MDB_OPT_UINTPTR, &pktstart,
-	    'e', MDB_OPT_UINTPTR, &pktend) != argc) {
+	    'e', MDB_OPT_UINTPTR, &pktend,
+	    NULL) != argc) {
 		return (DCMD_USAGE);
 	}
 
--- a/usr/src/cmd/mdb/common/modules/genunix/fm.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/genunix/fm.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,6 +21,8 @@
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <sys/types.h>
@@ -158,7 +160,8 @@
 	if (!(flags & DCMD_ADDRSPEC))
 		return (DCMD_USAGE);
 
-	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &opt_v) != argc)
+	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &opt_v, NULL) !=
+	    argc)
 		return (DCMD_USAGE);
 
 	if (mdb_vread(&nvl, sizeof (nvl), addr) == -1) {
--- a/usr/src/cmd/mdb/common/modules/genunix/ldi.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/genunix/ldi.c	Thu Nov 14 23:30:04 2019 +0000
@@ -25,7 +25,7 @@
  */
 
 /*
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <sys/types.h>
@@ -290,7 +290,8 @@
 }
 
 static void
-ldi_handle_header(int refs, int ident) {
+ldi_handle_header(int refs, int ident)
+{
 	mdb_printf("%-?s ", "HANDLE");
 
 	if (refs)
@@ -369,7 +370,7 @@
 	int			refs = 1;
 
 	if (mdb_getopts(argc, argv,
-	    'i', MDB_OPT_SETBITS, TRUE, &ident) != argc)
+	    'i', MDB_OPT_SETBITS, TRUE, &ident, NULL) != argc)
 		return (DCMD_USAGE);
 
 	if (ident)
--- a/usr/src/cmd/mdb/common/modules/genunix/memory.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/genunix/memory.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <mdb/mdb_param.h>
@@ -695,7 +695,7 @@
 	if (mdb_getopts(argc, argv,
 	    'v', MDB_OPT_UINTPTR, &vp,
 	    'o', MDB_OPT_UINT64, &offset,
-	    0) != argc) {
+	    NULL) != argc) {
 		return (DCMD_USAGE);
 	}
 
--- a/usr/src/cmd/mdb/common/modules/libumem/umem.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/libumem/umem.c	Thu Nov 14 23:30:04 2019 +0000
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
  */
 
@@ -4089,7 +4089,7 @@
 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
-	    0) != argc)
+	    NULL) != argc)
 		return (DCMD_USAGE);
 
 	bzero(&mi, sizeof (mi));
@@ -4165,7 +4165,7 @@
 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
-	    0) != argc)
+	    NULL) != argc)
 		return (DCMD_USAGE);
 
 	if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
--- a/usr/src/cmd/mdb/common/modules/pmcs/pmcs.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/pmcs/pmcs.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2012 Milan Jurik. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <limits.h>
@@ -2646,7 +2647,8 @@
 	if (mdb_getopts(argc, argv,
 	    'i', MDB_OPT_UINT64, &index,
 	    's', MDB_OPT_UINT64, &snum,
-	    't', MDB_OPT_UINT64, &tag_type) != argc)
+	    't', MDB_OPT_UINT64, &tag_type,
+	    NULL) != argc)
 		return (DCMD_USAGE);
 
 	/*
--- a/usr/src/cmd/mdb/common/modules/qlc/qlc.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/qlc/qlc.c	Thu Nov 14 23:30:04 2019 +0000
@@ -34,6 +34,10 @@
  *
  */
 
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
 #pragma ident	"Copyright 2015 QLogic Corporation; ql_mdb.c"
 
 #include <sys/mdb_modapi.h>
@@ -1115,8 +1119,8 @@
 		return (DCMD_USAGE);
 	}
 
-	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &verbose) !=
-	    argc) {
+	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &verbose,
+	    NULL) != argc) {
 		return (DCMD_USAGE);
 	}
 
@@ -1750,8 +1754,8 @@
 		return (DCMD_USAGE);
 	}
 
-	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &verbose) !=
-	    argc) {
+	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &verbose,
+	    NULL) != argc) {
 		return (DCMD_USAGE);
 	}
 
@@ -3852,7 +3856,7 @@
 		return (DCMD_USAGE);
 	}
 
-	if (mdb_getopts(argc, argv) != argc) {
+	if (mdb_getopts(argc, argv, NULL) != argc) {
 		return (DCMD_USAGE);
 	}
 
--- a/usr/src/cmd/mdb/common/modules/scsi_vhci/scsi_vhci.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/scsi_vhci/scsi_vhci.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,10 +21,10 @@
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright 2019 Joyent, Inc.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/kmem.h>
 #include <sys/proc.h>
 #include <sys/time.h>
@@ -193,10 +193,10 @@
 	dump_condvar(value.ct_failover_cv, "ct_failover_cv");
 
 	mdb_printf("\n");
-	mdb_printf("ct_failover_flags TEMP_VAR: %8d\n", value.ct_failover_flags)
-;
-	mdb_printf("ct_failover_status UNUSED: %9d\n", value.ct_failover_status)
-;
+	mdb_printf("ct_failover_flags TEMP_VAR: %8d\n",
+	    value.ct_failover_flags);
+	mdb_printf("ct_failover_status UNUSED: %9d\n",
+	    value.ct_failover_status);
 
 	return (DCMD_OK);
 }
@@ -250,7 +250,7 @@
 
 
 	if (flags & DCMD_ADDRSPEC)
-	    mdb_warn("This command doesn't use an address\n");
+		mdb_warn("This command doesn't use an address\n");
 
 	if (i_vhci_states(0, 0, 0, 0, &ss) != DCMD_OK)
 		return (DCMD_ERR);
@@ -517,7 +517,7 @@
 	}
 	if (sp == NULL) {
 		if (mdb_getopts(argc, argv,
-		    'v', MDB_OPT_SETBITS, TRUE, &verbose) != argc) {
+		    'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL) != argc) {
 			return (DCMD_USAGE);
 		}
 	}
--- a/usr/src/cmd/mdb/common/modules/sctp/sctp.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/sctp/sctp.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <sys/types.h>
@@ -726,7 +727,8 @@
 	    'c', MDB_OPT_SETBITS, MDB_SCTP_SHOW_CLOSE, &opts,
 	    'e', MDB_OPT_SETBITS, MDB_SCTP_SHOW_EXT, &opts,
 	    'P', MDB_OPT_SETBITS, 1, &paddr,
-	    'd', MDB_OPT_SETBITS, MDB_SCTP_DUMP_ADDRS, &opts) != argc) {
+	    'd', MDB_OPT_SETBITS, MDB_SCTP_DUMP_ADDRS, &opts,
+	    NULL) != argc) {
 		return (DCMD_USAGE);
 	}
 
--- a/usr/src/cmd/mdb/common/modules/smbsrv/smbsrv.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/smbsrv/smbsrv.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  */
 
 #include <mdb/mdb_modapi.h>
@@ -1409,6 +1409,12 @@
 	{ "CHANGE_NOTIFY",
 	    SMB_USER_PRIV_CHANGE_NOTIFY,
 	    SMB_USER_PRIV_CHANGE_NOTIFY },
+	{ "READ_FILE",
+	    SMB_USER_PRIV_READ_FILE,
+	    SMB_USER_PRIV_READ_FILE },
+	{ "WRITE_FILE",
+	    SMB_USER_PRIV_WRITE_FILE,
+	    SMB_USER_PRIV_WRITE_FILE },
 	{ NULL, 0, 0 }
 };
 
--- a/usr/src/cmd/mdb/common/modules/sockfs/sockfs.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/sockfs/sockfs.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,6 +21,8 @@
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <sys/types.h>
@@ -78,7 +80,7 @@
 		 */
 		if (mdb_getopts(argc, argv,
 		    'e', MDB_OPT_SETBITS, 1, &opt_e,
-		    'E', MDB_OPT_SETBITS, 1, &opt_E) != argc)
+		    'E', MDB_OPT_SETBITS, 1, &opt_E, NULL) != argc)
 			return (DCMD_USAGE);
 
 		if (!opt_E) {
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Thu Nov 14 23:30:04 2019 +0000
@@ -22,7 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
- * Copyright (c) 2019 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -948,7 +948,8 @@
 	    'n', MDB_OPT_STR, &data.osname,
 	    'o', MDB_OPT_STR, &object,
 	    'l', MDB_OPT_UINT64, &data.level,
-	    'b', MDB_OPT_STR, &blkid) != argc) {
+	    'b', MDB_OPT_STR, &blkid,
+	    NULL) != argc) {
 		return (DCMD_USAGE);
 	}
 
@@ -3097,25 +3098,25 @@
 	return (WALK_NEXT);
 }
 
-typedef struct mdb_refcount {
+typedef struct mdb_zfs_refcount {
 	uint64_t rc_count;
-} mdb_refcount_t;
-
-typedef struct mdb_refcount_removed {
+} mdb_zfs_refcount_t;
+
+typedef struct mdb_zfs_refcount_removed {
 	uint64_t rc_removed_count;
-} mdb_refcount_removed_t;
-
-typedef struct mdb_refcount_tracked {
+} mdb_zfs_refcount_removed_t;
+
+typedef struct mdb_zfs_refcount_tracked {
 	boolean_t rc_tracked;
-} mdb_refcount_tracked_t;
+} mdb_zfs_refcount_tracked_t;
 
 /* ARGSUSED */
 static int
-refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+zfs_refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 {
-	mdb_refcount_t rc;
-	mdb_refcount_removed_t rcr;
-	mdb_refcount_tracked_t rct;
+	mdb_zfs_refcount_t rc;
+	mdb_zfs_refcount_removed_t rcr;
+	mdb_zfs_refcount_tracked_t rct;
 	int off;
 	boolean_t released = B_FALSE;
 
@@ -3127,30 +3128,30 @@
 	    NULL) != argc)
 		return (DCMD_USAGE);
 
-	if (mdb_ctf_vread(&rc, "refcount_t", "mdb_refcount_t", addr,
+	if (mdb_ctf_vread(&rc, "zfs_refcount_t", "mdb_zfs_refcount_t", addr,
 	    0) == -1)
 		return (DCMD_ERR);
 
-	if (mdb_ctf_vread(&rcr, "refcount_t", "mdb_refcount_removed_t", addr,
-	    MDB_CTF_VREAD_QUIET) == -1) {
-		mdb_printf("refcount_t at %p has %llu holds (untracked)\n",
+	if (mdb_ctf_vread(&rcr, "zfs_refcount_t", "mdb_zfs_refcount_removed_t",
+	    addr, MDB_CTF_VREAD_QUIET) == -1) {
+		mdb_printf("zfs_refcount_t at %p has %llu holds (untracked)\n",
 		    addr, (longlong_t)rc.rc_count);
 		return (DCMD_OK);
 	}
 
-	if (mdb_ctf_vread(&rct, "refcount_t", "mdb_refcount_tracked_t", addr,
-	    MDB_CTF_VREAD_QUIET) == -1) {
+	if (mdb_ctf_vread(&rct, "zfs_refcount_t", "mdb_zfs_refcount_tracked_t",
+	    addr, MDB_CTF_VREAD_QUIET) == -1) {
 		/* If this is an old target, it might be tracked. */
 		rct.rc_tracked = B_TRUE;
 	}
 
-	mdb_printf("refcount_t at %p has %llu current holds, "
+	mdb_printf("zfs_refcount_t at %p has %llu current holds, "
 	    "%llu recently released holds\n",
 	    addr, (longlong_t)rc.rc_count, (longlong_t)rcr.rc_removed_count);
 
 	if (rct.rc_tracked && rc.rc_count > 0)
 		mdb_printf("current holds:\n");
-	off = mdb_ctf_offsetof_by_name("refcount_t", "rc_list");
+	off = mdb_ctf_offsetof_by_name("zfs_refcount_t", "rc_list");
 	if (off == -1)
 		return (DCMD_ERR);
 	mdb_pwalk("list", reference_cb, (void*)B_FALSE, addr + off);
@@ -3158,7 +3159,7 @@
 	if (released && rcr.rc_removed_count > 0) {
 		mdb_printf("released holds:\n");
 
-		off = mdb_ctf_offsetof_by_name("refcount_t", "rc_removed");
+		off = mdb_ctf_offsetof_by_name("zfs_refcount_t", "rc_removed");
 		if (off == -1)
 			return (DCMD_ERR);
 		mdb_pwalk("list", reference_cb, (void*)B_TRUE, addr + off);
@@ -3573,7 +3574,7 @@
 		return (DCMD_USAGE);
 
 	if (mdb_getopts(argc, argv,
-	    'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
+	    'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL) != argc)
 		return (DCMD_USAGE);
 
 	if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) {
@@ -3796,12 +3797,12 @@
 	}
 
 	mdb_printf("anonymous references:\n");
-	(void) mdb_call_dcmd("refcount", addr +
+	(void) mdb_call_dcmd("zfs_refcount", addr +
 	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_anon_rcount"),
 	    DCMD_ADDRSPEC, 0, NULL);
 
 	mdb_printf("linked references:\n");
-	(void) mdb_call_dcmd("refcount", addr +
+	(void) mdb_call_dcmd("zfs_refcount", addr +
 	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_linked_rcount"),
 	    DCMD_ADDRSPEC, 0, NULL);
 
@@ -4006,7 +4007,8 @@
 	    'a', MDB_OPT_SETBITS, ARC_CFLAG_ANON, &data.arc_cflags,
 	    'b', MDB_OPT_SETBITS, ARC_CFLAG_BUFS, &data.arc_cflags,
 	    'r', MDB_OPT_SETBITS, ARC_CFLAG_MRU, &data.arc_cflags,
-	    'f', MDB_OPT_SETBITS, ARC_CFLAG_MFU, &data.arc_cflags) != argc)
+	    'f', MDB_OPT_SETBITS, ARC_CFLAG_MFU, &data.arc_cflags,
+	    NULL) != argc)
 		return (DCMD_USAGE);
 
 	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_anon", &data.anon_sym) ||
@@ -4343,9 +4345,9 @@
 	    "given a spa_t, print block type stats from last scrub",
 	    zfs_blkstats },
 	{ "zfs_params", "", "print zfs tunable parameters", zfs_params },
-	{ "refcount", ":[-r]\n"
+	{ "zfs_refcount", ":[-r]\n"
 	    "\t-r display recently removed references",
-	    "print refcount_t holders", refcount },
+	    "print zfs_refcount_t holders", zfs_refcount },
 	{ "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf },
 	{ "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t",
 	    zfs_acl_dump },
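
With the kernel type renamed from refcount_t to zfs_refcount_t, the dcmd
follows suit (11944): what used to be addr::refcount is now invoked as, for
example,

	> addr::zfs_refcount -r

where addr stands for the address of a zfs_refcount_t and -r also lists
recently removed references, per the usage string in the dcmd table above.
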
--- a/usr/src/cmd/mdb/i86pc/modules/unix/i86mmu.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/i86pc/modules/unix/i86mmu.c	Thu Nov 14 23:30:04 2019 +0000
@@ -22,7 +22,7 @@
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 /*
@@ -491,7 +491,7 @@
 		return (DCMD_USAGE);
 
 	if (mdb_getopts(argc, argv,
-	    'l', MDB_OPT_UINT64, &level) != argc)
+	    'l', MDB_OPT_UINT64, &level, NULL) != argc)
 		return (DCMD_USAGE);
 
 	if (level > mmu.max_level) {
@@ -647,7 +647,7 @@
 		return (DCMD_ERR);
 
 	if (mdb_getopts(argc, argv,
-	    'a', MDB_OPT_STR, &addrspace_str) != argc)
+	    'a', MDB_OPT_STR, &addrspace_str, NULL) != argc)
 		return (DCMD_USAGE);
 
 	if ((flags & DCMD_ADDRSPEC) == 0)
--- a/usr/src/cmd/mdb/intel/modules/sata/sata.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/intel/modules/sata/sata.c	Thu Nov 14 23:30:04 2019 +0000
@@ -24,6 +24,7 @@
  */
 /*
  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <sys/mdb_modapi.h>
@@ -216,7 +217,7 @@
 	}
 
 	if (mdb_getopts(argc, argv,
-	    'a', MDB_OPT_SETBITS, TRUE, &print_pathname) != argc) {
+	    'a', MDB_OPT_SETBITS, TRUE, &print_pathname, NULL) != argc) {
 		return (DCMD_USAGE);
 	}
 
--- a/usr/src/cmd/mdb/intel/modules/xhci/xhci.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/mdb/intel/modules/xhci/xhci.c	Thu Nov 14 23:30:04 2019 +0000
@@ -10,7 +10,7 @@
  */
 
 /*
- * Copyright 2016 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <sys/mdb_modapi.h>
@@ -665,7 +665,7 @@
 
 	ep_set = slot_set = B_FALSE;
 	if (mdb_getopts(argc, argv, 'e', MDB_OPT_UINTPTR_SET, &ep_set, &ep,
-	    's', MDB_OPT_UINTPTR_SET, &slot_set, &slot) != argc)
+	    's', MDB_OPT_UINTPTR_SET, &slot_set, &slot, NULL) != argc)
 		return (DCMD_USAGE);
 
 	if (!slot_set) {
--- a/usr/src/cmd/ptools/Makefile.bld	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/ptools/Makefile.bld	Thu Nov 14 23:30:04 2019 +0000
@@ -24,7 +24,7 @@
 # Use is subject to license terms.
 # Copyright 2015 Joyent, Inc.
 #
-# Copyright (c) 2018, Joyent, Inc.
+# Copyright 2019 Joyent, Inc.
 
 PROG:sh = basename `cd ..; pwd`
 
@@ -128,6 +128,9 @@
 OBJS +=		$(OBJS_$(PROG))
 SRCS +=		$(SRCS_$(PROG))
 
+$(OBJS_ptree) :=	CSTD = $(CSTD_GNU99)
+ptree :=		CSTD = $(CSTD_GNU99)
+
 INSTALL_NEW=
 INSTALL_LEGACY=$(RM) $(ROOTPROCBINSYMLINK) ; \
 	$(LN) -s ../../bin/$(PROG) $(ROOTPROCBINSYMLINK)
--- a/usr/src/cmd/ptools/ptree/ptree.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/ptools/ptree/ptree.c	Thu Nov 14 23:30:04 2019 +0000
@@ -32,7 +32,9 @@
 #include <stdio.h>
 #include <string.h>
 #include <errno.h>
+#include <err.h>
 #include <fcntl.h>
+#include <sys/debug.h>
 #include <sys/types.h>
 #include <sys/termios.h>
 #include <unistd.h>
@@ -43,14 +45,17 @@
 #include <libzonecfg.h>
 #include <limits.h>
 #include <libcontract.h>
+#include <locale.h>
 #include <sys/contract.h>
 #include <sys/ctfs.h>
 #include <libcontract_priv.h>
 #include <sys/stat.h>
 #include <stdbool.h>
-#include "ptools_common.h"
 
+#define	COLUMN_DEFAULT	80
+#define	CHUNK_SIZE	256 /* Arbitrary amount */
 #define	FAKEDPID0(p)	(p->pid == 0 && p->psargs[0] == '\0')
+#define	HAS_SIBLING(p)	((p)->sp != NULL && (p)->sp->done != 0)
 
 typedef struct ps {
 	int	done;
@@ -70,6 +75,8 @@
 	struct ps *cp;		/* child */
 } ps_t;
 
+enum { DASH = 0, BAR, CORNER, VRIGHT };
+
 static	ps_t	**ps;		/* array of ps_t's */
 static	unsigned psize;		/* size of array */
 static	int	nps;		/* number of ps_t's */
@@ -79,17 +86,36 @@
 static	ps_t	*proc0;		/* process 0 */
 static	ps_t	*proc1;		/* process 1 */
 
-static	char	*command;
-
 static	int	aflag = 0;
 static	int	cflag = 0;
+static	int	gflag = 0;
 static	int	sflag = 0;
+static	int	wflag = 0;
 static	int	zflag = 0;
 static	zoneid_t zoneid;
 static	char *match_svc;
 static	char *match_inst;
-static	int	columns = 80;
+static	int	columns;
+
+static const char *box_ascii[] = {
+	[DASH] =	"-",
+	[BAR] =		"|",
+	[CORNER] =	"`",
+	[VRIGHT] =	"+"
+};
 
+static const char *box_utf8[] = {
+	[DASH] =	"\xe2\x94\x80", /* \u2500 */
+	[BAR] =		"\xe2\x94\x82", /* \u2502 */
+	[CORNER] =	"\xe2\x94\x94", /* \u2514 */
+	[VRIGHT] =	"\xe2\x94\x9c", /* \u251c */
+};
+
+static const char **box;
+
+static size_t get_termwidth(void);
+static const char **get_boxchars(void);
+static int add_proc(psinfo_t *, lwpsinfo_t *, void *);
 static bool match_proc(ps_t *);
 static void markprocs(ps_t *);
 static int printone(ps_t *, int);
@@ -101,32 +127,47 @@
 static zoneid_t getzone(const char *);
 static ps_t *fakepid0(void);
 
+static void *zalloc(size_t);
+static void *xreallocarray(void *, size_t, size_t);
+static char *xstrdup(const char *);
+
+static void __NORETURN
+usage(void)
+{
+	(void) fprintf(stderr,
+	    "usage:\t%s [-ac] [-s svc] [-z zone] [ {pid|user} ... ]\n",
+	    getprogname());
+	(void) fprintf(stderr,
+	    "  (show process trees)\n");
+	(void) fprintf(stderr,
+	    "  list can include process-ids and user names\n");
+	(void) fprintf(stderr,
+	    "  -a : include children of process 0\n");
+	(void) fprintf(stderr,
+	    "  -c : show contracts\n");
+	(void) fprintf(stderr,
+	    "  -g : use line drawing characters in output\n");
+	(void) fprintf(stderr,
+	    "  -s : print only processes with given service FMRI\n");
+	(void) fprintf(stderr,
+	    "  -w : allow lines to wrap instead of truncating\n");
+	(void) fprintf(stderr,
+	    "  -z : print only processes in given zone\n");
+	exit(2);
+}
+
 int
 main(int argc, char **argv)
 {
-	psinfo_t info;	/* process information structure from /proc */
 	int opt;
 	int errflg = 0;
-	struct winsize winsize;
-	char *s;
 	int n;
 	int retc = 0;
-	char ppath[PATH_MAX];
-
-	DIR *dirp;
-	struct dirent *dentp;
-	char	pname[PATH_MAX];
-	int	pdlen;
 
 	ps_t *p;
 
-	if ((command = strrchr(argv[0], '/')) == NULL)
-		command = argv[0];
-	else
-		command++;
-
 	/* options */
-	while ((opt = getopt(argc, argv, "acs:z:")) != EOF) {
+	while ((opt = getopt(argc, argv, "acgs:wz:")) != EOF) {
 		switch (opt) {
 		case 'a':		/* include children of process 0 */
 			aflag = 1;
@@ -134,10 +175,17 @@
 		case 'c':		/* display contract ownership */
 			aflag = cflag = 1;
 			break;
+		case 'g':
+			gflag = 1;
+			box = get_boxchars();
+			break;
 		case 's':
 			sflag = 1;
 			match_svc = parse_svc(optarg, &match_inst);
 			break;
+		case 'w':
+			wflag = 1;
+			break;
 		case 'z':		/* only processes in given zone */
 			zflag = 1;
 			zoneid = getzone(optarg);
@@ -151,132 +199,21 @@
 	argc -= optind;
 	argv += optind;
 
-	if (errflg) {
-		(void) fprintf(stderr,
-		    "usage:\t%s [-ac] [-s svc] [-z zone] [ {pid|user} ... ]\n",
-		    command);
-		(void) fprintf(stderr,
-		    "  (show process trees)\n");
-		(void) fprintf(stderr,
-		    "  list can include process-ids and user names\n");
-		(void) fprintf(stderr,
-		    "  -a : include children of process 0\n");
-		(void) fprintf(stderr,
-		    "  -c : show contracts\n");
-		(void) fprintf(stderr,
-		    "  -s : print only processes with given service FMRI\n");
-		(void) fprintf(stderr,
-		    "  -z : print only processes in given zone\n");
-		return (2);
+	if (errflg)
+		usage();
+
+	if (!wflag) {
+		columns = get_termwidth();
+		VERIFY3S(columns, >, 0);
 	}
 
-	/*
-	 * Kind of a hack to determine the width of the output...
-	 */
-	if ((s = getenv("COLUMNS")) != NULL && (n = atoi(s)) > 0)
-		columns = n;
-	else if (isatty(fileno(stdout)) &&
-	    ioctl(fileno(stdout), TIOCGWINSZ, &winsize) == 0 &&
-	    winsize.ws_col != 0)
-		columns = winsize.ws_col;
-
 	nps = 0;
 	psize = 0;
 	ps = NULL;
 
-	(void) proc_snprintf(ppath, sizeof (ppath), "/proc");
-
-	/*
-	 * Search the /proc directory for all processes.
-	 */
-	if ((dirp = opendir(ppath)) == NULL) {
-		(void) fprintf(stderr, "%s: cannot open %s directory\n",
-		    command, ppath);
-		return (1);
-	}
-
-	(void) strcpy(pname, ppath);
-	pdlen = strlen(pname);
-	pname[pdlen++] = '/';
-
-	/* for each active process --- */
-	while (dentp = readdir(dirp)) {
-		int	procfd;	/* filedescriptor for /proc/nnnnn/psinfo */
-
-		if (dentp->d_name[0] == '.')		/* skip . and .. */
-			continue;
-		(void) strcpy(pname + pdlen, dentp->d_name);
-		(void) strcpy(pname + strlen(pname), "/psinfo");
-retry:
-		if ((procfd = open(pname, O_RDONLY)) == -1)
-			continue;
-
-		/*
-		 * Get the info structure for the process and close quickly.
-		 */
-		if (read(procfd, &info, sizeof (info)) != sizeof (info)) {
-			int	saverr = errno;
-
-			(void) close(procfd);
-			if (saverr == EAGAIN)
-				goto retry;
-			if (saverr != ENOENT)
-				perror(pname);
-			continue;
-		}
-		(void) close(procfd);
+	/* Currently, this can only fail if the 3rd argument is invalid */
+	VERIFY0(proc_walk(add_proc, NULL, PR_WALK_PROC));
 
-		/*
-		 * We make sure there's always a free slot in the table
-		 * in case we need to add a fake p0.
-		 */
-		if (nps + 1 >= psize) {
-			if ((psize *= 2) == 0)
-				psize = 20;
-			if ((ps = realloc(ps, psize*sizeof (ps_t *))) == NULL) {
-				perror("realloc()");
-				return (1);
-			}
-		}
-		if ((p = calloc(1, sizeof (ps_t))) == NULL) {
-			perror("calloc()");
-			return (1);
-		}
-		ps[nps++] = p;
-		p->done = 0;
-		p->uid = info.pr_uid;
-		p->gid = info.pr_gid;
-		p->pid = info.pr_pid;
-		p->ppid = info.pr_ppid;
-		p->pgrp = info.pr_pgid;
-		p->sid = info.pr_sid;
-		p->zoneid = info.pr_zoneid;
-		p->ctid = info.pr_contract;
-		p->start = info.pr_start;
-		proc_unctrl_psinfo(&info);
-		if (info.pr_nlwp == 0)
-			(void) strcpy(p->psargs, "<defunct>");
-		else if (info.pr_psargs[0] == '\0')
-			(void) strncpy(p->psargs, info.pr_fname,
-			    sizeof (p->psargs));
-		else
-			(void) strncpy(p->psargs, info.pr_psargs,
-			    sizeof (p->psargs));
-		p->psargs[sizeof (p->psargs)-1] = '\0';
-		p->pp = NULL;
-		p->sp = NULL;
-		p->cp = NULL;
-
-		if (sflag)
-			p_get_svc_fmri(p, NULL);
-
-		if (p->pid == p->ppid)
-			proc0 = p;
-		if (p->pid == 1)
-			proc1 = p;
-	}
-
-	(void) closedir(dirp);
 	if (proc0 == NULL)
 		proc0 = fakepid0();
 	if (proc1 == NULL)
@@ -328,9 +265,7 @@
 		if (errno != 0 || *next != '\0') {
 			struct passwd *pw = getpwnam(arg);
 			if (pw == NULL) {
-				(void) fprintf(stderr,
-				    "%s: invalid username: %s\n",
-				    command, arg);
+				warnx("invalid username: %s", arg);
 				retc = 1;
 				continue;
 			}
@@ -372,7 +307,33 @@
 	return (retc || errflg);
 }
 
-#define	PIDWIDTH	5
+
+#define	PIDWIDTH	6
+
+static void
+printlines(ps_t *p, int level)
+{
+	if (level == 0)
+		return;
+
+	if (!gflag) {
+		(void) printf("%*s", level * 2, "");
+		return;
+	}
+
+	for (int i = 1; i < level; i++) {
+		ps_t *ancestor = p;
+
+		/* Find our ancestor at depth 'i' */
+		for (int j = i; j < level; j++)
+			ancestor = ancestor->pp;
+
+		(void) printf("%s ", HAS_SIBLING(ancestor) ? box[BAR] : " ");
+	}
+
+	(void) printf("%s%s", HAS_SIBLING(p) ? box[VRIGHT] : box[CORNER],
+	    box[DASH]);
+}
 
 static int
 printone(ps_t *p, int level)
@@ -381,15 +342,22 @@
 
 	if (p->done && !FAKEDPID0(p)) {
 		indent = level * 2;
-		if ((n = columns - PIDWIDTH - indent - 2) < 0)
-			n = 0;
+
+		if (wflag) {
+			n = strlen(p->psargs);
+		} else {
+			if ((n = columns - PIDWIDTH - indent - 2) < 0)
+				n = 0;
+		}
+
+		printlines(p, level);
 		if (p->pid >= 0) {
-			(void) printf("%*.*s%-*d %.*s\n", indent, indent, " ",
-			    PIDWIDTH, (int)p->pid, n, p->psargs);
+			(void) printf("%-*d %.*s\n", PIDWIDTH, (int)p->pid, n,
+			    p->psargs);
 		} else {
 			assert(cflag != 0);
-			(void) printf("%*.*s[process contract %d: %s]\n",
-			    indent, indent, " ", (int)p->ctid,
+			(void) printf("[process contract %d: %s]\n",
+			    (int)p->ctid,
 			    p->svc_fmri == NULL ? "?" : p->svc_fmri);
 		}
 		return (1);
@@ -476,18 +444,10 @@
 		return;
 
 	if (nctps >= ctsize) {
-		if ((ctsize *= 2) == 0)
-			ctsize = 20;
-		if ((ctps = realloc(ctps, ctsize * sizeof (ps_t *))) == NULL) {
-			perror("realloc()");
-			exit(1);
-		}
+		ctsize += CHUNK_SIZE;
+		ctps = xreallocarray(ctps, ctsize, sizeof (ps_t *));
 	}
-	pp = calloc(sizeof (ps_t), 1);
-	if (pp == NULL) {
-		perror("calloc()");
-		exit(1);
-	}
+	pp = zalloc(sizeof (*pp));
 	ctps[nctps++] = pp;
 
 	pp->pid = -1;
@@ -628,11 +588,7 @@
 	ps_t *p0, *p;
 	int n;
 
-	if ((p0 = calloc(1, sizeof (ps_t))) == NULL) {
-		perror("calloc()");
-		exit(1);
-	}
-	(void) memset(p0, '\0', sizeof (ps_t));
+	p0 = zalloc(sizeof (*p0));
 
 	/* First build all partial process trees. */
 	for (n = 0; n < nps; n++) {
@@ -659,10 +615,9 @@
 {
 	zoneid_t zoneid;
 
-	if (zone_get_id(arg, &zoneid) != 0) {
-		(void) fprintf(stderr, "%s: unknown zone: %s\n", command, arg);
-		exit(1);
-	}
+	if (zone_get_id(arg, &zoneid) != 0)
+		err(EXIT_FAILURE, "unknown zone: %s", arg);
+
 	return (zoneid);
 }
 
@@ -677,10 +632,7 @@
 	if (strncmp(p, "svc:/", strlen("svc:/")) == 0)
 		p += strlen("svc:/");
 
-	if ((ret = strdup(p)) == NULL) {
-		perror("strdup()");
-		exit(1);
-	}
+	ret = xstrdup(p);
 
 	if ((cp = strrchr(ret, ':')) != NULL) {
 		*cp = '\0';
@@ -689,10 +641,136 @@
 		cp = "";
 	}
 
-	if ((*instp = strdup(cp)) == NULL) {
-		perror("strdup()");
-		exit(1);
+	*instp = xstrdup(cp);
+	return (ret);
+}
+
+static int
+add_proc(psinfo_t *info, lwpsinfo_t *lwp __unused, void *arg __unused)
+{
+	ps_t *p;
+
+	/*
+	 * We make sure there is always a free slot in the table
+	 * in case we need to add a fake p0.
+	 */
+	if (nps + 1 >= psize) {
+		psize += CHUNK_SIZE;
+		ps = xreallocarray(ps, psize, sizeof (ps_t *));
 	}
 
-	return (ret);
+	p = zalloc(sizeof (*p));
+	ps[nps++] = p;
+	p->done = 0;
+	p->uid = info->pr_uid;
+	p->gid = info->pr_gid;
+	p->pid = info->pr_pid;
+	p->ppid = info->pr_ppid;
+	p->pgrp = info->pr_pgid;
+	p->sid = info->pr_sid;
+	p->zoneid = info->pr_zoneid;
+	p->ctid = info->pr_contract;
+	p->start = info->pr_start;
+	proc_unctrl_psinfo(info);
+	if (info->pr_nlwp == 0)
+		(void) strcpy(p->psargs, "<defunct>");
+	else if (info->pr_psargs[0] == '\0')
+		(void) strncpy(p->psargs, info->pr_fname,
+		    sizeof (p->psargs));
+	else
+		(void) strncpy(p->psargs, info->pr_psargs,
+		    sizeof (p->psargs));
+	p->psargs[sizeof (p->psargs)-1] = '\0';
+	p->pp = NULL;
+	p->sp = NULL;
+
+	if (sflag)
+		p_get_svc_fmri(p, NULL);
+
+	if (p->pid == p->ppid)
+		proc0 = p;
+	if (p->pid == 1)
+		proc1 = p;
+
+	return (0);
 }
+
+
+static size_t
+get_termwidth(void)
+{
+	char *s;
+
+	if ((s = getenv("COLUMNS")) != NULL) {
+		unsigned long n;
+
+		errno = 0;
+		n = strtoul(s, NULL, 10);
+		if (n != 0 && errno == 0) {
+			/* Sanity check on the range */
+			if (n > INT_MAX)
+				n = COLUMN_DEFAULT;
+			return (n);
+		}
+	}
+
+	struct winsize winsize;
+
+	if (isatty(STDOUT_FILENO) &&
+	    ioctl(STDOUT_FILENO, TIOCGWINSZ, &winsize) == 0 &&
+	    winsize.ws_col != 0) {
+		return (winsize.ws_col);
+	}
+
+	return (COLUMN_DEFAULT);
+}
+
+static const char **
+get_boxchars(void)
+{
+	char *loc = setlocale(LC_ALL, "");
+
+	if (loc == NULL)
+		return (box_ascii);
+
+	const char *p = strstr(loc, "UTF-8");
+
+	/*
+	 * Only use the UTF-8 box drawing characters if the locale ends
+	 * with "UTF-8".
+	 */
+	if (p != NULL && p[5] == '\0')
+		return (box_utf8);
+
+	return (box_ascii);
+}
+
+static void *
+zalloc(size_t len)
+{
+	void *p = calloc(1, len);
+
+	if (p == NULL)
+		err(EXIT_FAILURE, "calloc");
+	return (p);
+}
+
+static void *
+xreallocarray(void *ptr, size_t nelem, size_t elsize)
+{
+	void *p = reallocarray(ptr, nelem, elsize);
+
+	if (p == NULL)
+		err(EXIT_FAILURE, "reallocarray");
+	return (p);
+}
+
+static char *
+xstrdup(const char *s)
+{
+	char *news = strdup(s);
+
+	if (news == NULL)
+		err(EXIT_FAILURE, "strdup");
+	return (news);
+}
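The connector selection in the new printlines() above is the subtle part of the
-g support: for each ancestor depth we emit a vertical bar when that ancestor
still has a sibling left to print, then a tee or corner for the node itself.
Below is a minimal standalone sketch of that walk, assuming a simplified
node_t; the sample tree is illustrative only, and ptree's real HAS_SIBLING()
additionally requires the sibling to be marked done.

#include <stdio.h>

typedef struct node {
	const char *name;
	struct node *parent;	/* pp in ptree */
	struct node *sibling;	/* sp in ptree */
} node_t;

#define	HAS_SIB(n)	((n)->sibling != NULL)

static void
print_node(node_t *n, int level)
{
	for (int i = 1; i < level; i++) {
		node_t *a = n;

		/* Walk up to our ancestor at depth 'i'. */
		for (int j = i; j < level; j++)
			a = a->parent;
		(void) printf("%s ", HAS_SIB(a) ? "|" : " ");
	}
	if (level > 0)
		(void) printf("%s-", HAS_SIB(n) ? "+" : "`");
	(void) printf("%s\n", n->name);
}

int
main(void)
{
	node_t root = { "init", NULL, NULL };
	node_t b = { "sshd", &root, NULL };
	node_t a = { "cron", &root, &b };
	node_t c = { "bash", &b, NULL };

	print_node(&root, 0);
	print_node(&a, 1);
	print_node(&b, 1);
	print_node(&c, 2);
	return (0);
}

Built with a C99 compiler this prints init, +-cron, `-sshd and a bash child
indented under sshd, using the same ASCII connector set as box_ascii above.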
--- a/usr/src/cmd/smbsrv/smbadm/smbadm.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/smbsrv/smbadm/smbadm.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  */
 
 /*
@@ -178,6 +178,10 @@
 static boolean_t smbadm_chkprop_priv(smbadm_prop_t *prop);
 static int smbadm_setprop_tkowner(char *gname, smbadm_prop_t *prop);
 static int smbadm_getprop_tkowner(char *gname, smbadm_prop_t *prop);
+static int smbadm_setprop_readfile(char *gname, smbadm_prop_t *prop);
+static int smbadm_getprop_readfile(char *gname, smbadm_prop_t *prop);
+static int smbadm_setprop_writefile(char *gname, smbadm_prop_t *prop);
+static int smbadm_getprop_writefile(char *gname, smbadm_prop_t *prop);
 static int smbadm_setprop_backup(char *gname, smbadm_prop_t *prop);
 static int smbadm_getprop_backup(char *gname, smbadm_prop_t *prop);
 static int smbadm_setprop_restore(char *gname, smbadm_prop_t *prop);
@@ -192,6 +196,10 @@
 	smbadm_getprop_restore,	smbadm_chkprop_priv	},
 	{"take-ownership", "on|off",	smbadm_setprop_tkowner,
 	smbadm_getprop_tkowner,	smbadm_chkprop_priv	},
+	{"bypass-read", "on|off",	smbadm_setprop_readfile,
+	smbadm_getprop_readfile,	smbadm_chkprop_priv	},
+	{"bypass-write", "on|off",	smbadm_setprop_writefile,
+	smbadm_getprop_writefile,	smbadm_chkprop_priv	},
 	{"description",	"<string>",	smbadm_setprop_desc,
 	smbadm_getprop_desc,	NULL			},
 };
@@ -1807,6 +1815,30 @@
 }
 
 static int
+smbadm_setprop_readfile(char *gname, smbadm_prop_t *prop)
+{
+	return (smbadm_group_setpriv(gname, SE_READ_FILE_LUID, prop));
+}
+
+static int
+smbadm_getprop_readfile(char *gname, smbadm_prop_t *prop)
+{
+	return (smbadm_group_getpriv(gname, SE_READ_FILE_LUID, prop));
+}
+
+static int
+smbadm_setprop_writefile(char *gname, smbadm_prop_t *prop)
+{
+	return (smbadm_group_setpriv(gname, SE_WRITE_FILE_LUID, prop));
+}
+
+static int
+smbadm_getprop_writefile(char *gname, smbadm_prop_t *prop)
+{
+	return (smbadm_group_getpriv(gname, SE_WRITE_FILE_LUID, prop));
+}
+
+static int
 smbadm_setprop_backup(char *gname, smbadm_prop_t *prop)
 {
 	return (smbadm_group_setpriv(gname, SE_BACKUP_LUID, prop));
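The two new bypass properties reuse smbadm's table-driven property dispatch,
pairing a property name with set/get handlers that wrap
smbadm_group_setpriv()/smbadm_group_getpriv(). A minimal sketch of that
pattern follows; prop_handler_t and set_prop() are hypothetical stand-ins for
the real smbadm_prop_handle_t machinery.

#include <stdio.h>
#include <string.h>

typedef struct prop_handler {
	const char *name;
	int (*setfn)(const char *);
} prop_handler_t;

static int
set_bypass_read(const char *val)
{
	/* The real handler calls smbadm_group_setpriv() with the LUID. */
	(void) printf("bypass-read=%s\n", val);
	return (0);
}

static prop_handler_t props[] = {
	{ "bypass-read", set_bypass_read },
};

static int
set_prop(const char *name, const char *val)
{
	for (size_t i = 0; i < sizeof (props) / sizeof (props[0]); i++) {
		if (strcmp(props[i].name, name) == 0)
			return (props[i].setfn(val));
	}
	return (-1);		/* unknown property */
}

int
main(void)
{
	return (set_prop("bypass-read", "on"));
}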
--- a/usr/src/cmd/smbsrv/smbd/smbd_doorsvc.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/smbsrv/smbd/smbd_doorsvc.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #include <sys/list.h>
@@ -103,7 +103,9 @@
 	{ SMB_DR_DFS_GET_REFERRALS,	smbd_dop_dfs_get_referrals },
 	{ SMB_DR_SHR_HOSTACCESS,	smbd_dop_shr_hostaccess },
 	{ SMB_DR_SHR_EXEC,		smbd_dop_shr_exec },
-	{ SMB_DR_NOTIFY_DC_CHANGED,	smbd_dop_notify_dc_changed }
+	{ SMB_DR_NOTIFY_DC_CHANGED,	smbd_dop_notify_dc_changed },
+	{ SMB_DR_LOOKUP_LSID,		smbd_dop_lookup_sid },
+	{ SMB_DR_LOOKUP_LNAME,		smbd_dop_lookup_name }
 };
 
 static int smbd_ndoorop = (sizeof (smbd_doorops) / sizeof (smbd_doorops[0]));
@@ -581,6 +583,10 @@
 	return (SMB_DOP_EMPTYBUF);
 }
 
+/*
+ * SMB_DR_LOOKUP_NAME,
+ * SMB_DR_LOOKUP_LNAME (local-only, for idmap)
+ */
 static int
 smbd_dop_lookup_name(smbd_arg_t *arg)
 {
@@ -604,7 +610,24 @@
 		(void) snprintf(buf, MAXNAMELEN, "%s\\%s", acct.a_domain,
 		    acct.a_name);
 
-	acct.a_status = lsa_lookup_name(buf, acct.a_sidtype, &ainfo);
+	switch (arg->hdr.dh_op) {
+	case SMB_DR_LOOKUP_NAME:
+		acct.a_status = lsa_lookup_name(buf, acct.a_sidtype, &ainfo);
+		break;
+
+	case SMB_DR_LOOKUP_LNAME:
+		/*
+		 * Basically for idmap.  Don't call out to AD.
+		 */
+		acct.a_status = lsa_lookup_lname(buf, acct.a_sidtype, &ainfo);
+		break;
+
+	default:
+		assert(!"arg->hdr.dh_op");
+		acct.a_status = NT_STATUS_INTERNAL_ERROR;
+		break;
+	}
+
 	if (acct.a_status == NT_STATUS_SUCCESS) {
 		acct.a_sidtype = ainfo.a_type;
 		smb_sid_tostr(ainfo.a_sid, acct.a_sid);
@@ -626,6 +649,10 @@
 	return (SMB_DOP_SUCCESS);
 }
 
+/*
+ * SMB_DR_LOOKUP_SID,
+ * SMB_DR_LOOKUP_LSID (local-only, for idmap)
+ */
 static int
 smbd_dop_lookup_sid(smbd_arg_t *arg)
 {
@@ -641,7 +668,25 @@
 		return (SMB_DOP_DECODE_ERROR);
 
 	sid = smb_sid_fromstr(acct.a_sid);
-	acct.a_status = lsa_lookup_sid(sid, &ainfo);
+
+	switch (arg->hdr.dh_op) {
+	case SMB_DR_LOOKUP_SID:
+		acct.a_status = lsa_lookup_sid(sid, &ainfo);
+		break;
+
+	case SMB_DR_LOOKUP_LSID:
+		/*
+		 * Basically for idmap.  Don't call out to AD.
+		 */
+		acct.a_status = lsa_lookup_lsid(sid, &ainfo);
+		break;
+
+	default:
+		assert(!"arg->hdr.dh_op");
+		acct.a_status = NT_STATUS_INTERNAL_ERROR;
+		break;
+	}
+
 	smb_sid_free(sid);
 
 	if (acct.a_status == NT_STATUS_SUCCESS) {
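Both lookup handlers now serve two door opcodes apiece, switching on
arg->hdr.dh_op so the local-only variants never call out to AD. A
self-contained sketch of that dispatch shape; the opcode and helper names
here are placeholders, not the real smbsrv identifiers.

#include <stdio.h>

/* Placeholder opcodes; the real values live in the smbsrv door headers. */
typedef enum { OP_LOOKUP_SID, OP_LOOKUP_LSID } door_op_t;

static int
lookup_with_ad(void)
{
	(void) printf("full lookup (may contact AD)\n");
	return (0);
}

static int
lookup_local_only(void)
{
	(void) printf("local-only lookup (for idmap)\n");
	return (0);
}

static int
lookup_sid_common(door_op_t op)
{
	switch (op) {
	case OP_LOOKUP_SID:
		return (lookup_with_ad());
	case OP_LOOKUP_LSID:
		return (lookup_local_only());
	default:
		return (-1);	/* unknown opcode */
	}
}

int
main(void)
{
	return (lookup_sid_common(OP_LOOKUP_LSID));
}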
--- a/usr/src/cmd/zdb/zdb.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/cmd/zdb/zdb.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2017 Nexenta Systems, Inc.
  * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
@@ -901,7 +901,7 @@
 	/* max sure nicenum has enough space */
 	CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
 
-	zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf));
+	zdb_nicenum(metaslab_largest_allocatable(msp), maxbuf, sizeof (maxbuf));
 
 	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
 	    "segments", avl_numnodes(t), "maxsize", maxbuf,
--- a/usr/src/common/crypto/modes/modes.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/common/crypto/modes/modes.h	Thu Nov 14 23:30:04 2019 +0000
@@ -23,7 +23,7 @@
  * Use is subject to license terms.
  *
  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #ifndef	_COMMON_CRYPTO_MODES_H
@@ -51,6 +51,9 @@
 #define	GMAC_MODE			0x00000040
 #define	CMAC_MODE			0x00000080
 
+/* Private flag for pkcs11_softtoken */
+#define	P11_DECRYPTED			0x80000000
+
 /*
  * cc_keysched:		Pointer to key schedule.
  *
--- a/usr/src/data/zoneinfo/zone_sun.tab	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/data/zoneinfo/zone_sun.tab	Thu Nov 14 23:30:04 2019 +0000
@@ -133,7 +133,7 @@
 CA	+4612-05957	America/Glace_Bay	-	Atlantic - NS (Cape Breton)
 CA	+4606-06447	America/Moncton	-	Atlantic - New Brunswick
 CA	+5320-06025	America/Goose_Bay	-	Atlantic - Labrador (most areas)
-CA	+5125-05707	America/Blanc-Sablon	AST - QC (Lower North Shore)
+CA	+5125-05707	America/Blanc-Sablon	-	AST - QC (Lower North Shore)
 CA	+4339-07923	America/Toronto	-	Eastern - ON, QC (most areas)
 CA	+4901-08816	America/Nipigon	-	Eastern - ON, QC (no DST 1967-73)
 CA	+4823-08915	America/Thunder_Bay	-	Eastern - ON (Thunder Bay)
--- a/usr/src/lib/fm/topo/modules/common/fac_prov_ipmi/fac_prov_ipmi.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/fm/topo/modules/common/fac_prov_ipmi/fac_prov_ipmi.c	Thu Nov 14 23:30:04 2019 +0000
@@ -24,6 +24,7 @@
  */
 /*
  * Copyright (c) 2019, Joyent, Inc.
+ * Copyright 2019 by Western Digital Corporation
  */
 #include <unistd.h>
 #include <stdio.h>
@@ -427,6 +428,13 @@
 		topo_mod_ipmi_rele(mod);
 		return (-1);
 	}
+	if (reading->isr_state_unavailable) {
+		topo_mod_dprintf(mod, "Unavailable sensor %s, sensor_num=%d\n",
+		    entity_refs[i], sensor_num);
+		strarr_free(mod, entity_refs, nelems);
+		topo_mod_ipmi_rele(mod);
+		return (-1);
+	}
 	strarr_free(mod, entity_refs, nelems);
 	topo_mod_ipmi_rele(mod);
 
--- a/usr/src/lib/fm/topo/modules/common/ipmi/ipmi_enum.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/fm/topo/modules/common/ipmi/ipmi_enum.c	Thu Nov 14 23:30:04 2019 +0000
@@ -22,6 +22,7 @@
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2017, Joyent, Inc.
+ * Copyright 2019 by Western Digital Corporation
  */
 
 #include <assert.h>
@@ -30,7 +31,7 @@
 #include <sys/fm/protocol.h>
 #include <string.h>
 
-#define	TOPO_PGROUP_IPMI 		"ipmi"
+#define	TOPO_PGROUP_IPMI		"ipmi"
 #define	TOPO_PROP_IPMI_ENTITY_REF	"entity_ref"
 #define	TOPO_PROP_IPMI_ENTITY_PRESENT	"entity_present"
 #define	FAC_PROV_IPMI			"fac_prov_ipmi"
@@ -47,6 +48,8 @@
 
 static int ipmi_present(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
     nvlist_t **);
+static int ipmi_unusable(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
+    nvlist_t **);
 static int ipmi_enum(topo_mod_t *, tnode_t *, const char *,
     topo_instance_t, topo_instance_t, void *, void *);
 static int ipmi_post_process(topo_mod_t *, tnode_t *);
@@ -60,6 +63,9 @@
 static const topo_method_t ipmi_methods[] = {
 	{ TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC,
 	    TOPO_METH_PRESENT_VERSION0, TOPO_STABILITY_INTERNAL, ipmi_present },
+	{ TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC,
+	    TOPO_METH_UNUSABLE_VERSION, TOPO_STABILITY_INTERNAL,
+	    ipmi_unusable },
 	{ "ipmi_fru_label", "Property method", 0,
 	    TOPO_STABILITY_INTERNAL, ipmi_fru_label},
 	{ "ipmi_fru_fmri", "Property method", 0,
@@ -75,6 +81,85 @@
 const topo_modinfo_t ipmi_info =
 	{ "ipmi", FM_FMRI_SCHEME_HC, TOPO_VERSION, &ipmi_ops };
 
+/* Common code used by topo methods below to find an IPMI entity */
+static int
+ipmi_find_entity(topo_mod_t *mod, tnode_t *tn, ipmi_handle_t **ihpp,
+    ipmi_entity_t **epp, char **namep, ipmi_sdr_t **sdrpp)
+{
+	ipmi_handle_t *ihp;
+	ipmi_entity_t *ep;
+	int err;
+	char *name = NULL, **names;
+	ipmi_sdr_t *sdrp = NULL;
+	uint_t nelems, i;
+
+	*ihpp = NULL;
+	*epp = NULL;
+	*namep = NULL;
+	*sdrpp = NULL;
+
+	if ((ihp = topo_mod_ipmi_hold(mod)) == NULL)
+		return (topo_mod_seterrno(mod, ETOPO_METHOD_UNKNOWN));
+
+	ep = topo_node_getspecific(tn);
+	if (ep != NULL) {
+		*ihpp = ihp;
+		*epp = ep;
+		return (0);
+	}
+
+	if (topo_prop_get_string(tn, TOPO_PGROUP_IPMI,
+	    TOPO_PROP_IPMI_ENTITY_PRESENT, &name, &err) == 0) {
+		/*
+		 * Some broken IPMI implementations don't export correct
+		 * entities, so referring to an entity isn't sufficient.
+		 * For these platforms, we allow the XML to specify a
+		 * single SDR record that represents the current present
+		 * state.
+		 */
+		sdrp = ipmi_sdr_lookup(ihp, name);
+	} else {
+		if (topo_prop_get_string_array(tn, TOPO_PGROUP_IPMI,
+		    TOPO_PROP_IPMI_ENTITY_REF, &names, &nelems, &err) != 0) {
+			/*
+			 * Not all nodes have an entity_ref attribute.
+			 * For these cases, return ENOTSUP so that we
+			 * fall back to the default hc presence
+			 * detection.
+			 */
+			topo_mod_ipmi_rele(mod);
+			return (topo_mod_seterrno(mod, ETOPO_METHOD_NOTSUP));
+		}
+
+		for (i = 0; i < nelems; i++) {
+			if ((ep = ipmi_entity_lookup_sdr(ihp, names[i]))
+			    != NULL) {
+				name = names[i];
+				names[i] = NULL;
+				break;
+			}
+		}
+
+		for (i = 0; i < nelems; i++)
+			topo_mod_strfree(mod, names[i]);
+		topo_mod_free(mod, names, (nelems * sizeof (char *)));
+
+		if (ep == NULL) {
+			topo_mod_dprintf(mod,
+			    "Failed to get present state of %s=%d\n",
+			    topo_node_name(tn), topo_node_instance(tn));
+			topo_mod_ipmi_rele(mod);
+			return (-1);
+		}
+		topo_node_setspecific(tn, ep);
+	}
+
+	*ihpp = ihp;
+	*namep = name;
+	*sdrpp = sdrp;
+	return (0);
+}
+
 /*
  * Determine if the entity is present.
  */
@@ -85,81 +170,22 @@
 {
 	ipmi_handle_t *ihp;
 	ipmi_entity_t *ep;
-	boolean_t present;
-	nvlist_t *nvl;
-	int err, i;
-	char *name, **names;
+	char *name;
 	ipmi_sdr_t *sdrp;
-	uint_t nelems;
-
-	if ((ihp = topo_mod_ipmi_hold(mod)) == NULL)
-		return (topo_mod_seterrno(mod, ETOPO_METHOD_UNKNOWN));
-
-	ep = topo_node_getspecific(tn);
-	if (ep == NULL) {
-		if (topo_prop_get_string(tn, TOPO_PGROUP_IPMI,
-		    TOPO_PROP_IPMI_ENTITY_PRESENT, &name, &err) == 0) {
-			/*
-			 * Some broken IPMI implementations don't export correct
-			 * entities, so referring to an entity isn't sufficient.
-			 * For these platforms, we allow the XML to specify a
-			 * single SDR record that represents the current present
-			 * state.
-			 */
-			if ((sdrp = ipmi_sdr_lookup(ihp, name)) == NULL ||
-			    ipmi_entity_present_sdr(ihp, sdrp, &present) != 0) {
-				topo_mod_dprintf(mod,
-				    "Failed to get present state of %s (%s)\n",
-				    name, ipmi_errmsg(ihp));
-				topo_mod_strfree(mod, name);
-				topo_mod_ipmi_rele(mod);
-				return (-1);
-			}
+	int err;
+	boolean_t present = B_FALSE;
+	nvlist_t *nvl;
 
-			topo_mod_dprintf(mod,
-			    "ipmi_entity_present_sdr(%s) = %d\n", name,
-			    present);
-			topo_mod_strfree(mod, name);
-		} else {
-			if (topo_prop_get_string_array(tn, TOPO_PGROUP_IPMI,
-			    TOPO_PROP_IPMI_ENTITY_REF, &names, &nelems, &err)
-			    != 0) {
-				/*
-				 * Not all nodes have an entity_ref attribute.
-				 * For these cases, return ENOTSUP so that we
-				 * fall back to the default hc presence
-				 * detection.
-				 */
-				topo_mod_ipmi_rele(mod);
-				return (topo_mod_seterrno(mod,
-				    ETOPO_METHOD_NOTSUP));
-			}
-
-			for (i = 0; i < nelems; i++)
-				if ((ep = ipmi_entity_lookup_sdr(ihp, names[i]))
-				    != NULL)
-					break;
-
-			for (i = 0; i < nelems; i++)
-				topo_mod_strfree(mod, names[i]);
-			topo_mod_free(mod, names, (nelems * sizeof (char *)));
-
-			if (ep == NULL) {
-				topo_mod_dprintf(mod,
-				    "Failed to get present state of %s=%d\n",
-				    topo_node_name(tn), topo_node_instance(tn));
-				topo_mod_ipmi_rele(mod);
-				return (-1);
-			}
-			topo_node_setspecific(tn, ep);
-		}
-	}
+	err = ipmi_find_entity(mod, tn, &ihp, &ep, &name, &sdrp);
+	if (err != 0)
+		return (err);
 
 	if (ep != NULL) {
 		if (ipmi_entity_present(ihp, ep, &present) != 0) {
 			topo_mod_dprintf(mod,
 			    "ipmi_entity_present() failed: %s",
 			    ipmi_errmsg(ihp));
+			topo_mod_strfree(mod, name);
 			topo_mod_ipmi_rele(mod);
 			return (-1);
 		}
@@ -167,8 +193,21 @@
 		topo_mod_dprintf(mod,
 		    "ipmi_entity_present(%d, %d) = %d\n", ep->ie_type,
 		    ep->ie_instance, present);
+	} else if (sdrp != NULL) {
+		if (ipmi_entity_present_sdr(ihp, sdrp, &present) != 0) {
+			topo_mod_dprintf(mod,
+			    "Failed to get present state of %s (%s)\n",
+			    name, ipmi_errmsg(ihp));
+			topo_mod_strfree(mod, name);
+			topo_mod_ipmi_rele(mod);
+			return (-1);
+		}
+
+		topo_mod_dprintf(mod, "ipmi_entity_present_sdr(%s) = %d\n",
+		    name, present);
 	}
 
+	topo_mod_strfree(mod, name);
 	topo_mod_ipmi_rele(mod);
 
 	if (topo_mod_nvalloc(mod, &nvl, NV_UNIQUE_NAME) != 0)
@@ -185,6 +224,88 @@
 }
 
 /*
+ * Check whether an IPMI entity is a sensor that is unavailable
+ */
+static int
+ipmi_check_sensor(ipmi_handle_t *ihp, ipmi_entity_t *ep, const char *name,
+    ipmi_sdr_t *sdrp, void *data)
+{
+	ipmi_sdr_full_sensor_t *fsp;
+	ipmi_sdr_compact_sensor_t *csp;
+	uint8_t sensor_number;
+	ipmi_sensor_reading_t *reading;
+
+	switch (sdrp->is_type) {
+	case IPMI_SDR_TYPE_FULL_SENSOR:
+		fsp = (ipmi_sdr_full_sensor_t *)sdrp->is_record;
+		sensor_number = fsp->is_fs_number;
+		break;
+
+	case IPMI_SDR_TYPE_COMPACT_SENSOR:
+		csp = (ipmi_sdr_compact_sensor_t *)sdrp->is_record;
+		sensor_number = csp->is_cs_number;
+		break;
+
+	default:
+		return (0);
+	}
+
+	reading = ipmi_get_sensor_reading(ihp, sensor_number);
+	if (reading != NULL && reading->isr_state_unavailable)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Determine if the entity is unusable
+ */
+/*ARGSUSED*/
+static int
+ipmi_unusable(topo_mod_t *mod, tnode_t *tn, topo_version_t version,
+    nvlist_t *in, nvlist_t **out)
+{
+	ipmi_handle_t *ihp;
+	ipmi_entity_t *ep;
+	char *name;
+	ipmi_sdr_t *sdrp;
+	int err;
+	boolean_t unusable = B_FALSE;
+	nvlist_t *nvl;
+
+	err = ipmi_find_entity(mod, tn, &ihp, &ep, &name, &sdrp);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * Check whether the IPMI presented us with an entity for a
+	 * sensor that is unavailable.
+	 */
+	if (ep != NULL) {
+		unusable = (ipmi_entity_iter_sdr(ihp, ep, ipmi_check_sensor,
+		    NULL) != 0);
+	} else if (sdrp != NULL) {
+		unusable = (ipmi_check_sensor(ihp, NULL, NULL, sdrp,
+		    NULL) != 0);
+	}
+
+	topo_mod_strfree(mod, name);
+	topo_mod_ipmi_rele(mod);
+
+	if (topo_mod_nvalloc(mod, &nvl, NV_UNIQUE_NAME) != 0)
+		return (topo_mod_seterrno(mod, EMOD_FMRI_NVL));
+
+	if (nvlist_add_uint32(nvl, TOPO_METH_UNUSABLE_RET, unusable) != 0) {
+		nvlist_free(nvl);
+		return (topo_mod_seterrno(mod, EMOD_FMRI_NVL));
+	}
+
+	*out = nvl;
+
+	return (0);
+}
+
+/*
  * This determines if the entity has a FRU locator record set, in which case we
  * treat this as a FRU, even if it's part of an association.
  */
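A hedged sketch of how a libtopo consumer might drive the new unusable
method: it assumes the public topo_method_invoke() interface, that passing a
NULL input nvlist is acceptable here (ipmi_unusable() ignores its `in`
argument), and that `node` was obtained from a topo walker. Error handling
is deliberately minimal.

#include <sys/types.h>
#include <fm/libtopo.h>

static boolean_t
node_is_unusable(tnode_t *node)
{
	nvlist_t *out = NULL;
	uint32_t unusable = 0;
	int err;

	if (topo_method_invoke(node, TOPO_METH_UNUSABLE,
	    TOPO_METH_UNUSABLE_VERSION, NULL, &out, &err) != 0)
		return (B_FALSE);	/* method absent or failed */

	(void) nvlist_lookup_uint32(out, TOPO_METH_UNUSABLE_RET, &unusable);
	nvlist_free(out);
	return (unusable != 0);
}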
--- a/usr/src/lib/libipmp/Makefile.com	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/libipmp/Makefile.com	Thu Nov 14 23:30:04 2019 +0000
@@ -21,6 +21,7 @@
 #
 # Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
+# Copyright 2019 Joyent, Inc.
 #
 
 LIBRARY =	libipmp.a
@@ -39,6 +40,10 @@
 CFLAGS +=	$(CCVERBOSE)
 CPPFLAGS +=	-D_REENTRANT -I$(SRCDIR)
 
+# ipmp_snap_take() generates false double free and dereferencing freed memory
+# errors
+pics/ipmp_query.o := SMOFF = check_free
+
 .KEEP_STATE:
 
 all: $(LIBS)
--- a/usr/src/lib/pkcs11/pkcs11_softtoken/common/softAESCrypt.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/pkcs11/pkcs11_softtoken/common/softAESCrypt.c	Thu Nov 14 23:30:04 2019 +0000
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  * Copyright 2017 Jason King.
  */
 
@@ -349,7 +349,7 @@
 		return (CKR_ARGUMENTS_BAD);
 	}
 
-	remainder = ulDataLen & (AES_BLOCK_LEN - 1);
+	remainder = ulDataLen % AES_BLOCK_LEN;
 
 	/*
 	 * CTR, CCM, CMAC, and GCM modes do not require the plaintext
@@ -372,23 +372,24 @@
 		}
 	}
 
-	switch (aes_ctx->ac_flags & (CMAC_MODE|CCM_MODE|GCM_MODE)) {
-	case CCM_MODE:
+	switch (mech) {
+	case CKM_AES_CCM:
 		length_needed = ulDataLen + aes_ctx->ac_mac_len;
 		break;
-	case GCM_MODE:
+	case CKM_AES_GCM:
 		length_needed = ulDataLen + aes_ctx->ac_tag_len;
 		break;
-	case CMAC_MODE:
+	case CKM_AES_CMAC:
+	case CKM_AES_CMAC_GENERAL:
 		length_needed = AES_BLOCK_LEN;
 		break;
+	case CKM_AES_CBC_PAD:
+		/* CKM_AES_CBC_PAD always adds 1..AES_BLOCK_LEN of padding */
+		length_needed = ulDataLen + AES_BLOCK_LEN - remainder;
+		break;
 	default:
 		length_needed = ulDataLen;
-
-		/* CKM_AES_CBC_PAD out pads to a multiple of AES_BLOCK_LEN */
-		if (mech == CKM_AES_CBC_PAD) {
-			length_needed += AES_BLOCK_LEN - remainder;
-		}
+		break;
 	}
 
 	if (pEncryptedData == NULL) {
@@ -424,58 +425,82 @@
 		out.cd_offset = *pulEncryptedDataLen;
 	}
 
-	/*
-	 * As CKM_AES_CTR is a stream cipher, ctr_mode_final is always
-	 * invoked in the _update() functions, so we do not need to call it
-	 * here.
-	 */
-	if (mech == CKM_AES_CBC_PAD) {
+	switch (mech) {
+	case CKM_AES_CBC_PAD: {
 		/*
 		 * aes_encrypt_contiguous_blocks() accumulates plaintext
-		 * in aes_ctx and then encrypts once it has accumulated
-		 * a multiple of AES_BLOCK_LEN bytes of plaintext (through one
-		 * or more calls).  Any leftover plaintext is left in aes_ctx
-		 * for subsequent calls.  If there is any remaining plaintext
-		 * at the end, we pad it out to to AES_BLOCK_LEN using the
-		 * amount of padding to add as the value of the pad bytes
-		 * (i.e. PKCS#7 padding) and call
-		 * aes_encrypt_contiguous_blocks() one last time.
+		 * in aes_ctx until it has at least one full block of
+		 * plaintext.  Any partial blocks of data remaining after
+		 * encrypting are left for subsequent calls to
+		 * aes_encrypt_contiguous_blocks().  If the input happened
+		 * to be an exact multiple of AES_BLOCK_LEN, we must still
+		 * append a block of padding (a full block in that case) so
+		 * that the correct amount of padding to remove is known
+		 * during decryption.
 		 *
-		 * Even when the input is already a multiple of AES_BLOCK_LEN,
-		 * we must add an additional full block so that we can determine
-		 * the amount of padding to remove during decryption (by
-		 * examining the last byte of the decrypted ciphertext).
+		 * soft_add_pkcs7_padding() is a bit overkill -- we just
+		 * create a block filled with the pad amount using memset(),
+		 * and encrypt 'amt' bytes of the block to pad out the input.
 		 */
+		char block[AES_BLOCK_LEN];
 		size_t amt = AES_BLOCK_LEN - remainder;
-		char block[AES_BLOCK_LEN];
 
-		ASSERT3U(remainder, ==, aes_ctx->ac_remainder_len);
-		ASSERT3U(amt + remainder, ==, AES_BLOCK_LEN);
+		VERIFY3U(remainder, ==, aes_ctx->ac_remainder_len);
 
-		/*
-		 * The existing soft_add_pkcs7_padding() interface is
-		 * overkill for what is effectively a memset().  A better
-		 * RFE would be to create a CBC_PAD mode.
-		 */
 		(void) memset(block, amt & 0xff, sizeof (block));
 		rc = aes_encrypt_contiguous_blocks(aes_ctx, block, amt, &out);
-	} else if (aes_ctx->ac_flags & CCM_MODE) {
+		rv = crypto2pkcs11_error_number(rc);
+		explicit_bzero(block, sizeof (block));
+		break;
+	}
+	case CKM_AES_CCM:
 		rc = ccm_encrypt_final((ccm_ctx_t *)aes_ctx, &out,
 		    AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
-	} else if (aes_ctx->ac_flags & GCM_MODE) {
+		rv = crypto2pkcs11_error_number(rc);
+		break;
+	case CKM_AES_GCM:
 		rc = gcm_encrypt_final((gcm_ctx_t *)aes_ctx, &out,
 		    AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
 		    aes_xor_block);
-	} else if (aes_ctx->ac_flags & CMAC_MODE) {
+		rv = crypto2pkcs11_error_number(rc);
+		break;
+	case CKM_AES_CMAC:
+	case CKM_AES_CMAC_GENERAL:
 		rc = cmac_mode_final((cbc_ctx_t *)aes_ctx, &out,
 		    aes_encrypt_block, aes_xor_block);
+		rv = crypto2pkcs11_error_number(rc);
 		aes_ctx->ac_remainder_len = 0;
+		break;
+	case CKM_AES_CTR:
+		/*
+		 * As CKM_AES_CTR is a stream cipher, ctr_mode_final is always
+		 * invoked in the xx_update() functions, so we do not need to
+		 * call it again here.
+		 */
+		break;
+	case CKM_AES_ECB:
+	case CKM_AES_CBC:
+		/*
+		 * These mechanisms have no xx_final function to invoke.
+		 */
+		break;
+	default:
+		rv = CKR_MECHANISM_INVALID;
+		break;
 	}
 
 cleanup:
-	if (rc != CRYPTO_SUCCESS && rv == CKR_OK) {
+	switch (rv) {
+	case CKR_OK:
+		*pulEncryptedDataLen = out.cd_offset;
+		break;
+	case CKR_BUFFER_TOO_SMALL:
+		/* *pulEncryptedDataLen was set earlier */
+		break;
+	default:
+		/* something else failed */
 		*pulEncryptedDataLen = 0;
-		rv = crypto2pkcs11_error_number(rc);
+		break;
 	}
 
 	(void) pthread_mutex_lock(&session_p->session_mutex);
@@ -483,8 +508,124 @@
 	session_p->encrypt.context = NULL;
 	(void) pthread_mutex_unlock(&session_p->session_mutex);
 
-	if (rv == CKR_OK) {
-		*pulEncryptedDataLen = out.cd_offset;
+	return (rv);
+}
+
+static CK_RV
+soft_aes_cbc_pad_decrypt(aes_ctx_t *aes_ctx, CK_BYTE_PTR pEncryptedData,
+    CK_ULONG ulEncryptedDataLen, crypto_data_t *out_orig)
+{
+	aes_ctx_t *ctx = aes_ctx;
+	uint8_t *buf = NULL;
+	uint8_t *outbuf = (uint8_t *)out_orig->cd_raw.iov_base;
+	crypto_data_t out = *out_orig;
+	size_t i;
+	int rc;
+	CK_RV rv = CKR_OK;
+	uint8_t pad_len;
+	boolean_t speculate = B_FALSE;
+
+	/*
+	 * Just a query for the output size.  When the output buffer is
+	 * NULL, we are allowed to return a size slightly larger than
+	 * necessary.  We know the output will never be larger than the
+	 * input ciphertext, so we use that as an estimate.
+	 */
+	if (out_orig->cd_raw.iov_base == NULL) {
+		out_orig->cd_length = ulEncryptedDataLen;
+		return (CKR_OK);
+	}
+
+	/*
+	 * The output plaintext size will be 1..AES_BLOCK_LEN bytes
+	 * smaller than the input ciphertext.  However we cannot know
+	 * exactly how much smaller until we decrypt the entire
+	 * input ciphertext.  If we are unsure we have enough output buffer
+	 * space, we have to allocate our own memory to hold the output,
+	 * then see if we have enough room to hold the result.
+	 *
+	 * Unfortunately, having an output buffer that's too small does
+	 * not terminate the operation, nor are we allowed to return
+	 * partial results.  Therefore we must also duplicate the initial
+	 * aes_ctx so that this can potentially be run again.
+	 */
+	if (out_orig->cd_length < ulEncryptedDataLen) {
+		void *ks = malloc(aes_ctx->ac_keysched_len);
+
+		ctx = malloc(sizeof (*aes_ctx));
+		buf = malloc(ulEncryptedDataLen);
+		if (ks == NULL || ctx == NULL || buf == NULL) {
+			free(ks);
+			free(ctx);
+			free(buf);
+			return (CKR_HOST_MEMORY);
+		}
+
+		bcopy(aes_ctx, ctx, sizeof (*ctx));
+		bcopy(aes_ctx->ac_keysched, ks, aes_ctx->ac_keysched_len);
+		ctx->ac_keysched = ks;
+
+		out.cd_length = ulEncryptedDataLen;
+		out.cd_raw.iov_base = (char *)buf;
+		out.cd_raw.iov_len = ulEncryptedDataLen;
+		outbuf = buf;
+
+		speculate = B_TRUE;
+	}
+
+	rc = aes_decrypt_contiguous_blocks(ctx, (char *)pEncryptedData,
+	    ulEncryptedDataLen, &out);
+	if (rc != CRYPTO_SUCCESS) {
+		out_orig->cd_offset = 0;
+		rv = CKR_FUNCTION_FAILED;
+		goto done;
+	}
+
+	/*
+	 * RFC5652 6.3 The amount of padding must be
+	 * block_sz - (len mod block_size).  This means
+	 * the amount of padding must always be in the
+	 * range [1..block_size].
+	 */
+	pad_len = outbuf[ulEncryptedDataLen - 1];
+	if (pad_len == 0 || pad_len > AES_BLOCK_LEN) {
+		rv = CKR_ENCRYPTED_DATA_INVALID;
+		goto done;
+	}
+	out.cd_offset -= pad_len;
+
+	/*
+	 * Verify pad values, trying to do so in as close to constant
+	 * time as possible.
+	 */
+	for (i = ulEncryptedDataLen - pad_len; i < ulEncryptedDataLen; i++) {
+		if (outbuf[i] != pad_len) {
+			rv = CKR_ENCRYPTED_DATA_INVALID;
+		}
+	}
+	if (rv != CKR_OK) {
+		goto done;
+	}
+
+	if (speculate) {
+		if (out.cd_offset <= out_orig->cd_length) {
+			bcopy(out.cd_raw.iov_base, out_orig->cd_raw.iov_base,
+			    out.cd_offset);
+		} else {
+			rv = CKR_BUFFER_TOO_SMALL;
+		}
+	}
+
+	/*
+	 * No matter what, we report the exact size required.
+	 */
+	out_orig->cd_offset = out.cd_offset;
+
+done:
+	freezero(buf, ulEncryptedDataLen);
+	if (ctx != aes_ctx) {
+		VERIFY(speculate);
+		soft_aes_free_ctx(ctx);
 	}
 
 	return (rv);
@@ -518,7 +659,7 @@
 		return (CKR_ARGUMENTS_BAD);
 	}
 
-	remainder = ulEncryptedDataLen & (AES_BLOCK_LEN - 1);
+	remainder = ulEncryptedDataLen % AES_BLOCK_LEN;
 
 	/*
 	 * CTR, CCM, CMAC, and GCM modes do not require the ciphertext
@@ -540,6 +681,16 @@
 		}
 	}
 
+	if (mech == CKM_AES_CBC_PAD) {
+		rv = soft_aes_cbc_pad_decrypt(aes_ctx, pEncryptedData,
+		    ulEncryptedDataLen, &out);
+		if (pData == NULL || rv == CKR_BUFFER_TOO_SMALL) {
+			*pulDataLen = out.cd_offset;
+			return (rv);
+		}
+		goto cleanup;
+	}
+
 	switch (aes_ctx->ac_flags & (CCM_MODE|GCM_MODE)) {
 	case CCM_MODE:
 		length_needed = aes_ctx->ac_processed_data_len;
@@ -614,9 +765,7 @@
 	 * invoked in the _update() functions, so we do not need to call it
 	 * here.
 	 */
-	if (mech == CKM_AES_CBC_PAD) {
-		rv = soft_remove_pkcs7_padding(pData, *pulDataLen, pulDataLen);
-	} else if (aes_ctx->ac_flags & CCM_MODE) {
+	if (aes_ctx->ac_flags & CCM_MODE) {
 		ASSERT3U(aes_ctx->ac_processed_data_len, ==,
 		    aes_ctx->ac_data_len);
 		ASSERT3U(aes_ctx->ac_processed_mac_len, ==,
@@ -625,19 +774,18 @@
 		rc = ccm_decrypt_final((ccm_ctx_t *)aes_ctx, &out,
 		    AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
 		    aes_xor_block);
+		rv = crypto2pkcs11_error_number(rc);
 	} else if (aes_ctx->ac_flags & GCM_MODE) {
 		rc = gcm_decrypt_final((gcm_ctx_t *)aes_ctx, &out,
 		    AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+		rv = crypto2pkcs11_error_number(rc);
 	}
 
 cleanup:
-	if (rc != CRYPTO_SUCCESS && rv == CKR_OK) {
-		rv = crypto2pkcs11_error_number(rc);
-		*pulDataLen = 0;
-	}
-
 	if (rv == CKR_OK) {
 		*pulDataLen = out.cd_offset;
+	} else {
+		*pulDataLen = 0;
 	}
 
 	(void) pthread_mutex_lock(&session_p->session_mutex);
@@ -667,24 +815,7 @@
 	int rc;
 
 	/* Check size of the output buffer */
-	if (mech == CKM_AES_CBC_PAD && (out_len <= AES_BLOCK_LEN)) {
-		/*
-		 * Since there is currently no CBC_PAD mode, we must stash any
-		 * remainder ourselves.  For all other modes,
-		 * aes_encrypt_contiguous_blocks() will call the mode specific
-		 * encrypt function and will stash any reminder if required.
-		 */
-		if (pData != NULL) {
-			uint8_t *dest = (uint8_t *)aes_ctx->ac_remainder +
-			    aes_ctx->ac_remainder_len;
-
-			bcopy(pData, dest, ulDataLen);
-			aes_ctx->ac_remainder_len += ulDataLen;
-		}
-
-		*pulEncryptedDataLen = 0;
-		return (CKR_OK);
-	} else if (aes_ctx->ac_flags & CMAC_MODE) {
+	if (aes_ctx->ac_flags & CMAC_MODE) {
 		/*
 		 * The underlying CMAC implementation handles the storing of
 		 * extra bytes and does not output any data until *_final,
@@ -725,8 +856,7 @@
 	*pulEncryptedDataLen = out.cd_offset;
 
 	if (rc != CRYPTO_SUCCESS) {
-		rv = CKR_FUNCTION_FAILED;
-		goto done;
+		return (CKR_FUNCTION_FAILED);
 	}
 
 	/*
@@ -738,11 +868,7 @@
 		rc = ctr_mode_final((ctr_ctx_t *)aes_ctx, &out,
 		    aes_encrypt_block);
 	}
-
-done:
-	if (rc != CRYPTO_SUCCESS && rv == CKR_OK) {
-		rv = crypto2pkcs11_error_number(rc);
-	}
+	rv = crypto2pkcs11_error_number(rc);
 
 	return (rv);
 }
@@ -752,6 +878,7 @@
     CK_ULONG ulEncryptedDataLen, CK_BYTE_PTR pData, CK_ULONG_PTR pulDataLen)
 {
 	aes_ctx_t *aes_ctx = session_p->decrypt.context;
+	uint8_t *buffer_block = NULL;
 	crypto_data_t out = {
 		.cd_format = CRYPTO_DATA_RAW,
 		.cd_offset = 0,
@@ -761,35 +888,217 @@
 	};
 	CK_MECHANISM_TYPE mech = session_p->decrypt.mech.mechanism;
 	CK_RV rv = CKR_OK;
-	size_t out_len = 0;
+	size_t in_len = ulEncryptedDataLen;
+	size_t out_len;
 	int rc = CRYPTO_SUCCESS;
 
-	if ((aes_ctx->ac_flags & (CCM_MODE|GCM_MODE)) == 0) {
-		out_len = aes_ctx->ac_remainder_len + ulEncryptedDataLen;
+	switch (mech) {
+	case CKM_AES_CCM:
+	case CKM_AES_GCM:
+		out_len = 0;
+		break;
+	case CKM_AES_CBC_PAD:
+		/*
+		 * For CKM_AES_CBC_PAD, we use the existing code for CBC
+		 * mode in libsoftcrypto (which itself uses the code in
+		 * usr/src/common/crypto/modes for CBC mode).  For
+		 * non-padding AES CBC mode, aes_decrypt_contiguous_blocks()
+		 * will accumulate ciphertext in aes_ctx->ac_remainder until
+		 * there is at least AES_BLOCK_LEN bytes of ciphertext available
+		 * to decrypt.  At that point, as many blocks of AES_BLOCK_LEN
+		 * sized ciphertext blocks are decrypted.  Any remainder is
+		 * copied into aes_ctx->ac_remainder for decryption in
+		 * subsequent calls to aes_decrypt_contiguous_blocks().
+		 *
+		 * When PKCS#7 padding is used, the buffering
+		 * aes_decrypt_contiguous_blocks() performs is insufficient.
+		 * PKCS#7 padding always adds [1..AES_BLOCK_LEN] bytes of
+		 * padding to plaintext, so the resulting ciphertext is always
+		 * larger than the input plaintext.  However we cannot know
+		 * which block is the final block (and needs its padding
+		 * stripped) until C_DecryptFinal() is called.  Additionally,
+		 * it is permissible for a caller to use buffers sized to the
+		 * output plaintext -- i.e. smaller than the input ciphertext.
+		 * This leads to a more complicated buffering/accumulation
+		 * strategy than what aes_decrypt_contiguous_blocks() provides
+		 * us.
+		 *
+		 * Our buffering strategy works as follows:
+		 *  For each call to C_DecryptUpdate, we calculate the
+		 *  total amount of ciphertext available (buffered plus what's
+		 *  passed in) as the initial output size (out_len). Based
+		 *  on the value of out_len, there are three possibilities:
+		 *
+		 *  1. We have less than AES_BLOCK_LEN + 1 bytes of
+		 *  ciphertext available. Accumulate the ciphertext in
+		 *  aes_ctx->ac_remainder. Note that while we could let
+		 *  aes_decrypt_contiguous_blocks() buffer the input for us
+		 *  when we have less than AES_BLOCK_LEN bytes, we would still
+		 *  need to buffer when we have exactly AES_BLOCK_LEN
+		 *  bytes available, so we just handle both situations with
+		 *  one if clause.
+		 *
+		 *  2. We have at least AES_BLOCK_LEN + 1 bytes of
+		 *  ciphertext, and the total amount available is also an
+		 *  exact multiple of AES_BLOCK_LEN. We cannot know if the
+		 *  last block of input is the final block (yet), but we
+		 *  are an exact multiple of AES_BLOCK_LEN, and we have
+		 *  at least AES_BLOCK_LEN + 1 bytes available, therefore
+		 *  there must be at least 2 * AES_BLOCK_LEN bytes of input
+		 *  ciphertext available. It also means there's at least one
+		 *  full block of input ciphertext that can be decrypted. We
+		 *  reduce the size of the input (in_len) given to
+		 *  aes_decrypt_contiguous_blocks() by AES_BLOCK_LEN to
+		 *  prevent it from decrypting the last full block of data.
+		 *  aes_decrypt_contiguous_blocks() will then decrypt any
+		 *  buffered data in aes_ctx->ac_remainder, and then any
+		 *  input data passed. Since we have an exact multiple of
+		 *  AES_BLOCK_LEN, aes_ctx->ac_remainder will be empty
+		 *  (aes_ctx->ac_remainder_len == 0) once
+		 *  aes_decrypt_contiguous_blocks() completes, and we can
+		 *  copy the last block of data into aes_ctx->ac_remainder.
+		 *
+		 *  3. We have at least AES_BLOCK_LEN + 1 bytes of
+		 *  ciphertext, but the total amount available is not an
+		 *  exact multiple of AES_BLOCK_LEN. We decrypt all of the
+		 *  full blocks of data we have. The remainder will be
+		 *  less than AES_BLOCK_LEN bytes. We let
+		 *  aes_decrypt_contiguous_blocks() buffer the remainder
+		 *  for us since it would normally do this anyway. Since there
+		 *  is a remainder, the full blocks that are present cannot
+		 *  include the last block, so we can safely decrypt them all.
+		 *
+		 * Some things to note:
+		 *  - The above semantics will cause aes_ctx->ac_remainder to
+		 *  never accumulate more than AES_BLOCK_LEN bytes of
+		 *  ciphertext. Once we reach at least AES_BLOCK_LEN + 1 bytes,
+		 *  we will decrypt the contents of aes_ctx->ac_remainder by one
+		 *  of the last two scenarios described above.
+		 *
+		 *  - We must always end up with AES_BLOCK_LEN bytes of data
+		 *  in aes_ctx->ac_remainder when C_DecryptFinal() is called.
+		 *  The first and third scenarios above may leave
+		 *  aes_ctx->ac_remainder with less than AES_BLOCK_LEN bytes,
+		 *  however the total size of the input ciphertext that's
+		 *  been decrypted must end up a multiple of AES_BLOCK_LEN.
+		 *  Therefore, we can always assume when there is a
+		 *  remainder that more data is coming.  If we do end up
+		 *  with a remainder that's not AES_BLOCK_LEN bytes long
+		 *  when C_DecryptFinal() is called, the input is assumed
+		 *  invalid and we return CKR_DATA_LEN_RANGE (see
+		 *  soft_aes_decrypt_final()).
+		 */
 
-		if (mech == CKM_AES_CBC_PAD && out_len <= AES_BLOCK_LEN) {
+		VERIFY3U(aes_ctx->ac_remainder_len, <=, AES_BLOCK_LEN);
+		if (in_len >= SIZE_MAX - AES_BLOCK_LEN)
+			return (CKR_ENCRYPTED_DATA_LEN_RANGE);
+
+		out_len = aes_ctx->ac_remainder_len + in_len;
+
+		if (out_len <= AES_BLOCK_LEN) {
+			/*
+			 * The first scenario detailed above, accumulate
+			 * ciphertext in ac_remainder_len and return.
+			 */
 			uint8_t *dest = (uint8_t *)aes_ctx->ac_remainder +
 			    aes_ctx->ac_remainder_len;
 
-			bcopy(pEncryptedData, dest, ulEncryptedDataLen);
-			aes_ctx->ac_remainder_len += ulEncryptedDataLen;
+			bcopy(pEncryptedData, dest, in_len);
+			aes_ctx->ac_remainder_len += in_len;
+			*pulDataLen = 0;
+
+			/*
+			 * Since we aren't writing an output, and are returning
+			 * here, we don't need to adjust out_len -- we never
+			 * reach the output buffer size checks after the
+			 * switch statement.
+			 */
 			return (CKR_OK);
+		} else if (out_len % AES_BLOCK_LEN == 0) {
+			/*
+			 * The second scenario described above. The total amount
+			 * available is a multiple of AES_BLOCK_LEN, and
+			 * we have more than one block.  We reduce the
+			 * input size (in_len) by AES_BLOCK_LEN. We also
+			 * reduce the output size (out_len) by AES_BLOCK_LEN
+			 * for the output buffer size checks that follow
+			 * the switch statement. In certain situations,
+			 * PKCS#11 requires this to be an exact value, so
+			 * the size check cannot occur for CKM_AES_CBC_PAD
+			 * until after we've determined which scenario we
+			 * have.
+			 *
+			 * Because we never accumulate more than AES_BLOCK_LEN
+			 * bytes in aes_ctx->ac_remainder, when we are in
+			 * this scenario, the following VERIFYs should always
+			 * be true (and serve as a final safeguard against
+			 * underflow).
+			 */
+			VERIFY3U(in_len, >=, AES_BLOCK_LEN);
+
+			buffer_block = pEncryptedData + in_len - AES_BLOCK_LEN;
+
+			in_len -= AES_BLOCK_LEN;
+
+			/*
+			 * This else clause explicitly checks
+			 * out_len > AES_BLOCK_LEN, so this is also safe.
+			 */
+			out_len -= AES_BLOCK_LEN;
+		} else {
+			/*
+			 * The third scenario above.  We have at least
+			 * AES_BLOCK_LEN + 1 bytes, but the total amount of
+			 * input ciphertext available is not an exact
+			 * multiple of AES_BLOCK_LEN.  Let
+			 * aes_decrypt_contiguous_blocks() handle the
+			 * buffering of the remainder.  Update the
+			 * output size to reflect the actual amount of output
+			 * we want to emit for the checks after the switch
+			 * statement.
+			 */
+			out_len &= ~(AES_BLOCK_LEN - 1);
 		}
+		break;
+	default:
+		out_len = aes_ctx->ac_remainder_len + in_len;
 		out_len &= ~(AES_BLOCK_LEN - 1);
+		break;
 	}
 
+	/*
+	 * C_DecryptUpdate() verifies that pulDataLen is not NULL prior
+	 * to calling soft_decrypt_common() (which calls us).
+	 */
+
 	if (pData == NULL) {
+		/*
+		 * If the output buffer (pData) is NULL, that means the
+		 * caller is inquiring about the size buffer needed to
+		 * complete the C_DecryptUpdate() request.  While we are
+		 * permitted to set *pulDataLen to an estimated value that can
+		 * be 'slightly' larger than the actual value required,
+		 * since we know the exact size we need, we stick with the
+		 * exact size.
+		 */
 		*pulDataLen = out_len;
 		return (CKR_OK);
 	}
 
 	if (*pulDataLen < out_len) {
+		/*
+		 * Not an inquiry, but the output buffer isn't large enough.
+		 * PKCS#11 requires that this scenario not fail fatally (as
+		 * well as return a different error value). This situation
+		 * also requires us to set *pulDataLen to the _exact_ size
+		 * required.
+		 */
 		*pulDataLen = out_len;
 		return (CKR_BUFFER_TOO_SMALL);
 	}
 
 	rc = aes_decrypt_contiguous_blocks(aes_ctx, (char *)pEncryptedData,
-	    ulEncryptedDataLen, &out);
+	    in_len, &out);
 
 	if (rc != CRYPTO_SUCCESS) {
 		rv = CKR_FUNCTION_FAILED;
@@ -798,16 +1107,34 @@
 
 	*pulDataLen = out.cd_offset;
 
-	if ((aes_ctx->ac_flags & CTR_MODE) && (aes_ctx->ac_remainder_len > 0)) {
+	switch (mech) {
+	case CKM_AES_CTR:
+		if (aes_ctx->ac_remainder_len == 0) {
+			break;
+		}
 		rc = ctr_mode_final((ctr_ctx_t *)aes_ctx, &out,
 		    aes_encrypt_block);
+		rv = crypto2pkcs11_error_number(rc);
+		break;
+	case CKM_AES_CBC_PAD:
+		if (buffer_block == NULL) {
+			break;
+		}
+
+		VERIFY0(aes_ctx->ac_remainder_len);
+
+		/*
+		 * We had multiple blocks of data to decrypt with nothing
+		 * left over and deferred decrypting the last block of data.
+		 * Copy it into aes_ctx->ac_remainder to decrypt on the
+		 * next update call (or final).
+		 */
+		bcopy(buffer_block, aes_ctx->ac_remainder, AES_BLOCK_LEN);
+		aes_ctx->ac_remainder_len = AES_BLOCK_LEN;
+		break;
 	}
 
 done:
-	if (rc != CRYPTO_SUCCESS && rv == CKR_OK) {
-		rv = crypto2pkcs11_error_number(rc);
-	}
-
 	return (rv);
 }
 
@@ -823,60 +1150,104 @@
 		.cd_raw.iov_base = (char *)pLastEncryptedPart,
 		.cd_raw.iov_len = *pulLastEncryptedPartLen
 	};
-	int rc = CRYPTO_SUCCESS;
+	CK_MECHANISM_TYPE mech = session_p->encrypt.mech.mechanism;
 	CK_RV rv = CKR_OK;
+	size_t out_len;
+	int rc = CRYPTO_SUCCESS;
 
-	if (session_p->encrypt.mech.mechanism == CKM_AES_CBC_PAD) {
+	switch (mech) {
+	case CKM_AES_CBC_PAD:
+		/*
+		 * We always add 1..AES_BLOCK_LEN of padding to the input
+		 * plaintext to round up to a multiple of AES_BLOCK_LEN.
+		 * During encryption, we never output a partially encrypted
+		 * block (that is the amount encrypted by each call of
+		 * C_EncryptUpdate() is always either 0 or n * AES_BLOCK_LEN).
+		 * As a result, at the end of the encryption operation, we
+		 * output AES_BLOCK_LEN bytes of data -- this could be a full
+		 * block of padding, or a combination of data + padding.
+		 */
+		out_len = AES_BLOCK_LEN;
+		break;
+	case CKM_AES_CTR:
+		out_len = aes_ctx->ac_remainder_len;
+		break;
+	case CKM_AES_CCM:
+		out_len = aes_ctx->ac_remainder_len +
+		    aes_ctx->acu.acu_ccm.ccm_mac_len;
+		break;
+	case CKM_AES_GCM:
+		out_len = aes_ctx->ac_remainder_len +
+		    aes_ctx->acu.acu_gcm.gcm_tag_len;
+		break;
+	case CKM_AES_CMAC:
+	case CKM_AES_CMAC_GENERAL:
+		out_len = AES_BLOCK_LEN;
+		break;
+	default:
+		/*
+		 * Every other AES mechanism requires full blocks of
+		 * input.  If the input was not an exact multiple of
+		 * AES_BLOCK_LEN, it is a fatal error.
+		 */
+		if (aes_ctx->ac_remainder_len > 0) {
+			rv = CKR_DATA_LEN_RANGE;
+			goto done;
+		}
+		out_len = 0;
+	}
+
+	if (*pulLastEncryptedPartLen < out_len || pLastEncryptedPart == NULL) {
+		*pulLastEncryptedPartLen = out_len;
+		return ((pLastEncryptedPart == NULL) ?
+		    CKR_OK : CKR_BUFFER_TOO_SMALL);
+	}
+
+	switch (mech) {
+	case CKM_AES_CBC_PAD: {
 		char block[AES_BLOCK_LEN] = { 0 };
 		size_t padlen = AES_BLOCK_LEN - aes_ctx->ac_remainder_len;
 
-		(void) memset(block, padlen & 0xff, sizeof (block));
-		if (padlen > 0) {
-			rc = aes_encrypt_contiguous_blocks(aes_ctx, block,
-			    padlen, &data);
-		}
-	} else if (aes_ctx->ac_flags & CTR_MODE) {
-		if (pLastEncryptedPart == NULL) {
-			*pulLastEncryptedPartLen = aes_ctx->ac_remainder_len;
-			return (CKR_OK);
+		if (padlen == 0) {
+			padlen = AES_BLOCK_LEN;
 		}
 
-		if (aes_ctx->ac_remainder_len > 0) {
-			rc = ctr_mode_final((ctr_ctx_t *)aes_ctx, &data,
-			    aes_encrypt_block);
-			if (rc == CRYPTO_BUFFER_TOO_SMALL) {
-				rv = CKR_BUFFER_TOO_SMALL;
-			}
+		(void) memset(block, padlen & 0xff, sizeof (block));
+		rc = aes_encrypt_contiguous_blocks(aes_ctx, block,
+		    padlen, &data);
+		explicit_bzero(block, sizeof (block));
+		break;
+	}
+	case CKM_AES_CTR:
+		if (aes_ctx->ac_remainder_len == 0) {
+			break;
 		}
-	} else if (aes_ctx->ac_flags & CCM_MODE) {
+
+		rc = ctr_mode_final((ctr_ctx_t *)aes_ctx, &data,
+		    aes_encrypt_block);
+		break;
+	case CKM_AES_CCM:
 		rc = ccm_encrypt_final((ccm_ctx_t *)aes_ctx, &data,
 		    AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
-	} else if (aes_ctx->ac_flags & GCM_MODE) {
+		break;
+	case CKM_AES_GCM:
 		rc = gcm_encrypt_final((gcm_ctx_t *)aes_ctx, &data,
 		    AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
 		    aes_xor_block);
-	} else if (aes_ctx->ac_flags & CMAC_MODE) {
-		if (pLastEncryptedPart == NULL) {
-			*pulLastEncryptedPartLen = AES_BLOCK_LEN;
-			return (CKR_OK);
-		}
-
+		break;
+	case CKM_AES_CMAC:
+	case CKM_AES_CMAC_GENERAL:
 		rc = cmac_mode_final((cbc_ctx_t *)aes_ctx, &data,
 		    aes_encrypt_block, aes_xor_block);
-	} else {
-		/*
-		 * There must be no unprocessed plaintext.
-		 * This happens if the length of the last data is not a
-		 * multiple of the AES block length.
-		 */
-		*pulLastEncryptedPartLen = 0;
-		if (aes_ctx->ac_remainder_len > 0) {
-			rv = CKR_DATA_LEN_RANGE;
-		}
+		break;
+	default:
+		break;
 	}
+	rv = crypto2pkcs11_error_number(rc);
 
-	if (rc != CRYPTO_SUCCESS && rv == CKR_OK) {
-		rv = crypto2pkcs11_error_number(rc);
+done:
+	if (rv == CKR_OK) {
+		*pulLastEncryptedPartLen = data.cd_offset;
 	}
 
 	soft_aes_free_ctx(aes_ctx);
@@ -892,6 +1263,7 @@
 	CK_MECHANISM_TYPE mech = session_p->decrypt.mech.mechanism;
 	CK_RV rv = CKR_OK;
 	int rc = CRYPTO_SUCCESS;
+	size_t out_len;
 	crypto_data_t out = {
 		.cd_format = CRYPTO_DATA_RAW,
 		.cd_offset = 0,
@@ -900,93 +1272,154 @@
 		.cd_raw.iov_len = *pulLastPartLen
 	};
 
-	if (aes_ctx->ac_remainder_len > 0) {
-		switch (mech) {
-		case CKM_AES_CBC_PAD:
-			/*
-			 * Since we cannot know the amount of padding present
-			 * until after we decrypt the final block, and since
-			 * we don't know which block is the last block until
-			 * C_DecryptFinal() is called, we must always defer
-			 * decrypting the most recent block of ciphertext
-			 * until C_DecryptFinal() is called.  As a consequence,
-			 * we should always have a remainder, and it should
-			 * always be equal to AES_BLOCK_LEN.
-			 */
+	switch (mech) {
+	case CKM_AES_CBC_PAD:
+		/*
+		 * PKCS#11 requires that a caller can discover the size of
+		 * the output buffer required by calling
+		 * C_DecryptFinal(hSession, NULL, &len) which sets
+		 * *pulLastPartLen to the size required.  However, it also
+		 * allows a call to C_DecryptFinal with a buffer (i.e.
+		 * pLastPart != NULL) that is too small to return
+		 * CKR_BUFFER_TOO_SMALL with *pulLastPartLen set to the
+		 * _exact_ size required (when pLastPart is NULL, the
+		 * implementation is allowed to set a 'slightly' larger
+		 * value than is strictly necessary).  In either case, the
+		 * caller is allowed to retry the operation (the operation
+		 * is not terminated).
+		 *
+		 * With PKCS#7 padding, we cannot determine the exact size of
+		 * the output until we decrypt the final block.  As such, the
+		 * first time for a given decrypt operation we are called,
+		 * we decrypt the final block and stash it in the aes_ctx
+		 * remainder block.  On any subsequent calls in the
+		 * current decrypt operation, we then can use the decrypted
+		 * block as necessary to provide the correct semantics.
+		 *
+		 * The cleanup of aes_ctx when the operation terminates
+		 * will take care of clearing out aes_ctx->ac_remainder_len.
+		 */
+		if ((aes_ctx->ac_flags & P11_DECRYPTED) == 0) {
+			uint8_t block[AES_BLOCK_LEN] = { 0 };
+			crypto_data_t block_out = {
+				.cd_format = CRYPTO_DATA_RAW,
+				.cd_offset = 0,
+				.cd_length = sizeof (block),
+				.cd_raw.iov_base = (char *)block,
+				.cd_raw.iov_len = sizeof (block)
+			};
+			size_t amt, i;
+			uint8_t pad_len;
+
 			if (aes_ctx->ac_remainder_len != AES_BLOCK_LEN) {
-				return (CKR_ENCRYPTED_DATA_LEN_RANGE);
-			}
-
-			if (*pulLastPartLen < AES_BLOCK_LEN) {
-				*pulLastPartLen = AES_BLOCK_LEN;
-				return (CKR_BUFFER_TOO_SMALL);
+				return (CKR_DATA_LEN_RANGE);
 			}
 
 			rc = aes_decrypt_contiguous_blocks(aes_ctx,
-			    (char *)pLastPart, AES_BLOCK_LEN, &out);
+			    (char *)block, 0, &block_out);
+			if (rc != CRYPTO_SUCCESS) {
+				explicit_bzero(block, sizeof (block));
+				return (CKR_FUNCTION_FAILED);
+			}
+
+			pad_len = block[AES_BLOCK_LEN - 1];
 
-			if (rc != CRYPTO_SUCCESS) {
-				break;
+			/*
+			 * RFC5652 6.3 The amount of padding must be
+			 * block_sz - (len mod block_size).  This means
+			 * the amount of padding must always be in the
+			 * range [1..block_size].
+			 */
+			if (pad_len == 0 || pad_len > AES_BLOCK_LEN) {
+				rv = CKR_ENCRYPTED_DATA_INVALID;
+				explicit_bzero(block, sizeof (block));
+				goto done;
+			}
+			amt = AES_BLOCK_LEN - pad_len;
+
+			/*
+			 * Verify the padding is correct.  Try to do so
+			 * in as constant a time as possible.
+			 */
+			for (i = amt; i < AES_BLOCK_LEN; i++) {
+				if (block[i] != pad_len) {
+					rv = CKR_ENCRYPTED_DATA_INVALID;
+				}
+			}
+			if (rv != CKR_OK) {
+				explicit_bzero(block, sizeof (block));
+				goto done;
 			}
 
-			rv = soft_remove_pkcs7_padding(pLastPart, AES_BLOCK_LEN,
-			    pulLastPartLen);
-			break;
-		case CKM_AES_CTR:
-			rc = ctr_mode_final((ctr_ctx_t *)aes_ctx, &out,
-			    aes_encrypt_block);
-			break;
-		default:
-			/* There must be no unprocessed ciphertext */
-			return (CKR_ENCRYPTED_DATA_LEN_RANGE);
+			bcopy(block, aes_ctx->ac_remainder, amt);
+			explicit_bzero(block, sizeof (block));
+
+			aes_ctx->ac_flags |= P11_DECRYPTED;
+			aes_ctx->ac_remainder_len = amt;
 		}
-	} else {
+
+		out_len = aes_ctx->ac_remainder_len;
+		break;
+	case CKM_AES_CTR:
+		out_len = aes_ctx->ac_remainder_len;
+		break;
+	case CKM_AES_CCM:
+		out_len = aes_ctx->ac_data_len;
+		break;
+	case CKM_AES_GCM:
+		out_len = aes_ctx->acu.acu_gcm.gcm_processed_data_len -
+		    aes_ctx->acu.acu_gcm.gcm_tag_len;
+		break;
+	default:
 		/*
-		 * We should never have no remainder for AES_CBC_PAD -- see
-		 * above.
+		 * The remaining mechanisms require an exact multiple of
+		 * AES_BLOCK_LEN of ciphertext.  Any other value is an error.
 		 */
-		ASSERT3U(mech, !=, CKM_AES_CBC_PAD);
+		if (aes_ctx->ac_remainder_len > 0) {
+			rv = CKR_DATA_LEN_RANGE;
+			goto done;
+		}
+		out_len = 0;
+		break;
 	}
 
-	if (aes_ctx->ac_flags & CCM_MODE) {
-		size_t pt_len = aes_ctx->ac_data_len;
+	if (*pulLastPartLen < out_len || pLastPart == NULL) {
+		*pulLastPartLen = out_len;
+		return ((pLastPart == NULL) ? CKR_OK : CKR_BUFFER_TOO_SMALL);
+	}
 
-		if (*pulLastPartLen < pt_len) {
-			*pulLastPartLen = pt_len;
-			return (CKR_BUFFER_TOO_SMALL);
+	switch (mech) {
+	case CKM_AES_CBC_PAD:
+		*pulLastPartLen = out_len;
+		if (out_len == 0) {
+			break;
 		}
-
-		ASSERT3U(aes_ctx->ac_processed_data_len, ==, pt_len);
+		bcopy(aes_ctx->ac_remainder, pLastPart, out_len);
+		out.cd_offset += out_len;
+		break;
+	case CKM_AES_CCM:
+		ASSERT3U(aes_ctx->ac_processed_data_len, ==, out_len);
 		ASSERT3U(aes_ctx->ac_processed_mac_len, ==,
 		    aes_ctx->ac_mac_len);
 
 		rc = ccm_decrypt_final((ccm_ctx_t *)aes_ctx, &out,
 		    AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
 		    aes_xor_block);
-
-		if (rc != CRYPTO_SUCCESS) {
-			*pulLastPartLen = out.cd_offset;
-		}
-	} else if (aes_ctx->ac_flags & GCM_MODE) {
-		gcm_ctx_t *gcm_ctx = (gcm_ctx_t *)aes_ctx;
-		size_t pt_len = gcm_ctx->gcm_processed_data_len -
-		    gcm_ctx->gcm_tag_len;
-
-		if (*pulLastPartLen < pt_len) {
-			*pulLastPartLen = pt_len;
-			return (CKR_BUFFER_TOO_SMALL);
-		}
-
-		rc = gcm_decrypt_final(gcm_ctx, &out, AES_BLOCK_LEN,
-		    aes_encrypt_block, aes_xor_block);
-
-		if (rc != CRYPTO_SUCCESS) {
-			*pulLastPartLen = out.cd_offset;
-		}
+		break;
+	case CKM_AES_GCM:
+		rc = gcm_decrypt_final((gcm_ctx_t *)aes_ctx, &out,
+		    AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+		break;
+	default:
+		break;
 	}
 
-	if (rv == CKR_OK && rc != CRYPTO_SUCCESS) {
-		rv = crypto2pkcs11_error_number(rc);
+	VERIFY3U(out.cd_offset, ==, out_len);
+	rv = crypto2pkcs11_error_number(rc);
+
+done:
+	if (rv == CKR_OK) {
+		*pulLastPartLen = out.cd_offset;
 	}
 
 	soft_aes_free_ctx(aes_ctx);
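
For reference, a minimal caller-side sketch of the C_DecryptFinal
size-probe/retry semantics the comment above describes.  It assumes an
initialized PKCS#11 session with a decrypt operation in progress;
finish_decrypt is a hypothetical helper, not part of this change.

	#include <stdlib.h>
	#include <security/cryptoki.h>

	CK_RV
	finish_decrypt(CK_SESSION_HANDLE hSession, CK_BYTE_PTR *outp,
	    CK_ULONG *outlenp)
	{
		CK_ULONG len = 0;
		CK_BYTE_PTR buf;
		CK_RV rv;

		/* Probe with a NULL buffer; sets len to an upper bound. */
		rv = C_DecryptFinal(hSession, NULL, &len);
		if (rv != CKR_OK)
			return (rv);

		if ((buf = malloc(len)) == NULL)
			return (CKR_HOST_MEMORY);

		/*
		 * The probe does not terminate the operation, so we may
		 * retry with a real buffer.  A too-small buffer would get
		 * CKR_BUFFER_TOO_SMALL back, with len set to the exact
		 * size required.
		 */
		rv = C_DecryptFinal(hSession, buf, &len);
		if (rv != CKR_OK) {
			free(buf);
			return (rv);
		}

		*outp = buf;
		*outlenp = len;
		return (CKR_OK);
	}
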
--- a/usr/src/lib/smbsrv/libmlsvc/common/libmlsvc.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/smbsrv/libmlsvc/common/libmlsvc.h	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #ifndef	_LIBMLSVC_H
@@ -65,7 +65,9 @@
 #endif
 
 uint32_t lsa_lookup_name(char *, uint16_t, smb_account_t *);
+uint32_t lsa_lookup_lname(char *, uint16_t, smb_account_t *);
 uint32_t lsa_lookup_sid(smb_sid_t *, smb_account_t *);
+uint32_t lsa_lookup_lsid(smb_sid_t *, smb_account_t *);
 
 /*
  * SMB domain API to discover a domain controller and obtain domain
--- a/usr/src/lib/smbsrv/libmlsvc/common/lsalib.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/smbsrv/libmlsvc/common/lsalib.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  */
 
 /*
@@ -38,6 +38,10 @@
 
 #include <lsalib.h>
 
+static uint32_t lsa_lookup_name_int(char *, uint16_t, smb_account_t *,
+    boolean_t);
+static uint32_t lsa_lookup_sid_int(smb_sid_t *, smb_account_t *, boolean_t);
+
 static uint32_t lsa_lookup_name_builtin(char *, char *, smb_account_t *);
 static uint32_t lsa_lookup_name_domain(char *, smb_account_t *);
 
@@ -75,6 +79,20 @@
 uint32_t
 lsa_lookup_name(char *account, uint16_t type, smb_account_t *info)
 {
+	return (lsa_lookup_name_int(account, type, info, B_TRUE));
+}
+
+/* Variant that avoids the call out to AD. */
+uint32_t
+lsa_lookup_lname(char *account, uint16_t type, smb_account_t *info)
+{
+	return (lsa_lookup_name_int(account, type, info, B_FALSE));
+}
+
+static uint32_t
+lsa_lookup_name_int(char *account, uint16_t type, smb_account_t *info,
+    boolean_t try_ad)
+{
 	char nambuf[SMB_USERNAME_MAXLEN];
 	char dombuf[SMB_PI_MAX_DOMAIN];
 	char *name, *domain;
@@ -107,8 +125,10 @@
 		if (status == NT_STATUS_SUCCESS)
 			return (status);
 
-		if ((domain == NULL) || (status == NT_STATUS_NOT_FOUND))
+		if (try_ad && ((domain == NULL) ||
+		    (status == NT_STATUS_NOT_FOUND))) {
 			status = lsa_lookup_name_domain(account, info);
+		}
 	}
 
 	return ((status == NT_STATUS_SUCCESS) ? status : NT_STATUS_NONE_MAPPED);
@@ -117,6 +137,19 @@
 uint32_t
 lsa_lookup_sid(smb_sid_t *sid, smb_account_t *info)
 {
+	return (lsa_lookup_sid_int(sid, info, B_TRUE));
+}
+
+/* Variant that avoids the call out to AD. */
+uint32_t
+lsa_lookup_lsid(smb_sid_t *sid, smb_account_t *info)
+{
+	return (lsa_lookup_sid_int(sid, info, B_FALSE));
+}
+
+static uint32_t
+lsa_lookup_sid_int(smb_sid_t *sid, smb_account_t *info, boolean_t try_ad)
+{
 	uint32_t status;
 
 	if (!smb_sid_isvalid(sid))
@@ -125,8 +158,9 @@
 	status = lsa_lookup_sid_builtin(sid, info);
 	if (status == NT_STATUS_NOT_FOUND) {
 		status = smb_sam_lookup_sid(sid, info);
-		if (status == NT_STATUS_NOT_FOUND)
+		if (try_ad && status == NT_STATUS_NOT_FOUND) {
 			status = lsa_lookup_sid_domain(sid, info);
+		}
 	}
 
 	return ((status == NT_STATUS_SUCCESS) ? status : NT_STATUS_NONE_MAPPED);
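
The new lsa_lookup_lname()/lsa_lookup_lsid() entry points differ from the
originals only in skipping the fallback call out to AD.  A sketch of how a
consumer might combine them; resolve_account is a hypothetical helper, and
the header set is assumed from the files touched above:

	#include <sys/types.h>
	#include <smbsrv/ntstatus.h>
	#include <smbsrv/libmlsvc.h>

	/*
	 * Prefer the local-only (builtin + SAM) lookup, and only fall
	 * back to the AD-capable path when the caller allows it.
	 */
	static uint32_t
	resolve_account(char *name, uint16_t type, smb_account_t *info,
	    boolean_t allow_ad)
	{
		uint32_t status;

		/* No call out to AD can happen here. */
		status = lsa_lookup_lname(name, type, info);
		if (status == NT_STATUS_SUCCESS || !allow_ad)
			return (status);

		/* Full lookup, including the AD fallback. */
		return (lsa_lookup_name(name, type, info));
	}
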
--- a/usr/src/lib/smbsrv/libmlsvc/common/lsalib.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/smbsrv/libmlsvc/common/lsalib.h	Thu Nov 14 23:30:04 2019 +0000
@@ -52,8 +52,6 @@
 /*
  * lsalib.c
  */
-uint32_t lsa_lookup_name(char *, uint16_t, smb_account_t *);
-uint32_t lsa_lookup_sid(smb_sid_t *, smb_account_t *);
 DWORD lsa_query_primary_domain_info(char *, char *, smb_domain_t *);
 DWORD lsa_query_account_domain_info(char *, char *, smb_domain_t *);
 DWORD lsa_query_dns_domain_info(char *, char *, smb_domain_t *);
--- a/usr/src/lib/smbsrv/libmlsvc/common/mapfile-vers	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/smbsrv/libmlsvc/common/mapfile-vers	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
 #
 #
 # Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
+# Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
 #
 
 #
@@ -45,6 +45,8 @@
 	dfs_info_free;
 	dssetup_check_service;
 	dssetup_clear_domain_info;
+	lsa_lookup_lname;
+	lsa_lookup_lsid;
 	lsa_lookup_name;
 	lsa_lookup_sid;
 	mlsvc_disconnect;
--- a/usr/src/lib/smbsrv/libmlsvc/common/netr_logon.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/smbsrv/libmlsvc/common/netr_logon.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  */
 
 /*
@@ -56,7 +56,6 @@
     smb_logon_t *, struct netr_logon_info2 *);
 static void netr_setup_identity(ndr_heap_t *, smb_logon_t *,
     netr_logon_id_t *);
-static boolean_t netr_isadmin(struct netr_validation_info3 *);
 static uint32_t netr_setup_domain_groups(struct netr_validation_info3 *,
     smb_ids_t *);
 static uint32_t netr_setup_krb5res_groups(struct krb5_validation_info *,
@@ -818,7 +817,7 @@
  * token.  Called after domain groups have been added.
  */
 static uint32_t
-netr_setup_token_wingrps(struct netr_validation_info3 *info3,
+netr_setup_token_wingrps(struct netr_validation_info3 *info3 __unused,
     smb_token_t *token)
 {
 	uint32_t status;
@@ -828,9 +827,6 @@
 	if (status != NT_STATUS_SUCCESS)
 		return (status);
 
-	if (netr_isadmin(info3))
-		token->tkn_flags |= SMB_ATF_ADMIN;
-
 	status = smb_wka_token_groups(token->tkn_flags, &token->tkn_win_grps);
 
 	return (status);
@@ -923,30 +919,3 @@
 
 	return (0);
 }
-
-/*
- * Determines if the given user is the domain Administrator or a
- * member of Domain Admins
- */
-static boolean_t
-netr_isadmin(struct netr_validation_info3 *info3)
-{
-	smb_domain_t di;
-	int i;
-
-	if (!smb_domain_lookup_sid((smb_sid_t *)info3->LogonDomainId, &di))
-		return (B_FALSE);
-
-	if (di.di_type != SMB_DOMAIN_PRIMARY)
-		return (B_FALSE);
-
-	if ((info3->UserId == DOMAIN_USER_RID_ADMIN) ||
-	    (info3->PrimaryGroupId == DOMAIN_GROUP_RID_ADMINS))
-		return (B_TRUE);
-
-	for (i = 0; i < info3->GroupCount; i++)
-		if (info3->GroupIds[i].rid == DOMAIN_GROUP_RID_ADMINS)
-			return (B_TRUE);
-
-	return (B_FALSE);
-}
--- a/usr/src/lib/smbsrv/libsmb/common/libsmb.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/smbsrv/libsmb/common/libsmb.h	Thu Nov 14 23:30:04 2019 +0000
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #ifndef	_LIBSMB_H
@@ -722,7 +722,9 @@
 int smb_lgrp_iterate(smb_giter_t *, smb_group_t *);
 
 int smb_lookup_sid(const char *, lsa_account_t *);
+int smb_lookup_lsid(const char *, lsa_account_t *);
 int smb_lookup_name(const char *, sid_type_t, lsa_account_t *);
+int smb_lookup_lname(const char *, sid_type_t, lsa_account_t *);
 
 #define	SMB_LGRP_SUCCESS		0
 #define	SMB_LGRP_INVALID_ARG		1
--- a/usr/src/lib/smbsrv/libsmb/common/mapfile-vers	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/smbsrv/libsmb/common/mapfile-vers	Thu Nov 14 23:30:04 2019 +0000
@@ -19,7 +19,7 @@
 #
 #
 # Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
+# Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
 #
 
 #
@@ -268,6 +268,8 @@
 	smb_logon_decode;
 	smb_logon_free;
 	smb_logon_xdr;
+	smb_lookup_lname;
+	smb_lookup_lsid;
 	smb_lookup_name;
 	smb_lookup_sid;
 	smb_match_netlogon_seqnum;
--- a/usr/src/lib/smbsrv/libsmb/common/smb_doorclnt.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/smbsrv/libsmb/common/smb_doorclnt.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #include <assert.h>
@@ -44,6 +44,9 @@
 static void smb_door_sethdr(smb_doorhdr_t *, uint32_t, uint32_t);
 static boolean_t smb_door_chkhdr(smb_doorarg_t *, smb_doorhdr_t *);
 static void smb_door_free(door_arg_t *arg);
+static int smb_lookup_name_int(const char *, sid_type_t, lsa_account_t *,
+    int);
+static int smb_lookup_sid_int(const char *, lsa_account_t *, int);
 
 /*
  * Given a SID, make a door call to get  the associated name.
@@ -57,6 +60,20 @@
 int
 smb_lookup_sid(const char *sid, lsa_account_t *acct)
 {
+	return (smb_lookup_sid_int(sid, acct, SMB_DR_LOOKUP_SID));
+}
+
+/*
+ * Variant of smb_lookup_sid to do a "local-only" lookup.
+ */
+int
+smb_lookup_lsid(const char *sid, lsa_account_t *acct)
+{
+	return (smb_lookup_sid_int(sid, acct, SMB_DR_LOOKUP_LSID));
+}
+
+static int
+smb_lookup_sid_int(const char *sid, lsa_account_t *acct, int dop)
+{
 	int	rc;
 
 	assert((sid != NULL) && (acct != NULL));
@@ -64,7 +81,7 @@
 	bzero(acct, sizeof (lsa_account_t));
 	(void) strlcpy(acct->a_sid, sid, SMB_SID_STRSZ);
 
-	rc = smb_door_call(SMB_DR_LOOKUP_SID, acct, lsa_account_xdr,
+	rc = smb_door_call(dop, acct, lsa_account_xdr,
 	    acct, lsa_account_xdr);
 
 	if (rc != 0)
@@ -84,6 +101,19 @@
 int
 smb_lookup_name(const char *name, sid_type_t sidtype, lsa_account_t *acct)
 {
+	return (smb_lookup_name_int(name, sidtype, acct, SMB_DR_LOOKUP_NAME));
+}
+
+int
+smb_lookup_lname(const char *name, sid_type_t sidtype, lsa_account_t *acct)
+{
+	return (smb_lookup_name_int(name, sidtype, acct, SMB_DR_LOOKUP_LNAME));
+}
+
+static int
+smb_lookup_name_int(const char *name, sid_type_t sidtype, lsa_account_t *acct,
+    int dop)
+{
 	char		tmp[MAXNAMELEN];
 	char		*dp = NULL;
 	char		*np = NULL;
@@ -104,7 +134,7 @@
 		(void) strlcpy(acct->a_name, name, MAXNAMELEN);
 	}
 
-	rc = smb_door_call(SMB_DR_LOOKUP_NAME, acct, lsa_account_xdr,
+	rc = smb_door_call(dop, acct, lsa_account_xdr,
 	    acct, lsa_account_xdr);
 
 	if (rc != 0)
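
A consumer-side sketch of the new local-only door op; sid_to_name is a
hypothetical helper, and the lsa_account_t a_status field is assumed from
the smbsrv headers:

	#include <smbsrv/ntstatus.h>
	#include <smbsrv/libsmb.h>

	/*
	 * Try the local-only lookup first so the common case never
	 * risks a blocking round-trip to AD.
	 */
	static int
	sid_to_name(const char *sidstr, lsa_account_t *acct)
	{
		if (smb_lookup_lsid(sidstr, acct) == 0 &&
		    acct->a_status == NT_STATUS_SUCCESS)
			return (0);

		/* Not known locally; the full path may call out to AD. */
		return (smb_lookup_sid(sidstr, acct));
	}
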
--- a/usr/src/lib/smbsrv/libsmb/common/smb_lgrp.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/smbsrv/libsmb/common/smb_lgrp.c	Thu Nov 14 23:30:04 2019 +0000
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 RackTop Systems.
- * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  */
 
 #include <stdlib.h>
@@ -2398,6 +2398,8 @@
 {
 	if (smb_strcasecmp(grp->sg_name, "Administrators", 0) == 0) {
 		smb_privset_enable(grp->sg_privs, SE_TAKE_OWNERSHIP_LUID);
+		smb_privset_enable(grp->sg_privs, SE_BACKUP_LUID);
+		smb_privset_enable(grp->sg_privs, SE_RESTORE_LUID);
 		return;
 	}
 
--- a/usr/src/lib/smbsrv/libsmb/common/smb_privilege.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/lib/smbsrv/libsmb/common/smb_privilege.c	Thu Nov 14 23:30:04 2019 +0000
@@ -21,6 +21,8 @@
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  */
 
 /*
@@ -79,7 +81,11 @@
 	    "Modify firmware environment values", 0 },
 	{ 23, SE_CHANGE_NOTIFY_NAME, "Bypass traverse checking", 0 },
 	{ 24, SE_REMOTE_SHUTDOWN_NAME,
-	    "Force shutdown from a remote system", 0 }
+	    "Force shutdown from a remote system", 0 },
+	{ 25, SE_READ_FILE_NAME,
+	    "Bypass ACL for READ access", PF_PRESENTABLE },
+	{ 26, SE_WRITE_FILE_NAME,
+	    "Bypass ACL for WRITE and DELETE access", PF_PRESENTABLE },
 };
 
 /*
--- a/usr/src/man/man1/ptree.1	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/man/man1/ptree.1	Thu Nov 14 23:30:04 2019 +0000
@@ -4,12 +4,12 @@
 .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License").  You may not use this file except in compliance with the License.
 .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.  See the License for the specific language governing permissions and limitations under the License.
 .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH PTREE 1 "Sep 3, 2019"
+.TH PTREE 1 "Nov 13, 2019"
 .SH NAME
 ptree \- print process trees
 .SH SYNOPSIS
 .nf
-\fB/usr/bin/ptree\fR [\fB-a\fR] [\fB-c\fR] [\fB-s\fR \fIsvc\fR] [\fB-z\fR \fIzone\fR] [\fIpid\fR | \fIuser\fR]...
+\fB/usr/bin/ptree\fR [\fB-a\fR] [\fB-c\fR] [\fB-g\fR] [\fB-w\fR] [\fB-s\fR \fIsvc\fR] [\fB-z\fR \fIzone\fR] [\fIpid\fR | \fIuser\fR]...
 .fi
 
 .SH DESCRIPTION
@@ -42,6 +42,17 @@
 .sp
 .ne 2
 .na
+\fB\fB-g\fR\fR
+.ad
+.RS 11n
+Use line drawing characters. If the current locale is a UTF-8 locale,
+UTF-8 line drawing characters are used; otherwise, ASCII line drawing
+characters are used.
+.RE
+
+.sp
+.ne 2
+.na
 \fB\fB-s\fR \fIsvc\fR\fR
 .ad
 .RS 11n
@@ -54,6 +65,16 @@
 .sp
 .ne 2
 .na
+\fB\fB-w\fR\fR
+.ad
+.RS 11n
+Allow output lines to wrap. Normally output lines are truncated to the current
+width of the terminal window.
+.RE
+
+.sp
+.ne 2
+.na
 \fB\fB-z\fR \fIzone\fR\fR
 .ad
 .RS 11n
@@ -109,6 +130,29 @@
 .in -2
 .sp
 
+\fBExample 2\fR
+.sp
+.LP
+The following example prints the process tree (including children of process 0)
+for processes matching the command name \fBssh\fR, using ASCII line drawing
+characters:
+
+.sp
+.in +2
+.nf
+$ ptree -ag `pgrep ssh`
+        1     /sbin/init
+        `-100909 /usr/lib/ssh/sshd
+          `-569150 /usr/lib/ssh/sshd
+            `-569157 /usr/lib/ssh/sshd
+              `-569159 -ksh
+                `-569171 bash
+                  `-569173 /bin/ksh
+                    `-569193 bash
+.fi
+.in -2
+.sp
+
 .SH EXIT STATUS
 The following exit values are returned:
 .sp
--- a/usr/src/man/man1m/smbadm.1m	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/man/man1m/smbadm.1m	Thu Nov 14 23:30:04 2019 +0000
@@ -16,9 +16,9 @@
 .\"
 .\"
 .\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved.
-.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
 .\"
-.Dd November 18, 2017
+.Dd June 6, 2019
 .Dt SMBADM 1M
 .Os
 .Sh NAME
@@ -252,6 +252,10 @@
 .It Cm take-ownership Ns = Ns Cm on Ns | Ns Cm off
 Specifies whether members of the SMB local group can take ownership of file
 system objects.
+.It Cm bypass-read Ns = Ns Cm on Ns | Ns Cm off
+Specifies whether members of the SMB local group can always bypass Read
+access controls.
+.It Cm bypass-write Ns = Ns Cm on Ns | Ns Cm off
+Specifies whether members of the SMB local group can always bypass Write and
+Delete access controls.
 .El
 .It Xo
 .Cm add-member
--- a/usr/src/pkg/manifests/system-test-cryptotest.mf	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/pkg/manifests/system-test-cryptotest.mf	Thu Nov 14 23:30:04 2019 +0000
@@ -85,6 +85,12 @@
 file opt/crypto-tests/tests/aes/pkcs/aes_cbc_64 \
     path=opt/crypto-tests/tests/aes/pkcs/aes_cbc_64 group=root mode=0555 \
     owner=root
+file opt/crypto-tests/tests/aes/pkcs/aes_cbc_pad_32 \
+    path=opt/crypto-tests/tests/aes/pkcs/aes_cbc_pad_32 group=root mode=0555 \
+    owner=root
+file opt/crypto-tests/tests/aes/pkcs/aes_cbc_pad_64 \
+    path=opt/crypto-tests/tests/aes/pkcs/aes_cbc_pad_64 group=root mode=0555 \
+    owner=root
 file opt/crypto-tests/tests/aes/pkcs/aes_ccm_32 \
     path=opt/crypto-tests/tests/aes/pkcs/aes_ccm_32 group=root mode=0555 \
     owner=root
--- a/usr/src/pkg/manifests/system-test-zfstest.mf	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/pkg/manifests/system-test-zfstest.mf	Thu Nov 14 23:30:04 2019 +0000
@@ -2988,6 +2988,8 @@
 file path=opt/zfs-tests/tests/functional/slog/slog_013_pos mode=0555
 file path=opt/zfs-tests/tests/functional/slog/slog_014_pos mode=0555
 file path=opt/zfs-tests/tests/functional/slog/slog_015_neg mode=0555
+file path=opt/zfs-tests/tests/functional/slog/slog_replay_fs_001 mode=0555
+file path=opt/zfs-tests/tests/functional/slog/slog_replay_fs_002 mode=0555
 file path=opt/zfs-tests/tests/functional/snapshot/cleanup mode=0555
 file path=opt/zfs-tests/tests/functional/snapshot/clone_001_pos mode=0555
 file path=opt/zfs-tests/tests/functional/snapshot/deadlist_lock mode=0555
--- a/usr/src/test/crypto-tests/tests/modes/aes/Makefile	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/crypto-tests/tests/modes/aes/Makefile	Thu Nov 14 23:30:04 2019 +0000
@@ -12,10 +12,11 @@
 #
 # Copyright (c) 2012 by Delphix. All rights reserved.
 # Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+# Copyright 2019 Joyent, Inc.
 #
 
 .PARALLEL: $(SUBDIRS)
 
-SUBDIRS = cbc ccm cmac ctr ecb gcm
+SUBDIRS = cbc cbc_pad ccm cmac ctr ecb gcm
 
 include $(SRC)/test/Makefile.com
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/test/crypto-tests/tests/modes/aes/cbc_pad/Makefile	Thu Nov 14 23:30:04 2019 +0000
@@ -0,0 +1,26 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2012 by Delphix. All rights reserved.
+# Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+# Copyright 2019 Joyent, Inc.
+#
+
+BASEPROG = aes_cbc_pad
+
+# Currently, the cbc_pad mechanism is only present in PKCS#11 and
+# not KCF, so we do not create _kcf test binaries.
+CRYPTO = pkcs
+CRYPTO_kcf = $(POUND_SIGN)
+
+include $(SRC)/cmd/Makefile.cmd
+include ../Makefile.subdirs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/test/crypto-tests/tests/modes/aes/cbc_pad/aes_cbc_pad.c	Thu Nov 14 23:30:04 2019 +0000
@@ -0,0 +1,77 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * The illumos KCF does not currently support CKM_AES_CBC_PAD (it
+ * requires the consumer to explicitly add/remove padding), so there is
+ * no SUN_CKM_xxx symbol.
+ */
+#define	CBC_PAD	"CKM_AES_CBC_PAD"
+
+#include <aes/aes_impl.h>
+#include <stdio.h>
+
+#include "cryptotest.h"
+#include "aes_cbc_pad.h"
+
+int
+main(void)
+{
+	int errs = 0;
+	int i;
+	uint8_t N[1024];
+	cryptotest_t args;
+
+	args.out = N;
+
+	args.outlen = sizeof (N);
+	args.plen = AES_BLOCK_LEN;
+
+	args.mechname = CBC_PAD;
+	args.updatelen = 1;
+
+	for (i = 0; i < sizeof (RES) / sizeof (RES[0]); i++) {
+		args.in = DATA[i];
+		args.key = KEY[i];
+		args.param = IV[i];
+
+		args.inlen = DATALEN[i];
+		args.keylen = KEYLEN[i];
+
+		errs += run_test(&args, RES[i], RESLEN[i], ENCR_FG);
+		(void) fprintf(stderr, "----------\n");
+	}
+
+	(void) fprintf(stderr, "\t\t\t=== decrypt ===\n----------\n\n");
+
+	for (i = 0; i < sizeof (RES) / sizeof (RES[0]); i++) {
+		args.in = RES[i];
+		args.key = KEY[i];
+		args.param = IV[i];
+
+		args.inlen = RESLEN[i];
+		args.keylen = KEYLEN[i];
+
+		errs += run_test(&args, DATA[i], DATALEN[i], DECR_FG);
+		(void) fprintf(stderr, "----------\n");
+	}
+
+	if (errs != 0)
+		(void) fprintf(stderr, "%d tests failed\n", errs);
+
+	return (errs);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/test/crypto-tests/tests/modes/aes/cbc_pad/aes_cbc_pad.h	Thu Nov 14 23:30:04 2019 +0000
@@ -0,0 +1,260 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _AES_CBC_PAD_H
+#define	_AES_CBC_PAD_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * A search for test vectors that included PKCS7 padding has proven
+ * unsuccessful.  Instead, a few of the existing aes_cbc test vectors
+ * were used as a starting point, adding extra data to the xx_DATA[]
+ * arrays to test padding.
+ *
+ * To compute the xx_RES[] (encrypted) values, openssl on a machine
+ * running macOS Mojave was used.
+ */
+
+static uint8_t CBC_PAD1_KEY[16] = {
+	0x06, 0xa9, 0x21, 0x40, 0x36, 0xb8, 0xa1, 0x5b,
+	0x51, 0x2e, 0x03, 0xd5, 0x34, 0x12, 0x00, 0x06,
+};
+static uint8_t CBC_PAD1_IV[16] = {
+	0x3d, 0xaf, 0xba, 0x42, 0x9d, 0x9e, 0xb4, 0x30,
+	0xb4, 0x22, 0xda, 0x80, 0x2c, 0x9f, 0xac, 0x41,
+};
+static uint8_t CBC_PAD1_DATA[] = {
+	'S', 'i', 'n', 'g', 'l', 'e', ' ', 'b',
+	'l', 'o', 'c', 'k', ' ', 'm', 's', 'g',
+};
+
+static uint8_t CBC_PAD1_RES[] = {
+	0xe3, 0x53, 0x77, 0x9c, 0x10, 0x79, 0xae, 0xb8,
+	0x27, 0x08, 0x94, 0x2d, 0xbe, 0x77, 0x18, 0x1a,
+
+	0xb9, 0x7c, 0x82, 0x5e, 0x1c, 0x78, 0x51, 0x46,
+	0x54, 0x2d, 0x39, 0x69, 0x41, 0xbc, 0xe5, 0x5d
+};
+
+
+static uint8_t CBC_PAD2_KEY[] = {
+	0xc2, 0x86, 0x69, 0x6d, 0x88, 0x7c, 0x9a, 0xa0,
+	0x61, 0x1b, 0xbb, 0x3e, 0x20, 0x25, 0xa4, 0x5a,
+};
+static uint8_t CBC_PAD2_IV[] = {
+	0x56, 0x2e, 0x17, 0x99, 0x6d, 0x09, 0x3d, 0x28,
+	0xdd, 0xb3, 0xba, 0x69, 0x5a, 0x2e, 0x6f, 0x58,
+};
+static uint8_t CBC_PAD2_DATA[] = {
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+};
+
+static uint8_t CBC_PAD2_RES[] = {
+	0xd2, 0x96, 0xcd, 0x94, 0xc2, 0xcc, 0xcf, 0x8a,
+	0x3a, 0x86, 0x30, 0x28, 0xb5, 0xe1, 0xdc, 0x0a,
+
+	0x75, 0x86, 0x60, 0x2d, 0x25, 0x3c, 0xff, 0xf9,
+	0x1b, 0x82, 0x66, 0xbe, 0xa6, 0xd6, 0x1a, 0xb1,
+
+	0xbc, 0xfd, 0x81, 0x02, 0x22, 0x02, 0x36, 0x6b,
+	0xde, 0x6d, 0xd2, 0x60, 0xa1, 0x58, 0x41, 0xa1
+};
+
+
+static uint8_t CBC_PAD3_KEY[] = {
+	0xc2, 0x86, 0x69, 0x6d, 0x88, 0x7c, 0x9a, 0xa0,
+	0x61, 0x1b, 0xbb, 0x3e, 0x20, 0x25, 0xa4, 0x5a,
+};
+static uint8_t CBC_PAD3_IV[] = {
+	0x56, 0x2e, 0x17, 0x99, 0x6d, 0x09, 0x3d, 0x28,
+	0xdd, 0xb3, 0xba, 0x69, 0x5a, 0x2e, 0x6f, 0x58,
+};
+
+static uint8_t CBC_PAD3_DATA[] = {
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+
+	0x00,
+};
+
+static uint8_t CBC_PAD3_RES[] = {
+	0xd2, 0x96, 0xcd, 0x94, 0xc2, 0xcc, 0xcf, 0x8a,
+	0x3a, 0x86, 0x30, 0x28, 0xb5, 0xe1, 0xdc, 0x0a,
+
+	0x75, 0x86, 0x60, 0x2d, 0x25, 0x3c, 0xff, 0xf9,
+	0x1b, 0x82, 0x66, 0xbe, 0xa6, 0xd6, 0x1a, 0xb1,
+
+	0xde, 0xf6, 0x23, 0xa9, 0xc6, 0xf5, 0xc6, 0xb9,
+	0x56, 0x14, 0x49, 0x60, 0xb2, 0x3d, 0x2f, 0x7f
+};
+
+
+static uint8_t CBC_PAD4_KEY[] = {
+	0xc2, 0x86, 0x69, 0x6d, 0x88, 0x7c, 0x9a, 0xa0,
+	0x61, 0x1b, 0xbb, 0x3e, 0x20, 0x25, 0xa4, 0x5a,
+};
+
+static uint8_t CBC_PAD4_IV[] = {
+	0x56, 0x2e, 0x17, 0x99, 0x6d, 0x09, 0x3d, 0x28,
+	0xdd, 0xb3, 0xba, 0x69, 0x5a, 0x2e, 0x6f, 0x58,
+};
+
+static uint8_t CBC_PAD4_DATA[] = {
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+
+	0x00, 0x01,
+};
+
+static uint8_t CBC_PAD4_RES[] = {
+	0xd2, 0x96, 0xcd, 0x94, 0xc2, 0xcc, 0xcf, 0x8a,
+	0x3a, 0x86, 0x30, 0x28, 0xb5, 0xe1, 0xdc, 0x0a,
+
+	0x75, 0x86, 0x60, 0x2d, 0x25, 0x3c, 0xff, 0xf9,
+	0x1b, 0x82, 0x66, 0xbe, 0xa6, 0xd6, 0x1a, 0xb1,
+
+	0x30, 0xce, 0x1d, 0xd5, 0xd1, 0xb3, 0x0e, 0xde,
+	0x59, 0x9c, 0x3b, 0x31, 0x1b, 0x62, 0xf0, 0x23
+};
+
+
+static uint8_t CBC_PAD5_KEY[] = {
+	0xc2, 0x86, 0x69, 0x6d, 0x88, 0x7c, 0x9a, 0xa0,
+	0x61, 0x1b, 0xbb, 0x3e, 0x20, 0x25, 0xa4, 0x5a,
+};
+
+static uint8_t CBC_PAD5_IV[] = {
+	0x56, 0x2e, 0x17, 0x99, 0x6d, 0x09, 0x3d, 0x28,
+	0xdd, 0xb3, 0xba, 0x69, 0x5a, 0x2e, 0x6f, 0x58,
+};
+
+static uint8_t CBC_PAD5_DATA[] = {
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
+};
+
+static uint8_t CBC_PAD5_RES[] = {
+	0xd2, 0x96, 0xcd, 0x94, 0xc2, 0xcc, 0xcf, 0x8a,
+	0x3a, 0x86, 0x30, 0x28, 0xb5, 0xe1, 0xdc, 0x0a,
+
+	0x75, 0x86, 0x60, 0x2d, 0x25, 0x3c, 0xff, 0xf9,
+	0x1b, 0x82, 0x66, 0xbe, 0xa6, 0xd6, 0x1a, 0xb1,
+
+	0xd3, 0x1c, 0x5a, 0x9d, 0xc4, 0x37, 0xa7, 0x7a,
+	0x74, 0xca, 0xb3, 0x69, 0x2b, 0x7b, 0x1f, 0xad
+};
+
+
+static uint8_t CBC_PAD6_KEY[] = {
+	0xc2, 0x86, 0x69, 0x6d, 0x88, 0x7c, 0x9a, 0xa0,
+	0x61, 0x1b, 0xbb, 0x3e, 0x20, 0x25, 0xa4, 0x5a,
+};
+
+static uint8_t CBC_PAD6_IV[] = {
+	0x56, 0x2e, 0x17, 0x99, 0x6d, 0x09, 0x3d, 0x28,
+	0xdd, 0xb3, 0xba, 0x69, 0x5a, 0x2e, 0x6f, 0x58,
+};
+
+static uint8_t CBC_PAD6_DATA[] = {
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+};
+
+static uint8_t CBC_PAD6_RES[] = {
+	0xd2, 0x96, 0xcd, 0x94, 0xc2, 0xcc, 0xcf, 0x8a,
+	0x3a, 0x86, 0x30, 0x28, 0xb5, 0xe1, 0xdc, 0x0a,
+
+	0x75, 0x86, 0x60, 0x2d, 0x25, 0x3c, 0xff, 0xf9,
+	0x1b, 0x82, 0x66, 0xbe, 0xa6, 0xd6, 0x1a, 0xb1,
+
+	0x79, 0x33, 0x83, 0xff, 0x4a, 0x64, 0x9d, 0xe3,
+	0x4d, 0x6f, 0x19, 0x94, 0x28, 0x7d, 0x65, 0x67
+};
+
+uint8_t *DATA[] = {
+	CBC_PAD1_DATA, CBC_PAD2_DATA, CBC_PAD3_DATA,
+	CBC_PAD4_DATA, CBC_PAD5_DATA, CBC_PAD6_DATA,
+};
+
+size_t DATALEN[] = {
+	sizeof (CBC_PAD1_DATA), sizeof (CBC_PAD2_DATA),
+	sizeof (CBC_PAD3_DATA), sizeof (CBC_PAD4_DATA),
+	sizeof (CBC_PAD5_DATA), sizeof (CBC_PAD6_DATA),
+};
+
+uint8_t *KEY[] = {
+	CBC_PAD1_KEY, CBC_PAD2_KEY, CBC_PAD3_KEY,
+	CBC_PAD4_KEY, CBC_PAD5_KEY, CBC_PAD6_KEY,
+};
+
+size_t KEYLEN[] = {
+	sizeof (CBC_PAD1_KEY), sizeof (CBC_PAD2_KEY),
+	sizeof (CBC_PAD3_KEY), sizeof (CBC_PAD4_KEY),
+	sizeof (CBC_PAD5_KEY), sizeof (CBC_PAD6_KEY),
+};
+
+uint8_t *IV[] = {
+	CBC_PAD1_IV, CBC_PAD2_IV, CBC_PAD3_IV,
+	CBC_PAD4_IV, CBC_PAD5_IV, CBC_PAD6_IV,
+};
+
+size_t IVLEN[] = {
+	sizeof (CBC_PAD1_IV), sizeof (CBC_PAD2_IV),
+	sizeof (CBC_PAD3_IV), sizeof (CBC_PAD4_IV),
+	sizeof (CBC_PAD5_IV), sizeof (CBC_PAD6_IV),
+};
+
+uint8_t *RES[] = {
+	CBC_PAD1_RES, CBC_PAD2_RES, CBC_PAD3_RES,
+	CBC_PAD4_RES, CBC_PAD5_RES, CBC_PAD6_RES,
+};
+
+size_t RESLEN[] = {
+	sizeof (CBC_PAD1_RES), sizeof (CBC_PAD2_RES),
+	sizeof (CBC_PAD3_RES), sizeof (CBC_PAD4_RES),
+	sizeof (CBC_PAD5_RES), sizeof (CBC_PAD6_RES),
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AES_CBC_PAD_H */
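
One property of these vectors worth noting: PKCS#7 always adds at least one
byte of padding, so each xx_RES[] is the xx_DATA[] length rounded up past a
multiple of AES_BLOCK_LEN; a 16-byte input (CBC_PAD1) encrypts to 32 bytes.
A small sanity-check sketch (cbc_pad_ctlen is hypothetical):

	#include <assert.h>
	#include <stddef.h>

	#define	AES_BLOCK_LEN	16

	/*
	 * Ciphertext length for PKCS#7-padded CBC: an input that is an
	 * exact multiple of the block size gains a whole pad block.
	 */
	static size_t
	cbc_pad_ctlen(size_t ptlen)
	{
		return ((ptlen / AES_BLOCK_LEN + 1) * AES_BLOCK_LEN);
	}

	int
	main(void)
	{
		assert(cbc_pad_ctlen(16) == 32);	/* CBC_PAD1 */
		assert(cbc_pad_ctlen(33) == 48);	/* CBC_PAD3 */
		assert(cbc_pad_ctlen(46) == 48);	/* CBC_PAD5 */
		return (0);
	}
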
--- a/usr/src/test/zfs-tests/include/libtest.shlib	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/include/libtest.shlib	Thu Nov 14 23:30:04 2019 +0000
@@ -2880,3 +2880,22 @@
 
 	return 1
 }
+
+#
+# Compute SHA256 digest for given file or stdin if no file given.
+# Note: file path must not contain spaces
+#
+function sha256digest
+{
+	typeset file=$1
+
+	if [ -x /usr/bin/digest ]; then
+		/usr/bin/digest -a sha256 $file
+	elif [ -x /usr/bin/sha256sum ]; then
+		/usr/bin/sha256sum -b $file | awk '{ print $1 }'
+	else
+		echo "Cannot calculate SHA256 digest"
+		return 1
+	fi
+	return 0
+}
--- a/usr/src/test/zfs-tests/runfiles/delphix.run	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/runfiles/delphix.run	Thu Nov 14 23:30:04 2019 +0000
@@ -641,7 +641,7 @@
 tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
     'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
     'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
-    'slog_013_pos', 'slog_014_pos']
+    'slog_013_pos', 'slog_014_pos', 'slog_replay_fs_001', 'slog_replay_fs_002']
 
 [/opt/zfs-tests/tests/functional/snapshot]
 tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
--- a/usr/src/test/zfs-tests/runfiles/omnios.run	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/runfiles/omnios.run	Thu Nov 14 23:30:04 2019 +0000
@@ -640,7 +640,7 @@
 tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
     'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
     'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
-    'slog_013_pos', 'slog_014_pos']
+    'slog_013_pos', 'slog_014_pos', 'slog_replay_fs_001', 'slog_replay_fs_002']
 
 [/opt/zfs-tests/tests/functional/snapshot]
 tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
--- a/usr/src/test/zfs-tests/runfiles/openindiana.run	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/runfiles/openindiana.run	Thu Nov 14 23:30:04 2019 +0000
@@ -640,7 +640,7 @@
 tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
     'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
     'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
-    'slog_013_pos', 'slog_014_pos']
+    'slog_013_pos', 'slog_014_pos', 'slog_replay_fs_001', 'slog_replay_fs_002']
 
 [/opt/zfs-tests/tests/functional/snapshot]
 tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
--- a/usr/src/test/zfs-tests/runfiles/smartos.run	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/runfiles/smartos.run	Thu Nov 14 23:30:04 2019 +0000
@@ -550,7 +550,7 @@
 tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
     'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
     'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
-    'slog_013_pos', 'slog_014_pos']
+    'slog_013_pos', 'slog_014_pos', 'slog_replay_fs_001', 'slog_replay_fs_002']
 
 [/opt/zfs-tests/tests/functional/snapshot]
 tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
--- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -28,13 +28,15 @@
 # STRATEGY:
 # 1. Create an encrypted dataset
 # 2. Create an encrypted child dataset
-# 3. Attempt to change the key without any flags
-# 4. Attempt to change the key specifying keylocation
-# 5. Attempt to change the key specifying keyformat
-# 6. Verify the new encryption root can unload and load its key
-# 7. Recreate the child dataset
-# 8. Attempt to change the key specifying both the keylocation and keyformat
-# 9. Verify the new encryption root can unload and load its key
+# 3. Create an unencrypted child dataset
+# 4. Attempt to change the key without any flags
+# 5. Attempt to change the key specifying keylocation
+# 6. Attempt to change the key specifying keyformat
+# 7. Verify the new encryption root can unload and load its key
+# 8. Recreate the child dataset
+# 9. Attempt to change the key specifying both the keylocation and keyformat
+# 10. Verify the new encryption root can unload and load its key
+# 11. Verify the unencrypted child is still accessible normally
 #
 
 verify_runnable "both"
@@ -53,6 +55,7 @@
 log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \
 	"-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
 log_must zfs create $TESTPOOL/$TESTFS1/child
+log_must zfs create -o encryption=off $TESTPOOL/$TESTFS1/child2
 
 log_mustnot eval "echo $PASSPHRASE2 | zfs change-key" \
 	"$TESTPOOL/$TESTFS1/child"
@@ -82,5 +85,7 @@
 
 log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child"
 log_must key_available $TESTPOOL/$TESTFS1/child
+log_must zfs unmount $TESTPOOL/$TESTFS1/child2
+log_must zfs mount $TESTPOOL/$TESTFS1/child2
 
 log_pass "'zfs change-key' promotes an encrypted child to an encryption root"
--- a/usr/src/test/zfs-tests/tests/functional/slog/setup.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/setup.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -38,13 +38,4 @@
 	log_unsupported "This system doesn't support separate intent logs"
 fi
 
-if [[ -d $VDEV ]]; then
-	log_must rm -rf $VDIR
-fi
-if [[ -d $VDEV2 ]]; then
-	log_must rm -rf $VDIR2
-fi
-log_must mkdir -p $VDIR $VDIR2
-log_must mkfile $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2
-
 log_pass
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog.kshlib	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog.kshlib	Thu Nov 14 23:30:04 2019 +0000
@@ -31,10 +31,20 @@
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/slog/slog.cfg
 
+function setup
+{
+	log_must rm -rf $VDIR $VDIR2
+	log_must mkdir -p $VDIR $VDIR2
+	log_must truncate -s $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2
+
+	return 0
+}
+
 function cleanup
 {
 	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 	poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2
+	rm -rf $TESTDIR $VDIR $VDIR2
 }
 
 #
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_001_pos.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_001_pos.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -45,6 +45,7 @@
 
 log_assert "Creating a pool with a log device succeeds."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_002_pos.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_002_pos.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -46,6 +46,7 @@
 
 log_assert "Adding a log device to normal pool works."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_003_pos.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_003_pos.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -46,6 +46,7 @@
 
 log_assert "Adding an extra log device works."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_004_pos.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_004_pos.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -46,6 +46,7 @@
 
 log_assert "Attaching a log device passes."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_005_pos.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_005_pos.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -46,6 +46,7 @@
 
 log_assert "Detaching a log device passes."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_006_pos.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_006_pos.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -46,6 +46,7 @@
 
 log_assert "Replacing a log device passes."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_007_pos.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_007_pos.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -48,6 +48,7 @@
 
 log_assert "Exporting and importing pool with log devices passes."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_008_neg.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_008_neg.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -44,6 +44,7 @@
 
 log_assert "A raidz/raidz2 log is not supported."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_009_neg.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_009_neg.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -45,6 +45,7 @@
 
 log_assert "A raidz/raidz2 log can not be added to existed pool."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_010_neg.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_010_neg.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -46,6 +46,7 @@
 
 log_assert "Slog device can not be replaced with spare device."
 log_onexit cleanup
+log_must setup
 
 log_must zpool create $TESTPOOL $VDEV spare $SDEV log $LDEV
 sdev=$(random_get $SDEV)
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_011_neg.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_011_neg.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -46,6 +46,7 @@
 
 log_assert "Offline and online a log device passes."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_012_neg.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_012_neg.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -45,6 +45,7 @@
 
 log_assert "Pool can survive when one of mirror log device get corrupted."
 log_onexit cleanup
+log_must setup
 
 for type in "" "mirror" "raidz" "raidz2"
 do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_013_pos.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_013_pos.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -54,6 +54,7 @@
 	"that presents a block interface."
 verify_disk_count "$DISKS" 2
 log_onexit cleanup_testenv
+log_must setup
 
 dsk1=${DISKS%% *}
 log_must zpool create $TESTPOOL ${DISKS#$dsk1}
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_014_pos.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_014_pos.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -45,6 +45,7 @@
 verify_runnable "global"
 
 log_assert "log device can survive when one of the pool device get corrupted."
+log_must setup
 
 for type in "mirror" "raidz" "raidz2"; do
 	for spare in "" "spare"; do
--- a/usr/src/test/zfs-tests/tests/functional/slog/slog_015_neg.ksh	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_015_neg.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -47,6 +47,7 @@
 
 ORIG_TIMEOUT=$(mdb -ke "zfs_commit_timeout_pct/J" | tail -1 | awk '{print $NF}')
 log_onexit cleanup
+log_must setup
 
 for PCT in 0 1 2 4 8 16 32 64 128 256 512 1024; do
 	log_must mdb -kwe "zfs_commit_timeout_pct/Z $PCT"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -0,0 +1,215 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/tests/functional/slog/slog.kshlib
+
+#
+# DESCRIPTION:
+#	Verify slogs are replayed correctly.  This test is a direct
+#	adaptation of the ziltest.sh script for the ZFS Test Suite.
+#
+#	The general idea is to build up an intent log from a bunch of
+#	diverse user commands without actually committing them to the
+#	file system.  Then copy the file system, replay the intent
+#	log and compare the file system and the copy.
+#
+#	To enable this automated testing of the intent log some minimal
+#	support is required of the file system.  In particular, a
+#	"freeze" command is required to flush the in-flight transactions;
+#	to stop the actual committing of transactions; and to ensure no
+#	deltas are discarded. All deltas past a freeze point are kept
+#	for replay and comparison later. Here is the flow:
+#
+# STRATEGY:
+#	1. Create an empty file system (TESTFS)
+#	2. Freeze TESTFS
+#	3. Run various user commands that create files, directories and ACLs
+#	4. Copy TESTFS to temporary location (TESTDIR/copy)
+#	5. Unmount filesystem
+#	   <at this stage TESTFS is empty again and still frozen, and the
+#	   intent log contains a complete set of deltas to replay it>
+#	6. Remount TESTFS <which replays the intent log>
+#	7. Compare TESTFS against the TESTDIR/copy
+#
+
+verify_runnable "global"
+
+# As long as we are not running slog_015_neg, the test pool could be hanging
+# around.
+poolexists $TESTPOOL && zpool destroy -f $TESTPOOL
+
+log_assert "Replay of intent log succeeds."
+log_onexit cleanup
+log_must setup
+
+#
+# 1. Create an empty file system (TESTFS)
+#
+log_must zpool create $TESTPOOL $VDEV log mirror $LDEV
+log_must zfs set compression=on $TESTPOOL
+log_must zfs create $TESTPOOL/$TESTFS
+
+#
+# This dd command works around an issue where ZIL records aren't created
+# after freezing the pool unless a ZIL header already exists. Create a file
+# synchronously to force ZFS to write one out.
+#
+log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \
+    oflag=dsync,sync bs=1 count=1
+
+#
+# 2. Freeze TESTFS
+#
+log_must zpool freeze $TESTPOOL
+
+#
+# 3. Run various user commands that create files, directories and ACLs
+#
+
+# TX_CREATE
+log_must touch /$TESTPOOL/$TESTFS/a
+
+# TX_RENAME
+log_must mv /$TESTPOOL/$TESTFS/a /$TESTPOOL/$TESTFS/b
+
+# TX_SYMLINK
+log_must touch /$TESTPOOL/$TESTFS/c
+log_must ln -s /$TESTPOOL/$TESTFS/c /$TESTPOOL/$TESTFS/d
+
+# TX_LINK
+log_must touch /$TESTPOOL/$TESTFS/e
+log_must ln /$TESTPOOL/$TESTFS/e /$TESTPOOL/$TESTFS/f
+
+# TX_MKDIR
+log_must mkdir /$TESTPOOL/$TESTFS/dir_to_delete
+
+# TX_RMDIR
+log_must rmdir /$TESTPOOL/$TESTFS/dir_to_delete
+
+# Create a simple validation payload
+log_must mkdir -p $TESTDIR
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/payload bs=1k count=8
+typeset checksum=$(sha256digest /$TESTPOOL/$TESTFS/payload)
+
+# TX_WRITE (small file with ordering)
+log_must mkfile 1k /$TESTPOOL/$TESTFS/small_file
+log_must mkfile 512b /$TESTPOOL/$TESTFS/small_file
+
+# TX_CREATE, TX_MKDIR, TX_REMOVE, TX_RMDIR
+log_must cp -R /usr/dict /$TESTPOOL/$TESTFS
+log_must rm -rf /$TESTPOOL/$TESTFS/dict
+
+# TX_SETATTR
+log_must touch /$TESTPOOL/$TESTFS/setattr
+log_must chmod 567 /$TESTPOOL/$TESTFS/setattr
+log_must chgrp root /$TESTPOOL/$TESTFS/setattr
+log_must touch -cm -t 201311271200 /$TESTPOOL/$TESTFS/setattr
+
+# TX_TRUNCATE (to zero)
+log_must mkfile 4k /$TESTPOOL/$TESTFS/truncated_file
+log_must truncate -s 0 /$TESTPOOL/$TESTFS/truncated_file
+
+# TX_WRITE (large file)
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/large \
+    bs=128k count=64 oflag=sync
+
+# Write zeros, which compress to holes, in the middle of a file
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.1 bs=128k count=8
+log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.1 bs=128k count=2
+
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.2 bs=128k count=8
+log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.2 bs=128k count=2 seek=2
+
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.3 bs=128k count=8
+log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.3 bs=128k count=2 \
+   seek=2 conv=notrunc
+
+# TX_MKXATTR
+# log_must mkdir /$TESTPOOL/$TESTFS/xattr.dir
+# log_must attr -qs fileattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.dir
+# log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.dir
+# log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.dir
+
+# log_must touch /$TESTPOOL/$TESTFS/xattr.file
+# log_must attr -qs fileattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file
+# log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file
+# log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.file
+
+# TX_WRITE, TX_LINK, TX_REMOVE
+# Make sure TX_REMOVE won't affect TX_WRITE if file is not destroyed
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/link_and_unlink bs=128k \
+   count=8
+log_must ln /$TESTPOOL/$TESTFS/link_and_unlink \
+   /$TESTPOOL/$TESTFS/link_and_unlink.link
+log_must rm /$TESTPOOL/$TESTFS/link_and_unlink.link
+
+#
+# 4. Copy TESTFS to temporary location (TESTDIR/copy)
+#
+log_must mkdir -p $TESTDIR/copy
+log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR/copy/
+
+#
+# 5. Unmount filesystem and export the pool
+#
+# At this stage TESTFS is empty again and frozen, and the intent log
+# contains a complete set of deltas to replay.
+#
+log_must zfs unmount /$TESTPOOL/$TESTFS
+
+log_note "Verify transactions to replay:"
+log_must zdb -iv $TESTPOOL/$TESTFS
+
+log_must zpool export $TESTPOOL
+
+#
+# 6. Remount TESTFS <which replays the intent log>
+#
+# Import the pool to unfreeze it and claim log blocks.  It has to be
+# `zpool import -f` because we can't write a frozen pool's labels!
+#
+log_must zpool import -f -d $VDIR $TESTPOOL
+
+#
+# 7. Compare TESTFS against the TESTDIR/copy
+#
+log_note "Verify current block usage:"
+log_must zdb -bcv $TESTPOOL
+
+# log_note "Verify copy of xattrs:"
+# log_must attr -l /$TESTPOOL/$TESTFS/xattr.dir
+# log_must attr -l /$TESTPOOL/$TESTFS/xattr.file
+
+log_note "Verify working set diff:"
+log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy
+
+log_note "Verify file checksum:"
+typeset checksum1=$(sha256digest /$TESTPOOL/$TESTFS/payload)
+[[ "$checksum1" == "$checksum" ]] || \
+    log_fail "checksum mismatch ($checksum1 != $checksum)"
+
+log_pass "Replay of intent log succeeds."
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh	Thu Nov 14 23:30:04 2019 +0000
@@ -0,0 +1,137 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/tests/functional/slog/slog.kshlib
+
+#
+# DESCRIPTION:
+#	Verify slog replay works correctly when TX_REMOVEs are followed by
+#	TX_CREATEs.
+#
+# STRATEGY:
+#	1. Create a file system (TESTFS) with a lot of files
+#	2. Freeze TESTFS
+#	3. Remove all files then create a lot of files
+#	4. Copy TESTFS to temporary location (TESTDIR/copy)
+#	5. Unmount filesystem
+#	   <at this stage TESTFS reverts to its state at the freeze, and
+#	   the intent log contains a complete set of deltas to replay it>
+#	6. Remount TESTFS <which replays the intent log>
+#	7. Compare TESTFS against the TESTDIR/copy
+#
+
+verify_runnable "global"
+
+function cleanup_fs
+{
+	cleanup
+}
+
+log_assert "Replay of intent log succeeds."
+log_onexit cleanup_fs
+log_must setup
+
+#
+# 1. Create a file system (TESTFS) with a lot of files
+#
+log_must zpool create $TESTPOOL $VDEV log mirror $LDEV
+log_must zfs set compression=on $TESTPOOL
+log_must zfs create $TESTPOOL/$TESTFS
+
+# Prep for the test of TX_REMOVE followed by TX_CREATE
+dnsize=(legacy auto 1k 2k 4k 8k 16k)
+NFILES=200
+log_must mkdir /$TESTPOOL/$TESTFS/dir0
+log_must eval 'for i in $(seq $NFILES); do
+	zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS
+	touch /$TESTPOOL/$TESTFS/dir0/file.$i
+done'
+
+#
+# Reimport to reset dnode allocation pointer.
+# This is to make sure we will have TX_REMOVE and TX_CREATE on the same id
+#
+log_must zpool export $TESTPOOL
+log_must zpool import -f -d $VDIR $TESTPOOL
+
+#
+# This dd command works around an issue where ZIL records aren't created
+# after freezing the pool unless a ZIL header already exists. Create a file
+# synchronously to force ZFS to write one out.
+#
+log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \
+    oflag=dsync,sync bs=1 count=1
+
+#
+# 2. Freeze TESTFS
+#
+log_must zpool freeze $TESTPOOL
+
+#
+# 3. Remove all files then create a lot of files
+#
+# TX_REMOVE followed by TX_CREATE
+log_must eval 'rm -f /$TESTPOOL/$TESTFS/dir0/*'
+log_must eval 'for i in $(seq $NFILES); do
+	zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS
+	touch /$TESTPOOL/$TESTFS/dir0/file.$i
+done'
+
+#
+# 4. Copy TESTFS to temporary location (TESTDIR/copy)
+#
+log_must mkdir -p $TESTDIR/copy
+log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR/copy/
+
+#
+# 5. Unmount filesystem and export the pool
+#
+# At this stage TESTFS reverts to its state at the freeze, and the intent
+# log contains a complete set of deltas to replay.
+#
+log_must zfs unmount /$TESTPOOL/$TESTFS
+
+log_note "Verify transactions to replay:"
+log_must zdb -iv $TESTPOOL/$TESTFS
+
+log_must zpool export $TESTPOOL
+
+#
+# 6. Remount TESTFS <which replays the intent log>
+#
+# Import the pool to unfreeze it and claim log blocks.  It has to be
+# `zpool import -f` because we can't write a frozen pool's labels!
+#
+log_must zpool import -f -d $VDIR $TESTPOOL
+
+#
+# 7. Compare TESTFS against the TESTDIR/copy
+#
+log_note "Verify current block usage:"
+log_must zdb -bcv $TESTPOOL
+
+log_note "Verify number of files"
+log_must test "$(ls /$TESTPOOL/$TESTFS/dir0 | wc -l)" -eq $NFILES
+
+log_note "Verify working set diff:"
+log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy
+
+log_pass "Replay of intent log succeeds."
--- a/usr/src/uts/common/fs/smbsrv/smb2_dispatch.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/smbsrv/smb2_dispatch.c	Thu Nov 14 23:30:04 2019 +0000
@@ -10,7 +10,7 @@
  */
 
 /*
- * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  * Copyright 2019 RackTop Systems.
  */
 
@@ -823,18 +823,21 @@
 	 */
 	if ((sdd->sdt_flags & SDDF_SUPPRESS_UID) == 0 &&
 	    !sr->encrypted && sr->uid_user != NULL &&
-	    (sr->uid_user->u_sign_flags & SMB_SIGNING_CHECK) != 0) {
+	    (sr->uid_user->u_sign_flags & SMB_SIGNING_ENABLED) != 0) {
 		/*
-		 * This request type should be signed, and
-		 * we're configured to require signatures.
+		 * If the request is signed, check the signature.
+		 * Otherwise, if signing is required, deny access.
 		 */
-		if ((sr->smb2_hdr_flags & SMB2_FLAGS_SIGNED) == 0) {
-			smb2sr_put_error(sr, NT_STATUS_ACCESS_DENIED);
-			goto cmd_done;
-		}
-		rc = smb2_sign_check_request(sr);
-		if (rc != 0) {
-			DTRACE_PROBE1(smb2__sign__check, smb_request_t *, sr);
+		if ((sr->smb2_hdr_flags & SMB2_FLAGS_SIGNED) != 0) {
+			rc = smb2_sign_check_request(sr);
+			if (rc != 0) {
+				DTRACE_PROBE1(smb2__sign__check,
+				    smb_request_t *, sr);
+				smb2sr_put_error(sr, NT_STATUS_ACCESS_DENIED);
+				goto cmd_done;
+			}
+		} else if (
+		    (sr->uid_user->u_sign_flags & SMB_SIGNING_CHECK) != 0) {
 			smb2sr_put_error(sr, NT_STATUS_ACCESS_DENIED);
 			goto cmd_done;
 		}
--- a/usr/src/uts/common/fs/smbsrv/smb2_negotiate.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/smbsrv/smb2_negotiate.c	Thu Nov 14 23:30:04 2019 +0000
@@ -10,7 +10,7 @@
  */
 
 /*
- * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  * Copyright 2019 RackTop Systems.
  */
 
@@ -472,8 +472,7 @@
 	/*
 	 * The spec. says to parse the VALIDATE_NEGOTIATE_INFO here
 	 * and verify that the original negotiate was not modified.
-	 * The only tampering we need worry about is secmode, and
-	 * we're not taking that from the client, so don't bother.
+	 * The request MUST be signed, and we MUST validate the signature.
 	 *
 	 * One interesting requirement here is that we MUST reply
 	 * with exactly the same information as we returned in our
@@ -486,6 +485,9 @@
 	uint16_t secmode, num_dialects, dialects[8];
 	uint8_t clnt_guid[16];
 
+	if ((sr->smb2_hdr_flags & SMB2_FLAGS_SIGNED) == 0)
+		goto drop;
+
 	if (fsctl->InputCount < 24)
 		goto drop;
 
--- a/usr/src/uts/common/fs/smbsrv/smb_authenticate.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/smbsrv/smb_authenticate.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  */
 
 /*
@@ -544,6 +544,12 @@
 	if (smb_token_query_privilege(token, SE_CHANGE_NOTIFY_LUID))
 		privileges |= SMB_USER_PRIV_CHANGE_NOTIFY;
 
+	if (smb_token_query_privilege(token, SE_READ_FILE_LUID))
+		privileges |= SMB_USER_PRIV_READ_FILE;
+
+	if (smb_token_query_privilege(token, SE_WRITE_FILE_LUID))
+		privileges |= SMB_USER_PRIV_WRITE_FILE;
+
 	return (privileges);
 }
 
--- a/usr/src/uts/common/fs/smbsrv/smb_common_open.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/smbsrv/smb_common_open.c	Thu Nov 14 23:30:04 2019 +0000
@@ -543,6 +543,13 @@
 		    (op->create_disposition == FILE_OVERWRITE))
 			op->desired_access |= FILE_WRITE_DATA;
 
+		/* Dataset roots can't be deleted, so don't set DOC */
+		if ((op->create_options & FILE_DELETE_ON_CLOSE) != 0 &&
+		    (fnode->flags & NODE_FLAGS_VFSROOT) != 0) {
+			status = NT_STATUS_CANNOT_DELETE;
+			goto errout;
+		}
+
 		status = smb_fsop_access(sr, sr->user_cr, fnode,
 		    op->desired_access);
 		if (status != NT_STATUS_SUCCESS)
--- a/usr/src/uts/common/fs/smbsrv/smb_cred.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/smbsrv/smb_cred.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  */
 
 /*
@@ -97,35 +97,6 @@
 	ksidlist = smb_cred_set_sidlist(&token->tkn_win_grps);
 	crsetsidlist(cr, ksidlist);
 
-	/*
-	 * In the AD world, "take ownership privilege" is very much
-	 * like having Unix "root" privileges.  It's normally given
-	 * to members of the "Administrators" group, which normally
-	 * includes the the local Administrator (like root) and when
-	 * joined to a domain, "Domain Admins".
-	 */
-	if (smb_token_query_privilege(token, SE_TAKE_OWNERSHIP_LUID)) {
-		(void) crsetpriv(cr,
-		    PRIV_FILE_CHOWN,
-		    PRIV_FILE_DAC_READ,
-		    PRIV_FILE_DAC_SEARCH,
-		    PRIV_FILE_DAC_WRITE,
-		    PRIV_FILE_OWNER,
-		    NULL);
-	}
-
-	/*
-	 * See smb.4 bypass_traverse_checking
-	 *
-	 * For historical reasons, the Windows privilege is named
-	 * SeChangeNotifyPrivilege, though the description is
-	 * "Bypass traverse checking".
-	 */
-	if (smb_token_query_privilege(token, SE_CHANGE_NOTIFY_LUID)) {
-		(void) crsetpriv(cr, PRIV_FILE_DAC_SEARCH, NULL);
-	}
-
-
 	return (cr);
 }
 
--- a/usr/src/uts/common/fs/smbsrv/smb_node.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/smbsrv/smb_node.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  */
 /*
  * SMB Node State Machine
@@ -685,6 +685,11 @@
 		}
 	}
 
+	/* Dataset roots can't be deleted, so don't set DOC */
+	if ((node->flags & NODE_FLAGS_VFSROOT) != 0) {
+		return (NT_STATUS_CANNOT_DELETE);
+	}
+
 	mutex_enter(&node->n_mutex);
 	if (node->flags & NODE_FLAGS_DELETE_ON_CLOSE) {
 		/* It was already marked.  We're done. */
--- a/usr/src/uts/common/fs/smbsrv/smb_user.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/smbsrv/smb_user.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  * Copyright (c) 2016 by Delphix. All rights reserved.
  */
 
@@ -755,6 +755,57 @@
 	ASSERT(cr);
 	crhold(cr);
 
+	/*
+	 * See smb.4 bypass_traverse_checking
+	 *
+	 * For historical reasons, the Windows privilege is named
+	 * SeChangeNotifyPrivilege, though the description is
+	 * "Bypass traverse checking".
+	 */
+	if ((privileges & SMB_USER_PRIV_CHANGE_NOTIFY) != 0) {
+		(void) crsetpriv(cr, PRIV_FILE_DAC_SEARCH, NULL);
+	}
+
+	/*
+	 * Windows' "take ownership privilege" is similar to our
+	 * PRIV_FILE_CHOWN privilege. It's normally given to members of the
+	 * "Administrators" group, which normally includes the local
+	 * Administrator (like root) and when joined to a domain,
+	 * "Domain Admins".
+	 */
+	if ((privileges & SMB_USER_PRIV_TAKE_OWNERSHIP) != 0) {
+		(void) crsetpriv(cr,
+		    PRIV_FILE_CHOWN,
+		    PRIV_FILE_CHOWN_SELF,
+		    NULL);
+	}
+
+	/*
+	 * Bypass ACL for READ accesses.
+	 */
+	if ((privileges & SMB_USER_PRIV_READ_FILE) != 0) {
+		(void) crsetpriv(cr, PRIV_FILE_DAC_READ, NULL);
+	}
+
+	/*
+	 * Bypass ACL for WRITE accesses.
+	 * Include FILE_OWNER, as it covers WRITE_ACL and DELETE.
+	 */
+	if ((privileges & SMB_USER_PRIV_WRITE_FILE) != 0) {
+		(void) crsetpriv(cr,
+		    PRIV_FILE_DAC_WRITE,
+		    PRIV_FILE_OWNER,
+		    NULL);
+	}
+
+	/*
+	 * These privileges are used only when a file is opened with
+	 * 'backup intent'. These allow users to bypass certain access
+	 * controls. Administrators typically have these privileges,
+	 * and they are used during recursive take-ownership operations.
+	 * Some commonly used tools use 'backup intent' to administer
+	 * files that do not grant explicit permissions to Administrators.
+	 */
 	if (privileges & (SMB_USER_PRIV_BACKUP | SMB_USER_PRIV_RESTORE))
 		privcred = crdup(cr);
 
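
The block above maps each SMB privilege bit onto illumos credential privileges with one test per bit. A table-driven sketch of the same pairing — the bit values are hypothetical and the strings merely name the PRIV_* constants that crsetpriv() would receive:

#include <stdio.h>
#include <stddef.h>

/* Hypothetical bit values standing in for the SMB_USER_PRIV_* flags. */
#define	PRIV_CHANGE_NOTIFY	0x01
#define	PRIV_TAKE_OWNERSHIP	0x02
#define	PRIV_READ_FILE		0x04
#define	PRIV_WRITE_FILE		0x08

static const struct {
	int		smb_priv;
	const char	*cred_privs;	/* names crsetpriv() would receive */
} priv_map[] = {
	{ PRIV_CHANGE_NOTIFY,	"file_dac_search" },
	{ PRIV_TAKE_OWNERSHIP,	"file_chown,file_chown_self" },
	{ PRIV_READ_FILE,	"file_dac_read" },
	{ PRIV_WRITE_FILE,	"file_dac_write,file_owner" },
};

int
main(void)
{
	int privileges = PRIV_READ_FILE | PRIV_WRITE_FILE;

	for (size_t i = 0; i < sizeof (priv_map) / sizeof (priv_map[0]); i++) {
		if (privileges & priv_map[i].smb_priv)
			printf("grant: %s\n", priv_map[i].cred_privs);
	}
	return (0);
}
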
--- a/usr/src/uts/common/fs/zfs/arc.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/arc.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright (c) 2019, Joyent, Inc.
  * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
@@ -296,6 +296,7 @@
 #include <zfs_fletcher.h>
 #include <sys/aggsum.h>
 #include <sys/cityhash.h>
+#include <sys/param.h>
 
 #ifndef _KERNEL
 /* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
@@ -1268,6 +1269,20 @@
 static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
 static void l2arc_read_done(zio_t *);
 
+/*
+ * The arc_all_memory function is a ZoL enhancement that lives in their OSL
+ * code. In user-space code, which is used primarily for testing, we return
+ * half of all memory.
+ */
+uint64_t
+arc_all_memory(void)
+{
+#ifdef _KERNEL
+	return (ptob(physmem));
+#else
+	return ((sysconf(_SC_PAGESIZE) * sysconf(_SC_PHYS_PAGES)) / 2);
+#endif
+}
 
 /*
  * We use Cityhash for this. It's fast, and has good hash properties without
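
The userspace branch of arc_all_memory() can be exercised on its own. A small sketch of the same computation, runnable on any POSIX system that defines _SC_PHYS_PAGES:

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

int
main(void)
{
	/* Same computation as the !_KERNEL branch: half of physical memory. */
	uint64_t all = ((uint64_t)sysconf(_SC_PAGESIZE) *
	    (uint64_t)sysconf(_SC_PHYS_PAGES)) / 2;

	printf("arc_all_memory (userland): %llu bytes\n",
	    (unsigned long long)all);
	return (0);
}
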
--- a/usr/src/uts/common/fs/zfs/dnode.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/dnode.c	Thu Nov 14 23:30:04 2019 +0000
@@ -56,7 +56,6 @@
 	{ "dnode_hold_free_lock_retry",		KSTAT_DATA_UINT64 },
 	{ "dnode_hold_free_overflow",		KSTAT_DATA_UINT64 },
 	{ "dnode_hold_free_refcount",		KSTAT_DATA_UINT64 },
-	{ "dnode_hold_free_txg",		KSTAT_DATA_UINT64 },
 	{ "dnode_free_interior_lock_retry",	KSTAT_DATA_UINT64 },
 	{ "dnode_allocate",			KSTAT_DATA_UINT64 },
 	{ "dnode_reallocate",			KSTAT_DATA_UINT64 },
@@ -1260,6 +1259,10 @@
  * as an extra dnode slot by a large dnode, in which case it returns
  * ENOENT.
  *
+ * If the DNODE_DRY_RUN flag is set, we don't actually hold the dnode, just
+ * return whether the hold would succeed or not. tag and dnp should be
+ * set to NULL in this case.
+ *
  * errors:
  * EINVAL - invalid object number or flags.
  * ENOSPC - hole too small to fulfill "slots" request (DNODE_MUST_BE_FREE)
@@ -1287,6 +1290,7 @@
 
 	ASSERT(!(flag & DNODE_MUST_BE_ALLOCATED) || (slots == 0));
 	ASSERT(!(flag & DNODE_MUST_BE_FREE) || (slots > 0));
+	IMPLY(flag & DNODE_DRY_RUN, (tag == NULL) && (dnp == NULL));
 
 	/*
 	 * If you are holding the spa config lock as writer, you shouldn't
@@ -1316,8 +1320,11 @@
 		if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)
 			return (SET_ERROR(EEXIST));
 		DNODE_VERIFY(dn);
-		(void) zfs_refcount_add(&dn->dn_holds, tag);
-		*dnp = dn;
+		/* Don't actually hold if dry run, just return 0 */
+		if (!(flag & DNODE_DRY_RUN)) {
+			(void) zfs_refcount_add(&dn->dn_holds, tag);
+			*dnp = dn;
+		}
 		return (0);
 	}
 
@@ -1462,6 +1469,14 @@
 			return (SET_ERROR(ENOENT));
 		}
 
+		/* Don't actually hold if dry run, just return 0 */
+		if (flag & DNODE_DRY_RUN) {
+			mutex_exit(&dn->dn_mtx);
+			dnode_slots_rele(dnc, idx, slots);
+			dbuf_rele(db, FTAG);
+			return (0);
+		}
+
 		DNODE_STAT_BUMP(dnode_hold_alloc_hits);
 	} else if (flag & DNODE_MUST_BE_FREE) {
 
@@ -1521,6 +1536,14 @@
 			return (SET_ERROR(EEXIST));
 		}
 
+		/* Don't actually hold if dry run, just return 0 */
+		if (flag & DNODE_DRY_RUN) {
+			mutex_exit(&dn->dn_mtx);
+			dnode_slots_rele(dnc, idx, slots);
+			dbuf_rele(db, FTAG);
+			return (0);
+		}
+
 		dnode_set_slots(dnc, idx + 1, slots - 1, DN_SLOT_INTERIOR);
 		DNODE_STAT_BUMP(dnode_hold_free_hits);
 	} else {
@@ -1528,15 +1551,7 @@
 		return (SET_ERROR(EINVAL));
 	}
 
-	if (dn->dn_free_txg) {
-		DNODE_STAT_BUMP(dnode_hold_free_txg);
-		type = dn->dn_type;
-		mutex_exit(&dn->dn_mtx);
-		dnode_slots_rele(dnc, idx, slots);
-		dbuf_rele(db, FTAG);
-		return (SET_ERROR((flag & DNODE_MUST_BE_ALLOCATED) ?
-		    ENOENT : EEXIST));
-	}
+	ASSERT0(dn->dn_free_txg);
 
 	if (zfs_refcount_add(&dn->dn_holds, tag) == 1)
 		dbuf_add_ref(db, dnh);
@@ -1627,6 +1642,16 @@
 	}
 }
 
+/*
+ * Test whether we can create a dnode at the specified location.
+ */
+int
+dnode_try_claim(objset_t *os, uint64_t object, int slots)
+{
+	return (dnode_hold_impl(os, object, DNODE_MUST_BE_FREE | DNODE_DRY_RUN,
+	    slots, NULL, NULL));
+}
+
 void
 dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
 {
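
dnode_try_claim() asks whether a hold with DNODE_MUST_BE_FREE would succeed, without actually taking it. A standalone model of that dry-run idea, with a small bitmap standing in for the real dnode slot state:

#include <stdio.h>

#define	NSLOTS	16
static int slot_used[NSLOTS];	/* stands in for real dnode slot state */

/*
 * Dry-run probe: report whether object..object+slots-1 is claimable
 * without holding anything, mirroring what dnode_try_claim() asks of
 * dnode_hold_impl(DNODE_MUST_BE_FREE | DNODE_DRY_RUN).
 */
static int
try_claim(int object, int slots)
{
	if (object + slots > NSLOTS)
		return (-1);		/* hole too small (ENOSPC) */
	for (int i = object; i < object + slots; i++) {
		if (slot_used[i])
			return (-1);	/* slot occupied (EEXIST) */
	}
	return (0);			/* would succeed; nothing was held */
}

int
main(void)
{
	slot_used[3] = 1;
	printf("claim 0..2: %d\n", try_claim(0, 3));	/* 0: free */
	printf("claim 2..4: %d\n", try_claim(2, 3));	/* -1: overlaps */
	return (0);
}
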
--- a/usr/src/uts/common/fs/zfs/dsl_crypt.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/dsl_crypt.c	Thu Nov 14 23:30:04 2019 +0000
@@ -1401,6 +1401,7 @@
 spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj,
     uint64_t new_rddobj, dsl_wrapping_key_t *wkey, dmu_tx_t *tx)
 {
+	int ret;
 	zap_cursor_t *zc;
 	zap_attribute_t *za;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
@@ -1419,12 +1420,14 @@
 		return;
 	}
 
+	ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj);
+	VERIFY(ret == 0 || ret == ENOENT);
+
 	/*
 	 * Stop recursing if this dsl dir didn't inherit from the root
 	 * or if this dd is a clone.
 	 */
-	VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj));
-	if (curr_rddobj != rddobj || dsl_dir_is_clone(dd)) {
+	if (ret == ENOENT || curr_rddobj != rddobj || dsl_dir_is_clone(dd)) {
 		dsl_dir_rele(dd, FTAG);
 		return;
 	}
--- a/usr/src/uts/common/fs/zfs/metaslab.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/metaslab.c	Thu Nov 14 23:30:04 2019 +0000
@@ -193,28 +193,20 @@
 int metaslab_load_pct = 50;
 
 /*
- * Determines how many txgs a metaslab may remain loaded without having any
- * allocations from it. As long as a metaslab continues to be used we will
- * keep it loaded.
+ * These tunables control how long a metaslab will remain loaded after the
+ * last allocation from it.  A metaslab can't be unloaded until at least
+ * metaslab_unload_delay TXGs and metaslab_unload_delay_ms milliseconds
+ * have elapsed.  However, zfs_metaslab_mem_limit may cause it to be
+ * unloaded sooner.  These settings are intended to be generous -- to keep
+ * metaslabs loaded for a long time, reducing the rate of metaslab loading.
  */
-int metaslab_unload_delay = TXG_SIZE * 2;
-
-/*
- * Tunables used to reduce metaslab load/unload thrashing when selection
- * algorithm is allocating across metaslabs very evenly. In addition to
- * tracking when the slab was used for allocation (ms_selected_txg), we also
- * track when it was loaded (ms_loaded_txg). If the slab would be unloaded,
- * but the load txg is within the window of
- *    metaslab_unload_delay + metaslab_load_window
- * then we ramp up metaslab_unload_delay instead of unloading the metaslab.
- */
-int metaslab_load_window = 10;
-int metaslab_unload_delay_max = 256;
+int metaslab_unload_delay = 32;
+int metaslab_unload_delay_ms = 10 * 60 * 1000; /* ten minutes */
 
 /*
  * Max number of metaslabs per group to preload.
  */
-int metaslab_preload_limit = SPA_DVAS_PER_BP;
+int metaslab_preload_limit = 10;
 
 /*
  * Enable/disable preloading of metaslab.
@@ -275,6 +267,19 @@
  */
 int max_disabled_ms = 3;
 
+/*
+ * Maximum percentage of memory to use on storing loaded metaslabs. If loading
+ * a metaslab would take it over this percentage, the oldest selected metaslab
+ * is automatically unloaded.
+ */
+int zfs_metaslab_mem_limit = 25;
+
+/*
+ * Time (in seconds) to respect ms_max_size when the metaslab is not loaded.
+ * To avoid 64-bit overflow, don't set above UINT32_MAX.
+ */
+unsigned long zfs_metaslab_max_size_cache_sec = 3600; /* 1 hour */
+
 static uint64_t metaslab_weight(metaslab_t *);
 static void metaslab_set_fragmentation(metaslab_t *);
 static void metaslab_free_impl(vdev_t *, uint64_t, uint64_t, boolean_t);
@@ -282,6 +287,8 @@
 static void metaslab_passivate(metaslab_t *msp, uint64_t weight);
 static uint64_t metaslab_weight_from_range_tree(metaslab_t *msp);
 static void metaslab_flush_update(metaslab_t *, dmu_tx_t *);
+static unsigned int metaslab_idx_func(multilist_t *, void *);
+static void metaslab_evict(metaslab_t *, uint64_t);
 
 kmem_cache_t *metaslab_alloc_trace_cache;
 
@@ -301,6 +308,8 @@
 	mc->mc_rotor = NULL;
 	mc->mc_ops = ops;
 	mutex_init(&mc->mc_lock, NULL, MUTEX_DEFAULT, NULL);
+	mc->mc_metaslab_txg_list = multilist_create(sizeof (metaslab_t),
+	    offsetof(metaslab_t, ms_class_txg_node), metaslab_idx_func);
 	mc->mc_alloc_slots = kmem_zalloc(spa->spa_alloc_count *
 	    sizeof (zfs_refcount_t), KM_SLEEP);
 	mc->mc_alloc_max_slots = kmem_zalloc(spa->spa_alloc_count *
@@ -327,6 +336,7 @@
 	kmem_free(mc->mc_alloc_max_slots, mc->mc_spa->spa_alloc_count *
 	    sizeof (uint64_t));
 	mutex_destroy(&mc->mc_lock);
+	multilist_destroy(mc->mc_metaslab_txg_list);
 	kmem_free(mc, sizeof (metaslab_class_t));
 }
 
@@ -517,6 +527,51 @@
 	return (space);
 }
 
+void
+metaslab_class_evict_old(metaslab_class_t *mc, uint64_t txg)
+{
+	multilist_t *ml = mc->mc_metaslab_txg_list;
+	for (int i = 0; i < multilist_get_num_sublists(ml); i++) {
+		multilist_sublist_t *mls = multilist_sublist_lock(ml, i);
+		metaslab_t *msp = multilist_sublist_head(mls);
+		multilist_sublist_unlock(mls);
+		while (msp != NULL) {
+			mutex_enter(&msp->ms_lock);
+
+			/*
+			 * If the metaslab has been removed from the list
+			 * (which could happen if we were at the memory limit
+			 * and it was evicted during this loop), then we can't
+			 * proceed and we should restart the sublist.
+			 */
+			if (!multilist_link_active(&msp->ms_class_txg_node)) {
+				mutex_exit(&msp->ms_lock);
+				i--;
+				break;
+			}
+			mls = multilist_sublist_lock(ml, i);
+			metaslab_t *next_msp = multilist_sublist_next(mls, msp);
+			multilist_sublist_unlock(mls);
+			if (txg >
+			    msp->ms_selected_txg + metaslab_unload_delay &&
+			    gethrtime() > msp->ms_selected_time +
+			    (uint64_t)MSEC2NSEC(metaslab_unload_delay_ms)) {
+				metaslab_evict(msp, txg);
+			} else {
+				/*
+				 * Once we've hit a metaslab selected too
+				 * recently to evict, we're done evicting for
+				 * now.
+				 */
+				mutex_exit(&msp->ms_lock);
+				break;
+			}
+			mutex_exit(&msp->ms_lock);
+			msp = next_msp;
+		}
+	}
+}
+
 static int
 metaslab_compare(const void *x1, const void *x2)
 {
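
metaslab_class_evict_old() only unloads a metaslab once both clocks have run out: metaslab_unload_delay TXGs and metaslab_unload_delay_ms of wall time since the last selection. A standalone sketch of that predicate, with hrtime modeled as plain nanoseconds:

#include <stdio.h>
#include <stdint.h>

#define	MSEC2NSEC(m)	((uint64_t)(m) * 1000000ULL)

static int unload_delay = 32;			/* TXGs */
static int unload_delay_ms = 10 * 60 * 1000;	/* ten minutes */

/* Mirrors the eviction condition in metaslab_class_evict_old(). */
static int
should_evict(uint64_t txg, uint64_t selected_txg,
    uint64_t now_ns, uint64_t selected_ns)
{
	return (txg > selected_txg + unload_delay &&
	    now_ns > selected_ns + MSEC2NSEC(unload_delay_ms));
}

int
main(void)
{
	/* 40 TXGs stale but selected one minute ago: stays loaded. */
	printf("%d\n", should_evict(1040, 1000, MSEC2NSEC(60 * 1000), 0));
	/* 40 TXGs stale and idle for eleven minutes: evicted. */
	printf("%d\n", should_evict(1040, 1000, MSEC2NSEC(11 * 60 * 1000), 0));
	return (0);
}
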
@@ -960,6 +1015,14 @@
 	mutex_enter(&mg->mg_lock);
 	ASSERT(msp->ms_group == mg);
 	avl_remove(&mg->mg_metaslab_tree, msp);
+
+	metaslab_class_t *mc = msp->ms_group->mg_class;
+	multilist_sublist_t *mls =
+	    multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp);
+	if (multilist_link_active(&msp->ms_class_txg_node))
+		multilist_sublist_remove(mls, msp);
+	multilist_sublist_unlock(mls);
+
 	msp->ms_group = NULL;
 	mutex_exit(&mg->mg_lock);
 }
@@ -967,8 +1030,10 @@
 static void
 metaslab_group_sort_impl(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
 {
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
 	ASSERT(MUTEX_HELD(&mg->mg_lock));
 	ASSERT(msp->ms_group == mg);
+
 	avl_remove(&mg->mg_metaslab_tree, msp);
 	msp->ms_weight = weight;
 	avl_add(&mg->mg_metaslab_tree, msp);
@@ -1169,17 +1234,83 @@
  * Return the maximum contiguous segment within the metaslab.
  */
 uint64_t
-metaslab_block_maxsize(metaslab_t *msp)
+metaslab_largest_allocatable(metaslab_t *msp)
 {
 	avl_tree_t *t = &msp->ms_allocatable_by_size;
 	range_seg_t *rs;
 
-	if (t == NULL || (rs = avl_last(t)) == NULL)
-		return (0ULL);
+	if (t == NULL)
+		return (0);
+	rs = avl_last(t);
+	if (rs == NULL)
+		return (0);
 
 	return (rs->rs_end - rs->rs_start);
 }
 
+/*
+ * Return the maximum contiguous segment within the unflushed frees of this
+ * metaslab.
+ */
+uint64_t
+metaslab_largest_unflushed_free(metaslab_t *msp)
+{
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+	if (msp->ms_unflushed_frees == NULL)
+		return (0);
+
+	range_seg_t *rs = avl_last(&msp->ms_unflushed_frees_by_size);
+	if (rs == NULL)
+		return (0);
+
+	/*
+	 * When a range is freed from the metaslab, that range is added to
+	 * both the unflushed frees and the deferred frees. While the block
+	 * will eventually be usable, if the metaslab were loaded the range
+	 * would not be added to the ms_allocatable tree until TXG_DEFER_SIZE
+	 * txgs had passed.  As a result, when attempting to estimate an upper
+	 * bound for the largest currently-usable free segment in the
+	 * metaslab, we need to not consider any ranges currently in the defer
+	 * trees. This algorithm approximates the largest available chunk in
+	 * the largest range in the unflushed_frees tree by taking the first
+	 * chunk.  While this may be a poor estimate, it should only remain so
+	 * briefly and should eventually self-correct as frees are no longer
+	 * deferred. Similar logic applies to the ms_freed tree. See
+	 * metaslab_load() for more details.
+	 *
+	 * There are two primary sources of inaccuracy in this estimate. Both
+	 * are tolerated for performance reasons. The first source is that we
+	 * only check the largest segment for overlaps. Smaller segments may
+	 * have more favorable overlaps with the other trees, resulting in
+	 * larger usable chunks.  Second, we only look at the first chunk in
+	 * the largest segment; there may be other usable chunks in the
+	 * largest segment, but we ignore them.
+	 */
+	uint64_t rstart = rs->rs_start;
+	uint64_t rsize = rs->rs_end - rstart;
+	for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+		uint64_t start = 0;
+		uint64_t size = 0;
+		boolean_t found = range_tree_find_in(msp->ms_defer[t], rstart,
+		    rsize, &start, &size);
+		if (found) {
+			if (rstart == start)
+				return (0);
+			rsize = start - rstart;
+		}
+	}
+
+	uint64_t start = 0;
+	uint64_t size = 0;
+	boolean_t found = range_tree_find_in(msp->ms_freed, rstart,
+	    rsize, &start, &size);
+	if (found)
+		rsize = start - rstart;
+
+	return (rsize);
+}
+
 static range_seg_t *
 metaslab_block_find(avl_tree_t *t, uint64_t start, uint64_t size)
 {
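
The clipping loop in metaslab_largest_unflushed_free() trims the candidate segment at its first overlap with a defer or freed range, and gives up entirely if the overlap starts at the segment head. A worked arithmetic sketch of that clipping, with plain intervals in place of range trees:

#include <stdio.h>
#include <stdint.h>

struct seg { uint64_t start, end; };

/*
 * Clip the candidate [rstart, rstart + rsize) at its first overlap
 * with a blocking range, the way the loop above handles each defer
 * tree: an overlap at the very start yields 0, otherwise only the
 * prefix before the blocker remains usable.
 */
static uint64_t
clip(uint64_t rstart, uint64_t rsize, struct seg blocker)
{
	if (blocker.end <= rstart || blocker.start >= rstart + rsize)
		return (rsize);			/* no overlap */
	if (blocker.start <= rstart)
		return (0);			/* first chunk is blocked */
	return (blocker.start - rstart);	/* keep the prefix only */
}

int
main(void)
{
	/* Largest unflushed-free segment [100, 200), deferred free [150, 160). */
	struct seg defer = { 150, 160 };

	/* The estimate drops from 100 to the 50 units before the blocker. */
	printf("%llu\n", (unsigned long long)clip(100, 100, defer));
	return (0);
}
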
@@ -1269,7 +1400,7 @@
 	 * If we're running low on space, find a segment based on size,
 	 * rather than iterating based on offset.
 	 */
-	if (metaslab_block_maxsize(msp) < metaslab_df_alloc_threshold ||
+	if (metaslab_largest_allocatable(msp) < metaslab_df_alloc_threshold ||
 	    free_pct < metaslab_df_free_pct) {
 		offset = -1;
 	} else {
@@ -1367,7 +1498,7 @@
 	range_seg_t *rs, rsearch;
 	uint64_t hbit = highbit64(size);
 	uint64_t *cursor = &msp->ms_lbas[hbit - 1];
-	uint64_t max_size = metaslab_block_maxsize(msp);
+	uint64_t max_size = metaslab_largest_allocatable(msp);
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 	ASSERT3U(avl_numnodes(t), ==,
@@ -1437,6 +1568,13 @@
 		cv_wait(&msp->ms_flush_cv, &msp->ms_lock);
 }
 
+static unsigned int
+metaslab_idx_func(multilist_t *ml, void *arg)
+{
+	metaslab_t *msp = arg;
+	return (msp->ms_id % multilist_get_num_sublists(ml));
+}
+
 uint64_t
 metaslab_allocated_space(metaslab_t *msp)
 {
@@ -1495,6 +1633,8 @@
 		allocating +=
 		    range_tree_space(msp->ms_allocating[(txg + t) & TXG_MASK]);
 	}
+	ASSERT3U(allocating + msp->ms_allocated_this_txg, ==,
+	    msp->ms_allocating_total);
 
 	ASSERT3U(msp->ms_deferspace, ==,
 	    range_tree_space(msp->ms_defer[0]) +
@@ -1683,7 +1823,6 @@
 
 	msp->ms_weight = 0;
 	msp->ms_fragmentation = 0;
-	msp->ms_max_size = 0;
 
 	/*
 	 * This function is used for verification purposes. Regardless of
@@ -1711,6 +1850,87 @@
 	VERIFY3U(msp->ms_weight, ==, weight);
 }
 
+/*
+ * If we're over the zfs_metaslab_mem_limit, select the loaded metaslab from
+ * this class that was used longest ago, and attempt to unload it.  We don't
+ * want to spend too much time in this loop to prevent performance
+ * degredation, and we expect that most of the time this operation will
+ * succeed. Between that and the normal unloading processing during txg sync,
+ * we expect this to keep the metaslab memory usage under control.
+ */
+static void
+metaslab_potentially_evict(metaslab_class_t *mc)
+{
+#ifdef _KERNEL
+	uint64_t allmem = arc_all_memory();
+	extern kmem_cache_t *range_seg_cache;
+	uint64_t inuse = kmem_cache_stat(range_seg_cache, "buf_inuse");
+	uint64_t size =	kmem_cache_stat(range_seg_cache, "buf_size");
+	int tries = 0;
+	for (; allmem * zfs_metaslab_mem_limit / 100 < inuse * size &&
+	    tries < multilist_get_num_sublists(mc->mc_metaslab_txg_list) * 2;
+	    tries++) {
+		unsigned int idx = multilist_get_random_index(
+		    mc->mc_metaslab_txg_list);
+		multilist_sublist_t *mls =
+		    multilist_sublist_lock(mc->mc_metaslab_txg_list, idx);
+		metaslab_t *msp = multilist_sublist_head(mls);
+		multilist_sublist_unlock(mls);
+		while (msp != NULL && allmem * zfs_metaslab_mem_limit / 100 <
+		    inuse * size) {
+			VERIFY3P(mls, ==, multilist_sublist_lock(
+			    mc->mc_metaslab_txg_list, idx));
+			ASSERT3U(idx, ==,
+			    metaslab_idx_func(mc->mc_metaslab_txg_list, msp));
+
+			if (!multilist_link_active(&msp->ms_class_txg_node)) {
+				multilist_sublist_unlock(mls);
+				break;
+			}
+			metaslab_t *next_msp = multilist_sublist_next(mls, msp);
+			multilist_sublist_unlock(mls);
+			/*
+			 * If the metaslab is currently loading there are two
+			 * cases. If it's the metaslab we're evicting, we
+			 * can't continue on or we'll panic when we attempt to
+			 * recursively lock the mutex. If it's another
+			 * metaslab that's loading, it can be safely skipped,
+			 * since we know it's very new and therefore not a
+			 * good eviction candidate. We check later once the
+			 * lock is held that the metaslab is fully loaded
+			 * before actually unloading it.
+			 */
+			if (msp->ms_loading) {
+				msp = next_msp;
+				inuse = kmem_cache_stat(range_seg_cache,
+				    "buf_inuse");
+				continue;
+			}
+			/*
+			 * We can't unload metaslabs with no spacemap because
+			 * they're not ready to be unloaded yet. We can't
+			 * unload metaslabs with outstanding allocations
+			 * because doing so could cause the metaslab's weight
+			 * to decrease while it's unloaded, which violates an
+			 * invariant that we use to prevent unnecessary
+			 * loading. We also don't unload metaslabs that are
+			 * currently active because they are high-weight
+			 * metaslabs that are likely to be used in the near
+			 * future.
+			 */
+			mutex_enter(&msp->ms_lock);
+			if (msp->ms_allocator == -1 && msp->ms_sm != NULL &&
+			    msp->ms_allocating_total == 0) {
+				metaslab_unload(msp);
+			}
+			mutex_exit(&msp->ms_lock);
+			msp = next_msp;
+			inuse = kmem_cache_stat(range_seg_cache, "buf_inuse");
+		}
+	}
+#endif
+}
+
 static int
 metaslab_load_impl(metaslab_t *msp)
 {
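
The trigger in metaslab_potentially_evict() compares the range_seg cache footprint against zfs_metaslab_mem_limit percent of arc_all_memory(). A sketch of that budget test, with made-up kstat numbers:

#include <stdio.h>
#include <stdint.h>

static int mem_limit_pct = 25;	/* plays the role of zfs_metaslab_mem_limit */

/* Same comparison as the loop guard in metaslab_potentially_evict(). */
static int
over_budget(uint64_t allmem, uint64_t inuse, uint64_t size)
{
	return (allmem * mem_limit_pct / 100 < inuse * size);
}

int
main(void)
{
	uint64_t allmem = 8ULL << 30;	/* 8 GiB system */
	uint64_t size = 64;		/* made-up bytes per range_seg_t */

	/* 1 GiB of range segs against a 2 GiB budget: under. */
	printf("%d\n", over_budget(allmem, (1ULL << 30) / size, size));
	/* 3 GiB of range segs: over budget, so eviction kicks in. */
	printf("%d\n", over_budget(allmem, (3ULL << 30) / size, size));
	return (0);
}
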
@@ -1873,18 +2093,21 @@
 	 * comment for ms_synchist and ms_deferhist[] for more info]
 	 */
 	uint64_t weight = msp->ms_weight;
+	uint64_t max_size = msp->ms_max_size;
 	metaslab_recalculate_weight_and_sort(msp);
 	if (!WEIGHT_IS_SPACEBASED(weight))
 		ASSERT3U(weight, <=, msp->ms_weight);
-	msp->ms_max_size = metaslab_block_maxsize(msp);
-
+	msp->ms_max_size = metaslab_largest_allocatable(msp);
+	ASSERT3U(max_size, <=, msp->ms_max_size);
 	hrtime_t load_end = gethrtime();
+	msp->ms_load_time = load_end;
 	if (zfs_flags & ZFS_DEBUG_LOG_SPACEMAP) {
 		zfs_dbgmsg("loading: txg %llu, spa %s, vdev_id %llu, "
 		    "ms_id %llu, smp_length %llu, "
 		    "unflushed_allocs %llu, unflushed_frees %llu, "
 		    "freed %llu, defer %llu + %llu, "
-		    "loading_time %lld ms",
+		    "loading_time %lld ms, ms_max_size %llu, "
+		    "max size error %llu",
 		    spa_syncing_txg(spa), spa_name(spa),
 		    msp->ms_group->mg_vd->vdev_id, msp->ms_id,
 		    space_map_length(msp->ms_sm),
@@ -1893,7 +2116,8 @@
 		    range_tree_space(msp->ms_freed),
 		    range_tree_space(msp->ms_defer[0]),
 		    range_tree_space(msp->ms_defer[1]),
-		    (longlong_t)((load_end - load_start) / 1000000));
+		    (longlong_t)((load_end - load_start) / 1000000),
+		    msp->ms_max_size, msp->ms_max_size - max_size);
 	}
 
 	metaslab_verify_space(msp, spa_syncing_txg(spa));
@@ -1902,7 +2126,7 @@
 }
 
 int
-metaslab_load(metaslab_t *msp, uint64_t txg)
+metaslab_load(metaslab_t *msp)
 {
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 
@@ -1939,11 +2163,20 @@
 	 */
 	ASSERT(!msp->ms_loaded);
 
+	/*
+	 * If we're loading a metaslab in the normal class, consider evicting
+	 * another one to keep our memory usage under the limit defined by the
+	 * zfs_metaslab_mem_limit tunable.
+	 */
+	if (spa_normal_class(msp->ms_group->mg_class->mc_spa) ==
+	    msp->ms_group->mg_class) {
+		metaslab_potentially_evict(msp->ms_group->mg_class);
+	}
+
 	int error = metaslab_load_impl(msp);
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 	msp->ms_loading = B_FALSE;
-	msp->ms_loaded_txg = txg;
 	cv_broadcast(&msp->ms_load_cv);
 
 	return (error);
@@ -1954,14 +2187,29 @@
 {
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 
-	metaslab_verify_weight_and_frag(msp);
+	/*
+	 * This can happen if a metaslab is selected for eviction (in
+	 * metaslab_potentially_evict) and then unloaded during spa_sync (via
+	 * metaslab_class_evict_old).
+	 */
+	if (!msp->ms_loaded)
+		return;
 
 	range_tree_vacate(msp->ms_allocatable, NULL, NULL);
 	msp->ms_loaded = B_FALSE;
-	msp->ms_loaded_txg = 0;
-
+	msp->ms_unload_time = gethrtime();
+
+	msp->ms_activation_weight = 0;
 	msp->ms_weight &= ~METASLAB_ACTIVE_MASK;
-	msp->ms_max_size = 0;
+
+	if (msp->ms_group != NULL) {
+		metaslab_class_t *mc = msp->ms_group->mg_class;
+		multilist_sublist_t *mls =
+		    multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp);
+		if (multilist_link_active(&msp->ms_class_txg_node))
+			multilist_sublist_remove(mls, msp);
+		multilist_sublist_unlock(mls);
+	}
 
 	/*
 	 * We explicitly recalculate the metaslab's weight based on its space
@@ -1980,6 +2228,21 @@
 }
 
 void
+metaslab_set_selected_txg(metaslab_t *msp, uint64_t txg)
+{
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+	metaslab_class_t *mc = msp->ms_group->mg_class;
+	multilist_sublist_t *mls =
+	    multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp);
+	if (multilist_link_active(&msp->ms_class_txg_node))
+		multilist_sublist_remove(mls, msp);
+	msp->ms_selected_txg = txg;
+	msp->ms_selected_time = gethrtime();
+	multilist_sublist_insert_tail(mls, msp);
+	multilist_sublist_unlock(mls);
+}
+
+void
 metaslab_space_update(vdev_t *vd, metaslab_class_t *mc, int64_t alloc_delta,
     int64_t defer_delta, int64_t space_delta)
 {
@@ -2007,6 +2270,7 @@
 	mutex_init(&ms->ms_sync_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&ms->ms_load_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&ms->ms_flush_cv, NULL, CV_DEFAULT, NULL);
+	multilist_link_init(&ms->ms_class_txg_node);
 
 	ms->ms_id = id;
 	ms->ms_start = id << vd->vdev_ms_shift;
@@ -2300,7 +2564,6 @@
 	uint64_t weight, space;
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
-	ASSERT(!vd->vdev_removing);
 
 	/*
 	 * The baseline weight is the metaslab's free space.
@@ -2519,13 +2782,19 @@
  * weights we rely on the entire weight (excluding the weight-type bit).
  */
 boolean_t
-metaslab_should_allocate(metaslab_t *msp, uint64_t asize)
+metaslab_should_allocate(metaslab_t *msp, uint64_t asize, boolean_t try_hard)
 {
-	if (msp->ms_loaded) {
+	/*
+	 * If the metaslab is loaded, ms_max_size is definitive and we can use
+	 * the fast check. If it's not, the ms_max_size is a lower bound (once
+	 * set), and we should use the fast check as long as we're not in
+	 * try_hard and it's been less than zfs_metaslab_max_size_cache_sec
+	 * seconds since the metaslab was unloaded.
+	 */
+	if (msp->ms_loaded ||
+	    (msp->ms_max_size != 0 && !try_hard && gethrtime() <
+	    msp->ms_unload_time + SEC2NSEC(zfs_metaslab_max_size_cache_sec)))
 		return (msp->ms_max_size >= asize);
-	} else {
-		ASSERT0(msp->ms_max_size);
-	}
 
 	boolean_t should_allocate;
 	if (!WEIGHT_IS_SPACEBASED(msp->ms_weight)) {
@@ -2541,6 +2810,7 @@
 		should_allocate = (asize <=
 		    (msp->ms_weight & ~METASLAB_WEIGHT_TYPE));
 	}
+
 	return (should_allocate);
 }
 
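
The rewritten metaslab_should_allocate() trusts a cached ms_max_size even for an unloaded metaslab, but only outside try_hard and only within zfs_metaslab_max_size_cache_sec of the unload. A condensed sketch of that gate:

#include <stdio.h>
#include <stdint.h>

#define	SEC2NSEC(s)	((uint64_t)(s) * 1000000000ULL)

static unsigned long max_size_cache_sec = 3600;	/* 1 hour */

/* Mirrors the fast-path test at the top of metaslab_should_allocate(). */
static int
can_use_cached_max_size(int loaded, uint64_t max_size, int try_hard,
    uint64_t now_ns, uint64_t unload_ns)
{
	return (loaded || (max_size != 0 && !try_hard &&
	    now_ns < unload_ns + SEC2NSEC(max_size_cache_sec)));
}

int
main(void)
{
	/* Unloaded 30 minutes ago, normal allocation: trust the cache. */
	printf("%d\n", can_use_cached_max_size(0, 1 << 20, 0, SEC2NSEC(1800), 0));
	/* Same metaslab under try_hard: fall back to the weight check. */
	printf("%d\n", can_use_cached_max_size(0, 1 << 20, 1, SEC2NSEC(1800), 0));
	return (0);
}
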
@@ -2553,24 +2823,24 @@
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 
-	/*
-	 * If this vdev is in the process of being removed, there is nothing
-	 * for us to do here.
-	 */
-	if (vd->vdev_removing)
-		return (0);
-
 	metaslab_set_fragmentation(msp);
 
 	/*
-	 * Update the maximum size if the metaslab is loaded. This will
+	 * Update the maximum size. If the metaslab is loaded, this will
 	 * ensure that we get an accurate maximum size if newly freed space
-	 * has been added back into the free tree.
+	 * has been added back into the free tree. If the metaslab is
+	 * unloaded, we check if there's a larger free segment in the
+	 * unflushed frees. This is a lower bound on the largest allocatable
+	 * segment size. Coalescing of adjacent entries may reveal larger
+	 * allocatable segments, but we aren't aware of those until loading
+	 * the space map into a range tree.
 	 */
-	if (msp->ms_loaded)
-		msp->ms_max_size = metaslab_block_maxsize(msp);
-	else
-		ASSERT0(msp->ms_max_size);
+	if (msp->ms_loaded) {
+		msp->ms_max_size = metaslab_largest_allocatable(msp);
+	} else {
+		msp->ms_max_size = MAX(msp->ms_max_size,
+		    metaslab_largest_unflushed_free(msp));
+	}
 
 	/*
 	 * Segment-based weighting requires space map histogram support.
@@ -2589,6 +2859,8 @@
 void
 metaslab_recalculate_weight_and_sort(metaslab_t *msp)
 {
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
 	/* note: we preserve the mask (e.g. indication of primary, etc..) */
 	uint64_t was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
 	metaslab_group_sort(msp->ms_group, msp,
@@ -2599,16 +2871,23 @@
 metaslab_activate_allocator(metaslab_group_t *mg, metaslab_t *msp,
     int allocator, uint64_t activation_weight)
 {
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
 	/*
 	 * If we're activating for the claim code, we don't want to actually
 	 * set the metaslab up for a specific allocator.
 	 */
-	if (activation_weight == METASLAB_WEIGHT_CLAIM)
+	if (activation_weight == METASLAB_WEIGHT_CLAIM) {
+		ASSERT0(msp->ms_activation_weight);
+		msp->ms_activation_weight = msp->ms_weight;
+		metaslab_group_sort(mg, msp, msp->ms_weight |
+		    activation_weight);
 		return (0);
+	}
+
 	metaslab_t **arr = (activation_weight == METASLAB_WEIGHT_PRIMARY ?
 	    mg->mg_primaries : mg->mg_secondaries);
 
-	ASSERT(MUTEX_HELD(&msp->ms_lock));
 	mutex_enter(&mg->mg_lock);
 	if (arr[allocator] != NULL) {
 		mutex_exit(&mg->mg_lock);
@@ -2619,39 +2898,88 @@
 	ASSERT3S(msp->ms_allocator, ==, -1);
 	msp->ms_allocator = allocator;
 	msp->ms_primary = (activation_weight == METASLAB_WEIGHT_PRIMARY);
+
+	ASSERT0(msp->ms_activation_weight);
+	msp->ms_activation_weight = msp->ms_weight;
+	metaslab_group_sort_impl(mg, msp,
+	    msp->ms_weight | activation_weight);
+
 	mutex_exit(&mg->mg_lock);
 
 	return (0);
 }
 
 static int
-metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight,
-    uint64_t txg)
+metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight)
 {
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 
-	if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
-		int error = metaslab_load(msp, txg);
-		if (error != 0) {
-			metaslab_group_sort(msp->ms_group, msp, 0);
-			return (error);
-		}
-		if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) {
-			/*
-			 * The metaslab was activated for another allocator
-			 * while we were waiting, we should reselect.
-			 */
+	/*
+	 * The current metaslab is already activated for us so there
+	 * is nothing to do. Already activated though, doesn't mean
+	 * that this metaslab is activated for our allocator nor our
+	 * requested activation weight. The metaslab could have started
+	 * as an active one for our allocator but changed allocators
+	 * while we were waiting to grab its ms_lock or we stole it
+	 * [see find_valid_metaslab()]. This means that there is a
+	 * possibility of passivating a metaslab of another allocator
+	 * or from a different activation mask, from this thread.
+	 */
+	if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) {
+		ASSERT(msp->ms_loaded);
+		return (0);
+	}
+
+	int error = metaslab_load(msp);
+	if (error != 0) {
+		metaslab_group_sort(msp->ms_group, msp, 0);
+		return (error);
+	}
+
+	/*
+	 * When entering metaslab_load() we may have dropped the
+	 * ms_lock because we were loading this metaslab, or we
+	 * were waiting for another thread to load it for us. In
+	 * that scenario, we recheck the weight of the metaslab
+	 * to see if it was activated by another thread.
+	 *
+	 * If the metaslab was activated for another allocator or
+	 * it was activated with a different activation weight (e.g.
+	 * we wanted to make it a primary but it was activated as
+	 * secondary) we return error (EBUSY).
+	 *
+	 * If the metaslab was activated for the same allocator
+	 * and requested activation mask, skip activating it.
+	 */
+	if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) {
+		if (msp->ms_allocator != allocator)
 			return (EBUSY);
-		}
-		if ((error = metaslab_activate_allocator(msp->ms_group, msp,
-		    allocator, activation_weight)) != 0) {
-			return (error);
-		}
-
-		msp->ms_activation_weight = msp->ms_weight;
-		metaslab_group_sort(msp->ms_group, msp,
-		    msp->ms_weight | activation_weight);
+
+		if ((msp->ms_weight & activation_weight) == 0)
+			return (EBUSY);
+
+		EQUIV((activation_weight == METASLAB_WEIGHT_PRIMARY),
+		    msp->ms_primary);
+		return (0);
 	}
+
+	/*
+	 * If the metaslab has literally 0 space, it will have weight 0. In
+	 * that case, don't bother activating it. This can happen if the
+	 * metaslab had space during find_valid_metaslab, but another thread
+	 * loaded it and used all that space while we were waiting to grab the
+	 * lock.
+	 */
+	if (msp->ms_weight == 0) {
+		ASSERT0(range_tree_space(msp->ms_allocatable));
+		return (SET_ERROR(ENOSPC));
+	}
+
+	if ((error = metaslab_activate_allocator(msp->ms_group, msp,
+	    allocator, activation_weight)) != 0) {
+		return (error);
+	}
+
 	ASSERT(msp->ms_loaded);
 	ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
 
@@ -2663,6 +2991,8 @@
     uint64_t weight)
 {
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
+	ASSERT(msp->ms_loaded);
+
 	if (msp->ms_weight & METASLAB_WEIGHT_CLAIM) {
 		metaslab_group_sort(mg, msp, weight);
 		return;
@@ -2670,15 +3000,16 @@
 
 	mutex_enter(&mg->mg_lock);
 	ASSERT3P(msp->ms_group, ==, mg);
+	ASSERT3S(0, <=, msp->ms_allocator);
+	ASSERT3U(msp->ms_allocator, <, mg->mg_allocators);
+
 	if (msp->ms_primary) {
-		ASSERT3U(0, <=, msp->ms_allocator);
-		ASSERT3U(msp->ms_allocator, <, mg->mg_allocators);
 		ASSERT3P(mg->mg_primaries[msp->ms_allocator], ==, msp);
 		ASSERT(msp->ms_weight & METASLAB_WEIGHT_PRIMARY);
 		mg->mg_primaries[msp->ms_allocator] = NULL;
 	} else {
+		ASSERT3P(mg->mg_secondaries[msp->ms_allocator], ==, msp);
 		ASSERT(msp->ms_weight & METASLAB_WEIGHT_SECONDARY);
-		ASSERT3P(mg->mg_secondaries[msp->ms_allocator], ==, msp);
 		mg->mg_secondaries[msp->ms_allocator] = NULL;
 	}
 	msp->ms_allocator = -1;
@@ -2700,9 +3031,10 @@
 	    range_tree_is_empty(msp->ms_allocatable));
 	ASSERT0(weight & METASLAB_ACTIVE_MASK);
 
+	ASSERT(msp->ms_activation_weight != 0);
 	msp->ms_activation_weight = 0;
 	metaslab_passivate_allocator(msp->ms_group, msp, weight);
-	ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0);
+	ASSERT0(msp->ms_weight & METASLAB_ACTIVE_MASK);
 }
 
 /*
@@ -2741,13 +3073,14 @@
 metaslab_preload(void *arg)
 {
 	metaslab_t *msp = arg;
-	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+	metaslab_class_t *mc = msp->ms_group->mg_class;
+	spa_t *spa = mc->mc_spa;
 
 	ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
 
 	mutex_enter(&msp->ms_lock);
-	(void) metaslab_load(msp, spa_syncing_txg(spa));
-	msp->ms_selected_txg = spa_syncing_txg(spa);
+	(void) metaslab_load(msp);
+	metaslab_set_selected_txg(msp, spa_syncing_txg(spa));
 	mutex_exit(&msp->ms_lock);
 }
 
@@ -3200,12 +3533,19 @@
 	/*
 	 * Normally, we don't want to process a metaslab if there are no
 	 * allocations or frees to perform. However, if the metaslab is being
-	 * forced to condense and it's loaded, we need to let it through.
+	 * forced to condense, it's loaded and we're not beyond the final
+	 * dirty txg, we need to let it through. Not condensing beyond the
+	 * final dirty txg prevents an issue where metaslabs that need to be
+	 * condensed but were loaded for other reasons could cause a panic
+	 * here. By only checking the txg in that branch of the conditional,
+	 * we preserve the utility of the VERIFY statements in all other
+	 * cases.
 	 */
 	if (range_tree_is_empty(alloctree) &&
 	    range_tree_is_empty(msp->ms_freeing) &&
 	    range_tree_is_empty(msp->ms_checkpointing) &&
-	    !(msp->ms_loaded && msp->ms_condense_wanted))
+	    !(msp->ms_loaded && msp->ms_condense_wanted &&
+	    txg <= spa_final_dirty_txg(spa)))
 		return;
 
 
@@ -3458,6 +3798,23 @@
 	dmu_tx_commit(tx);
 }
 
+static void
+metaslab_evict(metaslab_t *msp, uint64_t txg)
+{
+	if (!msp->ms_loaded || msp->ms_disabled != 0)
+		return;
+
+	for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
+		VERIFY0(range_tree_space(
+		    msp->ms_allocating[(txg + t) & TXG_MASK]));
+	}
+	if (msp->ms_allocator != -1)
+		metaslab_passivate(msp, msp->ms_weight & ~METASLAB_ACTIVE_MASK);
+
+	if (!metaslab_debug_unload)
+		metaslab_unload(msp);
+}
+
 /*
  * Called after a transaction group has completely synced to mark
  * all of the metaslab's free space as usable.
@@ -3504,7 +3861,9 @@
 		ASSERT3P(msp->ms_unflushed_allocs, ==, NULL);
 		msp->ms_unflushed_allocs = range_tree_create(NULL, NULL);
 		ASSERT3P(msp->ms_unflushed_frees, ==, NULL);
-		msp->ms_unflushed_frees = range_tree_create(NULL, NULL);
+		msp->ms_unflushed_frees = range_tree_create_impl(&rt_avl_ops,
+		    &msp->ms_unflushed_frees_by_size,
+		    metaslab_rangesize_compare, 0);
 
 		metaslab_space_update(vd, mg->mg_class, 0, 0, msp->ms_size);
 	}
@@ -3609,41 +3968,28 @@
 	/*
 	 * If the metaslab is loaded and we've not tried to load or allocate
 	 * from it in 'metaslab_unload_delay' txgs, then we normally unload it.
-	 * However, to prevent thrashing, if the metaslab was recently loaded,
-	 * then instead of unloading it, we increase the unload delay (only up
-	 * to the maximum).
 	 */
 	if (msp->ms_loaded &&
 	    msp->ms_disabled == 0 &&
 	    msp->ms_selected_txg + metaslab_unload_delay < txg) {
-		if (msp->ms_loaded_txg != 0 && msp->ms_loaded_txg +
-		    metaslab_unload_delay + metaslab_load_window >= txg) {
-			if (metaslab_unload_delay + metaslab_load_window <=
-			    metaslab_unload_delay_max) {
-				metaslab_unload_delay += metaslab_load_window;
-			}
-			DTRACE_PROBE1(zfs__metaslab__delay__unload,
-			    metaslab_t *, msp);
-		} else {
-			for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
-				VERIFY0(range_tree_space(
-				    msp->ms_allocating[(txg + t) & TXG_MASK]));
-			}
-			if (msp->ms_allocator != -1) {
-				metaslab_passivate(msp, msp->ms_weight &
-				    ~METASLAB_ACTIVE_MASK);
-			}
-
-			if (!metaslab_debug_unload)
-				metaslab_unload(msp);
+		for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
+			VERIFY0(range_tree_space(
+			    msp->ms_allocating[(txg + t) & TXG_MASK]));
 		}
+		if (msp->ms_allocator != -1) {
+			metaslab_passivate(msp, msp->ms_weight &
+			    ~METASLAB_ACTIVE_MASK);
+		}
+
+		if (!metaslab_debug_unload)
+			metaslab_unload(msp);
 	}
 
 	ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK]));
 	ASSERT0(range_tree_space(msp->ms_freeing));
 	ASSERT0(range_tree_space(msp->ms_freed));
 	ASSERT0(range_tree_space(msp->ms_checkpointing));
-
+	msp->ms_allocating_total -= msp->ms_allocated_this_txg;
 	msp->ms_allocated_this_txg = 0;
 	mutex_exit(&msp->ms_lock);
 }
@@ -3897,6 +4243,7 @@
 			vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
 
 		range_tree_add(msp->ms_allocating[txg & TXG_MASK], start, size);
+		msp->ms_allocating_total += size;
 
 		metaslab_verify_space(msp, txg);
 	}
@@ -3905,7 +4252,7 @@
 	 * Now that we've attempted the allocation we need to update the
 	 * metaslab's maximum block size since it may have changed.
 	 */
-	msp->ms_max_size = metaslab_block_maxsize(msp);
+	msp->ms_max_size = metaslab_largest_allocatable(msp);
 	return (start);
 }
 
@@ -3923,7 +4270,8 @@
 static metaslab_t *
 find_valid_metaslab(metaslab_group_t *mg, uint64_t activation_weight,
     dva_t *dva, int d, boolean_t want_unique, uint64_t asize, int allocator,
-    zio_alloc_list_t *zal, metaslab_t *search, boolean_t *was_active)
+    boolean_t try_hard, zio_alloc_list_t *zal, metaslab_t *search,
+    boolean_t *was_active)
 {
 	avl_index_t idx;
 	avl_tree_t *t = &mg->mg_metaslab_tree;
@@ -3933,7 +4281,7 @@
 
 	for (; msp != NULL; msp = AVL_NEXT(t, msp)) {
 		int i;
-		if (!metaslab_should_allocate(msp, asize)) {
+		if (!metaslab_should_allocate(msp, asize, try_hard)) {
 			metaslab_trace_add(zal, mg, msp, asize, d,
 			    TRACE_TOO_SMALL, allocator);
 			continue;
@@ -3975,17 +4323,51 @@
 	return (msp);
 }
 
+void
+metaslab_active_mask_verify(metaslab_t *msp)
+{
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+	if ((zfs_flags & ZFS_DEBUG_METASLAB_VERIFY) == 0)
+		return;
+
+	if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0)
+		return;
+
+	if (msp->ms_weight & METASLAB_WEIGHT_PRIMARY) {
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_SECONDARY);
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_CLAIM);
+		VERIFY3S(msp->ms_allocator, !=, -1);
+		VERIFY(msp->ms_primary);
+		return;
+	}
+
+	if (msp->ms_weight & METASLAB_WEIGHT_SECONDARY) {
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_PRIMARY);
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_CLAIM);
+		VERIFY3S(msp->ms_allocator, !=, -1);
+		VERIFY(!msp->ms_primary);
+		return;
+	}
+
+	if (msp->ms_weight & METASLAB_WEIGHT_CLAIM) {
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_PRIMARY);
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_SECONDARY);
+		VERIFY3S(msp->ms_allocator, ==, -1);
+		return;
+	}
+}
+
 /* ARGSUSED */
 static uint64_t
 metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
-    uint64_t asize, uint64_t txg, boolean_t want_unique, dva_t *dva,
-    int d, int allocator)
+    uint64_t asize, uint64_t txg, boolean_t want_unique, dva_t *dva, int d,
+    int allocator, boolean_t try_hard)
 {
 	metaslab_t *msp = NULL;
 	uint64_t offset = -1ULL;
-	uint64_t activation_weight;
-
-	activation_weight = METASLAB_WEIGHT_PRIMARY;
+
+	uint64_t activation_weight = METASLAB_WEIGHT_PRIMARY;
 	for (int i = 0; i < d; i++) {
 		if (activation_weight == METASLAB_WEIGHT_PRIMARY &&
 		    DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) {
@@ -4026,15 +4408,37 @@
 		if (activation_weight == METASLAB_WEIGHT_PRIMARY &&
 		    mg->mg_primaries[allocator] != NULL) {
 			msp = mg->mg_primaries[allocator];
+
+			/*
+			 * Even though we don't hold the ms_lock for the
+			 * primary metaslab, those fields should not
+			 * change while we hold the mg_lock. Thus it is
+			 * safe to make assertions on them.
+			 */
+			ASSERT(msp->ms_primary);
+			ASSERT3S(msp->ms_allocator, ==, allocator);
+			ASSERT(msp->ms_loaded);
+
 			was_active = B_TRUE;
+			ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
 		} else if (activation_weight == METASLAB_WEIGHT_SECONDARY &&
 		    mg->mg_secondaries[allocator] != NULL) {
 			msp = mg->mg_secondaries[allocator];
+
+			/*
+			 * See comment above about the similar assertions
+			 * for the primary metaslab.
+			 */
+			ASSERT(!msp->ms_primary);
+			ASSERT3S(msp->ms_allocator, ==, allocator);
+			ASSERT(msp->ms_loaded);
+
 			was_active = B_TRUE;
+			ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
 		} else {
 			msp = find_valid_metaslab(mg, activation_weight, dva, d,
-			    want_unique, asize, allocator, zal, search,
-			    &was_active);
+			    want_unique, asize, allocator, try_hard, zal,
+			    search, &was_active);
 		}
 
 		mutex_exit(&mg->mg_lock);
@@ -4042,59 +4446,106 @@
 			kmem_free(search, sizeof (*search));
 			return (-1ULL);
 		}
-
 		mutex_enter(&msp->ms_lock);
+
+		metaslab_active_mask_verify(msp);
+
+		/*
+		 * This code is disabled because of issues with
+		 * tracepoints in non-gpl kernel modules.
+		 */
+#if 0
+		DTRACE_PROBE3(ms__activation__attempt,
+		    metaslab_t *, msp, uint64_t, activation_weight,
+		    boolean_t, was_active);
+#endif
+
 		/*
 		 * Ensure that the metaslab we have selected is still
 		 * capable of handling our request. It's possible that
 		 * another thread may have changed the weight while we
 		 * were blocked on the metaslab lock. We check the
-		 * active status first to see if we need to reselect
+		 * active status first to see if we need to reselect
 		 * a new metaslab.
 		 */
 		if (was_active && !(msp->ms_weight & METASLAB_ACTIVE_MASK)) {
+			ASSERT3S(msp->ms_allocator, ==, -1);
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
 
 		/*
-		 * If the metaslab is freshly activated for an allocator that
-		 * isn't the one we're allocating from, or if it's a primary and
-		 * we're seeking a secondary (or vice versa), we go back and
-		 * select a new metaslab.
+		 * If the metaslab was activated for another allocator
+		 * while we were waiting in the ms_lock above, or it's
+		 * a primary and we're seeking a secondary (or vice versa),
+		 * we go back and select a new metaslab.
 		 */
 		if (!was_active && (msp->ms_weight & METASLAB_ACTIVE_MASK) &&
 		    (msp->ms_allocator != -1) &&
 		    (msp->ms_allocator != allocator || ((activation_weight ==
 		    METASLAB_WEIGHT_PRIMARY) != msp->ms_primary))) {
+			ASSERT(msp->ms_loaded);
+			ASSERT((msp->ms_weight & METASLAB_WEIGHT_CLAIM) ||
+			    msp->ms_allocator != -1);
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
 
+		/*
+		 * This metaslab was used for claiming regions allocated
+		 * by the ZIL during pool import. Once these regions are
+		 * claimed we don't need to keep the CLAIM bit set
+		 * anymore. Passivate this metaslab to zero its activation
+		 * mask.
+		 */
 		if (msp->ms_weight & METASLAB_WEIGHT_CLAIM &&
 		    activation_weight != METASLAB_WEIGHT_CLAIM) {
+			ASSERT(msp->ms_loaded);
+			ASSERT3S(msp->ms_allocator, ==, -1);
 			metaslab_passivate(msp, msp->ms_weight &
 			    ~METASLAB_WEIGHT_CLAIM);
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
 
-		if (metaslab_activate(msp, allocator, activation_weight,
-		    txg) != 0) {
+		metaslab_set_selected_txg(msp, txg);
+
+		int activation_error =
+		    metaslab_activate(msp, allocator, activation_weight);
+		metaslab_active_mask_verify(msp);
+
+		/*
+		 * If the metaslab was activated by another thread for
+		 * another allocator or activation_weight (EBUSY), or it
+		 * failed because another metaslab was assigned as primary
+		 * for this allocator (EEXIST) we continue using this
+		 * metaslab for our allocation, rather than going on to a
+		 * worse metaslab (we waited for that metaslab to be loaded
+		 * after all).
+		 *
+		 * If the activation failed due to an I/O error or ENOSPC we
+		 * skip to the next metaslab.
+		 */
+		boolean_t activated;
+		if (activation_error == 0) {
+			activated = B_TRUE;
+		} else if (activation_error == EBUSY ||
+		    activation_error == EEXIST) {
+			activated = B_FALSE;
+		} else {
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
-
-		msp->ms_selected_txg = txg;
+		ASSERT(msp->ms_loaded);
 
 		/*
 		 * Now that we have the lock, recheck to see if we should
 		 * continue to use this metaslab for this allocation. The
-		 * the metaslab is now loaded so metaslab_should_allocate() can
-		 * accurately determine if the allocation attempt should
+		 * metaslab is now loaded so metaslab_should_allocate()
+		 * can accurately determine if the allocation attempt should
 		 * proceed.
 		 */
-		if (!metaslab_should_allocate(msp, asize)) {
+		if (!metaslab_should_allocate(msp, asize, try_hard)) {
 			/* Passivate this metaslab and select a new one. */
 			metaslab_trace_add(zal, mg, msp, asize, d,
 			    TRACE_TOO_SMALL, allocator);
@@ -4102,8 +4553,8 @@
 		}
 
 		/*
-		 * If this metaslab is currently condensing then pick again as
-		 * we can't manipulate this metaslab until it's committed
+		 * If this metaslab is currently condensing then pick again
+		 * as we can't manipulate this metaslab until it's committed
 		 * to disk. If this metaslab is being initialized, we shouldn't
 		 * allocate from it since the allocated region might be
 		 * overwritten after allocation.
@@ -4111,15 +4562,19 @@
 		if (msp->ms_condensing) {
 			metaslab_trace_add(zal, mg, msp, asize, d,
 			    TRACE_CONDENSING, allocator);
-			metaslab_passivate(msp, msp->ms_weight &
-			    ~METASLAB_ACTIVE_MASK);
+			if (activated) {
+				metaslab_passivate(msp, msp->ms_weight &
+				    ~METASLAB_ACTIVE_MASK);
+			}
 			mutex_exit(&msp->ms_lock);
 			continue;
 		} else if (msp->ms_disabled > 0) {
 			metaslab_trace_add(zal, mg, msp, asize, d,
 			    TRACE_DISABLED, allocator);
-			metaslab_passivate(msp, msp->ms_weight &
-			    ~METASLAB_ACTIVE_MASK);
+			if (activated) {
+				metaslab_passivate(msp, msp->ms_weight &
+				    ~METASLAB_ACTIVE_MASK);
+			}
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
@@ -4129,13 +4584,23 @@
 
 		if (offset != -1ULL) {
 			/* Proactively passivate the metaslab, if needed */
-			metaslab_segment_may_passivate(msp);
+			if (activated)
+				metaslab_segment_may_passivate(msp);
 			break;
 		}
 next:
 		ASSERT(msp->ms_loaded);
 
 		/*
+		 * This code is disabled because of issues with
+		 * tracepoints in non-gpl kernel modules.
+		 */
+#if 0
+		DTRACE_PROBE2(ms__alloc__failure, metaslab_t *, msp,
+		    uint64_t, asize);
+#endif
+
+		/*
 		 * We were unable to allocate from this metaslab so determine
 		 * a new weight for this metaslab. Now that we have loaded
 		 * the metaslab we can provide a better hint to the metaslab
@@ -4156,14 +4621,33 @@
 		 * currently available for allocation and is accurate
 		 * even within a sync pass.
 		 */
+		uint64_t weight;
 		if (WEIGHT_IS_SPACEBASED(msp->ms_weight)) {
-			uint64_t weight = metaslab_block_maxsize(msp);
+			weight = metaslab_largest_allocatable(msp);
 			WEIGHT_SET_SPACEBASED(weight);
+		} else {
+			weight = metaslab_weight_from_range_tree(msp);
+		}
+
+		if (activated) {
 			metaslab_passivate(msp, weight);
 		} else {
-			metaslab_passivate(msp,
-			    metaslab_weight_from_range_tree(msp));
+			/*
+			 * For the case where we use the metaslab that is
+			 * active for another allocator we want to make
+			 * sure that we retain the activation mask.
+			 *
+			 * Note that we could attempt to use something like
+			 * metaslab_recalculate_weight_and_sort() that
+			 * retains the activation mask here. That function
+			 * uses metaslab_weight() to set the weight though
+			 * which is not as accurate as the calculations
+			 * above.
+			 */
+			weight |= msp->ms_weight & METASLAB_ACTIVE_MASK;
+			metaslab_group_sort(mg, msp, weight);
 		}
+		metaslab_active_mask_verify(msp);
 
 		/*
 		 * We have just failed an allocation attempt, check
@@ -4171,7 +4655,7 @@
 		 * we may end up in an infinite loop retrying the same
 		 * metaslab.
 		 */
-		ASSERT(!metaslab_should_allocate(msp, asize));
+		ASSERT(!metaslab_should_allocate(msp, asize, try_hard));
 
 		mutex_exit(&msp->ms_lock);
 	}
@@ -4182,14 +4666,14 @@
 
 static uint64_t
 metaslab_group_alloc(metaslab_group_t *mg, zio_alloc_list_t *zal,
-    uint64_t asize, uint64_t txg, boolean_t want_unique, dva_t *dva,
-    int d, int allocator)
+    uint64_t asize, uint64_t txg, boolean_t want_unique, dva_t *dva, int d,
+    int allocator, boolean_t try_hard)
 {
 	uint64_t offset;
 	ASSERT(mg->mg_initialized);
 
 	offset = metaslab_group_alloc_normal(mg, zal, asize, txg, want_unique,
-	    dva, d, allocator);
+	    dva, d, allocator, try_hard);
 
 	mutex_enter(&mg->mg_lock);
 	if (offset == -1ULL) {
@@ -4359,7 +4843,7 @@
 		 * allow any metaslab to be used (unique=false).
 		 */
 		uint64_t offset = metaslab_group_alloc(mg, zal, asize, txg,
-		    !try_hard, dva, d, allocator);
+		    !try_hard, dva, d, allocator, try_hard);
 
 		if (offset != -1ULL) {
 			/*
@@ -4682,6 +5166,7 @@
 	mutex_enter(&msp->ms_lock);
 	range_tree_remove(msp->ms_allocating[txg & TXG_MASK],
 	    offset, size);
+	msp->ms_allocating_total -= size;
 
 	VERIFY(!msp->ms_condensing);
 	VERIFY3U(offset, >=, msp->ms_start);
@@ -4787,7 +5272,7 @@
 	mutex_enter(&msp->ms_lock);
 
 	if ((txg != 0 && spa_writeable(spa)) || !msp->ms_loaded)
-		error = metaslab_activate(msp, 0, METASLAB_WEIGHT_CLAIM, txg);
+		error = metaslab_activate(msp, 0, METASLAB_WEIGHT_CLAIM);
 	/*
 	 * No need to fail in that case; someone else has activated the
 	 * metaslab, but that doesn't preclude us from using it.
@@ -4813,10 +5298,20 @@
 	range_tree_clear(msp->ms_trim, offset, size);
 
 	if (spa_writeable(spa)) {	/* don't dirty if we're zdb(1M) */
+		metaslab_class_t *mc = msp->ms_group->mg_class;
+		multilist_sublist_t *mls =
+		    multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp);
+		if (!multilist_link_active(&msp->ms_class_txg_node)) {
+			msp->ms_selected_txg = txg;
+			multilist_sublist_insert_head(mls, msp);
+		}
+		multilist_sublist_unlock(mls);
+
 		if (range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
 			vdev_dirty(vd, VDD_METASLAB, msp, txg);
 		range_tree_add(msp->ms_allocating[txg & TXG_MASK],
 		    offset, size);
+		msp->ms_allocating_total += size;
 	}
 
 	mutex_exit(&msp->ms_lock);
@@ -5177,7 +5672,7 @@
 }
 
 void
-metaslab_enable(metaslab_t *msp, boolean_t sync)
+metaslab_enable(metaslab_t *msp, boolean_t sync, boolean_t unload)
 {
 	metaslab_group_t *mg = msp->ms_group;
 	spa_t *spa = mg->mg_vd->vdev_spa;
@@ -5195,6 +5690,8 @@
 	if (--msp->ms_disabled == 0) {
 		mg->mg_ms_disabled--;
 		cv_broadcast(&mg->mg_ms_disabled_cv);
+		if (unload)
+			metaslab_unload(msp);
 	}
 	mutex_exit(&msp->ms_lock);
 	mutex_exit(&mg->mg_ms_disabled_lock);
--- a/usr/src/uts/common/fs/zfs/range_tree.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/range_tree.c	Thu Nov 14 23:30:04 2019 +0000
@@ -525,6 +525,36 @@
 }
 
 /*
+ * Returns the first subset of the given range which overlaps with the range
+ * tree. Returns true if there is a segment in the range, and false if there
+ * isn't.
+ */
+boolean_t
+range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
+    uint64_t *ostart, uint64_t *osize)
+{
+	range_seg_t rsearch;
+	rsearch.rs_start = start;
+	rsearch.rs_end = start + 1;
+
+	avl_index_t where;
+	range_seg_t *rs = avl_find(&rt->rt_root, &rsearch, &where);
+	if (rs != NULL) {
+		*ostart = start;
+		*osize = MIN(size, rs->rs_end - start);
+		return (B_TRUE);
+	}
+
+	rs = avl_nearest(&rt->rt_root, where, AVL_AFTER);
+	if (rs == NULL || rs->rs_start > start + size)
+		return (B_FALSE);
+
+	*ostart = rs->rs_start;
+	*osize = MIN(start + size, rs->rs_end) - rs->rs_start;
+	return (B_TRUE);
+}
+
+/*
  * Ensure that this range is not in the tree, regardless of whether
  * it is currently in the tree.
  */
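
range_tree_find_in() reports the first sub-range of [start, start + size) that overlaps the tree, distinguishing a hit at the query start from a later segment. A standalone model of those two return shapes, with a single segment in place of the AVL tree:

#include <stdio.h>
#include <stdint.h>

struct seg { uint64_t start, end; };

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

/* Model of range_tree_find_in() with one segment in place of the tree. */
static int
find_in(struct seg s, uint64_t start, uint64_t size,
    uint64_t *ostart, uint64_t *osize)
{
	if (s.start <= start && start < s.end) {
		/* The query start itself lands inside a segment. */
		*ostart = start;
		*osize = MIN(size, s.end - start);
		return (1);
	}
	if (s.end <= start || s.start > start + size)
		return (0);		/* no overlap within the range */
	*ostart = s.start;
	*osize = MIN(start + size, s.end) - s.start;
	return (1);
}

int
main(void)
{
	struct seg s = { 150, 160 };
	uint64_t ostart, osize;

	/* Query [100, 200): the overlap is [150, 160), length 10. */
	if (find_in(s, 100, 100, &ostart, &osize))
		printf("overlap at %llu, len %llu\n",
		    (unsigned long long)ostart, (unsigned long long)osize);
	return (0);
}
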
--- a/usr/src/uts/common/fs/zfs/spa.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/spa.c	Thu Nov 14 23:30:04 2019 +0000
@@ -8592,6 +8592,10 @@
 	while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
 	    != NULL)
 		vdev_sync_done(vd, txg);
+
+	metaslab_class_evict_old(spa->spa_normal_class, txg);
+	metaslab_class_evict_old(spa->spa_log_class, txg);
+
 	spa_sync_close_syncing_log_sm(spa);
 
 	spa_update_dspace(spa);
--- a/usr/src/uts/common/fs/zfs/spa_log_spacemap.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/spa_log_spacemap.c	Thu Nov 14 23:30:04 2019 +0000
@@ -1192,7 +1192,8 @@
 		    metaslab_unflushed_changes_memused(m);
 
 		if (metaslab_debug_load && m->ms_sm != NULL) {
-			VERIFY0(metaslab_load(m, txg));
+			VERIFY0(metaslab_load(m));
+			metaslab_set_selected_txg(m, 0);
 		}
 		mutex_exit(&m->ms_lock);
 	}
--- a/usr/src/uts/common/fs/zfs/sys/arc.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h	Thu Nov 14 23:30:04 2019 +0000
@@ -236,6 +236,7 @@
 void arc_tempreserve_clear(uint64_t reserve);
 int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg);
 
+uint64_t arc_all_memory(void);
 uint64_t arc_max_bytes(void);
 void arc_init(void);
 void arc_fini(void);
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h	Thu Nov 14 23:30:04 2019 +0000
@@ -46,6 +46,7 @@
  */
 #define	DNODE_MUST_BE_ALLOCATED	1
 #define	DNODE_MUST_BE_FREE	2
+#define	DNODE_DRY_RUN		4
 
 /*
  * dnode_next_offset() flags.
@@ -393,6 +394,7 @@
 boolean_t dnode_add_ref(dnode_t *dn, void *ref);
 void dnode_rele(dnode_t *dn, void *ref);
 void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting);
+int dnode_try_claim(objset_t *os, uint64_t object, int slots);
 void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
 void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
 void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
@@ -511,11 +513,6 @@
 	 */
 	kstat_named_t dnode_hold_free_overflow;
 	/*
-	 * Number of times a dnode_hold(...) was attempted on a dnode
-	 * which had already been unlinked in an earlier txg.
-	 */
-	kstat_named_t dnode_hold_free_txg;
-	/*
 	 * Number of times dnode_free_interior_slots() needed to retry
 	 * acquiring a slot zrl lock due to contention.
 	 */
--- a/usr/src/uts/common/fs/zfs/sys/metaslab.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/sys/metaslab.h	Thu Nov 14 23:30:04 2019 +0000
@@ -65,7 +65,7 @@
 void metaslab_sync(metaslab_t *, uint64_t);
 void metaslab_sync_done(metaslab_t *, uint64_t);
 void metaslab_sync_reassess(metaslab_group_t *);
-uint64_t metaslab_block_maxsize(metaslab_t *);
+uint64_t metaslab_largest_allocatable(metaslab_t *);
 
 /*
  * metaslab alloc flags
@@ -107,7 +107,7 @@
 boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int, int,
     zio_t *, int);
 void metaslab_class_throttle_unreserve(metaslab_class_t *, int, int, zio_t *);
-
+void metaslab_class_evict_old(metaslab_class_t *, uint64_t);
 uint64_t metaslab_class_get_alloc(metaslab_class_t *);
 uint64_t metaslab_class_get_space(metaslab_class_t *);
 uint64_t metaslab_class_get_dspace(metaslab_class_t *);
@@ -130,7 +130,8 @@
 void metaslab_group_alloc_verify(spa_t *, const blkptr_t *, void *, int);
 void metaslab_recalculate_weight_and_sort(metaslab_t *);
 void metaslab_disable(metaslab_t *);
-void metaslab_enable(metaslab_t *, boolean_t);
+void metaslab_enable(metaslab_t *, boolean_t, boolean_t);
+void metaslab_set_selected_txg(metaslab_t *, uint64_t);
 
 extern int metaslab_debug_load;
 
--- a/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h	Thu Nov 14 23:30:04 2019 +0000
@@ -36,6 +36,7 @@
 #include <sys/vdev.h>
 #include <sys/txg.h>
 #include <sys/avl.h>
+#include <sys/multilist.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -194,6 +195,12 @@
 	uint64_t		mc_space;	/* total space (alloc + free) */
 	uint64_t		mc_dspace;	/* total deflated space */
 	uint64_t		mc_histogram[RANGE_TREE_HISTOGRAM_SIZE];
+
+	/*
+	 * List of all loaded metaslabs in the class, sorted in order of most
+	 * recent use.
+	 */
+	multilist_t		*mc_metaslab_txg_list;
 };
 
 /*
@@ -378,6 +385,7 @@
 	range_tree_t	*ms_allocating[TXG_SIZE];
 	range_tree_t	*ms_allocatable;
 	uint64_t	ms_allocated_this_txg;
+	uint64_t	ms_allocating_total;
 
 	/*
 	 * The following range trees are accessed only from syncing context.
@@ -475,7 +483,13 @@
 	 * stay cached.
 	 */
 	uint64_t	ms_selected_txg;
-	uint64_t	ms_loaded_txg;	/* track when metaslab was loaded */
+	/*
+	 * ms_load/unload_time can be used for performance monitoring
+	 * (e.g. by dtrace or mdb).
+	 */
+	hrtime_t	ms_load_time;	/* time last loaded */
+	hrtime_t	ms_unload_time;	/* time last unloaded */
+	hrtime_t	ms_selected_time; /* time last allocated from */
 
 	uint64_t	ms_max_size;	/* maximum allocatable size	*/
 
@@ -495,12 +509,17 @@
 	 * segment sizes.
 	 */
 	avl_tree_t	ms_allocatable_by_size;
+	avl_tree_t	ms_unflushed_frees_by_size;
 	uint64_t	ms_lbas[MAX_LBAS];
 
 	metaslab_group_t *ms_group;	/* metaslab group		*/
 	avl_node_t	ms_group_node;	/* node in metaslab group tree	*/
 	txg_node_t	ms_txg_node;	/* per-txg dirty metaslab links	*/
 	avl_node_t	ms_spa_txg_node; /* node in spa_metaslabs_by_txg */
+	/*
+	 * Node in metaslab class's selected txg list
+	 */
+	multilist_node_t	ms_class_txg_node;
 
 	/*
 	 * Allocs and frees that are committed to the vdev log spacemap but
--- a/usr/src/uts/common/fs/zfs/sys/range_tree.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/sys/range_tree.h	Thu Nov 14 23:30:04 2019 +0000
@@ -88,6 +88,8 @@
 range_tree_t *range_tree_create(range_tree_ops_t *ops, void *arg);
 void range_tree_destroy(range_tree_t *rt);
 boolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size);
+boolean_t range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
+    uint64_t *ostart, uint64_t *osize);
 void range_tree_verify_not_present(range_tree_t *rt,
     uint64_t start, uint64_t size);
 range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size);
--- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h	Thu Nov 14 23:30:04 2019 +0000
@@ -332,7 +332,7 @@
 extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
     vattr_t *vap);
 extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, char *name, uint64_t foid);
+    znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked);
 #define	ZFS_NO_OBJECT	0	/* no object id */
 extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *dzp, znode_t *zp, char *name);
--- a/usr/src/uts/common/fs/zfs/vdev_initialize.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/vdev_initialize.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2016, 2019 by Delphix. All rights reserved.
  */
 
 #include <sys/spa.h>
@@ -474,6 +474,7 @@
 	for (uint64_t i = 0; !vd->vdev_detached &&
 	    i < vd->vdev_top->vdev_ms_count; i++) {
 		metaslab_t *msp = vd->vdev_top->vdev_ms[i];
+		boolean_t unload_when_done = B_FALSE;
 
 		/*
 		 * If we've expanded the top-level vdev or it's our
@@ -487,14 +488,16 @@
 		spa_config_exit(spa, SCL_CONFIG, FTAG);
 		metaslab_disable(msp);
 		mutex_enter(&msp->ms_lock);
-		VERIFY0(metaslab_load(msp, spa_syncing_txg(spa)));
+		if (!msp->ms_loaded && !msp->ms_loading)
+			unload_when_done = B_TRUE;
+		VERIFY0(metaslab_load(msp));
 
 		range_tree_walk(msp->ms_allocatable, vdev_initialize_range_add,
 		    vd);
 		mutex_exit(&msp->ms_lock);
 
 		error = vdev_initialize_ranges(vd, deadbeef);
-		metaslab_enable(msp, B_TRUE);
+		metaslab_enable(msp, B_TRUE, unload_when_done);
 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 
 		range_tree_vacate(vd->vdev_initialize_tree, NULL, NULL);
--- a/usr/src/uts/common/fs/zfs/vdev_trim.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/vdev_trim.c	Thu Nov 14 23:30:04 2019 +0000
@@ -850,7 +850,7 @@
 		 */
 		if (msp->ms_sm == NULL && vd->vdev_trim_partial) {
 			mutex_exit(&msp->ms_lock);
-			metaslab_enable(msp, B_FALSE);
+			metaslab_enable(msp, B_FALSE, B_FALSE);
 			spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 			vdev_trim_calculate_progress(vd);
 			continue;
@@ -862,7 +862,7 @@
 		mutex_exit(&msp->ms_lock);
 
 		error = vdev_trim_ranges(&ta);
-		metaslab_enable(msp, B_TRUE);
+		metaslab_enable(msp, B_TRUE, B_FALSE);
 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 
 		range_tree_vacate(ta.trim_tree, NULL, NULL);
@@ -1167,7 +1167,7 @@
 			if (msp->ms_sm == NULL ||
 			    range_tree_is_empty(msp->ms_trim)) {
 				mutex_exit(&msp->ms_lock);
-				metaslab_enable(msp, B_FALSE);
+				metaslab_enable(msp, B_FALSE, B_FALSE);
 				continue;
 			}
 
@@ -1183,7 +1183,7 @@
 			 */
 			if (msp->ms_disabled > 1) {
 				mutex_exit(&msp->ms_lock);
-				metaslab_enable(msp, B_FALSE);
+				metaslab_enable(msp, B_FALSE, B_FALSE);
 				continue;
 			}
 
@@ -1302,7 +1302,7 @@
 			range_tree_vacate(trim_tree, NULL, NULL);
 			range_tree_destroy(trim_tree);
 
-			metaslab_enable(msp, issued_trim);
+			metaslab_enable(msp, issued_trim, B_FALSE);
 			spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 
 			for (uint64_t c = 0; c < children; c++) {
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Thu Nov 14 23:30:04 2019 +0000
@@ -5668,7 +5668,7 @@
 	objset_t *os = NULL;
 	int error;
 
-	error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
+	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (error != 0)
 		return (error);
 
--- a/usr/src/uts/common/fs/zfs/zfs_log.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/zfs_log.c	Thu Nov 14 23:30:04 2019 +0000
@@ -354,12 +354,14 @@
 	zil_itx_assign(zilog, itx, tx);
 }
 
+void zil_remove_async(zilog_t *zilog, uint64_t oid);
+
 /*
  * Handles both TX_REMOVE and TX_RMDIR transactions.
  */
 void
 zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, char *name, uint64_t foid)
+    znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked)
 {
 	itx_t *itx;
 	lr_remove_t *lr;
@@ -375,6 +377,17 @@
 
 	itx->itx_oid = foid;
 
+	/*
+	 * Object ids can be re-instantiated in the next txg so
+	 * remove any async transactions to avoid future leaks.
+	 * This can happen if an fsync occurs on the re-instantiated
+	 * object for a WR_INDIRECT or WR_NEED_COPY write, which gets
+	 * the new file data and flushes a write record for the old object.
+	 */
+	if (unlinked) {
+		ASSERT((txtype & ~TX_CI) == TX_REMOVE);
+		zil_remove_async(zilog, foid);
+	}
 	zil_itx_assign(zilog, itx, tx);
 }
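
With the purge moved out of zil_itx_assign(), async itxs for an object are
now dropped only when a remove actually unlinked it (its last link went
away), not for every TX_REMOVE: removing one name of a hardlinked file no
longer discards pending writes for the object. Illustrative calls
(arguments abbreviated):

	/* last link gone: purge stale async itxs for the old object id */
	zfs_log_remove(zilog, tx, TX_REMOVE, dzp, name, obj, B_TRUE);

	/* only one name of a hardlinked file: keep pending async writes */
	zfs_log_remove(zilog, tx, TX_REMOVE, dzp, name, obj, B_FALSE);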
 
--- a/usr/src/uts/common/fs/zfs/zfs_replay.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/zfs_replay.c	Thu Nov 14 23:30:04 2019 +0000
@@ -335,8 +335,8 @@
 	xva.xva_vattr.va_nblocks = lr->lr_gen;
 	xva.xva_vattr.va_fsid = dnodesize;
 
-	error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
-	if (error != ENOENT)
+	error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT);
+	if (error)
 		goto bail;
 
 	if (lr->lr_common.lrc_txtype & TX_CI)
@@ -469,8 +469,8 @@
 	xva.xva_vattr.va_nblocks = lr->lr_gen;
 	xva.xva_vattr.va_fsid = dnodesize;
 
-	error = dmu_object_info(zfsvfs->z_os, objid, NULL);
-	if (error != ENOENT)
+	error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT);
+	if (error)
 		goto out;
 
 	if (lr->lr_common.lrc_txtype & TX_CI)
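
The switch to dnode_try_claim() makes the replay check slot-aware: rather
than asking whether an object merely exists, it asks whether the required
run of dnode slots at objid can actually be claimed for the create. A
minimal sketch of the check (error handling illustrative):

	int slots = dnodesize >> DNODE_SHIFT;

	error = dnode_try_claim(zfsvfs->z_os, objid, slots);
	if (error != 0)
		goto out;	/* slots busy: cannot replay this create */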
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c	Thu Nov 14 23:30:04 2019 +0000
@@ -1918,7 +1918,7 @@
 	txtype = TX_REMOVE;
 	if (flags & FIGNORECASE)
 		txtype |= TX_CI;
-	zfs_log_remove(zilog, tx, txtype, dzp, name, obj);
+	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
 
 	dmu_tx_commit(tx);
 out:
@@ -2234,7 +2234,8 @@
 		uint64_t txtype = TX_RMDIR;
 		if (flags & FIGNORECASE)
 			txtype |= TX_CI;
-		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
+		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT,
+		    B_FALSE);
 	}
 
 	dmu_tx_commit(tx);
--- a/usr/src/uts/common/fs/zfs/zil.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/fs/zfs/zil.c	Thu Nov 14 23:30:04 2019 +0000
@@ -1749,7 +1749,7 @@
 /*
  * Remove all async itx with the given oid.
  */
-static void
+void
 zil_remove_async(zilog_t *zilog, uint64_t oid)
 {
 	uint64_t otxg, txg;
@@ -1802,16 +1802,6 @@
 	itxs_t *itxs, *clean = NULL;
 
 	/*
-	 * Object ids can be re-instantiated in the next txg so
-	 * remove any async transactions to avoid future leaks.
-	 * This can happen if a fsync occurs on the re-instantiated
-	 * object for a WR_INDIRECT or WR_NEED_COPY write, which gets
-	 * the new file data and flushes a write record for the old object.
-	 */
-	if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_REMOVE)
-		zil_remove_async(zilog, itx->itx_oid);
-
-	/*
 	 * Ensure the data of a renamed file is committed before the rename.
 	 */
 	if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/i40e/core/README.illumos	Thu Nov 14 23:30:04 2019 +0000
@@ -0,0 +1,91 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+This directory contains files extracted from the Intel ixl-1.6.10 driver for
+FreeBSD with the following modifications/differences.  The following two
+changes each modified the common code.
+
+9805 i40e should read SFP data when firmware supports it
+9601 Divide by zero in i40e_get_available_resources()
+
+The following diff was originally applied to add support for Studio and the
+32-bit kernel:
+
+--- ixl-1.6.10/src/i40e_common.c
++++ illumos-gate/usr/src/uts/common/io/i40e/core/i40e_common.c
+@@ -4037,8 +4037,8 @@
+ 
+ 	cmd->type = mib_type;
+ 	cmd->length = CPU_TO_LE16(buff_size);
+-	cmd->address_high = CPU_TO_LE32(I40E_HI_WORD((u64)buff));
+-	cmd->address_low =  CPU_TO_LE32(I40E_LO_DWORD((u64)buff));
++	cmd->address_high = CPU_TO_LE32(I40E_HI_WORD((uintptr_t)buff));
++	cmd->address_low =  CPU_TO_LE32(I40E_LO_DWORD((uintptr_t)buff));
+ 
+ 	status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+ 	return status;
+@@ -6585,9 +6585,9 @@
+ 	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_proxy_config);
+ 
+ 	desc.params.external.addr_high =
+-				  CPU_TO_LE32(I40E_HI_DWORD((u64)proxy_config));
++				  CPU_TO_LE32(I40E_HI_DWORD((uintptr_t)proxy_config));
+ 	desc.params.external.addr_low =
+-				  CPU_TO_LE32(I40E_LO_DWORD((u64)proxy_config));
++				  CPU_TO_LE32(I40E_LO_DWORD((uintptr_t)proxy_config));
+ 
+ 	status = i40e_asq_send_command(hw, &desc, proxy_config,
+ 				       sizeof(struct i40e_aqc_arp_proxy_data),
+@@ -6619,9 +6619,9 @@
+ 				i40e_aqc_opc_set_ns_proxy_table_entry);
+ 
+ 	desc.params.external.addr_high =
+-		CPU_TO_LE32(I40E_HI_DWORD((u64)ns_proxy_table_entry));
++		CPU_TO_LE32(I40E_HI_DWORD((uintptr_t)ns_proxy_table_entry));
+ 	desc.params.external.addr_low =
+-		CPU_TO_LE32(I40E_LO_DWORD((u64)ns_proxy_table_entry));
++		CPU_TO_LE32(I40E_LO_DWORD((uintptr_t)ns_proxy_table_entry));
+ 
+ 	status = i40e_asq_send_command(hw, &desc, ns_proxy_table_entry,
+ 				       sizeof(struct i40e_aqc_ns_proxy_data),
+@@ -6681,8 +6681,8 @@
+ 		valid_flags |= I40E_AQC_SET_WOL_FILTER_NO_TCO_ACTION_VALID;
+ 	cmd->valid_flags = CPU_TO_LE16(valid_flags);
+ 
+-	cmd->address_high = CPU_TO_LE32(I40E_HI_DWORD((u64)filter));
+-	cmd->address_low = CPU_TO_LE32(I40E_LO_DWORD((u64)filter));
++	cmd->address_high = CPU_TO_LE32(I40E_HI_DWORD((uintptr_t)filter));
++	cmd->address_low = CPU_TO_LE32(I40E_LO_DWORD((uintptr_t)filter));
+ 
+ 	status = i40e_asq_send_command(hw, &desc, filter,
+ 				       buff_len, cmd_details);
+--- ixl-1.6.10/src/i40e_register.h
++++ illumos-gate/usr/src/uts/common/io/i40e/core/i40e_register.h
+@@ -113,7 +113,7 @@
+ #define I40E_PF_ATQLEN_ATQCRIT_SHIFT   30
+ #define I40E_PF_ATQLEN_ATQCRIT_MASK    I40E_MASK(0x1, I40E_PF_ATQLEN_ATQCRIT_SHIFT)
+ #define I40E_PF_ATQLEN_ATQENABLE_SHIFT 31
+-#define I40E_PF_ATQLEN_ATQENABLE_MASK  I40E_MASK(0x1, I40E_PF_ATQLEN_ATQENABLE_SHIFT)
++#define I40E_PF_ATQLEN_ATQENABLE_MASK  I40E_MASK(0x1UL, I40E_PF_ATQLEN_ATQENABLE_SHIFT)
+ #define I40E_PF_ATQT            0x00080400 /* Reset: EMPR */
+ #define I40E_PF_ATQT_ATQT_SHIFT 0
+ #define I40E_PF_ATQT_ATQT_MASK  I40E_MASK(0x3FF, I40E_PF_ATQT_ATQT_SHIFT)
+--- ixl-1.6.10/src/i40e_type.h
++++ illumos-gate/usr/src/uts/common/io/i40e/core/i40e_type.h
+@@ -49,7 +49,7 @@
+ 
+ #ifndef I40E_MASK
+ /* I40E_MASK is a macro used on 32 bit registers */
+-#define I40E_MASK(mask, shift) (mask << shift)
++#define	I40E_MASK(mask, shift) (((uint32_t)(mask)) << ((uint32_t)(shift)))
+ #endif
+ 
+ #define I40E_MAX_PF			16
--- a/usr/src/uts/common/io/i40e/core/i40e_common.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/io/i40e/core/i40e_common.c	Thu Nov 14 23:30:04 2019 +0000
@@ -3823,14 +3823,16 @@
 	/* count the enabled ports (aka the "not disabled" ports) */
 	hw->num_ports = 0;
 	for (i = 0; i < 4; i++) {
-		u32 port_cfg_reg = I40E_PRTGEN_CNF + (4 * i);
+		enum i40e_status_code status;
+		u32 port_cfg_reg = I40E_PRTGEN_STATUS + (4 * i);
 		u64 port_cfg = 0;
 
 		/* use AQ read to get the physical register offset instead
 		 * of the port relative offset
 		 */
-		i40e_aq_debug_read_register(hw, port_cfg_reg, &port_cfg, NULL);
-		if (!(port_cfg & I40E_PRTGEN_CNF_PORT_DIS_MASK))
+		status = i40e_aq_debug_read_register(hw, port_cfg_reg, &port_cfg, NULL);
+		if ((status == I40E_SUCCESS) &&
+		    (port_cfg & I40E_PRTGEN_STATUS_PORT_VALID_MASK))
 			hw->num_ports++;
 	}
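
With the status check, a failed admin-queue read no longer counts as an
enabled port, and only ports reporting PORT_VALID are tallied, so
hw->num_ports can legitimately end up zero. Any consumer that divides by it
should guard accordingly; a defensive sketch (not the literal 9601 fix, and
the error code is illustrative):

	if (hw->num_ports == 0)
		return (I40E_ERR_CONFIG);	/* nothing to apportion */
	per_port = total / hw->num_ports;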
 
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c	Thu Nov 14 23:30:04 2019 +0000
@@ -505,8 +505,8 @@
 	hca_guid = hca_devp->hd_hca_attr->hca_node_guid;
 	mutex_exit(&ibtl_clnt_list_mutex);
 
-	ibt_status = ((ibtl_node_info_cb_t)mgrp->mgr_async_handler)(hca_guid,
-	    port, sm_lid, &node_info);
+	ibt_status = ((ibtl_node_info_cb_t)(uintptr_t)
+	    mgrp->mgr_async_handler)(hca_guid, port, sm_lid, &node_info);
 	if (ibt_status == IBT_SUCCESS) {
 		if ((node_info.n_vendor_id == IBT_VENDOR_CISCO) &&
 		    (node_info.n_node_type == IBT_NODE_TYPE_SWITCH)) {
@@ -740,7 +740,7 @@
 		if ((code == IBT_PORT_CHANGE_EVENT) &&
 		    eventp->ev_port_flags & IBT_PORT_CHANGE_SM_LID)
 			ibtl_cm_get_node_info(hca_devp,
-			    (ibt_async_handler_t)ibtl_node_info_cb);
+			    (ibt_async_handler_t)(uintptr_t)ibtl_node_info_cb);
 		/* wait for node info task to complete */
 		while (hca_devp->hd_async_task_cnt != 0)
 			cv_wait(&hca_devp->hd_async_task_cv,
--- a/usr/src/uts/common/io/mac/mac.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/io/mac/mac.c	Thu Nov 14 23:30:04 2019 +0000
@@ -2373,7 +2373,7 @@
 minor_t
 mac_minor_hold(boolean_t sleep)
 {
-	minor_t	minor;
+	id_t id;
 
 	/*
 	 * Grab a value from the arena.
@@ -2381,16 +2381,14 @@
 	atomic_inc_32(&minor_count);
 
 	if (sleep)
-		minor = (uint_t)id_alloc(minor_ids);
-	else
-		minor = (uint_t)id_alloc_nosleep(minor_ids);
-
-	if (minor == 0) {
+		return ((uint_t)id_alloc(minor_ids));
+
+	if ((id = id_alloc_nosleep(minor_ids)) == -1) {
 		atomic_dec_32(&minor_count);
 		return (0);
 	}
 
-	return (minor);
+	return ((uint_t)id);
 }
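
id_alloc_nosleep() signals failure by returning -1 (id_t is signed); the old
code cast the result to uint_t before comparing against 0, so exhaustion was
never detected. The corrected idiom, reduced to a standalone sketch:

	id_t id;

	if ((id = id_alloc_nosleep(arena)) == -1)
		return (0);		/* arena exhausted: report failure */
	return ((uint_t)id);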
 
 /*
--- a/usr/src/uts/common/io/mac/mac_datapath_setup.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c	Thu Nov 14 23:30:04 2019 +0000
@@ -1705,10 +1705,8 @@
 	bzero(&mrf, sizeof (mac_rx_fifo_t));
 	mrf.mrf_type = MAC_RX_FIFO;
 	mrf.mrf_receive = (mac_receive_t)mac_soft_ring_poll;
-	mrf.mrf_intr_enable =
-	    (mac_intr_enable_t)mac_soft_ring_intr_enable;
-	mrf.mrf_intr_disable =
-	    (mac_intr_disable_t)mac_soft_ring_intr_disable;
+	mrf.mrf_intr_enable = (mac_intr_enable_t)mac_soft_ring_intr_enable;
+	mrf.mrf_intr_disable = (mac_intr_disable_t)mac_soft_ring_intr_disable;
 	mrf.mrf_flow_priority = pri;
 
 	softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait,
--- a/usr/src/uts/common/io/mac/mac_soft_ring.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/io/mac/mac_soft_ring.c	Thu Nov 14 23:30:04 2019 +0000
@@ -498,7 +498,7 @@
  * Enabling allows the processing thread to send packets to the
  * client, while disabling does the opposite.
  */
-void
+int
 mac_soft_ring_intr_enable(void *arg)
 {
 	mac_soft_ring_t *ringp = (mac_soft_ring_t *)arg;
@@ -507,6 +507,7 @@
 	if (ringp->s_ring_first != NULL)
 		mac_soft_ring_worker_wakeup(ringp);
 	mutex_exit(&ringp->s_ring_lock);
+	return (0);
 }
 
 boolean_t
--- a/usr/src/uts/common/io/nvme/nvme.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/io/nvme/nvme.c	Thu Nov 14 23:30:04 2019 +0000
@@ -201,6 +201,14 @@
  * device.
  *
  *
+ * DDI UFM Support
+ *
+ * The driver supports the DDI UFM framework for reporting information about
+ * the device's firmware image and slot configuration. This data can be
+ * queried by userland software via ioctls to the ufm driver. For more
+ * information, see ddi_ufm(9E).
+ *
+ *
  * Driver Configuration:
  *
  * The following driver properties can be changed to control some aspects of the
@@ -247,6 +255,7 @@
 #include <sys/conf.h>
 #include <sys/devops.h>
 #include <sys/ddi.h>
+#include <sys/ddi_ufm.h>
 #include <sys/sunddi.h>
 #include <sys/sunndi.h>
 #include <sys/bitmap.h>
@@ -386,10 +395,24 @@
 
 static void nvme_prepare_devid(nvme_t *, uint32_t);
 
+/* DDI UFM callbacks */
+static int nvme_ufm_fill_image(ddi_ufm_handle_t *, void *, uint_t,
+    ddi_ufm_image_t *);
+static int nvme_ufm_fill_slot(ddi_ufm_handle_t *, void *, uint_t, uint_t,
+    ddi_ufm_slot_t *);
+static int nvme_ufm_getcaps(ddi_ufm_handle_t *, void *, ddi_ufm_cap_t *);
+
 static int nvme_open(dev_t *, int, int, cred_t *);
 static int nvme_close(dev_t, int, int, cred_t *);
 static int nvme_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 
+static ddi_ufm_ops_t nvme_ufm_ops = {
+	NULL,
+	nvme_ufm_fill_image,
+	nvme_ufm_fill_slot,
+	nvme_ufm_getcaps
+};
+
 #define	NVME_MINOR_INST_SHIFT	9
 #define	NVME_MINOR(inst, nsid)	(((inst) << NVME_MINOR_INST_SHIFT) | (nsid))
 #define	NVME_MINOR_INST(minor)	((minor) >> NVME_MINOR_INST_SHIFT)
@@ -3352,6 +3375,18 @@
 		goto fail;
 
 	/*
+	 * Initialize the driver with the UFM subsystem
+	 */
+	if (ddi_ufm_init(dip, DDI_UFM_CURRENT_VERSION, &nvme_ufm_ops,
+	    &nvme->n_ufmh, nvme) != 0) {
+		dev_err(dip, CE_WARN, "!failed to initialize UFM subsystem");
+		goto fail;
+	}
+	mutex_init(&nvme->n_fwslot_mutex, NULL, MUTEX_DRIVER, NULL);
+	ddi_ufm_update(nvme->n_ufmh);
+	nvme->n_progress |= NVME_UFM_INIT;
+
+	/*
 	 * Attach the blkdev driver for each namespace.
 	 */
 	for (i = 0; i != nvme->n_namespace_count; i++) {
@@ -3444,6 +3479,10 @@
 		kmem_free(nvme->n_ns, sizeof (nvme_namespace_t) *
 		    nvme->n_namespace_count);
 	}
+	if (nvme->n_progress & NVME_UFM_INIT) {
+		ddi_ufm_fini(nvme->n_ufmh);
+		mutex_destroy(&nvme->n_fwslot_mutex);
+	}
 
 	if (nvme->n_progress & NVME_INTERRUPTS)
 		nvme_release_interrupts(nvme);
@@ -4365,6 +4404,18 @@
 	return (rv);
 }
 
+static void
+nvme_ufm_update(nvme_t *nvme)
+{
+	mutex_enter(&nvme->n_fwslot_mutex);
+	ddi_ufm_update(nvme->n_ufmh);
+	if (nvme->n_fwslot != NULL) {
+		kmem_free(nvme->n_fwslot, sizeof (nvme_fwslot_log_t));
+		nvme->n_fwslot = NULL;
+	}
+	mutex_exit(&nvme->n_fwslot_mutex);
+}
+
 static int
 nvme_ioctl_firmware_download(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc,
     int mode, cred_t *cred_p)
@@ -4417,6 +4468,12 @@
 		len -= copylen;
 	}
 
+	/*
+	 * Let the DDI UFM subsystem know that the firmware information for
+	 * this device has changed.
+	 */
+	nvme_ufm_update(nvme);
+
 	return (rv);
 }
 
@@ -4465,6 +4522,12 @@
 
 	nioc->n_arg = ((uint64_t)cqe.cqe_sf.sf_sct << 16) | cqe.cqe_sf.sf_sc;
 
+	/*
+	 * Let the DDI UFM subsystem know that the firmware information for
+	 * this device has changed.
+	 */
+	nvme_ufm_update(nvme);
+
 	return (rv);
 }
 
@@ -4578,3 +4641,90 @@
 
 	return (rv);
 }
+
+/*
+ * DDI UFM Callbacks
+ */
+static int
+nvme_ufm_fill_image(ddi_ufm_handle_t *ufmh, void *arg, uint_t imgno,
+    ddi_ufm_image_t *img)
+{
+	nvme_t *nvme = arg;
+
+	if (imgno != 0)
+		return (EINVAL);
+
+	ddi_ufm_image_set_desc(img, "Firmware");
+	ddi_ufm_image_set_nslots(img, nvme->n_idctl->id_frmw.fw_nslot);
+
+	return (0);
+}
+
+/*
+ * Fill out firmware slot information for the requested slot.  The firmware
+ * slot information is gathered by requesting the Firmware Slot Information log
+ * page.  The format of the page is described in NVMe spec section 5.10.1.3.
+ *
+ * We lazily cache the log page on the first call and then invalidate the cache
+ * data after a successful firmware download or firmware commit command.
+ * The cached data is protected by a mutex as the state can change
+ * asynchronously to this callback.
+ */
+static int
+nvme_ufm_fill_slot(ddi_ufm_handle_t *ufmh, void *arg, uint_t imgno,
+    uint_t slotno, ddi_ufm_slot_t *slot)
+{
+	nvme_t *nvme = arg;
+	void *log = NULL;
+	size_t bufsize;
+	ddi_ufm_attr_t attr = 0;
+	char fw_ver[NVME_FWVER_SZ + 1];
+	int ret;
+
+	if (imgno > 0 || slotno > (nvme->n_idctl->id_frmw.fw_nslot - 1))
+		return (EINVAL);
+
+	mutex_enter(&nvme->n_fwslot_mutex);
+	if (nvme->n_fwslot == NULL) {
+		ret = nvme_get_logpage(nvme, B_TRUE, &log, &bufsize,
+		    NVME_LOGPAGE_FWSLOT, 0);
+		if (ret != DDI_SUCCESS ||
+		    bufsize != sizeof (nvme_fwslot_log_t)) {
+			if (log != NULL)
+				kmem_free(log, bufsize);
+			mutex_exit(&nvme->n_fwslot_mutex);
+			return (EIO);
+		}
+		nvme->n_fwslot = (nvme_fwslot_log_t *)log;
+	}
+
+	/*
+	 * NVMe numbers firmware slots starting at 1
+	 */
+	if (slotno == (nvme->n_fwslot->fw_afi - 1))
+		attr |= DDI_UFM_ATTR_ACTIVE;
+
+	if (slotno != 0 || nvme->n_idctl->id_frmw.fw_readonly == 0)
+		attr |= DDI_UFM_ATTR_WRITEABLE;
+
+	if (nvme->n_fwslot->fw_frs[slotno][0] == '\0') {
+		attr |= DDI_UFM_ATTR_EMPTY;
+	} else {
+		(void) strncpy(fw_ver, nvme->n_fwslot->fw_frs[slotno],
+		    NVME_FWVER_SZ);
+		fw_ver[NVME_FWVER_SZ] = '\0';
+		ddi_ufm_slot_set_version(slot, fw_ver);
+	}
+	mutex_exit(&nvme->n_fwslot_mutex);
+
+	ddi_ufm_slot_set_attrs(slot, attr);
+
+	return (0);
+}
+
+static int
+nvme_ufm_getcaps(ddi_ufm_handle_t *ufmh, void *arg, ddi_ufm_cap_t *caps)
+{
+	*caps = DDI_UFM_CAP_REPORT;
+	return (0);
+}
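
The fill_slot callback and nvme_ufm_update() together form a lazy cache with
invalidate-on-write, serialized by n_fwslot_mutex. Reduced to a portable
sketch (names illustrative):

	/* reader (UFM callback) */
	mutex_enter(&lock);
	if (cache == NULL)
		cache = fetch_from_device();	/* fill on first use */
	use(cache);
	mutex_exit(&lock);

	/* writer, after a firmware download or commit */
	mutex_enter(&lock);
	free_and_clear(&cache);			/* next reader refetches */
	mutex_exit(&lock);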
--- a/usr/src/uts/common/io/nvme/nvme_var.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/io/nvme/nvme_var.h	Thu Nov 14 23:30:04 2019 +0000
@@ -12,7 +12,7 @@
 /*
  * Copyright 2018 Nexenta Systems, Inc.
  * Copyright 2016 The MathWorks, Inc. All rights reserved.
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  * Copyright 2019 Western Digital Corporation.
  */
 
@@ -38,6 +38,7 @@
 #define	NVME_ADMIN_QUEUE		0x4
 #define	NVME_CTRL_LIMITS		0x8
 #define	NVME_INTERRUPTS			0x10
+#define	NVME_UFM_INIT			0x20
 
 #define	NVME_MIN_ADMIN_QUEUE_LEN	16
 #define	NVME_MIN_IO_QUEUE_LEN		16
@@ -242,6 +243,12 @@
 	uint32_t n_vendor_event;
 	uint32_t n_unknown_event;
 
+	/* DDI UFM handle */
+	ddi_ufm_handle_t *n_ufmh;
+	/* Cached Firmware Slot Information log page */
+	nvme_fwslot_log_t *n_fwslot;
+	/* Lock protecting the cached firmware slot info */
+	kmutex_t n_fwslot_mutex;
 };
 
 struct nvme_namespace {
--- a/usr/src/uts/common/io/sata/adapters/ahci/ahci.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/io/sata/adapters/ahci/ahci.c	Thu Nov 14 23:30:04 2019 +0000
@@ -82,7 +82,6 @@
  */
 
 #include <sys/note.h>
-#include <sys/debug.h>
 #include <sys/scsi/scsi.h>
 #include <sys/pci.h>
 #include <sys/disp.h>
@@ -10669,8 +10668,6 @@
 		return (B_TRUE);
 	}
 
-	ahci_ctlp->ahcictl_em_flags |= AHCI_EM_PRESENT;
-
 	ahci_ctlp->ahcictl_em_tx_off = ((ahci_ctlp->ahcictl_em_loc &
 	    AHCI_HBA_EM_LOC_OFST_MASK) >> AHCI_HBA_EM_LOC_OFST_SHIFT) * 4;
 	ahci_ctlp->ahcictl_em_tx_off += ahci_ctlp->ahcictl_ahci_addr;
@@ -10689,7 +10686,7 @@
 	}
 
 	mutex_enter(&ahci_ctlp->ahcictl_mutex);
-	ahci_ctlp->ahcictl_em_flags |= AHCI_EM_RESETTING;
+	ahci_ctlp->ahcictl_em_flags |= AHCI_EM_PRESENT | AHCI_EM_RESETTING;
 	mutex_exit(&ahci_ctlp->ahcictl_mutex);
 	(void) ddi_taskq_dispatch(ahci_ctlp->ahcictl_em_taskq, ahci_em_reset,
 	    ahci_ctlp, DDI_SLEEP);
@@ -10703,6 +10700,10 @@
 	int i;
 	ahci_ioc_em_get_t get;
 
+	if ((ahci_ctlp->ahcictl_em_flags & AHCI_EM_PRESENT) == 0) {
+		return (ENOTSUP);
+	}
+
 	bzero(&get, sizeof (get));
 	get.aiemg_nports = ahci_ctlp->ahcictl_ports_implemented;
 	if ((ahci_ctlp->ahcictl_em_ctl & AHCI_HBA_EM_CTL_ATTR_ALHD) == 0) {
@@ -10757,6 +10758,10 @@
 		return (EINVAL);
 	}
 
+	if ((ahci_ctlp->ahcictl_em_flags & AHCI_EM_PRESENT) == 0) {
+		return (ENOTSUP);
+	}
+
 	if ((set.aiems_leds & AHCI_EM_LED_ACTIVITY_DISABLE) != 0 &&
 	    ((ahci_ctlp->ahcictl_em_ctl & AHCI_HBA_EM_CTL_ATTR_ALHD) != 0)) {
 		return (ENOTSUP);
@@ -10844,25 +10849,34 @@
 ahci_em_quiesce(ahci_ctl_t *ahci_ctlp)
 {
 	ASSERT(ahci_ctlp->ahcictl_em_flags & AHCI_EM_PRESENT);
-	VERIFY(mutex_owned(&ahci_ctlp->ahcictl_mutex));
-
+
+	mutex_enter(&ahci_ctlp->ahcictl_mutex);
+	if ((ahci_ctlp->ahcictl_em_flags & AHCI_EM_PRESENT) == 0) {
+		mutex_exit(&ahci_ctlp->ahcictl_mutex);
+		return;
+	}
 	ahci_ctlp->ahcictl_em_flags |= AHCI_EM_QUIESCE;
+	mutex_exit(&ahci_ctlp->ahcictl_mutex);
 	ddi_taskq_wait(ahci_ctlp->ahcictl_em_taskq);
 }
 
 static void
 ahci_em_suspend(ahci_ctl_t *ahci_ctlp)
 {
-	VERIFY(mutex_owned(&ahci_ctlp->ahcictl_mutex));
-
 	ahci_em_quiesce(ahci_ctlp);
+	mutex_enter(&ahci_ctlp->ahcictl_mutex);
 	ahci_ctlp->ahcictl_em_flags &= ~AHCI_EM_READY;
+	mutex_exit(&ahci_ctlp->ahcictl_mutex);
 }
 
 static void
 ahci_em_resume(ahci_ctl_t *ahci_ctlp)
 {
 	mutex_enter(&ahci_ctlp->ahcictl_mutex);
+	if ((ahci_ctlp->ahcictl_em_flags & AHCI_EM_PRESENT) == 0) {
+		mutex_exit(&ahci_ctlp->ahcictl_mutex);
+		return;
+	}
 	ahci_ctlp->ahcictl_em_flags |= AHCI_EM_RESETTING;
 	mutex_exit(&ahci_ctlp->ahcictl_mutex);
 
@@ -10877,9 +10891,7 @@
 		return;
 	}
 
-	mutex_enter(&ahci_ctlp->ahcictl_mutex);
 	ahci_em_quiesce(ahci_ctlp);
-	mutex_exit(&ahci_ctlp->ahcictl_mutex);
 
 	ddi_taskq_destroy(ahci_ctlp->ahcictl_em_taskq);
 	ahci_ctlp->ahcictl_em_taskq = NULL;
--- a/usr/src/uts/common/io/usb/scsa2usb/scsa2usb.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/io/usb/scsa2usb/scsa2usb.c	Thu Nov 14 23:30:04 2019 +0000
@@ -24,6 +24,7 @@
  *
  * Copyright 2016 Joyent, Inc.
  * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org>
  */
 
 
@@ -150,9 +151,11 @@
 void		scsa2usb_close_usb_pipes(scsa2usb_state_t *);
 
 static void	scsa2usb_fill_up_cdb_len(scsa2usb_cmd_t *, int);
-static void	scsa2usb_fill_up_cdb_lba(scsa2usb_cmd_t *, int);
+static void	scsa2usb_fill_up_cdb_lba(scsa2usb_cmd_t *, uint64_t);
+static void	scsa2usb_fill_up_g4_cdb_lba(scsa2usb_cmd_t *, uint64_t);
 static void	scsa2usb_fill_up_ReadCD_cdb_len(scsa2usb_cmd_t *, int, int);
 static void	scsa2usb_fill_up_12byte_cdb_len(scsa2usb_cmd_t *, int, int);
+static void	scsa2usb_fill_up_16byte_cdb_len(scsa2usb_cmd_t *, int, int);
 static int	scsa2usb_read_cd_blk_size(uchar_t);
 int		scsa2usb_rw_transport(scsa2usb_state_t *, struct scsi_pkt *);
 void		scsa2usb_setup_next_xfer(scsa2usb_state_t *, scsa2usb_cmd_t *);
@@ -242,6 +245,8 @@
 	"\135sendcuesheet",
 	"\136prin",
 	"\137prout",
+	"\210read16",
+	"\212write16",
 	"\241blankcd",
 	"\245playaudio12",
 	"\250read12",
@@ -477,7 +482,7 @@
 	ddi_prop_op,		/* prop_op */
 	NULL,			/* stream */
 	D_MP,			/* cb_flag */
-	CB_REV, 		/* rev */
+	CB_REV,			/* rev */
 	nodev,			/* int (*cb_aread)() */
 	nodev			/* int (*cb_awrite)() */
 };
@@ -627,7 +632,7 @@
 	usb_ep_data_t		*ep_data;
 	usb_client_dev_data_t	*dev_data;
 	usb_alt_if_data_t	*altif_data;
-	usb_ugen_info_t 	usb_ugen_info;
+	usb_ugen_info_t		usb_ugen_info;
 
 	USB_DPRINTF_L4(DPRINT_MASK_SCSA, NULL,
 	    "scsa2usb_attach: dip = 0x%p", (void *)dip);
@@ -663,7 +668,7 @@
 		return (DDI_FAILURE);
 	}
 
-	scsa2usbp->scsa2usb_dip 	= dip;
+	scsa2usbp->scsa2usb_dip		= dip;
 	scsa2usbp->scsa2usb_instance	= instance;
 
 	/* allocate a log handle for debug/error messages */
@@ -698,7 +703,7 @@
 		    dev_data->dev_iblock_cookie);
 	}
 	mutex_enter(&scsa2usbp->scsa2usb_mutex);
-	scsa2usbp->scsa2usb_dip 	= dip;
+	scsa2usbp->scsa2usb_dip		= dip;
 	scsa2usbp->scsa2usb_instance	= instance;
 	scsa2usbp->scsa2usb_attrs	= SCSA2USB_ALL_ATTRS;
 	scsa2usbp->scsa2usb_dev_data	= dev_data;
@@ -3375,6 +3380,8 @@
 	case SCMD_WRITE:
 	case SCMD_READ_G1:
 	case SCMD_WRITE_G1:
+	case SCMD_READ_G4:
+	case SCMD_WRITE_G4:
 	case SCMD_READ_G5:
 	case SCMD_WRITE_G5:
 	case SCMD_READ_LONG:
@@ -3719,12 +3726,13 @@
 	case SCMD_WRITE:
 	case SCMD_READ_G1:
 	case SCMD_WRITE_G1:
+	case SCMD_READ_G4:
+	case SCMD_WRITE_G4:
 	case SCMD_READ_G5:
 	case SCMD_WRITE_G5:
 	case SCMD_READ_LONG:
 	case SCMD_WRITE_LONG:
 	case SCMD_READ_CD:
-
 		return (scsa2usb_rw_transport(scsa2usbp, pkt));
 
 	case SCMD_TEST_UNIT_READY:
@@ -3811,7 +3819,8 @@
 scsa2usb_rw_transport(scsa2usb_state_t *scsa2usbp, struct scsi_pkt *pkt)
 {
 	scsa2usb_cmd_t *cmd = PKT2CMD(pkt);
-	int lba, dir, opcode;
+	int dir, opcode;
+	uint64_t lba;
 	struct buf *bp = cmd->cmd_bp;
 	size_t len, xfer_count;
 	size_t blk_size;	/* calculate the block size to be used */
@@ -3823,7 +3832,7 @@
 	ASSERT(mutex_owned(&scsa2usbp->scsa2usb_mutex));
 
 	opcode = pkt->pkt_cdbp[0];
-	blk_size  = scsa2usbp->scsa2usb_lbasize[pkt->pkt_address.a_lun];
+	blk_size = scsa2usbp->scsa2usb_lbasize[pkt->pkt_address.a_lun];
 						/* set to default */
 
 	switch (opcode) {
@@ -3854,8 +3863,9 @@
 		lba = SCSA2USB_LBA_10BYTE(pkt);
 		len = SCSA2USB_LEN_10BYTE(pkt);
 		dir = USB_EP_DIR_OUT;
-		if (len) {
-			sz = SCSA2USB_CDRW_BLKSZ(bp ? bp->b_bcount : 0, len);
+		if (len > 0) {
+			sz = SCSA2USB_CDRW_BLKSZ(bp != NULL ?
+			    bp->b_bcount : 0, len);
 			if (SCSA2USB_VALID_CDRW_BLKSZ(sz)) {
 				blk_size = sz;	/* change it accordingly */
 			}
@@ -3869,6 +3879,16 @@
 		/* Figure out the block size */
 		blk_size = scsa2usb_read_cd_blk_size(pkt->pkt_cdbp[1] >> 2);
 		break;
+	case SCMD_READ_G4:
+		lba = SCSA2USB_LBA_16BYTE(pkt);
+		len = SCSA2USB_LEN_16BYTE(pkt);
+		dir = USB_EP_DIR_IN;
+		break;
+	case SCMD_WRITE_G4:
+		lba = SCSA2USB_LBA_16BYTE(pkt);
+		len = SCSA2USB_LEN_16BYTE(pkt);
+		dir = USB_EP_DIR_OUT;
+		break;
 	case SCMD_READ_G5:
 		lba = SCSA2USB_LBA_12BYTE(pkt);
 		len = SCSA2USB_LEN_12BYTE(pkt);
@@ -3884,8 +3904,8 @@
 	cmd->cmd_total_xfercount = xfer_count = len * blk_size;
 
 	/* reduce xfer count if necessary */
-	if (blk_size &&
-	    (xfer_count > scsa2usbp->scsa2usb_max_bulk_xfer_size)) {
+	if (blk_size != 0 &&
+	    xfer_count > scsa2usbp->scsa2usb_max_bulk_xfer_size) {
 		/*
 		 * For CD-RW devices reduce the xfer count based
 		 * on the block size used by these devices. The
@@ -3898,13 +3918,13 @@
 		 * The len part of the cdb changes as a result of that.
 		 */
 		if (SCSA2USB_VALID_CDRW_BLKSZ(blk_size)) {
-			xfer_count = ((scsa2usbp->scsa2usb_max_bulk_xfer_size/
-			    blk_size) * blk_size);
-			len = xfer_count/blk_size;
+			xfer_count = (scsa2usbp->scsa2usb_max_bulk_xfer_size /
+			    blk_size) * blk_size;
+			len = xfer_count / blk_size;
 			xfer_count = blk_size * len;
 		} else {
 			xfer_count = scsa2usbp->scsa2usb_max_bulk_xfer_size;
-			len = xfer_count/blk_size;
+			len = xfer_count / blk_size;
 		}
 	}
 
@@ -3921,19 +3941,25 @@
 	case SCMD_READ_CD:
 		bcopy(pkt->pkt_cdbp, &cmd->cmd_cdb, cmd->cmd_cdblen);
 		scsa2usb_fill_up_ReadCD_cdb_len(cmd, len, CDB_GROUP5);
+		scsa2usb_fill_up_cdb_lba(cmd, lba);
+		break;
+	case SCMD_WRITE_G4:
+	case SCMD_READ_G4:
+		scsa2usb_fill_up_16byte_cdb_len(cmd, len, CDB_GROUP4);
+		scsa2usb_fill_up_g4_cdb_lba(cmd, lba);
 		break;
 	case SCMD_WRITE_G5:
 	case SCMD_READ_G5:
 		scsa2usb_fill_up_12byte_cdb_len(cmd, len, CDB_GROUP5);
+		scsa2usb_fill_up_cdb_lba(cmd, lba);
 		break;
 	default:
 		scsa2usb_fill_up_cdb_len(cmd, len);
 		cmd->cmd_actual_len = CDB_GROUP1;
+		scsa2usb_fill_up_cdb_lba(cmd, lba);
 		break;
 	}
 
-	scsa2usb_fill_up_cdb_lba(cmd, lba);
-
 	USB_DPRINTF_L3(DPRINT_MASK_SCSA, scsa2usbp->scsa2usb_log_handle,
 	    "bcount=0x%lx lba=0x%x len=0x%lx xfercount=0x%lx total=0x%lx",
 	    bp ? bp->b_bcount : 0, lba, len, cmd->cmd_xfercount,
@@ -4011,10 +4037,23 @@
 		/* calculate lba = current_lba + len_of_prev_cmd */
 		cmd->cmd_lba += (cmd->cmd_cdb[6] << 16) +
 		    (cmd->cmd_cdb[7] << 8) + cmd->cmd_cdb[8];
-		cdb_len = xfer_len/cmd->cmd_blksize;
+		cdb_len = xfer_len / cmd->cmd_blksize;
 		cmd->cmd_cdb[SCSA2USB_READ_CD_LEN_2] = (uchar_t)cdb_len;
 		/* re-adjust xfer count */
 		cmd->cmd_xfercount = cdb_len * cmd->cmd_blksize;
+		scsa2usb_fill_up_cdb_lba(cmd, cmd->cmd_lba);
+		break;
+	case SCMD_WRITE_G4:
+	case SCMD_READ_G4:
+		/* calculate lba = current_lba + len_of_prev_cmd */
+		cmd->cmd_lba += (cmd->cmd_cdb[10] << 24) +
+		    (cmd->cmd_cdb[11] << 16) + (cmd->cmd_cdb[12] << 8) +
+		    cmd->cmd_cdb[13];
+		if (blk_size != 0) {
+			xfer_len /= blk_size;
+		}
+		scsa2usb_fill_up_16byte_cdb_len(cmd, xfer_len, CDB_GROUP5);
+		scsa2usb_fill_up_g4_cdb_lba(cmd, cmd->cmd_lba);
 		break;
 	case SCMD_WRITE_G5:
 	case SCMD_READ_G5:
@@ -4022,10 +4061,11 @@
 		cmd->cmd_lba += (cmd->cmd_cdb[6] << 24) +
 		    (cmd->cmd_cdb[7] << 16) + (cmd->cmd_cdb[8] << 8) +
 		    cmd->cmd_cdb[9];
-		if (blk_size) {
+		if (blk_size != 0) {
 			xfer_len /= blk_size;
 		}
 		scsa2usb_fill_up_12byte_cdb_len(cmd, xfer_len, CDB_GROUP5);
+		scsa2usb_fill_up_cdb_lba(cmd, cmd->cmd_lba);
 		break;
 	case SCMD_WRITE_G1:
 	case SCMD_WRITE_LONG:
@@ -4034,22 +4074,23 @@
 		if (SCSA2USB_VALID_CDRW_BLKSZ(cmd->cmd_blksize)) {
 			blk_size = cmd->cmd_blksize;
 		}
-		cdb_len = xfer_len/blk_size;
+		cdb_len = xfer_len / blk_size;
 		scsa2usb_fill_up_cdb_len(cmd, cdb_len);
 		/* re-adjust xfer count */
 		cmd->cmd_xfercount = cdb_len * blk_size;
+		scsa2usb_fill_up_cdb_lba(cmd, cmd->cmd_lba);
 		break;
 	default:
-		if (blk_size) {
+		if (blk_size != 0) {
 			xfer_len /= blk_size;
 		}
 		scsa2usb_fill_up_cdb_len(cmd, xfer_len);
-		cmd->cmd_lba += scsa2usbp->scsa2usb_max_bulk_xfer_size/blk_size;
+		cmd->cmd_lba += scsa2usbp->scsa2usb_max_bulk_xfer_size /
+		    blk_size;
+		scsa2usb_fill_up_cdb_lba(cmd, cmd->cmd_lba);
+		break;
 	}
 
-	/* fill in the lba */
-	scsa2usb_fill_up_cdb_lba(cmd, cmd->cmd_lba);
-
 	USB_DPRINTF_L4(DPRINT_MASK_SCSA, scsa2usbp->scsa2usb_log_handle,
 	    "scsa2usb_setup_next_xfer:\n\tlba = 0x%x xfer_len = 0x%x "
 	    "xfercount = 0x%lx total = 0x%lx", cmd->cmd_lba, xfer_len,
@@ -4694,7 +4735,7 @@
  *	fill up command CDBs' LBA part
  */
 static void
-scsa2usb_fill_up_cdb_lba(scsa2usb_cmd_t *cmd, int lba)
+scsa2usb_fill_up_cdb_lba(scsa2usb_cmd_t *cmd, uint64_t lba)
 {
 	/* zero cdb1, lba bits so they won't get copied in the new cdb */
 	cmd->cmd_cdb[SCSA2USB_LUN] &= 0xE0;
@@ -4707,6 +4748,27 @@
 
 
 /*
+ * scsa2usb_fill_up_g4_cdb_lba:
+ *	fill in the CDB for a Group 4 command (16-byte CDB)
+ */
+static void
+scsa2usb_fill_up_g4_cdb_lba(scsa2usb_cmd_t *cmd, uint64_t lba)
+{
+	/* zero cdb1, lba bits so they won't get copied in the new cdb */
+	cmd->cmd_cdb[SCSA2USB_LUN] &= 0xE0;
+	cmd->cmd_cdb[2] = lba >> 56;
+	cmd->cmd_cdb[3] = lba >> 48;
+	cmd->cmd_cdb[4] = lba >> 40;
+	cmd->cmd_cdb[5] = lba >> 32;
+	cmd->cmd_cdb[6] = lba >> 24;
+	cmd->cmd_cdb[7] = lba >> 16;
+	cmd->cmd_cdb[8] = lba >> 8;
+	cmd->cmd_cdb[9] = lba;
+	cmd->cmd_lba = lba;
+}
+
+
+/*
  * scsa2usb_fill_up_ReadCD_cdb_len:
  *	fill up READ_CD command CDBs' len part
  */
@@ -4721,6 +4783,21 @@
 
 
 /*
+ * scsa2usb_fill_up_16byte_cdb_len:
+ *	populate CDB length field for SCMD_WRITE_G4 and SCMD_READ_G4
+ */
+static void
+scsa2usb_fill_up_16byte_cdb_len(scsa2usb_cmd_t *cmd, int len, int actual_len)
+{
+	cmd->cmd_cdb[10] = len >> 24;
+	cmd->cmd_cdb[11] = len >> 16;
+	cmd->cmd_cdb[12] = len >> 8;
+	cmd->cmd_cdb[13] = len;
+	cmd->cmd_actual_len = actual_len;
+}
+
+
+/*
  * scsa2usb_fill_up_12byte_cdb_len:
  *	fill up generic 12-byte command CDBs' len part
  */
--- a/usr/src/uts/common/io/usb/usba/usbai.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/io/usb/usba/usbai.c	Thu Nov 14 23:30:04 2019 +0000
@@ -1040,7 +1040,7 @@
 		}
 	}
 	if (ddi_add_event_handler(dip, usba_device->rm_cookie,
-	    (peh_t)disconnect_event_handler,
+	    (peh_t)(uintptr_t)disconnect_event_handler,
 	    NULL, &evdata->ev_rm_cb_id) != DDI_SUCCESS) {
 		USB_DPRINTF_L2(DPRINT_MASK_USBAI, usbai_log_handle,
 		    "usb_register_hotplug_cbs: add disconnect handler failed");
@@ -1058,7 +1058,7 @@
 		}
 	}
 	if (ddi_add_event_handler(dip, usba_device->ins_cookie,
-	    (peh_t)reconnect_event_handler,
+	    (peh_t)(uintptr_t)reconnect_event_handler,
 	    NULL, &evdata->ev_ins_cb_id) != DDI_SUCCESS) {
 		USB_DPRINTF_L2(DPRINT_MASK_USBAI, usbai_log_handle,
 		    "usb_register_hotplug_cbs: add reconnect handler failed");
@@ -1129,7 +1129,7 @@
 			}
 		}
 		if (ddi_add_event_handler(dip, usba_device->rm_cookie,
-		    (peh_t)usb_evdata->disconnect_event_handler,
+		    (peh_t)(uintptr_t)usb_evdata->disconnect_event_handler,
 		    NULL, &evdata->ev_rm_cb_id) != DDI_SUCCESS) {
 
 			goto fail;
@@ -1144,7 +1144,7 @@
 			}
 		}
 		if (ddi_add_event_handler(dip, usba_device->ins_cookie,
-		    (peh_t)usb_evdata->reconnect_event_handler,
+		    (peh_t)(uintptr_t)usb_evdata->reconnect_event_handler,
 		    NULL, &evdata->ev_ins_cb_id) != DDI_SUCCESS) {
 
 			goto fail;
@@ -1159,7 +1159,7 @@
 			}
 		}
 		if (ddi_add_event_handler(dip, usba_device->resume_cookie,
-		    (peh_t)usb_evdata->post_resume_event_handler,
+		    (peh_t)(uintptr_t)usb_evdata->post_resume_event_handler,
 		    NULL, &evdata->ev_resume_cb_id) != DDI_SUCCESS) {
 
 			goto fail;
@@ -1174,7 +1174,7 @@
 			}
 		}
 		if (ddi_add_event_handler(dip, usba_device->suspend_cookie,
-		    (peh_t)usb_evdata->pre_suspend_event_handler,
+		    (peh_t)(uintptr_t)usb_evdata->pre_suspend_event_handler,
 		    NULL, &evdata->ev_suspend_cb_id) != DDI_SUCCESS) {
 
 			goto fail;
--- a/usr/src/uts/common/os/ddi_ufm.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/os/ddi_ufm.c	Thu Nov 14 23:30:04 2019 +0000
@@ -184,7 +184,10 @@
 		if (ret != 0)
 			goto cache_fail;
 
-		ASSERT(img->ufmi_desc != NULL && img->ufmi_nslots != 0);
+		if (img->ufmi_desc == NULL || img->ufmi_nslots == 0) {
+			ret = EIO;
+			goto cache_fail;
+		}
 
 		img->ufmi_slots =
 		    kmem_zalloc((sizeof (ddi_ufm_slot_t) * img->ufmi_nslots),
--- a/usr/src/uts/common/rpc/clnt_cots.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/rpc/clnt_cots.c	Thu Nov 14 23:30:04 2019 +0000
@@ -22,6 +22,7 @@
 /*
  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 /*
@@ -623,6 +624,7 @@
 	 * The zalloc initialized the fields below.
 	 * p->cku_xid = 0;
 	 * p->cku_flags = 0;
+	 * p->cku_srcaddr.buf = NULL;
 	 * p->cku_srcaddr.len = 0;
 	 * p->cku_srcaddr.maxlen = 0;
 	 */
@@ -1579,8 +1581,7 @@
 	p->cku_cred = cred;
 
 	if (p->cku_addr.maxlen < addr->len) {
-		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
-			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
+		kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
 		p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP);
 		p->cku_addr.maxlen = addr->maxlen;
 	}
@@ -1933,10 +1934,9 @@
 			 * a later retry.
 			 */
 			if (srcaddr->len != lru_entry->x_src.len) {
-				if (srcaddr->len > 0)
-					kmem_free(srcaddr->buf,
-					    srcaddr->maxlen);
-				srcaddr->buf = kmem_zalloc(
+				kmem_free(srcaddr->buf, srcaddr->maxlen);
+				ASSERT(lru_entry->x_src.len != 0);
+				srcaddr->buf = kmem_alloc(
 				    lru_entry->x_src.len, KM_SLEEP);
 				srcaddr->maxlen = srcaddr->len =
 				    lru_entry->x_src.len;
@@ -2091,7 +2091,7 @@
 	cm_entry = (struct cm_xprt *)
 	    kmem_zalloc(sizeof (struct cm_xprt), KM_SLEEP);
 
-	cm_entry->x_server.buf = kmem_zalloc(destaddr->len, KM_SLEEP);
+	cm_entry->x_server.buf = kmem_alloc(destaddr->len, KM_SLEEP);
 	bcopy(destaddr->buf, cm_entry->x_server.buf, destaddr->len);
 	cm_entry->x_server.len = cm_entry->x_server.maxlen = destaddr->len;
 
@@ -2256,9 +2256,11 @@
 	/*
 	 * Set up a transport entry in the connection manager's list.
 	 */
-	cm_entry->x_src.buf = kmem_zalloc(srcaddr->len, KM_SLEEP);
-	bcopy(srcaddr->buf, cm_entry->x_src.buf, srcaddr->len);
-	cm_entry->x_src.len = cm_entry->x_src.maxlen = srcaddr->len;
+	if (srcaddr->len > 0) {
+		cm_entry->x_src.buf = kmem_alloc(srcaddr->len, KM_SLEEP);
+		bcopy(srcaddr->buf, cm_entry->x_src.buf, srcaddr->len);
+		cm_entry->x_src.len = cm_entry->x_src.maxlen = srcaddr->len;
+	} /* Else kmem_zalloc() of cm_entry already sets its x_src to NULL. */
 
 	cm_entry->x_tiptr = tiptr;
 	cm_entry->x_time = ddi_get_lbolt();
@@ -2438,12 +2440,11 @@
 		 * in case of a later retry.
 		 */
 		if (srcaddr->len != cm_entry->x_src.len) {
-			if (srcaddr->maxlen > 0)
-				kmem_free(srcaddr->buf, srcaddr->maxlen);
-			srcaddr->buf = kmem_zalloc(cm_entry->x_src.len,
+			kmem_free(srcaddr->buf, srcaddr->maxlen);
+			ASSERT(cm_entry->x_src.len != 0);
+			srcaddr->buf = kmem_alloc(cm_entry->x_src.len,
 			    KM_SLEEP);
-			srcaddr->maxlen = srcaddr->len =
-			    cm_entry->x_src.len;
+			srcaddr->maxlen = srcaddr->len = cm_entry->x_src.len;
 		}
 		bcopy(cm_entry->x_src.buf, srcaddr->buf, srcaddr->len);
 	}
@@ -2565,10 +2566,8 @@
 	cv_destroy(&cm_entry->x_conn_cv);
 	cv_destroy(&cm_entry->x_dis_cv);
 
-	if (cm_entry->x_server.buf != NULL)
-		kmem_free(cm_entry->x_server.buf, cm_entry->x_server.maxlen);
-	if (cm_entry->x_src.buf != NULL)
-		kmem_free(cm_entry->x_src.buf, cm_entry->x_src.maxlen);
+	kmem_free(cm_entry->x_server.buf, cm_entry->x_server.maxlen);
+	kmem_free(cm_entry->x_src.buf, cm_entry->x_src.maxlen);
 	kmem_free(cm_entry, sizeof (struct cm_xprt));
 }
 
@@ -2631,11 +2630,11 @@
 	queue_t			*wq,
 	struct netbuf		*addr,
 	int			addrfmly,
-	calllist_t 		*e,
-	int 			*tidu_ptr,
-	bool_t 			reconnect,
-	const struct timeval 	*waitp,
-	bool_t 			nosignal,
+	calllist_t		*e,
+	int			*tidu_ptr,
+	bool_t			reconnect,
+	const struct timeval	*waitp,
+	bool_t			nosignal,
 	cred_t			*cr)
 {
 	mblk_t *mp;
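
The freeing changes above lean on 11955: kmem_free(NULL, 0) is legal, much
as free(NULL) is a no-op in userland, so the NULL and length guards can go
wherever a NULL buffer implies a zero maxlen. Illustrative before/after:

	/* before */
	if (nb->buf != NULL)
		kmem_free(nb->buf, nb->maxlen);

	/* after: NULL buf implies maxlen == 0, which is a no-op */
	kmem_free(nb->buf, nb->maxlen);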
--- a/usr/src/uts/common/smbsrv/smb_door.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/smbsrv/smb_door.h	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #ifndef _SMBSRV_SMB_DOOR_H
@@ -70,7 +70,9 @@
 	SMB_DR_DFS_GET_REFERRALS,
 	SMB_DR_SHR_HOSTACCESS,
 	SMB_DR_SHR_EXEC,
-	SMB_DR_NOTIFY_DC_CHANGED
+	SMB_DR_NOTIFY_DC_CHANGED,
+	SMB_DR_LOOKUP_LSID,
+	SMB_DR_LOOKUP_LNAME
 } smb_dopcode_t;
 
 struct smb_event;
--- a/usr/src/uts/common/smbsrv/smb_ktypes.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/smbsrv/smb_ktypes.h	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  */
 
 /*
@@ -1012,6 +1012,8 @@
 #define	SMB_USER_PRIV_BACKUP		(1<<17)	/* SE_BACKUP_LUID */
 #define	SMB_USER_PRIV_RESTORE		(1<<18)	/* SE_RESTORE_LUID */
 #define	SMB_USER_PRIV_CHANGE_NOTIFY	(1<<23)	/* SE_CHANGE_NOTIFY_LUID */
+#define	SMB_USER_PRIV_READ_FILE		(1<<25)	/* SE_READ_FILE_LUID */
+#define	SMB_USER_PRIV_WRITE_FILE	(1<<26)	/* SE_WRITE_FILE_LUID */
 
 /*
  * See the long "User State Machine" comment in smb_user.c
--- a/usr/src/uts/common/smbsrv/smb_privilege.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/smbsrv/smb_privilege.h	Thu Nov 14 23:30:04 2019 +0000
@@ -22,7 +22,7 @@
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  */
 
 #ifndef _SMB_PRIVILEGE_H
@@ -97,6 +97,8 @@
 #define	SE_SYSTEM_ENVIRONMENT_NAME	"SeSystemEnvironmentPrivilege"
 #define	SE_CHANGE_NOTIFY_NAME		"SeChangeNotifyPrivilege"
 #define	SE_REMOTE_SHUTDOWN_NAME		"SeRemoteShutdownPrivilege"
+#define	SE_READ_FILE_NAME		"BypassAclRead"
+#define	SE_WRITE_FILE_NAME		"BypassAclWrite"
 
 #define	SE_MIN_LUID			2
 #define	SE_CREATE_TOKEN_LUID		2
@@ -122,7 +124,9 @@
 #define	SE_SYSTEM_ENVIRONMENT_LUID	22
 #define	SE_CHANGE_NOTIFY_LUID		23
 #define	SE_REMOTE_SHUTDOWN_LUID		24
-#define	SE_MAX_LUID			24
+#define	SE_READ_FILE_LUID		25
+#define	SE_WRITE_FILE_LUID		26
+#define	SE_MAX_LUID			26
 
 /*
  * Privilege attributes
--- a/usr/src/uts/common/sys/ccompile.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/sys/ccompile.h	Thu Nov 14 23:30:04 2019 +0000
@@ -25,6 +25,7 @@
  */
 /*
  * Copyright 2015 EveryCity Ltd. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #ifndef	_SYS_CCOMPILE_H
@@ -135,6 +136,12 @@
 
 #endif	/* __ATTRIBUTE_IMPLEMENTED || __GNUC__ */
 
+#if __GNUC_VERSION >= 40100
+#define	__sentinel(__n)	__attribute__((__sentinel__(__n)))
+#else
+#define	__sentinel(__n)
+#endif
+
 /*
  * Shorthand versions for readability
  */
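
__sentinel(__n) wraps GCC's sentinel function attribute, which warns when a
variadic call does not end with NULL at position __n counted back from the
last argument (0 meaning "the final argument"). A hypothetical use:

	extern char *build_path(const char *first, ...) __sentinel(0);

	p = build_path("usr", "src", "uts", NULL);	/* OK */
	p = build_path("usr", "src", "uts");		/* warns */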
--- a/usr/src/uts/common/sys/mac_soft_ring.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/sys/mac_soft_ring.h	Thu Nov 14 23:30:04 2019 +0000
@@ -691,7 +691,7 @@
 extern void mac_update_srs_priority(mac_soft_ring_set_t *, pri_t);
 extern void mac_client_update_classifier(mac_client_impl_t *, boolean_t);
 
-extern void mac_soft_ring_intr_enable(void *);
+extern int mac_soft_ring_intr_enable(void *);
 extern boolean_t mac_soft_ring_intr_disable(void *);
 extern mac_soft_ring_t *mac_soft_ring_create(int, clock_t, uint16_t,
     pri_t, mac_client_impl_t *, mac_soft_ring_set_t *,
--- a/usr/src/uts/common/sys/nvme.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/sys/nvme.h	Thu Nov 14 23:30:04 2019 +0000
@@ -436,13 +436,22 @@
 	uint8_t hl_rsvd2[512 - 192];
 } nvme_health_log_t;
 
+/*
+ * The NVMe spec allows for up to seven firmware slots.
+ */
+#define	NVME_MAX_FWSLOTS	7
+#define	NVME_FWVER_SZ		8
+
 typedef struct {
-	uint8_t fw_afi:3;		/* Active Firmware Slot */
+	/* Active Firmware Slot */
+	uint8_t fw_afi:3;
 	uint8_t fw_rsvd1:1;
-	uint8_t fw_next:3;		/* Next Active Firmware Slot */
+	/* Next Active Firmware Slot */
+	uint8_t fw_next:3;
 	uint8_t fw_rsvd2:1;
 	uint8_t fw_rsvd3[7];
-	char fw_frs[7][8];		/* Firmware Revision / Slot */
+	/* Firmware Revision / Slot */
+	char fw_frs[NVME_MAX_FWSLOTS][NVME_FWVER_SZ];
 	uint8_t fw_rsvd4[512 - 64];
 } nvme_fwslot_log_t;
 
--- a/usr/src/uts/common/sys/usb/scsa2usb/scsa2usb.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/sys/usb/scsa2usb/scsa2usb.h	Thu Nov 14 23:30:04 2019 +0000
@@ -22,6 +22,7 @@
  * Use is subject to license terms.
  *
  * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org>
  */
 
 #ifndef _SYS_USB_SCSA2USB_H
@@ -546,7 +547,7 @@
 	/* used in multiple xfers */
 	size_t			cmd_total_xfercount;	/* total xfer val */
 	size_t			cmd_offset;		/* offset into buf */
-	int			cmd_lba;		/* current xfer lba */
+	uint64_t		cmd_lba;		/* current xfer lba */
 	int			cmd_done;		/* command done? */
 	int			cmd_blksize;		/* block size */
 	usba_list_entry_t	cmd_waitQ;		/* waitQ element */
@@ -567,7 +568,9 @@
 #define	SCSA2USB_LEN_0		7		/* LEN[0] field */
 #define	SCSA2USB_LEN_1		8		/* LEN[1] field */
 
-/* macros to calculate LBA for 6/10/12-byte commands */
+/*
+ * Extract LBA and length from 6, 10, 12, and 16-byte commands:
+ */
 #define	SCSA2USB_LBA_6BYTE(pkt) \
 	(((pkt)->pkt_cdbp[1] & 0x1f) << 16) + \
 	((pkt)->pkt_cdbp[2] << 8) + (pkt)->pkt_cdbp[3]
@@ -586,9 +589,22 @@
 	((pkt)->pkt_cdbp[2] << 24) + ((pkt)->pkt_cdbp[3] << 16) + \
 	    ((pkt)->pkt_cdbp[4] << 8) +  (pkt)->pkt_cdbp[5]
 
+#define	SCSA2USB_LEN_16BYTE(pkt) \
+	(((pkt)->pkt_cdbp[10] << 24) + ((pkt)->pkt_cdbp[11] << 16) + \
+	    ((pkt)->pkt_cdbp[12] << 8) + (pkt)->pkt_cdbp[13])
+#define	SCSA2USB_LBA_16BYTE(pkt) ((uint64_t)( \
+	((uint64_t)(pkt)->pkt_cdbp[2] << 56) + \
+	((uint64_t)(pkt)->pkt_cdbp[3] << 48) + \
+	((uint64_t)(pkt)->pkt_cdbp[4] << 40) + \
+	((uint64_t)(pkt)->pkt_cdbp[5] << 32) + \
+	((uint64_t)(pkt)->pkt_cdbp[6] << 24) + \
+	((uint64_t)(pkt)->pkt_cdbp[7] << 16) + \
+	((uint64_t)(pkt)->pkt_cdbp[8] << 8) + \
+	((uint64_t)(pkt)->pkt_cdbp[9])))
+
 /* macros to convert a pkt to cmd and vice-versa */
 #define	PKT2CMD(pkt)		((scsa2usb_cmd_t *)(pkt)->pkt_ha_private)
-#define	CMD2PKT(sp)		((sp)->cmd_pkt
+#define	CMD2PKT(sp)		((sp)->cmd_pkt)
 
 /* bulk pipe default timeout value - how long the command to be tried? */
 #define	SCSA2USB_BULK_PIPE_TIMEOUT	(2 * USB_PIPE_TIMEOUT)
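
The Group 4 macros assemble a big-endian 64-bit LBA from CDB bytes 2-9 and a
32-bit transfer length from bytes 10-13. The same extraction as a standalone
helper, for illustration only:

	static uint64_t
	cdb16_lba(const uint8_t *cdb)
	{
		uint64_t lba = 0;

		for (int i = 2; i <= 9; i++)
			lba = (lba << 8) | cdb[i];
		return (lba);
	}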
--- a/usr/src/uts/common/xen/io/evtchn_dev.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/common/xen/io/evtchn_dev.c	Thu Nov 14 23:30:04 2019 +0000
@@ -112,8 +112,8 @@
 static struct evtsoftdata *port_user[NR_EVENT_CHANNELS];
 static kmutex_t port_user_lock;
 
-void
-evtchn_device_upcall()
+uint_t
+evtchn_device_upcall(caddr_t arg __unused, caddr_t arg1 __unused)
 {
 	struct evtsoftdata *ep;
 	int port;
@@ -154,6 +154,7 @@
 
 done:
 	mutex_exit(&port_user_lock);
+	return (DDI_INTR_CLAIMED);
 }
 
 /* ARGSUSED */
--- a/usr/src/uts/i86pc/io/immu_intrmap.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/i86pc/io/immu_intrmap.c	Thu Nov 14 23:30:04 2019 +0000
@@ -63,7 +63,7 @@
 	    (p))
 
 typedef enum {
-	SVT_NO_VERIFY = 0, 	/* no verification */
+	SVT_NO_VERIFY = 0,	/* no verification */
 	SVT_ALL_VERIFY,		/* using sid and sq to verify */
 	SVT_BUS_VERIFY,		/* verify #startbus and #endbus */
 	SVT_RSVD
@@ -224,7 +224,7 @@
 		}
 	}
 
-	return (INTRMAP_IDX_FULL);  		/* no free index */
+	return (INTRMAP_IDX_FULL);		/* no free index */
 }
 
 /* alloc one interrupt remapping table entry */
@@ -495,11 +495,12 @@
 
 /*
  * immu_intr_handler()
- * 	the fault event handler for a single immu unit
+ *	the fault event handler for a single immu unit
  */
-int
-immu_intr_handler(immu_t *immu)
+uint_t
+immu_intr_handler(caddr_t arg, caddr_t arg1 __unused)
 {
+	immu_t *immu = (immu_t *)arg;
 	uint32_t status;
 	int index, fault_reg_offset;
 	int max_fault_index;
@@ -995,10 +996,10 @@
 	    "%s-intr-handler", immu->immu_name);
 
 	(void) add_avintr((void *)NULL, IMMU_INTR_IPL,
-	    (avfunc)(immu_intr_handler), intr_handler_name, irq,
+	    immu_intr_handler, intr_handler_name, irq,
 	    (caddr_t)immu, NULL, NULL, NULL);
 
 	immu_regs_intr_enable(immu, msi_addr, msi_data, uaddr);
 
-	(void) immu_intr_handler(immu);
+	(void) immu_intr_handler((caddr_t)immu, NULL);
 }
--- a/usr/src/uts/i86pc/io/immu_regs.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/i86pc/io/immu_regs.c	Thu Nov 14 23:30:04 2019 +0000
@@ -253,7 +253,7 @@
 
 /*
  * set_immu_agaw()
- * 	calculate agaw for a IOMMU unit
+ *	calculate agaw for an IOMMU unit
  */
 static int
 set_agaw(immu_t *immu)
@@ -481,7 +481,7 @@
 	immu_regs_intr_enable(immu, immu->immu_regs_intr_msi_addr,
 	    immu->immu_regs_intr_msi_data, immu->immu_regs_intr_uaddr);
 
-	(void) immu_intr_handler(immu);
+	(void) immu_intr_handler((caddr_t)immu, NULL);
 
 	immu_regs_intrmap_enable(immu, immu->immu_intrmap_irta_reg);
 
@@ -638,7 +638,7 @@
 
 /*
  * immu_regs_cpu_flush()
- * 	flush the cpu cache line after CPU memory writes, so
+ *	flush the cpu cache line after CPU memory writes, so
  *      IOMMU can see the writes
  */
 void
--- a/usr/src/uts/i86pc/ml/kpti_trampolines.s	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/i86pc/ml/kpti_trampolines.s	Thu Nov 14 23:30:04 2019 +0000
@@ -9,7 +9,7 @@
  * http://www.illumos.org/license/CDDL.
  */
 /*
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 /*
@@ -88,7 +88,7 @@
  * Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64
  * cases) in that they do not push an interrupt frame (and also have some other
  * effects). In the syscall trampolines, we assume that we can only be taking
- * the call from userland and use SWAPGS and an unconditional overwrite of %cr3.
+ * the call from userland and use swapgs and an unconditional overwrite of %cr3.
  * We do not do any stack pivoting for syscalls (and we leave SYSENTER's
  * existing %rsp pivot untouched) -- instead we spill registers into
  * %gs:CPU_KPTI_* as we need to.
@@ -503,7 +503,7 @@
 	pushq	%gs:CPU_KPTI_CS;		\
 	pushq	%gs:CPU_KPTI_RIP;		\
 	mov	%gs:CPU_KPTI_R13, %r13;		\
-	SWAPGS;					\
+	swapgs;					\
 	jmp	isr;				\
 	SET_SIZE(tr_/**/isr)
 
@@ -536,10 +536,9 @@
 	ENTRY_NP(tr_iret_user)
 #if DEBUG
 	/*
-	 * Ensure that we return to user land with CR0.TS clear. We do this
-	 * before we trampoline back and pivot the stack and %cr3. This way
-	 * we're still on the kernel stack and kernel %cr3, though we are on the
-	 * user GSBASE.
+	 * Panic if we find CR0.TS set. We're still on the kernel stack and
+	 * %cr3, but we do need to swap back to the kernel gs. (We don't worry
+	 * about swapgs speculation here.)
 	 */
 	pushq	%rax
 	mov	%cr0, %rax
@@ -559,14 +558,24 @@
 	cmpq	$1, kpti_enable
 	jne	1f
 
+	/*
+	 * KPTI enabled: we're on the user gsbase at this point, so we
+	 * need to swap back so we can pivot stacks.
+	 *
+	 * The swapgs lfence mitigation is probably not needed here
+	 * since a mis-speculation of the above branch would imply KPTI
+	 * is disabled, but we'll do so anyway.
+	 */
 	swapgs
+	lfence
 	mov	%r13, %gs:CPU_KPTI_R13
 	PIVOT_KPTI_STK(%r13)
 	SET_USER_CR3(%r13)
 	mov	%gs:CPU_KPTI_R13, %r13
-	/* Zero these to make sure they didn't leak from a kernel trap */
+	/* Zero these to make sure they didn't leak from a kernel trap. */
 	movq	$0, %gs:CPU_KPTI_R13
 	movq	$0, %gs:CPU_KPTI_R14
+	/* And back to user gsbase again. */
 	swapgs
 1:
 	iretq
--- a/usr/src/uts/i86pc/os/cpuid.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/i86pc/os/cpuid.c	Thu Nov 14 23:30:04 2019 +0000
@@ -910,6 +910,7 @@
  * more work in the system to mitigate against:
  *
  *   - Spectre v1
+ *   - swapgs (Spectre v1 variant)
  *   - Spectre v2
  *   - Meltdown (Spectre v3)
  *   - Rogue Register Read (Spectre v3a)
@@ -926,7 +927,7 @@
  * overall approach that the system has taken to address these as well as their
  * shortcomings. Unfortunately, not all of the above have been handled today.
  *
- * SPECTRE FAMILY (Spectre v2, ret2spec, SpectreRSB)
+ * SPECTRE v2, ret2spec, SpectreRSB
  *
  * The second variant of the spectre attack focuses on performing branch target
  * injection. This generally impacts indirect call instructions in the system.
@@ -1035,11 +1036,43 @@
  * it may make more sense to investigate using prediction barriers as the whole
  * system is only executing a single instruction at a time while in kmdb.
  *
- * SPECTRE FAMILY (v1, v4)
+ * SPECTRE v1, v4
  *
  * The v1 and v4 variants of spectre are not currently mitigated in the
  * system and require other classes of changes to occur in the code.
  *
+ * SPECTRE v1 (SWAPGS VARIANT)
+ *
+ * Spectre v1 vulnerabilities aren't all about bounds checks; they can
+ * generally affect any branch-dependent code. The swapgs issue is one
+ * variant of this. If we are coming in from userspace, we can have code like
+ * this:
+ *
+ *	cmpw	$KCS_SEL, REGOFF_CS(%rsp)
+ *	je	1f
+ *	movq	$0, REGOFF_SAVFP(%rsp)
+ *	swapgs
+ *	1:
+ *	movq	%gs:CPU_THREAD, %rax
+ *
+ * If an attacker can cause a mis-speculation of the branch here, we could skip
+ * the needed swapgs, and use the /user/ %gsbase as the base of the %gs-based
+ * load. If subsequent code can act as the usual Spectre cache gadget, this
+ * would potentially allow KPTI bypass. To fix this, we need an lfence prior to
+ * any use of the %gs override.
+ *
+ * The other case is also an issue: if we're coming into a trap from kernel
+ * space, we could mis-speculate and swapgs the user %gsbase back in prior to
+ * using it. AMD systems are not vulnerable to this version, as a swapgs is
+ * serializing with respect to subsequent uses. But as AMD /does/ need the other
+ * case, and the fix is the same in both cases (an lfence at the branch target
+ * 1: in this example), we'll just do it unconditionally.
+ *
+ * Note that we don't enable user-space "wrgsbase" via CR4_FSGSBASE, making it
+ * harder for user-space to actually set a useful %gsbase value: although it's
+ * not clear, it might still be feasible via lwp_setprivate(), so we mitigate
+ * anyway.
+ *
  * MELTDOWN
  *
  * Meltdown, or spectre v3, allowed a user process to read any data in their
@@ -1159,12 +1192,13 @@
  * and what's done in various places:
  *
  *  - Spectre v1: Not currently mitigated
+ *  - swapgs: lfences after swapgs paths
  *  - Spectre v2: Retpolines/RSB Stuffing or EIBRS if HW support
  *  - Meltdown: Kernel Page Table Isolation
  *  - Spectre v3a: Updated CPU microcode
  *  - Spectre v4: Not currently mitigated
  *  - SpectreRSB: SMEP and RSB Stuffing
- *  - L1TF: spec_uarch_flush, smt exclusion, requires microcode
+ *  - L1TF: spec_uarch_flush, SMT exclusion, requires microcode
  *  - MDS: x86_md_clear, requires microcode, disabling hyper threading
  *
  * The following table indicates the x86 feature set bits that indicate that a
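
The discussion above describes the swapgs flavor of Spectre v1 and the lfence
fix. For contrast, here is a minimal user-level sketch of the same barrier
applied to the classic bounds-check form of Spectre v1; it is illustrative
only and not part of this changeset.

#include <stdint.h>
#include <stddef.h>

/* Serializing barrier: blocks loads issued under a mis-predicted branch. */
static inline void
spec_barrier(void)
{
	__asm__ __volatile__("lfence" ::: "memory");
}

uint8_t
read_checked(const uint8_t *array, size_t len, size_t idx)
{
	if (idx >= len)
		return (0);
	spec_barrier();		/* fence before the index-dependent load */
	return (array[idx]);
}
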
--- a/usr/src/uts/i86pc/sys/immu.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/i86pc/sys/immu.h	Thu Nov 14 23:30:04 2019 +0000
@@ -130,11 +130,11 @@
 	kmutex_t	dr_lock;   /* protects the dmar field */
 	struct immu	*dr_immu;
 	dev_info_t	*dr_dip;
-	uint16_t 	dr_seg;
-	uint64_t 	dr_regs;
+	uint16_t	dr_seg;
+	uint64_t	dr_regs;
 	boolean_t	dr_include_all;
-	list_t 		dr_scope_list;
-	list_node_t 	dr_node;
+	list_t		dr_scope_list;
+	list_node_t	dr_node;
 } drhd_t;
 
 typedef struct rmrr {
@@ -638,7 +638,7 @@
  * Enough space to hold the decimal number of any device instance.
  * Used for device/cache names.
  */
-#define	IMMU_ISTRLEN 	11	/* log10(2^31)  + 1 */
+#define	IMMU_ISTRLEN	11	/* log10(2^31)  + 1 */
 
 /* properties that control DVMA */
 #define	DDI_DVMA_MAPTYPE_ROOTNEX_PROP	"immu-dvma-mapping"
@@ -677,7 +677,7 @@
 	/* list node for list of domains off immu */
 	list_node_t		dom_immu_node;
 
-	mod_hash_t 		*dom_cookie_hash;
+	mod_hash_t		*dom_cookie_hash;
 
 	/* topmost device in domain; usually the device itself (non-shared) */
 	dev_info_t		*dom_dip;
@@ -944,7 +944,7 @@
 
 /* registers interrupt handler for IOMMU unit */
 void immu_intr_register(immu_t *immu);
-int immu_intr_handler(immu_t *immu);
+uint_t immu_intr_handler(caddr_t, caddr_t);
 
 
 /* immu_qinv.c interfaces */
--- a/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c	Thu Nov 14 23:30:04 2019 +0000
@@ -74,7 +74,7 @@
 #define	GCPU_XPV_MCH_POLL_NO_REARM	NULL
 
 static uint_t
-gcpu_xpv_virq_intr(void)
+gcpu_xpv_virq_intr(caddr_t arg __unused, caddr_t arg1 __unused)
 {
 	int types[] = { XEN_MC_URGENT, XEN_MC_NONURGENT };
 	uint64_t fetch_id;
@@ -194,7 +194,7 @@
 		 */
 		gcpu_xpv_virq_vect = ec_bind_virq_to_irq(VIRQ_MCA, 0);
 		(void) add_avintr(NULL, gcpu_xpv_virq_level,
-		    (avfunc)gcpu_xpv_virq_intr, "MCA", gcpu_xpv_virq_vect,
+		    gcpu_xpv_virq_intr, "MCA", gcpu_xpv_virq_vect,
 		    NULL, NULL, NULL, NULL);
 	}
 }
--- a/usr/src/uts/i86xpv/io/psm/xpv_psm.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/i86xpv/io/psm/xpv_psm.c	Thu Nov 14 23:30:04 2019 +0000
@@ -223,14 +223,13 @@
 }
 
 /* xen_psm NMI handler */
-/*ARGSUSED*/
-static void
-xen_psm_nmi_intr(caddr_t arg, struct regs *rp)
+static uint_t
+xen_psm_nmi_intr(caddr_t arg __unused, caddr_t arg1 __unused)
 {
 	xen_psm_num_nmis++;
 
 	if (!lock_try(&xen_psm_nmi_lock))
-		return;
+		return (DDI_INTR_UNCLAIMED);
 
 	if (xen_psm_kmdb_on_nmi && psm_debugger()) {
 		debug_enter("NMI received: entering kmdb\n");
@@ -247,6 +246,7 @@
 	}
 
 	lock_clear(&xen_psm_nmi_lock);
+	return (DDI_INTR_CLAIMED);
 }
 
 static void
@@ -294,7 +294,7 @@
 	/* add nmi handler - least priority nmi handler */
 	LOCK_INIT_CLEAR(&xen_psm_nmi_lock);
 
-	if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr,
+	if (!psm_add_nmintr(0, xen_psm_nmi_intr,
 	    "xVM_psm NMI handler", (caddr_t)NULL))
 		cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler");
 }
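
The immu, gcpu_xpv and xen_psm hunks above all make the same change: each
handler is given the common avintr shape, two caddr_t arguments and a uint_t
claim status, so the (avfunc) cast at the registration call can be dropped.
A standalone sketch of that shape follows; the typedefs and constants are
stand-ins for the kernel headers, using the usual DDI claim values.

#include <stdio.h>
#include <stddef.h>

typedef char *caddr_t;			/* stand-in for sys/types.h */
typedef unsigned int uint_t;
typedef uint_t (*avfunc)(caddr_t, caddr_t);

#define	DDI_INTR_UNCLAIMED	0	/* stand-ins for the DDI values */
#define	DDI_INTR_CLAIMED	1

static uint_t
demo_intr(caddr_t arg, caddr_t arg1)
{
	(void) arg;
	(void) arg1;
	/* A real handler would poll its device here before claiming. */
	return (DDI_INTR_CLAIMED);
}

int
main(void)
{
	avfunc f = demo_intr;	/* assigns cleanly: no (avfunc) cast */
	printf("claimed = %u\n", f(NULL, NULL));
	return (0);
}
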
--- a/usr/src/uts/intel/amd64/ml/amd64.il	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/intel/amd64/ml/amd64.il	Thu Nov 14 23:30:04 2019 +0000
@@ -23,6 +23,10 @@
  * Use is subject to license terms.
  */
 
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
 /
 / In-line functions for amd64 kernels.
 /
@@ -189,34 +193,26 @@
         movw    %di, %gs
         .end
 
-	/*
-	 * OPTERON_ERRATUM_88 requires mfence
-	 */
-        .inline __swapgs, 0
-        mfence
-        swapgs
-	.end
-
 /*
  * prefetch 64 bytes
  */
 
- 	.inline	prefetch_read_many,8
+	.inline	prefetch_read_many,8
 	prefetcht0	(%rdi)
 	prefetcht0	32(%rdi)
 	.end
 
- 	.inline	prefetch_read_once,8
+	.inline	prefetch_read_once,8
 	prefetchnta	(%rdi)
 	prefetchnta	32(%rdi)
 	.end
 
- 	.inline	prefetch_write_many,8
+	.inline	prefetch_write_many,8
 	prefetcht0	(%rdi)
 	prefetcht0	32(%rdi)
 	.end
 
- 	.inline	prefetch_write_once,8
+	.inline	prefetch_write_once,8
 	prefetcht0	(%rdi)
 	prefetcht0	32(%rdi)
 	.end
--- a/usr/src/uts/intel/amd64/sys/privregs.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/intel/amd64/sys/privregs.h	Thu Nov 14 23:30:04 2019 +0000
@@ -24,6 +24,10 @@
  * Use is subject to license terms.
  */
 
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
 #ifndef	_AMD64_SYS_PRIVREGS_H
 #define	_AMD64_SYS_PRIVREGS_H
 
@@ -206,7 +210,8 @@
 	je	6f;				\
 	movq	$0, REGOFF_SAVFP(%rsp);		\
 	SWAPGS;					\
-6:	CLEAN_CS
+6:	lfence; /* swapgs mitigation */		\
+	CLEAN_CS
 
 #define	INTR_POP			\
 	leaq	sys_lcall32(%rip), %r11;\
@@ -216,8 +221,13 @@
 	cmpw	$KCS_SEL, REGOFF_CS(%rsp);\
 	je	8f;			\
 5:	SWAPGS;				\
-8:	addq	$REGOFF_RIP, %rsp
+8:	lfence; /* swapgs mitigation */	\
+	addq	$REGOFF_RIP, %rsp
 
+/*
+ * No need for swapgs mitigation: it's unconditional, and we're heading
+ * back to userspace.
+ */
 #define	USER_POP			\
 	__RESTORE_REGS;			\
 	SWAPGS;				\
--- a/usr/src/uts/intel/asm/cpu.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/intel/asm/cpu.h	Thu Nov 14 23:30:04 2019 +0000
@@ -172,17 +172,6 @@
 	    : "r" (value));
 }
 
-#if !defined(__xpv)
-
-extern __GNU_INLINE void
-__swapgs(void)
-{
-	__asm__ __volatile__(
-	    "mfence; swapgs");
-}
-
-#endif /* !__xpv */
-
 #endif	/* __amd64 */
 
 #endif	/* !__lint && __GNUC__ */
--- a/usr/src/uts/intel/ia32/ml/exception.s	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/intel/ia32/ml/exception.s	Thu Nov 14 23:30:04 2019 +0000
@@ -174,8 +174,9 @@
 	leaq	tr_brand_sys_sysenter(%rip), %r11
 	cmpq	%r11, 24(%rsp)
 	jne	2f
-1:	SWAPGS
-2:	popq	%r11
+1:	swapgs
+2:	lfence /* swapgs mitigation */
+	popq	%r11
 #endif	/* !__xpv */
 
 	INTR_PUSH
--- a/usr/src/uts/intel/ia32/os/sundep.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/intel/ia32/os/sundep.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -551,16 +551,19 @@
 		 *
 		 * We've just mucked up the kernel's gsbase.  Oops.  In
 		 * particular we can't take any traps at all.  Make the newly
-		 * computed gsbase be the hidden gs via __swapgs, and fix
+		 * computed gsbase be the hidden gs via swapgs, and fix
 		 * the kernel's gsbase back again. Later, when we return to
 		 * userland we'll swapgs again restoring gsbase just loaded
 		 * above.
 		 */
-		__swapgs();
+		__asm__ __volatile__("mfence; swapgs");
+
 		rp->r_gs = pcb->pcb_gs;
 
 		/*
-		 * restore kernel's gsbase
+		 * Restore kernel's gsbase. Note that this also serializes any
+		 * attempted speculation from loading the user-controlled
+		 * %gsbase.
 		 */
 		wrmsr(MSR_AMD_GSBASE, kgsbase);
 
--- a/usr/src/uts/intel/io/ipmi/ipmi_main.c	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/intel/io/ipmi/ipmi_main.c	Thu Nov 14 23:30:04 2019 +0000
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  */
 
@@ -151,6 +151,7 @@
 {
 	minor_t minor;
 	ipmi_device_t *dev;
+	id_t mid;
 
 	if (ipmi_attached == B_FALSE)
 		return (ENXIO);
@@ -162,8 +163,9 @@
 	if (flag & FEXCL)
 		return (ENOTSUP);
 
-	if ((minor = (minor_t)id_alloc_nosleep(minor_ids)) == 0)
+	if ((mid = id_alloc_nosleep(minor_ids)) == -1)
 		return (ENODEV);
+	minor = (minor_t)mid;
 
 	/* Initialize the per file descriptor data. */
 	dev = kmem_zalloc(sizeof (ipmi_device_t), KM_SLEEP);
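
The fix above (issue 11922) hinges on a sign-narrowing bug: id_alloc_nosleep()
reports failure as -1 in a signed id_t, and the old code narrowed the result
to the unsigned minor_t before comparing, and compared against 0 at that. A
self-contained demonstration of the bug class, with stand-in typedefs rather
than the kernel's:

#include <stdio.h>
#include <stdint.h>

typedef int32_t demo_id_t;	/* stand-in for id_t */
typedef uint32_t demo_minor_t;	/* stand-in for minor_t */

static demo_id_t
demo_id_alloc_nosleep(void)
{
	return (-1);		/* simulate allocation failure */
}

int
main(void)
{
	/* Buggy: -1 narrows to 0xFFFFFFFF, so "== 0" never fires. */
	demo_minor_t minor = (demo_minor_t)demo_id_alloc_nosleep();
	if (minor == 0)
		printf("buggy check caught the failure\n");	/* unreached */

	/* Fixed: test the signed result first, then narrow. */
	demo_id_t mid = demo_id_alloc_nosleep();
	if (mid == -1)
		printf("fixed check caught the failure\n");
	else
		minor = (demo_minor_t)mid;
	return (0);
}
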
--- a/usr/src/uts/intel/kdi/kdi_asm.s	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/intel/kdi/kdi_asm.s	Thu Nov 14 23:30:04 2019 +0000
@@ -23,7 +23,7 @@
  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 /*
@@ -271,6 +271,9 @@
 	 * KDI_SAVE_REGS macro to prevent a usermode process's GSBASE from being
 	 * blown away.  On the hypervisor, we don't need to do this, since it's
 	 * ensured we're on our requested kernel GSBASE already.
+	 *
+	 * No need to worry about swapgs speculation here as it's unconditional
+	 * and via wrmsr anyway.
 	 */
 	subq	$10, %rsp
 	sgdt	(%rsp)
--- a/usr/src/uts/intel/sys/archsystm.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/intel/sys/archsystm.h	Thu Nov 14 23:30:04 2019 +0000
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #ifndef _SYS_ARCHSYSTM_H
@@ -94,10 +94,8 @@
 #endif
 extern void sys_sysenter();
 extern void tr_sys_sysenter();
-extern void _sys_sysenter_post_swapgs();
 extern void brand_sys_sysenter();
 extern void tr_brand_sys_sysenter();
-extern void _brand_sys_sysenter_post_swapgs();
 
 extern void dosyscall(void);
 
--- a/usr/src/uts/intel/sys/segments.h	Fri Nov 08 17:08:44 2019 +0100
+++ b/usr/src/uts/intel/sys/segments.h	Thu Nov 14 23:30:04 2019 +0000
@@ -2,7 +2,7 @@
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 /*
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #ifndef	_SYS_SEGMENTS_H
@@ -179,7 +179,6 @@
 extern void __set_es(selector_t);
 extern void __set_fs(selector_t);
 extern void __set_gs(selector_t);
-extern void __swapgs(void);
 #endif	/* __amd64 */
 
 #if defined(__amd64)