changeset 19229:9e104f9e3592

11820 upstream Nexenta iSCSI and COMSTAR fixes
Portions contributed by: Albert Lee <trisk@nexenta.com>
Portions contributed by: Alex Deiter <alex.deiter@nexenta.com>
Portions contributed by: Bayard G. Bell <bayard.bell@nexenta.com>
Portions contributed by: Dan Fields <dan.fields@nexenta.com>
Portions contributed by: Dan McDonald <danmcd@nexenta.com>
Portions contributed by: Dmitry Gromada <dmitry.gromada@nexenta.com>
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Portions contributed by: kcrowenex <kevin.crowe@nexenta.com>
Portions contributed by: Matt Barden <matt.barden@nexenta.com>
Portions contributed by: Michael Tsymbalyuk <michael.tsymbalyuk@nexenta.com>
Portions contributed by: Rob Gittins <rob.gittins@nexenta.com>
Portions contributed by: Steve Ma <steve.ma@nexenta.com>
Portions contributed by: Tony Nguyen <tony.nguyen@nexenta.com>
Portions contributed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Bayard Bell <bayard.bell@nexenta.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan Fields <dan.fields@nexenta.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>
Reviewed by: Jason King <jason.brian.king@gmail.com>
Reviewed by: Jean McCormack <jean.mccormack@nexenta.com>
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Max Grossman <max.grossman@delphix.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Saso Kiselkov <skiselkov.ml@gmail.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: tim Jacobson <tim.jacobson@nexenta.com>
Reviewed by: Tony Nguyen <tony.nguyen@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Approved by: Dan McDonald <danmcd@joyent.com>
author Rick McNeal <rick.mcneal@nexenta.com>
date Thu, 14 Nov 2019 07:43:52 -0700
parents 87f627c2b1db
children bedf7af858de
files exception_lists/hdrchk usr/src/lib/libiscsit/common/libiscsit.h usr/src/uts/common/Makefile.files usr/src/uts/common/fs/doorfs/door_sys.c usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.c usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.h usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h usr/src/uts/common/io/comstar/port/fct/fct.c usr/src/uts/common/io/comstar/port/iscsit/iscsit.c usr/src/uts/common/io/comstar/port/iscsit/iscsit.h usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c usr/src/uts/common/io/comstar/port/pppt/pppt.h usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c usr/src/uts/common/io/comstar/stmf/lun_map.c usr/src/uts/common/io/comstar/stmf/lun_map.h usr/src/uts/common/io/comstar/stmf/stmf.c usr/src/uts/common/io/comstar/stmf/stmf_impl.h usr/src/uts/common/io/comstar/stmf/stmf_state.h usr/src/uts/common/io/comstar/stmf/stmf_stats.h usr/src/uts/common/io/idm/idm.c usr/src/uts/common/io/idm/idm_conn_sm.c usr/src/uts/common/io/idm/idm_so.c usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c usr/src/uts/common/sys/idm/idm.h usr/src/uts/common/sys/idm/idm_conn_sm.h usr/src/uts/common/sys/idm/idm_impl.h usr/src/uts/common/sys/lpif.h usr/src/uts/common/sys/portif.h usr/src/uts/common/sys/scsi/generic/commands.h usr/src/uts/common/sys/scsi/generic/mode.h usr/src/uts/common/sys/scsi/scsi_names.h usr/src/uts/common/sys/stmf.h usr/src/uts/common/sys/stmf_defines.h
diffstat 42 files changed, 3685 insertions(+), 1543 deletions(-)
--- a/exception_lists/hdrchk	Fri Jun 21 19:22:54 2019 -0400
+++ b/exception_lists/hdrchk	Thu Nov 14 07:43:52 2019 -0700
@@ -337,6 +337,7 @@
 usr/src/uts/common/io/bnxe/bnxe_debug.h
 usr/src/uts/common/io/bnxe/bnxe.h
 usr/src/uts/common/io/bnxe/version.h
+usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h
 usr/src/uts/common/io/cxgbe/firmware/*
 usr/src/uts/common/io/cxgbe/common/t4_msg.h
 usr/src/uts/common/io/cxgbe/common/t4_regs.h
--- a/usr/src/lib/libiscsit/common/libiscsit.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/lib/libiscsit/common/libiscsit.h	Thu Nov 14 07:43:52 2019 -0700
@@ -23,7 +23,7 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #ifndef	_LIBISCSIT_H
@@ -40,7 +40,7 @@
 extern "C" {
 #endif
 
-#define	MAX_TARGETS 255 /* maximum targets that may be created */
+#define	MAX_TARGETS	4095 /* maximum targets that may be created */
 #define	MAX_TPGT	256
 #define	CFG_TPGTLIST	"tpgt-list"
 
--- a/usr/src/uts/common/Makefile.files	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/Makefile.files	Thu Nov 14 07:43:52 2019 -0700
@@ -922,7 +922,7 @@
 
 STMF_OBJS += lun_map.o stmf.o
 
-STMF_SBD_OBJS += sbd.o sbd_scsi.o sbd_pgr.o sbd_zvol.o
+STMF_SBD_OBJS += sbd.o sbd_scsi.o sbd_pgr.o sbd_zvol.o ats_copy_mgr.o
 
 SYSMSG_OBJS +=	sysmsg.o
 
--- a/usr/src/uts/common/fs/doorfs/door_sys.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/fs/doorfs/door_sys.c	Thu Nov 14 07:43:52 2019 -0700
@@ -81,7 +81,7 @@
  * door_upcall.  Need to guard against a process returning huge amounts
  * of data and getting the kernel stuck in kmem_alloc.
  */
-size_t	door_max_upcall_reply = 1024 * 1024;
+size_t	door_max_upcall_reply = 4 * 1024 * 1024;
 
 /*
  * Maximum number of descriptors allowed to be passed in a single
@@ -2725,7 +2725,7 @@
  */
 static int
 door_results(kthread_t *caller, caddr_t data_ptr, size_t data_size,
-		door_desc_t *desc_ptr, uint_t desc_num)
+    door_desc_t *desc_ptr, uint_t desc_num)
 {
 	door_client_t	*ct = DOOR_CLIENT(caller->t_door);
 	door_upcall_t	*dup = ct->d_upcall;
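
The comment above door_max_upcall_reply explains why the kernel caps the reply
buffer it is willing to allocate on behalf of a door upcall; this changeset
raises that cap from 1 MiB to 4 MiB. A minimal user-space sketch of the guard
pattern (door_check_reply_size() and the E2BIG choice are illustrative, not
the kernel's actual code path):

	#include <errno.h>
	#include <stddef.h>

	static size_t door_max_upcall_reply = 4 * 1024 * 1024;

	/* Refuse replies that would pin huge kernel allocations. */
	static int
	door_check_reply_size(size_t reply_size)
	{
		if (reply_size > door_max_upcall_reply)
			return (E2BIG);
		return (0);
	}
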
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.c	Thu Nov 14 07:43:52 2019 -0700
@@ -0,0 +1,944 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+#include <sys/conf.h>
+#include <sys/file.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/modctl.h>
+#include <sys/scsi/scsi.h>
+#include <sys/scsi/impl/scsi_reset_notify.h>
+#include <sys/scsi/generic/mode.h>
+#include <sys/disp.h>
+#include <sys/byteorder.h>
+#include <sys/atomic.h>
+#include <sys/sdt.h>
+#include <sys/dkio.h>
+
+#include <sys/dmu.h>
+#include <sys/txg.h>
+#include <sys/refcount.h>
+#include <sys/zvol.h>
+
+#include <sys/stmf.h>
+#include <sys/lpif.h>
+#include <sys/portif.h>
+#include <sys/stmf_ioctl.h>
+#include <sys/stmf_sbd_ioctl.h>
+
+#include "stmf_sbd.h"
+#include "sbd_impl.h"
+
+/* ATS tuning parameters */
+#define	OVERLAP_OFF 0
+#define	OVERLAP_LOW 1
+#define	OVERLAP_MEDIUM 2
+#define	OVERLAP_HIGH 3
+uint8_t ats_overlap_check = OVERLAP_LOW; /* check for rw overlap with ATS */
+
+uint8_t HardwareAcceleratedLocking = 1; /* 0 for disabled */
+uint8_t HardwareAcceleratedMove = 1;
+uint64_t sbd_list_length = 0;
+
+#define	SBD_ATS_MAX_NBLKS	32
+/* ATS routines. */
+uint8_t
+sbd_ats_max_nblks(void)
+{
+	if (HardwareAcceleratedLocking == 0)
+		return (0);
+	return (SBD_ATS_MAX_NBLKS);
+}
+
+#define	is_overlapping(start1, len1, start2, len2) \
+	((start2) > (start1) ? ((start2) - (start1)) < (len1) : \
+	((start1) - (start2)) < (len2))
+
+/*ARGSUSED*/
+static sbd_status_t
+sbd_ats_do_handling_before_io(scsi_task_t *task, struct sbd_lu *sl,
+    uint64_t lba, uint64_t count, uint32_t flags)
+{
+	sbd_status_t ret = SBD_SUCCESS;
+	ats_state_t *ats_state, *ats_state_ret;
+	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
+	uint8_t cdb0 = task->task_cdb[0];
+
+	if (scmd == NULL)
+		return (SBD_SUCCESS);
+
+	if (HardwareAcceleratedLocking == 0)
+		return (SBD_SUCCESS);
+	/*
+	 * If ATS overlap checking is disabled just return.  The check
+	 * is deliberately not done in the function that removes items from
+	 * the list, which allows this value to be changed at runtime.  If it
+	 * is turned on at runtime the remove will just start taking items
+	 * off the list.  If it is turned off at runtime the list is still
+	 * cleaned up.
+	 */
+	if (ats_overlap_check == OVERLAP_OFF)
+		return (SBD_SUCCESS);
+
+	/* overlap checking for COMPARE AND WRITE commands only */
+	if (ats_overlap_check == OVERLAP_LOW) {
+		if (cdb0 != SCMD_COMPARE_AND_WRITE)
+			return (SBD_SUCCESS);
+	}
+
+	/* overlap checking for COMPARE AND WRITE and WRITE commands only */
+	if (ats_overlap_check == OVERLAP_MEDIUM) {
+		if ((cdb0 != SCMD_COMPARE_AND_WRITE) && (cdb0 != SCMD_WRITE))
+			return (SBD_SUCCESS);
+	}
+
+	mutex_enter(&sl->sl_lock);
+	/*
+	 * if the list is empty then just add the element to the list and
+	 * return success. There is no overlap.  This is done for every
+	 * read, write or compare and write.
+	 */
+	if (list_is_empty(&sl->sl_ats_io_list)) {
+		goto done;
+	}
+
+	/*
+	 * There are inflight operations.  As a result the list must be scanned
+	 * and if there are any overlaps then SBD_BUSY should be returned.
+	 *
+	 * Duplicate reads and writes are allowed and kept on the list
+	 * since there is no reason that overlapping IO operations should
+	 * be delayed.
+	 *
+	 * A command that conflicts with a running compare and write will
+	 * be rescheduled and rerun.  This is handled by stmf_task_poll_lu.
+	 * There is a possibility that a command can be starved and still
+	 * return busy, which is valid in the SCSI protocol.
+	 */
+
+	for (ats_state = list_head(&sl->sl_ats_io_list); ats_state != NULL;
+	    ats_state = list_next(&sl->sl_ats_io_list, ats_state)) {
+
+		if (is_overlapping(ats_state->as_cur_ats_lba,
+		    ats_state->as_cur_ats_len, lba, count) == 0)
+			continue;
+
+		/* if the task is already listed just return */
+		if (task == ats_state->as_cur_ats_task) {
+			cmn_err(CE_WARN, "sbd_ats_handling_before_io: "
+			    "task %p already on list", (void *) task);
+			ret = SBD_SUCCESS;
+			goto exit;
+		}
+		/*
+		 * the current command is a compare and write, if there is any
+		 * overlap return error
+		 */
+
+		if ((cdb0 == SCMD_COMPARE_AND_WRITE) ||
+		    (ats_state->as_cmd == SCMD_COMPARE_AND_WRITE)) {
+			ret = SBD_BUSY;
+			goto exit;
+		}
+	}
+done:
+	ats_state_ret =
+	    (ats_state_t *)kmem_zalloc(sizeof (ats_state_t), KM_SLEEP);
+	ats_state_ret->as_cur_ats_lba = lba;
+	ats_state_ret->as_cur_ats_len = count;
+	ats_state_ret->as_cmd = cdb0;
+	ats_state_ret->as_cur_ats_task = task;
+	if (list_is_empty(&sl->sl_ats_io_list)) {
+		list_insert_head(&sl->sl_ats_io_list, ats_state_ret);
+	} else {
+		list_insert_tail(&sl->sl_ats_io_list, ats_state_ret);
+	}
+	scmd->flags |= SBD_SCSI_CMD_ATS_RELATED;
+	scmd->ats_state = ats_state_ret;
+	sbd_list_length++;
+	mutex_exit(&sl->sl_lock);
+	return (SBD_SUCCESS);
+
+exit:
+	mutex_exit(&sl->sl_lock);
+	return (ret);
+}
+
+sbd_status_t
+sbd_ats_handling_before_io(scsi_task_t *task, struct sbd_lu *sl,
+    uint64_t lba, uint64_t count)
+{
+	return (sbd_ats_do_handling_before_io(task, sl, lba, count, 0));
+}
+
+void
+sbd_ats_remove_by_task(scsi_task_t *task)
+{
+	ats_state_t *ats_state;
+	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+	sbd_cmd_t *scmd = task->task_lu_private;
+
+	if (scmd == NULL)
+		return;
+	/*
+	 * Scan the list and take the task off of the list. It is possible
+	 * that the call is made in a situation where the task is not
+	 * listed.  That is a valid but unlikely case. If it happens
+	 * just fall through and return.  The list removal is done by
+	 * task, not LBA range, and a task cannot be active for more than
+	 * one command so there is never an issue about removing the
+	 * wrong element.
+	 */
+	mutex_enter(&sl->sl_lock);
+	if (list_is_empty(&sl->sl_ats_io_list)) {
+		mutex_exit(&sl->sl_lock);
+		return;
+	}
+
+	for (ats_state = list_head(&sl->sl_ats_io_list); ats_state != NULL;
+	    ats_state = list_next(&sl->sl_ats_io_list, ats_state)) {
+
+		if (ats_state->as_cur_ats_task == task) {
+			list_remove(&sl->sl_ats_io_list, ats_state);
+			kmem_free(ats_state, sizeof (ats_state_t));
+			scmd->flags &= ~SBD_SCSI_CMD_ATS_RELATED;
+			scmd->ats_state = NULL;
+			sbd_list_length--;
+			break;
+		}
+	}
+	mutex_exit(&sl->sl_lock);
+}
+
+static sbd_status_t
+sbd_compare_and_write(struct scsi_task *task, sbd_cmd_t *scmd,
+    uint32_t *ret_off)
+{
+	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+	uint8_t *buf;
+	sbd_status_t ret;
+	uint64_t addr;
+	uint32_t len, i;
+
+	addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
+	len = (uint32_t)task->task_cdb[13];
+
+	addr <<= sl->sl_data_blocksize_shift;
+	len <<= sl->sl_data_blocksize_shift;
+	buf = kmem_alloc(len, KM_SLEEP);
+	ret = sbd_data_read(sl, task, addr, (uint64_t)len, buf);
+	if (ret != SBD_SUCCESS) {
+		goto compare_and_write_done;
+	}
+	/*
+	 * Can't use bcmp here. We need mismatch offset.
+	 */
+	for (i = 0; i < len; i++) {
+		if (buf[i] != scmd->trans_data[i])
+			break;
+	}
+	if (i != len) {
+		*ret_off = i;
+		ret = SBD_COMPARE_FAILED;
+		goto compare_and_write_done;
+	}
+
+	ret = sbd_data_write(sl, task, addr, (uint64_t)len,
+	    scmd->trans_data + len);
+
+compare_and_write_done:
+	kmem_free(buf, len);
+	return (ret);
+}
+
+static void
+sbd_send_miscompare_status(struct scsi_task *task, uint32_t miscompare_off)
+{
+	uint8_t sd[18];
+
+	task->task_scsi_status = STATUS_CHECK;
+	bzero(sd, 18);
+	sd[0] = 0xF0;
+	sd[2] = 0xe;
+	SCSI_WRITE32(&sd[3], miscompare_off);
+	sd[7] = 10;
+	sd[12] = 0x1D;
+	task->task_sense_data = sd;
+	task->task_sense_length = 18;
+	(void) stmf_send_scsi_status(task, STMF_IOF_LU_DONE);
+}
+
+static void
+sbd_ats_release_resources(struct scsi_task *task)
+{
+	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
+
+	/*
+	 * A few basic checks here to be sure that there are not multiple
+	 * calls going on.  If scmd is null just return.  This is very
+	 * unlikely, but could happen if the task is freed by an abort.
+	 * If nbufs is invalid warn but ignore the error.  Lastly, if the
+	 * trans_data is either null or the length is zero just blow
+	 * off the operation and leak the memory buffer.
+	 */
+	if (scmd == NULL)
+		return;
+
+	if (scmd->nbufs == 0xFF)
+		cmn_err(CE_WARN, "%s invalid buffer count %x", __func__,
+		    scmd->nbufs);
+
+	if ((scmd->trans_data != NULL) && (scmd->trans_data_len != 0))
+		kmem_free(scmd->trans_data, scmd->trans_data_len);
+
+	scmd->trans_data = NULL; /* force panic later if re-entered */
+	scmd->trans_data_len = 0;
+	scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
+}
+
+void
+sbd_handle_ats_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
+    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
+{
+	uint64_t laddr;
+	uint32_t buflen, iolen, miscompare_off;
+	int ndx;
+	sbd_status_t ret;
+
+	if (ATOMIC8_GET(scmd->nbufs) > 0) {
+		atomic_dec_8(&scmd->nbufs);
+	}
+
+	if (dbuf->db_xfer_status != STMF_SUCCESS) {
+		sbd_ats_remove_by_task(task);
+		sbd_ats_release_resources(task);
+		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
+		    dbuf->db_xfer_status, NULL);
+		return;
+	}
+
+	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+		goto ATS_XFER_DONE;
+	}
+
+	/* if state is confused drop the command */
+	if ((scmd->trans_data == NULL) ||
+	    ((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) ||
+	    ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0)) {
+		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+		return;
+	}
+
+	if (ATOMIC32_GET(scmd->len) != 0) {
+		/*
+		 * Initiate the next port xfer to occur in parallel
+		 * with writing this buf.  A side effect of sbd_do_ats_xfer is
+		 * that it may set scmd->len to 0.  This means all the data
+		 * transfers have been started, not that they are done.
+		 */
+		sbd_do_ats_xfer(task, scmd, NULL, 0);
+	}
+
+	/*
+	 * move the most recent data transfer to the temporary buffer
+	 * used for the compare and write function.
+	 */
+	laddr = dbuf->db_relative_offset;
+	for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
+	    (ndx < dbuf->db_sglist_length); ndx++) {
+		iolen = min(dbuf->db_data_size - buflen,
+		    dbuf->db_sglist[ndx].seg_length);
+		if (iolen == 0)
+			break;
+		bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
+		    iolen);
+		buflen += iolen;
+		laddr += (uint64_t)iolen;
+	}
+	task->task_nbytes_transferred += buflen;
+
+ATS_XFER_DONE:
+	if (ATOMIC32_GET(scmd->len) == 0 ||
+	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+		stmf_free_dbuf(task, dbuf);
+		/*
+		 * if this is not the last buffer to be transferred then exit
+		 * and wait for the next buffer.  Once nbufs is 0 then all the
+		 * data has arrived and the compare can be done.
+		 */
+		if (ATOMIC8_GET(scmd->nbufs) > 0)
+			return;
+		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+			sbd_ats_remove_by_task(task);
+			stmf_scsilib_send_status(task, STATUS_CHECK,
+			    STMF_SAA_WRITE_ERROR);
+		} else {
+			ret = sbd_compare_and_write(task, scmd,
+			    &miscompare_off);
+			sbd_ats_remove_by_task(task);
+			sbd_ats_release_resources(task);
+			if (ret != SBD_SUCCESS) {
+				if (ret != SBD_COMPARE_FAILED) {
+					stmf_scsilib_send_status(task,
+					    STATUS_CHECK, STMF_SAA_WRITE_ERROR);
+				} else {
+					sbd_send_miscompare_status(task,
+					    miscompare_off);
+				}
+			} else {
+				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
+			}
+		}
+		return;
+	}
+	sbd_do_ats_xfer(task, scmd, dbuf, dbuf_reusable);
+}
+
+void
+sbd_do_ats_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
+    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
+{
+	uint32_t len;
+
+	if (ATOMIC32_GET(scmd->len) == 0) {
+		if (dbuf != NULL) {
+			stmf_free_dbuf(task, dbuf);
+		}
+		return;
+	}
+
+	if ((dbuf != NULL) &&
+	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
+		/* free current dbuf and allocate a new one */
+		stmf_free_dbuf(task, dbuf);
+		dbuf = NULL;
+	}
+	if (dbuf == NULL) {
+		uint32_t maxsize, minsize, old_minsize;
+
+		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
+		    ATOMIC32_GET(scmd->len);
+		minsize = maxsize >> 2;
+		do {
+			old_minsize = minsize;
+			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
+		} while ((dbuf == NULL) && (old_minsize > minsize) &&
+		    (minsize >= 512));
+		if (dbuf == NULL) {
+			if (ATOMIC8_GET(scmd->nbufs) == 0) {
+				sbd_ats_remove_by_task(task);
+				sbd_ats_release_resources(task);
+				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
+				    STMF_ALLOC_FAILURE, NULL);
+			}
+			return;
+		}
+	}
+
+	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
+	    ATOMIC32_GET(scmd->len);
+
+	dbuf->db_relative_offset = scmd->current_ro;
+	dbuf->db_data_size = len;
+	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
+	(void) stmf_xfer_data(task, dbuf, 0);
+	/*
+	 * scmd->nbufs is the number of outstanding transfers
+	 * scmd->len is the number of bytes remaining to be requested
+	 */
+	atomic_inc_8(&scmd->nbufs);
+	atomic_add_32(&scmd->len, -len);
+	scmd->current_ro += len;
+}
+
+void
+sbd_handle_ats(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
+{
+	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+	uint64_t addr, len;
+	sbd_cmd_t *scmd;
+	stmf_data_buf_t *dbuf;
+	uint8_t do_immediate_data = 0;
+	/* int ret; */
+
+	if (HardwareAcceleratedLocking == 0) {
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_INVALID_OPCODE);
+		return;
+	}
+
+	task->task_cmd_xfer_length = 0;
+	if (task->task_additional_flags &
+	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
+		task->task_expected_xfer_length = 0;
+	}
+	if (sl->sl_flags & SL_WRITE_PROTECTED) {
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_WRITE_PROTECTED);
+		return;
+	}
+	addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
+	len = (uint64_t)task->task_cdb[13];
+
+	if ((task->task_cdb[1]) || (len > SBD_ATS_MAX_NBLKS)) {
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_INVALID_FIELD_IN_CDB);
+		return;
+	}
+	if (len == 0) {
+		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
+		return;
+	}
+
+	/*
+	 * This can be called again. It will return the same handle again.
+	 */
+	if (sbd_ats_handling_before_io(task, sl, addr, len) != SBD_SUCCESS) {
+		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
+			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
+		}
+		return;
+	}
+
+	addr <<= sl->sl_data_blocksize_shift;
+	len <<= sl->sl_data_blocksize_shift;
+
+	task->task_cmd_xfer_length = len << 1;	/* actual amt of data is 2x */
+	if (task->task_additional_flags &
+	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
+		task->task_expected_xfer_length = task->task_cmd_xfer_length;
+	}
+	if ((addr + len) > sl->sl_lu_size) {
+		sbd_ats_remove_by_task(task);
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_LBA_OUT_OF_RANGE);
+		return;
+	}
+
+	len <<= 1;
+
+	if (len != task->task_expected_xfer_length) {
+		sbd_ats_remove_by_task(task);
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_INVALID_FIELD_IN_CDB);
+		return;
+	}
+
+	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
+		if (initial_dbuf->db_data_size > len) {
+			if (initial_dbuf->db_data_size >
+			    task->task_expected_xfer_length) {
+				/* protocol error */
+				sbd_ats_remove_by_task(task);
+				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
+				    STMF_INVALID_ARG, NULL);
+				return;
+			}
+			ASSERT(len <= 0xFFFFFFFFull);
+			initial_dbuf->db_data_size = (uint32_t)len;
+		}
+		do_immediate_data = 1;
+	}
+	dbuf = initial_dbuf;
+
+	if (task->task_lu_private) {
+		scmd = (sbd_cmd_t *)task->task_lu_private;
+	} else {
+		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
+		task->task_lu_private = scmd;
+	}
+
+	/* We don't set the ATS_RELATED flag here */
+	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA;
+	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
+	scmd->nbufs = 0;
+	ASSERT(len <= 0xFFFFFFFFull);
+	scmd->len = (uint32_t)len;
+	scmd->trans_data_len = (uint32_t)len;
+	scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
+	scmd->current_ro = 0;
+
+	if (do_immediate_data) {
+		/*
+		 * Account for data passed in this write command
+		 */
+		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
+		atomic_add_32(&scmd->len, -dbuf->db_data_size);
+		scmd->current_ro += dbuf->db_data_size;
+		dbuf->db_xfer_status = STMF_SUCCESS;
+		sbd_handle_ats_xfer_completion(task, scmd, dbuf, 0);
+	} else {
+		sbd_do_ats_xfer(task, scmd, dbuf, 0);
+	}
+}
+
+/*
+ * SCSI Copy Manager
+ *
+ * SCSI copy manager is the state machine which implements
+ * SCSI extended copy functionality (SPC). There is one
+ * cpmgr instance per extended copy command.
+ *
+ * Exported block-copy functions:
+ *   cpmgr_create()  - Creates the state machine.
+ *   cpmgr_destroy() - Cleans up a completed cpmgr.
+ *   cpmgr_run()     - Performs time bound copy.
+ *   cpmgr_abort()   - Aborts a cpmgr(if not already completed).
+ *   cpmgr_done()    - Tests if the copy is done.
+ */
+
+static void cpmgr_completion_cleanup(cpmgr_t *cm);
+int sbd_check_reservation_conflict(sbd_lu_t *sl, scsi_task_t *task);
+
+static uint8_t sbd_recv_copy_results_op_params[] = {
+    0, 0, 0, 42, 1, 0, 0, 0,
+    0, 2, 0, 1, 0, 0, 0xFF, 0xFF, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0,
+    0xFF, 0xFF, 0, 9, 0, 0, 0, 0, 0,
+    2, 2, 0xE4
+};
+
+cpmgr_handle_t
+cpmgr_create(scsi_task_t *task, uint8_t *params)
+{
+	cpmgr_t *cm = NULL;
+	uint8_t *p;
+	uint32_t plist_len;
+	uint32_t dbl;
+	int i;
+	uint16_t tdlen;
+	uint16_t n;
+
+	cm = kmem_zalloc(sizeof (*cm), KM_NOSLEEP);
+	if (cm == NULL)
+		return (CPMGR_INVALID_HANDLE);
+
+	cm->cm_task = task;
+	p = task->task_cdb;
+	plist_len = READ_SCSI32(&p[10], uint32_t);
+
+	/*
+	 * Assume failure; if all checks pass we will change this to CM_COPYING.
+	 */
+	cm->cm_state = CM_COMPLETE;
+
+	if (plist_len == 0) {
+		cm->cm_status = 0;
+		goto cpmgr_create_done;
+	}
+
+	if (plist_len < CPMGR_PARAM_HDR_LEN) {
+		cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR;
+		goto cpmgr_create_done;
+	} else if ((params[0] != 0) || ((params[1] & 0x18) != 0x18)) {
+		/*
+		 * Current implementation does not allow the use
+		 * of list ID field.
+		 */
+		cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
+		goto cpmgr_create_done;
+	}
+	/* No inline data either */
+	if (*((uint32_t *)(&params[12])) != 0) {
+		cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
+		goto cpmgr_create_done;
+	}
+
+	tdlen = READ_SCSI16(&params[2], uint16_t);
+	if ((tdlen == 0) || (tdlen % CPMGR_TARGET_DESCRIPTOR_SIZE) ||
+	    (plist_len < (CPMGR_PARAM_HDR_LEN + tdlen))) {
+		cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR;
+		goto cpmgr_create_done;
+	}
+	cm->cm_td_count = tdlen / CPMGR_TARGET_DESCRIPTOR_SIZE;
+	if (cm->cm_td_count > CPMGR_MAX_TARGET_DESCRIPTORS) {
+		cm->cm_status = CPMGR_TOO_MANY_TARGET_DESCRIPTORS;
+		goto cpmgr_create_done;
+	}
+	if (plist_len != (CPMGR_PARAM_HDR_LEN + tdlen +
+	    CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE)) {
+		cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR;
+		goto cpmgr_create_done;
+	}
+	for (i = 0; i < cm->cm_td_count; i++) {
+		p = params + CPMGR_PARAM_HDR_LEN;
+		p += i * CPMGR_TARGET_DESCRIPTOR_SIZE;
+		if ((p[0] != CPMGR_IDENT_TARGET_DESCRIPTOR) ||
+		    ((p[5] & 0x30) != 0) || (p[7] != 16)) {
+			cm->cm_status = CPMGR_UNSUPPORTED_TARGET_DESCRIPTOR;
+			goto cpmgr_create_done;
+		}
+		/*
+		 * stmf should be able to find this LU and lock it. Also
+		 * make sure that it is indeed an sbd lu.
+		 */
+		if (((cm->cm_tds[i].td_lu =
+		    stmf_check_and_hold_lu(task, &p[8])) == NULL) ||
+		    (!sbd_is_valid_lu(cm->cm_tds[i].td_lu))) {
+			cm->cm_status = CPMGR_COPY_TARGET_NOT_REACHABLE;
+			goto cpmgr_create_done;
+		}
+		dbl = p[29];
+		dbl <<= 8;
+		dbl |= p[30];
+		dbl <<= 8;
+		dbl |= p[31];
+		cm->cm_tds[i].td_disk_block_len = dbl;
+		cm->cm_tds[i].td_lbasize_shift =
+		    sbd_get_lbasize_shift(cm->cm_tds[i].td_lu);
+	}
+	/* p now points to segment descriptor */
+	p += CPMGR_TARGET_DESCRIPTOR_SIZE;
+
+	if (p[0] != CPMGR_B2B_SEGMENT_DESCRIPTOR) {
+		cm->cm_status = CPMGR_UNSUPPORTED_SEGMENT_DESCRIPTOR;
+		goto cpmgr_create_done;
+	}
+	n = READ_SCSI16(&p[2], uint16_t);
+	if (n != (CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE - 4)) {
+		cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
+		goto cpmgr_create_done;
+	}
+
+	n = READ_SCSI16(&p[4], uint16_t);
+	if (n >= cm->cm_td_count) {
+		cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
+		goto cpmgr_create_done;
+	}
+	cm->cm_src_td_ndx = n;
+
+	n = READ_SCSI16(&p[6], uint16_t);
+	if (n >= cm->cm_td_count) {
+		cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
+		goto cpmgr_create_done;
+	}
+	cm->cm_dst_td_ndx = n;
+
+	cm->cm_copy_size = READ_SCSI16(&p[10], uint64_t);
+	cm->cm_copy_size *= (uint64_t)(cm->cm_tds[(p[1] & 2) ?
+	    cm->cm_dst_td_ndx : cm->cm_src_td_ndx].td_disk_block_len);
+	cm->cm_src_offset = (READ_SCSI64(&p[12], uint64_t)) <<
+	    cm->cm_tds[cm->cm_src_td_ndx].td_lbasize_shift;
+	cm->cm_dst_offset = (READ_SCSI64(&p[20], uint64_t)) <<
+	    cm->cm_tds[cm->cm_dst_td_ndx].td_lbasize_shift;
+
+	/* Allocate the xfer buffer. */
+	cm->cm_xfer_buf = kmem_alloc(CPMGR_XFER_BUF_SIZE, KM_NOSLEEP);
+	if (cm->cm_xfer_buf == NULL) {
+		cm->cm_status = CPMGR_INSUFFICIENT_RESOURCES;
+		goto cpmgr_create_done;
+	}
+
+	/*
+	 * No need to check block limits. cpmgr_run() will
+	 * take care of that.
+	 */
+
+	/* All checks passed */
+	cm->cm_state = CM_COPYING;
+
+cpmgr_create_done:
+	if (cm->cm_state == CM_COMPLETE) {
+		cpmgr_completion_cleanup(cm);
+	}
+	return (cm);
+}
+
+void
+cpmgr_destroy(cpmgr_handle_t h)
+{
+	cpmgr_t *cm = (cpmgr_t *)h;
+
+	ASSERT(cm->cm_state == CM_COMPLETE);
+	kmem_free(cm, sizeof (*cm));
+}
+
+static void
+cpmgr_completion_cleanup(cpmgr_t *cm)
+{
+	int i;
+
+	for (i = 0; i < cm->cm_td_count; i++) {
+		if (cm->cm_tds[i].td_lu) {
+			stmf_release_lu(cm->cm_tds[i].td_lu);
+			cm->cm_tds[i].td_lu = NULL;
+		}
+	}
+	if (cm->cm_xfer_buf) {
+		kmem_free(cm->cm_xfer_buf, CPMGR_XFER_BUF_SIZE);
+		cm->cm_xfer_buf = NULL;
+	}
+}
+
+void
+cpmgr_run(cpmgr_t *cm, clock_t preemption_point)
+{
+	stmf_lu_t *lu;
+	sbd_lu_t *src_slu, *dst_slu;
+	uint64_t xfer_size, start, end;
+	sbd_status_t ret;
+
+	/*
+	 * XXX: Handle reservations and read-only LU here.
+	 */
+	ASSERT(cm->cm_state == CM_COPYING);
+	lu = cm->cm_tds[cm->cm_src_td_ndx].td_lu;
+	src_slu = (sbd_lu_t *)lu->lu_provider_private;
+	if (sbd_check_reservation_conflict(src_slu, cm->cm_task)) {
+		cpmgr_abort(cm, CPMGR_RESERVATION_CONFLICT);
+		return;
+	}
+
+	lu = cm->cm_tds[cm->cm_dst_td_ndx].td_lu;
+	dst_slu = (sbd_lu_t *)lu->lu_provider_private;
+	if (sbd_check_reservation_conflict(dst_slu, cm->cm_task)) {
+		cpmgr_abort(cm, CPMGR_RESERVATION_CONFLICT);
+		return;
+	}
+	if (dst_slu->sl_flags & SL_WRITE_PROTECTED) {
+		cpmgr_abort(cm, STMF_SAA_WRITE_PROTECTED);
+		return;
+	}
+
+	while (cm->cm_size_done < cm->cm_copy_size) {
+		xfer_size = ((cm->cm_copy_size - cm->cm_size_done) >
+		    CPMGR_XFER_BUF_SIZE) ? CPMGR_XFER_BUF_SIZE :
+		    (cm->cm_copy_size - cm->cm_size_done);
+		start = cm->cm_src_offset + cm->cm_size_done;
+		ret = sbd_data_read(src_slu, cm->cm_task, start, xfer_size,
+		    cm->cm_xfer_buf);
+		if (ret != SBD_SUCCESS) {
+			if (ret == SBD_IO_PAST_EOF) {
+				cpmgr_abort(cm, CPMGR_LBA_OUT_OF_RANGE);
+			} else {
+				cpmgr_abort(cm,
+				    CPMGR_THIRD_PARTY_DEVICE_FAILURE);
+			}
+			break;
+		}
+		end = cm->cm_dst_offset + cm->cm_size_done;
+		ret = sbd_data_write(dst_slu, cm->cm_task, end, xfer_size,
+		    cm->cm_xfer_buf);
+		if (ret != SBD_SUCCESS) {
+			if (ret == SBD_IO_PAST_EOF) {
+				cpmgr_abort(cm, CPMGR_LBA_OUT_OF_RANGE);
+			} else {
+				cpmgr_abort(cm,
+				    CPMGR_THIRD_PARTY_DEVICE_FAILURE);
+			}
+			break;
+		}
+		cm->cm_size_done += xfer_size;
+		if (ddi_get_lbolt() >= preemption_point)
+			break;
+	}
+	if (cm->cm_size_done == cm->cm_copy_size) {
+		cm->cm_state = CM_COMPLETE;
+		cm->cm_status = 0;
+		cpmgr_completion_cleanup(cm);
+	}
+}
+
+void
+cpmgr_abort(cpmgr_t *cm, uint32_t s)
+{
+	if (cm->cm_state == CM_COPYING) {
+		cm->cm_state = CM_COMPLETE;
+		cm->cm_status = s;
+		cpmgr_completion_cleanup(cm);
+	}
+}
+
+void
+sbd_handle_xcopy(scsi_task_t *task, stmf_data_buf_t *dbuf)
+{
+	uint32_t cmd_xfer_len;
+
+	if (HardwareAcceleratedMove == 0) {
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_INVALID_OPCODE);
+		return;
+	}
+
+	cmd_xfer_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
+
+	if (cmd_xfer_len == 0) {
+		task->task_cmd_xfer_length = 0;
+		if (task->task_additional_flags &
+		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
+			task->task_expected_xfer_length = 0;
+		}
+		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
+		return;
+	}
+
+	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
+}
+
+void
+sbd_handle_xcopy_xfer(scsi_task_t *task, uint8_t *buf)
+{
+	cpmgr_handle_t h;
+	uint32_t s;
+	clock_t tic, end;
+
+	/*
+	 * No need to pass buf size.  It's taken from the CDB.
+	 */
+	h = cpmgr_create(task, buf);
+	if (h == CPMGR_INVALID_HANDLE) {
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    CPMGR_INSUFFICIENT_RESOURCES);
+		return;
+	}
+	tic = drv_usectohz(1000000);
+	end = ddi_get_lbolt() + (CPMGR_DEFAULT_TIMEOUT * tic);
+	while (!cpmgr_done(h)) {
+		if (stmf_is_task_being_aborted(task) || (ddi_get_lbolt() > end))
+			cpmgr_abort(h, CPMGR_THIRD_PARTY_DEVICE_FAILURE);
+		else
+			cpmgr_run(h, ddi_get_lbolt() + tic);
+	}
+	s = cpmgr_status(h);
+	if (s) {
+		if (s == CPMGR_RESERVATION_CONFLICT) {
+			stmf_scsilib_send_status(task,
+			    STATUS_RESERVATION_CONFLICT, 0);
+		} else {
+			stmf_scsilib_send_status(task, STATUS_CHECK, s);
+		}
+	} else {
+		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
+	}
+	cpmgr_destroy(h);
+}
+
+void
+sbd_handle_recv_copy_results(struct scsi_task *task,
+    struct stmf_data_buf *initial_dbuf)
+{
+	uint32_t cdb_len;
+
+	cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
+	if ((task->task_cdb[1] & 0x1F) != 3) {
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_INVALID_FIELD_IN_CDB);
+		return;
+	}
+	sbd_handle_short_read_transfers(task, initial_dbuf,
+	    sbd_recv_copy_results_op_params, cdb_len,
+	    sizeof (sbd_recv_copy_results_op_params));
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.h	Thu Nov 14 07:43:52 2019 -0700
@@ -0,0 +1,157 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+#ifndef	_ATS_COPY_MGR_H
+#define	_ATS_COPY_MGR_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* ATS structures and functions. */
+
+typedef struct ats_state_s {
+	/*
+	 * We actually don't allow I/O which conflicts with the current ATS.
+	 * The conflicting_rw_count is for those I/Os which are currently
+	 * running and are potentially conflicting.
+	 */
+	list_node_t	as_next;
+	uint8_t		as_cmd;
+	uint32_t	as_conflicting_rw_count;
+	uint32_t	as_non_conflicting_rw_count;
+	uint32_t	as_ats_gen_ndx;
+	uint32_t	as_cur_ats_handle;
+	uint64_t	as_cur_ats_lba;
+	uint64_t	as_cur_ats_lba_end;
+	uint64_t	as_cur_ats_len;		/* in nblks */
+	struct scsi_task *as_cur_ats_task;
+} ats_state_t;
+
+/* Since we're technically part of stmf_sbd.h, use some defines here. */
+#define	sl_conflicting_rw_count	sl_ats_state.as_conflicting_rw_count
+#define	sl_non_conflicting_rw_count sl_ats_state.as_non_conflicting_rw_count
+#define	sl_ats_gen_ndx sl_ats_state.as_ats_gen_ndx
+#define	sl_cur_ats_handle sl_ats_state.as_cur_ats_handle
+#define	sl_cur_ats_lba sl_ats_state.as_cur_ats_lba
+#define	sl_cur_ats_len sl_ats_state.as_cur_ats_len
+#define	sl_cur_ats_task sl_ats_state.as_cur_ats_task
+
+struct sbd_cmd;
+struct sbd_lu;
+
+void sbd_handle_ats_xfer_completion(struct scsi_task *, struct sbd_cmd *,
+    struct stmf_data_buf *, uint8_t);
+void sbd_do_ats_xfer(struct scsi_task *, struct sbd_cmd *,
+    struct stmf_data_buf *, uint8_t);
+void sbd_handle_ats(scsi_task_t *, struct stmf_data_buf *);
+void sbd_handle_recv_copy_results(struct scsi_task *, struct stmf_data_buf *);
+void sbd_free_ats_handle(struct scsi_task *, struct sbd_cmd *);
+uint8_t sbd_ats_max_nblks(void);
+void sbd_ats_remove_by_task(scsi_task_t *);
+sbd_status_t sbd_ats_handling_before_io(scsi_task_t *task, struct sbd_lu *sl,
+    uint64_t lba, uint64_t count);
+
+/* Block-copy structures and functions. */
+
+struct scsi_task;
+typedef	void *cpmgr_handle_t;
+
+#define	CPMGR_INVALID_HANDLE		((cpmgr_handle_t)NULL)
+
+#define	CPMGR_DEFAULT_TIMEOUT		30
+
+#define	CPMGR_PARAM_HDR_LEN		16
+#define	CPMGR_IDENT_TARGET_DESCRIPTOR	0xE4
+#define	CPMGR_MAX_TARGET_DESCRIPTORS	2
+#define	CPMGR_TARGET_DESCRIPTOR_SIZE	32
+
+#define	CPMGR_B2B_SEGMENT_DESCRIPTOR		2
+#define	CPMGR_MAX_SEGMENT_DESCRIPTORS		1
+#define	CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE	28
+
+/*
+ * SCSI errors before copy starts.
+ */
+#define	CPMGR_PARAM_LIST_LEN_ERROR		0x051A00
+#define	CPMGR_INVALID_FIELD_IN_PARAM_LIST	0x052600
+#define	CPMGR_TOO_MANY_TARGET_DESCRIPTORS	0x052606
+#define	CPMGR_UNSUPPORTED_TARGET_DESCRIPTOR	0x052607
+#define	CPMGR_TOO_MANY_SEGMENT_DESCRIPTORS	0x052608
+#define	CPMGR_UNSUPPORTED_SEGMENT_DESCRIPTOR	0x052609
+#define	CPMGR_COPY_TARGET_NOT_REACHABLE		0x050D02
+#define	CPMGR_INSUFFICIENT_RESOURCES		0x0B5503
+
+/*
+ * SCSI errors after copy has started.
+ */
+#define	CPMGR_LBA_OUT_OF_RANGE			0x0A2100
+#define	CPMGR_THIRD_PARTY_DEVICE_FAILURE	0x0A0D01
+
+/*
+ * SCSI errors which don't result in STATUS_CHECK.
+ * Use an invalid sense key to mark these.
+ */
+#define	CPMGR_RESERVATION_CONFLICT		0xF00001
+
+typedef enum cm_state {
+	CM_STARTING = 0,
+	CM_COPYING,
+	CM_COMPLETE
+} cm_state_t;
+
+#define	CPMGR_XFER_BUF_SIZE		(128 * 1024)
+
+typedef struct cm_target_desc {
+	stmf_lu_t	*td_lu;
+	uint32_t	td_disk_block_len;
+	uint8_t		td_lbasize_shift;
+} cm_target_desc_t;
+
+/*
+ * Current implementation supports 2 target descriptors (identification type)
+ * for src and dst and one segment descriptor (block -> block).
+ */
+typedef struct cpmgr {
+	cm_target_desc_t	cm_tds[CPMGR_MAX_TARGET_DESCRIPTORS];
+	uint8_t			cm_td_count;
+	uint16_t		cm_src_td_ndx;
+	uint16_t		cm_dst_td_ndx;
+	cm_state_t		cm_state;
+	uint32_t		cm_status;
+	uint64_t		cm_src_offset;
+	uint64_t		cm_dst_offset;
+	uint64_t		cm_copy_size;
+	uint64_t		cm_size_done;
+	void			*cm_xfer_buf;
+	scsi_task_t		*cm_task;
+} cpmgr_t;
+
+#define	cpmgr_done(cm)	(((cpmgr_t *)(cm))->cm_state == CM_COMPLETE)
+#define	cpmgr_status(cm) (((cpmgr_t *)(cm))->cm_status)
+
+cpmgr_handle_t cpmgr_create(struct scsi_task *task, uint8_t *params);
+void cpmgr_destroy(cpmgr_handle_t h);
+void cpmgr_run(cpmgr_t *cm, clock_t preemption_point);
+void cpmgr_abort(cpmgr_t *cm, uint32_t s);
+void sbd_handle_xcopy_xfer(scsi_task_t *, uint8_t *);
+void sbd_handle_xcopy(scsi_task_t *, stmf_data_buf_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _ATS_COPY_MGR_H */
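
The exported cpmgr functions above are driven as a simple polling loop by
sbd_handle_xcopy_xfer() in ats_copy_mgr.c: create the state machine, run it
in one-second slices until it completes or times out, then map cm_status to a
SCSI status. A condensed sketch of that caller (error reporting trimmed; this
mirrors the code in this changeset rather than adding new behavior):

	cpmgr_handle_t h;
	clock_t tic, end;

	h = cpmgr_create(task, buf);
	if (h == CPMGR_INVALID_HANDLE)
		return;		/* report CPMGR_INSUFFICIENT_RESOURCES */

	tic = drv_usectohz(1000000);	/* one second in lbolt ticks */
	end = ddi_get_lbolt() + (CPMGR_DEFAULT_TIMEOUT * tic);
	while (!cpmgr_done(h)) {
		if (stmf_is_task_being_aborted(task) ||
		    (ddi_get_lbolt() > end))
			cpmgr_abort(h, CPMGR_THIRD_PARTY_DEVICE_FAILURE);
		else
			cpmgr_run(h, ddi_get_lbolt() + tic);
	}
	/* cpmgr_status(h) is 0 on success or a SCSI sense code on error */
	cpmgr_destroy(h);
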
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c	Thu Nov 14 07:43:52 2019 -0700
@@ -25,8 +25,8 @@
  * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
-#include <sys/sysmacros.h>
 #include <sys/conf.h>
+#include <sys/list.h>
 #include <sys/file.h>
 #include <sys/ddi.h>
 #include <sys/sunddi.h>
@@ -56,6 +56,10 @@
 extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
 extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
 extern void sbd_pgr_reset(sbd_lu_t *sl);
+extern uint8_t HardwareAcceleratedLocking;
+extern uint8_t HardwareAcceleratedInit;
+extern uint8_t HardwareAcceleratedMove;
+extern uint8_t sbd_unmap_enable;
 
 static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
     void **result);
@@ -109,6 +113,7 @@
 static kmutex_t		sbd_lock;
 static dev_info_t	*sbd_dip;
 static uint32_t		sbd_lu_count = 0;
+uint8_t sbd_enable_unmap_sync = 0;
 
 /* Global property settings for the logical unit */
 char sbd_vendor_id[]	= "SUN     ";
@@ -155,7 +160,7 @@
 	NULL			/* power */
 };
 
-#define	SBD_NAME	"COMSTAR SBD"
+#define	SBD_NAME	"COMSTAR SBD+ "
 
 static struct modldrv modldrv = {
 	&mod_driverops,
@@ -194,6 +199,14 @@
 	}
 	mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
 	rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);
+
+	if (HardwareAcceleratedLocking == 0)
+		cmn_err(CE_NOTE, "HardwareAcceleratedLocking Disabled");
+	if (HardwareAcceleratedMove == 0)
+		cmn_err(CE_NOTE, "HardwareAcceleratedMove Disabled");
+	if (HardwareAcceleratedInit == 0)
+		cmn_err(CE_NOTE, "HardwareAcceleratedInit Disabled");
+
 	return (0);
 }
 
@@ -272,6 +285,8 @@
 static int
 sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 {
+	char	*prop;
+
 	switch (cmd) {
 	case DDI_ATTACH:
 		sbd_dip = dip;
@@ -281,6 +296,23 @@
 			break;
 		}
 		ddi_report_dev(dip);
+
+		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+		    DDI_PROP_DONTPASS, "vendor-id", &prop) == DDI_SUCCESS) {
+			(void) snprintf(sbd_vendor_id, 9, "%s%8s", prop, "");
+			ddi_prop_free(prop);
+		}
+		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+		    DDI_PROP_DONTPASS, "product-id", &prop) == DDI_SUCCESS) {
+			(void) snprintf(sbd_product_id, 17, "%s%16s", prop, "");
+			ddi_prop_free(prop);
+		}
+		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+		    DDI_PROP_DONTPASS, "revision", &prop) == DDI_SUCCESS) {
+			(void) snprintf(sbd_revision, 5, "%s%4s", prop, "");
+			ddi_prop_free(prop);
+		}
+
 		return (DDI_SUCCESS);
 	}
 
@@ -1396,7 +1428,10 @@
 static void
 do_unmap_setup(sbd_lu_t *sl)
 {
-	ASSERT((sl->sl_flags & SL_UNMAP_ENABLED) == 0);
+	if (sbd_unmap_enable == 0) {
+		sl->sl_flags &= ~(SL_UNMAP_ENABLED);
+		return;
+	}
 
 	if ((sl->sl_flags & SL_ZFS_META) == 0)
 		return;	/* No UNMAP for you. */
@@ -1441,8 +1476,10 @@
 	lu->lu_send_status_done = sbd_send_status_done;
 	lu->lu_task_free = sbd_task_free;
 	lu->lu_abort = sbd_abort;
+	lu->lu_task_poll = sbd_task_poll;
 	lu->lu_dbuf_free = sbd_dbuf_free;
 	lu->lu_ctl = sbd_ctl;
+	lu->lu_task_done = sbd_ats_remove_by_task;
 	lu->lu_info = sbd_info;
 	sl->sl_state = STMF_STATE_OFFLINE;
 
@@ -1455,6 +1492,12 @@
 		return (EIO);
 	}
 
+	/*
+	 * set up the ATS (compare and write) lists to handle multiple
+	 * ATS commands simultaneously
+	 */
+	list_create(&sl->sl_ats_io_list, sizeof (ats_state_t),
+	    offsetof(ats_state_t, as_next));
 	*err_ret = 0;
 	return (0);
 }
@@ -1561,6 +1604,7 @@
 			sl->sl_lu_size = vattr.va_size;
 		}
 	}
+
 	if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
 		*err_ret = SBD_RET_FILE_SIZE_ERROR;
 		ret = EINVAL;
@@ -1837,7 +1881,7 @@
 		sl->sl_flags |= SL_WRITE_PROTECTED;
 	}
 	if (slu->slu_blksize_valid) {
-		if (!ISP2(slu->slu_blksize) ||
+		if ((slu->slu_blksize & (slu->slu_blksize - 1)) ||
 		    (slu->slu_blksize > (32 * 1024)) ||
 		    (slu->slu_blksize == 0)) {
 			*err_ret = SBD_RET_INVALID_BLKSIZE;
@@ -2997,8 +3041,10 @@
 sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
     uint64_t offset, uint64_t size, uint8_t *buf)
 {
-	int ret;
+	int ret, ioflag = 0;
 	long resid;
+	hrtime_t xfer_start;
+	uint8_t op = task->task_cdb[0];
 
 	if ((offset + size) > sl->sl_lu_size) {
 		return (SBD_IO_PAST_EOF);
@@ -3006,6 +3052,16 @@
 
 	offset += sl->sl_data_offset;
 
+	/*
+	 * Check to see if the command is READ(10), READ(12), or READ(16).
+	 * If it is then check for bit 3 being set to indicate if Forced
+	 * Unit Access is being requested. If so, the FSYNC flag will be set
+	 * on the read.
+	 */
+	if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
+	    (op == SCMD_READ_G5)) && (task->task_cdb[1] & BIT_3)) {
+		ioflag = FSYNC;
+	}
 	if ((offset + size) > sl->sl_data_readable_size) {
 		uint64_t store_end;
 		if (offset > sl->sl_data_readable_size) {
@@ -3017,6 +3073,7 @@
 		size = store_end;
 	}
 
+	xfer_start = gethrtime();
 	DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
 	    uint8_t *, buf, uint64_t, size, uint64_t, offset,
 	    scsi_task_t *, task);
@@ -3032,11 +3089,14 @@
 		rw_exit(&sl->sl_access_state_lock);
 		return (SBD_FAILURE);
 	}
+
 	ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
-	    (offset_t)offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(),
+	    (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
 	    &resid);
 	rw_exit(&sl->sl_access_state_lock);
 
+	stmf_lu_xfer_done(task, B_TRUE /* read */,
+	    (gethrtime() - xfer_start));
 	DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
 	    uint8_t *, buf, uint64_t, size, uint64_t, offset,
 	    int, ret, scsi_task_t *, task);
@@ -3059,6 +3119,9 @@
 	long resid;
 	sbd_status_t sret = SBD_SUCCESS;
 	int ioflag;
+	hrtime_t xfer_start;
+	uint8_t op = task->task_cdb[0];
+	boolean_t fua_bit = B_FALSE;
 
 	if ((offset + size) > sl->sl_lu_size) {
 		return (SBD_IO_PAST_EOF);
@@ -3066,13 +3129,24 @@
 
 	offset += sl->sl_data_offset;
 
-	if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
-	    (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
+	/*
+	 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
+	 * If it is then check for bit 3 being set to indicate if Forced
+	 * Unit Access is being requested. If so, the FSYNC flag will be set
+	 * on the write.
+	 */
+	if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
+	    (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
+		fua_bit = B_TRUE;
+	}
+	if (((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
+	    (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) || fua_bit) {
 		ioflag = FSYNC;
 	} else {
 		ioflag = 0;
 	}
 
+	xfer_start = gethrtime();
 	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
 	    uint8_t *, buf, uint64_t, size, uint64_t, offset,
 	    scsi_task_t *, task);
@@ -3093,6 +3167,8 @@
 	    &resid);
 	rw_exit(&sl->sl_access_state_lock);
 
+	stmf_lu_xfer_done(task, B_FALSE /* write */,
+	    (gethrtime() - xfer_start));
 	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
 	    uint8_t *, buf, uint64_t, size, uint64_t, offset,
 	    int, ret, scsi_task_t *, task);
@@ -3103,7 +3179,6 @@
 		sret = sbd_flush_data_cache(sl, 1);
 	}
 over_sl_data_write:
-
 	if ((ret || resid) || (sret != SBD_SUCCESS)) {
 		return (SBD_FAILURE);
 	} else if ((offset + size) > sl->sl_data_readable_size) {
@@ -3639,7 +3714,8 @@
 		}
 	}
 out:
-	nvlist_free(nv);
+	if (nv != NULL)
+		nvlist_free(nv);
 	kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
 	kmem_free(zc, sizeof (zfs_cmd_t));
 	(void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
@@ -3696,7 +3772,7 @@
 sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl)
 {
 	vnode_t *vp;
-	int unused;
+	int unused, ret;
 
 	/* Nothing to do */
 	if (dfl->dfl_num_exts == 0)
@@ -3717,6 +3793,29 @@
 		return (EIO);
 	}
 
-	return (VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
-	    &unused, NULL));
+	ret = VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
+	    &unused, NULL);
+
+	return (ret);
 }
+
+/*
+ * Check if this lu belongs to sbd or some other lu
+ * provider. A simple check for one of the module
+ * entry points is sufficient.
+ */
+int
+sbd_is_valid_lu(stmf_lu_t *lu)
+{
+	if (lu->lu_new_task == sbd_new_task)
+		return (1);
+	return (0);
+}
+
+uint8_t
+sbd_get_lbasize_shift(stmf_lu_t *lu)
+{
+	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
+
+	return (sl->sl_data_blocksize_shift);
+}
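
sbd_data_read() and sbd_data_write() above probe the Forced Unit Access bit
the same way: only for the group-1/4/5 READ/WRITE opcodes, and only bit 3 of
CDB byte 1. A user-space sketch of the read-side test (opcode values are
those from sys/scsi/generic/commands.h; read_wants_fua() is an illustrative
name):

	#include <stdio.h>
	#include <stdint.h>

	#define	SCMD_READ_G1	0x28	/* READ(10) */
	#define	SCMD_READ_G4	0x88	/* READ(16) */
	#define	SCMD_READ_G5	0xa8	/* READ(12) */
	#define	BIT_3		0x08

	/* Nonzero if this read CDB requests Forced Unit Access. */
	static int
	read_wants_fua(const uint8_t *cdb)
	{
		uint8_t op = cdb[0];

		return (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
		    (op == SCMD_READ_G5)) && (cdb[1] & BIT_3));
	}

	int
	main(void)
	{
		uint8_t cdb[16] = { SCMD_READ_G1, BIT_3 };	/* READ(10), FUA */

		printf("%d\n", read_wants_fua(cdb) != 0);	/* prints 1 */
		return (0);
	}
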
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h	Thu Nov 14 07:43:52 2019 -0700
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  *
- * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #ifndef	_SBD_IMPL_H
@@ -36,6 +36,11 @@
 struct sbd_lu_attr;
 struct sbd_it_data;
 
+#define	ATOMIC8_GET(val) (		\
+			(atomic_add_8_nv(&(val), 0)))
+#define	ATOMIC32_GET(val) (		\
+			(atomic_add_32_nv(&(val), 0)))
+
 /*
  * sms endianess
  */
@@ -206,16 +211,36 @@
 	uint32_t	len;		/* len left */
 	uint32_t	current_ro;	/* running relative offset */
 	uint8_t		*trans_data;	/* Any transient data */
+	ats_state_t	*ats_state;
+	uint32_t	rsvd;
 } sbd_cmd_t;
 
 /*
  * flags for sbd_cmd
+ *
+ * SBD_SCSI_CMD_ACTIVE means that a command is running.  This is the time
+ *      between when the function sbd_new_task is called and when either the
+ *      command completion is sent (stmf_scsilib_send_status) or an abort is
+ *      issued.
+ *
+ * SBD_SCSI_CMD_ABORT_REQUESTED is when a command is being aborted.  It may
+ *      be set prior to the task being dispatched or anywhere in the process
+ *      of the command.
+ *
+ * SBD_SCSI_CMD_XFER_FAIL is set when a command data buffer transfer
+ *      failed.  Usually it leads to an abort.
+ *
+ * SBD_SCSI_CMD_SYNC_WRITE means a synchronous write is being done.
+ *
+ * SBD_SCSI_CMD_TRANS_DATA means that a buffer has been allocated to
+ *      be used for the transfer of data.
  */
 #define	SBD_SCSI_CMD_ACTIVE		0x01
 #define	SBD_SCSI_CMD_ABORT_REQUESTED	0x02
 #define	SBD_SCSI_CMD_XFER_FAIL		0x04
 #define	SBD_SCSI_CMD_SYNC_WRITE		0x08
 #define	SBD_SCSI_CMD_TRANS_DATA		0x10
+#define	SBD_SCSI_CMD_ATS_RELATED	0x20
 
 /*
  * cmd types
@@ -269,7 +294,7 @@
 typedef struct sbd_zvol_io {
 	uint64_t	zvio_offset;	/* offset into volume */
 	int		zvio_flags;	/* flags */
-	void 		*zvio_dbp;	/* array of dmu buffers */
+	void		*zvio_dbp;	/* array of dmu buffers */
 	void		*zvio_abp;	/* array of arc buffers */
 	uio_t		*zvio_uio;	/* for copy operations */
 } sbd_zvol_io_t;
@@ -300,10 +325,13 @@
 void sbd_task_free(struct scsi_task *task);
 stmf_status_t sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg,
 							uint32_t flags);
+void sbd_task_poll(struct scsi_task *task);
 void sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf);
 void sbd_ctl(struct stmf_lu *lu, int cmd, void *arg);
 stmf_status_t sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg,
 				uint8_t *buf, uint32_t *bufsizep);
+uint8_t sbd_get_lbasize_shift(stmf_lu_t *lu);
+int sbd_is_valid_lu(stmf_lu_t *lu);
 
 #ifdef	__cplusplus
 }
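
The ATOMIC8_GET/ATOMIC32_GET macros above read a shared counter by adding
zero with the kernel's atomic add-and-fetch primitives, giving an atomic read
without holding sl_lock. The same idea in portable C11 (a user-space sketch,
not the kernel code):

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	static _Atomic uint32_t len;

	int
	main(void)
	{
		atomic_store(&len, 42);
		/* equivalent of ATOMIC32_GET(len): an atomic load */
		uint32_t v = atomic_fetch_add(&len, 0);

		printf("%u\n", v);	/* prints 42 */
		return (0);
	}
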
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c	Thu Nov 14 07:43:52 2019 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #include <sys/atomic.h>
@@ -39,7 +40,7 @@
 
 #define	MAX_PGR_PARAM_LIST_LENGTH	(256 * 1024)
 
-int  sbd_pgr_reservation_conflict(scsi_task_t *);
+int  sbd_pgr_reservation_conflict(scsi_task_t *, struct sbd_lu *sl);
 void sbd_pgr_reset(sbd_lu_t *);
 void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
 void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
@@ -82,6 +83,7 @@
 static void sbd_pgr_do_reserve(sbd_pgr_t *, sbd_pgr_key_t *, sbd_it_data_t *it,
 	stmf_scsi_session_t *, scsi_cdb_prout_t *);
 
+static boolean_t sbd_pgr_should_save(sbd_lu_t *);
 extern sbd_status_t sbd_write_meta_section(sbd_lu_t *, sm_section_hdr_t *);
 extern sbd_status_t sbd_read_meta_section(sbd_lu_t *, sm_section_hdr_t **,
 	uint16_t);
@@ -195,7 +197,7 @@
 	/* actions for PERSISTENT RESERVE OUT command */                   \
 	(((cdb[0]) == SCMD_PERSISTENT_RESERVE_OUT) && (                    \
 	    (((cdb[1]) & 0x1F) == PR_OUT_REGISTER_AND_IGNORE_EXISTING_KEY) || \
-	    (((cdb[1]) & 0x1F) == PR_OUT_REGISTER))) 			|| \
+	    (((cdb[1]) & 0x1F) == PR_OUT_REGISTER)))			|| \
 	/* ----------------------- */                                      \
 	/* SBC-3 (rev 17) Table 3  */                                      \
 	/* ----------------------- */                                      \
@@ -280,7 +282,7 @@
 sbd_pgr_meta_init(sbd_lu_t *slu)
 {
 	sbd_pgr_info_t	*spi = NULL;
-	uint32_t 	sz;
+	uint32_t	sz;
 	sbd_status_t	ret;
 
 	sz = sizeof (sbd_pgr_info_t);
@@ -295,6 +297,22 @@
 	return (ret);
 }
 
+/*
+ * Evaluate common cases where a PERSISTENT RESERVE OUT CDB handler should call
+ * sbd_pgr_meta_write().
+ */
+static boolean_t
+sbd_pgr_should_save(sbd_lu_t *slu)
+{
+	sbd_pgr_t		*pgr = slu->sl_pgr;
+
+	if (stmf_is_pgr_aptpl_always() == B_TRUE ||
+	    (pgr->pgr_flags & (SBD_PGR_APTPL)))
+		return (B_TRUE);
+	else
+		return (B_FALSE);
+}
+
 sbd_status_t
 sbd_pgr_meta_load(sbd_lu_t *slu)
 {
@@ -323,7 +341,12 @@
 	}
 
 	pgr->pgr_flags = spi->pgr_flags;
-	if (pgr->pgr_flags & SBD_PGR_APTPL) {
+	/*
+	 * We reload APTPL reservations when:
+	 *  1. Global override is enabled
+	 *  2. APTPL was explicitly asserted in the PERSISTENT RESERVE OUT CDB
+	 */
+	if (stmf_is_pgr_aptpl_always() || (pgr->pgr_flags & SBD_PGR_APTPL)) {
 		pgr->pgr_rsv_type = spi->pgr_rsv_type;
 		pgr->pgr_rsv_scope = spi->pgr_rsv_scope;
 	} else {
@@ -436,7 +459,7 @@
 
 	/* Calculate total pgr meta section size needed */
 	sz = sizeof (sbd_pgr_info_t);
-	if (pgr->pgr_flags & SBD_PGR_APTPL) {
+	if ((pgr->pgr_flags & SBD_PGR_APTPL) || stmf_is_pgr_aptpl_always()) {
 		key = pgr->pgr_keylist;
 		while (key != NULL) {
 			sz = ALIGNED_TO_8BYTE_BOUNDARY(sz +
@@ -458,7 +481,7 @@
 	spi->pgr_sms_header.sms_id = SMS_ID_PGR_INFO;
 	spi->pgr_sms_header.sms_data_order = SMS_DATA_ORDER;
 
-	if (pgr->pgr_flags & SBD_PGR_APTPL) {
+	if ((pgr->pgr_flags & SBD_PGR_APTPL) || stmf_is_pgr_aptpl_always()) {
 		uint8_t *ptr;
 		key = pgr->pgr_keylist;
 		sz = sizeof (sbd_pgr_info_t);
@@ -484,8 +507,12 @@
 			key = key->pgr_key_next;
 		}
 	}
-
+	rw_downgrade(&pgr->pgr_lock);
 	ret = sbd_write_meta_section(slu, (sm_section_hdr_t *)spi);
+	if (!rw_tryupgrade(&pgr->pgr_lock)) {
+		rw_exit(&pgr->pgr_lock);
+		rw_enter(&pgr->pgr_lock, RW_WRITER);
+	}
 	kmem_free(spi, totalsz);
 	if (ret != SBD_SUCCESS) {
 		sbd_pgr_key_t	*tmp_list;
@@ -513,7 +540,7 @@
 
 static sbd_pgr_key_t *
 sbd_pgr_key_alloc(scsi_devid_desc_t *lptid, scsi_transport_id_t *rptid,
-					int16_t lpt_len, int16_t rpt_len)
+    int16_t lpt_len, int16_t rpt_len)
 {
 	sbd_pgr_key_t *key;
 
@@ -578,7 +605,8 @@
 	sbd_pgr_t	*pgr  = slu->sl_pgr;
 
 	rw_enter(&pgr->pgr_lock, RW_WRITER);
-	if (!(pgr->pgr_flags & SBD_PGR_APTPL)) {
+	if (!(pgr->pgr_flags & SBD_PGR_APTPL) &&
+	    stmf_is_pgr_aptpl_always() == B_FALSE) {
 		sbd_pgr_keylist_dealloc(slu);
 		pgr->pgr_PRgeneration	= 0;
 		pgr->pgr_rsvholder	= NULL;
@@ -630,7 +658,7 @@
  */
 static uint32_t
 sbd_pgr_remove_keys(sbd_lu_t *slu, sbd_it_data_t *my_it, sbd_pgr_key_t *my_key,
-				uint64_t svc_key, boolean_t match)
+    uint64_t svc_key, boolean_t match)
 {
 	sbd_pgr_t	*pgr  = slu->sl_pgr;
 	sbd_it_data_t	*it;
@@ -708,7 +736,7 @@
 
 static boolean_t
 sbd_pgr_key_compare(sbd_pgr_key_t *key, scsi_devid_desc_t *lpt,
-					stmf_remote_port_t *rpt)
+    stmf_remote_port_t *rpt)
 {
 	scsi_devid_desc_t *id;
 
@@ -732,7 +760,7 @@
 
 sbd_pgr_key_t *
 sbd_pgr_key_registered(sbd_pgr_t *pgr, scsi_devid_desc_t *lpt,
-					stmf_remote_port_t *rpt)
+    stmf_remote_port_t *rpt)
 {
 	sbd_pgr_key_t *key;
 
@@ -812,9 +840,8 @@
  * Check for any PGR Reservation conflict. return 0 if access allowed
  */
 int
-sbd_pgr_reservation_conflict(scsi_task_t *task)
+sbd_pgr_reservation_conflict(scsi_task_t *task, sbd_lu_t *slu)
 {
-	sbd_lu_t	*slu = (sbd_lu_t *)task->task_lu->lu_provider_private;
 	sbd_pgr_t	*pgr = slu->sl_pgr;
 	sbd_it_data_t	*it  = (sbd_it_data_t *)task->task_lu_itl_handle;
 
@@ -1129,7 +1156,7 @@
 
 static void
 sbd_pgr_in_report_capabilities(scsi_task_t *task,
-				stmf_data_buf_t *initial_dbuf)
+    stmf_data_buf_t *initial_dbuf)
 {
 	sbd_lu_t	*slu   = (sbd_lu_t *)task->task_lu->lu_provider_private;
 	sbd_pgr_t	*pgr   =  slu->sl_pgr;
@@ -1167,14 +1194,14 @@
 
 static void
 sbd_pgr_in_read_full_status(scsi_task_t *task,
-				stmf_data_buf_t *initial_dbuf)
+    stmf_data_buf_t *initial_dbuf)
 {
 	sbd_lu_t	*slu   = (sbd_lu_t *)task->task_lu->lu_provider_private;
 	sbd_pgr_t	*pgr   = slu->sl_pgr;
 	sbd_pgr_key_t	*key;
-	scsi_prin_status_t 	*sts;
+	scsi_prin_status_t	*sts;
 	scsi_prin_full_status_t	*buf;
-	uint32_t 		i, buf_size, cdb_len;
+	uint32_t		i, buf_size, cdb_len;
 	uint8_t			*offset;
 
 	ASSERT(task->task_cdb[0] == SCMD_PERSISTENT_RESERVE_IN);
@@ -1203,7 +1230,7 @@
 		if ((pgr->pgr_flags & SBD_PGR_RSVD_ALL_REGISTRANTS) ||
 		    (pgr->pgr_rsvholder && pgr->pgr_rsvholder == key)) {
 				sts->r_holder	= 1;
-				sts->type 	= pgr->pgr_rsv_type;
+				sts->type	= pgr->pgr_rsv_type;
 				sts->scope	= pgr->pgr_rsv_scope;
 		}
 
@@ -1386,7 +1413,7 @@
 
 sbd_pgr_reg_done:
 
-	if (pgr->pgr_flags & SBD_PGR_APTPL || plist->aptpl) {
+	if (plist->aptpl || (sbd_pgr_should_save(slu) == B_TRUE)) {
 		if (plist->aptpl)
 			PGR_SET_FLAG(pgr->pgr_flags, SBD_PGR_APTPL);
 		else
@@ -1394,7 +1421,7 @@
 
 		if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
 			stmf_scsilib_send_status(task, STATUS_CHECK,
-			    STMF_SAA_INSUFFICIENT_REG_RESOURCES);
+			    STMF_SAA_INSUFFICIENT_REG_RESRCS);
 			return;
 		}
 	}
@@ -1405,7 +1432,7 @@
 
 static sbd_pgr_key_t *
 sbd_pgr_do_register(sbd_lu_t *slu, sbd_it_data_t *it, scsi_devid_desc_t *lpt,
-		stmf_remote_port_t *rpt, uint8_t keyflag, uint64_t svc_key)
+    stmf_remote_port_t *rpt, uint8_t keyflag, uint64_t svc_key)
 {
 	sbd_pgr_t		*pgr = slu->sl_pgr;
 	sbd_pgr_key_t		*key;
@@ -1491,10 +1518,10 @@
 	/* In case there is no reservation exist */
 	} else {
 		sbd_pgr_do_reserve(pgr, key, it, ses, pr_out);
-		if (pgr->pgr_flags & SBD_PGR_APTPL) {
+		if (sbd_pgr_should_save(slu) == B_TRUE) {
 			if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
 				stmf_scsilib_send_status(task, STATUS_CHECK,
-				    STMF_SAA_INSUFFICIENT_REG_RESOURCES);
+				    STMF_SAA_INSUFFICIENT_REG_RESRCS);
 				return;
 			}
 		}
@@ -1505,7 +1532,7 @@
 
 static void
 sbd_pgr_do_reserve(sbd_pgr_t *pgr, sbd_pgr_key_t *key, sbd_it_data_t *it,
-			stmf_scsi_session_t *ses, scsi_cdb_prout_t *pr_out)
+    stmf_scsi_session_t *ses, scsi_cdb_prout_t *pr_out)
 {
 	scsi_devid_desc_t	*lpt;
 	uint16_t		lpt_len;
@@ -1548,6 +1575,11 @@
 
 	ASSERT(key);
 
+	/*
+	 * XXX this does not honor APTPL
+	 * (i.e., changes made to a formerly-persistent reservation are not
+	 *  updated here!!!)
+	 */
 	if (SBD_PGR_RSVD(pgr)) {
 		if (pgr->pgr_flags & SBD_PGR_RSVD_ALL_REGISTRANTS ||
 		    pgr->pgr_rsvholder == key) {
@@ -1559,6 +1591,27 @@
 			}
 			sbd_pgr_do_release(slu, it,
 			    SBD_UA_RESERVATIONS_RELEASED);
+
+			/*
+			 * XXX T10 SPC-3 5.6.10.2 says nothing about what to
+			 * do in the event of a failure updating the
+			 * PGR nvram store for a reservation associated with
+			 * an APTPL-enabled (see SPC-3 5.6.4.1) I_T
+			 * registration during a RELEASE service action.
+			 *
+			 * Technically, the CDB completed successfully, as per
+			 * the spec, but at some point we may need to enter
+			 * a recovery mode on the initiator(s) if we power cycle
+			 * the target at the wrong instant...
+			 */
+			if (sbd_pgr_should_save(slu) == B_TRUE) {
+				if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
+					stmf_scsilib_send_status(task,
+					    STATUS_CHECK,
+					    STMF_SAA_INSUFFICIENT_REG_RESRCS);
+					return;
+				}
+			}
 		}
 	}
 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
@@ -1602,10 +1655,10 @@
 	mutex_exit(&slu->sl_lock);
 	sbd_pgr_keylist_dealloc(slu);
 	sbd_pgr_set_ua_conditions(slu, it, SBD_UA_RESERVATIONS_PREEMPTED);
-	if (pgr->pgr_flags & SBD_PGR_APTPL) {
+	if (sbd_pgr_should_save(slu) == B_TRUE) {
 		if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
 			stmf_scsilib_send_status(task, STATUS_CHECK,
-			    STMF_SAA_INSUFFICIENT_REG_RESOURCES);
+			    STMF_SAA_INSUFFICIENT_REG_RESRCS);
 			return;
 		}
 	}
@@ -1717,10 +1770,10 @@
 		}
 	}
 
-	if (pgr->pgr_flags & SBD_PGR_APTPL) {
+	if (sbd_pgr_should_save(slu) == B_TRUE) {
 		if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
 			stmf_scsilib_send_status(task, STATUS_CHECK,
-			    STMF_SAA_INSUFFICIENT_REG_RESOURCES);
+			    STMF_SAA_INSUFFICIENT_REG_RESRCS);
 			return;
 		}
 	}
@@ -1831,8 +1884,8 @@
 	}
 
 
-	/* Write to disk if currenty aptpl is set or given task is setting it */
-	if (pgr->pgr_flags & SBD_PGR_APTPL || plist->aptpl) {
+	/* Write to disk if aptpl is currently set or this task is setting it */
+	if (plist->aptpl || (sbd_pgr_should_save(slu) == B_TRUE)) {
 		if (plist->aptpl)
 			PGR_SET_FLAG(pgr->pgr_flags, SBD_PGR_APTPL);
 		else
@@ -1840,7 +1893,7 @@
 
 		if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
 			stmf_scsilib_send_status(task, STATUS_CHECK,
-			    STMF_SAA_INSUFFICIENT_REG_RESOURCES);
+			    STMF_SAA_INSUFFICIENT_REG_RESRCS);
 			return;
 		}
 	}
@@ -1850,7 +1903,8 @@
 }
 
 void
-sbd_pgr_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *my_it) {
+sbd_pgr_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *my_it)
+{
 	sbd_it_data_t *it;
 
 	rw_enter(&sl->sl_pgr->pgr_lock, RW_WRITER);
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c	Thu Nov 14 07:43:52 2019 -0700
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
@@ -87,6 +88,21 @@
 	    (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
 /* End of SCSI2_CONFLICT_FREE_CMDS */
 
+uint8_t HardwareAcceleratedInit = 1;
+uint8_t sbd_unmap_enable = 1;		/* allow unmap by default */
+
+/*
+ * An /etc/system tunable which specifies the maximum number of LBAs supported
+ * in a single UNMAP operation. Default is 0x002000 (8192) blocks, or 4MB
+ * with 512-byte blocks.
+ */
+int stmf_sbd_unmap_max_nblks  = 0x002000;
+
+/*
+ * An /etc/system tunable which indicates if READ ops can run on the standby
+ * path or return an error.
+ */
+int stmf_standby_fail_reads = 0;
+
 stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
 static void sbd_handle_sync_cache(struct scsi_task *task,
     struct stmf_data_buf *initial_dbuf);
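
The tunables introduced above (HardwareAcceleratedInit, sbd_unmap_enable,
stmf_sbd_unmap_max_nblks, stmf_standby_fail_reads) are plain global
variables, so they can be set from /etc/system. A sketch of plausible
settings, assuming the symbols remain global in the stmf_sbd module; the
values here are illustrative only:

	* Cap a single UNMAP at 0x4000 blocks.
	set stmf_sbd:stmf_sbd_unmap_max_nblks = 0x4000
	* Reject READ ops on the standby path (see the Windows notes below).
	set stmf_sbd:stmf_standby_fail_reads = 1
	* Disable WRITE SAME and UNMAP support entirely, if desired.
	set stmf_sbd:HardwareAcceleratedInit = 0
	set stmf_sbd:sbd_unmap_enable = 0

A reboot is required for /etc/system changes to take effect.
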
@@ -94,8 +110,6 @@
     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
 void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
     stmf_data_buf_t *dbuf);
-void sbd_handle_short_write_transfers(scsi_task_t *task,
-    stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size);
 void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
     uint32_t buflen);
 void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
@@ -106,7 +120,7 @@
 static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf);
 
 extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
-extern int sbd_pgr_reservation_conflict(scsi_task_t *);
+extern int sbd_pgr_reservation_conflict(scsi_task_t *, struct sbd_lu *sl);
 extern void sbd_pgr_reset(sbd_lu_t *);
 extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
 extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
@@ -144,7 +158,8 @@
 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
 	    task->task_max_nbufs;
 
-	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : scmd->len;
+	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ?
+	    dbuf->db_buf_size : ATOMIC32_GET(scmd->len);
 	laddr = scmd->addr + scmd->current_ro;
 
 	for (buflen = 0, ndx = 0; (buflen < len) &&
@@ -168,12 +183,14 @@
 	dbuf->db_data_size = buflen;
 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
 	(void) stmf_xfer_data(task, dbuf, 0);
-	scmd->len -= buflen;
+	atomic_add_32(&scmd->len, -buflen);
 	scmd->current_ro += buflen;
-	if (scmd->len && (scmd->nbufs < bufs_to_take)) {
+	if (ATOMIC32_GET(scmd->len) &&
+	    (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
 		uint32_t maxsize, minsize, old_minsize;
 
-		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
+		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128 * 1024 :
+		    ATOMIC32_GET(scmd->len);
 		minsize = maxsize >> 2;
 		do {
 			/*
@@ -188,7 +205,7 @@
 		if (dbuf == NULL) {
 			return;
 		}
-		scmd->nbufs++;
+		atomic_inc_8(&scmd->nbufs);
 		sbd_do_read_xfer(task, scmd, dbuf);
 	}
 }
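
Throughout the hunks that follow, plain reads and writes of scmd->nbufs and
scmd->len are converted to ATOMIC8_GET/ATOMIC32_GET snapshots paired with
atomic_inc_8/atomic_dec_8/atomic_add_32 updates, because the counters are
now touched from concurrent dbuf completions. The macros themselves are
defined in sbd_impl.h, which is not part of this excerpt; a minimal sketch
of what they might look like, assuming the *_nv variants from
<sys/atomic.h>:

	#include <sys/atomic.h>

	/* Hypothetical definitions; the real ones live in sbd_impl.h. */
	#define	ATOMIC8_GET(val)	atomic_add_8_nv(&(val), 0)
	#define	ATOMIC32_GET(val)	atomic_add_32_nv(&(val), 0)

Adding zero with the new-value variant yields a coherent snapshot of the
counter without taking a separate lock.
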
@@ -219,6 +236,7 @@
 	uint_t nblks;
 	uint64_t blksize = sl->sl_blksize;
 	size_t db_private_sz;
+	hrtime_t xfer_start;
 	uintptr_t pad;
 
 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
@@ -260,14 +278,15 @@
 		first_len = 0;
 	}
 
-	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
-
-		xfer_len = MIN(max_len, scmd->len);
+	while (ATOMIC32_GET(scmd->len) &&
+	    ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
+
+		xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
 		if (first_len) {
 			xfer_len = MIN(xfer_len, first_len);
 			first_len = 0;
 		}
-		if (scmd->len == xfer_len) {
+		if (ATOMIC32_GET(scmd->len) == xfer_len) {
 			final_xfer = 1;
 		} else {
 			/*
@@ -334,12 +353,15 @@
 		 * Accounting for start of read.
 		 * Note there is no buffer address for the probe yet.
 		 */
+		xfer_start = gethrtime();
 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
 		    uint8_t *, NULL, uint64_t, xfer_len,
 		    uint64_t, offset, scsi_task_t *, task);
 
 		ret = sbd_zvol_alloc_read_bufs(sl, dbuf);
 
+		stmf_lu_xfer_done(task, B_TRUE /* read */,
+		    (gethrtime() - xfer_start));
 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
 		    uint8_t *, NULL, uint64_t, xfer_len,
 		    uint64_t, offset, int, ret, scsi_task_t *, task);
@@ -349,9 +371,10 @@
 			 * Read failure from the backend.
 			 */
 			stmf_free(dbuf);
-			if (scmd->nbufs == 0) {
+			if (ATOMIC8_GET(scmd->nbufs) == 0) {
 				/* nothing queued, just finish */
 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+				sbd_ats_remove_by_task(task);
 				stmf_scsilib_send_status(task, STATUS_CHECK,
 				    STMF_SAA_READ_ERROR);
 				rw_exit(&sl->sl_access_state_lock);
@@ -362,7 +385,6 @@
 			return;
 		}
 
-
 		/*
 		 * Allow PP to do setup
 		 */
@@ -376,7 +398,7 @@
 			 */
 			sbd_zvol_rele_read_bufs(sl, dbuf);
 			stmf_free(dbuf);
-			if (scmd->nbufs > 0) {
+			if (ATOMIC8_GET(scmd->nbufs) > 0) {
 				/* completion of previous dbuf will retry */
 				return;
 			}
@@ -384,6 +406,7 @@
 			 * Done with this command.
 			 */
 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+			sbd_ats_remove_by_task(task);
 			if (first_xfer)
 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
 			else
@@ -395,7 +418,7 @@
 		/*
 		 * dbuf is now queued on task
 		 */
-		scmd->nbufs++;
+		atomic_inc_8(&scmd->nbufs);
 
 		/* XXX leave this in for FW? */
 		DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
@@ -417,8 +440,8 @@
 			sbd_zvol_rele_read_bufs(sl, dbuf);
 			stmf_teardown_dbuf(task, dbuf);
 			stmf_free(dbuf);
-			scmd->nbufs--;
-			if (scmd->nbufs > 0) {
+			atomic_dec_8(&scmd->nbufs);
+			if (ATOMIC8_GET(scmd->nbufs) > 0) {
 				/* completion of previous dbuf will retry */
 				return;
 			}
@@ -427,6 +450,7 @@
 			 */
 			rw_exit(&sl->sl_access_state_lock);
 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+			sbd_ats_remove_by_task(task);
 			if (first_xfer)
 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
 			else
@@ -438,13 +462,14 @@
 			 * Completion from task_done will cleanup
 			 */
 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+			sbd_ats_remove_by_task(task);
 			return;
 		}
 		/*
 		 * Update the xfer progress.
 		 */
 		ASSERT(scmd->len >= xfer_len);
-		scmd->len -= xfer_len;
+		atomic_add_32(&scmd->len, -xfer_len);
 		scmd->current_ro += xfer_len;
 	}
 }
@@ -459,12 +484,14 @@
 		return;
 	}
 	task->task_nbytes_transferred += dbuf->db_data_size;
-	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+	if (ATOMIC32_GET(scmd->len) == 0 ||
+	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
 		stmf_free_dbuf(task, dbuf);
-		scmd->nbufs--;
-		if (scmd->nbufs)
+		atomic_dec_8(&scmd->nbufs);
+		if (ATOMIC8_GET(scmd->nbufs))
 			return;	/* wait for all buffers to complete */
 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+		sbd_ats_remove_by_task(task);
 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
 			stmf_scsilib_send_status(task, STATUS_CHECK,
 			    STMF_SAA_READ_ERROR);
@@ -477,7 +504,8 @@
 		uint32_t maxsize, minsize, old_minsize;
 		stmf_free_dbuf(task, dbuf);
 
-		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
+		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ?
+		    128 * 1024 : ATOMIC32_GET(scmd->len);
 		minsize = maxsize >> 2;
 		do {
 			old_minsize = minsize;
@@ -485,8 +513,8 @@
 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
 		    (minsize >= 512));
 		if (dbuf == NULL) {
-			scmd->nbufs --;
-			if (scmd->nbufs == 0) {
+			atomic_dec_8(&scmd->nbufs);
+			if (ATOMIC8_GET(scmd->nbufs) == 0) {
 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
 				    STMF_ALLOC_FAILURE, NULL);
 			}
@@ -514,7 +542,7 @@
 	ASSERT(dbuf->db_lu_private);
 	ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);
 
-	scmd->nbufs--;	/* account for this dbuf */
+	atomic_dec_8(&scmd->nbufs);	/* account for this dbuf */
 	/*
 	 * Release the DMU resources.
 	 */
@@ -535,7 +563,8 @@
 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
 	    (xfer_status != STMF_SUCCESS));
-	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
+	if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
+	    (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
 		/* all DMU state has been released */
 		rw_exit(&sl->sl_access_state_lock);
 	}
@@ -549,7 +578,8 @@
 		 * This chunk completed successfully
 		 */
 		task->task_nbytes_transferred += data_size;
-		if (scmd->nbufs == 0 && scmd->len == 0) {
+		if (ATOMIC8_GET(scmd->nbufs) == 0 &&
+		    ATOMIC32_GET(scmd->len) == 0) {
 			/*
 			 * This command completed successfully
 			 *
@@ -557,6 +587,7 @@
 			 * completion will occur. Tell stmf we are done.
 			 */
 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+			sbd_ats_remove_by_task(task);
 			stmf_task_lu_done(task);
 			return;
 		}
@@ -575,8 +606,9 @@
 		 * and wait for the last completion to send the status check.
 		 */
 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
-			if (scmd->nbufs == 0) {
+			if (ATOMIC8_GET(scmd->nbufs) == 0) {
 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+				sbd_ats_remove_by_task(task);
 				stmf_scsilib_send_status(task, STATUS_CHECK,
 				    STMF_SAA_READ_ERROR);
 			}
@@ -586,7 +618,14 @@
 		 * Must have been a failure on current dbuf
 		 */
 		ASSERT(xfer_status != STMF_SUCCESS);
+
+		/*
+		 * Strictly speaking this is a bug: stmf abort should have
+		 * reset the active flag, but since it has been this way for
+		 * some time, leave it as-is.
+		 */
 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+		sbd_ats_remove_by_task(task);
 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
 	}
 }
@@ -601,6 +640,7 @@
 	int scmd_err, scmd_xfer_done;
 	stmf_status_t xfer_status = dbuf->db_xfer_status;
 	uint32_t data_size = dbuf->db_data_size;
+	hrtime_t xfer_start;
 
 	ASSERT(zvio);
 
@@ -610,16 +650,18 @@
 	 */
 	stmf_teardown_dbuf(task, dbuf);
 
-	scmd->nbufs--;	/* account for this dbuf */
+	atomic_dec_8(&scmd->nbufs);	/* account for this dbuf */
 	/*
 	 * All data was queued and this is the last completion,
 	 * but there could still be an error.
 	 */
-	scmd_xfer_done = (scmd->len == 0 && scmd->nbufs == 0);
+	scmd_xfer_done = (ATOMIC32_GET(scmd->len) == 0 &&
+	    (ATOMIC8_GET(scmd->nbufs) == 0));
 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
 	    (xfer_status != STMF_SUCCESS));
 
+	xfer_start = gethrtime();
 	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
 	    uint8_t *, NULL, uint64_t, data_size,
 	    uint64_t, zvio->zvio_offset, scsi_task_t *, task);
@@ -637,6 +679,8 @@
 		ret = sbd_zvol_rele_write_bufs(sl, dbuf);
 	}
 
+	stmf_lu_xfer_done(task, B_FALSE /* write */,
+	    (gethrtime() - xfer_start));
 	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
 	    uint8_t *, NULL, uint64_t, data_size,
 	    uint64_t, zvio->zvio_offset, int, ret,  scsi_task_t *, task);
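
Both the read and write zvol paths now follow the same accounting pattern:
bracket the backing-store call with gethrtime() and report the elapsed time
through stmf_lu_xfer_done(), alongside the existing DTrace probes. In
isolation the idiom looks like this, with backend_io() standing in for
whichever sbd_zvol_* call is being timed:

	hrtime_t xfer_start = gethrtime();

	ret = backend_io(sl, dbuf);	/* e.g. sbd_zvol_rele_write_bufs() */

	/* Feed the elapsed time to the transfer-accounting layer. */
	stmf_lu_xfer_done(task, B_FALSE /* write */,
	    (gethrtime() - xfer_start));
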
@@ -656,7 +700,8 @@
 	 * transferred or an error encountered, then no more dbufs
 	 * will be queued.
 	 */
-	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
+	if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
+	    (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
 		/* all DMU state has been released */
 		rw_exit(&sl->sl_access_state_lock);
 	}
@@ -670,6 +715,7 @@
 		if (scmd_xfer_done) {
 			/* This command completed successfully */
 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+			sbd_ats_remove_by_task(task);
 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
 				stmf_scsilib_send_status(task, STATUS_CHECK,
@@ -690,8 +736,9 @@
 	 */
 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
-			if (scmd->nbufs == 0) {
+			if (ATOMIC8_GET(scmd->nbufs) == 0) {
 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+				sbd_ats_remove_by_task(task);
 				stmf_scsilib_send_status(task, STATUS_CHECK,
 				    STMF_SAA_WRITE_ERROR);
 			}
@@ -701,6 +748,7 @@
 			return;
 		}
 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+		sbd_ats_remove_by_task(task);
 		ASSERT(xfer_status != STMF_SUCCESS);
 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
 	}
@@ -726,6 +774,7 @@
 	struct iovec		*iov, *tiov, iov1[8];
 	uint32_t		len, resid;
 	int			ret, i, iovcnt, flags;
+	hrtime_t		xfer_start;
 	boolean_t		is_read;
 
 	ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);
@@ -763,6 +812,7 @@
 	uio.uio_resid = (uint64_t)len;
 	uio.uio_llimit = RLIM64_INFINITY;
 
+	xfer_start = gethrtime();
 	if (is_read == B_TRUE) {
 		uio.uio_fmode = FREAD;
 		uio.uio_extflg = UIO_COPY_CACHED;
@@ -791,6 +841,8 @@
 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
 		    scsi_task_t *, task);
 	}
+	/* finalize accounting */
+	stmf_lu_xfer_done(task, is_read, (gethrtime() - xfer_start));
 
 	if (iov != &iov1[0])
 		kmem_free(iov, iovcnt * sizeof (*iov));
@@ -805,13 +857,26 @@
 sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
 {
 	uint64_t lba, laddr;
+	uint64_t blkcount;
 	uint32_t len;
 	uint8_t op = task->task_cdb[0];
 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
 	sbd_cmd_t *scmd;
 	stmf_data_buf_t *dbuf;
 	int fast_path;
-
+	boolean_t fua_bit = B_FALSE;
+
+	/*
+	 * Check to see if the command is READ(10), READ(12), or READ(16).
+	 * If it is then check for bit 3 being set to indicate if Forced
+	 * Unit Access is being requested. If so, we'll bypass the use of
+	 * DMA buffers to simplify support of this feature.
+	 */
+	if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
+	    (op == SCMD_READ_G5)) &&
+	    (task->task_cdb[1] & BIT_3)) {
+		fua_bit = B_TRUE;
+	}
 	if (op == SCMD_READ) {
 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
 		len = (uint32_t)task->task_cdb[4];
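
As a concrete example of the FUA test above: a READ(10) CDB whose first two
bytes are 28 08 has BIT_3 (0x08) set in byte 1, so fua_bit becomes B_TRUE
and the reduced-copy path below is skipped. A stand-alone sketch of the
same check, assuming BIT_3 is 0x08:

	/* Hypothetical isolated FUA check for a READ(10) CDB. */
	uint8_t cdb[10] = { 0x28, 0x08, 0, 0, 0x10, 0, 0, 0, 0x08, 0 };
	boolean_t fua = (cdb[0] == SCMD_READ_G1) &&
	    ((cdb[1] & 0x08) != 0);		/* B_TRUE for this CDB */
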
@@ -835,6 +900,7 @@
 	}
 
 	laddr = lba << sl->sl_data_blocksize_shift;
+	blkcount = len;
 	len <<= sl->sl_data_blocksize_shift;
 
 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
@@ -861,6 +927,13 @@
 		return;
 	}
 
+	if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
+	    SBD_SUCCESS) {
+		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
+			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
+		}
+		return;
+	}
 	/*
 	 * Determine if this read can directly use DMU buffers.
 	 */
@@ -868,8 +941,8 @@
 	    initial_dbuf == NULL &&		/* No PP buffer passed in */
 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
 	    (task->task_additional_flags &
-	    TASK_AF_ACCEPT_LU_DBUF))		/* PP allows it */
-	{
+	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
+	    !fua_bit) {
 		/*
 		 * Reduced copy path
 		 */
@@ -884,6 +957,7 @@
 		rw_enter(&sl->sl_access_state_lock, RW_READER);
 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
 			rw_exit(&sl->sl_access_state_lock);
+			sbd_ats_remove_by_task(task);
 			stmf_scsilib_send_status(task, STATUS_CHECK,
 			    STMF_SAA_READ_ERROR);
 			return;
@@ -904,6 +978,7 @@
 			    SBD_CMD_SCSI_READ, 0);
 			/* done with the backend */
 			rw_exit(&sl->sl_access_state_lock);
+			sbd_ats_remove_by_task(task);
 			if (ret != 0) {
 				/* backend error */
 				stmf_scsilib_send_status(task, STATUS_CHECK,
@@ -936,13 +1011,12 @@
 		/*
 		 * Setup scmd to track read progress.
 		 */
-		scmd->flags = SBD_SCSI_CMD_ACTIVE;
+		scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
 		scmd->cmd_type = SBD_CMD_SCSI_READ;
 		scmd->nbufs = 0;
 		scmd->addr = laddr;
 		scmd->len = len;
 		scmd->current_ro = 0;
-
 		/*
 		 * Kick-off the read.
 		 */
@@ -962,6 +1036,7 @@
 		} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
 		    (minsize >= 512));
 		if (initial_dbuf == NULL) {
+			sbd_ats_remove_by_task(task);
 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
 			return;
 		}
@@ -985,6 +1060,7 @@
 			stmf_scsilib_send_status(task, STATUS_CHECK,
 			    STMF_SAA_READ_ERROR);
 		}
+		sbd_ats_remove_by_task(task);
 		return;
 	}
 
@@ -994,7 +1070,7 @@
 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
 		task->task_lu_private = scmd;
 	}
-	scmd->flags = SBD_SCSI_CMD_ACTIVE;
+	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
 	scmd->cmd_type = SBD_CMD_SCSI_READ;
 	scmd->nbufs = 1;
 	scmd->addr = laddr;
@@ -1011,7 +1087,7 @@
 	uint32_t len;
 	int bufs_to_take;
 
-	if (scmd->len == 0) {
+	if (ATOMIC32_GET(scmd->len) == 0) {
 		goto DO_WRITE_XFER_DONE;
 	}
 
@@ -1026,14 +1102,14 @@
 		stmf_free_dbuf(task, dbuf);
 		dbuf = NULL;
 	}
-	if (scmd->nbufs >= bufs_to_take) {
+	if (ATOMIC8_GET(scmd->nbufs) >= bufs_to_take) {
 		goto DO_WRITE_XFER_DONE;
 	}
 	if (dbuf == NULL) {
 		uint32_t maxsize, minsize, old_minsize;
 
-		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
-		    scmd->len;
+		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
+		    ATOMIC32_GET(scmd->len);
 		minsize = maxsize >> 2;
 		do {
 			old_minsize = minsize;
@@ -1041,7 +1117,7 @@
 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
 		    (minsize >= 512));
 		if (dbuf == NULL) {
-			if (scmd->nbufs == 0) {
+			if (ATOMIC8_GET(scmd->nbufs) == 0) {
 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
 				    STMF_ALLOC_FAILURE, NULL);
 			}
@@ -1049,18 +1125,20 @@
 		}
 	}
 
-	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
-	    scmd->len;
+	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
+	    ATOMIC32_GET(scmd->len);
 
 	dbuf->db_relative_offset = scmd->current_ro;
 	dbuf->db_data_size = len;
 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
 	(void) stmf_xfer_data(task, dbuf, 0);
-	scmd->nbufs++; /* outstanding port xfers and bufs used */
-	scmd->len -= len;
+	/* outstanding port xfers and bufs used */
+	atomic_inc_8(&scmd->nbufs);
+	atomic_add_32(&scmd->len, -len);
 	scmd->current_ro += len;
 
-	if ((scmd->len != 0) && (scmd->nbufs < bufs_to_take)) {
+	if ((ATOMIC32_GET(scmd->len) != 0) &&
+	    (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
 		sbd_do_write_xfer(task, scmd, NULL, 0);
 	}
 	return;
@@ -1126,14 +1204,14 @@
 	}
 
 
-	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
-
-		xfer_len = MIN(max_len, scmd->len);
+	while (ATOMIC32_GET(scmd->len) &&
+	    ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
+		xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
 		if (first_len) {
 			xfer_len = MIN(xfer_len, first_len);
 			first_len = 0;
 		}
-		if (xfer_len < scmd->len) {
+		if (xfer_len < ATOMIC32_GET(scmd->len)) {
 			/*
 			 * Attempt to end xfer on a block boundary.
 			 * The only way this does not happen is if the
@@ -1197,10 +1275,11 @@
 			 */
 			stmf_free(dbuf);
 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
-			if (scmd->nbufs == 0) {
+			if (ATOMIC8_GET(scmd->nbufs) == 0) {
 				/*
 				 * Nothing queued, so no completions coming
 				 */
+				sbd_ats_remove_by_task(task);
 				stmf_scsilib_send_status(task, STATUS_CHECK,
 				    STMF_SAA_WRITE_ERROR);
 				rw_exit(&sl->sl_access_state_lock);
@@ -1224,7 +1303,7 @@
 			 */
 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
 			stmf_free(dbuf);
-			if (scmd->nbufs > 0) {
+			if (ATOMIC8_GET(scmd->nbufs) > 0) {
 				/* completion of previous dbuf will retry */
 				return;
 			}
@@ -1232,6 +1311,7 @@
 			 * Done with this command.
 			 */
 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+			sbd_ats_remove_by_task(task);
 			if (first_xfer)
 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
 			else
@@ -1244,7 +1324,7 @@
 		/*
 		 * dbuf is now queued on task
 		 */
-		scmd->nbufs++;
+		atomic_inc_8(&scmd->nbufs);
 
 		xstat = stmf_xfer_data(task, dbuf, 0);
 		switch (xstat) {
@@ -1258,8 +1338,8 @@
 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
 			stmf_teardown_dbuf(task, dbuf);
 			stmf_free(dbuf);
-			scmd->nbufs--;
-			if (scmd->nbufs > 0) {
+			atomic_dec_8(&scmd->nbufs);
+			if (ATOMIC8_GET(scmd->nbufs) > 0) {
 				/* completion of previous dbuf will retry */
 				return;
 			}
@@ -1267,6 +1347,7 @@
 			 * Done with this command.
 			 */
 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+			sbd_ats_remove_by_task(task);
 			if (first_xfer)
 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
 			else
@@ -1284,7 +1365,7 @@
 		/*
 		 * Update the xfer progress.
 		 */
-		scmd->len -= xfer_len;
+		atomic_add_32(&scmd->len, -xfer_len);
 		scmd->current_ro += xfer_len;
 	}
 }
@@ -1297,17 +1378,20 @@
 	uint64_t laddr;
 	uint32_t buflen, iolen;
 	int ndx;
-
-	if (scmd->nbufs > 0) {
+	uint8_t op = task->task_cdb[0];
+	boolean_t fua_bit = B_FALSE;
+
+	if (ATOMIC8_GET(scmd->nbufs) > 0) {
 		/*
 		 * Decrement the count to indicate the port xfer
 		 * into the dbuf has completed even though the buf is
 		 * still in use here in the LU provider.
 		 */
-		scmd->nbufs--;
+		atomic_dec_8(&scmd->nbufs);
 	}
 
 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
+		sbd_ats_remove_by_task(task);
 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
 		    dbuf->db_xfer_status, NULL);
 		return;
@@ -1317,7 +1401,7 @@
 		goto WRITE_XFER_DONE;
 	}
 
-	if (scmd->len != 0) {
+	if (ATOMIC32_GET(scmd->len) != 0) {
 		/*
 		 * Initiate the next port xfer to occur in parallel
 		 * with writing this buf.
@@ -1325,6 +1409,16 @@
 		sbd_do_write_xfer(task, scmd, NULL, 0);
 	}
 
+	/*
+	 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
+	 * If it is then check for bit 3 being set to indicate if Forced
+	 * Unit Access is being requested. If so, we'll bypass the direct
+	 * call and handle it in sbd_data_write().
+	 */
+	if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
+	    (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
+		fua_bit = B_TRUE;
+	}
 	laddr = scmd->addr + dbuf->db_relative_offset;
 
 	/*
@@ -1334,13 +1428,17 @@
 	 */
 	if (sl->sl_flags & SL_CALL_ZVOL &&
 	    (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
-	    (sbd_zcopy & (4|1))) {
+	    (sbd_zcopy & (4|1)) && !fua_bit) {
 		int commit;
 
-		commit = (scmd->len == 0 && scmd->nbufs == 0);
-		if (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
+		commit = (ATOMIC32_GET(scmd->len) == 0 &&
+		    ATOMIC8_GET(scmd->nbufs) == 0);
+		rw_enter(&sl->sl_access_state_lock, RW_READER);
+		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 ||
+		    sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
 		    commit) != STMF_SUCCESS)
 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
+		rw_exit(&sl->sl_access_state_lock);
 		buflen = dbuf->db_data_size;
 	} else {
 		for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
@@ -1360,11 +1458,13 @@
 	}
 	task->task_nbytes_transferred += buflen;
 WRITE_XFER_DONE:
-	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+	if (ATOMIC32_GET(scmd->len) == 0 ||
+	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
 		stmf_free_dbuf(task, dbuf);
-		if (scmd->nbufs)
+		if (ATOMIC8_GET(scmd->nbufs))
 			return;	/* wait for all buffers to complete */
 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+		sbd_ats_remove_by_task(task);
 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
 			stmf_scsilib_send_status(task, STATUS_CHECK,
 			    STMF_SAA_WRITE_ERROR);
@@ -1430,13 +1530,25 @@
 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
 	sbd_cmd_t *scmd;
 	stmf_data_buf_t *dbuf;
+	uint64_t blkcount;
 	uint8_t	sync_wr_flag = 0;
+	boolean_t fua_bit = B_FALSE;
 
 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
 		stmf_scsilib_send_status(task, STATUS_CHECK,
 		    STMF_SAA_WRITE_PROTECTED);
 		return;
 	}
+	/*
+	 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
+	 * If it is then check for bit 3 being set to indicate if Forced
+	 * Unit Access is being requested. If so, we'll bypass the fast path
+	 * code to simplify support of this feature.
+	 */
+	if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
+	    (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
+		fua_bit = B_TRUE;
+	}
 	if (op == SCMD_WRITE) {
 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
 		len = (uint32_t)task->task_cdb[4];
@@ -1472,6 +1584,7 @@
 	}
 
 	laddr = lba << sl->sl_data_blocksize_shift;
+	blkcount = len;
 	len <<= sl->sl_data_blocksize_shift;
 
 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
@@ -1493,12 +1606,21 @@
 		return;
 	}
 
+	if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
+	    SBD_SUCCESS) {
+		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
+			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
+		}
+		return;
+	}
+
 	if (sbd_zcopy & (4|1) &&		/* Debug switch */
 	    initial_dbuf == NULL &&		/* No PP buf passed in */
 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
 	    (task->task_additional_flags &
 	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
-	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize)) {
+	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize) &&
+	    !fua_bit) {
 
 		/*
 		 * XXX Note that disallowing initial_dbuf will eliminate
@@ -1510,6 +1632,7 @@
 		rw_enter(&sl->sl_access_state_lock, RW_READER);
 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
 			rw_exit(&sl->sl_access_state_lock);
+			sbd_ats_remove_by_task(task);
 			stmf_scsilib_send_status(task, STATUS_CHECK,
 			    STMF_SAA_READ_ERROR);
 			return;
@@ -1524,7 +1647,8 @@
 			    KM_SLEEP);
 			task->task_lu_private = scmd;
 		}
-		scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
+		scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
+		    sync_wr_flag;
 		scmd->cmd_type = SBD_CMD_SCSI_WRITE;
 		scmd->nbufs = 0;
 		scmd->addr = laddr;
@@ -1555,7 +1679,8 @@
 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
 		task->task_lu_private = scmd;
 	}
-	scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
+	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
+	    sync_wr_flag;
 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
 	scmd->nbufs = 0;
 	scmd->addr = laddr;
@@ -1567,7 +1692,7 @@
 		 * Account for data passed in this write command
 		 */
 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
-		scmd->len -= dbuf->db_data_size;
+		atomic_add_32(&scmd->len, -dbuf->db_data_size);
 		scmd->current_ro += dbuf->db_data_size;
 		dbuf->db_xfer_status = STMF_SUCCESS;
 		sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
@@ -1750,6 +1875,9 @@
 		sbd_handle_unmap_xfer(task,
 		    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
 		break;
+	case SCMD_EXTENDED_COPY:
+		sbd_handle_xcopy_xfer(task, dbuf->db_sglist[0].seg_addr);
+		break;
 	case SCMD_PERSISTENT_RESERVE_OUT:
 		if (sl->sl_access_state == SBD_LU_STANDBY) {
 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
@@ -1844,7 +1972,7 @@
 	uint8_t *cdb;
 	uint32_t ncyl;
 	uint8_t nsectors, nheads;
-	uint8_t page, ctrl, header_size, pc_valid;
+	uint8_t page, ctrl, header_size;
 	uint16_t nbytes;
 	uint8_t *p;
 	uint64_t s = sl->sl_lu_size;
@@ -1855,25 +1983,21 @@
 	cdb = &task->task_cdb[0];
 	page = cdb[2] & 0x3F;
 	ctrl = (cdb[2] >> 6) & 3;
-	cmd_size = (cdb[0] == SCMD_MODE_SENSE) ? cdb[4] :
-	    READ_SCSI16(&cdb[7], uint32_t);
 
 	if (cdb[0] == SCMD_MODE_SENSE) {
+		cmd_size = cdb[4];
 		header_size = 4;
 		dev_spec_param_offset = 2;
 	} else {
+		cmd_size = READ_SCSI16(&cdb[7], uint32_t);
 		header_size = 8;
 		dev_spec_param_offset = 3;
 	}
 
 	/* Now validate the command */
-	if ((cdb[2] == 0) || (page == MODEPAGE_ALLPAGES) || (page == 0x08) ||
-	    (page == 0x0A) || (page == 0x03) || (page == 0x04)) {
-		pc_valid = 1;
-	} else {
-		pc_valid = 0;
-	}
-	if ((cmd_size < header_size) || (pc_valid == 0)) {
+	if ((cdb[2] != 0) && (page != MODEPAGE_ALLPAGES) &&
+	    (page != MODEPAGE_CACHING) && (page != MODEPAGE_CTRL_MODE) &&
+	    (page != MODEPAGE_FORMAT) && (page != MODEPAGE_GEOMETRY)) {
 		stmf_scsilib_send_status(task, STATUS_CHECK,
 		    STMF_SAA_INVALID_FIELD_IN_CDB);
 		return;
@@ -1891,7 +2015,7 @@
 	nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
 	sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
 
-	if ((page == 0x03) || (page == MODEPAGE_ALLPAGES)) {
+	if ((page == MODEPAGE_FORMAT) || (page == MODEPAGE_ALLPAGES)) {
 		p[n] = 0x03;
 		p[n+1] = 0x16;
 		if (ctrl != 1) {
@@ -1902,7 +2026,7 @@
 		}
 		n += 24;
 	}
-	if ((page == 0x04) || (page == MODEPAGE_ALLPAGES)) {
+	if ((page == MODEPAGE_GEOMETRY) || (page == MODEPAGE_ALLPAGES)) {
 		p[n] = 0x04;
 		p[n + 1] = 0x16;
 		if (ctrl != 1) {
@@ -1977,11 +2101,11 @@
 		 * of bytes in the length field, so adjust the count.
 		 * Byte count minus header length field size.
 		 */
-		buf[0] = (n - 1) & 0xff;
+		buf[0] = (n - header_size) & 0xff;
 	} else {
 		/* Byte count minus header length field size. */
-		buf[1] = (n - 2) & 0xff;
-		buf[0] = ((n - 2) >> 8) & 0xff;
+		buf[1] = (n - header_size) & 0xff;
+		buf[0] = ((n - header_size) >> 8) & 0xff;
 	}
 
 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
@@ -2302,6 +2426,21 @@
 }
 
 static void
+sbd_write_same_release_resources(struct scsi_task *task)
+{
+	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
+
+	if (scmd->nbufs == 0xFF)
+		cmn_err(CE_WARN, "%s: invalid buffer count %x",
+		    __func__, scmd->nbufs);
+	if ((scmd->trans_data_len != 0) && (scmd->trans_data != NULL))
+		kmem_free(scmd->trans_data, scmd->trans_data_len);
+	scmd->trans_data = NULL;
+	scmd->trans_data_len = 0;
+	scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
+}
+
+static void
 sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
 {
@@ -2309,7 +2448,13 @@
 	uint32_t buflen, iolen;
 	int ndx, ret;
 
+	if (ATOMIC8_GET(scmd->nbufs) > 0) {
+		atomic_dec_8(&scmd->nbufs);
+	}
+
 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
+		sbd_write_same_release_resources(task);
+		sbd_ats_remove_by_task(task);
 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
 		    dbuf->db_xfer_status, NULL);
 		return;
@@ -2319,7 +2464,16 @@
 		goto write_same_xfer_done;
 	}
 
-	if (scmd->len != 0) {
+	/* if this is an unnecessary callback just return */
+	if (((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) ||
+	    ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
+	    (scmd->trans_data == NULL)) {
+		sbd_ats_remove_by_task(task);
+		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+		return;
+	}
+
+	if (ATOMIC32_GET(scmd->len) != 0) {
 		/*
 		 * Initiate the next port xfer to occur in parallel
 		 * with writing this buf.
@@ -2343,14 +2497,21 @@
 	task->task_nbytes_transferred += buflen;
 
 write_same_xfer_done:
-	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+	if (ATOMIC32_GET(scmd->len) == 0 ||
+	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
 		stmf_free_dbuf(task, dbuf);
+		if (ATOMIC8_GET(scmd->nbufs) > 0)
+			return;
 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+			sbd_ats_remove_by_task(task);
+			sbd_write_same_release_resources(task);
 			stmf_scsilib_send_status(task, STATUS_CHECK,
 			    STMF_SAA_WRITE_ERROR);
 		} else {
 			ret = sbd_write_same_data(task, scmd);
+			sbd_ats_remove_by_task(task);
+			sbd_write_same_release_resources(task);
 			if (ret != SBD_SUCCESS) {
 				stmf_scsilib_send_status(task, STATUS_CHECK,
 				    STMF_SAA_WRITE_ERROR);
@@ -2358,14 +2519,6 @@
 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
 			}
 		}
-		/*
-		 * Only way we should get here is via handle_write_same(),
-		 * and that should make the following assertion always pass.
-		 */
-		ASSERT((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) &&
-		    scmd->trans_data != NULL);
-		kmem_free(scmd->trans_data, scmd->trans_data_len);
-		scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
 		return;
 	}
 	sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable);
@@ -2377,7 +2530,7 @@
 {
 	uint32_t len;
 
-	if (scmd->len == 0) {
+	if (ATOMIC32_GET(scmd->len) == 0) {
 		if (dbuf != NULL)
 			stmf_free_dbuf(task, dbuf);
 		return;
@@ -2392,8 +2545,8 @@
 	if (dbuf == NULL) {
 		uint32_t maxsize, minsize, old_minsize;
 
-		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
-		    scmd->len;
+		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
+		    ATOMIC32_GET(scmd->len);
 		minsize = maxsize >> 2;
 		do {
 			old_minsize = minsize;
@@ -2401,7 +2554,9 @@
 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
 		    (minsize >= 512));
 		if (dbuf == NULL) {
-			if (scmd->nbufs == 0) {
+			sbd_ats_remove_by_task(task);
+			sbd_write_same_release_resources(task);
+			if (ATOMIC8_GET(scmd->nbufs) == 0) {
 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
 				    STMF_ALLOC_FAILURE, NULL);
 			}
@@ -2409,15 +2564,16 @@
 		}
 	}
 
-	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
-	    scmd->len;
+	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
+	    ATOMIC32_GET(scmd->len);
 
 	dbuf->db_relative_offset = scmd->current_ro;
 	dbuf->db_data_size = len;
 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
 	(void) stmf_xfer_data(task, dbuf, 0);
-	scmd->nbufs++; /* outstanding port xfers and bufs used */
-	scmd->len -= len;
+	/* outstanding port xfers and bufs used */
+	atomic_inc_8(&scmd->nbufs);
+	atomic_add_32(&scmd->len, -len);
 	scmd->current_ro += len;
 }
 
@@ -2431,6 +2587,12 @@
 	uint8_t unmap;
 	uint8_t do_immediate_data = 0;
 
+	if (HardwareAcceleratedInit == 0) {
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_INVALID_OPCODE);
+		return;
+	}
+
 	task->task_cmd_xfer_length = 0;
 	if (task->task_additional_flags &
 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
@@ -2447,11 +2609,13 @@
 		return;
 	}
 	unmap = task->task_cdb[1] & 0x08;
+
 	if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) {
 		stmf_scsilib_send_status(task, STATUS_CHECK,
 		    STMF_SAA_INVALID_FIELD_IN_CDB);
 		return;
 	}
+
 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
@@ -2459,11 +2623,20 @@
 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
 	}
+
 	if (len == 0) {
 		stmf_scsilib_send_status(task, STATUS_CHECK,
 		    STMF_SAA_INVALID_FIELD_IN_CDB);
 		return;
 	}
+
+	if (sbd_ats_handling_before_io(task, sl, addr, len) !=
+	    SBD_SUCCESS) {
+		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS)
+			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
+		return;
+	}
+
 	addr <<= sl->sl_data_blocksize_shift;
 	len <<= sl->sl_data_blocksize_shift;
 
@@ -2492,6 +2665,7 @@
 		task->task_expected_xfer_length = task->task_cmd_xfer_length;
 	}
 	if ((addr + len) > sl->sl_lu_size) {
+		sbd_ats_remove_by_task(task);
 		stmf_scsilib_send_status(task, STATUS_CHECK,
 		    STMF_SAA_LBA_OUT_OF_RANGE);
 		return;
@@ -2502,6 +2676,7 @@
 
 	/* Some basic checks */
 	if ((len == 0) || (len != task->task_expected_xfer_length)) {
+		sbd_ats_remove_by_task(task);
 		stmf_scsilib_send_status(task, STATUS_CHECK,
 		    STMF_SAA_INVALID_FIELD_IN_CDB);
 		return;
@@ -2513,6 +2688,7 @@
 			if (initial_dbuf->db_data_size >
 			    task->task_expected_xfer_length) {
 				/* protocol error */
+				sbd_ats_remove_by_task(task);
 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
 				    STMF_INVALID_ARG, NULL);
 				return;
@@ -2529,7 +2705,8 @@
 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
 		task->task_lu_private = scmd;
 	}
-	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA;
+	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA |
+	    SBD_SCSI_CMD_ATS_RELATED;
 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
 	scmd->nbufs = 0;
 	scmd->len = (uint32_t)len;
@@ -2542,7 +2719,7 @@
 		 * Account for data passed in this write command
 		 */
 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
-		scmd->len -= dbuf->db_data_size;
+		atomic_add_32(&scmd->len, -dbuf->db_data_size);
 		scmd->current_ro += dbuf->db_data_size;
 		dbuf->db_xfer_status = STMF_SUCCESS;
 		sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 0);
@@ -2554,8 +2731,20 @@
 static void
 sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf)
 {
+	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
 	uint32_t cmd_xfer_len;
 
+	if (sbd_unmap_enable == 0) {
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_INVALID_OPCODE);
+		return;
+	}
+
+	if (sl->sl_flags & SL_WRITE_PROTECTED) {
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_WRITE_PROTECTED);
+		return;
+	}
 	cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
 
 	if (task->task_cdb[1] & 1) {
@@ -2605,14 +2794,26 @@
 
 	dfl = kmem_zalloc(DFL_SZ(num_desc), KM_SLEEP);
 	dfl->dfl_num_exts = num_desc;
+	/*
+	 * This should use ATS locking, but that was disabled by the
+	 * changes to ZFS to take advantage of TRIM in SSDs.
+	 *
+	 * Since the entire list is passed to ZFS as one list, ATS
+	 * locking is not done.  If this turns out to be detectable,
+	 * the entire list needs to be locked before the unmap and
+	 * then unlocked after the unmap completes.
+	 */
 	for (p = buf + 8, i = 0; num_desc; num_desc--, p += 16, i++) {
 		addr = READ_SCSI64(p, uint64_t);
+		len = READ_SCSI32(p+8, uint64_t);
 		addr <<= sl->sl_data_blocksize_shift;
-		len = READ_SCSI32(p+8, uint64_t);
 		len <<= sl->sl_data_blocksize_shift;
+
 		/* Prepare a list of extents to unmap */
 		dfl->dfl_exts[i].dfle_start = addr;
 		dfl->dfl_exts[i].dfle_length = len;
+
+		/* release the overlap */
 	}
 	ASSERT(i == dfl->dfl_num_exts);
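
The loop above walks the standard UNMAP parameter list: an 8-byte header
followed by 16-byte block descriptors, each holding an 8-byte starting LBA
and a 4-byte block count, all big-endian. A sketch of a single-descriptor
payload as an initiator might build it; the values are illustrative:

	/* Hypothetical UNMAP payload: 0x80 blocks starting at LBA 0x1000. */
	uint8_t ubuf[24] = { 0 };
	ubuf[1] = 22;		/* UNMAP data length (bytes after byte 1) */
	ubuf[3] = 16;		/* block descriptor data length */
	ubuf[14] = 0x10;	/* starting LBA 0x1000, bytes 8..15 */
	ubuf[19] = 0x80;	/* block count 0x80, bytes 16..19 */
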
 
@@ -2695,6 +2896,7 @@
 
 		inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
 		inq->inq_cmdque = 1;
+		inq->inq_3pc = 1;
 
 		if (sl->sl_flags & SL_VID_VALID) {
 			bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
@@ -2793,13 +2995,15 @@
 
 	switch (cdbp[2]) {
 	case 0x00:
-		page_length = 4 + (mgmt_url_size ? 1 : 0);
+		page_length = 5 + (mgmt_url_size ? 1 : 0);
+
 		if (sl->sl_flags & SL_UNMAP_ENABLED)
-			page_length += 2;
+			page_length += 1;
 
 		p[0] = byte0;
 		p[3] = page_length;
 		/* Supported VPD pages in ascending order */
+		/* CSTYLED */
 		{
 			uint8_t i = 5;
 
@@ -2808,8 +3012,8 @@
 			if (mgmt_url_size != 0)
 				p[i++] = 0x85;
 			p[i++] = 0x86;
+			p[i++] = 0xb0;
 			if (sl->sl_flags & SL_UNMAP_ENABLED) {
-				p[i++] = 0xb0;
 				p[i++] = 0xb2;
 			}
 		}
@@ -2842,7 +3046,7 @@
 			stmf_scsilib_send_status(task, STATUS_CHECK,
 			    STMF_SAA_INVALID_FIELD_IN_CDB);
 			goto err_done;
-		}
+		} /* CSTYLED */
 		{
 			uint16_t idx, newidx, sz, url_size;
 			char *url;
@@ -2903,17 +3107,23 @@
 		break;
 
 	case 0xb0:
-		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
-			stmf_scsilib_send_status(task, STATUS_CHECK,
-			    STMF_SAA_INVALID_FIELD_IN_CDB);
-			goto err_done;
-		}
 		page_length = 0x3c;
 		p[0] = byte0;
 		p[1] = 0xb0;
 		p[3] = page_length;
-		p[20] = p[21] = p[22] = p[23] = 0xFF;
-		p[24] = p[25] = p[26] = p[27] = 0xFF;
+		p[4] = 1;
+		p[5] = sbd_ats_max_nblks();
+		if (sl->sl_flags & SL_UNMAP_ENABLED && sbd_unmap_enable) {
+			p[20] = (stmf_sbd_unmap_max_nblks >> 24) & 0xff;
+			p[21] = (stmf_sbd_unmap_max_nblks >> 16) & 0xff;
+			p[22] = (stmf_sbd_unmap_max_nblks >> 8) & 0xff;
+			p[23] = stmf_sbd_unmap_max_nblks & 0xff;
+
+			p[24] = 0;
+			p[25] = 0;
+			p[26] = 0;
+			p[27] = 0xFF;
+		}
 		xfer_size = page_length + 4;
 		break;
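
The Block Limits (0xb0) changes above advertise the new ATS and UNMAP
limits to initiators: byte 5 carries the maximum COMPARE AND WRITE length
from sbd_ats_max_nblks(), bytes 20-23 the maximum UNMAP LBA count taken
from stmf_sbd_unmap_max_nblks, and bytes 24-27 the maximum UNMAP block
descriptor count (255). A sketch of the corresponding initiator-side
decode:

	/* Hypothetical decode of the fields set above. */
	uint8_t  max_caw_nblks = p[5];
	uint32_t max_unmap_lba_cnt = ((uint32_t)p[20] << 24) |
	    ((uint32_t)p[21] << 16) | ((uint32_t)p[22] << 8) | p[23];
	uint32_t max_unmap_desc_cnt = ((uint32_t)p[24] << 24) |
	    ((uint32_t)p[25] << 16) | ((uint32_t)p[26] << 8) | p[27];
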
 
@@ -2935,7 +3145,7 @@
 			exp++;
 		}
 		p[4] = exp;
-		p[5] = 0xc0;
+		p[5] = 0xc0;	/* Logical provisioning UNMAP and WRITE SAME */
 		xfer_size = page_length + 4;
 		break;
 
@@ -2956,7 +3166,7 @@
 sbd_task_alloc(struct scsi_task *task)
 {
 	if ((task->task_lu_private =
-	    kmem_alloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
+	    kmem_zalloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
 		scmd->flags = 0;
 		return (STMF_SUCCESS);
@@ -3022,8 +3232,40 @@
 	mutex_exit(&sl->sl_lock);
 }
 
-
-
+/*
+ * Given a LU and a task, check if the task is causing a reservation
+ * conflict. Returns 1 in case of conflict, 0 otherwise.
+ * Note that the LU might not be the same LU as the one in the task,
+ * but the caller makes sure that the LU can be accessed.
+ */
+int
+sbd_check_reservation_conflict(struct sbd_lu *sl, struct scsi_task *task)
+{
+	sbd_it_data_t *it;
+
+	it = task->task_lu_itl_handle;
+	ASSERT(it);
+	if (sl->sl_access_state == SBD_LU_ACTIVE) {
+		if (SBD_PGR_RSVD(sl->sl_pgr)) {
+			if (sbd_pgr_reservation_conflict(task, sl)) {
+				return (1);
+			}
+		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
+		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
+			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
+				return (1);
+			}
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Keep in mind that sbd_new_task can be called multiple times for the same
+ * task, because a call to stmf_task_poll_lu() later results in a call to
+ * sbd_task_poll().
+ */
 void
 sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
 {
@@ -3087,88 +3329,34 @@
 	if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
 	    sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
 		stmf_scsilib_send_status(task, STATUS_CHECK,
-		    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
+		    STMF_SAA_LU_NO_ACCESS_TRANSITION);
 		return;
 	}
 
-	/* Checking ua conditions as per SAM3R14 5.3.2 specified order */
-	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
-		uint32_t saa = 0;
-
-		mutex_enter(&sl->sl_lock);
-		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
-			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
-			saa = STMF_SAA_POR;
-		}
-		mutex_exit(&sl->sl_lock);
-		if (saa) {
-			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
-			return;
-		}
-	}
-
-	/* Reservation conflict checks */
-	if (sl->sl_access_state == SBD_LU_ACTIVE) {
-		if (SBD_PGR_RSVD(sl->sl_pgr)) {
-			if (sbd_pgr_reservation_conflict(task)) {
-				stmf_scsilib_send_status(task,
-				    STATUS_RESERVATION_CONFLICT, 0);
-				return;
-			}
-		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
-		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
-			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
-				stmf_scsilib_send_status(task,
-				    STATUS_RESERVATION_CONFLICT, 0);
-				return;
-			}
-		}
-	}
-
-	/* Rest of the ua conndition checks */
-	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
-		uint32_t saa = 0;
-
-		mutex_enter(&sl->sl_lock);
-		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
-			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
-			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
-			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
-			    (task->task_cdb[1] ==
-			    SSVC_ACTION_READ_CAPACITY_G4))) {
-				saa = 0;
-			} else {
-				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
-			}
-		} else if (it->sbd_it_ua_conditions &
-		    SBD_UA_MODE_PARAMETERS_CHANGED) {
-			it->sbd_it_ua_conditions &=
-			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
-			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
-		} else if (it->sbd_it_ua_conditions &
-		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
-			it->sbd_it_ua_conditions &=
-			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
-			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
-		} else if (it->sbd_it_ua_conditions &
-		    SBD_UA_ACCESS_STATE_TRANSITION) {
-			it->sbd_it_ua_conditions &=
-			    ~SBD_UA_ACCESS_STATE_TRANSITION;
-			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
-		} else {
-			it->sbd_it_ua_conditions = 0;
-			saa = 0;
-		}
-		mutex_exit(&sl->sl_lock);
-		if (saa) {
-			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
-			return;
-		}
-	}
-
 	cdb0 = task->task_cdb[0];
 	cdb1 = task->task_cdb[1];
-
+	/*
+	 * Special case for different versions of Windows.
+	 * 1) Windows 2012 and VMware will fail to discover LUs if a READ
+	 *    operation sent down the standby path returns an error. By
+	 *    default standby_fail_reads will be set to 0.
+	 * 2) Windows 2008 R2 has a severe performance problem if READ ops
+	 *    aren't rejected on the standby path. 2008 sends commands
+	 *    down the standby path, which then must be proxied over to the
+	 *    active node and back.
+	 */
+	if ((sl->sl_access_state == SBD_LU_STANDBY) &&
+	    stmf_standby_fail_reads &&
+	    (cdb0 == SCMD_READ || cdb0 == SCMD_READ_G1 ||
+	    cdb0 == SCMD_READ_G4 || cdb0 == SCMD_READ_G5)) {
+		stmf_scsilib_send_status(task, STATUS_CHECK,
+		    STMF_SAA_LU_NO_ACCESS_STANDBY);
+		return;
+	}
+
+	/*
+	 * Don't go further if cmd is unsupported in standby mode
+	 */
 	if (sl->sl_access_state == SBD_LU_STANDBY) {
 		if (cdb0 != SCMD_INQUIRY &&
 		    cdb0 != SCMD_MODE_SENSE &&
@@ -3197,7 +3385,86 @@
 			    STMF_SAA_LU_NO_ACCESS_STANDBY);
 			return;
 		}
-
+	}
+
+	/*
+	 * Check ua conditions in the order specified by SAM3R14 5.3.2.
+	 * During MPIO/ALUA failover, cmds come in through both the local
+	 * ports and the proxy port provider (i.e. pppt); we want to report
+	 * unit attention only on local cmds, since initiators (Windows
+	 * MPIO/DSM) would continue sending I/O to the target that reported
+	 * unit attention.
+	 */
+	if ((it->sbd_it_ua_conditions) &&
+	    !(task->task_additional_flags & TASK_AF_PPPT_TASK) &&
+	    (task->task_cdb[0] != SCMD_INQUIRY)) {
+		uint32_t saa = 0;
+
+		mutex_enter(&sl->sl_lock);
+		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
+			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
+			saa = STMF_SAA_POR;
+		} else if (it->sbd_it_ua_conditions &
+		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
+			it->sbd_it_ua_conditions &=
+			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
+			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
+		}
+		mutex_exit(&sl->sl_lock);
+		if (saa) {
+			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
+			return;
+		}
+	}
+
+	/* Reservation conflict checks */
+	if (sbd_check_reservation_conflict(sl, task)) {
+		stmf_scsilib_send_status(task,
+		    STATUS_RESERVATION_CONFLICT, 0);
+		return;
+	}
+
+	/* Rest of the ua condition checks */
+	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
+		uint32_t saa = 0;
+
+		mutex_enter(&sl->sl_lock);
+		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
+			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
+			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
+			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
+			    (task->task_cdb[1] ==
+			    SSVC_ACTION_READ_CAPACITY_G4))) {
+				saa = 0;
+			} else {
+				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
+			}
+		} else if (it->sbd_it_ua_conditions &
+		    SBD_UA_MODE_PARAMETERS_CHANGED) {
+			it->sbd_it_ua_conditions &=
+			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
+			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
+		} else if (it->sbd_it_ua_conditions &
+		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
+			saa = 0;
+		} else if (it->sbd_it_ua_conditions & SBD_UA_POR) {
+			saa = 0;
+		} else if (it->sbd_it_ua_conditions &
+		    SBD_UA_ACCESS_STATE_TRANSITION) {
+			it->sbd_it_ua_conditions &=
+			    ~SBD_UA_ACCESS_STATE_TRANSITION;
+			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
+		} else {
+			it->sbd_it_ua_conditions = 0;
+			saa = 0;
+		}
+		mutex_exit(&sl->sl_lock);
+		if (saa) {
+			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
+			return;
+		}
+	}
+
+	if (sl->sl_access_state == SBD_LU_STANDBY) {
 		/*
 		 * is this a short write?
 		 * if so, we'll need to wait until we have the buffer
@@ -3375,6 +3642,21 @@
 		return;
 	}
 
+	if (cdb0 == SCMD_COMPARE_AND_WRITE) {
+		sbd_handle_ats(task, initial_dbuf);
+		return;
+	}
+
+	if (cdb0 == SCMD_EXTENDED_COPY) {
+		sbd_handle_xcopy(task, initial_dbuf);
+		return;
+	}
+
+	if (cdb0 == SCMD_RECV_COPY_RESULTS) {
+		sbd_handle_recv_copy_results(task, initial_dbuf);
+		return;
+	}
+
 	if (cdb0 == SCMD_TEST_UNIT_READY) {	/* Test unit ready */
 		task->task_cmd_xfer_length = 0;
 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
@@ -3441,7 +3723,6 @@
 		sbd_handle_write(task, initial_dbuf);
 		return;
 	}
-
 	stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
 }
 
@@ -3481,12 +3762,18 @@
 		break;
 
 	case (SBD_CMD_SCSI_WRITE):
-		if ((task->task_cdb[0] == SCMD_WRITE_SAME_G1) ||
-		    (task->task_cdb[0] == SCMD_WRITE_SAME_G4)) {
+		switch (task->task_cdb[0]) {
+		case SCMD_WRITE_SAME_G1:
+		case SCMD_WRITE_SAME_G4:
 			sbd_handle_write_same_xfer_completion(task, scmd, dbuf,
 			    1);
-		} else {
+			break;
+		case SCMD_COMPARE_AND_WRITE:
+			sbd_handle_ats_xfer_completion(task, scmd, dbuf, 1);
+			break;
+		default:
 			sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
+			/* FALLTHRU */
 		}
 		break;
 
@@ -3552,6 +3839,7 @@
 
 	ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
 	task = (scsi_task_t *)arg;
+	sbd_ats_remove_by_task(task);
 	if (task->task_lu_private) {
 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
 
@@ -3569,6 +3857,15 @@
 	return (STMF_NOT_FOUND);
 }
 
+void
+sbd_task_poll(struct scsi_task *task)
+{
+	stmf_data_buf_t *initial_dbuf;
+
+	initial_dbuf = stmf_handle_to_buf(task, 0);
+	sbd_new_task(task, initial_dbuf);
+}
+
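
sbd_task_poll() is the redelivery hook behind the ATS overlap checks added
throughout this file: when sbd_ats_handling_before_io() fails because the
I/O range overlaps an in-flight COMPARE AND WRITE, the handler parks the
task with stmf_task_poll_lu() and the task later re-enters sbd_new_task()
through this function, which is why sbd_new_task() must tolerate being
called more than once per task. The call-site idiom, as used in
sbd_handle_read() and sbd_handle_write() above:

	if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
	    SBD_SUCCESS) {
		/* Retry via sbd_task_poll() in ~10ms, else give up BUSY. */
		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS)
			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
		return;
	}
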
 /*
  * This function is called during task clean-up if the
  * DB_LU_FLAG is set on the dbuf. This should only be called for
@@ -3581,7 +3878,7 @@
 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
 
 	ASSERT(dbuf->db_lu_private);
-	ASSERT(scmd && scmd->nbufs > 0);
+	ASSERT(scmd && ATOMIC8_GET(scmd->nbufs) > 0);
 	ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
 	ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
 	ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
@@ -3595,7 +3892,7 @@
 		cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
 		    scmd->cmd_type, (void *)task);
 	}
-	if (--scmd->nbufs == 0)
+	if (atomic_dec_8_nv(&scmd->nbufs) == 0)
 		rw_exit(&sl->sl_access_state_lock);
 	stmf_teardown_dbuf(task, dbuf);
 	stmf_free(dbuf);
@@ -3692,13 +3989,20 @@
 sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
 {
 	int r = 0;
-	int ret;
-
+	sbd_status_t ret;
+
+	rw_enter(&sl->sl_access_state_lock, RW_READER);
+	if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
+		ret = SBD_FILEIO_FAILURE;
+		goto flush_fail;
+	}
 	if (fsync_done)
 		goto over_fsync;
 	if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
-		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL))
-			return (SBD_FAILURE);
+		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL)) {
+			ret = SBD_FAILURE;
+			goto flush_fail;
+		}
 	}
 over_fsync:
 	if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
@@ -3709,12 +4013,14 @@
 			mutex_enter(&sl->sl_lock);
 			sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
 			mutex_exit(&sl->sl_lock);
-		} else if (ret != 0) {
-			return (SBD_FAILURE);
+		} else {
+			ret = (ret != 0) ? SBD_FAILURE : SBD_SUCCESS;
 		}
 	}
-
-	return (SBD_SUCCESS);
+flush_fail:
+	rw_exit(&sl->sl_access_state_lock);
+
+	return (ret);
 }
 
 /* ARGSUSED */
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h	Thu Nov 14 07:43:52 2019 -0700
@@ -34,6 +34,7 @@
 #endif
 
 typedef	stmf_status_t	sbd_status_t;
+#include "ats_copy_mgr.h"
 extern char sbd_vendor_id[];
 extern char sbd_product_id[];
 extern char sbd_revision[];
@@ -56,6 +57,7 @@
 #define	SBD_FILEIO_FAILURE	(SBD_FAILURE | STMF_FSC(7))
 #define	SBD_IO_PAST_EOF		(SBD_FAILURE | STMF_FSC(8))
 #define	SBD_BUSY		(SBD_FAILURE | STMF_FSC(9))
+#define	SBD_COMPARE_FAILED	(SBD_FAILURE | STMF_FSC(10))
 
 #define	SHARED_META_DATA_SIZE	65536
 #define	SBD_META_OFFSET		4096
@@ -246,6 +248,7 @@
 	struct sbd_it_data	*sl_it_list;
 	struct sbd_pgr		*sl_pgr;
 	uint64_t	sl_rs_owner_session_id;
+	list_t		sl_ats_io_list;
 } sbd_lu_t;
 
 /*
@@ -304,6 +307,11 @@
 void sbd_wcd_get(int *wcd, sbd_lu_t *sl);
 int sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl);
 
+void sbd_handle_short_write_transfers(scsi_task_t *, stmf_data_buf_t *,
+    uint32_t);
+void sbd_handle_short_read_transfers(scsi_task_t *, stmf_data_buf_t *,
+    uint8_t *, uint32_t, uint32_t);
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/common/io/comstar/port/fct/fct.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/port/fct/fct.c	Thu Nov 14 07:43:52 2019 -0700
@@ -113,6 +113,14 @@
 static fct_i_local_port_t *fct_iport_list = NULL;
 static kmutex_t fct_global_mutex;
 uint32_t fct_rscn_options = RSCN_OPTION_VERIFY;
+/*
+ * This is to keep fibre channel from hanging if syseventd is
+ * not working correctly and the queue fills. By default events
+ * are logged without blocking and may be dropped; setting this
+ * tunable forces event logging to always happen.
+ */
+static uint8_t fct_force_log = 0;  /* use DDI_SLEEP on ddi_log_sysevent */
+
 
 int
 _init(void)
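
Although fct_force_log is static, it should still be settable from
/etc/system provided the symbol remains visible in the fct module (an
assumption, not verified here):

	* Never drop FC sysevents; block in ddi_log_sysevent() instead.
	set fct:fct_force_log = 1
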
@@ -1612,7 +1620,8 @@
 
 fct_cmd_t *
 fct_scsi_task_alloc(fct_local_port_t *port, uint16_t rp_handle,
-    uint32_t rportid, uint8_t *lun, uint16_t cdb_length, uint16_t task_ext)
+    uint32_t rportid, uint8_t *lun, uint16_t cdb_length,
+    uint16_t task_ext)
 {
 	fct_cmd_t *cmd;
 	fct_i_cmd_t *icmd;
@@ -2829,8 +2838,8 @@
 	/* For non FCP Rest of the work is done by the terminator */
 	/* For FCP stuff just call stmf */
 	if (cmd->cmd_type == FCT_CMD_FCP_XCHG) {
-		stmf_task_lport_aborted((scsi_task_t *)cmd->cmd_specific,
-		    s, STMF_IOF_LPORT_DONE);
+		stmf_task_lport_aborted_unlocked(
+		    (scsi_task_t *)cmd->cmd_specific, s, STMF_IOF_LPORT_DONE);
 	}
 }
 
@@ -3432,6 +3441,7 @@
 {
 	nvlist_t *attr_list;
 	int port_instance;
+	int rc, sleep = DDI_SLEEP;
 
 	if (!fct_dip)
 		return;
@@ -3452,8 +3462,15 @@
 		goto error;
 	}
 
-	(void) ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
-	    subclass, attr_list, NULL, DDI_SLEEP);
+	if (fct_force_log == 0) {
+		sleep = DDI_NOSLEEP;
+	}
+	rc = ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
+	    subclass, attr_list, NULL, sleep);
+	if (rc != DDI_SUCCESS) {
+		cmn_err(CE_WARN, "%s: event dropped", __func__);
+		goto error;
+	}
 
 	nvlist_free(attr_list);
 	return;
@@ -3471,6 +3488,7 @@
 {
 	nvlist_t *attr_list;
 	int port_instance;
+	int rc, sleep = DDI_SLEEP;
 
 	if (!fct_dip)
 		return;
@@ -3501,8 +3519,15 @@
 		goto error;
 	}
 
-	(void) ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
-	    subclass, attr_list, NULL, DDI_SLEEP);
+	if (fct_force_log == 0) {
+		sleep = DDI_NOSLEEP;
+	}
+	rc = ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
+	    subclass, attr_list, NULL, sleep);
+	if (rc != DDI_SUCCESS) {
+		cmn_err(CE_WARN, "%s: queue full event lost", __func__);
+		goto error;
+	}
 
 	nvlist_free(attr_list);
 	return;
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c	Thu Nov 14 07:43:52 2019 -0700
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  *
- * Copyright 2014, 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
  * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  */
 
@@ -564,7 +564,8 @@
 			it_config_free_cmn(cfg);
 		if (cfg_pnvlist)
 			kmem_free(cfg_pnvlist, setcfg.set_cfg_pnvlist_len);
-		nvlist_free(cfg_nvlist);
+		if (cfg_nvlist)
+			nvlist_free(cfg_nvlist);
 
 		/*
 		 * Now that the reconfig is complete set our state back to
@@ -992,7 +993,7 @@
 			 * STMF_ABORTED, the code actually looks for
 			 * STMF_ABORT_SUCCESS.
 			 */
-			stmf_task_lport_aborted(itask->it_stmf_task,
+			stmf_task_lport_aborted_unlocked(itask->it_stmf_task,
 			    STMF_ABORT_SUCCESS, STMF_IOF_LPORT_DONE);
 			return;
 		} else {
@@ -1224,6 +1225,7 @@
 	mutex_init(&ict->ict_mutex, NULL, MUTEX_DRIVER, NULL);
 	mutex_init(&ict->ict_statsn_mutex, NULL, MUTEX_DRIVER, NULL);
 	idm_refcnt_init(&ict->ict_refcnt, ict);
+	idm_refcnt_init(&ict->ict_dispatch_refcnt, ict);
 
 	/*
 	 * Initialize login state machine
@@ -1369,6 +1371,9 @@
 	 * Make sure there aren't any PDU's transitioning from the receive
 	 * handler to the dispatch taskq.
 	 */
+	if (idm_refcnt_is_held(&ict->ict_dispatch_refcnt) < 0) {
+		cmn_err(CE_WARN, "Possible hang in iscsit_conn_lost");
+	}
 	idm_refcnt_wait_ref(&ict->ict_dispatch_refcnt);
 
 	return (IDM_STATUS_SUCCESS);
@@ -1385,13 +1390,10 @@
 
 	/* Generate session state machine event */
 	if (ict->ict_sess != NULL) {
-		/*
-		 * Session state machine will call iscsit_conn_destroy_done()
-		 * when it has removed references to this connection.
-		 */
 		iscsit_sess_sm_event(ict->ict_sess, SE_CONN_FAIL, ict);
 	}
 
+	idm_refcnt_wait_ref(&ict->ict_dispatch_refcnt);
 	idm_refcnt_wait_ref(&ict->ict_refcnt);
 	/*
 	 * The session state machine does not need to post
@@ -1407,6 +1409,7 @@
 	iscsit_text_cmd_fini(ict);
 
 	mutex_destroy(&ict->ict_mutex);
+	idm_refcnt_destroy(&ict->ict_dispatch_refcnt);
 	idm_refcnt_destroy(&ict->ict_refcnt);
 	kmem_free(ict, sizeof (*ict));
 
@@ -1888,20 +1891,8 @@
 		 * Call IDM to abort the task.  Due to a variety of
 		 * circumstances the task may already be in the process of
 		 * aborting.
-		 * We'll let IDM worry about rationalizing all that except
-		 * for one particular instance.  If the state of the task
-		 * is TASK_COMPLETE, we need to indicate to the framework
-		 * that we are in fact done.  This typically happens with
-		 * framework-initiated task management type requests
-		 * (e.g. abort task).
 		 */
-		if (idt->idt_state == TASK_COMPLETE) {
-			idm_refcnt_wait_ref(&idt->idt_refcnt);
-			return (STMF_ABORT_SUCCESS);
-		} else {
-			idm_task_abort(idt->idt_ic, idt, AT_TASK_MGMT_ABORT);
-			return (STMF_SUCCESS);
-		}
+		return (idm_task_abort(idt->idt_ic, idt, AT_TASK_MGMT_ABORT));
 	}
 
 	/*NOTREACHED*/
@@ -1962,6 +1953,21 @@
 	iscsit_process_pdu_in_queue(ict->ict_sess);
 }
 
+static int
+iscsit_validate_idm_pdu(idm_pdu_t *rx_pdu)
+{
+	iscsi_scsi_cmd_hdr_t	*iscsi_scsi =
+	    (iscsi_scsi_cmd_hdr_t *)rx_pdu->isp_hdr;
+
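+	/*
+	 * A READ CDB must never assert the iSCSI write-data flag; reject
+	 * the mismatch so the caller can fail the command with a CHECK
+	 * CONDITION response.
+	 */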
+	if ((iscsi_scsi->scb[0] == SCMD_READ) ||
+	    (iscsi_scsi->scb[0] == SCMD_READ_G1) ||
+	    (iscsi_scsi->scb[0] == SCMD_READ_G4)) {
+		if (iscsi_scsi->flags & ISCSI_FLAG_CMD_WRITE)
+			return (IDM_STATUS_FAIL);
+	}
+	return (IDM_STATUS_SUCCESS);
+}
+
 /*
  * ISCSI protocol
  */
@@ -1979,6 +1985,15 @@
 	uint16_t		addl_cdb_len = 0;
 
 	ict = ic->ic_handle;
+	if (iscsit_validate_idm_pdu(rx_pdu) != IDM_STATUS_SUCCESS) {
+		/* Finish processing request */
+		iscsit_set_cmdsn(ict, rx_pdu);
+
+		iscsit_send_direct_scsi_resp(ict, rx_pdu,
+		    ISCSI_STATUS_CMD_COMPLETED, STATUS_CHECK);
+		idm_pdu_complete(rx_pdu, IDM_STATUS_PROTOCOL_ERROR);
+		return;
+	}
 
 	itask = iscsit_task_alloc(ict);
 	if (itask == NULL) {
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h	Thu Nov 14 07:43:52 2019 -0700
@@ -56,9 +56,6 @@
 #define	ISCSI_MAX_TSIH		0xffff
 #define	ISCSI_UNSPEC_TSIH	0
 
-/* Max targets per system */
-#define	ISCSIT_MAX_TARGETS	1024
-
 #define	ISCSIT_MAX_WINDOW	1024
 #define	ISCSIT_RXPDU_QUEUE_LEN	2048
 
@@ -124,74 +121,63 @@
 } iscsit_portal_t;
 
 
+#define	TGT_STATE_LIST() \
+	item(TS_UNDEFINED) \
+	item(TS_CREATED) \
+	item(TS_ONLINING) \
+	item(TS_ONLINE) \
+	item(TS_STMF_ONLINE) \
+	item(TS_DELETING_NEED_OFFLINE) \
+	item(TS_OFFLINING) \
+	item(TS_OFFLINE) \
+	item(TS_STMF_OFFLINE) \
+	item(TS_DELETING_STMF_DEREG) \
+	item(TS_DELETING_STMF_DEREG_FAIL) \
+	item(TS_DELETING) \
+	item(TS_MAX_STATE)
+
 /* Target states and events, update iscsit_ts_name table whenever modified */
 typedef enum {
-	TS_UNDEFINED = 0,
-	TS_CREATED,
-	TS_ONLINING,
-	TS_ONLINE,
-	TS_STMF_ONLINE,
-	TS_DELETING_NEED_OFFLINE,
-	TS_OFFLINING,
-	TS_OFFLINE,
-	TS_STMF_OFFLINE,
-	TS_DELETING_STMF_DEREG,
-	TS_DELETING_STMF_DEREG_FAIL,
-	TS_DELETING,
-	TS_MAX_STATE
+#define	item(a)	a,
+	TGT_STATE_LIST()
+#undef	item
 } iscsit_tgt_state_t;
 
 #ifdef ISCSIT_TGT_SM_STRINGS
-static const char *iscsit_ts_name[TS_MAX_STATE+1] = {
-	"TS_UNDEFINED",
-	"TS_CREATED",
-	"TS_ONLINING",
-	"TS_ONLINE",
-	"TS_STMF_ONLINE",
-	"TS_DELETING_NEED_OFFLINE",
-	"TS_OFFLINING",
-	"TS_OFFLINE",
-	"TS_STMF_OFFLINE",
-	"TS_DELETING_STMF_DEREG",
-	"TS_DELETING_STMF_DEREG_FAIL",
-	"TS_DELETING",
-	"TS_MAX_STATE"
+static const char *iscsit_ts_name[TS_MAX_STATE + 1] = {
+#define	item(a) #a,
+	TGT_STATE_LIST()
+#undef	item
 };
 #endif
 
+#define	TGT_EVENT_LIST() \
+	item(TE_UNDEFINED) \
+	item(TE_STMF_ONLINE_REQ) \
+	item(TE_ONLINE_SUCCESS) \
+	item(TE_ONLINE_FAIL) \
+	item(TE_STMF_ONLINE_COMPLETE_ACK) \
+	item(TE_STMF_OFFLINE_REQ) \
+	item(TE_OFFLINE_COMPLETE) \
+	item(TE_STMF_OFFLINE_COMPLETE_ACK) \
+	item(TE_DELETE) \
+	item(TE_STMF_DEREG_SUCCESS) \
+	item(TE_STMF_DEREG_FAIL) \
+	item(TE_STMF_DEREG_RETRY) \
+	item(TE_WAIT_REF_COMPLETE) \
+	item(TE_MAX_EVENT)
+
 typedef enum {
-	TE_UNDEFINED = 0,
-	TE_STMF_ONLINE_REQ,
-	TE_ONLINE_SUCCESS,
-	TE_ONLINE_FAIL,
-	TE_STMF_ONLINE_COMPLETE_ACK,
-	TE_STMF_OFFLINE_REQ,
-	TE_OFFLINE_COMPLETE,
-	TE_STMF_OFFLINE_COMPLETE_ACK,
-	TE_DELETE,
-	TE_STMF_DEREG_SUCCESS,
-	TE_STMF_DEREG_FAIL,
-	TE_STMF_DEREG_RETRY,
-	TE_WAIT_REF_COMPLETE,
-	TE_MAX_EVENT
+#define	item(a) a,
+	TGT_EVENT_LIST()
+#undef	item
 } iscsit_tgt_event_t;
 
 #ifdef ISCSIT_TGT_SM_STRINGS
-static const char *iscsit_te_name[TE_MAX_EVENT+1] = {
-	"TE_UNDEFINED",
-	"TE_STMF_ONLINE_REQ",
-	"TE_ONLINE_SUCCESS",
-	"TE_ONLINE_FAIL",
-	"TE_STMF_ONLINE_COMPLETE_ACK",
-	"TE_STMF_OFFLINE_REQ",
-	"TE_OFFLINE_COMPLETE",
-	"TE_STMF_OFFLINE_COMPLETE_ACK",
-	"TE_DELETE",
-	"TE_STMF_DEREG_SUCCESS",
-	"TE_STMF_DEREG_FAIL",
-	"TE_STMF_DEREG_RETRY",
-	"TE_WAIT_REF_COMPLETE",
-	"TE_MAX_EVENT"
+static const char *iscsit_te_name[TE_MAX_EVENT + 1] = {
+#define	item(a) #a,
+	TGT_EVENT_LIST()
+#undef	item
 };
 #endif
 
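
The rewrite above (and the matching pppt.h change later in this diff) replaces
hand-maintained enum/string pairs with a single X-macro list, so the
enumerators and their name tables can no longer drift apart. A minimal
standalone sketch of the pattern, using a hypothetical COLOR_LIST rather than
anything from this changeset:

    #include <stdio.h>

    #define	COLOR_LIST() \
    	item(RED) \
    	item(GREEN) \
    	item(BLUE) \
    	item(COLOR_MAX)

    typedef enum {
    #define	item(a) a,	/* each list entry becomes an enumerator */
    	COLOR_LIST()
    #undef	item
    } color_t;

    static const char *color_name[COLOR_MAX + 1] = {
    #define	item(a) #a,	/* each list entry becomes its name string */
    	COLOR_LIST()
    #undef	item
    };

    int
    main(void)
    {
    	(void) printf("%d -> %s\n", GREEN, color_name[GREEN]);
    	return (0);	/* prints "1 -> GREEN" */
    }
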
@@ -238,7 +224,7 @@
 	uint8_t			ca_ini_chapsecret[iscsitAuthStringMaxLength];
 	int			ca_ini_chapsecretlen;
 
-	/* RADIUS authentication information   	*/
+	/* RADIUS authentication information	*/
 	boolean_t		ca_use_radius;
 	struct sockaddr_storage	ca_radius_server;
 	uint8_t			ca_radius_secret[iscsitAuthStringMaxLength];
@@ -284,67 +270,62 @@
 
 struct iscsit_conn_s;
 
+/* Add new session states above SS_MAX_STATE */
+#define	SESSION_STATE_LIST() \
+	item(SS_UNDEFINED) \
+	item(SS_Q1_FREE) \
+	item(SS_Q2_ACTIVE) \
+	item(SS_Q3_LOGGED_IN) \
+	item(SS_Q4_FAILED) \
+	item(SS_Q5_CONTINUE) \
+	item(SS_Q6_DONE) \
+	item(SS_Q7_ERROR) \
+	item(SS_MAX_STATE)
+
 /* Update iscsit_ss_name table whenever session states are modified */
 typedef enum {
-	SS_UNDEFINED = 0,
-	SS_Q1_FREE,
-	SS_Q2_ACTIVE,
-	SS_Q3_LOGGED_IN,
-	SS_Q4_FAILED,
-	SS_Q5_CONTINUE,
-	SS_Q6_DONE,
-	SS_Q7_ERROR,
-	/* Add new session states above SS_MAX_STATE */
-	SS_MAX_STATE
+#define	item(a) a,
+	SESSION_STATE_LIST()
+#undef	item
 } iscsit_session_state_t;
 
 #ifdef ISCSIT_SESS_SM_STRINGS
 /* An array of state text values, for use in logging state transitions */
-static const char *iscsit_ss_name[SS_MAX_STATE+1] = {
-	"SS_UNDEFINED",
-	"SS_Q1_FREE",
-	"SS_Q2_ACTIVE",
-	"SS_Q3_LOGGED_IN",
-	"SS_Q4_FAILED",
-	"SS_Q5_CONTINUE",
-	"SS_Q6_DONE",
-	"SS_Q7_ERROR",
-	"SS_MAX_STATE"
+static const char *iscsit_ss_name[SS_MAX_STATE + 1] = {
+#define	item(a) #a,
+	SESSION_STATE_LIST()
+#undef	item
 };
 #endif
 
+/* Add new events above SE_MAX_EVENT */
+#define	SESSION_EVENT_LIST() \
+	item(SE_UNDEFINED) \
+	item(SE_CONN_IN_LOGIN)	/* From login state machine */ \
+	item(SE_CONN_LOGGED_IN)	/* FFP enabled client notification */ \
+	item(SE_CONN_FFP_FAIL)	/* FFP disabled client notification */ \
+	item(SE_CONN_FFP_DISABLE) /* FFP disabled client notification */ \
+	item(SE_CONN_FAIL) /* Conn destroy client notification */ \
+	item(SE_SESSION_CLOSE)	/* FFP disabled client notification */ \
+	item(SE_SESSION_REINSTATE) /* From login state machine */ \
+	item(SE_SESSION_TIMEOUT) /* Internal */ \
+	item(SE_SESSION_CONTINUE) /* From login state machine */ \
+	item(SE_SESSION_CONTINUE_FAIL) /* From login state machine? */ \
+	item(SE_MAX_EVENT)
+
 /* Update iscsit_se_name table whenever session events are modified */
 typedef enum {
-	SE_UNDEFINED = 0,
-	SE_CONN_IN_LOGIN,	/* From login state machine */
-	SE_CONN_LOGGED_IN,	/* FFP enabled client notification */
-	SE_CONN_FFP_FAIL,	/* FFP disabled client notification */
-	SE_CONN_FFP_DISABLE,	/* FFP disabled client notification */
-	SE_CONN_FAIL,		/* Conn destroy client notification */
-	SE_SESSION_CLOSE,	/* FFP disabled client notification */
-	SE_SESSION_REINSTATE,	/* From login state machine */
-	SE_SESSION_TIMEOUT,	/* Internal */
-	SE_SESSION_CONTINUE,	/* From login state machine */
-	SE_SESSION_CONTINUE_FAIL, /* From login state machine? */
-	/* Add new events above SE_MAX_EVENT */
-	SE_MAX_EVENT
+#define	item(a) a,
+	SESSION_EVENT_LIST()
+#undef	item
 } iscsit_session_event_t;
 
 #ifdef ISCSIT_SESS_SM_STRINGS
 /* An array of event text values, for use in logging events */
-static const char *iscsit_se_name[SE_MAX_EVENT+1] = {
-	"SE_UNDEFINED",
-	"SE_CONN_IN_LOGIN",
-	"SE_CONN_LOGGED_IN",
-	"SE_CONN_FFP_FAIL",
-	"SE_CONN_FFP_DISABLE",
-	"SE_CONN_FAIL",
-	"SE_SESSION_CLOSE",
-	"SE_SESSION_REINSTATE",
-	"SE_SESSION_TIMEOUT",
-	"SE_SESSION_CONTINUE",
-	"SE_SESSION_CONTINUE_FAIL",
-	"SE_MAX_EVENT"
+static const char *iscsit_se_name[SE_MAX_EVENT + 1] = {
+#define	item(a) #a,
+	SESSION_EVENT_LIST()
+#undef	item
 };
 #endif
 
@@ -389,61 +370,59 @@
 	iscsit_cbuf_t		*ist_rxpdu_queue;
 } iscsit_sess_t;
 
+/* Add new login states above ILS_MAX_STATE */
+#define	LOGIN_STATE_LIST() \
+	item(ILS_UNDEFINED) \
+	item(ILS_LOGIN_INIT) \
+	item(ILS_LOGIN_WAITING)	/* Waiting for more login PDU's */ \
+	item(ILS_LOGIN_PROCESSING) /* Processing login request */ \
+	item(ILS_LOGIN_RESPONDING) /* Sending login response */ \
+	item(ILS_LOGIN_RESPONDED) /* Sent login response (no trans. to FFP) */ \
+	item(ILS_LOGIN_FFP) /* Sending last login PDU for final response */ \
+	item(ILS_LOGIN_DONE) /* Last login PDU sent (so we can free it) */ \
+	item(ILS_LOGIN_ERROR) /* Login error, login failed */ \
+	item(ILS_MAX_STATE)
+
 /* Update iscsit_ils_name table whenever login states are modified */
 typedef enum {
-	ILS_UNDEFINED = 0,
-	ILS_LOGIN_INIT,
-	ILS_LOGIN_WAITING,	/* Waiting for more login PDU's */
-	ILS_LOGIN_PROCESSING,	/* Processing login request */
-	ILS_LOGIN_RESPONDING,	/* Sending login response */
-	ILS_LOGIN_RESPONDED,	/* Sent login response (no trans. to FFP) */
-	ILS_LOGIN_FFP,		/* Sending last login PDU for final response */
-	ILS_LOGIN_DONE,		/* Last login PDU sent (so we can free it) */
-	ILS_LOGIN_ERROR,	/* Login error, login failed */
-	/* Add new login states above ILS_MAX_STATE */
-	ILS_MAX_STATE
+#define	item(a) a,
+	LOGIN_STATE_LIST()
+#undef	item
 } iscsit_login_state_t;
 
 #ifdef ISCSIT_LOGIN_SM_STRINGS
-/* An array of login state text values, for use in logging login progress */
-static const char *iscsit_ils_name[ILS_MAX_STATE+1] = {
-	"ILS_UNDEFINED",
-	"ILS_LOGIN_INIT",
-	"ILS_LOGIN_WAITING",
-	"ILS_LOGIN_PROCESSING",
-	"ILS_LOGIN_RESPONDING",
-	"ILS_LOGIN_RESPONDED",
-	"ILS_LOGIN_FFP",
-	"ILS_LOGIN_DONE",
-	"ILS_LOGIN_ERROR",
-	"ILS_MAX_STATE"
+/* An array of login state text values, for use in logging login progress */
+static const char *iscsit_ils_name[ILS_MAX_STATE + 1] = {
+#define	item(a) #a,
+	LOGIN_STATE_LIST()
+#undef	item
 };
 #endif
 
+/* Add new login events above ILE_MAX_EVENT */
+#define	LOGIN_EVENT_LIST() \
+	item(ILE_UNDEFINED) \
+	item(ILE_LOGIN_RCV) \
+	item(ILE_LOGIN_RESP_READY) \
+	item(ILE_LOGIN_FFP) \
+	item(ILE_LOGIN_RESP_COMPLETE) \
+	item(ILE_LOGIN_ERROR) \
+	item(ILE_LOGIN_CONN_ERROR) \
+	item(ILE_MAX_EVENT)
+
 /* Update iscsit_ile_name table whenever login events are modified */
 typedef enum {
-	ILE_UNDEFINED = 0,
-	ILE_LOGIN_RCV,
-	ILE_LOGIN_RESP_READY,
-	ILE_LOGIN_FFP,
-	ILE_LOGIN_RESP_COMPLETE,
-	ILE_LOGIN_ERROR,
-	ILE_LOGIN_CONN_ERROR,
-	/* Add new login events above ILE_MAX_EVENT */
-	ILE_MAX_EVENT
+#define	item(a) a,
+	LOGIN_EVENT_LIST()
+#undef	item
 } iscsit_login_event_t;
 
 #ifdef ISCSIT_LOGIN_SM_STRINGS
-/* An array of login event text values, for use in logging login events */
-static const char *iscsit_ile_name[ILE_MAX_EVENT+1] = {
-	"ILE_UNDEFINED",
-	"ILE_LOGIN_RCV",
-	"ILE_LOGIN_RESP_READY",
-	"ILE_LOGIN_FFP",
-	"ILE_LOGIN_RESP_COMPLETE",
-	"ILE_LOGIN_ERROR",
-	"ILE_LOGIN_CONN_ERROR",
-	"ILE_MAX_EVENT"
+/* An array of login event text values, for use in logging login events */
+static const char *iscsit_ile_name[ILE_MAX_EVENT + 1] = {
+#define	item(a) #a,
+	LOGIN_EVENT_LIST()
+#undef	item
 };
 #endif
 
@@ -466,8 +445,8 @@
 } iscsit_op_params_t;
 
 typedef struct {
-	iscsit_login_state_t 	icl_login_state;
-	iscsit_login_state_t 	icl_login_last_state;
+	iscsit_login_state_t	icl_login_state;
+	iscsit_login_state_t	icl_login_last_state;
 	sm_audit_buf_t		icl_state_audit;
 	boolean_t		icl_busy;
 	boolean_t		icl_login_complete;
@@ -565,17 +544,22 @@
 	list_t			isns_svrs;
 } iscsit_isns_cfg_t;
 
+#define	SERVICE_ENABLED_LIST() \
+	item(ISE_UNDEFINED) \
+	item(ISE_DETACHED) \
+	item(ISE_DISABLED) \
+	item(ISE_ENABLING) \
+	item(ISE_ENABLED) \
+	item(ISE_BUSY) \
+	item(ISE_DISABLING)
+
 /*
  * State values for the iscsit service
  */
 typedef enum {
-	ISE_UNDEFINED = 0,
-	ISE_DETACHED,
-	ISE_DISABLED,
-	ISE_ENABLING,
-	ISE_ENABLED,
-	ISE_BUSY,
-	ISE_DISABLING
+#define	item(a) a,
+	SERVICE_ENABLED_LIST()
+#undef	item
 } iscsit_service_enabled_t;
 
 
@@ -682,9 +666,6 @@
 iscsit_conn_reinstate(iscsit_conn_t *existing_ict, iscsit_conn_t *ict);
 
 void
-iscsit_conn_destroy_done(iscsit_conn_t *ict);
-
-void
 iscsit_conn_set_auth(iscsit_conn_t *ict);
 
 void
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c	Thu Nov 14 07:43:52 2019 -0700
@@ -46,6 +46,7 @@
 #include <sys/portif.h>
 #include <sys/idm/idm.h>
 #include <sys/idm/idm_text.h>
+#include <sys/idm/idm_so.h>
 
 #define	ISCSIT_LOGIN_SM_STRINGS
 #include "iscsit.h"
@@ -733,7 +734,37 @@
 	lsm->icl_login_state = new_state;
 	mutex_exit(&lsm->icl_mutex);
 
-	switch (lsm->icl_login_state) {
+	/*
+	 * A word of caution here. Using new_state instead of
+	 * lsm->icl_login_state (which the code originally did) is
+	 * deliberate. Since the icl_mutex is dropped under the right
+	 * circumstances, the login state can change between setting the
+	 * state and examining the state to proceed. No big surprise,
+	 * since the lock was being used in the first place to prevent
+	 * just that type of change.
+	 *
+	 * There has been a case where network errors occurred while a
+	 * client was attempting to reinstate the connection, causing
+	 * multiple login packets to arrive into the state machine. Those
+	 * packets, which were processed incorrectly, left the reference
+	 * count on the connection one higher than it should be, and from
+	 * then on the connection couldn't close correctly, causing a
+	 * hang.
+	 *
+	 * Upon examination of the core it was found that the connection
+	 * audit data had calls looking like:
+	 *    login_sm_event_dispatch
+	 *    login_sm_processing
+	 *    login_sm_new_state
+	 * That call sequence means the new state was/is ILS_LOGIN_ERROR,
+	 * yet the audit trail continues with a call to
+	 *    login_sm_send_next_response
+	 * which could only occur if icl_login_state had changed. Ideally
+	 * the code would hold the icl_mutex across the state processing,
+	 * but lock order and calls which sleep prevent that. The next
+	 * best solution is to use the local variable holding the state.
+	 */
+	switch (new_state) {
 	case ILS_LOGIN_WAITING:
 		/* Do nothing, waiting for more login PDU's */
 		break;
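
The fix above is an instance of a generic pattern: publish the new state while
holding the lock, then dispatch on a local snapshot instead of re-reading the
shared field after the lock is dropped. A minimal sketch with pthreads
(hypothetical obj_t, not COMSTAR code):

    #include <pthread.h>

    typedef struct obj {
    	pthread_mutex_t	o_mutex;
    	int		o_state;	/* shared; may change once unlocked */
    } obj_t;

    static void
    obj_new_state(obj_t *o, int new_state)
    {
    	(void) pthread_mutex_lock(&o->o_mutex);
    	o->o_state = new_state;
    	(void) pthread_mutex_unlock(&o->o_mutex);

    	/*
    	 * Dispatch on the local snapshot; re-reading o->o_state here
    	 * would race, since another thread may advance it the moment
    	 * the mutex is dropped.
    	 */
    	switch (new_state) {
    	default:
    		break;
    	}
    }
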
@@ -1749,6 +1780,8 @@
 	stmf_scsi_session_t	*ss;
 	iscsi_transport_id_t	*iscsi_tptid;
 	uint16_t		ident_len, adn_len, tptid_sz;
+	char			prop_buf[KSTAT_STRLEN + 1];
+	char			peer_buf[IDM_SA_NTOP_BUFSIZ];
 
 	/*
 	 * Hold target mutex until we have finished registering with STMF
@@ -1809,6 +1842,11 @@
 	ss->ss_port_private = ict->ict_sess;
 	ict->ict_sess->ist_stmf_sess = ss;
 	mutex_exit(&ist->ist_tgt->target_mutex);
+	(void) snprintf(prop_buf, sizeof (prop_buf), "peername_%"PRIxPTR"",
+	    (uintptr_t)ict->ict_sess);
+	(void) idm_sa_ntop(&ict->ict_ic->ic_raddr, peer_buf,
+	    sizeof (peer_buf));
+	(void) stmf_add_rport_info(ss, prop_buf, peer_buf);
 
 	return (IDM_STATUS_SUCCESS);
 }
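
With the two calls above, every login now publishes the initiator's remote
address as a peername_<session> string property on the remote port's info
kstat (via the stmf_add_rport_info/stmf_remove_rport_info pair added to
stmf.c later in this diff). On a live system it should be visible with
something like `kstat -m stmf | grep peername`; the exact kstat name embeds
the irport pointer, so it varies per port.
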
@@ -2727,15 +2765,15 @@
 	/* Check for one of the supported name types */
 	if (strncasecmp(name, SNS_EUI ".", strlen(SNS_EUI) + 1) == 0) {
 		sns = SNS_EUI;
-		*buflen = SNS_EUI_U8_LEN_MAX + 1;
+		*buflen = SNS_EUI_LEN_MAX + 1;
 		flag |= U8_TEXTPREP_TOUPPER;
 	} else if (strncasecmp(name, SNS_IQN ".", strlen(SNS_IQN) + 1) == 0) {
 		sns = SNS_IQN;
-		*buflen = SNS_IQN_U8_LEN_MAX + 1;
+		*buflen = SNS_IQN_LEN_MAX + 1;
 		flag |= U8_TEXTPREP_TOLOWER;
 	} else if (strncasecmp(name, SNS_NAA ".", strlen(SNS_NAA) + 1) == 0) {
 		sns = SNS_NAA;
-		*buflen = SNS_NAA_U8_LEN_MAX + 1;
+		*buflen = SNS_NAA_LEN_MAX + 1;
 		flag |= U8_TEXTPREP_TOUPPER;
 	} else {
 		return (NULL);
@@ -2744,7 +2782,7 @@
 	ret = kmem_zalloc(*buflen, KM_SLEEP);
 	coff = strlen(sns);
 	inlen = strlen(name) - coff;
-	outlen = *buflen - coff;
+	outlen = *buflen - coff - 1;	/* leave room for the NUL */
 
 	/* Fold the case and normalize string */
 	if (u8_textprep_str(name + coff, &inlen, ret + coff, &outlen, flag,
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c	Thu Nov 14 07:43:52 2019 -0700
@@ -37,6 +37,7 @@
 #include <sys/strsubr.h>
 #include <sys/note.h>
 #include <sys/sdt.h>
+#include <sys/kstat.h>
 
 #include <sys/stmf.h>
 #include <sys/stmf_ioctl.h>
@@ -54,7 +55,7 @@
 
 static void
 sess_sm_event_locked(iscsit_sess_t *ist, iscsit_session_event_t event,
-iscsit_conn_t *ict);
+    iscsit_conn_t *ict);
 
 static void
 sess_sm_event_dispatch(iscsit_sess_t *ist, sess_event_ctx_t *ctx);
@@ -211,6 +212,7 @@
 {
 	iscsit_sess_t *ist = ist_void;
 	stmf_scsi_session_t *iss;
+	char prop_buf[KSTAT_STRLEN + 1];
 
 	/*
 	 * State machine has run to completion, destroy session
@@ -225,6 +227,9 @@
 	ASSERT(ist->ist_conn_count == 0);
 	iss = ist->ist_stmf_sess;
 	if (iss != NULL) {
+		(void) snprintf(prop_buf, sizeof (prop_buf),
+		    "peername_%"PRIxPTR"", (uintptr_t)ist);
+		stmf_remove_rport_info(iss, prop_buf);
 		stmf_deregister_scsi_session(ist->ist_lport, iss);
 		kmem_free(iss->ss_rport_id, sizeof (scsi_devid_desc_t) +
 		    strlen(ist->ist_initiator_name) + 1);
@@ -397,7 +402,7 @@
 {
 	const iscsit_sess_t	*sess1 = void_sess1;
 	const iscsit_sess_t	*sess2 = void_sess2;
-	int 			result;
+	int			result;
 
 	/*
 	 * Sort by initiator name, then ISID then portal group tag
--- a/usr/src/uts/common/io/comstar/port/pppt/pppt.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/port/pppt/pppt.h	Thu Nov 14 07:43:52 2019 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2013, 2015 Nexenta Systems, Inc. All rights reserved.
  */
 #ifndef _PPPT_H
 #define	_PPPT_H
@@ -49,74 +49,63 @@
 
 #define	PPPT_MODNAME "pppt"
 
+#define	TGT_STATE_LIST() \
+	item(TS_UNDEFINED) \
+	item(TS_CREATED) \
+	item(TS_ONLINING) \
+	item(TS_ONLINE) \
+	item(TS_STMF_ONLINE) \
+	item(TS_DELETING_NEED_OFFLINE) \
+	item(TS_OFFLINING) \
+	item(TS_OFFLINE) \
+	item(TS_STMF_OFFLINE) \
+	item(TS_DELETING_STMF_DEREG) \
+	item(TS_DELETING_STMF_DEREG_FAIL) \
+	item(TS_DELETING) \
+	item(TS_MAX_STATE)
+
 /* Target states and events, update pppt_ts_name table whenever modified */
 typedef enum {
-	TS_UNDEFINED = 0,
-	TS_CREATED,
-	TS_ONLINING,
-	TS_ONLINE,
-	TS_STMF_ONLINE,
-	TS_DELETING_NEED_OFFLINE,
-	TS_OFFLINING,
-	TS_OFFLINE,
-	TS_STMF_OFFLINE,
-	TS_DELETING_STMF_DEREG,
-	TS_DELETING_STMF_DEREG_FAIL,
-	TS_DELETING,
-	TS_MAX_STATE
+#define	item(a) a,
+	TGT_STATE_LIST()
+#undef	item
 } pppt_tgt_state_t;
 
 #ifdef PPPT_TGT_SM_STRINGS
-static const char *pppt_ts_name[TS_MAX_STATE+1] = {
-	"TS_UNDEFINED",
-	"TS_CREATED",
-	"TS_ONLINING",
-	"TS_ONLINE",
-	"TS_STMF_ONLINE",
-	"TS_DELETING_NEED_OFFLINE",
-	"TS_OFFLINING",
-	"TS_OFFLINE",
-	"TS_STMF_OFFLINE",
-	"TS_DELETING_STMF_DEREG",
-	"TS_DELETING_STMF_DEREG_FAIL",
-	"TS_DELETING",
-	"TS_MAX_STATE"
+static const char *pppt_ts_name[TS_MAX_STATE + 1] = {
+#define	item(a) #a,
+	TGT_STATE_LIST()
+#undef	item
 };
 #endif
 
+#define	TGT_EVENT_LIST() \
+	item(TE_UNDEFINED) \
+	item(TE_STMF_ONLINE_REQ) \
+	item(TE_ONLINE_SUCCESS) \
+	item(TE_ONLINE_FAIL) \
+	item(TE_STMF_ONLINE_COMPLETE_ACK) \
+	item(TE_STMF_OFFLINE_REQ) \
+	item(TE_OFFLINE_COMPLETE) \
+	item(TE_STMF_OFFLINE_COMPLETE_ACK) \
+	item(TE_DELETE) \
+	item(TE_STMF_DEREG_SUCCESS) \
+	item(TE_STMF_DEREG_FAIL) \
+	item(TE_STMF_DEREG_RETRY) \
+	item(TE_WAIT_REF_COMPLETE) /* XXX */ \
+	item(TE_MAX_EVENT)
+
 typedef enum {
-	TE_UNDEFINED = 0,
-	TE_STMF_ONLINE_REQ,
-	TE_ONLINE_SUCCESS,
-	TE_ONLINE_FAIL,
-	TE_STMF_ONLINE_COMPLETE_ACK,
-	TE_STMF_OFFLINE_REQ,
-	TE_OFFLINE_COMPLETE,
-	TE_STMF_OFFLINE_COMPLETE_ACK,
-	TE_DELETE,
-	TE_STMF_DEREG_SUCCESS,
-	TE_STMF_DEREG_FAIL,
-	TE_STMF_DEREG_RETRY,
-	TE_WAIT_REF_COMPLETE, /* XXX */
-	TE_MAX_EVENT
+#define	item(a) a,
+	TGT_EVENT_LIST()
+#undef	item
 } pppt_tgt_event_t;
 
 #ifdef PPPT_TGT_SM_STRINGS
-static const char *pppt_te_name[TE_MAX_EVENT+1] = {
-	"TE_UNDEFINED",
-	"TE_STMF_ONLINE_REQ",
-	"TE_ONLINE_SUCCESS",
-	"TE_ONLINE_FAIL",
-	"TE_STMF_ONLINE_COMPLETE_ACK",
-	"TE_STMF_OFFLINE_REQ",
-	"TE_OFFLINE_COMPLETE",
-	"TE_STMF_OFFLINE_COMPLETE_ACK",
-	"TE_DELETE",
-	"TE_STMF_DEREG_SUCCESS",
-	"TE_STMF_DEREG_FAIL",
-	"TE_STMF_DEREG_RETRY",
-	"TE_WAIT_REF_COMPLETE",
-	"TE_MAX_EVENT"
+static const char *pppt_te_name[TE_MAX_EVENT + 1] = {
+#define	item(a) #a,
+	TGT_EVENT_LIST()
+#undef	item
 };
 #endif
 
--- a/usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c	Thu Nov 14 07:43:52 2019 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
  */
 
 #include <sys/cpuvar.h>
@@ -331,7 +331,7 @@
 	(void) pppt_task_hold(ptask);
 	task->task_port_private = ptask;
 	task->task_flags = scmd->icsc_task_flags;
-	task->task_additional_flags = 0;
+	task->task_additional_flags = TASK_AF_PPPT_TASK;
 	task->task_priority = 0;
 
 	/*
--- a/usr/src/uts/common/io/comstar/stmf/lun_map.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/stmf/lun_map.c	Thu Nov 14 07:43:52 2019 -0700
@@ -47,8 +47,9 @@
 void stmf_add_lus_to_session_per_vemap(stmf_i_local_port_t *ilport,
 		stmf_i_scsi_session_t *iss, stmf_lun_map_t *vemap);
 stmf_id_data_t *stmf_lookup_group_for_host(uint8_t *ident, uint16_t ident_size);
-stmf_status_t stmf_add_ent_to_map(stmf_lun_map_t *sm, void *ent, uint8_t *lun);
-stmf_status_t stmf_remove_ent_from_map(stmf_lun_map_t *sm, uint8_t *lun);
+static stmf_status_t stmf_add_ent_to_map(stmf_lun_map_t *sm, void *ent,
+    uint8_t *lun);
+static stmf_status_t stmf_remove_ent_from_map(stmf_lun_map_t *sm, uint8_t *lun);
 uint16_t stmf_get_next_free_lun(stmf_lun_map_t *sm, uint8_t *lun);
 stmf_status_t stmf_add_tg(uint8_t *tg_name, uint16_t tg_name_size,
 		int allow_special, uint32_t *err_detail);
@@ -179,10 +180,11 @@
 
 /*
  * Create luns map for session based on the view
+ * iss_lockp is held
  */
 stmf_status_t
 stmf_session_create_lun_map(stmf_i_local_port_t *ilport,
-		stmf_i_scsi_session_t *iss)
+    stmf_i_scsi_session_t *iss)
 {
 	stmf_id_data_t *tg;
 	stmf_id_data_t *hg;
@@ -236,52 +238,8 @@
 }
 
 /*
- * destroy lun map for session
- */
-/* ARGSUSED */
-stmf_status_t
-stmf_session_destroy_lun_map(stmf_i_local_port_t *ilport,
-		stmf_i_scsi_session_t *iss)
-{
-	stmf_lun_map_t *sm;
-	stmf_i_lu_t *ilu;
-	uint16_t n;
-	stmf_lun_map_ent_t *ent;
-
-	ASSERT(mutex_owned(&stmf_state.stmf_lock));
-	/*
-	 * to avoid conflict with updating session's map,
-	 * which only grab stmf_lock
-	 */
-	sm = iss->iss_sm;
-	iss->iss_sm = NULL;
-	iss->iss_hg = NULL;
-	if (sm->lm_nentries) {
-		for (n = 0; n < sm->lm_nentries; n++) {
-			if ((ent = (stmf_lun_map_ent_t *)sm->lm_plus[n])
-			    != NULL) {
-				if (ent->ent_itl_datap) {
-					stmf_do_itl_dereg(ent->ent_lu,
-					    ent->ent_itl_datap,
-					    STMF_ITL_REASON_IT_NEXUS_LOSS);
-				}
-				ilu = (stmf_i_lu_t *)
-				    ent->ent_lu->lu_stmf_private;
-				atomic_dec_32(&ilu->ilu_ref_cnt);
-				kmem_free(sm->lm_plus[n],
-				    sizeof (stmf_lun_map_ent_t));
-			}
-		}
-		kmem_free(sm->lm_plus,
-		    sizeof (stmf_lun_map_ent_t *) * sm->lm_nentries);
-	}
-
-	kmem_free(sm, sizeof (*sm));
-	return (STMF_SUCCESS);
-}
-
-/*
  * Expects the session lock to be held.
+ * iss_lockp is held
  */
 stmf_xfer_data_t *
 stmf_session_prepare_report_lun_data(stmf_lun_map_t *sm)
@@ -390,12 +348,13 @@
 }
 /*
  * add lu to a session, stmf_lock is already held
+ * iss_lockp/ilport_lock already held
  */
-stmf_status_t
+static stmf_status_t
 stmf_add_lu_to_session(stmf_i_local_port_t *ilport,
-		stmf_i_scsi_session_t	*iss,
-		stmf_lu_t *lu,
-		uint8_t *lu_nbr)
+    stmf_i_scsi_session_t *iss,
+    stmf_lu_t *lu,
+    uint8_t *lu_nbr)
 {
 	stmf_lun_map_t *sm = iss->iss_sm;
 	stmf_status_t ret;
@@ -434,13 +393,11 @@
 
 /*
  * remove lu from a session, stmf_lock is already held
+ * iss_lockp held
  */
-/* ARGSUSED */
-stmf_status_t
-stmf_remove_lu_from_session(stmf_i_local_port_t *ilport,
-		stmf_i_scsi_session_t *iss,
-		stmf_lu_t *lu,
-		uint8_t *lu_nbr)
+static void
+stmf_remove_lu_from_session(stmf_i_scsi_session_t *iss,
+    stmf_lu_t *lu, uint8_t *lu_nbr)
 {
 	stmf_status_t ret;
 	stmf_i_lu_t *ilu;
@@ -451,7 +408,10 @@
 
 	ASSERT(mutex_owned(&stmf_state.stmf_lock));
 	lun_map_ent = stmf_get_ent_from_map(sm, luNbr);
-	ASSERT(lun_map_ent && lun_map_ent->ent_lu == lu);
+	if (lun_map_ent == NULL) {
+		return;
+	}
+	ASSERT(lun_map_ent->ent_lu == lu);
 
 	ilu = (stmf_i_lu_t *)lu->lu_stmf_private;
 
@@ -464,7 +424,6 @@
 		    STMF_ITL_REASON_USER_REQUEST);
 	}
 	kmem_free((void *)lun_map_ent, sizeof (stmf_lun_map_ent_t));
-	return (STMF_SUCCESS);
 }
 
 /*
@@ -473,7 +432,7 @@
  */
 void
 stmf_update_sessions_per_ve(stmf_view_entry_t *ve,
-		stmf_lu_t *lu, int action)
+    stmf_lu_t *lu, int action)
 {
 	stmf_i_lu_t *ilu_tmp;
 	stmf_lu_t *lu_to_add;
@@ -519,8 +478,8 @@
 				continue;
 			/* This host belongs to the host group */
 			if (action == 0) { /* to remove */
-				(void) stmf_remove_lu_from_session(ilport, iss,
-				    lu_to_add, ve->ve_lun);
+				stmf_remove_lu_from_session(iss, lu_to_add,
+				    ve->ve_lun);
 				if (ilu_tmp->ilu_ref_cnt == 0) {
 					rw_exit(&ilport->ilport_lock);
 					return;
@@ -540,8 +499,8 @@
  */
 void
 stmf_add_lus_to_session_per_vemap(stmf_i_local_port_t *ilport,
-		stmf_i_scsi_session_t *iss,
-		stmf_lun_map_t *vemap)
+    stmf_i_scsi_session_t *iss,
+    stmf_lun_map_t *vemap)
 {
 	stmf_lu_t *lu;
 	stmf_i_lu_t *ilu;
@@ -549,7 +508,6 @@
 	uint32_t	i;
 
 	ASSERT(mutex_owned(&stmf_state.stmf_lock));
-
 	for (i = 0; i < vemap->lm_nentries; i++) {
 		ve = (stmf_view_entry_t *)vemap->lm_plus[i];
 		if (!ve)
@@ -562,11 +520,13 @@
 		}
 	}
 }
-/* remove luns in view entry map from a session */
+/*
+ * remove luns in view entry map from a session
+ * iss_lockp held
+ */
 void
-stmf_remove_lus_from_session_per_vemap(stmf_i_local_port_t *ilport,
-		stmf_i_scsi_session_t *iss,
-		stmf_lun_map_t *vemap)
+stmf_remove_lus_from_session_per_vemap(stmf_i_scsi_session_t *iss,
+    stmf_lun_map_t *vemap)
 {
 	stmf_lu_t *lu;
 	stmf_i_lu_t *ilu;
@@ -582,15 +542,14 @@
 		ilu = (stmf_i_lu_t *)ve->ve_luid->id_pt_to_object;
 		if (ilu && ilu->ilu_state == STMF_STATE_ONLINE) {
 			lu = ilu->ilu_lu;
-			(void) stmf_remove_lu_from_session(ilport, iss, lu,
-			    ve->ve_lun);
+			stmf_remove_lu_from_session(iss, lu, ve->ve_lun);
 		}
 	}
 }
 
 stmf_id_data_t *
 stmf_alloc_id(uint16_t id_size, uint16_t type, uint8_t *id_data,
-			uint32_t additional_size)
+    uint32_t additional_size)
 {
 	stmf_id_data_t *id;
 	int struct_size, total_size, real_id_size;
@@ -710,6 +669,7 @@
  * is successfully added. ve_map is just another representation of the
  * view entries in a LU. Duplicating or merging a ve map does not
  * affect any refcnts.
+ * stmf_state.stmf_lock held
  */
 stmf_lun_map_t *
 stmf_duplicate_ve_map(stmf_lun_map_t *src)
@@ -742,9 +702,13 @@
 	kmem_free(dst, sizeof (*dst));
 }
 
+/*
+ * stmf_state.stmf_lock held. Operations are stmf global in nature and
+ * not session level.
+ */
 int
 stmf_merge_ve_map(stmf_lun_map_t *src, stmf_lun_map_t *dst,
-		stmf_lun_map_t **pp_ret_map, stmf_merge_flags_t mf)
+    stmf_lun_map_t **pp_ret_map, stmf_merge_flags_t mf)
 {
 	int i;
 	int nentries;
@@ -816,7 +780,7 @@
  */
 stmf_status_t
 stmf_add_hg(uint8_t *hg_name, uint16_t hg_name_size,
-		int allow_special, uint32_t *err_detail)
+    int allow_special, uint32_t *err_detail)
 {
 	stmf_id_data_t *id;
 
@@ -841,7 +805,7 @@
 /* add target group */
 stmf_status_t
 stmf_add_tg(uint8_t *tg_name, uint16_t tg_name_size,
-		int allow_special, uint32_t *err_detail)
+    int allow_special, uint32_t *err_detail)
 {
 	stmf_id_data_t *id;
 
@@ -920,8 +884,8 @@
 /* stmf_lock is already held, err_detail may be assigned if error happens */
 stmf_status_t
 stmf_add_view_entry(stmf_id_data_t *hg, stmf_id_data_t *tg,
-		uint8_t *lu_guid, uint32_t *ve_id, uint8_t *lun,
-		stmf_view_entry_t **conflicting, uint32_t *err_detail)
+    uint8_t *lu_guid, uint32_t *ve_id, uint8_t *lun,
+    stmf_view_entry_t **conflicting, uint32_t *err_detail)
 {
 	stmf_id_data_t *luid;
 	stmf_view_entry_t *ve;
@@ -1066,7 +1030,11 @@
 	return (ret);
 }
 
-stmf_status_t
+/*
+ * protected by stmf_state.stmf_lock when working on global lun map.
+ * iss_lockp when working at the session level.
+ */
+static stmf_status_t
 stmf_add_ent_to_map(stmf_lun_map_t *lm, void *ent, uint8_t *lun)
 {
 	uint16_t n;
@@ -1098,7 +1066,11 @@
 }
 
 
-stmf_status_t
+/*
+ * iss_lockp held when working on a session.
+ * stmf_state.stmf_lock is held when working on the global views.
+ */
+static stmf_status_t
 stmf_remove_ent_from_map(stmf_lun_map_t *lm, uint8_t *lun)
 {
 	uint16_t n, i;
@@ -1135,6 +1107,9 @@
 	return (STMF_SUCCESS);
 }
 
+/*
+ * stmf_state.stmf_lock held
+ */
 uint16_t
 stmf_get_next_free_lun(stmf_lun_map_t *sm, uint8_t *lun)
 {
@@ -1158,6 +1133,10 @@
 	return (luNbr);
 }
 
+/*
+ * stmf_state.stmf_lock is held when working on global view map
+ * iss_lockp (RW_WRITER) is held when working on session map.
+ */
 void *
 stmf_get_ent_from_map(stmf_lun_map_t *sm, uint16_t lun_num)
 {
@@ -1173,9 +1152,9 @@
 
 int
 stmf_add_ve(uint8_t *hgname, uint16_t hgname_size,
-		uint8_t *tgname, uint16_t tgname_size,
-		uint8_t *lu_guid, uint32_t *ve_id,
-		uint8_t *luNbr, uint32_t *err_detail)
+    uint8_t *tgname, uint16_t tgname_size,
+    uint8_t *lu_guid, uint32_t *ve_id,
+    uint8_t *luNbr, uint32_t *err_detail)
 {
 	stmf_id_data_t *hg;
 	stmf_id_data_t *tg;
@@ -1317,7 +1296,7 @@
 
 int
 stmf_add_group(uint8_t *grpname, uint16_t grpname_size,
-		stmf_id_type_t group_type, uint32_t *err_detail)
+    stmf_id_type_t group_type, uint32_t *err_detail)
 {
 	stmf_status_t status;
 
@@ -1348,7 +1327,7 @@
  */
 int
 stmf_remove_group(uint8_t *grpname, uint16_t grpname_size,
-		stmf_id_type_t group_type, uint32_t *err_detail)
+    stmf_id_type_t group_type, uint32_t *err_detail)
 {
 	stmf_id_data_t *id;
 	stmf_id_data_t *idmemb;
@@ -1411,8 +1390,8 @@
 
 int
 stmf_add_group_member(uint8_t *grpname, uint16_t grpname_size,
-		uint8_t	*entry_ident, uint16_t entry_size,
-		stmf_id_type_t entry_type, uint32_t *err_detail)
+    uint8_t *entry_ident, uint16_t entry_size,
+    stmf_id_type_t entry_type, uint32_t *err_detail)
 {
 	stmf_id_data_t	*id_grp, *id_alltgt;
 	stmf_id_data_t	*id_member;
@@ -1492,6 +1471,7 @@
 			stmf_id_data_t *tgid;
 			iss->iss_hg = (void *)id_grp;
 			tgid = ilport->ilport_tg;
+			rw_enter(iss->iss_lockp, RW_WRITER);
 			if (tgid) {
 				vemap = stmf_get_ve_map_per_ids(tgid, id_grp);
 				if (vemap)
@@ -1501,6 +1481,7 @@
 			if (vemap_alltgt)
 				stmf_add_lus_to_session_per_vemap(ilport,
 				    iss, vemap_alltgt);
+			rw_exit(iss->iss_lockp);
 		}
 	}
 
@@ -1509,8 +1490,8 @@
 
 int
 stmf_remove_group_member(uint8_t *grpname, uint16_t grpname_size,
-		uint8_t *entry_ident, uint16_t entry_size,
-		stmf_id_type_t entry_type, uint32_t *err_detail)
+    uint8_t *entry_ident, uint16_t entry_size,
+    stmf_id_type_t entry_type, uint32_t *err_detail)
 {
 	stmf_id_data_t	*id_grp, *id_alltgt;
 	stmf_id_data_t	*id_member;
@@ -1577,17 +1558,19 @@
 		    entry_ident, entry_size);
 		if (iss) {
 			stmf_id_data_t *tgid;
+			rw_enter(iss->iss_lockp, RW_WRITER);
 			iss->iss_hg = NULL;
 			tgid = ilport->ilport_tg;
 			if (tgid) {
 				vemap = stmf_get_ve_map_per_ids(tgid, id_grp);
 				if (vemap)
 					stmf_remove_lus_from_session_per_vemap(
-					    ilport, iss, vemap);
+					    iss, vemap);
 			}
 			if (vemap_alltgt)
-				stmf_remove_lus_from_session_per_vemap(ilport,
-				    iss, vemap_alltgt);
+				stmf_remove_lus_from_session_per_vemap(iss,
+				    vemap_alltgt);
+			rw_exit(iss->iss_lockp);
 		}
 	}
 
@@ -1616,7 +1599,7 @@
 
 stmf_i_scsi_session_t *
 stmf_lookup_session_for_hostident(stmf_i_local_port_t *ilport,
-		uint8_t *host_ident, uint16_t ident_size)
+    uint8_t *host_ident, uint16_t ident_size)
 {
 	stmf_i_scsi_session_t *iss;
 	uint8_t *id;
@@ -1734,8 +1717,8 @@
 
 int
 stmf_validate_lun_ve(uint8_t *hgname, uint16_t hgname_size,
-		uint8_t *tgname, uint16_t tgname_size,
-		uint8_t *luNbr, uint32_t *err_detail)
+    uint8_t *tgname, uint16_t tgname_size,
+    uint8_t *luNbr, uint32_t *err_detail)
 {
 	stmf_id_data_t		*hg;
 	stmf_id_data_t		*tg;
--- a/usr/src/uts/common/io/comstar/stmf/lun_map.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/stmf/lun_map.h	Thu Nov 14 07:43:52 2019 -0700
@@ -48,8 +48,6 @@
 void stmf_view_clear_config();
 stmf_status_t stmf_session_create_lun_map(stmf_i_local_port_t *ilport,
 		stmf_i_scsi_session_t *iss);
-stmf_status_t stmf_session_destroy_lun_map(stmf_i_local_port_t *ilport,
-		stmf_i_scsi_session_t *iss);
 stmf_xfer_data_t *stmf_session_prepare_report_lun_data(stmf_lun_map_t *sm);
 void stmf_add_lu_to_active_sessions(stmf_lu_t *lu);
 void stmf_session_lu_unmapall(stmf_lu_t *lu);
--- a/usr/src/uts/common/io/comstar/stmf/stmf.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/stmf/stmf.c	Thu Nov 14 07:43:52 2019 -0700
@@ -18,11 +18,12 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 /*
- * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  */
@@ -124,7 +125,6 @@
 void stmf_trace_clear();
 void stmf_worker_init();
 stmf_status_t stmf_worker_fini();
-void stmf_worker_mgmt();
 void stmf_worker_task(void *arg);
 static void stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss);
 static stmf_status_t stmf_ic_lu_reg(stmf_ic_reg_dereg_lun_msg_t *msg,
@@ -164,9 +164,14 @@
 static void stmf_update_kstat_lport_q(scsi_task_t *, void());
 static void stmf_update_kstat_lu_io(scsi_task_t *, stmf_data_buf_t *);
 static void stmf_update_kstat_lport_io(scsi_task_t *, stmf_data_buf_t *);
+static hrtime_t stmf_update_rport_timestamps(hrtime_t *start_tstamp,
+    hrtime_t *done_tstamp, stmf_i_scsi_task_t *itask);
 
 static int stmf_irport_compare(const void *void_irport1,
     const void *void_irport2);
+static void stmf_create_kstat_rport(stmf_i_remote_port_t *irport);
+static void stmf_destroy_kstat_rport(stmf_i_remote_port_t *irport);
+static int stmf_kstat_rport_update(kstat_t *ksp, int rw);
 static stmf_i_remote_port_t *stmf_irport_create(scsi_devid_desc_t *rport_devid);
 static void stmf_irport_destroy(stmf_i_remote_port_t *irport);
 static stmf_i_remote_port_t *stmf_irport_register(
@@ -179,7 +184,7 @@
 
 /* =====[ Tunables ]===== */
 /* Internal tracing */
-volatile int	stmf_trace_on = 1;
+volatile int	stmf_trace_on = 0;
 volatile int	stmf_trace_buf_size = (1 * 1024 * 1024);
 /*
  * The reason default task timeout is 75 is because we want the
@@ -192,14 +197,12 @@
  */
 volatile int	stmf_allow_modunload = 0;
 
-volatile int stmf_max_nworkers = 256;
-volatile int stmf_min_nworkers = 4;
-volatile int stmf_worker_scale_down_delay = 20;
+volatile int stmf_nworkers = 512;
 
 /* === [ Debugging and fault injection ] === */
 #ifdef	DEBUG
-volatile uint32_t stmf_drop_task_counter = 0;
-volatile uint32_t stmf_drop_buf_counter = 0;
+volatile int stmf_drop_task_counter = 0;
+volatile int stmf_drop_buf_counter = 0;
 
 #endif
 
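
The dynamic worker-pool management above (min/max counts plus a scale-down
delay) is gone; the pool is now a fixed size. Assuming the usual
module:variable form for the stmf driver, the pool can still be sized from
/etc/system, e.g.:

    set stmf:stmf_nworkers = 256
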
@@ -221,20 +224,13 @@
 	STMF_WORKERS_ENABLING,
 	STMF_WORKERS_ENABLED
 } stmf_workers_state = STMF_WORKERS_DISABLED;
-static int stmf_i_max_nworkers;
-static int stmf_i_min_nworkers;
-static int stmf_nworkers_cur;		/* # of workers currently running */
-static int stmf_nworkers_needed;	/* # of workers need to be running */
+static kmutex_t	stmf_worker_sel_mx;
+volatile uint32_t stmf_nworkers_cur = 0; /* # of workers currently running */
 static int stmf_worker_sel_counter = 0;
 static uint32_t stmf_cur_ntasks = 0;
-static clock_t stmf_wm_last = 0;
-/*
- * This is equal to stmf_nworkers_cur while we are increasing # workers and
- * stmf_nworkers_needed while we are decreasing the worker count.
- */
+static clock_t stmf_wm_next = 0;
 static int stmf_nworkers_accepting_cmds;
 static stmf_worker_t *stmf_workers = NULL;
-static clock_t stmf_worker_mgmt_delay = 2;
 static clock_t stmf_worker_scale_down_timer = 0;
 static int stmf_worker_scale_down_qd = 0;
 
@@ -300,6 +296,7 @@
 	trace_buf_size = stmf_trace_buf_size;
 	trace_buf_curndx = 0;
 	mutex_init(&trace_buf_lock, NULL, MUTEX_DRIVER, 0);
+	mutex_init(&stmf_worker_sel_mx, NULL, MUTEX_ADAPTIVE, 0);
 	bzero(&stmf_state, sizeof (stmf_state_t));
 	/* STMF service is off by default */
 	stmf_state.stmf_service_running = 0;
@@ -370,6 +367,7 @@
 	kmem_free(stmf_trace_buf, stmf_trace_buf_size);
 	mutex_destroy(&trace_buf_lock);
 	mutex_destroy(&stmf_state.stmf_lock);
+	mutex_destroy(&stmf_worker_sel_mx);
 	cv_destroy(&stmf_state.stmf_cv);
 	return (ret);
 }
@@ -1655,10 +1653,13 @@
 	mutex_enter(&ilu->ilu_task_lock);
 	for (itask = ilu->ilu_tasks; itask != NULL;
 	    itask = itask->itask_lu_next) {
+		mutex_enter(&itask->itask_mutex);
 		if (itask->itask_flags & (ITASK_IN_FREE_LIST |
 		    ITASK_BEING_ABORTED)) {
+			mutex_exit(&itask->itask_mutex);
 			continue;
 		}
+		mutex_exit(&itask->itask_mutex);
 		if (itask->itask_proxy_msg_id == task_msgid) {
 			break;
 		}
@@ -1903,16 +1904,15 @@
 	 * we can recognize this on return since we won't be completing
 	 * the proxied task in that case.
 	 */
+	mutex_enter(&itask->itask_mutex);
 	if (task->task_mgmt_function) {
 		itask->itask_proxy_msg_id |= MSG_ID_TM_BIT;
 	} else {
-		uint32_t new, old;
-		do {
-			new = old = itask->itask_flags;
-			if (new & ITASK_BEING_ABORTED)
-				return (STMF_FAILURE);
-			new |= ITASK_DEFAULT_HANDLING | ITASK_PROXY_TASK;
-		} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+		if (itask->itask_flags & ITASK_BEING_ABORTED) {
+			mutex_exit(&itask->itask_mutex);
+			return (STMF_FAILURE);
+		}
+		itask->itask_flags |= ITASK_DEFAULT_HANDLING | ITASK_PROXY_TASK;
 	}
 	if (dbuf) {
 		ic_cmd_msg = ic_scsi_cmd_msg_alloc(itask->itask_proxy_msg_id,
@@ -1922,6 +1922,7 @@
 		ic_cmd_msg = ic_scsi_cmd_msg_alloc(itask->itask_proxy_msg_id,
 		    task, 0, NULL, itask->itask_proxy_msg_id);
 	}
+	mutex_exit(&itask->itask_mutex);
 	if (ic_cmd_msg) {
 		ic_ret = ic_tx_msg(ic_cmd_msg);
 		if (ic_ret == STMF_IC_MSG_SUCCESS) {
@@ -2525,7 +2526,8 @@
 	}
 
 	/* Free any existing lists and add this one to the ppd */
-	nvlist_free(ppd->ppd_nv);
+	if (ppd->ppd_nv)
+		nvlist_free(ppd->ppd_nv);
 	ppd->ppd_nv = nv;
 
 	/* set the token for writes */
@@ -2597,7 +2599,8 @@
 		return;
 
 	*pppd = ppd->ppd_next;
-	nvlist_free(ppd->ppd_nv);
+	if (ppd->ppd_nv)
+		nvlist_free(ppd->ppd_nv);
 
 	kmem_free(ppd, ppd->ppd_alloc_size);
 }
@@ -2706,6 +2709,8 @@
  */
 #define	STMF_KSTAT_LU_SZ	(STMF_GUID_INPUT + 1 + 256)
 #define	STMF_KSTAT_TGT_SZ	(256 * 2 + 16)
+#define	STMF_KSTAT_RPORT_DATAMAX	(sizeof (stmf_kstat_rport_info_t) / \
+					    sizeof (kstat_named_t))
 
 /*
  * This array matches the Protocol Identifier in stmf_ioctl.h
@@ -2783,6 +2788,96 @@
 }
 
 static void
+stmf_update_kstat_rport_io(scsi_task_t *task, stmf_data_buf_t *dbuf)
+{
+	stmf_i_scsi_session_t	*iss;
+	stmf_i_remote_port_t	*irport;
+	kstat_io_t		*kip;
+
+	iss = task->task_session->ss_stmf_private;
+	irport = iss->iss_irport;
+	if (irport->irport_kstat_io != NULL) {
+		kip = KSTAT_IO_PTR(irport->irport_kstat_io);
+		mutex_enter(irport->irport_kstat_io->ks_lock);
+		STMF_UPDATE_KSTAT_IO(kip, dbuf);
+		mutex_exit(irport->irport_kstat_io->ks_lock);
+	}
+}
+
+static void
+stmf_update_kstat_rport_estat(scsi_task_t *task)
+{
+	stmf_i_scsi_task_t		*itask;
+	stmf_i_scsi_session_t		*iss;
+	stmf_i_remote_port_t		*irport;
+	stmf_kstat_rport_estat_t	*ks_estat;
+	hrtime_t			lat = 0;
+	uint32_t			n = 0;
+
+	itask = task->task_stmf_private;
+	iss = task->task_session->ss_stmf_private;
+	irport = iss->iss_irport;
+
+	if (irport->irport_kstat_estat == NULL)
+		return;
+
+	ks_estat = (stmf_kstat_rport_estat_t *)KSTAT_NAMED_PTR(
+	    irport->irport_kstat_estat);
+
+	mutex_enter(irport->irport_kstat_estat->ks_lock);
+
+	if (task->task_flags & TF_READ_DATA)
+		n = atomic_dec_32_nv(&irport->irport_nread_tasks);
+	else if (task->task_flags & TF_WRITE_DATA)
+		n = atomic_dec_32_nv(&irport->irport_nwrite_tasks);
+
+	if (itask->itask_read_xfer > 0) {
+		ks_estat->i_nread_tasks.value.ui64++;
+		lat = stmf_update_rport_timestamps(
+		    &irport->irport_rdstart_timestamp,
+		    &irport->irport_rddone_timestamp, itask);
+		if (n == 0)
+			ks_estat->i_rport_read_latency.value.ui64 += lat;
+	} else if ((itask->itask_write_xfer > 0) ||
+	    (task->task_flags & TF_INITIAL_BURST)) {
+		ks_estat->i_nwrite_tasks.value.ui64++;
+		lat = stmf_update_rport_timestamps(
+		    &irport->irport_wrstart_timestamp,
+		    &irport->irport_wrdone_timestamp, itask);
+		if (n == 0)
+			ks_estat->i_rport_write_latency.value.ui64 += lat;
+	}
+
+	if (n == 0) {
+		if (task->task_flags & TF_READ_DATA) {
+			irport->irport_rdstart_timestamp = LLONG_MAX;
+			irport->irport_rddone_timestamp = 0;
+		} else if (task->task_flags & TF_WRITE_DATA) {
+			irport->irport_wrstart_timestamp = LLONG_MAX;
+			irport->irport_wrdone_timestamp = 0;
+		}
+	}
+
+	mutex_exit(irport->irport_kstat_estat->ks_lock);
+}
+
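+/*
+ * Widen this rport's [earliest start, latest completion] window with the
+ * task's timestamps and return the window's length; the caller folds it
+ * into the aggregated per-rport read/write latency kstats.
+ */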
+static hrtime_t
+stmf_update_rport_timestamps(hrtime_t *start_tstamp, hrtime_t *done_tstamp,
+    stmf_i_scsi_task_t *itask)
+{
+	*start_tstamp = MIN(*start_tstamp, itask->itask_start_timestamp);
+	if ((*done_tstamp == 0) &&
+	    (itask->itask_xfer_done_timestamp == 0)) {
+		*done_tstamp = *start_tstamp;
+	} else {
+		*done_tstamp = MAX(*done_tstamp,
+		    itask->itask_xfer_done_timestamp);
+	}
+
+	return (*done_tstamp - *start_tstamp);
+}
+
+static void
 stmf_update_kstat_lu_io(scsi_task_t *task, stmf_data_buf_t *dbuf)
 {
 	stmf_i_lu_t		*ilu;
@@ -3443,6 +3538,8 @@
 	    sizeof (scsi_devid_desc_t) + rport_devid->ident_length - 1);
 	irport->irport_refcnt = 1;
 	mutex_init(&irport->irport_mutex, NULL, MUTEX_DEFAULT, NULL);
+	irport->irport_rdstart_timestamp = LLONG_MAX;
+	irport->irport_wrstart_timestamp = LLONG_MAX;
 
 	return (irport);
 }
@@ -3450,12 +3547,132 @@
 static void
 stmf_irport_destroy(stmf_i_remote_port_t *irport)
 {
+	stmf_destroy_kstat_rport(irport);
 	id_free(stmf_state.stmf_irport_inst_space, irport->irport_instance);
 	mutex_destroy(&irport->irport_mutex);
 	kmem_free(irport, sizeof (*irport) + sizeof (scsi_devid_desc_t) +
 	    irport->irport_id->ident_length - 1);
 }
 
+static void
+stmf_create_kstat_rport(stmf_i_remote_port_t *irport)
+{
+	scsi_devid_desc_t *id = irport->irport_id;
+	char ks_nm[KSTAT_STRLEN];
+	stmf_kstat_rport_info_t *ks_info;
+	stmf_kstat_rport_estat_t *ks_estat;
+	char *ident = NULL;
+
+	ks_info = kmem_zalloc(sizeof (*ks_info), KM_NOSLEEP);
+	if (ks_info == NULL)
+		goto err_out;
+
+	(void) snprintf(ks_nm, KSTAT_STRLEN, "stmf_rport_%"PRIxPTR"",
+	    (uintptr_t)irport);
+	irport->irport_kstat_info = kstat_create(STMF_MODULE_NAME, 0,
+	    ks_nm, "misc", KSTAT_TYPE_NAMED,
+	    STMF_KSTAT_RPORT_DATAMAX - STMF_RPORT_INFO_LIMIT,
+	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
+	if (irport->irport_kstat_info == NULL) {
+		kmem_free(ks_info, sizeof (*ks_info));
+		goto err_out;
+	}
+
+	irport->irport_kstat_info->ks_data = ks_info;
+	irport->irport_kstat_info->ks_private = irport;
+	irport->irport_kstat_info->ks_update = stmf_kstat_rport_update;
+	ident = kmem_alloc(id->ident_length + 1, KM_NOSLEEP);
+	if (ident == NULL) {
+		kstat_delete(irport->irport_kstat_info);
+		irport->irport_kstat_info = NULL;
+		kmem_free(ks_info, sizeof (*ks_info));
+		goto err_out;
+	}
+
+	(void) memcpy(ident, id->ident, id->ident_length);
+	ident[id->ident_length] = '\0';
+	kstat_named_init(&ks_info->i_rport_name, "name", KSTAT_DATA_STRING);
+	kstat_named_init(&ks_info->i_protocol, "protocol",
+	    KSTAT_DATA_STRING);
+
+	kstat_named_setstr(&ks_info->i_rport_name, ident);
+	kstat_named_setstr(&ks_info->i_protocol,
+	    protocol_ident[irport->irport_id->protocol_id]);
+	irport->irport_kstat_info->ks_lock = &irport->irport_mutex;
+	irport->irport_info_dirty = B_TRUE;
+	kstat_install(irport->irport_kstat_info);
+
+	(void) snprintf(ks_nm, KSTAT_STRLEN, "stmf_rport_io_%"PRIxPTR"",
+	    (uintptr_t)irport);
+	irport->irport_kstat_io = kstat_create(STMF_MODULE_NAME, 0, ks_nm,
+	    "io", KSTAT_TYPE_IO, 1, 0);
+	if (irport->irport_kstat_io == NULL)
+		goto err_out;
+
+	irport->irport_kstat_io->ks_lock = &irport->irport_mutex;
+	kstat_install(irport->irport_kstat_io);
+
+	(void) snprintf(ks_nm, KSTAT_STRLEN, "stmf_rport_st_%"PRIxPTR"",
+	    (uintptr_t)irport);
+	irport->irport_kstat_estat = kstat_create(STMF_MODULE_NAME, 0, ks_nm,
+	    "misc", KSTAT_TYPE_NAMED,
+	    sizeof (*ks_estat) / sizeof (kstat_named_t), 0);
+	if (irport->irport_kstat_estat == NULL)
+		goto err_out;
+
+	ks_estat = (stmf_kstat_rport_estat_t *)KSTAT_NAMED_PTR(
+	    irport->irport_kstat_estat);
+	kstat_named_init(&ks_estat->i_rport_read_latency,
+	    "rlatency", KSTAT_DATA_UINT64);
+	kstat_named_init(&ks_estat->i_rport_write_latency,
+	    "wlatency", KSTAT_DATA_UINT64);
+	kstat_named_init(&ks_estat->i_nread_tasks, "rntasks",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&ks_estat->i_nwrite_tasks, "wntasks",
+	    KSTAT_DATA_UINT64);
+	irport->irport_kstat_estat->ks_lock = &irport->irport_mutex;
+	kstat_install(irport->irport_kstat_estat);
+
+	return;
+
+err_out:
+	(void) memcpy(ks_nm, id->ident, MIN(KSTAT_STRLEN - 1,
+	    id->ident_length));
+	ks_nm[MIN(KSTAT_STRLEN - 1, id->ident_length)] = '\0';
+	cmn_err(CE_WARN, "STMF: remote port kstat creation failed: %s", ks_nm);
+}
+
+static void
+stmf_destroy_kstat_rport(stmf_i_remote_port_t *irport)
+{
+	if (irport->irport_kstat_io != NULL) {
+		kstat_delete(irport->irport_kstat_io);
+	}
+	if (irport->irport_kstat_estat != NULL) {
+		kstat_delete(irport->irport_kstat_estat);
+	}
+	if (irport->irport_kstat_info != NULL) {
+		stmf_kstat_rport_info_t *ks_info;
+		kstat_named_t *knp;
+		void *ptr;
+		int i;
+
+		ks_info = (stmf_kstat_rport_info_t *)KSTAT_NAMED_PTR(
+		    irport->irport_kstat_info);
+		kstat_delete(irport->irport_kstat_info);
+		ptr = KSTAT_NAMED_STR_PTR(&ks_info->i_rport_name);
+		kmem_free(ptr, KSTAT_NAMED_STR_BUFLEN(&ks_info->i_rport_name));
+
+		for (i = 0, knp = ks_info->i_rport_uinfo;
+		    i < STMF_RPORT_INFO_LIMIT; i++, knp++) {
+			ptr = KSTAT_NAMED_STR_PTR(knp);
+			if (ptr != NULL)
+				kmem_free(ptr, KSTAT_NAMED_STR_BUFLEN(knp));
+		}
+		kmem_free(ks_info, sizeof (*ks_info));
+	}
+}
+
 static stmf_i_remote_port_t *
 stmf_irport_register(scsi_devid_desc_t *rport_devid)
 {
@@ -3478,6 +3695,7 @@
 		return (NULL);
 	}
 
+	stmf_create_kstat_rport(irport);
 	avl_add(&stmf_state.stmf_irportlist, irport);
 	mutex_exit(&stmf_state.stmf_lock);
 
@@ -3601,6 +3819,109 @@
 	return (STMF_SUCCESS);
 }
 
+stmf_status_t
+stmf_add_rport_info(stmf_scsi_session_t *ss,
+    const char *prop_name, const char *prop_value)
+{
+	stmf_i_scsi_session_t *iss = ss->ss_stmf_private;
+	stmf_i_remote_port_t *irport = iss->iss_irport;
+	kstat_named_t *knp;
+	char *s;
+	int i;
+
+	s = strdup(prop_value);
+
+	mutex_enter(irport->irport_kstat_info->ks_lock);
+	/* Make sure the caller doesn't add an already existing property */
+	knp = KSTAT_NAMED_PTR(irport->irport_kstat_info);
+	for (i = 0; i < STMF_KSTAT_RPORT_DATAMAX; i++, knp++) {
+		if (KSTAT_NAMED_STR_PTR(knp) == NULL)
+			break;
+
+		ASSERT(strcmp(knp->name, prop_name) != 0);
+	}
+
+	if (i == STMF_KSTAT_RPORT_DATAMAX) {
+		mutex_exit(irport->irport_kstat_info->ks_lock);
+		kmem_free(s, strlen(s) + 1);
+		return (STMF_FAILURE);
+	}
+
+	irport->irport_info_dirty = B_TRUE;
+	kstat_named_init(knp, prop_name, KSTAT_DATA_STRING);
+	kstat_named_setstr(knp, s);
+	mutex_exit(irport->irport_kstat_info->ks_lock);
+
+	return (STMF_SUCCESS);
+}
+
+void
+stmf_remove_rport_info(stmf_scsi_session_t *ss,
+    const char *prop_name)
+{
+	stmf_i_scsi_session_t *iss = ss->ss_stmf_private;
+	stmf_i_remote_port_t *irport = iss->iss_irport;
+	kstat_named_t *knp;
+	char *s;
+	int i;
+	uint32_t len;
+
+	mutex_enter(irport->irport_kstat_info->ks_lock);
+	knp = KSTAT_NAMED_PTR(irport->irport_kstat_info);
+	for (i = 0; i < STMF_KSTAT_RPORT_DATAMAX; i++, knp++) {
+		if ((knp->name != NULL) && (strcmp(knp->name, prop_name) == 0))
+			break;
+	}
+
+	if (i == STMF_KSTAT_RPORT_DATAMAX) {
+		mutex_exit(irport->irport_kstat_info->ks_lock);
+		return;
+	}
+
+	s = KSTAT_NAMED_STR_PTR(knp);
+	len = KSTAT_NAMED_STR_BUFLEN(knp);
+
+	for (; i < STMF_KSTAT_RPORT_DATAMAX - 1; i++, knp++) {
+		kstat_named_init(knp, knp[1].name, KSTAT_DATA_STRING);
+		kstat_named_setstr(knp, KSTAT_NAMED_STR_PTR(&knp[1]));
+	}
+	kstat_named_init(knp, "", KSTAT_DATA_STRING);
+
+	irport->irport_info_dirty = B_TRUE;
+	mutex_exit(irport->irport_kstat_info->ks_lock);
+	kmem_free(s, len);
+}
+
+static int
+stmf_kstat_rport_update(kstat_t *ksp, int rw)
+{
+	stmf_i_remote_port_t *irport = ksp->ks_private;
+	kstat_named_t *knp;
+	uint_t ndata = 0;
+	size_t dsize = 0;
+	int i;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+
+	if (!irport->irport_info_dirty)
+		return (0);
+
+	knp = KSTAT_NAMED_PTR(ksp);
+	for (i = 0; i < STMF_KSTAT_RPORT_DATAMAX; i++, knp++) {
+		if (KSTAT_NAMED_STR_PTR(knp) == NULL)
+			break;
+		ndata++;
+		dsize += KSTAT_NAMED_STR_BUFLEN(knp);
+	}
+
+	ksp->ks_ndata = ndata;
+	ksp->ks_data_size = sizeof (kstat_named_t) * ndata + dsize;
+	irport->irport_info_dirty = B_FALSE;
+
+	return (0);
+}
+
 void
 stmf_deregister_scsi_session(stmf_local_port_t *lport, stmf_scsi_session_t *ss)
 {
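
Because every rport kstat name embeds the irport pointer (stmf_rport_<addr>,
stmf_rport_io_<addr>, stmf_rport_st_<addr>), a consumer cannot look one up by
a fixed name and has to walk the kstat chain instead. A hedged userland sketch
using libkstat (the module name and name prefix come from this changeset;
everything else is illustrative), built with cc ... -lkstat:

    #include <kstat.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
    	kstat_ctl_t	*kc;
    	kstat_t		*ksp;

    	if ((kc = kstat_open()) == NULL)
    		return (1);

    	for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
    		kstat_named_t	*knp;
    		uint_t		i;

    		if (strcmp(ksp->ks_module, "stmf") != 0 ||
    		    strncmp(ksp->ks_name, "stmf_rport_", 11) != 0 ||
    		    ksp->ks_type != KSTAT_TYPE_NAMED)
    			continue;
    		if (kstat_read(kc, ksp, NULL) == -1)
    			continue;
    		/* Print the string properties: name, protocol, peername_* */
    		knp = KSTAT_NAMED_PTR(ksp);
    		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
    			if (knp->data_type == KSTAT_DATA_STRING &&
    			    KSTAT_NAMED_STR_PTR(knp) != NULL) {
    				(void) printf("%s: %s = %s\n", ksp->ks_name,
    				    knp->name, KSTAT_NAMED_STR_PTR(knp));
    			}
    		}
    	}
    	(void) kstat_close(kc);
    	return (0);
    }
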
@@ -3610,6 +3931,10 @@
 	int found = 0;
 	stmf_ic_msg_t *ic_session_dereg;
 	stmf_status_t ic_ret = STMF_FAILURE;
+	stmf_lun_map_t *sm;
+	stmf_i_lu_t *ilu;
+	uint16_t n;
+	stmf_lun_map_ent_t *ent;
 
 	DTRACE_PROBE2(session__offline, stmf_local_port_t *, lport,
 	    stmf_scsi_session_t *, ss);
@@ -3659,14 +3984,45 @@
 	ilport->ilport_nsessions--;
 
 	stmf_irport_deregister(iss->iss_irport);
-	(void) stmf_session_destroy_lun_map(ilport, iss);
+	/*
+	 * Detach the map under stmf_lock to avoid conflicting with
+	 * session map updates, which grab only stmf_lock.
+	 */
+	sm = iss->iss_sm;
+	iss->iss_sm = NULL;
+	iss->iss_hg = NULL;
+
 	rw_exit(&ilport->ilport_lock);
-	mutex_exit(&stmf_state.stmf_lock);
+
+	if (sm->lm_nentries) {
+		for (n = 0; n < sm->lm_nentries; n++) {
+			if ((ent = (stmf_lun_map_ent_t *)sm->lm_plus[n])
+			    != NULL) {
+				if (ent->ent_itl_datap) {
+					stmf_do_itl_dereg(ent->ent_lu,
+					    ent->ent_itl_datap,
+					    STMF_ITL_REASON_IT_NEXUS_LOSS);
+				}
+				ilu = (stmf_i_lu_t *)
+				    ent->ent_lu->lu_stmf_private;
+				atomic_dec_32(&ilu->ilu_ref_cnt);
+				kmem_free(sm->lm_plus[n],
+				    sizeof (stmf_lun_map_ent_t));
+			}
+		}
+		kmem_free(sm->lm_plus,
+		    sizeof (stmf_lun_map_ent_t *) * sm->lm_nentries);
+	}
+	kmem_free(sm, sizeof (*sm));
 
 	if (iss->iss_flags & ISS_NULL_TPTID) {
 		stmf_remote_port_free(ss->ss_rport);
 	}
-}
+
+	mutex_exit(&stmf_state.stmf_lock);
+}
 
 stmf_i_scsi_session_t *
 stmf_session_id_to_issptr(uint64_t session_id, int stay_locked)
@@ -3863,54 +4219,6 @@
 	return (STMF_SUCCESS);
 }
 
-stmf_status_t
-stmf_get_itl_handle(stmf_lu_t *lu, uint8_t *lun, stmf_scsi_session_t *ss,
-    uint64_t session_id, void **itl_handle_retp)
-{
-	stmf_i_scsi_session_t *iss;
-	stmf_lun_map_ent_t *ent;
-	stmf_lun_map_t *lm;
-	stmf_status_t ret;
-	int i;
-	uint16_t n;
-
-	if (ss == NULL) {
-		iss = stmf_session_id_to_issptr(session_id, 1);
-		if (iss == NULL)
-			return (STMF_NOT_FOUND);
-	} else {
-		iss = (stmf_i_scsi_session_t *)ss->ss_stmf_private;
-		rw_enter(iss->iss_lockp, RW_WRITER);
-	}
-
-	ent = NULL;
-	if (lun == NULL) {
-		lm = iss->iss_sm;
-		for (i = 0; i < lm->lm_nentries; i++) {
-			if (lm->lm_plus[i] == NULL)
-				continue;
-			ent = (stmf_lun_map_ent_t *)lm->lm_plus[i];
-			if (ent->ent_lu == lu)
-				break;
-		}
-	} else {
-		n = ((uint16_t)lun[1] | (((uint16_t)(lun[0] & 0x3F)) << 8));
-		ent = (stmf_lun_map_ent_t *)
-		    stmf_get_ent_from_map(iss->iss_sm, n);
-		if (lu && (ent->ent_lu != lu))
-			ent = NULL;
-	}
-	if (ent && ent->ent_itl_datap) {
-		*itl_handle_retp = ent->ent_itl_datap->itl_handle;
-		ret = STMF_SUCCESS;
-	} else {
-		ret = STMF_NOT_FOUND;
-	}
-
-	rw_exit(iss->iss_lockp);
-	return (ret);
-}
-
 stmf_data_buf_t *
 stmf_alloc_dbuf(scsi_task_t *task, uint32_t size, uint32_t *pminsize,
     uint32_t flags)
@@ -4044,13 +4352,29 @@
 	} else {
 		lu = lun_map_ent->ent_lu;
 	}
+
 	ilu = lu->lu_stmf_private;
 	if (ilu->ilu_flags & ILU_RESET_ACTIVE) {
 		rw_exit(iss->iss_lockp);
 		return (NULL);
 	}
-	ASSERT(lu == dlun0 || (ilu->ilu_state != STMF_STATE_OFFLINING &&
-	    ilu->ilu_state != STMF_STATE_OFFLINE));
+
+	/*
+	 * If the LUN is being offlined or is offline, only commands that
+	 * query the LUN are allowed.  These are handled in stmf via the
+	 * dlun0 vector.  It is possible that a race condition will cause
+	 * other commands to arrive while the LUN is in the process of
+	 * being offlined.  Check for those and just let the protocol
+	 * stack handle the error.
+	 */
+	if ((ilu->ilu_state == STMF_STATE_OFFLINING) ||
+	    (ilu->ilu_state == STMF_STATE_OFFLINE)) {
+		if (lu != dlun0) {
+			rw_exit(iss->iss_lockp);
+			return (NULL);
+		}
+	}
+
 	do {
 		if (ilu->ilu_free_tasks == NULL) {
 			new_task = 1;
@@ -4098,15 +4422,6 @@
 			return (NULL);
 		}
 		task->task_lu = lu;
-		l = task->task_lun_no;
-		l[0] = lun[0];
-		l[1] = lun[1];
-		l[2] = lun[2];
-		l[3] = lun[3];
-		l[4] = lun[4];
-		l[5] = lun[5];
-		l[6] = lun[6];
-		l[7] = lun[7];
 		task->task_cdb = (uint8_t *)task->task_port_private;
 		if ((ulong_t)(task->task_cdb) & 7ul) {
 			task->task_cdb = (uint8_t *)(((ulong_t)
@@ -4115,7 +4430,25 @@
 		itask = (stmf_i_scsi_task_t *)task->task_stmf_private;
 		itask->itask_cdb_buf_size = cdb_length;
 		mutex_init(&itask->itask_audit_mutex, NULL, MUTEX_DRIVER, NULL);
-	}
+		mutex_init(&itask->itask_mutex, NULL, MUTEX_DRIVER, NULL);
+	}
+
+	/*
+	 * Since a LUN can be mapped as different LUN ids to different initiator
+	 * groups, we need to set the LUN id for a new task and reset it for
+	 * a reused task.
+	 */
+	l = task->task_lun_no;
+	l[0] = lun[0];
+	l[1] = lun[1];
+	l[2] = lun[2];
+	l[3] = lun[3];
+	l[4] = lun[4];
+	l[5] = lun[5];
+	l[6] = lun[6];
+	l[7] = lun[7];
+
+	mutex_enter(&itask->itask_mutex);
 	task->task_session = ss;
 	task->task_lport = lport;
 	task->task_cdb_length = cdb_length_in;
@@ -4125,6 +4458,9 @@
 	itask->itask_lport_read_time = itask->itask_lport_write_time = 0;
 	itask->itask_read_xfer = itask->itask_write_xfer = 0;
 	itask->itask_audit_index = 0;
+	bzero(&itask->itask_audit_records[0],
+	    sizeof (stmf_task_audit_rec_t) * ITASK_TASK_AUDIT_DEPTH);
+	mutex_exit(&itask->itask_mutex);
 
 	if (new_task) {
 		if (lu->lu_task_alloc(task) != STMF_SUCCESS) {
@@ -4165,6 +4501,7 @@
 	return (task);
 }
 
+/* ARGSUSED */
 static void
 stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss)
 {
@@ -4173,8 +4510,19 @@
 	stmf_i_lu_t *ilu = (stmf_i_lu_t *)task->task_lu->lu_stmf_private;
 
 	ASSERT(rw_lock_held(iss->iss_lockp));
+	ASSERT((itask->itask_flags & ITASK_IN_FREE_LIST) == 0);
+	ASSERT((itask->itask_flags & ITASK_IN_WORKER_QUEUE) == 0);
+	ASSERT((itask->itask_flags & ITASK_IN_TRANSITION) == 0);
+	ASSERT((itask->itask_flags & ITASK_KNOWN_TO_LU) == 0);
+	ASSERT(mutex_owned(&itask->itask_mutex));
+
 	itask->itask_flags = ITASK_IN_FREE_LIST;
+	itask->itask_ncmds = 0;
 	itask->itask_proxy_msg_id = 0;
+	atomic_dec_32(itask->itask_ilu_task_cntr);
+	itask->itask_worker_next = NULL;
+	mutex_exit(&itask->itask_mutex);
+
 	mutex_enter(&ilu->ilu_task_lock);
 	itask->itask_lu_free_next = ilu->ilu_free_tasks;
 	ilu->ilu_free_tasks = itask;
@@ -4182,7 +4530,6 @@
 	if (ilu->ilu_ntasks == ilu->ilu_ntasks_free)
 		cv_signal(&ilu->ilu_offline_pending_cv);
 	mutex_exit(&ilu->ilu_task_lock);
-	atomic_dec_32(itask->itask_ilu_task_cntr);
 }
 
 void
@@ -4259,7 +4606,40 @@
 	}
 }
 
-void
+/*
+ * Since this method is looking for tasks that are stuck, lost, or senile,
+ * it should be more willing to give up scanning during this time period.
+ * This is why mutex_tryenter is now used instead of the standard
+ * mutex_enter. There has been at least one case where the following
+ * occurred.
+ *
+ * 1) The iscsit_deferred() method is trying to register a session and
+ *    needs the global lock which is held.
+ * 2) Another thread which holds the global lock is trying to deregister a
+ *    session and needs the session lock.
+ * 3) A third thread is allocating a stmf task that has grabbed the session
+ *    lock and is trying to grab the lun task lock.
+ * 4) There's a timeout thread that has the lun task lock and is trying to
+ *    grab a specific task lock.
+ * 5) The thread that has the task lock is waiting for the ref count to go
+ *    to zero.
+ * 6) There's a task that would drop the count to zero, but it's sitting in
+ *    the task queue waiting to run and is stuck because the thread in #1
+ *    is currently blocked.
+ *
+ * This method is number 4 in the above chain of events. Had this code
+ * originally used mutex_tryenter the chain would have been broken and the
+ * system wouldn't have hung. So now this method uses mutex_tryenter, and
+ * the scenario above is why.
+ */
+/* ---- Only one thread calls stmf_do_ilu_timeouts so no lock required ---- */
+typedef struct stmf_bailout_cnt {
+	int	no_ilu_lock;
+	int	no_task_lock;
+	int	tasks_checked;
+} stmf_bailout_cnt_t;
+
+stmf_bailout_cnt_t stmf_bailout;
+
+static void
 stmf_do_ilu_timeouts(stmf_i_lu_t *ilu)
 {
 	clock_t l = ddi_get_lbolt();
@@ -4268,11 +4648,21 @@
 	scsi_task_t *task;
 	uint32_t to;
 
-	mutex_enter(&ilu->ilu_task_lock);
+	if (mutex_tryenter(&ilu->ilu_task_lock) == 0) {
+		stmf_bailout.no_ilu_lock++;
+		return;
+	}
+
 	for (itask = ilu->ilu_tasks; itask != NULL;
 	    itask = itask->itask_lu_next) {
+		if (mutex_tryenter(&itask->itask_mutex) == 0) {
+			stmf_bailout.no_task_lock++;
+			continue;
+		}
+		stmf_bailout.tasks_checked++;
 		if (itask->itask_flags & (ITASK_IN_FREE_LIST |
 		    ITASK_BEING_ABORTED)) {
+			mutex_exit(&itask->itask_mutex);
 			continue;
 		}
 		task = itask->itask_task;
@@ -4280,8 +4670,12 @@
 			to = stmf_default_task_timeout;
 		else
 			to = task->task_timeout;
-		if ((itask->itask_start_time + (to * ps)) > l)
+
+		if ((itask->itask_start_time + (to * ps)) > l) {
+			mutex_exit(&itask->itask_mutex);
 			continue;
+		}
+		mutex_exit(&itask->itask_mutex);
 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
 		    STMF_TIMEOUT, NULL);
 	}
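
The tryenter-based scan trades completeness for deadlock avoidance: any lock
that cannot be taken immediately is skipped and counted, and the task is
simply revisited on the next timeout pass. A minimal standalone sketch of the
pattern, assuming the usual kernel headers; item_t, check_item(), and
scan_stats_t are hypothetical names, not part of this patch:

	typedef struct item {
		kmutex_t	i_lock;
		struct item	*i_next;
	} item_t;

	typedef struct scan_stats {
		int	ss_skipped;	/* locks we could not take */
		int	ss_checked;	/* items actually examined */
	} scan_stats_t;

	static void
	scan_without_blocking(item_t *head, scan_stats_t *ss)
	{
		item_t *ip;

		for (ip = head; ip != NULL; ip = ip->i_next) {
			if (mutex_tryenter(&ip->i_lock) == 0) {
				ss->ss_skipped++;	/* next pass gets it */
				continue;
			}
			ss->ss_checked++;
			check_item(ip);		/* hypothetical per-item work */
			mutex_exit(&ip->i_lock);
		}
	}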
@@ -4336,11 +4730,14 @@
 	stmf_i_scsi_task_t *itask;
 
 	mutex_enter(&ilu->ilu_task_lock);
-
 	for (itask = ilu->ilu_tasks; itask != NULL;
 	    itask = itask->itask_lu_next) {
-		if (itask->itask_flags & ITASK_IN_FREE_LIST)
+		mutex_enter(&itask->itask_mutex);
+		if (itask->itask_flags & ITASK_IN_FREE_LIST) {
+			mutex_exit(&itask->itask_mutex);
 			continue;
+		}
+		mutex_exit(&itask->itask_mutex);
 		if (itask->itask_task == tm_task)
 			continue;
 		stmf_abort(STMF_QUEUE_TASK_ABORT, itask->itask_task, s, NULL);
@@ -4396,9 +4793,12 @@
 	    task->task_stmf_private;
 	stmf_i_scsi_session_t *iss = (stmf_i_scsi_session_t *)
 	    task->task_session->ss_stmf_private;
+	stmf_lu_t *lu = task->task_lu;
 
 	stmf_task_audit(itask, TE_TASK_FREE, CMD_OR_IOF_NA, NULL);
-
+	ASSERT(mutex_owned(&itask->itask_mutex));
+	if ((lu != NULL) && (lu->lu_task_done != NULL))
+		lu->lu_task_done(task);
 	stmf_free_task_bufs(itask, lport);
 	stmf_itl_task_done(itask);
 	DTRACE_PROBE2(stmf__task__end, scsi_task_t *, task,
@@ -4412,7 +4812,14 @@
 		}
 	}
 
+	/*
+	 * To prevent a deadlock condition we must release the itask_mutex,
+	 * grab a reader lock on iss_lockp, and then reacquire the
+	 * itask_mutex.
+	 */
+	mutex_exit(&itask->itask_mutex);
 	rw_enter(iss->iss_lockp, RW_READER);
+	mutex_enter(&itask->itask_mutex);
+
 	lport->lport_task_free(task);
 	if (itask->itask_worker) {
 		atomic_dec_32(&stmf_cur_ntasks);
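
The release-and-reacquire above is the standard response to a fixed lock
order (iss_lockp before itask_mutex) when a code path arrives holding only
the inner lock. The shape of the idiom, sketched with generic names; any
state read under the inner lock before the drop must be revalidated once it
is retaken:

	/* required order is outer before inner; we hold only inner */
	mutex_exit(&inner);			/* itask_mutex here */
	rw_enter(&outer, RW_READER);		/* iss_lockp here */
	mutex_enter(&inner);
	/* revalidate anything derived from the inner lock's state */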
@@ -4433,9 +4840,9 @@
 	    task->task_stmf_private;
 	stmf_i_lu_t *ilu = (stmf_i_lu_t *)task->task_lu->lu_stmf_private;
 	int nv;
-	uint32_t old, new;
+	uint32_t new;
 	uint32_t ct;
-	stmf_worker_t *w, *w1;
+	stmf_worker_t *w;
 	uint8_t tm;
 
 	if (task->task_max_nbufs > 4)
@@ -4445,44 +4852,28 @@
 	ct = atomic_inc_32_nv(&stmf_cur_ntasks);
 
 	/* Select the next worker using round robin */
-	nv = (int)atomic_inc_32_nv((uint32_t *)&stmf_worker_sel_counter);
-	if (nv >= stmf_nworkers_accepting_cmds) {
-		int s = nv;
-		do {
-			nv -= stmf_nworkers_accepting_cmds;
-		} while (nv >= stmf_nworkers_accepting_cmds);
-		if (nv < 0)
-			nv = 0;
-		/* Its ok if this cas fails */
-		(void) atomic_cas_32((uint32_t *)&stmf_worker_sel_counter,
-		    s, nv);
-	}
+	mutex_enter(&stmf_worker_sel_mx);
+	stmf_worker_sel_counter++;
+	if (stmf_worker_sel_counter >= stmf_nworkers)
+		stmf_worker_sel_counter = 0;
+	nv = stmf_worker_sel_counter;
+
+	/* if the selected worker is not idle then bump to the next worker */
+	if (stmf_workers[nv].worker_queue_depth > 0) {
+		stmf_worker_sel_counter++;
+		if (stmf_worker_sel_counter >= stmf_nworkers)
+			stmf_worker_sel_counter = 0;
+		nv = stmf_worker_sel_counter;
+	}
+	mutex_exit(&stmf_worker_sel_mx);
+
 	w = &stmf_workers[nv];
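
The replacement selection policy is a mutex-protected round-robin with a
single busy-skip, which is far easier to reason about than the old lock-free
counter arithmetic. An illustrative standalone model of the policy; the
worker count and per-worker depth array are assumptions of the sketch:

	#define	NWORKERS	8		/* assumption for the sketch */

	static kmutex_t		sel_mx;
	static int		sel_counter;
	static uint32_t		queue_depth[NWORKERS];	/* per-worker backlog */

	static int
	pick_worker(void)
	{
		int nv;

		mutex_enter(&sel_mx);
		if (++sel_counter >= NWORKERS)
			sel_counter = 0;
		nv = sel_counter;
		/* if the pick already has work queued, advance exactly once */
		if (queue_depth[nv] > 0) {
			if (++sel_counter >= NWORKERS)
				sel_counter = 0;
			nv = sel_counter;
		}
		mutex_exit(&sel_mx);
		return (nv);
	}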
 
-	/*
-	 * A worker can be pinned by interrupt. So select the next one
-	 * if it has lower load.
-	 */
-	if ((nv + 1) >= stmf_nworkers_accepting_cmds) {
-		w1 = stmf_workers;
-	} else {
-		w1 = &stmf_workers[nv + 1];
-	}
-	if (w1->worker_queue_depth < w->worker_queue_depth)
-		w = w1;
-
+	mutex_enter(&itask->itask_mutex);
 	mutex_enter(&w->worker_lock);
-	if (((w->worker_flags & STMF_WORKER_STARTED) == 0) ||
-	    (w->worker_flags & STMF_WORKER_TERMINATE)) {
-		/*
-		 * Maybe we are in the middle of a change. Just go to
-		 * the 1st worker.
-		 */
-		mutex_exit(&w->worker_lock);
-		w = stmf_workers;
-		mutex_enter(&w->worker_lock);
-	}
+
 	itask->itask_worker = w;
+
 	/*
 	 * Track max system load inside the worker as we already have the
 	 * worker lock (no point implementing another lock). The service
@@ -4492,39 +4883,33 @@
 	if (w->worker_max_sys_qdepth_pu < ct)
 		w->worker_max_sys_qdepth_pu = ct;
 
-	do {
-		old = new = itask->itask_flags;
-		new |= ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE;
-		if (task->task_mgmt_function) {
-			tm = task->task_mgmt_function;
-			if ((tm == TM_TARGET_RESET) ||
-			    (tm == TM_TARGET_COLD_RESET) ||
-			    (tm == TM_TARGET_WARM_RESET)) {
-				new |= ITASK_DEFAULT_HANDLING;
-			}
-		} else if (task->task_cdb[0] == SCMD_REPORT_LUNS) {
+	new = itask->itask_flags;
+	new |= ITASK_KNOWN_TO_TGT_PORT;
+	if (task->task_mgmt_function) {
+		tm = task->task_mgmt_function;
+		if ((tm == TM_TARGET_RESET) ||
+		    (tm == TM_TARGET_COLD_RESET) ||
+		    (tm == TM_TARGET_WARM_RESET)) {
 			new |= ITASK_DEFAULT_HANDLING;
 		}
-		new &= ~ITASK_IN_TRANSITION;
-	} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+	} else if (task->task_cdb[0] == SCMD_REPORT_LUNS) {
+		new |= ITASK_DEFAULT_HANDLING;
+	}
+	new &= ~ITASK_IN_TRANSITION;
+	itask->itask_flags = new;
 
 	stmf_itl_task_start(itask);
 
-	itask->itask_worker_next = NULL;
-	if (w->worker_task_tail) {
-		w->worker_task_tail->itask_worker_next = itask;
-	} else {
-		w->worker_task_head = itask;
-	}
-	w->worker_task_tail = itask;
-	if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) {
-		w->worker_max_qdepth_pu = w->worker_queue_depth;
-	}
-	/* Measure task waitq time */
-	itask->itask_waitq_enter_timestamp = gethrtime();
-	atomic_inc_32(&w->worker_ref_count);
 	itask->itask_cmd_stack[0] = ITASK_CMD_NEW_TASK;
 	itask->itask_ncmds = 1;
+
+	if ((task->task_flags & TF_INITIAL_BURST) &&
+	    !(curthread->t_flag & T_INTR_THREAD)) {
+		stmf_update_kstat_lu_io(task, dbuf);
+		stmf_update_kstat_lport_io(task, dbuf);
+		stmf_update_kstat_rport_io(task, dbuf);
+	}
+
 	stmf_task_audit(itask, TE_TASK_START, CMD_OR_IOF_NA, dbuf);
 	if (dbuf) {
 		itask->itask_allocated_buf_map = 1;
@@ -4535,13 +4920,10 @@
 		itask->itask_dbufs[0] = NULL;
 	}
 
-	if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) {
-		w->worker_signal_timestamp = gethrtime();
-		DTRACE_PROBE2(worker__signal, stmf_worker_t *, w,
-		    scsi_task_t *, task);
-		cv_signal(&w->worker_cv);
-	}
+	STMF_ENQUEUE_ITASK(w, itask);
+
 	mutex_exit(&w->worker_lock);
+	mutex_exit(&itask->itask_mutex);
 
 	/*
 	 * This can only happen if during stmf_task_alloc(), ILU_RESET_ACTIVE
@@ -4597,26 +4979,30 @@
 
 	stmf_task_audit(itask, TE_XFER_START, ioflags, dbuf);
 
+	mutex_enter(&itask->itask_mutex);
 	if (ioflags & STMF_IOF_LU_DONE) {
-		uint32_t new, old;
-		do {
-			new = old = itask->itask_flags;
-			if (new & ITASK_BEING_ABORTED)
-				return (STMF_ABORTED);
-			new &= ~ITASK_KNOWN_TO_LU;
-		} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
-	}
-	if (itask->itask_flags & ITASK_BEING_ABORTED)
+		if (itask->itask_flags & ITASK_BEING_ABORTED) {
+			mutex_exit(&itask->itask_mutex);
+			return (STMF_ABORTED);
+		}
+		itask->itask_flags &= ~ITASK_KNOWN_TO_LU;
+	}
+	if ((itask->itask_flags & ITASK_BEING_ABORTED) != 0) {
+		mutex_exit(&itask->itask_mutex);
 		return (STMF_ABORTED);
+	}
+	mutex_exit(&itask->itask_mutex);
+
 #ifdef	DEBUG
 	if (!(ioflags & STMF_IOF_STATS_ONLY) && stmf_drop_buf_counter > 0) {
-		if (atomic_dec_32_nv(&stmf_drop_buf_counter) == 1)
+		if (atomic_dec_32_nv((uint32_t *)&stmf_drop_buf_counter) == 1)
 			return (STMF_SUCCESS);
 	}
 #endif
 
 	stmf_update_kstat_lu_io(task, dbuf);
 	stmf_update_kstat_lport_io(task, dbuf);
+	stmf_update_kstat_rport_io(task, dbuf);
 	stmf_lport_xfer_start(itask, dbuf);
 	if (ioflags & STMF_IOF_STATS_ONLY) {
 		stmf_lport_xfer_done(itask, dbuf);
@@ -4646,7 +5032,7 @@
 	    (stmf_i_scsi_task_t *)task->task_stmf_private;
 	stmf_i_local_port_t *ilport;
 	stmf_worker_t *w = itask->itask_worker;
-	uint32_t new, old;
+	uint32_t new;
 	uint8_t update_queue_flags, free_it, queue_it;
 
 	stmf_lport_xfer_done(itask, dbuf);
@@ -4667,74 +5053,67 @@
 		return;
 	}
 
+	mutex_enter(&itask->itask_mutex);
 	mutex_enter(&w->worker_lock);
-	do {
-		new = old = itask->itask_flags;
-		if (old & ITASK_BEING_ABORTED) {
-			mutex_exit(&w->worker_lock);
-			return;
-		}
+	new = itask->itask_flags;
+	if (itask->itask_flags & ITASK_BEING_ABORTED) {
+		mutex_exit(&w->worker_lock);
+		mutex_exit(&itask->itask_mutex);
+		return;
+	}
+	free_it = 0;
+	if (iof & STMF_IOF_LPORT_DONE) {
+		new &= ~ITASK_KNOWN_TO_TGT_PORT;
+		task->task_completion_status = dbuf->db_xfer_status;
+		free_it = 1;
+	}
+	/*
+	 * If the task is known to LU then queue it. But if
+	 * it is already queued (multiple completions) then
+	 * just update the buffer information by grabbing the
+	 * worker lock. If the task is not known to LU,
+	 * completed/aborted, then see if we need to
+	 * free this task.
+	 */
+	if (itask->itask_flags & ITASK_KNOWN_TO_LU) {
 		free_it = 0;
-		if (iof & STMF_IOF_LPORT_DONE) {
-			new &= ~ITASK_KNOWN_TO_TGT_PORT;
-			task->task_completion_status = dbuf->db_xfer_status;
-			free_it = 1;
-		}
-		/*
-		 * If the task is known to LU then queue it. But if
-		 * it is already queued (multiple completions) then
-		 * just update the buffer information by grabbing the
-		 * worker lock. If the task is not known to LU,
-		 * completed/aborted, then see if we need to
-		 * free this task.
-		 */
-		if (old & ITASK_KNOWN_TO_LU) {
-			free_it = 0;
-			update_queue_flags = 1;
-			if (old & ITASK_IN_WORKER_QUEUE) {
-				queue_it = 0;
-			} else {
-				queue_it = 1;
-				new |= ITASK_IN_WORKER_QUEUE;
-			}
+		update_queue_flags = 1;
+		if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) {
+			queue_it = 0;
 		} else {
-			update_queue_flags = 0;
-			queue_it = 0;
-		}
-	} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+			queue_it = 1;
+		}
+	} else {
+		update_queue_flags = 0;
+		queue_it = 0;
+	}
+	itask->itask_flags = new;
 
 	if (update_queue_flags) {
 		uint8_t cmd = (dbuf->db_handle << 5) | ITASK_CMD_DATA_XFER_DONE;
 
+		ASSERT((itask->itask_flags & ITASK_IN_FREE_LIST) == 0);
 		ASSERT(itask->itask_ncmds < ITASK_MAX_NCMDS);
+
 		itask->itask_cmd_stack[itask->itask_ncmds++] = cmd;
 		if (queue_it) {
-			itask->itask_worker_next = NULL;
-			if (w->worker_task_tail) {
-				w->worker_task_tail->itask_worker_next = itask;
-			} else {
-				w->worker_task_head = itask;
-			}
-			w->worker_task_tail = itask;
-			/* Measure task waitq time */
-			itask->itask_waitq_enter_timestamp = gethrtime();
-			if (++(w->worker_queue_depth) >
-			    w->worker_max_qdepth_pu) {
-				w->worker_max_qdepth_pu = w->worker_queue_depth;
-			}
-			if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
-				cv_signal(&w->worker_cv);
-		}
-	}
+			STMF_ENQUEUE_ITASK(w, itask);
+		}
+		mutex_exit(&w->worker_lock);
+		mutex_exit(&itask->itask_mutex);
+		return;
+	}
+
 	mutex_exit(&w->worker_lock);
-
 	if (free_it) {
 		if ((itask->itask_flags & (ITASK_KNOWN_TO_LU |
 		    ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE |
 		    ITASK_BEING_ABORTED)) == 0) {
 			stmf_task_free(task);
-		}
-	}
+			return;
+		}
+	}
+	mutex_exit(&itask->itask_mutex);
 }
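
For reference, the routing decision in the rewritten completion path reduces
to a small table (the same logic the old CAS loop encoded, now evaluated
under itask_mutex):

	KNOWN_TO_LU	IN_WORKER_QUEUE	action
	set		clear		push DATA_XFER_DONE cmd and enqueue
	set		set		push the cmd only; already queued
	clear		--		free the task if STMF_IOF_LPORT_DONE
					was set and no life-cycle flags remain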
 
 stmf_status_t
@@ -4747,22 +5126,25 @@
 
 	stmf_task_audit(itask, TE_SEND_STATUS, ioflags, NULL);
 
+	mutex_enter(&itask->itask_mutex);
 	if (ioflags & STMF_IOF_LU_DONE) {
-		uint32_t new, old;
-		do {
-			new = old = itask->itask_flags;
-			if (new & ITASK_BEING_ABORTED)
-				return (STMF_ABORTED);
-			new &= ~ITASK_KNOWN_TO_LU;
-		} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+		if (itask->itask_flags & ITASK_BEING_ABORTED) {
+			mutex_exit(&itask->itask_mutex);
+			return (STMF_ABORTED);
+		}
+		itask->itask_flags &= ~ITASK_KNOWN_TO_LU;
 	}
 
 	if (!(itask->itask_flags & ITASK_KNOWN_TO_TGT_PORT)) {
+		mutex_exit(&itask->itask_mutex);
 		return (STMF_SUCCESS);
 	}
 
-	if (itask->itask_flags & ITASK_BEING_ABORTED)
+	if (itask->itask_flags & ITASK_BEING_ABORTED) {
+		mutex_exit(&itask->itask_mutex);
 		return (STMF_ABORTED);
+	}
+	mutex_exit(&itask->itask_mutex);
 
 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
 		task->task_status_ctrl = 0;
@@ -4790,66 +5172,57 @@
 	stmf_i_scsi_task_t *itask =
 	    (stmf_i_scsi_task_t *)task->task_stmf_private;
 	stmf_worker_t *w = itask->itask_worker;
-	uint32_t new, old;
+	uint32_t new;
 	uint8_t free_it, queue_it;
 
 	stmf_task_audit(itask, TE_SEND_STATUS_DONE, iof, NULL);
 
+	mutex_enter(&itask->itask_mutex);
 	mutex_enter(&w->worker_lock);
-	do {
-		new = old = itask->itask_flags;
-		if (old & ITASK_BEING_ABORTED) {
-			mutex_exit(&w->worker_lock);
-			return;
-		}
+	new = itask->itask_flags;
+	if (itask->itask_flags & ITASK_BEING_ABORTED) {
+		mutex_exit(&w->worker_lock);
+		mutex_exit(&itask->itask_mutex);
+		return;
+	}
+	free_it = 0;
+	if (iof & STMF_IOF_LPORT_DONE) {
+		new &= ~ITASK_KNOWN_TO_TGT_PORT;
+		free_it = 1;
+	}
+	/*
+	 * If the task is known to LU then queue it. But if
+	 * it is already queued (multiple completions) then
+	 * just update the buffer information by grabbing the
+	 * worker lock. If the task is not known to LU,
+	 * completed/aborted, then see if we need to
+	 * free this task.
+	 */
+	if (itask->itask_flags & ITASK_KNOWN_TO_LU) {
 		free_it = 0;
-		if (iof & STMF_IOF_LPORT_DONE) {
-			new &= ~ITASK_KNOWN_TO_TGT_PORT;
-			free_it = 1;
-		}
-		/*
-		 * If the task is known to LU then queue it. But if
-		 * it is already queued (multiple completions) then
-		 * just update the buffer information by grabbing the
-		 * worker lock. If the task is not known to LU,
-		 * completed/aborted, then see if we need to
-		 * free this task.
-		 */
-		if (old & ITASK_KNOWN_TO_LU) {
-			free_it = 0;
-			queue_it = 1;
-			if (old & ITASK_IN_WORKER_QUEUE) {
-				cmn_err(CE_PANIC, "status completion received"
-				    " when task is already in worker queue "
-				    " task = %p", (void *)task);
-			}
-			new |= ITASK_IN_WORKER_QUEUE;
-		} else {
-			queue_it = 0;
-		}
-	} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+		queue_it = 1;
+		if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) {
+			cmn_err(CE_PANIC, "status completion received"
+			    " when task is already in worker queue "
+			    " task = %p", (void *)task);
+		}
+	} else {
+		queue_it = 0;
+	}
+	itask->itask_flags = new;
 	task->task_completion_status = s;
 
-
 	if (queue_it) {
 		ASSERT(itask->itask_ncmds < ITASK_MAX_NCMDS);
 		itask->itask_cmd_stack[itask->itask_ncmds++] =
 		    ITASK_CMD_STATUS_DONE;
-		itask->itask_worker_next = NULL;
-		if (w->worker_task_tail) {
-			w->worker_task_tail->itask_worker_next = itask;
-		} else {
-			w->worker_task_head = itask;
-		}
-		w->worker_task_tail = itask;
-		/* Measure task waitq time */
-		itask->itask_waitq_enter_timestamp = gethrtime();
-		if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) {
-			w->worker_max_qdepth_pu = w->worker_queue_depth;
-		}
-		if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
-			cv_signal(&w->worker_cv);
-	}
+
+		STMF_ENQUEUE_ITASK(w, itask);
+		mutex_exit(&w->worker_lock);
+		mutex_exit(&itask->itask_mutex);
+		return;
+	}
+
 	mutex_exit(&w->worker_lock);
 
 	if (free_it) {
@@ -4857,12 +5230,14 @@
 		    ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE |
 		    ITASK_BEING_ABORTED)) == 0) {
 			stmf_task_free(task);
+			return;
 		} else {
 			cmn_err(CE_PANIC, "LU is done with the task but LPORT "
 			    " is not done, itask %p itask_flags %x",
 			    (void *)itask, itask->itask_flags);
 		}
 	}
+	mutex_exit(&itask->itask_mutex);
 }
 
 void
@@ -4871,33 +5246,32 @@
 	stmf_i_scsi_task_t *itask =
 	    (stmf_i_scsi_task_t *)task->task_stmf_private;
 	stmf_worker_t *w = itask->itask_worker;
-	uint32_t new, old;
-
+
+	mutex_enter(&itask->itask_mutex);
 	mutex_enter(&w->worker_lock);
-	do {
-		new = old = itask->itask_flags;
-		if (old & ITASK_BEING_ABORTED) {
-			mutex_exit(&w->worker_lock);
-			return;
-		}
-		if (old & ITASK_IN_WORKER_QUEUE) {
-			cmn_err(CE_PANIC, "task_lu_done received"
-			    " when task is in worker queue "
-			    " task = %p", (void *)task);
-		}
-		new &= ~ITASK_KNOWN_TO_LU;
-	} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+	if (itask->itask_flags & ITASK_BEING_ABORTED) {
+		mutex_exit(&w->worker_lock);
+		mutex_exit(&itask->itask_mutex);
+		return;
+	}
+	if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) {
+		cmn_err(CE_PANIC, "task_lu_done received"
+		    " when task is in worker queue "
+		    " task = %p", (void *)task);
+	}
+	itask->itask_flags &= ~ITASK_KNOWN_TO_LU;
 
 	mutex_exit(&w->worker_lock);
-
 	if ((itask->itask_flags & (ITASK_KNOWN_TO_LU |
 	    ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE |
 	    ITASK_BEING_ABORTED)) == 0) {
 		stmf_task_free(task);
+		return;
 	} else {
 		cmn_err(CE_PANIC, "stmf_lu_done should be the last stage but "
 		    " the task is still not done, task = %p", (void *)task);
 	}
+	mutex_exit(&itask->itask_mutex);
 }
 
 void
@@ -4906,54 +5280,41 @@
 	stmf_i_scsi_task_t *itask =
 	    (stmf_i_scsi_task_t *)task->task_stmf_private;
 	stmf_worker_t *w;
-	uint32_t old, new;
 
 	stmf_task_audit(itask, TE_TASK_ABORT, CMD_OR_IOF_NA, NULL);
 
-	do {
-		old = new = itask->itask_flags;
-		if ((old & ITASK_BEING_ABORTED) ||
-		    ((old & (ITASK_KNOWN_TO_TGT_PORT |
-		    ITASK_KNOWN_TO_LU)) == 0)) {
-			return;
-		}
-		new |= ITASK_BEING_ABORTED;
-	} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+	mutex_enter(&itask->itask_mutex);
+	if ((itask->itask_flags & ITASK_BEING_ABORTED) ||
+	    ((itask->itask_flags & (ITASK_KNOWN_TO_TGT_PORT |
+	    ITASK_KNOWN_TO_LU)) == 0)) {
+		mutex_exit(&itask->itask_mutex);
+		return;
+	}
+	itask->itask_flags |= ITASK_BEING_ABORTED;
 	task->task_completion_status = s;
-	itask->itask_start_time = ddi_get_lbolt();
 
 	if (((w = itask->itask_worker) == NULL) ||
 	    (itask->itask_flags & ITASK_IN_TRANSITION)) {
+		mutex_exit(&itask->itask_mutex);
 		return;
 	}
 
 	/* Queue it and get out */
-	mutex_enter(&w->worker_lock);
 	if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) {
-		mutex_exit(&w->worker_lock);
+		mutex_exit(&itask->itask_mutex);
 		return;
 	}
-	atomic_or_32(&itask->itask_flags, ITASK_IN_WORKER_QUEUE);
-	itask->itask_worker_next = NULL;
-	if (w->worker_task_tail) {
-		w->worker_task_tail->itask_worker_next = itask;
-	} else {
-		w->worker_task_head = itask;
-	}
-	w->worker_task_tail = itask;
-	if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) {
-		w->worker_max_qdepth_pu = w->worker_queue_depth;
-	}
-	if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
-		cv_signal(&w->worker_cv);
+	mutex_enter(&w->worker_lock);
+	STMF_ENQUEUE_ITASK(w, itask);
 	mutex_exit(&w->worker_lock);
+	mutex_exit(&itask->itask_mutex);
 }
 
 void
 stmf_abort(int abort_cmd, scsi_task_t *task, stmf_status_t s, void *arg)
 {
 	stmf_i_scsi_task_t *itask = NULL;
-	uint32_t old, new, f, rf;
+	uint32_t f, rf;
 
 	DTRACE_PROBE2(scsi__task__abort, scsi_task_t *, task,
 	    stmf_status_t, s);
@@ -4976,17 +5337,24 @@
 	default:
 		return;
 	}
+
 	itask = (stmf_i_scsi_task_t *)task->task_stmf_private;
+	mutex_enter(&itask->itask_mutex);
 	f |= ITASK_BEING_ABORTED | rf;
-	do {
-		old = new = itask->itask_flags;
-		if ((old & f) != f) {
-			return;
-		}
-		new &= ~rf;
-	} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
-}
-
+
+	if ((itask->itask_flags & f) != f) {
+		mutex_exit(&itask->itask_mutex);
+		return;
+	}
+	itask->itask_flags &= ~rf;
+	mutex_exit(&itask->itask_mutex);
+
+}
+
+/*
+ * NOTE: stmf_abort_task_offline will release and then reacquire the
+ * itask_mutex. This is required to prevent a lock order violation.
+ */
 void
 stmf_task_lu_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof)
 {
@@ -4995,7 +5363,7 @@
 	unsigned long long	st;
 
 	stmf_task_audit(itask, TE_TASK_LU_ABORTED, iof, NULL);
-
+	ASSERT(mutex_owned(&itask->itask_mutex));
 	st = s;	/* gcc fix */
 	if ((s != STMF_ABORT_SUCCESS) && (s != STMF_NOT_FOUND)) {
 		(void) snprintf(info, sizeof (info),
@@ -5015,16 +5383,19 @@
 	stmf_abort_task_offline(task, 1, info);
 }
 
+/*
+ * NOTE: stmf_abort_task_offline will release and then reacquire the
+ * itask_mutex. This is required to prevent a lock order violation.
+ */
 void
 stmf_task_lport_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof)
 {
 	char			info[STMF_CHANGE_INFO_LEN];
 	stmf_i_scsi_task_t	*itask = TASK_TO_ITASK(task);
 	unsigned long long	st;
-	uint32_t		old, new;
-
+
+	ASSERT(mutex_owned(&itask->itask_mutex));
 	stmf_task_audit(itask, TE_TASK_LPORT_ABORTED, iof, NULL);
-
 	st = s;
 	if ((s != STMF_ABORT_SUCCESS) && (s != STMF_NOT_FOUND)) {
 		(void) snprintf(info, sizeof (info),
@@ -5038,18 +5409,24 @@
 		/*
 		 * LPORT abort successfully
 		 */
-		do {
-			old = new = itask->itask_flags;
-			if (!(old & ITASK_KNOWN_TO_TGT_PORT))
-				return;
-			new &= ~ITASK_KNOWN_TO_TGT_PORT;
-		} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+		atomic_and_32(&itask->itask_flags, ~ITASK_KNOWN_TO_TGT_PORT);
 		return;
 	}
 
 	stmf_abort_task_offline(task, 0, info);
 }
 
+void
+stmf_task_lport_aborted_unlocked(scsi_task_t *task, stmf_status_t s,
+    uint32_t iof)
+{
+	stmf_i_scsi_task_t	*itask = TASK_TO_ITASK(task);
+
+	mutex_enter(&itask->itask_mutex);
+	stmf_task_lport_aborted(task, s, iof);
+	mutex_exit(&itask->itask_mutex);
+}
+
 stmf_status_t
 stmf_task_poll_lu(scsi_task_t *task, uint32_t timeout)
 {
@@ -5058,15 +5435,18 @@
 	stmf_worker_t *w = itask->itask_worker;
 	int i;
 
+	mutex_enter(&itask->itask_mutex);
 	ASSERT(itask->itask_flags & ITASK_KNOWN_TO_LU);
 	mutex_enter(&w->worker_lock);
 	if (itask->itask_ncmds >= ITASK_MAX_NCMDS) {
 		mutex_exit(&w->worker_lock);
+		mutex_exit(&itask->itask_mutex);
 		return (STMF_BUSY);
 	}
 	for (i = 0; i < itask->itask_ncmds; i++) {
 		if (itask->itask_cmd_stack[i] == ITASK_CMD_POLL_LU) {
 			mutex_exit(&w->worker_lock);
+			mutex_exit(&itask->itask_mutex);
 			return (STMF_SUCCESS);
 		}
 	}
@@ -5080,21 +5460,10 @@
 		itask->itask_poll_timeout = ddi_get_lbolt() + t;
 	}
 	if ((itask->itask_flags & ITASK_IN_WORKER_QUEUE) == 0) {
-		itask->itask_worker_next = NULL;
-		if (w->worker_task_tail) {
-			w->worker_task_tail->itask_worker_next = itask;
-		} else {
-			w->worker_task_head = itask;
-		}
-		w->worker_task_tail = itask;
-		if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) {
-			w->worker_max_qdepth_pu = w->worker_queue_depth;
-		}
-		atomic_or_32(&itask->itask_flags, ITASK_IN_WORKER_QUEUE);
-		if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
-			cv_signal(&w->worker_cv);
+		STMF_ENQUEUE_ITASK(w, itask);
 	}
 	mutex_exit(&w->worker_lock);
+	mutex_exit(&itask->itask_mutex);
 	return (STMF_SUCCESS);
 }
 
@@ -5106,15 +5475,18 @@
 	stmf_worker_t *w = itask->itask_worker;
 	int i;
 
+	mutex_enter(&itask->itask_mutex);
 	ASSERT(itask->itask_flags & ITASK_KNOWN_TO_TGT_PORT);
 	mutex_enter(&w->worker_lock);
 	if (itask->itask_ncmds >= ITASK_MAX_NCMDS) {
 		mutex_exit(&w->worker_lock);
+		mutex_exit(&itask->itask_mutex);
 		return (STMF_BUSY);
 	}
 	for (i = 0; i < itask->itask_ncmds; i++) {
 		if (itask->itask_cmd_stack[i] == ITASK_CMD_POLL_LPORT) {
 			mutex_exit(&w->worker_lock);
+			mutex_exit(&itask->itask_mutex);
 			return (STMF_SUCCESS);
 		}
 	}
@@ -5128,20 +5500,10 @@
 		itask->itask_poll_timeout = ddi_get_lbolt() + t;
 	}
 	if ((itask->itask_flags & ITASK_IN_WORKER_QUEUE) == 0) {
-		itask->itask_worker_next = NULL;
-		if (w->worker_task_tail) {
-			w->worker_task_tail->itask_worker_next = itask;
-		} else {
-			w->worker_task_head = itask;
-		}
-		w->worker_task_tail = itask;
-		if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) {
-			w->worker_max_qdepth_pu = w->worker_queue_depth;
-		}
-		if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
-			cv_signal(&w->worker_cv);
+		STMF_ENQUEUE_ITASK(w, itask);
 	}
 	mutex_exit(&w->worker_lock);
+	mutex_exit(&itask->itask_mutex);
 	return (STMF_SUCCESS);
 }
 
@@ -5152,22 +5514,22 @@
 	stmf_lu_t		*lu;
 	stmf_local_port_t	*lport;
 	unsigned long long	 ret;
-	uint32_t		 old, new;
+	uint32_t		 new = 0;
 	uint8_t			 call_lu_abort, call_port_abort;
 	char			 info[STMF_CHANGE_INFO_LEN];
 
 	lu = task->task_lu;
 	lport = task->task_lport;
-	do {
-		old = new = itask->itask_flags;
-		if ((old & (ITASK_KNOWN_TO_LU | ITASK_LU_ABORT_CALLED)) ==
-		    ITASK_KNOWN_TO_LU) {
-			new |= ITASK_LU_ABORT_CALLED;
-			call_lu_abort = 1;
-		} else {
-			call_lu_abort = 0;
-		}
-	} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+	mutex_enter(&itask->itask_mutex);
+	new = itask->itask_flags;
+	if ((itask->itask_flags & (ITASK_KNOWN_TO_LU |
+	    ITASK_LU_ABORT_CALLED)) == ITASK_KNOWN_TO_LU) {
+		new |= ITASK_LU_ABORT_CALLED;
+		call_lu_abort = 1;
+	} else {
+		call_lu_abort = 0;
+	}
+	itask->itask_flags = new;
 
 	if (call_lu_abort) {
 		if ((itask->itask_flags & ITASK_DEFAULT_HANDLING) == 0) {
@@ -5195,16 +5557,22 @@
 		}
 	}
 
-	do {
-		old = new = itask->itask_flags;
-		if ((old & (ITASK_KNOWN_TO_TGT_PORT |
-		    ITASK_TGT_PORT_ABORT_CALLED)) == ITASK_KNOWN_TO_TGT_PORT) {
-			new |= ITASK_TGT_PORT_ABORT_CALLED;
-			call_port_abort = 1;
-		} else {
-			call_port_abort = 0;
-		}
-	} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+	/*
+	 * NOTE: After the call to either stmf_abort_task_offline() or
+	 * stmf_task_lu_abort() the itask_mutex was dropped and reacquired
+	 * to avoid a deadlock situation with stmf_state.stmf_lock.
+	 */
+
+	new = itask->itask_flags;
+	if ((itask->itask_flags & (ITASK_KNOWN_TO_TGT_PORT |
+	    ITASK_TGT_PORT_ABORT_CALLED)) == ITASK_KNOWN_TO_TGT_PORT) {
+		new |= ITASK_TGT_PORT_ABORT_CALLED;
+		call_port_abort = 1;
+	} else {
+		call_port_abort = 0;
+	}
+	itask->itask_flags = new;
+
 	if (call_port_abort) {
 		ret = lport->lport_abort(lport, STMF_LPORT_ABORT_TASK, task, 0);
 		if ((ret == STMF_ABORT_SUCCESS) || (ret == STMF_NOT_FOUND)) {
@@ -5228,6 +5596,7 @@
 			stmf_abort_task_offline(itask->itask_task, 0, info);
 		}
 	}
+	mutex_exit(&itask->itask_mutex);
 }
 
 stmf_status_t
@@ -5559,7 +5928,7 @@
 
 	mutex_enter(&stmf_state.stmf_lock);
 	/* check if any ports are standby and create second group */
-	for (ilport = stmf_state.stmf_ilportlist; ilport;
+	for (ilport = stmf_state.stmf_ilportlist; ilport != NULL;
 	    ilport = ilport->ilport_next) {
 		if (ilport->ilport_standby == 1) {
 			nports_standby++;
@@ -5568,14 +5937,38 @@
 		}
 	}
 
-	/* The spec only allows for 255 ports to be reported per group */
+	/*
+	 * Section 6.25 REPORT TARGET PORT GROUPS
+	 * The reply can contain many group replies. Each group is limited
+	 * to 255 port identifiers, so we'll need to limit the amount of
+	 * data returned. For FC ports there's a physical limitation in
+	 * machines that makes reaching 255 ports very, very unlikely. For
+	 * iSCSI, on the other hand, recent changes mean the port count
+	 * could be as high as 4096 (the current limit). Limiting the data
+	 * returned for iSCSI isn't as bad as it sounds. This information
+	 * is only important for ALUA, which isn't supported for iSCSI;
+	 * iSCSI uses virtual IP addresses to deal with node failover in a
+	 * cluster.
+	 */
 	nports = min(nports, 255);
 	nports_standby = min(nports_standby, 255);
+
+	/*
+	 * The first 4 bytes of the returned data are the length. The
+	 * size of the Target Port Group header is 8 bytes. So, that's where
+	 * the 12 comes from. Each port entry is 4 bytes in size.
+	 */
 	sz = (nports * 4) + 12;
-	if (nports_standby && ilu_alua) {
+	if (nports_standby != 0 && ilu_alua != 0) {
+		/* ---- Only add 8 bytes since it's just the Group header ---- */
 		sz += (nports_standby * 4) + 8;
 	}
-	asz = sz + sizeof (*xd) - 4;
+
+	/*
+	 * The stmf_xfer_data structure contains 4 bytes that will be
+	 * part of the data buffer. So, subtract the 4 bytes from the space
+	 * needed.
+	 */
+	asz = sizeof (*xd) + sz - 4;
 	xd = (stmf_xfer_data_t *)kmem_zalloc(asz, KM_NOSLEEP);
 	if (xd == NULL) {
 		mutex_exit(&stmf_state.stmf_lock);
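
A worked example of the sizing arithmetic, using hypothetical counts of two
active ports and one standby port on an ALUA-enabled LU:

	sz  = (2 * 4) + 12;	/* = 20: length field + group header + ports */
	sz += (1 * 4) + 8;	/* = 32: the standby group has no length field */
	asz = sizeof (*xd) + sz - 4;	/* 4 of the sz bytes live in xd->buf */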
@@ -5586,8 +5979,11 @@
 
 	p = xd->buf;
 
+	/* ---- length values never include the field that holds the size ---- */
 	*((uint32_t *)p) = BE_32(sz - 4);
 	p += 4;
+
+	/* ---- Now fill out the first Target Group header ---- */
 	p[0] = 0x80;	/* PREF */
 	p[1] = 5;	/* AO_SUP, S_SUP */
 	if (stmf_state.stmf_alua_node == 1) {
@@ -5597,15 +5993,16 @@
 	}
 	p[7] = nports & 0xff;
 	p += 8;
-	for (ilport = stmf_state.stmf_ilportlist; ilport;
+	for (ilport = stmf_state.stmf_ilportlist; ilport != NULL && nports != 0;
 	    ilport = ilport->ilport_next) {
 		if (ilport->ilport_standby == 1) {
 			continue;
 		}
 		((uint16_t *)p)[1] = BE_16(ilport->ilport_rtpid);
 		p += 4;
-	}
-	if (nports_standby && ilu_alua) {
+		nports--;
+	}
+	if (nports_standby != 0 && ilu_alua != 0) {
 		p[0] = 0x02;	/* Non PREF, Standby */
 		p[1] = 5;	/* AO_SUP, S_SUP */
 		if (stmf_state.stmf_alua_node == 1) {
@@ -5615,13 +6012,14 @@
 		}
 		p[7] = nports_standby & 0xff;
 		p += 8;
-		for (ilport = stmf_state.stmf_ilportlist; ilport;
-		    ilport = ilport->ilport_next) {
+		for (ilport = stmf_state.stmf_ilportlist; ilport != NULL &&
+		    nports_standby != 0; ilport = ilport->ilport_next) {
 			if (ilport->ilport_standby == 0) {
 				continue;
 			}
 			((uint16_t *)p)[1] = BE_16(ilport->ilport_rtpid);
 			p += 4;
+			nports_standby--;
 		}
 	}
 
@@ -5862,7 +6260,9 @@
 	stmf_xfer_data_t *xd;
 	uint32_t sz, minsz;
 
+	mutex_enter(&itask->itask_mutex);
 	itask->itask_flags |= ITASK_DEFAULT_HANDLING;
+
 	task->task_cmd_xfer_length =
 	    ((((uint32_t)task->task_cdb[6]) << 24) |
 	    (((uint32_t)task->task_cdb[7]) << 16) |
@@ -5874,6 +6274,7 @@
 		task->task_expected_xfer_length =
 		    task->task_cmd_xfer_length;
 	}
+	mutex_exit(&itask->itask_mutex);
 
 	if (task->task_cmd_xfer_length == 0) {
 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
@@ -5979,7 +6380,9 @@
 	 * was responsible for setting the ILU_RESET_ACTIVE. In case this
 	 * task itself gets aborted, we will clear ILU_RESET_ACTIVE.
 	 */
+	mutex_enter(&itask->itask_mutex);
 	itask->itask_flags |= ITASK_DEFAULT_HANDLING | ITASK_CAUSING_LU_RESET;
+	mutex_exit(&itask->itask_mutex);
 
 	/* Initiatiate abort on all commands on this LU except this one */
 	stmf_abort(STMF_QUEUE_ABORT_LU, task, STMF_ABORTED, task->task_lu);
@@ -6055,8 +6458,10 @@
 	}
 
 	/* ok, start the damage */
+	mutex_enter(&itask->itask_mutex);
 	itask->itask_flags |= ITASK_DEFAULT_HANDLING |
 	    ITASK_CAUSING_TARGET_RESET;
+	mutex_exit(&itask->itask_mutex);
 	for (i = 0; i < lm->lm_nentries; i++) {
 		if (lm->lm_plus[i] == NULL)
 			continue;
@@ -6114,34 +6519,46 @@
 stmf_worker_init()
 {
 	uint32_t i;
+	stmf_worker_t *w;
 
 	/* Make local copy of global tunables */
-	stmf_i_max_nworkers = stmf_max_nworkers;
-	stmf_i_min_nworkers = stmf_min_nworkers;
-
+
+	/*
+	 * Allow workers to be scaled down to a very low number for cases
+	 * where the load is light. If the number of threads drops below
+	 * 4, assume it is a mistake and force the count back to a
+	 * reasonable default (64). The low limit of 4 is simply legacy
+	 * and may be too low.
+	 */
 	ASSERT(stmf_workers == NULL);
-	if (stmf_i_min_nworkers < 4) {
-		stmf_i_min_nworkers = 4;
-	}
-	if (stmf_i_max_nworkers < stmf_i_min_nworkers) {
-		stmf_i_max_nworkers = stmf_i_min_nworkers;
-	}
+	if (stmf_nworkers < 4) {
+		stmf_nworkers = 64;
+	}
+
 	stmf_workers = (stmf_worker_t *)kmem_zalloc(
-	    sizeof (stmf_worker_t) * stmf_i_max_nworkers, KM_SLEEP);
-	for (i = 0; i < stmf_i_max_nworkers; i++) {
+	    sizeof (stmf_worker_t) * stmf_nworkers, KM_SLEEP);
+	for (i = 0; i < stmf_nworkers; i++) {
 		stmf_worker_t *w = &stmf_workers[i];
 		mutex_init(&w->worker_lock, NULL, MUTEX_DRIVER, NULL);
 		cv_init(&w->worker_cv, NULL, CV_DRIVER, NULL);
 	}
-	stmf_worker_mgmt_delay = drv_usectohz(20 * 1000);
 	stmf_workers_state = STMF_WORKERS_ENABLED;
 
-	/* Workers will be started by stmf_worker_mgmt() */
+	/* Check if we are starting */
+	if (stmf_nworkers_cur < stmf_nworkers - 1) {
+		for (i = stmf_nworkers_cur; i < stmf_nworkers; i++) {
+			w = &stmf_workers[i];
+			w->worker_tid = thread_create(NULL, 0, stmf_worker_task,
+			    (void *)&stmf_workers[i], 0, &p0, TS_RUN,
+			    minclsyspri);
+			stmf_nworkers_accepting_cmds++;
+		}
+		return;
+	}
 
 	/* Let's wait for at least one worker to start */
 	while (stmf_nworkers_cur == 0)
 		delay(drv_usectohz(20 * 1000));
-	stmf_worker_mgmt_delay = drv_usectohz(3 * 1000 * 1000);
 }
 
 stmf_status_t
@@ -6154,7 +6571,6 @@
 		return (STMF_SUCCESS);
 	ASSERT(stmf_workers);
 	stmf_workers_state = STMF_WORKERS_DISABLED;
-	stmf_worker_mgmt_delay = drv_usectohz(20 * 1000);
 	cv_signal(&stmf_state.stmf_cv);
 
 	sb = ddi_get_lbolt() + drv_usectohz(10 * 1000 * 1000);
@@ -6166,12 +6582,12 @@
 		}
 		delay(drv_usectohz(100 * 1000));
 	}
-	for (i = 0; i < stmf_i_max_nworkers; i++) {
+	for (i = 0; i < stmf_nworkers; i++) {
 		stmf_worker_t *w = &stmf_workers[i];
 		mutex_destroy(&w->worker_lock);
 		cv_destroy(&w->worker_cv);
 	}
-	kmem_free(stmf_workers, sizeof (stmf_worker_t) * stmf_i_max_nworkers);
+	kmem_free(stmf_workers, sizeof (stmf_worker_t) * stmf_nworkers);
 	stmf_workers = NULL;
 
 	return (STMF_SUCCESS);
@@ -6188,7 +6604,6 @@
 	stmf_lu_t *lu;
 	clock_t wait_timer = 0;
 	clock_t wait_ticks, wait_delta = 0;
-	uint32_t old, new;
 	uint8_t curcmd;
 	uint8_t abort_free;
 	uint8_t wait_queue;
@@ -6200,7 +6615,9 @@
 	DTRACE_PROBE1(worker__create, stmf_worker_t, w);
 	mutex_enter(&w->worker_lock);
 	w->worker_flags |= STMF_WORKER_STARTED | STMF_WORKER_ACTIVE;
-stmf_worker_loop:;
+	atomic_inc_32(&stmf_nworkers_cur);
+
+stmf_worker_loop:
 	if ((w->worker_ref_count == 0) &&
 	    (w->worker_flags & STMF_WORKER_TERMINATE)) {
 		w->worker_flags &= ~(STMF_WORKER_STARTED |
@@ -6208,10 +6625,13 @@
 		w->worker_tid = NULL;
 		mutex_exit(&w->worker_lock);
 		DTRACE_PROBE1(worker__destroy, stmf_worker_t, w);
+		atomic_dec_32(&stmf_nworkers_cur);
 		thread_exit();
 	}
+
 	/* CONSTCOND */
 	while (1) {
+		/* worker lock is held at this point */
 		dec_qdepth = 0;
 		if (wait_timer && (ddi_get_lbolt() >= wait_timer)) {
 			wait_timer = 0;
@@ -6229,42 +6649,41 @@
 				    NULL;
 			}
 		}
-		if ((itask = w->worker_task_head) == NULL) {
+
+		STMF_DEQUEUE_ITASK(w, itask);
+		if (itask == NULL)
 			break;
-		}
+
+		ASSERT((itask->itask_flags & ITASK_IN_FREE_LIST) == 0);
 		task = itask->itask_task;
 		DTRACE_PROBE2(worker__active, stmf_worker_t, w,
 		    scsi_task_t *, task);
-		w->worker_task_head = itask->itask_worker_next;
-		if (w->worker_task_head == NULL)
-			w->worker_task_tail = NULL;
-
 		wait_queue = 0;
 		abort_free = 0;
+		mutex_exit(&w->worker_lock);
+		mutex_enter(&itask->itask_mutex);
+		mutex_enter(&w->worker_lock);
+
 		if (itask->itask_ncmds > 0) {
 			curcmd = itask->itask_cmd_stack[itask->itask_ncmds - 1];
 		} else {
 			ASSERT(itask->itask_flags & ITASK_BEING_ABORTED);
 		}
-		do {
-			old = itask->itask_flags;
-			if (old & ITASK_BEING_ABORTED) {
-				itask->itask_ncmds = 1;
-				curcmd = itask->itask_cmd_stack[0] =
-				    ITASK_CMD_ABORT;
-				goto out_itask_flag_loop;
-			} else if ((curcmd & ITASK_CMD_MASK) ==
-			    ITASK_CMD_NEW_TASK) {
-				/*
-				 * set ITASK_KSTAT_IN_RUNQ, this flag
-				 * will not reset until task completed
-				 */
-				new = old | ITASK_KNOWN_TO_LU |
-				    ITASK_KSTAT_IN_RUNQ;
-			} else {
-				goto out_itask_flag_loop;
-			}
-		} while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+		if (itask->itask_flags & ITASK_BEING_ABORTED) {
+			itask->itask_ncmds = 1;
+			curcmd = itask->itask_cmd_stack[0] =
+			    ITASK_CMD_ABORT;
+			goto out_itask_flag_loop;
+		} else if ((curcmd & ITASK_CMD_MASK) == ITASK_CMD_NEW_TASK) {
+			/*
+			 * set ITASK_KSTAT_IN_RUNQ, this flag
+			 * will not reset until task completed
+			 */
+			itask->itask_flags |= ITASK_KNOWN_TO_LU |
+			    ITASK_KSTAT_IN_RUNQ;
+		} else {
+			goto out_itask_flag_loop;
+		}
 
 out_itask_flag_loop:
 
@@ -6323,24 +6742,29 @@
 			lu = task->task_lu;
 		else
 			lu = dlun0;
+
 		dbuf = itask->itask_dbufs[ITASK_CMD_BUF_NDX(curcmd)];
 		mutex_exit(&w->worker_lock);
 		curcmd &= ITASK_CMD_MASK;
 		stmf_task_audit(itask, TE_PROCESS_CMD, curcmd, dbuf);
+		mutex_exit(&itask->itask_mutex);
+
 		switch (curcmd) {
 		case ITASK_CMD_NEW_TASK:
 			iss = (stmf_i_scsi_session_t *)
 			    task->task_session->ss_stmf_private;
 			stmf_itl_lu_new_task(itask);
 			if (iss->iss_flags & ISS_LUN_INVENTORY_CHANGED) {
-				if (stmf_handle_cmd_during_ic(itask))
+				if (stmf_handle_cmd_during_ic(itask)) {
 					break;
+				}
 			}
 #ifdef	DEBUG
 			if (stmf_drop_task_counter > 0) {
-				if (atomic_dec_32_nv(&stmf_drop_task_counter)
-				    == 1)
+				if (atomic_dec_32_nv(
+				    (uint32_t *)&stmf_drop_task_counter) == 1) {
 					break;
+				}
 			}
 #endif
 			DTRACE_PROBE1(scsi__task__start, scsi_task_t *, task);
@@ -6354,6 +6778,7 @@
 			break;
 		case ITASK_CMD_ABORT:
 			if (abort_free) {
+				mutex_enter(&itask->itask_mutex);
 				stmf_task_free(task);
 			} else {
 				stmf_do_task_abort(task);
@@ -6372,6 +6797,7 @@
 		/* case ITASK_CMD_XFER_DATA: */
 			break;
 		}
+
 		mutex_enter(&w->worker_lock);
 		if (dec_qdepth) {
 			w->worker_queue_depth--;
@@ -6399,146 +6825,6 @@
 	goto stmf_worker_loop;
 }
 
-void
-stmf_worker_mgmt()
-{
-	int i;
-	int workers_needed;
-	uint32_t qd;
-	clock_t tps, d = 0;
-	uint32_t cur_max_ntasks = 0;
-	stmf_worker_t *w;
-
-	/* Check if we are trying to increase the # of threads */
-	for (i = stmf_nworkers_cur; i < stmf_nworkers_needed; i++) {
-		if (stmf_workers[i].worker_flags & STMF_WORKER_STARTED) {
-			stmf_nworkers_cur++;
-			stmf_nworkers_accepting_cmds++;
-		} else {
-			/* Wait for transition to complete */
-			return;
-		}
-	}
-	/* Check if we are trying to decrease the # of workers */
-	for (i = (stmf_nworkers_cur - 1); i >= stmf_nworkers_needed; i--) {
-		if ((stmf_workers[i].worker_flags & STMF_WORKER_STARTED) == 0) {
-			stmf_nworkers_cur--;
-			/*
-			 * stmf_nworkers_accepting_cmds has already been
-			 * updated by the request to reduce the # of workers.
-			 */
-		} else {
-			/* Wait for transition to complete */
-			return;
-		}
-	}
-	/* Check if we are being asked to quit */
-	if (stmf_workers_state != STMF_WORKERS_ENABLED) {
-		if (stmf_nworkers_cur) {
-			workers_needed = 0;
-			goto worker_mgmt_trigger_change;
-		}
-		return;
-	}
-	/* Check if we are starting */
-	if (stmf_nworkers_cur < stmf_i_min_nworkers) {
-		workers_needed = stmf_i_min_nworkers;
-		goto worker_mgmt_trigger_change;
-	}
-
-	tps = drv_usectohz(1 * 1000 * 1000);
-	if ((stmf_wm_last != 0) &&
-	    ((d = ddi_get_lbolt() - stmf_wm_last) > tps)) {
-		qd = 0;
-		for (i = 0; i < stmf_nworkers_accepting_cmds; i++) {
-			qd += stmf_workers[i].worker_max_qdepth_pu;
-			stmf_workers[i].worker_max_qdepth_pu = 0;
-			if (stmf_workers[i].worker_max_sys_qdepth_pu >
-			    cur_max_ntasks) {
-				cur_max_ntasks =
-				    stmf_workers[i].worker_max_sys_qdepth_pu;
-			}
-			stmf_workers[i].worker_max_sys_qdepth_pu = 0;
-		}
-	}
-	stmf_wm_last = ddi_get_lbolt();
-	if (d <= tps) {
-		/* still ramping up */
-		return;
-	}
-	/* max qdepth cannot be more than max tasks */
-	if (qd > cur_max_ntasks)
-		qd = cur_max_ntasks;
-
-	/* See if we have more workers */
-	if (qd < stmf_nworkers_accepting_cmds) {
-		/*
-		 * Since we dont reduce the worker count right away, monitor
-		 * the highest load during the scale_down_delay.
-		 */
-		if (qd > stmf_worker_scale_down_qd)
-			stmf_worker_scale_down_qd = qd;
-		if (stmf_worker_scale_down_timer == 0) {
-			stmf_worker_scale_down_timer = ddi_get_lbolt() +
-			    drv_usectohz(stmf_worker_scale_down_delay *
-			    1000 * 1000);
-			return;
-		}
-		if (ddi_get_lbolt() < stmf_worker_scale_down_timer) {
-			return;
-		}
-		/* Its time to reduce the workers */
-		if (stmf_worker_scale_down_qd < stmf_i_min_nworkers)
-			stmf_worker_scale_down_qd = stmf_i_min_nworkers;
-		if (stmf_worker_scale_down_qd > stmf_i_max_nworkers)
-			stmf_worker_scale_down_qd = stmf_i_max_nworkers;
-		if (stmf_worker_scale_down_qd == stmf_nworkers_cur)
-			return;
-		workers_needed = stmf_worker_scale_down_qd;
-		stmf_worker_scale_down_qd = 0;
-		goto worker_mgmt_trigger_change;
-	}
-	stmf_worker_scale_down_qd = 0;
-	stmf_worker_scale_down_timer = 0;
-	if (qd > stmf_i_max_nworkers)
-		qd = stmf_i_max_nworkers;
-	if (qd < stmf_i_min_nworkers)
-		qd = stmf_i_min_nworkers;
-	if (qd == stmf_nworkers_cur)
-		return;
-	workers_needed = qd;
-	goto worker_mgmt_trigger_change;
-
-	/* NOTREACHED */
-	return;
-
-worker_mgmt_trigger_change:
-	ASSERT(workers_needed != stmf_nworkers_cur);
-	if (workers_needed > stmf_nworkers_cur) {
-		stmf_nworkers_needed = workers_needed;
-		for (i = stmf_nworkers_cur; i < workers_needed; i++) {
-			w = &stmf_workers[i];
-			w->worker_tid = thread_create(NULL, 0, stmf_worker_task,
-			    (void *)&stmf_workers[i], 0, &p0, TS_RUN,
-			    minclsyspri);
-		}
-		return;
-	}
-	/* At this point we know that we are decreasing the # of workers */
-	stmf_nworkers_accepting_cmds = workers_needed;
-	stmf_nworkers_needed = workers_needed;
-	/* Signal the workers that its time to quit */
-	for (i = (stmf_nworkers_cur - 1); i >= stmf_nworkers_needed; i--) {
-		w = &stmf_workers[i];
-		ASSERT(w && (w->worker_flags & STMF_WORKER_STARTED));
-		mutex_enter(&w->worker_lock);
-		w->worker_flags |= STMF_WORKER_TERMINATE;
-		if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
-			cv_signal(&w->worker_cv);
-		mutex_exit(&w->worker_lock);
-	}
-}
-
 /*
  * Fills out a dbuf from stmf_xfer_data_t (contained in the db_lu_private).
  * If all the data has been filled out, frees the xd and makes
@@ -6867,6 +7153,12 @@
 	cmn_err(CE_WARN, "stmf_dlun0_ctl called with cmd %x", cmd);
 }
 
+/* ARGSUSED */
+void
+stmf_dlun0_task_done(struct scsi_task *task)
+{
+}
+
 void
 stmf_dlun_init()
 {
@@ -6881,6 +7173,7 @@
 	dlun0->lu_abort = stmf_dlun0_abort;
 	dlun0->lu_task_poll = stmf_dlun0_task_poll;
 	dlun0->lu_ctl = stmf_dlun0_ctl;
+	dlun0->lu_task_done = stmf_dlun0_task_done;
 
 	ilu = (stmf_i_lu_t *)dlun0->lu_stmf_private;
 	ilu->ilu_cur_task_cntr = &ilu->ilu_task_cntr1;
@@ -7144,17 +7437,28 @@
 	stmf_itl_data_t	*itl = itask->itask_itl_datap;
 	scsi_task_t	*task = itask->itask_task;
 	stmf_i_lu_t	*ilu;
+	stmf_i_scsi_session_t   *iss =
+	    itask->itask_task->task_session->ss_stmf_private;
+	stmf_i_remote_port_t    *irport = iss->iss_irport;
 
 	if (itl == NULL || task->task_lu == dlun0)
 		return;
 	ilu = (stmf_i_lu_t *)task->task_lu->lu_stmf_private;
 	itask->itask_start_timestamp = gethrtime();
+	itask->itask_xfer_done_timestamp = 0;
 	if (ilu->ilu_kstat_io != NULL) {
 		mutex_enter(ilu->ilu_kstat_io->ks_lock);
 		stmf_update_kstat_lu_q(itask->itask_task, kstat_waitq_enter);
 		mutex_exit(ilu->ilu_kstat_io->ks_lock);
 	}
 
+	if (irport->irport_kstat_estat != NULL) {
+		if (task->task_flags & TF_READ_DATA)
+			atomic_inc_32(&irport->irport_nread_tasks);
+		else if (task->task_flags & TF_WRITE_DATA)
+			atomic_inc_32(&irport->irport_nwrite_tasks);
+	}
+
 	stmf_update_kstat_lport_q(itask->itask_task, kstat_waitq_enter);
 }
 
@@ -7193,6 +7497,8 @@
 	if (ilu->ilu_kstat_io == NULL)
 		return;
 
+	stmf_update_kstat_rport_estat(task);
+
 	mutex_enter(ilu->ilu_kstat_io->ks_lock);
 
 	if (itask->itask_flags & ITASK_KSTAT_IN_RUNQ) {
@@ -7206,6 +7512,23 @@
 	}
 }
 
+void
+stmf_lu_xfer_done(scsi_task_t *task, boolean_t read, hrtime_t elapsed_time)
+{
+	stmf_i_scsi_task_t *itask = task->task_stmf_private;
+
+	if (task->task_lu == dlun0)
+		return;
+
+	if (read) {
+		atomic_add_64((uint64_t *)&itask->itask_lu_read_time,
+		    elapsed_time);
+	} else {
+		atomic_add_64((uint64_t *)&itask->itask_lu_write_time,
+		    elapsed_time);
+	}
+}
+
 static void
 stmf_lport_xfer_start(stmf_i_scsi_task_t *itask, stmf_data_buf_t *dbuf)
 {
@@ -7233,7 +7556,9 @@
 	xfer_size = (dbuf->db_xfer_status == STMF_SUCCESS) ?
 	    dbuf->db_data_size : 0;
 
-	elapsed_time = gethrtime() - dbuf->db_xfer_start_timestamp;
+	itask->itask_xfer_done_timestamp = gethrtime();
+	elapsed_time = itask->itask_xfer_done_timestamp -
+	    dbuf->db_xfer_start_timestamp;
 	if (dbuf->db_flags & DB_DIRECTION_TO_RPORT) {
 		atomic_add_64((uint64_t *)&itask->itask_lport_read_time,
 		    elapsed_time);
@@ -7257,7 +7582,8 @@
 {
 	if (stmf_state.stmf_svc_flags & STMF_SVC_STARTED)
 		return;
-	stmf_state.stmf_svc_tailp = &stmf_state.stmf_svc_active;
+	list_create(&stmf_state.stmf_svc_list, sizeof (stmf_svc_req_t),
+	    offsetof(stmf_svc_req_t, svc_list_entry));
 	stmf_state.stmf_svc_taskq = ddi_taskq_create(0, "STMF_SVC_TASKQ", 1,
 	    TASKQ_DEFAULTPRI, 0);
 	(void) ddi_taskq_dispatch(stmf_state.stmf_svc_taskq,
@@ -7286,6 +7612,7 @@
 	if (i == 500)
 		return (STMF_BUSY);
 
+	list_destroy(&stmf_state.stmf_svc_list);
 	ddi_taskq_destroy(stmf_state.stmf_svc_taskq);
 
 	return (STMF_SUCCESS);
@@ -7311,7 +7638,7 @@
 	stmf_state.stmf_svc_flags |= STMF_SVC_STARTED | STMF_SVC_ACTIVE;
 
 	while (!(stmf_state.stmf_svc_flags & STMF_SVC_TERMINATE)) {
-		if (stmf_state.stmf_svc_active == NULL) {
+		if (list_is_empty(&stmf_state.stmf_svc_list)) {
 			stmf_svc_timeout(&clks);
 			continue;
 		}
@@ -7322,11 +7649,9 @@
 		 * so it should be safe to access it without holding the
 		 * stmf state lock.
 		 */
-		req = stmf_state.stmf_svc_active;
-		stmf_state.stmf_svc_active = req->svc_next;
-
-		if (stmf_state.stmf_svc_active == NULL)
-			stmf_state.stmf_svc_tailp = &stmf_state.stmf_svc_active;
+		req = list_remove_head(&stmf_state.stmf_svc_list);
+		if (req == NULL)
+			continue;
 
 		switch (req->svc_cmd) {
 		case STMF_CMD_LPORT_ONLINE:
@@ -7398,7 +7723,7 @@
 			    ddi_get_lbolt() + drv_usectohz(1*1000*1000);
 		}
 
-		if (stmf_state.stmf_svc_active)
+		if (!list_is_empty(&stmf_state.stmf_svc_list))
 			return;
 	}
 
@@ -7423,17 +7748,10 @@
 			    ddi_get_lbolt() + drv_usectohz(1*1000*1000);
 		}
 
-		if (stmf_state.stmf_svc_active)
+		if (!list_is_empty(&stmf_state.stmf_svc_list))
 			return;
 	}
 
-	/* Check if we need to run worker_mgmt */
-	if (ddi_get_lbolt() > clks->worker_delay) {
-		stmf_worker_mgmt();
-		clks->worker_delay = ddi_get_lbolt() +
-		    stmf_worker_mgmt_delay;
-	}
-
 	/* Check if any active session got its 1st LUN */
 	if (stmf_state.stmf_process_initial_luns) {
 		int stmf_level = 0;
@@ -7560,11 +7878,9 @@
 		    info->st_additional_info);
 	}
 	req->svc_req_alloc_size = s;
-	req->svc_next = NULL;
 
 	mutex_enter(&stmf_state.stmf_lock);
-	*stmf_state.stmf_svc_tailp = req;
-	stmf_state.stmf_svc_tailp = &req->svc_next;
+	list_insert_tail(&stmf_state.stmf_svc_list, req);
 	if ((stmf_state.stmf_svc_flags & STMF_SVC_ACTIVE) == 0) {
 		cv_signal(&stmf_state.stmf_cv);
 	}
@@ -7574,29 +7890,15 @@
 static void
 stmf_svc_kill_obj_requests(void *obj)
 {
-	stmf_svc_req_t *prev_req = NULL;
 	stmf_svc_req_t *next_req;
 	stmf_svc_req_t *req;
 
 	ASSERT(mutex_owned(&stmf_state.stmf_lock));
 
-	for (req = stmf_state.stmf_svc_active; req != NULL; req = next_req) {
-		next_req = req->svc_next;
-
+	for (req = list_head(&stmf_state.stmf_svc_list); req != NULL;
+	    req = next_req) {
+		next_req = list_next(&stmf_state.stmf_svc_list, req);
 		if (req->svc_obj == obj) {
-			if (prev_req != NULL)
-				prev_req->svc_next = next_req;
-			else
-				stmf_state.stmf_svc_active = next_req;
-
-			if (next_req == NULL)
-				stmf_state.stmf_svc_tailp = (prev_req != NULL) ?
-				    &prev_req->svc_next :
-				    &stmf_state.stmf_svc_active;
-
+			list_remove(&stmf_state.stmf_svc_list, req);
 			kmem_free(req, req->svc_req_alloc_size);
-		} else {
-			prev_req = req;
 		}
 	}
 }
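
Because the matching node is freed inside the loop, the successor must be
captured before list_remove()/kmem_free(); calling list_next() on a freed
node would be a use-after-free. A minimal standalone sketch of the list(9F)
idiom, with a hypothetical my_req_t:

	typedef struct my_req {
		list_node_t	r_node;
		void		*r_obj;
	} my_req_t;

	static void
	purge_matching(list_t *lp, void *obj)
	{
		my_req_t *req, *next;

		for (req = list_head(lp); req != NULL; req = next) {
			next = list_next(lp, req);	/* capture first */
			if (req->r_obj == obj) {
				list_remove(lp, req);
				kmem_free(req, sizeof (*req));
			}
		}
	}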
@@ -7642,6 +7944,14 @@
 	mutex_exit(&trace_buf_lock);
 }
 
+/*
+ * NOTE: Due to lock order problems that are not possible to fix, this
+ * method drops and reacquires the itask_mutex around the call to stmf_ctl.
+ * Another possible workaround would be to use a dispatch queue and have
+ * the call to stmf_ctl run on another thread that's not holding the
+ * itask_mutex. The problem with that approach is that it's difficult to
+ * determine what impact an asynchronous change would have on the system
+ * state.
+ */
 static void
 stmf_abort_task_offline(scsi_task_t *task, int offline_lu, char *info)
 {
@@ -7649,10 +7959,14 @@
 	void				*ctl_private;
 	uint32_t			ctl_cmd;
 	int				msg = 0;
+	stmf_i_scsi_task_t		*itask =
+	    (stmf_i_scsi_task_t *)task->task_stmf_private;
 
 	stmf_trace("FROM STMF", "abort_task_offline called for %s: %s",
 	    offline_lu ? "LU" : "LPORT", info ? info : "no additional info");
 	change_info.st_additional_info = info;
+	ASSERT(mutex_owned(&itask->itask_mutex));
+
 	if (offline_lu) {
 		change_info.st_rflags = STMF_RFLAG_RESET |
 		    STMF_RFLAG_LU_ABORT;
@@ -7680,7 +7994,9 @@
 		    offline_lu ? "LU" : "LPORT", info ? info :
 		    "<no additional info>");
 	}
+	mutex_exit(&itask->itask_mutex);
 	(void) stmf_ctl(ctl_cmd, ctl_private, &change_info);
+	mutex_enter(&itask->itask_mutex);
 }
 
 static char
@@ -7737,7 +8053,7 @@
 			return (B_FALSE);
 		break;
 
-	case PROTOCOL_iSCSI:
+	case PROTOCOL_iSCSI: /* CSTYLED */
 		{
 		iscsi_transport_id_t	*iscsiid;
 		uint16_t		adn_len, name_len;
@@ -7782,7 +8098,7 @@
 	case PROTOCOL_SAS:
 	case PROTOCOL_ADT:
 	case PROTOCOL_ATAPI:
-	default:
+	default: /* CSTYLED */
 		{
 		stmf_dflt_scsi_tptid_t *dflttpd;
 
@@ -7810,7 +8126,7 @@
 
 	switch (tpd1->protocol_id) {
 
-	case PROTOCOL_iSCSI:
+	case PROTOCOL_iSCSI: /* CSTYLED */
 		{
 		iscsi_transport_id_t *iscsitpd1, *iscsitpd2;
 		uint16_t len;
@@ -7825,7 +8141,7 @@
 		}
 		break;
 
-	case PROTOCOL_SRP:
+	case PROTOCOL_SRP: /* CSTYLED */
 		{
 		scsi_srp_transport_id_t *srptpd1, *srptpd2;
 
@@ -7837,7 +8153,7 @@
 		}
 		break;
 
-	case PROTOCOL_FIBRE_CHANNEL:
+	case PROTOCOL_FIBRE_CHANNEL: /* CSTYLED */
 		{
 		scsi_fc_transport_id_t *fctpd1, *fctpd2;
 
@@ -7855,7 +8171,7 @@
 	case PROTOCOL_SAS:
 	case PROTOCOL_ADT:
 	case PROTOCOL_ATAPI:
-	default:
+	default: /* CSTYLED */
 		{
 		stmf_dflt_scsi_tptid_t *dflt1, *dflt2;
 		uint16_t len;
@@ -7988,3 +8304,74 @@
 	 */
 	kmem_free(rpt, sizeof (stmf_remote_port_t) + rpt->rport_tptid_sz);
 }
+
+stmf_lu_t *
+stmf_check_and_hold_lu(scsi_task_t *task, uint8_t *guid)
+{
+	stmf_i_scsi_session_t *iss;
+	stmf_lu_t *lu;
+	stmf_i_lu_t *ilu = NULL;
+	stmf_lun_map_t *sm;
+	stmf_lun_map_ent_t *lme;
+	int i;
+
+	iss = (stmf_i_scsi_session_t *)task->task_session->ss_stmf_private;
+	rw_enter(iss->iss_lockp, RW_READER);
+	sm = iss->iss_sm;
+
+	for (i = 0; i < sm->lm_nentries; i++) {
+		if (sm->lm_plus[i] == NULL)
+			continue;
+		lme = (stmf_lun_map_ent_t *)sm->lm_plus[i];
+		lu = lme->ent_lu;
+		if (bcmp(lu->lu_id->ident, guid, 16) == 0) {
+			break;
+		}
+		lu = NULL;
+	}
+
+	if (lu == NULL) {
+		goto hold_lu_done;
+	}
+
+	ilu = lu->lu_stmf_private;
+	mutex_enter(&ilu->ilu_task_lock);
+	ilu->ilu_additional_ref++;
+	mutex_exit(&ilu->ilu_task_lock);
+
+hold_lu_done:
+	rw_exit(iss->iss_lockp);
+	return (lu);
+}
+
+void
+stmf_release_lu(stmf_lu_t *lu)
+{
+	stmf_i_lu_t *ilu;
+
+	ilu = lu->lu_stmf_private;
+	ASSERT(ilu->ilu_additional_ref != 0);
+	mutex_enter(&ilu->ilu_task_lock);
+	ilu->ilu_additional_ref--;
+	mutex_exit(&ilu->ilu_task_lock);
+}
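
Callers pair the hold and release around whatever cross-LU work they need;
the extra reference (ilu_additional_ref) pins the LU against teardown in
between. A hedged usage sketch; the copy-work call is hypothetical:

	stmf_lu_t *lu;

	lu = stmf_check_and_hold_lu(task, guid);
	if (lu == NULL)
		return (STMF_NOT_FOUND);	/* GUID not mapped for this session */
	do_cross_lu_copy(task, lu);		/* hypothetical caller work */
	stmf_release_lu(lu);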
+
+int
+stmf_is_task_being_aborted(scsi_task_t *task)
+{
+	stmf_i_scsi_task_t *itask;
+
+	itask = (stmf_i_scsi_task_t *)task->task_stmf_private;
+	if (itask->itask_flags & ITASK_BEING_ABORTED)
+		return (1);
+
+	return (0);
+}
+
+volatile boolean_t stmf_pgr_aptpl_always = B_FALSE;
+
+boolean_t
+stmf_is_pgr_aptpl_always(void)
+{
+	return (stmf_pgr_aptpl_always);
+}
--- a/usr/src/uts/common/io/comstar/stmf/stmf_impl.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/stmf/stmf_impl.h	Thu Nov 14 07:43:52 2019 -0700
@@ -20,6 +20,8 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 #ifndef _STMF_IMPL_H
@@ -89,7 +91,7 @@
 	uint32_t	ilu_ntasks;	 /* # of tasks in the ilu_task list */
 	uint32_t	ilu_ntasks_free;	/* # of tasks that are free */
 	uint32_t	ilu_ntasks_min_free; /* # minimal free tasks */
-	uint32_t	rsvd1;
+	uint32_t	ilu_additional_ref;
 	uint32_t	ilu_proxy_registered;
 	uint64_t	ilu_reg_msgid;
 	struct stmf_i_scsi_task	*ilu_tasks;
@@ -164,6 +166,18 @@
 	int			irport_refcnt;
 	id_t			irport_instance;
 	avl_node_t		irport_ln;
+	/* number of active read tasks */
+	uint32_t		irport_nread_tasks;
+	/* number of active write tasks */
+	uint32_t		irport_nwrite_tasks;
+	hrtime_t		irport_rdstart_timestamp;
+	hrtime_t		irport_rddone_timestamp;
+	hrtime_t		irport_wrstart_timestamp;
+	hrtime_t		irport_wrdone_timestamp;
+	kstat_t			*irport_kstat_info;
+	kstat_t			*irport_kstat_io;
+	kstat_t			*irport_kstat_estat;	/* extended stats */
+	boolean_t		irport_info_dirty;
 } stmf_i_remote_port_t;
 
 /*
@@ -237,6 +251,7 @@
 	scsi_task_t		*itask_task;
 	uint32_t		itask_alloc_size;
 	uint32_t		itask_flags;
+	kmutex_t		itask_mutex; /* protects flags and lists */
 	uint64_t		itask_proxy_msg_id;
 	stmf_data_buf_t		*itask_proxy_dbuf;
 	struct stmf_worker	*itask_worker;
@@ -245,7 +260,6 @@
 	struct stmf_i_scsi_task	*itask_lu_next;
 	struct stmf_i_scsi_task	*itask_lu_prev;
 	struct stmf_i_scsi_task	*itask_lu_free_next;
-	struct stmf_i_scsi_task	*itask_abort_next;
 	struct stmf_itl_data	*itask_itl_datap;
 	clock_t			itask_start_time;	/* abort and normal */
 	/* For now we only support 4 parallel buffers. Should be enough. */
@@ -259,6 +273,7 @@
 	/* Task profile data */
 	hrtime_t		itask_start_timestamp;
 	hrtime_t		itask_done_timestamp;
+	hrtime_t		itask_xfer_done_timestamp;
 	hrtime_t		itask_waitq_enter_timestamp;
 	hrtime_t		itask_waitq_time;
 	hrtime_t		itask_lu_read_time;
@@ -275,6 +290,41 @@
 #define	ITASK_DEFAULT_ABORT_TIMEOUT	5
 
 /*
+ * Common code to enqueue an itask onto the worker_task queue is placed
+ * in this macro to simplify future maintenance activity.
+ */
+#define	STMF_ENQUEUE_ITASK(w, i) \
+	ASSERT((i->itask_flags & ITASK_IN_FREE_LIST) == 0); \
+	ASSERT(mutex_owned(&i->itask_mutex)); \
+	ASSERT(mutex_owned(&w->worker_lock)); \
+	i->itask_worker_next = NULL; \
+	if (w->worker_task_tail) { \
+		w->worker_task_tail->itask_worker_next = i; \
+	} else { \
+		w->worker_task_head = i; \
+	} \
+	w->worker_task_tail = i; \
+	if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) { \
+		w->worker_max_qdepth_pu = w->worker_queue_depth; \
+	} \
+	atomic_inc_32(&w->worker_ref_count); \
+	atomic_or_32(&i->itask_flags, ITASK_IN_WORKER_QUEUE); \
+	i->itask_waitq_enter_timestamp = gethrtime(); \
+	if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) \
+		cv_signal(&w->worker_cv);
+
+#define	STMF_DEQUEUE_ITASK(w, itask) \
+	ASSERT(mutex_owned(&w->worker_lock)); \
+	if ((itask = w->worker_task_head) != NULL) { \
+		w->worker_task_head = itask->itask_worker_next; \
+		if (w->worker_task_head == NULL) { \
+			w->worker_task_tail = NULL; \
+		} \
+	} else { \
+		w->worker_task_tail = NULL; \
+	}
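+
+/*
+ * Dequeue usage sketch (illustrative; the worker thread in stmf.c is
+ * the real consumer):
+ *
+ *	mutex_enter(&w->worker_lock);
+ *	STMF_DEQUEUE_ITASK(w, itask);
+ *	mutex_exit(&w->worker_lock);
+ *	if (itask != NULL)
+ *		... process the task ...
+ */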
+
+/*
  * itask_flags
  */
 #define	ITASK_IN_FREE_LIST		0x0001
--- a/usr/src/uts/common/io/comstar/stmf/stmf_state.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/stmf/stmf_state.h	Thu Nov 14 07:43:52 2019 -0700
@@ -22,7 +22,7 @@
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 /*
- * Copyright 2011, Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2016, Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 #ifndef	_STMF_STATE_H
@@ -62,8 +62,7 @@
 	uint32_t		stmf_svc_flags;
 	stmf_i_lu_t		*stmf_svc_ilu_draining;
 	stmf_i_lu_t		*stmf_svc_ilu_timing;
-	struct stmf_svc_req	*stmf_svc_active;
-	struct stmf_svc_req	**stmf_svc_tailp;
+	list_t			stmf_svc_list;
 
 	stmf_id_list_t		stmf_hg_list;
 	stmf_id_list_t		stmf_tg_list;
@@ -86,7 +85,7 @@
  * different types of services) are added to the stmf_svc_thread.
  */
 typedef struct stmf_svc_req {
-	struct stmf_svc_req		*svc_next;
+	list_node_t			svc_list_entry;
 	int				svc_req_alloc_size;
 	int				svc_cmd;
 	void				*svc_obj;
--- a/usr/src/uts/common/io/comstar/stmf/stmf_stats.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/comstar/stmf/stmf_stats.h	Thu Nov 14 07:43:52 2019 -0700
@@ -29,6 +29,8 @@
 extern "C" {
 #endif
 
+#include <sys/portif.h>
+
 typedef struct stmf_kstat_itl_info {
 	kstat_named_t		i_rport_name;
 	kstat_named_t		i_rport_alias;
@@ -58,6 +60,21 @@
 	kstat_named_t		i_protocol;
 } stmf_kstat_tgt_info_t;
 
+#define	STMF_RPORT_INFO_LIMIT 8
+
+typedef struct stmf_kstat_rport_info {
+	kstat_named_t	i_rport_name;
+	kstat_named_t	i_protocol;
+	kstat_named_t	i_rport_uinfo[STMF_RPORT_INFO_LIMIT];
+} stmf_kstat_rport_info_t;
+
+typedef struct stmf_kstat_rport_estat {
+	kstat_named_t i_rport_read_latency;
+	kstat_named_t i_rport_write_latency;
+	kstat_named_t i_nread_tasks;
+	kstat_named_t i_nwrite_tasks;
+} stmf_kstat_rport_estat_t;
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/common/io/idm/idm.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/idm/idm.c	Thu Nov 14 07:43:52 2019 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
  */
 
 #include <sys/cpuvar.h>
@@ -61,7 +62,7 @@
 static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
 static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
 static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
-static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
+static stmf_status_t idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
     idm_abort_type_t abort_type);
 static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
 static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
@@ -1523,11 +1524,12 @@
 	idm_refcnt_rele(&idt->idt_refcnt);
 }
 
-void
+stmf_status_t
 idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
 {
 	idm_task_t	*task;
 	int		idx;
+	stmf_status_t	s = STMF_SUCCESS;
 
 	/*
 	 * Passing NULL as the task indicates that all tasks
@@ -1549,7 +1551,7 @@
 			    (task->idt_state != TASK_COMPLETE) &&
 			    (task->idt_ic == ic)) {
 				rw_exit(&idm.idm_taskid_table_lock);
-				idm_task_abort_one(ic, task, abort_type);
+				s = idm_task_abort_one(ic, task, abort_type);
 				rw_enter(&idm.idm_taskid_table_lock, RW_READER);
 			} else
 				mutex_exit(&task->idt_mutex);
@@ -1557,8 +1559,9 @@
 		rw_exit(&idm.idm_taskid_table_lock);
 	} else {
 		mutex_enter(&idt->idt_mutex);
-		idm_task_abort_one(ic, idt, abort_type);
+		s = idm_task_abort_one(ic, idt, abort_type);
 	}
+	return (s);
 }
 
 static void
@@ -1589,9 +1592,11 @@
  * Abort the idm task.
  *    Caller must hold the task mutex, which will be released before return
  */
-static void
+static stmf_status_t
 idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
 {
+	stmf_status_t	s = STMF_SUCCESS;
+
 	/* Caller must hold connection mutex */
 	ASSERT(mutex_owned(&idt->idt_mutex));
 	switch (idt->idt_state) {
@@ -1610,7 +1615,7 @@
 			 */
 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
 			    &idm_task_abort_unref_cb);
-			return;
+			return (s);
 		case AT_INTERNAL_ABORT:
 		case AT_TASK_MGMT_ABORT:
 			idt->idt_state = TASK_ABORTING;
@@ -1624,7 +1629,7 @@
 			 */
 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
 			    &idm_task_abort_unref_cb);
-			return;
+			return (s);
 		default:
 			ASSERT(0);
 		}
@@ -1664,7 +1669,7 @@
 			 */
 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
 			    &idm_task_abort_unref_cb);
-			return;
+			return (s);
 		default:
 			ASSERT(0);
 		}
@@ -1683,17 +1688,15 @@
 		}
 		break;
 	case TASK_COMPLETE:
-		/*
-		 * In this case, let it go.  The status has already been
-		 * sent (which may or may not get successfully transmitted)
-		 * and we don't want to end up in a race between completing
-		 * the status PDU and marking the task suspended.
-		 */
+		idm_refcnt_wait_ref(&idt->idt_refcnt);
+		s = STMF_ABORT_SUCCESS;
 		break;
 	default:
 		ASSERT(0);
 	}
 	mutex_exit(&idt->idt_mutex);
+
+	return (s);
 }
 
 static void
@@ -2267,6 +2270,29 @@
 	mutex_exit(&refcnt->ir_mutex);
 }
 
+/*
+ * Used to determine the status of the refcnt.
+ *
+ * Returns  0 if the refcnt is zero,
+ *         -1 if the refcnt is negative,
+ *          1 if the refcnt is held with no waiters,
+ *          2 if the refcnt is held with waiters.
+ */
+int
+idm_refcnt_is_held(idm_refcnt_t *refcnt)
+{
+	if (refcnt->ir_refcnt < 0)
+		return (-1);
+
+	if (refcnt->ir_refcnt == 0)
+		return (0);
+
+	if (refcnt->ir_waiting == REF_NOWAIT && refcnt->ir_refcnt > 0)
+		return (1);
+
+	return (2);
+}
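+
+/*
+ * Example (sketch): a caller that must not tear state down while a
+ * reference is outstanding can test the hold state first:
+ *
+ *	if (idm_refcnt_is_held(&idt->idt_refcnt) > 0)
+ *		... defer the teardown until the refcnt drains ...
+ */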
+
 void
 idm_conn_hold(idm_conn_t *ic)
 {
--- a/usr/src/uts/common/io/idm/idm_conn_sm.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/idm/idm_conn_sm.c	Thu Nov 14 07:43:52 2019 -0700
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #include <sys/cpuvar.h>
@@ -1246,7 +1246,7 @@
 		}
 
 		/* Stop executing active tasks */
-		idm_task_abort(ic, NULL, AT_INTERNAL_SUSPEND);
+		(void) idm_task_abort(ic, NULL, AT_INTERNAL_SUSPEND);
 
 		/* Start logout timer */
 		IDM_SM_TIMER_CHECK(ic);
@@ -1302,7 +1302,7 @@
 		}
 
 		/* Abort all tasks */
-		idm_task_abort(ic, NULL, AT_INTERNAL_ABORT);
+		(void) idm_task_abort(ic, NULL, AT_INTERNAL_ABORT);
 
 		/*
 		 * Handle terminal state actions on the global taskq so
--- a/usr/src/uts/common/io/idm/idm_so.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/idm/idm_so.c	Thu Nov 14 07:43:52 2019 -0700
@@ -24,6 +24,7 @@
  */
 /*
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  */
 
@@ -859,6 +860,11 @@
 	/*
 	 * Check actual AHS length against the amount available in the buffer
 	 */
+	if ((IDM_PDU_OPCODE(pdu) != ISCSI_OP_SCSI_CMD) &&
+	    (bhs->hlength != 0)) {
+		/* ---- hlength is only valid for SCSI Requests ---- */
+		return (IDM_STATUS_FAIL);
+	}
 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
 	    (bhs->hlength * sizeof (uint32_t));
 	pdu->isp_datalen = n2h24(bhs->dlength);
@@ -868,7 +874,7 @@
 		    "idm_sorecvhdr: invalid data segment length");
 		return (IDM_STATUS_FAIL);
 	}
-	if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
+	if (bhs->hlength > IDM_SORX_WIRE_AHSLEN) {
 		/* Allocate a new header segment and change the callback */
 		new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
 		bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c	Thu Nov 14 07:43:52 2019 -0700
@@ -21,6 +21,7 @@
 /*
  * Copyright 2000 by Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org>
  *
  * iSCSI Software Initiator
@@ -848,6 +849,7 @@
 		icmdp->cmd_sig			= ISCSI_SIG_CMD;
 		icmdp->cmd_state		= ISCSI_CMD_STATE_FREE;
 		icmdp->cmd_lun			= ilp;
+		iscsi_lun_hold(ilp);
 		icmdp->cmd_type			= ISCSI_CMD_TYPE_SCSI;
 		/* add the report lun addressing type on to the lun */
 		icmdp->cmd_un.scsi.lun		= ilp->lun_addr_type << 14;
@@ -1091,6 +1093,7 @@
 	ASSERT(icmdp->cmd_sig == ISCSI_SIG_CMD);
 	ASSERT(icmdp->cmd_state == ISCSI_CMD_STATE_FREE);
 
+	iscsi_lun_rele(icmdp->cmd_lun);
 	mutex_destroy(&icmdp->cmd_mutex);
 	cv_destroy(&icmdp->cmd_completion);
 	scsi_hba_pkt_free(ap, pkt);
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h	Thu Nov 14 07:43:52 2019 -0700
@@ -22,7 +22,7 @@
 /*
  * Copyright 2000 by Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2014-2015 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #ifndef _ISCSI_H
@@ -549,6 +549,8 @@
 	uchar_t			lun_pid[ISCSI_INQ_PID_BUF_LEN];	/* Product ID */
 
 	uchar_t			lun_type;
+	kmutex_t		lun_mutex;
+	int			lun_refcnt;
 } iscsi_lun_t;
 
 #define	ISCSI_LUN_STATE_CLEAR	    0		/* used to clear all states */
@@ -1328,8 +1330,9 @@
 /* iscsi_lun.c */
 iscsi_status_t iscsi_lun_create(iscsi_sess_t *isp, uint16_t lun_num,
     uint8_t lun_addr_type, struct scsi_inquiry *inq, char *guid);
-iscsi_status_t iscsi_lun_destroy(iscsi_hba_t *ihp,
-    iscsi_lun_t *ilp);
+void iscsi_lun_hold(iscsi_lun_t *ilp);
+void iscsi_lun_rele(iscsi_lun_t *ilp);
+iscsi_status_t iscsi_lun_destroy(iscsi_hba_t *ihp, iscsi_lun_t *ilp);
 void iscsi_lun_online(iscsi_hba_t *ihp,
     iscsi_lun_t *ilp);
 iscsi_status_t iscsi_lun_offline(iscsi_hba_t *ihp,
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c	Thu Nov 14 07:43:52 2019 -0700
@@ -22,6 +22,7 @@
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  *
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
  * iSCSI command interfaces
  */
 
@@ -796,8 +797,8 @@
 				 */
 				ISCSI_CMD_SET_REASON_STAT(
 				    t_icmdp, CMD_TIMEOUT, STAT_ABORTED);
-				idm_task_abort(icp->conn_ic, t_icmdp->cmd_itp,
-				    AT_TASK_MGMT_ABORT);
+				(void) idm_task_abort(icp->conn_ic,
+				    t_icmdp->cmd_itp, AT_TASK_MGMT_ABORT);
 			} else {
 				cv_broadcast(&t_icmdp->cmd_completion);
 			}
@@ -942,7 +943,7 @@
 					 */
 					ISCSI_CMD_SET_REASON_STAT(t_icmdp,
 					    CMD_TIMEOUT, STAT_TIMEOUT);
-					idm_task_abort(icp->conn_ic,
+					(void) idm_task_abort(icp->conn_ic,
 					    t_icmdp->cmd_itp,
 					    AT_TASK_MGMT_ABORT);
 				} else {
@@ -1027,7 +1028,7 @@
 			mutex_exit(&icp->conn_queue_idm_aborting.mutex);
 			ISCSI_CMD_SET_REASON_STAT(icmdp,
 			    CMD_TRAN_ERR, icmdp->cmd_un.scsi.pkt_stat);
-			idm_task_abort(icp->conn_ic, icmdp->cmd_itp,
+			(void) idm_task_abort(icp->conn_ic, icmdp->cmd_itp,
 			    AT_TASK_MGMT_ABORT);
 			break;
 
@@ -1208,7 +1209,7 @@
 		ISCSI_CMD_SET_REASON_STAT(icmdp,
 		    CMD_TRAN_ERR, icmdp->cmd_un.scsi.pkt_stat);
 
-		idm_task_abort(icmdp->cmd_conn->conn_ic, icmdp->cmd_itp,
+		(void) idm_task_abort(icmdp->cmd_conn->conn_ic, icmdp->cmd_itp,
 		    AT_TASK_MGMT_ABORT);
 		break;
 
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c	Thu Nov 14 07:43:52 2019 -0700
@@ -21,7 +21,7 @@
 /*
  * Copyright 2000 by Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2011, 2015 Nexenta Systems, Inc. All rights reserved.
  *
  * iSCSI Pseudo HBA Driver
  */
@@ -3511,11 +3511,14 @@
 
 	icp = isp->sess_conn_list;
 	while (icp != NULL) {
-		if (icp->conn_timeout == B_TRUE) {
+		mutex_enter(&icp->conn_state_mutex);
+		if ((icp->conn_timeout == B_TRUE) &&
+		    (icp->conn_state_idm_connected == B_TRUE)) {
 			/* timeout on this connect detected */
 			idm_ini_conn_disconnect(icp->conn_ic);
 			icp->conn_timeout = B_FALSE;
 		}
+		mutex_exit(&icp->conn_state_mutex);
 		icp = icp->conn_next;
 	}
 	rw_exit(&isp->sess_conn_list_rwlock);
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c	Thu Nov 14 07:43:52 2019 -0700
@@ -21,12 +21,17 @@
 /*
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
- *
+ */
+
+/*
+ * Copyright 2019 Nexenta Systems, Inc.
+ */
+
+/*
  * iSCSI logical unit interfaces
  */
 
 #include "iscsi.h"
-#include <sys/fs/dv_node.h>	/* devfs_clean */
 #include <sys/bootprops.h>
 #include <sys/sysevent/eventdefs.h>
 #include <sys/sysevent/dev.h>
@@ -123,6 +128,11 @@
 	ilp->lun_addr	    = addr;
 	ilp->lun_type	    = inq->inq_dtype & DTYPE_MASK;
 	ilp->lun_oid	    = oid_tmp;
+	/*
+	 * Setting refcnt to 1 is the first hold for the LUN structure.
+	 */
+	ilp->lun_refcnt	    = 1;
+	mutex_init(&ilp->lun_mutex, NULL, MUTEX_DRIVER, NULL);
 
 	bcopy(inq->inq_vid, ilp->lun_vid, sizeof (inq->inq_vid));
 	bcopy(inq->inq_pid, ilp->lun_pid, sizeof (inq->inq_pid));
@@ -189,6 +199,7 @@
 			kmem_free(ilp->lun_guid, ilp->lun_guid_size);
 			ilp->lun_guid = NULL;
 		}
+		mutex_destroy(&ilp->lun_mutex);
 		kmem_free(ilp, sizeof (iscsi_lun_t));
 	} else {
 		ilp->lun_state &= ISCSI_LUN_STATE_CLEAR;
@@ -215,6 +226,81 @@
 	return (rtn);
 }
 
+void
+iscsi_lun_hold(iscsi_lun_t *ilp)
+{
+	mutex_enter(&ilp->lun_mutex);
+	/*
+	 * By design lun_refcnt should never be zero when this routine
+	 * is called. When the LUN is created the refcnt is set to 1.
+ * If iscsi_lun_rele is called and the refcnt goes to zero, the
+ * structure is freed, so this routine must not be called
+ * afterwards.
+	 */
+	ASSERT(ilp->lun_refcnt > 0);
+	ilp->lun_refcnt++;
+	mutex_exit(&ilp->lun_mutex);
+}
+
+void
+iscsi_lun_rele(iscsi_lun_t *ilp)
+{
+	ASSERT(ilp != NULL);
+
+	mutex_enter(&ilp->lun_mutex);
+	ASSERT(ilp->lun_refcnt > 0);
+	if (--ilp->lun_refcnt == 0) {
+		iscsi_sess_t		*isp;
+
+		isp = ilp->lun_sess;
+		ASSERT(isp != NULL);
+
+		/* ---- release its memory ---- */
+		kmem_free(ilp->lun_addr, (strlen((char *)isp->sess_name) +
+		    ADDR_EXT_SIZE + 1));
+
+		if (ilp->lun_guid != NULL) {
+			kmem_free(ilp->lun_guid, ilp->lun_guid_size);
+		}
+		mutex_destroy(&ilp->lun_mutex);
+		kmem_free(ilp, sizeof (iscsi_lun_t));
+	} else {
+		mutex_exit(&ilp->lun_mutex);
+	}
+}
+
+/*
+ * iscsi_lun_cmd_cancel -- as the name implies, cancel all commands for the lun
+ *
+ * This code is similar to the timeout function with a lot less checking of
+ * state before sending the ABORT event for commands on the pending queue.
+ *
+ * This function is only used by iscsi_lun_destroy().
+ */
+static void
+iscsi_lun_cmd_cancel(iscsi_lun_t *ilp)
+{
+	iscsi_sess_t	*isp;
+	iscsi_cmd_t	*icmdp, *nicmdp;
+
+	isp = ilp->lun_sess;
+	rw_enter(&isp->sess_state_rwlock, RW_READER);
+	mutex_enter(&isp->sess_queue_pending.mutex);
+	for (icmdp = isp->sess_queue_pending.head;
+	    icmdp; icmdp = nicmdp) {
+		nicmdp = icmdp->cmd_next;
+
+		/*
+		 * For commands on the pending queue we can go straight
+		 * to an abort request, which will free the command
+		 * and call back to the completion function.
+		 */
+		iscsi_cmd_state_machine(icmdp, ISCSI_CMD_EVENT_E4, isp);
+	}
+	mutex_exit(&isp->sess_queue_pending.mutex);
+	rw_exit(&isp->sess_state_rwlock);
+}
+
 /*
  * iscsi_lun_destroy - offline and remove lun
  *
@@ -240,6 +326,9 @@
 	isp = ilp->lun_sess;
 	ASSERT(isp != NULL);
 
+	/* flush all outstanding commands first */
+	iscsi_lun_cmd_cancel(ilp);
+
 	/* attempt to offline and free solaris node */
 	status = iscsi_lun_offline(ihp, ilp, B_TRUE);
 
@@ -269,16 +358,7 @@
 			}
 		}
 
-		/* release its memory */
-		kmem_free(ilp->lun_addr, (strlen((char *)isp->sess_name) +
-		    ADDR_EXT_SIZE + 1));
-		ilp->lun_addr = NULL;
-		if (ilp->lun_guid != NULL) {
-			kmem_free(ilp->lun_guid, ilp->lun_guid_size);
-			ilp->lun_guid = NULL;
-		}
-		kmem_free(ilp, sizeof (iscsi_lun_t));
-		ilp = NULL;
+		iscsi_lun_rele(ilp);
 	}
 
 	return (status);
@@ -641,56 +721,18 @@
 {
 	iscsi_status_t		status		= ISCSI_STATUS_SUCCESS;
 	int			circ		= 0;
-	dev_info_t		*cdip, *pdip;
-	char			*devname	= NULL;
+	dev_info_t		*cdip;
 	char			*pathname	= NULL;
-	int			rval;
 	boolean_t		offline		= B_FALSE;
 	nvlist_t		*attr_list	= NULL;
 
 	ASSERT(ilp != NULL);
 	ASSERT((ilp->lun_pip != NULL) || (ilp->lun_dip != NULL));
 
-	/*
-	 * Since we carry the logical units parent
-	 * lock across the offline call it will not
-	 * issue devfs_clean() and may fail with a
-	 * devi_ref count > 0.
-	 */
-	if (ilp->lun_pip == NULL) {
+	if (ilp->lun_pip == NULL)
 		cdip = ilp->lun_dip;
-	} else {
+	else
 		cdip = mdi_pi_get_client(ilp->lun_pip);
-	}
-
-	if ((cdip != NULL) &&
-	    (lun_free == B_TRUE) &&
-	    (ilp->lun_state & ISCSI_LUN_STATE_ONLINE)) {
-		/*
-		 * Make sure node is attached otherwise
-		 * it won't have related cache nodes to
-		 * clean up.  i_ddi_devi_attached is
-		 * similiar to i_ddi_node_state(cdip) >=
-		 * DS_ATTACHED. We should clean up only
-		 * when lun_free is set.
-		 */
-		if (i_ddi_devi_attached(cdip)) {
-
-			/* Get parent dip */
-			pdip = ddi_get_parent(cdip);
-
-			/* Get full devname */
-			devname = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
-			ndi_devi_enter(pdip, &circ);
-			(void) ddi_deviname(cdip, devname);
-			/* Release lock before devfs_clean() */
-			ndi_devi_exit(pdip, circ);
-
-			/* Clean cache */
-			(void) devfs_clean(pdip, devname + 1, DV_CLEAN_FORCE);
-			kmem_free(devname, MAXNAMELEN + 1);
-		}
-	}
 
 	if (cdip != NULL && ilp->lun_type == DTYPE_DIRECT) {
 		pathname = kmem_zalloc(MAXNAMELEN + 1, KM_SLEEP);
@@ -699,18 +741,9 @@
 
 	/* Attempt to offline the logical units */
 	if (ilp->lun_pip != NULL) {
-
 		/* virt/mdi */
 		ndi_devi_enter(scsi_vhci_dip, &circ);
-		if ((lun_free == B_TRUE) &&
-		    (ilp->lun_state & ISCSI_LUN_STATE_ONLINE)) {
-			rval = mdi_pi_offline(ilp->lun_pip,
-			    NDI_DEVI_REMOVE);
-		} else {
-			rval = mdi_pi_offline(ilp->lun_pip, 0);
-		}
-
-		if (rval == MDI_SUCCESS) {
+		if (mdi_pi_offline(ilp->lun_pip, 0) == MDI_SUCCESS) {
 			ilp->lun_state &= ISCSI_LUN_STATE_CLEAR;
 			ilp->lun_state |= ISCSI_LUN_STATE_OFFLINE;
 			if (lun_free == B_TRUE) {
@@ -728,18 +761,14 @@
 		ndi_devi_exit(scsi_vhci_dip, circ);
 
 	} else  {
+		/* phys/ndi */
+		int flags = NDI_DEVFS_CLEAN;
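+		/*
+		 * NDI_DEVFS_CLEAN asks the NDI framework to run the
+		 * devfs_clean() pass itself, replacing the manual
+		 * devname lookup and devfs_clean() call removed above.
+		 */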
 
-		/* phys/ndi */
 		ndi_devi_enter(ihp->hba_dip, &circ);
-		if ((lun_free == B_TRUE) &&
-		    (ilp->lun_state & ISCSI_LUN_STATE_ONLINE)) {
-			rval = ndi_devi_offline(
-			    ilp->lun_dip, NDI_DEVI_REMOVE);
-		} else {
-			rval = ndi_devi_offline(
-			    ilp->lun_dip, 0);
-		}
-		if (rval != NDI_SUCCESS) {
+		if (lun_free == B_TRUE &&
+		    (ilp->lun_state & ISCSI_LUN_STATE_ONLINE))
+			flags |= NDI_DEVI_REMOVE;
+		if (ndi_devi_offline(ilp->lun_dip, flags) != NDI_SUCCESS) {
 			status = ISCSI_STATUS_BUSY;
 			if (lun_free == B_FALSE) {
 				ilp->lun_state |= ISCSI_LUN_STATE_INVALID;
--- a/usr/src/uts/common/sys/idm/idm.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/sys/idm/idm.h	Thu Nov 14 07:43:52 2019 -0700
@@ -21,6 +21,7 @@
 
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -31,6 +32,8 @@
 extern "C" {
 #endif
 
+#include <sys/stmf_defines.h>
+
 typedef enum {
 	IDM_STATUS_SUCCESS = 0,
 	IDM_STATUS_FAIL,
@@ -45,31 +48,29 @@
 	IDM_STATUS_LOGIN_FAIL
 } idm_status_t;
 
+#define	IDM_CLIENT_NOTIFY_LIST() \
+	item(CN_UNDEFINED) \
+	item(CN_CONNECT_ACCEPT)		/* Target only */ \
+	item(CN_LOGIN_FAIL) \
+	item(CN_READY_FOR_LOGIN)	/* Initiator only */ \
+	item(CN_FFP_ENABLED) \
+	item(CN_FFP_DISABLED) \
+	item(CN_CONNECT_LOST) \
+	item(CN_CONNECT_DESTROY) \
+	item(CN_CONNECT_FAIL) \
+	item(CN_MAX)
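+
+/*
+ * IDM_CLIENT_NOTIFY_LIST is an x-macro: the enum below expands it with
+ * item(a) defined as "a," and the string table under the
+ * IDM_CN_NOTIFY_STRINGS guard expands it again with item(a) defined as
+ * "#a," so the two cannot drift apart. IDM_TASK_STATE_LIST further
+ * down follows the same pattern.
+ */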
 
 typedef enum {
-	CN_CONNECT_ACCEPT = 1,	/* Target only */
-	CN_LOGIN_FAIL,
-	CN_READY_FOR_LOGIN,	/* Initiator only */
-	CN_FFP_ENABLED,
-	CN_FFP_DISABLED,
-	CN_CONNECT_LOST,
-	CN_CONNECT_DESTROY,
-	CN_CONNECT_FAIL,
-	CN_MAX
+#define	item(a) a,
+	IDM_CLIENT_NOTIFY_LIST()
+#undef	item
 } idm_client_notify_t;
 
 #ifdef IDM_CN_NOTIFY_STRINGS
 static const char *idm_cn_strings[CN_MAX + 1] = {
-	"CN_UNDEFINED",
-	"CN_CONNECT_ACCEPT",
-	"CN_LOGIN_FAIL",
-	"CN_READY_FOR_LOGIN",
-	"CN_FFP_ENABLED",
-	"CN_FFP_DISABLED",
-	"CN_CONNECT_LOST",
-	"CN_CONNECT_DESTROY",
-	"CN_CONNECT_FAIL",
-	"CN_MAX"
+#define	item(a) #a,
+	IDM_CLIENT_NOTIFY_LIST()
+#undef	item
 };
 #endif
 
@@ -85,27 +86,27 @@
 	AT_TASK_MGMT_ABORT
 } idm_abort_type_t;
 
+#define	IDM_TASK_STATE_LIST() \
+	item(TASK_IDLE) \
+	item(TASK_ACTIVE) \
+	item(TASK_SUSPENDING) \
+	item(TASK_SUSPENDED) \
+	item(TASK_ABORTING) \
+	item(TASK_ABORTED) \
+	item(TASK_COMPLETE) \
+	item(TASK_MAX_STATE)
+
 typedef enum {
-	TASK_IDLE,
-	TASK_ACTIVE,
-	TASK_SUSPENDING,
-	TASK_SUSPENDED,
-	TASK_ABORTING,
-	TASK_ABORTED,
-	TASK_COMPLETE,
-	TASK_MAX_STATE
+#define	item(a) a,
+	IDM_TASK_STATE_LIST()
+#undef	item
 } idm_task_state_t;
 
 #ifdef IDM_TASK_SM_STRINGS
 static const char *idm_ts_name[TASK_MAX_STATE+1] = {
-	"TASK_IDLE",
-	"TASK_ACTIVE",
-	"TASK_SUSPENDING",
-	"TASK_SUSPENDED",
-	"TASK_ABORTING",
-	"TASK_ABORTED",
-	"TASK_COMPLETE",
-	"TASK_MAX_STATE"
+#define	item(a) #a,
+	IDM_TASK_STATE_LIST()
+#undef	item
 };
 #endif
 
@@ -422,12 +423,12 @@
 
 extern boolean_t idm_pattern_checking;
 
-#define	IDM_BUFPAT_SET(CHK_BUF) 				\
+#define	IDM_BUFPAT_SET(CHK_BUF)					\
 	if (idm_pattern_checking && (CHK_BUF)->idb_bufalloc) {	\
 		idm_bufpat_set(CHK_BUF);			\
 	}
 
-#define	IDM_BUFPAT_CHECK(CHK_BUF, CHK_LEN, CHK_TYPE) 		\
+#define	IDM_BUFPAT_CHECK(CHK_BUF, CHK_LEN, CHK_TYPE)		\
 	if (idm_pattern_checking) {				\
 		(void) idm_bufpat_check(CHK_BUF, CHK_LEN, CHK_TYPE);	\
 	}
@@ -441,7 +442,7 @@
 void
 idm_task_start(idm_task_t *idt, uintptr_t handle);
 
-void
+stmf_status_t
 idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type);
 
 void
@@ -524,6 +525,8 @@
 void
 idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func);
 
+int
+idm_refcnt_is_held(idm_refcnt_t *refcnt);
 
 #ifdef	__cplusplus
 }
--- a/usr/src/uts/common/sys/idm/idm_conn_sm.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/sys/idm/idm_conn_sm.h	Thu Nov 14 07:43:52 2019 -0700
@@ -65,158 +65,106 @@
 #define	IDM_LOGOUT_SECONDS	20
 #define	IDM_CLEANUP_SECONDS	0
 
+#define	IDM_CONN_EVENT_LIST() \
+	item(CE_UNDEFINED) \
+	/* Initiator events */ \
+	item(CE_CONNECT_REQ) \
+	item(CE_CONNECT_FAIL) \
+	item(CE_CONNECT_SUCCESS) \
+	item(CE_LOGIN_SND) \
+	item(CE_LOGIN_SUCCESS_RCV) \
+	item(CE_LOGIN_FAIL_RCV) \
+	item(CE_LOGOUT_THIS_CONN_SND) \
+	item(CE_LOGOUT_OTHER_CONN_SND) \
+	item(CE_LOGOUT_SESSION_SND) \
+	item(CE_LOGOUT_SUCCESS_RCV) \
+	item(CE_LOGOUT_FAIL_RCV) \
+	item(CE_ASYNC_LOGOUT_RCV) \
+	item(CE_ASYNC_DROP_CONN_RCV) \
+	item(CE_ASYNC_DROP_ALL_CONN_RCV) \
+	/* Target events */ \
+	item(CE_CONNECT_ACCEPT) \
+	item(CE_CONNECT_REJECT) \
+	item(CE_LOGIN_RCV) \
+	item(CE_LOGIN_TIMEOUT) \
+	item(CE_LOGIN_SUCCESS_SND) \
+	item(CE_LOGIN_FAIL_SND) \
+	item(CE_LOGIN_FAIL_SND_DONE) \
+	item(CE_LOGOUT_THIS_CONN_RCV) \
+	item(CE_LOGOUT_OTHER_CONN_RCV) \
+	item(CE_LOGOUT_SESSION_RCV) \
+	item(CE_LOGOUT_SUCCESS_SND) \
+	item(CE_LOGOUT_SUCCESS_SND_DONE) \
+	item(CE_LOGOUT_FAIL_SND) \
+	item(CE_LOGOUT_FAIL_SND_DONE) \
+	item(CE_CLEANUP_TIMEOUT) \
+	item(CE_ASYNC_LOGOUT_SND) \
+	item(CE_ASYNC_DROP_CONN_SND) \
+	item(CE_ASYNC_DROP_ALL_CONN_SND) \
+	item(CE_LOGOUT_TIMEOUT) \
+	/* Common events */ \
+	item(CE_TRANSPORT_FAIL) \
+	item(CE_MISC_TX) \
+	item(CE_TX_PROTOCOL_ERROR) \
+	item(CE_MISC_RX) \
+	item(CE_RX_PROTOCOL_ERROR) \
+	item(CE_LOGOUT_SESSION_SUCCESS) \
+	item(CE_CONN_REINSTATE) \
+	item(CE_CONN_REINSTATE_SUCCESS) \
+	item(CE_CONN_REINSTATE_FAIL) \
+	item(CE_ENABLE_DM_SUCCESS) \
+	item(CE_ENABLE_DM_FAIL) \
+	/* Add new events above CE_MAX_EVENT */ \
+	item(CE_MAX_EVENT)
+
 /* Update idm_ce_name table whenever connection events are modified */
 typedef enum {
-	CE_UNDEFINED = 0,
-
-	/* Initiator events */
-	CE_CONNECT_REQ,
-	CE_CONNECT_FAIL,
-	CE_CONNECT_SUCCESS,
-	CE_LOGIN_SND,
-	CE_LOGIN_SUCCESS_RCV,
-	CE_LOGIN_FAIL_RCV,
-	CE_LOGOUT_THIS_CONN_SND,
-	CE_LOGOUT_OTHER_CONN_SND,
-	CE_LOGOUT_SESSION_SND,
-	CE_LOGOUT_SUCCESS_RCV,
-	CE_LOGOUT_FAIL_RCV,
-	CE_ASYNC_LOGOUT_RCV,
-	CE_ASYNC_DROP_CONN_RCV,
-	CE_ASYNC_DROP_ALL_CONN_RCV,
-
-	/* Target events */
-	CE_CONNECT_ACCEPT,
-	CE_CONNECT_REJECT,
-	CE_LOGIN_RCV,
-	CE_LOGIN_TIMEOUT,
-	CE_LOGIN_SUCCESS_SND,
-	CE_LOGIN_FAIL_SND,
-	CE_LOGIN_FAIL_SND_DONE,
-	CE_LOGOUT_THIS_CONN_RCV,
-	CE_LOGOUT_OTHER_CONN_RCV,
-	CE_LOGOUT_SESSION_RCV,
-	CE_LOGOUT_SUCCESS_SND,
-	CE_LOGOUT_SUCCESS_SND_DONE,
-	CE_LOGOUT_FAIL_SND,
-	CE_LOGOUT_FAIL_SND_DONE,
-	CE_CLEANUP_TIMEOUT,
-	CE_ASYNC_LOGOUT_SND,
-	CE_ASYNC_DROP_CONN_SND,
-	CE_ASYNC_DROP_ALL_CONN_SND,
-	CE_LOGOUT_TIMEOUT,
-
-	/* Common events */
-	CE_TRANSPORT_FAIL,
-	CE_MISC_TX,
-	CE_TX_PROTOCOL_ERROR,
-	CE_MISC_RX,
-	CE_RX_PROTOCOL_ERROR,
-	CE_LOGOUT_SESSION_SUCCESS,
-	CE_CONN_REINSTATE,
-	CE_CONN_REINSTATE_SUCCESS,
-	CE_CONN_REINSTATE_FAIL,
-	CE_ENABLE_DM_SUCCESS,
-	CE_ENABLE_DM_FAIL,
-
-	/* Add new events above CE_MAX_EVENT */
-	CE_MAX_EVENT
+#define	item(a) a,
+	IDM_CONN_EVENT_LIST()
+#undef	item
 } idm_conn_event_t;
 
 #ifdef IDM_CONN_SM_STRINGS
 /* An array of event text values, for use in logging events */
 static const char *idm_ce_name[CE_MAX_EVENT+1] = {
-	"CE_UNDEFINED",
-	"CE_CONNECT_REQ",
-	"CE_CONNECT_FAIL",
-	"CE_CONNECT_SUCCESS",
-	"CE_LOGIN_SND",
-	"CE_LOGIN_SUCCESS_RCV",
-	"CE_LOGIN_FAIL_RCV",
-	"CE_LOGOUT_THIS_CONN_SND",
-	"CE_LOGOUT_OTHER_CONN_SND",
-	"CE_LOGOUT_SESSION_SND",
-	"CE_LOGOUT_SUCCESS_RCV",
-	"CE_LOGOUT_FAIL_RCV",
-	"CE_ASYNC_LOGOUT_RCV",
-	"CE_ASYNC_DROP_CONN_RCV",
-	"CE_ASYNC_DROP_ALL_CONN_RCV",
-	"CE_CONNECT_ACCEPT",
-	"CE_CONNECT_REJECT",
-	"CE_LOGIN_RCV",
-	"CE_LOGIN_TIMEOUT",
-	"CE_LOGIN_SUCCESS_SND",
-	"CE_LOGIN_FAIL_SND",
-	"CE_LOGIN_FAIL_SND_DONE",
-	"CE_LOGOUT_THIS_CONN_RCV",
-	"CE_LOGOUT_OTHER_CONN_RCV",
-	"CE_LOGOUT_SESSION_RCV",
-	"CE_LOGOUT_SUCCESS_SND",
-	"CE_LOGOUT_SUCCESS_SND_DONE",
-	"CE_LOGOUT_FAIL_SND",
-	"CE_LOGOUT_FAIL_SND_DONE",
-	"CE_CLEANUP_TIMEOUT",
-	"CE_ASYNC_LOGOUT_SND",
-	"CE_ASYNC_DROP_CONN_SND",
-	"CE_ASYNC_DROP_ALL_CONN_SND",
-	"CE_LOGOUT_TIMEOUT",
-	"CE_TRANSPORT_FAIL",
-	"CE_MISC_TX",
-	"CE_TX_PROTOCOL_ERROR",
-	"CE_MISC_RX",
-	"CE_RX_PROTOCOL_ERROR",
-	"CE_LOGOUT_SESSION_SUCCESS",
-	"CE_CONN_REINSTATE",
-	"CE_CONN_REINSTATE_SUCCESS",
-	"CE_CONN_REINSTATE_FAIL",
-	"CE_ENABLE_DM_SUCCESS",
-	"CE_ENABLE_DM_FAIL",
-	"CE_MAX_EVENT"
+#define	item(a) #a,
+	IDM_CONN_EVENT_LIST()
+#undef	item
 };
 #endif
 
+#define	CONN_STATE_LIST() \
+	item(CS_S0_UNDEFINED) \
+	item(CS_S1_FREE) \
+	item(CS_S2_XPT_WAIT) \
+	item(CS_S3_XPT_UP) \
+	item(CS_S4_IN_LOGIN) \
+	item(CS_S5_LOGGED_IN) \
+	item(CS_S6_IN_LOGOUT) \
+	item(CS_S7_LOGOUT_REQ) \
+	item(CS_S8_CLEANUP) \
+	item(CS_S9_INIT_ERROR) \
+	item(CS_S10_IN_CLEANUP) \
+	item(CS_S11_COMPLETE) \
+	item(CS_S12_ENABLE_DM) \
+	item(CS_S9A_REJECTED) \
+	item(CS_S9B_WAIT_SND_DONE) \
+	/* Add new connection states above CS_MAX_STATE */ \
+	item(CS_MAX_STATE)
+
 /* Update idm_cs_name table whenever connection states are modified */
 typedef enum {
-	CS_S0_UNDEFINED = 0,
-
-	CS_S1_FREE,
-	CS_S2_XPT_WAIT,
-	CS_S3_XPT_UP,
-	CS_S4_IN_LOGIN,
-	CS_S5_LOGGED_IN,
-	CS_S6_IN_LOGOUT,
-	CS_S7_LOGOUT_REQ,
-	CS_S8_CLEANUP,
-	CS_S9_INIT_ERROR,
-	CS_S10_IN_CLEANUP,
-	CS_S11_COMPLETE,
-	CS_S12_ENABLE_DM,
-	CS_S9A_REJECTED,
-	CS_S9B_WAIT_SND_DONE,
-
-	/* Add new connection states above CS_MAX_STATE */
-	CS_MAX_STATE
+#define	item(a) a,
+	CONN_STATE_LIST()
+#undef	item
 } idm_conn_state_t;
 
 #ifdef IDM_CONN_SM_STRINGS
 /* An array of state text values, for use in logging state transitions */
 static const char *idm_cs_name[CS_MAX_STATE+1] = {
-	"CS_S0_UNDEFINED",
-	"CS_S1_FREE",
-	"CS_S2_XPT_WAIT",
-	"CS_S3_XPT_UP",
-	"CS_S4_IN_LOGIN",
-	"CS_S5_LOGGED_IN",
-	"CS_S6_IN_LOGOUT",
-	"CS_S7_LOGOUT_REQ",
-	"CS_S8_CLEANUP",
-	"CS_S9_INIT_ERROR",
-	"CS_S10_IN_CLEANUP",
-	"CS_S11_COMPLETE",
-	"CS_S12_ENABLE_DM",
-	"CS_S9A_REJECTED",
-	"CS_S9B_WAIT_SND_DONE",
-	"CS_MAX_STATE"
+#define	item(a) #a,
+	CONN_STATE_LIST()
+#undef	item
 };
 #endif
 
--- a/usr/src/uts/common/sys/idm/idm_impl.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/sys/idm/idm_impl.h	Thu Nov 14 07:43:52 2019 -0700
@@ -22,7 +22,7 @@
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 /*
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2014-2015 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #ifndef	_IDM_IMPL_H_
@@ -155,7 +155,7 @@
 	void			*ic_handle;
 	idm_refcnt_t		ic_refcnt;
 	idm_svc_t		*ic_svc_binding; /* Target conn. only */
-	idm_sockaddr_t 		ic_ini_dst_addr;
+	idm_sockaddr_t		ic_ini_dst_addr;
 	struct sockaddr_storage	ic_laddr;	/* conn local address */
 	struct sockaddr_storage	ic_raddr;	/* conn remote address */
 
@@ -321,7 +321,7 @@
 	BP_CHECK_ASSERT
 } idm_bufpat_check_type_t;
 
-#define	BUFPAT_MATCH(bc_bufpat, bc_idb) 		\
+#define	BUFPAT_MATCH(bc_bufpat, bc_idb)			\
 	((bufpat->bufpat_idb == bc_idb) &&		\
 	    (bufpat->bufpat_bufmagic == IDM_BUF_MAGIC))
 
@@ -409,9 +409,19 @@
 
 #define	OSD_EXT_CDB_AHSLEN	(200 - 15)
 #define	BIDI_AHS_LENGTH		5
+/*
+ * Additional Header Segment (AHS)
+ * AHS is only valid for SCSI Requests and contains SCSI CDB information
+ * which doesn't fit in the standard 16 byte area of the PDU. Commonly
+ * this only holds true for OSD device commands.
+ *
+ * IDM_SORX_CACHE_AHSLEN is the amount of memory preallocated, in bytes.
+ * When used in the header the AHS length is stored as the number of 4-byte
+ * words, so IDM_SORX_WIRE_AHSLEN is IDM_SORX_CACHE_AHSLEN expressed in words.
+ */
 #define	IDM_SORX_CACHE_AHSLEN \
-	(((OSD_EXT_CDB_AHSLEN + 3) + \
-	    (BIDI_AHS_LENGTH + 3)) / sizeof (uint32_t))
+	((OSD_EXT_CDB_AHSLEN + 3) + (BIDI_AHS_LENGTH + 3))
+#define	IDM_SORX_WIRE_AHSLEN (IDM_SORX_CACHE_AHSLEN / sizeof (uint32_t))
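+/*
+ * Worked example: with OSD_EXT_CDB_AHSLEN = 185 and BIDI_AHS_LENGTH = 5,
+ * IDM_SORX_CACHE_AHSLEN is (185 + 3) + (5 + 3) = 196 bytes, and
+ * IDM_SORX_WIRE_AHSLEN is 196 / 4 = 49 words.
+ */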
 #define	IDM_SORX_CACHE_HDRLEN	(sizeof (iscsi_hdr_t) + IDM_SORX_CACHE_AHSLEN)
 
 /*
--- a/usr/src/uts/common/sys/lpif.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/sys/lpif.h	Thu Nov 14 07:43:52 2019 -0700
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  */
 #ifndef	_LPIF_H
 #define	_LPIF_H
@@ -76,6 +77,7 @@
 	uint32_t		lu_proxy_reg_arg_len;
 	void			(*lu_dbuf_free)(struct scsi_task *task,
 		struct stmf_data_buf *dbuf);
+	void			(*lu_task_done)(struct scsi_task *task);
 } stmf_lu_t;
 
 /*
@@ -84,6 +86,7 @@
 #define	STMF_LU_ABORT_TASK		1
 #define	STMF_LU_RESET_STATE		2
 #define	STMF_LU_ITL_HANDLE_REMOVED	3
+#define	STMF_LU_SET_ABORT		4
 
 /*
  * Asymmetric access state
@@ -132,6 +135,9 @@
 stmf_status_t stmf_set_lu_access(stmf_lu_t *lup, uint8_t access_state);
 stmf_status_t stmf_proxy_scsi_cmd(scsi_task_t *, stmf_data_buf_t *dbuf);
 int stmf_is_standby_port(scsi_task_t *);
+void stmf_lu_xfer_done(struct scsi_task *task, boolean_t read,
+    hrtime_t elapsed_time);
+boolean_t stmf_is_pgr_aptpl_always(void);
 
 #ifdef	__cplusplus
 }
--- a/usr/src/uts/common/sys/portif.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/sys/portif.h	Thu Nov 14 07:43:52 2019 -0700
@@ -138,6 +138,10 @@
 stmf_status_t stmf_deregister_local_port(stmf_local_port_t *lport);
 stmf_status_t stmf_register_scsi_session(stmf_local_port_t *lport,
 				stmf_scsi_session_t *ss);
+stmf_status_t stmf_add_rport_info(stmf_scsi_session_t *ss,
+	const char *prop_name, const char *prop_value);
+void stmf_remove_rport_info(stmf_scsi_session_t *ss,
+	const char *prop_name);
 void stmf_deregister_scsi_session(stmf_local_port_t *lport,
 				stmf_scsi_session_t *ss);
 void stmf_set_port_standby(stmf_local_port_t *lport, uint16_t rtpid);
--- a/usr/src/uts/common/sys/scsi/generic/commands.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/sys/scsi/generic/commands.h	Thu Nov 14 07:43:52 2019 -0700
@@ -329,6 +329,7 @@
  */
 #define	SCMD_GROUP4		0x80
 #define	SCMD_EXTENDED_COPY	0x83
+#define	SCMD_RECV_COPY_RESULTS	0x84
 #define	SCMD_VERIFY_G4		0x8f
 
 /*
@@ -357,6 +358,7 @@
  */
 #define	SCMD_WRITE_FILE_MARK_G4	0x80
 #define	SCMD_READ_REVERSE_G4	0x81
+#define	SCMD_COMPARE_AND_WRITE	0x89
 #define	SCMD_READ_ATTRIBUTE	0x8c
 #define	SCMD_WRITE_ATTRIBUTE	0x8d
 #define	SCMD_SPACE_G4		0x91
--- a/usr/src/uts/common/sys/scsi/generic/mode.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/sys/scsi/generic/mode.h	Thu Nov 14 07:43:52 2019 -0700
@@ -191,6 +191,8 @@
  */
 
 #define	MODEPAGE_DISCO_RECO	0x02
+#define	MODEPAGE_FORMAT		0x03
+#define	MODEPAGE_GEOMETRY	0x04
 #define	MODEPAGE_CACHING	0x08
 #define	MODEPAGE_PDEVICE	0x09
 #define	MODEPAGE_CTRL_MODE	0x0A
--- a/usr/src/uts/common/sys/scsi/scsi_names.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/sys/scsi/scsi_names.h	Thu Nov 14 07:43:52 2019 -0700
@@ -35,13 +35,13 @@
 #define	SNS_NAA_32	32
 #define	SNS_WWN_16	16
 
-/*
- * Maximum number of bytes needed to store SCSI Name Strings in UTF-8 format,
- * assuming that (per RFC3629) one UTF-8 character can take up to 4 bytes.
- */
-#define	SNS_EUI_U8_LEN_MAX	(SNS_EUI_16 * 4)
-#define	SNS_IQN_U8_LEN_MAX	(SNS_IQN_223 * 4)
-#define	SNS_NAA_U8_LEN_MAX	(SNS_NAA_32 * 4)
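+/*
+ * Maximum lengths, in bytes, of the SCSI Name String forms, counting
+ * the identifier prefix (SNS_EUI, SNS_NAA, ...) via sizeof; the iqn
+ * form is the longest, so it bounds SNS_LEN_MAX.
+ */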
+#define	SNS_EUI_LEN_MAX		(sizeof (SNS_EUI) + SNS_EUI_16)
+#define	SNS_IQN_LEN_MAX		SNS_IQN_223
+#define	SNS_MAC_LEN_MAX		(sizeof (SNS_MAC) + SNS_MAC_12)
+#define	SNS_NAA_LEN_MAX		(sizeof (SNS_NAA) + SNS_NAA_32)
+#define	SNS_WWN_LEN_MAX		(sizeof (SNS_WWN) + SNS_WWN_16)
+
+#define	SNS_LEN_MAX		SNS_IQN_LEN_MAX
 
 #ifdef __cplusplus
 }
--- a/usr/src/uts/common/sys/stmf.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/sys/stmf.h	Thu Nov 14 07:43:52 2019 -0700
@@ -18,10 +18,13 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
  */
+
 #ifndef	_STMF_H
 #define	_STMF_H
 
@@ -205,6 +208,11 @@
 #define	TASK_AF_ACCEPT_LU_DBUF		0x08
 
 /*
+ * Indicating a PPPT task
+ */
+#define	TASK_AF_PPPT_TASK		0x10
+
+/*
  * scsi_task_t extension identifiers
  */
 #define	STMF_TASK_EXT_NONE		0
@@ -379,7 +387,8 @@
 void stmf_task_lu_done(scsi_task_t *task);
 void stmf_abort(int abort_cmd, scsi_task_t *task, stmf_status_t s, void *arg);
 void stmf_task_lu_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof);
-void stmf_task_lport_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof);
+void stmf_task_lport_aborted_unlocked(scsi_task_t *task, stmf_status_t s,
+    uint32_t iof);
 stmf_status_t stmf_task_poll_lu(scsi_task_t *task, uint32_t timeout);
 stmf_status_t stmf_task_poll_lport(scsi_task_t *task, uint32_t timeout);
 stmf_status_t stmf_ctl(int cmd, void *obj, void *arg);
@@ -416,6 +425,9 @@
     struct scsi_transport_id *);
 struct stmf_remote_port *stmf_remote_port_alloc(uint16_t);
 void stmf_remote_port_free(struct stmf_remote_port *);
+struct stmf_lu *stmf_check_and_hold_lu(struct scsi_task *task, uint8_t *guid);
+void stmf_release_lu(struct stmf_lu *lu);
+int stmf_is_task_being_aborted(struct scsi_task *task);
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/common/sys/stmf_defines.h	Fri Jun 21 19:22:54 2019 -0400
+++ b/usr/src/uts/common/sys/stmf_defines.h	Thu Nov 14 07:43:52 2019 -0700
@@ -123,7 +123,7 @@
 #define	STMF_SAA_INVALID_FIELD_IN_PARAM_LIST	0x052600
 #define	STMF_SAA_INVALID_RELEASE_OF_PR		0x052604
 #define	STMF_SAA_MEDIUM_REMOVAL_PREVENTED	0x055302
-#define	STMF_SAA_INSUFFICIENT_REG_RESOURCES	0x055504
+#define	STMF_SAA_INSUFFICIENT_REG_RESRCS	0x055504
 #define	STMF_SAA_POR				0x062900
 #define	STMF_SAA_MODE_PARAMETERS_CHANGED	0x062A01
 #define	STMF_SAA_ASYMMETRIC_ACCESS_CHANGED	0x062A06