changeset 10861:43a614b9a540

Branch merge + libscf/Makfile.com s390 standardize flag names
author tide@localhost
date Mon, 17 Aug 2009 09:14:35 -0400
parents b1ec56e41796 (current diff) 3928a1af021c (diff)
children b5304f1daa92
files usr/src/cmd/isns/isnsd/Makefile usr/src/lib/libscf/Makefile.com
diffstat 189 files changed, 12267 insertions(+), 4065 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/cmd-inet/lib/nwamd/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/cmd-inet/lib/nwamd/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -20,7 +20,7 @@
 #
 
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 # usr/src/cmd/cmd-inet/lib/nwamd/Makefile
@@ -43,7 +43,7 @@
 ROOTCMDDIR=	$(ROOTFS_LIBDIR)/inet
 
 LDLIBS +=	-lsocket -lnsl -linetcfg -linetutil -lumem -lscf -ldladm \
-		-lgen -ldoor -lsecdb -lbsm -lsysevent -lnvpair
+		-lgen -lsecdb -lbsm -lsysevent -lnvpair
 
 .KEEP_STATE:
 
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1,3 +1,4 @@
+
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
@@ -103,11 +104,11 @@
  * The state of a phyint that is capable of being probed, is completely
  * specified by the 3-tuple <pi_state, pg_state, I>.
  *
- * A phyint starts in either PI_RUNNING or PI_FAILED, depending on the state
- * of the link (according to the driver).  If the phyint is also configured
- * with a test address (the common case) and probe targets, then a phyint must
- * also successfully be able to send and receive probes in order to remain in
- * the PI_RUNNING state (otherwise, it transitions to PI_FAILED).
+ * A phyint starts in either PI_RUNNING or PI_OFFLINE, depending on whether
+ * IFF_OFFLINE is set.  If the phyint is also configured with a test address
+ * (the common case) and probe targets, then a phyint must also successfully
+ * be able to send and receive probes in order to remain in the PI_RUNNING
+ * state (otherwise, it transitions to PI_FAILED).
  *
  * Further, if a PI_RUNNING phyint is configured with a test address but is
  * unable to find any probe targets, it will transition to the PI_NOTARGETS
@@ -1360,11 +1361,11 @@
 }
 
 /*
- * Transition a phyint back to PI_RUNNING (from PI_FAILED or PI_OFFLINE).  The
- * caller must ensure that the transition is appropriate.  Clears IFF_OFFLINE
- * or IFF_FAILED, as appropriate.  Also sets IFF_INACTIVE on this or other
- * interfaces as appropriate (see comment below).  Finally, also updates the
- * phyint's group state to account for the change.
+ * Transition a phyint to PI_RUNNING.  The caller must ensure that the
+ * transition is appropriate.  Clears IFF_OFFLINE or IFF_FAILED if
+ * appropriate.  Also sets IFF_INACTIVE on this or other interfaces as
+ * appropriate (see comment below).  Finally, also updates the phyint's group
+ * state to account for the change.
  */
 void
 phyint_transition_to_running(struct phyint *pi)
@@ -1373,6 +1374,7 @@
 	struct phyint *actstandbypi = NULL;
 	uint_t nactive = 0, nnonstandby = 0;
 	boolean_t onlining = (pi->pi_state == PI_OFFLINE);
+	boolean_t initial = (pi->pi_state == PI_INIT);
 	uint64_t set, clear;
 
 	/*
@@ -1422,7 +1424,7 @@
 	} else if (onlining || failback_enabled) {		/* case 2 */
 		if (nactive >= nnonstandby && actstandbypi != NULL)
 			(void) change_pif_flags(actstandbypi, IFF_INACTIVE, 0);
-	} else if (!GROUP_FAILED(pi->pi_group)) {		/* case 3 */
+	} else if (!initial && !GROUP_FAILED(pi->pi_group)) {	/* case 3 */
 		set |= IFF_INACTIVE;
 	}
 	(void) change_pif_flags(pi, set, clear);
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c	Mon Aug 17 09:14:35 2009 -0400
@@ -386,15 +386,7 @@
 	pi->pi_ifindex = ifindex;
 	pi->pi_icmpid = htons(((getpid() & 0xFF) << 8) | (ifindex & 0xFF));
 
-	/*
-	 * If the interface is offline, we set the state to PI_OFFLINE.
-	 * Otherwise, we optimistically start in the PI_RUNNING state.  Later
-	 * (in process_link_state_changes()), we will adjust this to match the
-	 * current state of the link.  Further, if test addresses are
-	 * subsequently assigned, we will transition to PI_NOTARGETS and then
-	 * to either PI_RUNNING or PI_FAILED depending on the probe results.
-	 */
-	pi->pi_state = (flags & IFF_OFFLINE) ? PI_OFFLINE : PI_RUNNING;
+	pi->pi_state = PI_INIT;
 	pi->pi_flags = PHYINT_FLAGS(flags);
 
 	/*
@@ -417,6 +409,19 @@
 	 */
 	phyint_insert(pi, pg);
 
+	/*
+	 * If the interface is offline, we set the state to PI_OFFLINE.
+	 * Otherwise, optimistically consider this interface running.  Later
+	 * (in process_link_state_changes()), we will adjust this to match the
+	 * current state of the link.  Further, if test addresses are
+	 * subsequently assigned, we will transition to PI_NOTARGETS and then
+	 * to either PI_RUNNING or PI_FAILED depending on the probe results.
+	 */
+	if (flags & IFF_OFFLINE)
+		phyint_chstate(pi, PI_OFFLINE);
+	else
+		phyint_transition_to_running(pi); /* calls phyint_chstate() */
+
 	return (pi);
 }
 
@@ -2677,6 +2682,9 @@
 ifstate(struct phyint *pi)
 {
 	switch (pi->pi_state) {
+	case PI_INIT:
+		return (IPMP_IF_UNKNOWN);
+
 	case PI_NOTARGETS:
 		if (pi->pi_flags & IFF_FAILED)
 			return (IPMP_IF_FAILED);
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h	Mon Aug 17 09:14:35 2009 -0400
@@ -167,6 +167,7 @@
  * Phyint states; see below for the phyint definition.
  */
 enum pi_state {
+	PI_INIT		= 0,	/* Phyint is being initialized */
 	PI_NOTARGETS	= 1,	/* Phyint has no targets */
 	PI_RUNNING	= 2,	/* Phyint is functioning */
 	PI_FAILED	= 3,	/* Phyint is failed */
--- a/usr/src/cmd/devfsadm/Makefile.com	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/devfsadm/Makefile.com	Mon Aug 17 09:14:35 2009 -0400
@@ -22,6 +22,11 @@
 # Use is subject to license terms.
 #
 
+# This target builds both a command (daemon) and various shared objects.  This
+# isn't a typical target, and the inclusion of both library and command
+# Makefiles were probably not in their original design.  However, there doesn't
+# presently seem to be a clash of any required definitions.
+include ../../../lib/Makefile.lib
 include ../../Makefile.cmd
 
 COMMON = ..
@@ -90,10 +95,15 @@
 LINTFLAGS += -erroff=E_NAME_DEF_NOT_USED2
 LINTFLAGS += -erroff=E_NAME_MULTIPLY_DEF2
 
-LDLIBS += -ldevinfo -lgen -lsysevent -lnvpair -ldoor -lzonecfg -lbsm
+# Define the dependencies required by devfsadm and all shared objects.
+LDLIBS +=		-ldevinfo
+devfsadm :=		LDLIBS += -lgen -lsysevent -lnvpair -lzonecfg -lbsm
+SUNW_md_link.so :=	LDLIBS += -lmeta
 
-LINK_MOD_LDLIBS=
-SUNW_md_link.so :=	LINK_MOD_LDLIBS=	-lmeta
+# All libraries are built from the same SUNW_%.so rule (see below), and define
+# their own SONAME using -h explicitly.  Null the generic -h macro that gets
+# inherited from Makefile.lib, otherwise we'll get two -h definitions.
+HSONAME =
 
 SRCS = $(DEVFSADM_SRC) $(LINK_SRCS)
 OBJS = $(DEVFSADM_OBJ) $(LINK_OBJS)
@@ -164,8 +174,8 @@
 	$(LINK.c) -o $@ $< $(DEVFSADM_OBJ) $(LDLIBS)
 	$(POST_PROCESS)
 
-SUNW_%.so: %.o
-	$(LINK.c) -o $@ $(GSHARED) -h $@ $< $(LINK_MOD_LDLIBS)
+SUNW_%.so: %.o $(MAPFILES)
+	$(CC) -o $@ $(GSHARED) $(DYNFLAGS) -h $@ $< $(LDLIBS) -lc
 	$(POST_PROCESS_SO)
 
 %.o: $(COMMON)/%.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/devfsadm/mapfile-vers	Mon Aug 17 09:14:35 2009 -0400
@@ -0,0 +1,64 @@
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING:  STOP NOW.  DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+#	usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+# External interface requirements
+{
+	global:
+		devfsadm_devlink_cache =	EXTERN;
+		devfsadm_enumerate_char_start =	EXTERN;
+		devfsadm_enumerate_int =	EXTERN;
+		devfsadm_errprint =		EXTERN;
+		devfsadm_free_dev_names =	EXTERN;
+		devfsadm_have_reserved =	EXTERN;
+		devfsadm_is_reserved =		EXTERN;
+		devfsadm_link_valid =		EXTERN;
+		devfsadm_lookup_dev_names =	EXTERN;
+		devfsadm_mklink =		EXTERN;
+		devfsadm_noupdate =		EXTERN;
+		devfsadm_print =		EXTERN;
+		devfsadm_read_link =		EXTERN;
+		devfsadm_reserve_id_cache =	EXTERN;
+		devfsadm_rm_all =		EXTERN;
+		devfsadm_rm_link =		EXTERN;
+		devfsadm_rm_stale_links =	EXTERN;
+		devfsadm_root_path =		EXTERN;
+		devfsadm_secondary_link =	EXTERN;
+		disk_enumerate_int =		EXTERN;
+		s_strdup =			EXTERN;
+		system_labeled =		EXTERN;
+};
--- a/usr/src/cmd/fps/fpsd/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/fps/fpsd/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -20,7 +20,7 @@
 #
 
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 
@@ -44,7 +44,7 @@
 
 CFLAGS += $(CCMT)
 
-LDLIBS  += -lc -ldevinfo -ldoor -lkstat -lscf -lgen
+LDLIBS  += -lc -ldevinfo -lkstat -lscf -lgen
 
 LDFLAGS += $(ZIGNORE)
 
--- a/usr/src/cmd/fs.d/autofs/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/fs.d/autofs/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -82,9 +82,9 @@
 GREP= egrep
 SED= sed
 
-$(AUTO) := 	LDLIBS += -lnsl -lsldap  -ldoor
+$(AUTO) := 	LDLIBS += -lnsl -lsldap
 $(MOUNT):=	LDLIBS += -lnsl
-$(TYPEPROG) :=	LDLIBS += -lrpcsvc -lsocket -lnsl -lsldap -lkstat -ldoor
+$(TYPEPROG) :=	LDLIBS += -lrpcsvc -lsocket -lnsl -lsldap -lkstat
 
 CFLAGS +=	$(CCVERBOSE) -D_FILE_OFFSET_BITS=64
 CPPFLAGS=	-I. -I.. -I../nfs/lib $(CPPFLAGS.master) -D_REENTRANT \
--- a/usr/src/cmd/fs.d/nfs/mountd/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/fs.d/nfs/mountd/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 
@@ -35,7 +35,7 @@
 OBJS	  = 	$(LOCAL) $(COMMON)
 SRCS	  =	$(LOCAL:%.o=%.c) $(FSLIBSRC) ../lib/nfs_sec.c \
 		../lib/sharetab.c ../lib/daemon.c
-LDLIBS	 +=	-lrpcsvc -lnsl -lbsm -lsocket -ldoor -ltsnet -ltsol
+LDLIBS	 +=	-lrpcsvc -lnsl -lbsm -lsocket -ltsnet -ltsol
 CPPFLAGS +=	-D_REENTRANT
 
 $(TYPEPROG):	$(OBJS)
--- a/usr/src/cmd/fs.d/nfs/nfsmapid/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/fs.d/nfs/nfsmapid/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -19,11 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 
 FSTYPE	  =	nfs
 TYPEPROG  =	nfsmapid
@@ -33,7 +31,7 @@
 include		../../Makefile.fstype
 
 LDLIBS   +=	-L$(ROOT)/usr/lib/nfs -R/usr/lib/nfs
-LDLIBS   +=	-lnsl -lmapid -ldoor -ldtrace -lidmap
+LDLIBS   +=	-lnsl -lmapid -ldtrace -lidmap
 SRCS	  =	nfsmapid.c nfsmapid_server.c
 DSRC	  =	nfsmapid_dt.d
 DOBJ	  =	$(DSRC:%.d=%.o)
--- a/usr/src/cmd/fs.d/smbclnt/smbiod/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/fs.d/smbclnt/smbiod/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -44,7 +44,7 @@
 
 # This is a multi-thread program but Nevada
 # no longer needs -lthread
-LDLIBS += -lsmbfs -ldoor
+LDLIBS += -lsmbfs
 
 CPPFLAGS += -I$(SRC)/lib/libsmbfs \
 	-I$(SRC)/uts/common/smbclnt -I$(SRC)/uts/common
--- a/usr/src/cmd/iscsi/iscsitgtd/Makefile.com	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/iscsi/iscsitgtd/Makefile.com	Mon Aug 17 09:14:35 2009 -0400
@@ -56,7 +56,7 @@
 
 all: $(PROG)
 
-LDLIBS	+= -lumem -luuid -lsocket -lnsl -ldoor -lavl -lmd5 -ladm -lefi
+LDLIBS	+= -lumem -luuid -lsocket -lnsl -lavl -lmd5 -ladm -lefi
 LDLIBS  += -liscsitgt -lzfs -ldlpi -lscf -lsasl
 XMLLIB   = -lxml2
 
--- a/usr/src/cmd/iscsid/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/iscsid/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -40,7 +40,7 @@
 include ../Makefile.cmd
 
 CCVERBOSE	=
-LDLIBS += -ldoor -lnsl
+LDLIBS += -lnsl
 
 ROOTMANIFESTDIR=	$(ROOTSVCNETWORKISCSI)
 $(ROOTSVCNETWORKISCSI)/iscsi-initiator.xml := FILEMODE = 0444
--- a/usr/src/cmd/isns/isnsadm/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/isns/isnsadm/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 
@@ -29,7 +29,7 @@
 
 include ../../Makefile.cmd
 
-LDLIBS += -lxml2 -ldoor -lscf -lc
+LDLIBS += -lxml2 -lscf -lc
 CFLAGS += $(CCVERBOSE)
 CPPFLAGS += -I. -I/usr/include/libxml2 -I../isnsd
 POFILE = isnsadm_all.po
--- a/usr/src/cmd/isns/isnsd/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/isns/isnsd/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -58,7 +58,7 @@
 obj.o := CERRWARN += -erroff=E_CONST_OBJ_SHOULD_HAVE_INITIZR
 obj.o := CERRWARN += -erroff=E_IMPLICIT_DECL_FUNC_RETURN_INT
 
-LDLIBS		+= -lsecdb -lsocket -lnsl -lscf -lxml2 -ldoor
+LDLIBS		+= -lsecdb -lsocket -lnsl -lscf -lxml2
 
 .KEEP_STATE:
 
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Mon Aug 17 09:14:35 2009 -0400
@@ -241,7 +241,7 @@
 objset_name(uintptr_t addr, char *buf)
 {
 	static int gotid;
-	static mdb_ctf_id_t osi_id, ds_id;
+	static mdb_ctf_id_t os_id, ds_id;
 	uintptr_t os_dsl_dataset;
 	char ds_snapname[MAXNAMELEN];
 	uintptr_t ds_dir;
@@ -249,9 +249,9 @@
 	buf[0] = '\0';
 
 	if (!gotid) {
-		if (mdb_ctf_lookup_by_name("struct objset_impl",
-		    &osi_id) == -1) {
-			mdb_warn("couldn't find struct objset_impl");
+		if (mdb_ctf_lookup_by_name("struct objset",
+		    &os_id) == -1) {
+			mdb_warn("couldn't find struct objset");
 			return (DCMD_ERR);
 		}
 		if (mdb_ctf_lookup_by_name("struct dsl_dataset",
@@ -263,7 +263,7 @@
 		gotid = TRUE;
 	}
 
-	if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset))
+	if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset))
 		return (DCMD_ERR);
 
 	if (os_dsl_dataset == 0) {
@@ -2158,7 +2158,7 @@
 	{ "dbuf", ":", "print dmu_buf_impl_t", dbuf },
 	{ "dbuf_stats", ":", "dbuf stats", dbuf_stats },
 	{ "dbufs",
-	    "\t[-O objset_impl_t*] [-n objset_name | \"mos\"] "
+	    "\t[-O objset_t*] [-n objset_name | \"mos\"] "
 	    "[-o object | \"mdn\"] \n"
 	    "\t[-l level] [-b blkid | \"bonus\"]",
 	    "find dmu_buf_impl_t's that match specified criteria", dbufs },
--- a/usr/src/cmd/mv/mv.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/mv/mv.c	Mon Aug 17 09:14:35 2009 -0400
@@ -66,6 +66,7 @@
 #define	ISDEV(A)	((A.st_mode & S_IFMT) == S_IFCHR || \
 			(A.st_mode & S_IFMT) == S_IFBLK || \
 			(A.st_mode & S_IFMT) == S_IFIFO)
+#define	ISSOCK(A)	((A.st_mode & S_IFMT) == S_IFSOCK)
 
 #define	BLKSIZE	4096
 #define	PATHSIZE 1024
@@ -477,7 +478,7 @@
 
 	/*
 	 * Make sure source file is not a directory,
-	 * we can't link directories...
+	 * we cannot link directories...
 	 */
 
 	if (ISDIR(s1)) {
@@ -643,15 +644,24 @@
 			return (n);
 		}
 
-		/* doors can't be moved across filesystems */
+		/* doors cannot be moved across filesystems */
 		if (ISDOOR(s1)) {
 			(void) fprintf(stderr,
-			    gettext("%s: %s: can't move door "
+			    gettext("%s: %s: cannot move door "
 			    "across file systems\n"), cmd, source);
 			return (1);
 		}
+
+		/* sockets cannot be moved across filesystems */
+		if (ISSOCK(s1)) {
+			(void) fprintf(stderr,
+			    gettext("%s: %s: cannot move socket "
+			    "across file systems\n"), cmd, source);
+			return (1);
+		}
+
 		/*
-		 * File can't be renamed, try to recreate the symbolic
+		 * File cannot be renamed, try to recreate the symbolic
 		 * link or special device, or copy the file wholesale
 		 * between file systems.
 		 */
@@ -1212,7 +1222,7 @@
 	if (realpath(path, rpath) == NULL) {
 		int	errno_save = errno;
 		(void) fprintf(stderr, gettext(
-		    "%s: can't resolve path %s: "), cmd, path);
+		    "%s: cannot resolve path %s: "), cmd, path);
 		errno = errno_save;
 		perror("");
 		return (0);
--- a/usr/src/cmd/prctl/prctl.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/prctl/prctl.c	Mon Aug 17 09:14:35 2009 -0400
@@ -77,6 +77,7 @@
 
 typedef struct prctl_list {
 	char *name;
+	rctl_qty_t *usage;
 	prctl_value_t *val_list;
 	struct prctl_list *next;
 } prctl_list_t;
@@ -1220,6 +1221,34 @@
 		rblk1 = rblk2;
 		rblk2 = rblk_tmp;
 	}
+
+	/*
+	 * Get the current usage for the resource control if it matched the
+	 * privilege and entity type criteria.
+	 */
+	if (list != NULL) {
+		if (pr_getrctl(Pr, rctlname, NULL, rblk2, RCTL_USAGE) == 0) {
+			list->usage = (rctl_qty_t *)malloc(sizeof (rctl_qty_t));
+			if (list->usage == NULL) {
+				preserve_error(gettext("malloc failed: %s"),
+				    strerror(errno));
+				free(rblk1);
+				free(rblk2);
+				return (1);
+			}
+			*list->usage = rctlblk_get_value(rblk2);
+		} else {
+			list->usage = NULL;
+			if (errno != ENOTSUP) {
+				preserve_error(gettext("failed to get "
+				    "resource control usage for %s: %s"),
+				    rctlname, strerror(errno));
+				free(rblk1);
+				free(rblk2);
+				return (1);
+			}
+		}
+	}
 	free(rblk1);
 	free(rblk2);
 	return (0);
@@ -1323,6 +1352,7 @@
 			old_val = new_val;
 		}
 		free(old_list->name);
+		free(old_list->usage);
 		new_list = old_list->next;
 		free(old_list);
 		old_list = new_list;
@@ -1432,6 +1462,23 @@
 			unit = SCALED_UNIT_NONE;
 			scale = scale_metric;
 		}
+
+		/* print the current usage for the rctl if available */
+		if (iter_list->usage != NULL) {
+			rblk_value = *(iter_list->usage);
+			if (!arg_parseable_mode) {
+				(void) uint64toscaled(rblk_value, 4, "E",
+				    rctl_valuestring, NULL, NULL,
+				    scale, NULL, 0);
+
+				(void) fprintf(stdout, "%8s%-16s%5s%-4s\n",
+				    "", "usage", rctl_valuestring, unit);
+			} else {
+				(void) fprintf(stdout, "%s %s %llu - - -\n",
+				    iter_list->name, "usage", rblk_value);
+			}
+		}
+
 		/* iterate over an print all control values */
 		while (iter_val != NULL) {
 
--- a/usr/src/cmd/ssh/include/authfd.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/ssh/include/authfd.h	Mon Aug 17 09:14:35 2009 -0400
@@ -3,8 +3,6 @@
 #ifndef	_AUTHFD_H
 #define	_AUTHFD_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -58,6 +56,7 @@
 #define SSH2_AGENTC_ADD_ID_CONSTRAINED		25
 
 #define	SSH_AGENT_CONSTRAIN_LIFETIME		1
+#define	SSH_AGENT_CONSTRAIN_CONFIRM		2
 
 /* extended failure messages */
 #define SSH2_AGENT_FAILURE			30
--- a/usr/src/cmd/ssh/include/readpass.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/ssh/include/readpass.h	Mon Aug 17 09:14:35 2009 -0400
@@ -3,8 +3,6 @@
 #ifndef	_READPASS_H
 #define	_READPASS_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -25,8 +23,12 @@
 #define RP_ECHO			0x0001
 #define RP_ALLOW_STDIN		0x0002
 #define RP_ALLOW_EOF		0x0004
+#define RP_USE_ASKPASS          0x0008
 
 char	*read_passphrase(const char *, int);
+int	ask_permission(const char *, ...)
+    __attribute__((format(printf, 1, 2)));
+int	read_keyfile_line(FILE *, const char *, char *, size_t, u_long *);
 
 #ifdef __cplusplus
 }
--- a/usr/src/cmd/ssh/include/ssh.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/ssh/include/ssh.h	Mon Aug 17 09:14:35 2009 -0400
@@ -3,8 +3,6 @@
 #ifndef	_SSH_H
 #define	_SSH_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -22,7 +20,7 @@
  * called by a name other than "ssh" or "Secure Shell".
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -118,6 +116,9 @@
 /* Minimum modulus size (n) for RSA keys. */
 #define SSH_RSA_MINIMUM_MODULUS_SIZE	768
 
+/* Listen backlog for sshd, ssh-agent and forwarding sockets */
+#define SSH_LISTEN_BACKLOG		128
+
 /*
  * Do not display banner when in remote command mode only. Note that RFC 4254
  * uses "exec" as a mode name for the channel opened for the execution of the
--- a/usr/src/cmd/ssh/libssh/common/readpass.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/ssh/libssh/common/readpass.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1,3 +1,8 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
 /*
  * Copyright (c) 2001 Markus Friedl.  All rights reserved.
  *
@@ -25,13 +30,12 @@
 #include "includes.h"
 RCSID("$OpenBSD: readpass.c,v 1.27 2002/03/26 15:58:46 markus Exp $");
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include "xmalloc.h"
 #include "readpass.h"
 #include "pathnames.h"
 #include "log.h"
 #include "ssh.h"
+#include <langinfo.h>
 
 static char *
 ssh_askpass(char *askpass, const char *msg)
@@ -130,3 +134,30 @@
 	memset(buf, 'x', sizeof buf);
 	return ret;
 }
+
+int
+ask_permission(const char *fmt, ...)
+{
+	va_list args;
+	char *p, prompt[1024];
+	int allowed = 0;
+	char *yeschar = nl_langinfo(YESSTR);
+
+	va_start(args, fmt);
+	vsnprintf(prompt, sizeof(prompt), fmt, args);
+	va_end(args);
+
+	p = read_passphrase(prompt, RP_USE_ASKPASS|RP_ALLOW_EOF);
+	if (p != NULL) {
+		/*
+		 * Accept empty responses and responses consisting
+		 * of the word "yes" as affirmative.
+		 */
+		if (*p == '\0' || *p == '\n' ||
+		    strncasecmp(p, yeschar, 1) == 0)
+			allowed = 1;
+		xfree(p);
+	}
+
+	return (allowed);
+}
--- a/usr/src/cmd/ssh/ssh-agent/ssh-agent.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/ssh/ssh-agent/ssh-agent.c	Mon Aug 17 09:14:35 2009 -0400
@@ -40,7 +40,7 @@
 
 #include "includes.h"
 #include "sys-queue.h"
-RCSID("$OpenBSD: ssh-agent.c,v 1.105 2002/10/01 20:34:12 markus Exp $");
+RCSID("$OpenBSD: ssh-agent.c,v 1.159 2008/06/28 14:05:15 djm Exp $");
 
 #ifdef HAVE_SOLARIS_PRIVILEGE
 #include <priv.h>
@@ -54,11 +54,12 @@
 #include "buffer.h"
 #include "bufaux.h"
 #include "xmalloc.h"
-#include "getput.h"
 #include "key.h"
 #include "authfd.h"
 #include "compat.h"
 #include "log.h"
+#include "readpass.h"
+#include "misc.h"
 
 typedef enum {
 	AUTH_UNUSED,
@@ -82,6 +83,7 @@
 	Key *key;
 	char *comment;
 	u_int death;
+	u_int confirm;
 } Identity;
 
 typedef struct {
@@ -96,10 +98,11 @@
 
 /* pid of shell == parent of agent */
 pid_t parent_pid = -1;
+u_int parent_alive_interval = 0;
 
 /* pathname and directory for AUTH_SOCKET */
-char socket_name[1024];
-char socket_dir[1024];
+char socket_name[MAXPATHLEN];
+char socket_dir[MAXPATHLEN];
 
 /* locking */
 int locked = 0;
@@ -111,6 +114,9 @@
 char *__progname;
 #endif
 
+/* Default lifetime (0 == forever) */
+static int lifetime = 0;
+
 static void
 close_socket(SocketEntry *e)
 {
@@ -164,6 +170,23 @@
 	return (NULL);
 }
 
+/* Check confirmation of keysign request */
+static int
+confirm_key(Identity *id)
+{
+	char *p;
+	int ret = -1;
+
+	p = key_fingerprint(id->key, SSH_FP_MD5, SSH_FP_HEX);
+	if (ask_permission(
+	    gettext("Allow use of key %s?\nKey fingerprint %s."),
+	    id->comment, p))
+		ret = 0;
+	xfree(p);
+
+	return (ret);
+}
+
 /* send list of supported public keys to 'client' */
 static void
 process_request_identities(SocketEntry *e, int version)
@@ -227,7 +250,7 @@
 		goto failure;
 
 	id = lookup_identity(key, 1);
-	if (id != NULL) {
+	if (id != NULL && (!id->confirm || confirm_key(id) == 0)) {
 		Key *private = id->key;
 		/* Decrypt the challenge using the private key. */
 		if (rsa_private_decrypt(challenge, challenge, private->rsa) <= 0)
@@ -270,6 +293,8 @@
 {
 	u_char *blob, *data, *signature = NULL;
 	u_int blen, dlen, slen = 0;
+	extern uint32_t datafellows;
+	int odatafellows;
 	int ok = -1, flags;
 	Buffer msg;
 	Key *key;
@@ -280,16 +305,17 @@
 	data = buffer_get_string(&e->request, &dlen);
 
 	flags = buffer_get_int(&e->request);
+	odatafellows = datafellows;
 	if (flags & SSH_AGENT_OLD_SIGNATURE)
 		datafellows = SSH_BUG_SIGBLOB;
 
 	key = key_from_blob(blob, blen);
 	if (key != NULL) {
 		Identity *id = lookup_identity(key, 2);
-		if (id != NULL)
+		if (id != NULL && (!id->confirm || confirm_key(id) == 0))
 			ok = key_sign(id->key, &signature, &slen, data, dlen);
+		key_free(key);
 	}
-	key_free(key);
 	buffer_init(&msg);
 	if (ok == 0) {
 		buffer_put_char(&msg, SSH2_AGENT_SIGN_RESPONSE);
@@ -305,6 +331,7 @@
 	xfree(blob);
 	if (signature != NULL)
 		xfree(signature);
+	datafellows = odatafellows;
 }
 
 /* shared */
@@ -338,7 +365,7 @@
 		if (id != NULL) {
 			/*
 			 * We have this key.  Free the old key.  Since we
-			 * don\'t want to leave empty slots in the middle of
+			 * don't want to leave empty slots in the middle of
 			 * the array, we actually free the key there and move
 			 * all the entries between the empty slot and the end
 			 * of the array.
@@ -381,10 +408,11 @@
 	buffer_put_char(&e->output, SSH_AGENT_SUCCESS);
 }
 
-static void
+/* removes expired keys and returns number of seconds until the next expiry */
+static u_int
 reaper(void)
 {
-	u_int now = time(NULL);
+	u_int deadline = 0, now = time(NULL);
 	Identity *id, *nxt;
 	int version;
 	Idtab *tab;
@@ -393,20 +421,30 @@
 		tab = idtab_lookup(version);
 		for (id = TAILQ_FIRST(&tab->idlist); id; id = nxt) {
 			nxt = TAILQ_NEXT(id, next);
-			if (id->death != 0 && now >= id->death) {
+			if (id->death == 0)
+				continue;
+			if (now >= id->death) {
+				debug("expiring key '%s'", id->comment);
 				TAILQ_REMOVE(&tab->idlist, id, next);
 				free_identity(id);
 				tab->nentries--;
-			}
+			} else
+				deadline = (deadline == 0) ? id->death :
+				    MIN(deadline, id->death);
 		}
 	}
+	if (deadline == 0 || deadline <= now)
+		return 0;
+	else
+		return (deadline - now);
 }
 
 static void
 process_add_identity(SocketEntry *e, int version)
 {
 	Idtab *tab = idtab_lookup(version);
-	int type, success = 0, death = 0;
+	Identity *id;
+	int type, success = 0, death = 0, confirm = 0;
 	char *type_name, *comment;
 	Key *k = NULL;
 
@@ -457,33 +495,54 @@
 		}
 		break;
 	}
+	/* enable blinding */
+	switch (k->type) {
+	case KEY_RSA:
+	case KEY_RSA1:
+		if (RSA_blinding_on(k->rsa, NULL) != 1) {
+			error("process_add_identity: RSA_blinding_on failed");
+			key_free(k);
+			goto send;
+		}
+		break;
+	}
 	comment = buffer_get_string(&e->request, NULL);
 	if (k == NULL) {
 		xfree(comment);
 		goto send;
 	}
-	success = 1;
 	while (buffer_len(&e->request)) {
-		switch (buffer_get_char(&e->request)) {
+		switch ((type = buffer_get_char(&e->request))) {
 		case SSH_AGENT_CONSTRAIN_LIFETIME:
 			death = time(NULL) + buffer_get_int(&e->request);
 			break;
+		case SSH_AGENT_CONSTRAIN_CONFIRM:
+			confirm = 1;
+			break;
 		default:
-			break;
+			error("process_add_identity: "
+			    "Unknown constraint type %d", type);
+			xfree(comment);
+			key_free(k);
+			goto send;
 		}
 	}
-	if (lookup_identity(k, version) == NULL) {
-		Identity *id = xmalloc(sizeof(Identity));
+	success = 1;
+	if (lifetime && !death)
+		death = time(NULL) + lifetime;
+	if ((id = lookup_identity(k, version)) == NULL) {
+		id = xmalloc(sizeof(Identity));
 		id->key = k;
-		id->comment = comment;
-		id->death = death;
 		TAILQ_INSERT_TAIL(&tab->idlist, id, next);
 		/* Increment the number of identities. */
 		tab->nentries++;
 	} else {
 		key_free(k);
-		xfree(comment);
+		xfree(id->comment);
 	}
+	id->comment = comment;
+	id->death = death;
+	id->confirm = confirm;
 send:
 	buffer_put_int(&e->output, 1);
 	buffer_put_char(&e->output,
@@ -540,13 +599,10 @@
 	u_int msg_len, type;
 	u_char *cp;
 
-	/* kill dead keys */
-	reaper();
-
 	if (buffer_len(&e->input) < 5)
 		return;		/* Incomplete message. */
 	cp = buffer_ptr(&e->input);
-	msg_len = GET_32BIT(cp);
+	msg_len = get_u32(cp);
 	if (msg_len > 256 * 1024) {
 		close_socket(e);
 		return;
@@ -631,10 +687,9 @@
 static void
 new_socket(sock_type type, int fd)
 {
-	u_int i, old_alloc;
+	u_int i, old_alloc, new_alloc;
 
-	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
-		error("fcntl O_NONBLOCK: %s", strerror(errno));
+	set_nonblock(fd);
 
 	if (fd > max_fd)
 		max_fd = fd;
@@ -642,32 +697,32 @@
 	for (i = 0; i < sockets_alloc; i++)
 		if (sockets[i].type == AUTH_UNUSED) {
 			sockets[i].fd = fd;
-			sockets[i].type = type;
 			buffer_init(&sockets[i].input);
 			buffer_init(&sockets[i].output);
 			buffer_init(&sockets[i].request);
+			sockets[i].type = type;
 			return;
 		}
 	old_alloc = sockets_alloc;
-	sockets_alloc += 10;
-	if (sockets)
-		sockets = xrealloc(sockets, sockets_alloc * sizeof(sockets[0]));
-	else
-		sockets = xmalloc(sockets_alloc * sizeof(sockets[0]));
-	for (i = old_alloc; i < sockets_alloc; i++)
+	new_alloc = sockets_alloc + 10;
+	sockets = xrealloc(sockets, new_alloc * sizeof(sockets[0]));
+	for (i = old_alloc; i < new_alloc; i++)
 		sockets[i].type = AUTH_UNUSED;
-	sockets[old_alloc].type = type;
+	sockets_alloc = new_alloc;
 	sockets[old_alloc].fd = fd;
 	buffer_init(&sockets[old_alloc].input);
 	buffer_init(&sockets[old_alloc].output);
 	buffer_init(&sockets[old_alloc].request);
+	sockets[old_alloc].type = type;
 }
 
 static int
-prepare_select(fd_set **fdrp, fd_set **fdwp, int *fdl, int *nallocp)
+prepare_select(fd_set **fdrp, fd_set **fdwp, int *fdl, u_int *nallocp,
+    struct timeval **tvpp)
 {
-	u_int i, sz;
+	u_int i, sz, deadline;
 	int n = 0;
+	static struct timeval tv;
 
 	for (i = 0; i < sockets_alloc; i++) {
 		switch (sockets[i].type) {
@@ -711,6 +766,17 @@
 			break;
 		}
 	}
+	deadline = reaper();
+	if (parent_alive_interval != 0)
+		deadline = (deadline == 0) ? parent_alive_interval :
+		    MIN(deadline, parent_alive_interval);
+	if (deadline == 0) {
+		*tvpp = NULL;
+	} else {
+		tv.tv_sec = deadline;
+		tv.tv_usec = 0;
+		*tvpp = &tv;
+	}
 	return (1);
 }
 
@@ -733,7 +799,7 @@
 			if (FD_ISSET(sockets[i].fd, readset)) {
 				slen = sizeof(sunaddr);
 				sock = accept(sockets[i].fd,
-				    (struct sockaddr *) &sunaddr, &slen);
+				    (struct sockaddr *)&sunaddr, &slen);
 				if (sock < 0) {
 					error("accept from AUTH_SOCKET: %s",
 					    strerror(errno));
@@ -763,7 +829,8 @@
 					    buffer_ptr(&sockets[i].output),
 					    buffer_len(&sockets[i].output));
 					if (len == -1 && (errno == EAGAIN ||
-					    errno == EINTR))
+					    errno == EINTR ||
+					    errno == EWOULDBLOCK))
 						continue;
 					break;
 				} while (1);
@@ -777,7 +844,8 @@
 				do {
 					len = read(sockets[i].fd, buf, sizeof(buf));
 					if (len == -1 && (errno == EAGAIN ||
-					    errno == EINTR))
+					    errno == EINTR ||
+					    errno == EWOULDBLOCK))
 						continue;
 					break;
 				} while (1);
@@ -795,7 +863,7 @@
 }
 
 static void
-cleanup_socket(void *p)
+cleanup_socket(void)
 {
 	if (socket_name[0])
 		unlink(socket_name);
@@ -803,32 +871,29 @@
 		rmdir(socket_dir);
 }
 
-static void
+void
 cleanup_exit(int i)
 {
-	cleanup_socket(NULL);
-	exit(i);
+	cleanup_socket();
+	_exit(i);
 }
 
+/*ARGSUSED*/
 static void
 cleanup_handler(int sig)
 {
-	cleanup_socket(NULL);
+	cleanup_socket();
 	_exit(2);
 }
 
 static void
-check_parent_exists(int sig)
+check_parent_exists(void)
 {
-	int save_errno = errno;
-
-	if (parent_pid != -1 && getppid() != parent_pid) {
+	if (parent_pid != -1 && kill(parent_pid, 0) < 0) {
 		/* printf("Parent has died - Authentication agent exiting.\n"); */
-		cleanup_handler(sig); /* safe */
+		cleanup_socket();
+		_exit(2);
 	}
-	signal(SIGALRM, check_parent_exists);
-	alarm(10);
-	errno = save_errno;
 }
 
 static void
@@ -841,7 +906,8 @@
 		    "  -s          Generate Bourne shell commands on stdout.\n"
 		    "  -k          Kill the current agent.\n"
 		    "  -d          Debug mode.\n"
-		    "  -a socket   Bind agent socket to given name.\n"),
+		    "  -a socket   Bind agent socket to given name.\n"
+		    "  -t life     Default identity lifetime (seconds).\n"),
 		__progname);
 	exit(1);
 }
@@ -849,7 +915,9 @@
 int
 main(int ac, char **av)
 {
-	int sock, c_flag = 0, d_flag = 0, k_flag = 0, s_flag = 0, ch, nalloc;
+	int c_flag = 0, d_flag = 0, k_flag = 0, s_flag = 0;
+	int sock, fd, ch, result, saved_errno;
+	u_int nalloc;
 	char *shell, *pidstr, *agentsocket = NULL;
 	const char *format;
 	fd_set *readsetp = NULL, *writesetp = NULL;
@@ -857,30 +925,30 @@
 #ifdef HAVE_SETRLIMIT
 	struct rlimit rlim;
 #endif
-#ifdef HAVE_CYGWIN
 	int prev_mask;
-#endif
 	extern int optind;
 	extern char *optarg;
 	pid_t pid;
 	char pidstrbuf[1 + 3 * sizeof pid];
+	struct timeval *tvp = NULL;
 #ifdef HAVE_SOLARIS_PRIVILEGE
 	priv_set_t *myprivs;
 #endif /* HAVE_SOLARIS_PRIVILEGE */
 
+	/* Ensure that fds 0, 1 and 2 are open or directed to /dev/null */
+	sanitise_stdfd();
+
 	/* drop */
 	setegid(getgid());
 	setgid(getgid());
 
-	(void) g11n_setlocale(LC_ALL, "");
-
 	SSLeay_add_all_algorithms();
 
 	__progname = get_progname(av[0]);
 	init_rng();
 	seed_rng();
 
-	while ((ch = getopt(ac, av, "cdksa:")) != -1) {
+	while ((ch = getopt(ac, av, "cdksa:t:")) != -1) {
 		switch (ch) {
 		case 'c':
 			if (s_flag)
@@ -903,6 +971,12 @@
 		case 'a':
 			agentsocket = optarg;
 			break;
+		case 't':
+			if ((lifetime = convtime(optarg)) == -1) {
+				fprintf(stderr, gettext("Invalid lifetime\n"));
+				usage();
+			}
+			break;
 		default:
 			usage();
 		}
@@ -915,7 +989,8 @@
 
 	if (ac == 0 && !c_flag && !s_flag) {
 		shell = getenv("SHELL");
-		if (shell != NULL && strncmp(shell + strlen(shell) - 3, "csh", 3) == 0)
+		if (shell != NULL &&
+		    strncmp(shell + strlen(shell) - 3, "csh", 3) == 0)
 			c_flag = 1;
 	}
 	if (k_flag) {
@@ -929,8 +1004,8 @@
 		pid = atoi(pidstr);
 		if (pid < 1) {
 			fprintf(stderr,
-			    gettext("%s=\")%s\", which is not a good PID\n"),
-			    SSH_AGENTPID_ENV_NAME, pidstr);
+				gettext("%s not set, cannot kill agent\n"),
+				SSH_AGENTPID_ENV_NAME);
 			exit(1);
 		}
 		if (kill(pid, SIGTERM) == -1) {
@@ -948,7 +1023,7 @@
 
 	if (agentsocket == NULL) {
 		/* Create private directory for agent socket */
-		strlcpy(socket_dir, "/tmp/ssh-XXXXXXXX", sizeof socket_dir);
+		strlcpy(socket_dir, "/tmp/ssh-XXXXXXXXXX", sizeof socket_dir);
 		if (mkdtemp(socket_dir) == NULL) {
 			perror("mkdtemp: private socket dir");
 			exit(1);
@@ -968,25 +1043,21 @@
 	sock = socket(AF_UNIX, SOCK_STREAM, 0);
 	if (sock < 0) {
 		perror("socket");
+		*socket_name = '\0'; /* Don't unlink any existing file */
 		cleanup_exit(1);
 	}
 	memset(&sunaddr, 0, sizeof(sunaddr));
 	sunaddr.sun_family = AF_UNIX;
 	strlcpy(sunaddr.sun_path, socket_name, sizeof(sunaddr.sun_path));
-#ifdef HAVE_CYGWIN
 	prev_mask = umask(0177);
-#endif
-	if (bind(sock, (struct sockaddr *) & sunaddr, sizeof(sunaddr)) < 0) {
+	if (bind(sock, (struct sockaddr *) &sunaddr, sizeof(sunaddr)) < 0) {
 		perror("bind");
-#ifdef HAVE_CYGWIN
+		*socket_name = '\0'; /* Don't unlink any existing file */
 		umask(prev_mask);
-#endif
 		cleanup_exit(1);
 	}
-#ifdef HAVE_CYGWIN
 	umask(prev_mask);
-#endif
-	if (listen(sock, 128) < 0) {
+	if (listen(sock, SSH_LISTEN_BACKLOG) < 0) {
 		perror("listen");
 		cleanup_exit(1);
 	}
@@ -1019,7 +1090,7 @@
 			printf(format, SSH_AGENTPID_ENV_NAME, pidstrbuf,
 			    SSH_AGENTPID_ENV_NAME);
 			printf("echo ");
-			printf(gettext("Agent pid %ld;\n"), (long)pid);
+                        printf(gettext("Agent pid %ld;\n"), (long)pid);
 			exit(0);
 		}
 		if (setenv(SSH_AUTHSOCKET_ENV_NAME, socket_name, 1) == -1 ||
@@ -1062,9 +1133,14 @@
 	}
 
 	(void)chdir("/");
-	close(0);
-	close(1);
-	close(2);
+	if ((fd = open(_PATH_DEVNULL, O_RDWR, 0)) != -1) {
+		/* XXX might close listen socket */
+		(void)dup2(fd, STDIN_FILENO);
+		(void)dup2(fd, STDOUT_FILENO);
+		(void)dup2(fd, STDERR_FILENO);
+		if (fd > 2)
+			close(fd);
+	}
 
 #ifdef HAVE_SETRLIMIT
 	/* deny core dumps, since memory contains unencrypted private keys */
@@ -1076,12 +1152,9 @@
 #endif
 
 skip:
-	fatal_add_cleanup(cleanup_socket, NULL);
 	new_socket(AUTH_SOCKET, sock);
-	if (ac > 0) {
-		signal(SIGALRM, check_parent_exists);
-		alarm(10);
-	}
+	if (ac > 0)
+		parent_alive_interval = 10;
 	idtab_init();
 	if (!d_flag)
 		signal(SIGINT, SIG_IGN);
@@ -1091,13 +1164,18 @@
 	nalloc = 0;
 
 	while (1) {
-		prepare_select(&readsetp, &writesetp, &max_fd, &nalloc);
-		if (select(max_fd + 1, readsetp, writesetp, NULL, NULL) < 0) {
-			if (errno == EINTR)
+		prepare_select(&readsetp, &writesetp, &max_fd, &nalloc, &tvp);
+		result = select(max_fd + 1, readsetp, writesetp, NULL, tvp);
+		saved_errno = errno;
+		if (parent_alive_interval != 0)
+			check_parent_exists();
+		(void) reaper();	/* remove expired keys */
+		if (result < 0) {
+			if (saved_errno == EINTR)
 				continue;
-			fatal("select: %s", strerror(errno));
-		}
-		after_select(readsetp, writesetp);
+			fatal("select: %s", strerror(saved_errno));
+		} else if (result > 0)
+			after_select(readsetp, writesetp);
 	}
 	/* NOTREACHED */
 	return (0);	/* keep lint happy */
--- a/usr/src/cmd/svc/configd/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/svc/configd/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -71,8 +71,7 @@
 $(NATIVE_BUILD)CPPFLAGS = $(MYCPPFLAGS) -I$(LIBUUTIL)/common -I$(LIBSCF)/inc
 $(NATIVE_BUILD)CPPFLAGS += -DNATIVE_BUILD
 $(NATIVE_BUILD)LDFLAGS =
-$(NATIVE_BUILD)LDLIBS = -L$(LIBUUTIL)/native -R $(LIBUUTIL)/native \
-	$(MYLDLIBS) -ldoor
+$(NATIVE_BUILD)LDLIBS = -L$(LIBUUTIL)/native -R $(LIBUUTIL)/native $(MYLDLIBS)
 
 DIRMODE = 0755
 FILEMODE = 0555
--- a/usr/src/cmd/svc/startd/graph.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/svc/startd/graph.c	Mon Aug 17 09:14:35 2009 -0400
@@ -6076,9 +6076,9 @@
 	vertex = vertex_get_by_name(inst_name);
 	if (vertex == NULL) {
 		MUTEX_UNLOCK(&dgraph_lock);
-		startd_free(inst_name, max_scf_fmri_size);
 		log_framework(LOG_DEBUG, "%s: Can't find graph vertex. "
 		    "The instance must have been removed.\n", inst_name);
+		startd_free(inst_name, max_scf_fmri_size);
 		return (0);
 	}
 
--- a/usr/src/cmd/svc/svccfg/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/svc/svccfg/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -20,7 +20,7 @@
 #
 
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 
@@ -105,7 +105,7 @@
 $(NATIVE_BUILD)LDLIBS = \
 	-L$(LIBUUTIL)/native -R $(LIBUUTIL)/native \
 	-L$(LIBSCF)/native -R $(LIBSCF)/native \
-	$(SVCCFG_EXTRA_LIBS) -ldoor
+	$(SVCCFG_EXTRA_LIBS)
 
 svccfg_lex.o svccfg_grammar.o := CCVERBOSE =
 
--- a/usr/src/cmd/vt/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/vt/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 
--- a/usr/src/cmd/wusbadm/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/wusbadm/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -39,7 +39,7 @@
 
 
 CPPFLAGS += -I$(SRC)/uts/common
-LDLIBS += -lpkcs11 -lkmf -ldoor -lsysevent -lnvpair -lsecdb
+LDLIBS += -lpkcs11 -lkmf -lsysevent -lnvpair -lsecdb
 
 .KEEP_STATE:
 
--- a/usr/src/cmd/zdb/zdb.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/zdb/zdb.c	Mon Aug 17 09:14:35 2009 -0400
@@ -724,11 +724,11 @@
 
 	(void) printf("Indirect blocks:\n");
 
-	SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
+	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
 	    dn->dn_object, dnp->dn_nlevels - 1, 0);
 	for (j = 0; j < dnp->dn_nblkptr; j++) {
 		czb.zb_blkid = j;
-		(void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
+		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
 		    &dnp->dn_blkptr[j], &czb);
 	}
 
@@ -1073,7 +1073,7 @@
 	}
 
 	if (object == 0) {
-		dn = os->os->os_meta_dnode;
+		dn = os->os_meta_dnode;
 	} else {
 		error = dmu_bonus_hold(os, object, FTAG, &db);
 		if (error)
@@ -1191,21 +1191,21 @@
 
 	if (dds.dds_type == DMU_OST_META) {
 		dds.dds_creation_txg = TXG_INITIAL;
-		usedobjs = os->os->os_rootbp->blk_fill;
-		refdbytes = os->os->os_spa->spa_dsl_pool->
+		usedobjs = os->os_rootbp->blk_fill;
+		refdbytes = os->os_spa->spa_dsl_pool->
 		    dp_mos_dir->dd_phys->dd_used_bytes;
 	} else {
 		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
 	}
 
-	ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill);
+	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
 
 	nicenum(refdbytes, numbuf);
 
 	if (verbosity >= 4) {
 		(void) sprintf(blkbuf + strlen(blkbuf), ", rootbp ");
 		(void) sprintf_blkptr(blkbuf + strlen(blkbuf),
-		    BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
+		    BP_SPRINTF_LEN - strlen(blkbuf), os->os_rootbp);
 	} else {
 		blkbuf[0] = '\0';
 	}
@@ -1227,7 +1227,7 @@
 	if (verbosity < 2)
 		return;
 
-	if (os->os->os_rootbp->blk_birth == 0)
+	if (os->os_rootbp->blk_birth == 0)
 		return;
 
 	if (zopt_objects != 0) {
@@ -1240,8 +1240,8 @@
 
 	dump_object(os, 0, verbosity, &print_header);
 	object_count = 0;
-	if (os->os->os_userused_dnode &&
-	    os->os->os_userused_dnode->dn_type != 0) {
+	if (os->os_userused_dnode &&
+	    os->os_userused_dnode->dn_type != 0) {
 		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
 		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
 	}
@@ -1398,14 +1398,13 @@
 	int error;
 	objset_t *os;
 
-	error = dmu_objset_open(dsname, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os);
+	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
 	if (error) {
 		(void) printf("Could not open %s\n", dsname);
 		return (0);
 	}
 	dump_dir(os);
-	dmu_objset_close(os);
+	dmu_objset_disown(os, FTAG);
 	fuid_table_destroy();
 	return (0);
 }
@@ -2474,8 +2473,8 @@
 
 	if (error == 0) {
 		if (strchr(argv[0], '/') != NULL) {
-			error = dmu_objset_open(argv[0], DMU_OST_ANY,
-			    DS_MODE_USER | DS_MODE_READONLY, &os);
+			error = dmu_objset_own(argv[0], DMU_OST_ANY,
+			    B_TRUE, FTAG, &os);
 		} else {
 			error = spa_open(argv[0], &spa, FTAG);
 		}
@@ -2499,7 +2498,7 @@
 
 	if (os != NULL) {
 		dump_dir(os);
-		dmu_objset_close(os);
+		dmu_objset_disown(os, FTAG);
 	} else {
 		dump_zpool(spa);
 		spa_close(spa, FTAG);
--- a/usr/src/cmd/zinject/translate.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/zinject/translate.c	Mon Aug 17 09:14:35 2009 -0400
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -162,8 +162,8 @@
 	 */
 	sync();
 
-	if ((err = dmu_objset_open(dataset, DMU_OST_ZFS,
-	    DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+	err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
+	if (err != 0) {
 		(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
 		    dataset, strerror(err));
 		return (-1);
@@ -172,7 +172,7 @@
 	record->zi_objset = dmu_objset_id(os);
 	record->zi_object = statbuf->st_ino;
 
-	dmu_objset_close(os);
+	dmu_objset_disown(os, FTAG);
 
 	return (0);
 }
@@ -247,17 +247,17 @@
 	 * Get the dnode associated with object, so we can calculate the block
 	 * size.
 	 */
-	if ((err = dmu_objset_open(dataset, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+	if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
+	    B_TRUE, FTAG, &os)) != 0) {
 		(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
 		    dataset, strerror(err));
 		goto out;
 	}
 
 	if (record->zi_object == 0) {
-		dn = os->os->os_meta_dnode;
+		dn = os->os_meta_dnode;
 	} else {
-		err = dnode_hold(os->os, record->zi_object, FTAG, &dn);
+		err = dnode_hold(os, record->zi_object, FTAG, &dn);
 		if (err != 0) {
 			(void) fprintf(stderr, "failed to hold dnode "
 			    "for object %llu\n",
@@ -306,11 +306,11 @@
 	ret = 0;
 out:
 	if (dn) {
-		if (dn != os->os->os_meta_dnode)
+		if (dn != os->os_meta_dnode)
 			dnode_rele(dn, FTAG);
 	}
 	if (os)
-		dmu_objset_close(os);
+		dmu_objset_disown(os, FTAG);
 
 	return (ret);
 }
--- a/usr/src/cmd/ztest/ztest.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/cmd/ztest/ztest.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1372,8 +1372,7 @@
 	/*
 	 * Verify that the dataset contains a directory object.
 	 */
-	error = dmu_objset_open(name, DMU_OST_OTHER,
-	    DS_MODE_USER | DS_MODE_READONLY, &os);
+	error = dmu_objset_hold(name, FTAG, &os);
 	ASSERT3U(error, ==, 0);
 	error = dmu_object_info(os, ZTEST_DIROBJ, doi);
 	if (error != ENOENT) {
@@ -1382,16 +1381,15 @@
 		ASSERT3U(doi->doi_type, ==, DMU_OT_UINT64_OTHER);
 		ASSERT3S(doi->doi_physical_blks, >=, 0);
 	}
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 
 	/*
 	 * Destroy the dataset.
 	 */
 	error = dmu_objset_destroy(name, B_FALSE);
 	if (error) {
-		(void) dmu_objset_open(name, DMU_OST_OTHER,
-		    DS_MODE_USER | DS_MODE_READONLY, &os);
-		fatal(0, "dmu_objset_destroy(os=%p) = %d\n", &os, error);
+		(void) dmu_objset_hold(name, FTAG, &os);
+		fatal(0, "dmu_objset_destroy(os=%p) = %d\n", os, error);
 	}
 	return (0);
 }
@@ -1434,7 +1432,6 @@
 	int error;
 	objset_t *os, *os2;
 	char name[100];
-	int basemode, expected_error;
 	zilog_t *zilog;
 	uint64_t seq;
 	uint64_t objects;
@@ -1443,19 +1440,15 @@
 	(void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool,
 	    (u_longlong_t)za->za_instance);
 
-	basemode = DS_MODE_TYPE(za->za_instance);
-	if (basemode != DS_MODE_USER && basemode != DS_MODE_OWNER)
-		basemode = DS_MODE_USER;
-
 	/*
 	 * If this dataset exists from a previous run, process its replay log
 	 * half of the time.  If we don't replay it, then dmu_objset_destroy()
 	 * (invoked from ztest_destroy_cb() below) should just throw it away.
 	 */
 	if (ztest_random(2) == 0 &&
-	    dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) {
+	    dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) {
 		zil_replay(os, os, ztest_replay_vector);
-		dmu_objset_close(os);
+		dmu_objset_disown(os, FTAG);
 	}
 
 	/*
@@ -1469,7 +1462,7 @@
 	/*
 	 * Verify that the destroyed dataset is no longer in the namespace.
 	 */
-	error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
+	error = dmu_objset_hold(name, FTAG, &os);
 	if (error != ENOENT)
 		fatal(1, "dmu_objset_open(%s) found destroyed dataset %p",
 		    name, os);
@@ -1488,7 +1481,7 @@
 		fatal(0, "dmu_objset_create(%s) = %d", name, error);
 	}
 
-	error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
+	error = dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os);
 	if (error) {
 		fatal(0, "dmu_objset_open(%s) = %d", name, error);
 	}
@@ -1538,27 +1531,23 @@
 		fatal(0, "created existing dataset, error = %d", error);
 
 	/*
-	 * Verify that multiple dataset holds are allowed, but only when
-	 * the new access mode is compatible with the base mode.
+	 * Verify that we can hold an objset that is also owned.
 	 */
-	if (basemode == DS_MODE_OWNER) {
-		error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_USER,
-		    &os2);
-		if (error)
-			fatal(0, "dmu_objset_open('%s') = %d", name, error);
-		else
-			dmu_objset_close(os2);
-	}
-	error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os2);
-	expected_error = (basemode == DS_MODE_OWNER) ? EBUSY : 0;
-	if (error != expected_error)
-		fatal(0, "dmu_objset_open('%s') = %d, expected %d",
-		    name, error, expected_error);
-	if (error == 0)
-		dmu_objset_close(os2);
+	error = dmu_objset_hold(name, FTAG, &os2);
+	if (error)
+		fatal(0, "dmu_objset_open('%s') = %d", name, error);
+	dmu_objset_rele(os2, FTAG);
+
+	/*
+	 * Verify that we can not own an objset that is already owned.
+	 */
+	error = dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2);
+	if (error != EBUSY)
+		fatal(0, "dmu_objset_open('%s') = %d, expected EBUSY",
+		    name, error);
 
 	zil_close(zilog);
-	dmu_objset_close(os);
+	dmu_objset_disown(os, FTAG);
 
 	error = dmu_objset_destroy(name, B_FALSE);
 	if (error)
@@ -1670,13 +1659,12 @@
 		fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
 	}
 
-	error = dmu_objset_open(snap1name, DMU_OST_OTHER,
-	    DS_MODE_USER | DS_MODE_READONLY, &clone);
+	error = dmu_objset_hold(snap1name, FTAG, &clone);
 	if (error)
 		fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error);
 
 	error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0);
-	dmu_objset_close(clone);
+	dmu_objset_rele(clone, FTAG);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc("dmu_objset_create");
@@ -1705,13 +1693,12 @@
 		fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
 	}
 
-	error = dmu_objset_open(snap3name, DMU_OST_OTHER,
-	    DS_MODE_USER | DS_MODE_READONLY, &clone);
+	error = dmu_objset_hold(snap3name, FTAG, &clone);
 	if (error)
 		fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
 
 	error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0);
-	dmu_objset_close(clone);
+	dmu_objset_rele(clone, FTAG);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc("dmu_objset_create");
@@ -1720,7 +1707,7 @@
 		fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
 	}
 
-	error = dsl_dataset_own(snap1name, DS_MODE_READONLY, FTAG, &ds);
+	error = dsl_dataset_own(snap1name, B_FALSE, FTAG, &ds);
 	if (error)
 		fatal(0, "dsl_dataset_own(%s) = %d", snap1name, error);
 	error = dsl_dataset_promote(clone2name);
@@ -3806,8 +3793,7 @@
 				fatal(0, "dmu_objset_create(%s) = %d",
 				    name, error);
 			}
-			error = dmu_objset_open(name, DMU_OST_OTHER,
-			    DS_MODE_USER, &za[d].za_os);
+			error = dmu_objset_hold(name, FTAG, &za[d].za_os);
 			if (error)
 				fatal(0, "dmu_objset_open('%s') = %d",
 				    name, error);
@@ -3827,7 +3813,7 @@
 		VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0);
 		if (t < zopt_datasets) {
 			zil_close(za[t].za_zilog);
-			dmu_objset_close(za[t].za_os);
+			dmu_objset_rele(za[t].za_os, FTAG);
 		}
 	}
 
--- a/usr/src/common/zfs/zfs_prop.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/common/zfs/zfs_prop.c	Mon Aug 17 09:14:35 2009 -0400
@@ -152,6 +152,12 @@
 		{ NULL }
 	};
 
+	static zprop_index_t logbias_table[] = {
+		{ "latency",	ZFS_LOGBIAS_LATENCY },
+		{ "throughput",	ZFS_LOGBIAS_THROUGHPUT },
+		{ NULL }
+	};
+
 	static zprop_index_t canmount_table[] = {
 		{ "off",	ZFS_CANMOUNT_OFF },
 		{ "on",		ZFS_CANMOUNT_ON },
@@ -196,6 +202,9 @@
 	    ZFS_CACHE_ALL, PROP_INHERIT,
 	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
 	    "all | none | metadata", "SECONDARYCACHE", cache_table);
+	register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY,
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+	    "latency | throughput", "LOGBIAS", logbias_table);
 
 	/* inherit index (boolean) properties */
 	register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
--- a/usr/src/lib/cfgadm_plugins/sata/common/cfga_sata.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/lib/cfgadm_plugins/sata/common/cfga_sata.c	Mon Aug 17 09:14:35 2009 -0400
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -59,7 +59,7 @@
 	NULL,
 	"SATA specific commands:\n",
 	" cfgadm -c [configure|unconfigure|disconnect|connect] ap_id "
-	"[ap_id...]\n",
+	    "[ap_id...]\n",
 	" cfgadm -x sata_reset_port ap_id  [ap_id...]\n",
 	" cfgadm -x sata_reset_device ap_id [ap_id...]\n",
 	" cfgadm -x sata_reset_all ap_id\n",
@@ -114,7 +114,7 @@
 
 	/* CFGA_SATA_DEVCTL / CFGA_LIB_ERROR -> "Library error" */
 	{ CVT, CFGA_LIB_ERROR, "Internal error: "
-	    "Cannot allocate devctl handle " },
+		"Cannot allocate devctl handle " },
 
 	/*
 	 * CFGA_SATA_DEV_CONFIGURE /
@@ -226,15 +226,15 @@
 
 static cfga_sata_ret_t
 setup_for_devctl_cmd(const char *ap_id, devctl_hdl_t *devctl_hdl,
-	nvlist_t **user_nvlistp, uint_t oflag);
+    nvlist_t **user_nvlistp, uint_t oflag);
 
 static cfga_sata_ret_t
 port_state(devctl_hdl_t hdl, nvlist_t *list,
-	ap_rstate_t *rstate, ap_ostate_t *ostate);
+    ap_rstate_t *rstate, ap_ostate_t *ostate);
 
 static cfga_sata_ret_t
 do_control_ioctl(const char *ap_id, sata_cfga_apctl_t subcommand, uint_t arg,
-	void **descrp, size_t *sizep);
+    void **descrp, size_t *sizep);
 
 static void
 cleanup_after_devctl_cmd(devctl_hdl_t devctl_hdl, nvlist_t *user_nvlist);
@@ -245,6 +245,8 @@
 static int
 sata_confirm(struct cfga_confirm *confp, char *msg);
 
+static cfga_sata_ret_t
+get_port_num(const char *ap_id, uint32_t *port);
 
 /* Utilities */
 
@@ -399,8 +401,8 @@
 	}
 
 	return ((msg_tbl[msg_index].intl) ?
-		dgettext(TEXT_DOMAIN, msg_tbl[msg_index].msgstr) :
-		msg_tbl[msg_index].msgstr);
+	    dgettext(TEXT_DOMAIN, msg_tbl[msg_index].msgstr) :
+	    msg_tbl[msg_index].msgstr);
 }
 
 /*
@@ -453,10 +455,10 @@
 
 cfga_err_t
 sata_err_msg(
-	char **errstring,
-	cfga_sata_ret_t rv,
-	const char *ap_id,
-	int l_errno)
+    char **errstring,
+    cfga_sata_ret_t rv,
+    const char *ap_id,
+    int l_errno)
 {
 	if (errstring == NULL) {
 		return (sata_msgs[rv].cfga_err);
@@ -488,7 +490,7 @@
 		/* hardware-specific help needed */
 		set_msg(errstring, ERR_STR(rv), NULL);
 		set_msg(errstring, "\n",
-			dgettext(TEXT_DOMAIN, sata_help[HELP_HEADER]), NULL);
+		    dgettext(TEXT_DOMAIN, sata_help[HELP_HEADER]), NULL);
 		set_msg(errstring, sata_help[HELP_RESET_PORT], NULL);
 		set_msg(errstring, sata_help[HELP_RESET_DEVICE], NULL);
 		set_msg(errstring, sata_help[HELP_RESET_ALL],  NULL);
@@ -559,13 +561,13 @@
 /*ARGSUSED*/
 cfga_err_t
 cfga_change_state(
-	cfga_cmd_t state_change_cmd,
-	const char *ap_id,
-	const char *options,
-	struct cfga_confirm *confp,
-	struct cfga_msg *msgp,
-	char **errstring,
-	cfga_flags_t flags)
+    cfga_cmd_t state_change_cmd,
+    const char *ap_id,
+    const char *options,
+    struct cfga_confirm *confp,
+    struct cfga_msg *msgp,
+    char **errstring,
+    cfga_flags_t flags)
 {
 	int		ret;
 	int 		len;
@@ -577,6 +579,9 @@
 	devctl_hdl_t	hdl = NULL;
 	cfga_sata_ret_t	rv = CFGA_SATA_OK;
 	char		*pdyn;
+	char		*str_type;
+	size_t		size;
+	boolean_t	pmult = B_FALSE;
 
 	/*
 	 * All sub-commands which can change state of device require
@@ -593,12 +598,30 @@
 	}
 
 	if ((rv = setup_for_devctl_cmd(ap_id, &hdl, &nvl,
-		DC_RDONLY)) != CFGA_SATA_OK) {
+	    DC_RDONLY)) != CFGA_SATA_OK) {
 		goto bailout;
 	}
 
+	/*
+	 * Checking device type. A port multiplier is not configurable - it is
+	 * already configured as soon as it is connected.
+	 */
+	if ((rv = do_control_ioctl(ap_id, SATA_CFGA_GET_AP_TYPE, NULL,
+	    (void **)&str_type, &size)) != CFGA_SATA_OK) {
+		/* no such deivce */
+		goto bailout;
+	}
+	if (strncmp(str_type, "sata-pmult", sizeof ("sata-pmult")) == 0) {
+		pmult = B_TRUE;
+	}
+
 	switch (state_change_cmd) {
 	case CFGA_CMD_CONFIGURE:
+		if (pmult == B_TRUE) {
+			rv = CFGA_SATA_HWOPNOTSUPP;
+			goto bailout;
+		}
+
 		if ((rv = port_state(hdl, nvl, &rstate, &ostate)) !=
 		    CFGA_SATA_OK)
 			goto bailout;
@@ -647,6 +670,11 @@
 		break;
 
 	case CFGA_CMD_UNCONFIGURE:
+		if (pmult == B_TRUE) {
+			rv = CFGA_SATA_HWOPNOTSUPP;
+			goto bailout;
+		}
+
 		if ((rv = port_state(hdl, nvl, &rstate, &ostate)) !=
 		    CFGA_SATA_OK)
 			goto bailout;
@@ -668,13 +696,13 @@
 		rv = CFGA_SATA_OK;
 
 		len = strlen(SATA_CONFIRM_DEVICE) +
-			strlen(SATA_CONFIRM_DEVICE_SUSPEND) +
-			strlen("Unconfigure") + strlen(ap_id);
+		    strlen(SATA_CONFIRM_DEVICE_SUSPEND) +
+		    strlen("Unconfigure") + strlen(ap_id);
 		if ((msg = (char *)calloc(len +3, 1)) != NULL) {
 			(void) snprintf(msg, len + 3, "Unconfigure"
-				" %s%s\n%s",
-				SATA_CONFIRM_DEVICE, ap_id,
-				SATA_CONFIRM_DEVICE_SUSPEND);
+			    " %s%s\n%s",
+			    SATA_CONFIRM_DEVICE, ap_id,
+			    SATA_CONFIRM_DEVICE_SUSPEND);
 		}
 
 		if (!sata_confirm(confp, msg)) {
@@ -687,7 +715,7 @@
 		devpath = sata_get_devicepath(ap_id);
 		if (devpath == NULL) {
 			(void) printf(
-				"cfga_change_state: get device path failed\n");
+			    "cfga_change_state: get device path failed\n");
 			rv = CFGA_SATA_DEV_UNCONFIGURE;
 			break;
 		}
@@ -705,10 +733,10 @@
 				rv = CFGA_SATA_BUSY;
 			}
 			(void) sata_rcm_online(ap_id, errstring, devpath,
-				flags);
+			    flags);
 		} else {
 			(void) sata_rcm_remove(ap_id, errstring, devpath,
-				flags);
+			    flags);
 
 		}
 		S_FREE(devpath);
@@ -733,15 +761,14 @@
 
 		rv = CFGA_SATA_OK; /* other statuses don't matter */
 
-
 		/*
 		 * If the port originally with device attached and was
 		 * unconfigured already, the devicepath for the sd will be
 		 * removed. sata_get_devicepath in this case is not necessary.
 		 */
-
 		/* only call rcm_offline if the state was CONFIGURED */
-		if (ostate == AP_OSTATE_CONFIGURED) {
+		if (ostate == AP_OSTATE_CONFIGURED &&
+		    pmult == B_FALSE) {
 			devpath = sata_get_devicepath(ap_id);
 			if (devpath == NULL) {
 				(void) printf(
@@ -751,14 +778,14 @@
 			}
 
 			len = strlen(SATA_CONFIRM_DEVICE) +
-				strlen(SATA_CONFIRM_DEVICE_SUSPEND) +
-				strlen("Disconnect") + strlen(ap_id);
+			    strlen(SATA_CONFIRM_DEVICE_SUSPEND) +
+			    strlen("Disconnect") + strlen(ap_id);
 			if ((msg = (char *)calloc(len +3, 1)) != NULL) {
 				(void) snprintf(msg, len + 3,
-					"Disconnect"
-					" %s%s\n%s",
-					SATA_CONFIRM_DEVICE, ap_id,
-					SATA_CONFIRM_DEVICE_SUSPEND);
+				    "Disconnect"
+				    " %s%s\n%s",
+				    SATA_CONFIRM_DEVICE, ap_id,
+				    SATA_CONFIRM_DEVICE_SUSPEND);
 			}
 			if (!sata_confirm(confp, msg)) {
 				free(msg);
@@ -780,7 +807,7 @@
 				if (errno == EBUSY)
 					rv = CFGA_SATA_BUSY;
 				(void) sata_rcm_online(ap_id, errstring,
-					devpath, flags);
+				    devpath, flags);
 				S_FREE(devpath);
 
 				/*
@@ -796,20 +823,20 @@
 				(void) printf("%s\n",
 				    ERR_STR(CFGA_SATA_DEVICE_UNCONFIGURED));
 				(void) sata_rcm_remove(ap_id, errstring,
-					devpath, flags);
+				    devpath, flags);
 			}
 			S_FREE(devpath);
 		} else if (rstate == AP_RSTATE_CONNECTED ||
 		    rstate == AP_RSTATE_EMPTY) {
 			len = strlen(SATA_CONFIRM_PORT) +
-				strlen(SATA_CONFIRM_PORT_DISABLE) +
-				strlen("Deactivate Port") + strlen(ap_id);
+			    strlen(SATA_CONFIRM_PORT_DISABLE) +
+			    strlen("Deactivate Port") + strlen(ap_id);
 			if ((msg = (char *)calloc(len +3, 1)) != NULL) {
 				(void) snprintf(msg, len +3,
-					"Disconnect"
-					" %s%s\n%s",
-					SATA_CONFIRM_PORT, ap_id,
-					SATA_CONFIRM_PORT_DISABLE);
+				    "Disconnect"
+				    " %s%s\n%s",
+				    SATA_CONFIRM_PORT, ap_id,
+				    SATA_CONFIRM_PORT_DISABLE);
 			}
 			if (!sata_confirm(confp, msg)) {
 				free(msg);
@@ -837,13 +864,13 @@
 		}
 
 		len = strlen(SATA_CONFIRM_PORT) +
-			strlen(SATA_CONFIRM_PORT_ENABLE) +
-			strlen("Activate Port") + strlen(ap_id);
+		    strlen(SATA_CONFIRM_PORT_ENABLE) +
+		    strlen("Activate Port") + strlen(ap_id);
 		if ((msg = (char *)calloc(len +3, 1)) != NULL) {
 			(void) snprintf(msg, len +3, "Activate"
-				" %s%s\n%s",
-				SATA_CONFIRM_PORT, ap_id,
-				SATA_CONFIRM_PORT_ENABLE);
+			    " %s%s\n%s",
+			    SATA_CONFIRM_PORT, ap_id,
+			    SATA_CONFIRM_PORT_ENABLE);
 		}
 		if (!sata_confirm(confp, msg)) {
 			rv = CFGA_SATA_NACK;
@@ -886,13 +913,13 @@
 /* cfgadm entry point */
 cfga_err_t
 cfga_private_func(
-	const char *func,
-	const char *ap_id,
-	const char *options,
-	struct cfga_confirm *confp,
-	struct cfga_msg *msgp,
-	char **errstring,
-	cfga_flags_t flags)
+    const char *func,
+    const char *ap_id,
+    const char *options,
+    struct cfga_confirm *confp,
+    struct cfga_msg *msgp,
+    char **errstring,
+    cfga_flags_t flags)
 {
 	int			len;
 	char 			*msg;
@@ -943,9 +970,9 @@
 
 		if ((msg = (char *)calloc(len +3, 1)) != NULL) {
 			(void) snprintf(msg, len +3, "Reset"
-				" %s%s\n%s",
-				SATA_CONFIRM_PORT, ap_id,
-				SATA_CONFIRM_DEVICE_ABORT);
+			    " %s%s\n%s",
+			    SATA_CONFIRM_PORT, ap_id,
+			    SATA_CONFIRM_DEVICE_ABORT);
 		} else {
 			rv = CFGA_SATA_NACK;
 			goto bailout;
@@ -957,7 +984,7 @@
 		}
 
 		rv = do_control_ioctl(ap_id, SATA_CFGA_RESET_PORT, NULL,
-			(void **)&str_p, &size);
+		    (void **)&str_p, &size);
 
 	} else if (strcmp(func, SATA_RESET_DEVICE) == 0) {
 		if ((rv = port_state(hdl, list, &rstate, &ostate)) !=
@@ -977,9 +1004,9 @@
 
 		if ((msg = (char *)calloc(len +3, 1)) != NULL) {
 			(void) snprintf(msg, len +3, "Reset"
-				" %s%s\n%s",
-				SATA_CONFIRM_DEVICE, ap_id,
-				SATA_CONFIRM_DEVICE_ABORT);
+			    " %s%s\n%s",
+			    SATA_CONFIRM_DEVICE, ap_id,
+			    SATA_CONFIRM_DEVICE_ABORT);
 		} else {
 			rv = CFGA_SATA_NACK;
 			goto bailout;
@@ -1000,9 +1027,9 @@
 
 		if ((msg = (char *)calloc(len +3, 1)) != NULL) {
 			(void) snprintf(msg, len +3, "Reset"
-				" %s%s\n%s",
-				SATA_CONFIRM_CONTROLLER, ap_id,
-				SATA_CONFIRM_CONTROLLER_ABORT);
+			    " %s%s\n%s",
+			    SATA_CONFIRM_CONTROLLER, ap_id,
+			    SATA_CONFIRM_CONTROLLER_ABORT);
 		} else {
 			rv = CFGA_SATA_NACK;
 			goto bailout;
@@ -1013,7 +1040,7 @@
 			goto bailout;
 		}
 		rv = do_control_ioctl(ap_id, SATA_CFGA_RESET_ALL, NULL,
-			(void **)&str_p, &size);
+		    (void **)&str_p, &size);
 
 	} else if (strcmp(func, SATA_PORT_DEACTIVATE) == 0) {
 		len = strlen(SATA_CONFIRM_PORT) +
@@ -1022,9 +1049,9 @@
 
 		if ((msg = (char *)calloc(len +3, 1)) != NULL) {
 			(void) snprintf(msg, len +3, "Deactivate"
-				" %s%s\n%s",
-				SATA_CONFIRM_PORT, ap_id,
-				SATA_CONFIRM_PORT_DISABLE);
+			    " %s%s\n%s",
+			    SATA_CONFIRM_PORT, ap_id,
+			    SATA_CONFIRM_PORT_DISABLE);
 		} else {
 			rv = CFGA_SATA_NACK;
 			goto bailout;
@@ -1035,7 +1062,7 @@
 		}
 
 		rv = do_control_ioctl(ap_id, SATA_CFGA_PORT_DEACTIVATE, NULL,
-			(void **)&str_p, &size);
+		    (void **)&str_p, &size);
 
 	} else if (strcmp(func, SATA_PORT_ACTIVATE) == 0) {
 		len = strlen(SATA_CONFIRM_PORT) +
@@ -1044,9 +1071,9 @@
 
 		if ((msg = (char *)calloc(len +3, 1)) != NULL) {
 			(void) snprintf(msg, len +3, "Activate"
-				" %s%s\n%s",
-				SATA_CONFIRM_PORT, ap_id,
-				SATA_CONFIRM_PORT_ENABLE);
+			    " %s%s\n%s",
+			    SATA_CONFIRM_PORT, ap_id,
+			    SATA_CONFIRM_PORT_ENABLE);
 		} else {
 			rv = CFGA_SATA_NACK;
 			goto bailout;
@@ -1058,7 +1085,7 @@
 
 		rv = do_control_ioctl(ap_id, SATA_CFGA_PORT_ACTIVATE,
 		    NULL, (void **)&str_p, &size);
-			goto bailout;
+		goto bailout;
 
 	} else if (strcmp(func, SATA_PORT_SELF_TEST) == 0) {
 		len = strlen(SATA_CONFIRM_PORT) +
@@ -1067,9 +1094,9 @@
 
 		if ((msg = (char *)calloc(len +3, 1)) != NULL) {
 			(void) snprintf(msg, len +3, "Self Test"
-				" %s%s\n%s",
-				SATA_CONFIRM_PORT, ap_id,
-				SATA_CONFIRM_DEVICE_SUSPEND);
+			    " %s%s\n%s",
+			    SATA_CONFIRM_PORT, ap_id,
+			    SATA_CONFIRM_DEVICE_SUSPEND);
 		} else {
 			rv = CFGA_SATA_NACK;
 			goto bailout;
@@ -1080,7 +1107,7 @@
 		}
 
 		rv = do_control_ioctl(ap_id, SATA_CFGA_PORT_SELF_TEST,
-			NULL, (void **)&str_p, &size);
+		    NULL, (void **)&str_p, &size);
 	} else {
 		/* Unrecognized operation request */
 		rv = CFGA_SATA_HWOPNOTSUPP;
@@ -1097,11 +1124,11 @@
 /*ARGSUSED*/
 cfga_err_t
 cfga_test(
-	const char *ap_id,
-	const char *options,
-	struct cfga_msg *msgp,
-	char **errstring,
-	cfga_flags_t flags)
+    const char *ap_id,
+    const char *options,
+    struct cfga_msg *msgp,
+    char **errstring,
+    cfga_flags_t flags)
 {
 	/* Should call ioctl for self test - phase 2 */
 	return (CFGA_OPNOTSUPP);
@@ -1382,13 +1409,13 @@
 /*ARGSUSED*/
 cfga_err_t
 cfga_list_ext(
-	const char *ap_id,
-	cfga_list_data_t **ap_id_list,
-	int *nlistp,
-	const char *options,
-	const char *listopts,
-	char **errstring,
-	cfga_flags_t flags)
+    const char *ap_id,
+    cfga_list_data_t **ap_id_list,
+    int *nlistp,
+    const char *options,
+    const char *listopts,
+    char **errstring,
+    cfga_flags_t flags)
 {
 	int			l_errno;
 	char			*ap_id_log = NULL;
@@ -1398,6 +1425,8 @@
 	cfga_sata_ret_t		rv = CFGA_SATA_OK;
 	devctl_ap_state_t	devctl_ap_state;
 	char			*pdyn;
+	boolean_t		pmult = B_FALSE;
+	uint32_t		port;
 
 
 	if ((rv = verify_params(ap_id, options, errstring)) != CFGA_SATA_OK) {
@@ -1472,61 +1501,61 @@
 	    sizeof ((*ap_id_list)->ap_phys_id));
 
 	switch (devctl_ap_state.ap_rstate) {
-		case AP_RSTATE_EMPTY:
-			(*ap_id_list)->ap_r_state = CFGA_STAT_EMPTY;
-			break;
+	case AP_RSTATE_EMPTY:
+		(*ap_id_list)->ap_r_state = CFGA_STAT_EMPTY;
+		break;
 
-		case AP_RSTATE_DISCONNECTED:
-			(*ap_id_list)->ap_r_state = CFGA_STAT_DISCONNECTED;
-			break;
+	case AP_RSTATE_DISCONNECTED:
+		(*ap_id_list)->ap_r_state = CFGA_STAT_DISCONNECTED;
+		break;
 
-		case AP_RSTATE_CONNECTED:
-			(*ap_id_list)->ap_r_state = CFGA_STAT_CONNECTED;
-			break;
+	case AP_RSTATE_CONNECTED:
+		(*ap_id_list)->ap_r_state = CFGA_STAT_CONNECTED;
+		break;
 
-		default:
-			rv = CFGA_SATA_STATE;
-			goto bailout;
+	default:
+		rv = CFGA_SATA_STATE;
+		goto bailout;
 	}
 
 	switch (devctl_ap_state.ap_ostate) {
-		case AP_OSTATE_CONFIGURED:
-			(*ap_id_list)->ap_o_state = CFGA_STAT_CONFIGURED;
-			break;
+	case AP_OSTATE_CONFIGURED:
+		(*ap_id_list)->ap_o_state = CFGA_STAT_CONFIGURED;
+		break;
 
-		case AP_OSTATE_UNCONFIGURED:
-			(*ap_id_list)->ap_o_state = CFGA_STAT_UNCONFIGURED;
-			break;
+	case AP_OSTATE_UNCONFIGURED:
+		(*ap_id_list)->ap_o_state = CFGA_STAT_UNCONFIGURED;
+		break;
 
-		default:
-			rv = CFGA_SATA_STATE;
-			goto bailout;
+	default:
+		rv = CFGA_SATA_STATE;
+		goto bailout;
 	}
 
 	switch (devctl_ap_state.ap_condition) {
-		case AP_COND_OK:
-			(*ap_id_list)->ap_cond = CFGA_COND_OK;
-			break;
+	case AP_COND_OK:
+		(*ap_id_list)->ap_cond = CFGA_COND_OK;
+		break;
 
-		case AP_COND_FAILING:
-			(*ap_id_list)->ap_cond = CFGA_COND_FAILING;
-			break;
+	case AP_COND_FAILING:
+		(*ap_id_list)->ap_cond = CFGA_COND_FAILING;
+		break;
 
-		case AP_COND_FAILED:
-			(*ap_id_list)->ap_cond = CFGA_COND_FAILED;
-			break;
+	case AP_COND_FAILED:
+		(*ap_id_list)->ap_cond = CFGA_COND_FAILED;
+		break;
 
-		case AP_COND_UNUSABLE:
-			(*ap_id_list)->ap_cond = CFGA_COND_UNUSABLE;
-			break;
+	case AP_COND_UNUSABLE:
+		(*ap_id_list)->ap_cond = CFGA_COND_UNUSABLE;
+		break;
 
-		case AP_COND_UNKNOWN:
-			(*ap_id_list)->ap_cond = CFGA_COND_UNKNOWN;
-			break;
+	case AP_COND_UNKNOWN:
+		(*ap_id_list)->ap_cond = CFGA_COND_UNKNOWN;
+		break;
 
-		default:
-			rv = CFGA_SATA_STATE;
-			goto bailout;
+	default:
+		rv = CFGA_SATA_STATE;
+		goto bailout;
 	}
 
 	(*ap_id_list)->ap_class[0] = '\0';	/* Filled by libcfgadm */
@@ -1545,7 +1574,7 @@
 		if ((rv = do_control_ioctl(ap_id, SATA_CFGA_GET_MODEL_INFO,
 		    NULL, (void **)&str_p, &size)) != CFGA_SATA_OK) {
 			(void) printf(
-				"SATA_CFGA_GET_MODULE_INFO ioctl failed\n");
+			    "SATA_CFGA_GET_MODULE_INFO ioctl failed\n");
 			goto bailout;
 		}
 		/* drop leading and trailing spaces */
@@ -1558,9 +1587,9 @@
 		}
 
 		(void) strlcpy((*ap_id_list)->ap_info, "Mod: ",
-			sizeof ((*ap_id_list)->ap_info));
+		    sizeof ((*ap_id_list)->ap_info));
 		(void) strlcat((*ap_id_list)->ap_info, str_p + skip,
-			sizeof ((*ap_id_list)->ap_info));
+		    sizeof ((*ap_id_list)->ap_info));
 
 		free(str_p);
 
@@ -1584,9 +1613,9 @@
 				break;
 		}
 		(void) strlcat((*ap_id_list)->ap_info, " FRev: ",
-			sizeof ((*ap_id_list)->ap_info));
+		    sizeof ((*ap_id_list)->ap_info));
 		(void) strlcat((*ap_id_list)->ap_info, str_p + skip,
-			sizeof ((*ap_id_list)->ap_info));
+		    sizeof ((*ap_id_list)->ap_info));
 
 		free(str_p);
 
@@ -1611,9 +1640,9 @@
 				break;
 		}
 		(void) strlcat((*ap_id_list)->ap_info, " SN: ",
-				sizeof ((*ap_id_list)->ap_info));
+		    sizeof ((*ap_id_list)->ap_info));
 		(void) strlcat((*ap_id_list)->ap_info, str_p + skip,
-				sizeof ((*ap_id_list)->ap_info));
+		    sizeof ((*ap_id_list)->ap_info));
 
 		free(str_p);
 
@@ -1624,16 +1653,25 @@
 		if ((rv = do_control_ioctl(ap_id, SATA_CFGA_GET_AP_TYPE, NULL,
 		    (void **)&str_p, &size)) != CFGA_SATA_OK) {
 			(void) printf(
-				"SATA_CFGA_GET_AP_TYPE ioctl failed\n");
+			    "SATA_CFGA_GET_AP_TYPE ioctl failed\n");
 			goto bailout;
 		}
 
 		(void) strlcpy((*ap_id_list)->ap_type, str_p,
-			sizeof ((*ap_id_list)->ap_type));
+		    sizeof ((*ap_id_list)->ap_type));
 
 		free(str_p);
 
-		if ((*ap_id_list)->ap_o_state == CFGA_STAT_CONFIGURED) {
+		/*
+		 * Checking device type. Port multiplier has no dynamic
+		 * suffix.
+		 */
+		if (strncmp((*ap_id_list)->ap_type, "sata-pmult",
+		    sizeof ("sata-pmult")) == 0)
+			pmult = B_TRUE;
+
+		if ((*ap_id_list)->ap_o_state == CFGA_STAT_CONFIGURED &&
+		    pmult == B_FALSE) {
 
 			char *dyncomp = NULL;
 
@@ -1647,18 +1685,27 @@
 				goto bailout;
 			if (dyncomp != NULL) {
 				(void) strcat((*ap_id_list)->ap_log_id,
-					DYN_SEP);
+				    DYN_SEP);
 				(void) strlcat((*ap_id_list)->ap_log_id,
-					dyncomp,
-					sizeof ((*ap_id_list)->ap_log_id));
+				    dyncomp,
+				    sizeof ((*ap_id_list)->ap_log_id));
 				free(dyncomp);
 			}
 		}
 
 	} else {
-		/* Change it when port multiplier is supported */
-		(void) strlcpy((*ap_id_list)->ap_type, "sata-port",
-			sizeof ((*ap_id_list)->ap_type));
+		/* This is an empty port */
+		if (get_port_num(ap_id, &port) != SATA_CFGA_OK) {
+			goto bailout;
+		}
+
+		if (port & SATA_CFGA_PMPORT_QUAL) {
+			(void) strlcpy((*ap_id_list)->ap_type, "pmult-port",
+			    sizeof ((*ap_id_list)->ap_type));
+		} else {
+			(void) strlcpy((*ap_id_list)->ap_type, "sata-port",
+			    sizeof ((*ap_id_list)->ap_type));
+		}
 	}
 
 	return (sata_err_msg(errstring, rv, ap_id, errno));
@@ -1694,8 +1741,7 @@
 	}
 
 	if ((q = (char *)calloc(len + 1, 1)) == NULL) {
-		perror("cfga_msg"
-);
+		perror("cfga_msg");
 		return;
 	}
 
@@ -1766,9 +1812,9 @@
  */
 static cfga_sata_ret_t
 verify_params(
-	const char *ap_id,
-	const char *options,
-	char **errstring)
+    const char *ap_id,
+    const char *options,
+    char **errstring)
 {
 	char *pdyn, *lap_id;
 	int rv;
@@ -1802,29 +1848,35 @@
 
 /*
  * Takes a validated ap_id and extracts the port number.
- * For now, we do not support port multiplier port .
+ * Port multiplier is supported now.
  */
 static cfga_sata_ret_t
 get_port_num(const char *ap_id, uint32_t *port)
 {
-	char *port_nbr_str;
-	char *temp;
-	uint32_t cport;
-	uint32_t pmport = 0;	/* port multiplier not supported yet */
-	int pmport_qual = 0;	/* port multiplier not supported yet */
+	uint32_t	cport, pmport = 0, qual = 0;
+	char		*cport_str, *pmport_str;
 
-	port_nbr_str = strrchr(ap_id, (int)*MINOR_SEP) + strlen(MINOR_SEP);
-	if ((temp = strrchr(ap_id, (int)*PORT_SEPARATOR)) != 0) {
-		port_nbr_str = temp + strlen(PORT_SEPARATOR);
+	/* Get the cport number */
+	cport_str = strrchr(ap_id, (int)*MINOR_SEP) + strlen(MINOR_SEP);
+
+	errno = 0;
+	cport = strtol(cport_str, NULL, 10);
+	if ((cport & ~SATA_CFGA_CPORT_MASK) != 0 || errno != 0) {
+		return (CFGA_SATA_PORT);
 	}
 
+	/* Get pmport number if there is a PORT_SEPARATOR */
 	errno = 0;
-	cport = strtol(port_nbr_str, NULL, 10);
-	if ((cport & ~SATA_CFGA_CPORT_MASK) != 0 || errno != 0)
-		return (CFGA_SATA_PORT);
+	if ((pmport_str = strrchr(ap_id, (int)*PORT_SEPARATOR)) != 0) {
+		pmport_str += strlen(PORT_SEPARATOR);
+		pmport = strtol(pmport_str, NULL, 10);
+		qual = SATA_CFGA_PMPORT_QUAL;
+		if ((pmport & ~SATA_CFGA_PMPORT_MASK) != 0 || errno != 0) {
+			return (CFGA_SATA_PORT);
+		}
+	}
 
-	*port = cport + (pmport << SATA_CFGA_PMPORT_SHIFT) + pmport_qual;
-
+	*port = cport | (pmport << SATA_CFGA_PMPORT_SHIFT) | qual;
 	return (CFGA_SATA_OK);
 }
 
@@ -1844,10 +1896,10 @@
 
 static cfga_sata_ret_t
 setup_for_devctl_cmd(
-	const char *ap_id,
-	devctl_hdl_t *devctl_hdl,
-	nvlist_t **user_nvlistp,
-	uint_t oflag)
+    const char *ap_id,
+    devctl_hdl_t *devctl_hdl,
+    nvlist_t **user_nvlistp,
+    uint_t oflag)
 {
 
 	uint_t	port;
@@ -1887,8 +1939,8 @@
 	 */
 	if ((rv = get_port_num(lap_id, &port)) != CFGA_SATA_OK) {
 		(void) printf(
-			"setup_for_devctl_cmd: get_port_num, errno: %d\n",
-			errno);
+		    "setup_for_devctl_cmd: get_port_num, errno: %d\n",
+		    errno);
 		goto bailout;
 	}
 
@@ -1912,7 +1964,7 @@
 
 static cfga_sata_ret_t
 port_state(devctl_hdl_t hdl, nvlist_t *list,
-	ap_rstate_t *rstate, ap_ostate_t *ostate)
+    ap_rstate_t *rstate, ap_ostate_t *ostate)
 {
 	devctl_ap_state_t	devctl_ap_state;
 
@@ -1936,7 +1988,7 @@
  */
 cfga_sata_ret_t
 do_control_ioctl(const char *ap_id, sata_cfga_apctl_t subcommand, uint_t arg,
-		void **descrp, size_t *sizep)
+    void **descrp, size_t *sizep)
 {
 	int			fd = -1;
 	uint_t			port;
@@ -1954,7 +2006,7 @@
 
 	if ((fd = open(ap_id, O_RDONLY)) == -1) {
 		(void) printf("do_control_ioctl: open failed: errno:%d\n",
-			errno);
+		    errno);
 		rv = CFGA_SATA_OPEN;
 		if (errno == EBUSY) {
 			rv = CFGA_SATA_BUSY;
--- a/usr/src/lib/krb5/plugins/kdb/db2/kdb_db2.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/lib/krb5/plugins/kdb/db2/kdb_db2.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -569,14 +569,14 @@
 	    /* Solaris Kerberos: Better error logging */
 	    snprintf(errbuf, sizeof(errbuf), gettext("Failed to modify "
 	        "access and modification times for \"%s\": "),
-	        db_ctx->db_lf_file);
+	        db_ctx->db_lf_name);
 	    krb5_db2_prepend_err_str(context, errbuf, retval, retval);
 	}
     } else {
 	retval = errno;
 	/* Solaris Kerberos: Better error logging */
 	snprintf(errbuf, sizeof(errbuf), gettext("Failed to stat \"%s\": "),
-	    db_ctx->db_lf_file);
+	    db_ctx->db_lf_name);
 	krb5_db2_prepend_err_str(context, errbuf, retval, retval);
     }
     if (!retval) {
@@ -586,7 +586,7 @@
 	    retval = errno;
 	    /* Solaris Kerberos: Better error logging */
 	    snprintf(errbuf, sizeof(errbuf), gettext("Failed to stat \"%s\": "),
-	        db_ctx->db_lf_file);
+	        db_ctx->db_lf_name);
 	    krb5_db2_prepend_err_str(context, errbuf, retval, retval);
 	}
     }
@@ -648,7 +648,7 @@
 	    /* Solaris Kerberos: Better error logging */
 	    snprintf(errbuf, sizeof(errbuf),
 	        gettext("Failed to exclusively lock \"%s\": "),
-	        db_ctx->db_lf_file);
+	        db_ctx->db_lf_name);
 	    krb5_db2_prepend_err_str(context, errbuf, EBADF, EBADF);
 
 	    return KRB5_KDB_CANTLOCK_DB;
@@ -660,7 +660,7 @@
 	/* Solaris Kerberos: Better error logging */
 	snprintf(errbuf, sizeof(errbuf),
 	    gettext("Failed to lock \"%s\": "),
-	    db_ctx->db_lf_file);
+	    db_ctx->db_lf_name);
 	krb5_db2_prepend_err_str(context, errbuf, retval, retval);
     }
 
--- a/usr/src/lib/libnwam/Makefile.com	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/lib/libnwam/Makefile.com	Mon Aug 17 09:14:35 2009 -0400
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 
@@ -38,7 +38,7 @@
 
 $(LINTLIB):=	SRCS = $(SRCDIR)/$(LINTSRC)
 
-LDLIBS +=	-ldoor -ldladm -lc
+LDLIBS +=	-ldladm -lc
 
 CFLAGS +=	$(CCVERBOSE)
 CPPFLAGS +=	-I$(SRCDIR) -I$(NWAMDIR)
--- a/usr/src/lib/libscf/Makefile.com	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/lib/libscf/Makefile.com	Mon Aug 17 09:14:35 2009 -0400
@@ -61,13 +61,13 @@
 # of libuutil.
 #
 LIBUUTIL =	$(SRC)/lib/libuutil
-s390_NCPPFL =	-_gcc=-nostdinc -I$(ROOT)/usr/include
-MY_NATIVE_CPPFLAGS =\
-		-DNATIVE_BUILD $(DTEXTDOM) \
-		$($(MACH)_NCPPFL)	\
+MY_NATIVE_CPPFLAGS_s390 =	-_gcc=-nostdinc -I$(ROOT)/usr/include
+MY_NATIVE_CPPFLAGS =				\
+		-DNATIVE_BUILD $(DTEXTDOM)	\
+		$(MY_NATIVE_CPPFLAGS_$(MACH))	\
 		-I../inc -I$(COMDIR) -I$(LIBUUTIL)/common -I$(ROOTHDRDIR)
-s390_NLDLIBS =	-L$(ROOT)/usr/lib
-MY_NATIVE_LDLIBS = $($(MACH)_NLDLIBS)	\
+MY_NATIVE_LDDIR_s390 = -L$(ROOT)/usr/lib
+MY_NATIVE_LDLIBS = $(MY_NATIVE_LDDIR_$(MACH))	\
 		-L$(LIBUUTIL)/native -R$(LIBUUTIL)/native -luutil -ldoor -lc -lgen
 MY_NATIVE_LDLIBS_i386 = -lsmbios
 MY_NATIVE_LDLIBS += $(MY_NATIVE_LDLIBS_$(MACH))
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c	Mon Aug 17 09:14:35 2009 -0400
@@ -401,70 +401,8 @@
 static int
 make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc)
 {
-	char *logstr;
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-
-	/*
-	 * Preserve history log string.
-	 * any changes performed here will be
-	 * logged as an internal event.
-	 */
-	logstr = zhp->zfs_hdl->libzfs_log_str;
-	zhp->zfs_hdl->libzfs_log_str = NULL;
-
-top:
-	if (put_stats_zhdl(zhp, zc) != 0) {
-		zhp->zfs_hdl->libzfs_log_str = logstr;
+	if (put_stats_zhdl(zhp, zc) != 0)
 		return (-1);
-	}
-
-
-	if (zhp->zfs_dmustats.dds_inconsistent) {
-		zfs_cmd_t zc2 = { 0 };
-
-		/*
-		 * If it is dds_inconsistent, then we've caught it in
-		 * the middle of a 'zfs receive' or 'zfs destroy', and
-		 * it is inconsistent from the ZPL's point of view, so
-		 * can't be mounted.  However, it could also be that we
-		 * have crashed in the middle of one of those
-		 * operations, in which case we need to get rid of the
-		 * inconsistent state.  We do that by either rolling
-		 * back to the previous snapshot (which will fail if
-		 * there is none), or destroying the filesystem.  Note
-		 * that if we are still in the middle of an active
-		 * 'receive' or 'destroy', then the rollback and destroy
-		 * will fail with EBUSY and we will drive on as usual.
-		 */
-
-		(void) strlcpy(zc2.zc_name, zhp->zfs_name,
-		    sizeof (zc2.zc_name));
-
-		if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
-			(void) zvol_remove_link(hdl, zhp->zfs_name);
-			zc2.zc_objset_type = DMU_OST_ZVOL;
-		} else {
-			zc2.zc_objset_type = DMU_OST_ZFS;
-		}
-
-		/*
-		 * If we can successfully destroy it, pretend that it
-		 * never existed.
-		 */
-		if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc2) == 0) {
-			zhp->zfs_hdl->libzfs_log_str = logstr;
-			errno = ENOENT;
-			return (-1);
-		}
-		/* If we can successfully roll it back, reset the stats */
-		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc2) == 0) {
-			if (get_stats_ioctl(zhp, zc) != 0) {
-				zhp->zfs_hdl->libzfs_log_str = logstr;
-				return (-1);
-			}
-			goto top;
-		}
-	}
 
 	/*
 	 * We've managed to open the dataset and gather statistics.  Determine
@@ -486,7 +424,6 @@
 	else
 		abort();	/* we should never see any other types */
 
-	zhp->zfs_hdl->libzfs_log_str = logstr;
 	zhp->zpool_hdl = zpool_handle(zhp);
 	return (0);
 }
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1139,6 +1139,7 @@
 	char newname[ZFS_MAXNAMELEN];
 	int error;
 	boolean_t needagain, progress;
+	char *s1, *s2;
 
 	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
 	VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
@@ -1333,11 +1334,13 @@
 		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
 		    "parentfromsnap", &stream_parent_fromsnap_guid));
 
+		s1 = strrchr(fsname, '/');
+		s2 = strrchr(stream_fsname, '/');
+
 		/* check for rename */
 		if ((stream_parent_fromsnap_guid != 0 &&
 		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
-		    strcmp(strrchr(fsname, '/'),
-		    strrchr(stream_fsname, '/')) != 0) {
+		    ((s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
 			nvlist_t *parent;
 			char tryname[ZFS_MAXNAMELEN];
 
--- a/usr/src/pkgdefs/SUNWigb/postinstall	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/pkgdefs/SUNWigb/postinstall	Mon Aug 17 09:14:35 2009 -0400
@@ -133,5 +133,7 @@
 	"pciex8086,10d6"
 	"pciex8086,10e6"
 	"pciex8086,10e7"
-	"pciex8086,10e8"'	\
+	"pciex8086,10e8"
+	"pciex8086,150a"
+	"pciex8086,150d"'	\
 	-b "$BASEDIR" igb
--- a/usr/src/pkgdefs/SUNWixgbe/postinstall	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/pkgdefs/SUNWixgbe/postinstall	Mon Aug 17 09:14:35 2009 -0400
@@ -141,6 +141,7 @@
 	"pciex8086,10f7"
 	"pciex8086,10f9"
 	"pciex8086,10fb"
+	"pciex8086,10fc"
 	"pciex8086,1507"
 	"pciex8086,1508"
 	"pciex8086,1514"'	\
--- a/usr/src/tools/SUNWonbld/prototype_com	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/tools/SUNWonbld/prototype_com	Mon Aug 17 09:14:35 2009 -0400
@@ -182,6 +182,7 @@
 f none opt/onbld/man/man1/codereview.1 644 root bin
 f none opt/onbld/man/man1/cstyle.1 644 root bin
 f none opt/onbld/man/man1/cw.1 644 root bin
+f none opt/onbld/man/man1/findunref.1 644 root bin
 f none opt/onbld/man/man1/flg.flp.1 644 root bin
 f none opt/onbld/man/man1/get_depend_info.1 644 root bin
 f none opt/onbld/man/man1/intf_check.1 644 root bin
--- a/usr/src/tools/findunref/Makefile	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/tools/findunref/Makefile	Mon Aug 17 09:14:35 2009 -0400
@@ -20,18 +20,19 @@
 #
 
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 
-PROG	= findunref
+PROG	  = findunref
+MAN1FILES = findunref.1
 CFLAGS += $(CCVERBOSE)
 LINTFLAGS += -ux
 
 include ../Makefile.tools
 
+$(ROOTONBLDMAN1FILES) := FILEMODE=      644
+
 EXCEPTION_SRC=	$(SCM_TYPE:none=) common open
 $(CLOSED_BUILD)EXCEPTION_SRC += closed
 EXCEPTION_LISTS=	$(EXCEPTION_SRC:%=exception_list.%)
@@ -42,7 +43,7 @@
 
 all:    $(PROG) exception_list
 
-install: all .WAIT $(ROOTONBLDMACHPROG)
+install: all .WAIT $(ROOTONBLDMACHPROG) $(ROOTONBLDMAN1FILES)
 
 lint:	lint_PROG
 
--- a/usr/src/tools/findunref/exception_list.closed	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/tools/findunref/exception_list.closed	Mon Aug 17 09:14:35 2009 -0400
@@ -20,11 +20,9 @@
 #
 
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 
 #
 # closed-tree exception list
@@ -36,19 +34,19 @@
 # Ignore internal packages, scripts, and tools that are intentionally not
 # built or used during a nightly.
 #
-./closed/cmd/zic/makefile.tzpkg
-./closed/cmd/zic/tzpkg.awk
+./usr/closed/cmd/zic/makefile.tzpkg
+./usr/closed/cmd/zic/tzpkg.awk
 
 #
 # Ignore files that get used during a EXPORT_SRC or CRYPT_SRC build only.
 #
-./closed/uts/sun4v/io/n2cp/Makefile
-./closed/uts/sun4v/io/ncp/Makefile
+./usr/closed/uts/sun4v/io/n2cp/Makefile
+./usr/closed/uts/sun4v/io/ncp/Makefile
 
 #
 # Ignore files that are only used for warlock
 #
-./closed/uts/sparc/marvell88sx/Makefile
+./usr/closed/uts/sparc/marvell88sx/Makefile
 
 #
 # An unfortunate artifact of the bridged, split gate is that closed-source
@@ -59,4 +57,4 @@
 # won't exist, so we need the ISUSED directive here.
 #
 # ISUSED - let checkpaths know that the next entry is good.
-./closed/deleted_files
+./usr/closed/deleted_files
--- a/usr/src/tools/findunref/exception_list.open	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/tools/findunref/exception_list.open	Mon Aug 17 09:14:35 2009 -0400
@@ -34,87 +34,87 @@
 # Ignore oddly-named text files scattered about -- someday these should be
 # suffixed with .txt so we don't have to list them.
 #
-./src/cmd/oawk/EXPLAIN
-./src/cmd/rpcsvc/nis/rpc.nisd/resolv_server/DNS_FWD
-./src/cmd/vi/port/ex.news
-./src/cmd/ssh/doc
+./usr/src/cmd/oawk/EXPLAIN
+./usr/src/cmd/rpcsvc/nis/rpc.nisd/resolv_server/DNS_FWD
+./usr/src/cmd/vi/port/ex.news
+./usr/src/cmd/ssh/doc
 
 #
 # Ignore everything under trees that may be resynched from outside ON.
 #
-./src/cmd/perl
-./src/cmd/sqlite
-./src/lib/libsqlite
-./src/cmd/tcpd
-./src/common/openssl
-./src/grub
-./src/uts/intel/sys/acpi
+./usr/src/cmd/perl
+./usr/src/cmd/sqlite
+./usr/src/lib/libsqlite
+./usr/src/cmd/tcpd
+./usr/src/common/openssl
+./usr/src/grub
+./usr/src/uts/intel/sys/acpi
 
 #
 # Ignore ksh93/ast-related files that are only used to resync our build
 # configuration with upstream.
 #
-./src/lib/libast/*/src/lib/libast/FEATURE
-./src/lib/libast/common/comp/conf.*
-./src/lib/libast/common/features
-./src/lib/libast/common/include/ast_windows.h
-./src/lib/libast/common/port/lc.tab
-./src/lib/libast/common/port/lcgen.c
-./src/lib/libcmd/*/src/lib/libcmd/FEATURE
-./src/lib/libcmd/common/features
-./src/lib/libdll/*/src/lib/libdll/FEATURE
-./src/lib/libdll/common/features
-./src/lib/libpp/*/pp.*
-./src/lib/libpp/common/gentab.sh
-./src/lib/libpp/common/ppsym.c
-./src/lib/libpp/i386/ppdebug.h
-./src/lib/libpp/sparc/ppdebug.h
-./src/lib/libshell/*/src/cmd/ksh93/FEATURE
-./src/lib/libshell/common/data/math.tab
-./src/lib/libshell/common/features
-./src/lib/libshell/misc/buildksh93.sh
-./src/lib/libshell/misc/buildksh93.readme
+./usr/src/lib/libast/*/src/lib/libast/FEATURE
+./usr/src/lib/libast/common/comp/conf.*
+./usr/src/lib/libast/common/features
+./usr/src/lib/libast/common/include/ast_windows.h
+./usr/src/lib/libast/common/port/lc.tab
+./usr/src/lib/libast/common/port/lcgen.c
+./usr/src/lib/libcmd/*/src/lib/libcmd/FEATURE
+./usr/src/lib/libcmd/common/features
+./usr/src/lib/libdll/*/src/lib/libdll/FEATURE
+./usr/src/lib/libdll/common/features
+./usr/src/lib/libpp/*/pp.*
+./usr/src/lib/libpp/common/gentab.sh
+./usr/src/lib/libpp/common/ppsym.c
+./usr/src/lib/libpp/i386/ppdebug.h
+./usr/src/lib/libpp/sparc/ppdebug.h
+./usr/src/lib/libshell/*/src/cmd/ksh93/FEATURE
+./usr/src/lib/libshell/common/data/math.tab
+./usr/src/lib/libshell/common/features
+./usr/src/lib/libshell/misc/buildksh93.sh
+./usr/src/lib/libshell/misc/buildksh93.readme
 
 #
 # Ignore ksh93/ast-related "iffe" (if feature enabled) probe
 #
-./src/lib/libsum/common/features/sum
+./usr/src/lib/libsum/common/features/sum
 
 #
 # Ignore ksh93/ast-related upstream source, currently superseded by
 # a per-platform version of sum.h, since we use libmd.so.1 for some
 # ciphers.
 #
-./src/lib/libsum/common/sum.h
+./usr/src/lib/libsum/common/sum.h
 
 #
 # Ignore ksh93/ast-related test programs.
 #
-./src/cmd/ast/msgcc/msgcc.tst
-./src/lib/libast/common/port/astmath.c
+./usr/src/cmd/ast/msgcc/msgcc.tst
+./usr/src/lib/libast/common/port/astmath.c
 
 #
 # Ignore ksh93/ast-related source components that are not currently
 # used but may be useful later.
 #
-./src/lib/libcmd/common/cksum.c
-./src/lib/libcmd/common/md5sum.c
-./src/lib/libcmd/common/sum.c
-./src/lib/libshell/common/bltins/mkservice.c
-./src/lib/libshell/common/data/bash_pre_rc.sh
-./src/lib/libshell/common/include/env.h
-./src/lib/libshell/common/sh/bash.c
-./src/lib/libshell/common/sh/env.c
-./src/lib/libshell/common/sh/shcomp.c
-./src/lib/libshell/common/sh/suid_exec.c
+./usr/src/lib/libcmd/common/cksum.c
+./usr/src/lib/libcmd/common/md5sum.c
+./usr/src/lib/libcmd/common/sum.c
+./usr/src/lib/libshell/common/bltins/mkservice.c
+./usr/src/lib/libshell/common/data/bash_pre_rc.sh
+./usr/src/lib/libshell/common/include/env.h
+./usr/src/lib/libshell/common/sh/bash.c
+./usr/src/lib/libshell/common/sh/env.c
+./usr/src/lib/libshell/common/sh/shcomp.c
+./usr/src/lib/libshell/common/sh/suid_exec.c
 
 #
 # Ignore any files built as part of the nightly program itself.
 #
 # ISUSED - let checkpaths know that the next entry is good.
-./src/*.out
+./usr/src/*.out
 # ISUSED - let checkpaths know that the next entry is good.
-./src/*.ref
+./usr/src/*.ref
 
 #
 # Ignore internal test directories and test programs.
@@ -123,145 +123,145 @@
 */test
 *Test.java
 *_test.[ch]
-./src/cmd/ldap/common/*test.c
-./src/cmd/logadm/tester
-./src/cmd/print/printmgr/com/sun/admin/pm/client/helptools/extract
-./src/cmd/print/printmgr/com/sun/admin/pm/server/pmtest
-./src/cmd/sendmail/libsm/t-*.c
-./src/cmd/sort/common/convert.c
-./src/cmd/sort/common/invoke.c
-./src/lib/crypt_modules/sha256/test.c
-./src/lib/efcode/fcode_test
-./src/lib/libkvm/common/test.c
+./usr/src/cmd/ldap/common/*test.c
+./usr/src/cmd/logadm/tester
+./usr/src/cmd/print/printmgr/com/sun/admin/pm/client/helptools/extract
+./usr/src/cmd/print/printmgr/com/sun/admin/pm/server/pmtest
+./usr/src/cmd/sendmail/libsm/t-*.c
+./usr/src/cmd/sort/common/convert.c
+./usr/src/cmd/sort/common/invoke.c
+./usr/src/lib/crypt_modules/sha256/test.c
+./usr/src/lib/efcode/fcode_test
+./usr/src/lib/libkvm/common/test.c
 
 #
 # Ignore debugging code.
 #
-./src/cmd/fs.d/pcfs/fsck/inject.c
-./src/cmd/sort/common/statistics.c
+./usr/src/cmd/fs.d/pcfs/fsck/inject.c
+./usr/src/cmd/sort/common/statistics.c
 
 #
 # Ignore internal packages, scripts, and tools that are intentionally not
 # built or used during a nightly.
 #
-./src/cmd/mdb/packages
-./src/cmd/sgs/packages
-./src/cmd/sgs/rtld.4.x
-./src/prototypes
-./src/cmd/pools/poold/com/sun/solaris/*/*/package.html
-./src/uts/intel/io/acpica/cmp_ca.sh
+./usr/src/cmd/mdb/packages
+./usr/src/cmd/sgs/packages
+./usr/src/cmd/sgs/rtld.4.x
+./usr/src/prototypes
+./usr/src/cmd/pools/poold/com/sun/solaris/*/*/package.html
+./usr/src/uts/intel/io/acpica/cmp_ca.sh
 
 #
 # Ignore files that are only used by internal packages.
 #
-./src/cmd/sgs/*/*/*chk.msg
+./usr/src/cmd/sgs/*/*/*chk.msg
 
 #
 # Ignore files that get used during a EXPORT_SRC or CRYPT_SRC build only.
 #
-./src/common/crypto/aes/Makefile
-./src/common/crypto/arcfour/Makefile
-./src/common/crypto/blowfish/Makefile
-./src/common/crypto/des/Makefile
-./src/common/crypto/rsa/Makefile
-./src/lib/gss_mechs/mech_dh/backend/mapfile-vers
-./src/lib/gss_mechs/mech_dh/dh1024/mapfile-vers
-./src/lib/gss_mechs/mech_dh/dh192/mapfile-vers
-./src/lib/gss_mechs/mech_dh/dh640/mapfile-vers
-./src/lib/gss_mechs/mech_krb5/mapfile-vers-clean
-./src/lib/gss_mechs/mech_spnego/mapfile-vers-clean
-./src/lib/pkcs11/pkcs11_softtoken/common/Makefile
-./src/uts/common/Makefile
-./src/uts/common/crypto/io/Makefile
-./src/uts/common/gssapi/include/Makefile
-./src/uts/common/gssapi/mechs/dummy/Makefile
-./src/uts/common/gssapi/mechs/krb5/Makefile
-./src/xmod
+./usr/src/common/crypto/aes/Makefile
+./usr/src/common/crypto/arcfour/Makefile
+./usr/src/common/crypto/blowfish/Makefile
+./usr/src/common/crypto/des/Makefile
+./usr/src/common/crypto/rsa/Makefile
+./usr/src/lib/gss_mechs/mech_dh/backend/mapfile-vers
+./usr/src/lib/gss_mechs/mech_dh/dh1024/mapfile-vers
+./usr/src/lib/gss_mechs/mech_dh/dh192/mapfile-vers
+./usr/src/lib/gss_mechs/mech_dh/dh640/mapfile-vers
+./usr/src/lib/gss_mechs/mech_krb5/mapfile-vers-clean
+./usr/src/lib/gss_mechs/mech_spnego/mapfile-vers-clean
+./usr/src/lib/pkcs11/pkcs11_softtoken/common/Makefile
+./usr/src/uts/common/Makefile
+./usr/src/uts/common/crypto/io/Makefile
+./usr/src/uts/common/gssapi/include/Makefile
+./usr/src/uts/common/gssapi/mechs/dummy/Makefile
+./usr/src/uts/common/gssapi/mechs/krb5/Makefile
+./usr/src/xmod
 
 #
 # Ignore Makefiles which are used by developers but not used by nightly
 # itself.  This is a questionable practice, since they tend to rot.
 #
-./src/cmd/syslogd/sparcv9/Makefile
-./src/uts/sparc/uhci/Makefile
-./src/lib/pam_modules/smb/amd64/Makefile
-./src/lib/pam_modules/smb/sparcv9/Makefile
-./src/cmd/isns/isnsd/xml_def/isnsmgmtSchema.xsd
+./usr/src/cmd/syslogd/sparcv9/Makefile
+./usr/src/uts/sparc/uhci/Makefile
+./usr/src/lib/pam_modules/smb/amd64/Makefile
+./usr/src/lib/pam_modules/smb/sparcv9/Makefile
+./usr/src/cmd/isns/isnsd/xml_def/isnsmgmtSchema.xsd
 
 #
 # Ignore dtrace scripts only used by developers
 #
-./src/cmd/vscan/vscand/vscan.d
+./usr/src/cmd/vscan/vscand/vscan.d
 
 #
 # Ignore sample source code.
 #
-./src/cmd/sendmail/libmilter/example.c
-./src/lib/libdhcpsvc/modules/templates
+./usr/src/cmd/sendmail/libmilter/example.c
+./usr/src/lib/libdhcpsvc/modules/templates
 
 #
 # Ignore .xcl files that aren't used because the program is statically linked.
 #
-./src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.xcl
+./usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.xcl
 
 #
 # Ignore sendmail files included for completeness' sake, but which won't
 # be used until certain _FFR (for future release) #define's go live.
 #
-./src/cmd/sendmail/src/statusd_shm.h
+./usr/src/cmd/sendmail/src/statusd_shm.h
 
 #
 # Ignore files originally supplied by ISC (Internet Software Consortium) 
 # as part of a BIND release.
 #
-./src/lib/libresolv2/common/cylink/bn68000.c
-./src/lib/libresolv2/common/cylink/bn8086.c
-./src/lib/libresolv2/common/cylink/lbn68000.c
-./src/lib/libresolv2/common/cylink/lbn68000.h
-./src/lib/libresolv2/common/cylink/lbn68020.c
-./src/lib/libresolv2/common/cylink/lbn68020.h
-./src/lib/libresolv2/common/cylink/lbn80386.h
-./src/lib/libresolv2/common/cylink/lbn8086.h
-./src/lib/libresolv2/common/cylink/lbnppc.c
-./src/lib/libresolv2/common/cylink/lbnppc.h
-./src/lib/libresolv2/common/cylink/ppcasm.h
-./src/lib/libresolv2/common/cylink/sizetest.c
-./src/lib/libresolv2/common/irs/getaddrinfo.c
-./src/lib/libresolv2/common/irs/nis_p.h
-./src/lib/libresolv2/common/resolv/res_mkupdate.h
-./src/lib/libresolv2/include/err.h
+./usr/src/lib/libresolv2/common/cylink/bn68000.c
+./usr/src/lib/libresolv2/common/cylink/bn8086.c
+./usr/src/lib/libresolv2/common/cylink/lbn68000.c
+./usr/src/lib/libresolv2/common/cylink/lbn68000.h
+./usr/src/lib/libresolv2/common/cylink/lbn68020.c
+./usr/src/lib/libresolv2/common/cylink/lbn68020.h
+./usr/src/lib/libresolv2/common/cylink/lbn80386.h
+./usr/src/lib/libresolv2/common/cylink/lbn8086.h
+./usr/src/lib/libresolv2/common/cylink/lbnppc.c
+./usr/src/lib/libresolv2/common/cylink/lbnppc.h
+./usr/src/lib/libresolv2/common/cylink/ppcasm.h
+./usr/src/lib/libresolv2/common/cylink/sizetest.c
+./usr/src/lib/libresolv2/common/irs/getaddrinfo.c
+./usr/src/lib/libresolv2/common/irs/nis_p.h
+./usr/src/lib/libresolv2/common/resolv/res_mkupdate.h
+./usr/src/lib/libresolv2/include/err.h
 
 #
 # Ignore mont_mulf.c. It is used as a starting point for some hand optimized
 # assembly files. We keep it around for future reference.
 #
-./src/common/bignum/mont_mulf.c
+./usr/src/common/bignum/mont_mulf.c
 
 #
 # Ignore the sparc Makefiles for x86-only drivers;
 # they're used to build warlock only.
 #
-./src/uts/sparc/sata/Makefile
-./src/uts/sparc/si3124/Makefile
-./src/uts/sparc/nv_sata/Makefile
-./src/uts/sparc/ahci/Makefile
+./usr/src/uts/sparc/sata/Makefile
+./usr/src/uts/sparc/si3124/Makefile
+./usr/src/uts/sparc/nv_sata/Makefile
+./usr/src/uts/sparc/ahci/Makefile
 
 #
 # Ignore uttrack.c.  It is provided as part of the standard
 # ACPI CA source code but provides optional resource tracking
 # functionality which is not used.
 #
-./src/uts/intel/io/acpica/utilities/uttrack.c
+./usr/src/uts/intel/io/acpica/utilities/uttrack.c
 
 #
 # Ignore any files that get used during a gcc build only.
 #
-./src/cmd/sgs/rtld/common/mapfile-order-gcc
+./usr/src/cmd/sgs/rtld/common/mapfile-order-gcc
 
 #
 # Ignore compiler dependent header files for fpscrubber
 # and sparc prototype files
 #
-./src/cmd/fps/fptest/singdoub*
-./src/pkgdefs/SUNWfsu/prototype_gcc_sparc
-./src/pkgdefs/SUNWfsu/prototype_cc_sparc
+./usr/src/cmd/fps/fptest/singdoub*
+./usr/src/pkgdefs/SUNWfsu/prototype_gcc_sparc
+./usr/src/pkgdefs/SUNWfsu/prototype_cc_sparc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/tools/findunref/findunref.1	Mon Aug 17 09:14:35 2009 -0400
@@ -0,0 +1,94 @@
+.\" " CDDL HEADER START
+.\" "
+.\" " The contents of this file are subject to the terms of the
+.\" " Common Development and Distribution License (the "License").
+.\" " You may not use this file except in compliance with the License.
+.\" "
+.\" " You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" " or http://www.opensolaris.org/os/licensing.
+.\" " See the License for the specific language governing permissions
+.\" " and limitations under the License.
+.\" "
+.\" " When distributing Covered Code, include this CDDL HEADER in each
+.\" " file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" " If applicable, add the following below this CDDL HEADER, with the
+.\" " fields enclosed by brackets "[]" replaced with your own identifying
+.\" " information: Portions Copyright [yyyy] [name of copyright owner]
+.\" "
+.\" " CDDL HEADER END
+.\" "
+.\" "Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+.\" "Use is subject to license terms.
+.TH findunref 1 "11 Aug 2009"
+.I findunref
+\- find unused files in a source tree
+.SH SYNOPSIS
+findunref [\fB-s\fP \fIsubtree\fP] [\fB-t\fP \fItstampfile\fP]
+[\fB-S\fP \fBhg\fP|\fBtw\fP] \fIsrcroot\fP \fIexceptfile\fP
+.LP
+.SH DESCRIPTION
+.IX "OS-Net build tools" "findunref" "" "\fBfindunref\fP"
+.LP
+The findunref utility lists the files in a source tree which have not been
+accessed more recently than a particular timestamp file.  Although
+findunref may be used on its own, it is usually invoked by
+\fBnightly\fP(1) to find files that are never referenced during a given
+build (see \fB-f\fP in \fBnightly\fP(1)).
+.LP
+The root of the source tree to examine is specified by \fIsrcroot\fP.  To
+simplify comparing findunref output from different source trees, findunref
+outputs all filenames relative to \fIsrcroot\fP.
+.LP
+Some files in a source tree may be intentionally unreferenced (e.g.,
+documentation) or only referenced during specialized types of builds.
+Accordingly, \fIexceptfile\fP names a file containing a list of pathname
+globs that will be ignored by findunref.  Within \fIexceptfile\fP, any
+lines consisting solely of whitespace or starting with \fB#\fP will be
+ignored.  Directory globs may also be specified, which will cause any
+matching directories to be skipped entirely.  If no exceptions are
+desired, \fIexceptfile\fP can be \fB/dev/null\fP.
+.LP
+Depending on how findunref is invoked, it can either check all files, or
+limit its checks to files under control of a specific source code
+management (SCM) system.  To limit checks to files managed by Mercurial,
+the \fBhg\fP(1) utility must be present in \fB/usr/bin\fP and any relevant
+repositories must be located at or under \fBsrcroot\fP.  Nested Mercurial
+repositories are supported.
+.SH OPTIONS
+.TP 10
+.B -s \fIsubtree\fP
+Only look under \fIsubtree\fP for unreferenced files.  By default, all
+directories under \fIsrcroot\fP are examined.
+.TP 10
+.B -t \fItstampfile\fP
+Consider files older than \fItstampfile\fP to be unreferenced. 
+By default, \fIsrcroot\fB/.build.tstamp\fR is used.
+.TP 10
+.B -S \fBhg\fP|\fBtw\fP
+Only check files that are managed by the specified SCM.  To simplify
+interaction with \fBwhich_scm\fP(1), the SCM names "mercurial" and
+"teamware" may also be specified.  By default, all files are checked.
+.SH SEE ALSO
+.LP
+\fBhg\fP(1),
+\fBnightly\fP(1),
+\fBwhich_scm\fP(1)
+.SH NOTES
+Since many files are only used when building for a particular ISA (e.g.,
+Makefiles that are specified to i386 or sparc), builds must be done on all
+applicable ISAs and the results merged.  For instance, if nightly builds
+(with \fB-f\fP) are done on both SPARC and x86, \fBusr/src\fP will be
+populated with a corresponding \fBunref-\fIisa\fB.out\fR file, which can
+be merged with \fBcomm\fP(1):
+.LP
+.nf
+comm -12 /path/to/unref-i386.out
+         /path/to/unref-sparc.out > unref.out 
+.fi
+.LP
+This merged file can then be compared against the gate's latest
+unreferenced file list (e.g. \fB/ws/onnv-gate/usr/src/unrefmaster.out\fP).
+.LP
+Different gates have different unreferenced file policies.  Any changes to
+\fIexceptfile\fP that would define new unreferenced file policies for a
+given gate must be cleared with the appropriate gatekeepers.
--- a/usr/src/tools/findunref/findunref.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/tools/findunref/findunref.c	Mon Aug 17 09:14:35 2009 -0400
@@ -17,13 +17,10 @@
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ *
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
 
 /*
  * Finds all unreferenced files in a source tree that do not match a list of
@@ -55,18 +52,54 @@
 	unsigned int	maxpaths;
 } pnset_t;
 
+/*
+ * Data associated with the current Mercurial manifest.
+ */
+typedef struct hgdata {
+	pnset_t		*manifest;
+	char		hgpath[MAXPATHLEN];
+	char		root[MAXPATHLEN];
+	unsigned int	rootlen;
+	boolean_t	rootwarn;
+} hgdata_t;
+
+/*
+ * Hooks used to check if a given unreferenced file is known to an SCM
+ * (currently Mercurial and TeamWare).
+ */
+typedef int checkscm_func_t(const char *, const struct FTW *);
+typedef void chdirscm_func_t(const char *);
+
+typedef struct {
+	const char	*name;
+	checkscm_func_t	*checkfunc;
+	chdirscm_func_t	*chdirfunc;
+} scm_t;
+
+static checkscm_func_t check_tw, check_hg;
+static chdirscm_func_t chdir_hg;
 static int	pnset_add(pnset_t *, const char *);
 static int	pnset_check(const pnset_t *, const char *);
 static void	pnset_empty(pnset_t *);
+static void	pnset_free(pnset_t *);
 static int	checkpath(const char *, const struct stat *, int, struct FTW *);
 static pnset_t	*make_exset(const char *);
 static void	warn(const char *, ...);
 static void	die(const char *, ...);
 
+static const scm_t scms[] = {
+	{ "tw",		check_tw,	NULL		},
+	{ "teamware",	check_tw,	NULL		},
+	{ "hg",		check_hg,	chdir_hg 	},
+	{ "mercurial",	check_hg,	chdir_hg	},
+	{ NULL,		NULL, 		NULL		}
+};
+
+static const scm_t	*scm;
+static hgdata_t		hgdata;
 static time_t		tstamp;		/* timestamp to compare files to */
 static pnset_t		*exsetp;	/* pathname globs to ignore */
 static const char	*progname;
-static boolean_t	allfiles = B_FALSE;
 
 int
 main(int argc, char *argv[])
@@ -83,10 +116,10 @@
 	else
 		progname++;
 
-	while ((c = getopt(argc, argv, "as:t:")) != EOF) {
+	while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
 		switch (c) {
 		case 'a':
-			allfiles = B_TRUE;
+			/* for compatibility; now the default */
 			break;
 
 		case 's':
@@ -97,6 +130,15 @@
 			tstampfile = optarg;
 			break;
 
+		case 'S':
+			for (scm = scms; scm->name != NULL; scm++) {
+				if (strcmp(scm->name, optarg) == 0)
+					break;
+			}
+			if (scm->name == NULL)
+				die("unsupported SCM `%s'\n", optarg);
+			break;
+
 		default:
 		case '?':
 			goto usage;
@@ -107,8 +149,9 @@
 	argv += optind;
 
 	if (argc != 2) {
-usage:		(void) fprintf(stderr, "usage: %s [-a] [-s subtree] "
-		    "[-t tstampfile] srcroot exceptfile\n", progname);
+usage:		(void) fprintf(stderr, "usage: %s [-s <subtree>] "
+		    "[-t <tstampfile>] [-S hg|tw] <srcroot> <exceptfile>\n",
+		    progname);
 		return (EXIT_FAILURE);
 	}
 
@@ -134,7 +177,9 @@
 	/*
 	 * Walk the specified subtree of the tree rooted at argv[0].
 	 */
-	(void) chdir(argv[0]);
+	if (chdir(argv[0]) == -1)
+		die("cannot change directory to \"%s\"", argv[0]);
+
 	if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
 		die("cannot walk tree rooted at \"%s\"\n", argv[0]);
 
@@ -143,6 +188,143 @@
 }
 
 /*
+ * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
+ */
+static pnset_t *
+load_manifest(const char *hgroot)
+{
+	FILE	*fp = NULL;
+	char	*hgcmd = NULL;
+	char	*newline;
+	pnset_t	*pnsetp;
+	char	path[MAXPATHLEN];
+
+	pnsetp = calloc(sizeof (pnset_t), 1);
+	if (pnsetp == NULL ||
+	    asprintf(&hgcmd, "/usr/bin/hg manifest -R %s", hgroot) == -1)
+		goto fail;
+
+	fp = popen(hgcmd, "r");
+	if (fp == NULL)
+		goto fail;
+
+	while (fgets(path, sizeof (path), fp) != NULL) {
+		newline = strrchr(path, '\n');
+		if (newline != NULL)
+			*newline = '\0';
+
+		if (pnset_add(pnsetp, path) == 0)
+			goto fail;
+	}
+
+	(void) pclose(fp);
+	free(hgcmd);
+	return (pnsetp);
+fail:
+	warn("cannot load hg manifest at %s", hgroot);
+	if (fp != NULL)
+		(void) pclose(fp);
+	free(hgcmd);
+	pnset_free(pnsetp);
+	return (NULL);
+}
+
+/*
+ * If necessary, change our active manifest to be appropriate for `path'.
+ */
+static void
+chdir_hg(const char *path)
+{
+	char hgpath[MAXPATHLEN];
+	char basepath[MAXPATHLEN];
+	char *slash;
+
+	(void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", path);
+
+	/*
+	 * Change our active manifest if any one of the following is true:
+	 *
+	 *   1. No manifest is loaded.  Find the nearest hgroot to load from.
+	 *
+	 *   2. A manifest is loaded, but we've moved into a directory with
+	 *	its own hgroot (e.g., usr/closed).  Load from its hgroot.
+	 *
+	 *   3. A manifest is loaded, but no longer applies (e.g., the manifest
+	 *	under usr/closed is loaded, but we've moved to usr/src).
+	 */
+	if (hgdata.manifest == NULL ||
+	    strcmp(hgpath, hgdata.hgpath) != 0 && access(hgpath, X_OK) == 0 ||
+	    strncmp(path, hgdata.root, hgdata.rootlen - 1) != 0) {
+		pnset_free(hgdata.manifest);
+		hgdata.manifest = NULL;
+
+		(void) strlcpy(basepath, path, MAXPATHLEN);
+
+		/*
+		 * Walk up the directory tree looking for .hg subdirectories.
+		 */
+		while (access(hgpath, X_OK) == -1) {
+			slash = strrchr(basepath, '/');
+			if (slash == NULL) {
+				if (!hgdata.rootwarn) {
+					warn("no hg root for \"%s\"\n", path);
+					hgdata.rootwarn = B_TRUE;
+				}
+				return;
+			}
+			*slash = '\0';
+			(void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", basepath);
+		}
+
+		/*
+		 * We found a directory with an .hg subdirectory; record it
+		 * and load its manifest.
+		 */
+		(void) strlcpy(hgdata.hgpath, hgpath, MAXPATHLEN);
+		(void) strlcpy(hgdata.root, basepath, MAXPATHLEN);
+		hgdata.manifest = load_manifest(hgdata.root);
+
+		/*
+		 * The logic in check_hg() depends on hgdata.root having a
+		 * single trailing slash, so only add it if it's missing.
+		 */
+		if (hgdata.root[strlen(hgdata.root) - 1] != '/')
+			(void) strlcat(hgdata.root, "/", MAXPATHLEN);
+		hgdata.rootlen = strlen(hgdata.root);
+	}
+}
+
+/*
+ * Check if a file is under Mercurial control by checking against the manifest.
+ */
+/* ARGSUSED */
+static int
+check_hg(const char *path, const struct FTW *ftwp)
+{
+	/*
+	 * The manifest paths are relative to the manifest root; skip past it.
+	 */
+	path += hgdata.rootlen;
+
+	return (hgdata.manifest != NULL && pnset_check(hgdata.manifest, path));
+}
+
+/*
+ * Check if a file is under TeamWare control by checking for its corresponding
+ * SCCS "s-dot" file.
+ */
+static int
+check_tw(const char *path, const struct FTW *ftwp)
+{
+	char sccspath[MAXPATHLEN];
+
+	(void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
+	    path, path + ftwp->base);
+
+	return (access(sccspath, F_OK) == 0);
+}
+
+/*
  * Using `exceptfile' and a built-in list of exceptions, build and return a
  * pnset_t consisting of all of the pathnames globs which are allowed to be
  * unreferenced in the source tree.
@@ -189,8 +371,7 @@
 	(void) fclose(fp);
 	return (pnsetp);
 fail:
-	pnset_empty(pnsetp);
-	free(pnsetp);
+	pnset_free(pnsetp);
 	return (NULL);
 }
 
@@ -201,8 +382,6 @@
 checkpath(const char *path, const struct stat *statp, int type,
     struct FTW *ftwp)
 {
-	char sccspath[MAXPATHLEN];
-
 	switch (type) {
 	case FTW_F:
 		/*
@@ -212,26 +391,28 @@
 			return (0);
 
 		/*
-		 * If not explicitly checking all files, restrict ourselves
-		 * to unreferenced files under SCCS control.
+		 * If requested, restrict ourselves to unreferenced files
+		 * under SCM control.
 		 */
-		if (!allfiles) {
-			(void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s",
-			    ftwp->base, path, path + ftwp->base);
-
-			if (access(sccspath, F_OK) == -1)
-				return (0);
-		}
-
-		(void) puts(path);
+		if (scm == NULL || scm->checkfunc(path, ftwp))
+			(void) puts(path);
 		return (0);
 
 	case FTW_D:
 		/*
 		 * Prune any directories in the exception list.
 		 */
-		if (pnset_check(exsetp, path))
+		if (pnset_check(exsetp, path)) {
 			ftwp->quit = FTW_PRUNE;
+			return (0);
+		}
+
+		/*
+		 * If necessary, advise the SCM logic of our new directory.
+		 */
+		if (scm != NULL && scm->chdirfunc != NULL)
+			scm->chdirfunc(path);
+
 		return (0);
 
 	case FTW_DNR:
@@ -256,14 +437,15 @@
 pnset_add(pnset_t *pnsetp, const char *path)
 {
 	char **newpaths;
+	unsigned int maxpaths;
 
 	if (pnsetp->npath == pnsetp->maxpaths) {
-		newpaths = realloc(pnsetp->paths, sizeof (const char *) *
-		    (pnsetp->maxpaths + 15));
+		maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
+		newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
 		if (newpaths == NULL)
 			return (0);
 		pnsetp->paths = newpaths;
-		pnsetp->maxpaths += 15;
+		pnsetp->maxpaths = maxpaths;
 	}
 
 	pnsetp->paths[pnsetp->npath] = strdup(path);
@@ -302,6 +484,18 @@
 	pnsetp->maxpaths = 0;
 }
 
+/*
+ * Free the pnset_t pointed to by `pnsetp'.
+ */
+static void
+pnset_free(pnset_t *pnsetp)
+{
+	if (pnsetp != NULL) {
+		pnset_empty(pnsetp);
+		free(pnsetp);
+	}
+}
+
 /* PRINTFLIKE1 */
 static void
 warn(const char *format, ...)
--- a/usr/src/tools/scripts/check_rtime.pl	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/tools/scripts/check_rtime.pl	Mon Aug 17 09:14:35 2009 -0400
@@ -71,7 +71,6 @@
 
 # Define any directories we should skip completely.
 $SkipDirs = qr{ 
-	usr/lib/devfsadm |		# 4382889
 	usr/lib/libc |			# optimized libc
 	usr/lib/rcm |			# 4426119
 	usr/perl5 |			# alan's taking care of these :-)
@@ -229,15 +228,18 @@
 
 # Catch libintl and libw, although ld(1) will bind to these and thus determine
 # they're needed, their content was moved into libc as of on297 build 7.
-# libthread and libpthread were completely moved into libc as of on10 build 53.
-# libdl was moved into libc as of on10 build 49.  librt and libaio were moved
-# into libc as of Nevada build 44.
+# libsched was moved into libc as of on10 build 36.  libdl was moved into libc
+# as of on10 build 49.  libthread and libpthread were into libc as of on10 build
+# 53.  libdoor was moved into libc as of Nevada build 12.  librt and libaio were
+# moved into libc in Nevada build 44.
 $OldDeps = qr{ ^(?:
 	libintl\.so\.1 |
 	libw\.so\.1 |
+	libsched\.so\.1 |
+	libdl\.so\.1 |
 	libthread\.so\.1 |
 	libpthread\.so\.1 |
-	libdl\.so\.1 |
+	libdoor\.so\.1 |
 	librt\.so\.1 |
 	libaio\.so\.1
 	)$
--- a/usr/src/tools/scripts/checkpaths.sh	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/tools/scripts/checkpaths.sh	Mon Aug 17 09:14:35 2009 -0400
@@ -21,11 +21,9 @@
 #
 
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 
 # Quis custodiet ipsos custodies?
 
@@ -97,10 +95,9 @@
 #
 # The exception_list is generated from whichever input files are appropriate
 # for this workspace, so checking it obviates the need to check the inputs.
-elist=""
+
 if [ -r $SRC/tools/findunref/exception_list ]; then
-	validate_paths -k ISUSED -r -e '^\*' -b $SRC/.. \
-		$SRC/tools/findunref/exception_list
+	validate_paths -k ISUSED -r -e '^\*' $SRC/tools/findunref/exception_list
 fi
 
 # These are straightforward.
--- a/usr/src/tools/scripts/nightly.sh	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/tools/scripts/nightly.sh	Mon Aug 17 09:14:35 2009 -0400
@@ -3159,19 +3159,9 @@
 		mv $SRC/unref-${MACH}.out $SRC/unref-${MACH}.ref
 	fi
 
-	#
-	# For any SCM other than teamware, we want to disable the
-	# managed-by-SCCS test in findunref
-	#
-	findunref_all=""
-	if [ "$SCM_TYPE" != teamware ]; then
-		findunref_all="-a"
-	fi
- 
-	findunref $findunref_all -t $SRC/.build.tstamp $SRC/.. \
+	findunref -S $SCM_TYPE -t $SRC/.build.tstamp -s usr $CODEMGR_WS \
 	    ${TOOLS}/findunref/exception_list 2>> $mail_msg_file | \
-	    sort | sed -e s=^./src/=./= -e s=^./closed/=../closed/= \
-	    > $SRC/unref-${MACH}.out
+	    sort > $SRC/unref-${MACH}.out
 
 	if [ ! -f $SRC/unref-${MACH}.ref ]; then
 		cp $SRC/unref-${MACH}.out $SRC/unref-${MACH}.ref
--- a/usr/src/tools/scripts/xref.1	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/tools/scripts/xref.1	Mon Aug 17 09:14:35 2009 -0400
@@ -1,10 +1,8 @@
-.\" ident	"%Z%%M%	%I%	%E% SMI"
 .\" " CDDL HEADER START
 .\" "
 .\" " The contents of this file are subject to the terms of the
-.\" " Common Development and Distribution License, Version 1.0 only
-.\" " (the "License").  You may not use this file except in compliance
-.\" " with the License.
+.\" " Common Development and Distribution License (the "License").
+.\" " You may not use this file except in compliance with the License.
 .\" "
 .\" " You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 .\" " or http://www.opensolaris.org/os/licensing.
@@ -19,9 +17,9 @@
 .\" "
 .\" " CDDL HEADER END
 .\" "
-.\" "Copyright 2006 Sun Microsystems, Inc.  All rights reserved."
+.\" "Copyright 2009 Sun Microsystems, Inc.  All rights reserved."
 .\" "Use is subject to license terms."
-.TH xref 1 "10 Feb 2006"
+.TH xref 1 "10 Aug 2009"
 .SH NAME
 .I xref
 \- build and maintain source cross-references
@@ -40,7 +38,7 @@
 matching -- by default, files matching the following patterns are
 eligible:
 
-	*.[Ccshlxy] Makefile* *.il* *.cc *.adb llib-*
+	*.[Ccdshlxy] Makefile* *.il* *.cc llib-* *.xml *.dtd.* *.ndl
 
 However, this default behavior can be changed, either through command-line
 options or by setting the cross-reference customization macros (see
@@ -165,5 +163,5 @@
 xref will silently not build etags.
 .LP
 The ETAGS environment variable can be used to specify an alternate
-path to the etags utility.  For instance, to use the one packaged with
-Sun Freeware, set ETAGS to /opt/sfw/bin/etags prior to invoking xref.
+path to the etags utility.  For instance, to use the one included with
+SUNWgnu-emacs, set ETAGS to /usr/gnu/bin/etags prior to invoking xref.
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_node.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_node.h	Mon Aug 17 09:14:35 2009 -0400
@@ -217,12 +217,11 @@
 	uint32_t	n_flag;
 	smbmntinfo_t	*n_mount;
 	ino64_t		n_ino;
-	/* Lock for the next 8 is r_lkserlock */
+	/* Lock for the next 7 is r_lkserlock */
 	enum vtype	n_ovtype;	/* vnode type opened */
 	int		n_dirrefs;
 	struct smbfs_fctx	*n_dirseq;	/* ff context */
-	long		n_dirofs;	/* last ff offset */
-	long		n_direof;	/* End of dir. offset. */
+	int		n_dirofs;	/* last ff offset */
 	int		n_vcgenid;	/* gereration no. (reconnect) */
 	int		n_fidrefs;
 	uint16_t	n_fid;		/* file handle */
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_smb.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_smb.c	Mon Aug 17 09:14:35 2009 -0400
@@ -560,18 +560,6 @@
 		md_get_mem(mdp, fsa->fsa_tname, nlen, MB_MSYSTEM);
 	}
 
-	/*
-	 * If fs_name isn't NTFS they probably require resume keys.
-	 * This is another example of the client trying to fix a server
-	 * bug. This code uses the logic created by PR-3983209. See
-	 * long block comment in smbfs_smb_findnextLM2.
-	 */
-	if (strcmp(fsa->fsa_tname, "NTFS")) {
-		SMB_SS_LOCK(ssp);
-		ssp->ss_flags |= SMBS_RESUMEKEYS;
-		SMB_SS_UNLOCK(ssp);
-	}
-
 	smb_t2_done(t2p);
 	return (0);
 }
@@ -2304,14 +2292,13 @@
 	struct smb_vc *vcp = SSTOVC(ctx->f_ssp);
 	struct mbchain *mbp;
 	struct mdchain *mdp;
-	uint16_t tw, flags;
+	uint16_t ecnt, eos, lno, flags;
 	int len, error;
 
 	if (ctx->f_t2) {
 		smb_t2_done(ctx->f_t2);
 		ctx->f_t2 = NULL;
 	}
-	ctx->f_flags &= ~SMBFS_RDD_GOTRNAME;
 	flags = FIND2_RETURN_RESUME_KEYS | FIND2_CLOSE_ON_EOS;
 	if (ctx->f_flags & SMBFS_RDD_FINDSINGLE) {
 		flags |= FIND2_CLOSE_AFTER_REQUEST;
@@ -2330,7 +2317,6 @@
 		mb_put_uint16le(mbp, flags);
 		mb_put_uint16le(mbp, ctx->f_infolevel);
 		mb_put_uint32le(mbp, 0);
-		/* mb_put_uint8(mbp, SMB_DT_ASCII); specs? hah! */
 		len = ctx->f_wclen;
 		error = smbfs_fullpath(mbp, vcp, ctx->f_dnp, ctx->f_wildcard,
 		    &len, '\\');
@@ -2347,11 +2333,10 @@
 		mb_put_uint16le(mbp, ctx->f_Sid);
 		mb_put_uint16le(mbp, ctx->f_limit);
 		mb_put_uint16le(mbp, ctx->f_infolevel);
-		if (ctx->f_ssp->ss_flags & SMBS_RESUMEKEYS) {
-			mb_put_uint32le(mbp, ctx->f_rkey);
-		} else
-			mb_put_uint32le(mbp, 0);
+		/* Send whatever resume key we received... */
+		mb_put_uint32le(mbp, ctx->f_rkey);
 		mb_put_uint16le(mbp, flags);
+		/* ... and the resume name if we have one. */
 		if (ctx->f_rname) {
 			/* resume file name */
 			mb_put_mem(mbp, ctx->f_rname, ctx->f_rnamelen,
@@ -2361,67 +2346,66 @@
 		if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp)))
 			mb_put_uint8(mbp, 0);	/* 1st byte NULL Unicode char */
 		mb_put_uint8(mbp, 0);
-#if 0
-		struct timespec ts;
-		ts.tv_sec = 0;
-		ts.tv_nsec = 200 * 1000 * 1000;	/* 200ms */
-		if (vcp->vc_flags & SMBC_WIN95) {
-			/*
-			 * some implementations suggests to sleep here
-			 * for 200ms, due to the bug in the Win95.
-			 * I've didn't notice any problem, but put code
-			 * for it.
-			 */
-			msleep(&flags, 0, PVFS, "fix95", &ts);
-		}
-#endif
 	}
 	t2p->t2_maxpcount = 5 * 2;
-	t2p->t2_maxdcount = vcp->vc_txmax;
+	t2p->t2_maxdcount = 0xF000;	/* 64K less some overhead */
 	error = smb_t2_request(t2p);
 	if (error)
 		return (error);
+
+	/*
+	 * This is the "resume name" we just sent.
+	 * We want the new one (if any) that may be
+	 * found in the response we just received and
+	 * will now begin parsing.  Free the old one
+	 * now so we'll know if we found a new one.
+	 */
+	if (ctx->f_rname) {
+		kmem_free(ctx->f_rname, ctx->f_rnamelen);
+		ctx->f_rname = NULL;
+		ctx->f_rnamelen = 0;
+	}
+
 	mdp = &t2p->t2_rparam;
 	if (ctx->f_flags & SMBFS_RDD_FINDFIRST) {
 		if ((error = md_get_uint16le(mdp, &ctx->f_Sid)) != 0)
-			return (error);
+			goto nodata;
 		ctx->f_flags &= ~SMBFS_RDD_FINDFIRST;
 	}
-	if ((error = md_get_uint16le(mdp, &tw)) != 0)
-		return (error);
-	ctx->f_ecnt = tw; /* search count - # entries returned */
-	if ((error = md_get_uint16le(mdp, &tw)) != 0)
-		return (error);
+	md_get_uint16le(mdp, &ecnt);		/* entry count */
+	md_get_uint16le(mdp, &eos);		/* end of search */
+	md_get_uint16le(mdp, NULL);		/* EA err. off. */
+	error = md_get_uint16le(mdp, &lno);	/* last name off. */
+	if (error != 0)
+		goto nodata;
+
 	/*
-	 * tw now is the "end of search" flag. against an XP server tw
+	 * The "end of search" flag from an XP server sometimes
 	 * comes back zero when the prior find_next returned exactly
 	 * the number of entries requested.  in which case we'd try again
-	 * but the search has in fact been closed so an EBADF results.  our
-	 * circumvention is to check here for a zero search count.
+	 * but the search has in fact been closed so an EBADF results.
+	 * our circumvention is to check here for a zero entry count.
 	 */
-	if (tw || ctx->f_ecnt == 0)
+	ctx->f_ecnt = ecnt;
+	if (eos || ctx->f_ecnt == 0)
 		ctx->f_flags |= SMBFS_RDD_EOF | SMBFS_RDD_NOCLOSE;
-	if ((error = md_get_uint16le(mdp, &tw)) != 0)
-		return (error);
-	if ((error = md_get_uint16le(mdp, &tw)) != 0)
-		return (error);
 	if (ctx->f_ecnt == 0)
 		return (ENOENT);
-	ctx->f_rnameofs = tw;
-	mdp = &t2p->t2_rdata;
-	if (mdp->md_top == NULL) {
-		SMBVDEBUG("ecnt = %d, but data is NULL\n", ctx->f_ecnt);
-		return (ENOENT);
-	}
-#ifdef APPLE
-	if (mdp->md_top->m_len == 0) {
-		printf("bug: ecnt = %d, but m_len = 0 and m_next = %p "
-		    "(please report)\n", ctx->f_ecnt, mbp->mb_top->m_next);
-		return (ENOENT);
-	}
-#endif
+
+	/* Last Name Off (LNO) is the entry with the resume name. */
+	ctx->f_rnameofs = lno;
 	ctx->f_eofs = 0;
 	return (0);
+
+nodata:
+	/*
+	 * Failed parsing the FindFirst or FindNext response.
+	 * Force this directory listing closed, otherwise the
+	 * calling process may hang in an infinite loop.
+	 */
+	ctx->f_ecnt = 0; /* Force closed. */
+	ctx->f_flags |= SMBFS_RDD_EOF;
+	return (EIO);
 }
 
 static int
@@ -2478,11 +2462,9 @@
 	uint16_t date, time, wattr;
 	uint32_t size, next, dattr, resumekey = 0;
 	uint64_t llongint;
-	int error, svtz, cnt, fxsz, nmlen, recsz, otw;
+	int error, svtz, cnt, fxsz, nmlen, recsz;
 	struct timespec ts;
 
-again:
-	otw = 0;	/* nothing sent Over The Wire (yet) */
 	if (ctx->f_ecnt == 0) {
 		if (ctx->f_flags & SMBFS_RDD_EOF)
 			return (ENOENT);
@@ -2493,7 +2475,6 @@
 			return (error);
 		ctx->f_attr.fa_reqtime = ts;
 		ctx->f_otws++;
-		otw = 1;
 	}
 	t2p = ctx->f_t2;
 	mdp = &t2p->t2_rdata;
@@ -2515,7 +2496,9 @@
 		md_get_uint32le(mdp, NULL);	/* allocation size */
 		md_get_uint16le(mdp, &wattr);
 		ctx->f_attr.fa_attr = wattr;
-		md_get_uint8(mdp, &tb);
+		error = md_get_uint8(mdp, &tb);
+		if (error)
+			goto nodata;
 		size = nmlen = tb;
 		fxsz = 23;
 		recsz = next = 24 + nmlen;	/* docs misses zero byte @end */
@@ -2537,7 +2520,9 @@
 		/* freebsd bug: fa_attr endian bug */
 		md_get_uint32le(mdp, &dattr);	/* extended file attributes */
 		ctx->f_attr.fa_attr = dattr;
-		md_get_uint32le(mdp, &size);	/* name len */
+		error = md_get_uint32le(mdp, &size);	/* name len */
+		if (error)
+			goto nodata;
 		fxsz = 64; /* size ofinfo up to filename */
 		if (ctx->f_infolevel == SMB_FIND_BOTH_DIRECTORY_INFO) {
 			/*
@@ -2545,7 +2530,9 @@
 			 * a reserved byte, and ShortName(8.3 means 24 bytes,
 			 * as Leach defined it to always be Unicode)
 			 */
-			md_get_mem(mdp, NULL, 30, MB_MSYSTEM);
+			error = md_get_mem(mdp, NULL, 30, MB_MSYSTEM);
+			if (error)
+				goto nodata;
 			fxsz += 30;
 		}
 		recsz = next ? next : fxsz + size;
@@ -2565,15 +2552,16 @@
 
 	error = md_get_mem(mdp, cp, nmlen, MB_MSYSTEM);
 	if (error)
-		return (error);
+		goto nodata;
 	if (next) {
+		/* How much data to skip? */
 		cnt = next - nmlen - fxsz;
+		if (cnt < 0) {
+			SMBVDEBUG("out of sync\n");
+			goto nodata;
+		}
 		if (cnt > 0)
 			md_get_mem(mdp, NULL, cnt, MB_MSYSTEM);
-		else if (cnt < 0) {
-			SMBVDEBUG("out of sync\n");
-			return (EBADRPC);
-		}
 	}
 	/* Don't count any trailing null in the name. */
 	if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
@@ -2584,63 +2572,27 @@
 			nmlen--;
 	}
 	if (nmlen == 0)
-		return (EBADRPC);
+		goto nodata;
 
 	/*
-	 * Ref radar 3983209.  On a find-next we expect a server will
-	 * 1) if the continue bit is set, use the server's idea of current loc,
-	 * 2) else if the resume key is non-zero, use that location,
-	 * 3) else if the resume name is set, use that location,
-	 * 4) else use the server's idea of current location.
-	 *
-	 * Current NetApps don't do that.  If we send no continue bit, a zero
-	 * resume key, and a resume name, the NetApp ignores the resume name
-	 * and acts on the (zero) resume key, sending back the start of the
-	 * directory again.  Panther doesn't expose the netapp bug; Panther used
-	 * the continue bit, but that was changed for 2866172. Win2000 as a
-	 * client also relies upon the resume name, but they request a very
-	 * large number of files, so the bug would be seen only with very
-	 * large directories.
-	 *
-	 * Our fix is to notice if the second OTW op (the first find-next)
-	 * returns, in the first filename, the same filename we got back
-	 * at the start of the first OTW (the find-first).  In that case
-	 * we've detected the server bug and set SMBS_RESUMEKEYS, causing us
-	 * to send non-zero resume keys henceforth.
+	 * On a find-next we expect that the server will:
+	 * 1) if the continue bit is set, use the server's offset,
+	 * 2) else if the resume key is non-zero, use that offset,
+	 * 3) else if the resume name is set, use that offset,
+	 * 4) else use the server's idea of current offset.
 	 *
-	 * Caveat: if there's a netapp so old it doesn't negotiate NTLM 0.12
-	 * then we get no resume keys so f_rkey stays zero and this "fix"
-	 * changes nothing.
-	 *
-	 * Note due to a similar problem (4051871) we also set SMBS_RESUMEKEYS
-	 * for FAT volumes, at mount time.
+	 * We always set the resume key flag. If the server returns
+	 * a resume key then we should always send it back to them.
 	 */
-	if (otw && !(ctx->f_ssp->ss_flags & SMBS_RESUMEKEYS)) {
-		if (ctx->f_otws == 1) {
-			ctx->f_firstnmlen = nmlen;
-			ctx->f_firstnm = kmem_alloc(nmlen, KM_SLEEP);
-			bcopy(ctx->f_name, ctx->f_firstnm, nmlen);
-		} else if (ctx->f_otws == 2 && nmlen == ctx->f_firstnmlen &&
-		    !(bcmp(ctx->f_name, ctx->f_firstnm, nmlen) == 0)) {
-			struct smb_share *ssp = ctx->f_ssp;
-			SMBERROR(
-			    "server resume_name bug seen; using resume keys\n");
-			SMB_SS_LOCK(ssp);
-			ssp->ss_flags |= SMBS_RESUMEKEYS;
-			SMB_SS_UNLOCK(ssp);
-			ctx->f_ecnt = 0;
-			goto again; /* must redo last otw op! */
-		}
-	}
 	ctx->f_rkey = resumekey;
 
 	next = ctx->f_eofs + recsz;
 	if (ctx->f_rnameofs &&
-	    (ctx->f_flags & SMBFS_RDD_GOTRNAME) == 0 &&
-	    (ctx->f_rnameofs >= ctx->f_eofs &&
-	    ctx->f_rnameofs < (int)next)) {
+	    ctx->f_rnameofs >= ctx->f_eofs &&
+	    ctx->f_rnameofs < (int)next) {
 		/*
-		 * Server needs a resume filename.
+		 * This entry is the "resume name".
+		 * Save it for the next request.
 		 */
 		if (ctx->f_rnamelen != nmlen) {
 			if (ctx->f_rname)
@@ -2649,7 +2601,6 @@
 			ctx->f_rnamelen = nmlen;
 		}
 		bcopy(ctx->f_name, ctx->f_rname, nmlen);
-		ctx->f_flags |= SMBFS_RDD_GOTRNAME;
 	}
 	ctx->f_nmlen = nmlen;
 	ctx->f_eofs = next;
@@ -2658,6 +2609,18 @@
 
 	smbfs_fname_tolocal(ctx);
 	return (0);
+
+nodata:
+	/*
+	 * Something bad has happened and we ran out of data
+	 * before we could parse all f_ecnt entries expected.
+	 * Force this directory listing closed, otherwise the
+	 * calling process may hang in an infinite loop.
+	 */
+	SMBVDEBUG("ran out of data\n");
+	ctx->f_ecnt = 0; /* Force closed. */
+	ctx->f_flags |= SMBFS_RDD_EOF;
+	return (EIO);
 }
 
 static int
@@ -2715,22 +2678,9 @@
 
 	/*
 	 * Note: "limit" (maxcount) needs to fit in a short!
-	 *
-	 * smb_lookup always uses 1, which is OK (no wildcards).
-	 * Otherwise, this is smbfs_readdir, and we want to force
-	 * limit to be in the range 3 to 1000.  The low limit (3)
-	 * is so we can always give the caller one "real" entry
-	 * (something other than "." or "..") The high limit is
-	 * just tuning. WinNT used 512, Win2k 1366.  We use 1000.
-	 *
-	 * XXX: Move the [skip . ..] gunk to our caller (readdir).
 	 */
-	if ((ctx->f_flags & SMBFS_RDD_FINDSINGLE) == 0) {
-		if (limit < 3)
-			limit = 3;
-		if (limit > 1000)
-			limit = 1000;
-	}
+	if (limit > 0xffff)
+		limit = 0xffff;
 
 	ctx->f_scred = scrp;
 	for (;;) {
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr.h	Mon Aug 17 09:14:35 2009 -0400
@@ -93,7 +93,6 @@
 #define	SMBFS_RDD_FINDSINGLE	0x04
 /* note	SMBFS_RDD_USESEARCH	0x08 replaced by smbfs_fctx_type */
 #define	SMBFS_RDD_NOCLOSE	0x10
-#define	SMBFS_RDD_GOTRNAME	0x1000
 
 /*
  * Search context supplied by server
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr2.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr2.c	Mon Aug 17 09:14:35 2009 -0400
@@ -392,7 +392,6 @@
 	np->r_vnode = vp;
 	np->n_mount = mi;
 	np->r_hashq = rhtp;
-	np->n_direof = -1;
 	np->n_fid = SMB_FID_UNUSED;
 	np->n_uid = UID_NOBODY;
 	np->n_gid = GID_NOBODY;
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vnops.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vnops.c	Mon Aug 17 09:14:35 2009 -0400
@@ -66,6 +66,17 @@
 #include <fs/fs_subr.h>
 
 /*
+ * We assign directory offsets like the NFS client, where the
+ * offset increments by _one_ after each directory entry.
+ * Further, the entries "." and ".." are always at offsets
+ * zero and one (respectively) and the "real" entries from
+ * the server appear at offsets starting with two.  This
+ * macro is used to initialize the n_dirofs field after
+ * setting n_dirseq with a _findopen call.
+ */
+#define	FIRST_DIROFS	2
+
+/*
  * These characters are illegal in NTFS file names.
  * ref: http://support.microsoft.com/kb/147438
  *
@@ -285,9 +296,18 @@
 	}
 
 	/*
-	 * Directory open is easy.
+	 * Directory open.  See smbfs_readvdir()
 	 */
 	if (vp->v_type == VDIR) {
+		if (np->n_dirseq == NULL) {
+			/* first open */
+			error = smbfs_smb_findopen(np, "*", 1,
+			    SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
+			    &scred, &np->n_dirseq);
+			if (error != 0)
+				goto out;
+		}
+		np->n_dirofs = FIRST_DIROFS;
 		np->n_dirrefs++;
 		goto have_fid;
 	}
@@ -307,18 +327,16 @@
 	    np->n_vcgenid == ssp->ss_vcgenid) {
 		int upgrade = 0;
 
-		/* BEGIN CSTYLED */
 		if ((flag & FWRITE) &&
 		    !(np->n_rights & (SA_RIGHT_FILE_WRITE_DATA |
-				GENERIC_RIGHT_ALL_ACCESS |
-				GENERIC_RIGHT_WRITE_ACCESS)))
+		    GENERIC_RIGHT_ALL_ACCESS |
+		    GENERIC_RIGHT_WRITE_ACCESS)))
 			upgrade = 1;
 		if ((flag & FREAD) &&
 		    !(np->n_rights & (SA_RIGHT_FILE_READ_DATA |
-				GENERIC_RIGHT_ALL_ACCESS |
-				GENERIC_RIGHT_READ_ACCESS)))
+		    GENERIC_RIGHT_ALL_ACCESS |
+		    GENERIC_RIGHT_READ_ACCESS)))
 			upgrade = 1;
-		/* END CSTYLED */
 		if (!upgrade) {
 			/*
 			 *  the existing open is good enough
@@ -509,6 +527,7 @@
 			goto out;
 		if ((fctx = np->n_dirseq) != NULL) {
 			np->n_dirseq = NULL;
+			np->n_dirofs = 0;
 			error = smbfs_smb_findclose(fctx, &scred);
 		}
 	} else {
@@ -2573,49 +2592,69 @@
 smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
 	caller_context_t *ct)
 {
-	size_t		dbufsiz;
-	struct dirent64 *dp;
+	/*
+	 * Note: "limit" tells the SMB-level FindFirst/FindNext
+	 * functions how many directory entries to request in
+	 * each OtW call.  It needs to be large enough so that
+	 * we don't make lots of tiny OtW requests, but there's
+	 * no point making it larger than the maximum number of
+	 * OtW entries that would fit in a maximum sized trans2
+	 * response (64k / 48).  Beyond that, it's just tuning.
+	 * WinNT used 512, Win2k used 1366.  We use 1000.
+	 */
+	static const int limit = 1000;
+	/* Largest possible dirent size. */
+	static const size_t dbufsiz = DIRENT64_RECLEN(SMB_MAXFNAMELEN);
 	struct smb_cred scred;
 	vnode_t		*newvp;
 	struct smbnode	*np = VTOSMB(vp);
-	int		nmlen, reclen, error = 0;
-	long		offset, limit;
 	struct smbfs_fctx *ctx;
+	struct dirent64 *dp;
+	ssize_t		save_resid;
+	offset_t	save_offset; /* 64 bits */
+	int		offset; /* yes, 32 bits */
+	int		nmlen, error;
+	ushort_t	reclen;
 
 	ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone);
 
 	/* Make sure we serialize for n_dirseq use. */
 	ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));
 
-	/* Min size is DIRENT64_RECLEN(256) rounded up. */
-	if (uio->uio_resid < 512 || uio->uio_offset < 0)
+	/*
+	 * Make sure smbfs_open filled in n_dirseq
+	 */
+	if (np->n_dirseq == NULL)
+		return (EBADF);
+
+	/* Check for overflow of (32-bit) directory offset. */
+	if (uio->uio_loffset < 0 || uio->uio_loffset > INT32_MAX ||
+	    (uio->uio_loffset + uio->uio_resid) > INT32_MAX)
 		return (EINVAL);
 
+	/* Require space for at least one dirent. */
+	if (uio->uio_resid < dbufsiz)
+		return (EINVAL);
+
+#ifdef USE_DNLC
 	/*
 	 * This dnlc_purge_vp ensures that name cache for this dir will be
 	 * current - it'll only have the items for which the smbfs_nget
 	 * MAKEENTRY happened.
 	 */
-#ifdef NOT_YET
 	if (smbfs_fastlookup)
 		dnlc_purge_vp(vp);
 #endif
 	SMBVDEBUG("dirname='%s'\n", np->n_rpath);
 	smb_credinit(&scred, cr);
-	dbufsiz = DIRENT64_RECLEN(SMB_MAXFNAMELEN);
 	dp = kmem_alloc(dbufsiz, KM_SLEEP);
 
-	offset = uio->uio_offset; /* NB: "cookie" */
-	limit = uio->uio_resid / DIRENT64_RECLEN(1);
-	SMBVDEBUG("offset=0x%ld, limit=0x%ld\n", offset, limit);
-
-	if (offset == 0) {
-		/* Don't know EOF until findclose */
-		np->n_direof = -1;
-	} else if (offset == np->n_direof) {
-		/* Arrived at end of directory. */
-		goto out;
-	}
+	save_resid = uio->uio_resid;
+	save_offset = uio->uio_loffset;
+	offset = uio->uio_offset;
+	SMBVDEBUG("in: offset=%d, resid=%d\n",
+	    (int)uio->uio_offset, (int)uio->uio_resid);
+	error = 0;
 
 	/*
 	 * Generate the "." and ".." entries here so we can
@@ -2623,20 +2662,24 @@
 	 * (2) deal with getting their I numbers which the
 	 * findnext below does only for normal names.
 	 */
-	while (limit && offset < 2) {
-		limit--;
+	while (offset < FIRST_DIROFS) {
+		/*
+		 * Tricky bit filling in the first two:
+		 * offset 0 is ".", offset 1 is ".."
+		 * so strlen of these is offset+1.
+		 */
 		reclen = DIRENT64_RECLEN(offset + 1);
+		if (uio->uio_resid < reclen)
+			goto out;
 		bzero(dp, reclen);
-		/*LINTED*/
 		dp->d_reclen = reclen;
-		/* Tricky: offset 0 is ".", offset 1 is ".." */
 		dp->d_name[0] = '.';
 		dp->d_name[1] = '.';
 		dp->d_name[offset + 1] = '\0';
 		/*
 		 * Want the real I-numbers for the "." and ".."
 		 * entries.  For these two names, we know that
-		 * smbfslookup can do this all locally.
+		 * smbfslookup can get the nodes efficiently.
 		 */
 		error = smbfslookup(vp, dp->d_name, &newvp, cr, 1, ct);
 		if (error) {
@@ -2645,21 +2688,29 @@
 			dp->d_ino = VTOSMB(newvp)->n_ino;
 			VN_RELE(newvp);
 		}
-		dp->d_off = offset + 1;  /* see d_off below */
-		error = uiomove(dp, dp->d_reclen, UIO_READ, uio);
+		/*
+		 * Note: d_off is the offset that a user-level program
+		 * should seek to for reading the NEXT directory entry.
+		 * See libc: readdir, telldir, seekdir
+		 */
+		dp->d_off = offset + 1;
+		error = uiomove(dp, reclen, UIO_READ, uio);
 		if (error)
 			goto out;
+		/*
+		 * Note: uiomove updates uio->uio_offset,
+		 * but we want it to be our "cookie" value,
+		 * which just counts dirents ignoring size.
+		 */
 		uio->uio_offset = ++offset;
 	}
-	if (limit == 0)
-		goto out;
-	if (offset != np->n_dirofs || np->n_dirseq == NULL) {
-		SMBVDEBUG("Reopening search %ld:%ld\n", offset, np->n_dirofs);
-		if (np->n_dirseq) {
-			(void) smbfs_smb_findclose(np->n_dirseq, &scred);
-			np->n_dirseq = NULL;
-		}
-		np->n_dirofs = 2;
+
+	/*
+	 * If there was a backward seek, we have to reopen.
+	 */
+	if (offset < np->n_dirofs) {
+		SMBVDEBUG("Reopening search %d:%d\n",
+		    offset, np->n_dirofs);
 		error = smbfs_smb_findopen(np, "*", 1,
 		    SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
 		    &scred, &ctx);
@@ -2667,72 +2718,97 @@
 			SMBVDEBUG("can not open search, error = %d", error);
 			goto out;
 		}
+		/* free the old one */
+		(void) smbfs_smb_findclose(np->n_dirseq, &scred);
+		/* save the new one */
 		np->n_dirseq = ctx;
-	} else
+		np->n_dirofs = FIRST_DIROFS;
+	} else {
 		ctx = np->n_dirseq;
-	while (np->n_dirofs < offset) {
-		if (smbfs_smb_findnext(ctx, offset - np->n_dirofs++,
-		    &scred) != 0) {
-			(void) smbfs_smb_findclose(np->n_dirseq, &scred);
-			np->n_dirseq = NULL;
-			np->n_direof = np->n_dirofs;
-			np->n_dirofs = 0;
-			*eofp = 1;
-			error = 0;
-			goto out;
-		}
 	}
-	error = 0;
-	for (; limit; limit--) {
+
+	/*
+	 * Skip entries before the requested offset.
+	 */
+	while (np->n_dirofs < offset) {
 		error = smbfs_smb_findnext(ctx, limit, &scred);
-		if (error) {
-			if (error == EBADRPC)
-				error = ENOENT;
-			(void) smbfs_smb_findclose(np->n_dirseq, &scred);
-			np->n_dirseq = NULL;
-			np->n_direof = np->n_dirofs;
-			np->n_dirofs = 0;
-			*eofp = 1;
-			error = 0;
-			break;
-		}
+		if (error != 0)
+			goto out;
 		np->n_dirofs++;
+	}
+
+	/*
+	 * While there's room in the caller's buffer:
+	 *	get a directory entry from SMB,
+	 *	convert to a dirent, copyout.
+	 * We stop when there is no longer room for a
+	 * maximum sized dirent because we must decide
+	 * before we know anything about the next entry.
+	 */
+	while (uio->uio_resid >= dbufsiz) {
+		error = smbfs_smb_findnext(ctx, limit, &scred);
+		if (error != 0)
+			goto out;
+		np->n_dirofs++;
+
 		/* Sanity check the name length. */
 		nmlen = ctx->f_nmlen;
 		if (nmlen > SMB_MAXFNAMELEN) {
 			nmlen = SMB_MAXFNAMELEN;
 			SMBVDEBUG("Truncating name: %s\n", ctx->f_name);
 		}
-		reclen = DIRENT64_RECLEN(nmlen);
-		if (uio->uio_resid < reclen)
-			break;
-		bzero(dp, reclen);
-		/*LINTED*/
-		dp->d_reclen = reclen;
-		dp->d_ino = ctx->f_attr.fa_ino;
-		/*
-		 * Note: d_off is the offset that a user-level program
-		 * should seek to for reading the _next_ directory entry.
-		 * See libc: readdir, telldir, seekdir
-		 */
-		dp->d_off = offset + 1;
-		bcopy(ctx->f_name, dp->d_name, nmlen);
-		dp->d_name[nmlen] = '\0';
 #ifdef NOT_YET
 		if (smbfs_fastlookup) {
-			if (smbfs_nget(vp, ctx->f_name,
-			    ctx->f_nmlen, &ctx->f_attr, &newvp) == 0)
+			if (smbfs_nget(vp, ctx->f_name, nmlen,
+			    &ctx->f_attr, &newvp) == 0)
 				VN_RELE(newvp);
 		}
 #endif /* NOT_YET */
-		error = uiomove(dp, dp->d_reclen, UIO_READ, uio);
+
+		reclen = DIRENT64_RECLEN(nmlen);
+		bzero(dp, reclen);
+		dp->d_reclen = reclen;
+		bcopy(ctx->f_name, dp->d_name, nmlen);
+		dp->d_name[nmlen] = '\0';
+		dp->d_ino = ctx->f_attr.fa_ino;
+		dp->d_off = offset + 1;	/* See d_off comment above */
+		error = uiomove(dp, reclen, UIO_READ, uio);
 		if (error)
-			break;
+			goto out;
+		/* See comment re. uio_offset above. */
 		uio->uio_offset = ++offset;
 	}
-	if (error == ENOENT)
+
+out:
+	/*
+	 * When we come to the end of a directory, the
+	 * SMB-level functions return ENOENT, but the
+	 * caller is not expecting an error return.
+	 *
+	 * Also note that we must delay the call to
+	 * smbfs_smb_findclose(np->n_dirseq, ...)
+	 * until smbfs_close so that all reads at the
+	 * end of the directory will return no data.
+	 */
+	if (error == ENOENT) {
 		error = 0;
-out:
+		if (eofp)
+			*eofp = 1;
+	}
+	/*
+	 * If we encountered an error (i.e. "access denied")
+	 * from the FindFirst call, we will have copied out
+	 * the "." and ".." entries leaving offset == 2.
+	 * In that case, restore the original offset/resid
+	 * so the caller gets no data with the error.
+	 */
+	if (error != 0 && offset == FIRST_DIROFS) {
+		uio->uio_loffset = save_offset;
+		uio->uio_resid = save_resid;
+	}
+	SMBVDEBUG("out: offset=%d, resid=%d\n",
+	    (int)uio->uio_offset, (int)uio->uio_resid);
+
 	kmem_free(dp, dbufsiz);
 	smb_credrele(&scred);
 	return (error);
--- a/usr/src/uts/common/fs/zfs/dbuf.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dbuf.c	Mon Aug 17 09:14:35 2009 -0400
@@ -109,7 +109,7 @@
 dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid)
 {
 	dbuf_hash_table_t *h = &dbuf_hash_table;
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	uint64_t obj = dn->dn_object;
 	uint64_t hv = DBUF_HASH(os, obj, level, blkid);
 	uint64_t idx = hv & h->hash_table_mask;
@@ -140,7 +140,7 @@
 dbuf_hash_insert(dmu_buf_impl_t *db)
 {
 	dbuf_hash_table_t *h = &dbuf_hash_table;
-	objset_impl_t *os = db->db_objset;
+	objset_t *os = db->db_objset;
 	uint64_t obj = db->db.db_object;
 	int level = db->db_level;
 	uint64_t blkid = db->db_blkid;
@@ -894,7 +894,7 @@
 dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 {
 	dnode_t *dn = db->db_dnode;
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	dbuf_dirty_record_t **drp, *dr;
 	int drop_struct_lock = FALSE;
 	boolean_t do_free_accounting = B_FALSE;
@@ -1488,7 +1488,7 @@
 dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
     dmu_buf_impl_t *parent, blkptr_t *blkptr)
 {
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	dmu_buf_impl_t *db, *odb;
 
 	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
@@ -2011,7 +2011,7 @@
 	arc_buf_t **datap = &dr->dt.dl.dr_data;
 	dmu_buf_impl_t *db = dr->dr_dbuf;
 	dnode_t *dn = db->db_dnode;
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	uint64_t txg = tx->tx_txg;
 
 	ASSERT(dmu_tx_is_syncing(tx));
@@ -2182,7 +2182,7 @@
 {
 	dmu_buf_impl_t *db = dr->dr_dbuf;
 	dnode_t *dn = db->db_dnode;
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	dmu_buf_impl_t *parent = db->db_parent;
 	uint64_t txg = tx->tx_txg;
 	zbookmark_t zb;
@@ -2279,7 +2279,7 @@
 {
 	dmu_buf_impl_t *db = vdb;
 	dnode_t *dn = db->db_dnode;
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	blkptr_t *bp = zio->io_bp;
 	blkptr_t *bp_orig = &zio->io_bp_orig;
 	uint64_t fill = 0;
--- a/usr/src/uts/common/fs/zfs/dmu.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Mon Aug 17 09:14:35 2009 -0400
@@ -99,7 +99,7 @@
 	dmu_buf_impl_t *db;
 	int err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 	blkid = dbuf_whichblock(dn, offset);
@@ -150,7 +150,7 @@
 	dmu_buf_impl_t *db;
 	int error;
 
-	error = dnode_hold(os->os, object, FTAG, &dn);
+	error = dnode_hold(os, object, FTAG, &dn);
 	if (error)
 		return (error);
 
@@ -282,7 +282,7 @@
 	dnode_t *dn;
 	int err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 
@@ -335,7 +335,7 @@
 		return;
 
 	if (len == 0) {  /* they're interested in the bonus buffer */
-		dn = os->os->os_meta_dnode;
+		dn = os->os_meta_dnode;
 
 		if (object == 0 || object >= DN_MAX_OBJECT)
 			return;
@@ -352,7 +352,7 @@
 	 * already cached, we will do a *synchronous* read in the
 	 * dnode_hold() call.  The same is true for any indirects.
 	 */
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err != 0)
 		return;
 
@@ -484,7 +484,7 @@
 	dnode_t *dn;
 	int err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err != 0)
 		return (err);
 	err = dmu_free_long_range_impl(os, dn, offset, length, FALSE);
@@ -499,7 +499,7 @@
 	dmu_tx_t *tx;
 	int err;
 
-	err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
 	    FTAG, &dn);
 	if (err != 0)
 		return (err);
@@ -527,7 +527,7 @@
     uint64_t size, dmu_tx_t *tx)
 {
 	dnode_t *dn;
-	int err = dnode_hold(os->os, object, FTAG, &dn);
+	int err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 	ASSERT(offset < UINT64_MAX);
@@ -545,7 +545,7 @@
 	dmu_buf_t **dbp;
 	int numbufs, err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 
@@ -852,8 +852,7 @@
 		dbuf_rele(db, FTAG);
 	} else {
 		dbuf_rele(db, FTAG);
-		ASSERT(dn->dn_objset->os.os == dn->dn_objset);
-		dmu_write(&dn->dn_objset->os, dn->dn_object, offset, blksz,
+		dmu_write(dn->dn_objset, dn->dn_object, offset, blksz,
 		    buf->b_data, tx);
 		dmu_return_arcbuf(buf);
 	}
@@ -930,7 +929,7 @@
     blkptr_t *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
-	objset_impl_t *os = db->db_objset;
+	objset_t *os = db->db_objset;
 	dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
 	tx_state_t *tx = &dp->dp_tx;
 	dbuf_dirty_record_t *dr;
@@ -1078,7 +1077,7 @@
 	dnode_t *dn;
 	int err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 	err = dnode_set_blksz(dn, size, ibs, tx);
@@ -1093,7 +1092,7 @@
 	dnode_t *dn;
 
 	/* XXX assumes dnode_hold will not get an i/o error */
-	(void) dnode_hold(os->os, object, FTAG, &dn);
+	(void) dnode_hold(os, object, FTAG, &dn);
 	ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS);
 	dn->dn_checksum = checksum;
 	dnode_setdirty(dn, tx);
@@ -1107,7 +1106,7 @@
 	dnode_t *dn;
 
 	/* XXX assumes dnode_hold will not get an i/o error */
-	(void) dnode_hold(os->os, object, FTAG, &dn);
+	(void) dnode_hold(os, object, FTAG, &dn);
 	ASSERT(compress < ZIO_COMPRESS_FUNCTIONS);
 	dn->dn_compress = compress;
 	dnode_setdirty(dn, tx);
@@ -1120,7 +1119,7 @@
 	dnode_t *dn;
 	int i, err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 	/*
@@ -1134,7 +1133,7 @@
 	if (i != TXG_SIZE) {
 		dnode_rele(dn, FTAG);
 		txg_wait_synced(dmu_objset_pool(os), 0);
-		err = dnode_hold(os->os, object, FTAG, &dn);
+		err = dnode_hold(os, object, FTAG, &dn);
 		if (err)
 			return (err);
 	}
@@ -1176,7 +1175,7 @@
 dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi)
 {
 	dnode_t *dn;
-	int err = dnode_hold(os->os, object, FTAG, &dn);
+	int err = dnode_hold(os, object, FTAG, &dn);
 
 	if (err)
 		return (err);
--- a/usr/src/uts/common/fs/zfs/dmu_object.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dmu_object.c	Mon Aug 17 09:14:35 2009 -0400
@@ -32,16 +32,15 @@
 dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
-	objset_impl_t *osi = os->os;
 	uint64_t object;
 	uint64_t L2_dnode_count = DNODES_PER_BLOCK <<
-	    (osi->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT);
+	    (os->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT);
 	dnode_t *dn = NULL;
 	int restarted = B_FALSE;
 
-	mutex_enter(&osi->os_obj_lock);
+	mutex_enter(&os->os_obj_lock);
 	for (;;) {
-		object = osi->os_obj_next;
+		object = os->os_obj_next;
 		/*
 		 * Each time we polish off an L2 bp worth of dnodes
 		 * (2^13 objects), move to another L2 bp that's still
@@ -51,14 +50,14 @@
 		 */
 		if (P2PHASE(object, L2_dnode_count) == 0) {
 			uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
-			int error = dnode_next_offset(osi->os_meta_dnode,
+			int error = dnode_next_offset(os->os_meta_dnode,
 			    DNODE_FIND_HOLE,
 			    &offset, 2, DNODES_PER_BLOCK >> 2, 0);
 			restarted = B_TRUE;
 			if (error == 0)
 				object = offset >> DNODE_SHIFT;
 		}
-		osi->os_obj_next = ++object;
+		os->os_obj_next = ++object;
 
 		/*
 		 * XXX We should check for an i/o error here and return
@@ -66,19 +65,19 @@
 		 * dmu_tx_assign(), but there is currently no mechanism
 		 * to do so.
 		 */
-		(void) dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE,
+		(void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
 		    FTAG, &dn);
 		if (dn)
 			break;
 
 		if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
-			osi->os_obj_next = object - 1;
+			os->os_obj_next = object - 1;
 	}
 
 	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
 	dnode_rele(dn, FTAG);
 
-	mutex_exit(&osi->os_obj_lock);
+	mutex_exit(&os->os_obj_lock);
 
 	dmu_tx_add_new_object(tx, os, object);
 	return (object);
@@ -94,7 +93,7 @@
 	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
 		return (EBADF);
 
-	err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
 	if (err)
 		return (err);
 	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
@@ -116,7 +115,7 @@
 	if (object == DMU_META_DNODE_OBJECT)
 		return (EBADF);
 
-	err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
 	    FTAG, &dn);
 	if (err)
 		return (err);
@@ -166,7 +165,7 @@
 
 	ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
 
-	err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
 	    FTAG, &dn);
 	if (err)
 		return (err);
@@ -185,7 +184,7 @@
 	uint64_t offset = (*objectp + 1) << DNODE_SHIFT;
 	int error;
 
-	error = dnode_next_offset(os->os->os_meta_dnode,
+	error = dnode_next_offset(os->os_meta_dnode,
 	    (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);
 
 	*objectp = offset >> DNODE_SHIFT;
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Mon Aug 17 09:14:35 2009 -0400
@@ -45,13 +45,13 @@
 spa_t *
 dmu_objset_spa(objset_t *os)
 {
-	return (os->os->os_spa);
+	return (os->os_spa);
 }
 
 zilog_t *
 dmu_objset_zil(objset_t *os)
 {
-	return (os->os->os_zil);
+	return (os->os_zil);
 }
 
 dsl_pool_t *
@@ -59,82 +59,88 @@
 {
 	dsl_dataset_t *ds;
 
-	if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir)
+	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
 		return (ds->ds_dir->dd_pool);
 	else
-		return (spa_get_dsl(os->os->os_spa));
+		return (spa_get_dsl(os->os_spa));
 }
 
 dsl_dataset_t *
 dmu_objset_ds(objset_t *os)
 {
-	return (os->os->os_dsl_dataset);
+	return (os->os_dsl_dataset);
 }
 
 dmu_objset_type_t
 dmu_objset_type(objset_t *os)
 {
-	return (os->os->os_phys->os_type);
+	return (os->os_phys->os_type);
 }
 
 void
 dmu_objset_name(objset_t *os, char *buf)
 {
-	dsl_dataset_name(os->os->os_dsl_dataset, buf);
+	dsl_dataset_name(os->os_dsl_dataset, buf);
 }
 
 uint64_t
 dmu_objset_id(objset_t *os)
 {
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 
 	return (ds ? ds->ds_object : 0);
 }
 
+uint64_t
+dmu_objset_logbias(objset_t *os)
+{
+	return (os->os_logbias);
+}
+
 static void
 checksum_changed_cb(void *arg, uint64_t newval)
 {
-	objset_impl_t *osi = arg;
+	objset_t *os = arg;
 
 	/*
 	 * Inheritance should have been done by now.
 	 */
 	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
 
-	osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
+	os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
 }
 
 static void
 compression_changed_cb(void *arg, uint64_t newval)
 {
-	objset_impl_t *osi = arg;
+	objset_t *os = arg;
 
 	/*
 	 * Inheritance and range checking should have been done by now.
 	 */
 	ASSERT(newval != ZIO_COMPRESS_INHERIT);
 
-	osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
+	os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
 }
 
 static void
 copies_changed_cb(void *arg, uint64_t newval)
 {
-	objset_impl_t *osi = arg;
+	objset_t *os = arg;
 
 	/*
 	 * Inheritance and range checking should have been done by now.
 	 */
 	ASSERT(newval > 0);
-	ASSERT(newval <= spa_max_replication(osi->os_spa));
+	ASSERT(newval <= spa_max_replication(os->os_spa));
 
-	osi->os_copies = newval;
+	os->os_copies = newval;
 }
 
 static void
 primary_cache_changed_cb(void *arg, uint64_t newval)
 {
-	objset_impl_t *osi = arg;
+	objset_t *os = arg;
 
 	/*
 	 * Inheritance and range checking should have been done by now.
@@ -142,13 +148,13 @@
 	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
 	    newval == ZFS_CACHE_METADATA);
 
-	osi->os_primary_cache = newval;
+	os->os_primary_cache = newval;
 }
 
 static void
 secondary_cache_changed_cb(void *arg, uint64_t newval)
 {
-	objset_impl_t *osi = arg;
+	objset_t *os = arg;
 
 	/*
 	 * Inheritance and range checking should have been done by now.
@@ -156,7 +162,19 @@
 	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
 	    newval == ZFS_CACHE_METADATA);
 
-	osi->os_secondary_cache = newval;
+	os->os_secondary_cache = newval;
+}
+
+static void
+logbias_changed_cb(void *arg, uint64_t newval)
+{
+	objset_t *os = arg;
+
+	ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
+	    newval == ZFS_LOGBIAS_THROUGHPUT);
+	os->os_logbias = newval;
+	if (os->os_zil)
+		zil_set_logbias(os->os_zil, newval);
 }
 
 void
@@ -177,39 +195,38 @@
 
 int
 dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
-    objset_impl_t **osip)
+    objset_t **osp)
 {
-	objset_impl_t *osi;
+	objset_t *os;
 	int i, err;
 
 	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
 
-	osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
-	osi->os.os = osi;
-	osi->os_dsl_dataset = ds;
-	osi->os_spa = spa;
-	osi->os_rootbp = bp;
-	if (!BP_IS_HOLE(osi->os_rootbp)) {
+	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
+	os->os_dsl_dataset = ds;
+	os->os_spa = spa;
+	os->os_rootbp = bp;
+	if (!BP_IS_HOLE(os->os_rootbp)) {
 		uint32_t aflags = ARC_WAIT;
 		zbookmark_t zb;
 		zb.zb_objset = ds ? ds->ds_object : 0;
 		zb.zb_object = 0;
 		zb.zb_level = -1;
 		zb.zb_blkid = 0;
-		if (DMU_OS_IS_L2CACHEABLE(osi))
+		if (DMU_OS_IS_L2CACHEABLE(os))
 			aflags |= ARC_L2CACHE;
 
-		dprintf_bp(osi->os_rootbp, "reading %s", "");
+		dprintf_bp(os->os_rootbp, "reading %s", "");
 		/*
 		 * NB: when bprewrite scrub can change the bp,
 		 * and this is called from dmu_objset_open_ds_os, the bp
 		 * could change, and we'll need a lock.
 		 */
-		err = arc_read_nolock(NULL, spa, osi->os_rootbp,
-		    arc_getbuf_func, &osi->os_phys_buf,
+		err = arc_read_nolock(NULL, spa, os->os_rootbp,
+		    arc_getbuf_func, &os->os_phys_buf,
 		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
 		if (err) {
-			kmem_free(osi, sizeof (objset_impl_t));
+			kmem_free(os, sizeof (objset_t));
 			/* convert checksum errors into IO errors */
 			if (err == ECKSUM)
 				err = EIO;
@@ -218,27 +235,27 @@
 
 		/* Increase the blocksize if we are permitted. */
 		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
-		    arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) {
+		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
 			arc_buf_t *buf = arc_buf_alloc(spa,
-			    sizeof (objset_phys_t), &osi->os_phys_buf,
+			    sizeof (objset_phys_t), &os->os_phys_buf,
 			    ARC_BUFC_METADATA);
 			bzero(buf->b_data, sizeof (objset_phys_t));
-			bcopy(osi->os_phys_buf->b_data, buf->b_data,
-			    arc_buf_size(osi->os_phys_buf));
-			(void) arc_buf_remove_ref(osi->os_phys_buf,
-			    &osi->os_phys_buf);
-			osi->os_phys_buf = buf;
+			bcopy(os->os_phys_buf->b_data, buf->b_data,
+			    arc_buf_size(os->os_phys_buf));
+			(void) arc_buf_remove_ref(os->os_phys_buf,
+			    &os->os_phys_buf);
+			os->os_phys_buf = buf;
 		}
 
-		osi->os_phys = osi->os_phys_buf->b_data;
-		osi->os_flags = osi->os_phys->os_flags;
+		os->os_phys = os->os_phys_buf->b_data;
+		os->os_flags = os->os_phys->os_flags;
 	} else {
 		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
 		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
-		osi->os_phys_buf = arc_buf_alloc(spa, size,
-		    &osi->os_phys_buf, ARC_BUFC_METADATA);
-		osi->os_phys = osi->os_phys_buf->b_data;
-		bzero(osi->os_phys, size);
+		os->os_phys_buf = arc_buf_alloc(spa, size,
+		    &os->os_phys_buf, ARC_BUFC_METADATA);
+		os->os_phys = os->os_phys_buf->b_data;
+		bzero(os->os_phys, size);
 	}
 
 	/*
@@ -249,61 +266,64 @@
 	 */
 	if (ds) {
 		err = dsl_prop_register(ds, "primarycache",
-		    primary_cache_changed_cb, osi);
+		    primary_cache_changed_cb, os);
 		if (err == 0)
 			err = dsl_prop_register(ds, "secondarycache",
-			    secondary_cache_changed_cb, osi);
+			    secondary_cache_changed_cb, os);
 		if (!dsl_dataset_is_snapshot(ds)) {
 			if (err == 0)
 				err = dsl_prop_register(ds, "checksum",
-				    checksum_changed_cb, osi);
+				    checksum_changed_cb, os);
 			if (err == 0)
 				err = dsl_prop_register(ds, "compression",
-				    compression_changed_cb, osi);
+				    compression_changed_cb, os);
 			if (err == 0)
 				err = dsl_prop_register(ds, "copies",
-				    copies_changed_cb, osi);
+				    copies_changed_cb, os);
+			if (err == 0)
+				err = dsl_prop_register(ds, "logbias",
+				    logbias_changed_cb, os);
 		}
 		if (err) {
-			VERIFY(arc_buf_remove_ref(osi->os_phys_buf,
-			    &osi->os_phys_buf) == 1);
-			kmem_free(osi, sizeof (objset_impl_t));
+			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
+			    &os->os_phys_buf) == 1);
+			kmem_free(os, sizeof (objset_t));
 			return (err);
 		}
 	} else if (ds == NULL) {
 		/* It's the meta-objset. */
-		osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
-		osi->os_compress = ZIO_COMPRESS_LZJB;
-		osi->os_copies = spa_max_replication(spa);
-		osi->os_primary_cache = ZFS_CACHE_ALL;
-		osi->os_secondary_cache = ZFS_CACHE_ALL;
+		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
+		os->os_compress = ZIO_COMPRESS_LZJB;
+		os->os_copies = spa_max_replication(spa);
+		os->os_primary_cache = ZFS_CACHE_ALL;
+		os->os_secondary_cache = ZFS_CACHE_ALL;
 	}
 
-	osi->os_zil_header = osi->os_phys->os_zil_header;
-	osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header);
+	os->os_zil_header = os->os_phys->os_zil_header;
+	os->os_zil = zil_alloc(os, &os->os_zil_header);
 
 	for (i = 0; i < TXG_SIZE; i++) {
-		list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t),
+		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
 		    offsetof(dnode_t, dn_dirty_link[i]));
-		list_create(&osi->os_free_dnodes[i], sizeof (dnode_t),
+		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
 		    offsetof(dnode_t, dn_dirty_link[i]));
 	}
-	list_create(&osi->os_dnodes, sizeof (dnode_t),
+	list_create(&os->os_dnodes, sizeof (dnode_t),
 	    offsetof(dnode_t, dn_link));
-	list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
+	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
 	    offsetof(dmu_buf_impl_t, db_link));
 
-	mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
 
-	osi->os_meta_dnode = dnode_special_open(osi,
-	    &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
-	if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) {
-		osi->os_userused_dnode = dnode_special_open(osi,
-		    &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
-		osi->os_groupused_dnode = dnode_special_open(osi,
-		    &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
+	os->os_meta_dnode = dnode_special_open(os,
+	    &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
+	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
+		os->os_userused_dnode = dnode_special_open(os,
+		    &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
+		os->os_groupused_dnode = dnode_special_open(os,
+		    &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
 	}
 
 	/*
@@ -311,117 +331,95 @@
 	 * have ds_opening_lock
 	 */
 	if (ds) {
-		VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi,
-		    dmu_objset_evict));
+		mutex_enter(&ds->ds_lock);
+		ASSERT(ds->ds_objset == NULL);
+		ds->ds_objset = os;
+		mutex_exit(&ds->ds_lock);
 	}
 
-	*osip = osi;
-	return (0);
-}
-
-static int
-dmu_objset_open_ds_os(dsl_dataset_t *ds, objset_t *os, dmu_objset_type_t type)
-{
-	objset_impl_t *osi;
-
-	mutex_enter(&ds->ds_opening_lock);
-	osi = dsl_dataset_get_user_ptr(ds);
-	if (osi == NULL) {
-		int err;
-
-		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
-		    ds, &ds->ds_phys->ds_bp, &osi);
-		if (err) {
-			mutex_exit(&ds->ds_opening_lock);
-			return (err);
-		}
-	}
-	mutex_exit(&ds->ds_opening_lock);
-
-	os->os = osi;
-	os->os_mode = DS_MODE_NOHOLD;
-
-	if (type != DMU_OST_ANY && type != os->os->os_phys->os_type)
-		return (EINVAL);
+	*osp = os;
 	return (0);
 }
 
 int
-dmu_objset_open_ds(dsl_dataset_t *ds, dmu_objset_type_t type, objset_t **osp)
+dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
 {
-	objset_t *os;
+	int err = 0;
+
+	mutex_enter(&ds->ds_opening_lock);
+	*osp = ds->ds_objset;
+	if (*osp == NULL) {
+		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
+		    ds, &ds->ds_phys->ds_bp, osp);
+	}
+	mutex_exit(&ds->ds_opening_lock);
+	return (err);
+}
+
+/* called from zpl */
+int
+dmu_objset_hold(const char *name, void *tag, objset_t **osp)
+{
+	dsl_dataset_t *ds;
 	int err;
 
-	os = kmem_alloc(sizeof (objset_t), KM_SLEEP);
-	err = dmu_objset_open_ds_os(ds, os, type);
+	err = dsl_dataset_hold(name, tag, &ds);
 	if (err)
-		kmem_free(os, sizeof (objset_t));
-	else
-		*osp = os;
+		return (err);
+
+	err = dmu_objset_from_ds(ds, osp);
+	if (err)
+		dsl_dataset_rele(ds, tag);
+
 	return (err);
 }
 
 /* called from zpl */
 int
-dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
-    objset_t **osp)
+dmu_objset_own(const char *name, dmu_objset_type_t type,
+    boolean_t readonly, void *tag, objset_t **osp)
 {
-	objset_t *os;
 	dsl_dataset_t *ds;
 	int err;
 
-	ASSERT(DS_MODE_TYPE(mode) == DS_MODE_USER ||
-	    DS_MODE_TYPE(mode) == DS_MODE_OWNER);
+	err = dsl_dataset_own(name, B_FALSE, tag, &ds);
+	if (err)
+		return (err);
 
-	os = kmem_alloc(sizeof (objset_t), KM_SLEEP);
-	if (DS_MODE_TYPE(mode) == DS_MODE_USER)
-		err = dsl_dataset_hold(name, os, &ds);
-	else
-		err = dsl_dataset_own(name, mode, os, &ds);
+	err = dmu_objset_from_ds(ds, osp);
 	if (err) {
-		kmem_free(os, sizeof (objset_t));
-		return (err);
+		dsl_dataset_disown(ds, tag);
+	} else if ((type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) ||
+	    (!readonly && dsl_dataset_is_snapshot(ds))) {
+		dmu_objset_disown(*osp, tag);
+		return (EINVAL);
 	}
 
-	err = dmu_objset_open_ds_os(ds, os, type);
-	if (err) {
-		if (DS_MODE_TYPE(mode) == DS_MODE_USER)
-			dsl_dataset_rele(ds, os);
-		else
-			dsl_dataset_disown(ds, os);
-		kmem_free(os, sizeof (objset_t));
-	} else {
-		os->os_mode = mode;
-		*osp = os;
-	}
 	return (err);
 }
 
 void
-dmu_objset_close(objset_t *os)
+dmu_objset_rele(objset_t *os, void *tag)
 {
-	ASSERT(DS_MODE_TYPE(os->os_mode) == DS_MODE_USER ||
-	    DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER ||
-	    DS_MODE_TYPE(os->os_mode) == DS_MODE_NOHOLD);
+	dsl_dataset_rele(os->os_dsl_dataset, tag);
+}
 
-	if (DS_MODE_TYPE(os->os_mode) == DS_MODE_USER)
-		dsl_dataset_rele(os->os->os_dsl_dataset, os);
-	else if (DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER)
-		dsl_dataset_disown(os->os->os_dsl_dataset, os);
-	kmem_free(os, sizeof (objset_t));
+void
+dmu_objset_disown(objset_t *os, void *tag)
+{
+	dsl_dataset_disown(os->os_dsl_dataset, tag);
 }
 
 int
 dmu_objset_evict_dbufs(objset_t *os)
 {
-	objset_impl_t *osi = os->os;
 	dnode_t *dn;
 
-	mutex_enter(&osi->os_lock);
+	mutex_enter(&os->os_lock);
 
 	/* process the mdn last, since the other dnodes have holds on it */
-	list_remove(&osi->os_dnodes, osi->os_meta_dnode);
-	list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode);
+	list_remove(&os->os_dnodes, os->os_meta_dnode);
+	list_insert_tail(&os->os_dnodes, os->os_meta_dnode);
 
 	/*
 	 * Find the first dnode with holds.  We have to do this dance
@@ -429,93 +427,93 @@
 	 * hold.  If there are no holds then it has no dbufs so OK to
 	 * skip.
 	 */
-	for (dn = list_head(&osi->os_dnodes);
+	for (dn = list_head(&os->os_dnodes);
 	    dn && !dnode_add_ref(dn, FTAG);
-	    dn = list_next(&osi->os_dnodes, dn))
+	    dn = list_next(&os->os_dnodes, dn))
 		continue;
 
 	while (dn) {
 		dnode_t *next_dn = dn;
 
 		do {
-			next_dn = list_next(&osi->os_dnodes, next_dn);
+			next_dn = list_next(&os->os_dnodes, next_dn);
 		} while (next_dn && !dnode_add_ref(next_dn, FTAG));
 
-		mutex_exit(&osi->os_lock);
+		mutex_exit(&os->os_lock);
 		dnode_evict_dbufs(dn);
 		dnode_rele(dn, FTAG);
-		mutex_enter(&osi->os_lock);
+		mutex_enter(&os->os_lock);
 		dn = next_dn;
 	}
-	mutex_exit(&osi->os_lock);
-	return (list_head(&osi->os_dnodes) != osi->os_meta_dnode);
+	mutex_exit(&os->os_lock);
+	return (list_head(&os->os_dnodes) != os->os_meta_dnode);
 }
 
 void
-dmu_objset_evict(dsl_dataset_t *ds, void *arg)
+dmu_objset_evict(objset_t *os)
 {
-	objset_impl_t *osi = arg;
-	objset_t os;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	int i;
 
 	for (i = 0; i < TXG_SIZE; i++) {
-		ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL);
-		ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL);
+		ASSERT(list_head(&os->os_dirty_dnodes[i]) == NULL);
+		ASSERT(list_head(&os->os_free_dnodes[i]) == NULL);
 	}
 
 	if (ds) {
 		if (!dsl_dataset_is_snapshot(ds)) {
 			VERIFY(0 == dsl_prop_unregister(ds, "checksum",
-			    checksum_changed_cb, osi));
+			    checksum_changed_cb, os));
 			VERIFY(0 == dsl_prop_unregister(ds, "compression",
-			    compression_changed_cb, osi));
+			    compression_changed_cb, os));
 			VERIFY(0 == dsl_prop_unregister(ds, "copies",
-			    copies_changed_cb, osi));
+			    copies_changed_cb, os));
+			VERIFY(0 == dsl_prop_unregister(ds, "logbias",
+			    logbias_changed_cb, os));
 		}
 		VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
-		    primary_cache_changed_cb, osi));
+		    primary_cache_changed_cb, os));
 		VERIFY(0 == dsl_prop_unregister(ds, "secondarycache",
-		    secondary_cache_changed_cb, osi));
+		    secondary_cache_changed_cb, os));
 	}
 
 	/*
 	 * We should need only a single pass over the dnode list, since
 	 * nothing can be added to the list at this point.
 	 */
-	os.os = osi;
-	(void) dmu_objset_evict_dbufs(&os);
+	(void) dmu_objset_evict_dbufs(os);
 
-	dnode_special_close(osi->os_meta_dnode);
-	if (osi->os_userused_dnode) {
-		dnode_special_close(osi->os_userused_dnode);
-		dnode_special_close(osi->os_groupused_dnode);
+	dnode_special_close(os->os_meta_dnode);
+	if (os->os_userused_dnode) {
+		dnode_special_close(os->os_userused_dnode);
+		dnode_special_close(os->os_groupused_dnode);
 	}
-	zil_free(osi->os_zil);
+	zil_free(os->os_zil);
 
-	ASSERT3P(list_head(&osi->os_dnodes), ==, NULL);
+	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
 
-	VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1);
-	mutex_destroy(&osi->os_lock);
-	mutex_destroy(&osi->os_obj_lock);
-	mutex_destroy(&osi->os_user_ptr_lock);
-	kmem_free(osi, sizeof (objset_impl_t));
+	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1);
+	mutex_destroy(&os->os_lock);
+	mutex_destroy(&os->os_obj_lock);
+	mutex_destroy(&os->os_user_ptr_lock);
+	kmem_free(os, sizeof (objset_t));
 }
 
 /* called from dsl for meta-objset */
-objset_impl_t *
+objset_t *
 dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
     dmu_objset_type_t type, dmu_tx_t *tx)
 {
-	objset_impl_t *osi;
+	objset_t *os;
 	dnode_t *mdn;
 
 	ASSERT(dmu_tx_is_syncing(tx));
 	if (ds)
 		mutex_enter(&ds->ds_opening_lock);
-	VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &osi));
+	VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &os));
 	if (ds)
 		mutex_exit(&ds->ds_opening_lock);
-	mdn = osi->os_meta_dnode;
+	mdn = os->os_meta_dnode;
 
 	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
 	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);
@@ -550,15 +548,15 @@
 	ASSERT(type != DMU_OST_NONE);
 	ASSERT(type != DMU_OST_ANY);
 	ASSERT(type < DMU_OST_NUMTYPES);
-	osi->os_phys->os_type = type;
-	if (dmu_objset_userused_enabled(osi)) {
-		osi->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
-		osi->os_flags = osi->os_phys->os_flags;
+	os->os_phys->os_type = type;
+	if (dmu_objset_userused_enabled(os)) {
+		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+		os->os_flags = os->os_phys->os_flags;
 	}
 
 	dsl_dataset_dirty(ds, tx);
 
-	return (osi);
+	return (os);
 }
 
 struct oscarg {
@@ -613,18 +611,18 @@
 	if (oa->clone_origin == NULL) {
 		dsl_dataset_t *ds;
 		blkptr_t *bp;
-		objset_impl_t *osi;
+		objset_t *os;
 
 		VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj,
 		    FTAG, &ds));
 		bp = dsl_dataset_get_blkptr(ds);
 		ASSERT(BP_IS_HOLE(bp));
 
-		osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
+		os = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
 		    ds, bp, oa->type, tx);
 
 		if (oa->userfunc)
-			oa->userfunc(&osi->os, oa->userarg, cr, tx);
+			oa->userfunc(os, oa->userarg, cr, tx);
 		dsl_dataset_rele(ds, FTAG);
 	}
 
@@ -692,7 +690,7 @@
 int
 dmu_objset_destroy(const char *name, boolean_t defer)
 {
-	objset_t *os;
+	dsl_dataset_t *ds;
 	int error;
 
 	/*
@@ -702,15 +700,13 @@
 	 * structure.  Only the ZIL knows how to free them, so we have
 	 * to call into it here.
 	 */
-	error = dmu_objset_open(name, DMU_OST_ANY,
-	    DS_MODE_OWNER|DS_MODE_READONLY|DS_MODE_INCONSISTENT, &os);
+	error = dsl_dataset_own(name, B_TRUE, FTAG, &ds);
 	if (error == 0) {
-		dsl_dataset_t *ds = os->os->os_dsl_dataset;
-		zil_destroy(dmu_objset_zil(os), B_FALSE);
-
-		error = dsl_dataset_destroy(ds, os, defer);
+		objset_t *os;
+		if (dmu_objset_from_ds(ds, &os) == 0)
+			zil_destroy(dmu_objset_zil(os), B_FALSE);
+		error = dsl_dataset_destroy(ds, FTAG, defer);
 		/* dsl_dataset_destroy() closes the ds. */
-		kmem_free(os, sizeof (objset_t));
 	}
 
 	return (error);
@@ -732,7 +728,7 @@
 
 	/* The props have already been checked by zfs_check_userprops(). */
 
-	return (dsl_dataset_snapshot_check(os->os->os_dsl_dataset,
+	return (dsl_dataset_snapshot_check(os->os_dsl_dataset,
 	    sn->snapname, tx));
 }
 
@@ -740,7 +736,7 @@
 snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 {
 	objset_t *os = arg1;
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	struct snaparg *sn = arg2;
 
 	dsl_dataset_snapshot_sync(ds, sn->snapname, cr, tx);
@@ -767,13 +763,13 @@
 	    (err = zfs_secpolicy_snapshot_perms(name, CRED())))
 		return (err);
 
-	err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_USER, &os);
+	err = dmu_objset_hold(name, sn, &os);
 	if (err != 0)
 		return (err);
 
 	/* If the objset is in an inconsistent state, return busy */
-	if (os->os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
-		dmu_objset_close(os);
+	if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
+		dmu_objset_rele(os, sn);
 		return (EBUSY);
 	}
 
@@ -787,7 +783,7 @@
 		dsl_sync_task_create(sn->dstg, snapshot_check,
 		    snapshot_sync, os, sn, 3);
 	} else {
-		dmu_objset_close(os);
+		dmu_objset_rele(os, sn);
 	}
 
 	return (err);
@@ -827,11 +823,11 @@
 	for (dst = list_head(&sn.dstg->dstg_tasks); dst;
 	    dst = list_next(&sn.dstg->dstg_tasks, dst)) {
 		objset_t *os = dst->dst_arg1;
-		dsl_dataset_t *ds = os->os->os_dsl_dataset;
+		dsl_dataset_t *ds = os->os_dsl_dataset;
 		if (dst->dst_err)
 			dsl_dataset_name(ds, sn.failed);
 		zil_resume(dmu_objset_zil(os));
-		dmu_objset_close(os);
+		dmu_objset_rele(os, &sn);
 	}
 
 	if (err)
@@ -874,7 +870,7 @@
 {
 	blkptr_t *bp = zio->io_bp;
 	blkptr_t *bp_orig = &zio->io_bp_orig;
-	objset_impl_t *os = arg;
+	objset_t *os = arg;
 	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
 
 	ASSERT(bp == os->os_rootbp);
@@ -903,7 +899,7 @@
 
 /* called from dsl */
 void
-dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
+dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
 {
 	int txgoff;
 	zbookmark_t zb;
@@ -1008,7 +1004,7 @@
 }
 
 boolean_t
-dmu_objset_userused_enabled(objset_impl_t *os)
+dmu_objset_userused_enabled(objset_t *os)
 {
 	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
 	    used_cbs[os->os_phys->os_type] &&
@@ -1016,7 +1012,7 @@
 }
 
 void
-dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx)
+dmu_objset_do_userquota_callbacks(objset_t *os, dmu_tx_t *tx)
 {
 	dnode_t *dn;
 	list_t *list = &os->os_synced_dnodes;
@@ -1035,10 +1031,10 @@
 
 		/* Allocate the user/groupused objects if necessary. */
 		if (os->os_userused_dnode->dn_type == DMU_OT_NONE) {
-			VERIFY(0 == zap_create_claim(&os->os,
+			VERIFY(0 == zap_create_claim(os,
 			    DMU_USERUSED_OBJECT,
 			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
-			VERIFY(0 == zap_create_claim(&os->os,
+			VERIFY(0 == zap_create_claim(os,
 			    DMU_GROUPUSED_OBJECT,
 			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
 		}
@@ -1065,7 +1061,7 @@
 		    (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf,
 		    DN_MAX_BONUSLEN) == 0 &&
 		    DN_USED_BYTES(dn->dn_oldphys) == 0));
-		used_cbs[os->os_phys->os_type](&os->os, bonustype,
+		used_cbs[os->os_phys->os_type](os, bonustype,
 		    DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys),
 		    DN_USED_BYTES(dn->dn_oldphys),
 		    DN_USED_BYTES(dn->dn_phys), tx);
@@ -1086,7 +1082,7 @@
 boolean_t
 dmu_objset_userspace_present(objset_t *os)
 {
-	return (os->os->os_phys->os_flags &
+	return (os->os_phys->os_flags &
 	    OBJSET_FLAG_USERACCOUNTING_COMPLETE);
 }
 
@@ -1098,7 +1094,7 @@
 
 	if (dmu_objset_userspace_present(os))
 		return (0);
-	if (!dmu_objset_userused_enabled(os->os))
+	if (!dmu_objset_userused_enabled(os))
 		return (ENOTSUP);
 	if (dmu_objset_is_snapshot(os))
 		return (EINVAL);
@@ -1134,7 +1130,7 @@
 		dmu_tx_commit(tx);
 	}
 
-	os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
 	txg_wait_synced(dmu_objset_pool(os), 0);
 	return (0);
 }
@@ -1143,35 +1139,35 @@
 dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
     uint64_t *usedobjsp, uint64_t *availobjsp)
 {
-	dsl_dataset_space(os->os->os_dsl_dataset, refdbytesp, availbytesp,
+	dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
 	    usedobjsp, availobjsp);
 }
 
 uint64_t
 dmu_objset_fsid_guid(objset_t *os)
 {
-	return (dsl_dataset_fsid_guid(os->os->os_dsl_dataset));
+	return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
 }
 
 void
 dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
 {
-	stat->dds_type = os->os->os_phys->os_type;
-	if (os->os->os_dsl_dataset)
-		dsl_dataset_fast_stat(os->os->os_dsl_dataset, stat);
+	stat->dds_type = os->os_phys->os_type;
+	if (os->os_dsl_dataset)
+		dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
 }
 
 void
 dmu_objset_stats(objset_t *os, nvlist_t *nv)
 {
-	ASSERT(os->os->os_dsl_dataset ||
-	    os->os->os_phys->os_type == DMU_OST_META);
+	ASSERT(os->os_dsl_dataset ||
+	    os->os_phys->os_type == DMU_OST_META);
 
-	if (os->os->os_dsl_dataset != NULL)
-		dsl_dataset_stats(os->os->os_dsl_dataset, nv);
+	if (os->os_dsl_dataset != NULL)
+		dsl_dataset_stats(os->os_dsl_dataset, nv);
 
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
-	    os->os->os_phys->os_type);
+	    os->os_phys->os_type);
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
 	    dmu_objset_userspace_present(os));
 }
@@ -1179,8 +1175,8 @@
 int
 dmu_objset_is_snapshot(objset_t *os)
 {
-	if (os->os->os_dsl_dataset != NULL)
-		return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset));
+	if (os->os_dsl_dataset != NULL)
+		return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
 	else
 		return (B_FALSE);
 }
@@ -1189,7 +1185,7 @@
 dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
     boolean_t *conflict)
 {
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	uint64_t ignored;
 
 	if (ds->ds_phys->ds_snapnames_zapobj == 0)
@@ -1204,7 +1200,7 @@
 dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
     uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
 {
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	zap_cursor_t cursor;
 	zap_attribute_t attr;
 
@@ -1241,12 +1237,12 @@
 dmu_dir_list_next(objset_t *os, int namelen, char *name,
     uint64_t *idp, uint64_t *offp)
 {
-	dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir;
+	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
 	zap_cursor_t cursor;
 	zap_attribute_t attr;
 
 	/* there is no next dir on a snapshot! */
-	if (os->os->os_dsl_dataset->ds_object !=
+	if (os->os_dsl_dataset->ds_object !=
 	    dd->dd_phys->dd_head_dataset_obj)
 		return (ENOENT);
 
@@ -1420,7 +1416,7 @@
 
 	if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) {
 		mutex_enter(&ds->ds_opening_lock);
-		if (!dsl_dataset_get_user_ptr(ds)) {
+		if (ds->ds_objset == NULL) {
 			uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
 			zbookmark_t zb;
 
@@ -1445,13 +1441,13 @@
 void
 dmu_objset_set_user(objset_t *os, void *user_ptr)
 {
-	ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock));
-	os->os->os_user_ptr = user_ptr;
+	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
+	os->os_user_ptr = user_ptr;
 }
 
 void *
 dmu_objset_get_user(objset_t *os)
 {
-	ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock));
-	return (os->os->os_user_ptr);
+	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
+	return (os->os_user_ptr);
 }
--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Mon Aug 17 09:14:35 2009 -0400
@@ -216,8 +216,8 @@
 dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
     vnode_t *vp, offset_t *off)
 {
-	dsl_dataset_t *ds = tosnap->os->os_dsl_dataset;
-	dsl_dataset_t *fromds = fromsnap ? fromsnap->os->os_dsl_dataset : NULL;
+	dsl_dataset_t *ds = tosnap->os_dsl_dataset;
+	dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
 	dmu_replay_record_t *drr;
 	struct backuparg ba;
 	int err;
@@ -257,7 +257,7 @@
 	drr->drr_u.drr_begin.drr_version = DMU_BACKUP_STREAM_VERSION;
 	drr->drr_u.drr_begin.drr_creation_time =
 	    ds->ds_phys->ds_creation_time;
-	drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type;
+	drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type;
 	if (fromorigin)
 		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
 	drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
@@ -362,7 +362,7 @@
 	dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1,
 	    rbsa->origin, flags, cr, tx);
 	VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj,
-	    DS_MODE_INCONSISTENT, dmu_recv_tag, &rbsa->ds));
+	    B_TRUE, dmu_recv_tag, &rbsa->ds));
 
 	if (rbsa->origin == NULL) {
 		(void) dmu_objset_create_impl(dd->dd_pool->dp_spa,
@@ -431,8 +431,7 @@
 	/* create and open the temporary clone */
 	dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname,
 	    ohds->ds_prev, flags, cr, tx);
-	VERIFY(0 == dsl_dataset_own_obj(dp, dsobj,
-	    DS_MODE_INCONSISTENT, dmu_recv_tag, &cds));
+	VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds));
 
 	/*
 	 * If we actually created a non-clone, we need to create the
@@ -477,7 +476,7 @@
 
 	rbsa.tofs = tofs;
 	rbsa.tosnap = tosnap;
-	rbsa.origin = origin ? origin->os->os_dsl_dataset : NULL;
+	rbsa.origin = origin ? origin->os_dsl_dataset : NULL;
 	rbsa.fromguid = drrb->drr_fromguid;
 	rbsa.type = drrb->drr_type;
 	rbsa.tag = FTAG;
@@ -880,7 +879,7 @@
 	/*
 	 * Open the objset we are modifying.
 	 */
-	VERIFY(dmu_objset_open_ds(drc->drc_real_ds, DMU_OST_ANY, &os) == 0);
+	VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0);
 
 	ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);
 
@@ -950,8 +949,6 @@
 	ASSERT(ra.err != 0);
 
 out:
-	dmu_objset_close(os);
-
 	if (ra.err != 0) {
 		/*
 		 * destroy what we created, so we don't leave it in the
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c	Mon Aug 17 09:14:35 2009 -0400
@@ -58,9 +58,9 @@
 dmu_tx_t *
 dmu_tx_create(objset_t *os)
 {
-	dmu_tx_t *tx = dmu_tx_create_dd(os->os->os_dsl_dataset->ds_dir);
+	dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir);
 	tx->tx_objset = os;
-	tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os->os_dsl_dataset);
+	tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os_dsl_dataset);
 	return (tx);
 }
 
@@ -98,7 +98,7 @@
 	int err;
 
 	if (object != DMU_NEW_OBJECT) {
-		err = dnode_hold(os->os, object, tx, &dn);
+		err = dnode_hold(os, object, tx, &dn);
 		if (err) {
 			tx->tx_err = err;
 			return (NULL);
@@ -376,7 +376,7 @@
 dmu_tx_count_dnode(dmu_tx_hold_t *txh)
 {
 	dnode_t *dn = txh->txh_dnode;
-	dnode_t *mdn = txh->txh_tx->tx_objset->os->os_meta_dnode;
+	dnode_t *mdn = txh->txh_tx->tx_objset->os_meta_dnode;
 	uint64_t space = mdn->dn_datablksz +
 	    ((mdn->dn_nlevels-1) << mdn->dn_indblkshift);
 
@@ -688,7 +688,7 @@
 		 * access the name in this fat-zap so that we'll check
 		 * for i/o errors to the leaf blocks, etc.
 		 */
-		err = zap_lookup(&dn->dn_objset->os, dn->dn_object, name,
+		err = zap_lookup(dn->dn_objset, dn->dn_object, name,
 		    8, 0, NULL);
 		if (err == EIO) {
 			tx->tx_err = err;
@@ -696,7 +696,7 @@
 		}
 	}
 
-	err = zap_count_write(&dn->dn_objset->os, dn->dn_object, name, add,
+	err = zap_count_write(dn->dn_objset, dn->dn_object, name, add,
 	    &txh->txh_space_towrite, &txh->txh_space_tooverwrite);
 
 	/*
@@ -771,7 +771,7 @@
 	dnode_t *dn = db->db_dnode;
 
 	ASSERT(tx->tx_txg != 0);
-	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset->os);
+	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
 	ASSERT3U(dn->dn_object, ==, db->db.db_object);
 
 	if (tx->tx_anyobj)
@@ -931,7 +931,7 @@
 	 * assume that we won't be able to free or overwrite anything.
 	 */
 	if (tx->tx_objset &&
-	    dsl_dataset_prev_snap_txg(tx->tx_objset->os->os_dsl_dataset) >
+	    dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) >
 	    tx->tx_lastsnap_txg) {
 		towrite += tooverwrite;
 		tooverwrite = tofree = 0;
--- a/usr/src/uts/common/fs/zfs/dnode.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dnode.c	Mon Aug 17 09:14:35 2009 -0400
@@ -272,7 +272,7 @@
 }
 
 static dnode_t *
-dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
+dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
     uint64_t object)
 {
 	dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
@@ -309,7 +309,7 @@
 static void
 dnode_destroy(dnode_t *dn)
 {
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 
 #ifdef ZFS_DEBUG
 	int i;
@@ -488,7 +488,7 @@
 }
 
 dnode_t *
-dnode_special_open(objset_impl_t *os, dnode_phys_t *dnp, uint64_t object)
+dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object)
 {
 	dnode_t *dn = dnode_create(os, dnp, NULL, object);
 	DNODE_VERIFY(dn);
@@ -535,7 +535,7 @@
  * succeeds even for free dnodes.
  */
 int
-dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
+dnode_hold_impl(objset_t *os, uint64_t object, int flag,
     void *tag, dnode_t **dnp)
 {
 	int epb, idx, err;
@@ -650,7 +650,7 @@
  * Return held dnode if the object is allocated, NULL if not.
  */
 int
-dnode_hold(objset_impl_t *os, uint64_t object, void *tag, dnode_t **dnp)
+dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp)
 {
 	return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp));
 }
@@ -689,7 +689,7 @@
 void
 dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
 {
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	uint64_t txg = tx->tx_txg;
 
 	if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
@@ -1248,7 +1248,7 @@
 void
 dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
 {
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	dsl_dataset_t *ds = os->os_dsl_dataset;
 
 	if (space > 0)
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Mon Aug 17 09:14:35 2009 -0400
@@ -244,8 +244,8 @@
 
 	unique_remove(ds->ds_fsid_guid);
 
-	if (ds->ds_user_ptr != NULL)
-		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
+	if (ds->ds_objset != NULL)
+		dmu_objset_evict(ds->ds_objset);
 
 	if (ds->ds_prev) {
 		dsl_dataset_drop_ref(ds->ds_prev, ds);
@@ -545,17 +545,14 @@
 }
 
 int
-dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
-    dsl_dataset_t **dsp)
+dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok,
+    void *tag, dsl_dataset_t **dsp)
 {
-	int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp);
-
-	ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER);
-
+	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
 	if (err)
 		return (err);
-	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
-		dsl_dataset_rele(*dsp, owner);
+	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
+		dsl_dataset_rele(*dsp, tag);
 		*dsp = NULL;
 		return (EBUSY);
 	}
@@ -622,18 +619,14 @@
 }
 
 int
-dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp)
+dsl_dataset_own(const char *name, boolean_t inconsistentok,
+    void *tag, dsl_dataset_t **dsp)
 {
-	int err = dsl_dataset_hold(name, owner, dsp);
+	int err = dsl_dataset_hold(name, tag, dsp);
 	if (err)
 		return (err);
-	if ((*dsp)->ds_phys->ds_num_children > 0 &&
-	    !DS_MODE_IS_READONLY(flags)) {
-		dsl_dataset_rele(*dsp, owner);
-		return (EROFS);
-	}
-	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
-		dsl_dataset_rele(*dsp, owner);
+	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
+		dsl_dataset_rele(*dsp, tag);
 		return (EBUSY);
 	}
 	return (0);
@@ -705,9 +698,9 @@
 }
 
 void
-dsl_dataset_disown(dsl_dataset_t *ds, void *owner)
+dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
 {
-	ASSERT((ds->ds_owner == owner && ds->ds_dbuf) ||
+	ASSERT((ds->ds_owner == tag && ds->ds_dbuf) ||
 	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
 
 	mutex_enter(&ds->ds_lock);
@@ -718,20 +711,20 @@
 	}
 	mutex_exit(&ds->ds_lock);
 	if (ds->ds_dbuf)
-		dsl_dataset_drop_ref(ds, owner);
+		dsl_dataset_drop_ref(ds, tag);
 	else
 		dsl_dataset_evict(ds->ds_dbuf, ds);
 }
 
 boolean_t
-dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner)
+dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag)
 {
 	boolean_t gotit = FALSE;
 
 	mutex_enter(&ds->ds_lock);
 	if (ds->ds_owner == NULL &&
 	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
-		ds->ds_owner = owner;
+		ds->ds_owner = tag;
 		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
 			rw_exit(&ds->ds_rwlock);
 		gotit = TRUE;
@@ -865,16 +858,15 @@
 	char *dsname;
 
 	dsname = kmem_asprintf("%s@%s", name, da->snapname);
-	err = dsl_dataset_own(dsname, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
-	    da->dstg, &ds);
+	err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds);
 	strfree(dsname);
 	if (err == 0) {
 		struct dsl_ds_destroyarg *dsda;
 
 		dsl_dataset_make_exclusive(ds, da->dstg);
-		if (ds->ds_user_ptr) {
-			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-			ds->ds_user_ptr = NULL;
+		if (ds->ds_objset != NULL) {
+			dmu_objset_evict(ds->ds_objset);
+			ds->ds_objset = NULL;
 		}
 		dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
 		dsda->ds = ds;
@@ -958,13 +950,12 @@
 	int error;
 	objset_t *os;
 
-	error = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
+	error = dmu_objset_from_ds(ds, &os);
 	if (error)
 		return (error);
 
 	if (dmu_objset_type(os) == DMU_OST_ZVOL)
 		error = zvol_remove_minor(name);
-	dmu_objset_close(os);
 
 	return (error);
 }
@@ -1003,9 +994,7 @@
 			return (error);
 		}
 #endif
-		error = dsl_dataset_own(name,
-		    DS_MODE_READONLY | DS_MODE_INCONSISTENT,
-		    tag, &origin);
+		error = dsl_dataset_own(name, B_TRUE, tag, &origin);
 		kmem_free(name, namelen);
 		if (error)
 			return (error);
@@ -1036,9 +1025,9 @@
 		/* Destroying a snapshot is simpler */
 		dsl_dataset_make_exclusive(ds, tag);
 
-		if (ds->ds_user_ptr) {
-			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-			ds->ds_user_ptr = NULL;
+		if (ds->ds_objset != NULL) {
+			dmu_objset_evict(ds->ds_objset);
+			ds->ds_objset = NULL;
 		}
 		/* NOTE: defer is always B_FALSE for non-snapshots */
 		dsda.defer = defer;
@@ -1060,7 +1049,7 @@
 	if (err)
 		goto out;
 
-	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
+	err = dmu_objset_from_ds(ds, &os);
 	if (err)
 		goto out;
 
@@ -1089,7 +1078,7 @@
 	 * context, the user space accounting should be zero.
 	 */
 	if (ds->ds_phys->ds_bp.blk_fill == 0 &&
-	    dmu_objset_userused_enabled(os->os)) {
+	    dmu_objset_userused_enabled(os)) {
 		uint64_t count;
 
 		ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
@@ -1098,7 +1087,6 @@
 		    count == 0);
 	}
 
-	dmu_objset_close(os);
 	if (err != ESRCH)
 		goto out;
 
@@ -1109,7 +1097,7 @@
 	if (err)
 		goto out;
 
-	if (ds->ds_user_ptr) {
+	if (ds->ds_objset) {
 		/*
 		 * We need to sync out all in-flight IO before we try
 		 * to evict (the dataset evict func is trying to clear
@@ -1122,9 +1110,9 @@
 	 * Blow away the dsl_dir + head dataset.
 	 */
 	dsl_dataset_make_exclusive(ds, tag);
-	if (ds->ds_user_ptr) {
-		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-		ds->ds_user_ptr = NULL;
+	if (ds->ds_objset) {
+		dmu_objset_evict(ds->ds_objset);
+		ds->ds_objset = NULL;
 	}
 
 	/*
@@ -1173,28 +1161,6 @@
 	return (err);
 }
 
-void *
-dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
-    void *p, dsl_dataset_evict_func_t func)
-{
-	void *old;
-
-	mutex_enter(&ds->ds_lock);
-	old = ds->ds_user_ptr;
-	if (old == NULL) {
-		ds->ds_user_ptr = p;
-		ds->ds_user_evict_func = func;
-	}
-	mutex_exit(&ds->ds_lock);
-	return (old);
-}
-
-void *
-dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
-{
-	return (ds->ds_user_ptr);
-}
-
 blkptr_t *
 dsl_dataset_get_blkptr(dsl_dataset_t *ds)
 {
@@ -1228,7 +1194,7 @@
 	if (ds == NULL) /* this is the meta-objset */
 		return;
 
-	ASSERT(ds->ds_user_ptr != NULL);
+	ASSERT(ds->ds_objset != NULL);
 
 	if (ds->ds_phys->ds_next_snap_obj != 0)
 		panic("dirtying snapshot!");
@@ -1820,12 +1786,12 @@
 		struct dsl_ds_destroyarg ndsda = {0};
 
 		ASSERT3P(origin, ==, dsda->rm_origin);
-		if (origin->ds_user_ptr) {
-			origin->ds_user_evict_func(origin, origin->ds_user_ptr);
-			origin->ds_user_ptr = NULL;
+		if (origin->ds_objset) {
+			dmu_objset_evict(origin->ds_objset);
+			origin->ds_objset = NULL;
 		}
 
-		dsl_dataset_rele(origin, ds);
+		dsl_dataset_rele(ds->ds_prev, ds);
 		ds->ds_prev = NULL;
 
 		ndsda.ds = origin;
@@ -2010,7 +1976,7 @@
 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
 {
 	ASSERT(dmu_tx_is_syncing(tx));
-	ASSERT(ds->ds_user_ptr != NULL);
+	ASSERT(ds->ds_objset != NULL);
 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
 
 	/*
@@ -2021,7 +1987,7 @@
 	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
 
 	dsl_dir_dirty(ds->ds_dir, tx);
-	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
+	dmu_objset_sync(ds->ds_objset, zio, tx);
 }
 
 void
@@ -2577,9 +2543,9 @@
 		dsl_dataset_t *ds = snap->ds;
 
 		/* unregister props as dsl_dir is changing */
-		if (ds->ds_user_ptr) {
-			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-			ds->ds_user_ptr = NULL;
+		if (ds->ds_objset) {
+			dmu_objset_evict(ds->ds_objset);
+			ds->ds_objset = NULL;
 		}
 		/* move snap name entry */
 		VERIFY(0 == dsl_dataset_get_snapname(ds));
@@ -2879,15 +2845,14 @@
 	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
 	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
 
-	if (csa->cds->ds_user_ptr != NULL) {
-		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
-		csa->cds->ds_user_ptr = NULL;
+	if (csa->cds->ds_objset != NULL) {
+		dmu_objset_evict(csa->cds->ds_objset);
+		csa->cds->ds_objset = NULL;
 	}
 
-	if (csa->ohds->ds_user_ptr != NULL) {
-		csa->ohds->ds_user_evict_func(csa->ohds,
-		    csa->ohds->ds_user_ptr);
-		csa->ohds->ds_user_ptr = NULL;
+	if (csa->ohds->ds_objset != NULL) {
+		dmu_objset_evict(csa->ohds->ds_objset);
+		csa->ohds->ds_objset = NULL;
 	}
 
 	/*
@@ -3463,9 +3428,9 @@
 			 */
 			if (!ra->own)
 				return (EBUSY);
-			if (ds->ds_user_ptr) {
-				ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-				ds->ds_user_ptr = NULL;
+			if (ds->ds_objset) {
+				dmu_objset_evict(ds->ds_objset);
+				ds->ds_objset = NULL;
 			}
 		}
 		dsda.ds = ds;
@@ -3557,8 +3522,7 @@
 			return (error);
 		}
 #endif
-		if (!dsl_dataset_tryown(ds,
-		    DS_MODE_READONLY | DS_MODE_INCONSISTENT, dtag)) {
+		if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) {
 			dsl_dataset_rele(ds, dtag);
 			return (EBUSY);
 		} else {
@@ -3666,3 +3630,24 @@
 	dsl_dataset_rele(ds, FTAG);
 	return (0);
 }
+
+/*
+ * Note, this fuction is used as the callback for dmu_objset_find().  We
+ * always return 0 so that we will continue to find and process
+ * inconsistent datasets, even if we encounter an error trying to
+ * process one of them.
+ */
+/* ARGSUSED */
+int
+dsl_destroy_inconsistent(char *dsname, void *arg)
+{
+	dsl_dataset_t *ds;
+
+	if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) {
+		if (DS_IS_INCONSISTENT(ds))
+			(void) dsl_dataset_destroy(ds, FTAG, B_FALSE);
+		else
+			dsl_dataset_disown(ds, FTAG);
+	}
+	return (0);
+}
--- a/usr/src/uts/common/fs/zfs/dsl_deleg.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c	Mon Aug 17 09:14:35 2009 -0400
@@ -739,5 +739,5 @@
 boolean_t
 dsl_delegation_on(objset_t *os)
 {
-	return (os->os->os_spa->spa_delegation);
+	return (os->os_spa->spa_delegation);
 }
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c	Mon Aug 17 09:14:35 2009 -0400
@@ -703,7 +703,7 @@
 	 */
 	if (first && tx->tx_objset) {
 		int error;
-		dsl_dataset_t *ds = tx->tx_objset->os->os_dsl_dataset;
+		dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
 
 		error = dsl_dataset_check_quota(ds, checkrefquota,
 		    asize, est_inflight, &used_on_disk, &ref_rsrv);
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c	Mon Aug 17 09:14:35 2009 -0400
@@ -103,13 +103,12 @@
 	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
 	dsl_dir_t *dd;
 	dsl_dataset_t *ds;
-	objset_impl_t *osi;
 
 	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
-	err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &osi);
+	err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
+	    &dp->dp_meta_objset);
 	if (err)
 		goto out;
-	dp->dp_meta_objset = &osi->os;
 
 	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
@@ -219,7 +218,7 @@
 
 	/* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */
 	if (dp->dp_meta_objset)
-		dmu_objset_evict(NULL, dp->dp_meta_objset->os);
+		dmu_objset_evict(dp->dp_meta_objset);
 
 	txg_list_destroy(&dp->dp_dirty_datasets);
 	txg_list_destroy(&dp->dp_dirty_dirs);
@@ -242,13 +241,13 @@
 	int err;
 	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
 	dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
-	objset_impl_t *osip;
+	objset_t *os;
 	dsl_dataset_t *ds;
 	uint64_t dsobj;
 
 	/* create and open the MOS (meta-objset) */
-	dp->dp_meta_objset = &dmu_objset_create_impl(spa,
-	    NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx)->os;
+	dp->dp_meta_objset = dmu_objset_create_impl(spa,
+	    NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
 
 	/* create the pool directory */
 	err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
@@ -273,10 +272,10 @@
 
 	/* create the root objset */
 	VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
-	osip = dmu_objset_create_impl(dp->dp_spa, ds,
+	os = dmu_objset_create_impl(dp->dp_spa, ds,
 	    dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
 #ifdef _KERNEL
-	zfs_create_fs(&osip->os, kcred, zplprops, tx);
+	zfs_create_fs(os, kcred, zplprops, tx);
 #endif
 	dsl_dataset_rele(ds, FTAG);
 
@@ -293,7 +292,7 @@
 	dsl_dir_t *dd;
 	dsl_dataset_t *ds;
 	dsl_sync_task_group_t *dstg;
-	objset_impl_t *mosi = dp->dp_meta_objset->os;
+	objset_t *mos = dp->dp_meta_objset;
 	hrtime_t start, write_time;
 	uint64_t data_written;
 	int err;
@@ -323,7 +322,7 @@
 
 	for (ds = list_head(&dp->dp_synced_datasets); ds;
 	    ds = list_next(&dp->dp_synced_datasets, ds))
-		dmu_objset_do_userquota_callbacks(ds->ds_user_ptr, tx);
+		dmu_objset_do_userquota_callbacks(ds->ds_objset, tx);
 
 	/*
 	 * Sync the datasets again to push out the changes due to
@@ -358,10 +357,10 @@
 		dsl_pool_scrub_sync(dp, tx);
 
 	start = gethrtime();
-	if (list_head(&mosi->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
-	    list_head(&mosi->os_free_dnodes[txg & TXG_MASK]) != NULL) {
+	if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
+	    list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) {
 		zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
-		dmu_objset_sync(mosi, zio, tx);
+		dmu_objset_sync(mos, zio, tx);
 		err = zio_wait(zio);
 		ASSERT(err == 0);
 		dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
@@ -421,8 +420,8 @@
 
 	while (ds = list_head(&dp->dp_synced_datasets)) {
 		list_remove(&dp->dp_synced_datasets, ds);
-		ASSERT(ds->ds_user_ptr != NULL);
-		zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil);
+		ASSERT(ds->ds_objset != NULL);
+		zil_clean(ds->ds_objset->os_zil);
 		dmu_buf_rele(ds->ds_dbuf, ds);
 	}
 }
--- a/usr/src/uts/common/fs/zfs/dsl_prop.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/dsl_prop.c	Mon Aug 17 09:14:35 2009 -0400
@@ -552,7 +552,7 @@
 int
 dsl_prop_get_all(objset_t *os, nvlist_t **nvp, boolean_t local)
 {
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	dsl_dir_t *dd = ds->ds_dir;
 	boolean_t snapshot = dsl_dataset_is_snapshot(ds);
 	int err = 0;
--- a/usr/src/uts/common/fs/zfs/spa.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/spa.c	Mon Aug 17 09:14:35 2009 -0400
@@ -371,12 +371,14 @@
 					break;
 				}
 
-				if (error = dmu_objset_open(strval, DMU_OST_ZFS,
-				    DS_MODE_USER | DS_MODE_READONLY, &os))
+				if (error = dmu_objset_hold(strval, FTAG, &os))
 					break;
 
-				/* We don't support gzip bootable datasets */
-				if ((error = dsl_prop_get_integer(strval,
+				/* Must be ZPL and not gzip compressed. */
+
+				if (dmu_objset_type(os) != DMU_OST_ZFS) {
+					error = ENOTSUP;
+				} else if ((error = dsl_prop_get_integer(strval,
 				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
 				    &compress, NULL)) == 0 &&
 				    !BOOTFS_COMPRESS_VALID(compress)) {
@@ -384,7 +386,7 @@
 				} else {
 					objnum = dmu_objset_id(os);
 				}
-				dmu_objset_close(os);
+				dmu_objset_rele(os, FTAG);
 			}
 			break;
 
@@ -1598,6 +1600,12 @@
 		 */
 		if (vdev_resilver_needed(rvd, NULL, NULL))
 			spa_async_request(spa, SPA_ASYNC_RESILVER);
+
+		/*
+		 * Delete any inconsistent datasets.
+		 */
+		(void) dmu_objset_find(spa_name(spa),
+		    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
 	}
 
 	error = 0;
--- a/usr/src/uts/common/fs/zfs/sys/dbuf.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h	Mon Aug 17 09:14:35 2009 -0400
@@ -75,7 +75,6 @@
 	DB_EVICTING
 } dbuf_states_t;
 
-struct objset_impl;
 struct dnode;
 struct dmu_tx;
 
@@ -148,7 +147,7 @@
 	dmu_buf_t db;
 
 	/* the objset we belong to */
-	struct objset_impl *db_objset;
+	struct objset *db_objset;
 
 	/*
 	 * the dnode we belong to (NULL when evicted)
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Mon Aug 17 09:14:35 2009 -0400
@@ -59,7 +59,6 @@
 struct zbookmark;
 struct spa;
 struct nvlist;
-struct objset_impl;
 struct arc_buf;
 
 typedef struct objset objset_t;
@@ -140,16 +139,6 @@
 void zfs_acl_byteswap(void *buf, size_t size);
 void zfs_znode_byteswap(void *buf, size_t size);
 
-#define	DS_MODE_NOHOLD		0	/* internal use only */
-#define	DS_MODE_USER		1	/* simple access, no special needs */
-#define	DS_MODE_OWNER		2	/* the "main" access, e.g. a mount */
-#define	DS_MODE_TYPE_MASK	0x3
-#define	DS_MODE_TYPE(x)		((x) & DS_MODE_TYPE_MASK)
-#define	DS_MODE_READONLY	0x8
-#define	DS_MODE_IS_READONLY(x)	((x) & DS_MODE_READONLY)
-#define	DS_MODE_INCONSISTENT	0x10
-#define	DS_MODE_IS_INCONSISTENT(x)	((x) & DS_MODE_INCONSISTENT)
-
 #define	DS_FIND_SNAPSHOTS	(1<<0)
 #define	DS_FIND_CHILDREN	(1<<1)
 
@@ -166,11 +155,13 @@
 /*
  * Public routines to create, destroy, open, and close objsets.
  */
-int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
-    objset_t **osp);
-int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
-    objset_t **osp);
-void dmu_objset_close(objset_t *os);
+int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
+int dmu_objset_own(const char *name, dmu_objset_type_t type,
+    boolean_t readonly, void *tag, objset_t **osp);
+void dmu_objset_rele(objset_t *os, void *tag);
+void dmu_objset_disown(objset_t *os, void *tag);
+int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
+
 int dmu_objset_evict_dbufs(objset_t *os);
 int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
@@ -309,7 +300,7 @@
  * Decide how many copies of a given block we should make.  Can be from
  * 1 to SPA_DVAS_PER_BP.
  */
-int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
+int dmu_get_replication_level(objset_t *os, struct zbookmark *zb,
     dmu_object_type_t ot);
 /*
  * The bonus data is accessed more or less like a regular buffer.
@@ -575,6 +566,7 @@
 extern void dmu_objset_name(objset_t *os, char *buf);
 extern dmu_objset_type_t dmu_objset_type(objset_t *os);
 extern uint64_t dmu_objset_id(objset_t *os);
+extern uint64_t dmu_objset_logbias(objset_t *os);
 extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
     uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
 extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
--- a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h	Mon Aug 17 09:14:35 2009 -0400
@@ -210,8 +210,7 @@
  *
  * ds_lock
  *    protects:
- *    	ds_user_ptr
- *    	ds_user_evict_func
+ *    	ds_objset
  *    	ds_open_refcount
  *    	ds_snapname
  *    	ds_phys accounting
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Mon Aug 17 09:14:35 2009 -0400
@@ -40,7 +40,6 @@
 
 struct dsl_dataset;
 struct dmu_tx;
-struct objset_impl;
 
 #define	OBJSET_PHYS_SIZE 2048
 #define	OBJSET_OLD_PHYS_SIZE 1024
@@ -59,11 +58,6 @@
 } objset_phys_t;
 
 struct objset {
-	struct objset_impl *os;
-	int os_mode;
-};
-
-typedef struct objset_impl {
 	/* Immutable: */
 	struct dsl_dataset *os_dsl_dataset;
 	spa_t *os_spa;
@@ -73,12 +67,12 @@
 	dnode_t *os_userused_dnode;
 	dnode_t *os_groupused_dnode;
 	zilog_t *os_zil;
-	objset_t os;
 	uint8_t os_checksum;	/* can change, under dsl_dir's locks */
 	uint8_t os_compress;	/* can change, under dsl_dir's locks */
 	uint8_t os_copies;	/* can change, under dsl_dir's locks */
 	uint8_t os_primary_cache;	/* can change, under dsl_dir's locks */
 	uint8_t os_secondary_cache;	/* can change, under dsl_dir's locks */
+	uint8_t os_logbias;	/* can change, under dsl_dir's locks */
 
 	/* no lock needed: */
 	struct dmu_tx *os_synctx; /* XXX sketchy */
@@ -101,7 +95,7 @@
 	/* stuff we store for the user */
 	kmutex_t os_user_ptr_lock;
 	void *os_user_ptr;
-} objset_impl_t;
+};
 
 #define	DMU_META_DNODE_OBJECT	0
 #define	DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0)
@@ -111,9 +105,13 @@
 	(os)->os_secondary_cache == ZFS_CACHE_METADATA)
 
 /* called from zpl */
-int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
-    objset_t **osp);
-void dmu_objset_close(objset_t *os);
+int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
+int dmu_objset_own(const char *name, dmu_objset_type_t type,
+    boolean_t readonly, void *tag, objset_t **osp);
+void dmu_objset_rele(objset_t *os, void *tag);
+void dmu_objset_disown(objset_t *os, void *tag);
+int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
+
 int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
 int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
@@ -135,14 +133,14 @@
 int dmu_objset_evict_dbufs(objset_t *os);
 
 /* called from dsl */
-void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
-objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
+void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
+objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
     blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
 int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
-    objset_impl_t **osip);
-void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
-void dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx);
-boolean_t dmu_objset_userused_enabled(objset_impl_t *os);
+    objset_t **osp);
+void dmu_objset_evict(objset_t *os);
+void dmu_objset_do_userquota_callbacks(objset_t *os, dmu_tx_t *tx);
+boolean_t dmu_objset_userused_enabled(objset_t *os);
 int dmu_objset_userspace_upgrade(objset_t *os);
 boolean_t dmu_objset_userspace_present(objset_t *os);
 
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h	Mon Aug 17 09:14:35 2009 -0400
@@ -88,7 +88,7 @@
 #define	EPB(blkshift, typeshift)	(1 << (blkshift - typeshift))
 
 struct dmu_buf_impl;
-struct objset_impl;
+struct objset;
 struct zio;
 
 enum dnode_dirtycontext {
@@ -136,7 +136,7 @@
 	list_node_t dn_link;
 
 	/* immutable: */
-	struct objset_impl *dn_objset;
+	struct objset *dn_objset;
 	uint64_t dn_object;
 	struct dmu_buf_impl *dn_dbuf;
 	dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
@@ -202,14 +202,14 @@
 	uint64_t fr_nblks;
 } free_range_t;
 
-dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
+dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
     uint64_t object);
 void dnode_special_close(dnode_t *dn);
 
 void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
-int dnode_hold(struct objset_impl *dd, uint64_t object,
+int dnode_hold(struct objset *dd, uint64_t object,
     void *ref, dnode_t **dnp);
-int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
+int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
     void *ref, dnode_t **dnp);
 boolean_t dnode_add_ref(dnode_t *dn, void *ref);
 void dnode_rele(dnode_t *dn, void *ref);
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Mon Aug 17 09:14:35 2009 -0400
@@ -42,8 +42,6 @@
 struct dsl_dir;
 struct dsl_pool;
 
-typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
-
 #define	DS_FLAG_INCONSISTENT	(1ULL<<0)
 #define	DS_IS_INCONSISTENT(ds)	\
 	((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
@@ -132,8 +130,7 @@
 	 * Protected by ds_lock:
 	 */
 	kmutex_t ds_lock;
-	void *ds_user_ptr;
-	dsl_dataset_evict_func_t *ds_user_evict_func;
+	objset_t *ds_objset;
 	uint64_t ds_userrefs;
 
 	/*
@@ -174,17 +171,17 @@
 int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
 int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
     void *tag, dsl_dataset_t **);
-int dsl_dataset_own(const char *name, int flags, void *owner,
-    dsl_dataset_t **dsp);
+int dsl_dataset_own(const char *name, boolean_t inconsistentok,
+    void *tag, dsl_dataset_t **dsp);
 int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
-    int flags, void *owner, dsl_dataset_t **);
+    boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp);
 void dsl_dataset_name(dsl_dataset_t *ds, char *name);
 void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
-void dsl_dataset_disown(dsl_dataset_t *ds, void *owner);
+void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
 void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
 boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
-    void *owner);
-void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner);
+    void *tag);
+void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag);
 uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
     dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
 uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
@@ -205,10 +202,6 @@
     boolean_t recursive);
 int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
 
-void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
-    void *p, dsl_dataset_evict_func_t func);
-void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
-
 blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
 void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
 
@@ -245,6 +238,8 @@
 int64_t dsl_dataset_new_refreservation(dsl_dataset_t *ds, uint64_t reservation,
     dmu_tx_t *tx);
 
+int dsl_destroy_inconsistent(char *dsname, void *arg);
+
 #ifdef ZFS_DEBUG
 #define	dprintf_ds(ds, fmt, ...) do { \
 	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
--- a/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h	Mon Aug 17 09:14:35 2009 -0400
@@ -132,8 +132,8 @@
 
 extern uint_t zfs_fsyncer_key;
 
-extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
-extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
+extern int zfs_suspend_fs(zfsvfs_t *zfsvfs);
+extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname);
 extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
     const char *domain, uint64_t rid, uint64_t *valuep);
 extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
@@ -143,7 +143,7 @@
 extern boolean_t zfs_usergroup_overquota(zfsvfs_t *zfsvfs,
     boolean_t isgroup, uint64_t fuid);
 extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
-extern int zfsvfs_create(const char *name, int mode, zfsvfs_t **zvp);
+extern int zfsvfs_create(const char *name, zfsvfs_t **zvp);
 extern void zfsvfs_free(zfsvfs_t *zfsvfs);
 
 #ifdef	__cplusplus
--- a/usr/src/uts/common/fs/zfs/sys/zil.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/zil.h	Mon Aug 17 09:14:35 2009 -0400
@@ -397,6 +397,8 @@
 
 extern void	zil_add_block(zilog_t *zilog, blkptr_t *bp);
 
+extern void	zil_set_logbias(zilog_t *zilog, uint64_t slogval);
+
 extern int zil_disable;
 
 #ifdef	__cplusplus
--- a/usr/src/uts/common/fs/zfs/sys/zil_impl.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/zil_impl.h	Mon Aug 17 09:14:35 2009 -0400
@@ -83,6 +83,7 @@
 	uint8_t		zl_stop_sync;	/* for debugging */
 	uint8_t		zl_writer;	/* boolean: write setup in progress */
 	uint8_t		zl_log_error;	/* boolean: log write error */
+	uint8_t		zl_logbias;	/* latency or throughput */
 	list_t		zl_itx_list;	/* in-memory itx list */
 	uint64_t	zl_itx_list_sz;	/* total size of records on list */
 	uint64_t	zl_cur_used;	/* current commit log size used */
--- a/usr/src/uts/common/fs/zfs/sys/zio.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h	Mon Aug 17 09:14:35 2009 -0400
@@ -383,7 +383,7 @@
     boolean_t labels);
 
 extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
-    blkptr_t *old_bp, uint64_t txg);
+    blkptr_t *old_bp, uint64_t txg, boolean_t bypass_slog);
 extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
 extern void zio_flush(zio_t *zio, vdev_t *vd);
 
--- a/usr/src/uts/common/fs/zfs/zfs_acl.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zfs_acl.c	Mon Aug 17 09:14:35 2009 -0400
@@ -2011,8 +2011,6 @@
 	if (mask & VSA_ACE) {
 		size_t aclsz;
 
-		zfs_acl_node_t *aclnode = list_head(&aclp->z_acl);
-
 		aclsz = count * sizeof (ace_t) +
 		    sizeof (ace_object_t) * largeace;
 
@@ -2023,8 +2021,17 @@
 			zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr,
 			    vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
 		else {
-			bcopy(aclnode->z_acldata, vsecp->vsa_aclentp,
-			    count * sizeof (ace_t));
+			zfs_acl_node_t *aclnode;
+			void *start = vsecp->vsa_aclentp;
+
+			for (aclnode = list_head(&aclp->z_acl); aclnode;
+			    aclnode = list_next(&aclp->z_acl, aclnode)) {
+				bcopy(aclnode->z_acldata, start,
+				    aclnode->z_size);
+				start = (caddr_t)start + aclnode->z_size;
+			}
+			ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
+			    aclp->z_acl_bytes);
 		}
 	}
 	if (mask & VSA_ACE_ACLFLAGS) {
--- a/usr/src/uts/common/fs/zfs/zfs_ctldir.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zfs_ctldir.c	Mon Aug 17 09:14:35 2009 -0400
@@ -853,8 +853,7 @@
 		 */
 		return (err == EILSEQ ? ENOENT : err);
 	}
-	if (dmu_objset_open(snapname, DMU_OST_ZFS,
-	    DS_MODE_USER | DS_MODE_READONLY, &snap) != 0) {
+	if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
 		mutex_exit(&sdp->sd_lock);
 		ZFS_EXIT(zfsvfs);
 		return (ENOENT);
@@ -866,7 +865,7 @@
 	*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
 	avl_insert(&sdp->sd_snaps, sep, where);
 
-	dmu_objset_close(snap);
+	dmu_objset_rele(snap, FTAG);
 domount:
 	mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
 	    strlen("/.zfs/snapshot/") + strlen(nm) + 1;
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Mon Aug 17 09:14:35 2009 -0400
@@ -175,22 +175,15 @@
 static boolean_t
 zfs_is_bootfs(const char *name)
 {
-	spa_t *spa;
-	boolean_t ret = B_FALSE;
-
-	if (spa_open(name, &spa, FTAG) == 0) {
-		if (spa->spa_bootfs) {
-			objset_t *os;
-
-			if (dmu_objset_open(name, DMU_OST_ZFS,
-			    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
-				ret = (dmu_objset_id(os) == spa->spa_bootfs);
-				dmu_objset_close(os);
-			}
-		}
-		spa_close(spa, FTAG);
+	objset_t *os;
+
+	if (dmu_objset_hold(name, FTAG, &os) == 0) {
+		boolean_t ret;
+		ret = (dmu_objset_id(os) == dmu_objset_spa(os)->spa_bootfs);
+		dmu_objset_rele(os, FTAG);
+		return (ret);
 	}
-	return (ret);
+	return (B_FALSE);
 }
 
 /*
@@ -224,13 +217,17 @@
 	objset_t *os;
 	boolean_t rc = B_TRUE;
 
-	if (dmu_objset_open(name, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
+	if (dmu_objset_hold(name, FTAG, &os) == 0) {
 		uint64_t zplversion;
 
+		if (dmu_objset_type(os) != DMU_OST_ZFS) {
+			dmu_objset_rele(os, FTAG);
+			return (B_TRUE);
+		}
+		/* XXX reading from non-owned objset */
 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 			rc = zplversion < version;
-		dmu_objset_close(os);
+		dmu_objset_rele(os, FTAG);
 	}
 	return (rc);
 }
@@ -548,20 +545,19 @@
 	if (error)
 		return (error);
 
-	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &clone);
+	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
 
 	if (error == 0) {
 		dsl_dataset_t *pclone = NULL;
 		dsl_dir_t *dd;
-		dd = clone->os->os_dsl_dataset->ds_dir;
+		dd = clone->os_dsl_dataset->ds_dir;
 
 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 		error = dsl_dataset_hold_obj(dd->dd_pool,
 		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
 		rw_exit(&dd->dd_pool->dp_config_rwlock);
 		if (error) {
-			dmu_objset_close(clone);
+			dmu_objset_rele(clone, FTAG);
 			return (error);
 		}
 
@@ -569,7 +565,7 @@
 		    ZFS_DELEG_PERM_MOUNT, cr);
 
 		dsl_dataset_name(pclone, parentname);
-		dmu_objset_close(clone);
+		dmu_objset_rele(clone, FTAG);
 		dsl_dataset_rele(pclone, FTAG);
 		if (error == 0)
 			error = zfs_secpolicy_write_perms(parentname,
@@ -840,20 +836,23 @@
 	objset_t *os;
 	int error;
 
-	error = dmu_objset_open(dsname, DMU_OST_ZFS,
-	    DS_MODE_USER | DS_MODE_READONLY, &os);
+	error = dmu_objset_hold(dsname, FTAG, &os);
 	if (error)
 		return (error);
-
-	mutex_enter(&os->os->os_user_ptr_lock);
+	if (dmu_objset_type(os) != DMU_OST_ZFS) {
+		dmu_objset_rele(os, FTAG);
+		return (EINVAL);
+	}
+
+	mutex_enter(&os->os_user_ptr_lock);
 	*zvp = dmu_objset_get_user(os);
 	if (*zvp) {
 		VFS_HOLD((*zvp)->z_vfs);
 	} else {
 		error = ESRCH;
 	}
-	mutex_exit(&os->os->os_user_ptr_lock);
-	dmu_objset_close(os);
+	mutex_exit(&os->os_user_ptr_lock);
+	dmu_objset_rele(os, FTAG);
 	return (error);
 }
 
@@ -862,13 +861,12 @@
  * case its z_vfs will be NULL, and it will be opened as the owner.
  */
 static int
-zfsvfs_hold(const char *name, boolean_t readonly, void *tag, zfsvfs_t **zvp)
+zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zvp)
 {
 	int error = 0;
-	int mode = DS_MODE_OWNER | (readonly ? DS_MODE_READONLY : 0);
 
 	if (getzfsvfs(name, zvp) != 0)
-		error = zfsvfs_create(name, mode, zvp);
+		error = zfsvfs_create(name, zvp);
 	if (error == 0) {
 		rrw_enter(&(*zvp)->z_teardown_lock, RW_READER, tag);
 		if ((*zvp)->z_unmounted) {
@@ -892,7 +890,7 @@
 	if (zfsvfs->z_vfs) {
 		VFS_RELE(zfsvfs->z_vfs);
 	} else {
-		dmu_objset_close(zfsvfs->z_os);
+		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
 		zfsvfs_free(zfsvfs);
 	}
 }
@@ -1186,18 +1184,30 @@
 	return (0);
 }
 
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ * zc_obj		object to find
+ *
+ * outputs:
+ * zc_value		name of object
+ */
 static int
 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
 {
-	objset_t *osp;
+	objset_t *os;
 	int error;
 
-	if ((error = dmu_objset_open(zc->zc_name, DMU_OST_ZFS,
-	    DS_MODE_USER | DS_MODE_READONLY, &osp)) != 0)
+	/* XXX reading from objset not owned */
+	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
 		return (error);
-	error = zfs_obj_to_path(osp, zc->zc_obj, zc->zc_value,
+	if (dmu_objset_type(os) != DMU_OST_ZFS) {
+		dmu_objset_rele(os, FTAG);
+		return (EINVAL);
+	}
+	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
 	    sizeof (zc->zc_value));
-	dmu_objset_close(osp);
+	dmu_objset_rele(os, FTAG);
 
 	return (error);
 }
@@ -1380,8 +1390,7 @@
 	int error;
 	nvlist_t *nv;
 
-	if (error = dmu_objset_open(zc->zc_name,
-	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os))
+	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
 		return (error);
 
 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
@@ -1394,6 +1403,7 @@
 		 * which we aren't supposed to do with a
 		 * DS_MODE_USER hold, because it could be
 		 * inconsistent.  So this is a bit of a workaround...
+		 * XXX reading with out owning
 		 */
 		if (!zc->zc_objset_stats.dds_inconsistent) {
 			if (dmu_objset_type(os) == DMU_OST_ZVOL)
@@ -1403,7 +1413,7 @@
 		nvlist_free(nv);
 	}
 
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	return (error);
 }
 
@@ -1438,8 +1448,8 @@
 	objset_t *os;
 	int err;
 
-	if (err = dmu_objset_open(zc->zc_name,
-	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os))
+	/* XXX reading without owning */
+	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
 		return (err);
 
 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
@@ -1464,7 +1474,7 @@
 	} else {
 		err = ENOENT;
 	}
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	return (err);
 }
 
@@ -1505,8 +1515,7 @@
 	int error;
 	char *p;
 
-	if (error = dmu_objset_open(zc->zc_name,
-	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os)) {
+	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
 		if (error == ENOENT)
 			error = ESRCH;
 		return (error);
@@ -1536,7 +1545,7 @@
 		if (error == ENOENT)
 			error = ESRCH;
 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 
 	if (error == 0)
 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
@@ -1562,8 +1571,7 @@
 	objset_t *os;
 	int error;
 
-	error = dmu_objset_open(zc->zc_name,
-	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os);
+	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (error)
 		return (error == ENOENT ? ESRCH : error);
 
@@ -1576,14 +1584,14 @@
 	 * so exit immediately.
 	 */
 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
-		dmu_objset_close(os);
+		dmu_objset_rele(os, FTAG);
 		return (ESRCH);
 	}
 
 	error = dmu_snapshot_list_next(os,
 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
 	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL);
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	if (error == 0)
 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
 	else if (error == ENOENT)
@@ -1726,8 +1734,7 @@
 				domain = propname +
 				    strlen(zfs_userquota_prop_prefixes[type]);
 
-				error = zfsvfs_hold(name, B_FALSE, FTAG,
-				    &zfsvfs);
+				error = zfsvfs_hold(name, FTAG, &zfsvfs);
 				if (error == 0) {
 					error = zfs_set_userquota(zfsvfs,
 					    type, domain, rid, quota);
@@ -1794,8 +1801,7 @@
 
 			if ((error = nvpair_value_uint64(elem, &intval)) != 0)
 				goto out;
-			if ((error = zfsvfs_hold(name, B_FALSE, FTAG,
-			    &zfsvfs)) != 0)
+			if ((error = zfsvfs_hold(name, FTAG, &zfsvfs)) != 0)
 				goto out;
 			error = zfs_set_version(zfsvfs, intval);
 			zfsvfs_rele(zfsvfs, FTAG);
@@ -1911,13 +1917,12 @@
 		nvlist_t *origprops;
 		objset_t *os;
 
-		if (dmu_objset_open(zc->zc_name, DMU_OST_ANY,
-		    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
+		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
 			if (dsl_prop_get_all(os, &origprops, TRUE) == 0) {
 				clear_props(zc->zc_name, origprops, nvl);
 				nvlist_free(origprops);
 			}
-			dmu_objset_close(os);
+			dmu_objset_rele(os, FTAG);
 		}
 
 	}
@@ -2316,13 +2321,12 @@
 	/*
 	 * Open parent object set so we can inherit zplprop values.
 	 */
-	if ((error = dmu_objset_open(parentname, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os)) != 0)
+	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
 		return (error);
 
 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, createprops,
 	    zplprops, is_ci);
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	return (error);
 }
 
@@ -2399,15 +2403,14 @@
 			return (EINVAL);
 		}
 
-		error = dmu_objset_open(zc->zc_value, type,
-		    DS_MODE_USER | DS_MODE_READONLY, &clone);
+		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
 		if (error) {
 			nvlist_free(nvprops);
 			return (error);
 		}
 
 		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
-		dmu_objset_close(clone);
+		dmu_objset_rele(clone, FTAG);
 		if (error) {
 			nvlist_free(nvprops);
 			return (error);
@@ -2644,7 +2647,7 @@
 	if (error)
 		goto out;
 
-	error = dsl_dataset_own(clone_name, DS_MODE_INCONSISTENT, FTAG, &clone);
+	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
 	if (error)
 		goto out;
 
@@ -2652,9 +2655,7 @@
 	 * Do clone swap.
 	 */
 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
-		int mode;
-
-		error = zfs_suspend_fs(zfsvfs, NULL, &mode);
+		error = zfs_suspend_fs(zfsvfs);
 		if (error == 0) {
 			int resume_err;
 
@@ -2666,7 +2667,7 @@
 			} else {
 				error = EBUSY;
 			}
-			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name, mode);
+			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
 			error = error ? error : resume_err;
 		}
 		VFS_RELE(zfsvfs->z_vfs);
@@ -2796,20 +2797,18 @@
 		return (EBADF);
 	}
 
-	if (props && dmu_objset_open(tofs, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
+	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
 		/*
 		 * If new properties are supplied, they are to completely
 		 * replace the existing ones, so stash away the existing ones.
 		 */
-		(void) dsl_prop_get_all(os, &origprops, TRUE);
-
-		dmu_objset_close(os);
+		(void) dsl_prop_get_all(os, &origprops, B_TRUE);
+
+		dmu_objset_rele(os, FTAG);
 	}
 
 	if (zc->zc_string[0]) {
-		error = dmu_objset_open(zc->zc_string, DMU_OST_ANY,
-		    DS_MODE_USER | DS_MODE_READONLY, &origin);
+		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
 		if (error)
 			goto out;
 	}
@@ -2817,7 +2816,7 @@
 	error = dmu_recv_begin(tofs, tosnap, &zc->zc_begin_record,
 	    force, origin, &drc);
 	if (origin)
-		dmu_objset_close(origin);
+		dmu_objset_rele(origin, FTAG);
 	if (error)
 		goto out;
 
@@ -2842,11 +2841,8 @@
 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
 			/* online recv */
 			int end_err;
-			char *osname;
-			int mode;
-
-			osname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
-			error = zfs_suspend_fs(zfsvfs, osname, &mode);
+
+			error = zfs_suspend_fs(zfsvfs);
 			/*
 			 * If the suspend fails, then the recv_end will
 			 * likely also fail, and clean up after itself.
@@ -2854,12 +2850,11 @@
 			end_err = dmu_recv_end(&drc);
 			if (error == 0) {
 				int resume_err =
-				    zfs_resume_fs(zfsvfs, osname, mode);
+				    zfs_resume_fs(zfsvfs, tofs);
 				error = error ? error : resume_err;
 			}
 			error = error ? error : end_err;
 			VFS_RELE(zfsvfs->z_vfs);
-			kmem_free(osname, MAXNAMELEN);
 		} else {
 			error = dmu_recv_end(&drc);
 		}
@@ -2901,8 +2896,7 @@
 	int error;
 	offset_t off;
 
-	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &tosnap);
+	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
 	if (error)
 		return (error);
 
@@ -2916,20 +2910,19 @@
 		if (cp)
 			*(cp+1) = 0;
 		(void) strncat(buf, zc->zc_value, MAXPATHLEN);
-		error = dmu_objset_open(buf, DMU_OST_ANY,
-		    DS_MODE_USER | DS_MODE_READONLY, &fromsnap);
+		error = dmu_objset_hold(buf, FTAG, &fromsnap);
 		kmem_free(buf, MAXPATHLEN);
 		if (error) {
-			dmu_objset_close(tosnap);
+			dmu_objset_rele(tosnap, FTAG);
 			return (error);
 		}
 	}
 
 	fp = getf(zc->zc_cookie);
 	if (fp == NULL) {
-		dmu_objset_close(tosnap);
+		dmu_objset_rele(tosnap, FTAG);
 		if (fromsnap)
-			dmu_objset_close(fromsnap);
+			dmu_objset_rele(fromsnap, FTAG);
 		return (EBADF);
 	}
 
@@ -2940,8 +2933,8 @@
 		fp->f_offset = off;
 	releasef(zc->zc_cookie);
 	if (fromsnap)
-		dmu_objset_close(fromsnap);
-	dmu_objset_close(tosnap);
+		dmu_objset_rele(fromsnap, FTAG);
+	dmu_objset_rele(tosnap, FTAG);
 	return (error);
 }
 
@@ -3099,7 +3092,7 @@
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 		return (EINVAL);
 
-	error = zfsvfs_hold(zc->zc_name, B_TRUE, FTAG, &zfsvfs);
+	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
 	if (error)
 		return (error);
 
@@ -3127,7 +3120,7 @@
 	zfsvfs_t *zfsvfs;
 	int error;
 
-	error = zfsvfs_hold(zc->zc_name, B_TRUE, FTAG, &zfsvfs);
+	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
 	if (error)
 		return (error);
 
@@ -3163,30 +3156,27 @@
 	zfsvfs_t *zfsvfs;
 
 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
-		if (!dmu_objset_userused_enabled(zfsvfs->z_os->os)) {
+		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
 			/*
 			 * If userused is not enabled, it may be because the
 			 * objset needs to be closed & reopened (to grow the
 			 * objset_phys_t).  Suspend/resume the fs will do that.
 			 */
-			int mode;
-			error = zfs_suspend_fs(zfsvfs, NULL, &mode);
-			if (error == 0) {
-				error = zfs_resume_fs(zfsvfs,
-				    zc->zc_name, mode);
-			}
+			error = zfs_suspend_fs(zfsvfs);
+			if (error == 0)
+				error = zfs_resume_fs(zfsvfs, zc->zc_name);
 		}
 		if (error == 0)
 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
 		VFS_RELE(zfsvfs->z_vfs);
 	} else {
-		error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
-		    DS_MODE_USER, &os);
+		/* XXX kind of reading contents without owning */
+		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 		if (error)
 			return (error);
 
 		error = dmu_objset_userspace_upgrade(os);
-		dmu_objset_close(os);
+		dmu_objset_rele(os, FTAG);
 	}
 
 	return (error);
--- a/usr/src/uts/common/fs/zfs/zfs_log.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zfs_log.c	Mon Aug 17 09:14:35 2009 -0400
@@ -474,14 +474,19 @@
 	itx_wr_state_t write_state;
 	boolean_t slogging;
 	uintptr_t fsync_cnt;
+	ssize_t immediate_write_sz;
 
 	if (zilog == NULL || zp->z_unlinked)
 		return;
 
 	ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
 
-	slogging = spa_has_slogs(zilog->zl_spa);
-	if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz)
+	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+	    ? 0 : zfs_immediate_write_sz;
+
+	slogging = spa_has_slogs(zilog->zl_spa) &&
+	    (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+	if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
 		write_state = WR_INDIRECT;
 	else if (ioflag & (FSYNC | FDSYNC))
 		write_state = WR_COPIED;
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Mon Aug 17 09:14:35 2009 -0400
@@ -835,32 +835,30 @@
 }
 
 int
-zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
+zfsvfs_create(const char *osname, zfsvfs_t **zvp)
 {
 	objset_t *os;
 	zfsvfs_t *zfsvfs;
 	uint64_t zval;
 	int i, error;
 
-	if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL))
-		return (error);
-	if (zval)
-		mode |= DS_MODE_READONLY;
+	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
 
-	error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
-	if (error == EROFS) {
-		mode |= DS_MODE_READONLY;
-		error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
+	/*
+	 * We claim to always be readonly so we can open snapshots;
+	 * other ZPL code will prevent us from writing to snapshots.
+	 */
+	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
+	if (error) {
+		kmem_free(zfsvfs, sizeof (zfsvfs_t));
+		return (error);
 	}
-	if (error)
-		return (error);
 
 	/*
 	 * Initialize the zfs-specific filesystem structure.
 	 * Should probably make this a kmem cache, shuffle fields,
 	 * and just bzero up to z_hold_mtx[].
 	 */
-	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
 	zfsvfs->z_vfs = NULL;
 	zfsvfs->z_parent = zfsvfs;
 	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
@@ -948,7 +946,7 @@
 	return (0);
 
 out:
-	dmu_objset_close(os);
+	dmu_objset_disown(os, zfsvfs);
 	*zvp = NULL;
 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
 	return (error);
@@ -966,9 +964,9 @@
 	/*
 	 * Set the objset user_ptr to track its zfsvfs.
 	 */
-	mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
+	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
 	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
-	mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
 
 	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
 	if (zil_disable) {
@@ -1084,7 +1082,7 @@
 	ASSERT(vfsp);
 	ASSERT(osname);
 
-	error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs);
+	error = zfsvfs_create(osname, &zfsvfs);
 	if (error)
 		return (error);
 	zfsvfs->z_vfs = vfsp;
@@ -1146,9 +1144,9 @@
 		xattr_changed_cb(zfsvfs, pval);
 		zfsvfs->z_issnap = B_TRUE;
 
-		mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
+		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
 		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
-		mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
 	} else {
 		error = zfsvfs_setup(zfsvfs, B_TRUE);
 	}
@@ -1157,7 +1155,7 @@
 		zfsctl_create(zfsvfs);
 out:
 	if (error) {
-		dmu_objset_close(zfsvfs->z_os);
+		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
 		zfsvfs_free(zfsvfs);
 	} else {
 		atomic_add_32(&zfs_active_fs_count, 1);
@@ -1725,14 +1723,14 @@
 		/*
 		 * Unset the objset user_ptr.
 		 */
-		mutex_enter(&os->os->os_user_ptr_lock);
+		mutex_enter(&os->os_user_ptr_lock);
 		dmu_objset_set_user(os, NULL);
-		mutex_exit(&os->os->os_user_ptr_lock);
+		mutex_exit(&os->os_user_ptr_lock);
 
 		/*
 		 * Finally release the objset
 		 */
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zfsvfs);
 	}
 
 	/*
@@ -1835,17 +1833,13 @@
  * 'z_teardown_inactive_lock' write held.
  */
 int
-zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
+zfs_suspend_fs(zfsvfs_t *zfsvfs)
 {
 	int error;
 
 	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
 		return (error);
-
-	*modep = zfsvfs->z_os->os_mode;
-	if (name)
-		dmu_objset_name(zfsvfs->z_os, name);
-	dmu_objset_close(zfsvfs->z_os);
+	dmu_objset_disown(zfsvfs->z_os, zfsvfs);
 
 	return (0);
 }
@@ -1854,14 +1848,15 @@
  * Reopen zfsvfs_t::z_os and release VOPs.
  */
 int
-zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
+zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
 {
 	int err;
 
 	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
 	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
 
-	err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
+	err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
+	    &zfsvfs->z_os);
 	if (err) {
 		zfsvfs->z_os = NULL;
 	} else {
--- a/usr/src/uts/common/fs/zfs/zil.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zil.c	Mon Aug 17 09:14:35 2009 -0400
@@ -367,7 +367,7 @@
 		}
 
 		error = zio_alloc_blk(zilog->zl_spa, ZIL_MIN_BLKSZ, &blk,
-		    NULL, txg);
+		    NULL, txg, zilog->zl_logbias != ZFS_LOGBIAS_LATENCY);
 
 		if (error == 0)
 			zil_init_log_chain(zilog, &blk);
@@ -500,7 +500,7 @@
 	objset_t *os;
 	int error;
 
-	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
+	error = dmu_objset_hold(osname, FTAG, &os);
 	if (error) {
 		cmn_err(CE_WARN, "can't open objset for %s", osname);
 		return (0);
@@ -549,7 +549,7 @@
 	}
 
 	ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	return (0);
 }
 
@@ -571,7 +571,7 @@
 	zil_trailer_t *ztp;
 	int error;
 
-	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
+	error = dmu_objset_hold(osname, FTAG, &os);
 	if (error) {
 		cmn_err(CE_WARN, "can't open objset for %s", osname);
 		return (0);
@@ -581,7 +581,7 @@
 	zh = zil_header_in_syncing_context(zilog);
 	blk = zh->zh_log;
 	if (BP_IS_HOLE(&blk)) {
-		dmu_objset_close(os);
+		dmu_objset_rele(os, FTAG);
 		return (0); /* no chain */
 	}
 
@@ -594,7 +594,7 @@
 		blk = ztp->zit_next_blk;
 		VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
 	}
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	if (error == ECKSUM)
 		return (0); /* normal end of chain */
 	return (error);
@@ -791,7 +791,8 @@
 
 	BP_ZERO(bp);
 	/* pass the old blkptr in order to spread log blocks across devs */
-	error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg);
+	error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg,
+	    zilog->zl_logbias != ZFS_LOGBIAS_LATENCY);
 	if (error) {
 		dmu_tx_t *tx = dmu_tx_create_assigned(zilog->zl_dmu_pool, txg);
 
@@ -1280,6 +1281,12 @@
 	kmem_cache_destroy(zil_lwb_cache);
 }
 
+void
+zil_set_logbias(zilog_t *zilog, uint64_t logbias)
+{
+	zilog->zl_logbias = logbias;
+}
+
 zilog_t *
 zil_alloc(objset_t *os, zil_header_t *zh_phys)
 {
@@ -1292,6 +1299,7 @@
 	zilog->zl_spa = dmu_objset_spa(os);
 	zilog->zl_dmu_pool = dmu_objset_pool(os);
 	zilog->zl_destroy_txg = TXG_INITIAL - 1;
+	zilog->zl_logbias = dmu_objset_logbias(os);
 
 	mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL);
 
@@ -1658,7 +1666,7 @@
 	zilog_t *zilog;
 	int error;
 
-	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
+	error = dmu_objset_hold(osname, FTAG, &os);
 	if (error)
 		return (error);
 
@@ -1667,6 +1675,6 @@
 		error = EEXIST;
 	else
 		zil_resume(zilog);
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	return (error);
 }
--- a/usr/src/uts/common/fs/zfs/zio.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zio.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1716,12 +1716,13 @@
  */
 int
 zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, blkptr_t *old_bp,
-    uint64_t txg)
+    uint64_t txg, boolean_t bypass_slog)
 {
-	int error;
+	int error = 1;
 
-	error = metaslab_alloc(spa, spa->spa_log_class, size,
-	    new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID);
+	if (!bypass_slog)
+		error = metaslab_alloc(spa, spa->spa_log_class, size,
+		    new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID);
 
 	if (error)
 		error = metaslab_alloc(spa, spa->spa_normal_class, size,
--- a/usr/src/uts/common/fs/zfs/zvol.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zvol.c	Mon Aug 17 09:14:35 2009 -0400
@@ -80,6 +80,7 @@
 #include "zfs_namecheck.h"
 
 static void *zvol_state;
+static char *zvol_tag = "zvol_tag";
 
 #define	ZVOL_DUMPSIZE		"dumpsize"
 
@@ -109,7 +110,7 @@
 	uint8_t		zv_min_bs;	/* minimum addressable block shift */
 	uint8_t		zv_flags;	/* readonly, dumpified, etc. */
 	objset_t	*zv_objset;	/* objset handle */
-	uint32_t	zv_mode;	/* DS_MODE_* flags at open time */
+	boolean_t 	zv_issnap;	/* is a snapshot (read-only) */
 	uint32_t	zv_open_count[OTYPCNT];	/* open counts */
 	uint32_t	zv_total_opens;	/* total open count */
 	zilog_t		*zv_zilog;	/* ZIL handle */
@@ -432,7 +433,6 @@
 	uint64_t volsize;
 	minor_t minor = 0;
 	struct pathname linkpath;
-	int ds_mode = DS_MODE_OWNER;
 	vnode_t *vp = NULL;
 	char *devpath;
 	char chrbuf[30], blkbuf[30];
@@ -445,10 +445,8 @@
 		return (EEXIST);
 	}
 
-	if (strchr(name, '@') != 0)
-		ds_mode |= DS_MODE_READONLY;
-
-	error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os);
+	/* lie and say we're read-only */
+	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
 
 	if (error) {
 		mutex_exit(&zvol_state_lock);
@@ -458,7 +456,7 @@
 	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
 
 	if (error) {
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zvol_tag);
 		mutex_exit(&zvol_state_lock);
 		return (error);
 	}
@@ -500,13 +498,13 @@
 		minor = zvol_minor_alloc();
 
 	if (minor == 0) {
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zvol_tag);
 		mutex_exit(&zvol_state_lock);
 		return (ENXIO);
 	}
 
 	if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) {
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zvol_tag);
 		mutex_exit(&zvol_state_lock);
 		return (EAGAIN);
 	}
@@ -519,7 +517,7 @@
 	if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
 	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
 		ddi_soft_state_free(zvol_state, minor);
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zvol_tag);
 		mutex_exit(&zvol_state_lock);
 		return (EAGAIN);
 	}
@@ -530,7 +528,7 @@
 	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
 		ddi_remove_minor_node(zfs_dip, chrbuf);
 		ddi_soft_state_free(zvol_state, minor);
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zvol_tag);
 		mutex_exit(&zvol_state_lock);
 		return (EAGAIN);
 	}
@@ -542,7 +540,7 @@
 	zv->zv_minor = minor;
 	zv->zv_volsize = volsize;
 	zv->zv_objset = os;
-	zv->zv_mode = ds_mode;
+	zv->zv_issnap = dmu_objset_is_snapshot(os);
 	zv->zv_zilog = zil_open(os, zvol_get_data);
 	mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
 	avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
@@ -600,7 +598,7 @@
 
 	zil_close(zv->zv_zilog);
 	zv->zv_zilog = NULL;
-	dmu_objset_close(zv->zv_objset);
+	dmu_objset_disown(zv->zv_objset, zvol_tag);
 	zv->zv_objset = NULL;
 	avl_destroy(&zv->zv_znode.z_range_avl);
 	mutex_destroy(&zv->zv_znode.z_range_lock);
@@ -704,7 +702,7 @@
 		 * If we are doing a "zfs clone -o volsize=", then the
 		 * minor node won't exist yet.
 		 */
-		error = dmu_objset_open(name, DMU_OST_ZVOL, DS_MODE_OWNER,
+		error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, FTAG,
 		    &state.zv_objset);
 		if (error != 0)
 			goto out;
@@ -717,7 +715,7 @@
 	    doi.doi_data_block_size)) != 0)
 		goto out;
 
-	if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) {
+	if (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap) {
 		error = EROFS;
 		goto out;
 	}
@@ -760,7 +758,7 @@
 
 out:
 	if (state.zv_objset)
-		dmu_objset_close(state.zv_objset);
+		dmu_objset_disown(state.zv_objset, FTAG);
 
 	mutex_exit(&zvol_state_lock);
 
@@ -788,7 +786,7 @@
 			mutex_exit(&zvol_state_lock);
 		return (ENXIO);
 	}
-	if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) {
+	if (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap) {
 		if (needlock)
 			mutex_exit(&zvol_state_lock);
 		return (EROFS);
@@ -836,7 +834,7 @@
 	ASSERT(zv->zv_objset != NULL);
 
 	if ((flag & FWRITE) &&
-	    (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY))) {
+	    (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap)) {
 		mutex_exit(&zvol_state_lock);
 		return (EROFS);
 	}
@@ -992,6 +990,7 @@
 	uint32_t blocksize = zv->zv_volblocksize;
 	zilog_t *zilog = zv->zv_zilog;
 	boolean_t slogging;
+	ssize_t immediate_write_sz;
 
 	if (zil_disable)
 		return;
@@ -1003,7 +1002,11 @@
 		return;
 	}
 
-	slogging = spa_has_slogs(zilog->zl_spa);
+	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+	    ? 0 : zvol_immediate_write_sz;
+
+	slogging = spa_has_slogs(zilog->zl_spa) &&
+	    (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
 
 	while (resid) {
 		itx_t *itx;
@@ -1015,7 +1018,7 @@
 		 * Unlike zfs_log_write() we can be called with
 		 * upto DMU_MAX_ACCESS/2 (5MB) writes.
 		 */
-		if (blocksize > zvol_immediate_write_sz && !slogging &&
+		if (blocksize > immediate_write_sz && !slogging &&
 		    resid >= blocksize && off % blocksize == 0) {
 			write_state = WR_INDIRECT; /* uses dmu_sync */
 			len = blocksize;
@@ -1160,8 +1163,7 @@
 	}
 
 	if (!(bp->b_flags & B_READ) &&
-	    (zv->zv_flags & ZVOL_RDONLY ||
-	    zv->zv_mode & DS_MODE_READONLY)) {
+	    (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap)) {
 		bioerror(bp, EROFS);
 		biodone(bp);
 		return (0);
@@ -1721,7 +1723,7 @@
 	dmu_tx_t *tx;
 	objset_t *os = zv->zv_objset;
 
-	if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY))
+	if (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap)
 		return (EROFS);
 
 	/*
--- a/usr/src/uts/common/inet/tcp.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/inet/tcp.h	Mon Aug 17 09:14:35 2009 -0400
@@ -536,7 +536,6 @@
 	 */
 	struct tcp_s *tcp_loopback_peer;	/* peer tcp for loopback */
 	mblk_t	*tcp_fused_sigurg_mp;		/* M_PCSIG mblk for SIGURG */
-	size_t	tcp_fuse_rcv_hiwater;		/* fusion receive queue size */
 
 	uint32_t
 		tcp_fused : 1,		/* loopback tcp in fusion mode */
--- a/usr/src/uts/common/inet/tcp/tcp.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/inet/tcp/tcp.c	Mon Aug 17 09:14:35 2009 -0400
@@ -789,7 +789,6 @@
 static void	tcp_process_options(tcp_t *, tcph_t *);
 static void	tcp_rput_common(tcp_t *tcp, mblk_t *mp);
 static void	tcp_rsrv(queue_t *q);
-static int	tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd);
 static int	tcp_snmp_state(tcp_t *tcp);
 static void	tcp_timer(void *arg);
 static void	tcp_timer_callback(void *);
@@ -1373,6 +1372,28 @@
 	    iph, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha,		\
 	    ip6_t *, ip6h, int, 0);
 
+static void
+tcp_set_recv_threshold(tcp_t *tcp, uint32_t new_rcvthresh)
+{
+	uint32_t default_threshold = SOCKET_RECVHIWATER >> 3;
+
+	if (IPCL_IS_NONSTR(tcp->tcp_connp)) {
+		conn_t *connp = tcp->tcp_connp;
+		struct sock_proto_props sopp;
+
+		/*
+		 * only increase rcvthresh upto default_threshold
+		 */
+		if (new_rcvthresh > default_threshold)
+			new_rcvthresh = default_threshold;
+
+		sopp.sopp_flags = SOCKOPT_RCVTHRESH;
+		sopp.sopp_rcvthresh = new_rcvthresh;
+
+		(*connp->conn_upcalls->su_set_proto_props)
+		    (connp->conn_upper_handle, &sopp);
+	}
+}
 /*
  * Figure out the value of window scale opton.  Note that the rwnd is
  * ASSUMED to be rounded up to the nearest MSS before the calculation.
@@ -5536,10 +5557,10 @@
 	}
 
 	/*
-	 * listener->tcp_rq->q_hiwat should be the default window size or a
-	 * window size changed via SO_RCVBUF option.  First round up the
-	 * eager's tcp_rwnd to the nearest MSS.  Then find out the window
-	 * scale option value if needed.  Call tcp_rwnd_set() to finish the
+	 * listeners tcp_recv_hiwater should be the default window size or a
+	 * window size changed via SO_RCVBUF option. First round up the
+	 * eager's tcp_rwnd to the nearest MSS. Then find out the window
+	 * scale option value if needed. Call tcp_rwnd_set() to finish the
 	 * setting.
 	 *
 	 * Note if there is a rpipe metric associated with the remote host,
@@ -7568,8 +7589,6 @@
 	tcp->tcp_ip_src_v6 = tcp->tcp_bound_source_v6;
 
 	ASSERT(tcp->tcp_ptpbhn != NULL);
-	if (!IPCL_IS_NONSTR(tcp->tcp_connp))
-		tcp->tcp_rq->q_hiwat = tcps->tcps_recv_hiwat;
 	tcp->tcp_recv_hiwater = tcps->tcps_recv_hiwat;
 	tcp->tcp_recv_lowater = tcp_rinfo.mi_lowat;
 	tcp->tcp_rwnd = tcps->tcps_recv_hiwat;
@@ -7857,7 +7876,7 @@
 	tcp->tcp_unfusable = B_FALSE;
 	tcp->tcp_fused_sigurg = B_FALSE;
 	tcp->tcp_loopback_peer = NULL;
-	tcp->tcp_fuse_rcv_hiwater = 0;
+	tcp->tcp_recv_hiwater = 0;
 
 	tcp->tcp_lso = B_FALSE;
 
@@ -7961,7 +7980,7 @@
 	tcp->tcp_unfusable = B_FALSE;
 	tcp->tcp_fused_sigurg = B_FALSE;
 	tcp->tcp_loopback_peer = NULL;
-	tcp->tcp_fuse_rcv_hiwater = 0;
+	tcp->tcp_recv_hiwater = 0;
 
 	/* Initialize the header template */
 	if (tcp->tcp_ipversion == IPV4_VERSION) {
@@ -8902,7 +8921,7 @@
 	if (TCP_IS_DETACHED(tcp))
 		return (mss);
 	if (tcp->tcp_fused) {
-		maxpsz = tcp_fuse_maxpsz_set(tcp);
+		maxpsz = tcp_fuse_maxpsz(tcp);
 		mss = INFPSZ;
 	} else if (tcp->tcp_mdt || tcp->tcp_lso || tcp->tcp_maxpsz == 0) {
 		/*
@@ -9321,8 +9340,6 @@
 			return (NULL);
 		}
 		q = connp->conn_rq;
-	} else {
-		RD(q)->q_hiwat = tcps->tcps_recv_hiwat;
 	}
 
 	SOCK_CONNID_INIT(tcp->tcp_connid);
@@ -15650,7 +15667,7 @@
 
 	if (canputnext(q)) {
 		/* Not flow-controlled, open rwnd */
-		tcp->tcp_rwnd = q->q_hiwat;
+		tcp->tcp_rwnd = tcp->tcp_recv_hiwater;
 
 		/*
 		 * Send back a window update immediately if TCP is above
@@ -15725,7 +15742,7 @@
  * XXX - Should allow a lower rwnd than tcp_recv_hiwat_minmss * mss if the
  * user requests so.
  */
-static int
+int
 tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd)
 {
 	uint32_t	mss = tcp->tcp_mss;
@@ -15739,25 +15756,11 @@
 		tcp_t *peer_tcp = tcp->tcp_loopback_peer;
 
 		ASSERT(peer_tcp != NULL);
-		/*
-		 * Record the stream head's high water mark for
-		 * this endpoint; this is used for flow-control
-		 * purposes in tcp_fuse_output().
-		 */
 		sth_hiwat = tcp_fuse_set_rcv_hiwat(tcp, rwnd);
 		if (!tcp_detached) {
 			(void) proto_set_rx_hiwat(tcp->tcp_rq, tcp->tcp_connp,
 			    sth_hiwat);
-			if (IPCL_IS_NONSTR(tcp->tcp_connp)) {
-				conn_t *connp = tcp->tcp_connp;
-				struct sock_proto_props sopp;
-
-				sopp.sopp_flags = SOCKOPT_RCVTHRESH;
-				sopp.sopp_rcvthresh = sth_hiwat >> 3;
-
-				(*connp->conn_upcalls->su_set_proto_props)
-				    (connp->conn_upper_handle, &sopp);
-			}
+			tcp_set_recv_threshold(tcp, sth_hiwat >> 3);
 		}
 
 		/*
@@ -15767,7 +15770,7 @@
 		 * have changed we need to update the peer's maxpsz.
 		 */
 		(void) tcp_maxpsz_set(peer_tcp, B_TRUE);
-		return (rwnd);
+		return (sth_hiwat);
 	}
 
 	if (tcp_detached) {
@@ -15840,14 +15843,11 @@
 
 	if (tcp_detached)
 		return (rwnd);
-	/*
-	 * We set the maximum receive window into rq->q_hiwat if it is
-	 * a STREAMS socket.
-	 * This is not actually used for flow control.
-	 */
-	if (!IPCL_IS_NONSTR(tcp->tcp_connp))
-		tcp->tcp_rq->q_hiwat = rwnd;
+
+	tcp_set_recv_threshold(tcp, rwnd >> 3);
+
 	tcp->tcp_recv_hiwater = rwnd;
+
 	/*
 	 * Set the STREAM head high water mark. This doesn't have to be
 	 * here, since we are simply using default values, but we would
@@ -17258,19 +17258,13 @@
 	}
 
 	/*
-	 * Set the max window size (tcp_rq->q_hiwat) of the acceptor
-	 * properly.  This is the first time we know of the acceptor'
-	 * queue.  So we do it here.
-	 *
-	 * XXX
+	 * Set max window size (tcp_recv_hiwater) of the acceptor.
 	 */
 	if (tcp->tcp_rcv_list == NULL) {
 		/*
 		 * Recv queue is empty, tcp_rwnd should not have changed.
 		 * That means it should be equal to the listener's tcp_rwnd.
 		 */
-		if (!IPCL_IS_NONSTR(connp))
-			tcp->tcp_rq->q_hiwat = tcp->tcp_rwnd;
 		tcp->tcp_recv_hiwater = tcp->tcp_rwnd;
 	} else {
 #ifdef DEBUG
@@ -17286,8 +17280,6 @@
 		ASSERT(cnt != 0 && tcp->tcp_rcv_cnt == cnt);
 #endif
 		/* There is some data, add them back to get the max. */
-		if (!IPCL_IS_NONSTR(connp))
-			tcp->tcp_rq->q_hiwat = tcp->tcp_rwnd + tcp->tcp_rcv_cnt;
 		tcp->tcp_recv_hiwater = tcp->tcp_rwnd + tcp->tcp_rcv_cnt;
 	}
 	/*
@@ -17298,10 +17290,6 @@
 	sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_MAXBLK | SOCKOPT_WROFF;
 	sopp_maxblk = tcp_maxpsz_set(tcp, B_FALSE);
 
-	/*
-	 * Record the stream head's high water mark for this endpoint;
-	 * this is used for flow-control purposes.
-	 */
 	sopp_rxhiwat = tcp->tcp_fused ?
 	    tcp_fuse_set_rcv_hiwat(tcp, tcp->tcp_recv_hiwater) :
 	    MAX(tcp->tcp_recv_hiwater, tcps->tcps_sth_rcv_hiwat);
@@ -17455,7 +17443,7 @@
 			/* We drain directly in case of fused tcp loopback */
 
 			if (!tcp->tcp_fused && canputnext(q)) {
-				tcp->tcp_rwnd = q->q_hiwat;
+				tcp->tcp_rwnd = tcp->tcp_recv_hiwater;
 				if (tcp->tcp_state >= TCPS_ESTABLISHED &&
 				    tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) {
 					tcp_xmit_ctl(NULL,
@@ -25639,7 +25627,6 @@
 	cred_t	*ecr;
 	ts_label_t	*tsl;
 	uint32_t	mss;
-	queue_t	*q = tcp->tcp_rq;
 	conn_t	*connp = tcp->tcp_connp;
 	tcp_stack_t	*tcps = tcp->tcp_tcps;
 
@@ -25748,8 +25735,6 @@
 		 */
 		tcp->tcp_rwnd = MAX(MSS_ROUNDUP(tcp->tcp_rwnd, mss),
 		    tcps->tcps_recv_hiwat_minmss * mss);
-		if (!IPCL_IS_NONSTR(connp))
-			q->q_hiwat = tcp->tcp_rwnd;
 		tcp->tcp_recv_hiwater = tcp->tcp_rwnd;
 		tcp_set_ws_value(tcp);
 		U32_TO_ABE16((tcp->tcp_rwnd >> tcp->tcp_rcv_ws),
@@ -26799,9 +26784,7 @@
 	    tcp->tcp_tcps->tcps_wroff_xtra);
 	if (tcp->tcp_snd_sack_ok)
 		stropt->so_wroff += TCPOPT_MAX_SACK_LEN;
-	stropt->so_hiwat = tcp->tcp_fused ?
-	    tcp_fuse_set_rcv_hiwat(tcp, tcp->tcp_recv_hiwater) :
-	    MAX(tcp->tcp_recv_hiwater, tcp->tcp_tcps->tcps_sth_rcv_hiwat);
+	stropt->so_hiwat = tcp->tcp_recv_hiwater;
 	stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE);
 
 	putnext(RD(q), stropt_mp);
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c	Mon Aug 17 09:14:35 2009 -0400
@@ -136,14 +136,14 @@
 	ASSERT(tcp->tcp_loopback);
 	ASSERT(tcp->tcp_loopback_peer == NULL);
 	/*
-	 * We need to inherit q_hiwat of the listener tcp, but we can't
-	 * really use tcp_listener since we get here after sending up
-	 * T_CONN_IND and tcp_wput_accept() may be called independently,
-	 * at which point tcp_listener is cleared; this is why we use
-	 * tcp_saved_listener.  The listener itself is guaranteed to be
-	 * around until tcp_accept_finish() is called on this eager --
-	 * this won't happen until we're done since we're inside the
-	 * eager's perimeter now.
+	 * We need to inherit tcp_recv_hiwater of the listener tcp,
+	 * but we can't really use tcp_listener since we get here after
+	 * sending up T_CONN_IND and tcp_wput_accept() may be called
+	 * independently, at which point tcp_listener is cleared;
+	 * this is why we use tcp_saved_listener. The listener itself
+	 * is guaranteed to be around until tcp_accept_finish() is called
+	 * on this eager -- this won't happen until we're done since we're
+	 * inside the eager's perimeter now.
 	 *
 	 * We can also get called in the case were a connection needs
 	 * to be re-fused. In this case tcp_saved_listener will be
@@ -272,29 +272,19 @@
 		tcp_timers_stop(tcp);
 		tcp_timers_stop(peer_tcp);
 
-		/*
-		 * At this point we are a detached eager tcp and therefore
-		 * don't have a queue assigned to us until accept happens.
-		 * In the mean time the peer endpoint may immediately send
-		 * us data as soon as fusion is finished, and we need to be
-		 * able to flow control it in case it sends down huge amount
-		 * of data while we're still detached.  To prevent that we
-		 * inherit the listener's recv_hiwater value; this is temporary
-		 * since we'll repeat the process in tcp_accept_finish().
-		 */
 		if (!tcp->tcp_refuse) {
-			(void) tcp_fuse_set_rcv_hiwat(tcp,
-			    tcp->tcp_saved_listener->tcp_recv_hiwater);
+			/*
+			 * Set receive buffer and max packet size for the
+			 * active open tcp.
+			 * eager's values will be set in tcp_accept_finish.
+			 */
+
+			(void) tcp_rwnd_set(peer_tcp,
+			    peer_tcp->tcp_recv_hiwater);
 
 			/*
-			 * Set the stream head's write offset value to zero
-			 * since we won't be needing any room for TCP/IP
-			 * headers; tell it to not break up the writes (this
-			 * would reduce the amount of work done by kmem); and
-			 * configure our receive buffer. Note that we can only
-			 * do this for the active connect tcp since our eager is
-			 * still detached; it will be dealt with later in
-			 * tcp_accept_finish().
+			 * Set the write offset value to zero since we won't
+			 * be needing any room for TCP/IP headers.
 			 */
 			if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp)) {
 				struct stroptions *stropt;
@@ -303,19 +293,9 @@
 				mp->b_wptr += sizeof (*stropt);
 
 				stropt = (struct stroptions *)mp->b_rptr;
-				stropt->so_flags = SO_MAXBLK|SO_WROFF|SO_HIWAT;
-				stropt->so_maxblk = tcp_maxpsz_set(peer_tcp,
-				    B_FALSE);
+				stropt->so_flags = SO_WROFF;
 				stropt->so_wroff = 0;
 
-				/*
-				 * Record the stream head's high water mark for
-				 * peer endpoint; this is used for flow-control
-				 * purposes in tcp_fuse_output().
-				 */
-				stropt->so_hiwat = tcp_fuse_set_rcv_hiwat(
-				    peer_tcp, peer_rq->q_hiwat);
-
 				/* Send the options up */
 				putnext(peer_rq, mp);
 			} else {
@@ -324,16 +304,8 @@
 				/* The peer is a non-STREAMS end point */
 				ASSERT(IPCL_IS_TCP(peer_connp));
 
-				(void) tcp_fuse_set_rcv_hiwat(tcp,
-				    tcp->tcp_saved_listener->tcp_recv_hiwater);
-
-				sopp.sopp_flags = SOCKOPT_MAXBLK |
-				    SOCKOPT_WROFF | SOCKOPT_RCVHIWAT;
-				sopp.sopp_maxblk = tcp_maxpsz_set(peer_tcp,
-				    B_FALSE);
+				sopp.sopp_flags = SOCKOPT_WROFF;
 				sopp.sopp_wroff = 0;
-				sopp.sopp_rxhiwat = tcp_fuse_set_rcv_hiwat(
-				    peer_tcp, peer_tcp->tcp_recv_hiwater);
 				(*peer_connp->conn_upcalls->su_set_proto_props)
 				    (peer_connp->conn_upper_handle, &sopp);
 			}
@@ -789,7 +761,7 @@
 	mutex_enter(&tcp->tcp_non_sq_lock);
 	flow_stopped = tcp->tcp_flow_stopped;
 	if ((TCP_IS_DETACHED(peer_tcp) &&
-	    (peer_tcp->tcp_rcv_cnt >= peer_tcp->tcp_fuse_rcv_hiwater)) ||
+	    (peer_tcp->tcp_rcv_cnt >= peer_tcp->tcp_recv_hiwater)) ||
 	    (!TCP_IS_DETACHED(peer_tcp) &&
 	    !IPCL_IS_NONSTR(peer_tcp->tcp_connp) &&
 	    !canputnext(peer_tcp->tcp_rq))) {
@@ -989,7 +961,12 @@
 	 * after SO_SNDBUF; the latter is also similarly rounded up.
 	 */
 	rwnd = P2ROUNDUP_TYPED(rwnd, PAGESIZE, size_t);
-	tcp->tcp_fuse_rcv_hiwater = rwnd;
+
+	/*
+	 * Record high water mark, this is used for flow-control
+	 * purposes in tcp_fuse_output().
+	 */
+	tcp->tcp_recv_hiwater = rwnd;
 	return (rwnd);
 }
 
@@ -997,7 +974,7 @@
  * Calculate the maximum outstanding unread data block for a fused tcp endpoint.
  */
 int
-tcp_fuse_maxpsz_set(tcp_t *tcp)
+tcp_fuse_maxpsz(tcp_t *tcp)
 {
 	tcp_t *peer_tcp = tcp->tcp_loopback_peer;
 	uint_t sndbuf = tcp->tcp_xmit_hiwater;
@@ -1005,7 +982,7 @@
 
 	ASSERT(tcp->tcp_fused);
 	ASSERT(peer_tcp != NULL);
-	ASSERT(peer_tcp->tcp_fuse_rcv_hiwater != 0);
+	ASSERT(peer_tcp->tcp_recv_hiwater != 0);
 	/*
 	 * In the fused loopback case, we want the stream head to split
 	 * up larger writes into smaller chunks for a more accurate flow-
@@ -1014,8 +991,8 @@
 	 * We round up the buffer to system page size due to the lack of
 	 * TCP MSS concept in Fusion.
 	 */
-	if (maxpsz > peer_tcp->tcp_fuse_rcv_hiwater)
-		maxpsz = peer_tcp->tcp_fuse_rcv_hiwater;
+	if (maxpsz > peer_tcp->tcp_recv_hiwater)
+		maxpsz = peer_tcp->tcp_recv_hiwater;
 	maxpsz = P2ROUNDUP_TYPED(maxpsz, PAGESIZE, uint_t) >> 1;
 
 	return (maxpsz);
--- a/usr/src/uts/common/inet/tcp_impl.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/inet/tcp_impl.h	Mon Aug 17 09:14:35 2009 -0400
@@ -222,8 +222,9 @@
 extern void	tcp_fuse_output_urg(tcp_t *, mblk_t *);
 extern boolean_t tcp_fuse_rcv_drain(queue_t *, tcp_t *, mblk_t **);
 extern size_t	tcp_fuse_set_rcv_hiwat(tcp_t *, size_t);
-extern int	tcp_fuse_maxpsz_set(tcp_t *);
-extern void	tcp_fuse_backenable(tcp_t *tcp);
+extern int	tcp_fuse_maxpsz(tcp_t *);
+extern void	tcp_fuse_backenable(tcp_t *);
+extern int	tcp_rwnd_set(tcp_t *, uint32_t);
 
 /*
  * Object to represent database of options to search passed to
--- a/usr/src/uts/common/io/aggr/aggr_grp.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/aggr/aggr_grp.c	Mon Aug 17 09:14:35 2009 -0400
@@ -623,7 +623,8 @@
 	/*
 	 * Get the list the the underlying HW rings.
 	 */
-	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh);
+	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh,
+	    MAC_RING_TYPE_RX);
 
 	if (port->lp_hwgh != NULL) {
 		/*
@@ -689,7 +690,8 @@
 		goto done;
 
 	ASSERT(rx_grp->arg_gh != NULL);
-	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh);
+	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh,
+	    MAC_RING_TYPE_RX);
 
 	/*
 	 * If hw_rh_cnt is 0, it means that the underlying port does not
--- a/usr/src/uts/common/io/audio/drv/audiovia823x/audiovia823x.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/audio/drv/audiovia823x/audiovia823x.c	Mon Aug 17 09:14:35 2009 -0400
@@ -203,9 +203,9 @@
 {
 	auvia_devc_t	*devc = (void *)argp;
 	auvia_portc_t	*portc;
-	uint32_t	gstat;
 	uint8_t		status;
 	unsigned	intrs = 0;
+	boolean_t	claimed = B_FALSE;
 
 	_NOTE(ARGUNUSED(nocare));
 
@@ -215,12 +215,6 @@
 		return (DDI_INTR_UNCLAIMED);
 	}
 
-	gstat = INL(devc, devc->base + REG_GSTAT);
-	if (gstat == 0) {
-		mutex_exit(&devc->mutex);
-		return (DDI_INTR_UNCLAIMED);
-	}
-
 	for (int i = 0; i < AUVIA_NUM_PORTC; i++) {
 
 		portc = devc->portc[i];
@@ -242,13 +236,18 @@
 		if (portc->started) {
 			intrs |= (1U << i);
 		}
-		/* XXX: do we really need to do this? */
+		/* let the chip know we are acking the interrupt */
 		OUTB(devc, portc->base + OFF_STATUS, status);
+
+		claimed = B_TRUE;
 	}
 
-	OUTL(devc, devc->base + REG_GSTAT, gstat);
+	mutex_exit(&devc->mutex);
 
-	mutex_exit(&devc->mutex);
+	if (!claimed) {
+		return (DDI_INTR_UNCLAIMED);
+	}
+
 	if (intrs & (1U << AUVIA_PLAY_SGD_NUM)) {
 		audio_engine_consume(devc->portc[AUVIA_PLAY_SGD_NUM]->engine);
 	}
--- a/usr/src/uts/common/io/cmlb.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/cmlb.c	Mon Aug 17 09:14:35 2009 -0400
@@ -313,7 +313,8 @@
 #endif
 
 #if defined(_SUNOS_VTOC_16)
-static void cmlb_convert_geometry(diskaddr_t capacity, struct dk_geom *cl_g);
+static void cmlb_convert_geometry(struct cmlb_lun *cl, diskaddr_t capacity,
+    struct dk_geom *cl_g, void *tg_cookie);
 #endif
 
 static int cmlb_dkio_get_geometry(struct cmlb_lun *cl, caddr_t arg, int flag,
@@ -349,7 +350,8 @@
 static int cmlb_is_linux_swap(struct cmlb_lun *cl, uint32_t part_start,
     void *tg_cookie);
 static int cmlb_dkio_get_virtgeom(struct cmlb_lun *cl, caddr_t arg, int flag);
-static int cmlb_dkio_get_phygeom(struct cmlb_lun *cl, caddr_t  arg, int flag);
+static int cmlb_dkio_get_phygeom(struct cmlb_lun *cl, caddr_t  arg, int flag,
+    void *tg_cookie);
 static int cmlb_dkio_partinfo(struct cmlb_lun *cl, dev_t dev, caddr_t arg,
     int flag);
 static int cmlb_dkio_extpartinfo(struct cmlb_lun *cl, dev_t dev, caddr_t arg,
@@ -1286,7 +1288,7 @@
 	case DKIOCG_PHYGEOM:
 		cmlb_dbg(CMLB_TRACE, cl, "DKIOCG_PHYGEOM\n");
 #if defined(__i386) || defined(__amd64)
-		err = cmlb_dkio_get_phygeom(cl, (caddr_t)arg, flag);
+		err = cmlb_dkio_get_phygeom(cl, (caddr_t)arg, flag, tg_cookie);
 #else
 		err = ENOTTY;
 #endif
@@ -1803,9 +1805,13 @@
  *     Context: Kernel thread only
  */
 static void
-cmlb_convert_geometry(diskaddr_t capacity, struct dk_geom *cl_g)
+cmlb_convert_geometry(struct cmlb_lun *cl, diskaddr_t capacity,
+    struct dk_geom *cl_g, void *tg_cookie)
 {
 
+	ASSERT(cl != NULL);
+	ASSERT(mutex_owned(CMLB_MUTEX(cl)));
+
 	/* Unlabeled SCSI floppy device */
 	if (capacity <= 0x1000) {
 		cl_g->dkg_nhead = 2;
@@ -1839,6 +1845,15 @@
 	 * 4211279100 (1.96TB)			255  	252
 	 * 5264098875 (2.45TB)			255  	315
 	 * ...
+	 *
+	 * For Solid State Drive(SSD), it uses 4K page size inside and may be
+	 * double with every new generation. If the I/O is not aligned with
+	 * page size on SSDs, SSDs perform a lot slower.
+	 * By default, Solaris partition starts from cylinder 1. It will be
+	 * misaligned even with 4K if using heads(255) and SPT(63). To
+	 * workaround the problem, if the device is SSD, we use heads(224) and
+	 * SPT multiple of 56. Thus the default Solaris partition starts from
+	 * a position that aligns with 128K on a 512 bytes sector size SSD.
 	 */
 
 	if (capacity <= 0x200000) {
@@ -1848,15 +1863,36 @@
 		cl_g->dkg_nhead = 128;
 		cl_g->dkg_nsect = 32;
 	} else {
-		cl_g->dkg_nhead = 255;
-
-		/* make nsect be smallest multiple of 63 */
+		tg_attribute_t tgattribute;
+		int is_solid_state;
+		unsigned short nhead;
+		unsigned short nsect;
+
+		bzero(&tgattribute, sizeof (tg_attribute_t));
+
+		mutex_exit(CMLB_MUTEX(cl));
+		is_solid_state =
+		    (DK_TG_GETATTRIBUTE(cl, &tgattribute, tg_cookie) == 0) ?
+		    tgattribute.media_is_solid_state : FALSE;
+		mutex_enter(CMLB_MUTEX(cl));
+
+		if (is_solid_state) {
+			nhead = 224;
+			nsect = 56;
+		} else {
+			nhead = 255;
+			nsect = 63;
+		}
+
+		cl_g->dkg_nhead = nhead;
+
+		/* make nsect be smallest multiple of nhead */
 		cl_g->dkg_nsect = ((capacity +
-		    (UINT16_MAX * 255 * 63) - 1) /
-		    (UINT16_MAX * 255 * 63)) * 63;
+		    (UINT16_MAX * nhead * nsect) - 1) /
+		    (UINT16_MAX * nhead * nsect)) * nsect;
 
 		if (cl_g->dkg_nsect == 0)
-			cl_g->dkg_nsect = (UINT16_MAX / 63) * 63;
+			cl_g->dkg_nsect = (UINT16_MAX / nsect) * nsect;
 	}
 
 }
@@ -3327,7 +3363,7 @@
 			capacity = cl->cl_blockcount;
 
 
-		cmlb_convert_geometry(capacity, &cl_g);
+		cmlb_convert_geometry(cl, capacity, &cl_g, tg_cookie);
 		bcopy(&cl_g, &cl->cl_g, sizeof (cl->cl_g));
 		phys_spc = cl->cl_g.dkg_nhead * cl->cl_g.dkg_nsect;
 	}
@@ -5381,7 +5417,8 @@
 
 #if defined(__i386) || defined(__amd64)
 static int
-cmlb_dkio_get_phygeom(struct cmlb_lun *cl, caddr_t  arg, int flag)
+cmlb_dkio_get_phygeom(struct cmlb_lun *cl, caddr_t  arg, int flag,
+    void *tg_cookie)
 {
 	int err = 0;
 	diskaddr_t capacity;
@@ -5443,7 +5480,7 @@
 			else
 				capacity = cl->cl_blockcount;
 
-			cmlb_convert_geometry(capacity, dkgp);
+			cmlb_convert_geometry(cl, capacity, dkgp, tg_cookie);
 			dkgp->dkg_acyl = 0;
 			dkgp->dkg_ncyl = capacity /
 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1647,17 +1647,10 @@
 	pgr->pgr_PRgeneration++;
 
 	if (pr_out->action == PR_OUT_PREEMPT_ABORT) {
-		/*
-		 * XXX iscsi port provider doesn't like this idea
-		 * Need to implement abort differently
-		 *
-		 * task->task_mgmt_function = TM_ABORT_TASK_SET;
-		 * stmf_scsilib_handle_task_mgmt(task);
-		 */
-		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
-	} else {
-		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
+		stmf_abort(STMF_QUEUE_ABORT_LU, task, STMF_ABORTED,
+		    (void *)slu->sl_lu);
 	}
+	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
 }
 
 static void
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c	Mon Aug 17 09:14:35 2009 -0400
@@ -256,6 +256,14 @@
 #define	IBD_DRV_STARTED			0x80000
 
 /*
+ * Start/stop in-progress flags; note that restart must always remain
+ * the OR of start and stop flag values.
+ */
+#define	IBD_DRV_START_IN_PROGRESS	0x10000000
+#define	IBD_DRV_STOP_IN_PROGRESS	0x20000000
+#define	IBD_DRV_RESTART_IN_PROGRESS	0x30000000
+
+/*
  * Miscellaneous constants
  */
 #define	IBD_SEND			0
@@ -433,7 +441,10 @@
 static int ibd_get_port_details(ibd_state_t *);
 static int ibd_alloc_cqs(ibd_state_t *);
 static int ibd_setup_ud_channel(ibd_state_t *);
-static int ibd_undo_m_start(ibd_state_t *);
+static int ibd_start(ibd_state_t *);
+static int ibd_undo_start(ibd_state_t *, link_state_t);
+static void ibd_set_mac_progress(ibd_state_t *, uint_t);
+static void ibd_clr_mac_progress(ibd_state_t *, uint_t);
 
 
 /*
@@ -2006,19 +2017,14 @@
 	link_state_t new_link_state = LINK_STATE_UP;
 	uint8_t itreply;
 	uint16_t pkix;
-
-	/*
-	 * Do not send a request to the async daemon if it has not
-	 * yet been created or is being destroyed. If the async
-	 * daemon has not yet been created, we still need to track
-	 * last known state of the link. If this code races with the
-	 * detach path, then we are assured that the detach path has
-	 * not yet done the ibt_close_hca (which waits for all async
-	 * events to complete). If the code races with the attach path,
-	 * we need to validate the pkey/gid (in the link_up case) if
-	 * the initialization path has already set these up and created
-	 * IBTF resources based on the values.
-	 */
+	int ret;
+
+	/*
+	 * Let's not race with a plumb or an unplumb; if we detect a
+	 * pkey relocation event later on here, we may have to restart.
+	 */
+	ibd_set_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS);
+
 	mutex_enter(&state->id_link_mutex);
 
 	/*
@@ -2027,7 +2033,7 @@
 	 */
 	if (state->id_link_state == LINK_STATE_UNKNOWN) {
 		mutex_exit(&state->id_link_mutex);
-		return;
+		goto link_mod_return;
 	}
 
 	/*
@@ -2085,12 +2091,21 @@
 			ibt_free_portinfo(port_infop, port_infosz);
 			mutex_exit(&state->id_link_mutex);
 
-			ibd_m_stop(state);
-			if ((ibt_status = ibd_m_start(state)) != IBT_SUCCESS) {
-				DPRINT(10, "link_mod: cannot "
-				    "restart, ret=%d", ibt_status);
+			/*
+			 * Currently a restart is required if our pkey has moved
+			 * in the pkey table. If we get the ibt_recycle_ud() to
+			 * work as documented (expected), we may be able to
+			 * avoid a complete restart.  Note that we've already
+			 * marked both the start and stop 'in-progress' flags,
+			 * so it is ok to go ahead and do this restart.
+			 */
+			ibd_undo_start(state, LINK_STATE_DOWN);
+			if ((ret = ibd_start(state)) != 0) {
+				DPRINT(10, "ibd_restart: cannot restart, "
+				    "ret=%d", ret);
 			}
-			return;
+
+			goto link_mod_return;
 		} else {
 			new_link_state = LINK_STATE_DOWN;
 		}
@@ -2106,7 +2121,7 @@
 	 */
 	if (state->id_link_state == new_link_state) {
 		mutex_exit(&state->id_link_mutex);
-		return;
+		goto link_mod_return;
 	}
 
 	/*
@@ -2116,7 +2131,7 @@
 	if (!ibd_async_safe(state)) {
 		state->id_link_state = new_link_state;
 		mutex_exit(&state->id_link_mutex);
-		return;
+		goto link_mod_return;
 	}
 
 	mutex_exit(&state->id_link_mutex);
@@ -2144,6 +2159,9 @@
 	req = kmem_cache_alloc(state->id_req_kmc, KM_SLEEP);
 	req->rq_ptr = (void *)opcode;
 	ibd_queue_work_slot(state, req, IBD_ASYNC_LINK);
+
+link_mod_return:
+	ibd_clr_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS);
 }
 
 /*
@@ -2619,6 +2637,9 @@
 	state->id_req_kmc = kmem_cache_create(buf, sizeof (ibd_req_t),
 	    0, NULL, NULL, NULL, NULL, NULL, 0);
 
+	mutex_init(&state->id_macst_lock, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&state->id_macst_cv, NULL, CV_DEFAULT, NULL);
+
 	return (DDI_SUCCESS);
 }
 
@@ -2628,6 +2649,9 @@
 static void
 ibd_state_fini(ibd_state_t *state)
 {
+	cv_destroy(&state->id_macst_cv);
+	mutex_destroy(&state->id_macst_lock);
+
 	kmem_cache_destroy(state->id_req_kmc);
 
 	mutex_destroy(&state->id_rxpost_lock);
@@ -4050,7 +4074,7 @@
 		mutex_exit(&state->id_link_mutex);
 		DPRINT(10, "ibd_get_port_details: ibt_query_hca_ports() "
 		    "failed, ret=%d", ret);
-		return (DDI_FAILURE);
+		return (ENETDOWN);
 	}
 
 	/*
@@ -4061,7 +4085,7 @@
 		mutex_exit(&state->id_link_mutex);
 		ibt_free_portinfo(port_infop, port_infosz);
 		DPRINT(10, "ibd_get_port_details: port is not active");
-		return (DDI_FAILURE);
+		return (ENETDOWN);
 	}
 
 	/*
@@ -4073,7 +4097,7 @@
 		ibt_free_portinfo(port_infop, port_infosz);
 		DPRINT(10, "ibd_get_port_details: ibt_pkey2index "
 		    "failed, ret=%d", ret);
-		return (DDI_FAILURE);
+		return (ENONET);
 	}
 
 	state->id_mtu = (128 << port_infop->p_mtu);
@@ -4088,7 +4112,7 @@
 	 */
 	state->id_link_speed = ibd_get_portspeed(state);
 
-	return (DDI_SUCCESS);
+	return (0);
 }
 
 static int
@@ -4263,7 +4287,7 @@
 }
 
 static int
-ibd_undo_m_start(ibd_state_t *state)
+ibd_undo_start(ibd_state_t *state, link_state_t cur_link_state)
 {
 	uint32_t progress = state->id_mac_state;
 	uint_t attempts;
@@ -4273,17 +4297,23 @@
 	uint8_t jstate;
 
 	/*
-	 * Before we try to stop/undo whatever we did in ibd_m_start(),
-	 * we need to mark the link state as unknown to prevent nw
-	 * layer from using this instance for any new transfers.
-	 */
-	if (progress & IBD_DRV_PORT_DETAILS_OBTAINED) {
+	 * Before we try to stop/undo whatever we did in ibd_start(),
+	 * we need to mark the link state appropriately to prevent the
+	 * ip layer from using this instance for any new transfers. Note
+	 * that if the original state of the link was "up" when we're
+	 * here, we'll set the final link state to "unknown", to behave
+	 * in the same fashion as other ethernet drivers.
+	 */
+	mutex_enter(&state->id_link_mutex);
+	if (cur_link_state == LINK_STATE_DOWN) {
+		state->id_link_state = cur_link_state;
+	} else {
 		state->id_link_state = LINK_STATE_UNKNOWN;
-		mac_link_update(state->id_mh, state->id_link_state);
-
-		state->id_mac_state &= (~IBD_DRV_PORT_DETAILS_OBTAINED);
-	}
-
+	}
+	mutex_exit(&state->id_link_mutex);
+	mac_link_update(state->id_mh, state->id_link_state);
+
+	state->id_mac_state &= (~IBD_DRV_PORT_DETAILS_OBTAINED);
 	if (progress & IBD_DRV_STARTED) {
 		state->id_mac_state &= (~IBD_DRV_STARTED);
 	}
@@ -4309,7 +4339,7 @@
 				 * we turned off the notification and
 				 * return failure.
 				 */
-				DPRINT(2, "ibd_undo_m_start: "
+				DPRINT(2, "ibd_undo_start: "
 				    "reclaiming failed");
 				ibd_poll_compq(state, state->id_rcq_hdl);
 				ibt_set_cq_handler(state->id_rcq_hdl,
@@ -4342,7 +4372,7 @@
 		 */
 		if ((ret = ibt_flush_channel(state->id_chnl_hdl)) !=
 		    IBT_SUCCESS) {
-			DPRINT(10, "undo_m_start: flush_channel "
+			DPRINT(10, "ibd_undo_start: flush_channel "
 			    "failed, ret=%d", ret);
 		}
 
@@ -4394,7 +4424,7 @@
 		 * has also been suppressed at this point. Thus, no locks
 		 * are required while traversing the mc full list.
 		 */
-		DPRINT(2, "ibd_undo_m_start: clear full cache entries");
+		DPRINT(2, "ibd_undo_start: clear full cache entries");
 		mce = list_head(&state->id_mc_full);
 		while (mce != NULL) {
 			mgid = mce->mc_info.mc_adds_vect.av_dgid;
@@ -4418,7 +4448,7 @@
 	if (progress & IBD_DRV_UD_CHANNEL_SETUP) {
 		if ((ret = ibt_free_channel(state->id_chnl_hdl)) !=
 		    IBT_SUCCESS) {
-			DPRINT(10, "undo_m_start: free_channel "
+			DPRINT(10, "ibd_undo_start: free_channel "
 			    "failed, ret=%d", ret);
 		}
 
@@ -4431,7 +4461,7 @@
 			    sizeof (ibt_wc_t) * state->id_txwcs_size);
 			if ((ret = ibt_free_cq(state->id_scq_hdl)) !=
 			    IBT_SUCCESS) {
-				DPRINT(10, "undo_m_start: free_cq(scq) "
+				DPRINT(10, "ibd_undo_start: free_cq(scq) "
 				    "failed, ret=%d", ret);
 			}
 		}
@@ -4439,7 +4469,7 @@
 		kmem_free(state->id_rxwcs,
 		    sizeof (ibt_wc_t) * state->id_rxwcs_size);
 		if ((ret = ibt_free_cq(state->id_rcq_hdl)) != IBT_SUCCESS) {
-			DPRINT(10, "undo_m_start: free_cq(rcq) failed, "
+			DPRINT(10, "ibd_undo_start: free_cq(rcq) failed, "
 			    "ret=%d", ret);
 		}
 
@@ -4478,6 +4508,34 @@
 }
 
 /*
+ * These pair of routines are used to set/clear the condition that
+ * the caller is likely to do something to change the id_mac_state.
+ * If there's already someone doing either a start or a stop (possibly
+ * due to the async handler detecting a pkey relocation event, a plumb
+ * or dlpi_open, or an unplumb or dlpi_close coming in), we wait until
+ * that's done.
+ */
+static void
+ibd_set_mac_progress(ibd_state_t *state, uint_t flag)
+{
+	mutex_enter(&state->id_macst_lock);
+	while (state->id_mac_state & IBD_DRV_RESTART_IN_PROGRESS)
+		cv_wait(&state->id_macst_cv, &state->id_macst_lock);
+
+	state->id_mac_state |= flag;
+	mutex_exit(&state->id_macst_lock);
+}
+
+static void
+ibd_clr_mac_progress(ibd_state_t *state, uint_t flag)
+{
+	mutex_enter(&state->id_macst_lock);
+	state->id_mac_state &= (~flag);
+	cv_signal(&state->id_macst_cv);
+	mutex_exit(&state->id_macst_lock);
+}
+
+/*
  * GLDv3 entry point to start hardware.
  */
 /*ARGSUSED*/
@@ -4485,6 +4543,20 @@
 ibd_m_start(void *arg)
 {
 	ibd_state_t *state = arg;
+	int	ret;
+
+	ibd_set_mac_progress(state, IBD_DRV_START_IN_PROGRESS);
+
+	ret = ibd_start(state);
+
+	ibd_clr_mac_progress(state, IBD_DRV_START_IN_PROGRESS);
+
+	return (ret);
+}
+
+static int
+ibd_start(ibd_state_t *state)
+{
 	kthread_t *kht;
 	int err;
 	ibt_status_t ret;
@@ -4494,11 +4566,11 @@
 
 	/*
 	 * Get port details; if we fail here, very likely the port
-	 * state is inactive or the pkey can't be verified
-	 */
-	if (ibd_get_port_details(state) != DDI_SUCCESS) {
-		DPRINT(10, "ibd_m_start: ibd_get_port_details() failed");
-		return (EAGAIN);
+	 * state is inactive or the pkey can't be verified.
+	 */
+	if ((err = ibd_get_port_details(state)) != 0) {
+		DPRINT(10, "ibd_start: ibd_get_port_details() failed");
+		goto start_fail;
 	}
 	state->id_mac_state |= IBD_DRV_PORT_DETAILS_OBTAINED;
 
@@ -4506,9 +4578,9 @@
 	 * Find the IPoIB broadcast group
 	 */
 	if (ibd_find_bgroup(state) != IBT_SUCCESS) {
-		DPRINT(10, "ibd_m_start: ibd_find_bgroup() failed");
-		err = ENOENT;
-		goto m_start_fail;
+		DPRINT(10, "ibd_start: ibd_find_bgroup() failed");
+		err = ENOTACTIVE;
+		goto start_fail;
 	}
 	state->id_mac_state |= IBD_DRV_BCAST_GROUP_FOUND;
 
@@ -4517,9 +4589,9 @@
 	 * it is most likely due to a lack of resources
 	 */
 	if (ibd_acache_init(state) != DDI_SUCCESS) {
-		DPRINT(10, "ibd_m_start: ibd_acache_init() failed");
+		DPRINT(10, "ibd_start: ibd_acache_init() failed");
 		err = ENOMEM;
-		goto m_start_fail;
+		goto start_fail;
 	}
 	state->id_mac_state |= IBD_DRV_ACACHE_INITIALIZED;
 
@@ -4527,9 +4599,9 @@
 	 * Allocate send and receive completion queues
 	 */
 	if (ibd_alloc_cqs(state) != DDI_SUCCESS) {
-		DPRINT(10, "ibd_m_start: ibd_alloc_cqs() failed");
+		DPRINT(10, "ibd_start: ibd_alloc_cqs() failed");
 		err = ENOMEM;
-		goto m_start_fail;
+		goto start_fail;
 	}
 	state->id_mac_state |= IBD_DRV_CQS_ALLOCD;
 
@@ -4538,8 +4610,8 @@
 	 */
 	if (ibd_setup_ud_channel(state) != DDI_SUCCESS) {
 		err = ENOMEM;
-		DPRINT(10, "ibd_m_start: ibd_setup_ud_channel() failed");
-		goto m_start_fail;
+		DPRINT(10, "ibd_start: ibd_setup_ud_channel() failed");
+		goto start_fail;
 	}
 	state->id_mac_state |= IBD_DRV_UD_CHANNEL_SETUP;
 
@@ -4547,9 +4619,9 @@
 	 * Allocate and initialize the tx buffer list
 	 */
 	if (ibd_init_txlist(state) != DDI_SUCCESS) {
-		DPRINT(10, "ibd_m_start: ibd_init_txlist() failed");
+		DPRINT(10, "ibd_start: ibd_init_txlist() failed");
 		err = ENOMEM;
-		goto m_start_fail;
+		goto start_fail;
 	}
 	state->id_mac_state |= IBD_DRV_TXLIST_ALLOCD;
 
@@ -4560,10 +4632,10 @@
 		ibt_set_cq_handler(state->id_scq_hdl, ibd_scq_handler, state);
 		if ((ret = ibt_enable_cq_notify(state->id_scq_hdl,
 		    IBT_NEXT_COMPLETION)) != IBT_SUCCESS) {
-			DPRINT(10, "ibd_m_start: ibt_enable_cq_notify(scq) "
+			DPRINT(10, "ibd_start: ibt_enable_cq_notify(scq) "
 			    "failed, ret=%d", ret);
 			err = EINVAL;
-			goto m_start_fail;
+			goto start_fail;
 		}
 		state->id_mac_state |= IBD_DRV_SCQ_NOTIFY_ENABLED;
 	}
@@ -4572,9 +4644,9 @@
 	 * Allocate and initialize the rx buffer list
 	 */
 	if (ibd_init_rxlist(state) != DDI_SUCCESS) {
-		DPRINT(10, "ibd_m_start: ibd_init_rxlist() failed");
+		DPRINT(10, "ibd_start: ibd_init_rxlist() failed");
 		err = ENOMEM;
-		goto m_start_fail;
+		goto start_fail;
 	}
 	state->id_mac_state |= IBD_DRV_RXLIST_ALLOCD;
 
@@ -4582,9 +4654,9 @@
 	 * Join IPoIB broadcast group
 	 */
 	if (ibd_join_group(state, state->id_mgid, IB_MC_JSTATE_FULL) == NULL) {
-		DPRINT(10, "ibd_m_start: ibd_join_group() failed");
-		err = EINVAL;
-		goto m_start_fail;
+		DPRINT(10, "ibd_start: ibd_join_group() failed");
+		err = ENOTACTIVE;
+		goto start_fail;
 	}
 	state->id_mac_state |= IBD_DRV_BCAST_GROUP_JOINED;
 
@@ -4617,10 +4689,10 @@
 	ibt_set_cq_handler(state->id_rcq_hdl, ibd_rcq_handler, state);
 	if ((ret = ibt_enable_cq_notify(state->id_rcq_hdl,
 	    IBT_NEXT_COMPLETION)) != IBT_SUCCESS) {
-		DPRINT(10, "ibd_m_start: ibt_enable_cq_notify(rcq) "
+		DPRINT(10, "ibd_start: ibt_enable_cq_notify(rcq) "
 		    "failed, ret=%d", ret);
 		err = EINVAL;
-		goto m_start_fail;
+		goto start_fail;
 	}
 	state->id_mac_state |= IBD_DRV_RCQ_NOTIFY_ENABLED;
 
@@ -4656,14 +4728,14 @@
 
 	return (DDI_SUCCESS);
 
-m_start_fail:
-	/*
-	 * If we ran into a problem during ibd_m_start() and ran into
+start_fail:
+	/*
+	 * If we ran into a problem during ibd_start() and ran into
 	 * some other problem during undoing our partial work, we can't
 	 * do anything about it.  Ignore any errors we might get from
-	 * ibd_undo_m_start() and just return the original error we got.
-	 */
-	(void) ibd_undo_m_start(state);
+	 * ibd_undo_start() and just return the original error we got.
+	 */
+	(void) ibd_undo_start(state, LINK_STATE_DOWN);
 	return (err);
 }
 
@@ -4674,15 +4746,13 @@
 static void
 ibd_m_stop(void *arg)
 {
-	ibd_state_t *state = arg;
-
-	/*
-	 * Since ibd_m_stop() doesn't expect any return, we cannot
-	 * fail even if we run into some problem with ibd_undo_m_start().
-	 * The best we can do is to leave it in a good state, so
-	 * perhaps a future unplumb will succeed.
-	 */
-	(void) ibd_undo_m_start(state);
+	ibd_state_t *state = (ibd_state_t *)arg;
+
+	ibd_set_mac_progress(state, IBD_DRV_STOP_IN_PROGRESS);
+
+	(void) ibd_undo_start(state, state->id_link_state);
+
+	ibd_clr_mac_progress(state, IBD_DRV_STOP_IN_PROGRESS);
 }
 
 /*
--- a/usr/src/uts/common/io/igb/igb_82575.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_82575.c	Mon Aug 17 09:14:35 2009 -0400
@@ -26,13 +26,14 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.94 v2008-10-7 */
+/* IntelVersion: 1.123 v2-9-8_2009-6-12 */
 
 /*
  * 82575EB Gigabit Network Connection
  * 82575EB Gigabit Backplane Connection
  * 82575GB Gigabit Network Connection
  * 82576 Gigabit Network Connection
+ * 82576 Quad Port Gigabit Mezzanine Adapter
  */
 
 #include "igb_api.h"
@@ -71,11 +72,8 @@
 static s32 e1000_reset_init_script_82575(struct e1000_hw *hw);
 static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw);
 static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw);
-static void e1000_init_rx_addrs_82575(struct e1000_hw *hw, u16 rar_count);
-static void e1000_update_mc_addr_list_82575(struct e1000_hw *hw,
-    u8 *mc_addr_list, u32 mc_addr_count,
-    u32 rar_used_count, u32 rar_count);
 void e1000_shutdown_fiber_serdes_link_82575(struct e1000_hw *hw);
+static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw);
 
 /*
  * e1000_init_phy_params_82575 - Init PHY func ptrs.
@@ -279,13 +277,15 @@
 	/* read mac address */
 	mac->ops.read_mac_addr = e1000_read_mac_addr_82575;
 	/* multicast address update */
-	mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_82575;
+	mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic;
 	/* writing VFTA */
 	mac->ops.write_vfta = e1000_write_vfta_generic;
 	/* clearing VFTA */
 	mac->ops.clear_vfta = e1000_clear_vfta_generic;
 	/* setting MTA */
 	mac->ops.mta_set = e1000_mta_set_generic;
+	/* ID LED init */
+	mac->ops.id_led_init = e1000_id_led_init_generic;
 	/* blink LED */
 	mac->ops.blink_led = e1000_blink_led_generic;
 	/* setup LED */
@@ -328,11 +328,12 @@
 static s32
 e1000_acquire_phy_82575(struct e1000_hw *hw)
 {
-	u16 mask;
+	u16 mask = E1000_SWFW_PHY0_SM;
 
 	DEBUGFUNC("e1000_acquire_phy_82575");
 
-	mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM;
+	if (hw->bus.func == E1000_FUNC_1)
+		mask = E1000_SWFW_PHY1_SM;
 
 	return (e1000_acquire_swfw_sync_82575(hw, mask));
 }
@@ -346,11 +347,13 @@
 static void
 e1000_release_phy_82575(struct e1000_hw *hw)
 {
-	u16 mask;
+	u16 mask = E1000_SWFW_PHY0_SM;
 
 	DEBUGFUNC("e1000_release_phy_82575");
 
-	mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM;
+	if (hw->bus.func == E1000_FUNC_1)
+		mask = E1000_SWFW_PHY1_SM;
+
 	e1000_release_swfw_sync_82575(hw, mask);
 }
 
@@ -801,7 +804,7 @@
 
 	DEBUGFUNC("e1000_get_cfg_done_82575");
 
-	if (hw->bus.func == 1)
+	if (hw->bus.func == E1000_FUNC_1)
 		mask = E1000_NVM_CFG_DONE_PORT_1;
 
 	while (timeout) {
@@ -868,11 +871,18 @@
 
 	/* SGMII link check is done through the PCS register. */
 	if ((hw->phy.media_type != e1000_media_type_copper) ||
-	    (e1000_sgmii_active_82575(hw)))
+	    (e1000_sgmii_active_82575(hw))) {
 		ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, &speed,
 		    &duplex);
-	else
+		/*
+		 * Use this flag to determine if link needs to be checked or
+		 * not.  If we have link clear the flag so that we do not
+		 * continue to check for link.
+		 */
+		hw->mac.get_link_status = !hw->mac.serdes_has_link;
+	} else {
 		ret_val = e1000_check_for_copper_link_generic(hw);
+	}
 
 	return (ret_val);
 }
@@ -936,103 +946,6 @@
 }
 
 /*
- * e1000_init_rx_addrs_82575 - Initialize receive address's
- * @hw: pointer to the HW structure
- * @rar_count: receive address registers
- *
- * Setups the receive address registers by setting the base receive address
- * register to the devices MAC address and clearing all the other receive
- * address registers to 0.
- */
-static void
-e1000_init_rx_addrs_82575(struct e1000_hw *hw, u16 rar_count)
-{
-	u32 i;
-	u8 addr[6] = {0, 0, 0, 0, 0, 0};
-	/*
-	 * This function is essentially the same as that of
-	 * e1000_init_rx_addrs_generic. However it also takes care
-	 * of the special case where the register offset of the
-	 * second set of RARs begins elsewhere. This is implicitly taken care by
-	 * function e1000_rar_set_generic.
-	 */
-
-	DEBUGFUNC("e1000_init_rx_addrs_82575");
-
-	/* Setup the receive address */
-	DEBUGOUT("Programming MAC Address into RAR[0]\n");
-	hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
-
-	/* Zero out the other (rar_entry_count - 1) receive addresses */
-	DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count - 1);
-	for (i = 1; i < rar_count; i++) {
-		hw->mac.ops.rar_set(hw, addr, i);
-	}
-}
-
-/*
- * e1000_update_mc_addr_list_82575 - Update Multicast addresses
- * @hw: pointer to the HW structure
- * @mc_addr_list: array of multicast addresses to program
- * @mc_addr_count: number of multicast addresses to program
- * @rar_used_count: the first RAR register free to program
- * @rar_count: total number of supported Receive Address Registers
- *
- * Updates the Receive Address Registers and Multicast Table Array.
- * The caller must have a packed mc_addr_list of multicast addresses.
- * The parameter rar_count will usually be hw->mac.rar_entry_count
- * unless there are workarounds that change this.
- */
-static void
-e1000_update_mc_addr_list_82575(struct e1000_hw *hw,
-    u8 *mc_addr_list, u32 mc_addr_count,
-    u32 rar_used_count, u32 rar_count)
-{
-	u32 hash_value;
-	u32 i;
-	u8 addr[6] = {0, 0, 0, 0, 0, 0};
-	/*
-	 * This function is essentially the same as that of
-	 * e1000_update_mc_addr_list_generic. However it also takes care
-	 * of the special case where the register offset of the
-	 * second set of RARs begins elsewhere. This is implicitly taken care by
-	 * function e1000_rar_set_generic.
-	 */
-
-	DEBUGFUNC("e1000_update_mc_addr_list_82575");
-
-	/*
-	 * Load the first set of multicast addresses into the exact
-	 * filters (RAR).  If there are not enough to fill the RAR
-	 * array, clear the filters.
-	 */
-	for (i = rar_used_count; i < rar_count; i++) {
-		if (mc_addr_count) {
-			e1000_rar_set_generic(hw, mc_addr_list, i);
-			mc_addr_count--;
-			mc_addr_list += ETH_ADDR_LEN;
-		} else {
-			e1000_rar_set_generic(hw, addr, i);
-		}
-	}
-
-	/* Clear the old settings from the MTA */
-	DEBUGOUT("Clearing MTA\n");
-	for (i = 0; i < hw->mac.mta_reg_count; i++) {
-		E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0);
-		E1000_WRITE_FLUSH(hw);
-	}
-
-	/* Load any remaining multicast addresses into the hash table. */
-	for (; mc_addr_count > 0; mc_addr_count--) {
-		hash_value = e1000_hash_mc_addr(hw, mc_addr_list);
-		DEBUGOUT1("Hash value = 0x%03X\n", hash_value);
-		hw->mac.ops.mta_set(hw, hash_value);
-		mc_addr_list += ETH_ADDR_LEN;
-	}
-}
-
-/*
  * e1000_shutdown_fiber_serdes_link_82575 - Remove link during power down
  * @hw: pointer to the HW structure
  *
@@ -1045,13 +958,13 @@
 	u32 reg;
 	u16 eeprom_data = 0;
 
-	if (hw->mac.type != e1000_82576 ||
-	    (hw->phy.media_type != e1000_media_type_fiber &&
-	    hw->phy.media_type != e1000_media_type_internal_serdes))
+	if (hw->phy.media_type != e1000_media_type_internal_serdes)
 		return;
 
-	if (hw->bus.func == 0)
+	if (hw->bus.func == E1000_FUNC_0)
 		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
+	else if (hw->bus.func == E1000_FUNC_1)
+		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
 
 	/*
 	 * If APM is not enabled in the EEPROM and management interface is
@@ -1076,6 +989,48 @@
 }
 
 /*
+ * e1000_vmdq_set_loopback_pf - enable or disable vmdq loopback
+ * @hw: pointer to the HW structure
+ * @enable: state to enter, either enabled or disabled
+ *
+ * enables/disables L2 switch loopback functionality
+ */
+void
+e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable)
+{
+	u32 reg;
+
+	reg = E1000_READ_REG(hw, E1000_DTXSWC);
+	if (enable)
+		reg |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+	else
+		reg &= ~(E1000_DTXSWC_VMDQ_LOOPBACK_EN);
+
+	E1000_WRITE_REG(hw, E1000_DTXSWC, reg);
+}
+
+/*
+ * e1000_vmdq_set_replication_pf - enable or disable vmdq replication
+ * @hw: pointer to the HW structure
+ * @enable: state to enter, either enabled or disabled
+ *
+ * enables/disables replication of packets across multiple pools
+ */
+void
+e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable)
+{
+	u32 reg;
+
+	reg = E1000_READ_REG(hw, E1000_VT_CTL);
+	if (enable)
+		reg |= E1000_VT_CTL_VM_REPL_EN;
+	else
+		reg &= ~(E1000_VT_CTL_VM_REPL_EN);
+
+	E1000_WRITE_REG(hw, E1000_VT_CTL, reg);
+}
+
+/*
  * e1000_reset_hw_82575 - Reset hardware
  * @hw: pointer to the HW structure
  *
@@ -1098,6 +1053,12 @@
 		DEBUGOUT("PCI-E Master disable polling has failed.\n");
 	}
 
+	/* set the completion timeout for interface */
+	ret_val = e1000_set_pcie_completion_timeout(hw);
+	if (ret_val) {
+		DEBUGOUT("PCI-E Set completion timeout has failed.\n");
+	}
+
 	DEBUGOUT("Masking off all interrupts\n");
 	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
 
@@ -1130,7 +1091,8 @@
 	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
 	(void) E1000_READ_REG(hw, E1000_ICR);
 
-	(void) e1000_check_alt_mac_addr_generic(hw);
+	/* Install any alternate MAC address into RAR0 */
+	ret_val = e1000_check_alt_mac_addr_generic(hw);
 
 	return (ret_val);
 }
@@ -1151,7 +1113,7 @@
 	DEBUGFUNC("e1000_init_hw_82575");
 
 	/* Initialize identification LED */
-	ret_val = e1000_id_led_init_generic(hw);
+	ret_val = mac->ops.id_led_init(hw);
 	if (ret_val) {
 		DEBUGOUT("Error initializing identification LED\n");
 		/* This is not fatal and we should not stop init due to this */
@@ -1162,7 +1124,7 @@
 	mac->ops.clear_vfta(hw);
 
 	/* Setup the receive address */
-	e1000_init_rx_addrs_82575(hw, rar_count);
+	e1000_init_rx_addrs_generic(hw, rar_count);
 	/* Zero out the Multicast HASH table */
 	DEBUGOUT("Zeroing the MTA\n");
 	for (i = 0; i < mac->mta_reg_count; i++)
@@ -1193,7 +1155,7 @@
 static s32
 e1000_setup_copper_link_82575(struct e1000_hw *hw)
 {
-	u32 ctrl, led_ctrl;
+	u32 ctrl;
 	s32 ret_val;
 	bool link;
 
@@ -1210,11 +1172,6 @@
 		break;
 	case e1000_phy_igp_3:
 		ret_val = e1000_copper_link_setup_igp(hw);
-		/* Setup activity LED */
-		led_ctrl = E1000_READ_REG(hw, E1000_LEDCTL);
-		led_ctrl &= IGP_ACTIVITY_LED_MASK;
-		led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE);
-		E1000_WRITE_REG(hw, E1000_LEDCTL, led_ctrl);
 		break;
 	default:
 		ret_val = -E1000_ERR_PHY;
@@ -1294,13 +1251,13 @@
 	 */
 	E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
 
-	/* Force link up, set 1gb, set both sw defined pins */
+	/* Force link up, set 1gb */
 	reg = E1000_READ_REG(hw, E1000_CTRL);
-	reg |= E1000_CTRL_SLU |
-	    E1000_CTRL_SPD_1000 |
-	    E1000_CTRL_FRCSPD |
-	    E1000_CTRL_SWDPIN0 |
-	    E1000_CTRL_SWDPIN1;
+	reg |= E1000_CTRL_SLU | E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD;
+	if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) {
+		/* set both sw defined pins */
+		reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1;
+	}
 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
 
 	/* Power on phy for 82576 fiber adapters */
@@ -1376,7 +1333,6 @@
 
 	if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) {
 		switch (hw->phy.media_type) {
-		case e1000_media_type_fiber:
 		case e1000_media_type_internal_serdes:
 			*data = ID_LED_DEFAULT_82575_SERDES;
 			break;
@@ -1469,12 +1425,6 @@
 e1000_sgmii_active_82575(struct e1000_hw *hw)
 {
 	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
-
-	DEBUGFUNC("e1000_sgmii_active_82575");
-
-	if (hw->mac.type != e1000_82575 && hw->mac.type != e1000_82576)
-		return (false);
-
 	return (dev_spec->sgmii_active);
 }
 
@@ -1540,9 +1490,19 @@
 	s32 ret_val = E1000_SUCCESS;
 
 	DEBUGFUNC("e1000_read_mac_addr_82575");
-	if (e1000_check_alt_mac_addr_generic(hw))
-		ret_val = e1000_read_mac_addr_generic(hw);
 
+	/*
+	 * If there's an alternate MAC address place it in RAR0
+	 * so that it will override the Si installed default perm
+	 * address.
+	 */
+	ret_val = e1000_check_alt_mac_addr_generic(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = e1000_read_mac_addr_generic(hw);
+
+out:
 	return (ret_val);
 }
 
@@ -1708,3 +1668,55 @@
 	(void) E1000_READ_REG(hw, E1000_RNBC);
 	(void) E1000_READ_REG(hw, E1000_MPC);
 }
+
+/*
+ * e1000_set_pcie_completion_timeout - set pci-e completion timeout
+ * @hw: pointer to the HW structure
+ *
+ * The defaults for 82575 and 82576 should be in the range of 50us to 50ms,
+ * however the hardware default for these parts is 500us to 1ms which is less
+ * than the 10ms recommended by the pci-e spec.  To address this we need to
+ * increase the value to either 10ms to 200ms for capability version 1 config,
+ * or 16ms to 55ms for version 2.
+ */
+static s32
+e1000_set_pcie_completion_timeout(struct e1000_hw *hw)
+{
+	u32 gcr = E1000_READ_REG(hw, E1000_GCR);
+	s32 ret_val = E1000_SUCCESS;
+	u16 pcie_devctl2;
+
+	/* only take action if timeout value is defaulted to 0 */
+	if (gcr & E1000_GCR_CMPL_TMOUT_MASK)
+		goto out;
+
+	/*
+	 * if capababilities version is type 1 we can write the
+	 * timeout of 10ms to 200ms through the GCR register
+	 */
+	if (!(gcr & E1000_GCR_CAP_VER2)) {
+		gcr |= E1000_GCR_CMPL_TMOUT_10ms;
+		goto out;
+	}
+
+	/*
+	 * for version 2 capabilities we need to write the config space
+	 * directly in order to set the completion timeout value for
+	 * 16ms to 55ms
+	 */
+	ret_val = e1000_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
+	    &pcie_devctl2);
+	if (ret_val)
+		goto out;
+
+	pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms;
+
+	ret_val = e1000_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
+	    &pcie_devctl2);
+out:
+	/* disable completion timeout resend */
+	gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND;
+
+	E1000_WRITE_REG(hw, E1000_GCR, gcr);
+	return (ret_val);
+}
--- a/usr/src/uts/common/io/igb/igb_82575.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_82575.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.57 v2008-10-7 */
+/* IntelVersion: 1.77 v2-9-8_2009-6-12 */
 
 #ifndef _IGB_82575_H
 #define	_IGB_82575_H
@@ -129,6 +129,7 @@
 #define	E1000_SRRCTL_DESCTYPE_HDR_REPLICATION		0x06000000
 #define	E1000_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT	0x08000000
 #define	E1000_SRRCTL_DESCTYPE_MASK			0x0E000000
+#define	E1000_SRRCTL_DROP_EN				0x80000000
 
 #define	E1000_SRRCTL_BSIZEPKT_MASK	0x0000007F
 #define	E1000_SRRCTL_BSIZEHDR_MASK	0x00003F00
@@ -138,6 +139,7 @@
 
 #define	E1000_MRQC_ENABLE_RSS_4Q		0x00000002
 #define	E1000_MRQC_ENABLE_VMDQ			0x00000003
+#define	E1000_MRQC_ENABLE_VMDQ_RSS_2Q		0x80000000
 #define	E1000_MRQC_RSS_FIELD_IPV4_UDP		0x00400000
 #define	E1000_MRQC_RSS_FIELD_IPV6_UDP		0x00800000
 #define	E1000_MRQC_RSS_FIELD_IPV6_UDP_EX	0x01000000
@@ -214,7 +216,7 @@
 	} wb;		/* writeback */
 };
 
-#define	E1000_RXDADV_RSSTYPE_MASK	0x0000F000
+#define	E1000_RXDADV_RSSTYPE_MASK	0x0000000F
 #define	E1000_RXDADV_RSSTYPE_SHIFT	12
 #define	E1000_RXDADV_HDRBUFLEN_MASK	0x7FE0
 #define	E1000_RXDADV_HDRBUFLEN_SHIFT	5
@@ -316,6 +318,7 @@
 #define	E1000_ADVTXD_TUCMD_IPV6		0x00000000 /* IP Packet Type: 0=IPv6 */
 #define	E1000_ADVTXD_TUCMD_L4T_UDP	0x00000000 /* L4 Packet TYPE of UDP */
 #define	E1000_ADVTXD_TUCMD_L4T_TCP	0x00000800 /* L4 Packet TYPE of TCP */
+#define	E1000_ADVTXD_TUCMD_L4T_SCTP	0x00001000 /* L4 Packet TYPE of SCTP */
 #define	E1000_ADVTXD_TUCMD_IPSEC_TYPE_ESP	0x00002000 /* IPSec Type ESP */
 /* IPSec Encrypt Enable for ESP */
 #define	E1000_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN	0x00004000
@@ -358,11 +361,10 @@
 #define	E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5)	/* DCA Tx Desc enable */
 #define	E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11)	/* Tx Desc writeback RO bit */
 
-/* Additional DCA related definitions, note change in position of CPUID */
 #define	E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */
 #define	E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */
-#define	E1000_DCA_TXCTRL_CPUID_SHIFT 24 /* Tx CPUID now in the last byte */
-#define	E1000_DCA_RXCTRL_CPUID_SHIFT 24 /* Rx CPUID now in the last byte */
+#define	E1000_DCA_TXCTRL_CPUID_SHIFT_82576 24 /* Tx CPUID */
+#define	E1000_DCA_RXCTRL_CPUID_SHIFT_82576 24 /* Rx CPUID */
 
 /* Additional interrupt register bit definitions */
 #define	E1000_ICR_LSECPNS	0x00000020	/* PN threshold - server */
@@ -387,10 +389,14 @@
 #define	E1000_NVM_APME_82575		0x0400
 #define	MAX_NUM_VFS			8
 
-#define	E1000_DTXSWC_MAC_SPOOF_MASK   0x000000FF /* Per VF MAC spoof control */
-#define	E1000_DTXSWC_VLAN_SPOOF_MASK  0x0000FF00 /* Per VF VLAN spoof control */
+/* Per VF MAC spoof control */
+#define	E1000_DTXSWC_MAC_SPOOF_MASK	0x000000FF
+/* Per VF VLAN spoof control */
+#define	E1000_DTXSWC_VLAN_SPOOF_MASK	0x0000FF00
 #define	E1000_DTXSWC_LLE_MASK		0x00FF0000 /* Per VF Local LB enables */
-#define	E1000_DTXSWC_VMDQ_LOOPBACK_EN	(1 << 31)  /* global VF LB enable */
+#define	E1000_DTXSWC_VLAN_SPOOF_SHIFT	8
+#define	E1000_DTXSWC_LLE_SHIFT		16
+#define	E1000_DTXSWC_VMDQ_LOOPBACK_EN	((u32)1 << 31) /* global VF LB enable */
 
 /* Easy defines for setting default pool, would normally be left a zero */
 #define	E1000_VT_CTL_DEFAULT_POOL_SHIFT	7
@@ -402,84 +408,35 @@
 #define	E1000_VT_CTL_VM_REPL_EN		(1 << 30)
 
 /* Per VM Offload register setup */
+#define	E1000_VMOLR_RLPML_MASK	0x00003FFF /* Long Packet Maximum Length mask */
 #define	E1000_VMOLR_LPE		0x00010000 /* Accept Long packet */
+#define	E1000_VMOLR_RSSE	0x00020000 /* Enable RSS */
 #define	E1000_VMOLR_AUPE	0x01000000 /* Accept untagged packets */
+#define	E1000_VMOLR_ROMPE	0x02000000 /* Accept overflow multicast */
+#define	E1000_VMOLR_ROPE	0x04000000 /* Accept overflow unicast */
 #define	E1000_VMOLR_BAM		0x08000000 /* Accept Broadcast packets */
 #define	E1000_VMOLR_MPME	0x10000000 /* Multicast promiscuous mode */
 #define	E1000_VMOLR_STRVLAN	0x40000000 /* Vlan stripping enable */
-
-#define	E1000_V2PMAILBOX_REQ	0x00000001 /* Request for PF Ready bit */
-#define	E1000_V2PMAILBOX_ACK	0x00000002 /* Ack PF message received */
-#define	E1000_V2PMAILBOX_VFU	0x00000004 /* VF owns the mailbox buffer */
-#define	E1000_V2PMAILBOX_PFU	0x00000008 /* PF owns the mailbox buffer */
-#define	E1000_V2PMAILBOX_PFSTS	0x00000010 /* PF wrote a message in the MB */
-#define	E1000_V2PMAILBOX_PFACK	0x00000020 /* PF ack the previous VF msg */
-#define	E1000_V2PMAILBOX_RSTI	0x00000040 /* PF has reset indication */
-
-#define	E1000_P2VMAILBOX_STS	0x00000001 /* Initiate message send to VF */
-#define	E1000_P2VMAILBOX_ACK	0x00000002 /* Ack message recv'd from VF */
-#define	E1000_P2VMAILBOX_VFU	0x00000004 /* VF owns the mailbox buffer */
-#define	E1000_P2VMAILBOX_PFU	0x00000008 /* PF owns the mailbox buffer */
-#define	E1000_P2VMAILBOX_RVFU	0x00000010 /* Reset VFU - used when VF stuck */
-
-#define	E1000_VFMAILBOX_SIZE	16 /* 16 32 bit words - 64 bytes */
+#define	E1000_VMOLR_STRCRC	0x80000000 /* CRC stripping enable */
 
-/*
- * If it's a E1000_VF_* msg then it originates in the VF and is sent to the
- * PF.  The reverse is true if it is E1000_PF_*.
- * Message ACK's are the value or'd with 0xF0000000
- */
-/* Messages below or'd with this are the ACK */
-#define	E1000_VT_MSGTYPE_ACK	0xF0000000
-/* Messages below or'd with this are the NACK */
-#define	E1000_VT_MSGTYPE_NACK	0xFF000000
-#define	E1000_VT_MSGINFO_SHIFT	16
-/* bits 23:16 are used for exra info for certain messages */
-#define	E1000_VT_MSGINFO_MASK	(0xFF << E1000_VT_MSGINFO_SHIFT)
-
-#define	E1000_VF_MSGTYPE_REQ_MAC	1 /* VF needs to know its MAC */
-#define	E1000_VF_MSGTYPE_VFLR		2 /* VF notifies VFLR to PF */
-#define	E1000_VF_SET_MULTICAST		3 /* VF requests PF to set MC addr */
-#define	E1000_VF_SET_VLAN		4 /* VF requests PF to set VLAN */
+#define	E1000_VLVF_ARRAY_SIZE		32
+#define	E1000_VLVF_VLANID_MASK		0x00000FFF
+#define	E1000_VLVF_POOLSEL_SHIFT	12
+#define	E1000_VLVF_POOLSEL_MASK		(0xFF << E1000_VLVF_POOLSEL_SHIFT)
+#define	E1000_VLVF_LVLAN		0x00100000
+#define	E1000_VLVF_VLANID_ENABLE	0x80000000
 
-/*
- * Add 100h to all PF msgs, leaves room for up to 255 discrete message types
- * from VF to PF - way more than we'll ever need
- */
-/* PF notifies global reset imminent to VF */
-#define	E1000_PF_MSGTYPE_RESET		(1 + 0x100)
-/* PF notifies VF of LSC... VF will see extra msg info for status */
-#define	E1000_PF_MSGTYPE_LSC		(2 + 0x100)
+#define	E1000_VF_INIT_TIMEOUT	200 /* Number of retries to clear RSTI */
 
-#define	E1000_PF_MSG_LSCDOWN		(1 << E1000_VT_MSGINFO_SHIFT)
-#define	E1000_PF_MSG_LSCUP		(2 << E1000_VT_MSGINFO_SHIFT)
-
-#define	ALL_QUEUES	0xFFFF
+#define	E1000_IOVCTL		0x05BBC
+#define	E1000_IOVCTL_REUSE_VFQ	0x00000001
 
-s32 e1000_send_mail_to_pf_vf(struct e1000_hw *hw, u32 *msg,
-    s16 size);
-s32 e1000_receive_mail_from_pf_vf(struct e1000_hw *hw,
-    u32 *msg, s16 size);
-s32 e1000_send_mail_to_vf(struct e1000_hw *hw, u32 *msg,
-    u32 vf_number, s16 size);
-s32 e1000_receive_mail_from_vf(struct e1000_hw *hw, u32 *msg,
-    u32 vf_number, s16 size);
-void e1000_vmdq_loopback_enable_vf(struct e1000_hw *hw);
-void e1000_vmdq_loopback_disable_vf(struct e1000_hw *hw);
-void e1000_vmdq_replication_enable_vf(struct e1000_hw *hw, u32 enables);
-void e1000_vmdq_replication_disable_vf(struct e1000_hw *hw);
-void e1000_vmdq_enable_replication_mode_vf(struct e1000_hw *hw);
-void e1000_vmdq_broadcast_replication_enable_vf(struct e1000_hw *hw,
-    u32 enables);
-void e1000_vmdq_multicast_replication_enable_vf(struct e1000_hw *hw,
-    u32 enables);
-void e1000_vmdq_broadcast_replication_disable_vf(struct e1000_hw *hw,
-    u32 disables);
-void e1000_vmdq_multicast_replication_disable_vf(struct e1000_hw *hw,
-    u32 disables);
-bool e1000_check_for_pf_ack_vf(struct e1000_hw *hw);
-bool e1000_check_for_pf_mail_vf(struct e1000_hw *hw, u32*);
+#define	E1000_RPLOLR_STRVLAN	0x40000000
 
+#define	ALL_QUEUES		0xFFFF
+
+void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable);
+void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable);
 
 #ifdef __cplusplus
 }
--- a/usr/src/uts/common/io/igb/igb_api.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_api.c	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.107 v2008-10-7 */
+/* IntelVersion: 1.122 v2-9-8_2009-6-12 */
 
 #include "igb_api.h"
 
@@ -138,6 +138,8 @@
 	case E1000_DEV_ID_82576_FIBER:
 	case E1000_DEV_ID_82576_SERDES:
 	case E1000_DEV_ID_82576_QUAD_COPPER:
+	case E1000_DEV_ID_82576_NS:
+	case E1000_DEV_ID_82576_SERDES_QUAD:
 		mac->type = e1000_82576;
 		break;
 	default:
@@ -278,26 +280,17 @@
  * @hw: pointer to the HW structure
  * @mc_addr_list: array of multicast addresses to program
  * @mc_addr_count: number of multicast addresses to program
- * @rar_used_count: the first RAR register free to program
- * @rar_count: total number of supported Receive Address Registers
  *
- * Updates the Receive Address Registers and Multicast Table Array.
+ * Updates the Multicast Table Array.
  * The caller must have a packed mc_addr_list of multicast addresses.
- * The parameter rar_count will usually be hw->mac.rar_entry_count
- * unless there are workarounds that change this.  Currently no func pointer
- * exists and all implementations are handled in the generic version of this
- * function.
  */
 void
 e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list,
-    u32 mc_addr_count, u32 rar_used_count, u32 rar_count)
+    u32 mc_addr_count)
 {
 	if (hw->mac.ops.update_mc_addr_list)
 		hw->mac.ops.update_mc_addr_list(hw,
-		    mc_addr_list,
-		    mc_addr_count,
-		    rar_used_count,
-		    rar_count);
+		    mc_addr_list, mc_addr_count);
 }
 
 /*
@@ -480,6 +473,22 @@
 }
 
 /*
+ * e1000_id_led_init - store LED configurations in SW
+ * @hw: pointer to the HW structure
+ *
+ * Initializes the LED config in SW. This is a function pointer entry point
+ * called by drivers.
+ */
+s32
+e1000_id_led_init(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.id_led_init)
+		return (hw->mac.ops.id_led_init(hw));
+
+	return (E1000_SUCCESS);
+}
+
+/*
  * e1000_led_on - Turn on SW controllable LED
  * @hw: pointer to the HW structure
  *
--- a/usr/src/uts/common/io/igb/igb_api.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_api.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.50 v2008-10-7 */
+/* IntelVersion: 1.52 v2-9-8_2009-6-12 */
 
 #ifndef _IGB_API_H
 #define	_IGB_API_H
@@ -62,14 +62,14 @@
 void e1000_mta_set(struct e1000_hw *hw, u32 hash_value);
 u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr);
 void e1000_update_mc_addr_list(struct e1000_hw *hw,
-    u8 *mc_addr_list, u32 mc_addr_count,
-    u32 rar_used_count, u32 rar_count);
+    u8 *mc_addr_list, u32 mc_addr_count);
 s32 e1000_setup_led(struct e1000_hw *hw);
 s32 e1000_cleanup_led(struct e1000_hw *hw);
 s32 e1000_check_reset_block(struct e1000_hw *hw);
 s32 e1000_blink_led(struct e1000_hw *hw);
 s32 e1000_led_on(struct e1000_hw *hw);
 s32 e1000_led_off(struct e1000_hw *hw);
+s32 e1000_id_led_init(struct e1000_hw *hw);
 void e1000_reset_adaptive(struct e1000_hw *hw);
 void e1000_update_adaptive(struct e1000_hw *hw);
 s32 e1000_get_cable_length(struct e1000_hw *hw);
--- a/usr/src/uts/common/io/igb/igb_defines.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_defines.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.79 v2008-10-7 */
+/* IntelVersion: 1.111 v2-9-8_2009-6-12 */
 
 #ifndef _IGB_DEFINES_H
 #define	_IGB_DEFINES_H
@@ -131,6 +131,8 @@
 #define	E1000_CTRL_EXT_PFRSTD	0x00004000
 #define	E1000_CTRL_EXT_SPD_BYPS	0x00008000 /* Speed Select Bypass */
 #define	E1000_CTRL_EXT_RO_DIS	0x00020000 /* Relaxed Ordering disable */
+/* DMA Dynamic Clock Gating */
+#define	E1000_CTRL_EXT_DMA_DYN_CLK_EN	0x00080000
 #define	E1000_CTRL_EXT_LINK_MODE_MASK	0x00C00000
 #define	E1000_CTRL_EXT_LINK_MODE_GMII	0x00000000
 #define	E1000_CTRL_EXT_LINK_MODE_TBI	0x00C00000
@@ -382,6 +384,7 @@
 #define	E1000_CTRL_SWDPIN0	0x00040000  /* SWDPIN 0 value */
 #define	E1000_CTRL_SWDPIN1	0x00080000  /* SWDPIN 1 value */
 #define	E1000_CTRL_SWDPIN2	0x00100000  /* SWDPIN 2 value */
+#define	E1000_CTRL_ADVD3WUC	0x00100000  /* D3 WUC */
 #define	E1000_CTRL_SWDPIN3	0x00200000  /* SWDPIN 3 value */
 #define	E1000_CTRL_SWDPIO0	0x00400000  /* SWDPIN 0 Input or output */
 #define	E1000_CTRL_SWDPIO1	0x00800000  /* SWDPIN 1 input or output */
@@ -458,6 +461,7 @@
 #define	E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion by NVM */
 #define	E1000_STATUS_ASDV	0x00000300 /* Auto speed detect value */
 /* Change in Dock/Undock state. Clear on write '0'. */
+#define	E1000_STATUS_PHYRA	0x00000400 /* PHY Reset Asserted */
 #define	E1000_STATUS_DOCK_CI	0x00000800
 #define	E1000_STATUS_GIO_MASTER_ENABLE	0x00080000 /* Master request status */
 #define	E1000_STATUS_MTXCKOK	0x00000400 /* MTX clock running OK */
@@ -667,14 +671,19 @@
 /* PBA constants */
 #define	E1000_PBA_6K	0x0006	/* 6KB */
 #define	E1000_PBA_8K	0x0008	/* 8KB */
+#define	E1000_PBA_10K	0x000A	/* 10KB */
 #define	E1000_PBA_12K	0x000C	/* 12KB */
+#define	E1000_PBA_14K	0x000E	/* 14KB */
 #define	E1000_PBA_16K	0x0010	/* 16KB */
+#define	E1000_PBA_18K	0x0012
 #define	E1000_PBA_20K	0x0014
 #define	E1000_PBA_22K	0x0016
 #define	E1000_PBA_24K	0x0018
+#define	E1000_PBA_26K	0x001A
 #define	E1000_PBA_30K	0x001E
 #define	E1000_PBA_32K	0x0020
 #define	E1000_PBA_34K	0x0022
+#define	E1000_PBA_35K	0x0023
 #define	E1000_PBA_38K	0x0026
 #define	E1000_PBA_40K	0x0028
 #define	E1000_PBA_48K	0x0030    /* 48KB */
@@ -695,6 +704,9 @@
 #define	E1000_SWSM_WMNG		0x00000004 /* Wake MNG Clock */
 #define	E1000_SWSM_DRV_LOAD	0x00000008 /* Driver Loaded Bit */
 
+/* Secondary driver semaphore bit */
+#define	E1000_SWSM2_LOCK	0x00000002
+
 /* Interrupt Cause Read */
 #define	E1000_ICR_TXDW		0x00000001 /* Transmit desc written back */
 #define	E1000_ICR_TXQE		0x00000002 /* Transmit Queue empty */
@@ -871,6 +883,8 @@
 #define	E1000_EICS_TCP_TIMER	E1000_EICR_TCP_TIMER /* TCP Timer */
 #define	E1000_EICS_OTHER	E1000_EICR_OTHER   /* Interrupt Cause Active */
 
+#define	E1000_EITR_ITR_INT_MASK	0x0000FFFF
+
 /* Transmit Descriptor Control */
 #define	E1000_TXDCTL_PTHRESH	0x0000003F /* TXDCTL Prefetch Threshold */
 #define	E1000_TXDCTL_HTHRESH	0x00003F00 /* TXDCTL Host Threshold */
@@ -901,6 +915,10 @@
  */
 #define	E1000_RAR_ENTRIES	15
 #define	E1000_RAH_AV		0x80000000	/* Receive descriptor valid */
+#define	E1000_RAL_MAC_ADDR_LEN	4
+#define	E1000_RAH_MAC_ADDR_LEN	2
+#define	E1000_RAH_POOL_MASK	0x03FC0000
+#define	E1000_RAH_POOL_1	0x00040000
 
 /* Error Codes */
 #define	E1000_SUCCESS		0
@@ -916,6 +934,7 @@
 #define	E1000_BLK_PHY_RESET	12
 #define	E1000_ERR_SWFW_SYNC	13
 #define	E1000_NOT_IMPLEMENTED	14
+#define	E1000_ERR_MBX		15
 
 /* Loop limit on how long we wait for auto-negotiation to complete */
 #define	FIBER_LINK_UP_LIMIT	50
@@ -965,6 +984,10 @@
 #define	E1000_GCR_TXD_NO_SNOOP		0x00000008
 #define	E1000_GCR_TXDSCW_NO_SNOOP	0x00000010
 #define	E1000_GCR_TXDSCR_NO_SNOOP	0x00000020
+#define	E1000_GCR_CMPL_TMOUT_MASK	0x0000F000
+#define	E1000_GCR_CMPL_TMOUT_10ms	0x00001000
+#define	E1000_GCR_CMPL_TMOUT_RESEND	0x00010000
+#define	E1000_GCR_CAP_VER2		0x00040000
 
 #define	PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP	| \
 			E1000_GCR_RXDSCW_NO_SNOOP	| \
@@ -1080,6 +1103,8 @@
 #define	PHY_1000T_STATUS	0x0A /* 1000Base-T Status Reg */
 #define	PHY_EXT_STATUS		0x0F /* Extended Status Reg */
 
+#define	PHY_CONTROL_LB		0x4000 /* PHY Loopback bit */
+
 /* NVM Control */
 #define	E1000_EECD_SK		0x00000001 /* NVM Clock */
 #define	E1000_EECD_CS		0x00000002 /* NVM Chip Select */
@@ -1110,6 +1135,7 @@
 #define	E1000_EECD_SHADV	0x00200000 /* Shadow RAM Data Valid */
 #define	E1000_EECD_SEC1VAL	0x00400000 /* Sector One Valid */
 #define	E1000_EECD_SECVAL_SHIFT		22
+#define	E1000_EECD_SEC1VAL_VALID_MASK	(E1000_EECD_AUTO_RD | E1000_EECD_PRES)
 
 #define	E1000_NVM_SWDPIN0	0x0001	/* SWDPIN 0 NVM Value */
 #define	E1000_NVM_LED_LOGIC	0x0020	/* Led Logic Word */
@@ -1213,22 +1239,14 @@
 #define	IGP_LED3_MODE		0x07000000
 
 /* PCI/PCI-X/PCI-EX Config space */
-#define	PCIX_COMMAND_REGISTER		0xE6
-#define	PCIX_STATUS_REGISTER_LO		0xE8
-#define	PCIX_STATUS_REGISTER_HI		0xEA
 #define	PCI_HEADER_TYPE_REGISTER	0x0E
 #define	PCIE_LINK_STATUS		0x12
+#define	PCIE_DEVICE_CONTROL2		0x28
 
-#define	PCIX_COMMAND_MMRBC_MASK		0x000C
-#define	PCIX_COMMAND_MMRBC_SHIFT	0x2
-#define	PCIX_STATUS_HI_MMRBC_MASK	0x0060
-#define	PCIX_STATUS_HI_MMRBC_SHIFT	0x5
-#define	PCIX_STATUS_HI_MMRBC_4K		0x3
-#define	PCIX_STATUS_HI_MMRBC_2K		0x2
-#define	PCIX_STATUS_LO_FUNC_MASK	0x7
 #define	PCI_HEADER_TYPE_MULTIFUNC	0x80
 #define	PCIE_LINK_WIDTH_MASK		0x3F0
 #define	PCIE_LINK_WIDTH_SHIFT		4
+#define	PCIE_DEVICE_CONTROL2_16ms	0x0005
 
 #ifndef ETH_ADDR_LEN
 #define	ETH_ADDR_LEN			6
--- a/usr/src/uts/common/io/igb/igb_hw.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_hw.h	Mon Aug 17 09:14:35 2009 -0400
@@ -45,6 +45,8 @@
 #define	E1000_DEV_ID_82576_FIBER		0x10E6
 #define	E1000_DEV_ID_82576_SERDES		0x10E7
 #define	E1000_DEV_ID_82576_QUAD_COPPER		0x10E8
+#define	E1000_DEV_ID_82576_NS			0x150A
+#define	E1000_DEV_ID_82576_SERDES_QUAD		0x150D
 #define	E1000_DEV_ID_82575EB_COPPER		0x10A7
 #define	E1000_DEV_ID_82575EB_FIBER_SERDES	0x10A9
 #define	E1000_DEV_ID_82575GB_QUAD_COPPER	0x10D6
@@ -57,6 +59,8 @@
 
 #define	E1000_FUNC_0	0
 #define	E1000_FUNC_1	1
+#define	E1000_ALT_MAC_ADDRESS_OFFSET_LAN0	0
+#define	E1000_ALT_MAC_ADDRESS_OFFSET_LAN1	3
 
 enum e1000_mac_type {
 	e1000_undefined = 0,
@@ -166,6 +170,13 @@
 	e1000_smart_speed_off
 };
 
+enum e1000_serdes_link_state {
+	e1000_serdes_link_down = 0,
+	e1000_serdes_link_autoneg_progress,
+	e1000_serdes_link_autoneg_complete,
+	e1000_serdes_link_forced_up
+};
+
 /* Receive Descriptor */
 struct e1000_rx_desc {
 	__le64 buffer_addr; /* Address of the descriptor's data buffer */
@@ -436,6 +447,7 @@
 struct e1000_mac_operations {
 	/* Function pointers for the MAC. */
 	s32  (*init_params)(struct e1000_hw *);
+	s32  (*id_led_init)(struct e1000_hw *);
 	s32  (*blink_led)(struct e1000_hw *);
 	s32  (*check_for_link)(struct e1000_hw *);
 	bool (*check_mng_mode)(struct e1000_hw *hw);
@@ -443,10 +455,11 @@
 	void (*clear_hw_cntrs)(struct e1000_hw *);
 	void (*clear_vfta)(struct e1000_hw *);
 	s32  (*get_bus_info)(struct e1000_hw *);
+	void (*set_lan_id)(struct e1000_hw *);
 	s32  (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *);
 	s32  (*led_on)(struct e1000_hw *);
 	s32  (*led_off)(struct e1000_hw *);
-	void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32, u32, u32);
+	void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32);
 	s32  (*reset_hw)(struct e1000_hw *);
 	s32  (*init_hw)(struct e1000_hw *);
 	void (*shutdown_serdes)(struct e1000_hw *);
@@ -519,6 +532,10 @@
 	u16 ifs_ratio;
 	u16 ifs_step_size;
 	u16 mta_reg_count;
+
+	/* Maximum size of the MTA register table in all supported adapters */
+#define	MAX_MTA_REG 128
+	u32 mta_shadow[MAX_MTA_REG];
 	u16 rar_entry_count;
 
 	u8  forced_speed_duplex;
@@ -530,6 +547,7 @@
 	bool autoneg_failed;
 	bool get_link_status;
 	bool in_ifs_mode;
+	enum e1000_serdes_link_state serdes_link_state;
 	bool serdes_has_link;
 	bool tx_pkt_filtering;
 };
@@ -604,10 +622,12 @@
 
 struct e1000_dev_spec_82575 {
 	bool sgmii_active;
+	bool global_device_reset;
 };
 
 struct e1000_dev_spec_vf {
 	u32	vf_number;
+	u32	v2p_mailbox;
 };
 
 struct e1000_hw {
@@ -641,8 +661,7 @@
 
 /* These functions must be implemented by drivers */
 s32  e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
-void e1000_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value);
-void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value);
+s32  e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
 
 #ifdef __cplusplus
 }
--- a/usr/src/uts/common/io/igb/igb_mac.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_mac.c	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.92 v2008-10-7 */
+/* IntelVersion: 1.104 v2-9-8_2009-6-12 */
 
 #include "igb_api.h"
 
@@ -34,6 +34,7 @@
 static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw);
 static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw);
 static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw);
+static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw);
 
 /*
  * e1000_init_mac_ops_generic - Initialize MAC function pointers
@@ -53,6 +54,7 @@
 	mac->ops.reset_hw = e1000_null_ops_generic;
 	mac->ops.setup_physical_interface = e1000_null_ops_generic;
 	mac->ops.get_bus_info = e1000_null_ops_generic;
+	mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pcie;
 	mac->ops.read_mac_addr = e1000_read_mac_addr_generic;
 	mac->ops.config_collision_dist = e1000_config_collision_dist_generic;
 	mac->ops.clear_hw_cntrs = e1000_null_mac_generic;
@@ -133,10 +135,10 @@
  * @hw: pointer to the HW structure
  */
 void
-e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a, u32 b, u32 c)
+e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a)
 {
 	DEBUGFUNC("e1000_null_update_mc");
-	UNREFERENCED_5PARAMETER(hw, h, a, b, c);
+	UNREFERENCED_3PARAMETER(hw, h, a);
 }
 
 /*
@@ -173,67 +175,6 @@
 }
 
 /*
- * e1000_get_bus_info_pci_generic - Get PCI(x) bus information
- * @hw: pointer to the HW structure
- *
- * Determines and stores the system bus information for a particular
- * network interface.  The following bus information is determined and stored:
- * bus speed, bus width, type (PCI/PCIx), and PCI(-x) function.
- */
-s32
-e1000_get_bus_info_pci_generic(struct e1000_hw *hw)
-{
-	struct e1000_bus_info *bus = &hw->bus;
-	u32 status = E1000_READ_REG(hw, E1000_STATUS);
-	s32 ret_val = E1000_SUCCESS;
-	u16 pci_header_type;
-
-	DEBUGFUNC("e1000_get_bus_info_pci_generic");
-
-	/* PCI or PCI-X? */
-	bus->type = (status & E1000_STATUS_PCIX_MODE)
-	    ? e1000_bus_type_pcix
-	    : e1000_bus_type_pci;
-
-	/* Bus speed */
-	if (bus->type == e1000_bus_type_pci) {
-		bus->speed = (status & E1000_STATUS_PCI66)
-		    ? e1000_bus_speed_66
-		    : e1000_bus_speed_33;
-	} else {
-		switch (status & E1000_STATUS_PCIX_SPEED) {
-		case E1000_STATUS_PCIX_SPEED_66:
-			bus->speed = e1000_bus_speed_66;
-			break;
-		case E1000_STATUS_PCIX_SPEED_100:
-			bus->speed = e1000_bus_speed_100;
-			break;
-		case E1000_STATUS_PCIX_SPEED_133:
-			bus->speed = e1000_bus_speed_133;
-			break;
-		default:
-			bus->speed = e1000_bus_speed_reserved;
-			break;
-		}
-	}
-
-	/* Bus width */
-	bus->width = (status & E1000_STATUS_BUS64)
-	    ? e1000_bus_width_64
-	    : e1000_bus_width_32;
-
-	/* Which PCI(-X) function? */
-	e1000_read_pci_cfg(hw, PCI_HEADER_TYPE_REGISTER, &pci_header_type);
-	if (pci_header_type & PCI_HEADER_TYPE_MULTIFUNC)
-		bus->func = (status & E1000_STATUS_FUNC_MASK)
-		    >> E1000_STATUS_FUNC_SHIFT;
-	else
-		bus->func = 0;
-
-	return (ret_val);
-}
-
-/*
  * e1000_get_bus_info_pcie_generic - Get PCIe bus information
  * @hw: pointer to the HW structure
  *
@@ -244,10 +185,10 @@
 s32
 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw)
 {
+	struct e1000_mac_info *mac = &hw->mac;
 	struct e1000_bus_info *bus = &hw->bus;
 	s32 ret_val;
-	u32 status;
-	u16 pcie_link_status, pci_header_type;
+	u16 pcie_link_status;
 
 	DEBUGFUNC("e1000_get_bus_info_pcie_generic");
 
@@ -262,19 +203,48 @@
 		bus->width = (enum e1000_bus_width)((pcie_link_status &
 		    PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT);
 
-	e1000_read_pci_cfg(hw, PCI_HEADER_TYPE_REGISTER, &pci_header_type);
-	if (pci_header_type & PCI_HEADER_TYPE_MULTIFUNC) {
-		status = E1000_READ_REG(hw, E1000_STATUS);
-		bus->func = (status & E1000_STATUS_FUNC_MASK)
-		    >> E1000_STATUS_FUNC_SHIFT;
-	} else {
-		bus->func = 0;
-	}
+	mac->ops.set_lan_id(hw);
 
 	return (E1000_SUCCESS);
 }
 
 /*
+ * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices
+ *
+ * @hw: pointer to the HW structure
+ *
+ * Determines the LAN function id by reading memory-mapped registers
+ * and swaps the port value if requested.
+ */
+static void
+e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw)
+{
+	struct e1000_bus_info *bus = &hw->bus;
+	u32 reg;
+
+	/*
+	 * The status register reports the correct function number
+	 * for the device regardless of function swap state.
+	 */
+	reg = E1000_READ_REG(hw, E1000_STATUS);
+	bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
+}
+
+/*
+ * e1000_set_lan_id_single_port - Set LAN id for a single port device
+ * @hw: pointer to the HW structure
+ *
+ * Sets the LAN function id to zero for a single port device.
+ */
+void
+e1000_set_lan_id_single_port(struct e1000_hw *hw)
+{
+	struct e1000_bus_info *bus = &hw->bus;
+
+	bus->func = 0;
+}
+
+/*
  * e1000_clear_vfta_generic - Clear VLAN filter table
  * @hw: pointer to the HW structure
  *
@@ -325,6 +295,7 @@
 e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count)
 {
 	u32 i;
+	u8 mac_addr[ETH_ADDR_LEN] = {0};
 
 	DEBUGFUNC("e1000_init_rx_addrs_generic");
 
@@ -335,12 +306,8 @@
 
 	/* Zero out the other (rar_entry_count - 1) receive addresses */
 	DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1);
-	for (i = 1; i < rar_count; i++) {
-		E1000_WRITE_REG_ARRAY(hw, E1000_RA, (i << 1), 0);
-		E1000_WRITE_FLUSH(hw);
-		E1000_WRITE_REG_ARRAY(hw, E1000_RA, ((i << 1) + 1), 0);
-		E1000_WRITE_FLUSH(hw);
-	}
+	for (i = 1; i < rar_count; i++)
+		hw->mac.ops.rar_set(hw, mac_addr, i);
 }
 
 /*
@@ -350,9 +317,10 @@
  * Checks the nvm for an alternate MAC address.  An alternate MAC address
  * can be setup by pre-boot software and must be treated like a permanent
  * address and must override the actual permanent MAC address.  If an
- * alternate MAC address is found it is saved in the hw struct and
- * programmed into RAR0 and the function returns success, otherwise the
- * function returns an error.
+ * alternate MAC address is found it is programmed into RAR0, replacing
+ * the permanent address that was installed into RAR0 by the Si on reset.
+ * This function will return SUCCESS unless it encounters an error while
+ * reading the EEPROM.
  */
 s32
 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw)
@@ -372,13 +340,12 @@
 	}
 
 	if (nvm_alt_mac_addr_offset == 0xFFFF) {
-		ret_val = -(E1000_NOT_IMPLEMENTED);
+		/* There is no Alternate MAC Address */
 		goto out;
 	}
 
 	if (hw->bus.func == E1000_FUNC_1)
-		nvm_alt_mac_addr_offset += ETH_ADDR_LEN / sizeof (u16);
-
+		nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1;
 	for (i = 0; i < ETH_ADDR_LEN; i += 2) {
 		offset = nvm_alt_mac_addr_offset + (i >> 1);
 		ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data);
@@ -393,14 +360,16 @@
 
 	/* if multicast bit is set, the alternate address will not be used */
 	if (alt_mac_addr[0] & 0x01) {
-		ret_val = -(E1000_NOT_IMPLEMENTED);
+		DEBUGOUT("Ignoring Alternate Mac Address with MC bit set\n");
 		goto out;
 	}
 
-	for (i = 0; i < ETH_ADDR_LEN; i++)
-		hw->mac.addr[i] = hw->mac.perm_addr[i] = alt_mac_addr[i];
-
-	hw->mac.ops.rar_set(hw, hw->mac.perm_addr, 0);
+	/*
+	 * We have a valid alternate MAC address, and we want to treat it the
+	 * same as the normal permanent MAC address stored by the HW into the
+	 * RAR. Do this by mapping this address into RAR0.
+	 */
+	hw->mac.ops.rar_set(hw, alt_mac_addr, 0);
 
 out:
 	return (ret_val);
@@ -436,8 +405,15 @@
 	if (rar_low || rar_high)
 		rar_high |= E1000_RAH_AV;
 
+	/*
+	 * Some bridges will combine consecutive 32-bit writes into
+	 * a single burst write, which will malfunction on some parts.
+	 * The flushes avoid this.
+	 */
 	E1000_WRITE_REG(hw, E1000_RAL(index), rar_low);
+	E1000_WRITE_FLUSH(hw);
 	E1000_WRITE_REG(hw, E1000_RAH(index), rar_high);
+	E1000_WRITE_FLUSH(hw);
 }
 
 /*
@@ -482,56 +458,37 @@
  * @hw: pointer to the HW structure
  * @mc_addr_list: array of multicast addresses to program
  * @mc_addr_count: number of multicast addresses to program
- * @rar_used_count: the first RAR register free to program
- * @rar_count: total number of supported Receive Address Registers
  *
- * Updates the Receive Address Registers and Multicast Table Array.
+ * Updates the Multicast Table Array.
  * The caller must have a packed mc_addr_list of multicast addresses.
- * The parameter rar_count will usually be hw->mac.rar_entry_count
- * unless there are workarounds that change this.
  */
 void
 e1000_update_mc_addr_list_generic(struct e1000_hw *hw,
-    u8 *mc_addr_list, u32 mc_addr_count,
-    u32 rar_used_count, u32 rar_count)
+    u8 *mc_addr_list, u32 mc_addr_count)
 {
-	u32 hash_value;
-	u32 i;
+	u32 hash_value, hash_bit, hash_reg;
+	int i;
 
 	DEBUGFUNC("e1000_update_mc_addr_list_generic");
 
-	/*
-	 * Load the first set of multicast addresses into the exact
-	 * filters (RAR).  If there are not enough to fill the RAR
-	 * array, clear the filters.
-	 */
-	for (i = rar_used_count; i < rar_count; i++) {
-		if (mc_addr_count) {
-			hw->mac.ops.rar_set(hw, mc_addr_list, i);
-			mc_addr_count--;
-			mc_addr_list += ETH_ADDR_LEN;
-		} else {
-			E1000_WRITE_REG_ARRAY(hw, E1000_RA, i << 1, 0);
-			E1000_WRITE_FLUSH(hw);
-			E1000_WRITE_REG_ARRAY(hw, E1000_RA, (i << 1) + 1, 0);
-			E1000_WRITE_FLUSH(hw);
-		}
+	/* clear mta_shadow */
+	memset(&hw->mac.mta_shadow, 0, sizeof (hw->mac.mta_shadow));
+
+	/* update mta_shadow from mc_addr_list */
+	for (i = 0; (u32) i < mc_addr_count; i++) {
+		hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list);
+
+		hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+		hash_bit = hash_value & 0x1F;
+
+		hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit);
+		mc_addr_list += (ETH_ADDR_LEN);
 	}
 
-	/* Clear the old settings from the MTA */
-	DEBUGOUT("Clearing MTA\n");
-	for (i = 0; i < hw->mac.mta_reg_count; i++) {
-		E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0);
-		E1000_WRITE_FLUSH(hw);
-	}
-
-	/* Load any remaining multicast addresses into the hash table. */
-	for (; mc_addr_count > 0; mc_addr_count--) {
-		hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list);
-		DEBUGOUT1("Hash value = 0x%03X\n", hash_value);
-		hw->mac.ops.mta_set(hw, hash_value);
-		mc_addr_list += ETH_ADDR_LEN;
-	}
+	/* replace the entire MTA table */
+	for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+		E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]);
+	E1000_WRITE_FLUSH(hw);
 }
 
 /*
@@ -609,44 +566,6 @@
 }
 
 /*
- * e1000_pcix_mmrbc_workaround_generic - Fix incorrect MMRBC value
- * @hw: pointer to the HW structure
- *
- * In certain situations, a system BIOS may report that the PCIx maximum
- * memory read byte count (MMRBC) value is higher than than the actual
- * value. We check the PCIx command register with the current PCIx status
- * register.
- */
-void
-e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw)
-{
-	u16 cmd_mmrbc;
-	u16 pcix_cmd;
-	u16 pcix_stat_hi_word;
-	u16 stat_mmrbc;
-
-	DEBUGFUNC("e1000_pcix_mmrbc_workaround_generic");
-
-	/* Workaround for PCI-X issue when BIOS sets MMRBC incorrectly */
-	if (hw->bus.type != e1000_bus_type_pcix)
-		return;
-
-	e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd);
-	e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, &pcix_stat_hi_word);
-	cmd_mmrbc = (pcix_cmd & PCIX_COMMAND_MMRBC_MASK) >>
-	    PCIX_COMMAND_MMRBC_SHIFT;
-	stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >>
-	    PCIX_STATUS_HI_MMRBC_SHIFT;
-	if (stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K)
-		stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K;
-	if (cmd_mmrbc > stat_mmrbc) {
-		pcix_cmd &= ~PCIX_COMMAND_MMRBC_MASK;
-		pcix_cmd |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT;
-		e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd);
-	}
-}
-
-/*
  * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters
  * @hw: pointer to the HW structure
  *
--- a/usr/src/uts/common/io/igb/igb_mac.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_mac.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.29 v2008-10-7 */
+/* IntelVersion: 1.32 v2-9-8_2009-6-12 */
 
 #ifndef	_IGB_MAC_H
 #define	_IGB_MAC_H
@@ -44,7 +44,7 @@
 s32  e1000_null_ops_generic(struct e1000_hw *hw);
 s32  e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d);
 bool e1000_null_mng_mode(struct e1000_hw *hw);
-void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a, u32 b, u32 c);
+void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a);
 void e1000_null_write_vfta(struct e1000_hw *hw, u32 a, u32 b);
 void e1000_null_mta_set(struct e1000_hw *hw, u32 a);
 void e1000_null_rar_set(struct e1000_hw *hw, u8 *h, u32 a);
@@ -57,8 +57,8 @@
 s32  e1000_disable_pcie_master_generic(struct e1000_hw *hw);
 s32  e1000_force_mac_fc_generic(struct e1000_hw *hw);
 s32  e1000_get_auto_rd_done_generic(struct e1000_hw *hw);
-s32  e1000_get_bus_info_pci_generic(struct e1000_hw *hw);
 s32  e1000_get_bus_info_pcie_generic(struct e1000_hw *hw);
+void e1000_set_lan_id_single_port(struct e1000_hw *hw);
 s32  e1000_get_hw_semaphore_generic(struct e1000_hw *hw);
 s32  e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed,
     u16 *duplex);
@@ -68,8 +68,7 @@
 s32  e1000_led_on_generic(struct e1000_hw *hw);
 s32  e1000_led_off_generic(struct e1000_hw *hw);
 void e1000_update_mc_addr_list_generic(struct e1000_hw *hw,
-    u8 *mc_addr_list, u32 mc_addr_count,
-    u32 rar_used_count, u32 rar_count);
+    u8 *mc_addr_list, u32 mc_addr_count);
 s32  e1000_set_fc_watermarks_generic(struct e1000_hw *hw);
 s32  e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw);
 s32  e1000_setup_led_generic(struct e1000_hw *hw);
--- a/usr/src/uts/common/io/igb/igb_main.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_main.c	Mon Aug 17 09:14:35 2009 -0400
@@ -29,7 +29,7 @@
 #include "igb_sw.h"
 
 static char ident[] = "Intel 1Gb Ethernet";
-static char igb_version[] = "igb 1.1.7";
+static char igb_version[] = "igb 1.1.8";
 
 /*
  * Local function protoypes
@@ -2584,8 +2584,7 @@
 	/*
 	 * Update the multicase addresses to the MTA registers
 	 */
-	e1000_update_mc_addr_list(hw, mc_addr_list, mc_addr_count,
-	    igb->unicst_total, hw->mac.rar_entry_count);
+	e1000_update_mc_addr_list(hw, mc_addr_list, mc_addr_count);
 }
 
 /*
@@ -2680,7 +2679,7 @@
 	igb->rx_ring_size = igb_get_prop(igb, PROP_RX_RING_SIZE,
 	    MIN_RX_RING_SIZE, MAX_RX_RING_SIZE, DEFAULT_RX_RING_SIZE);
 
-	igb->mr_enable = igb_get_prop(igb, PROP_MR_ENABLE, 0, 1, 1);
+	igb->mr_enable = igb_get_prop(igb, PROP_MR_ENABLE, 0, 1, 0);
 	igb->num_rx_groups = igb_get_prop(igb, PROP_RX_GROUP_NUM,
 	    MIN_RX_GROUP_NUM, MAX_RX_GROUP_NUM, DEFAULT_RX_GROUP_NUM);
 	/*
--- a/usr/src/uts/common/io/igb/igb_manage.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_manage.c	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.27 v2008-10-7 */
+/* IntelVersion: 1.27 v2-9-8_2009-6-12 */
 
 #include "igb_api.h"
 
--- a/usr/src/uts/common/io/igb/igb_manage.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_manage.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.18 v2008-10-7 */
+/* IntelVersion: 1.18 v2-9-8_2009-6-12 */
 
 #ifndef _IGB_MANAGE_H
 #define	_IGB_MANAGE_H
--- a/usr/src/uts/common/io/igb/igb_nvm.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_nvm.c	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.46 v2008-10-7 */
+/* IntelVersion: 1.49 v2-9-8_2009-6-12 */
 
 #include "igb_api.h"
 
@@ -771,31 +771,23 @@
 s32
 e1000_read_mac_addr_generic(struct e1000_hw *hw)
 {
-	s32  ret_val = E1000_SUCCESS;
-	u16 offset, nvm_data, i;
-
-	DEBUGFUNC("e1000_read_mac_addr");
+	u32 rar_high;
+	u32 rar_low;
+	u16 i;
 
-	for (i = 0; i < ETH_ADDR_LEN; i += 2) {
-		offset = i >> 1;
-		ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data);
-		if (ret_val) {
-			DEBUGOUT("NVM Read Error\n");
-			goto out;
-		}
-		hw->mac.perm_addr[i] = (u8)(nvm_data & 0xFF);
-		hw->mac.perm_addr[i+1] = (u8)(nvm_data >> 8);
-	}
+	rar_high = E1000_READ_REG(hw, E1000_RAH(0));
+	rar_low = E1000_READ_REG(hw, E1000_RAL(0));
 
-	/* Flip last bit of mac address if we're on second port */
-	if (hw->bus.func == E1000_FUNC_1)
-		hw->mac.perm_addr[5] ^= 1;
+	for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8));
+
+	for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8));
 
 	for (i = 0; i < ETH_ADDR_LEN; i++)
 		hw->mac.addr[i] = hw->mac.perm_addr[i];
 
-out:
-	return (ret_val);
+	return (E1000_SUCCESS);
 }
 
 /*
--- a/usr/src/uts/common/io/igb/igb_nvm.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_nvm.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.18 v2008-10-7 */
+/* IntelVersion: 1.18 v2-9-8_2009-6-12 */
 
 #ifndef _IGB_NVM_H
 #define	_IGB_NVM_H
--- a/usr/src/uts/common/io/igb/igb_osdep.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_osdep.c	Mon Aug 17 09:14:35 2009 -0400
@@ -31,22 +31,6 @@
 
 
 void
-e1000_pci_set_mwi(struct e1000_hw *hw)
-{
-	uint16_t val = hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE;
-
-	e1000_write_pci_cfg(hw, PCI_COMMAND_REGISTER, &val);
-}
-
-void
-e1000_pci_clear_mwi(struct e1000_hw *hw)
-{
-	uint16_t val = hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE;
-
-	e1000_write_pci_cfg(hw, PCI_COMMAND_REGISTER, &val);
-}
-
-void
 e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
 {
 	pci_config_put16(OS_DEP(hw)->cfg_handle, reg, *value);
@@ -61,25 +45,50 @@
 
 /*
  * Return the 16-bit value from pci-e config space at offset reg into the pci-e
- * capability block.
+ * capability block.  Note that this refers to the pci-e capability block in
+ * standard pci config space, not the block in pci-e extended config space.
  */
 int32_t
 e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
 {
-	uint32_t pcie_cap = PCI_EX_CONF_CAP;	/* default */
+	uint8_t pcie_id = PCI_CAP_ID_PCI_E;
+	uint16_t pcie_cap;
+	int32_t status;
 
-	switch (hw->mac.type) {
-	case e1000_82575:
-		pcie_cap = 0xa0;
-		break;
-	case e1000_82576:
-		pcie_cap = 0xa0;
-		break;
+	/* locate the pci-e capability block */
+	status = pci_lcap_locate((OS_DEP(hw))->cfg_handle, pcie_id, &pcie_cap);
+	if (status == DDI_SUCCESS) {
+
+		/* read at given offset into block */
+		*value = pci_config_get16(OS_DEP(hw)->cfg_handle,
+		    (pcie_cap + reg));
 	}
 
-	*value = pci_config_get16(OS_DEP(hw)->cfg_handle, (pcie_cap + reg));
+	return (status);
+}
 
-	return (0);
+/*
+ * Write the given 16-bit value to pci-e config space at offset reg into the
+ * pci-e capability block.  Note that this refers to the pci-e capability block
+ * in standard pci config space, not the block in pci-e extended config space.
+ */
+int32_t
+e1000_write_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
+{
+	uint8_t pcie_id = PCI_CAP_ID_PCI_E;
+	uint16_t pcie_cap;
+	int32_t status;
+
+	/* locate the pci-e capability block */
+	status = pci_lcap_locate(OS_DEP(hw)->cfg_handle, pcie_id, &pcie_cap);
+	if (status == DDI_SUCCESS) {
+
+		/* write at given offset into block */
+		pci_config_put16(OS_DEP(hw)->cfg_handle,
+		    (off_t)(pcie_cap + reg), *value);
+	}
+
+	return (status);
 }
 
 /*
--- a/usr/src/uts/common/io/igb/igb_osdep.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_osdep.h	Mon Aug 17 09:14:35 2009 -0400
@@ -48,6 +48,7 @@
 #include <sys/dditypes.h>
 #include <sys/sunddi.h>
 #include <sys/pci.h>
+#include <sys/pci_cap.h>
 #include <sys/atomic.h>
 #include <sys/note.h>
 #include "igb_debug.h"
--- a/usr/src/uts/common/io/igb/igb_phy.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_phy.c	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.112 v2008-10-7 */
+/* IntelVersion: 1.140 v2-9-8_2009-6-12 */
 
 #include "igb_api.h"
 
@@ -1283,6 +1283,76 @@
 }
 
 /*
+ * e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex
+ * @hw: pointer to the HW structure
+ *
+ * Forces the speed and duplex settings of the PHY.
+ * This is a function pointer entry point only called by
+ * PHY setup routines.
+ */
+s32
+e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	DEBUGFUNC("e1000_phy_force_speed_duplex_ife");
+
+	if (phy->type != e1000_phy_ife) {
+		ret_val = e1000_phy_force_speed_duplex_igp(hw);
+		goto out;
+	}
+
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &data);
+	if (ret_val)
+		goto out;
+
+	e1000_phy_force_speed_duplex_setup(hw, &data);
+
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, data);
+	if (ret_val)
+		goto out;
+
+	/* Disable MDI-X support for 10/100 */
+	ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data);
+	if (ret_val)
+		goto out;
+
+	data &= ~IFE_PMC_AUTO_MDIX;
+	data &= ~IFE_PMC_FORCE_MDIX;
+
+	ret_val = phy->ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, data);
+	if (ret_val)
+		goto out;
+
+	DEBUGOUT1("IFE PMC: %X\n", data);
+
+	usec_delay(1);
+
+	if (phy->autoneg_wait_to_complete) {
+		DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n");
+
+		ret_val = e1000_phy_has_link_generic(hw,
+		    PHY_FORCE_LIMIT, 100000, &link);
+		if (ret_val)
+			goto out;
+
+		if (!link)
+			DEBUGOUT("Link taking longer than expected.\n");
+
+		/* Try once more */
+		ret_val = e1000_phy_has_link_generic(hw,
+		    PHY_FORCE_LIMIT, 100000, &link);
+		if (ret_val)
+			goto out;
+	}
+
+out:
+	return (ret_val);
+}
+/*
  * e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex
  * @hw: pointer to the HW structure
  * @phy_ctrl: pointer to current value of PHY_CONTROL
@@ -1566,6 +1636,41 @@
 }
 
 /*
+ * e1000_check_polarity_ife - Check cable polarity for IFE PHY
+ * @hw: pointer to the HW structure
+ *
+ * Polarity is determined on the polarity reversal feature being enabled.
+ */
+s32
+e1000_check_polarity_ife(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, offset, mask;
+
+	DEBUGFUNC("e1000_check_polarity_ife");
+
+	/*
+	 * Polarity is determined based on the reversal feature being enabled.
+	 */
+	if (phy->polarity_correction) {
+		offset = IFE_PHY_EXTENDED_STATUS_CONTROL;
+		mask = IFE_PESC_POLARITY_REVERSED;
+	} else {
+		offset = IFE_PHY_SPECIAL_CONTROL;
+		mask = IFE_PSC_FORCE_POLARITY;
+	}
+
+	ret_val = phy->ops.read_reg(hw, offset, &phy_data);
+
+	if (!ret_val)
+		phy->cable_polarity = (phy_data & mask)
+		    ? e1000_rev_polarity_reversed
+		    : e1000_rev_polarity_normal;
+
+	return (ret_val);
+}
+/*
  * e1000_wait_autoneg_generic - Wait for auto-neg completion
  * @hw: pointer to the HW structure
  *
@@ -1631,9 +1736,16 @@
 		 * it across the board.
 		 */
 		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
-		if (ret_val)
-			break;
-		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val) {
+			/*
+			 * If the first read fails, another entity may have
+			 * ownership of the resources, wait and try again to
+			 * see if they have relinquished the resources yet.
+			 */
+			usec_delay(usec_interval);
+			ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS,
+			    &phy_status);
+		}
 		if (ret_val)
 			break;
 		if (phy_status & MII_SR_LINK_STATUS)
@@ -1680,14 +1792,15 @@
 	index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
 	    M88E1000_PSSR_CABLE_LENGTH_SHIFT;
 	if (index < (M88E1000_CABLE_LENGTH_TABLE_SIZE + 1)) {
-		phy->min_cable_length = e1000_m88_cable_length_table[index];
-		phy->max_cable_length = e1000_m88_cable_length_table[index+1];
+		ret_val = E1000_ERR_PHY;
+		goto out;
+	}
 
-		phy->cable_length = (phy->min_cable_length +
-		    phy->max_cable_length) / 2;
-	} else {
-		ret_val = E1000_ERR_PHY;
-	}
+	phy->min_cable_length = e1000_m88_cable_length_table[index];
+	phy->max_cable_length = e1000_m88_cable_length_table[index+1];
+
+	phy->cable_length = (phy->min_cable_length +
+	    phy->max_cable_length) / 2;
 
 out:
 	return (ret_val);
@@ -2140,6 +2253,48 @@
 }
 
 /*
+ * e1000_determine_phy_address - Determines PHY address.
+ * @hw: pointer to the HW structure
+ *
+ * This uses a trial and error method to loop through possible PHY
+ * addresses. It tests each by reading the PHY ID registers and
+ * checking for a match.
+ */
+s32
+e1000_determine_phy_address(struct e1000_hw *hw)
+{
+	s32 ret_val = -E1000_ERR_PHY_TYPE;
+	u32 phy_addr = 0;
+	u32 i;
+	enum e1000_phy_type phy_type = e1000_phy_unknown;
+
+	hw->phy.id = phy_type;
+
+	for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) {
+		hw->phy.addr = phy_addr;
+		i = 0;
+
+		do {
+			e1000_get_phy_id(hw);
+			phy_type = e1000_get_phy_type_from_id(hw->phy.id);
+
+			/*
+			 * If phy_type is valid, break - we found our
+			 * PHY address
+			 */
+			if (phy_type  != e1000_phy_unknown) {
+				ret_val = E1000_SUCCESS;
+				goto out;
+			}
+			msec_delay(1);
+			i++;
+		} while (i < 10);
+	}
+
+out:
+	return (ret_val);
+}
+/*
  * e1000_power_up_phy_copper - Restore copper link in case of PHY power down
  * @hw: pointer to the HW structure
  *
--- a/usr/src/uts/common/io/igb/igb_phy.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_phy.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.55 v2008-10-7 */
+/* IntelVersion: 1.69 v2-9-8_2009-6-12 */
 
 #ifndef _IGB_PHY_H
 #define	_IGB_PHY_H
@@ -43,12 +43,14 @@
 s32 e1000_check_downshift_generic(struct e1000_hw *hw);
 s32 e1000_check_polarity_m88(struct e1000_hw *hw);
 s32 e1000_check_polarity_igp(struct e1000_hw *hw);
+s32 e1000_check_polarity_ife(struct e1000_hw *hw);
 s32 e1000_check_reset_block_generic(struct e1000_hw *hw);
 s32 e1000_copper_link_autoneg(struct e1000_hw *hw);
 s32 e1000_copper_link_setup_igp(struct e1000_hw *hw);
 s32 e1000_copper_link_setup_m88(struct e1000_hw *hw);
 s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw);
 s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw);
+s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw);
 s32 e1000_get_cable_length_m88(struct e1000_hw *hw);
 s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw);
 s32 e1000_get_cfg_done_generic(struct e1000_hw *hw);
@@ -73,6 +75,7 @@
     u32 usec_interval, bool *success);
 s32 e1000_phy_init_script_igp3(struct e1000_hw *hw);
 enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id);
+s32 e1000_determine_phy_address(struct e1000_hw *hw);
 void e1000_power_up_phy_copper(struct e1000_hw *hw);
 void e1000_power_down_phy_copper(struct e1000_hw *hw);
 s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
@@ -112,7 +115,7 @@
 #define	IGP01E1000_PLHR_SS_DOWNGRADE	0x8000
 
 #define	IGP01E1000_PSSR_POLARITY_REVERSED	0x0002
-#define	IGP01E1000_PSSR_MDIX			0x0008
+#define	IGP01E1000_PSSR_MDIX			0x0800
 #define	IGP01E1000_PSSR_SPEED_MASK		0xC000
 #define	IGP01E1000_PSSR_SPEED_1000MBPS		0xC000
 
@@ -135,6 +138,8 @@
 #define	E1000_KMRNCTRLSTA_OFFSET_SHIFT	16
 #define	E1000_KMRNCTRLSTA_REN		0x00200000
 #define	E1000_KMRNCTRLSTA_DIAG_OFFSET	0x3	/* Kumeran Diagnostic */
+#define	E1000_KMRNCTRLSTA_TIMEOUTS	0x4	/* Kumeran Timeouts */
+#define	E1000_KMRNCTRLSTA_INBAND_PARAM	0x9	/* Kumeran InBand Parameters */
 #define	E1000_KMRNCTRLSTA_DIAG_NELPBK	0x1000	/* Nearend Loopback mode */
 
 #define	IFE_PHY_EXTENDED_STATUS_CONTROL	0x10
--- a/usr/src/uts/common/io/igb/igb_regs.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_regs.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms of the CDDL.
  */
 
-/* IntelVersion: 1.61 v2008-10-7 */
+/* IntelVersion: 1.70 v2-9-8_2009-6-12 */
 
 #ifndef _IGB_REGS_H
 #define	_IGB_REGS_H
@@ -426,6 +426,7 @@
 #define	E1000_GIOCTL		0x05B44 /* GIO Analog Control Register */
 #define	E1000_SCCTL		0x05B4C /* PCIc PLL Configuration Register */
 #define	E1000_GCR		0x05B00 /* PCI-Ex Control */
+#define	E1000_GCR2		0x05B64 /* PCI-Ex Control #2 */
 #define	E1000_GSCL_1		0x05B10 /* PCI-Ex Statistic Control #1 */
 #define	E1000_GSCL_2		0x05B14 /* PCI-Ex Statistic Control #2 */
 #define	E1000_GSCL_3		0x05B18 /* PCI-Ex Statistic Control #3 */
@@ -434,6 +435,8 @@
 #define	E1000_FACTPS		0x05B30
 #define	E1000_SWSM		0x05B50 /* SW Semaphore */
 #define	E1000_FWSM		0x05B54 /* FW Semaphore */
+/* Driver-only SW semaphore (not used by BOOT agents) */
+#define	E1000_SWSM2		0x05B58
 #define	E1000_DCA_ID		0x05B70 /* DCA Requester ID Information - RO */
 #define	E1000_DCA_CTRL		0x05B74 /* DCA Control - RW */
 #define	E1000_FFLT_DBG		0x05F04 /* Debug Register */
@@ -472,7 +475,6 @@
 #define	E1000_VFTE	0x00C90 /* VF Transmit Enables */
 #define	E1000_QDE	0x02408 /* Queue Drop Enable - RW */
 #define	E1000_DTXSWC	0x03500 /* DMA Tx Switch Control - RW */
-#define	E1000_VLVF	0x05D00 /* VLAN Virtual Machine Filter - RW */
 #define	E1000_RPLOLR	0x05AF0 /* Replication Offload - RW */
 #define	E1000_UTA	0x0A000 /* Unicast Table Array - RW */
 #define	E1000_IOVTCL	0x05BBC /* IOV Control Register */
@@ -483,12 +485,15 @@
 #define	E1000_VMBMEM(_n)	(0x00800 + (64 * (_n)))
 #define	E1000_VFVMBMEM(_n)	(0x00800 + (_n))
 #define	E1000_VMOLR(_n)		(0x05AD0 + (4 * (_n)))
+/* VLAN Virtual Machine Filter - RW */
+#define	E1000_VLVF(_n)		(0x05D00 + (4 * (_n)))
 
 /* Filtering Registers */
 #define	E1000_SAQF(_n)	(0x05980 + (4 * (_n))) /* Source Address Queue Fltr */
 #define	E1000_DAQF(_n)	(0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */
 #define	E1000_SPQF(_n)	(0x059C0 + (4 * (_n))) /* Source Port Queue Fltr */
 #define	E1000_FTQF(_n)	(0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */
+#define	E1000_TTQF(_n)	(0x059E0 + (4 * (_n))) /* 2-tuple Queue Fltr */
 #define	E1000_SYNQF(_n)	(0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */
 #define	E1000_ETQF(_n)	(0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
 
--- a/usr/src/uts/common/io/igb/igb_sw.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/igb/igb_sw.h	Mon Aug 17 09:14:35 2009 -0400
@@ -830,6 +830,10 @@
 /*
  * Function prototypes in e1000_osdep.c
  */
+void e1000_write_pci_cfg(struct e1000_hw *, uint32_t, uint16_t *);
+void e1000_read_pci_cfg(struct e1000_hw *, uint32_t, uint16_t *);
+int32_t e1000_read_pcie_cap_reg(struct e1000_hw *, uint32_t, uint16_t *);
+int32_t e1000_write_pcie_cap_reg(struct e1000_hw *, uint32_t, uint16_t *);
 void e1000_rar_clear(struct e1000_hw *hw, uint32_t);
 void e1000_rar_set_vmdq(struct e1000_hw *hw, const uint8_t *, uint32_t,
     uint32_t, uint8_t);
--- a/usr/src/uts/common/io/ixgbe/ixgbe_82598.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_82598.c	Mon Aug 17 09:14:35 2009 -0400
@@ -26,13 +26,14 @@
  * Use is subject to license terms.
  */
 
-/* IntelVersion: 1.138 v2-7-8_2009-4-7 */
+/* IntelVersion: 1.144 v2-9-1-1_2009-6-10_NSW1 */
 
 #include "ixgbe_type.h"
 #include "ixgbe_api.h"
 #include "ixgbe_common.h"
 #include "ixgbe_phy.h"
 
+u32 ixgbe_get_pcie_msix_count_82598(struct ixgbe_hw *hw);
 s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw);
 static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
     ixgbe_link_speed *speed, bool *autoneg);
@@ -59,6 +60,7 @@
     u8 *eeprom_data);
 u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw);
 s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw);
+void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw);
 
 /*
  * ixgbe_get_pcie_msix_count_82598 - Gets MSI-X vector count
@@ -113,6 +115,7 @@
 	    &ixgbe_get_supported_physical_layer_82598;
 	mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82598;
 	mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82598;
+	mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie_82598;
 
 	/* RAR, Multicast, VLAN */
 	mac->ops.set_vmdq = &ixgbe_set_vmdq_82598;
@@ -133,12 +136,6 @@
 	/* SFP+ Module */
 	phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_82598;
 
-	/*
-	 * Call PHY identify routine to assign the phy type for PHY init
-	 * and media type determination
-	 */
-	phy->ops.identify(hw);
-
 	/* Link */
 	mac->ops.check_link = &ixgbe_check_mac_link_82598;
 	mac->ops.setup_link = &ixgbe_setup_mac_link_82598;
@@ -184,6 +181,10 @@
 		phy->ops.get_firmware_version =
 		    &ixgbe_get_phy_firmware_version_tnx;
 		break;
+	case ixgbe_phy_aq:
+		phy->ops.get_firmware_version =
+		    &ixgbe_get_phy_firmware_version_aq;
+		break;
 	case ixgbe_phy_nl:
 		phy->ops.reset = &ixgbe_reset_phy_nl;
 
@@ -283,7 +284,8 @@
 
 	/* Detect if there is a copper PHY attached. */
 	if (hw->phy.type == ixgbe_phy_cu_unknown ||
-	    hw->phy.type == ixgbe_phy_tn) {
+	    hw->phy.type == ixgbe_phy_tn ||
+	    hw->phy.type == ixgbe_phy_aq) {
 		media_type = ixgbe_media_type_copper;
 		goto out;
 	}
@@ -411,7 +413,7 @@
 	}
 
 	/* Configure pause time (2 TCs per register) */
-	reg = IXGBE_READ_REG(hw, IXGBE_FCTTV(packetbuf_num));
+	reg = IXGBE_READ_REG(hw, IXGBE_FCTTV(packetbuf_num / 2));
 	if ((packetbuf_num & 1) == 0)
 		reg = (reg & 0xFFFF0000) | hw->fc.pause_time;
 	else
@@ -673,6 +675,7 @@
 ixgbe_reset_hw_82598(struct ixgbe_hw *hw)
 {
 	s32 status = IXGBE_SUCCESS;
+	s32 phy_status = IXGBE_SUCCESS;
 	u32 ctrl;
 	u32 gheccr;
 	u32 i;
@@ -720,13 +723,16 @@
 		/* PHY ops must be identified and initialized prior to reset */
 
 		/* Init PHY and function pointers, perform SFP setup */
-		status = hw->phy.ops.init(hw);
-		/* Do not return error if SFP module is not supported. */
-		status = IXGBE_SUCCESS;
+		phy_status = hw->phy.ops.init(hw);
+		if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+			goto reset_hw_out;
+		else if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT)
+			goto no_phy_reset;
 
 		hw->phy.ops.reset(hw);
 	}
 
+no_phy_reset:
 	/*
 	 * Prevent the PCI-E bus from from hanging by disabling PCI-E master
 	 * access and verify no pending requests before reset
@@ -776,14 +782,18 @@
 		IXGBE_WRITE_REG(hw, IXGBE_AUTOC, hw->mac.orig_autoc);
 	}
 
+	/* Store the permanent mac address */
+	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
 	/*
 	 * Store MAC address from RAR0, clear receive address registers, and
 	 * clear the multicast table
 	 */
 	hw->mac.ops.init_rx_addrs(hw);
 
-	/* Store the permanent mac address */
-	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+reset_hw_out:
+	if (phy_status != IXGBE_SUCCESS)
+		status = phy_status;
 
 	return (status);
 }
@@ -1107,3 +1117,33 @@
 out:
 	return (physical_layer);
 }
+
+/*
+ * ixgbe_set_lan_id_multi_port_pcie_82598 - Set LAN id for PCIe multiple
+ * port devices.
+ * @hw: pointer to the HW structure
+ *
+ * Calls common function and corrects issue with some single port devices
+ * that enable LAN1 but not LAN0.
+ */
+void
+ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw)
+{
+	struct ixgbe_bus_info *bus = &hw->bus;
+	u16 pci_gen, pci_ctrl2;
+
+	ixgbe_set_lan_id_multi_port_pcie(hw);
+
+	/* check if LAN0 is disabled */
+	hw->eeprom.ops.read(hw, IXGBE_PCIE_GENERAL_PTR, &pci_gen);
+	if ((pci_gen != 0) && (pci_gen != 0xFFFF)) {
+		hw->eeprom.ops.read(hw, pci_gen + IXGBE_PCIE_CTRL2, &pci_ctrl2);
+
+		/* if LAN0 is completely disabled force function to 0 */
+		if ((pci_ctrl2 & IXGBE_PCIE_CTRL2_LAN_DISABLE) &&
+		    !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DISABLE_SELECT) &&
+		    !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DUMMY_ENABLE)) {
+			bus->func = 0;
+		}
+	}
+}
--- a/usr/src/uts/common/io/ixgbe/ixgbe_82599.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_82599.c	Mon Aug 17 09:14:35 2009 -0400
@@ -26,13 +26,14 @@
  * Use is subject to license terms.
  */
 
-/* IntelVersion: 1.155 v2-7-8_2009-4-7 */
+/* IntelVersion: 1.176 v2-9-1-1_2009-6-10_NSW1 */
 
 #include "ixgbe_type.h"
 #include "ixgbe_api.h"
 #include "ixgbe_common.h"
 #include "ixgbe_phy.h"
 
+u32 ixgbe_get_pcie_msix_count_82599(struct ixgbe_hw *hw);
 s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw);
 s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
     ixgbe_link_speed *speed, bool *autoneg);
@@ -73,6 +74,7 @@
 s32 ixgbe_get_san_mac_addr_82599(struct ixgbe_hw *hw, u8 *san_mac_addr);
 s32 ixgbe_set_san_mac_addr_82599(struct ixgbe_hw *hw, u8 *san_mac_addr);
 s32 ixgbe_get_device_caps_82599(struct ixgbe_hw *hw, u16 *device_caps);
+static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw);
 
 void
 ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw)
@@ -115,6 +117,8 @@
 
 	/* Identify the PHY or SFP module */
 	ret_val = phy->ops.identify(hw);
+	if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		goto init_phy_ops_out;
 
 	/* Setup function pointers based on detected SFP module and speeds */
 	ixgbe_init_mac_link_ops_82599(hw);
@@ -137,10 +141,15 @@
 		phy->ops.get_firmware_version =
 		    &ixgbe_get_phy_firmware_version_tnx;
 		break;
+	case ixgbe_phy_aq:
+		phy->ops.get_firmware_version =
+		    &ixgbe_get_phy_firmware_version_aq;
+		break;
 	default:
 		break;
 	}
 
+init_phy_ops_out:
 	return (ret_val);
 }
 
@@ -163,16 +172,27 @@
 		if (ret_val != IXGBE_SUCCESS)
 			goto setup_sfp_out;
 
+		/* PHY config will finish before releasing the semaphore */
+		ret_val = ixgbe_acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
+		if (ret_val != IXGBE_SUCCESS) {
+			ret_val = IXGBE_ERR_SWFW_SYNC;
+			goto setup_sfp_out;
+		}
+
 		hw->eeprom.ops.read(hw, ++data_offset, &data_value);
 		while (data_value != 0xffff) {
 			IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value);
 			IXGBE_WRITE_FLUSH(hw);
 			hw->eeprom.ops.read(hw, ++data_offset, &data_value);
 		}
-		/* Now restart DSP */
-		IXGBE_WRITE_REG(hw, IXGBE_CORECTL, 0x00000102);
-		IXGBE_WRITE_REG(hw, IXGBE_CORECTL, 0x00000b1d);
-		IXGBE_WRITE_FLUSH(hw);
+		/* Now restart DSP by setting Restart_AN */
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC,
+		    (IXGBE_READ_REG(hw, IXGBE_AUTOC) | IXGBE_AUTOC_AN_RESTART));
+
+		/* Release the semaphore */
+		ixgbe_release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
+		/* Delay obtaining semaphore again to allow FW access */
+		msec_delay(hw->eeprom.semaphore_delay);
 	}
 
 setup_sfp_out:
@@ -368,7 +388,8 @@
 
 	/* Detect if there is a copper PHY attached. */
 	if (hw->phy.type == ixgbe_phy_cu_unknown ||
-	    hw->phy.type == ixgbe_phy_tn) {
+	    hw->phy.type == ixgbe_phy_tn ||
+	    hw->phy.type == ixgbe_phy_aq) {
 		media_type = ixgbe_media_type_copper;
 		goto out;
 	}
@@ -376,6 +397,7 @@
 	switch (hw->device_id) {
 	case IXGBE_DEV_ID_82599_KX4:
 	case IXGBE_DEV_ID_82599_KX4_SIK:
+	case IXGBE_DEV_ID_82599_XAUI_LOM:
 		/* Default device ID is mezzanine card KX/KX4 */
 		media_type = ixgbe_media_type_backplane;
 		break;
@@ -481,6 +503,7 @@
 	ixgbe_link_speed highest_link_speed = IXGBE_LINK_SPEED_UNKNOWN;
 	u32 speedcnt = 0;
 	u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
+	u32 i = 0;
 	bool link_up = false;
 	bool negotiation;
 
@@ -491,6 +514,25 @@
 
 	speed &= link_speed;
 
+	/* Set autoneg_advertised value based on input link speed */
+	hw->phy.autoneg_advertised = 0;
+
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+	/*
+	 * When the driver changes the link speeds that it can support,
+	 * it sets autotry_restart to true to indicate that we need to
+	 * initiate a new autotry session with the link partner.  To do
+	 * so, we set the speed then disable and re-enable the tx laser, to
+	 * alert the link partner that it also needs to restart autotry on its
+	 * end.  This is consistent with true clause 37 autoneg, which also
+	 * involves a loss of signal.
+	 */
+
 	/*
 	 * Try each speed one by one, highest priority first.  We do this in
 	 * software because 10gb fiber doesn't support speed autonegotiation.
@@ -507,7 +549,7 @@
 		if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up)
 			goto out;
 
-		/* Set hardware SDP's */
+		/* Set the module link speed */
 		esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5);
 		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
 
@@ -520,15 +562,35 @@
 		if (status != IXGBE_SUCCESS)
 			goto out;
 
-		msec_delay(100);
+		/* Flap the tx laser if it has not already been done */
+		if (hw->mac.autotry_restart) {
+			/* Disable tx laser; allow 100us to go dark per spec */
+			esdp_reg |= IXGBE_ESDP_SDP3;
+			IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+			usec_delay(100);
+
+			/* Enable tx laser; allow 2ms to light up per spec */
+			esdp_reg &= ~IXGBE_ESDP_SDP3;
+			IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+			msec_delay(2);
+
+			hw->mac.autotry_restart = false;
+		}
 
-		/* If we have link, just jump out */
-		status = ixgbe_check_link(hw, &link_speed, &link_up, false);
-		if (status != IXGBE_SUCCESS)
-			goto out;
+		/* The controller may take up to 500ms at 10g to acquire link */
+		for (i = 0; i < 5; i++) {
+			/* Wait for the link partner to also set speed */
+			msec_delay(100);
 
-		if (link_up)
-			goto out;
+			/* If we have link, just jump out */
+			status = ixgbe_check_link(hw, &link_speed,
+			    &link_up, false);
+			if (status != IXGBE_SUCCESS)
+				goto out;
+
+			if (link_up)
+				goto out;
+		}
 	}
 
 	if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
@@ -544,7 +606,7 @@
 		if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up)
 			goto out;
 
-		/* Set hardware SDP's */
+		/* Set the module link speed */
 		esdp_reg &= ~IXGBE_ESDP_SDP5;
 		esdp_reg |= IXGBE_ESDP_SDP5_DIR;
 		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
@@ -558,6 +620,22 @@
 		if (status != IXGBE_SUCCESS)
 			goto out;
 
+		/* Flap the tx laser if it has not already been done */
+		if (hw->mac.autotry_restart) {
+			/* Disable tx laser; allow 100us to go dark per spec */
+			esdp_reg |= IXGBE_ESDP_SDP3;
+			IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+			usec_delay(100);
+
+			/* Enable tx laser; allow 2ms to light up per spec */
+			esdp_reg &= ~IXGBE_ESDP_SDP3;
+			IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+			msec_delay(2);
+
+			hw->mac.autotry_restart = false;
+		}
+
+		/* Wait for the link partner to also set speed */
 		msec_delay(100);
 
 		/* If we have link, just jump out */
@@ -651,6 +729,8 @@
 	s32 status = IXGBE_SUCCESS;
 	u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
 	u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+	u32 start_autoc = autoc;
+	u32 orig_autoc = 0;
 	u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK;
 	u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
 	u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK;
@@ -667,15 +747,26 @@
 
 	if (speed == IXGBE_LINK_SPEED_UNKNOWN) {
 		status = IXGBE_ERR_LINK_SETUP;
-	} else if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR ||
+		goto out;
+	}
+
+	/*
+	 * Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support
+	 */
+	if (hw->mac.orig_link_settings_stored)
+		orig_autoc = hw->mac.orig_autoc;
+	else
+		orig_autoc = autoc;
+
+	if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR ||
 	    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
 	    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
 		/* Set KX4/KX/KR support according to speed requested */
 		autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP);
 		if (speed & IXGBE_LINK_SPEED_10GB_FULL)
-			if (autoc & IXGBE_AUTOC_KX4_SUPP)
+			if (orig_autoc & IXGBE_AUTOC_KX4_SUPP)
 				autoc |= IXGBE_AUTOC_KX4_SUPP;
-			if (autoc & IXGBE_AUTOC_KR_SUPP)
+			if (orig_autoc & IXGBE_AUTOC_KR_SUPP)
 				autoc |= IXGBE_AUTOC_KR_SUPP;
 		if (speed & IXGBE_LINK_SPEED_1GB_FULL)
 			autoc |= IXGBE_AUTOC_KX_SUPP;
@@ -701,7 +792,7 @@
 			}
 	}
 
-	if (status == IXGBE_SUCCESS) {
+	if (autoc != start_autoc) {
 		/* Restart link */
 		autoc |= IXGBE_AUTOC_AN_RESTART;
 		IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc);
@@ -803,12 +894,18 @@
 	/* Identify PHY and related function pointers */
 	status = hw->phy.ops.init(hw);
 
+	if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		goto reset_hw_out;
+
 	/* Setup SFP module if there is one present. */
 	if (hw->phy.sfp_setup_needed) {
 		status = hw->mac.ops.setup_sfp(hw);
 		hw->phy.sfp_setup_needed = false;
 	}
 
+	if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		goto reset_hw_out;
+
 	/* Reset PHY */
 	if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL)
 		hw->phy.ops.reset(hw);
@@ -877,6 +974,9 @@
 		}
 	}
 
+	/* Store the permanent mac address */
+	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
 	/*
 	 * Store MAC address from RAR0, clear receive address registers, and
 	 * clear the multicast table.  Also reset num_rar_entries to 128,
@@ -885,8 +985,8 @@
 	hw->mac.num_rar_entries = 128;
 	hw->mac.ops.init_rx_addrs(hw);
 
-	/* Store the permanent mac address */
-	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+	/* Store the permanent SAN mac address */
+	hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
 
 	/* Add the SAN MAC address to the RAR only if it's a valid address */
 	if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
@@ -897,6 +997,7 @@
 		hw->mac.num_rar_entries--;
 	}
 
+reset_hw_out:
 	return (status);
 }
 
@@ -1011,6 +1112,7 @@
 		}
 	} else {
 		DEBUGOUT1("RAR index %d is out of range.\n", rar);
+		return (IXGBE_ERR_PARAM);
 	}
 done:
 	return (IXGBE_SUCCESS);
@@ -1040,6 +1142,7 @@
 		}
 	} else {
 		DEBUGOUT1("RAR index %d is out of range.\n", rar);
+		return (IXGBE_ERR_PARAM);
 	}
 
 	return (IXGBE_SUCCESS);
@@ -1115,6 +1218,7 @@
 				regindex = first_empty_slot;
 			else {
 				DEBUGOUT("No space in VLVF.\n");
+				goto out;
 			}
 		}
 
@@ -1162,6 +1266,7 @@
 			IXGBE_WRITE_REG(hw, IXGBE_VLVF(regindex), 0);
 	}
 
+out:
 	return (IXGBE_SUCCESS);
 }
 
@@ -1211,16 +1316,75 @@
 s32
 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
 {
+	int i;
 	u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL);
 	fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE;
+
+	/*
+	 * Before starting reinitialization process,
+	 * FDIRCMD.CMD must be zero.
+	 */
+	for (i = 0; i < IXGBE_FDIRCMD_CMD_POLL; i++) {
+		if (!(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
+		    IXGBE_FDIRCMD_CMD_MASK))
+			break;
+		usec_delay(10);
+	}
+	if (i >= IXGBE_FDIRCMD_CMD_POLL) {
+		DEBUGOUT("Flow Director previous command isn't complete, "
+		    "aborting table re-initialization. \n");
+		return (IXGBE_ERR_FDIR_REINIT_FAILED);
+	}
+
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRFREE, 0);
 	IXGBE_WRITE_FLUSH(hw);
+	/*
+	 * 82599 adapters flow director init flow cannot be restarted,
+	 * Workaround 82599 silicon errata by performing the following steps
+	 * before re-writing the FDIRCTRL control register with the same value.
+	 * - write 1 to bit 8 of FDIRCMD register &
+	 * - write 0 to bit 8 of FDIRCMD register
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+	    (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) |
+	    IXGBE_FDIRCMD_CLEARHT));
+	IXGBE_WRITE_FLUSH(hw);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+	    (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
+	    ~IXGBE_FDIRCMD_CLEARHT));
+	IXGBE_WRITE_FLUSH(hw);
+	/*
+	 * Clear FDIR Hash register to clear any leftover hashes
+	 * waiting to be programmed.
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, 0x00);
+	IXGBE_WRITE_FLUSH(hw);
+
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Poll init-done after we write FDIRCTRL register */
+	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
+		if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
+		    IXGBE_FDIRCTRL_INIT_DONE)
+			break;
+		usec_delay(10);
+	}
+	if (i >= IXGBE_FDIR_INIT_DONE_POLL) {
+		DEBUGOUT("Flow Director Signature poll time exceeded!\n");
+		return (IXGBE_ERR_FDIR_REINIT_FAILED);
+	}
+
+	/* Clear FDIR statistics registers (read to clear) */
+	(void) IXGBE_READ_REG(hw, IXGBE_FDIRUSTAT);
+	(void) IXGBE_READ_REG(hw, IXGBE_FDIRFSTAT);
+	(void) IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
+	(void) IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
+	(void) IXGBE_READ_REG(hw, IXGBE_FDIRLEN);
 
 	return (IXGBE_SUCCESS);
 }
 
-#define	IXGBE_FDIR_INIT_DONE_POLL	10
 /*
  * ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters
  * @hw: pointer to hardware structure
@@ -1446,26 +1610,33 @@
 	 *    hash[15:0] = 0;
 	 *    for (i = 0; i < 351; i++) {
 	 *	if (int_key[i])
-	 *	hash ^= int_stream[(i + 15):i];
+	 *		hash ^= int_stream[(i + 15):i];
 	 *    }
 	 */
 
 	union {
-		u32	key[11];
-		u8	key_stream[44];
+		u64 fill[6];
+		u32 key[11];
+		u8 key_stream[44];
 	} tmp_key;
 
 	u8 *stream = (u8 *)atr_input;
 	u8 int_key[44];		/* upper-most bit unused */
 	u8 hash_str[46];	/* upper-most 2 bits unused */
 	u16 hash_result = 0;
-	u16 tmp = 0;
 	int i, j, k, h;
 
-	(void) memset(&tmp_key, 0, sizeof (tmp_key));
+	/*
+	 * Initialize the fill member to prevent warnings
+	 * on some compilers
+	 */
+	tmp_key.fill[0] = 0;
+
 	/* First load the temporary key stream */
-	for (i = 0; i < 11; i++)
-		tmp_key.key[i] = key;
+	for (i = 0; i < 6; i++) {
+		u64 fillkey = ((u64)key << 32) | key;
+		tmp_key.fill[i] = fillkey;
+	}
 
 	/*
 	 * Set the interim key for the hashing.  Bit 352 is unused, so we must
@@ -1473,9 +1644,9 @@
 	 */
 	int_key[0] = tmp_key.key_stream[0] >> 1;
 	for (i = 1, j = 0; i < 44; i++) {
-		int_key[i] = (tmp_key.key_stream[j] & 0x1) << 7;
+		unsigned int this_key = tmp_key.key_stream[j] << 7;
 		j++;
-		int_key[i] |= tmp_key.key_stream[j] >> 1;
+		int_key[i] = (u8)(this_key | (tmp_key.key_stream[j] >> 1));
 	}
 
 	/*
@@ -1484,11 +1655,11 @@
 	 */
 	hash_str[0] = (stream[40] & 0x7f) >> 1;
 	for (i = 1, j = 40; i < 46; i++) {
-		hash_str[i] = (stream[j] & 0x1) << 7;
+		unsigned int this_str = stream[j] << 7;
 		j++;
 		if (j > 41)
 			j = 0;
-		hash_str[i] |= stream[j] >> 1;
+		hash_str[i] = (u8)(this_str | (stream[j] >> 1));
 	}
 
 	/*
@@ -1498,7 +1669,7 @@
 	 */
 	for (i = 45, j = 43, k = 0; k < 351 && i >= 2 && j >= 0; i--, j--) {
 		for (h = 0; h < 8 && k < 351; h++, k++) {
-			if ((int_key[j] >> h) & 0x1) {
+			if (int_key[j] & (1 << h)) {
 				/*
 				 * Key bit is set, XOR in the current 16-bit
 				 * string.  Example of processing:
@@ -1513,11 +1684,11 @@
 				 *		(hash_str[i - 1] & 0xff << 1) |
 				 *		(hash_str[i] & 0x80 >> 7)
 				 */
-				tmp = ((hash_str[i] & (0xff << h)) >> h);
-				tmp |= ((hash_str[i - 1] & 0xff) << (8 - h));
-				tmp |= (hash_str[i - 2] & (0xff >> (8 - h)))
+				int tmp = (hash_str[i] >> h);
+				tmp |= (hash_str[i - 1] << (8 - h));
+				tmp |= (int)(hash_str[i - 2] & ((1 << h) - 1))
 				    << (16 - h);
-				hash_result ^= tmp;
+				hash_result ^= (u16)tmp;
 			}
 		}
 	}
@@ -2152,6 +2323,12 @@
 	}
 	IXGBE_WRITE_FLUSH(hw);
 
+	/* We need to run link autotry after the driver loads */
+	hw->mac.autotry_restart = true;
+
+	if (ret_val == IXGBE_SUCCESS)
+		ret_val = ixgbe_verify_fw_version_82599(hw);
+
 	return (ret_val);
 }
 
@@ -2206,6 +2383,7 @@
 	hw->phy.ops.identify(hw);
 
 	if (hw->phy.type == ixgbe_phy_tn ||
+	    hw->phy.type == ixgbe_phy_tn ||
 	    hw->phy.type == ixgbe_phy_cu_unknown) {
 		hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
 		    IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
@@ -2225,14 +2403,17 @@
 			physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX |
 			    IXGBE_PHYSICAL_LAYER_1000BASE_BX;
 			goto out;
-		} else
+		} else {
 			/* SFI mode so read SFP module */
 			goto sfp_check;
+		}
 	case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
 		if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_CX4)
 			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4;
 		else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_KX4)
 			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
+		else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_XAUI)
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_XAUI;
 		goto out;
 	case IXGBE_AUTOC_LMS_10G_SERIAL:
 		if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_KR) {
@@ -2458,3 +2639,50 @@
 san_mac_addr_out:
 	return (status);
 }
+
+/*
+ * ixgbe_verify_fw_version_82599 - verify fw version for 82599
+ * @hw: pointer to hardware structure
+ *
+ * Verifies that installed the firmware version is 0.6 or higher
+ * for SFI devices. All 82599 SFI devices should have version 0.6 or higher.
+ *
+ * Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or
+ * if the FW version is not supported.
+ */
+static s32
+ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw)
+{
+	s32 status = IXGBE_ERR_EEPROM_VERSION;
+	u16 fw_offset, fw_ptp_cfg_offset;
+	u16 fw_version = 0;
+
+	/* firmware check is only necessary for SFI devices */
+	if (hw->phy.media_type != ixgbe_media_type_fiber) {
+		status = IXGBE_SUCCESS;
+		goto fw_version_out;
+	}
+
+	/* get the offset to the Firmware Module block */
+	hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset);
+
+	if ((fw_offset == 0) || (fw_offset == 0xFFFF))
+		goto fw_version_out;
+
+	/* get the offset to the Pass Through Patch Configuration block */
+	hw->eeprom.ops.read(hw, (fw_offset +
+	    IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR), &fw_ptp_cfg_offset);
+
+	if ((fw_ptp_cfg_offset == 0) || (fw_ptp_cfg_offset == 0xFFFF))
+		goto fw_version_out;
+
+	/* get the firmware version */
+	hw->eeprom.ops.read(hw, (fw_ptp_cfg_offset + IXGBE_FW_PATCH_VERSION_4),
+	    &fw_version);
+
+	if (fw_version > 0x5)
+		status = IXGBE_SUCCESS;
+
+fw_version_out:
+	return (status);
+}
--- a/usr/src/uts/common/io/ixgbe/ixgbe_api.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_api.c	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms.
  */
 
-/* IntelVersion: 1.117 v2-7-8_2009-4-7 */
+/* IntelVersion: 1.120 v2-9-1-1_2009-6-10_NSW1 */
 
 #include "ixgbe_api.h"
 #include "ixgbe_common.h"
@@ -102,6 +102,7 @@
 			break;
 		case IXGBE_DEV_ID_82599_KX4:
 		case IXGBE_DEV_ID_82599_KX4_SIK:
+		case IXGBE_DEV_ID_82599_XAUI_LOM:
 		case IXGBE_DEV_ID_82599_SFP:
 		case IXGBE_DEV_ID_82599_SPW:
 		case IXGBE_DEV_ID_82599_CX4:
@@ -986,3 +987,33 @@
 	return ixgbe_call_func(hw, hw->mac.ops.enable_rx_dma,
 	    (hw, regval), IXGBE_NOT_IMPLEMENTED);
 }
+
+/*
+ * ixgbe_acquire_swfw_semaphore - Acquire SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to acquire
+ *
+ * Acquires the SWFW semaphore through SW_FW_SYNC register for the specified
+ * function (CSR, PHY0, PHY1, EEPROM, Flash)
+ */
+s32
+ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.acquire_swfw_sync,
+	    (hw, mask), IXGBE_NOT_IMPLEMENTED);
+}
+
+/*
+ * ixgbe_release_swfw_semaphore - Release SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to release
+ *
+ * Releases the SWFW semaphore through SW_FW_SYNC register for the specified
+ * function (CSR, PHY0, PHY1, EEPROM, Flash)
+ */
+void
+ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask)
+{
+	if (hw->mac.ops.release_swfw_sync)
+		hw->mac.ops.release_swfw_sync(hw, mask);
+}
--- a/usr/src/uts/common/io/ixgbe/ixgbe_api.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_api.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms.
  */
 
-/* IntelVersion: 1.73 v2-7-8_2009-4-7  */
+/* IntelVersion: 1.74 v2-9-1-1_2009-6-10_NSW1 */
 
 #ifndef _IXGBE_API_H
 #define	_IXGBE_API_H
@@ -147,5 +147,7 @@
 s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
 s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
 s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps);
+s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask);
+void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask);
 
 #endif /* _IXGBE_API_H */
--- a/usr/src/uts/common/io/ixgbe/ixgbe_common.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_common.c	Mon Aug 17 09:14:35 2009 -0400
@@ -25,7 +25,7 @@
  * Use is subject to license terms.
  */
 
-/* IntelVersion: 1.199 v2-7-8_2009-4-7 */
+/* IntelVersion: 1.206 v2-9-1-1_2009-6-10_NSW1 */
 
 #include "ixgbe_common.h"
 #include "ixgbe_api.h"
@@ -83,6 +83,8 @@
 	mac->ops.stop_adapter = &ixgbe_stop_adapter_generic;
 	mac->ops.get_bus_info = &ixgbe_get_bus_info_generic;
 	mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie;
+	mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync;
+	mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync;
 
 	/* LEDs */
 	mac->ops.led_on = &ixgbe_led_on_generic;
@@ -136,9 +138,6 @@
 
 	/* PHY ops initialization must be done in reset_hw() */
 
-	/* Identify the PHY */
-	hw->phy.ops.identify(hw);
-
 	/* Clear the VLAN filter table */
 	hw->mac.ops.clear_vfta(hw);
 
@@ -624,8 +623,6 @@
 		ixgbe_shift_out_eeprom_bits(hw, data, 16);
 		ixgbe_standby_eeprom(hw);
 
-		msec_delay(hw->eeprom.semaphore_delay);
-
 		/* Done with writing - release the EEPROM */
 		ixgbe_release_eeprom(hw);
 	}
@@ -820,13 +817,10 @@
 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw)
 {
 	s32 status = IXGBE_ERR_EEPROM;
-	u32 timeout;
+	u32 timeout = 2000;
 	u32 i;
 	u32 swsm;
 
-	/* Set timeout value based on size of EEPROM */
-	timeout = hw->eeprom.word_size + 1;
-
 	/* Get SMBI software semaphore between device drivers first */
 	for (i = 0; i < timeout; i++) {
 		/*
@@ -838,7 +832,7 @@
 			status = IXGBE_SUCCESS;
 			break;
 		}
-		msec_delay(1);
+		usec_delay(50);
 	}
 
 	/* Now get the semaphore between SW/FW through the SWESMBI bit */
@@ -1114,6 +1108,9 @@
 	IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
 
 	ixgbe_release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+
+	/* Delay before attempt to obtain semaphore again to allow FW access */
+	msec_delay(hw->eeprom.semaphore_delay);
 }
 
 /*
@@ -1685,6 +1682,7 @@
 	s32 ret_val = IXGBE_SUCCESS;
 	u32 mflcn_reg, fccfg_reg;
 	u32 reg;
+	u32 rx_pba_size;
 
 	DEBUGFUNC("ixgbe_fc_enable_generic");
 
@@ -1751,22 +1749,42 @@
 	IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg);
 	IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg);
 
-	/* Set up and enable Rx high/low water mark thresholds, enable XON. */
-	if (hw->fc.current_mode & ixgbe_fc_tx_pause) {
-		if (hw->fc.send_xon) {
-			IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(packetbuf_num),
-			    (hw->fc.low_water | IXGBE_FCRTL_XONE));
-		} else {
-			IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(packetbuf_num),
-			    hw->fc.low_water);
+	reg = IXGBE_READ_REG(hw, IXGBE_MTQC);
+	/* Thresholds are different for link flow control when in DCB mode */
+	if (reg & IXGBE_MTQC_RT_ENA) {
+		rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(packetbuf_num));
+
+		/* Always disable XON for LFC when in DCB mode */
+		reg = (rx_pba_size >> 5) & 0xFFE0;
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(packetbuf_num), reg);
+
+		reg = (rx_pba_size >> 2) & 0xFFE0;
+		if (hw->fc.current_mode & ixgbe_fc_tx_pause)
+			reg |= IXGBE_FCRTH_FCEN;
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(packetbuf_num), reg);
+	} else {
+		/*
+		 * Set up and enable Rx high/low water mark thresholds,
+		 * enable XON.
+		 */
+		if (hw->fc.current_mode & ixgbe_fc_tx_pause) {
+			if (hw->fc.send_xon) {
+				IXGBE_WRITE_REG(hw,
+				    IXGBE_FCRTL_82599(packetbuf_num),
+				    (hw->fc.low_water | IXGBE_FCRTL_XONE));
+			} else {
+				IXGBE_WRITE_REG(hw,
+				    IXGBE_FCRTL_82599(packetbuf_num),
+				    hw->fc.low_water);
+			}
+
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(packetbuf_num),
+			    (hw->fc.high_water | IXGBE_FCRTH_FCEN));
 		}
-
-		IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(packetbuf_num),
-		    (hw->fc.high_water | IXGBE_FCRTH_FCEN));
 	}
 
 	/* Configure pause time (2 TCs per register) */
-	reg = IXGBE_READ_REG(hw, IXGBE_FCTTV(packetbuf_num));
+	reg = IXGBE_READ_REG(hw, IXGBE_FCTTV(packetbuf_num / 2));
 	if ((packetbuf_num & 1) == 0)
 		reg = (reg & 0xFFFF0000) | hw->fc.pause_time;
 	else
@@ -2134,6 +2152,7 @@
 	hw->mac.ops.check_link(hw, &speed, &link_up, false);
 
 	if (!link_up) {
+		autoc_reg |= IXGBE_AUTOC_AN_RESTART;
 		autoc_reg |= IXGBE_AUTOC_FLU;
 		IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
 		msec_delay(10);
--- a/usr/src/uts/common/io/ixgbe/ixgbe_common.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_common.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms.
  */
 
-/* IntelVersion: 1.90 v2-7-8_2009-4-7 */
+/* IntelVersion: 1.90 v2-9-1-1_2009-6-10_NSW1 */
 
 #ifndef _IXGBE_COMMON_H
 #define	_IXGBE_COMMON_H
--- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1212,7 +1212,7 @@
 ixgbe_chip_start(ixgbe_t *ixgbe)
 {
 	struct ixgbe_hw *hw = &ixgbe->hw;
-	int i;
+	int ret_val, i;
 
 	ASSERT(mutex_owned(&ixgbe->gen_lock));
 
@@ -1237,9 +1237,17 @@
 	/*
 	 * Configure/Initialize hardware
 	 */
-	if (ixgbe_init_hw(hw) != IXGBE_SUCCESS) {
-		ixgbe_error(ixgbe, "Failed to initialize hardware");
-		return (IXGBE_FAILURE);
+	ret_val = ixgbe_init_hw(hw);
+	if (ret_val != IXGBE_SUCCESS) {
+		if (ret_val == IXGBE_ERR_EEPROM_VERSION) {
+			ixgbe_error(ixgbe,
+			    "This 82599 device is pre-release and contains"
+			    " outdated firmware, please contact your hardware"
+			    " vendor for a replacement.");
+		} else {
+			ixgbe_error(ixgbe, "Failed to initialize hardware");
+			return (IXGBE_FAILURE);
+		}
 	}
 
 	/*
@@ -2476,8 +2484,8 @@
 	    DEFAULT_RX_LIMIT_PER_INTR);
 
 	/*
-	 * Interrupt throttling is per 256ns in 82598 and 2 usec increments
-	 * in 82599.
+	 * Interrupt throttling is per 256ns in 82598 and 2.048usec
+	 * (256ns * 8) increments in 82599.
 	 */
 	switch (hw->mac.type) {
 	case ixgbe_mac_82598EB:
@@ -2491,6 +2499,14 @@
 		    PROP_INTR_THROTTLING,
 		    MIN_INTR_THROTTLING, MAX_INTR_THROTTLING_82599,
 		    DEFAULT_INTR_THROTTLING_82599);
+
+		/*
+		 * 82599 requires the interupt throttling rate is
+		 * a multiple of 8. This is enforced by the register
+		 * definiton.
+		 */
+		ixgbe->intr_throttling[0] = ixgbe->intr_throttling[0] &
+		    0xFF8;
 		break;
 	}
 }
@@ -2740,7 +2756,9 @@
 	result = B_FALSE;
 	for (i = 0; i < ixgbe->num_tx_rings; i++) {
 		tx_ring = &ixgbe->tx_rings[i];
-		tx_ring->tx_recycle(tx_ring);
+		if (tx_ring->tbd_free <= tx_ring->recycle_thresh) {
+			tx_ring->tx_recycle(tx_ring);
+		}
 
 		if (tx_ring->recycle_fail > 0)
 			tx_ring->stall_watchdog++;
@@ -3296,8 +3314,8 @@
 		if ((ddi_taskq_dispatch(ixgbe->lsc_taskq,
 		    ixgbe_sfp_check, (void *)ixgbe,
 		    DDI_NOSLEEP)) != DDI_SUCCESS) {
-			ixgbe_log(ixgbe,
-			    "No memory available to dispatch taskq");
+			ixgbe_log(ixgbe, "No memory available to dispatch "
+			    "taskq for SFP check");
 		}
 	}
 }
@@ -4633,6 +4651,13 @@
 	ixgbe_enable_ivar(ixgbe, r_idx, 0);
 
 	BT_SET(ixgbe->vect_map[v_idx].rx_map, r_idx);
+
+	/*
+	 * To trigger a Rx interrupt to on this ring
+	 */
+	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_EICS, (1 << v_idx));
+	IXGBE_WRITE_FLUSH(&ixgbe->hw);
+
 	mutex_exit(&ixgbe->gen_lock);
 
 	return (0);
--- a/usr/src/uts/common/io/ixgbe/ixgbe_phy.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_phy.c	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms.
  */
 
-/* IntelVersion: 1.83 v2-7-8_2009-4-7 */
+/* IntelVersion: 1.94 v2-9-1-1_2009-6-10_NSW1 */
 
 #include "ixgbe_api.h"
 #include "ixgbe_common.h"
@@ -189,6 +189,9 @@
 	case TN1010_PHY_ID:
 		phy_type = ixgbe_phy_tn;
 		break;
+	case AQ1002_PHY_ID:
+		phy_type = ixgbe_phy_aq;
+		break;
 	case QT2022_PHY_ID:
 		phy_type = ixgbe_phy_qt;
 		break;
@@ -450,7 +453,7 @@
 s32
 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
 {
-	s32 status = IXGBE_NOT_IMPLEMENTED;
+	s32 status = IXGBE_SUCCESS;
 	u32 time_out;
 	u32 max_time_out = 10;
 	u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
@@ -491,7 +494,6 @@
 
 		autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE;
 		if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE) {
-			status = IXGBE_SUCCESS;
 			break;
 		}
 	}
@@ -525,10 +527,14 @@
 	if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
 		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
 	}
+
 	if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
 		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
 	}
 
+	if (speed & IXGBE_LINK_SPEED_100_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL;
+
 	/* Setup link based on the new speed settings */
 	hw->phy.ops.setup_link(hw);
 
@@ -560,6 +566,8 @@
 			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
 		if (speed_ability & IXGBE_MDIO_PHY_SPEED_1G)
 			*speed |= IXGBE_LINK_SPEED_1GB_FULL;
+		if (speed_ability & IXGBE_MDIO_PHY_SPEED_100M)
+			*speed |= IXGBE_LINK_SPEED_100_FULL;
 	}
 
 	return (status);
@@ -631,6 +639,22 @@
 }
 
 /*
+ * ixgbe_get_phy_firmware_version_aq - Gets the PHY Firmware Version
+ * @hw: pointer to hardware structure
+ * @firmware_version: pointer to the PHY Firmware Version
+ */
+s32
+ixgbe_get_phy_firmware_version_aq(struct ixgbe_hw *hw, u16 *firmware_version)
+{
+	s32 status = IXGBE_SUCCESS;
+
+	status = hw->phy.ops.read_reg(hw, AQ_FW_REV,
+	    IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, firmware_version);
+
+	return (status);
+}
+
+/*
  * ixgbe_reset_phy_nl - Performs a PHY reset
  * @hw: pointer to hardware structure
  */
@@ -743,9 +767,15 @@
 	u8 comp_codes_1g = 0;
 	u8 comp_codes_10g = 0;
 	u8 oui_bytes[3] = {0, 0, 0};
-	u8 transmission_media = 0;
+	u8 cable_tech = 0;
 	u16 enforce_sfp = 0;
 
+	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) {
+		hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+		status = IXGBE_ERR_SFP_NOT_PRESENT;
+		goto out;
+	}
+
 	status = hw->phy.ops.read_i2c_eeprom(hw,
 	    IXGBE_SFF_IDENTIFIER, &identifier);
 
@@ -762,13 +792,16 @@
 	/* LAN ID is needed for sfp_type determination */
 	hw->mac.ops.set_lan_id(hw);
 
-	if (identifier == IXGBE_SFF_IDENTIFIER_SFP) {
+	if (identifier != IXGBE_SFF_IDENTIFIER_SFP) {
+		hw->phy.type = ixgbe_phy_sfp_unsupported;
+		status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+	} else {
 		hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_1GBE_COMP_CODES,
 		    &comp_codes_1g);
 		hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_10GBE_COMP_CODES,
 		    &comp_codes_10g);
-		hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_TRANSMISSION_MEDIA,
-		    &transmission_media);
+		hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_CABLE_TECHNOLOGY,
+		    &cable_tech);
 
 		/*
 		 * ID  Module
@@ -782,7 +815,7 @@
 		 * 6	SFP_SR/LR_CORE1 - 82599-specific
 		 */
 		if (hw->mac.type == ixgbe_mac_82598EB) {
-			if (transmission_media & IXGBE_SFF_TWIN_AX_CAPABLE)
+			if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
 				hw->phy.sfp_type = ixgbe_sfp_type_da_cu;
 			else if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)
 				hw->phy.sfp_type = ixgbe_sfp_type_sr;
@@ -791,7 +824,7 @@
 			else
 				hw->phy.sfp_type = ixgbe_sfp_type_unknown;
 		} else if (hw->mac.type == ixgbe_mac_82599EB) {
-			if (transmission_media & IXGBE_SFF_TWIN_AX_CAPABLE)
+			if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
 				if (hw->bus.lan_id == 0)
 					hw->phy.sfp_type =
 					    ixgbe_sfp_type_da_cu_core0;
@@ -820,6 +853,7 @@
 			hw->phy.sfp_setup_needed = true;
 
 		/* Determine if the SFP+ PHY is dual speed or not. */
+		hw->phy.multispeed_fiber = false;
 		if (((comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) &&
 		    (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)) ||
 		    ((comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) &&
@@ -844,8 +878,7 @@
 
 			switch (vendor_oui) {
 			case IXGBE_SFF_VENDOR_OUI_TYCO:
-				if (transmission_media &
-				    IXGBE_SFF_TWIN_AX_CAPABLE)
+				if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
 					hw->phy.type = ixgbe_phy_tw_tyco;
 				break;
 			case IXGBE_SFF_VENDOR_OUI_FTL:
@@ -858,8 +891,7 @@
 				hw->phy.type = ixgbe_phy_sfp_intel;
 				break;
 			default:
-				if (transmission_media &
-				    IXGBE_SFF_TWIN_AX_CAPABLE)
+				if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
 					hw->phy.type = ixgbe_phy_tw_unknown;
 				else
 					hw->phy.type = ixgbe_phy_sfp_unknown;
@@ -867,11 +899,21 @@
 			}
 		}
 
-		if (hw->mac.type == ixgbe_mac_82598EB ||
-		    (hw->phy.sfp_type != ixgbe_sfp_type_sr &&
-		    hw->phy.sfp_type != ixgbe_sfp_type_lr &&
-		    hw->phy.sfp_type != ixgbe_sfp_type_srlr_core0 &&
-		    hw->phy.sfp_type != ixgbe_sfp_type_srlr_core1)) {
+		/* All passive DA cables are supported */
+		if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) {
+			status = IXGBE_SUCCESS;
+			goto out;
+		}
+
+		/* 1G SFP modules are not supported */
+		if (comp_codes_10g == 0) {
+			hw->phy.type = ixgbe_phy_sfp_unsupported;
+			status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+			goto out;
+		}
+
+		/* Anything else 82598-based is supported */
+		if (hw->mac.type == ixgbe_mac_82598EB) {
 			status = IXGBE_SUCCESS;
 			goto out;
 		}
@@ -1009,7 +1051,7 @@
     u8 dev_addr, u8 *data)
 {
 	s32 status = IXGBE_SUCCESS;
-	u32 max_retry = 1;
+	u32 max_retry = 10;
 	u32 retry = 0;
 	u16 swfw_mask = 0;
 	bool nack = 1;
@@ -1021,12 +1063,12 @@
 	else
 		swfw_mask = IXGBE_GSSR_PHY0_SM;
 
-	if (ixgbe_acquire_swfw_sync(hw, swfw_mask) != IXGBE_SUCCESS) {
-		status = IXGBE_ERR_SWFW_SYNC;
-		goto read_byte_out;
-	}
+	do {
+		if (ixgbe_acquire_swfw_sync(hw, swfw_mask) != IXGBE_SUCCESS) {
+			status = IXGBE_ERR_SWFW_SYNC;
+			goto read_byte_out;
+		}
 
-	do {
 		ixgbe_i2c_start(hw);
 
 		/* Device Address and write indication */
@@ -1069,6 +1111,8 @@
 		break;
 
 fail:
+		ixgbe_release_swfw_sync(hw, swfw_mask);
+		msec_delay(100);
 		ixgbe_i2c_bus_clear(hw);
 		retry++;
 		if (retry < max_retry)
--- a/usr/src/uts/common/io/ixgbe/ixgbe_phy.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_phy.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms.
  */
 
-/* IntelVersion: 1.32 v2-7-8_2009-4-7 */
+/* IntelVersion: 1.34 v2-9-1-1_2009-6-10_NSW1 */
 
 #ifndef _IXGBE_PHY_H
 #define	_IXGBE_PHY_H
@@ -43,10 +43,10 @@
 #define	IXGBE_SFF_VENDOR_OUI_BYTE2	0x27
 #define	IXGBE_SFF_1GBE_COMP_CODES	0x6
 #define	IXGBE_SFF_10GBE_COMP_CODES	0x3
-#define	IXGBE_SFF_TRANSMISSION_MEDIA	0x9
+#define	IXGBE_SFF_CABLE_TECHNOLOGY	0x8
 
 /* Bitmasks */
-#define	IXGBE_SFF_TWIN_AX_CAPABLE	0x80
+#define	IXGBE_SFF_DA_PASSIVE_CABLE	0x4
 #define	IXGBE_SFF_1GBASESX_CAPABLE	0x1
 #define	IXGBE_SFF_1GBASELX_CAPABLE	0x2
 #define	IXGBE_SFF_10GBASESR_CAPABLE	0x10
@@ -102,6 +102,8 @@
     ixgbe_link_speed *speed, bool *link_up);
 s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw,
     u16 *firmware_version);
+s32 ixgbe_get_phy_firmware_version_aq(struct ixgbe_hw *hw,
+    u16 *firmware_version);
 
 s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw);
 s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw);
--- a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h	Mon Aug 17 09:14:35 2009 -0400
@@ -112,8 +112,8 @@
 
 #define	MAX_MTU				16366
 #define	MAX_RX_LIMIT_PER_INTR		4096
-#define	MAX_INTR_THROTTLING_82598	65535
-#define	MAX_INTR_THROTTLING_82599	0x7FC
+#define	MAX_INTR_THROTTLING_82598	0xFFFF
+#define	MAX_INTR_THROTTLING_82599	0xFF8
 
 #define	MAX_RX_COPY_THRESHOLD		9216
 #define	MAX_TX_COPY_THRESHOLD		9216
@@ -147,7 +147,7 @@
 #define	DEFAULT_MTU			ETHERMTU
 #define	DEFAULT_RX_LIMIT_PER_INTR	256
 #define	DEFAULT_INTR_THROTTLING_82598	200	/* In unit of 256 nsec */
-#define	DEFAULT_INTR_THROTTLING_82599	26	/* In unit of 2 usec */
+#define	DEFAULT_INTR_THROTTLING_82599	200	/* In unit of 256 nsec */
 #define	DEFAULT_RX_COPY_THRESHOLD	128
 #define	DEFAULT_TX_COPY_THRESHOLD	512
 #define	DEFAULT_TX_RECYCLE_THRESHOLD	(MAX_COOKIE + 1)
--- a/usr/src/uts/common/io/ixgbe/ixgbe_tx.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_tx.c	Mon Aug 17 09:14:35 2009 -0400
@@ -87,7 +87,7 @@
 	size_t mbsize;
 	int desc_num;
 	boolean_t copy_done, eop;
-	mblk_t *current_mp, *next_mp, *nmp;
+	mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL;
 	tx_control_block_t *tcb;
 	ixgbe_tx_context_t tx_context, *ctx;
 	link_list_t pending_list;
@@ -400,34 +400,54 @@
 		/*
 		 * pull up the mblk and send it out with bind way
 		 */
-		if ((nmp = msgpullup(mp, -1)) == NULL) {
-			freemsg(mp);
-			return (NULL);
-		} else {
-			freemsg(mp);
-			mp = nmp;
+		if ((pull_mp = msgpullup(mp, -1)) == NULL) {
+			tx_ring->reschedule = B_TRUE;
+			return (mp);
 		}
 
 		LINK_LIST_INIT(&pending_list);
+		desc_total = 0;
+
+		/*
+		 * if the packet is a LSO packet, we simply
+		 * transmit the header in one descriptor using the copy way
+		 */
+		if ((ctx != NULL) && ctx->lso_flag) {
+			hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len +
+			    ctx->l4_hdr_len;
+
+			tcb = ixgbe_get_free_list(tx_ring);
+			if (tcb == NULL) {
+				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
+				goto tx_failure;
+			}
+			desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp,
+			    hdr_len, B_TRUE);
+			LIST_PUSH_TAIL(&pending_list, &tcb->link);
+			desc_total  += desc_num;
+
+			pull_mp->b_rptr += hdr_len;
+		}
+
 		tcb = ixgbe_get_free_list(tx_ring);
 		if (tcb == NULL) {
 			IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
-			freemsg(mp);
-			return (NULL);
+			goto tx_failure;
+		}
+		if ((ctx != NULL) && ctx->lso_flag) {
+			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
+			    mbsize - hdr_len);
+		} else {
+			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
+			    mbsize);
+		}
+		if (desc_num < 0) {
+			goto tx_failure;
 		}
 		LIST_PUSH_TAIL(&pending_list, &tcb->link);
 
-		desc_num = ixgbe_tx_bind(tx_ring, tcb, mp, mbsize);
-		if ((desc_num < 0) ||
-		    ((desc_num + 1) > IXGBE_TX_DESC_LIMIT)) {
-			ixgbe_free_tcb(tcb);
-			ixgbe_put_free_list(tx_ring, &pending_list);
-			freemsg(mp);
-			return (NULL);
-		}
-
-		desc_total = desc_num;
-		tcb->mp = mp;
+		desc_total += desc_num;
+		tcb->mp = pull_mp;
 	}
 
 	/*
@@ -462,10 +482,25 @@
 
 	mutex_exit(&tx_ring->tx_lock);
 
+	/*
+	 * now that the transmission succeeds, need to free the original
+	 * mp if we used the pulling up mblk for transmission.
+	 */
+	if (pull_mp) {
+		freemsg(mp);
+	}
+
 	return (NULL);
 
 tx_failure:
 	/*
+	 * If transmission fails, need to free the pulling up mblk.
+	 */
+	if (pull_mp) {
+		freemsg(pull_mp);
+	}
+
+	/*
 	 * Discard the mblk and free the used resources
 	 */
 	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
--- a/usr/src/uts/common/io/ixgbe/ixgbe_type.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_type.h	Mon Aug 17 09:14:35 2009 -0400
@@ -26,7 +26,7 @@
  * Use is subject to license terms.
  */
 
-/* IntelVersion: 1.316 v2-7-8_2009-4-7 */
+/* IntelVersion: 1.338 v2-9-1-1_2009-6-10_NSW1 */
 
 #ifndef _IXGBE_TYPE_H
 #define	_IXGBE_TYPE_H
@@ -53,6 +53,7 @@
 #define	IXGBE_DEV_ID_82599_CX4			0x10F9
 #define	IXGBE_DEV_ID_82599_SFP			0x10FB
 #define	IXGBE_DEV_ID_82599_SPW			0x1507
+#define	IXGBE_DEV_ID_82599_XAUI_LOM		0x10FC
 
 /* General Registers */
 #define	IXGBE_CTRL	0x00000
@@ -113,7 +114,7 @@
 #define	IXGBE_MAX_EITR		0x00000FF8
 #define	IXGBE_MIN_EITR		8
 #define	IXGBE_EITR(_i)	(((_i) <= 23) ? \
-	(0x00820 + ((_i) * 4)) : (0x012300 + ((_i) * 4)))
+	(0x00820 + ((_i) * 4)) : (0x012300 + (((_i) - 24) * 4)))
 #define	IXGBE_EITR_ITR_INT_MASK	0x00000FF8
 #define	IXGBE_EITR_LLI_MOD	0x00008000
 #define	IXGBE_EITR_CNT_WDIS	0x80000000
@@ -232,10 +233,10 @@
 #define	IXGBE_L34T_IMIR(_i)	(0x0E800 + ((_i) * 4))
 				/* 128 of these (0-127) */
 #define	IXGBE_LLITHRESH	0x0EC90
-#define	IXGBE_VMD_CTL	0x0581C
 #define	IXGBE_IMIR(_i)	(0x05A80 + ((_i) * 4))  /* 8 of these (0-7) */
 #define	IXGBE_IMIREXT(_i) (0x05AA0 + ((_i) * 4))  /* 8 of these (0-7) */
 #define	IXGBE_IMIRVP	0x05AC0
+#define	IXGBE_VMD_CTL	0x0581C
 #define	IXGBE_RETA(_i)	(0x05C00 + ((_i) * 4))  /* 32 of these (0-31) */
 #define	IXGBE_RSSRK(_i)	(0x05C80 + ((_i) * 4))  /* 10 of these (0-9) */
 
@@ -279,6 +280,7 @@
 #define	IXGBE_DTXCTL	0x07E00
 
 #define	IXGBE_DMATXCTL		0x04A80
+#define	IXGBE_PFDTXGSWC		0x08220
 #define	IXGBE_DTXMXSZRQ		0x08100
 #define	IXGBE_DTXTCPFLGL	0x04A88
 #define	IXGBE_DTXTCPFLGH	0x04A8C
@@ -289,6 +291,8 @@
 #define	IXGBE_DMATXCTL_NS	0x2 /* No Snoop LSO hdr buffer */
 #define	IXGBE_DMATXCTL_GDV	0x8 /* Global Double VLAN */
 #define	IXGBE_DMATXCTL_VT_SHIFT	16  /* VLAN EtherType */
+
+#define	IXGBE_PFDTXGSWC_VT_LBEN	0x1 /* Local L2 VT switch enable */
 #define	IXGBE_DCA_TXCTRL(_i)	(0x07200 + ((_i) * 4)) /* 16 of these (0-15) */
 /* Tx DCA Control register : 128 of these (0-127) */
 #define	IXGBE_DCA_TXCTRL_82599(_i)	(0x0600C + ((_i) * 0x40))
@@ -481,6 +485,7 @@
 /* DCB registers */
 #define	IXGBE_RTRPCS		0x02430
 #define	IXGBE_RTTDCS		0x04900
+#define	IXGBE_RTTDCS_ARBDIS	0x00000040 /* DCB arbiter disable */
 #define	IXGBE_RTTPCS		0x0CD00
 #define	IXGBE_RTRUP2TC		0x03020
 #define	IXGBE_RTTUP2TC		0x0C800
@@ -681,7 +686,7 @@
 #define	IXGBE_DCA_ID	0x11070
 #define	IXGBE_DCA_CTRL	0x11074
 
-/* PCI-E registers 82599 Specific */
+/* PCI-E registers 82599-Specific */
 #define	IXGBE_GCR_EXT		0x11050
 #define	IXGBE_GSCL_5_82599	0x11030
 #define	IXGBE_GSCL_6_82599	0x11034
@@ -980,6 +985,7 @@
 #define	IXGBE_MDIO_PHY_SPEED_ABILITY	0x4 /* Speed Ability Reg */
 #define	IXGBE_MDIO_PHY_SPEED_10G	0x0001 /* 10G capable */
 #define	IXGBE_MDIO_PHY_SPEED_1G		0x0010 /* 1G capable */
+#define	IXGBE_MDIO_PHY_SPEED_100M	0x0020 /* 100M capable */
 #define	IXGBE_MDIO_PHY_EXT_ABILITY	0xB /* Ext Ability Reg */
 #define	IXGBE_MDIO_PHY_10GBASET_ABILITY	0x0004 /* 10GBaseT capable */
 #define	IXGBE_MDIO_PHY_1000BASET_ABILITY	0x0020 /* 1000BaseT capable */
@@ -1004,6 +1010,8 @@
 /* PHY IDs */
 #define	TN1010_PHY_ID		0x00A19410
 #define	TNX_FW_REV		0xB
+#define	AQ1002_PHY_ID		0x03A1B420
+#define	AQ_FW_REV		0x20
 #define	QT2022_PHY_ID		0x0043A400
 #define	ATH_PHY_ID		0x03429050
 
@@ -1093,6 +1101,8 @@
 /* VFRE bitmask */
 #define	IXGBE_VFRE_ENABLE_ALL	0xFFFFFFFF
 
+#define	IXGBE_VF_INIT_TIMEOUT	200 /* Number of retries to clear RSTI */
+
 /* RDHMPN and TDHMPN bitmasks */
 #define	IXGBE_RDHMPN_RDICADDR		0x007FF800
 #define	IXGBE_RDHMPN_RDICRDREQ		0x00800000
@@ -1318,12 +1328,10 @@
  *
  * Current filters:
  *    EAPOL 802.1x (0x888e): Filter 0
- *    BCN (0x8904):          Filter 1
  *    FCoE (0x8906):         Filter 2
  *    1588 (0x88f7):         Filter 3
  */
 #define	IXGBE_ETQF_FILTER_EAPOL	0
-#define	IXGBE_ETQF_FILTER_BCN	1
 #define	IXGBE_ETQF_FILTER_FCOE	2
 #define	IXGBE_ETQF_FILTER_1588	3
 
@@ -1349,8 +1357,10 @@
 #define	IXGBE_STATUS_LAN_ID_1	0x00000004 /* LAN ID 1 */
 
 /* ESDP Bit Masks */
-#define	IXGBE_ESDP_SDP0		0x00000001
-#define	IXGBE_ESDP_SDP1		0x00000002
+#define	IXGBE_ESDP_SDP0		0x00000001 /* SDP0 Data Value */
+#define	IXGBE_ESDP_SDP1		0x00000002 /* SDP1 Data Value */
+#define	IXGBE_ESDP_SDP2		0x00000004 /* SDP2 Data Value */
+#define	IXGBE_ESDP_SDP3		0x00000008 /* SDP3 Data Value */
 #define	IXGBE_ESDP_SDP4		0x00000010 /* SDP4 Data Value */
 #define	IXGBE_ESDP_SDP5		0x00000020 /* SDP5 Data Value */
 #define	IXGBE_ESDP_SDP6		0x00000040 /* SDP6 Data Value */
@@ -1523,6 +1533,7 @@
 #define	IXGBE_PBANUM1_PTR	0x16
 #define	IXGBE_SAN_MAC_ADDR_PTR	0x28
 #define	IXGBE_DEVICE_CAPS	0x2C
+#define	IXGBE_SERIAL_NUMBER_MAC_ADDR	0x11
 #define	IXGBE_PCIE_MSIX_82599_CAPS	0x72
 #define	IXGBE_PCIE_MSIX_82598_CAPS	0x62
 
@@ -1566,10 +1577,17 @@
 #define	IXGBE_EERD_ATTEMPTS 100000
 #endif
 
+#define	IXGBE_PCIE_CTRL2		0x5	/* PCIe Control 2 Offset */
+#define	IXGBE_PCIE_CTRL2_DUMMY_ENABLE	0x8	/* Dummy Function Enable */
+#define	IXGBE_PCIE_CTRL2_LAN_DISABLE	0x2	/* LAN PCI Disable */
+#define	IXGBE_PCIE_CTRL2_DISABLE_SELECT	0x1	/* LAN Disable Select */
+
 #define	IXGBE_SAN_MAC_ADDR_PORT0_OFFSET	0x0
 #define	IXGBE_SAN_MAC_ADDR_PORT1_OFFSET	0x3
 #define	IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP	0x1
 #define	IXGBE_DEVICE_CAPS_FCOE_OFFLOADS	0x2
+#define	IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR	0x4
+#define	IXGBE_FW_PATCH_VERSION_4	0x7
 
 /* PCI Bus Info */
 #define	IXGBE_PCI_LINK_STATUS		0xB2
@@ -1629,7 +1647,7 @@
 #define	IXGBE_MAX_FRAME_SZ	0x40040000
 
 #define	IXGBE_TDWBAL_HEAD_WB_ENABLE	0x1 /* Tx head write-back enable */
-#define	IXGBE_TDWBAL_SEQNUM_WB_ENABLE	0x2 /* Tx seq. # write-back enable */
+#define	IXGBE_TDWBAL_SEQNUM_WB_ENABLE	0x2 /* Tx seq# write-back enable */
 
 /* Receive Config masks */
 #define	IXGBE_RXCTRL_RXEN	0x00000001  /* Enable Receiver */
@@ -1770,6 +1788,7 @@
 #define	IXGBE_PSRTYPE_UDPHDR		0x00000020
 #define	IXGBE_PSRTYPE_IPV4HDR		0x00000100
 #define	IXGBE_PSRTYPE_IPV6HDR		0x00000200
+#define	IXGBE_PSRTYPE_L2HDR		0x00001000
 
 /* SRRCTL bit definitions */
 #define	IXGBE_SRRCTL_BSIZEPKT_SHIFT	10	/* so many KBs */
@@ -1857,6 +1876,12 @@
 #define	IXGBE_RX_DESC_SPECIAL_PRI_SHIFT  0x000D /* Priority in upper 3 of 16 */
 #define	IXGBE_TX_DESC_SPECIAL_PRI_SHIFT  IXGBE_RX_DESC_SPECIAL_PRI_SHIFT
 
+/* SR-IOV specific macros */
+#define	IXGBE_MBVFICR_INDEX(vf_number)	(vf_number >> 4)
+#define	IXGBE_MBVFICR(_i)		(0x00710 + (_i * 4))
+#define	IXGBE_VFLRE(_i)			(((_i & 1) ? 0x001C0 : 0x00600))
+#define	IXGBE_VFLREC(_i)		(0x00700 + (_i * 4))
+
 #ifndef	__le16
 /* Little Endian defines */
 #define	__le16	u16
@@ -1925,6 +1950,7 @@
 #define	IXGBE_FDIRHASH_BUCKET_VALID_SHIFT	15
 #define	IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT	16
 
+#define	IXGBE_FDIRCMD_CMD_MASK			0x00000003
 #define	IXGBE_FDIRCMD_CMD_ADD_FLOW		0x00000001
 #define	IXGBE_FDIRCMD_CMD_REMOVE_FLOW		0x00000002
 #define	IXGBE_FDIRCMD_CMD_QUERY_REM_FILT	0x00000003
@@ -1935,6 +1961,7 @@
 #define	IXGBE_FDIRCMD_L4TYPE_TCP		0x00000040
 #define	IXGBE_FDIRCMD_L4TYPE_SCTP		0x00000060
 #define	IXGBE_FDIRCMD_IPV6			0x00000080
+#define	IXGBE_FDIRCMD_CLEARHT			0x00000100
 #define	IXGBE_FDIRCMD_DROP			0x00000200
 #define	IXGBE_FDIRCMD_INT			0x00000400
 #define	IXGBE_FDIRCMD_LAST			0x00000800
@@ -1942,6 +1969,8 @@
 #define	IXGBE_FDIRCMD_QUEUE_EN			0x00008000
 #define	IXGBE_FDIRCMD_RX_QUEUE_SHIFT		16
 #define	IXGBE_FDIRCMD_VT_POOL_SHIFT		24
+#define	IXGBE_FDIR_INIT_DONE_POLL		10
+#define	IXGBE_FDIRCMD_CMD_POLL			10
 
 /* Transmit Descriptor - Legacy */
 struct ixgbe_legacy_tx_desc {
@@ -2113,6 +2142,7 @@
 #define	IXGBE_PHYSICAL_LAYER_1000BASE_KX	0x0200
 #define	IXGBE_PHYSICAL_LAYER_1000BASE_BX	0x0400
 #define	IXGBE_PHYSICAL_LAYER_10GBASE_KR		0x0800
+#define	IXGBE_PHYSICAL_LAYER_10GBASE_XAUI	0x1000
 
 /* Software ATR hash keys */
 #define	IXGBE_ATR_BUCKET_HASH_KEY	0xE214AD3D
@@ -2171,8 +2201,9 @@
 	ixgbe_phy_unknown = 0,
 	ixgbe_phy_none,
 	ixgbe_phy_tn,
+	ixgbe_phy_aq,
+	ixgbe_phy_cu_unknown,
 	ixgbe_phy_qt,
-	ixgbe_phy_cu_unknown,
 	ixgbe_phy_xaui,
 	ixgbe_phy_nl,
 	ixgbe_phy_tw_tyco,
@@ -2400,6 +2431,8 @@
 	s32 (*write_analog_reg8)(struct ixgbe_hw *, u32, u8);
 	s32 (*setup_sfp)(struct ixgbe_hw *);
 	s32 (*enable_rx_dma)(struct ixgbe_hw *, u32);
+	s32 (*acquire_swfw_sync)(struct ixgbe_hw *, u16);
+	void (*release_swfw_sync)(struct ixgbe_hw *, u16);
 
 	/* Link */
 	s32 (*setup_link)(struct ixgbe_hw *);
@@ -2483,6 +2516,7 @@
 	bool				orig_link_settings_stored;
 	bool				autoneg;
 	bool				autoneg_succeeded;
+	bool				autotry_restart;
 };
 
 struct ixgbe_phy_info {
@@ -2544,10 +2578,12 @@
 #define	IXGBE_ERR_SFP_NOT_PRESENT		-20
 #define	IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT	-21
 #define	IXGBE_ERR_NO_SAN_ADDR_PTR		-22
+#define	IXGBE_ERR_FDIR_REINIT_FAILED		-23
+#define	IXGBE_ERR_EEPROM_VERSION		-24
 #define	IXGBE_NOT_IMPLEMENTED			0x7FFFFFFF
 
 #ifndef UNREFERENCED_PARAMETER
-#define	UNREFERENCED_PARAMETER(_p)
+#define	UNREFERENCED_PARAMETER(_p)		(_p);
 #endif
 
 #endif /* _IXGBE_TYPE_H */
--- a/usr/src/uts/common/io/mac/mac.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/mac/mac.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1426,35 +1426,54 @@
  */
 int
 mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh,
-    mac_ring_handle_t *hwrh)
+    mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
 {
 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
-	flow_entry_t		*flent = mcip->mci_flent;
-	mac_group_t		*grp = flent->fe_rx_ring_group;
-	mac_ring_t		*ring;
 	int			cnt = 0;
 
-	/*
-	 * The mac client did not reserve any RX group, return directly.
-	 * This is probably because the underlying MAC does not support
-	 * any RX groups.
-	 */
-	*hwgh = NULL;
-	if (grp == NULL)
-		return (0);
-
-	/*
-	 * This RX group must be reserved by this mac client.
-	 */
-	ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
-	    (mch == (mac_client_handle_t)(MAC_RX_GROUP_ONLY_CLIENT(grp))));
-
-	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next) {
-		ASSERT(cnt < MAX_RINGS_PER_GROUP);
-		hwrh[cnt++] = (mac_ring_handle_t)ring;
+	switch (rtype) {
+	case MAC_RING_TYPE_RX: {
+		flow_entry_t	*flent = mcip->mci_flent;
+		mac_group_t	*grp;
+		mac_ring_t	*ring;
+
+		grp = flent->fe_rx_ring_group;
+		/*
+		 * The mac client did not reserve any RX group, return directly.
+		 * This is probably because the underlying MAC does not support
+		 * any groups.
+		 */
+		*hwgh = NULL;
+		if (grp == NULL)
+			return (0);
+		/*
+		 * This group must be reserved by this mac client.
+		 */
+		ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
+		    (mch == (mac_client_handle_t)
+		    (MAC_RX_GROUP_ONLY_CLIENT(grp))));
+		for (ring = grp->mrg_rings;
+		    ring != NULL; ring = ring->mr_next, cnt++) {
+			ASSERT(cnt < MAX_RINGS_PER_GROUP);
+			hwrh[cnt] = (mac_ring_handle_t)ring;
+		}
+		*hwgh = (mac_group_handle_t)grp;
+		return (cnt);
 	}
-	*hwgh = (mac_group_handle_t)grp;
-	return (cnt);
+	case MAC_RING_TYPE_TX: {
+		mac_soft_ring_set_t	*tx_srs;
+		mac_srs_tx_t		*tx;
+
+		tx_srs = MCIP_TX_SRS(mcip);
+		tx = &tx_srs->srs_tx;
+		for (; cnt < tx->st_ring_count; cnt++)
+			hwrh[cnt] = tx->st_rings[cnt];
+		return (cnt);
+	}
+	default:
+		ASSERT(B_FALSE);
+		return (-1);
+	}
 }
 
 /*
@@ -1524,6 +1543,22 @@
 	return (info->mri_poll(info->mri_driver, bytes_to_pickup));
 }
 
+/*
+ * Send packets through the selected tx ring.
+ */
+mblk_t *
+mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp)
+{
+	mac_ring_t *ring = (mac_ring_t *)rh;
+	mac_ring_info_t *info = &ring->mr_info;
+
+	ASSERT(ring->mr_type == MAC_RING_TYPE_TX);
+	ASSERT(ring->mr_state >= MR_INUSE);
+	ASSERT(info->mri_tx != NULL);
+
+	return (info->mri_tx(info->mri_driver, mp));
+}
+
 int
 mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr)
 {
@@ -3429,22 +3464,6 @@
 }
 
 /*
- * Send packets through a selected tx ring.
- */
-mblk_t *
-mac_ring_tx(mac_ring_handle_t rh, mblk_t *mp)
-{
-	mac_ring_t *ring = (mac_ring_t *)rh;
-	mac_ring_info_t *info = &ring->mr_info;
-
-	ASSERT(ring->mr_type == MAC_RING_TYPE_TX);
-	ASSERT(ring->mr_state >= MR_INUSE);
-	ASSERT(info->mri_tx != NULL);
-
-	return (info->mri_tx(info->mri_driver, mp));
-}
-
-/*
  * Find a ring from its index.
  */
 mac_ring_t *
--- a/usr/src/uts/common/io/mac/mac_datapath_setup.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c	Mon Aug 17 09:14:35 2009 -0400
@@ -2235,6 +2235,10 @@
 			    tx->st_group);
 			tx->st_group = NULL;
 		}
+		if (tx->st_ring_count != 0) {
+			kmem_free(tx->st_rings,
+			    sizeof (mac_ring_handle_t) * tx->st_ring_count);
+		}
 		if (tx->st_arg2 != NULL) {
 			ASSERT(tx_srs->srs_type & SRST_TX);
 			mac_release_tx_ring(tx->st_arg2);
@@ -3203,7 +3207,7 @@
 	mac_impl_t *mip = mcip->mci_mip;
 	mac_soft_ring_set_t *tx_srs;
 	int i, tx_ring_count = 0, tx_rings_reserved = 0;
-	mac_ring_handle_t *tx_ring = NULL;
+	mac_ring_handle_t *tx_rings = NULL;
 	uint32_t soft_ring_type;
 	mac_group_t *grp = NULL;
 	mac_ring_t *ring;
@@ -3221,7 +3225,7 @@
 	}
 
 	if (tx_ring_count != 0) {
-		tx_ring = kmem_zalloc(sizeof (mac_ring_handle_t) *
+		tx_rings = kmem_zalloc(sizeof (mac_ring_handle_t) *
 		    tx_ring_count, KM_SLEEP);
 	}
 
@@ -3231,8 +3235,12 @@
 	 * NIC's.
 	 */
 	if (srs_type == SRST_FLOW ||
-	    (mcip->mci_state_flags & MCIS_NO_HWRINGS) != 0)
-		goto use_default_ring;
+	    (mcip->mci_state_flags & MCIS_NO_HWRINGS) != 0) {
+		/* use default ring */
+		tx_rings[0] = (void *)mip->mi_default_tx_ring;
+		tx_rings_reserved++;
+		goto rings_assigned;
+	}
 
 	if (mcip->mci_share != NULL)
 		ring = grp->mrg_rings;
@@ -3245,8 +3253,7 @@
 	 * then each Tx ring will have a Tx-side soft ring. All
 	 * these soft rings will be hang off Tx SRS.
 	 */
-	for (i = 0, tx_rings_reserved = 0;
-	    i < tx_ring_count; i++, tx_rings_reserved++) {
+	for (i = 0; i < tx_ring_count; i++) {
 		if (mcip->mci_share != NULL) {
 			/*
 			 * The ring was already chosen and associated
@@ -3255,42 +3262,39 @@
 			 * between the share and non-share cases.
 			 */
 			ASSERT(ring != NULL);
-			tx_ring[i] = (mac_ring_handle_t)ring;
+			tx_rings[i] = (mac_ring_handle_t)ring;
 			ring = ring->mr_next;
 		} else {
-			tx_ring[i] =
+			tx_rings[i] =
 			    (mac_ring_handle_t)mac_reserve_tx_ring(mip, NULL);
-			if (tx_ring[i] == NULL)
+			if (tx_rings[i] == NULL) {
+				/*
+				 * We have run out of Tx rings. So
+				 * give the default ring too.
+				 */
+				tx_rings[i] = (void *)mip->mi_default_tx_ring;
+				tx_rings_reserved++;
 				break;
+			}
 		}
+		tx_rings_reserved++;
 	}
+
+rings_assigned:
 	if (mac_tx_serialize || (mip->mi_v12n_level & MAC_VIRT_SERIALIZE))
 		serialize = B_TRUE;
 	/*
 	 * Did we get the requested number of tx rings?
-	 * There are 3 actions we can take depending upon the number
+	 * There are 2 actions we can take depending upon the number
 	 * of tx_rings we got.
-	 * 1) If we got none, then hook up the tx_srs with the
-	 * default ring.
-	 * 2) If we got one, then get the tx_ring from the soft ring,
+	 * 1) If we got one, then get the tx_ring from the soft ring,
 	 * save it in SRS and free up the soft ring.
-	 * 3) If we got more than 1, then do the tx fanout among the
+	 * 2) If we got more than 1, then do the tx fanout among the
 	 * rings we obtained.
 	 */
-	switch (tx_rings_reserved) {
-	case 1:
-		/*
-		 * No need to allocate Tx soft rings. Tx-side soft
-		 * rings are for Tx fanout case. Just use Tx SRS.
-		 */
-		/* FALLTHRU */
-
-	case 0:
-use_default_ring:
-		if (tx_rings_reserved == 0)
-			tx->st_arg2 = (void *)mip->mi_default_tx_ring;
-		else
-			tx->st_arg2 = (void *)tx_ring[0];
+	ASSERT(tx_rings_reserved != 0);
+	if (tx_rings_reserved == 1) {
+		tx->st_arg2 = (void *)tx_rings[0];
 		/* For ring_count of 0 or 1, set the tx_mode and return */
 		if (tx_srs->srs_type & SRST_BW_CONTROL)
 			tx->st_mode = SRS_TX_BW;
@@ -3298,18 +3302,9 @@
 			tx->st_mode = SRS_TX_SERIALIZE;
 		else
 			tx->st_mode = SRS_TX_DEFAULT;
-		break;
-
-	default:
+	} else {
 		/*
 		 * We got multiple Tx rings for Tx fanout.
-		 *
-		 * cpuid of -1 is passed. This creates an unbound
-		 * worker thread. Instead the code should get CPU
-		 * binding information and pass that to
-		 * mac_soft_ring_create(). This needs to be done
-		 * in conjunction with Rx-side soft ring
-		 * bindings.
 		 */
 		soft_ring_type = ST_RING_OTH | ST_RING_TX;
 		if (tx_srs->srs_type & SRST_BW_CONTROL) {
@@ -3322,7 +3317,7 @@
 		for (i = 0; i < tx_rings_reserved; i++) {
 			(void) mac_soft_ring_create(i, 0, NULL, soft_ring_type,
 			    maxclsyspri, mcip, tx_srs, -1, NULL, mcip,
-			    (mac_resource_handle_t)tx_ring[i]);
+			    (mac_resource_handle_t)tx_rings[i]);
 		}
 		mac_srs_update_fanout_list(tx_srs);
 	}
@@ -3332,8 +3327,12 @@
 	    int, tx->st_mode, int, tx_srs->srs_oth_ring_count);
 
 	if (tx_ring_count != 0) {
-		kmem_free(tx_ring,
-		    sizeof (mac_ring_handle_t) * tx_ring_count);
+		tx->st_ring_count = tx_rings_reserved;
+		tx->st_rings = kmem_zalloc(sizeof (mac_ring_handle_t) *
+		    tx_rings_reserved, KM_SLEEP);
+		for (i = 0; i < tx->st_ring_count; i++)
+			tx->st_rings[i] = tx_rings[i];
+		kmem_free(tx_rings, sizeof (mac_ring_handle_t) * tx_ring_count);
 	}
 }
 
--- a/usr/src/uts/common/io/mii/mii.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/mii/mii.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1278,8 +1278,8 @@
 	ph->phy_speed = 0;
 
 	phy_write(ph, MII_AN_ADVERT, anar);
-	phy_write(ph, MII_CONTROL,
-	    bmcr & ~(MII_CONTROL_ANE|MII_CONTROL_RSAN|MII_CONTROL_LOOPBACK));
+	phy_write(ph, MII_CONTROL, bmcr & ~(MII_CONTROL_RSAN));
+
 	switch (ph->phy_type) {
 	case XCVR_1000T:
 	case XCVR_1000X:
--- a/usr/src/uts/common/io/nxge/nxge_hio.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/nxge/nxge_hio.c	Mon Aug 17 09:14:35 2009 -0400
@@ -41,9 +41,6 @@
 #include <sys/nxge/nxge_txdma.h>
 #include <sys/nxge/nxge_hio.h>
 
-#define	NXGE_HIO_SHARE_MIN_CHANNELS 2
-#define	NXGE_HIO_SHARE_MAX_CHANNELS 2
-
 /*
  * External prototypes
  */
@@ -1057,23 +1054,6 @@
 		NXGE_DEBUG_MSG((nxge, HIO_CTL,
 		    "Hybrid IO-capable service domain"));
 		return (NXGE_OK);
-	} else {
-		/*
-		 * isLDOMguest(nxge) == B_TRUE
-		 */
-		nx_vio_fp_t *vio;
-		nhd->type = NXGE_HIO_TYPE_GUEST;
-
-		vio = &nhd->hio.vio;
-		vio->__register = (vio_net_resource_reg_t)
-		    modgetsymvalue("vio_net_resource_reg", 0);
-		vio->unregister = (vio_net_resource_unreg_t)
-		    modgetsymvalue("vio_net_resource_unreg", 0);
-
-		if (vio->__register == 0 || vio->unregister == 0) {
-			NXGE_ERROR_MSG((nxge, VIR_CTL, "vio_net is absent!"));
-			return (NXGE_ERROR);
-		}
 	}
 
 	return (0);
@@ -1144,12 +1124,16 @@
 static int
 nxge_hio_add_mac(void *arg, const uint8_t *mac_addr)
 {
-	nxge_ring_group_t *group = (nxge_ring_group_t *)arg;
-	p_nxge_t nxge = group->nxgep;
-	int rv;
-	nxge_hio_vr_t *vr;	/* The Virtualization Region */
+	nxge_ring_group_t	*group = (nxge_ring_group_t *)arg;
+	p_nxge_t		nxge = group->nxgep;
+	int			rv;
+	nxge_hio_vr_t		*vr;	/* The Virtualization Region */
 
 	ASSERT(group->type == MAC_RING_TYPE_RX);
+	ASSERT(group->nxgep != NULL);
+
+	if (isLDOMguest(group->nxgep))
+		return (0);
 
 	mutex_enter(nxge->genlock);
 
@@ -1174,8 +1158,7 @@
 	/*
 	 * Program the mac address for the group.
 	 */
-	if ((rv = nxge_hio_group_mac_add(nxge, group,
-	    mac_addr)) != 0) {
+	if ((rv = nxge_hio_group_mac_add(nxge, group, mac_addr)) != 0) {
 		return (rv);
 	}
 
@@ -1206,6 +1189,10 @@
 	int rv, slot;
 
 	ASSERT(group->type == MAC_RING_TYPE_RX);
+	ASSERT(group->nxgep != NULL);
+
+	if (isLDOMguest(group->nxgep))
+		return (0);
 
 	mutex_enter(nxge->genlock);
 
@@ -1253,14 +1240,16 @@
 	int			dev_gindex;
 
 	ASSERT(group->type == MAC_RING_TYPE_RX);
+	ASSERT(group->nxgep != NULL);
 
-#ifdef later
 	ASSERT(group->nxgep->nxge_mac_state == NXGE_MAC_STARTED);
-#endif
 	if (group->nxgep->nxge_mac_state != NXGE_MAC_STARTED)
 		return (ENXIO);
 
 	mutex_enter(group->nxgep->genlock);
+	if (isLDOMguest(group->nxgep))
+		goto nxge_hio_group_start_exit;
+
 	dev_gindex = group->nxgep->pt_config.hw_config.def_mac_rxdma_grpid +
 	    group->gindex;
 	rdc_grp_p = &group->nxgep->pt_config.rdc_grps[dev_gindex];
@@ -1289,9 +1278,9 @@
 
 	(void) nxge_init_fzc_rdc_tbl(group->nxgep, rdc_grp_p, rdctbl);
 
+nxge_hio_group_start_exit:
 	group->started = B_TRUE;
 	mutex_exit(group->nxgep->genlock);
-
 	return (0);
 }
 
@@ -1305,6 +1294,9 @@
 	mutex_enter(group->nxgep->genlock);
 	group->started = B_FALSE;
 
+	if (isLDOMguest(group->nxgep))
+		goto nxge_hio_group_stop_exit;
+
 	/*
 	 * Unbind the RDC table previously bound for this group.
 	 *
@@ -1314,6 +1306,7 @@
 	if (group->gindex != 0)
 		(void) nxge_fzc_rdc_tbl_unbind(group->nxgep, group->rdctbl);
 
+nxge_hio_group_stop_exit:
 	mutex_exit(group->nxgep->genlock);
 }
 
@@ -1334,20 +1327,26 @@
 		group->gindex = groupid;
 		group->sindex = 0;	/* not yet bound to a share */
 
-		dev_gindex = nxgep->pt_config.hw_config.def_mac_rxdma_grpid +
-		    groupid;
+		if (!isLDOMguest(nxgep)) {
+			dev_gindex =
+			    nxgep->pt_config.hw_config.def_mac_rxdma_grpid +
+			    groupid;
 
-		if (nxgep->pt_config.hw_config.def_mac_rxdma_grpid ==
-		    dev_gindex)
-			group->port_default_grp = B_TRUE;
+			if (nxgep->pt_config.hw_config.def_mac_rxdma_grpid ==
+			    dev_gindex)
+				group->port_default_grp = B_TRUE;
+
+			infop->mgi_count =
+			    nxgep->pt_config.rdc_grps[dev_gindex].max_rdcs;
+		} else {
+			infop->mgi_count = NXGE_HIO_SHARE_MAX_CHANNELS;
+		}
 
 		infop->mgi_driver = (mac_group_driver_t)group;
 		infop->mgi_start = nxge_hio_group_start;
 		infop->mgi_stop = nxge_hio_group_stop;
 		infop->mgi_addmac = nxge_hio_add_mac;
 		infop->mgi_remmac = nxge_hio_rem_mac;
-		infop->mgi_count =
-		    nxgep->pt_config.rdc_grps[dev_gindex].max_rdcs;
 		break;
 
 	case MAC_RING_TYPE_TX:
--- a/usr/src/uts/common/io/nxge/nxge_hio_guest.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/nxge/nxge_hio_guest.c	Mon Aug 17 09:14:35 2009 -0400
@@ -35,46 +35,9 @@
 #include <sys/nxge/nxge_fzc.h>
 #include <sys/nxge/nxge_rxdma.h>
 #include <sys/nxge/nxge_txdma.h>
-
 #include <sys/nxge/nxge_hio.h>
 
 /*
- * nxge_hio_unregister
- *
- *	Unregister with the VNET module.
- *
- * Arguments:
- * 	nxge
- *
- * Notes:
- *	We must uninitialize all DMA channels associated with the VR, too.
- *
- *	We're assuming that the channels will be disabled & unassigned
- *	in the service domain, after we're done here.
- *
- * Context:
- *	Guest domain
- */
-void
-nxge_hio_unregister(
-	nxge_t *nxge)
-{
-	nxge_hio_data_t *nhd = (nxge_hio_data_t *)nxge->nxge_hw_p->hio;
-
-	if (nhd == 0) {
-		return;
-	}
-
-#if defined(sun4v)
-	/* Unregister with vNet. */
-	if (nhd->hio.vio.unregister) {
-		if (nxge->hio_vr)
-			(*nhd->hio.vio.unregister)(nxge->hio_vr->vhp);
-	}
-#endif
-}
-
-/*
  * nxge_guest_regs_map
  *
  *	Map in a guest domain's register set(s).
@@ -95,8 +58,7 @@
 };
 
 int
-nxge_guest_regs_map(
-	nxge_t *nxge)
+nxge_guest_regs_map(nxge_t *nxge)
 {
 	dev_regs_t 	*regs;
 	off_t		regsize;
@@ -211,31 +173,22 @@
 int
 nxge_hio_vr_add(nxge_t *nxge)
 {
-	extern mac_callbacks_t nxge_m_callbacks;
-
-	nxge_hio_data_t *nhd = (nxge_hio_data_t *)nxge->nxge_hw_p->hio;
-	nxge_hio_vr_t *vr;
-	nxge_hio_dc_t *dc;
-
-	int *reg_val;
-	uint_t reg_len;
-	uint8_t vr_index;
+	extern nxge_status_t	nxge_mac_register(p_nxge_t);
 
-	nxhv_vr_fp_t *fp;
-	uint64_t vr_address, vr_size;
-	uint32_t cookie;
-
-	nxhv_dc_fp_t *tx, *rx;
-	uint64_t tx_map, rx_map;
-
-	uint64_t hv_rv;
-
-	/* Variables needed to register with vnet. */
-	mac_register_t *mac_info;
-	ether_addr_t mac_addr;
-	nx_vio_fp_t *vio;
-
-	int i;
+	nxge_hio_data_t		*nhd = (nxge_hio_data_t *)nxge->nxge_hw_p->hio;
+	nxge_hio_vr_t		*vr;
+	nxge_hio_dc_t		*dc;
+	int			*reg_val;
+	uint_t			reg_len;
+	uint8_t			vr_index;
+	nxhv_vr_fp_t		*fp;
+	uint64_t		vr_address, vr_size;
+	uint32_t		cookie;
+	nxhv_dc_fp_t		*tx, *rx;
+	uint64_t		tx_map, rx_map;
+	uint64_t		hv_rv;
+	int			i;
+	nxge_status_t		status;
 
 	NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_vr_add"));
 
@@ -384,40 +337,13 @@
 		}
 	}
 
-	/*
-	 * Register with vnet.
-	 */
-	if ((mac_info = mac_alloc(MAC_VERSION)) == NULL)
-		return (NXGE_ERROR);
-
-	mac_info->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
-	mac_info->m_driver = nxge;
-	mac_info->m_dip = nxge->dip;
-	mac_info->m_src_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP);
-	mac_info->m_dst_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP);
-	(void) memset(mac_info->m_src_addr, 0xff, sizeof (MAXMACADDRLEN));
-	mac_info->m_callbacks = &nxge_m_callbacks;
-	mac_info->m_min_sdu = 0;
-	mac_info->m_max_sdu = NXGE_MTU_DEFAULT_MAX -
-	    sizeof (struct ether_header) - ETHERFCSL - 4;
-
-	(void) memset(&mac_addr, 0xff, sizeof (mac_addr));
-
-	/* Register with vio_net. */
-	vio = &nhd->hio.vio;
-	if ((*vio->__register)(mac_info, VIO_NET_RES_HYBRID,
-	    nxge->hio_mac_addr, mac_addr, &vr->vhp, &vio->cb)) {
-		NXGE_DEBUG_MSG((nxge, HIO_CTL, "HIO registration() failed"));
-		KMEM_FREE(mac_info->m_src_addr, MAXMACADDRLEN);
-		KMEM_FREE(mac_info->m_dst_addr, MAXMACADDRLEN);
-		mac_free(mac_info);
-		return (NXGE_ERROR);
+	status = nxge_mac_register(nxge);
+	if (status != NXGE_OK) {
+		cmn_err(CE_WARN, "nxge(%d): nxge_mac_register failed\n",
+		    nxge->instance);
+		return (status);
 	}
 
-	KMEM_FREE(mac_info->m_src_addr, MAXMACADDRLEN);
-	KMEM_FREE(mac_info->m_dst_addr, MAXMACADDRLEN);
-	mac_free(mac_info);
-
 	nxge->hio_vr = vr;	/* For faster lookups. */
 
 	NXGE_DEBUG_MSG((nxge, HIO_CTL, "<== nxge_hio_vr_add"));
--- a/usr/src/uts/common/io/nxge/nxge_main.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/nxge/nxge_main.c	Mon Aug 17 09:14:35 2009 -0400
@@ -272,14 +272,11 @@
 static int nxge_m_multicst(void *, boolean_t, const uint8_t *);
 static int nxge_m_promisc(void *, boolean_t);
 static void nxge_m_ioctl(void *, queue_t *, mblk_t *);
-static nxge_status_t nxge_mac_register(p_nxge_t);
+nxge_status_t nxge_mac_register(p_nxge_t);
 static int nxge_altmac_set(p_nxge_t nxgep, uint8_t *mac_addr,
 	int slot, int rdctbl, boolean_t usetbl);
 void nxge_mmac_kstat_update(p_nxge_t nxgep, int slot,
 	boolean_t factory);
-#if defined(sun4v)
-extern mblk_t *nxge_m_tx(void *arg, mblk_t *mp);
-#endif
 
 static void nxge_m_getfactaddr(void *, uint_t, uint8_t *);
 static	boolean_t nxge_m_getcapab(void *, mac_capab_t, void *);
@@ -630,11 +627,6 @@
 	if (nxgep->niu_type != N2_NIU) {
 		nxge_set_pci_replay_timeout(nxgep);
 	}
-#if defined(sun4v)
-	if (isLDOMguest(nxgep)) {
-		nxge_m_callbacks.mc_tx = nxge_m_tx;
-	}
-#endif
 
 #if defined(sun4v)
 	/* This is required by nxge_hio_init(), which follows. */
@@ -961,11 +953,7 @@
 
 	(void) nxge_link_monitor(nxgep, LINK_MONITOR_STOP);
 
-	if (isLDOMguest(nxgep)) {
-		if (nxgep->nxge_mac_state == NXGE_MAC_STARTED)
-			nxge_m_stop((void *)nxgep);
-		nxge_hio_unregister(nxgep);
-	} else if (nxgep->mach && (status = mac_unregister(nxgep->mach)) != 0) {
+	if (nxgep->mach && (status = mac_unregister(nxgep->mach)) != 0) {
 		NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL,
 		    "<== nxge_detach status = 0x%08X", status));
 		return (DDI_FAILURE);
@@ -4294,10 +4282,13 @@
 	case MAC_CAPAB_MULTIFACTADDR: {
 		mac_capab_multifactaddr_t	*mfacp = cap_data;
 
-		mutex_enter(nxgep->genlock);
-		mfacp->mcm_naddr = nxgep->nxge_mmac_info.num_factory_mmac;
-		mfacp->mcm_getaddr = nxge_m_getfactaddr;
-		mutex_exit(nxgep->genlock);
+		if (!isLDOMguest(nxgep)) {
+			mutex_enter(nxgep->genlock);
+			mfacp->mcm_naddr =
+			    nxgep->nxge_mmac_info.num_factory_mmac;
+			mfacp->mcm_getaddr = nxge_m_getfactaddr;
+			mutex_exit(nxgep->genlock);
+		}
 		break;
 	}
 
@@ -4325,34 +4316,68 @@
 
 		mutex_enter(nxgep->genlock);
 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
-			cap_rings->mr_group_type = MAC_GROUP_TYPE_DYNAMIC;
-			cap_rings->mr_rnum = p_cfgp->max_rdcs;
-			cap_rings->mr_rget = nxge_fill_ring;
-			cap_rings->mr_gnum = p_cfgp->max_rdc_grpids;
-			cap_rings->mr_gget = nxge_hio_group_get;
-			cap_rings->mr_gaddring = nxge_group_add_ring;
-			cap_rings->mr_gremring = nxge_group_rem_ring;
+			if (isLDOMguest(nxgep))  {
+				cap_rings->mr_group_type =
+				    MAC_GROUP_TYPE_STATIC;
+				cap_rings->mr_rnum =
+				    NXGE_HIO_SHARE_MAX_CHANNELS;
+				cap_rings->mr_rget = nxge_fill_ring;
+				cap_rings->mr_gnum = 1;
+				cap_rings->mr_gget = nxge_hio_group_get;
+				cap_rings->mr_gaddring = NULL;
+				cap_rings->mr_gremring = NULL;
+			} else {
+				/*
+				 * Service Domain.
+				 */
+				cap_rings->mr_group_type =
+				    MAC_GROUP_TYPE_DYNAMIC;
+				cap_rings->mr_rnum = p_cfgp->max_rdcs;
+				cap_rings->mr_rget = nxge_fill_ring;
+				cap_rings->mr_gnum = p_cfgp->max_rdc_grpids;
+				cap_rings->mr_gget = nxge_hio_group_get;
+				cap_rings->mr_gaddring = nxge_group_add_ring;
+				cap_rings->mr_gremring = nxge_group_rem_ring;
+			}
 
 			NXGE_DEBUG_MSG((nxgep, RX_CTL,
 			    "==> nxge_m_getcapab: rx nrings[%d] ngroups[%d]",
 			    p_cfgp->max_rdcs, p_cfgp->max_rdc_grpids));
 		} else {
-			cap_rings->mr_group_type = MAC_GROUP_TYPE_DYNAMIC;
-			cap_rings->mr_rnum = p_cfgp->tdc.count;
-			cap_rings->mr_rget = nxge_fill_ring;
-			if (isLDOMservice(nxgep)) {
-				/* share capable */
-				/* Do not report the default ring: hence -1 */
+			/*
+			 * TX Rings.
+			 */
+			if (isLDOMguest(nxgep)) {
+				cap_rings->mr_group_type =
+				    MAC_GROUP_TYPE_STATIC;
+				cap_rings->mr_rnum =
+				    NXGE_HIO_SHARE_MAX_CHANNELS;
+				cap_rings->mr_rget = nxge_fill_ring;
+				cap_rings->mr_gnum = 0;
+				cap_rings->mr_gget = NULL;
+				cap_rings->mr_gaddring = NULL;
+				cap_rings->mr_gremring = NULL;
+			} else {
+				/*
+				 * Service Domain.
+				 */
+				cap_rings->mr_group_type =
+				    MAC_GROUP_TYPE_DYNAMIC;
+				cap_rings->mr_rnum = p_cfgp->tdc.count;
+				cap_rings->mr_rget = nxge_fill_ring;
+
+				/*
+				 * Share capable.
+				 *
+				 * Do not report the default group: hence -1
+				 */
 				cap_rings->mr_gnum =
 				    NXGE_MAX_TDC_GROUPS / nxgep->nports - 1;
-			} else {
-				cap_rings->mr_gnum = 0;
+				cap_rings->mr_gget = nxge_hio_group_get;
+				cap_rings->mr_gaddring = nxge_group_add_ring;
+				cap_rings->mr_gremring = nxge_group_rem_ring;
 			}
 
-			cap_rings->mr_gget = nxge_hio_group_get;
-			cap_rings->mr_gaddring = nxge_group_add_ring;
-			cap_rings->mr_gremring = nxge_group_rem_ring;
-
 			NXGE_DEBUG_MSG((nxgep, TX_CTL,
 			    "==> nxge_m_getcapab: tx rings # of rings %d",
 			    p_cfgp->tdc.count));
@@ -6372,7 +6397,7 @@
 	NXGE_DEBUG_MSG((nxgep, INT_CTL, "<== nxge_intrs_disable"));
 }
 
-static nxge_status_t
+nxge_status_t
 nxge_mac_register(p_nxge_t nxgep)
 {
 	mac_register_t *macp;
@@ -6386,7 +6411,13 @@
 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
 	macp->m_driver = nxgep;
 	macp->m_dip = nxgep->dip;
-	macp->m_src_addr = nxgep->ouraddr.ether_addr_octet;
+	if (!isLDOMguest(nxgep)) {
+		macp->m_src_addr = nxgep->ouraddr.ether_addr_octet;
+	} else {
+		macp->m_src_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP);
+		macp->m_dst_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP);
+		(void) memset(macp->m_src_addr, 0xff, sizeof (MAXMACADDRLEN));
+	}
 	macp->m_callbacks = &nxge_m_callbacks;
 	macp->m_min_sdu = 0;
 	nxgep->mac.default_mtu = nxgep->mac.maxframesize -
@@ -6395,7 +6426,12 @@
 	macp->m_margin = VLAN_TAGSZ;
 	macp->m_priv_props = nxge_priv_props;
 	macp->m_priv_prop_count = NXGE_MAX_PRIV_PROPS;
-	macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1 | MAC_VIRT_SERIALIZE;
+	if (isLDOMguest(nxgep)) {
+		macp->m_v12n = MAC_VIRT_LEVEL1 | MAC_VIRT_SERIALIZE;
+	} else {
+		macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1 | \
+		    MAC_VIRT_SERIALIZE;
+	}
 
 	NXGE_DEBUG_MSG((nxgep, MAC_CTL,
 	    "==> nxge_mac_register: instance %d "
@@ -6406,6 +6442,10 @@
 	    NXGE_EHEADER_VLAN_CRC));
 
 	status = mac_register(macp, &nxgep->mach);
+	if (isLDOMguest(nxgep)) {
+		KMEM_FREE(macp->m_src_addr, MAXMACADDRLEN);
+		KMEM_FREE(macp->m_dst_addr, MAXMACADDRLEN);
+	}
 	mac_free(macp);
 
 	if (status != 0) {
--- a/usr/src/uts/common/io/nxge/nxge_rxdma.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/nxge/nxge_rxdma.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1756,7 +1756,7 @@
 	uint8_t			channel;
 	npi_handle_t		handle;
 	rx_dma_ctl_stat_t	cs;
-	p_rx_rcr_ring_t		rcr_ring;
+	p_rx_rcr_ring_t		rcrp;
 	mblk_t			*mp = NULL;
 
 	if (ldvp == NULL) {
@@ -1789,7 +1789,7 @@
 	/*
 	 * Get the ring to enable us to process packets.
 	 */
-	rcr_ring = nxgep->rx_rcr_rings->rcr_rings[ldvp->vdma_index];
+	rcrp = nxgep->rx_rcr_rings->rcr_rings[ldvp->vdma_index];
 
 	/*
 	 * The RCR ring lock must be held when packets
@@ -1799,7 +1799,7 @@
 	 * (will cause fatal errors such as rcrincon bit set)
 	 * and the setting of the poll_flag.
 	 */
-	MUTEX_ENTER(&rcr_ring->lock);
+	MUTEX_ENTER(&rcrp->lock);
 
 	/*
 	 * Get the control and status for this channel.
@@ -1840,12 +1840,12 @@
 				    mgm.value);
 			}
 		}
-		MUTEX_EXIT(&rcr_ring->lock);
+		MUTEX_EXIT(&rcrp->lock);
 		return (DDI_INTR_CLAIMED);
 	}
 
-	ASSERT(rcr_ring->ldgp == ldgp);
-	ASSERT(rcr_ring->ldvp == ldvp);
+	ASSERT(rcrp->ldgp == ldgp);
+	ASSERT(rcrp->ldvp == ldvp);
 
 	RXDMA_REG_READ64(handle, RX_DMA_CTL_STAT_REG, channel, &cs.value);
 
@@ -1856,8 +1856,8 @@
 	    cs.bits.hdw.rcrto,
 	    cs.bits.hdw.rcrthres));
 
-	if (rcr_ring->poll_flag == 0) {
-		mp = nxge_rx_pkts(nxgep, rcr_ring, cs, -1);
+	if (!rcrp->poll_flag) {
+		mp = nxge_rx_pkts(nxgep, rcrp, cs, -1);
 	}
 
 	/* error events. */
@@ -1873,27 +1873,34 @@
 	 * these two edge triggered bits.
 	 */
 	cs.value &= RX_DMA_CTL_STAT_WR1C;
-	cs.bits.hdw.mex = rcr_ring->poll_flag ? 0 : 1;
+	cs.bits.hdw.mex = rcrp->poll_flag ? 0 : 1;
 	RXDMA_REG_WRITE64(handle, RX_DMA_CTL_STAT_REG, channel,
 	    cs.value);
 
 	/*
 	 * If the polling mode is enabled, disable the interrupt.
 	 */
-	if (rcr_ring->poll_flag) {
+	if (rcrp->poll_flag) {
 		NXGE_DEBUG_MSG((nxgep, NXGE_ERR_CTL,
 		    "==> nxge_rx_intr: rdc %d ldgp $%p ldvp $%p "
 		    "(disabling interrupts)", channel, ldgp, ldvp));
+
 		/*
 		 * Disarm this logical group if this is a single device
 		 * group.
 		 */
 		if (ldgp->nldvs == 1) {
-			ldgimgm_t mgm;
-			mgm.value = 0;
-			mgm.bits.ldw.arm = 0;
-			NXGE_REG_WR64(handle,
-			    LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg), mgm.value);
+			if (isLDOMguest(nxgep)) {
+				ldgp->arm = B_FALSE;
+				nxge_hio_ldgimgn(nxgep, ldgp);
+			} else {
+				ldgimgm_t mgm;
+				mgm.value = 0;
+				mgm.bits.ldw.arm = 0;
+				NXGE_REG_WR64(handle,
+				    LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg),
+				    mgm.value);
+			}
 		}
 	} else {
 		/*
@@ -1920,24 +1927,11 @@
 		    "==> nxge_rx_intr: rdc %d ldgp $%p "
 		    "exiting ISR (and call mac_rx_ring)", channel, ldgp));
 	}
-	MUTEX_EXIT(&rcr_ring->lock);
+	MUTEX_EXIT(&rcrp->lock);
 
 	if (mp != NULL) {
-		if (!isLDOMguest(nxgep))
-			mac_rx_ring(nxgep->mach, rcr_ring->rcr_mac_handle, mp,
-			    rcr_ring->rcr_gen_num);
-#if defined(sun4v)
-		else {			/* isLDOMguest(nxgep) */
-			nxge_hio_data_t *nhd = (nxge_hio_data_t *)
-			    nxgep->nxge_hw_p->hio;
-			nx_vio_fp_t *vio = &nhd->hio.vio;
-
-			if (vio->cb.vio_net_rx_cb) {
-				(*vio->cb.vio_net_rx_cb)
-				    (nxgep->hio_vr->vhp, mp);
-			}
-		}
-#endif
+		mac_rx_ring(nxgep->mach, rcrp->rcr_mac_handle, mp,
+		    rcrp->rcr_gen_num);
 	}
 	NXGE_DEBUG_MSG((nxgep, RX_CTL, "<== nxge_rx_intr: DDI_INTR_CLAIMED"));
 	return (DDI_INTR_CLAIMED);
@@ -2720,6 +2714,7 @@
 	uint32_t		channel;
 
 	if (ring_handle == NULL) {
+		ASSERT(ring_handle != NULL);
 		return (0);
 	}
 
@@ -2760,6 +2755,7 @@
 	uint32_t		channel;
 
 	if (ring_handle == NULL) {
+		ASSERT(ring_handle != NULL);
 		return (0);
 	}
 
@@ -2816,12 +2812,18 @@
 		    "==> nxge_disable_poll: rdc %d ldgp $%p (enable intr)",
 		    ringp->rdc, ldgp));
 		if (ldgp->nldvs == 1) {
-			ldgimgm_t	mgm;
-			mgm.value = 0;
-			mgm.bits.ldw.arm = 1;
-			mgm.bits.ldw.timer = ldgp->ldg_timer;
-			NXGE_REG_WR64(handle,
-			    LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg), mgm.value);
+			if (isLDOMguest(nxgep)) {
+				ldgp->arm = B_TRUE;
+				nxge_hio_ldgimgn(nxgep, ldgp);
+			} else {
+				ldgimgm_t	mgm;
+				mgm.value = 0;
+				mgm.bits.ldw.arm = 1;
+				mgm.bits.ldw.timer = ldgp->ldg_timer;
+				NXGE_REG_WR64(handle,
+				    LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg),
+				    mgm.value);
+			}
 		}
 		ringp->poll_flag = 0;
 	}
--- a/usr/src/uts/common/io/nxge/nxge_send.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/nxge/nxge_send.c	Mon Aug 17 09:14:35 2009 -0400
@@ -66,20 +66,9 @@
 	(void) nxge_txdma_reclaim(ring->nxgep, ring, 0);
 	MUTEX_EXIT(&ring->lock);
 
-	if (!isLDOMguest(ring->nxgep) && !ring->tx_ring_offline)
+	if (!ring->tx_ring_offline) {
 		mac_tx_ring_update(ring->nxgep->mach, ring->tx_ring_handle);
-#if defined(sun4v)
-	else {
-		nxge_hio_data_t *nhd =
-		    (nxge_hio_data_t *)ring->nxgep->nxge_hw_p->hio;
-		nx_vio_fp_t *vio = &nhd->hio.vio;
-
-		/* Call back vnet. */
-		if (vio->cb.vio_net_tx_update) {
-			(*vio->cb.vio_net_tx_update)(ring->nxgep->hio_vr->vhp);
-		}
 	}
-#endif
 }
 
 static void
@@ -141,65 +130,6 @@
 	return ((mblk_t *)NULL);
 }
 
-#if defined(sun4v)
-
-/*
- * Hashing policy for load balancing over the set of TX rings
- * available to the driver.
- */
-static uint8_t nxge_tx_hash_policy = MAC_PKT_HASH_L4;
-
-/*
- * nxge_m_tx() is needed for Hybrid I/O operation of the vnet in
- *	the guest domain.  See CR 6778758 for long term solution.
- *
- *	The guest domain driver will for now hash the packet
- *	to pick a DMA channel from the only group it has group 0.
- */
-
-mblk_t *
-nxge_m_tx(void *arg, mblk_t *mp)
-{
-	p_nxge_t		nxgep = (p_nxge_t)arg;
-	mblk_t			*next;
-	uint64_t		rindex;
-	p_tx_ring_t		tx_ring_p;
-	int			status;
-
-	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_m_tx"));
-
-	/*
-	 * Hash to pick a ring from Group 0, the only TX group
-	 * for a guest domain driver.
-	 */
-	rindex = mac_pkt_hash(DL_ETHER, mp, nxge_tx_hash_policy, B_TRUE);
-	rindex = rindex % nxgep->pt_config.tdc_grps[0].max_tdcs;
-
-	/*
-	 * Get the ring handle.
-	 */
-	tx_ring_p = nxgep->tx_rings->rings[rindex];
-
-	while (mp != NULL) {
-		next = mp->b_next;
-		mp->b_next = NULL;
-
-		status = nxge_start(nxgep, tx_ring_p, mp);
-		if (status != 0) {
-			mp->b_next = next;
-			nxge_tx_ring_dispatch(tx_ring_p);
-			return (mp);
-		}
-
-		mp = next;
-	}
-
-	NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_m_tx"));
-	return ((mblk_t *)NULL);
-}
-
-#endif
-
 int
 nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp)
 {
--- a/usr/src/uts/common/io/nxge/nxge_virtual.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/nxge/nxge_virtual.c	Mon Aug 17 09:14:35 2009 -0400
@@ -3994,6 +3994,9 @@
 	p_dma_cfgp = &nxgep->pt_config;
 	p_cfgp = &p_dma_cfgp->hw_config;
 
+	if (isLDOMguest(nxgep))
+		return (ringidx);
+
 	for (i = 0; i < groupid; i++) {
 		rdc_grp_p =
 		    &p_dma_cfgp->rdc_grps[p_cfgp->def_mac_rxdma_grpid + i];
--- a/usr/src/uts/common/io/sata/adapters/ahci/ahci.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/sata/adapters/ahci/ahci.c	Mon Aug 17 09:14:35 2009 -0400
@@ -94,33 +94,57 @@
 static	void ahci_dealloc_cmd_list(ahci_ctl_t *, ahci_port_t *);
 static  int ahci_alloc_cmd_tables(ahci_ctl_t *, ahci_port_t *);
 static  void ahci_dealloc_cmd_tables(ahci_ctl_t *, ahci_port_t *);
+static	void ahci_alloc_pmult(ahci_ctl_t *, ahci_port_t *);
+static	void ahci_dealloc_pmult(ahci_ctl_t *, ahci_port_t *);
 
 static	int ahci_initialize_controller(ahci_ctl_t *);
 static	void ahci_uninitialize_controller(ahci_ctl_t *);
-static	int ahci_initialize_port(ahci_ctl_t *, ahci_port_t *, uint8_t);
+static	int ahci_initialize_port(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *);
 static	int ahci_config_space_init(ahci_ctl_t *);
 
 static	void ahci_drain_ports_taskq(ahci_ctl_t *);
+static	int ahci_rdwr_pmult(ahci_ctl_t *, ahci_addr_t *, uint8_t, uint32_t *,
+    uint8_t);
+static	int ahci_read_pmult(ahci_ctl_t *, ahci_addr_t *, uint8_t, uint32_t *);
+static	int ahci_write_pmult(ahci_ctl_t *, ahci_addr_t *, uint8_t, uint32_t);
+static	int ahci_update_pmult_pscr(ahci_ctl_t *, ahci_addr_t *,
+    sata_device_t *);
+static	int ahci_update_pmult_gscr(ahci_ctl_t *, ahci_addr_t *,
+    sata_device_t *);
+static	int ahci_initialize_pmult(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *,
+    sata_device_t *);
+static	int ahci_initialize_pmport(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *);
+static	int ahci_probe_pmult(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *);
+static	int ahci_probe_pmport(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *,
+    sata_device_t *);
+
 static	void ahci_disable_interface_pm(ahci_ctl_t *, uint8_t);
 static	int ahci_start_port(ahci_ctl_t *, ahci_port_t *, uint8_t);
-static	void ahci_find_dev_signature(ahci_ctl_t *, ahci_port_t *, uint8_t);
+static	void ahci_find_dev_signature(ahci_ctl_t *, ahci_port_t *,
+    ahci_addr_t *);
 static	void ahci_update_sata_registers(ahci_ctl_t *, uint8_t, sata_device_t *);
 static	int ahci_deliver_satapkt(ahci_ctl_t *, ahci_port_t *,
-    uint8_t, sata_pkt_t *);
+    ahci_addr_t *, sata_pkt_t *);
 static	int ahci_do_sync_start(ahci_ctl_t *, ahci_port_t *,
-    uint8_t, sata_pkt_t *);
-static	int ahci_claim_free_slot(ahci_ctl_t *, ahci_port_t *, int);
+    ahci_addr_t *, sata_pkt_t *);
+static	int ahci_claim_free_slot(ahci_ctl_t *, ahci_port_t *,
+    ahci_addr_t *, int);
 static  void ahci_copy_err_cnxt(sata_cmd_t *, ahci_fis_d2h_register_t *);
 static	void ahci_copy_ncq_err_page(sata_cmd_t *,
     struct sata_ncq_error_recovery_page *);
 static	void ahci_copy_out_regs(sata_cmd_t *, ahci_fis_d2h_register_t *);
 
-static	int ahci_software_reset(ahci_ctl_t *, ahci_port_t *, uint8_t);
+static	int ahci_software_reset(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *);
 static	int ahci_hba_reset(ahci_ctl_t *);
-static	int ahci_port_reset(ahci_ctl_t *, ahci_port_t *, uint8_t);
+static	int ahci_port_reset(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *);
+static	int ahci_pmport_reset(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *);
 static	void ahci_reject_all_abort_pkts(ahci_ctl_t *, ahci_port_t *, uint8_t);
-static	int ahci_reset_device_reject_pkts(ahci_ctl_t *, ahci_port_t *, uint8_t);
-static	int ahci_reset_port_reject_pkts(ahci_ctl_t *, ahci_port_t *, uint8_t);
+static	int ahci_reset_device_reject_pkts(ahci_ctl_t *, ahci_port_t *,
+    ahci_addr_t *);
+static	int ahci_reset_pmdevice_reject_pkts(ahci_ctl_t *, ahci_port_t *,
+    ahci_addr_t *);
+static	int ahci_reset_port_reject_pkts(ahci_ctl_t *, ahci_port_t *,
+    ahci_addr_t *);
 static	int ahci_reset_hba_reject_pkts(ahci_ctl_t *);
 static	int ahci_put_port_into_notrunning_state(ahci_ctl_t *, ahci_port_t *,
     uint8_t);
@@ -132,6 +156,8 @@
 static void ahci_get_rqsense_data(ahci_ctl_t *, ahci_port_t *,
     uint8_t, sata_pkt_t *);
 static	void ahci_fatal_error_recovery_handler(ahci_ctl_t *, ahci_port_t *,
+    ahci_addr_t *, uint32_t);
+static	void ahci_pmult_error_recovery_handler(ahci_ctl_t *, ahci_port_t *,
     uint8_t, uint32_t);
 static	void ahci_timeout_pkts(ahci_ctl_t *, ahci_port_t *,
     uint8_t, uint32_t);
@@ -149,6 +175,8 @@
 
 static  int ahci_intr_cmd_cmplt(ahci_ctl_t *, ahci_port_t *, uint8_t);
 static	int ahci_intr_set_device_bits(ahci_ctl_t *, ahci_port_t *, uint8_t);
+static	int ahci_intr_ncq_events(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *);
+static	int ahci_intr_pmult_sntf_events(ahci_ctl_t *, ahci_port_t *, uint8_t);
 static	int ahci_intr_port_connect_change(ahci_ctl_t *, ahci_port_t *, uint8_t);
 static	int ahci_intr_device_mechanical_presence_status(ahci_ctl_t *,
     ahci_port_t *, uint8_t);
@@ -159,6 +187,7 @@
     uint8_t, uint32_t);
 static	int ahci_intr_cold_port_detect(ahci_ctl_t *, ahci_port_t *, uint8_t);
 
+static	void ahci_get_ahci_addr(ahci_ctl_t *, sata_device_t *, ahci_addr_t *);
 static	int ahci_get_num_implemented_ports(uint32_t);
 static  void ahci_log_fatal_error_message(ahci_ctl_t *, uint8_t port,
     uint32_t);
@@ -668,13 +697,34 @@
 		    "hba does not support 64-bit addressing", NULL);
 	}
 
+	/* Checking for the support of Port Multiplier */
+	if (cap_status & AHCI_HBA_CAP_SPM) {
+		ahci_ctlp->ahcictl_cap |= AHCI_CAP_PMULT_CBSS;
+		AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp,
+		    "hba supports port multiplier (CBSS)", NULL);
+
+		/* Support FIS-based switching ? */
+		if (cap_status & AHCI_HBA_CAP_FBSS) {
+			ahci_ctlp->ahcictl_cap |= AHCI_CAP_PMULT_FBSS;
+			AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp,
+			    "hba supports FIS-based switching (FBSS)", NULL);
+		}
+	}
+
 	/* Checking for Support Command List Override */
 	if (cap_status & AHCI_HBA_CAP_SCLO) {
 		ahci_ctlp->ahcictl_cap |= AHCI_CAP_SCLO;
-		AHCIDBG(AHCIDBG_INIT, ahci_ctlp,
+		AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp,
 		    "hba supports command list override.", NULL);
 	}
 
+	/* Checking for Asynchronous Notification */
+	if (cap_status & AHCI_HBA_CAP_SSNTF) {
+		ahci_ctlp->ahcictl_cap |= AHCI_CAP_SNTF;
+		AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp,
+		    "hba supports asynchronous notification.", NULL);
+	}
+
 	if (pci_config_setup(dip, &ahci_ctlp->ahcictl_pci_conf_handle)
 	    != DDI_SUCCESS) {
 		cmn_err(CE_WARN, "!ahci%d: Cannot set up pci configure space",
@@ -1037,7 +1087,7 @@
 	/* Allocate memory for the sata_hba_tran  */
 	sata_hba_tran = kmem_zalloc(sizeof (sata_hba_tran_t), KM_SLEEP);
 
-	sata_hba_tran->sata_tran_hba_rev = SATA_TRAN_HBA_REV_2;
+	sata_hba_tran->sata_tran_hba_rev = SATA_TRAN_HBA_REV_3;
 	sata_hba_tran->sata_tran_hba_dip = ahci_ctlp->ahcictl_dip;
 	sata_hba_tran->sata_tran_hba_dma_attr =
 	    &ahci_ctlp->ahcictl_buffer_dma_attr;
@@ -1079,6 +1129,18 @@
 		    "Command Queuing", NULL);
 	}
 
+	/* Support port multiplier? */
+	if (cap_status & AHCI_HBA_CAP_SPM) {
+		sata_hba_tran->sata_tran_hba_features_support |=
+		    SATA_CTLF_PORT_MULTIPLIER;
+
+		/* Support FIS-based switching for port multiplier? */
+		if (cap_status & AHCI_HBA_CAP_FBSS) {
+			sata_hba_tran->sata_tran_hba_features_support |=
+			    SATA_CTLF_PMULT_FBS;
+		}
+	}
+
 	/* Report the number of command slots */
 	sata_hba_tran->sata_tran_hba_qdepth = ahci_ctlp->ahcictl_num_cmd_slots;
 
@@ -1140,6 +1202,15 @@
 	return (AHCI_SUCCESS);
 }
 
+#define	SET_PORTSTR(str, addrp)						\
+	if (AHCI_ADDR_IS_PORT(addrp))					\
+		(void) sprintf((str), "%d", (addrp)->aa_port);		\
+	else if (AHCI_ADDR_IS_PMULT(addrp))				\
+		(void) sprintf((str), "%d (pmult)", (addrp)->aa_port);	\
+	else								\
+		(void) sprintf((str), "%d:%d", (addrp)->aa_port,	\
+		    (addrp)->aa_pmport);
+
 /*
  * ahci_tran_probe_port is called by SATA framework. It returns port state,
  * port status registers and an attached device type via sata_device
@@ -1155,67 +1226,128 @@
 {
 	ahci_ctl_t *ahci_ctlp;
 	ahci_port_t *ahci_portp;
-	uint8_t	cport = sd->satadev_addr.cport;
-	uint8_t pmport = sd->satadev_addr.pmport;
-	uint8_t qual = sd->satadev_addr.qual;
-	uint8_t	device_type;
+	ahci_addr_t addr, pmult_addr;
+	uint8_t cport = sd->satadev_addr.cport;
+	char portstr[10];
+	uint8_t device_type;
 	uint32_t port_state;
 	uint8_t port;
+	int rval = SATA_SUCCESS, rval_init;
 
 	ahci_ctlp = ddi_get_soft_state(ahci_statep, ddi_get_instance(dip));
 	port = ahci_ctlp->ahcictl_cport_to_port[cport];
 
-	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
-	    "ahci_tran_probe_port enter: cport: %d, "
-	    "pmport: %d, qual: %d", cport, pmport, qual);
-
 	ahci_portp = ahci_ctlp->ahcictl_ports[port];
 
 	mutex_enter(&ahci_portp->ahciport_mutex);
 
-	port_state = ahci_portp->ahciport_port_state;
+	ahci_get_ahci_addr(ahci_ctlp, sd, &addr);
+	ASSERT(AHCI_ADDR_IS_VALID(&addr));
+	SET_PORTSTR(portstr, &addr);
+
+	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
+	    "ahci_tran_probe_port enter: port %s", portstr);
+
+	if ((AHCI_ADDR_IS_PMULT(&addr) || AHCI_ADDR_IS_PMPORT(&addr)) &&
+	    (ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT ||
+	    ahci_portp->ahciport_pmult_info == NULL)) {
+		/* port mutliplier is removed. */
+		AHCIDBG(AHCIDBG_PMULT, ahci_ctlp,
+		    "ahci_tran_probe_port: "
+		    "port-pmult is removed from port %s", portstr);
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		return (SATA_FAILURE);
+	}
+
+	/*
+	 * The sata_device may refer to
+	 * 1. A controller port.
+	 *    A controller port should be ready here.
+	 * 2. A port multiplier.
+	 *    If it has not been initialized, initialized it. If it is
+	 *    initilaized, we need check the status of all its device ports.
+	 * 3. A port multiplier port.
+	 *    If it has not been initialized, initialized it.
+	 *
+	 * A port multiplier or a port multiplier port may require some
+	 * initialization because we cannot do these time-consuming jobs in an
+	 * interrupt context.
+	 */
+	if (AHCI_ADDR_IS_PORT(&addr)) {
+		if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) {
+			AHCI_ADDR_SET_PMULT(&pmult_addr, port);
+			port_state = ahci_portp->ahciport_port_state;
+			/* SATA Framework's first probe? */
+			if (!(port_state & SATA_DSTATE_PMULT_INIT)) {
+				/* Initialize registers on a port multiplier */
+				rval_init = ahci_initialize_pmult(ahci_ctlp,
+				    ahci_portp, &pmult_addr, sd);
+				if (rval_init != AHCI_SUCCESS) {
+					AHCIDBG(AHCIDBG_PMULT, ahci_ctlp,
+					    "ahci_tran_probe_port: "
+					    "pmult initialization failed.",
+					    NULL);
+					mutex_exit(&ahci_portp->ahciport_mutex);
+					return (SATA_FAILURE);
+				}
+			}
+		}
+	} else if (AHCI_ADDR_IS_PMULT(&addr)) {
+		/* Check pmports hotplug events */
+		(void) ahci_probe_pmult(ahci_ctlp, ahci_portp, &addr);
+	} else if (AHCI_ADDR_IS_PMPORT(&addr)) {
+		if (ahci_probe_pmport(ahci_ctlp, ahci_portp,
+		    &addr, sd) != AHCI_SUCCESS) {
+			rval = SATA_FAILURE;
+			goto out;
+		}
+	}
+
+	/* Update port state and device type */
+	port_state = AHCIPORT_GET_STATE(ahci_portp, &addr);
+
 	switch (port_state) {
 
 	case SATA_PSTATE_FAILED:
 		sd->satadev_state = SATA_PSTATE_FAILED;
 		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
-		    "ahci_tran_probe_port: port %d PORT FAILED", port);
+		    "ahci_tran_probe_port: port %s PORT FAILED", portstr);
 		goto out;
 
 	case SATA_PSTATE_SHUTDOWN:
 		sd->satadev_state = SATA_PSTATE_SHUTDOWN;
 		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
-		    "ahci_tran_probe_port: port %d PORT SHUTDOWN", port);
+		    "ahci_tran_probe_port: port %s PORT SHUTDOWN", portstr);
 		goto out;
 
 	case SATA_PSTATE_PWROFF:
 		sd->satadev_state = SATA_PSTATE_PWROFF;
 		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
-		    "ahci_tran_probe_port: port %d PORT PWROFF", port);
+		    "ahci_tran_probe_port: port %s PORT PWROFF", portstr);
 		goto out;
 
 	case SATA_PSTATE_PWRON:
 		sd->satadev_state = SATA_PSTATE_PWRON;
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
-		    "ahci_tran_probe_port: port %d PORT PWRON", port);
+		    "ahci_tran_probe_port: port %s PORT PWRON", portstr);
 		break;
 
 	default:
 		sd->satadev_state = port_state;
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
-		    "ahci_tran_probe_port: port %d PORT NORMAL %x",
-		    port, port_state);
+		    "ahci_tran_probe_port: port %s PORT NORMAL %x",
+		    portstr, port_state);
 		break;
 	}
 
-	device_type = ahci_portp->ahciport_device_type;
+	device_type = AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr);
 
 	switch (device_type) {
 
 	case SATA_DTYPE_ATADISK:
 		sd->satadev_type = SATA_DTYPE_ATADISK;
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
-		    "ahci_tran_probe_port: port %d DISK found", port);
+		    "ahci_tran_probe_port: port %s DISK found", portstr);
 		break;
 
 	case SATA_DTYPE_ATAPI:
@@ -1227,35 +1359,62 @@
 		 */
 		sd->satadev_type = SATA_DTYPE_ATAPI;
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
-		    "ahci_tran_probe_port: port %d ATAPI found", port);
+		    "ahci_tran_probe_port: port %s ATAPI found", portstr);
 		break;
 
 	case SATA_DTYPE_PMULT:
+		ASSERT(AHCI_ADDR_IS_PORT(&addr)||AHCI_ADDR_IS_PMULT(&addr));
 		sd->satadev_type = SATA_DTYPE_PMULT;
+
+		/* Update the number of pmports. */
+		ASSERT(ahci_portp->ahciport_pmult_info != NULL);
+		sd->satadev_add_info = ahci_portp->
+		    ahciport_pmult_info->ahcipmi_num_dev_ports;
+
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
-		    "ahci_tran_probe_port: port %d Port Multiplier found",
-		    port);
+		    "ahci_tran_probe_port: port %s Port Multiplier found",
+		    portstr);
 		break;
 
 	case SATA_DTYPE_UNKNOWN:
 		sd->satadev_type = SATA_DTYPE_UNKNOWN;
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
-		    "ahci_tran_probe_port: port %d Unknown device found", port);
+		    "ahci_tran_probe_port: port %s Unknown device found",
+		    portstr);
 		break;
 
 	default:
 		/* we don't support any other device types */
 		sd->satadev_type = SATA_DTYPE_NONE;
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
-		    "ahci_tran_probe_port: port %d No device found", port);
+		    "ahci_tran_probe_port: port %s No device found", portstr);
 		break;
 	}
 
 out:
-	ahci_update_sata_registers(ahci_ctlp, port, sd);
+	/* Register update only fails while probing a pmult/pmport */
+	if (AHCI_ADDR_IS_PORT(&addr)) {
+		ahci_update_sata_registers(ahci_ctlp, port, sd);
+		if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT)
+			if (port_state & SATA_STATE_READY)
+				if (ahci_update_pmult_gscr(ahci_ctlp,
+				    &pmult_addr, sd) != AHCI_SUCCESS)
+					rval = SATA_FAILURE;
+	} else if (AHCI_ADDR_IS_PMULT(&addr)) {
+		ahci_update_sata_registers(ahci_ctlp, port, sd);
+		if (port_state & SATA_STATE_READY)
+			if (ahci_update_pmult_gscr(ahci_ctlp,
+			    &addr, sd) != AHCI_SUCCESS)
+				rval = SATA_FAILURE;
+	} else if (AHCI_ADDR_IS_PMPORT(&addr)) {
+		if (port_state & SATA_STATE_READY)
+			if (ahci_update_pmult_pscr(ahci_ctlp,
+			    &addr, sd) != AHCI_SUCCESS)
+				rval = SATA_FAILURE;
+	}
+
 	mutex_exit(&ahci_portp->ahciport_mutex);
-
-	return (SATA_SUCCESS);
+	return (rval);
 }
 
 /*
@@ -1292,8 +1451,11 @@
 {
 	ahci_ctl_t *ahci_ctlp;
 	ahci_port_t *ahci_portp;
+	ahci_addr_t addr, addr_pmult;
+	sata_device_t sdevice;
 	uint8_t	cport = spkt->satapkt_device.satadev_addr.cport;
 	uint8_t port;
+	char portstr[10];
 
 	ahci_ctlp = ddi_get_soft_state(ahci_statep, ddi_get_instance(dip));
 	port = ahci_ctlp->ahcictl_cport_to_port[cport];
@@ -1305,12 +1467,75 @@
 	ahci_portp = ahci_ctlp->ahcictl_ports[port];
 
 	mutex_enter(&ahci_portp->ahciport_mutex);
-
-	if (ahci_portp->ahciport_port_state & SATA_PSTATE_FAILED |
-	    ahci_portp->ahciport_port_state & SATA_PSTATE_SHUTDOWN |
-	    ahci_portp->ahciport_port_state & SATA_PSTATE_PWROFF) {
+	ahci_get_ahci_addr(ahci_ctlp, &spkt->satapkt_device, &addr);
+	SET_PORTSTR(portstr, &addr);
+
+	/* Sanity check */
+	if (AHCI_ADDR_IS_PMPORT(&addr)) {
+		if (ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT ||
+		    ahci_portp->ahciport_pmult_info == NULL) {
+
+			spkt->satapkt_reason = SATA_PKT_PORT_ERROR;
+			spkt->satapkt_device.satadev_type = SATA_DTYPE_NONE;
+			spkt->satapkt_device.satadev_state = SATA_STATE_UNKNOWN;
+			ahci_update_sata_registers(ahci_ctlp, port,
+			    &spkt->satapkt_device);
+			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+			    "ahci_tran_start returning PORT_ERROR while "
+			    "pmult removed: port: %s", portstr);
+			mutex_exit(&ahci_portp->ahciport_mutex);
+			return (SATA_TRAN_PORT_ERROR);
+		}
+
+		/* Port multiplier and pmport are correctly initialized? */
+		if (!(ahci_portp->ahciport_port_state &
+		    SATA_DSTATE_PMULT_INIT)) {
+			AHCI_ADDR_SET_PMULT(&addr_pmult, port);
+			if (!ddi_in_panic() ||
+			    ahci_initialize_pmult(ahci_ctlp, ahci_portp,
+			    &addr_pmult, &sdevice) != AHCI_SUCCESS) {
+				spkt->satapkt_reason = SATA_PKT_PORT_ERROR;
+				spkt->satapkt_device.satadev_type =
+				    AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr);
+				spkt->satapkt_device.satadev_state =
+				    AHCIPORT_GET_STATE(ahci_portp, &addr);
+				ahci_update_sata_registers(ahci_ctlp, port,
+				    &spkt->satapkt_device);
+				AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+				    "ahci_tran_start returning PORT_ERROR "
+				    "while pmult is not initialized "
+				    "at port %d", port);
+				mutex_exit(&ahci_portp->ahciport_mutex);
+				return (SATA_TRAN_PORT_ERROR);
+			}
+		}
+		if (!(AHCIPORT_GET_STATE(ahci_portp, &addr) &
+		    SATA_STATE_READY)) {
+			if (!ddi_in_panic() ||
+			    ahci_initialize_pmport(ahci_ctlp,
+			    ahci_portp, &addr) != AHCI_SUCCESS) {
+				spkt->satapkt_reason = SATA_PKT_PORT_ERROR;
+				spkt->satapkt_device.satadev_type =
+				    AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr);
+				spkt->satapkt_device.satadev_state =
+				    AHCIPORT_GET_STATE(ahci_portp, &addr);
+				ahci_update_sata_registers(ahci_ctlp, port,
+				    &spkt->satapkt_device);
+				AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+				    "ahci_tran_start returning PORT_ERROR "
+				    "while sub-link is not initialized "
+				    "at port: %s", portstr);
+				mutex_exit(&ahci_portp->ahciport_mutex);
+				return (SATA_TRAN_PORT_ERROR);
+			}
+		}
+	}
+
+	if (AHCIPORT_GET_STATE(ahci_portp, &addr) & SATA_PSTATE_FAILED ||
+	    AHCIPORT_GET_STATE(ahci_portp, &addr) & SATA_PSTATE_SHUTDOWN||
+	    AHCIPORT_GET_STATE(ahci_portp, &addr) & SATA_PSTATE_PWROFF) {
 		/*
-		 * In case the targer driver would send the packet before
+		 * In case the target driver would send the packet before
 		 * sata framework can have the opportunity to process those
 		 * event reports.
 		 */
@@ -1322,12 +1547,12 @@
 		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
 		    "ahci_tran_start returning PORT_ERROR while "
 		    "port in FAILED/SHUTDOWN/PWROFF state: "
-		    "port: %d", port);
+		    "port: %s", portstr);
 		mutex_exit(&ahci_portp->ahciport_mutex);
 		return (SATA_TRAN_PORT_ERROR);
 	}
 
-	if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE) {
+	if (AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr) == SATA_DTYPE_NONE) {
 		/*
 		 * ahci_intr_phyrdy_change() may have rendered it to
 		 * SATA_DTYPE_NONE.
@@ -1340,11 +1565,31 @@
 		    &spkt->satapkt_device);
 		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
 		    "ahci_tran_start returning PORT_ERROR while "
-		    "no device attached: port: %d", port);
+		    "no device attached: port: %s", portstr);
 		mutex_exit(&ahci_portp->ahciport_mutex);
 		return (SATA_TRAN_PORT_ERROR);
 	}
 
+	/* R/W PMULT command will occupy the whole HBA port */
+	if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) {
+		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+		    "ahci_tran_start returning BUSY while "
+		    "executing READ/WRITE PORT-MULT command: "
+		    "port: %s", portstr);
+		spkt->satapkt_reason = SATA_PKT_BUSY;
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		return (SATA_TRAN_BUSY);
+	}
+
+	if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_HOTPLUG) {
+		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+		    "ahci_tran_start returning BUSY while "
+		    "hot-plug in progress: port: %s", portstr);
+		spkt->satapkt_reason = SATA_PKT_BUSY;
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		return (SATA_TRAN_BUSY);
+	}
+
 	/*
 	 * SATA HBA driver should remember that a device was reset and it
 	 * is supposed to reject any packets which do not specify either
@@ -1356,10 +1601,16 @@
 	 * has a chance to restore the device setting (such as cache enable/
 	 * disable or other resettable stuff).
 	 */
+	/*
+	 * It is unnecessary to use specific flags to indicate
+	 * reset_in_progress for a pmport. While mopping, all command will be
+	 * mopped so that the entire HBA port is being dealt as a single
+	 * object.
+	 */
 	if (spkt->satapkt_cmd.satacmd_flags.sata_clear_dev_reset) {
 		ahci_portp->ahciport_reset_in_progress = 0;
 		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
-		    "ahci_tran_start clearing the "
+		    "ahci_tran_start [CLEAR] the "
 		    "reset_in_progress for port: %d", port);
 	}
 
@@ -1374,6 +1625,14 @@
 		return (SATA_TRAN_BUSY);
 	}
 
+#ifdef AHCI_DEBUG
+	if (spkt->satapkt_cmd.satacmd_flags.sata_ignore_dev_reset) {
+		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+		    "ahci_tran_start: packet 0x%p [PASSTHRU] at port %d",
+		    spkt, port);
+	}
+#endif
+
 	if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_MOPPING) {
 		spkt->satapkt_reason = SATA_PKT_BUSY;
 		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
@@ -1401,7 +1660,7 @@
 		}
 
 		/* We need to do the sync start now */
-		if (ahci_do_sync_start(ahci_ctlp, ahci_portp, port,
+		if (ahci_do_sync_start(ahci_ctlp, ahci_portp, &addr,
 		    spkt) == AHCI_FAILURE) {
 			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_tran_start "
 			    "return QUEUE_FULL: port %d", port);
@@ -1410,7 +1669,7 @@
 		}
 	} else {
 		/* Async start, using interrupt */
-		if (ahci_deliver_satapkt(ahci_ctlp, ahci_portp, port, spkt)
+		if (ahci_deliver_satapkt(ahci_ctlp, ahci_portp, &addr, spkt)
 		    == AHCI_FAILURE) {
 			spkt->satapkt_reason = SATA_PKT_QUEUE_FULL;
 			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_tran_start "
@@ -1421,7 +1680,7 @@
 	}
 
 	AHCIDBG(AHCIDBG_INFO, ahci_ctlp, "ahci_tran_start "
-	    "sata tran accepted: port %d", port);
+	    "sata tran accepted: port %s", portstr);
 
 	mutex_exit(&ahci_portp->ahciport_mutex);
 	return (SATA_TRAN_ACCEPTED);
@@ -1438,20 +1697,21 @@
  */
 static int
 ahci_do_sync_start(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
-    uint8_t port, sata_pkt_t *spkt)
+    ahci_addr_t *addrp, sata_pkt_t *spkt)
 {
 	int pkt_timeout_ticks;
 	uint32_t timeout_tags;
 	int rval;
 	int instance = ddi_get_instance(ahci_ctlp->ahcictl_dip);
+	uint8_t port = addrp->aa_port;
 
 	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, "ahci_do_sync_start enter: "
-	    "port %d spkt 0x%p", port, spkt);
+	    "port %d:%d spkt 0x%p", port, addrp->aa_pmport, spkt);
 
 	if (spkt->satapkt_op_mode & SATA_OPMODE_POLLING) {
 		ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_POLLING;
 		if ((rval = ahci_deliver_satapkt(ahci_ctlp, ahci_portp,
-		    port, spkt)) == AHCI_FAILURE) {
+		    addrp, spkt)) == AHCI_FAILURE) {
 			ahci_portp->ahciport_flags &= ~ AHCI_PORT_FLAG_POLLING;
 			return (rval);
 		}
@@ -1465,10 +1725,10 @@
 			/* Simulate the interrupt */
 			ahci_port_intr(ahci_ctlp, ahci_portp, port);
 
-			drv_usecwait(AHCI_1MS_USECS);
+			drv_usecwait(AHCI_10MS_USECS);
 
 			mutex_enter(&ahci_portp->ahciport_mutex);
-			pkt_timeout_ticks -= AHCI_1MS_TICKS;
+			pkt_timeout_ticks -= AHCI_10MS_TICKS;
 			if (pkt_timeout_ticks < 0) {
 				cmn_err(CE_WARN, "!ahci%d: ahci_do_sync_start "
 				    "port %d satapkt 0x%p timed out\n",
@@ -1485,7 +1745,7 @@
 
 	} else {
 		if ((rval = ahci_deliver_satapkt(ahci_ctlp, ahci_portp,
-		    port, spkt)) == AHCI_FAILURE)
+		    addrp, spkt)) == AHCI_FAILURE)
 			return (rval);
 
 #if AHCI_DEBUG
@@ -1529,27 +1789,33 @@
 /*
  * Searches for and claims a free command slot.
  *
- * Returns:
- *
- * AHCI_FAILURE if failed
- *	1. if no empty slot left
- *	2. non-queued command requested while queued command(s) is outstanding
- *	3. queued command requested whild non-queued command(s) is outstanding
+ * Returns value:
+ *
+ * AHCI_FAILURE returned only if
+ *	1. No empty slot left
+ *	2. Non-queued command requested while queued command(s) is outstanding
+ *	3. Queued command requested while non-queued command(s) is outstanding
  *	4. HBA doesn't support multiple-use of command list while already a
- *	non-queued command is oustanding
- *
- * claimed slot number if succeeded
- *
- * NOTE: it will always return slot 0 during error recovery process for
- * REQUEST SENSE or READ LOG EXT command to simplify the algorithm.
+ *	   non-queued command is oustanding
+ *	5. Queued command requested while some queued command(s) has been
+ *	   outstanding on a different port multiplier port. (AHCI spec 1.2,
+ *	   9.1.2)
+ *
+ * claimed slot number returned if succeeded
+ *
+ * NOTE: it will always return slot 0 for following commands to simplify the
+ * algorithm.
+ * 	1. REQUEST SENSE or READ LOG EXT command during error recovery process
+ * 	2. READ/WRITE PORTMULT command
  *
  * WARNING!!! ahciport_mutex should be acquired before the function
  * is called.
  */
 static int
-ahci_claim_free_slot(ahci_ctl_t *ahci_ctlp,
-    ahci_port_t *ahci_portp, int command_type)
+ahci_claim_free_slot(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
+    ahci_addr_t *addrp, int command_type)
 {
+	uint32_t port_cmd_issue;
 	uint32_t free_slots;
 	int slot;
 
@@ -1574,6 +1840,14 @@
 			    "command", NULL);
 			return (AHCI_FAILURE);
 		}
+		if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) {
+			AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+			    "ahci_claim_free_slot: there is still pending "
+			    "read/write port-mult command(s) in command list, "
+			    "so no available slot for the non-queued command",
+			    NULL);
+			return (AHCI_FAILURE);
+		}
 		if ((ahci_ctlp->ahcictl_cap & AHCI_CAP_NO_MCMDLIST_NONQUEUE) &&
 		    NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) {
 			AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
@@ -1594,6 +1868,31 @@
 			    NULL);
 			return (AHCI_FAILURE);
 		}
+
+		/*
+		 * NCQ commands cannot be sent to different port multiplier
+		 * ports in Command-Based Switching mode
+		 */
+		/*
+		 * NOTE: In Command-Based Switching mode, AHCI controller
+		 * usually reports a 'Handshake Error' when multiple NCQ
+		 * commands are outstanding simultaneously.
+		 */
+		if (AHCIPORT_DEV_TYPE(ahci_portp, addrp) == SATA_DTYPE_PMULT) {
+			ASSERT(ahci_portp->ahciport_pmult_info != NULL);
+			if (!(ahci_ctlp->ahcictl_cap & AHCI_CAP_PMULT_FBSS) &&
+			    NCQ_CMD_IN_PROGRESS(ahci_portp) &&
+			    AHCIPORT_NCQ_PMPORT(ahci_portp) !=
+			    addrp->aa_pmport) {
+				AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
+				    "ahci_claim_free_slot: there is still "
+				    "pending queued command(s) in the "
+				    "command list for another Port Multiplier "
+				    "port, so no available slot.", NULL);
+				return (AHCI_FAILURE);
+			}
+		}
+
 		free_slots = (~ahci_portp->ahciport_pending_ncq_tags) &
 		    AHCI_NCQ_SLOT_MASK(ahci_portp);
 	} else if (command_type == AHCI_ERR_RETRI_CMD) {
@@ -1603,6 +1902,30 @@
 		    "SENSE or READ LOG EXT command", NULL);
 		slot = 0;
 		goto out;
+	} else if (command_type == AHCI_RDWR_PMULT_CMD) {
+		/*
+		 * An extra check on PxCI. Sometimes PxCI bits may not be
+		 * cleared during hot-plug or error recovery process.
+		 */
+		port_cmd_issue = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxCI(ahci_ctlp, addrp->aa_port));
+
+		if (port_cmd_issue != 0) {
+			AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+			    "ahci_claim_free_slot: there is still pending "
+			    "command(s) in command list (0x%x/0x%x, PxCI %x),"
+			    "so no available slot for R/W PMULT command.",
+			    NON_NCQ_CMD_IN_PROGRESS(ahci_portp),
+			    NCQ_CMD_IN_PROGRESS(ahci_portp),
+			    port_cmd_issue);
+			return (AHCI_FAILURE);
+		}
+
+		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
+		    "ahci_claim_free_slot: slot 0 is allocated for "
+		    "READ/WRITE PORTMULT command", NULL);
+		slot = 0;
+		goto out;
 	}
 
 	slot = ddi_ffs(free_slots) - 1;
@@ -1621,6 +1944,10 @@
 	 */
 	if (command_type == AHCI_NCQ_CMD) {
 		ahci_portp->ahciport_pending_ncq_tags |= (0x1 << slot);
+		if (AHCI_ADDR_IS_PMPORT(addrp)) {
+			ASSERT(ahci_portp->ahciport_pmult_info != NULL);
+			AHCIPORT_NCQ_PMPORT(ahci_portp) = addrp->aa_pmport;
+		}
 	}
 
 	ahci_portp->ahciport_pending_tags |= (0x1 << slot);
@@ -1643,7 +1970,7 @@
  */
 static int
 ahci_deliver_satapkt(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
-    uint8_t port, sata_pkt_t *spkt)
+    ahci_addr_t *addrp, sata_pkt_t *spkt)
 {
 	int cmd_slot;
 	sata_cmd_t *scmd;
@@ -1655,11 +1982,15 @@
 	int command_type = AHCI_NON_NCQ_CMD;
 	int ncq_qdepth;
 	int instance = ddi_get_instance(ahci_ctlp->ahcictl_dip);
+	uint8_t port, pmport;
 #if AHCI_DEBUG
 	uint32_t *ptr;
 	uint8_t *ptr2;
 #endif
 
+	port = addrp->aa_port;
+	pmport = addrp->aa_pmport;
+
 	spkt->satapkt_reason = SATA_PKT_BUSY;
 
 	scmd = &spkt->satapkt_cmd;
@@ -1708,8 +2039,13 @@
 	if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp))
 		command_type = AHCI_ERR_RETRI_CMD;
 
+	/* Check if the command is an read/write pmult command */
+	if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp))
+		command_type = AHCI_RDWR_PMULT_CMD;
+
 	/* Check if there is an empty command slot */
-	cmd_slot = ahci_claim_free_slot(ahci_ctlp, ahci_portp, command_type);
+	cmd_slot = ahci_claim_free_slot(ahci_ctlp, ahci_portp,
+	    addrp, command_type);
 	if (cmd_slot == AHCI_FAILURE) {
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp, "no free command slot", NULL);
 		return (AHCI_FAILURE);
@@ -1728,11 +2064,13 @@
 	    &(cmd_table->ahcict_command_fis.ahcifc_fis.ahcifc_h2d_register);
 
 	SET_FIS_TYPE(h2d_register_fisp, AHCI_H2D_REGISTER_FIS_TYPE);
-	if ((spkt->satapkt_device.satadev_addr.qual == SATA_ADDR_PMPORT) ||
-	    (spkt->satapkt_device.satadev_addr.qual == SATA_ADDR_DPMPORT)) {
-		SET_FIS_PMP(h2d_register_fisp,
-		    spkt->satapkt_device.satadev_addr.pmport);
-	}
+
+	/*
+	 * PMP field only make sense when target is a port multiplier or a
+	 * device behind a port multiplier. Otherwise should set it to 0.
+	 */
+	if (AHCI_ADDR_IS_PMULT(addrp) || AHCI_ADDR_IS_PMPORT(addrp))
+		SET_FIS_PMP(h2d_register_fisp, pmport);
 
 	SET_FIS_CDMDEVCTL(h2d_register_fisp, 1);
 	SET_FIS_COMMAND(h2d_register_fisp, scmd->satacmd_cmd_reg);
@@ -1753,6 +2091,8 @@
 		 * 	SATAC_SMART
 		 * 	SATAC_ID_PACKET_DEVICE
 		 * 	SATAC_ID_DEVICE
+		 * 	SATAC_READ_PORTMULT
+		 * 	SATAC_WRITE_PORTMULT
 		 */
 		/* FALLTHRU */
 
@@ -1864,6 +2204,13 @@
 	/* Set the length of the command in the CFIS area */
 	SET_COMMAND_FIS_LENGTH(cmd_header, AHCI_H2D_REGISTER_FIS_LENGTH);
 
+	/*
+	 * PMP field only make sense when target is a port multiplier or a
+	 * device behind a port multiplier. Otherwise should set it to 0.
+	 */
+	if (AHCI_ADDR_IS_PMULT(addrp) || AHCI_ADDR_IS_PMPORT(addrp))
+		SET_PORT_MULTI_PORT(cmd_header, pmport);
+
 	AHCIDBG(AHCIDBG_INFO, ahci_ctlp, "command data direction is "
 	    "sata_data_direction = 0x%x",
 	    scmd->satacmd_flags.sata_data_direction);
@@ -1886,8 +2233,13 @@
 	if (command_type != AHCI_NCQ_CMD)
 		SET_PREFETCHABLE(cmd_header, AHCI_CMDHEAD_PREFETCHABLE);
 
-	/* Now remember the sata packet in ahciport_slot_pkts[]. */
-	if (!ERR_RETRI_CMD_IN_PROGRESS(ahci_portp))
+	/*
+	 * Now remember the sata packet in ahciport_slot_pkts[].
+	 * Error retrieval command and r/w port multiplier command will
+	 * be stored specifically for each port.
+	 */
+	if (!ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) &&
+	    !RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp))
 		ahci_portp->ahciport_slot_pkts[cmd_slot] = spkt;
 
 	/*
@@ -2008,6 +2360,19 @@
 		return (SATA_SUCCESS);
 	}
 
+	/*
+	 * If AHCI_PORT_FLAG_RDWR_PMULT flag is set, it means a R/W PMULT
+	 * command is being executed so no other commands is outstanding,
+	 * nothing to do.
+	 */
+	if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_RDWR_PMULT) {
+		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
+		    "ahci_tran_abort: port %d is reading/writing "
+		    "port multiplier, so just return directly ", port);
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		return (SATA_SUCCESS);
+	}
+
 	if (ahci_portp->ahciport_port_state & SATA_PSTATE_FAILED |
 	    ahci_portp->ahciport_port_state & SATA_PSTATE_SHUTDOWN |
 	    ahci_portp->ahciport_port_state & SATA_PSTATE_PWROFF) {
@@ -2140,15 +2505,17 @@
  * Used to do device reset and reject all the pending packets on a device
  * during the reset operation.
  *
+ * NOTE: ONLY called by ahci_tran_reset_dport
  * WARNING!!! ahciport_mutex should be acquired before the function is called.
  */
 static int
 ahci_reset_device_reject_pkts(ahci_ctl_t *ahci_ctlp,
-    ahci_port_t *ahci_portp, uint8_t port)
+    ahci_port_t *ahci_portp, ahci_addr_t *addrp)
 {
 	uint32_t slot_status = 0;
 	uint32_t reset_tags = 0;
 	uint32_t finished_tags = 0;
+	uint8_t port = addrp->aa_port;
 	sata_device_t sdevice;
 	int ret;
 
@@ -2176,12 +2543,12 @@
 		reset_tags = slot_status & AHCI_NCQ_SLOT_MASK(ahci_portp);
 	}
 
-	if (ahci_software_reset(ahci_ctlp, ahci_portp, port)
+	if (ahci_software_reset(ahci_ctlp, ahci_portp, addrp)
 	    != AHCI_SUCCESS) {
 		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
 		    "Try to do a port reset after software "
 		    "reset failed", port);
-		ret = ahci_port_reset(ahci_ctlp, ahci_portp, port);
+		ret = ahci_port_reset(ahci_ctlp, ahci_portp, addrp);
 		if (ret != AHCI_SUCCESS) {
 			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
 			    "ahci_reset_device_reject_pkts: port %d "
@@ -2200,7 +2567,6 @@
 	sdevice.satadev_addr.cport = ahci_ctlp->ahcictl_port_to_cport[port];
 	sdevice.satadev_addr.pmport = 0;
 	sdevice.satadev_addr.qual = SATA_ADDR_DCPORT;
-
 	sdevice.satadev_state = SATA_DSTATE_RESET |
 	    SATA_DSTATE_PWR_ACTIVE;
 	mutex_exit(&ahci_portp->ahciport_mutex);
@@ -2211,7 +2577,7 @@
 	mutex_enter(&ahci_portp->ahciport_mutex);
 
 	AHCIDBG(AHCIDBG_EVENT, ahci_ctlp,
-	    "port %d sending event up: SATA_EVNT_RESET", port);
+	    "port %d sending event up: SATA_EVNT_DEVICE_RESET", port);
 
 	/* Next try to mop the pending commands */
 	if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp))
@@ -2235,6 +2601,109 @@
 }
 
 /*
+ * Used to do device reset and reject all the pending packets on a device
+ * during the reset operation.
+ *
+ * NOTE: ONLY called by ahci_tran_reset_dport
+ * WARNING!!! ahciport_mutex should be acquired before the function is called.
+ */
+static int
+ahci_reset_pmdevice_reject_pkts(ahci_ctl_t *ahci_ctlp,
+    ahci_port_t *ahci_portp, ahci_addr_t *addrp)
+{
+	uint32_t finished_tags = 0, reset_tags = 0, slot_status = 0;
+	uint8_t port = addrp->aa_port;
+	uint8_t pmport = addrp->aa_pmport;
+	sata_device_t sdevice;
+
+	AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_PMULT, ahci_ctlp,
+	    "ahci_reset_pmdevice_reject_pkts at port %d:%d", port, pmport);
+
+	if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_MOPPING) {
+		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+		    "ahci_reset_pmdevice_reject_pkts: port %d is in "
+		    "mopping process, so return directly ", port);
+		return (SATA_SUCCESS);
+	}
+
+	/* Checking for outstanding commands */
+	if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) {
+		slot_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxCI(ahci_ctlp, port));
+		reset_tags = slot_status & AHCI_SLOT_MASK(ahci_ctlp);
+	} else if (NCQ_CMD_IN_PROGRESS(ahci_portp)) {
+		slot_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxSACT(ahci_ctlp, port));
+		reset_tags = slot_status & AHCI_NCQ_SLOT_MASK(ahci_portp);
+	}
+
+	/* Issue SOFTWARE reset command. */
+	if (ahci_software_reset(ahci_ctlp, ahci_portp, addrp)
+	    != AHCI_SUCCESS) {
+		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+		    "Try to do a port reset after software "
+		    "reset failed", port);
+		return (SATA_FAILURE);
+	}
+
+	/* Set the reset in progress flag */
+	ahci_portp->ahciport_reset_in_progress = 1;
+
+	ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_MOPPING;
+	ahci_portp->ahciport_mop_in_progress++;
+
+	/* Indicate to the framework that a reset has happened */
+	bzero((void *)&sdevice, sizeof (sata_device_t));
+	sdevice.satadev_addr.cport = ahci_ctlp->ahcictl_port_to_cport[port];
+	sdevice.satadev_addr.pmport = pmport;
+	if (AHCI_ADDR_IS_PMULT(addrp))
+		sdevice.satadev_addr.qual = SATA_ADDR_PMULT;
+	else
+		sdevice.satadev_addr.qual = SATA_ADDR_DPMPORT;
+	sdevice.satadev_state = SATA_DSTATE_RESET |
+	    SATA_DSTATE_PWR_ACTIVE;
+	mutex_exit(&ahci_portp->ahciport_mutex);
+	sata_hba_event_notify(
+	    ahci_ctlp->ahcictl_sata_hba_tran->sata_tran_hba_dip,
+	    &sdevice,
+	    SATA_EVNT_DEVICE_RESET);
+	mutex_enter(&ahci_portp->ahciport_mutex);
+
+	AHCIDBG(AHCIDBG_EVENT, ahci_ctlp,
+	    "port %d:%d sending event up: SATA_EVNT_DEVICE_RESET",
+	    port, pmport);
+
+	/* Next try to mop the pending commands */
+	if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp))
+		finished_tags = ahci_portp->ahciport_pending_tags &
+		    ~slot_status & AHCI_SLOT_MASK(ahci_ctlp);
+	else if (NCQ_CMD_IN_PROGRESS(ahci_portp))
+		finished_tags = ahci_portp->ahciport_pending_ncq_tags &
+		    ~slot_status & AHCI_NCQ_SLOT_MASK(ahci_portp);
+	reset_tags &= ~finished_tags;
+
+	AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp,
+	    "reset_tags = %x, finished_tags = %x, slot_status = %x",
+	    reset_tags, finished_tags, slot_status);
+
+	/*
+	 * NOTE: Because PxCI be only erased by unset PxCMD.ST bit, so even we
+	 * try to reset a single device behind a port multiplier will
+	 * terminate all the commands on that HBA port. We need mop these
+	 * commands as well.
+	 */
+	ahci_mop_commands(ahci_ctlp,
+	    ahci_portp,
+	    slot_status,
+	    0, /* failed tags */
+	    0, /* timeout tags */
+	    0, /* aborted tags */
+	    reset_tags); /* reset tags */
+
+	return (SATA_SUCCESS);
+}
+
+/*
  * Used to do port reset and reject all the pending packets on a port during
  * the reset operation.
  *
@@ -2242,14 +2711,15 @@
  */
 static int
 ahci_reset_port_reject_pkts(ahci_ctl_t *ahci_ctlp,
-    ahci_port_t *ahci_portp, uint8_t port)
+    ahci_port_t *ahci_portp, ahci_addr_t *addrp)
 {
 	uint32_t slot_status = 0;
 	uint32_t reset_tags = 0;
 	uint32_t finished_tags = 0;
+	uint8_t port = addrp->aa_port;
 
 	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
-	    "ahci_reset_port_reject_pkts on port: %d", port);
+	    "ahci_reset_port_reject_pkts at port: %d", port);
 
 	/*
 	 * If AHCI_PORT_FLAG_MOPPING flag is set, it means all the pending
@@ -2277,8 +2747,15 @@
 
 	if (ahci_restart_port_wait_till_ready(ahci_ctlp,
 	    ahci_portp, port, AHCI_PORT_RESET|AHCI_RESET_NO_EVENTS_UP,
-	    NULL) != AHCI_SUCCESS)
+	    NULL) != AHCI_SUCCESS) {
+
+		/* Clear mop flag */
+		ahci_portp->ahciport_mop_in_progress--;
+		if (ahci_portp->ahciport_mop_in_progress == 0)
+			ahci_portp->ahciport_flags &=
+			    ~AHCI_PORT_FLAG_MOPPING;
 		return (SATA_FAILURE);
+	}
 
 	if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp))
 		finished_tags = ahci_portp->ahciport_pending_tags &
@@ -2311,12 +2788,16 @@
 	uint32_t slot_status[AHCI_MAX_PORTS];
 	uint32_t reset_tags[AHCI_MAX_PORTS];
 	uint32_t finished_tags[AHCI_MAX_PORTS];
-	uint8_t port;
+	int port;
 	int ret = SATA_SUCCESS;
 
 	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
 	    "ahci_reset_hba_reject_pkts enter", NULL);
 
+	bzero(slot_status, sizeof (slot_status));
+	bzero(reset_tags, sizeof (reset_tags));
+	bzero(finished_tags, sizeof (finished_tags));
+
 	for (port = 0; port < ahci_ctlp->ahcictl_num_ports; port++) {
 		if (!AHCI_PORT_IMPLEMENTED(ahci_ctlp, port)) {
 			continue;
@@ -2325,25 +2806,33 @@
 		ahci_portp = ahci_ctlp->ahcictl_ports[port];
 
 		mutex_enter(&ahci_portp->ahciport_mutex);
+		ahci_portp->ahciport_reset_in_progress = 1;
 		if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) {
 			slot_status[port] = ddi_get32(
 			    ahci_ctlp->ahcictl_ahci_acc_handle,
 			    (uint32_t *)AHCI_PORT_PxCI(ahci_ctlp, port));
 			reset_tags[port] = slot_status[port] &
 			    AHCI_SLOT_MASK(ahci_ctlp);
+			AHCIDBG(AHCIDBG_INIT, ahci_ctlp,
+			    "port %d: reset_tags = 0x%x pending_tags = 0x%x",
+			    port, reset_tags[port],
+			    ahci_portp->ahciport_pending_tags);
 		} else if (NCQ_CMD_IN_PROGRESS(ahci_portp)) {
 			slot_status[port] = ddi_get32(
 			    ahci_ctlp->ahcictl_ahci_acc_handle,
 			    (uint32_t *)AHCI_PORT_PxSACT(ahci_ctlp, port));
 			reset_tags[port] = slot_status[port] &
 			    AHCI_NCQ_SLOT_MASK(ahci_portp);
+			AHCIDBG(AHCIDBG_INIT, ahci_ctlp,
+			    "port %d: reset_tags = 0x%x pending_tags = 0x%x",
+			    port, reset_tags[port],
+			    ahci_portp->ahciport_pending_tags);
 		}
 		mutex_exit(&ahci_portp->ahciport_mutex);
 	}
 
 	if (ahci_hba_reset(ahci_ctlp) != AHCI_SUCCESS) {
 		ret = SATA_FAILURE;
-		goto out;
 	}
 
 	for (port = 0; port < ahci_ctlp->ahcictl_num_ports; port++) {
@@ -2401,37 +2890,67 @@
 {
 	ahci_ctl_t *ahci_ctlp;
 	ahci_port_t *ahci_portp;
+	ahci_addr_t addr;
 	uint8_t cport = sd->satadev_addr.cport;
+	uint8_t pmport = sd->satadev_addr.pmport;
 	uint8_t port;
 	int ret = SATA_SUCCESS;
 	int instance = ddi_get_instance(dip);
 
 	ahci_ctlp = ddi_get_soft_state(ahci_statep, instance);
 	port = ahci_ctlp->ahcictl_cport_to_port[cport];
+	ahci_portp = ahci_ctlp->ahcictl_ports[port];
+
+	ahci_get_ahci_addr(ahci_ctlp, sd, &addr);
 
 	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
-	    "ahci_tran_reset_port enter: cport: %d", cport);
+	    "ahci_tran_reset_dport enter: cport %d", cport);
 
 	switch (sd->satadev_addr.qual) {
+	case SATA_ADDR_PMPORT:
+		/*
+		 * If we want to issue a COMRESET on a pmport, we need to
+		 * reject the outstanding commands on that pmport. According
+		 * to AHCI spec, PxCI register could only be cleared by
+		 * clearing PxCMD.ST, which will halt the controller port - as
+		 * well as other pmports.
+		 *
+		 * Therefore we directly reset the controller port for
+		 * simplicity. ahci_tran_probe_port() will handle reset stuff
+		 * like initializing the given pmport.
+		 */
+		/* FALLTHRU */
 	case SATA_ADDR_CPORT:
 		/* Port reset */
 		ahci_portp = ahci_ctlp->ahcictl_ports[port];
-		cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport port %d "
-		    "reset port", instance, port);
+		cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport "
+		    "port %d reset port", instance, port);
 
 		mutex_enter(&ahci_portp->ahciport_mutex);
-		ret = ahci_reset_port_reject_pkts(ahci_ctlp, ahci_portp, port);
+		ret = ahci_reset_port_reject_pkts(ahci_ctlp, ahci_portp, &addr);
 		mutex_exit(&ahci_portp->ahciport_mutex);
 
 		break;
 
+	case SATA_ADDR_DPMPORT:
+		cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport "
+		    "port %d:%d reset device", instance, port, pmport);
+		/* FALLTHRU */
 	case SATA_ADDR_DCPORT:
 		/* Device reset */
-		ahci_portp = ahci_ctlp->ahcictl_ports[port];
-		cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport port %d "
-		    "reset device", instance, port);
+		if (sd->satadev_addr.qual == SATA_ADDR_DCPORT)
+			cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport "
+			    "port %d reset device", instance, port);
 
 		mutex_enter(&ahci_portp->ahciport_mutex);
+		/*
+		 * software reset request must be sent to SATA_PMULT_HOSTPORT
+		 * if target is a port multiplier:
+		 */
+		if (sd->satadev_addr.qual == SATA_ADDR_DCPORT &&
+		    ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT)
+			AHCI_ADDR_SET_PMULT(&addr, port);
+
 		if (ahci_portp->ahciport_port_state & SATA_PSTATE_FAILED |
 		    ahci_portp->ahciport_port_state & SATA_PSTATE_SHUTDOWN |
 		    ahci_portp->ahciport_port_state & SATA_PSTATE_PWROFF) {
@@ -2451,13 +2970,15 @@
 			break;
 		}
 
-		if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE) {
+		if (AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr) ==
+		    SATA_DTYPE_NONE) {
 			/*
 			 * ahci_intr_phyrdy_change() may have rendered it to
 			 * AHCI_PORT_TYPE_NODEV.
 			 */
 			sd->satadev_type = SATA_DTYPE_NONE;
-			sd->satadev_state = ahci_portp->ahciport_port_state;
+			sd->satadev_state = AHCIPORT_GET_STATE(ahci_portp,
+			    &addr);
 			ahci_update_sata_registers(ahci_ctlp, port, sd);
 			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
 			    "ahci_tran_reset_dport returning SATA_FAILURE "
@@ -2467,24 +2988,24 @@
 			break;
 		}
 
-		ret = ahci_reset_device_reject_pkts(ahci_ctlp,
-		    ahci_portp, port);
+		if (AHCI_ADDR_IS_PORT(&addr)) {
+			ret = ahci_reset_device_reject_pkts(ahci_ctlp,
+			    ahci_portp, &addr);
+		} else {
+			ret = ahci_reset_pmdevice_reject_pkts(ahci_ctlp,
+			    ahci_portp, &addr);
+		}
+
 		mutex_exit(&ahci_portp->ahciport_mutex);
 		break;
 
 	case SATA_ADDR_CNTRL:
 		/* Reset the whole controller */
-		cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport port %d "
-		    "reset the whole hba", instance, port);
+		cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport "
+		    "reset the whole hba", instance);
 		ret = ahci_reset_hba_reject_pkts(ahci_ctlp);
 		break;
 
-	case SATA_ADDR_PMPORT:
-	case SATA_ADDR_DPMPORT:
-		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
-		    "ahci_tran_reset_dport: port multiplier will be "
-		    "supported later", NULL);
-		/* FALLTHRU */
 	default:
 		ret = SATA_FAILURE;
 	}
@@ -2495,14 +3016,16 @@
 /*
  * Called by sata framework to activate a port as part of hotplug.
  * (cfgadm -c connect satax/y)
- * Note: Not port-mult aware.
+ * Support port multiplier.
  */
 static int
 ahci_tran_hotplug_port_activate(dev_info_t *dip, sata_device_t *satadev)
 {
 	ahci_ctl_t *ahci_ctlp;
 	ahci_port_t *ahci_portp;
+	ahci_addr_t addr;
 	uint8_t	cport = satadev->satadev_addr.cport;
+	uint8_t	pmport = satadev->satadev_addr.pmport;
 	uint8_t port;
 	int instance = ddi_get_instance(dip);
 
@@ -2510,32 +3033,48 @@
 	port = ahci_ctlp->ahcictl_cport_to_port[cport];
 
 	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
-	    "ahci_tran_hotplug_port_activate cport %d enter", cport);
+	    "ahci_tran_hotplug_port_activate enter: cport %d", cport);
 
 	ahci_portp = ahci_ctlp->ahcictl_ports[port];
 
 	mutex_enter(&ahci_portp->ahciport_mutex);
-	ahci_enable_port_intrs(ahci_ctlp, port);
-	cmn_err(CE_NOTE, "!ahci%d: ahci port %d is activated", instance, port);
-
-	/*
-	 * Reset the port so that the PHY communication would be re-established.
-	 * But this reset is an internal operation and the sata module doesn't
-	 * need to know about it. Moreover, the port with a device attached will
-	 * be started too.
-	 */
-	(void) ahci_restart_port_wait_till_ready(ahci_ctlp,
-	    ahci_portp, port,
-	    AHCI_PORT_RESET|AHCI_RESET_NO_EVENTS_UP,
-	    NULL);
-
-	/*
-	 * Need to check the link status and device status of the port
-	 * and consider raising power if the port was in D3 state
-	 */
-	ahci_portp->ahciport_port_state |= SATA_PSTATE_PWRON;
-	ahci_portp->ahciport_port_state &= ~SATA_PSTATE_PWROFF;
-	ahci_portp->ahciport_port_state &= ~SATA_PSTATE_SHUTDOWN;
+	ahci_get_ahci_addr(ahci_ctlp, satadev, &addr);
+	ASSERT(AHCI_ADDR_IS_PORT(&addr) || AHCI_ADDR_IS_PMPORT(&addr));
+
+	if (AHCI_ADDR_IS_PORT(&addr)) {
+		cmn_err(CE_NOTE, "!ahci%d: ahci port %d is activated",
+		    instance, port);
+
+		/* Enable the interrupts on the port */
+		ahci_enable_port_intrs(ahci_ctlp, port);
+
+		/*
+		 * Reset the port so that the PHY communication would be
+		 * re-established.  But this reset is an internal operation
+		 * and the sata module doesn't need to know about it.
+		 * Moreover, the port with a device attached will be started
+		 * too.
+		 */
+		(void) ahci_restart_port_wait_till_ready(ahci_ctlp,
+		    ahci_portp, port,
+		    AHCI_PORT_RESET|AHCI_RESET_NO_EVENTS_UP,
+		    NULL);
+
+		/*
+		 * Need to check the link status and device status of the port
+		 * and consider raising power if the port was in D3 state
+		 */
+		ahci_portp->ahciport_port_state |= SATA_PSTATE_PWRON;
+		ahci_portp->ahciport_port_state &= ~SATA_PSTATE_PWROFF;
+		ahci_portp->ahciport_port_state &= ~SATA_PSTATE_SHUTDOWN;
+	} else if (AHCI_ADDR_IS_PMPORT(&addr)) {
+		cmn_err(CE_NOTE, "!ahci%d: ahci port %d:%d is activated",
+		    instance, port, pmport);
+		/* AHCI_ADDR_PMPORT */
+		AHCIPORT_PMSTATE(ahci_portp, &addr) |= SATA_PSTATE_PWRON;
+		AHCIPORT_PMSTATE(ahci_portp, &addr) &=
+		    ~(SATA_PSTATE_PWROFF|SATA_PSTATE_SHUTDOWN);
+	}
 
 	satadev->satadev_state = ahci_portp->ahciport_port_state;
 
@@ -2548,14 +3087,16 @@
 /*
  * Called by sata framework to deactivate a port as part of hotplug.
  * (cfgadm -c disconnect satax/y)
- * Note: Not port-mult aware.
+ * Support port multiplier.
  */
 static int
 ahci_tran_hotplug_port_deactivate(dev_info_t *dip, sata_device_t *satadev)
 {
 	ahci_ctl_t *ahci_ctlp;
 	ahci_port_t *ahci_portp;
-	uint8_t cport = satadev->satadev_addr.cport;
+	ahci_addr_t addr;
+	uint8_t	cport = satadev->satadev_addr.cport;
+	uint8_t	pmport = satadev->satadev_addr.pmport;
 	uint8_t port;
 	uint32_t port_scontrol;
 	int instance = ddi_get_instance(dip);
@@ -2564,38 +3105,52 @@
 	port = ahci_ctlp->ahcictl_cport_to_port[cport];
 
 	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
-	    "ahci_tran_hotplug_port_deactivate cport %d enter", cport);
+	    "ahci_tran_hotplug_port_deactivate enter: cport %d", cport);
 
 	ahci_portp = ahci_ctlp->ahcictl_ports[port];
-
 	mutex_enter(&ahci_portp->ahciport_mutex);
-	cmn_err(CE_NOTE, "!ahci%d: ahci port %d is deactivated",
-	    instance, port);
-
-	/* Disable the interrupts on the port */
-	ahci_disable_port_intrs(ahci_ctlp, port);
-
-	if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE) {
-		goto phy_offline;
-	}
-
-	/* First to abort all the pending commands */
-	ahci_reject_all_abort_pkts(ahci_ctlp, ahci_portp, port);
-
-	/* Then stop the port */
-	(void) ahci_put_port_into_notrunning_state(ahci_ctlp,
-	    ahci_portp, port);
-
-	/* Next put the PHY offline */
-
-phy_offline:
-	port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
-	    (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port));
-	SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_DISABLE);
-
-	/* Update ahciport_port_state */
-	ahci_portp->ahciport_port_state = SATA_PSTATE_SHUTDOWN;
-	satadev->satadev_state = ahci_portp->ahciport_port_state;
+	ahci_get_ahci_addr(ahci_ctlp, satadev, &addr);
+	ASSERT(AHCI_ADDR_IS_PORT(&addr) || AHCI_ADDR_IS_PMPORT(&addr));
+
+	if (AHCI_ADDR_IS_PORT(&addr)) {
+		cmn_err(CE_NOTE, "!ahci%d: ahci port %d is deactivated",
+		    instance, port);
+
+		/* Disable the interrupts on the port */
+		ahci_disable_port_intrs(ahci_ctlp, port);
+
+		if (ahci_portp->ahciport_device_type != SATA_DTYPE_NONE) {
+
+			/* First to abort all the pending commands */
+			ahci_reject_all_abort_pkts(ahci_ctlp, ahci_portp, port);
+
+			/* Then stop the port */
+			(void) ahci_put_port_into_notrunning_state(ahci_ctlp,
+			    ahci_portp, port);
+		}
+
+		/* Next put the PHY offline */
+		port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port));
+		SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_DISABLE);
+		ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, (uint32_t *)
+		    AHCI_PORT_PxSCTL(ahci_ctlp, port), port_scontrol);
+	} else if (AHCI_ADDR_IS_PMPORT(&addr)) {
+		cmn_err(CE_NOTE, "!ahci%d: ahci port %d:%d is deactivated",
+		    instance, port, pmport);
+
+		ahci_disable_port_intrs(ahci_ctlp, port);
+		if (AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr)
+		    != SATA_DTYPE_NONE)
+			ahci_reject_all_abort_pkts(ahci_ctlp, ahci_portp, port);
+
+		/* Re-enable the interrupts for the other pmports */
+		ahci_enable_port_intrs(ahci_ctlp, port);
+	}
+
+	/* Update port state */
+	AHCIPORT_SET_STATE(ahci_portp, &addr, SATA_PSTATE_SHUTDOWN);
+	satadev->satadev_state = SATA_PSTATE_SHUTDOWN;
 
 	ahci_update_sata_registers(ahci_ctlp, port, satadev);
 
@@ -2617,7 +3172,14 @@
 	uint32_t abort_tags = 0;
 
 	AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR, ahci_ctlp,
-	    "ahci_reject_all_abort_pkts on port: %d", port);
+	    "ahci_reject_all_abort_pkts at port: %d", port);
+
+	/* Read/write port multiplier command takes highest priority */
+	if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) {
+		slot_status = 0x1;
+		abort_tags = 0x1;
+		goto out;
+	}
 
 	/*
 	 * When AHCI_PORT_FLAG_MOPPING is set, we need to check whether a
@@ -2772,6 +3334,7 @@
 ahci_initialize_controller(ahci_ctl_t *ahci_ctlp)
 {
 	ahci_port_t *ahci_portp;
+	ahci_addr_t addr;
 	uint32_t ghc_control;
 	int port;
 
@@ -2807,7 +3370,9 @@
 		 * Ensure that the controller is not in the running state
 		 * by checking every implemented port's PxCMD register
 		 */
-		if (ahci_initialize_port(ahci_ctlp, ahci_portp, port)
+		AHCI_ADDR_SET_PORT(&addr, (uint8_t)port);
+
+		if (ahci_initialize_port(ahci_ctlp, ahci_portp, &addr)
 		    != AHCI_SUCCESS) {
 			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
 			    "ahci_initialize_controller: failed to "
@@ -2874,25 +3439,116 @@
 }
 
 /*
- * The routine is to initialize the port. First put the port in NOTRunning
+ * ahci_alloc_pmult()
+ * 1. Setting HBA port registers which are necessary for a port multiplier.
+ *    (Set PxCMD.PMA while PxCMD.ST is '0')
+ * 2. Allocate ahci_pmult_info structure.
+ *
+ * NOTE: Must stop port before the function is called.
+ * WARNING!!! ahciport_mutex should be acquired before the function is
+ * called.
+ */
+static void
+ahci_alloc_pmult(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp)
+{
+	uint32_t port_cmd_status;
+	uint8_t port = ahci_portp->ahciport_port_num;
+
+	port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port));
+
+	/* The port must have been stopped before. */
+	ASSERT(!(port_cmd_status & AHCI_CMD_STATUS_ST));
+
+	if (!(port_cmd_status & AHCI_CMD_STATUS_PMA)) {
+		/* set PMA bit */
+		ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port),
+		    port_cmd_status|AHCI_CMD_STATUS_PMA);
+
+		AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp,
+		    "ahci_alloc_pmult: "
+		    "PxCMD.PMA bit set at port %d.", port);
+	}
+
+	/* Allocate port multiplier information structure */
+	if (ahci_portp->ahciport_pmult_info == NULL) {
+		ahci_portp->ahciport_pmult_info = (ahci_pmult_info_t *)
+		    kmem_zalloc(sizeof (ahci_pmult_info_t), KM_SLEEP);
+	}
+
+	ASSERT(ahci_portp->ahciport_pmult_info != NULL);
+}
+
+/*
+ * ahci_dealloc_pmult()
+ * 1. Clearing related registers when a port multiplier is detached.
+ *    (Clear PxCMD.PMA while PxCMD.ST is '0')
+ * 2. Deallocate ahci_pmult_info structure.
+ *
+ * NOTE: Must stop port before the function is called.
+ * WARNING!!! ahciport_mutex should be acquired before the function is
+ * called.
+ */
+static void
+ahci_dealloc_pmult(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp)
+{
+	uint32_t port_cmd_status;
+	uint8_t port = ahci_portp->ahciport_port_num;
+
+	port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port));
+
+	if (port_cmd_status & AHCI_CMD_STATUS_PMA) {
+		/* Clear PMA bit */
+		ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port),
+		    (port_cmd_status & (~AHCI_CMD_STATUS_PMA)));
+
+		AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp,
+		    "ahci_dealloc_pmult: "
+		    "PxCMD.PMA bit cleared at port %d.", port);
+	}
+
+	/* Release port multiplier information structure */
+	if (ahci_portp->ahciport_pmult_info != NULL) {
+		kmem_free(ahci_portp->ahciport_pmult_info,
+		    sizeof (ahci_pmult_info_t));
+		ahci_portp->ahciport_pmult_info = NULL;
+	}
+}
+
+/*
+ * The routine is to initialize a port. First put the port in NOTRunning
  * state, then enable port interrupt and clear Serror register. And under
  * AHCI_ATTACH case, find device signature and then try to start the port.
  *
+ * Called by
+ *    1. ahci_initialize_controller
+ *    2. ahci_intr_phyrdy_change (hotplug)
+ *
  * WARNING!!! ahciport_mutex should be acquired before the function is called.
  */
 static int
-ahci_initialize_port(ahci_ctl_t *ahci_ctlp,
-    ahci_port_t *ahci_portp, uint8_t port)
+ahci_initialize_port(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
+    ahci_addr_t *addrp)
 {
 	uint32_t port_sstatus, port_task_file, port_cmd_status;
+	uint8_t port = addrp->aa_port;
+	boolean_t pm_mode = B_TRUE;	/* Power Management mode */
 	int ret;
 
+	ASSERT(mutex_owned(&ahci_portp->ahciport_mutex));
+
+	/* AHCI_ADDR_PORT: We've no idea of the attached device here.  */
+	ASSERT(AHCI_ADDR_IS_PORT(addrp));
+
 	port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
 	    (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port));
 
 	AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp,
-	    "ahci_initialize_port: port %d "
-	    "port_cmd_status = 0x%x", port, port_cmd_status);
+	    "ahci_initialize_port: port %d ", port);
+
 	/*
 	 * Check whether the port is in NotRunning state, if not,
 	 * put the port in NotRunning state
@@ -2906,8 +3562,11 @@
 		    ahci_portp, port);
 	}
 
+	/* Disable interrupt */
+	ahci_disable_port_intrs(ahci_ctlp, port);
+
 	/* Device is unknown at first */
-	ahci_portp->ahciport_device_type = SATA_DTYPE_UNKNOWN;
+	AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_UNKNOWN);
 
 	/* Disable the interface power management */
 	ahci_disable_interface_pm(ahci_ctlp, port);
@@ -2928,8 +3587,8 @@
 	    /* Check whether port reset must be executed */
 	    ahci_ctlp->ahcictl_cap & AHCI_CAP_INIT_PORT_RESET) {
 
-		/* Incorrect task file state, we need to reset port */
-		ret = ahci_port_reset(ahci_ctlp, ahci_portp, port);
+		/* Something went wrong, we need do some reset things */
+		ret = ahci_port_reset(ahci_ctlp, ahci_portp, addrp);
 
 		/* Does port reset succeed on HBA port? */
 		if (ret != AHCI_SUCCESS) {
@@ -2940,16 +3599,15 @@
 		}
 
 		/* Is port failed? */
-		if (ahci_portp->ahciport_port_state & SATA_PSTATE_FAILED) {
+		if (AHCIPORT_GET_STATE(ahci_portp, addrp) &
+		    SATA_PSTATE_FAILED) {
 			AHCIDBG(AHCIDBG_INIT|AHCIDBG_ERRS, ahci_ctlp,
 			    "ahci_initialize_port: port %d state 0x%x",
 			    port, ahci_portp->ahciport_port_state);
 			return (AHCI_FAILURE);
 		}
 	}
-
-	ahci_portp->ahciport_port_state = SATA_STATE_READY;
-
+	AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_STATE_READY);
 	AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "port %d is ready now.", port);
 
 	/*
@@ -2958,13 +3616,23 @@
 	 * changed during power state changes, but at the time being, we
 	 * don't support the situation.
 	 */
-	if (ahci_ctlp->ahcictl_flags & AHCI_ATTACH) {
+	mutex_exit(&ahci_portp->ahciport_mutex);
+	mutex_enter(&ahci_ctlp->ahcictl_mutex);
+	if (ahci_ctlp->ahcictl_flags & AHCI_ATTACH)
+		pm_mode = B_FALSE;
+	mutex_exit(&ahci_ctlp->ahcictl_mutex);
+	mutex_enter(&ahci_portp->ahciport_mutex);
+
+	if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_HOTPLUG)
+		pm_mode = B_FALSE;
+
+	if (!pm_mode) {
 		/*
 		 * Try to get the device signature if the port is
 		 * not empty.
 		 */
 		if (ahci_portp->ahciport_device_type != SATA_DTYPE_NONE)
-			ahci_find_dev_signature(ahci_ctlp, ahci_portp, port);
+			ahci_find_dev_signature(ahci_ctlp, ahci_portp, addrp);
 	} else {
 
 		/*
@@ -2975,7 +3643,7 @@
 		AHCIDBG(AHCIDBG_PM, ahci_ctlp,
 		    "ahci_initialize_port: port %d "
 		    "reset the port during resume", port);
-		(void) ahci_port_reset(ahci_ctlp, ahci_portp, port);
+		(void) ahci_port_reset(ahci_ctlp, ahci_portp, addrp);
 
 		AHCIDBG(AHCIDBG_PM, ahci_ctlp,
 		    "ahci_initialize_port: port %d "
@@ -3000,6 +3668,13 @@
 		goto out;
 	}
 
+	/* If this is a port multiplier, we need do some initialization */
+	if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) {
+		AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+		    "Port multiplier found at port %d", port);
+		ahci_alloc_pmult(ahci_ctlp, ahci_portp);
+	}
+
 	/* Try to start the port */
 	if (ahci_start_port(ahci_ctlp, ahci_portp, port)
 	    != AHCI_SUCCESS) {
@@ -3217,6 +3892,639 @@
 }
 
 /*
+ * Read/Write a register at port multiplier by SATA READ PORTMULT / SATA WRITE
+ * PORTMULT command. SYNC & POLLING mode is used.
+ *
+ * WARNING!!! ahciport_mutex should be acquired before the function
+ * is called.
+ */
+static int
+ahci_rdwr_pmult(ahci_ctl_t *ahci_ctlp, ahci_addr_t *addrp,
+    uint8_t regn, uint32_t *pregv, uint8_t type)
+{
+	ahci_port_t *ahci_portp;
+	ahci_addr_t pmult_addr;
+	sata_pkt_t *spkt;
+	sata_cmd_t *scmd;
+	sata_device_t sata_device;
+	uint8_t port = addrp->aa_port;
+	uint8_t pmport = addrp->aa_pmport;
+	uint8_t cport;
+	uint32_t intr_mask;
+	int rval;
+	char portstr[10];
+
+	SET_PORTSTR(portstr, addrp);
+	cport = ahci_ctlp->ahcictl_port_to_cport[port];
+	ahci_portp = ahci_ctlp->ahcictl_ports[port];
+
+	ASSERT(AHCI_ADDR_IS_PMPORT(addrp) || AHCI_ADDR_IS_PMULT(addrp));
+	ASSERT(mutex_owned(&ahci_portp->ahciport_mutex));
+
+	/* Check the existence of the port multiplier */
+	if (ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT)
+		return (AHCI_FAILURE);
+
+	/* Request a READ/WRITE PORTMULT sata packet. */
+	bzero(&sata_device, sizeof (sata_device_t));
+	sata_device.satadev_addr.cport = cport;
+	sata_device.satadev_addr.pmport = pmport;
+	sata_device.satadev_addr.qual = SATA_ADDR_PMULT;
+	sata_device.satadev_rev = SATA_DEVICE_REV;
+
+	/*
+	 * Make sure no command is outstanding here. All R/W PMULT requests
+	 * come from
+	 *
+	 * 1. ahci_attach()
+	 *    The port should be empty.
+	 *
+	 * 2. ahci_tran_probe_port()
+	 *    Any request from SATA framework (via ahci_tran_start) should be
+	 *    rejected if R/W PMULT command is outstanding.
+	 *
+	 *    If we are doing mopping, do not check those flags because no
+	 *    command will be actually outstanding.
+	 *
+	 *    If the port has been occupied by any other commands, the probe
+	 *    function will return a SATA_RETRY. SATA framework will retry
+	 *    later.
+	 */
+	if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) {
+		AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp,
+		    "R/W PMULT failed: R/W PMULT in progress at port %d.",
+		    port, ahci_portp->ahciport_flags);
+		return (AHCI_FAILURE);
+	}
+
+	if (!(ahci_portp->ahciport_flags & AHCI_PORT_FLAG_MOPPING) && (
+	    ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) ||
+	    NCQ_CMD_IN_PROGRESS(ahci_portp) ||
+	    NON_NCQ_CMD_IN_PROGRESS(ahci_portp))) {
+		AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp,
+		    "R/W PMULT failed: port %d is occupied (flags 0x%x).",
+		    port, ahci_portp->ahciport_flags);
+		return (AHCI_FAILURE);
+	}
+
+	/*
+	 * The port multiplier is gone. This may happen when
+	 * 1. Cutting off the power of an enclosure. The device lose the power
+	 *    before port multiplier.
+	 * 2. Disconnecting the port multiplier during hot-plugging a sub-drive.
+	 *
+	 * The issued command should be aborted and the following command
+	 * should not be continued.
+	 */
+	if (!(ahci_portp->ahciport_port_state & SATA_STATE_READY)) {
+		AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp,
+		    "READ/WRITE PMULT failed: "
+		    "port-mult is removed from port %d", port);
+		return (AHCI_FAILURE);
+	}
+
+	ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_RDWR_PMULT;
+
+	spkt = sata_get_rdwr_pmult_pkt(ahci_ctlp->ahcictl_dip,
+	    &sata_device, regn, *pregv, type);
+
+	/*
+	 * READ/WRITE PORTMULT command is intended to sent to the control port
+	 * of the port multiplier.
+	 */
+	AHCI_ADDR_SET_PMULT(&pmult_addr, addrp->aa_port);
+
+	ahci_portp->ahciport_rdwr_pmult_pkt = spkt;
+
+	/* No interrupt here. Store the interrupt enable mask. */
+	intr_mask = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxIE(ahci_ctlp, port));
+	ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxIE(ahci_ctlp, port), 0);
+
+	rval = ahci_do_sync_start(ahci_ctlp, ahci_portp, &pmult_addr, spkt);
+
+	if (rval == AHCI_SUCCESS &&
+	    spkt->satapkt_reason == SATA_PKT_COMPLETED) {
+		if (type == SATA_RDWR_PMULT_PKT_TYPE_READ) {
+			scmd = &spkt->satapkt_cmd;
+			*pregv = scmd->satacmd_lba_high_lsb << 24 |
+			    scmd->satacmd_lba_mid_lsb << 16 |
+			    scmd->satacmd_lba_low_lsb << 8 |
+			    scmd->satacmd_sec_count_lsb;
+		}
+	} else {
+		/* Failed or not completed. */
+		AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp,
+		    "ahci_rdwr_pmult: cannot [%s] %s[%d] at port %s",
+		    type == SATA_RDWR_PMULT_PKT_TYPE_READ?"Read":"Write",
+		    AHCI_ADDR_IS_PMULT(addrp)?"gscr":"pscr", regn, portstr);
+		rval = AHCI_FAILURE;
+	}
+out:
+	/* Restore the interrupt mask */
+	ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxIE(ahci_ctlp, port), intr_mask);
+
+	ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_RDWR_PMULT;
+	ahci_portp->ahciport_rdwr_pmult_pkt = NULL;
+	sata_free_rdwr_pmult_pkt(spkt);
+	return (rval);
+}
+
+static int
+ahci_read_pmult(ahci_ctl_t *ahci_ctlp, ahci_addr_t *addrp,
+    uint8_t regn, uint32_t *pregv)
+{
+	return ahci_rdwr_pmult(ahci_ctlp, addrp, regn, pregv,
+	    SATA_RDWR_PMULT_PKT_TYPE_READ);
+}
+
+static int
+ahci_write_pmult(ahci_ctl_t *ahci_ctlp, ahci_addr_t *addrp,
+    uint8_t regn, uint32_t regv)
+{
+	return ahci_rdwr_pmult(ahci_ctlp, addrp, regn, &regv,
+	    SATA_RDWR_PMULT_PKT_TYPE_WRITE);
+}
+
+#define	READ_PMULT(addrp, r, pv, out)					\
+	if (ahci_read_pmult(ahci_ctlp, addrp, r, pv) != AHCI_SUCCESS)	\
+		goto out;
+
+#define	WRITE_PMULT(addrp, r, v, out)					\
+	if (ahci_write_pmult(ahci_ctlp, addrp, r, v) != AHCI_SUCCESS)	\
+		goto out;
+
+/*
+ * Update sata registers on port multiplier, including GSCR/PSCR registers.
+ * ahci_update_pmult_gscr()
+ * ahci_update_pmult_pscr()
+ *
+ * WARNING!!! ahciport_mutex should be acquired before those functions
+ * get called.
+ */
+static int
+ahci_update_pmult_gscr(ahci_ctl_t *ahci_ctlp, ahci_addr_t *addrp,
+    sata_device_t *sd)
+{
+	READ_PMULT(addrp, SATA_PMULT_GSCR0, &sd->satadev_gscr.gscr0, err);
+	READ_PMULT(addrp, SATA_PMULT_GSCR1, &sd->satadev_gscr.gscr1, err);
+	READ_PMULT(addrp, SATA_PMULT_GSCR2, &sd->satadev_gscr.gscr2, err);
+	READ_PMULT(addrp, SATA_PMULT_GSCR64, &sd->satadev_gscr.gscr64, err);
+
+	return (AHCI_SUCCESS);
+
+err:	/* R/W PMULT error */
+	return (AHCI_FAILURE);
+}
+
+static int
+ahci_update_pmult_pscr(ahci_ctl_t *ahci_ctlp, ahci_addr_t *addrp,
+    sata_device_t *sd)
+{
+	ASSERT(AHCI_ADDR_IS_PMPORT(addrp));
+
+	READ_PMULT(addrp, SATA_PMULT_REG_SSTS, &sd->satadev_scr.sstatus, err);
+	READ_PMULT(addrp, SATA_PMULT_REG_SERR, &sd->satadev_scr.serror, err);
+	READ_PMULT(addrp, SATA_PMULT_REG_SCTL, &sd->satadev_scr.scontrol, err);
+	READ_PMULT(addrp, SATA_PMULT_REG_SACT, &sd->satadev_scr.sactive, err);
+
+	return (AHCI_SUCCESS);
+
+err:	/* R/W PMULT error */
+	return (AHCI_FAILURE);
+}
+
+/*
+ * ahci_initialize_pmult()
+ *
+ * Initialize a port multiplier, including
+ * 1. Enable FEATURES register at port multiplier. (SATA Chp.16)
+ * 2. Redefine MASK register. (SATA Chap 16.?)
+ *
+ * WARNING!!! ahciport_mutex should be acquired before the function
+ * is called.
+ */
+static int
+ahci_initialize_pmult(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
+    ahci_addr_t *addrp, sata_device_t *sd)
+{
+	uint32_t gscr64;
+	uint8_t port = addrp->aa_port;
+
+	ASSERT(mutex_owned(&ahci_portp->ahciport_mutex));
+	AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+	    "[Initialize] Port-multiplier at port %d.", port);
+
+	/*
+	 * Enable features of port multiplier. Currently only
+	 * Asynchronous Notification is enabled.
+	 */
+	/* Check gscr64 for supported features. */
+	READ_PMULT(addrp, SATA_PMULT_GSCR64, &gscr64, err);
+
+	if (gscr64 & SATA_PMULT_CAP_SNOTIF) {
+		AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+		    "port %d: Port Multiplier supports "
+		    "Asynchronous Notification.", port);
+
+		/* Write to gscr96 to enabled features */
+		WRITE_PMULT(addrp, SATA_PMULT_GSCR96,
+		    SATA_PMULT_CAP_SNOTIF, err);
+
+		ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxSNTF(ahci_ctlp, port),
+		    AHCI_SNOTIF_CLEAR_ALL);
+		AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+		    "port %d: PMult PxSNTF cleared.", port);
+
+	}
+
+	/*
+	 * Now we need to update gscr33 register to enable hot-plug interrupt
+	 * for sub devices behind port multiplier.
+	 */
+	WRITE_PMULT(addrp, SATA_PMULT_GSCR33, (0x1ffff), err);
+	AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+	    "port %d: gscr33 mask set to %x.", port, (0x1ffff));
+
+	/*
+	 * Fetch the number of device ports of the port multiplier
+	 */
+	if (ahci_update_pmult_gscr(ahci_ctlp, addrp, sd) != AHCI_SUCCESS)
+		return (AHCI_FAILURE);
+
+	/* If it's not in the blacklist, use the value in GSCR2 */
+	if (sata_check_pmult_blacklist(sd) == SATA_FAILURE)
+		sd->satadev_add_info = sd->satadev_gscr.gscr2 &
+		    SATA_PMULT_PORTNUM_MASK;
+
+	ahci_portp->ahciport_pmult_info->ahcipmi_num_dev_ports =
+	    sd->satadev_add_info;
+
+	AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+	    "port %d: pmult sub-port number updated to %x.", port,
+	    ahci_portp->ahciport_pmult_info->ahcipmi_num_dev_ports);
+
+	/* Till now port-mult is successfully initialized */
+	ahci_portp->ahciport_port_state |= SATA_DSTATE_PMULT_INIT;
+	return (AHCI_SUCCESS);
+
+err:	/* R/W PMULT error */
+	return (AHCI_FAILURE);
+}
+
+/*
+ * Initialize a port multiplier port. According to spec, firstly we need
+ * issue a COMRESET, then a software reset to get its signature.
+ *
+ * NOTE: This function should only be called in ahci_probe_pmport()
+ * WARNING!!! ahciport_mutex should be acquired before the function.
+ */
+static int
+ahci_initialize_pmport(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
+    ahci_addr_t *addrp)
+{
+	uint32_t finished_tags = 0, reset_tags = 0, slot_status = 0;
+	uint8_t port = addrp->aa_port;
+	uint8_t pmport = addrp->aa_pmport;
+	int ret = AHCI_FAILURE;
+
+	ASSERT(AHCI_ADDR_IS_PMPORT(addrp));
+
+	AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp,
+	    "ahci_initialize_pmport: port %d:%d", port, pmport);
+
+	/* Check HBA port state */
+	if (ahci_portp->ahciport_port_state & SATA_PSTATE_FAILED) {
+		AHCIDBG(AHCIDBG_INIT|AHCIDBG_ERRS, ahci_ctlp,
+		    "ahci_initialize_pmport:"
+		    "port %d:%d Port Multiplier is failed.",
+		    port, pmport);
+		return (AHCI_FAILURE);
+	}
+
+	if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_HOTPLUG) {
+		return (AHCI_FAILURE);
+	}
+	ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_HOTPLUG;
+
+	/* Checking for outstanding commands */
+	if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) {
+		slot_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxCI(ahci_ctlp, port));
+		reset_tags = slot_status & AHCI_SLOT_MASK(ahci_ctlp);
+	} else if (NCQ_CMD_IN_PROGRESS(ahci_portp)) {
+		slot_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxSACT(ahci_ctlp, port));
+		reset_tags = slot_status & AHCI_NCQ_SLOT_MASK(ahci_portp);
+	}
+
+	ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_MOPPING;
+	ahci_portp->ahciport_mop_in_progress++;
+
+	/* Clear status */
+	AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_STATE_UNKNOWN);
+
+	/* Firstly assume an unknown device */
+	AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_UNKNOWN);
+
+	ahci_disable_port_intrs(ahci_ctlp, port);
+
+	/* port reset is necessary for port multiplier port */
+	if (ahci_pmport_reset(ahci_ctlp, ahci_portp, addrp) != AHCI_SUCCESS) {
+		AHCIDBG(AHCIDBG_INIT|AHCIDBG_ERRS, ahci_ctlp,
+		    "ahci_initialize_pmport:"
+		    "port reset failed at port %d:%d",
+		    port, pmport);
+		goto out;
+	}
+
+	/* Is port failed? */
+	if (AHCIPORT_GET_STATE(ahci_portp, addrp) &
+	    SATA_PSTATE_FAILED) {
+		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+		    "ahci_initialize_pmport: port %d:%d failed. "
+		    "state = 0x%x", port, pmport,
+		    ahci_portp->ahciport_port_state);
+		goto out;
+	}
+
+	/* Is there any device attached? */
+	if (AHCIPORT_GET_DEV_TYPE(ahci_portp, addrp)
+	    == SATA_DTYPE_NONE) {
+		/* Do not waste time on an empty port */
+		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
+		    "ahci_initialize_pmport: No device is found "
+		    "at port %d:%d", port, pmport);
+		ret = AHCI_SUCCESS;
+		goto out;
+	}
+
+	AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_STATE_READY);
+	AHCIDBG(AHCIDBG_INIT, ahci_ctlp,
+	    "port %d:%d is ready now.", port, pmport);
+
+	/*
+	 * Till now we can assure a device attached to that HBA port and work
+	 * correctly. Now try to get the device signature. This is an optional
+	 * step. If failed, unknown device is assumed, then SATA module will
+	 * continue to use IDENTIFY DEVICE to get the information of the
+	 * device.
+	 */
+	ahci_find_dev_signature(ahci_ctlp, ahci_portp, addrp);
+
+	ret = AHCI_SUCCESS;
+
+out:
+	/* Next try to mop the pending commands */
+	if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp))
+		finished_tags = ahci_portp->ahciport_pending_tags &
+		    ~slot_status & AHCI_SLOT_MASK(ahci_ctlp);
+	else if (NCQ_CMD_IN_PROGRESS(ahci_portp))
+		finished_tags = ahci_portp->ahciport_pending_ncq_tags &
+		    ~slot_status & AHCI_NCQ_SLOT_MASK(ahci_portp);
+	reset_tags &= ~finished_tags;
+
+	ahci_mop_commands(ahci_ctlp,
+	    ahci_portp,
+	    slot_status,
+	    0, /* failed tags */
+	    0, /* timeout tags */
+	    0, /* aborted tags */
+	    reset_tags); /* reset tags */
+
+	/* Clear PxSNTF register if supported. */
+	if (ahci_ctlp->ahcictl_cap & AHCI_CAP_SNTF) {
+		ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxSNTF(ahci_ctlp, port),
+		    AHCI_SNOTIF_CLEAR_ALL);
+	}
+
+	ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_HOTPLUG;
+	ahci_enable_port_intrs(ahci_ctlp, port);
+	return (ret);
+}
+
+/*
+ * ahci_probe_pmult()
+ *
+ * This function will be called to probe a port multiplier, which will
+ * handle hotplug events on port multiplier ports.
+ *
+ * NOTE: Only called from ahci_tran_probe_port()
+ * WARNING!!! ahciport_mutex should be acquired before the function is called.
+ */
+static int
+ahci_probe_pmult(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
+    ahci_addr_t *addrp)
+{
+	sata_device_t sdevice;
+	ahci_addr_t pmport_addr;
+	uint32_t gscr32, port_hotplug_tags;
+	uint32_t pmport_sstatus;
+	int dev_exists_now = 0, dev_existed_previously = 0;
+	uint8_t port = addrp->aa_port;
+	int npmport;
+
+	/* The bits in GSCR32 refers to the pmport that has a hot-plug event. */
+	READ_PMULT(addrp, SATA_PMULT_GSCR32, &gscr32, err);
+	port_hotplug_tags = gscr32 & AHCI_PMPORT_MASK(ahci_portp);
+
+	do {
+		npmport = ddi_ffs(port_hotplug_tags) - 1;
+		if (npmport == -1)
+			/* no pending hot plug events. */
+			return (AHCI_SUCCESS);
+
+		AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp,
+		    "hot-plug event at port %d:%d", port, npmport);
+
+		AHCI_ADDR_SET_PMPORT(&pmport_addr, port, (uint8_t)npmport);
+
+		/* Check previous device at that port */
+		if (AHCIPORT_GET_DEV_TYPE(ahci_portp, &pmport_addr)
+		    != SATA_DTYPE_NONE)
+			dev_existed_previously = 1;
+
+		/* PxSStatus tells the presence of device. */
+		READ_PMULT(&pmport_addr, SATA_PMULT_REG_SSTS,
+		    &pmport_sstatus, err);
+
+		if (SSTATUS_GET_DET(pmport_sstatus) ==
+		    SSTATUS_DET_DEVPRE_PHYCOM)
+			dev_exists_now = 1;
+
+		/*
+		 * Clear PxSERR is critical. The transition from 0 to 1 will
+		 * emit a FIS which generates an asynchronous notification
+		 * event at controller. If we fail to clear the PxSERR, the
+		 * Async Notif events will no longer be activated on this
+		 * pmport.
+		 */
+		WRITE_PMULT(&pmport_addr, SATA_PMULT_REG_SERR,
+		    AHCI_SERROR_CLEAR_ALL, err);
+
+		bzero((void *)&sdevice, sizeof (sata_device_t));
+		sdevice.satadev_addr.cport = ahci_ctlp->
+		    ahcictl_port_to_cport[port];
+		sdevice.satadev_addr.qual = SATA_ADDR_PMPORT;
+		sdevice.satadev_addr.pmport = (uint8_t)npmport;
+		sdevice.satadev_state = SATA_PSTATE_PWRON;
+
+		AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp,
+		    "[Existence] %d -> %d", dev_existed_previously,
+		    dev_exists_now);
+
+		if (dev_exists_now) {
+			if (dev_existed_previously) {
+				/* Link (may) not change: Exist -> Exist * */
+				AHCIDBG(AHCIDBG_EVENT, ahci_ctlp,
+				    "ahci_probe_pmult: port %d:%d "
+				    "device link lost/established",
+				    port, npmport);
+
+				mutex_exit(&ahci_portp->ahciport_mutex);
+				sata_hba_event_notify(
+				    ahci_ctlp->ahcictl_sata_hba_tran->
+				    sata_tran_hba_dip,
+				    &sdevice,
+				    SATA_EVNT_LINK_LOST|
+				    SATA_EVNT_LINK_ESTABLISHED);
+				mutex_enter(&ahci_portp->ahciport_mutex);
+			} else {
+				/* Link change: None -> Exist */
+				AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp,
+				    "ahci_probe_pmult: port %d:%d "
+				    "device link established", port, npmport);
+
+				/* Clear port state */
+				AHCIPORT_SET_STATE(ahci_portp, &pmport_addr,
+				    SATA_STATE_UNKNOWN);
+				AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp,
+				    "ahci_probe_pmult: port %d "
+				    "ahciport_port_state [Cleared].", port);
+
+				mutex_exit(&ahci_portp->ahciport_mutex);
+				sata_hba_event_notify(
+				    ahci_ctlp->ahcictl_sata_hba_tran->
+				    sata_tran_hba_dip,
+				    &sdevice,
+				    SATA_EVNT_LINK_ESTABLISHED);
+				mutex_enter(&ahci_portp->ahciport_mutex);
+			}
+		} else { /* No device exists now */
+			if (dev_existed_previously) {
+
+				/* Link change: Exist -> None */
+				AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp,
+				    "ahci_probe_pmult: port %d:%d "
+				    "device link lost", port, npmport);
+
+				/* An existing device is lost. */
+				AHCIPORT_SET_STATE(ahci_portp, &pmport_addr,
+				    SATA_STATE_UNKNOWN);
+				AHCIPORT_SET_DEV_TYPE(ahci_portp, &pmport_addr,
+				    SATA_DTYPE_NONE);
+
+				mutex_exit(&ahci_portp->ahciport_mutex);
+				sata_hba_event_notify(
+				    ahci_ctlp->ahcictl_sata_hba_tran->
+				    sata_tran_hba_dip,
+				    &sdevice,
+				    SATA_EVNT_LINK_LOST);
+				mutex_enter(&ahci_portp->ahciport_mutex);
+			}
+		}
+
+		CLEAR_BIT(port_hotplug_tags, npmport);
+	} while (port_hotplug_tags != 0);
+
+	return (AHCI_SUCCESS);
+
+err:	/* R/W PMULT error */
+	return (AHCI_FAILURE);
+}
+
+/*
+ * Probe and initialize a port multiplier port.
+ * A port multiplier port could only be initilaizer here.
+ *
+ * WARNING!!! ahcictl_mutex should be acquired before the function
+ * is called.
+ */
+static int
+ahci_probe_pmport(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
+    ahci_addr_t *addrp, sata_device_t *sd)
+{
+	uint32_t port_state;
+	uint8_t port = addrp->aa_port;
+	ahci_addr_t addr_pmult;
+
+	/*
+	 * Check the parent - port multiplier first.
+	 */
+
+	/*
+	 * Parent port multiplier might have been removed. This event will be
+	 * ignored and failure.
+	 */
+	if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE ||
+	    ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) {
+		AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp,
+		    "ahci_tran_probe_port: "
+		    "parent device removed, ignore event.", NULL);
+
+		return (AHCI_FAILURE);
+	}
+
+	/* The port is ready? */
+	port_state = ahci_portp->ahciport_port_state;
+	if (!(port_state & SATA_STATE_READY)) {
+		AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp,
+		    "ahci_tran_probe_port: "
+		    "parent port-mult is NOT ready.", NULL);
+
+		if (ahci_restart_port_wait_till_ready(ahci_ctlp,
+		    ahci_portp, port, AHCI_PORT_RESET, NULL) !=
+		    AHCI_SUCCESS) {
+			AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp,
+			    "ahci_tran_probe_port: "
+			    "restart port-mult failed.", NULL);
+			return (AHCI_FAILURE);
+		}
+	}
+
+	/*
+	 * If port-mult is restarted due to some reason, we need
+	 * re-initialized the PMult.
+	 */
+	if (!(port_state & SATA_DSTATE_PMULT_INIT)) {
+		/* Initialize registers on a port multiplier */
+		AHCI_ADDR_SET_PMULT(&addr_pmult, addrp->aa_port);
+		if (ahci_initialize_pmult(ahci_ctlp, ahci_portp,
+		    &addr_pmult, sd) != AHCI_SUCCESS)
+			return (AHCI_FAILURE);
+	}
+
+	/*
+	 * Then we check the port-mult port
+	 */
+	/* Is this pmport initialized? */
+	port_state = AHCIPORT_GET_STATE(ahci_portp, addrp);
+	if (!(port_state & SATA_STATE_READY)) {
+
+		/* ahci_initialize_pmport() will set READY state */
+		if (ahci_initialize_pmport(ahci_ctlp,
+		    ahci_portp, addrp) != AHCI_SUCCESS)
+			return (AHCI_FAILURE);
+	}
+
+	return (AHCI_SUCCESS);
+}
+
+/*
  * AHCI device reset ...; a single device on one of the ports is reset,
  * but the HBA and physical communication remain intact. This is the
  * least intrusive.
@@ -3232,17 +4540,19 @@
  */
 static int
 ahci_software_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
-    uint8_t port)
+    ahci_addr_t *addrp)
 {
 	ahci_fis_h2d_register_t *h2d_register_fisp;
 	ahci_cmd_table_t *cmd_table;
 	ahci_cmd_header_t *cmd_header;
 	uint32_t port_cmd_status, port_cmd_issue, port_task_file;
 	int slot, loop_count;
+	uint8_t port = addrp->aa_port;
+	uint8_t pmport = addrp->aa_pmport;
 	int rval = AHCI_FAILURE;
 
 	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
-	    "Port %d device resetting", port);
+	    "port %d:%d device software resetting (FIS)", port, pmport);
 
 	/* First clear PxCMD.ST (AHCI v1.2 10.4.1) */
 	if (ahci_put_port_into_notrunning_state(ahci_ctlp, ahci_portp,
@@ -3335,6 +4645,7 @@
 	    &(cmd_table->ahcict_command_fis.ahcifc_fis.ahcifc_h2d_register);
 
 	SET_FIS_TYPE(h2d_register_fisp, AHCI_H2D_REGISTER_FIS_TYPE);
+	SET_FIS_PMP(h2d_register_fisp, pmport);
 	SET_FIS_DEVCTL(h2d_register_fisp, SATA_DEVCTL_SRST);
 
 	/* Set Command Header in Command List */
@@ -3342,6 +4653,7 @@
 	BZERO_DESCR_INFO(cmd_header);
 	BZERO_PRD_BYTE_COUNT(cmd_header);
 	SET_COMMAND_FIS_LENGTH(cmd_header, 5);
+	SET_PORT_MULTI_PORT(cmd_header, pmport);
 
 	SET_CLEAR_BUSY_UPON_R_OK(cmd_header, 1);
 	SET_RESET(cmd_header, 1);
@@ -3376,9 +4688,8 @@
 			    "loop_count = %d", loop_count);
 			goto out;
 		}
-
 		/* Wait for 10 millisec */
-		delay(AHCI_10MS_TICKS);
+		drv_usecwait(AHCI_10MS_USECS);
 	} while (port_cmd_issue & AHCI_SLOT_MASK(ahci_ctlp) & (0x1 << slot));
 
 	AHCIDBG(AHCIDBG_POLL_LOOP, ahci_ctlp,
@@ -3397,12 +4708,14 @@
 	    &(cmd_table->ahcict_command_fis.ahcifc_fis.ahcifc_h2d_register);
 
 	SET_FIS_TYPE(h2d_register_fisp, AHCI_H2D_REGISTER_FIS_TYPE);
+	SET_FIS_PMP(h2d_register_fisp, pmport);
 
 	/* Set Command Header in Command List */
 	cmd_header = &ahci_portp->ahciport_cmd_list[slot];
 	BZERO_DESCR_INFO(cmd_header);
 	BZERO_PRD_BYTE_COUNT(cmd_header);
 	SET_COMMAND_FIS_LENGTH(cmd_header, 5);
+	SET_PORT_MULTI_PORT(cmd_header, pmport);
 
 	SET_WRITE(cmd_header, 1);
 
@@ -3437,7 +4750,7 @@
 		}
 
 		/* Wait for 10 millisec */
-		delay(AHCI_10MS_TICKS);
+		drv_usecwait(AHCI_10MS_USECS);
 	} while (port_cmd_issue & AHCI_SLOT_MASK(ahci_ctlp) & (0x1 << slot));
 
 	AHCIDBG(AHCIDBG_POLL_LOOP, ahci_ctlp,
@@ -3450,9 +4763,9 @@
 	rval = AHCI_SUCCESS;
 out:
 	AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
-	    "ahci_software_reset: %s at port %d",
+	    "ahci_software_reset: %s at port %d:%d",
 	    rval == AHCI_SUCCESS ? "succeed" : "failed",
-	    port);
+	    port, pmport);
 
 	return (rval);
 }
@@ -3478,15 +4791,26 @@
  * cleared before the function is called.
  */
 static int
-ahci_port_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, uint8_t port)
+ahci_port_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
+    ahci_addr_t *addrp)
 {
+	ahci_addr_t pmult_addr;
 	uint32_t cap_status, port_cmd_status;
 	uint32_t port_scontrol, port_sstatus, port_serror;
 	uint32_t port_intr_status, port_task_file;
+	uint32_t port_state;
+	uint8_t port = addrp->aa_port;
 
 	int loop_count;
 	int instance = ddi_get_instance(ahci_ctlp->ahcictl_dip);
 
+	/* Target is a port multiplier port? */
+	if (AHCI_ADDR_IS_PMPORT(addrp))
+		return (ahci_pmport_reset(ahci_ctlp, ahci_portp, addrp));
+
+	/* Otherwise it must be an HBA port. */
+	ASSERT(AHCI_ADDR_IS_PORT(addrp));
+
 	AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp,
 	    "Port %d port resetting...", port);
 	ahci_portp->ahciport_port_state = 0;
@@ -3561,7 +4885,7 @@
 		 * spec, software shall wait at least 1 millisecond before
 		 * clearing PxSCTL.DET
 		 */
-		delay(AHCI_1MS_TICKS*2);
+		drv_usecwait(AHCI_1MS_USECS*2);
 
 		/* Fetch the SCONTROL again and rewrite the DET part with 0 */
 		port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
@@ -3587,7 +4911,7 @@
 		    port_cmd_status);
 
 		/* 0 -> 1 edge */
-		delay(AHCI_1MS_TICKS*2);
+		drv_usecwait(AHCI_1MS_USECS*2);
 
 		/* Set PxCMD.SUD to 1 */
 		port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
@@ -3640,7 +4964,7 @@
 		}
 
 		/* Wait for 10 millisec */
-		delay(AHCI_10MS_TICKS);
+		drv_usecwait(AHCI_10MS_USECS);
 	} while (SSTATUS_GET_DET(port_sstatus) != SSTATUS_DET_DEVPRE_PHYCOM);
 
 	AHCIDBG(AHCIDBG_INIT|AHCIDBG_POLL_LOOP, ahci_ctlp,
@@ -3654,7 +4978,7 @@
 		 * Either the port is not active or there
 		 * is no device present.
 		 */
-		ahci_portp->ahciport_device_type = SATA_DTYPE_NONE;
+		AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_NONE);
 		return (AHCI_SUCCESS);
 	}
 
@@ -3673,7 +4997,7 @@
 		cmn_err(CE_WARN, "!ahci%d: ahci_port_reset port %d "
 		    "COMINIT signal from the device not received",
 		    instance, port);
-		ahci_portp->ahciport_port_state |= SATA_PSTATE_FAILED;
+		AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_PSTATE_FAILED);
 		return (AHCI_FAILURE);
 	}
 
@@ -3711,7 +5035,7 @@
 	loop_count = 0;
 	do {
 		/* Wait for 10 millisec */
-		delay(AHCI_10MS_TICKS);
+		drv_usecwait(AHCI_10MS_USECS);
 
 		if (loop_count++ > AHCI_POLLRATE_PORT_TFD_ERROR) {
 			/*
@@ -3731,16 +5055,21 @@
 			    (uint32_t *)AHCI_PORT_PxSERR(ahci_ctlp, port),
 			    AHCI_SERROR_CLEAR_ALL);
 
+			AHCI_ADDR_SET_PMULT(&pmult_addr, port);
+
 			/* Try another software reset. */
 			if (ahci_software_reset(ahci_ctlp, ahci_portp,
-			    port) != AHCI_SUCCESS) {
-				ahci_portp->ahciport_port_state |=
-				    SATA_PSTATE_FAILED;
+			    &pmult_addr) != AHCI_SUCCESS) {
+				AHCIPORT_SET_STATE(ahci_portp, addrp,
+				    SATA_PSTATE_FAILED);
 				return (AHCI_FAILURE);
 			}
 			break;
 		}
 
+		/* Wait for 10 millisec */
+		drv_usecwait(AHCI_10MS_USECS);
+
 		/*
 		 * The Error bit '1' means COMRESET is finished successfully
 		 * The device hardware has been initialized and the power-up
@@ -3765,7 +5094,8 @@
 	    AHCI_SERROR_CLEAR_ALL);
 
 	/* Set port as ready */
-	ahci_portp->ahciport_port_state |= SATA_STATE_READY;
+	port_state = AHCIPORT_GET_STATE(ahci_portp, addrp);
+	AHCIPORT_SET_STATE(ahci_portp, addrp, port_state|SATA_STATE_READY);
 
 	AHCIDBG(AHCIDBG_INFO|AHCIDBG_ERRS, ahci_ctlp,
 	    "ahci_port_reset: succeed at port %d.", port);
@@ -3773,6 +5103,189 @@
 }
 
 /*
+ * COMRESET on a port multiplier port.
+ *
+ * NOTE: Only called in ahci_port_reset()
+ */
+static int
+ahci_pmport_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
+    ahci_addr_t *addrp)
+{
+	uint32_t port_scontrol, port_sstatus, port_serror;
+	uint32_t port_cmd_status, port_intr_status;
+	uint32_t port_state;
+	uint8_t port = addrp->aa_port;
+	uint8_t pmport = addrp->aa_pmport;
+	int loop_count;
+	int instance = ddi_get_instance(ahci_ctlp->ahcictl_dip);
+
+	AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp,
+	    "port %d:%d: pmport resetting", port, pmport);
+
+	/* Initialize pmport state */
+	AHCIPORT_SET_STATE(ahci_portp, addrp, 0);
+
+	READ_PMULT(addrp, SATA_PMULT_REG_SCTL, &port_scontrol, err);
+	SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_COMRESET);
+	WRITE_PMULT(addrp, SATA_PMULT_REG_SCTL, port_scontrol, err);
+
+	/* PxCMD.FRE should be set before. */
+	port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port));
+	ASSERT(port_cmd_status & AHCI_CMD_STATUS_FRE);
+	if (!(port_cmd_status & AHCI_CMD_STATUS_FRE))
+		return (AHCI_FAILURE);
+
+	/*
+	 * Give time for COMRESET to percolate, according to the AHCI
+	 * spec, software shall wait at least 1 millisecond before
+	 * clearing PxSCTL.DET
+	 */
+	drv_usecwait(AHCI_1MS_USECS*2);
+
+	/*
+	 * Fetch the SCONTROL again and rewrite the DET part with 0
+	 * This will generate an Asychronous Notification events.
+	 */
+	READ_PMULT(addrp, SATA_PMULT_REG_SCTL, &port_scontrol, err);
+	SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_NOACTION);
+	WRITE_PMULT(addrp, SATA_PMULT_REG_SCTL, port_scontrol, err);
+
+	/*
+	 * The port enters P:StartComm state, and HBA tells link layer to
+	 * start communication, which involves sending COMRESET to device.
+	 * And the HBA resets PxTFD.STS to 7Fh.
+	 *
+	 * When a COMINIT is received from the device, then the port enters
+	 * P:ComInit state. And HBA sets PxTFD.STS to FFh or 80h. HBA sets
+	 * PxSSTS.DET to 1h to indicate a device is detected but communication
+	 * is not yet established. HBA sets PxSERR.DIAG.X to '1' to indicate
+	 * a COMINIT has been received.
+	 */
+	/*
+	 * The DET field is valid only if IPM field indicates
+	 * that the interface is in active state.
+	 */
+	loop_count = 0;
+	do {
+		READ_PMULT(addrp, SATA_PMULT_REG_SSTS, &port_sstatus, err);
+
+		if (SSTATUS_GET_IPM(port_sstatus) != SSTATUS_IPM_ACTIVE) {
+			/*
+			 * If the interface is not active, the DET field
+			 * is considered not accurate. So we want to
+			 * continue looping.
+			 */
+			SSTATUS_SET_DET(port_sstatus, SSTATUS_DET_NODEV);
+		}
+
+		if (loop_count++ > AHCI_POLLRATE_PORT_SSTATUS) {
+			/*
+			 * We are effectively timing out after 0.1 sec.
+			 */
+			break;
+		}
+
+		/* Wait for 10 millisec */
+		drv_usecwait(AHCI_10MS_USECS);
+
+	} while (SSTATUS_GET_DET(port_sstatus) != SSTATUS_DET_DEVPRE_PHYCOM);
+
+	AHCIDBG(AHCIDBG_POLL_LOOP, ahci_ctlp,
+	    "ahci_pmport_reset: 1st loop count: %d, "
+	    "port_sstatus = 0x%x port %d:%d",
+	    loop_count, port_sstatus, port, pmport);
+
+	if ((SSTATUS_GET_IPM(port_sstatus) != SSTATUS_IPM_ACTIVE) ||
+	    (SSTATUS_GET_DET(port_sstatus) != SSTATUS_DET_DEVPRE_PHYCOM)) {
+		/*
+		 * Either the port is not active or there
+		 * is no device present.
+		 */
+		AHCIDBG(AHCIDBG_INIT|AHCIDBG_INFO, ahci_ctlp,
+		    "ahci_pmport_reset: "
+		    "no device attached to port %d:%d",
+		    port, pmport);
+		AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_NONE);
+		return (AHCI_SUCCESS);
+	}
+
+	/* Now we can make sure there is a device connected to the port */
+	/* COMINIT signal is supposed to be received (PxSERR.DIAG.X = '1') */
+	READ_PMULT(addrp, SATA_PMULT_REG_SERR, &port_serror, err);
+
+	if (!(port_serror & (1 << 26))) {
+		cmn_err(CE_WARN, "!ahci%d: ahci_pmport_reset: "
+		    "COMINIT signal from the device not received port %d:%d",
+		    instance, port, pmport);
+
+		AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_PSTATE_FAILED);
+		return (AHCI_FAILURE);
+	}
+
+	/*
+	 * After clear PxSERR register, we will receive a D2H FIS.
+	 * Normally this FIS will cause a IPMS error according to AHCI spec
+	 * v1.2 because there is no command outstanding for it. So we need
+	 * to ignore this error.
+	 */
+	ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_IGNORE_IPMS;
+	WRITE_PMULT(addrp, SATA_PMULT_REG_SERR, AHCI_SERROR_CLEAR_ALL, err);
+
+	/* Now we need to check the D2H FIS by checking IPMS error. */
+	loop_count = 0;
+	do {
+		port_intr_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxIS(ahci_ctlp, port));
+
+		if (loop_count++ > AHCI_POLLRATE_PORT_TFD_ERROR) {
+			/*
+			 * No D2H FIS received. This is possible according
+			 * to SATA 2.6 spec.
+			 */
+			cmn_err(CE_WARN, "ahci_port_reset: port %d:%d "
+			    "PxIS.IPMS is not set, we need another "
+			    "software reset.", port, pmport);
+
+			break;
+		}
+
+		/* Wait for 10 millisec */
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		delay(AHCI_10MS_TICKS);
+		mutex_enter(&ahci_portp->ahciport_mutex);
+
+	} while (!(port_intr_status & AHCI_INTR_STATUS_IPMS));
+
+	AHCIDBG(AHCIDBG_POLL_LOOP, ahci_ctlp,
+	    "ahci_pmport_reset: 2st loop count: %d, "
+	    "port_sstatus = 0x%x port %d:%d",
+	    loop_count, port_sstatus, port, pmport);
+
+	/* Clear IPMS */
+	ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxIS(ahci_ctlp, port),
+	    AHCI_INTR_STATUS_IPMS);
+	ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_IGNORE_IPMS;
+
+	/* This pmport is now ready for ahci_tran_start() */
+	port_state = AHCIPORT_GET_STATE(ahci_portp, addrp);
+	AHCIPORT_SET_STATE(ahci_portp, addrp, port_state|SATA_STATE_READY);
+
+	AHCIDBG(AHCIDBG_INFO|AHCIDBG_ERRS, ahci_ctlp,
+	    "ahci_pmport_reset: succeed at port %d:%d", port, pmport);
+	return (AHCI_SUCCESS);
+
+err:	/* R/W PMULT error */
+	/* IPMS flags might be set before. */
+	ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_IGNORE_IPMS;
+	AHCIDBG(AHCIDBG_INFO|AHCIDBG_ERRS, ahci_ctlp,
+	    "ahci_pmport_reset: failed at port %d:%d", port, pmport);
+
+	return (AHCI_FAILURE);
+}
+
+/*
  * AHCI HBA reset ...; the entire HBA is reset, and all ports are disabled.
  * This is the most intrusive.
  *
@@ -3790,6 +5303,7 @@
 ahci_hba_reset(ahci_ctl_t *ahci_ctlp)
 {
 	ahci_port_t *ahci_portp;
+	ahci_addr_t addr;
 	uint32_t ghc_control;
 	uint8_t port;
 	int loop_count;
@@ -3825,10 +5339,10 @@
 		}
 
 		/* Wait for 10 millisec */
-		delay(AHCI_10MS_TICKS);
+		drv_usecwait(AHCI_10MS_USECS);
 	} while (ghc_control & AHCI_HBA_GHC_HR);
 
-	AHCIDBG(AHCIDBG_INIT|AHCIDBG_POLL_LOOP, ahci_ctlp,
+	AHCIDBG(AHCIDBG_POLL_LOOP, ahci_ctlp,
 	    "ahci_hba_reset: 1st loop count: %d, "
 	    "ghc_control = 0x%x", loop_count, ghc_control);
 
@@ -3840,6 +5354,17 @@
 		return (AHCI_FAILURE);
 	}
 
+	/*
+	 * HBA reset will clear (AHCI Spec v1.2 10.4.3) GHC.IE / GHC.AE
+	 */
+	ghc_control = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp));
+	ghc_control |= (AHCI_HBA_GHC_AE | AHCI_HBA_GHC_IE);
+	ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp), ghc_control);
+
+	mutex_exit(&ahci_ctlp->ahcictl_mutex);
+
 	for (port = 0; port < ahci_ctlp->ahcictl_num_ports; port++) {
 		/* Only check implemented ports */
 		if (!AHCI_PORT_IMPLEMENTED(ahci_ctlp, port)) {
@@ -3849,29 +5374,24 @@
 		ahci_portp = ahci_ctlp->ahcictl_ports[port];
 		mutex_enter(&ahci_portp->ahciport_mutex);
 
-		if (ahci_port_reset(ahci_ctlp, ahci_portp, port)
-		    != AHCI_SUCCESS) {
+		AHCI_ADDR_SET_PORT(&addr, port);
+
+		if (ahci_restart_port_wait_till_ready(ahci_ctlp, ahci_portp,
+		    port, AHCI_PORT_RESET|AHCI_RESET_NO_EVENTS_UP, NULL) !=
+		    AHCI_SUCCESS) {
 			rval = AHCI_FAILURE;
 			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
 			    "ahci_hba_reset: port %d failed", port);
+			/*
+			 * Set the port state to SATA_PSTATE_FAILED if
+			 * failed to initialize it.
+			 */
+			ahci_portp->ahciport_port_state = SATA_PSTATE_FAILED;
 		}
 
 		mutex_exit(&ahci_portp->ahciport_mutex);
 	}
 
-	/*
-	 * Indicate that system software is AHCI aware by setting
-	 * GHC.AE to 1
-	 */
-	ghc_control = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
-	    (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp));
-
-	ghc_control |= AHCI_HBA_GHC_AE;
-	ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
-	    (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp), ghc_control);
-
-	mutex_exit(&ahci_ctlp->ahcictl_mutex);
-
 	return (rval);
 }
 
@@ -3888,23 +5408,43 @@
  * is called. And the port interrupt is disabled.
  */
 static void
-ahci_find_dev_signature(ahci_ctl_t *ahci_ctlp,
-    ahci_port_t *ahci_portp, uint8_t port)
+ahci_find_dev_signature(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
+    ahci_addr_t *addrp)
 {
+	ahci_addr_t dev_addr;
 	uint32_t signature;
-
-	AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp,
-	    "ahci_find_dev_signature enter: port %d", port);
-
-	ahci_portp->ahciport_device_type = SATA_DTYPE_UNKNOWN;
+	uint8_t port = addrp->aa_port;
+	uint8_t pmport = addrp->aa_pmport;
+	ASSERT(AHCI_ADDR_IS_VALID(addrp));
+
+	if (AHCI_ADDR_IS_PORT(addrp)) {
+		AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
+		    "ahci_find_dev_signature enter: port %d", port);
+
+		/*
+		 * NOTE: when the ahci address is a HBA port, we do not know
+		 * it is a device or a port multiplier that attached. we need
+		 * try a software reset at port multiplier address (0xf
+		 * pmport)
+		 */
+		AHCI_ADDR_SET_PMULT(&dev_addr, addrp->aa_port);
+	} else {
+		AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
+		    "ahci_find_dev_signature enter: port %d:%d",
+		    port, pmport);
+		dev_addr = *addrp;
+	}
+
+	/* Assume it is unknown. */
+	AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_UNKNOWN);
 
 	/* Issue a software reset to get the signature */
-	if (ahci_software_reset(ahci_ctlp, ahci_portp, port)
+	if (ahci_software_reset(ahci_ctlp, ahci_portp, &dev_addr)
 	    != AHCI_SUCCESS) {
 		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
 		    "ahci_find_dev_signature: software reset failed "
-		    "at port %d, cannot get signature.", port);
-		ahci_portp->ahciport_port_state = SATA_PSTATE_FAILED;
+		    "at port %d:%d, cannot get signature.", port, pmport);
+		AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_PSTATE_FAILED);
 		return;
 	}
 
@@ -3912,44 +5452,61 @@
 	 * ahci_software_reset has started the port, so we need manually stop
 	 * the port again.
 	 */
-	if (ahci_put_port_into_notrunning_state(ahci_ctlp, ahci_portp, port)
-	    != AHCI_SUCCESS) {
-		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
-		    "ahci_find_dev_signature: cannot stop port %d.", port);
-		ahci_portp->ahciport_port_state = SATA_PSTATE_FAILED;
-		return;
+	if (AHCI_ADDR_IS_PORT(addrp)) {
+		if (ahci_put_port_into_notrunning_state(ahci_ctlp,
+		    ahci_portp, port) != AHCI_SUCCESS) {
+			AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
+			    "ahci_find_dev_signature: cannot stop port %d.",
+			    port);
+			ahci_portp->ahciport_port_state = SATA_PSTATE_FAILED;
+			return;
+		}
 	}
 
 	/* Now we can make sure that a valid signature is received. */
 	signature = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
 	    (uint32_t *)AHCI_PORT_PxSIG(ahci_ctlp, port));
 
-	AHCIDBG(AHCIDBG_INIT|AHCIDBG_INFO, ahci_ctlp,
-	    "ahci_find_dev_signature: port %d signature = 0x%x",
-	    port, signature);
+#ifdef AHCI_DEBUG
+	if (AHCI_ADDR_IS_PMPORT(addrp)) {
+		AHCIDBG(AHCIDBG_INIT|AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+		    "ahci_find_dev_signature: signature = 0x%x at port %d:%d",
+		    signature, port, pmport);
+	} else {
+		AHCIDBG(AHCIDBG_INIT|AHCIDBG_INFO, ahci_ctlp,
+		    "ahci_find_dev_signature: signature = 0x%x at port %d",
+		    signature, port);
+	}
+#endif
+
+	/* NOTE: Only support ATAPI device at controller port. */
+	if (signature == AHCI_SIGNATURE_ATAPI && !AHCI_ADDR_IS_PORT(addrp))
+		signature = SATA_DTYPE_UNKNOWN;
 
 	switch (signature) {
 
 	case AHCI_SIGNATURE_DISK:
-		ahci_portp->ahciport_device_type = SATA_DTYPE_ATADISK;
+		AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_ATADISK);
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
 		    "Disk is found at port: %d", port);
 		break;
 
 	case AHCI_SIGNATURE_ATAPI:
-		ahci_portp->ahciport_device_type = SATA_DTYPE_ATAPI;
+		AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_ATAPI);
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
 		    "ATAPI device is found at port: %d", port);
 		break;
 
 	case AHCI_SIGNATURE_PORT_MULTIPLIER:
-		ahci_portp->ahciport_device_type = SATA_DTYPE_PMULT;
+		/* Port Multiplier cannot recursively attached. */
+		ASSERT(AHCI_ADDR_IS_PORT(addrp));
+		AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_PMULT);
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
 		    "Port Multiplier is found at port: %d", port);
 		break;
 
 	default:
-		ahci_portp->ahciport_device_type = SATA_DTYPE_UNKNOWN;
+		AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_UNKNOWN);
 		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
 		    "Unknown device is found at port: %d", port);
 	}
@@ -4071,7 +5628,7 @@
 	ahci_ctlp->ahcictl_ports[port] = ahci_portp;
 	ahci_portp->ahciport_port_num = port;
 
-	/* Intialize the port condition variable */
+	/* Initialize the port condition variable */
 	cv_init(&ahci_portp->ahciport_cv, NULL, CV_DRIVER, NULL);
 
 	/* Initialize the port mutex */
@@ -4108,6 +5665,9 @@
 	ahci_portp->ahciport_event_args =
 	    kmem_zalloc(sizeof (ahci_event_arg_t), KM_SLEEP);
 
+	ahci_portp->ahciport_event_args->ahciea_addrp =
+	    kmem_zalloc(sizeof (ahci_addr_t), KM_SLEEP);
+
 	if (ahci_portp->ahciport_event_args == NULL)
 		goto err_case4;
 
@@ -4148,11 +5708,15 @@
 	ASSERT(ahci_portp != NULL);
 
 	mutex_enter(&ahci_portp->ahciport_mutex);
+	kmem_free(ahci_portp->ahciport_event_args->ahciea_addrp,
+	    sizeof (ahci_addr_t));
+	ahci_portp->ahciport_event_args->ahciea_addrp = NULL;
 	kmem_free(ahci_portp->ahciport_event_args, sizeof (ahci_event_arg_t));
 	ahci_portp->ahciport_event_args = NULL;
 	ddi_taskq_destroy(ahci_portp->ahciport_event_taskq);
 	ahci_dealloc_cmd_list(ahci_ctlp, ahci_portp);
 	ahci_dealloc_rcvd_fis(ahci_portp);
+	ahci_dealloc_pmult(ahci_ctlp, ahci_portp);
 	mutex_exit(&ahci_portp->ahciport_mutex);
 
 	mutex_destroy(&ahci_portp->ahciport_mutex);
@@ -4523,8 +6087,10 @@
 }
 
 /*
- * WARNING!!! ahciport_mutex should be acquired before the function
- * is called.
+ * Update SATA registers at controller ports
+ *
+ * WARNING!!! ahciport_mutex should be acquired before those functions
+ * get called.
  */
 static void
 ahci_update_sata_registers(ahci_ctl_t *ahci_ctlp, uint8_t port,
@@ -4572,19 +6138,21 @@
 		    AHCI_INTR_STATUS_HBDS |
 		    AHCI_INTR_STATUS_HBFS |
 		    AHCI_INTR_STATUS_TFES);
-		mutex_exit(&ahci_portp->ahciport_mutex);
-		goto next;
-	}
+	} else {
+		/*
+		 * port_intr_enable indicates that the corresponding interrrupt
+		 * reporting is enabled.
+		 */
+		port_intr_enable = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxIE(ahci_ctlp, port));
+	}
+
+	/* IPMS error in port reset should be ignored according AHCI spec. */
+	if (!(ahci_portp->ahciport_flags & AHCI_PORT_FLAG_IGNORE_IPMS))
+		port_intr_enable |= AHCI_INTR_STATUS_IPMS;
 	mutex_exit(&ahci_portp->ahciport_mutex);
 
 	/*
-	 * port_intr_enable indicates that the corresponding interrrupt
-	 * reporting is enabled.
-	 */
-	port_intr_enable = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
-	    (uint32_t *)AHCI_PORT_PxIE(ahci_ctlp, port));
-next:
-	/*
 	 * port_intr_stats indicates that the corresponding interrupt
 	 * condition is active.
 	 */
@@ -4635,18 +6203,20 @@
 	}
 
 	/*
-	 * Check the non-fatal error interrupt bits, there are three
+	 * Check the non-fatal error interrupt bits, there are four
 	 * kinds of non-fatal errors at the time being:
 	 *
 	 *    PxIS.UFS - Unknown FIS Error
 	 *    PxIS.OFS - Overflow Error
 	 *    PxIS.INFS - Interface Non-Fatal Error
+	 *    PxIS.IPMS - Incorrect Port Multiplier Status Error
 	 *
 	 * For these non-fatal errors, the HBA can continue to operate,
 	 * so the driver just log the error messages.
 	 */
 	if (port_intr_status & (AHCI_INTR_STATUS_UFS |
 	    AHCI_INTR_STATUS_OFS |
+	    AHCI_INTR_STATUS_IPMS |
 	    AHCI_INTR_STATUS_INFS)) {
 		(void) ahci_intr_non_fatal_error(ahci_ctlp, ahci_portp,
 		    port, port_intr_status);
@@ -4774,6 +6344,7 @@
 	mutex_enter(&ahci_portp->ahciport_mutex);
 
 	if (!ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) &&
+	    !RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp) &&
 	    !NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) {
 		/*
 		 * Spurious interrupt. Nothing to be done.
@@ -4792,7 +6363,15 @@
 		    "ahci_intr_cmd_cmplt: port %d the sata pkt for error "
 		    "retrieval is finished, and finished_tags = 0x%x",
 		    port, finished_tags);
+	} else if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) {
+		finished_tags = 0x1 & ~port_cmd_issue;
+		AHCIDBG(AHCIDBG_INFO, ahci_ctlp,
+		    "ahci_intr_cmd_cmplt: port %d the sata pkt for r/w "
+		    "port multiplier is finished, and finished_tags = 0x%x",
+		    port, finished_tags);
+
 	} else {
+
 		finished_tags = ahci_portp->ahciport_pending_tags &
 		    ~port_cmd_issue & AHCI_SLOT_MASK(ahci_ctlp);
 	}
@@ -4816,6 +6395,28 @@
 		goto out;
 	}
 
+	if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp) &&
+	    (finished_tags == 0x1)) {
+		satapkt = ahci_portp->ahciport_rdwr_pmult_pkt;
+		ASSERT(satapkt != NULL);
+
+		AHCIDBG(AHCIDBG_INTR, ahci_ctlp,
+		    "ahci_intr_cmd_cmplt: sending up pkt 0x%p "
+		    "with SATA_PKT_COMPLETED", (void *)satapkt);
+
+		/* READ PORTMULT need copy out FIS content. */
+		if (satapkt->satapkt_cmd.satacmd_flags.sata_special_regs) {
+			rcvd_fisp = &(ahci_portp->ahciport_rcvd_fis->
+			    ahcirf_d2h_register_fis);
+			satapkt->satapkt_cmd.satacmd_status_reg =
+			    GET_RFIS_STATUS(rcvd_fisp);
+			ahci_copy_out_regs(&satapkt->satapkt_cmd, rcvd_fisp);
+		}
+
+		SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_COMPLETED);
+		goto out;
+	}
+
 	while (finished_tags) {
 		finished_slot = ddi_ffs(finished_tags) - 1;
 		if (finished_slot == -1) {
@@ -4915,8 +6516,7 @@
  * with the 'I' bit set and has been copied into system memory. It will
  * be sent under the following situations:
  *
- * 	1. NCQ command is completed
- * 	2. Asynchronous notification
+ * 1. NCQ command is completed
  *
  * The completion of NCQ commands (READ/WRITE FPDMA QUEUED) is performed
  * via the Set Device Bits FIS. When such event is generated, the software
@@ -4924,22 +6524,62 @@
  * list of commands previously issue by software. ahciport_pending_ncq_tags
  * keeps the tags of previously issued commands.
  *
- * Asynchronous Notification is a feature in SATA II, which allows an
- * ATAPI device to send a signal to the host when media is inserted or
+ * 2. Asynchronous Notification
+ *
+ * Asynchronous Notification is a feature in SATA spec 2.6.
+ *
+ * 1) ATAPI device will send a signal to the host when media is inserted or
  * removed and avoids polling the device for media changes. The signal
  * sent to the host is a Set Device Bits FIS with the 'I' and 'N' bits
  * set to '1'. At the moment, it's not supported yet.
+ *
+ * 2) Port multiplier will send a signal to the host when a hot plug event
+ * has occured on a port multiplier port. It is used when command based
+ * switching is employed. This is handled by ahci_intr_pmult_sntf_events()
  */
 static int
 ahci_intr_set_device_bits(ahci_ctl_t *ahci_ctlp,
     ahci_port_t *ahci_portp, uint8_t port)
 {
+	ahci_addr_t addr;
+
+	AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR, ahci_ctlp,
+	    "ahci_intr_set_device_bits enter: port %d", port);
+
+	/* Initialize HBA port address */
+	AHCI_ADDR_SET_PORT(&addr, port);
+
+	/* NCQ plug handler */
+	(void) ahci_intr_ncq_events(ahci_ctlp, ahci_portp, &addr);
+
+	/* Check port multiplier's asynchronous notification events */
+	if (ahci_ctlp->ahcictl_cap & AHCI_CAP_SNTF) {
+		(void) ahci_intr_pmult_sntf_events(ahci_ctlp,
+		    ahci_portp, port);
+	}
+
+	/* ATAPI events is not supported yet */
+
+	return (AHCI_SUCCESS);
+}
+/*
+ * NCQ interrupt handler. Called upon a NCQ command is completed.
+ * Only be called from ahci_intr_set_device_bits().
+ *
+ * WARNING!!! ahciport_mutex should be acquired before the function is
+ * called.
+ */
+static int
+ahci_intr_ncq_events(ahci_ctl_t *ahci_ctlp,
+    ahci_port_t *ahci_portp, ahci_addr_t *addrp)
+{
 	uint32_t port_sactive;
 	uint32_t port_cmd_issue;
 	uint32_t issued_tags;
 	int issued_slot;
 	uint32_t finished_tags;
 	int finished_slot;
+	uint8_t port = addrp->aa_port;
 	sata_pkt_t *satapkt;
 
 	AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR|AHCIDBG_NCQ, ahci_ctlp,
@@ -5037,6 +6677,116 @@
 }
 
 /*
+ * Port multiplier asynchronous notification event handler. Called upon a
+ * device is hot plugged/pulled.
+ *
+ * The async-notification event will only be recorded by ahcipmi_snotif_tags
+ * here and will be handled by ahci_probe_pmult().
+ *
+ * NOTE: called only from ahci_port_intr().
+ */
+static int
+ahci_intr_pmult_sntf_events(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
+    uint8_t port)
+{
+	sata_device_t sdevice;
+
+	AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR, ahci_ctlp,
+	    "ahci_intr_pmult_sntf_events enter: port %d ", port);
+
+	/* no hot-plug while attaching process */
+	mutex_enter(&ahci_ctlp->ahcictl_mutex);
+	if (ahci_ctlp->ahcictl_flags & AHCI_ATTACH) {
+		mutex_exit(&ahci_ctlp->ahcictl_mutex);
+		return (AHCI_SUCCESS);
+	}
+	mutex_exit(&ahci_ctlp->ahcictl_mutex);
+
+	mutex_enter(&ahci_portp->ahciport_mutex);
+	if (ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) {
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		return (AHCI_SUCCESS);
+	}
+
+	ASSERT(ahci_portp->ahciport_pmult_info != NULL);
+
+	ahci_portp->ahciport_pmult_info->ahcipmi_snotif_tags =
+	    ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxSNTF(ahci_ctlp, port));
+	ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxSNTF(ahci_ctlp, port),
+	    AHCI_SNOTIF_CLEAR_ALL);
+
+	if (ahci_portp->ahciport_pmult_info->ahcipmi_snotif_tags == 0) {
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		return (AHCI_SUCCESS);
+	}
+
+	/* Port Multiplier sub-device hot-plug handler */
+	if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) {
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		return (AHCI_SUCCESS);
+	}
+
+	if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_PMULT_SNTF) {
+		/* Not allowed to re-enter. */
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		return (AHCI_SUCCESS);
+	}
+
+	ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_PMULT_SNTF;
+
+	/*
+	 * NOTE:
+	 * Even if Asynchronous Notification is supported (and enabled) by
+	 * both controller and the port multiplier, the content of PxSNTF
+	 * register is always set to 0x8000 by async notification event. We
+	 * need to check GSCR[32] on the port multiplier to find out the
+	 * owner of this event.
+	 * This is not accord with SATA spec 2.6 and needs further
+	 * clarification.
+	 */
+	/* hot-plug will not reported while reseting. */
+	if (ahci_portp->ahciport_reset_in_progress == 1) {
+		AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+		    "port %d snotif event ignored", port);
+		ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_PMULT_SNTF;
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		return (AHCI_SUCCESS);
+	}
+
+	AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp,
+	    "PxSNTF is set to 0x%x by port multiplier",
+	    ahci_portp->ahciport_pmult_info->ahcipmi_snotif_tags);
+
+	/*
+	 * Now we need do some necessary operation and inform SATA framework
+	 * that link/device events has happened.
+	 */
+	bzero((void *)&sdevice, sizeof (sata_device_t));
+	sdevice.satadev_addr.cport = ahci_ctlp->
+	    ahcictl_port_to_cport[port];
+	sdevice.satadev_addr.pmport = SATA_PMULT_HOSTPORT;
+	sdevice.satadev_addr.qual = SATA_ADDR_PMULT;
+	sdevice.satadev_state = SATA_PSTATE_PWRON;
+
+	/* Just reject packets, do not stop that port. */
+	ahci_reject_all_abort_pkts(ahci_ctlp, ahci_portp, port);
+
+	mutex_exit(&ahci_portp->ahciport_mutex);
+	sata_hba_event_notify(
+	    ahci_ctlp->ahcictl_sata_hba_tran->sata_tran_hba_dip,
+	    &sdevice,
+	    SATA_EVNT_PMULT_LINK_CHANGED);
+	mutex_enter(&ahci_portp->ahciport_mutex);
+
+	ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_PMULT_SNTF;
+	mutex_exit(&ahci_portp->ahciport_mutex);
+
+	return (AHCI_SUCCESS);
+}
+
+/*
  * 1=Change in Current Connect Status. 0=No change in Current Connect Status.
  * This bit reflects the state of PxSERR.DIAG.X. This bit is only cleared
  * when PxSERR.DIAG.X is cleared. When PxSERR.DIAG.X is set to one, it
@@ -5169,6 +6919,7 @@
 	sata_device_t sdevice;
 	int dev_exists_now = 0;
 	int dev_existed_previously = 0;
+	ahci_addr_t port_addr;
 
 	AHCIDBG(AHCIDBG_INTR|AHCIDBG_ENTRY, ahci_ctlp,
 	    "ahci_intr_phyrdy_change enter, port %d", port);
@@ -5219,8 +6970,11 @@
 	sdevice.satadev_state = SATA_PSTATE_PWRON;
 	ahci_portp->ahciport_port_state = SATA_PSTATE_PWRON;
 
+	AHCI_ADDR_SET_PORT(&port_addr, port);
+
+	ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_HOTPLUG;
 	if (dev_exists_now) {
-		if (dev_existed_previously) {
+		if (dev_existed_previously) { /* 1 -> 1 */
 			/* Things are fine now. The loss was temporary. */
 			AHCIDBG(AHCIDBG_EVENT, ahci_ctlp,
 			    "ahci_intr_phyrdy_change  port %d "
@@ -5233,14 +6987,18 @@
 			    SATA_EVNT_LINK_LOST|SATA_EVNT_LINK_ESTABLISHED);
 			mutex_enter(&ahci_portp->ahciport_mutex);
 
-		} else {
+		} else { /* 0 -> 1 */
 			AHCIDBG(AHCIDBG_EVENT, ahci_ctlp,
 			    "ahci_intr_phyrdy_change: port %d "
 			    "device link established", port);
 
-			/* A new device has been detected. */
-			(void) ahci_port_reset(ahci_ctlp, ahci_portp, port);
-			ahci_find_dev_signature(ahci_ctlp, ahci_portp, port);
+			/*
+			 * A new device has been detected. The new device
+			 * might be a port multiplier instead of a drive, so
+			 * we cannot update the signature directly.
+			 */
+			(void) ahci_initialize_port(ahci_ctlp,
+			    ahci_portp, &port_addr);
 
 			/* Try to start the port */
 			if (ahci_start_port(ahci_ctlp, ahci_portp, port)
@@ -5264,7 +7022,7 @@
 		}
 	} else { /* No device exists now */
 
-		if (dev_existed_previously) {
+		if (dev_existed_previously) { /* 1 -> 0 */
 			AHCIDBG(AHCIDBG_EVENT, ahci_ctlp,
 			    "ahci_intr_phyrdy_change: port %d "
 			    "device link lost", port);
@@ -5273,8 +7031,14 @@
 			(void) ahci_put_port_into_notrunning_state(ahci_ctlp,
 			    ahci_portp, port);
 
+			if (ahci_portp->ahciport_device_type ==
+			    SATA_DTYPE_PMULT) {
+				ahci_dealloc_pmult(ahci_ctlp, ahci_portp);
+			}
+
 			/* An existing device is lost. */
 			ahci_portp->ahciport_device_type = SATA_DTYPE_NONE;
+			ahci_portp->ahciport_port_state = SATA_STATE_UNKNOWN;
 
 			mutex_exit(&ahci_portp->ahciport_mutex);
 			sata_hba_event_notify(
@@ -5284,6 +7048,7 @@
 			mutex_enter(&ahci_portp->ahciport_mutex);
 		}
 	}
+	ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_HOTPLUG;
 
 	mutex_exit(&ahci_portp->ahciport_mutex);
 
@@ -5301,6 +7066,14 @@
  * won't be posted to memory and PxSERR.ERR.P will be set, which is then
  * a fatal error.
  *
+ * PxIS.IPMS - Incorrect Port Multiplier Status
+ *
+ * IPMS Indicates that the HBA received a FIS from a device that did not
+ * have a command outstanding. The IPMS bit may be set during enumeration
+ * of devices on a Port Multiplier due to the normal Port Multiplier
+ * enumeration process. It is recommended that IPMS only be used after
+ * enumeration is complete on the Port Multiplier (copied from spec).
+ *
  * PxIS.OFS - Overflow Error
  *
  * Command list overflow is defined as software building a command table
@@ -5350,7 +7123,7 @@
 
 	AHCIDBG(AHCIDBG_INTR|AHCIDBG_ENTRY|AHCIDBG_ERRS, ahci_ctlp,
 	    "ahci_intr_non_fatal_error: port %d, "
-	    "port_serror = 0x%x", port, port_serror);
+	    "PxSERR = 0x%x, PxIS = 0x%x ", port, port_serror, intr_status);
 
 	ahci_log_serror_message(ahci_ctlp, port, port_serror, 1);
 
@@ -5365,6 +7138,11 @@
 	}
 
 #if AHCI_DEBUG
+	if (intr_status & AHCI_INTR_STATUS_IPMS) {
+		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci port %d "
+		    "has Incorrect Port Multiplier Status error", port);
+	}
+
 	if (intr_status & AHCI_INTR_STATUS_OFS) {
 		AHCIDBG(AHCIDBG_INTR|AHCIDBG_ERRS, ahci_ctlp,
 		    "ahci port %d has overflow error", port);
@@ -5581,6 +7359,10 @@
 		    "task_file_status = 0x%x", port, task_file_status);
 	}
 
+	/*
+	 * Here we just log the fatal error info in interrupt context.
+	 * Misc recovery processing will be handled in task queue.
+	 */
 	if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) {
 		/*
 		 * Read PxCMD.CCS to determine the slot that the HBA
@@ -5596,6 +7378,7 @@
 		    "ahci_intr_fatal_error: spkt 0x%p is being processed when "
 		    "fatal error occurred for port %d", spkt, port);
 
+		/* A Task File Data error. */
 		if (intr_status & AHCI_INTR_STATUS_TFES) {
 			err_byte = (task_file_status & AHCI_TFD_ERR_MASK)
 			    >> AHCI_TFD_ERR_SHIFT;
@@ -5640,6 +7423,7 @@
 	args->ahciea_ctlp = (void *)ahci_ctlp;
 	args->ahciea_portp = (void *)ahci_portp;
 	args->ahciea_event = intr_status;
+	AHCI_ADDR_SET_PORT((ahci_addr_t *)args->ahciea_addrp, port);
 
 	/* Start the taskq to handle error recovery */
 	if ((ddi_taskq_dispatch(ahci_portp->ahciport_event_taskq,
@@ -6129,7 +7913,7 @@
 		}
 
 		/* Wait for 10 millisec */
-		drv_usecwait(10000);
+		drv_usecwait(AHCI_10MS_USECS);
 	} while (port_cmd_status & AHCI_CMD_STATUS_CR);
 
 	ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_STARTED;
@@ -6160,6 +7944,7 @@
  * the process.
  *
  * The routine will be called under following scenarios:
+ * 	+ To reset the HBA
  *	+ To abort the packet(s)
  *	+ To reset the port
  *	+ To activate the port
@@ -6190,12 +7975,19 @@
 	uint32_t task_file_status;
 	sata_device_t sdevice;
 	int rval;
+	ahci_addr_t addr_port;
+	ahci_pmult_info_t *pminfo = NULL;
 	int dev_exists_begin = 0;
 	int dev_exists_end = 0;
+	uint32_t previous_dev_type = ahci_portp->ahciport_device_type;
+	int npmport = 0;
+	uint8_t cport = ahci_ctlp->ahcictl_port_to_cport[port];
 
 	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
 	    "ahci_restart_port_wait_till_ready: port %d enter", port);
 
+	AHCI_ADDR_SET_PORT(&addr_port, port);
+
 	if (ahci_portp->ahciport_device_type != SATA_DTYPE_NONE)
 		dev_exists_begin = 1;
 
@@ -6221,7 +8013,7 @@
 
 	/*
 	 * Check whether the device is in a stable status, if yes,
-	 * then start the port directly. However for ahci_tran_dport_reset,
+	 * then start the port directly. However for ahci_tran_reset_dport,
 	 * we may have to perform a port reset.
 	 */
 	if (!(task_file_status & (AHCI_TFD_STS_BSY | AHCI_TFD_STS_DRQ)) &&
@@ -6233,13 +8025,23 @@
 	 * If PxTFD.STS.BSY or PxTFD.STS.DRQ is set to '1', then issue
 	 * a COMRESET to the device
 	 */
-	rval = ahci_port_reset(ahci_ctlp, ahci_portp, port);
+	ahci_disable_port_intrs(ahci_ctlp, port);
+	rval = ahci_port_reset(ahci_ctlp, ahci_portp, &addr_port);
+	ahci_enable_port_intrs(ahci_ctlp, port);
+
+#ifdef AHCI_DEBUG
+	if (rval != AHCI_SUCCESS)
+		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+		    "ahci_restart_port_wait_till_ready: port %d failed",
+		    port);
+#endif
 
 	if (reset_flag != NULL)
 		*reset_flag = 1;
 
 	/* Indicate to the framework that a reset has happened. */
 	if ((ahci_portp->ahciport_device_type != SATA_DTYPE_NONE) &&
+	    (ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) &&
 	    !(flag & AHCI_RESET_NO_EVENTS_UP)) {
 		/* Set the reset in progress flag */
 		ahci_portp->ahciport_reset_in_progress = 1;
@@ -6262,7 +8064,7 @@
 		}
 
 		AHCIDBG(AHCIDBG_EVENT, ahci_ctlp,
-		    "port %d sending event up: SATA_EVNT_RESET", port);
+		    "port %d sending event up: SATA_EVNT_DEVICE_RESET", port);
 	} else {
 		ahci_portp->ahciport_reset_in_progress = 0;
 	}
@@ -6276,11 +8078,13 @@
 
 	if (SSTATUS_GET_DET(port_sstatus) == SSTATUS_DET_DEVPRE_PHYCOM) {
 		dev_exists_end = 1;
-		ASSERT(ahci_portp->ahciport_device_type != SATA_DTYPE_NONE);
-	}
+	}
+
+	if (dev_exists_begin == 0 && dev_exists_end == 0) /* 0 -> 0 */
+		return (rval);
 
 	/* Check whether a hot plug event happened */
-	if (dev_exists_begin == 1 && dev_exists_end == 0) {
+	if (dev_exists_begin == 1 && dev_exists_end == 0) { /* 1 -> 0 */
 		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
 		    "ahci_restart_port_wait_till_ready: port %d "
 		    "device is removed", port);
@@ -6291,7 +8095,124 @@
 		mutex_exit(&ahci_portp->ahciport_mutex);
 		(void) ahci_intr_phyrdy_change(ahci_ctlp, ahci_portp, port);
 		mutex_enter(&ahci_portp->ahciport_mutex);
-	}
+
+		return (rval);
+	}
+
+
+	/* 0/1 -> 1 : device may change */
+	/*
+	 * May be called by ahci_fatal_error_recovery_handler, so
+	 * don't issue software if the previous device is ATAPI.
+	 */
+	if (ahci_portp->ahciport_device_type == SATA_DTYPE_ATAPI)
+		return (rval);
+
+	/*
+	 * The COMRESET will make port multiplier enter legacy mode.
+	 * Issue a software reset to make it work again.
+	 */
+	ahci_find_dev_signature(ahci_ctlp, ahci_portp, &addr_port);
+
+	/*
+	 * Following codes are specific for the port multiplier
+	 */
+	if (previous_dev_type != SATA_DTYPE_PMULT &&
+	    ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) {
+		/* in case previous_dev_type is corrupt */
+		ahci_dealloc_pmult(ahci_ctlp, ahci_portp);
+		(void) ahci_start_port(ahci_ctlp, ahci_portp, port);
+		return (rval);
+	}
+
+	/* Device change: PMult -> Non-PMult */
+	if (previous_dev_type == SATA_DTYPE_PMULT &&
+	    ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) {
+		/*
+		 * This might happen because
+		 * 1. Software reset failed. Port multiplier is not correctly
+		 *    enumerated.
+		 * 2. Another non-port-multiplier device is attached. Perhaps
+		 *    the port multiplier was replaced by another device by
+		 *    whatever reason, but AHCI driver missed hot-plug event.
+		 *
+		 * Now that the port has been initialized, we just need to
+		 * update the port structure according new device, then report
+		 * and wait SATA framework to probe new device.
+		 */
+
+		/* Force to release pmult resource */
+		ahci_dealloc_pmult(ahci_ctlp, ahci_portp);
+		(void) ahci_start_port(ahci_ctlp, ahci_portp, port);
+
+		bzero((void *)&sdevice, sizeof (sata_device_t));
+		sdevice.satadev_addr.cport =
+		    ahci_ctlp->ahcictl_port_to_cport[port];
+		sdevice.satadev_addr.pmport = 0;
+		sdevice.satadev_addr.qual = SATA_ADDR_DCPORT;
+
+		sdevice.satadev_state = SATA_DSTATE_RESET |
+		    SATA_DSTATE_PWR_ACTIVE;
+
+		mutex_exit(&ahci_portp->ahciport_mutex);
+		sata_hba_event_notify(
+		    ahci_ctlp->ahcictl_dip,
+		    &sdevice,
+		    SATA_EVNT_DEVICE_RESET);
+		mutex_enter(&ahci_portp->ahciport_mutex);
+
+		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+		    "Port multiplier is [Gone] at port %d ", port);
+		AHCIDBG(AHCIDBG_EVENT, ahci_ctlp,
+		    "port %d sending event up: SATA_EVNT_DEVICE_RESET", port);
+
+		return (AHCI_SUCCESS);
+	}
+
+	/* Device change: Non-PMult -> PMult */
+	if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) {
+
+		/* NOTE: The PxCMD.PMA may be cleared by HBA reset. */
+		ahci_alloc_pmult(ahci_ctlp, ahci_portp);
+
+		(void) ahci_start_port(ahci_ctlp, ahci_portp, port);
+	}
+	pminfo = ahci_portp->ahciport_pmult_info;
+	ASSERT(pminfo != NULL);
+
+	/* Device (may) change: PMult -> PMult */
+	/*
+	 * First initialize port multiplier. Set state to READY and wait for
+	 * probe entry point to initialize it
+	 */
+	ahci_portp->ahciport_port_state = SATA_STATE_READY;
+
+	/*
+	 * It's a little complicated while target is a port multiplier. we
+	 * need to COMRESET all pmports behind that PMult otherwise those
+	 * sub-links between the PMult and the sub-devices will be in an
+	 * inactive state (indicated by PSCR0/PxSSTS) and the following access
+	 * to those sub-devices will be rejected by Link-Fatal-Error.
+	 */
+	/*
+	 * The PxSNTF will be set soon after the pmult is plugged. While the
+	 * pmult itself is attaching, sata_hba_event_notfiy will fail. so we
+	 * simply mark every sub-port as 'unknown', then ahci_probe_pmport
+	 * will initialized it.
+	 */
+	for (npmport = 0; npmport < pminfo->ahcipmi_num_dev_ports; npmport++)
+		pminfo->ahcipmi_port_state[npmport] = SATA_STATE_UNKNOWN;
+
+	/* Report reset event. */
+	ahci_portp->ahciport_reset_in_progress = 1;
+
+	bzero((void *)&sdevice, sizeof (sata_device_t));
+	sdevice.satadev_addr.cport = cport;
+	sdevice.satadev_addr.pmport = SATA_PMULT_HOSTPORT;
+	sdevice.satadev_addr.qual = SATA_ADDR_PMULT;
+	sdevice.satadev_state = SATA_DSTATE_RESET | SATA_DSTATE_PWR_ACTIVE;
+	sata_hba_event_notify(ahci_ctlp->ahcictl_dip, &sdevice,
+	    SATA_EVNT_DEVICE_RESET);
 
 	return (rval);
 }
@@ -6323,6 +8244,7 @@
 	sata_pkt_t *satapkt;
 	int ncq_cmd_in_progress = 0;
 	int err_retri_cmd_in_progress = 0;
+	int rdwr_pmult_cmd_in_progress = 0;
 
 	AHCIDBG(AHCIDBG_ERRS|AHCIDBG_ENTRY, ahci_ctlp,
 	    "ahci_mop_commands entered: port: %d slot_status: 0x%x",
@@ -6334,6 +8256,25 @@
 	    "reset_tags: 0x%x", failed_tags,
 	    timeout_tags, aborted_tags, reset_tags);
 
+#ifdef AHCI_DEBUG
+	if (ahci_debug_flags & AHCIDBG_ERRS) {
+		int i;
+		char msg_buf[200] = {0, };
+		for (i = 0x1f; i >= 0; i--) {
+			if (ahci_portp->ahciport_slot_pkts[i] != NULL)
+				msg_buf[i] = 'X';
+			else
+				msg_buf[i] = '.';
+		}
+		msg_buf[0x20] = '\0';
+		cmn_err(CE_NOTE, "port[%d] slots: %s",
+		    ahci_portp->ahciport_port_num, msg_buf);
+		cmn_err(CE_NOTE, "[ERR-RT] %p [RW-PM] %p ",
+		    (void *)ahci_portp->ahciport_err_retri_pkt,
+		    (void *)ahci_portp->ahciport_rdwr_pmult_pkt);
+	}
+#endif
+
 	if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) {
 		finished_tags = ahci_portp->ahciport_pending_tags &
 		    ~slot_status & AHCI_SLOT_MASK(ahci_ctlp);
@@ -6378,7 +8319,21 @@
 		    ahci_portp->ahciport_port_num, slot_status);
 		ASSERT(ahci_portp->ahciport_mop_in_progress > 1);
 		ASSERT(slot_status == 0x1);
-	}
+	} else if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) {
+		rdwr_pmult_cmd_in_progress = 1;
+		AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp,
+		    "ahci_mop_commands is called for port %d while "
+		    "READ/WRITE PORTMULT command is being executed",
+		    ahci_portp->ahciport_port_num);
+
+		ASSERT(slot_status == 0x1);
+	}
+
+#ifdef AHCI_DEBUG
+	AHCIDBG(AHCIDBG_ERRS|AHCIDBG_ENTRY, ahci_ctlp,
+	    "ahci_mop_commands: finished_tags: 0x%x, "
+	    "unfinished_tags 0x%x", finished_tags, unfinished_tags);
+#endif
 
 	/* Send up finished packets with SATA_PKT_COMPLETED */
 	while (finished_tags) {
@@ -6428,6 +8383,16 @@
 			SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_DEV_ERROR);
 			break;
 		}
+		if (rdwr_pmult_cmd_in_progress) {
+			satapkt = ahci_portp->ahciport_rdwr_pmult_pkt;
+			ASSERT(satapkt != NULL);
+			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+			    "ahci_mop_commands: sending up "
+			    "rdwr pmult pkt 0x%p with SATA_PKT_DEV_ERROR",
+			    (void *)satapkt);
+			SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_DEV_ERROR);
+			break;
+		}
 
 		tmp_slot = ddi_ffs(failed_tags) - 1;
 		if (tmp_slot == -1) {
@@ -6464,6 +8429,16 @@
 			SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_TIMEOUT);
 			break;
 		}
+		if (rdwr_pmult_cmd_in_progress) {
+			satapkt = ahci_portp->ahciport_rdwr_pmult_pkt;
+			ASSERT(satapkt != NULL);
+			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+			    "ahci_mop_commands: sending up "
+			    "rdwr pmult pkt 0x%p with SATA_PKT_TIMEOUT",
+			    (void *)satapkt);
+			SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_TIMEOUT);
+			break;
+		}
 
 		tmp_slot = ddi_ffs(timeout_tags) - 1;
 		if (tmp_slot == -1) {
@@ -6500,6 +8475,17 @@
 			SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_ABORTED);
 			break;
 		}
+		if (rdwr_pmult_cmd_in_progress) {
+			satapkt = ahci_portp->ahciport_rdwr_pmult_pkt;
+			ASSERT(satapkt != NULL);
+			ASSERT(aborted_tags == 0x1);
+			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+			    "ahci_mop_commands: sending up "
+			    "rdwr pmult pkt 0x%p with SATA_PKT_ABORTED",
+			    (void *)satapkt);
+			SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_ABORTED);
+			break;
+		}
 
 		tmp_slot = ddi_ffs(aborted_tags) - 1;
 		if (tmp_slot == -1) {
@@ -6525,6 +8511,18 @@
 
 	/* Send up reset packets with SATA_PKT_RESET. */
 	while (reset_tags) {
+		if (rdwr_pmult_cmd_in_progress) {
+			satapkt = ahci_portp->ahciport_rdwr_pmult_pkt;
+			ASSERT(satapkt != NULL);
+			ASSERT(aborted_tags == 0x1);
+			AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+			    "ahci_mop_commands: sending up "
+			    "rdwr pmult pkt 0x%p with SATA_PKT_RESET",
+			    (void *)satapkt);
+			SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_RESET);
+			break;
+		}
+
 		tmp_slot = ddi_ffs(reset_tags) - 1;
 		if (tmp_slot == -1) {
 			break;
@@ -6590,6 +8588,7 @@
 	sata_device_t	sdevice;
 	sata_pkt_t	*rdlog_spkt, *spkt;
 	ddi_dma_handle_t buf_dma_handle;
+	ahci_addr_t	addr;
 	int		loop_count;
 	int		rval;
 	int		failed_slot;
@@ -6606,6 +8605,9 @@
 	sdevice.satadev_addr.qual = SATA_ADDR_DCPORT;
 	sdevice.satadev_addr.pmport = 0;
 
+	/* Translate sata_device.satadev_addr -> ahci_addr */
+	ahci_get_ahci_addr(ahci_ctlp, &sdevice, &addr);
+
 	/*
 	 * Call the sata hba interface to get a rdlog spkt
 	 */
@@ -6616,7 +8618,7 @@
 	if (rdlog_spkt == NULL) {
 		if (loop_count++ < AHCI_POLLRATE_GET_SPKT) {
 			/* Sleep for a while */
-			delay(AHCI_10MS_TICKS);
+			drv_usecwait(AHCI_10MS_USECS);
 			goto loop;
 		}
 		/* Timed out after 1s */
@@ -6638,7 +8640,7 @@
 	 * the whole command list is empty.
 	 */
 	ahci_portp->ahciport_err_retri_pkt = rdlog_spkt;
-	(void) ahci_do_sync_start(ahci_ctlp, ahci_portp, port, rdlog_spkt);
+	(void) ahci_do_sync_start(ahci_ctlp, ahci_portp, &addr, rdlog_spkt);
 	ahci_portp->ahciport_err_retri_pkt = NULL;
 
 	/* Remove the flag after READ LOG EXT command is completed */
@@ -6706,6 +8708,7 @@
 	sata_pkt_t	*rs_spkt;
 	sata_cmd_t	*sata_cmd;
 	ddi_dma_handle_t buf_dma_handle;
+	ahci_addr_t	addr;
 	int		loop_count;
 #if AHCI_DEBUG
 	struct scsi_extended_sense *rqsense;
@@ -6721,6 +8724,9 @@
 	sdevice.satadev_addr.qual = SATA_ADDR_DCPORT;
 	sdevice.satadev_addr.pmport = 0;
 
+	/* Translate sata_device.satadev_addr -> ahci_addr */
+	ahci_get_ahci_addr(ahci_ctlp, &sdevice, &addr);
+
 	sata_cmd = &spkt->satapkt_cmd;
 
 	/*
@@ -6733,7 +8739,7 @@
 	if (rs_spkt == NULL) {
 		if (loop_count++ < AHCI_POLLRATE_GET_SPKT) {
 			/* Sleep for a while */
-			delay(AHCI_10MS_TICKS);
+			drv_usecwait(AHCI_10MS_USECS);
 			goto loop;
 
 		}
@@ -6756,7 +8762,7 @@
 	 * the whole command list is empty.
 	 */
 	ahci_portp->ahciport_err_retri_pkt = rs_spkt;
-	(void) ahci_do_sync_start(ahci_ctlp, ahci_portp, port, rs_spkt);
+	(void) ahci_do_sync_start(ahci_ctlp, ahci_portp, &addr, rs_spkt);
 	ahci_portp->ahciport_err_retri_pkt = NULL;
 
 	/* Remove the flag after REQUEST SENSE command is completed */
@@ -6822,23 +8828,34 @@
  */
 static void
 ahci_fatal_error_recovery_handler(ahci_ctl_t *ahci_ctlp,
-    ahci_port_t *ahci_portp, uint8_t port, uint32_t intr_status)
+    ahci_port_t *ahci_portp, ahci_addr_t *addrp, uint32_t intr_status)
 {
 	uint32_t	port_cmd_status;
 	uint32_t	slot_status = 0;
 	uint32_t	failed_tags = 0;
 	int		failed_slot;
-	int		reset_flag = 0;
+	int		reset_flag = 0, flag = 0;
 	ahci_fis_d2h_register_t	*ahci_rcvd_fisp;
 	sata_cmd_t	*sata_cmd = NULL;
 	sata_pkt_t	*spkt = NULL;
 #if AHCI_DEBUG
 	ahci_cmd_header_t *cmd_header;
 #endif
+	uint8_t 	port = addrp->aa_port;
 
 	AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
 	    "ahci_fatal_error_recovery_handler enter: port %d", port);
 
+	/* Port multiplier error */
+	if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) {
+		/* FBS code is neither completed nor tested. */
+		ahci_pmult_error_recovery_handler(ahci_ctlp, ahci_portp,
+		    port, intr_status);
+
+		/* Force a port reset */
+		flag = AHCI_PORT_RESET;
+	}
+
 	if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp) ||
 	    ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) {
 
@@ -6934,7 +8951,7 @@
 	ahci_portp->ahciport_mop_in_progress++;
 
 	(void) ahci_restart_port_wait_till_ready(ahci_ctlp, ahci_portp,
-	    port, NULL, &reset_flag);
+	    port, flag, &reset_flag);
 
 	/*
 	 * Won't retrieve error information:
@@ -6985,6 +9002,97 @@
 }
 
 /*
+ * Used to recovery a PMULT pmport fatal error under FIS-based switching.
+ * 	1. device specific.PxFBS.SDE=1
+ * 	2. Non-Deivce specific.
+ * Nothing will be done when Command-based switching is employed.
+ *
+ * Currently code is neither completed nor tested.
+ *
+ * WARNING!!! ahciport_mutex should be acquired before the function
+ * is called.
+ */
+static void
+ahci_pmult_error_recovery_handler(ahci_ctl_t *ahci_ctlp,
+    ahci_port_t *ahci_portp, uint8_t port, uint32_t intr_status)
+{
+#ifndef __lock_lint
+	_NOTE(ARGUNUSED(intr_status))
+#endif
+	uint32_t	port_fbs_ctrl;
+	int loop_count = 0;
+	ahci_addr_t	addr;
+
+	/* Nothing will be done under Command-based switching. */
+	if (!(ahci_ctlp->ahcictl_cap & AHCI_CAP_PMULT_FBSS))
+		return;
+
+	port_fbs_ctrl = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxFBS(ahci_ctlp, port));
+
+	if (!(port_fbs_ctrl & AHCI_FBS_EN))
+		/* FBS is not enabled. */
+		return;
+
+	/* Problem's getting complicated now. */
+	/*
+	 * If FIS-based switching is used, we need to check
+	 * the PxFBS to see the error type.
+	 */
+	port_fbs_ctrl = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
+	    (uint32_t *)AHCI_PORT_PxFBS(ahci_ctlp, port));
+
+	/* Refer to spec(v1.2) 9.3.6.1 */
+	if (port_fbs_ctrl & AHCI_FBS_SDE) {
+		AHCIDBG(AHCIDBG_ERRS, ahci_ctlp,
+		    "A Device Sepcific Error: port %d", port);
+		/*
+		 * Controller has paused commands for all other
+		 * sub-devices until PxFBS.DEC is set.
+		 */
+		ahci_reject_all_abort_pkts(ahci_ctlp,
+		    ahci_portp, 0);
+
+		ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle,
+		    (uint32_t *)AHCI_PORT_PxFBS(ahci_ctlp, port),
+		    port_fbs_ctrl | AHCI_FBS_DEC);
+
+		/*
+		 * Wait controller clear PxFBS.DEC,
+		 * then we can continue.
+		 */
+		loop_count = 0;
+		do {
+			port_fbs_ctrl = ddi_get32(ahci_ctlp->
+			    ahcictl_ahci_acc_handle, (uint32_t *)
+			    AHCI_PORT_PxFBS(ahci_ctlp, port));
+
+			if (loop_count++ > 1000)
+				/*
+				 * Esclate the error. Follow
+				 * non-device specific error
+				 * procedure.
+				 */
+				return;
+
+			drv_usecwait(AHCI_100US_USECS);
+		} while (port_fbs_ctrl & AHCI_FBS_DEC);
+
+		/*
+		 * Issue a software reset to ensure drive is in
+		 * a known state.
+		 */
+		(void) ahci_software_reset(ahci_ctlp,
+		    ahci_portp, &addr);
+
+	} else {
+
+		/* Process Non-Device Specific Error. */
+		/* This will be handled later on. */
+		cmn_err(CE_NOTE, "!FBS is not supported now.");
+	}
+}
+/*
  * Handle events - fatal error recovery
  */
 static void
@@ -6993,19 +9101,19 @@
 	ahci_event_arg_t *ahci_event_arg;
 	ahci_ctl_t *ahci_ctlp;
 	ahci_port_t *ahci_portp;
+	ahci_addr_t *addrp;
 	uint32_t event;
-	uint8_t port;
 
 	ahci_event_arg = (ahci_event_arg_t *)args;
 
 	ahci_ctlp = ahci_event_arg->ahciea_ctlp;
 	ahci_portp = ahci_event_arg->ahciea_portp;
+	addrp = ahci_event_arg->ahciea_addrp;
 	event = ahci_event_arg->ahciea_event;
-	port = ahci_portp->ahciport_port_num;
 
 	AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR|AHCIDBG_ERRS, ahci_ctlp,
 	    "ahci_events_handler enter: port %d intr_status = 0x%x",
-	    port, event);
+	    ahci_portp->ahciport_port_num, event);
 
 	mutex_enter(&ahci_portp->ahciport_mutex);
 
@@ -7016,7 +9124,8 @@
 	if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE) {
 		AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR, ahci_ctlp,
 		    "ahci_events_handler: port %d no device attached, "
-		    "and just return without doing anything", port);
+		    "and just return without doing anything",
+		    ahci_portp->ahciport_port_num);
 		goto out;
 	}
 
@@ -7025,7 +9134,7 @@
 	    AHCI_INTR_STATUS_HBFS |
 	    AHCI_INTR_STATUS_TFES))
 		ahci_fatal_error_recovery_handler(ahci_ctlp, ahci_portp,
-		    port, event);
+		    addrp, event);
 
 out:
 	mutex_exit(&ahci_portp->ahciport_mutex);
@@ -7049,6 +9158,7 @@
 	mutex_enter(&ahci_portp->ahciport_mutex);
 
 	if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp) ||
+	    RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp) ||
 	    ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) {
 		/* Read PxCI to see which commands are still outstanding */
 		slot_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
@@ -7077,6 +9187,12 @@
 		    ahci_portp->ahciport_pending_tags,
 		    ahci_portp->ahciport_pending_ncq_tags);
 		ASSERT(slot_status == 0x1);
+	} else if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) {
+		AHCIDBG(AHCIDBG_ERRS|AHCIDBG_TIMEOUT, ahci_ctlp,
+		    "ahci_timeout_pkts called while executing R/W PMULT "
+		    "command timeout_tags = 0x%x, slot_status = 0x%x",
+		    tmp_timeout_tags, slot_status);
+		ASSERT(slot_status == 0x1);
 	}
 #endif
 
@@ -7084,7 +9200,7 @@
 	ahci_portp->ahciport_mop_in_progress++;
 
 	(void) ahci_restart_port_wait_till_ready(ahci_ctlp, ahci_portp,
-	    port, NULL, NULL);
+	    port, AHCI_PORT_RESET, NULL);
 
 	/*
 	 * Re-identify timeout tags because some previously checked commands
@@ -7112,17 +9228,18 @@
 		    "pending_ncq_tags = 0x%x ",
 		    port, finished_tags, timeout_tags,
 		    slot_status, ahci_portp->ahciport_pending_ncq_tags);
-	} else if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) {
+	} else if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) ||
+	    RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) {
 		timeout_tags = tmp_timeout_tags;
 	}
 
 	ahci_mop_commands(ahci_ctlp,
 	    ahci_portp,
 	    slot_status,
-	    0, /* failed tags */
-	    timeout_tags, /* timeout tags */
-	    0, /* aborted tags */
-	    0); /* reset tags */
+	    0,			/* failed tags */
+	    timeout_tags,	/* timeout tags */
+	    0,			/* aborted tags */
+	    0);			/* reset tags */
 
 	mutex_exit(&ahci_portp->ahciport_mutex);
 }
@@ -7180,7 +9297,8 @@
 		port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle,
 		    (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port));
 
-		if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) {
+		if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) ||
+		    RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) {
 			current_slot = 0;
 			pending_tags = 0x1;
 		} else if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) {
@@ -7207,6 +9325,8 @@
 
 			if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp))
 				spkt = ahci_portp->ahciport_err_retri_pkt;
+			else if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp))
+				spkt = ahci_portp->ahciport_rdwr_pmult_pkt;
 			else
 				spkt = ahci_portp->ahciport_slot_pkts[tmp_slot];
 
@@ -7497,6 +9617,39 @@
 }
 
 /*
+ * Translate the sata_address_t type into the ahci_addr_t type.
+ * sata_device.satadev_addr structure is used as source.
+ */
+static void
+ahci_get_ahci_addr(ahci_ctl_t *ahci_ctlp, sata_device_t *sd,
+    ahci_addr_t *ahci_addrp)
+{
+	sata_address_t *sata_addrp = &sd->satadev_addr;
+	ahci_addrp->aa_port =
+	    ahci_ctlp->ahcictl_cport_to_port[sata_addrp->cport];
+	ahci_addrp->aa_pmport = sata_addrp->pmport;
+
+	switch (sata_addrp->qual) {
+	case SATA_ADDR_DCPORT:
+	case SATA_ADDR_CPORT:
+		ahci_addrp->aa_qual = AHCI_ADDR_PORT;
+		break;
+	case SATA_ADDR_PMULT:
+		ahci_addrp->aa_qual = AHCI_ADDR_PMULT;
+		break;
+	case SATA_ADDR_DPMPORT:
+	case SATA_ADDR_PMPORT:
+		ahci_addrp->aa_qual = AHCI_ADDR_PMPORT;
+		break;
+	case SATA_ADDR_NULL:
+	default:
+		/* something went wrong */
+		ahci_addrp->aa_qual = AHCI_ADDR_NULL;
+		break;
+	}
+}
+
+/*
  * This routine is to calculate the total number of ports implemented
  * by the HBA.
  */
--- a/usr/src/uts/common/io/sata/impl/sata.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/sata/impl/sata.c	Mon Aug 17 09:14:35 2009 -0400
@@ -50,6 +50,7 @@
 #include <sys/sata/sata_hba.h>
 #include <sys/sata/sata_defs.h>
 #include <sys/sata/sata_cfgadm.h>
+#include <sys/sata/sata_blacklist.h>
 
 /* Debug flags - defined in sata.h */
 int	sata_debug_flags = 0;
@@ -61,6 +62,7 @@
 #define	SATA_ENABLE_QUEUING		1
 #define	SATA_ENABLE_NCQ			2
 #define	SATA_ENABLE_PROCESS_EVENTS	4
+#define	SATA_ENABLE_PMULT_FBS		8 /* FIS-Based Switching */
 int sata_func_enable =
 	SATA_ENABLE_PROCESS_EVENTS | SATA_ENABLE_QUEUING | SATA_ENABLE_NCQ;
 
@@ -129,7 +131,7 @@
 
 #define	LEGACY_HWID_LEN	64	/* Model (40) + Serial (20) + pad */
 
-static char sata_rev_tag[] = {"1.43"};
+static char sata_rev_tag[] = {"1.44"};
 
 /*
  * SATA cb_ops functions
@@ -167,13 +169,21 @@
 static	void sata_event_daemon(void *);
 static	void sata_event_thread_control(int);
 static	void sata_process_controller_events(sata_hba_inst_t *sata_hba_inst);
+static	void sata_process_pmult_events(sata_hba_inst_t *, uint8_t);
 static	void sata_process_device_reset(sata_hba_inst_t *, sata_address_t *);
+static	void sata_process_pmdevice_reset(sata_hba_inst_t *, sata_address_t *);
 static	void sata_process_port_failed_event(sata_hba_inst_t *,
     sata_address_t *);
 static	void sata_process_port_link_events(sata_hba_inst_t *,
     sata_address_t *);
+static	void sata_process_pmport_link_events(sata_hba_inst_t *,
+    sata_address_t *);
 static	void sata_process_device_detached(sata_hba_inst_t *, sata_address_t *);
+static	void sata_process_pmdevice_detached(sata_hba_inst_t *,
+    sata_address_t *);
 static	void sata_process_device_attached(sata_hba_inst_t *, sata_address_t *);
+static	void sata_process_pmdevice_attached(sata_hba_inst_t *,
+    sata_address_t *);
 static	void sata_process_port_pwr_change(sata_hba_inst_t *, sata_address_t *);
 static	void sata_process_cntrl_pwr_level_change(sata_hba_inst_t *);
 static	void sata_process_target_node_cleanup(sata_hba_inst_t *,
@@ -220,7 +230,7 @@
 static	int32_t sata_get_port_num(sata_hba_inst_t *,  struct devctl_iocdata *);
 static	void sata_cfgadm_state(sata_hba_inst_t *, int32_t,
     devctl_ap_state_t *);
-static	dev_info_t *sata_get_target_dip(dev_info_t *, int32_t);
+static	dev_info_t *sata_get_target_dip(dev_info_t *, uint8_t, uint8_t);
 static	dev_info_t *sata_get_scsi_target_dip(dev_info_t *, sata_address_t *);
 static	dev_info_t *sata_devt_to_devinfo(dev_t);
 static	int sata_ioctl_connect(sata_hba_inst_t *, sata_device_t *);
@@ -250,11 +260,19 @@
 static 	void sata_remove_hba_instance(dev_info_t *);
 static 	int sata_validate_sata_hba_tran(dev_info_t *, sata_hba_tran_t *);
 static 	void sata_probe_ports(sata_hba_inst_t *);
+static	void sata_probe_pmports(sata_hba_inst_t *, uint8_t);
 static 	int sata_reprobe_port(sata_hba_inst_t *, sata_device_t *, int);
-static 	int sata_add_device(dev_info_t *, sata_hba_inst_t *, int cport,
-    int pmport);
+static 	int sata_reprobe_pmult(sata_hba_inst_t *, sata_device_t *, int);
+static 	int sata_reprobe_pmport(sata_hba_inst_t *, sata_device_t *, int);
+static	void sata_alloc_pmult(sata_hba_inst_t *, sata_device_t *);
+static	void sata_free_pmult(sata_hba_inst_t *, sata_device_t *);
+static 	int sata_add_device(dev_info_t *, sata_hba_inst_t *, sata_device_t *);
+static	int sata_offline_device(sata_hba_inst_t *, sata_device_t *,
+    sata_drive_info_t *);
 static 	dev_info_t *sata_create_target_node(dev_info_t *, sata_hba_inst_t *,
     sata_address_t *);
+static 	void sata_remove_target_node(sata_hba_inst_t *,
+    sata_address_t *);
 static 	int sata_validate_scsi_address(sata_hba_inst_t *,
     struct scsi_address *, sata_device_t *);
 static 	int sata_validate_sata_address(sata_hba_inst_t *, int, int, int);
@@ -276,6 +294,7 @@
 static 	int sata_fetch_device_identify_data(sata_hba_inst_t *,
     sata_drive_info_t *);
 static	void sata_update_port_info(sata_hba_inst_t *, sata_device_t *);
+static	void sata_update_pmport_info(sata_hba_inst_t *, sata_device_t *);
 static	void sata_update_port_scr(sata_port_scr_t *, sata_device_t *);
 static	int sata_set_dma_mode(sata_hba_inst_t *, sata_drive_info_t *);
 static	int sata_set_cache_mode(sata_hba_inst_t *, sata_drive_info_t *, int);
@@ -319,6 +338,7 @@
 
 static	void sata_save_drive_settings(sata_drive_info_t *);
 static	void sata_show_drive_info(sata_hba_inst_t *, sata_drive_info_t *);
+static	void sata_show_pmult_info(sata_hba_inst_t *, sata_device_t *);
 static	void sata_log(sata_hba_inst_t *, uint_t, char *fmt, ...);
 static	void sata_trace_log(sata_hba_inst_t *, uint_t, const char *fmt, ...);
 static	int sata_fetch_smart_return_status(sata_hba_inst_t *,
@@ -527,7 +547,7 @@
 _NOTE(SCHEME_PROTECTS_DATA("No Mutex Needed", sata_hba_inst::satahba_dip))
 _NOTE(SCHEME_PROTECTS_DATA("Scheme", sata_hba_inst::satahba_attached))
 _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_hba_inst::satahba_dev_port))
-_NOTE(MUTEX_PROTECTS_DATA(sata_hba_inst::satahba_mutex, 
+_NOTE(MUTEX_PROTECTS_DATA(sata_hba_inst::satahba_mutex,
     sata_hba_inst::satahba_event_flags))
 _NOTE(MUTEX_PROTECTS_DATA(sata_cport_info::cport_mutex, \
     sata_cport_info::cport_devp))
@@ -539,10 +559,18 @@
 _NOTE(MUTEX_PROTECTS_DATA(sata_cport_info::cport_mutex, \
     sata_cport_info::cport_state))
 _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_cport_info::cport_state))
-_NOTE(MUTEX_PROTECTS_DATA(sata_cport_info::cport_mutex, \
+_NOTE(MUTEX_PROTECTS_DATA(sata_pmport_info::pmport_mutex, \
     sata_pmport_info::pmport_state))
 _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_pmport_info::pmport_state))
+_NOTE(MUTEX_PROTECTS_DATA(sata_pmport_info::pmport_mutex, \
+    sata_pmport_info::pmport_dev_type))
 _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_pmport_info::pmport_dev_type))
+_NOTE(MUTEX_PROTECTS_DATA(sata_pmport_info::pmport_mutex, \
+    sata_pmport_info::pmport_sata_drive))
+_NOTE(MUTEX_PROTECTS_DATA(sata_pmport_info::pmport_mutex, \
+    sata_pmport_info::pmport_tgtnode_clean))
+_NOTE(MUTEX_PROTECTS_DATA(sata_pmport_info::pmport_mutex, \
+    sata_pmport_info::pmport_event_flags))
 _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_pmport_info::pmport_sata_drive))
 _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_pmult_info::pmult_dev_port))
 _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_pmult_info::pmult_num_dev_ports))
@@ -928,7 +956,7 @@
  *
  * When the last HBA instance is detached, the event daemon is terminated.
  *
- * NOTE: cport support only, no port multiplier support.
+ * NOTE: Port multiplier is supported.
  */
 int
 sata_hba_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
@@ -937,8 +965,10 @@
 	sata_hba_inst_t	*sata_hba_inst;
 	scsi_hba_tran_t *scsi_hba_tran;
 	sata_cport_info_t *cportinfo;
+	sata_pmult_info_t *pminfo;
 	sata_drive_info_t *sdinfo;
-	int ncport;
+	sata_device_t	sdevice;
+	int ncport, npmport;
 
 	SATADBG3(SATA_DBG_HBA_IF, NULL, "sata_hba_detach: node %s (%s%d)\n",
 	    ddi_node_name(dip), ddi_driver_name(dip), ddi_get_instance(dip));
@@ -970,7 +1000,7 @@
 				sdinfo = SATA_CPORTINFO_DRV_INFO(cportinfo);
 				if (sdinfo != NULL) {
 					tdip = sata_get_target_dip(dip,
-					    ncport);
+					    ncport, 0);
 					if (tdip != NULL) {
 						if (ndi_devi_offline(tdip,
 						    NDI_DEVI_REMOVE) !=
@@ -985,6 +1015,44 @@
 						}
 					}
 				}
+			} else { /* SATA_DTYPE_PMULT */
+				mutex_enter(&cportinfo->cport_mutex);
+				pminfo = SATA_CPORTINFO_PMULT_INFO(cportinfo);
+
+				if (pminfo == NULL) {
+					SATA_LOG_D((sata_hba_inst, CE_WARN,
+					    "sata_hba_detach: Port multiplier "
+					    "not ready yet!"));
+					mutex_exit(&cportinfo->cport_mutex);
+					return (DDI_FAILURE);
+				}
+
+				/*
+				 * Detach would fail if removal of any of the
+				 * target nodes is failed - albeit in that
+				 * case some of them may have been removed.
+				 */
+				for (npmport = 0; npmport < SATA_NUM_PMPORTS(
+				    sata_hba_inst, ncport); npmport++) {
+					tdip = sata_get_target_dip(dip, ncport,
+					    npmport);
+					if (tdip != NULL) {
+						if (ndi_devi_offline(tdip,
+						    NDI_DEVI_REMOVE) !=
+						    NDI_SUCCESS) {
+							SATA_LOG_D((
+							    sata_hba_inst,
+							    CE_WARN,
+							    "sata_hba_detach: "
+							    "Target node not "
+							    "removed !"));
+							mutex_exit(&cportinfo->
+							    cport_mutex);
+							return (DDI_FAILURE);
+						}
+					}
+				}
+				mutex_exit(&cportinfo->cport_mutex);
 			}
 		}
 		/*
@@ -1026,6 +1094,10 @@
 				mutex_destroy(&cportinfo->cport_mutex);
 				kmem_free(cportinfo,
 				    sizeof (sata_cport_info_t));
+			} else { /* SATA_DTYPE_PMULT */
+				sdevice.satadev_addr.cport = (uint8_t)ncport;
+				sdevice.satadev_addr.qual = SATA_ADDR_PMULT;
+				sata_free_pmult(sata_hba_inst, &sdevice);
 			}
 		}
 
@@ -1196,8 +1268,9 @@
  * 0 if successful,
  * error code if operation failed.
  *
- * NOTE: Port Multiplier is not supported.
- *
+ * Port Multiplier support is supported now.
+ *
+ * NOTE: qual should be SATA_ADDR_DCPORT or SATA_ADDR_DPMPORT
  */
 
 static int
@@ -1260,10 +1333,14 @@
 			return (EINVAL);
 		}
 
+		/*
+		 * According to SCSI_TO_SATA_ADDR_QUAL, qual should be either
+		 * SATA_ADDR_DCPORT or SATA_ADDR_DPMPORT.
+		 */
 		cport = SCSI_TO_SATA_CPORT(comp_port);
 		pmport = SCSI_TO_SATA_PMPORT(comp_port);
-		/* Only cport is considered now, i.e. SATA_ADDR_CPORT */
-		qual = SATA_ADDR_CPORT;
+		qual = SCSI_TO_SATA_ADDR_QUAL(comp_port);
+
 		if (sata_validate_sata_address(sata_hba_inst, cport, pmport,
 		    qual) != 0) {
 			ndi_dc_freehdl(dcp);
@@ -1422,8 +1499,9 @@
 		pmport = SCSI_TO_SATA_PMPORT(ioc.port);
 		qual = SCSI_TO_SATA_ADDR_QUAL(ioc.port);
 
-		/* Override address qualifier - handle cport only for now */
-		qual = SATA_ADDR_CPORT;
+		SATADBG3(SATA_DBG_IOCTL_IF, sata_hba_inst,
+		    "sata_hba_ioctl: target port is %d:%d (%d)",
+		    cport, pmport, qual);
 
 		if (sata_validate_sata_address(sata_hba_inst, cport,
 		    pmport, qual) != 0)
@@ -1502,12 +1580,7 @@
 			break;
 
 		case SATA_CFGA_GET_DEVICE_PATH:
-			if (qual == SATA_ADDR_CPORT)
-				sata_device.satadev_addr.qual =
-				    SATA_ADDR_DCPORT;
-			else
-				sata_device.satadev_addr.qual =
-				    SATA_ADDR_DPMPORT;
+
 			rv = sata_ioctl_get_device_path(sata_hba_inst,
 			    &sata_device, &ioc, mode);
 			break;
@@ -1678,6 +1751,138 @@
 }
 
 /*
+ * Create READ PORT MULTIPLIER and WRITE PORT MULTIPLIER sata packet
+ *
+ * No association with any scsi packet is made and no callback routine is
+ * specified.
+ *
+ * Returns a pointer to sata packet upon successfull packet creation.
+ * Returns NULL, if packet cannot be created.
+ *
+ * NOTE: Input/Output value includes 64 bits accoring to SATA Spec 2.6,
+ * only lower 32 bits are available currently.
+ */
+sata_pkt_t *
+sata_get_rdwr_pmult_pkt(dev_info_t *dip, sata_device_t *sd,
+    uint8_t regn, uint32_t regv, uint32_t type)
+{
+	sata_hba_inst_t	*sata_hba_inst;
+	sata_pkt_txlate_t *spx;
+	sata_pkt_t *spkt;
+	sata_cmd_t *scmd;
+
+	/* Only READ/WRITE commands are accepted. */
+	ASSERT(type == SATA_RDWR_PMULT_PKT_TYPE_READ ||
+	    type == SATA_RDWR_PMULT_PKT_TYPE_WRITE);
+
+	mutex_enter(&sata_mutex);
+	for (sata_hba_inst = sata_hba_list; sata_hba_inst != NULL;
+	    sata_hba_inst = sata_hba_inst->satahba_next) {
+		if (SATA_DIP(sata_hba_inst) == dip)
+			break;
+	}
+	mutex_exit(&sata_mutex);
+	ASSERT(sata_hba_inst != NULL);
+
+	spx = kmem_zalloc(sizeof (sata_pkt_txlate_t), KM_SLEEP);
+	spx->txlt_sata_hba_inst = sata_hba_inst;
+	spx->txlt_scsi_pkt = NULL;	/* No scsi pkt involved */
+	spkt = sata_pkt_alloc(spx, SLEEP_FUNC);
+	if (spkt == NULL) {
+		kmem_free(spx, sizeof (sata_pkt_txlate_t));
+		return (NULL);
+	}
+
+	/*
+	 * NOTE: We need to send this command to the port multiplier,
+	 * that means send to SATA_PMULT_HOSTPORT(0xf) pmport
+	 *
+	 * sata_device contains the address of actual target device, and the
+	 * pmport number in the command comes from the sata_device structure.
+	 */
+	spkt->satapkt_device.satadev_addr = sd->satadev_addr;
+	spkt->satapkt_device.satadev_addr.pmport = SATA_PMULT_HOSTPORT;
+	spkt->satapkt_device.satadev_addr.qual = SATA_ADDR_PMULT;
+
+	/* Fill sata_pkt */
+	spkt->satapkt_op_mode = SATA_OPMODE_SYNCH | SATA_OPMODE_POLLING;
+	spkt->satapkt_comp = NULL; /* Synchronous mode, no callback */
+	spkt->satapkt_time = 10; /* Timeout 10s */
+
+	/* Build READ PORT MULTIPLIER cmd in the sata_pkt */
+	scmd = &spkt->satapkt_cmd;
+	scmd->satacmd_features_reg = regn & 0xff;
+	scmd->satacmd_features_reg_ext = (regn >> 8) & 0xff;
+	scmd->satacmd_device_reg = sd->satadev_addr.pmport;
+	scmd->satacmd_addr_type = 0;		/* N/A */
+
+	scmd->satacmd_flags.sata_ignore_dev_reset = B_TRUE;
+
+	if (type == SATA_RDWR_PMULT_PKT_TYPE_READ) {
+		scmd->satacmd_cmd_reg = SATAC_READ_PORTMULT;
+		scmd->satacmd_flags.sata_data_direction = SATA_DIR_READ;
+		scmd->satacmd_flags.sata_special_regs = 1;
+		scmd->satacmd_flags.sata_copy_out_lba_high_lsb = 1;
+		scmd->satacmd_flags.sata_copy_out_lba_mid_lsb = 1;
+		scmd->satacmd_flags.sata_copy_out_lba_low_lsb = 1;
+		scmd->satacmd_flags.sata_copy_out_sec_count_lsb = 1;
+	} else if (type == SATA_RDWR_PMULT_PKT_TYPE_WRITE) {
+		scmd->satacmd_cmd_reg = SATAC_WRITE_PORTMULT;
+		scmd->satacmd_flags.sata_data_direction = SATA_DIR_WRITE;
+		scmd->satacmd_sec_count_lsb = regv & 0xff;
+		scmd->satacmd_lba_low_lsb = regv >> 8 & 0xff;
+		scmd->satacmd_lba_mid_lsb = regv >> 16 & 0xff;
+		scmd->satacmd_lba_high_lsb = regv >> 24 & 0xff;
+	}
+
+	return (spkt);
+}
+
+/*
+ * Free sata packet and any associated resources allocated previously by
+ * sata_get_rdwr_pmult_pkt().
+ *
+ * Void return.
+ */
+void
+sata_free_rdwr_pmult_pkt(sata_pkt_t *sata_pkt)
+{
+	sata_pkt_txlate_t *spx =
+	    (sata_pkt_txlate_t *)sata_pkt->satapkt_framework_private;
+
+	/* Free allocated resources */
+	sata_pkt_free(spx);
+	kmem_free(spx, sizeof (sata_pkt_txlate_t));
+}
+
+/*
+ * Search a port multiplier in the blacklist and update the flags if a match
+ * is found.
+ *
+ * Returns:
+ * SATA_SUCCESS if any matched entry is found.
+ * SATA_FAILURE if no matched entry is found.
+ */
+int
+sata_check_pmult_blacklist(sata_device_t *sd)
+{
+	sata_pmult_bl_t *blp;
+	for (blp = sata_pmult_blacklist; blp->bl_gscr0; blp++) {
+		if (sd->satadev_gscr.gscr0 != blp->bl_gscr0 && blp->bl_gscr0)
+			continue;
+		if (sd->satadev_gscr.gscr1 != blp->bl_gscr1 && blp->bl_gscr1)
+			continue;
+		if (sd->satadev_gscr.gscr2 != blp->bl_gscr2 && blp->bl_gscr2)
+			continue;
+
+		cmn_err(CE_WARN, "!Port multiplier is on the blacklist.");
+		sd->satadev_add_info = blp->bl_flags;
+		return (SATA_SUCCESS);
+	}
+	return (SATA_FAILURE);
+}
+
+/*
  * sata_name_child is for composing the name of the node
  * the format of the name is "target,0".
  */
@@ -2125,9 +2330,11 @@
 	sata_hba_inst_t *sata_hba_inst =
 	    (sata_hba_inst_t *)(ap->a_hba_tran->tran_hba_private);
 	sata_pkt_txlate_t *spx = (sata_pkt_txlate_t *)pkt->pkt_ha_private;
+	sata_device_t *sdevice = &spx->txlt_sata_pkt->satapkt_device;
 	sata_drive_info_t *sdinfo;
 	struct buf *bp;
-	int cport;
+	uint8_t cport, pmport;
+	boolean_t dev_gone = B_FALSE;
 	int rval;
 
 	SATADBG1(SATA_DBG_SCSI_IF, sata_hba_inst,
@@ -2137,15 +2344,42 @@
 	    spx->txlt_scsi_pkt == pkt && spx->txlt_sata_pkt != NULL);
 
 	cport = SCSI_TO_SATA_CPORT(ap->a_target);
+	pmport = SCSI_TO_SATA_PMPORT(ap->a_target);
 
 	mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, cport)));
-	sdinfo = sata_get_device_info(sata_hba_inst,
-	    &spx->txlt_sata_pkt->satapkt_device);
-	if (sdinfo == NULL ||
-	    SATA_CPORT_INFO(sata_hba_inst, cport)->cport_tgtnode_clean ==
-	    B_FALSE ||
-	    (sdinfo->satadrv_state & SATA_DSTATE_FAILED) != 0) {
-
+
+	if (sdevice->satadev_addr.qual == SATA_ADDR_DCPORT) {
+		sdinfo = sata_get_device_info(sata_hba_inst, sdevice);
+		if (sdinfo == NULL ||
+		    SATA_CPORT_INFO(sata_hba_inst, cport)->
+		    cport_tgtnode_clean == B_FALSE ||
+		    (sdinfo->satadrv_state & SATA_DSTATE_FAILED) != 0) {
+			dev_gone = B_TRUE;
+		}
+	} else if (sdevice->satadev_addr.qual == SATA_ADDR_DPMPORT) {
+		if (SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) !=
+		    SATA_DTYPE_PMULT || SATA_PMULT_INFO(sata_hba_inst,
+		    cport) == NULL) {
+			dev_gone = B_TRUE;
+		} else if (SATA_PMPORT_INFO(sata_hba_inst, cport,
+		    pmport) == NULL) {
+			dev_gone = B_TRUE;
+		} else {
+			mutex_enter(&(SATA_PMPORT_MUTEX(sata_hba_inst,
+			    cport, pmport)));
+			sdinfo = sata_get_device_info(sata_hba_inst, sdevice);
+			if (sdinfo == NULL ||
+			    SATA_PMPORT_INFO(sata_hba_inst, cport, pmport)->
+			    pmport_tgtnode_clean == B_FALSE ||
+			    (sdinfo->satadrv_state & SATA_DSTATE_FAILED) != 0) {
+				dev_gone = B_TRUE;
+			}
+			mutex_exit(&(SATA_PMPORT_MUTEX(sata_hba_inst,
+			    cport, pmport)));
+		}
+	}
+
+	if (dev_gone == B_TRUE) {
 		mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport)));
 		pkt->pkt_reason = CMD_DEV_GONE;
 		/*
@@ -2424,8 +2658,12 @@
 	mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst,
 	    sata_device.satadev_addr.cport)));
 	if (level == RESET_ALL) {
-		/* port reset - cport only */
-		sata_device.satadev_addr.qual = SATA_ADDR_CPORT;
+		/* port reset */
+		if (sata_device.satadev_addr.qual == SATA_ADDR_DCPORT)
+			sata_device.satadev_addr.qual = SATA_ADDR_CPORT;
+		else
+			sata_device.satadev_addr.qual = SATA_ADDR_PMPORT;
+
 		if ((*SATA_RESET_DPORT_FUNC(sata_hba_inst))
 		    (SATA_DIP(sata_hba_inst), &sata_device) == SATA_SUCCESS)
 			return (1);
@@ -2815,9 +3053,16 @@
 
 	case -1:
 		/* Invalid address or invalid device type */
+		SATADBG1(SATA_DBG_SCSI_IF, spx->txlt_sata_hba_inst,
+		    "sata_scsi_start: reject command because "
+		    "dev type or address is invalid\n", NULL);
 		return (TRAN_BADPKT);
 	case 1:
 		/* valid address but no device - it has disappeared ? */
+		SATADBG1(SATA_DBG_SCSI_IF, spx->txlt_sata_hba_inst,
+		    "sata_scsi_start: reject command because "
+		    "device is gone\n", NULL);
+
 		spx->txlt_scsi_pkt->pkt_reason = CMD_DEV_GONE;
 		*reason = CMD_DEV_GONE;
 		/*
@@ -3142,7 +3387,7 @@
  * SATA translate command: Inquiry / Identify Device
  * Use cached Identify Device data for now, rather than issuing actual
  * Device Identify cmd request. If device is detached and re-attached,
- * asynchromous event processing should fetch and refresh Identify Device
+ * asynchronous event processing should fetch and refresh Identify Device
  * data.
  * Two VPD pages are supported now:
  * Vital Product Data page
@@ -5766,22 +6011,23 @@
  * have called the sata_pkt callback function for this packet.
  *
  * The scsi callback has to be performed by the caller of this routine.
- *
- * Note 2: No port multiplier support for now.
  */
 static int
 sata_hba_start(sata_pkt_txlate_t *spx, int *rval)
 {
-	int stat, cport;
+	int stat;
+	uint8_t cport = SATA_TXLT_CPORT(spx);
+	uint8_t pmport = SATA_TXLT_PMPORT(spx);
 	sata_hba_inst_t *sata_hba_inst = spx->txlt_sata_hba_inst;
 	sata_drive_info_t *sdinfo;
-	sata_device_t *sata_device;
+	sata_pmult_info_t *pminfo;
+	sata_pmport_info_t *pmportinfo = NULL;
+	sata_device_t *sata_device = NULL;
 	uint8_t cmd;
 	struct sata_cmd_flags cmd_flags;
 
 	ASSERT(spx->txlt_sata_pkt != NULL);
 
-	cport = SATA_TXLT_CPORT(spx);
 	ASSERT(mutex_owned(&SATA_CPORT_MUTEX(sata_hba_inst, cport)));
 
 	sdinfo = sata_get_device_info(sata_hba_inst,
@@ -5789,13 +6035,40 @@
 	ASSERT(sdinfo != NULL);
 
 	/* Clear device reset state? */
-	if (sdinfo->satadrv_event_flags & SATA_EVNT_CLEAR_DEVICE_RESET) {
-		spx->txlt_sata_pkt->satapkt_cmd.satacmd_flags.
-		    sata_clear_dev_reset = B_TRUE;
-		sdinfo->satadrv_event_flags &= ~SATA_EVNT_CLEAR_DEVICE_RESET;
-		SATADBG1(SATA_DBG_EVENTS, sata_hba_inst,
-		    "sata_hba_start: clearing device reset state\n", NULL);
-	}
+	/* qual should be XXX_DPMPORT, but add XXX_PMPORT in case */
+	if (sdinfo->satadrv_addr.qual == SATA_ADDR_DPMPORT ||
+	    sdinfo->satadrv_addr.qual == SATA_ADDR_PMPORT) {
+
+		/*
+		 * Get the pmult_info of the its parent port multiplier, all
+		 * sub-devices share a common device reset flags on in
+		 * pmult_info.
+		 */
+		pminfo = SATA_PMULT_INFO(sata_hba_inst, cport);
+		pmportinfo = pminfo->pmult_dev_port[pmport];
+		ASSERT(pminfo != NULL);
+		if (pminfo->pmult_event_flags & SATA_EVNT_CLEAR_DEVICE_RESET) {
+			spx->txlt_sata_pkt->satapkt_cmd.satacmd_flags.
+			    sata_clear_dev_reset = B_TRUE;
+			pminfo->pmult_event_flags &=
+			    ~SATA_EVNT_CLEAR_DEVICE_RESET;
+			SATADBG1(SATA_DBG_EVENTS, sata_hba_inst,
+			    "sata_hba_start: clearing device reset state"
+			    "on pmult.\n", NULL);
+		}
+	} else {
+		if (sdinfo->satadrv_event_flags &
+		    SATA_EVNT_CLEAR_DEVICE_RESET) {
+			spx->txlt_sata_pkt->satapkt_cmd.satacmd_flags.
+			    sata_clear_dev_reset = B_TRUE;
+			sdinfo->satadrv_event_flags &=
+			    ~SATA_EVNT_CLEAR_DEVICE_RESET;
+			SATADBG1(SATA_DBG_EVENTS, sata_hba_inst,
+			    "sata_hba_start: clearing device reset state\n",
+			    NULL);
+		}
+	}
+
 	cmd = spx->txlt_sata_pkt->satapkt_cmd.satacmd_cmd_reg;
 	cmd_flags = spx->txlt_sata_pkt->satapkt_cmd.satacmd_flags;
 	sata_device = &spx->txlt_sata_pkt->satapkt_device;
@@ -5857,7 +6130,7 @@
 			    sata_device->satadev_addr.cport);
 		else
 			sata_log(sata_hba_inst, CE_CONT,
-			    "SATA port %d pmport %d error\n",
+			    "SATA port %d:%d error\n",
 			    sata_device->satadev_addr.cport,
 			    sata_device->satadev_addr.pmport);
 
@@ -5869,7 +6142,17 @@
 		 * because original packet's sata address refered to a device
 		 * attached to some port.
 		 */
-		sata_update_port_info(sata_hba_inst, sata_device);
+		if (sdinfo->satadrv_addr.qual == SATA_ADDR_DPMPORT ||
+		    sdinfo->satadrv_addr.qual == SATA_ADDR_PMPORT) {
+			mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport)));
+			mutex_enter(&pmportinfo->pmport_mutex);
+			sata_update_pmport_info(sata_hba_inst, sata_device);
+			mutex_exit(&pmportinfo->pmport_mutex);
+			mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, cport)));
+		} else {
+			sata_update_port_info(sata_hba_inst, sata_device);
+		}
+
 		spx->txlt_scsi_pkt->pkt_reason = CMD_TRAN_ERR;
 		*rval = TRAN_FATAL_ERROR;
 		break;
@@ -5944,8 +6227,14 @@
 			 * the device reset state,
 			 * so the next sata packet may carry it to HBA.
 			 */
-			sdinfo->satadrv_event_flags |=
-			    SATA_EVNT_CLEAR_DEVICE_RESET;
+			if (sdinfo->satadrv_addr.qual == SATA_ADDR_PMPORT ||
+			    sdinfo->satadrv_addr.qual == SATA_ADDR_DPMPORT) {
+				pminfo->pmult_event_flags |=
+				    SATA_EVNT_CLEAR_DEVICE_RESET;
+			} else {
+				sdinfo->satadrv_event_flags |=
+				    SATA_EVNT_CLEAR_DEVICE_RESET;
+			}
 		}
 	}
 	return (-1);
@@ -8779,10 +9068,6 @@
 	mutex_exit(&sata_mutex);
 }
 
-
-
-
-
 /*
  * Probe all SATA ports of the specified HBA instance.
  * The assumption is that there are no target and attachment point minor nodes
@@ -8802,11 +9087,9 @@
 sata_probe_ports(sata_hba_inst_t *sata_hba_inst)
 {
 	dev_info_t		*dip = SATA_DIP(sata_hba_inst);
-	int			ncport, npmport;
+	int			ncport;
 	sata_cport_info_t 	*cportinfo;
 	sata_drive_info_t	*drive;
-	sata_pmult_info_t	*pminfo;
-	sata_pmport_info_t 	*pmportinfo;
 	sata_device_t		sata_device;
 	int			rval;
 	dev_t			minor_number;
@@ -8837,8 +9120,8 @@
 		 * an attachment point
 		 */
 		mutex_exit(&cportinfo->cport_mutex);
-		minor_number =
-		    SATA_MAKE_AP_MINOR(ddi_get_instance(dip), ncport, 0, 0);
+		minor_number = SATA_MAKE_AP_MINOR(ddi_get_instance(dip),
+		    ncport, 0, SATA_ADDR_CPORT);
 		(void) sprintf(name, "%d", ncport);
 		if (ddi_create_minor_node(dip, name, S_IFCHR,
 		    minor_number, DDI_NT_SATA_ATTACHMENT_POINT, 0) !=
@@ -8860,7 +9143,7 @@
 		    (dip, &sata_device);
 
 		mutex_enter(&cportinfo->cport_mutex);
-		sata_update_port_scr(&cportinfo->cport_scr, &sata_device);
+		cportinfo->cport_scr = sata_device.satadev_scr;
 		if (rval != SATA_SUCCESS) {
 			/* Something went wrong? Fail the port */
 			cportinfo->cport_state = SATA_PSTATE_FAILED;
@@ -8895,7 +9178,7 @@
 			drive->satadrv_state = SATA_STATE_UNKNOWN;
 
 			mutex_exit(&cportinfo->cport_mutex);
-			if (sata_add_device(dip, sata_hba_inst, ncport, 0) !=
+			if (sata_add_device(dip, sata_hba_inst, &sata_device) !=
 			    SATA_SUCCESS) {
 				/*
 				 * Plugged device was not correctly identified.
@@ -8910,135 +9193,114 @@
 					goto reprobe_cport;
 				}
 			}
-		} else {
-			mutex_exit(&cportinfo->cport_mutex);
-			ASSERT(cportinfo->cport_dev_type == SATA_DTYPE_PMULT);
-			pminfo = kmem_zalloc(sizeof (sata_pmult_info_t),
-			    KM_SLEEP);
-			mutex_enter(&cportinfo->cport_mutex);
-			ASSERT(pminfo != NULL);
-			SATA_CPORTINFO_PMULT_INFO(cportinfo) = pminfo;
-			pminfo->pmult_addr.cport = cportinfo->cport_addr.cport;
-			pminfo->pmult_addr.pmport = SATA_PMULT_HOSTPORT;
-			pminfo->pmult_addr.qual = SATA_ADDR_PMPORT;
-			pminfo->pmult_num_dev_ports =
-			    sata_device.satadev_add_info;
-			mutex_init(&pminfo->pmult_mutex, NULL, MUTEX_DRIVER,
-			    NULL);
-			pminfo->pmult_state = SATA_STATE_PROBING;
+		} else { /* SATA_DTYPE_PMULT */
 			mutex_exit(&cportinfo->cport_mutex);
 
-			/* Probe Port Multiplier ports */
-			for (npmport = 0;
-			    npmport < pminfo->pmult_num_dev_ports;
-			    npmport++) {
-				pmportinfo = kmem_zalloc(
-				    sizeof (sata_pmport_info_t), KM_SLEEP);
-				mutex_enter(&cportinfo->cport_mutex);
-				ASSERT(pmportinfo != NULL);
-				pmportinfo->pmport_addr.cport = ncport;
-				pmportinfo->pmport_addr.pmport = npmport;
-				pmportinfo->pmport_addr.qual =
-				    SATA_ADDR_PMPORT;
-				pminfo->pmult_dev_port[npmport] = pmportinfo;
-
-				mutex_init(&pmportinfo->pmport_mutex, NULL,
-				    MUTEX_DRIVER, NULL);
-
-				mutex_exit(&cportinfo->cport_mutex);
-
-				/* Create an attachment point */
-				minor_number = SATA_MAKE_AP_MINOR(
-				    ddi_get_instance(dip), ncport, npmport, 1);
-				(void) sprintf(name, "%d.%d", ncport, npmport);
-				if (ddi_create_minor_node(dip, name, S_IFCHR,
-				    minor_number, DDI_NT_SATA_ATTACHMENT_POINT,
-				    0) != DDI_SUCCESS) {
-					sata_log(sata_hba_inst, CE_WARN,
-					    "sata_hba_attach: "
-					    "cannot create SATA attachment "
-					    "point for port %d pmult port %d",
-					    ncport, npmport);
-				}
-
-				start_time = ddi_get_lbolt();
-			reprobe_pmport:
-				sata_device.satadev_addr.pmport = npmport;
-				sata_device.satadev_addr.qual =
-				    SATA_ADDR_PMPORT;
-
-				rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
-				    (dip, &sata_device);
-				mutex_enter(&cportinfo->cport_mutex);
-
-				/* sata_update_port_info() */
-				sata_update_port_scr(&pmportinfo->pmport_scr,
-				    &sata_device);
-
-				if (rval != SATA_SUCCESS) {
-					pmportinfo->pmport_state =
-					    SATA_PSTATE_FAILED;
-					mutex_exit(&cportinfo->cport_mutex);
-					continue;
-				}
-				pmportinfo->pmport_state &=
-				    ~SATA_STATE_PROBING;
-				pmportinfo->pmport_state |= SATA_STATE_PROBED;
-				pmportinfo->pmport_dev_type =
-				    sata_device.satadev_type;
-
-				pmportinfo->pmport_state |= SATA_STATE_READY;
-				if (pmportinfo->pmport_dev_type ==
-				    SATA_DTYPE_NONE) {
-					mutex_exit(&cportinfo->cport_mutex);
-					continue;
-				}
-				/* Port multipliers cannot be chained */
-				ASSERT(pmportinfo->pmport_dev_type !=
-				    SATA_DTYPE_PMULT);
-				/*
-				 * There is something attached to Port
-				 * Multiplier device port
-				 * Allocate device info structure
-				 */
-				if (pmportinfo->pmport_sata_drive == NULL) {
-					mutex_exit(&cportinfo->cport_mutex);
-					pmportinfo->pmport_sata_drive =
-					    kmem_zalloc(
-					    sizeof (sata_drive_info_t),
-					    KM_SLEEP);
-					mutex_enter(&cportinfo->cport_mutex);
-				}
-				drive = pmportinfo->pmport_sata_drive;
-				drive->satadrv_addr.cport =
-				    pmportinfo->pmport_addr.cport;
-				drive->satadrv_addr.pmport = npmport;
-				drive->satadrv_addr.qual = SATA_ADDR_DPMPORT;
-				drive->satadrv_type = pmportinfo->
-				    pmport_dev_type;
-				drive->satadrv_state = SATA_STATE_UNKNOWN;
-
-				mutex_exit(&cportinfo->cport_mutex);
-				if (sata_add_device(dip, sata_hba_inst, ncport,
-				    npmport) != SATA_SUCCESS) {
-					/*
-					 * Plugged device was not correctly
-					 * identified. Retry, within the
-					 * SATA_DEV_IDENTIFY_TIMEOUT
-					 */
-					cur_time = ddi_get_lbolt();
-					if ((cur_time - start_time) <
-					    drv_usectohz(
-					    SATA_DEV_IDENTIFY_TIMEOUT)) {
-						/* sleep for a while */
-						delay(drv_usectohz(
-						    SATA_DEV_RETRY_DLY));
-						goto reprobe_pmport;
-					}
-				}
-			}
+			/* Allocate sata_pmult_info and sata_pmport_info */
+			sata_alloc_pmult(sata_hba_inst, &sata_device);
+
+			/* Log the information of the port multiplier */
+			sata_show_pmult_info(sata_hba_inst, &sata_device);
+
+			/* Probe its pmports */
+			sata_probe_pmports(sata_hba_inst, ncport);
+		}
+	}
+}
+
+/*
+ * Probe all device ports behind a port multiplier.
+ *
+ * PMult-related structure should be allocated before by sata_alloc_pmult().
+ *
+ * NOTE1: Only called from sata_probe_ports()
+ * NOTE2: No mutex should be hold.
+ */
+static void
+sata_probe_pmports(sata_hba_inst_t *sata_hba_inst, uint8_t ncport)
+{
+	dev_info_t		*dip = SATA_DIP(sata_hba_inst);
+	sata_pmult_info_t	*pmultinfo = NULL;
+	sata_pmport_info_t 	*pmportinfo = NULL;
+	sata_drive_info_t	*drive = NULL;
+	sata_device_t		sata_device;
+
+	clock_t			start_time, cur_time;
+	int			npmport;
+	int			rval;
+
+	pmultinfo = SATA_PMULT_INFO(sata_hba_inst, ncport);
+
+	/* Probe Port Multiplier ports */
+	for (npmport = 0; npmport < pmultinfo->pmult_num_dev_ports; npmport++) {
+		pmportinfo = pmultinfo->pmult_dev_port[npmport];
+		start_time = ddi_get_lbolt();
+reprobe_pmport:
+		sata_device.satadev_addr.cport = ncport;
+		sata_device.satadev_addr.pmport = npmport;
+		sata_device.satadev_addr.qual = SATA_ADDR_PMPORT;
+		sata_device.satadev_rev = SATA_DEVICE_REV;
+
+		/* Let HBA driver probe it. */
+		rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
+		    (dip, &sata_device);
+		mutex_enter(&pmportinfo->pmport_mutex);
+
+		pmportinfo->pmport_scr = sata_device.satadev_scr;
+
+		if (rval != SATA_SUCCESS) {
 			pmportinfo->pmport_state =
-			    SATA_STATE_PROBED | SATA_STATE_READY;
+			    SATA_PSTATE_FAILED;
+			mutex_exit(&pmportinfo->pmport_mutex);
+			continue;
+		}
+		pmportinfo->pmport_state &= ~SATA_STATE_PROBING;
+		pmportinfo->pmport_state |= SATA_STATE_PROBED;
+		pmportinfo->pmport_dev_type = sata_device.satadev_type;
+
+		pmportinfo->pmport_state |= SATA_STATE_READY;
+		if (pmportinfo->pmport_dev_type ==
+		    SATA_DTYPE_NONE) {
+			SATADBG2(SATA_DBG_PMULT, sata_hba_inst,
+			    "no device found at port %d:%d", ncport, npmport);
+			mutex_exit(&pmportinfo->pmport_mutex);
+			continue;
+		}
+		/* Port multipliers cannot be chained */
+		ASSERT(pmportinfo->pmport_dev_type != SATA_DTYPE_PMULT);
+		/*
+		 * There is something attached to Port
+		 * Multiplier device port
+		 * Allocate device info structure
+		 */
+		if (pmportinfo->pmport_sata_drive == NULL) {
+			mutex_exit(&pmportinfo->pmport_mutex);
+			pmportinfo->pmport_sata_drive =
+			    kmem_zalloc(sizeof (sata_drive_info_t), KM_SLEEP);
+			mutex_enter(&pmportinfo->pmport_mutex);
+		}
+		drive = pmportinfo->pmport_sata_drive;
+		drive->satadrv_addr.cport = pmportinfo->pmport_addr.cport;
+		drive->satadrv_addr.pmport = npmport;
+		drive->satadrv_addr.qual = SATA_ADDR_DPMPORT;
+		drive->satadrv_type = pmportinfo-> pmport_dev_type;
+		drive->satadrv_state = SATA_STATE_UNKNOWN;
+
+		mutex_exit(&pmportinfo->pmport_mutex);
+		rval = sata_add_device(dip, sata_hba_inst, &sata_device);
+
+		if (rval != SATA_SUCCESS) {
+			/*
+			 * Plugged device was not correctly identified.
+			 * Retry, within the SATA_DEV_IDENTIFY_TIMEOUT
+			 */
+			cur_time = ddi_get_lbolt();
+			if ((cur_time - start_time) < drv_usectohz(
+			    SATA_DEV_IDENTIFY_TIMEOUT)) {
+				/* sleep for a while */
+				delay(drv_usectohz(SATA_DEV_RETRY_DLY));
+				goto reprobe_pmport;
+			}
 		}
 	}
 }
@@ -9052,28 +9314,29 @@
  *
  * This function cannot be called from an interrupt context.
  *
- * ONLY DISK TARGET NODES ARE CREATED NOW
+ * Create target nodes for disk, CD/DVD, Tape and ATAPI disk devices
  *
  * Returns SATA_SUCCESS when port/device was fully processed, SATA_FAILURE when
  * device identification failed - adding a device could be retried.
  *
  */
 static 	int
-sata_add_device(dev_info_t *pdip, sata_hba_inst_t *sata_hba_inst, int cport,
-    int pmport)
+sata_add_device(dev_info_t *pdip, sata_hba_inst_t *sata_hba_inst,
+    sata_device_t *sata_device)
 {
 	sata_cport_info_t 	*cportinfo;
 	sata_pmult_info_t	*pminfo;
 	sata_pmport_info_t	*pmportinfo;
 	dev_info_t		*cdip;		/* child dip */
-	sata_device_t		sata_device;
+	sata_address_t		*saddr = &sata_device->satadev_addr;
+	uint8_t			cport, pmport;
 	int			rval;
 
-
-
+	cport = saddr->cport;
+	pmport = saddr->pmport;
 	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
 	ASSERT(cportinfo->cport_dev_type != SATA_DTYPE_NONE);
-	mutex_enter(&cportinfo->cport_mutex);
+
 	/*
 	 * Some device is attached to a controller port.
 	 * We rely on controllers distinquishing between no-device,
@@ -9082,25 +9345,24 @@
 	 * positively the dev type before trying to attach
 	 * the target driver.
 	 */
-	sata_device.satadev_rev = SATA_DEVICE_REV;
-	if (cportinfo->cport_dev_type != SATA_DTYPE_PMULT) {
-		/*
-		 * Not port multiplier.
-		 */
-		sata_device.satadev_addr = cportinfo->cport_addr;
-		sata_device.satadev_addr.qual = SATA_ADDR_DCPORT;
-		mutex_exit(&cportinfo->cport_mutex);
-
-		rval = sata_probe_device(sata_hba_inst, &sata_device);
+	sata_device->satadev_rev = SATA_DEVICE_REV;
+	switch (saddr->qual) {
+	case SATA_ADDR_CPORT:
+		/*
+		 * Add a non-port-multiplier device at controller port.
+		 */
+		saddr->qual = SATA_ADDR_DCPORT;
+
+		rval = sata_probe_device(sata_hba_inst, sata_device);
 		if (rval != SATA_SUCCESS ||
-		    sata_device.satadev_type == SATA_DTYPE_UNKNOWN)
+		    sata_device->satadev_type == SATA_DTYPE_UNKNOWN)
 			return (SATA_FAILURE);
 
 		mutex_enter(&cportinfo->cport_mutex);
 		sata_show_drive_info(sata_hba_inst,
 		    SATA_CPORTINFO_DRV_INFO(cportinfo));
 
-		if ((sata_device.satadev_type & SATA_VALID_DEV_TYPE) == 0) {
+		if ((sata_device->satadev_type & SATA_VALID_DEV_TYPE) == 0) {
 			/*
 			 * Could not determine device type or
 			 * a device is not supported.
@@ -9110,7 +9372,7 @@
 			mutex_exit(&cportinfo->cport_mutex);
 			return (SATA_SUCCESS);
 		}
-		cportinfo->cport_dev_type = sata_device.satadev_type;
+		cportinfo->cport_dev_type = sata_device->satadev_type;
 		cportinfo->cport_tgtnode_clean = B_TRUE;
 		mutex_exit(&cportinfo->cport_mutex);
 
@@ -9130,59 +9392,58 @@
 				    "SATA device at port %d - "
 				    "default device features could not be set."
 				    " Device may not operate as expected.",
-				    cportinfo->cport_addr.cport);
-		}
-
-		cdip = sata_create_target_node(pdip, sata_hba_inst,
-		    &sata_device.satadev_addr);
-		mutex_enter(&cportinfo->cport_mutex);
+				    cport);
+		}
+
+		cdip = sata_create_target_node(pdip, sata_hba_inst, saddr);
 		if (cdip == NULL) {
 			/*
 			 * Attaching target node failed.
 			 * We retain sata_drive_info structure...
 			 */
-			mutex_exit(&cportinfo->cport_mutex);
 			return (SATA_SUCCESS);
 		}
+
+		mutex_enter(&cportinfo->cport_mutex);
 		(SATA_CPORTINFO_DRV_INFO(cportinfo))->
 		    satadrv_state = SATA_STATE_READY;
-	} else {
-		/* This must be Port Multiplier type */
-		if (cportinfo->cport_dev_type != SATA_DTYPE_PMULT) {
-			SATA_LOG_D((sata_hba_inst, CE_WARN,
-			    "sata_add_device: "
-			    "unrecognized dev type %x",
-			    cportinfo->cport_dev_type));
-			mutex_exit(&cportinfo->cport_mutex);
-			return (SATA_SUCCESS);
-		}
+		mutex_exit(&cportinfo->cport_mutex);
+
+		break;
+
+	case SATA_ADDR_PMPORT:
+		saddr->qual = SATA_ADDR_DPMPORT;
+
+		mutex_enter(&cportinfo->cport_mutex);
+		/* It must be a Port Multiplier at the controller port */
+		ASSERT(cportinfo->cport_dev_type == SATA_DTYPE_PMULT);
+
 		pminfo = SATA_CPORTINFO_PMULT_INFO(cportinfo);
-		pmportinfo = pminfo->pmult_dev_port[pmport];
-		sata_device.satadev_addr = pmportinfo->pmport_addr;
-		sata_device.satadev_addr.qual = SATA_ADDR_DPMPORT;
+		pmportinfo = pminfo->pmult_dev_port[saddr->pmport];
 		mutex_exit(&cportinfo->cport_mutex);
 
-		rval = sata_probe_device(sata_hba_inst, &sata_device);
+		rval = sata_probe_device(sata_hba_inst, sata_device);
 		if (rval != SATA_SUCCESS ||
-		    sata_device.satadev_type == SATA_DTYPE_UNKNOWN) {
+		    sata_device->satadev_type == SATA_DTYPE_UNKNOWN) {
 			return (SATA_FAILURE);
 		}
-		mutex_enter(&cportinfo->cport_mutex);
+
+		mutex_enter(&pmportinfo->pmport_mutex);
 		sata_show_drive_info(sata_hba_inst,
-		    SATA_CPORTINFO_DRV_INFO(cportinfo));
-
-		if ((sata_device.satadev_type & SATA_VALID_DEV_TYPE) == 0) {
+		    SATA_PMPORTINFO_DRV_INFO(pmportinfo));
+
+		if ((sata_device->satadev_type & SATA_VALID_DEV_TYPE) == 0) {
 			/*
 			 * Could not determine device type.
 			 * Degrade this device to unknown.
 			 */
 			pmportinfo->pmport_dev_type = SATA_DTYPE_UNKNOWN;
-			mutex_exit(&cportinfo->cport_mutex);
+			mutex_exit(&pmportinfo->pmport_mutex);
 			return (SATA_SUCCESS);
 		}
-		pmportinfo->pmport_dev_type = sata_device.satadev_type;
+		pmportinfo->pmport_dev_type = sata_device->satadev_type;
 		pmportinfo->pmport_tgtnode_clean = B_TRUE;
-		mutex_exit(&cportinfo->cport_mutex);
+		mutex_exit(&pmportinfo->pmport_mutex);
 
 		/*
 		 * Initialize device to the desired state.
@@ -9197,30 +9458,106 @@
 
 			if (rval == SATA_RETRY)
 				sata_log(sata_hba_inst, CE_WARN,
-				    "SATA device at port %d pmport %d - "
+				    "SATA device at port %d:%d - "
 				    "default device features could not be set."
 				    " Device may not operate as expected.",
-				    pmportinfo->pmport_addr.cport,
-				    pmportinfo->pmport_addr.pmport);
-		}
-		cdip = sata_create_target_node(pdip, sata_hba_inst,
-		    &sata_device.satadev_addr);
-		mutex_enter(&cportinfo->cport_mutex);
+				    cport, pmport);
+		}
+
+		cdip = sata_create_target_node(pdip, sata_hba_inst, saddr);
 		if (cdip == NULL) {
 			/*
 			 * Attaching target node failed.
 			 * We retain sata_drive_info structure...
 			 */
-			mutex_exit(&cportinfo->cport_mutex);
 			return (SATA_SUCCESS);
 		}
+		mutex_enter(&pmportinfo->pmport_mutex);
 		pmportinfo->pmport_sata_drive->satadrv_state |=
 		    SATA_STATE_READY;
-	}
-	mutex_exit(&cportinfo->cport_mutex);
+		mutex_exit(&pmportinfo->pmport_mutex);
+
+		break;
+
+	default:
+		return (SATA_FAILURE);
+	}
+
 	return (SATA_SUCCESS);
 }
 
+/*
+ * Clean up target node at specific address.
+ *
+ * NOTE: No Mutex should be hold.
+ */
+static int
+sata_offline_device(sata_hba_inst_t *sata_hba_inst,
+    sata_device_t *sata_device, sata_drive_info_t *sdinfo)
+{
+	uint8_t cport, pmport, qual;
+	dev_info_t *tdip;
+
+	cport = sata_device->satadev_addr.cport;
+	pmport = sata_device->satadev_addr.pmport;
+	qual = sata_device->satadev_addr.qual;
+
+	if (qual == SATA_ADDR_DCPORT) {
+		SATA_LOG_D((sata_hba_inst, CE_WARN,
+		    "sata_hba_ioctl: disconnect device at port %d", cport));
+	} else {
+		SATA_LOG_D((sata_hba_inst, CE_WARN,
+		    "sata_hba_ioctl: disconnect device at port %d:%d",
+		    cport, pmport));
+	}
+
+	/* We are addressing attached device, not a port */
+	sata_device->satadev_addr.qual =
+	    sdinfo->satadrv_addr.qual;
+	tdip = sata_get_scsi_target_dip(SATA_DIP(sata_hba_inst),
+	    &sata_device->satadev_addr);
+	if (tdip != NULL && ndi_devi_offline(tdip,
+	    NDI_DEVI_REMOVE) != NDI_SUCCESS) {
+		/*
+		 * Problem :
+		 * The target node remained attached.
+		 * This happens when the device file was open
+		 * or a node was waiting for resources.
+		 * Cannot do anything about it.
+		 */
+		if (qual == SATA_ADDR_DCPORT) {
+			SATA_LOG_D((sata_hba_inst, CE_WARN,
+			    "sata_hba_ioctl: disconnect: could "
+			    "not unconfigure device before "
+			    "disconnecting the SATA port %d",
+			    cport));
+		} else {
+			SATA_LOG_D((sata_hba_inst, CE_WARN,
+			    "sata_hba_ioctl: disconnect: could "
+			    "not unconfigure device before "
+			    "disconnecting the SATA port %d:%d",
+			    cport, pmport));
+		}
+		/*
+		 * Set DEVICE REMOVED state in the target
+		 * node. It will prevent access to the device
+		 * even when a new device is attached, until
+		 * the old target node is released, removed and
+		 * recreated for a new  device.
+		 */
+		sata_set_device_removed(tdip);
+
+		/*
+		 * Instruct event daemon to try the target
+		 * node cleanup later.
+		 */
+		sata_set_target_node_cleanup(
+		    sata_hba_inst, &sata_device->satadev_addr);
+	}
+
+
+	return (SATA_SUCCESS);
+}
 
 
 /*
@@ -9230,8 +9567,6 @@
  *
  * A dev_info_t pointer is returned if operation is successful, NULL is
  * returned otherwise.
- *
- * No port multiplier support.
  */
 
 static dev_info_t *
@@ -9406,6 +9741,76 @@
 	return (NULL);
 }
 
+/*
+ * Remove a target node.
+ */
+static void
+sata_remove_target_node(sata_hba_inst_t *sata_hba_inst,
+			sata_address_t *sata_addr)
+{
+	dev_info_t *tdip;
+	uint8_t cport = sata_addr->cport;
+	uint8_t pmport = sata_addr->pmport;
+	uint8_t qual = sata_addr->qual;
+
+	/* Note the sata daemon uses the address of the port/pmport */
+	ASSERT(qual == SATA_ADDR_CPORT || qual == SATA_ADDR_PMPORT);
+
+	/* Remove target node */
+	tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), cport, pmport);
+	if (tdip != NULL) {
+		/*
+		 * Target node exists.  Unconfigure device
+		 * then remove the target node (one ndi
+		 * operation).
+		 */
+		if (ndi_devi_offline(tdip, NDI_DEVI_REMOVE) != NDI_SUCCESS) {
+			/*
+			 * PROBLEM - no device, but target node remained. This
+			 * happens when the file was open or node was waiting
+			 * for resources.
+			 */
+			SATA_LOG_D((sata_hba_inst, CE_WARN,
+			    "sata_remove_target_node: "
+			    "Failed to remove target node for "
+			    "detached SATA device."));
+			/*
+			 * Set target node state to DEVI_DEVICE_REMOVED. But
+			 * re-check first that the node still exists.
+			 */
+			tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst),
+			    cport, pmport);
+			if (tdip != NULL) {
+				sata_set_device_removed(tdip);
+				/*
+				 * Instruct event daemon to retry the cleanup
+				 * later.
+				 */
+				sata_set_target_node_cleanup(sata_hba_inst,
+				    sata_addr);
+			}
+		}
+
+		if (qual == SATA_ADDR_CPORT)
+			sata_log(sata_hba_inst, CE_WARN,
+			    "SATA device detached at port %d", cport);
+		else
+			sata_log(sata_hba_inst, CE_WARN,
+			    "SATA device detached at port %d:%d",
+			    cport, pmport);
+	}
+#ifdef SATA_DEBUG
+	else {
+		if (qual == SATA_ADDR_CPORT)
+			sata_log(sata_hba_inst, CE_WARN,
+			    "target node not found at port %d", cport);
+		else
+			sata_log(sata_hba_inst, CE_WARN,
+			    "target node not found at port %d:%d",
+			    cport, pmport);
+	}
+#endif
+}
 
 
 /*
@@ -9424,14 +9829,14 @@
  *
  * This function cannot be called in interrupt context - it may sleep.
  *
- * NOte: Port multiplier is not supported yet, although there may be some
- * pieces of code referencing to it.
+ * Note: Port multiplier is supported.
  */
 static int
 sata_reprobe_port(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device,
     int flag)
 {
 	sata_cport_info_t *cportinfo;
+	sata_pmult_info_t *pmultinfo;
 	sata_drive_info_t *sdinfo, *osdinfo;
 	boolean_t init_device = B_FALSE;
 	int prev_device_type = SATA_DTYPE_NONE;
@@ -9439,11 +9844,28 @@
 	int prev_device_state = 0;
 	clock_t start_time;
 	int retry = B_FALSE;
+	uint8_t cport = sata_device->satadev_addr.cport;
 	int rval_probe, rval_init;
 
+	/*
+	 * If target is pmport, sata_reprobe_pmport() will handle it.
+	 */
+	if (sata_device->satadev_addr.qual == SATA_ADDR_PMPORT ||
+	    sata_device->satadev_addr.qual == SATA_ADDR_DPMPORT)
+		return (sata_reprobe_pmport(sata_hba_inst, sata_device, flag));
+
 	/* We only care about host sata cport for now */
 	cportinfo = SATA_CPORT_INFO(sata_hba_inst,
 	    sata_device->satadev_addr.cport);
+
+	/*
+	 * If a port multiplier was previously attached (we have no idea it
+	 * still there or not), sata_reprobe_pmult() will handle it.
+	 */
+	if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT)
+		return (sata_reprobe_pmult(sata_hba_inst, sata_device, flag));
+
+	/* Store sata_drive_info when a non-pmult device was attached. */
 	osdinfo = SATA_CPORTINFO_DRV_INFO(cportinfo);
 	if (osdinfo != NULL) {
 		/*
@@ -9509,24 +9931,22 @@
 	}
 
 	cportinfo->cport_state |= SATA_STATE_READY;
+	cportinfo->cport_state |= SATA_STATE_PROBED;
+
 	cportinfo->cport_dev_type = sata_device->satadev_type;
 	sdinfo = SATA_CPORTINFO_DRV_INFO(cportinfo);
 
 	/*
 	 * If we are re-probing the port, there may be
 	 * sata_drive_info structure attached
-	 * (or sata_pm_info, if PMult is supported).
 	 */
 	if (sata_device->satadev_type == SATA_DTYPE_NONE) {
+
 		/*
 		 * There is no device, so remove device info structure,
 		 * if necessary.
-		 * Only direct attached drive is considered now, until
-		 * port multiplier is supported. If the previously
-		 * attached device was a port multiplier, we would need
-		 * to take care of devices attached beyond the port
-		 * multiplier.
-		 */
+		 */
+		/* Device change: Drive -> None */
 		SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL;
 		cportinfo->cport_dev_type = SATA_DTYPE_NONE;
 		if (sdinfo != NULL) {
@@ -9537,9 +9957,12 @@
 		}
 		mutex_exit(&cportinfo->cport_mutex);
 		return (SATA_SUCCESS);
+
 	}
 
 	if (sata_device->satadev_type != SATA_DTYPE_PMULT) {
+
+		/* Device (may) change: Drive -> Drive */
 		if (sdinfo == NULL) {
 			/*
 			 * There is some device attached, but there is
@@ -9578,14 +10001,36 @@
 		cportinfo->cport_dev_type = SATA_DTYPE_UNKNOWN;
 		sata_device->satadev_addr.qual = sdinfo->satadrv_addr.qual;
 	} else {
-		/*
-		 * The device is a port multiplier - not handled now.
-		 */
-		cportinfo->cport_dev_type = SATA_DTYPE_UNKNOWN;
+		/* Device change: Drive -> PMult */
+		SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL;
+		if (sdinfo != NULL) {
+			kmem_free(sdinfo, sizeof (sata_drive_info_t));
+			sata_log(sata_hba_inst, CE_WARN,
+			    "SATA device detached "
+			    "from port %d", cportinfo->cport_addr.cport);
+		}
+
+		sata_log(sata_hba_inst, CE_WARN,
+		    "SATA port multiplier detected at port %d",
+		    cportinfo->cport_addr.cport);
+
+		mutex_exit(&cportinfo->cport_mutex);
+		sata_alloc_pmult(sata_hba_inst, sata_device);
+		sata_show_pmult_info(sata_hba_inst, sata_device);
+		mutex_enter(&cportinfo->cport_mutex);
+
+		/*
+		 * Mark all the port multiplier port behind the port
+		 * multiplier behind with link events, so that the sata daemon
+		 * will update their status.
+		 */
+		pmultinfo = SATA_PMULT_INFO(sata_hba_inst, cport);
+		pmultinfo->pmult_event_flags |= SATA_EVNT_DEVICE_RESET;
 		mutex_exit(&cportinfo->cport_mutex);
 		return (SATA_SUCCESS);
 	}
 	mutex_exit(&cportinfo->cport_mutex);
+
 	/*
 	 * Figure out what kind of device we are really
 	 * dealing with. Failure of identifying device does not fail this
@@ -9675,6 +10120,621 @@
 }
 
 /*
+ * Reprobe a controller port that connected to a port multiplier.
+ *
+ * NOTE: No Mutex should be hold.
+ */
+static int
+sata_reprobe_pmult(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device,
+    int flag)
+{
+	_NOTE(ARGUNUSED(flag))
+	sata_cport_info_t *cportinfo;
+	sata_pmult_info_t *pmultinfo;
+	uint8_t cport = sata_device->satadev_addr.cport;
+	int rval_probe;
+
+	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
+	pmultinfo = SATA_PMULT_INFO(sata_hba_inst, cport);
+
+	/* probe port */
+	mutex_enter(&cportinfo->cport_mutex);
+	cportinfo->cport_state &= ~SATA_PORT_STATE_CLEAR_MASK;
+	cportinfo->cport_state |= SATA_STATE_PROBING;
+	mutex_exit(&cportinfo->cport_mutex);
+
+	rval_probe = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
+	    (SATA_DIP(sata_hba_inst), sata_device);
+
+	mutex_enter(&cportinfo->cport_mutex);
+	if (rval_probe != SATA_SUCCESS) {
+		cportinfo->cport_state = SATA_PSTATE_FAILED;
+		SATA_LOG_D((sata_hba_inst, CE_WARN, "sata_reprobe_pmult: "
+		    "SATA port %d probing failed", cport));
+		sata_log(sata_hba_inst, CE_WARN,
+		    "SATA port multiplier detached at port %d", cport);
+		mutex_exit(&cportinfo->cport_mutex);
+		sata_free_pmult(sata_hba_inst, sata_device);
+		return (SATA_FAILURE);
+	}
+
+	/*
+	 * update sata port state and set device type
+	 */
+	sata_update_port_info(sata_hba_inst, sata_device);
+	cportinfo->cport_state &= ~SATA_STATE_PROBING;
+	cportinfo->cport_state |= SATA_STATE_PROBED;
+
+	/*
+	 * Sanity check - Port is active? Is the link active?
+	 * Is there any device attached?
+	 */
+	if ((cportinfo->cport_state &
+	    (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) ||
+	    (cportinfo->cport_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) !=
+	    SATA_PORT_DEVLINK_UP ||
+	    (sata_device->satadev_type == SATA_DTYPE_NONE)) {
+		cportinfo->cport_dev_type = SATA_DTYPE_NONE;
+		mutex_exit(&cportinfo->cport_mutex);
+		sata_free_pmult(sata_hba_inst, sata_device);
+		sata_log(sata_hba_inst, CE_WARN,
+		    "SATA port multiplier detached at port %d", cport);
+		return (SATA_SUCCESS);
+	}
+
+	/*
+	 * Device changed: PMult -> Non-PMult
+	 *
+	 * This situation is uncommon, most possibly being caused by errors
+	 * after which the port multiplier is not correct initialized and
+	 * recognized. In that case the new device will be marked as unknown
+	 * and will not be automatically probed in this routine. Instead
+	 * system administrator could manually restart it via cfgadm(1M).
+	 */
+	if (sata_device->satadev_type != SATA_DTYPE_PMULT) {
+		cportinfo->cport_dev_type = SATA_DTYPE_UNKNOWN;
+		mutex_exit(&cportinfo->cport_mutex);
+		sata_free_pmult(sata_hba_inst, sata_device);
+		sata_log(sata_hba_inst, CE_WARN,
+		    "SATA port multiplier detached at port %d", cport);
+		return (SATA_FAILURE);
+	}
+
+	/*
+	 * Now we know it is a port multiplier. However, if this is not the
+	 * previously attached port multiplier - they may have different
+	 * pmport numbers - we need to re-allocate data structures for every
+	 * pmport and drive.
+	 *
+	 * Port multipliers of the same model have identical values in these
+	 * registers, so it is still necessary to update the information of
+	 * all drives attached to the previous port multiplier afterwards.
+	 */
+	if ((sata_device->satadev_gscr.gscr0 != pmultinfo->pmult_gscr.gscr0) ||
+	    (sata_device->satadev_gscr.gscr1 != pmultinfo->pmult_gscr.gscr1) ||
+	    (sata_device->satadev_gscr.gscr2 != pmultinfo->pmult_gscr.gscr2)) {
+
+		/* Device changed: PMult -> another PMult */
+		mutex_exit(&cportinfo->cport_mutex);
+		sata_free_pmult(sata_hba_inst, sata_device);
+		sata_alloc_pmult(sata_hba_inst, sata_device);
+		mutex_enter(&cportinfo->cport_mutex);
+
+		SATADBG1(SATA_DBG_PMULT, sata_hba_inst,
+		    "SATA port multiplier [changed] at port %d", cport);
+		sata_log(sata_hba_inst, CE_WARN,
+		    "SATA port multiplier detected at port %d", cport);
+	}
+
+	/*
+	 * Mark all the port multiplier port behind the port
+	 * multiplier behind with link events, so that the sata daemon
+	 * will update their status.
+	 */
+	pmultinfo->pmult_event_flags |= SATA_EVNT_DEVICE_RESET;
+	mutex_exit(&cportinfo->cport_mutex);
+
+	return (SATA_SUCCESS);
+}
+
+/*
+ * Re-probe a port multiplier port, check for a device and attach info
+ * structures when necessary. Identify Device data is fetched, if possible.
+ * Assumption: sata address is already validated as port multiplier port.
+ * SATA_SUCCESS is returned if port is re-probed sucessfully, regardless of
+ * the presence of a device and its type.
+ *
+ * flag arg specifies that the function should try multiple times to identify
+ * device type and to initialize it, or it should return immediately on failure.
+ * SATA_DEV_IDENTIFY_RETRY - retry
+ * SATA_DEV_IDENTIFY_NORETRY - no retry
+ *
+ * SATA_FAILURE is returned if one of the operations failed.
+ *
+ * This function cannot be called in interrupt context - it may sleep.
+ *
+ * NOTE: Should be only called by sata_probe_port() in case target port is a
+ *       port multiplier port.
+ * NOTE: No Mutex should be hold.
+ */
+static int
+sata_reprobe_pmport(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device,
+    int flag)
+{
+	sata_cport_info_t *cportinfo = NULL;
+	sata_pmport_info_t *pmportinfo = NULL;
+	sata_drive_info_t *sdinfo, *osdinfo;
+	sata_device_t sdevice;
+	boolean_t init_device = B_FALSE;
+	int prev_device_type = SATA_DTYPE_NONE;
+	int prev_device_settings = 0;
+	int prev_device_state = 0;
+	clock_t start_time;
+	uint8_t cport = sata_device->satadev_addr.cport;
+	uint8_t pmport = sata_device->satadev_addr.pmport;
+	int rval;
+
+	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
+	pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
+	osdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo);
+
+	if (osdinfo != NULL) {
+		/*
+		 * We are re-probing port with a previously attached device.
+		 * Save previous device type and settings.
+		 */
+		prev_device_type = pmportinfo->pmport_dev_type;
+		prev_device_settings = osdinfo->satadrv_settings;
+		prev_device_state = osdinfo->satadrv_state;
+	}
+
+	start_time = ddi_get_lbolt();
+
+	/* check parent status */
+	mutex_enter(&cportinfo->cport_mutex);
+	if ((cportinfo->cport_state &
+	    (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) ||
+	    (cportinfo->cport_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) !=
+	    SATA_PORT_DEVLINK_UP) {
+		mutex_exit(&cportinfo->cport_mutex);
+		return (SATA_FAILURE);
+	}
+	mutex_exit(&cportinfo->cport_mutex);
+
+retry_probe_pmport:
+
+	/* probe port */
+	mutex_enter(&pmportinfo->pmport_mutex);
+	pmportinfo->pmport_state &= ~SATA_PORT_STATE_CLEAR_MASK;
+	pmportinfo->pmport_state |= SATA_STATE_PROBING;
+	mutex_exit(&pmportinfo->pmport_mutex);
+
+	rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
+	    (SATA_DIP(sata_hba_inst), sata_device);
+
+	/* might need retry because we cannot touch registers. */
+	if (rval == SATA_FAILURE) {
+		mutex_enter(&pmportinfo->pmport_mutex);
+		pmportinfo->pmport_state = SATA_PSTATE_FAILED;
+		mutex_exit(&pmportinfo->pmport_mutex);
+		SATA_LOG_D((sata_hba_inst, CE_WARN, "sata_reprobe_pmport: "
+		    "SATA port %d:%d probing failed",
+		    cport, pmport));
+		return (SATA_FAILURE);
+	} else if (rval == SATA_RETRY) {
+		SATA_LOG_D((sata_hba_inst, CE_WARN, "sata_reprobe_pmport: "
+		    "SATA port %d:%d probing failed, retrying...",
+		    cport, pmport));
+		clock_t cur_time = ddi_get_lbolt();
+		/*
+		 * A device was not successfully identified or initialized.
+		 * Track retry time for device identification.
+		 */
+		if ((cur_time - start_time) <
+		    drv_usectohz(SATA_DEV_REPROBE_TIMEOUT)) {
+			/* sleep for a while */
+			delay(drv_usectohz(SATA_DEV_RETRY_DLY));
+			goto retry_probe_pmport;
+		} else {
+			mutex_enter(&pmportinfo->pmport_mutex);
+			if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL)
+				SATA_PMPORTINFO_DRV_INFO(pmportinfo)->
+				    satadrv_state = SATA_DSTATE_FAILED;
+			mutex_exit(&pmportinfo->pmport_mutex);
+			return (SATA_SUCCESS);
+		}
+	}
+
+	/*
+	 * Sanity check - Controller port is active? Is the link active?
+	 * Is it still a port multiplier?
+	 */
+	if ((cportinfo->cport_state &
+	    (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) ||
+	    (cportinfo->cport_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) !=
+	    SATA_PORT_DEVLINK_UP ||
+	    (cportinfo->cport_dev_type != SATA_DTYPE_PMULT)) {
+		/*
+		 * Port in non-usable state or no link active/no
+		 * device. Free info structure.
+		 */
+		cportinfo->cport_dev_type = SATA_DTYPE_UNKNOWN;
+
+		sdevice.satadev_addr.cport = cport;
+		sdevice.satadev_addr.pmport = pmport;
+		sdevice.satadev_addr.qual = SATA_ADDR_PMULT;
+		mutex_exit(&cportinfo->cport_mutex);
+
+		sata_free_pmult(sata_hba_inst, &sdevice);
+		return (SATA_FAILURE);
+	}
+
+	/* SATA_SUCCESS NOW */
+	/*
+	 * update sata port state and set device type
+	 */
+	mutex_enter(&pmportinfo->pmport_mutex);
+	sata_update_pmport_info(sata_hba_inst, sata_device);
+	pmportinfo->pmport_state &= ~SATA_STATE_PROBING;
+
+	/*
+	 * Sanity check - Port is active? Is the link active?
+	 * Is there any device attached?
+	 */
+	if ((pmportinfo->pmport_state &
+	    (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) ||
+	    (pmportinfo->pmport_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) !=
+	    SATA_PORT_DEVLINK_UP) {
+		/*
+		 * Port in non-usable state or no link active/no device.
+		 * Free info structure if necessary (direct attached drive
+		 * only, for now!
+		 */
+		sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo);
+		SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL;
+		/* Add here differentiation for device attached or not */
+		pmportinfo->pmport_dev_type = SATA_DTYPE_NONE;
+		mutex_exit(&pmportinfo->pmport_mutex);
+		if (sdinfo != NULL)
+			kmem_free(sdinfo, sizeof (sata_drive_info_t));
+		return (SATA_SUCCESS);
+	}
+
+	pmportinfo->pmport_state |= SATA_STATE_READY;
+	pmportinfo->pmport_dev_type = sata_device->satadev_type;
+	sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo);
+
+	/*
+	 * If we are re-probing the port, there may be
+	 * sata_drive_info structure attached
+	 * (or sata_pm_info, if PMult is supported).
+	 */
+	if (sata_device->satadev_type == SATA_DTYPE_NONE) {
+		/*
+		 * There is no device, so remove device info structure,
+		 * if necessary.
+		 */
+		SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL;
+		pmportinfo->pmport_dev_type = SATA_DTYPE_NONE;
+		if (sdinfo != NULL) {
+			kmem_free(sdinfo, sizeof (sata_drive_info_t));
+			sata_log(sata_hba_inst, CE_WARN,
+			    "SATA device detached from port %d:%d",
+			    cport, pmport);
+		}
+		mutex_exit(&pmportinfo->pmport_mutex);
+		return (SATA_SUCCESS);
+	}
+
+	/* this should not be a pmult */
+	ASSERT(sata_device->satadev_type != SATA_DTYPE_PMULT);
+	if (sdinfo == NULL) {
+		/*
+		 * There is some device attached, but there is
+		 * no sata_drive_info structure - allocate one
+		 */
+		mutex_exit(&pmportinfo->pmport_mutex);
+		sdinfo = kmem_zalloc(sizeof (sata_drive_info_t),
+		    KM_SLEEP);
+		mutex_enter(&pmportinfo->pmport_mutex);
+		/*
+		 * Recheck, that the port state did not change when we
+		 * released mutex.
+		 */
+		if (pmportinfo->pmport_state & SATA_STATE_READY) {
+			SATA_PMPORTINFO_DRV_INFO(pmportinfo) = sdinfo;
+			sdinfo->satadrv_addr = pmportinfo->pmport_addr;
+			sdinfo->satadrv_addr.qual = SATA_ADDR_DPMPORT;
+			sdinfo->satadrv_type = SATA_DTYPE_UNKNOWN;
+			sdinfo->satadrv_state = SATA_STATE_UNKNOWN;
+		} else {
+			/*
+			 * Port is not in ready state, we
+			 * cannot attach a device.
+			 */
+			mutex_exit(&pmportinfo->pmport_mutex);
+			kmem_free(sdinfo, sizeof (sata_drive_info_t));
+			return (SATA_SUCCESS);
+		}
+		/*
+		 * Since we are adding device, presumably new one,
+		 * indicate that it  should be initalized,
+		 * as well as some internal framework states).
+		 */
+		init_device = B_TRUE;
+	}
+
+	pmportinfo->pmport_dev_type = SATA_DTYPE_UNKNOWN;
+	sata_device->satadev_addr.qual = sdinfo->satadrv_addr.qual;
+
+	mutex_exit(&pmportinfo->pmport_mutex);
+	/*
+	 * Figure out what kind of device we are really
+	 * dealing with.
+	 */
+	rval = sata_probe_device(sata_hba_inst, sata_device);
+
+	mutex_enter(&pmportinfo->pmport_mutex);
+	if (rval == SATA_SUCCESS) {
+		/*
+		 * If we are dealing with the same type of a device as before,
+		 * restore its settings flags.
+		 */
+		if (osdinfo != NULL &&
+		    sata_device->satadev_type == prev_device_type)
+			sdinfo->satadrv_settings = prev_device_settings;
+
+		mutex_exit(&pmportinfo->pmport_mutex);
+		/* Set initial device features, if necessary */
+		if (init_device == B_TRUE) {
+			rval = sata_initialize_device(sata_hba_inst, sdinfo);
+		}
+		if (rval == SATA_SUCCESS)
+			return (rval);
+	} else {
+		/*
+		 * If there was some device info before we probe the device,
+		 * restore previous device setting, so we can retry from scratch
+		 * later. Providing, of course, that device has not disappeared
+		 * during probing process.
+		 */
+		if (sata_device->satadev_type != SATA_DTYPE_NONE) {
+			if (osdinfo != NULL) {
+				pmportinfo->pmport_dev_type = prev_device_type;
+				sdinfo->satadrv_type = prev_device_type;
+				sdinfo->satadrv_state = prev_device_state;
+			}
+		} else {
+			/* device is gone */
+			kmem_free(sdinfo, sizeof (sata_drive_info_t));
+			pmportinfo->pmport_dev_type = SATA_DTYPE_NONE;
+			SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL;
+			mutex_exit(&pmportinfo->pmport_mutex);
+			return (SATA_SUCCESS);
+		}
+		mutex_exit(&pmportinfo->pmport_mutex);
+	}
+
+	if (flag == SATA_DEV_IDENTIFY_RETRY) {
+		clock_t cur_time = ddi_get_lbolt();
+		/*
+		 * A device was not successfully identified or initialized.
+		 * Track retry time for device identification.
+		 */
+		if ((cur_time - start_time) <
+		    drv_usectohz(SATA_DEV_REPROBE_TIMEOUT)) {
+			/* sleep for a while */
+			delay(drv_usectohz(SATA_DEV_RETRY_DLY));
+			goto retry_probe_pmport;
+		} else {
+			mutex_enter(&pmportinfo->pmport_mutex);
+			if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL)
+				SATA_PMPORTINFO_DRV_INFO(pmportinfo)->
+				    satadrv_state = SATA_DSTATE_FAILED;
+			mutex_exit(&pmportinfo->pmport_mutex);
+		}
+	}
+	return (SATA_SUCCESS);
+}
+
+/*
+ * Allocated related structure for a port multiplier and its device ports
+ *
+ * Port multiplier should be ready and probed, and related information like
+ * the number of the device ports should be store in sata_device_t.
+ *
+ * NOTE: No Mutex should be hold.
+ */
+static void
+sata_alloc_pmult(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device)
+{
+	dev_info_t *dip = SATA_DIP(sata_hba_inst);
+	sata_cport_info_t *cportinfo = NULL;
+	sata_pmult_info_t *pmultinfo = NULL;
+	sata_pmport_info_t *pmportinfo = NULL;
+	dev_t minor_number;
+	char name[16];
+	uint8_t cport = sata_device->satadev_addr.cport;
+	int npmport;
+
+	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
+
+	/* This function might be called while a port-mult is hot-plugged. */
+	mutex_enter(&cportinfo->cport_mutex);
+
+	/* dev_type's not updated when get called from sata_reprobe_port() */
+	cportinfo->cport_dev_type = SATA_DTYPE_PMULT;
+	if (SATA_CPORTINFO_PMULT_INFO(cportinfo) == NULL) {
+		/* Create a pmult_info structure */
+		SATA_CPORTINFO_PMULT_INFO(cportinfo) =
+		    kmem_zalloc(sizeof (sata_pmult_info_t), KM_SLEEP);
+	}
+	pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo);
+
+	pmultinfo->pmult_addr = sata_device->satadev_addr;
+	pmultinfo->pmult_addr.qual = SATA_ADDR_PMULT;
+	pmultinfo->pmult_state = SATA_STATE_PROBING;
+	pmultinfo->pmult_gscr = sata_device->satadev_gscr;
+	pmultinfo->pmult_num_dev_ports = sata_device->satadev_add_info;
+
+	/* Initialize pmport_info structure */
+	for (npmport = 0; npmport < pmultinfo->pmult_num_dev_ports;
+	    npmport++) {
+
+		/* if everything is allocated, skip */
+		if (SATA_PMPORT_INFO(sata_hba_inst, cport, npmport) != NULL)
+			continue;
+
+		pmportinfo = kmem_zalloc(sizeof (sata_pmport_info_t), KM_SLEEP);
+		mutex_init(&pmportinfo->pmport_mutex, NULL, MUTEX_DRIVER, NULL);
+		mutex_exit(&cportinfo->cport_mutex);
+
+		mutex_enter(&pmportinfo->pmport_mutex);
+		pmportinfo->pmport_addr.cport = cport;
+		pmportinfo->pmport_addr.pmport = (uint8_t)npmport;
+		pmportinfo->pmport_addr.qual = SATA_ADDR_PMPORT;
+		pmportinfo->pmport_state &= ~SATA_PORT_STATE_CLEAR_MASK;
+		mutex_exit(&pmportinfo->pmport_mutex);
+
+		mutex_enter(&cportinfo->cport_mutex);
+		SATA_PMPORT_INFO(sata_hba_inst, cport, npmport) = pmportinfo;
+
+		/* Create an attachment point */
+		minor_number = SATA_MAKE_AP_MINOR(ddi_get_instance(dip),
+		    cport, (uint8_t)npmport, SATA_ADDR_PMPORT);
+		(void) sprintf(name, "%d.%d", cport, npmport);
+
+		if (ddi_create_minor_node(dip, name, S_IFCHR, minor_number,
+		    DDI_NT_SATA_ATTACHMENT_POINT, 0) != DDI_SUCCESS) {
+			sata_log(sata_hba_inst, CE_WARN, "sata_hba_attach: "
+			    "cannot create SATA attachment point for "
+			    "port %d:%d", cport, npmport);
+		}
+	}
+
+	pmultinfo->pmult_state &= ~SATA_STATE_PROBING;
+	pmultinfo->pmult_state |= (SATA_STATE_PROBED|SATA_STATE_READY);
+
+	mutex_exit(&cportinfo->cport_mutex);
+}
+
+/*
+ * Free data structures when a port multiplier is removed.
+ *
+ * NOTE: No Mutex should be hold.
+ */
+static void
+sata_free_pmult(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device)
+{
+	sata_cport_info_t *cportinfo;
+	sata_pmult_info_t *pmultinfo;
+	sata_pmport_info_t *pmportinfo;
+	sata_device_t pmport_device;
+	sata_drive_info_t *sdinfo;
+	dev_info_t *tdip;
+	char name[16];
+	uint8_t cport = sata_device->satadev_addr.cport;
+	int npmport;
+
+	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
+
+	/* This function might be called while port-mult is hot plugged. */
+	mutex_enter(&cportinfo->cport_mutex);
+
+	pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo);
+	ASSERT(pmultinfo != NULL);
+
+	/* Free pmport_info structure */
+	for (npmport = 0; npmport < pmultinfo->pmult_num_dev_ports;
+	    npmport++) {
+		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, npmport);
+		if (pmportinfo == NULL)
+			continue;
+		mutex_exit(&cportinfo->cport_mutex);
+
+		mutex_enter(&pmportinfo->pmport_mutex);
+		sdinfo = pmportinfo->pmport_sata_drive;
+		SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL;
+		mutex_exit(&pmportinfo->pmport_mutex);
+
+		/* Remove attachment point. */
+		name[0] = '\0';
+		(void) sprintf(name, "%d.%d", cport, npmport);
+		ddi_remove_minor_node(SATA_DIP(sata_hba_inst), name);
+		sata_log(sata_hba_inst, CE_NOTE,
+		    "Remove attachment point of port %d:%d",
+		    cport, npmport);
+
+		/*
+		 * Rumove target node
+		 */
+		bzero(&pmport_device, sizeof (sata_device_t));
+		pmport_device.satadev_rev = SATA_DEVICE_REV;
+		pmport_device.satadev_addr.cport = cport;
+		pmport_device.satadev_addr.pmport = (uint8_t)npmport;
+		pmport_device.satadev_addr.qual = SATA_ADDR_DPMPORT;
+
+		tdip = sata_get_scsi_target_dip(SATA_DIP(sata_hba_inst),
+		    &(pmport_device.satadev_addr));
+		if (tdip != NULL && ndi_devi_offline(tdip,
+		    NDI_DEVI_REMOVE) != NDI_SUCCESS) {
+			/*
+			 * Problem :
+			 * The target node remained attached.
+			 * This happens when the device file was open
+			 * or a node was waiting for resources.
+			 * Cannot do anything about it.
+			 */
+			SATA_LOG_D((sata_hba_inst, CE_WARN,
+			    "sata_free_pmult: could not unconfigure device "
+			    "before disconnecting the SATA port %d:%d",
+			    cport, npmport));
+
+			/*
+			 * Set DEVICE REMOVED state in the target
+			 * node. It will prevent access to the device
+			 * even when a new device is attached, until
+			 * the old target node is released, removed and
+			 * recreated for a new  device.
+			 */
+			sata_set_device_removed(tdip);
+
+			/*
+			 * Instruct event daemon to try the target
+			 * node cleanup later.
+			 */
+			sata_set_target_node_cleanup(
+			    sata_hba_inst, &(pmport_device.satadev_addr));
+
+		}
+		mutex_enter(&cportinfo->cport_mutex);
+
+		/*
+		 * Add here differentiation for device attached or not
+		 */
+		if (sdinfo != NULL)  {
+			sata_log(sata_hba_inst, CE_WARN,
+			    "SATA device detached from port %d:%d",
+			    cport, npmport);
+			kmem_free(sdinfo, sizeof (sata_drive_info_t));
+		}
+
+		mutex_destroy(&pmportinfo->pmport_mutex);
+		kmem_free(pmportinfo, sizeof (sata_pmport_info_t));
+	}
+
+	kmem_free(pmultinfo, sizeof (sata_pmult_info_t));
+
+	cportinfo->cport_devp.cport_sata_pmult = NULL;
+	cportinfo->cport_dev_type = SATA_DTYPE_NONE;
+
+	sata_log(sata_hba_inst, CE_WARN,
+	    "SATA port multiplier detached at port %d", cport);
+
+	mutex_exit(&cportinfo->cport_mutex);
+}
+
+/*
  * Initialize device
  * Specified device is initialized to a default state.
  *
@@ -9826,6 +10886,8 @@
  * returns 1 if address is valid but device is not attached,
  * returns -1 if bad address or device is of an unsupported type.
  * Upon return sata_device argument is set.
+ *
+ * Port multiplier is supported now.
  */
 static int
 sata_validate_scsi_address(sata_hba_inst_t *sata_hba_inst,
@@ -9859,8 +10921,7 @@
 			    cportinfo->cport_dev_type == SATA_DTYPE_NONE)
 				goto out;
 
-			if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT ||
-			    (cportinfo->cport_dev_type &
+			if ((cportinfo->cport_dev_type &
 			    SATA_VALID_DEV_TYPE) == 0) {
 				rval = -1;
 				goto out;
@@ -9965,6 +11026,7 @@
 static int
 sata_probe_device(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device)
 {
+	sata_pmport_info_t *pmportinfo;
 	sata_drive_info_t *sdinfo;
 	sata_drive_info_t new_sdinfo;	/* local drive info struct */
 	int rval;
@@ -9978,6 +11040,13 @@
 	mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst,
 	    sata_device->satadev_addr.cport)));
 
+	if (sata_device->satadev_addr.qual == SATA_ADDR_DPMPORT) {
+		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst,
+		    sata_device->satadev_addr.cport,
+		    sata_device->satadev_addr.pmport);
+		ASSERT(pmportinfo != NULL);
+	}
+
 	/* Get pointer to port-linked sata device info structure */
 	sdinfo = sata_get_device_info(sata_hba_inst, sata_device);
 	if (sdinfo != NULL) {
@@ -10041,11 +11110,14 @@
 			SATA_CPORT_DEV_TYPE(sata_hba_inst,
 			    sata_device->satadev_addr.cport) =
 			    sdinfo->satadrv_type;
-		else /* SATA_ADDR_DPMPORT */
+		else { /* SATA_ADDR_DPMPORT */
+			mutex_enter(&pmportinfo->pmport_mutex);
 			SATA_PMPORT_DEV_TYPE(sata_hba_inst,
 			    sata_device->satadev_addr.cport,
 			    sata_device->satadev_addr.pmport) =
 			    sdinfo->satadrv_type;
+			mutex_exit(&pmportinfo->pmport_mutex);
+		}
 		mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst,
 		    sata_device->satadev_addr.cport)));
 		return (SATA_SUCCESS);
@@ -10069,6 +11141,7 @@
 			    SATA_DTYPE_UNKNOWN;
 		else {
 			/* SATA_ADDR_DPMPORT */
+			mutex_enter(&pmportinfo->pmport_mutex);
 			if ((SATA_PMULT_INFO(sata_hba_inst,
 			    sata_device->satadev_addr.cport) != NULL) &&
 			    (SATA_PMPORT_INFO(sata_hba_inst,
@@ -10078,6 +11151,7 @@
 				    sata_device->satadev_addr.cport,
 				    sata_device->satadev_addr.pmport) =
 				    SATA_DTYPE_UNKNOWN;
+			mutex_exit(&pmportinfo->pmport_mutex);
 		}
 	}
 	mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst,
@@ -10085,7 +11159,6 @@
 	return (rval);
 }
 
-
 /*
  * Get pointer to sata_drive_info structure.
  *
@@ -10132,6 +11205,11 @@
 		if (pmport > SATA_NUM_PMPORTS(sata_hba_inst, cport))
 			return (NULL);
 
+		if (!(SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) &
+		    (SATA_STATE_PROBED | SATA_STATE_READY)))
+			/* Port multiplier port not probed yet */
+			return (NULL);
+
 		return (SATA_PMPORT_DRV_INFO(sata_hba_inst, cport, pmport));
 	}
 
@@ -10300,7 +11378,7 @@
 		cmn_err(CE_CONT, "?\t%s port %d\n",
 		    msg_buf, sdinfo->satadrv_addr.cport);
 	else
-		cmn_err(CE_CONT, "?\t%s port %d pmport %d\n",
+		cmn_err(CE_CONT, "?\t%s port %d:%d\n",
 		    msg_buf, sdinfo->satadrv_addr.cport,
 		    sdinfo->satadrv_addr.pmport);
 
@@ -10414,6 +11492,59 @@
 	}
 }
 
+/*
+ * Log/display port multiplier information
+ */
+static void
+sata_show_pmult_info(sata_hba_inst_t *sata_hba_inst,
+    sata_device_t *sata_device)
+{
+	_NOTE(ARGUNUSED(sata_hba_inst))
+
+	char msg_buf[MAXPATHLEN];
+	uint32_t gscr0, gscr1, gscr2, gscr64;
+
+	gscr0 = sata_device->satadev_gscr.gscr0;
+	gscr1 = sata_device->satadev_gscr.gscr1;
+	gscr2 = sata_device->satadev_gscr.gscr2;
+	gscr64 = sata_device->satadev_gscr.gscr64;
+
+	cmn_err(CE_CONT, "?Port Multiplier %d device-ports found at port %d",
+	    sata_device->satadev_add_info, sata_device->satadev_addr.cport);
+
+	(void) sprintf(msg_buf, "\tVendor_ID 0x%04x, Module_ID 0x%04x",
+	    gscr0 & 0xffff, (gscr0 >> 16) & 0xffff);
+	cmn_err(CE_CONT, "?%s", msg_buf);
+
+	(void) strcpy(msg_buf, "\tSupport SATA PMP Spec ");
+	if (gscr1 & (1 << 3))
+		(void) strlcat(msg_buf, "1.2", MAXPATHLEN);
+	else if (gscr1 & (1 << 2))
+		(void) strlcat(msg_buf, "1.1", MAXPATHLEN);
+	else if (gscr1 & (1 << 1))
+		(void) strlcat(msg_buf, "1.0", MAXPATHLEN);
+	else
+		(void) strlcat(msg_buf, "unknown", MAXPATHLEN);
+	cmn_err(CE_CONT, "?%s", msg_buf);
+
+	(void) strcpy(msg_buf, "\tSupport ");
+	if (gscr64 & (1 << 3))
+		(void) strlcat(msg_buf, "Asy-Notif, ",
+		    MAXPATHLEN);
+	if (gscr64 & (1 << 2))
+		(void) strlcat(msg_buf, "Dyn-SSC, ", MAXPATHLEN);
+	if (gscr64 & (1 << 1))
+		(void) strlcat(msg_buf, "Iss-PMREQ, ", MAXPATHLEN);
+	if (gscr64 & (1 << 0))
+		(void) strlcat(msg_buf, "BIST", MAXPATHLEN);
+	if ((gscr64 & 0xf) == 0)
+		(void) strlcat(msg_buf, "nothing", MAXPATHLEN);
+	cmn_err(CE_CONT, "?%s", msg_buf);
+
+	(void) sprintf(msg_buf, "\tNumber of exposed device fan-out ports: %d",
+	    gscr2 & SATA_PMULT_PORTNUM_MASK);
+	cmn_err(CE_CONT, "?%s", msg_buf);
+}
 
 /*
  * sata_save_drive_settings extracts current setting of the device and stores
@@ -11128,7 +12259,6 @@
 
 	sata_common_free_dma_rsrcs(spx);
 }
-
 /*
  * Fetch Device Identify data.
  * Send DEVICE IDENTIFY or IDENTIFY PACKET DEVICE (depending on a device type)
@@ -11631,19 +12761,6 @@
 
 
 /*
- * Update port SCR block
- */
-static void
-sata_update_port_scr(sata_port_scr_t *port_scr, sata_device_t *device)
-{
-	port_scr->sstatus = device->satadev_scr.sstatus;
-	port_scr->serror = device->satadev_scr.serror;
-	port_scr->scontrol = device->satadev_scr.scontrol;
-	port_scr->sactive = device->satadev_scr.sactive;
-	port_scr->snotific = device->satadev_scr.snotific;
-}
-
-/*
  * Update state and copy port ss* values from passed sata_device structure.
  * sata_address is validated - if not valid, nothing is changed in sata_scsi
  * configuration struct.
@@ -11655,54 +12772,60 @@
  */
 static void
 sata_update_port_info(sata_hba_inst_t *sata_hba_inst,
-	sata_device_t *sata_device)
-{
-	ASSERT(mutex_owned(&SATA_CPORT_MUTEX(sata_hba_inst,
-	    sata_device->satadev_addr.cport)));
+    sata_device_t *sata_device)
+{
+	sata_cport_info_t *cportinfo;
 
 	if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT ||
 	    sata_device->satadev_addr.qual == SATA_ADDR_DCPORT) {
-
-		sata_cport_info_t *cportinfo;
-
 		if (SATA_NUM_CPORTS(sata_hba_inst) <=
 		    sata_device->satadev_addr.cport)
 			return;
 
 		cportinfo = SATA_CPORT_INFO(sata_hba_inst,
 		    sata_device->satadev_addr.cport);
-		sata_update_port_scr(&cportinfo->cport_scr, sata_device);
+
+		ASSERT(mutex_owned(&cportinfo->cport_mutex));
+		cportinfo->cport_scr = sata_device->satadev_scr;
 
 		/* Preserve SATA_PSTATE_SHUTDOWN flag */
 		cportinfo->cport_state &= ~(SATA_PSTATE_PWRON |
 		    SATA_PSTATE_PWROFF | SATA_PSTATE_FAILED);
 		cportinfo->cport_state |=
 		    sata_device->satadev_state & SATA_PSTATE_VALID;
-	} else {
-		sata_pmport_info_t *pmportinfo;
-
-		if ((sata_device->satadev_addr.qual != SATA_ADDR_PMPORT) ||
-		    (sata_device->satadev_addr.qual != SATA_ADDR_DPMPORT) ||
-		    SATA_NUM_PMPORTS(sata_hba_inst,
-		    sata_device->satadev_addr.cport) <
-		    sata_device->satadev_addr.pmport)
-			return;
-
-		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst,
-		    sata_device->satadev_addr.cport,
-		    sata_device->satadev_addr.pmport);
-		sata_update_port_scr(&pmportinfo->pmport_scr, sata_device);
-
-		/* Preserve SATA_PSTATE_SHUTDOWN flag */
-		pmportinfo->pmport_state &=
-		    ~(SATA_PSTATE_PWRON | SATA_PSTATE_PWROFF |
-		    SATA_PSTATE_FAILED);
-		pmportinfo->pmport_state |=
-		    sata_device->satadev_state & SATA_PSTATE_VALID;
-	}
-}
-
-
+	}
+}
+
+void
+sata_update_pmport_info(sata_hba_inst_t *sata_hba_inst,
+    sata_device_t *sata_device)
+{
+	sata_pmport_info_t *pmportinfo;
+
+	if ((sata_device->satadev_addr.qual != SATA_ADDR_PMPORT &&
+	    sata_device->satadev_addr.qual != SATA_ADDR_DPMPORT) ||
+	    SATA_NUM_PMPORTS(sata_hba_inst,
+	    sata_device->satadev_addr.cport) <
+	    sata_device->satadev_addr.pmport) {
+		SATADBG1(SATA_DBG_PMULT, sata_hba_inst,
+		    "sata_update_port_info: error address %p.",
+		    &sata_device->satadev_addr);
+		return;
+	}
+
+	pmportinfo = SATA_PMPORT_INFO(sata_hba_inst,
+	    sata_device->satadev_addr.cport,
+	    sata_device->satadev_addr.pmport);
+
+	ASSERT(mutex_owned(&pmportinfo->pmport_mutex));
+	pmportinfo->pmport_scr = sata_device->satadev_scr;
+
+	/* Preserve SATA_PSTATE_SHUTDOWN flag */
+	pmportinfo->pmport_state &=
+	    ~(SATA_PSTATE_PWRON | SATA_PSTATE_PWROFF | SATA_PSTATE_FAILED);
+	pmportinfo->pmport_state |=
+	    sata_device->satadev_state & SATA_PSTATE_VALID;
+}
 
 /*
  * Extract SATA port specification from an IOCTL argument.
@@ -11710,7 +12833,7 @@
  * This function return the port the user land send us as is, unless it
  * cannot retrieve port spec, then -1 is returned.
  *
- * Note: Only cport  - no port multiplier port.
+ * Support port multiplier.
  */
 static int32_t
 sata_get_port_num(sata_hba_inst_t *sata_hba_inst, struct devctl_iocdata *dcp)
@@ -11736,22 +12859,41 @@
  * way as a scsi target number.
  * At this moment it carries only cport number.
  *
- * No PMult hotplug support.
+ * PMult hotplug is supported now.
  *
  * Returns dev_info_t pointer if target device was found, NULL otherwise.
  */
 
 static dev_info_t *
-sata_get_target_dip(dev_info_t *dip, int32_t port)
+sata_get_target_dip(dev_info_t *dip, uint8_t cport, uint8_t pmport)
 {
 	dev_info_t	*cdip = NULL;
 	int		target, tgt;
-	int		ncport;
 	int 		circ;
-
-	ncport = port & SATA_CFGA_CPORT_MASK;
-	target = SATA_TO_SCSI_TARGET(ncport, 0, SATA_ADDR_DCPORT);
-
+	uint8_t		qual;
+
+	sata_hba_inst_t	*sata_hba_inst;
+	scsi_hba_tran_t *scsi_hba_tran;
+
+	/* Get target id */
+	scsi_hba_tran = ddi_get_driver_private(dip);
+	if (scsi_hba_tran == NULL)
+		return (NULL);
+
+	sata_hba_inst = scsi_hba_tran->tran_hba_private;
+
+	if (sata_hba_inst == NULL)
+		return (NULL);
+
+	/* Identify a port-mult by cport_info.cport_dev_type */
+	if (SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) == SATA_DTYPE_PMULT)
+		qual = SATA_ADDR_DPMPORT;
+	else
+		qual = SATA_ADDR_DCPORT;
+
+	target = SATA_TO_SCSI_TARGET(cport, pmport, qual);
+
+	/* Retrieve target dip */
 	ndi_devi_enter(dip, &circ);
 	for (cdip = ddi_get_child(dip); cdip != NULL; ) {
 		dev_info_t *next = ddi_get_next_sibling(cdip);
@@ -11783,9 +12925,6 @@
  * the AP - it is not a sata_address.
  * It is a combination of cport, pmport and address qualifier, encoded same
  * way as a scsi target number.
- * At this moment it carries only cport number.
- *
- * No PMult hotplug support.
  *
  * Returns dev_info_t pointer if target device was found, NULL otherwise.
  */
@@ -11843,27 +12982,32 @@
  * Failure of the port_deactivate may keep port in the physically active state,
  * or may fail the port.
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * NOTE: Port multiplier is supported.
  */
 
 static int
 sata_ioctl_disconnect(sata_hba_inst_t *sata_hba_inst,
     sata_device_t *sata_device)
 {
-	sata_drive_info_t *sdinfo = NULL;
+	sata_drive_info_t *sdinfo = NULL, *subsdinfo = NULL;
 	sata_cport_info_t *cportinfo = NULL;
 	sata_pmport_info_t *pmportinfo = NULL;
 	sata_pmult_info_t *pmultinfo = NULL;
-	dev_info_t *tdip;
+	sata_device_t 		subsdevice;
 	int cport, pmport, qual;
 	int rval = SATA_SUCCESS;
+	int npmport = 0;
 	int rv = 0;
 
 	cport = sata_device->satadev_addr.cport;
 	pmport = sata_device->satadev_addr.pmport;
 	qual = sata_device->satadev_addr.qual;
 
-	ASSERT(qual == SATA_ADDR_CPORT || qual == SATA_ADDR_PMPORT);
+	ASSERT(qual == SATA_ADDR_DCPORT || qual == SATA_ADDR_DPMPORT);
+	if (qual == SATA_ADDR_DCPORT)
+		qual = SATA_ADDR_CPORT;
+	else
+		qual = SATA_ADDR_PMPORT;
 
 	/*
 	 * DEVCTL_AP_DISCONNECT invokes sata_hba_inst->satahba_tran->
@@ -11878,131 +13022,184 @@
 	/* Check the current state of the port */
 	rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
 	    (SATA_DIP(sata_hba_inst), sata_device);
-	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
-	sata_update_port_info(sata_hba_inst, sata_device);
-	if (rval != SATA_SUCCESS ||
-	    (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) {
-		/* Device port status is unknown or it is in failed state */
-		if (qual == SATA_ADDR_PMPORT) {
-			SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) =
+
+	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
+
+	/*
+	 * Processing port mulitiplier
+	 */
+	if (qual == SATA_ADDR_CPORT &&
+	    SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) == SATA_DTYPE_PMULT) {
+		mutex_enter(&cportinfo->cport_mutex);
+
+		/* Check controller port status */
+		sata_update_port_info(sata_hba_inst, sata_device);
+		if (rval != SATA_SUCCESS ||
+		    (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) {
+			/*
+			 * Device port status is unknown or it is in failed
+			 * state
+			 */
+			SATA_CPORT_STATE(sata_hba_inst, cport) =
 			    SATA_PSTATE_FAILED;
 			SATADBG1(SATA_DBG_IOCTL_IF, sata_hba_inst,
 			    "sata_hba_ioctl: connect: failed to deactivate "
 			    "SATA port %d", cport);
-		} else {
-			SATA_CPORT_STATE(sata_hba_inst, cport) =
+			mutex_exit(&cportinfo->cport_mutex);
+			return (EIO);
+		}
+
+		/* Disconnect all sub-devices. */
+		pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo);
+		if (pmultinfo != NULL) {
+
+			for (npmport = 0; npmport < SATA_NUM_PMPORTS(
+			    sata_hba_inst, cport); npmport ++) {
+				subsdinfo = SATA_PMPORT_DRV_INFO(
+				    sata_hba_inst, cport, npmport);
+				if (subsdinfo == NULL)
+					continue;
+
+				subsdevice.satadev_addr = subsdinfo->
+				    satadrv_addr;
+
+				mutex_exit(&cportinfo->cport_mutex);
+				if (sata_ioctl_disconnect(sata_hba_inst,
+				    &subsdevice) == SATA_SUCCESS) {
+					SATADBG2(SATA_DBG_PMULT, sata_hba_inst,
+					"[Remove] device at port %d:%d "
+					"successfully.", cport, npmport);
+				}
+				mutex_enter(&cportinfo->cport_mutex);
+			}
+		}
+
+		/* Disconnect the port multiplier */
+		cportinfo->cport_state &= ~SATA_STATE_READY;
+		mutex_exit(&cportinfo->cport_mutex);
+
+		sata_device->satadev_addr.qual = qual;
+		rval = (*SATA_PORT_DEACTIVATE_FUNC(sata_hba_inst))
+		    (SATA_DIP(sata_hba_inst), sata_device);
+
+		sata_gen_sysevent(sata_hba_inst, &sata_device->satadev_addr,
+		    SE_NO_HINT);
+
+		mutex_enter(&cportinfo->cport_mutex);
+		sata_update_port_info(sata_hba_inst, sata_device);
+		if (rval != SATA_SUCCESS &&
+		    sata_device->satadev_state & SATA_PSTATE_FAILED) {
+			cportinfo->cport_state = SATA_PSTATE_FAILED;
+			rv = EIO;
+		} else {
+			cportinfo->cport_state |= SATA_PSTATE_SHUTDOWN;
+		}
+		mutex_exit(&cportinfo->cport_mutex);
+
+		return (rv);
+	}
+
+	/*
+	 * Process non-port-multiplier device - it could be a drive connected
+	 * to a port multiplier port or a controller port.
+	 */
+	if (qual == SATA_ADDR_PMPORT) {
+		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
+		mutex_enter(&pmportinfo->pmport_mutex);
+		sata_update_pmport_info(sata_hba_inst, sata_device);
+		if (rval != SATA_SUCCESS ||
+		    (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) {
+			SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) =
 			    SATA_PSTATE_FAILED;
 			SATADBG2(SATA_DBG_IOCTL_IF, sata_hba_inst,
 			    "sata_hba_ioctl: connect: failed to deactivate "
 			    "SATA port %d:%d", cport, pmport);
-		}
-		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
-		    cport)->cport_mutex);
-		return (EIO);
-	}
-	/*
-	 * Set port's dev_state to not ready - this will disable
-	 * an access to a potentially attached device.
-	 */
-	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
-	if (qual == SATA_ADDR_PMPORT) {
-		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
+			mutex_exit(&pmportinfo->pmport_mutex);
+			return (EIO);
+		}
+
 		if (pmportinfo->pmport_dev_type != SATA_DTYPE_NONE) {
 			sdinfo = pmportinfo->pmport_sata_drive;
 			ASSERT(sdinfo != NULL);
 		}
+
+		/*
+		 * Set port's dev_state to not ready - this will disable
+		 * an access to a potentially attached device.
+		 */
 		pmportinfo->pmport_state &= ~SATA_STATE_READY;
-	} else {
-		/* Assuming cport */
-
-		if (cportinfo->cport_dev_type != SATA_DTYPE_NONE) {
-			if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) {
-				pmultinfo =
-				    cportinfo->cport_devp.cport_sata_pmult;
-				ASSERT(pmultinfo != NULL);
-			} else {
-				sdinfo = cportinfo->cport_devp.cport_sata_drive;
-			}
+
+		/* Remove and release sata_drive info structure. */
+		if (sdinfo != NULL) {
+			if ((sdinfo->satadrv_type &
+			    SATA_VALID_DEV_TYPE) != 0) {
+				/*
+				 * If a target node exists, try to offline
+				 * a device and remove target node.
+				 */
+				mutex_exit(&pmportinfo->pmport_mutex);
+				(void) sata_offline_device(sata_hba_inst,
+				    sata_device, sdinfo);
+				mutex_enter(&pmportinfo->pmport_mutex);
+			}
+
+			SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL;
+			pmportinfo->pmport_dev_type = SATA_DTYPE_NONE;
+			(void) kmem_free((void *)sdinfo,
+			    sizeof (sata_drive_info_t));
+		}
+		mutex_exit(&pmportinfo->pmport_mutex);
+
+	} else if (qual == SATA_ADDR_CPORT) {
+		mutex_enter(&cportinfo->cport_mutex);
+		sata_update_port_info(sata_hba_inst, sata_device);
+		if (rval != SATA_SUCCESS ||
+		    (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) {
+			/*
+			 * Device port status is unknown or it is in failed
+			 * state
+			 */
+			SATA_CPORT_STATE(sata_hba_inst, cport) =
+			    SATA_PSTATE_FAILED;
+			SATADBG1(SATA_DBG_IOCTL_IF, sata_hba_inst,
+			    "sata_hba_ioctl: connect: failed to deactivate "
+			    "SATA port %d", cport);
+			mutex_exit(&cportinfo->cport_mutex);
+			return (EIO);
+		}
+
+		if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) {
+			pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo);
+			ASSERT(pmultinfo != NULL);
+		} else if (cportinfo->cport_dev_type != SATA_DTYPE_NONE) {
+			sdinfo = SATA_CPORTINFO_DRV_INFO(cportinfo);
+			ASSERT(sdinfo != NULL);
 		}
 		cportinfo->cport_state &= ~SATA_STATE_READY;
-	}
-	if (sdinfo != NULL) {
-		if ((sdinfo->satadrv_type & (SATA_VALID_DEV_TYPE)) != 0) {
-			/*
-			 * If a target node exists, try to offline
-			 * a device and remove target node.
-			 */
-			mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
-			    cport)->cport_mutex);
-			/* We are addressing attached device, not a port */
-			sata_device->satadev_addr.qual =
-			    sdinfo->satadrv_addr.qual;
-			tdip = sata_get_scsi_target_dip(SATA_DIP(sata_hba_inst),
-			    &sata_device->satadev_addr);
-			if (tdip != NULL && ndi_devi_offline(tdip,
-			    NDI_DEVI_REMOVE) != NDI_SUCCESS) {
-				/*
-				 * Problem
-				 * The target node remained attached.
-				 * This happens when the device file was open
-				 * or a node was waiting for resources.
-				 * Cannot do anything about it.
-				 */
-				if (qual == SATA_ADDR_CPORT) {
-					SATA_LOG_D((sata_hba_inst, CE_WARN,
-					    "sata_hba_ioctl: disconnect: could "
-					    "not unconfigure device before "
-					    "disconnecting the SATA port %d",
-					    cport));
-				} else {
-					SATA_LOG_D((sata_hba_inst, CE_WARN,
-					    "sata_hba_ioctl: disconnect: could "
-					    "not unconfigure device before "
-					    "disconnecting the SATA port %d:%d",
-					    cport, pmport));
-				}
-				/*
-				 * Set DEVICE REMOVED state in the target
-				 * node. It will prevent access to the device
-				 * even when a new device is attached, until
-				 * the old target node is released, removed and
-				 * recreated for a new  device.
-				 */
-				sata_set_device_removed(tdip);
-
-				/*
-				 * Instruct event daemon to try the target
-				 * node cleanup later.
-				 */
-				sata_set_target_node_cleanup(
-				    sata_hba_inst, &sata_device->satadev_addr);
-			}
-			mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
-			    cport)->cport_mutex);
-		}
-
-		/* Remove and release sata_drive info structure. */
-		if (pmportinfo != NULL) {
-			SATA_PMPORT_DRV_INFO(sata_hba_inst, cport, pmport) =
-			    NULL;
-			pmportinfo->pmport_dev_type = SATA_DTYPE_NONE;
-		} else {
+
+		if (sdinfo != NULL) {
+			if ((sdinfo->satadrv_type &
+			    SATA_VALID_DEV_TYPE) != 0) {
+				/*
+				 * If a target node exists, try to offline
+				 * a device and remove target node.
+				 */
+				mutex_exit(&cportinfo->cport_mutex);
+				(void) sata_offline_device(sata_hba_inst,
+				    sata_device, sdinfo);
+				mutex_enter(&cportinfo->cport_mutex);
+			}
+
 			SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL;
 			cportinfo->cport_dev_type = SATA_DTYPE_NONE;
-		}
-		(void) kmem_free((void *)sdinfo, sizeof (sata_drive_info_t));
-	}
-#if 0
-	else if (pmultinfo != NULL) {
-		/*
-		 * Port Multiplier itself needs special handling.
-		 * All device ports need to be processed here!
-		 */
-	}
-#endif
-	mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
+			(void) kmem_free((void *)sdinfo,
+			    sizeof (sata_drive_info_t));
+		}
+		mutex_exit(&cportinfo->cport_mutex);
+	}
+
 	/* Just ask HBA driver to deactivate port */
-	/*	sata_device->satadev_addr.qual = SATA_ADDR_DCPORT; */
+	sata_device->satadev_addr.qual = qual;
 
 	rval = (*SATA_PORT_DEACTIVATE_FUNC(sata_hba_inst))
 	    (SATA_DIP(sata_hba_inst), sata_device);
@@ -12014,33 +13211,46 @@
 	sata_gen_sysevent(sata_hba_inst, &sata_device->satadev_addr,
 	    SE_NO_HINT);
 
-	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
-	sata_update_port_info(sata_hba_inst, sata_device);
-
-	if (rval != SATA_SUCCESS) {
-		/*
-		 * Port deactivation failure - do not
-		 * change port state unless the state
-		 * returned by HBA indicates a port failure.
-		 * NOTE: device structures were released, so devices now are
-		 * invisible! Port reset is needed to re-enumerate devices.
-		 */
-		if (sata_device->satadev_state & SATA_PSTATE_FAILED) {
-			if (pmportinfo != NULL)
-				pmportinfo->pmport_state = SATA_PSTATE_FAILED;
-			else
-				cportinfo->cport_state = SATA_PSTATE_FAILED;
+	if (qual == SATA_ADDR_PMPORT) {
+		mutex_enter(&pmportinfo->pmport_mutex);
+		sata_update_pmport_info(sata_hba_inst, sata_device);
+
+		if (rval != SATA_SUCCESS &&
+		    sata_device->satadev_state & SATA_PSTATE_FAILED) {
+			/*
+			 * Port deactivation failure - do not change port
+			 * state unless the state returned by HBA indicates a
+			 * port failure.
+			 *
+			 * NOTE: device structures were released, so devices
+			 * now are invisible! Port reset is needed to
+			 * re-enumerate devices.
+			 */
+			pmportinfo->pmport_state = SATA_PSTATE_FAILED;
 			rv = EIO;
-		}
-	} else {
-		/*
-		 * Deactivation succeded. From now on the sata framework
-		 * will not care what is happening to the device, until
-		 * the port is activated again.
-		 */
-		cportinfo->cport_state |= SATA_PSTATE_SHUTDOWN;
-	}
-	mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
+		} else {
+			/*
+			 * Deactivation succeded. From now on the sata framework
+			 * will not care what is happening to the device, until
+			 * the port is activated again.
+			 */
+			pmportinfo->pmport_state |= SATA_PSTATE_SHUTDOWN;
+		}
+		mutex_exit(&pmportinfo->pmport_mutex);
+	} else if (qual == SATA_ADDR_CPORT) {
+		mutex_enter(&cportinfo->cport_mutex);
+		sata_update_port_info(sata_hba_inst, sata_device);
+
+		if (rval != SATA_SUCCESS &&
+		    sata_device->satadev_state & SATA_PSTATE_FAILED) {
+			cportinfo->cport_state = SATA_PSTATE_FAILED;
+			rv = EIO;
+		} else {
+			cportinfo->cport_state |= SATA_PSTATE_SHUTDOWN;
+		}
+		mutex_exit(&cportinfo->cport_mutex);
+	}
+
 	return (rv);
 }
 
@@ -12066,21 +13276,29 @@
  * This operation may remove port failed state and will
  * try to make port active and in good standing.
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * NOTE: Port multiplier is supported.
  */
 
 static int
 sata_ioctl_connect(sata_hba_inst_t *sata_hba_inst,
     sata_device_t *sata_device)
 {
-	int cport, pmport, qual;
+	sata_pmport_info_t	*pmportinfo = NULL;
+	uint8_t cport, pmport, qual;
 	int rv = 0;
 
 	cport = sata_device->satadev_addr.cport;
 	pmport = sata_device->satadev_addr.pmport;
 	qual = sata_device->satadev_addr.qual;
 
-	ASSERT(qual == SATA_ADDR_CPORT || qual == SATA_ADDR_PMPORT);
+	ASSERT(qual == SATA_ADDR_DCPORT || qual == SATA_ADDR_DPMPORT);
+	if (qual == SATA_ADDR_DCPORT)
+		qual = SATA_ADDR_CPORT;
+	else
+		qual = SATA_ADDR_PMPORT;
+
+	if (qual == SATA_ADDR_PMPORT)
+		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
 
 	/*
 	 * DEVCTL_AP_CONNECT would invoke sata_hba_inst->
@@ -12098,40 +13316,46 @@
 		/*
 		 * Port activation failure.
 		 */
-		mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
-		    cport)->cport_mutex);
-		sata_update_port_info(sata_hba_inst, sata_device);
-		if (sata_device->satadev_state & SATA_PSTATE_FAILED) {
-			if (qual == SATA_ADDR_DCPORT) {
+		if (qual == SATA_ADDR_CPORT) {
+			mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
+			    cport)->cport_mutex);
+			sata_update_port_info(sata_hba_inst, sata_device);
+			if (sata_device->satadev_state & SATA_PSTATE_FAILED) {
 				SATA_CPORT_STATE(sata_hba_inst, cport) =
 				    SATA_PSTATE_FAILED;
 				SATADBG1(SATA_DBG_IOCTL_IF, sata_hba_inst,
 				    "sata_hba_ioctl: connect: failed to "
 				    "activate SATA port %d", cport);
-			} else { /* port multiplier device port */
+			}
+			mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
+			    cport)->cport_mutex);
+		} else { /* port multiplier device port */
+			mutex_enter(&pmportinfo->pmport_mutex);
+			sata_update_pmport_info(sata_hba_inst, sata_device);
+			if (sata_device->satadev_state & SATA_PSTATE_FAILED) {
 				SATA_PMPORT_STATE(sata_hba_inst, cport,
 				    pmport) = SATA_PSTATE_FAILED;
 				SATADBG2(SATA_DBG_IOCTL_IF, sata_hba_inst,
 				    "sata_hba_ioctl: connect: failed to "
 				    "activate SATA port %d:%d", cport, pmport);
-
-			}
-		}
-		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
-		    cport)->cport_mutex);
-		SATADBG2(SATA_DBG_IOCTL_IF, sata_hba_inst,
-		    "sata_hba_ioctl: connect: failed to activate SATA "
-		    "port %d:%d", cport, pmport);
+			}
+			mutex_exit(&pmportinfo->pmport_mutex);
+		}
 		return (EIO);
 	}
 
 	/* Virgin port state - will be updated by the port re-probe. */
-	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
-	if (qual == SATA_ADDR_CPORT)
+	if (qual == SATA_ADDR_CPORT) {
+		mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
+		    cport)->cport_mutex);
 		SATA_CPORT_STATE(sata_hba_inst, cport) = 0;
-	else /* port multiplier device port */
+		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
+		    cport)->cport_mutex);
+	} else { /* port multiplier device port */
+		mutex_enter(&pmportinfo->pmport_mutex);
 		SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) = 0;
-	mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
+		mutex_exit(&pmportinfo->pmport_mutex);
+	}
 
 	/*
 	 * Probe the port to find its state and attached device.
@@ -12154,15 +13378,22 @@
 	if (sata_device->satadev_type != SATA_DTYPE_NONE) {
 
 		if (qual == SATA_ADDR_CPORT) {
-			sata_log(sata_hba_inst, CE_WARN,
-			    "SATA device detected at port %d", cport);
-			if (sata_device->satadev_type == SATA_DTYPE_UNKNOWN) {
+			if (sata_device->satadev_type == SATA_DTYPE_PMULT) {
+				sata_log(sata_hba_inst, CE_WARN,
+				    "SATA port multiplier detected "
+				    "at port %d", cport);
+			} else {
+				sata_log(sata_hba_inst, CE_WARN,
+				    "SATA device detected at port %d", cport);
+				if (sata_device->satadev_type ==
+				    SATA_DTYPE_UNKNOWN) {
 				/*
 				 * A device was not successfully identified
 				 */
 				sata_log(sata_hba_inst, CE_WARN,
 				    "Could not identify SATA "
 				    "device at port %d", cport);
+				}
 			}
 		} else { /* port multiplier device port */
 			sata_log(sata_hba_inst, CE_WARN,
@@ -12250,7 +13481,7 @@
  * If target node does not exist, a new target node is created. In both cases
  * an attempt is made to online (configure) the device.
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * NOTE: Port multiplier is supported.
  */
 static int
 sata_ioctl_configure(sata_hba_inst_t *sata_hba_inst,
@@ -12271,22 +13502,37 @@
 	/* Get current port state */
 	rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
 	    (SATA_DIP(sata_hba_inst), sata_device);
-	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
-	sata_update_port_info(sata_hba_inst, sata_device);
-
-	if (rval != SATA_SUCCESS ||
-	    (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) {
-		/*
-		 * Obviously, device on a failed port is not visible
-		 */
-		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
-		return (ENXIO);
-	}
 
 	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
-	if (qual == SATA_ADDR_PMPORT)
+	if (qual == SATA_ADDR_DPMPORT) {
 		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
-	mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
+		mutex_enter(&pmportinfo->pmport_mutex);
+		sata_update_pmport_info(sata_hba_inst, sata_device);
+		if (rval != SATA_SUCCESS ||
+		    (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) {
+			/*
+			 * Obviously, device on a failed port is not visible
+			 */
+			mutex_exit(&pmportinfo->pmport_mutex);
+			return (ENXIO);
+		}
+		mutex_exit(&pmportinfo->pmport_mutex);
+	} else {
+		mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
+		    cport)->cport_mutex);
+		sata_update_port_info(sata_hba_inst, sata_device);
+		if (rval != SATA_SUCCESS ||
+		    (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) {
+			/*
+			 * Obviously, device on a failed port is not visible
+			 */
+			mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
+			    cport)->cport_mutex);
+			return (ENXIO);
+		}
+		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
+		    cport)->cport_mutex);
+	}
 
 	if ((sata_device->satadev_state & SATA_PSTATE_SHUTDOWN) != 0) {
 		/* need to activate port */
@@ -12304,40 +13550,52 @@
 			 * unless the state returned by HBA indicates a port
 			 * failure.
 			 */
-			mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
-			    cport)->cport_mutex);
-			sata_update_port_info(sata_hba_inst, sata_device);
-			if (sata_device->satadev_state & SATA_PSTATE_FAILED) {
-				if (qual == SATA_ADDR_PMPORT)
+			if (qual == SATA_ADDR_DPMPORT) {
+				mutex_enter(&pmportinfo->pmport_mutex);
+				sata_update_pmport_info(sata_hba_inst,
+				    sata_device);
+				if (sata_device->satadev_state &
+				    SATA_PSTATE_FAILED)
 					pmportinfo->pmport_state =
 					    SATA_PSTATE_FAILED;
-				else
+				mutex_exit(&pmportinfo->pmport_mutex);
+			} else {
+				mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
+				    cport)->cport_mutex);
+				sata_update_port_info(sata_hba_inst,
+				    sata_device);
+				if (sata_device->satadev_state &
+				    SATA_PSTATE_FAILED)
 					cportinfo->cport_state =
 					    SATA_PSTATE_FAILED;
-			}
-			mutex_exit(&SATA_CPORT_INFO(
-			    sata_hba_inst, cport)->cport_mutex);
-			SATA_LOG_D((sata_hba_inst, CE_WARN,
-			    "sata_hba_ioctl: configure: "
-			    "failed to activate SATA port %d:%d",
-			    cport, pmport));
-			return (EIO);
-		}
-		/*
-		 * Generate sysevent - EC_DR / ESC_DR_AP_STATE_CHANGE
-		 * without the hint.
-		 */
-		sata_gen_sysevent(sata_hba_inst,
-		    &sata_device->satadev_addr, SE_NO_HINT);
-
-		mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->
-		    cport_mutex);
-		/* Virgin port state */
-		if (qual == SATA_ADDR_PMPORT)
-			pmportinfo->pmport_state = 0;
-		else
-			cportinfo->cport_state = 0;
-		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
+				mutex_exit(&SATA_CPORT_INFO(
+				    sata_hba_inst, cport)->cport_mutex);
+			}
+		}
+		SATA_LOG_D((sata_hba_inst, CE_WARN,
+		    "sata_hba_ioctl: configure: "
+		    "failed to activate SATA port %d:%d",
+		    cport, pmport));
+		return (EIO);
+	}
+	/*
+	 * Generate sysevent - EC_DR / ESC_DR_AP_STATE_CHANGE
+	 * without the hint.
+	 */
+	sata_gen_sysevent(sata_hba_inst,
+	    &sata_device->satadev_addr, SE_NO_HINT);
+
+	/* Virgin port state */
+	if (qual == SATA_ADDR_DPMPORT) {
+		mutex_enter(&pmportinfo->pmport_mutex);
+		pmportinfo->pmport_state = 0;
+		mutex_exit(&pmportinfo->pmport_mutex);
+	} else {
+		mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
+		    cport)-> cport_mutex);
+		cportinfo->cport_state = 0;
+		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
+		    cport)->cport_mutex);
 	}
 	/*
 	 * Always reprobe port, to get current device info.
@@ -12347,7 +13605,7 @@
 		return (EIO);
 
 	if (sata_device->satadev_type != SATA_DTYPE_NONE && target == FALSE) {
-		if (qual == SATA_ADDR_PMPORT) {
+		if (qual == SATA_ADDR_DPMPORT) {
 			/*
 			 * That's the transition from "inactive" port
 			 * to active one with device attached.
@@ -12379,7 +13637,7 @@
 	 * For now, configure only disks and other valid target devices.
 	 */
 	if (!(sata_device->satadev_type & SATA_VALID_DEV_TYPE)) {
-		if (qual == SATA_ADDR_CPORT) {
+		if (qual == SATA_ADDR_DCPORT) {
 			if (sata_device->satadev_type == SATA_DTYPE_UNKNOWN) {
 				/*
 				 * A device was not successfully identified
@@ -12411,7 +13669,7 @@
 	 * to clear device reset condition.
 	 */
 	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
-	if (qual == SATA_ADDR_PMPORT)
+	if (qual == SATA_ADDR_DPMPORT)
 		sata_device->satadev_addr.qual = SATA_ADDR_DPMPORT;
 	else
 		sata_device->satadev_addr.qual = SATA_ADDR_DCPORT;
@@ -12466,29 +13724,34 @@
 			    "%d:%d failed", cport, pmport));
 			return (EIO);
 		}
-		mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
-		    cport)->cport_mutex);
-
-		if (qual == SATA_ADDR_DPMPORT)
+
+		if (qual == SATA_ADDR_DPMPORT) {
+			mutex_enter(&pmportinfo->pmport_mutex);
 			pmportinfo->pmport_tgtnode_clean = B_TRUE;
-		else
+			mutex_exit(&pmportinfo->pmport_mutex);
+		} else {
+			mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
+			    cport)->cport_mutex);
 			cportinfo-> cport_tgtnode_clean = B_TRUE;
-
-		mutex_exit(&SATA_CPORT_INFO(
-		    sata_hba_inst, cport)->cport_mutex);
+			mutex_exit(&SATA_CPORT_INFO(
+			    sata_hba_inst, cport)->cport_mutex);
+		}
 	} else {
 		/*
 		 * No target node - need to create a new target node.
 		 */
-		mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->
-		    cport_mutex);
-		if (qual == SATA_ADDR_DPMPORT)
+		if (qual == SATA_ADDR_DPMPORT) {
+			mutex_enter(&pmportinfo->pmport_mutex);
 			pmportinfo->pmport_tgtnode_clean = B_TRUE;
-		else
+			mutex_exit(&pmportinfo->pmport_mutex);
+		} else {
+			mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->
+			    cport_mutex);
 			cportinfo-> cport_tgtnode_clean = B_TRUE;
-
-		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->
-		    cport_mutex);
+			mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->
+			    cport_mutex);
+		}
+
 		tdip = sata_create_target_node(SATA_DIP(sata_hba_inst),
 		    sata_hba_inst, &sata_device->satadev_addr);
 		if (tdip == NULL) {
@@ -12510,7 +13773,7 @@
  * Even if the unconfigure fails, proceed with the
  * port deactivation.
  *
- * NOTE: Port Multiplier code is not completed and tested.
+ * NOTE: Port Multiplier is supported now.
  */
 
 static int
@@ -12519,10 +13782,13 @@
 {
 	int cport, pmport, qual;
 	int rval, rv = 0;
+	int npmport;
 	sata_cport_info_t *cportinfo;
-	sata_pmport_info_t *pmportinfo = NULL;
+	sata_pmport_info_t *pmportinfo;
+	sata_pmult_info_t *pmultinfo;
 	dev_info_t *tdip;
 	sata_drive_info_t *sdinfo = NULL;
+	sata_device_t subsdevice;
 
 	/* Sanity check */
 	if (SATA_PORT_DEACTIVATE_FUNC(sata_hba_inst) == NULL)
@@ -12532,33 +13798,101 @@
 	pmport = sata_device->satadev_addr.pmport;
 	qual = sata_device->satadev_addr.qual;
 
+	/* SCSI_TO_SATA_ADDR_QUAL() translate ap_id into a device qualifier */
+	ASSERT(qual == SATA_ADDR_DCPORT || qual == SATA_ADDR_DPMPORT);
+	if (qual == SATA_ADDR_DCPORT)
+		qual = SATA_ADDR_CPORT;
+	else
+		qual = SATA_ADDR_PMPORT;
+
+	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
+	if (qual == SATA_ADDR_PMPORT)
+		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
+
+	/*
+	 * Processing port multiplier
+	 */
+	if (qual == SATA_ADDR_CPORT &&
+	    SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) == SATA_DTYPE_PMULT) {
+		mutex_enter(&cportinfo->cport_mutex);
+
+		/* Deactivate all sub-deices */
+		pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo);
+		if (pmultinfo != NULL) {
+			for (npmport = 0; npmport < SATA_NUM_PMPORTS(
+			    sata_hba_inst, cport); npmport++) {
+
+				subsdevice.satadev_addr.cport = cport;
+				subsdevice.satadev_addr.pmport =
+				    (uint8_t)npmport;
+				subsdevice.satadev_addr.qual =
+				    SATA_ADDR_DPMPORT;
+
+				SATADBG2(SATA_DBG_PMULT, sata_hba_inst,
+				    "sata_hba_ioctl: deactivate: trying to "
+				    "deactivate SATA port %d:%d",
+				    cport, npmport);
+
+				mutex_exit(&cportinfo->cport_mutex);
+				if (sata_ioctl_deactivate(sata_hba_inst,
+				    &subsdevice) == SATA_SUCCESS) {
+					SATADBG2(SATA_DBG_PMULT, sata_hba_inst,
+					    "[Deactivate] device at port %d:%d "
+					    "successfully.", cport, npmport);
+				}
+				mutex_enter(&cportinfo->cport_mutex);
+			}
+		}
+
+		/* Deactivate the port multiplier now. */
+		cportinfo->cport_state &= ~SATA_STATE_READY;
+		mutex_exit(&cportinfo->cport_mutex);
+
+		sata_device->satadev_addr.qual = qual;
+		rval = (*SATA_PORT_DEACTIVATE_FUNC(sata_hba_inst))
+		    (SATA_DIP(sata_hba_inst), sata_device);
+
+		sata_gen_sysevent(sata_hba_inst, &sata_device->satadev_addr,
+		    SE_NO_HINT);
+
+		mutex_enter(&cportinfo->cport_mutex);
+		sata_update_port_info(sata_hba_inst, sata_device);
+		if (rval != SATA_SUCCESS) {
+			if (sata_device->satadev_state & SATA_PSTATE_FAILED) {
+				cportinfo->cport_state = SATA_PSTATE_FAILED;
+			}
+			rv = EIO;
+		} else {
+			cportinfo->cport_state |= SATA_PSTATE_SHUTDOWN;
+		}
+		mutex_exit(&cportinfo->cport_mutex);
+
+		return (rv);
+	}
+
+	/*
+	 * Process non-port-multiplier device - it could be a drive connected
+	 * to a port multiplier port or a controller port.
+	 */
 	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
-	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
 	if (qual == SATA_ADDR_CPORT) {
 		sata_device->satadev_addr.qual = SATA_ADDR_DCPORT;
 		if (cportinfo->cport_dev_type != SATA_DTYPE_NONE) {
-			/*
-			 * For now, assume that port multiplier is not
-			 * supported, i.e. deal only with valid devices
-			 */
+			/* deal only with valid devices */
 			if ((cportinfo->cport_dev_type &
 			    SATA_VALID_DEV_TYPE) != 0)
 				sdinfo = SATA_CPORTINFO_DRV_INFO(cportinfo);
-			/*
-			 * If attached device is a port multiplier, we will
-			 * have to unconfigure all devices attached to the
-			 * port multiplier. Add this code here.
-			 */
 		}
 		cportinfo->cport_state &= ~SATA_STATE_READY;
 	} else {
 		/* Port multiplier device port */
-		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
+		mutex_enter(&pmportinfo->pmport_mutex);
 		sata_device->satadev_addr.qual = SATA_ADDR_DPMPORT;
 		if (pmportinfo->pmport_dev_type != SATA_DTYPE_NONE &&
 		    (pmportinfo->pmport_dev_type & SATA_VALID_DEV_TYPE) != 0)
 			sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo);
 		pmportinfo->pmport_state &= ~SATA_STATE_READY;
+		mutex_exit(&pmportinfo->pmport_mutex);
 	}
 
 	if (sdinfo != NULL) {
@@ -12611,17 +13945,22 @@
 			SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL;
 			cportinfo->cport_dev_type = SATA_DTYPE_NONE;
 		} else { /* port multiplier device port */
+			mutex_enter(&pmportinfo->pmport_mutex);
 			SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL;
 			pmportinfo->pmport_dev_type = SATA_DTYPE_NONE;
+			mutex_exit(&pmportinfo->pmport_mutex);
 		}
 		(void) kmem_free((void *)sdinfo, sizeof (sata_drive_info_t));
 	}
+
 	if (qual == SATA_ADDR_CPORT) {
 		cportinfo->cport_state &= ~(SATA_STATE_PROBED |
 		    SATA_STATE_PROBING);
-	} else { /* port multiplier device port */
+	} else if (qual == SATA_ADDR_PMPORT) {
+		mutex_enter(&pmportinfo->pmport_mutex);
 		pmportinfo->pmport_state &= ~(SATA_STATE_PROBED |
 		    SATA_STATE_PROBING);
+		mutex_exit(&pmportinfo->pmport_mutex);
 	}
 	mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
 
@@ -12658,6 +13997,7 @@
 			cportinfo->cport_state |= SATA_PSTATE_SHUTDOWN;
 		}
 	} else {
+		mutex_enter(&pmportinfo->pmport_mutex);
 		if (rval != SATA_SUCCESS) {
 			if (sata_device->satadev_state & SATA_PSTATE_FAILED) {
 				SATA_PMPORT_STATE(sata_hba_inst, cport,
@@ -12671,6 +14011,7 @@
 		} else {
 			pmportinfo->pmport_state |= SATA_PSTATE_SHUTDOWN;
 		}
+		mutex_exit(&pmportinfo->pmport_mutex);
 	}
 
 	mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
@@ -12681,7 +14022,7 @@
 /*
  * Process ioctl port activate request.
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * NOTE: Port multiplier is supported now.
  */
 static int
 sata_ioctl_activate(sata_hba_inst_t *sata_hba_inst,
@@ -12700,8 +14041,19 @@
 	pmport = sata_device->satadev_addr.pmport;
 	qual = sata_device->satadev_addr.qual;
 
+	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
+
+	/*
+	 * The qual translate from ap_id (by SCSI_TO_SATA_ADDR_QUAL())
+	 * is a device. But what we are dealing with is port/pmport.
+	 */
+	ASSERT(qual == SATA_ADDR_DCPORT || qual == SATA_ADDR_DPMPORT);
+	if (qual == SATA_ADDR_DCPORT)
+		sata_device->satadev_addr.qual = qual = SATA_ADDR_CPORT;
+	else
+		sata_device->satadev_addr.qual = qual = SATA_ADDR_PMPORT;
+
 	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
-	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
 	if (qual == SATA_ADDR_PMPORT) {
 		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
 		if (pmportinfo->pmport_state & SATA_PSTATE_SHUTDOWN ||
@@ -12725,9 +14077,11 @@
 		    cport)->cport_mutex);
 		sata_update_port_info(sata_hba_inst, sata_device);
 		if (sata_device->satadev_state & SATA_PSTATE_FAILED) {
-			if (qual == SATA_ADDR_PMPORT)
+			if (qual == SATA_ADDR_PMPORT) {
+				mutex_enter(&pmportinfo->pmport_mutex);
 				pmportinfo->pmport_state = SATA_PSTATE_FAILED;
-			else
+				mutex_exit(&pmportinfo->pmport_mutex);
+			} else
 				cportinfo->cport_state = SATA_PSTATE_FAILED;
 
 			mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
@@ -12740,9 +14094,11 @@
 		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
 	}
 	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
-	if (qual == SATA_ADDR_PMPORT)
+	if (qual == SATA_ADDR_PMPORT) {
+		mutex_enter(&pmportinfo->pmport_mutex);
 		pmportinfo->pmport_state &= ~SATA_PSTATE_SHUTDOWN;
-	else
+		mutex_exit(&pmportinfo->pmport_mutex);
+	} else
 		cportinfo->cport_state &= ~SATA_PSTATE_SHUTDOWN;
 	mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
 
@@ -12788,13 +14144,6 @@
 				sata_log(sata_hba_inst, CE_WARN,
 				    "SATA port multiplier detected at port %d",
 				    cport);
-				/*
-				 * Because the detected device is a port
-				 * multiplier, we need to reprobe every device
-				 * port on the port multiplier and show every
-				 * device found attached.
-				 * Add this code here.
-				 */
 			}
 		}
 	}
@@ -12806,7 +14155,7 @@
 /*
  * Process ioctl reset port request.
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * NOTE: Port-Multiplier is supported.
  */
 static int
 sata_ioctl_reset_port(sata_hba_inst_t *sata_hba_inst,
@@ -12819,6 +14168,16 @@
 	pmport = sata_device->satadev_addr.pmport;
 	qual = sata_device->satadev_addr.qual;
 
+	/*
+	 * The qual translate from ap_id (by SCSI_TO_SATA_ADDR_QUAL())
+	 * is a device. But what we are dealing with is port/pmport.
+	 */
+	if (qual == SATA_ADDR_DCPORT)
+		sata_device->satadev_addr.qual = qual = SATA_ADDR_CPORT;
+	else
+		sata_device->satadev_addr.qual = qual = SATA_ADDR_PMPORT;
+	ASSERT(qual == SATA_ADDR_CPORT || qual == SATA_ADDR_PMPORT);
+
 	/* Sanity check */
 	if (SATA_RESET_DPORT_FUNC(sata_hba_inst) == NULL) {
 		SATA_LOG_D((sata_hba_inst, CE_WARN,
@@ -12839,9 +14198,14 @@
 		if (qual == SATA_ADDR_CPORT)
 			SATA_CPORT_STATE(sata_hba_inst, cport) =
 			    SATA_PSTATE_FAILED;
-		else
+		else {
+			mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst, cport,
+			    pmport));
 			SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) =
 			    SATA_PSTATE_FAILED;
+			mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport,
+			    pmport));
+		}
 		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->
 		    cport_mutex);
 		rv = EIO;
@@ -12862,13 +14226,14 @@
 /*
  * Process ioctl reset device request.
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * NOTE: Port multiplier is supported.
  */
 static int
 sata_ioctl_reset_device(sata_hba_inst_t *sata_hba_inst,
     sata_device_t *sata_device)
 {
-	sata_drive_info_t *sdinfo;
+	sata_drive_info_t *sdinfo = NULL;
+	sata_pmult_info_t *pmultinfo = NULL;
 	int cport, pmport;
 	int rv = 0;
 
@@ -12884,17 +14249,21 @@
 	pmport = sata_device->satadev_addr.pmport;
 
 	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
-	if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT) {
-		sata_device->satadev_addr.qual = SATA_ADDR_DCPORT;
-		sdinfo = SATA_CPORT_DRV_INFO(sata_hba_inst,
-		    sata_device->satadev_addr.cport);
+	if (sata_device->satadev_addr.qual == SATA_ADDR_DCPORT) {
+		if (SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) ==
+		    SATA_DTYPE_PMULT)
+			pmultinfo = SATA_CPORT_INFO(sata_hba_inst, cport)->
+			    cport_devp.cport_sata_pmult;
+		else
+			sdinfo = SATA_CPORT_DRV_INFO(sata_hba_inst,
+			    sata_device->satadev_addr.cport);
 	} else { /* port multiplier */
 		sata_device->satadev_addr.qual = SATA_ADDR_DPMPORT;
 		sdinfo = SATA_PMPORT_DRV_INFO(sata_hba_inst,
 		    sata_device->satadev_addr.cport,
 		    sata_device->satadev_addr.pmport);
 	}
-	if (sdinfo == NULL) {
+	if (sdinfo == NULL && pmultinfo == NULL) {
 		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
 		return (EINVAL);
 	}
@@ -12914,15 +14283,23 @@
 		 * or port disconnect/connect and re-probing is
 		 * needed to change it's state
 		 */
-		sdinfo->satadrv_state &= ~SATA_STATE_READY;
-		sdinfo->satadrv_state |= SATA_DSTATE_FAILED;
+		if (sdinfo != NULL) {
+			sdinfo->satadrv_state &= ~SATA_STATE_READY;
+			sdinfo->satadrv_state |= SATA_DSTATE_FAILED;
+		} else if (pmultinfo != NULL) {
+			pmultinfo->pmult_state &= ~SATA_STATE_READY;
+			pmultinfo->pmult_state |= SATA_DSTATE_FAILED;
+		}
+
 		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex);
 		rv = EIO;
 	}
 	/*
 	 * If attached device was a port multiplier, some extra processing
-	 * may be needed, to bring it back (if port re-probing did not handle
-	 * it). Add such code here.
+	 * may be needed to bring it back. SATA specification requies a
+	 * mandatory software reset on host port to reliably enumerate a port
+	 * multiplier, the HBA driver should handle that after reset
+	 * operation.
 	 */
 	return (rv);
 }
@@ -12930,8 +14307,6 @@
 
 /*
  * Process ioctl reset all request.
- *
- * NOTE: Port multiplier code is not completed nor tested.
  */
 static int
 sata_ioctl_reset_all(sata_hba_inst_t *sata_hba_inst)
@@ -12969,12 +14344,12 @@
 			rv = EBUSY;
 			break;
 		} else {
+			/*
+			 * It is enough to lock cport in command-based
+			 * switching mode.
+			 */
 			SATA_CPORT_INFO(sata_hba_inst, tcport)->
 			    cport_event_flags |= SATA_APCTL_LOCK_PORT_BUSY;
-			/*
-			 * If there is a port multiplier attached, we may need
-			 * to lock its port as well. If so, add such code here.
-			 */
 		}
 		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, tcport)->
 		    cport_mutex);
@@ -12983,7 +14358,7 @@
 	if (rv == 0) {
 		/*
 		 * All cports were successfully locked.
-		 * Reset main SATA controller only for now - no PMult.
+		 * Reset main SATA controller.
 		 * Set the device address to port 0, to have a valid device
 		 * address.
 		 */
@@ -13001,7 +14376,6 @@
 		 * Because ports were reset, port states are unknown.
 		 * They should be re-probed to get their state and
 		 * attached devices should be reinitialized.
-		 * Add code here to re-probe port multiplier device ports.
 		 */
 		for (tcport = 0; tcport < SATA_NUM_CPORTS(sata_hba_inst);
 		    tcport++) {
@@ -13009,6 +14383,12 @@
 			sata_device.satadev_addr.pmport = tpmport;
 			sata_device.satadev_addr.qual = SATA_ADDR_CPORT;
 
+			/*
+			 * The sata_reprobe_port() will mark a
+			 * SATA_EVNT_DEVICE_RESET event on the port
+			 * multiplier, all its sub-ports will be probed by
+			 * sata daemon afterwards.
+			 */
 			if (sata_reprobe_port(sata_hba_inst, &sata_device,
 			    SATA_DEV_IDENTIFY_RETRY) != SATA_SUCCESS)
 				rv = EIO;
@@ -13070,9 +14450,14 @@
 		if (qual == SATA_ADDR_CPORT)
 			SATA_CPORT_STATE(sata_hba_inst, cport) =
 			    SATA_PSTATE_FAILED;
-		else /* port ultiplier device port */
+		else { /* port multiplier device port */
+			mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst,
+			    cport, pmport));
 			SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) =
 			    SATA_PSTATE_FAILED;
+			mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst,
+			    cport, pmport));
+		}
 
 		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->
 		    cport_mutex);
@@ -13102,20 +14487,23 @@
  * SCSI_TO_SATA_PMPORT extracts pmport number and
  * SCSI_TO_SATA_ADDR_QUAL extracts port mulitplier qualifier flag.
  *
- * For now, support is for cports only - no port multiplier device ports.
+ * Port multiplier is supported.
  */
 
 static void
 sata_cfgadm_state(sata_hba_inst_t *sata_hba_inst, int32_t port,
     devctl_ap_state_t *ap_state)
 {
-	uint16_t	cport;
-	int		port_state;
+	uint8_t		cport, pmport, qual;
+	uint32_t	port_state, pmult_state;
+	uint32_t	dev_type;
 	sata_drive_info_t *sdinfo;
 
-	/* Cport only */
 	cport = SCSI_TO_SATA_CPORT(port);
-
+	pmport = SCSI_TO_SATA_PMPORT(port);
+	qual = SCSI_TO_SATA_ADDR_QUAL(port);
+
+	/* Check cport state */
 	port_state = SATA_CPORT_STATE(sata_hba_inst, cport);
 	if (port_state & SATA_PSTATE_SHUTDOWN ||
 	    port_state & SATA_PSTATE_FAILED) {
@@ -13129,11 +14517,34 @@
 		return;
 	}
 
-	/* Need to check pmult device port here as well, when supported */
+	/* cport state is okay. Now check pmport state */
+	if (qual == SATA_ADDR_DPMPORT || qual == SATA_ADDR_PMPORT) {
+		/* Sanity check */
+		if (SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) !=
+		    SATA_DTYPE_PMULT || SATA_PMPORT_INFO(sata_hba_inst,
+		    cport, pmport) == NULL)
+			return;
+		port_state = SATA_PMPORT_STATE(sata_hba_inst, cport, pmport);
+		if (port_state & SATA_PSTATE_SHUTDOWN ||
+		    port_state & SATA_PSTATE_FAILED) {
+			ap_state->ap_rstate = AP_RSTATE_DISCONNECTED;
+			ap_state->ap_ostate = AP_OSTATE_UNCONFIGURED;
+			if (port_state & SATA_PSTATE_FAILED)
+				ap_state->ap_condition = AP_COND_FAILED;
+			else
+				ap_state->ap_condition = AP_COND_UNKNOWN;
+
+			return;
+		}
+	}
 
 	/* Port is enabled and ready */
-
-	switch (SATA_CPORT_DEV_TYPE(sata_hba_inst, cport)) {
+	if (qual == SATA_ADDR_DCPORT || qual == SATA_ADDR_CPORT)
+		dev_type = SATA_CPORT_DEV_TYPE(sata_hba_inst, cport);
+	else
+		dev_type = SATA_PMPORT_DEV_TYPE(sata_hba_inst, cport, pmport);
+
+	switch (dev_type) {
 	case SATA_DTYPE_NONE:
 	{
 		ap_state->ap_ostate = AP_OSTATE_UNCONFIGURED;
@@ -13142,8 +14553,30 @@
 		ap_state->ap_rstate = AP_RSTATE_EMPTY;
 		break;
 	}
-	case SATA_DTYPE_UNKNOWN:
-	case SATA_DTYPE_PMULT:	/* Until PMult is supported */
+	case SATA_DTYPE_PMULT:
+	{
+		/* Need to check port multiplier state */
+		ASSERT(qual == SATA_ADDR_DCPORT);
+		pmult_state = SATA_PMULT_INFO(sata_hba_inst, cport)->
+		    pmult_state;
+		if (pmult_state & (SATA_PSTATE_SHUTDOWN|SATA_PSTATE_FAILED)) {
+			ap_state->ap_rstate = AP_RSTATE_DISCONNECTED;
+			ap_state->ap_ostate = AP_OSTATE_UNCONFIGURED;
+			if (pmult_state & SATA_PSTATE_FAILED)
+				ap_state->ap_condition = AP_COND_FAILED;
+			else
+				ap_state->ap_condition = AP_COND_UNKNOWN;
+
+			return;
+		}
+
+		/* Port multiplier is not configurable */
+		ap_state->ap_ostate = AP_OSTATE_CONFIGURED;
+		ap_state->ap_rstate = AP_RSTATE_CONNECTED;
+		ap_state->ap_condition = AP_COND_OK;
+		break;
+	}
+
 	case SATA_DTYPE_ATADISK:
 	case SATA_DTYPE_ATAPICD:
 	case SATA_DTYPE_ATAPITAPE:
@@ -13154,7 +14587,7 @@
 		int circ;
 
 		dip = SATA_DIP(sata_hba_inst);
-		tdip = sata_get_target_dip(dip, port);
+		tdip = sata_get_target_dip(dip, cport, pmport);
 		ap_state->ap_rstate = AP_RSTATE_CONNECTED;
 		if (tdip != NULL) {
 			ndi_devi_enter(dip, &circ);
@@ -13225,7 +14658,9 @@
 /*
  * Process ioctl get device path request.
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * NOTE: Port multiplier has no target dip. Devices connected to port
+ * multiplier have target node attached to the HBA node. The only difference
+ * between them and the directly-attached device node is a target address.
  */
 static int
 sata_ioctl_get_device_path(sata_hba_inst_t *sata_hba_inst,
@@ -13270,7 +14705,7 @@
 /*
  * Process ioctl get attachment point type request.
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * NOTE: Port multiplier is supported.
  */
 static	int
 sata_ioctl_get_ap_type(sata_hba_inst_t *sata_hba_inst,
@@ -13280,7 +14715,7 @@
 	const char	*ap_type;
 	int		dev_type;
 
-	if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT)
+	if (sata_device->satadev_addr.qual == SATA_ADDR_DCPORT)
 		dev_type = SATA_CPORT_DEV_TYPE(sata_hba_inst,
 		    sata_device->satadev_addr.cport);
 	else /* pmport */
@@ -13307,7 +14742,7 @@
 		break;
 
 	case SATA_DTYPE_PMULT:
-		ap_type = "pmult";
+		ap_type = "sata-pmult";
 		break;
 
 	case SATA_DTYPE_UNKNOWN:
@@ -13343,7 +14778,7 @@
  * This operation should return to cfgadm the device model
  * information string
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * NOTE: Port multiplier is supported.
  */
 static	int
 sata_ioctl_get_model_info(sata_hba_inst_t *sata_hba_inst,
@@ -13355,7 +14790,7 @@
 
 	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
 	    sata_device->satadev_addr.cport)->cport_mutex);
-	if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT)
+	if (sata_device->satadev_addr.qual == SATA_ADDR_DCPORT)
 		sdinfo = SATA_CPORT_DRV_INFO(sata_hba_inst,
 		    sata_device->satadev_addr.cport);
 	else /* port multiplier */
@@ -13401,7 +14836,7 @@
  * This operation should return to cfgadm the device firmware revision
  * information string
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * Port multiplier is supported.
  */
 static	int
 sata_ioctl_get_revfirmware_info(sata_hba_inst_t *sata_hba_inst,
@@ -13413,7 +14848,7 @@
 
 	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
 	    sata_device->satadev_addr.cport)->cport_mutex);
-	if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT)
+	if (sata_device->satadev_addr.qual == SATA_ADDR_DCPORT)
 		sdinfo = SATA_CPORT_DRV_INFO(sata_hba_inst,
 		    sata_device->satadev_addr.cport);
 	else /* port multiplier */
@@ -13458,7 +14893,7 @@
  * Process ioctl get device serial number info request.
  * This operation should return to cfgadm the device serial number string.
  *
- * NOTE: Port multiplier code is not completed nor tested.
+ * NOTE: Port multiplier is supported.
  */
 static	int
 sata_ioctl_get_serialnumber_info(sata_hba_inst_t *sata_hba_inst,
@@ -13470,7 +14905,7 @@
 
 	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
 	    sata_device->satadev_addr.cport)->cport_mutex);
-	if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT)
+	if (sata_device->satadev_addr.qual == SATA_ADDR_DCPORT)
 		sdinfo = SATA_CPORT_DRV_INFO(sata_hba_inst,
 		    sata_device->satadev_addr.cport);
 	else /* port multiplier */
@@ -14719,6 +16154,7 @@
 {
 	sata_hba_inst_t *sata_hba_inst = NULL;
 	sata_address_t *saddr;
+	sata_pmult_info_t *pmultinfo;
 	sata_drive_info_t *sdinfo;
 	sata_port_stats_t *pstats;
 	sata_cport_info_t *cportinfo;
@@ -14758,10 +16194,6 @@
 	saddr = &sata_device->satadev_addr;
 	if (saddr->cport >= SATA_NUM_CPORTS(sata_hba_inst))
 		return;
-	if (saddr->qual == SATA_ADDR_PMPORT ||
-	    saddr->qual == SATA_ADDR_DPMPORT)
-		/* Port Multiplier not supported yet */
-		return;
 
 	cport = saddr->cport;
 	pmport = saddr->pmport;
@@ -14773,20 +16205,74 @@
 	 * Port has to be initialized, or we cannot accept an event.
 	 */
 	if ((saddr->qual & (SATA_ADDR_CPORT | SATA_ADDR_PMPORT |
-	    SATA_ADDR_DCPORT | SATA_ADDR_DPMPORT)) != 0) {
-		if ((saddr->qual & (SATA_ADDR_CPORT | SATA_ADDR_DCPORT)) != 0) {
-			mutex_enter(&sata_hba_inst->satahba_mutex);
-			cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
-			mutex_exit(&sata_hba_inst->satahba_mutex);
-			if (cportinfo == NULL || cportinfo->cport_state == 0)
-				return;
-		} else {
-			mutex_enter(&sata_hba_inst->satahba_mutex);
-			pmportinfo = SATA_PMPORT_INFO(sata_hba_inst,
-			    cport, pmport);
-			mutex_exit(&sata_hba_inst->satahba_mutex);
-			if (pmportinfo == NULL || pmportinfo->pmport_state == 0)
-				return;
+	    SATA_ADDR_DCPORT | SATA_ADDR_DPMPORT | SATA_ADDR_PMULT)) != 0) {
+		mutex_enter(&sata_hba_inst->satahba_mutex);
+		cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
+		mutex_exit(&sata_hba_inst->satahba_mutex);
+		if (cportinfo == NULL || cportinfo->cport_state == 0)
+			return;
+	}
+
+	if ((saddr->qual & (SATA_ADDR_PMULT | SATA_ADDR_PMPORT |
+	    SATA_ADDR_DPMPORT)) != 0) {
+		if (cportinfo->cport_dev_type != SATA_DTYPE_PMULT) {
+			SATA_LOG_D((sata_hba_inst, CE_WARN,
+			    "sata_hba_event_notify: Non-pmult device (0x%x)"
+			    "is attached to port %d, ignore pmult/pmport "
+			    "event 0x%x", cportinfo->cport_dev_type,
+			    cport, event));
+			return;
+		}
+
+		mutex_enter(&cportinfo->cport_mutex);
+		pmultinfo = SATA_PMULT_INFO(sata_hba_inst, cport);
+		mutex_exit(&cportinfo->cport_mutex);
+
+		/*
+		 * The daemon might be processing attachment of port
+		 * multiplier, in that case we should ignore events on its
+		 * sub-devices.
+		 *
+		 * NOTE: Only pmult_state is checked in sata_hba_event_notify.
+		 * The pmport_state is checked by sata daemon.
+		 */
+		if (pmultinfo == NULL ||
+		    pmultinfo->pmult_state == SATA_STATE_UNKNOWN) {
+			SATA_LOG_D((sata_hba_inst, CE_WARN,
+			    "sata_hba_event_notify: pmult is not"
+			    "available at port %d:%d, ignore event 0x%x",
+			    cport, pmport, event));
+			return;
+		}
+	}
+
+	if ((saddr->qual &
+	    (SATA_ADDR_PMPORT | SATA_ADDR_DPMPORT)) != 0) {
+
+		mutex_enter(&cportinfo->cport_mutex);
+		if (pmport > SATA_NUM_PMPORTS(sata_hba_inst, cport)) {
+			SATA_LOG_D((sata_hba_inst, CE_WARN,
+			    "sata_hba_event_notify: invalid/"
+			    "un-implemented port %d:%d (%d ports), "
+			    "ignore event 0x%x", cport, pmport,
+			    SATA_NUM_PMPORTS(sata_hba_inst, cport), event));
+			mutex_exit(&cportinfo->cport_mutex);
+			return;
+		}
+		mutex_exit(&cportinfo->cport_mutex);
+
+		mutex_enter(&sata_hba_inst->satahba_mutex);
+		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst,
+		    cport, pmport);
+		mutex_exit(&sata_hba_inst->satahba_mutex);
+
+		/* pmport is implemented/valid? */
+		if (pmportinfo == NULL) {
+			SATA_LOG_D((sata_hba_inst, CE_WARN,
+			    "sata_hba_event_notify: invalid/"
+			    "un-implemented port %d:%d, ignore "
+			    "event 0x%x", cport, pmport, event));
+			return;
 		}
 	}
 
@@ -14816,6 +16302,8 @@
 			    &(SATA_CPORT_INFO(sata_hba_inst, cport))->
 			    cport_stats;
 		} else {
+			mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport)));
+			mutex_enter(&pmportinfo->pmport_mutex);
 			/* Port multiplier's device port event */
 			(SATA_PMPORT_INFO(sata_hba_inst, cport, pmport))->
 			    pmport_event_flags |=
@@ -14823,6 +16311,8 @@
 			pstats =
 			    &(SATA_PMPORT_INFO(sata_hba_inst, cport, pmport))->
 			    pmport_stats;
+			mutex_exit(&pmportinfo->pmport_mutex);
+			mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, cport)));
 		}
 
 		/*
@@ -14915,6 +16405,42 @@
 			    event & ~SATA_EVNT_DRIVE_EVENTS);
 		}
 		mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport)));
+	} else if (saddr->qual == SATA_ADDR_PMULT) {
+		mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, cport)));
+
+		/* qualify this event */
+		if ((event & (SATA_EVNT_DEVICE_RESET |
+		    SATA_EVNT_PMULT_LINK_CHANGED)) == 0) {
+			/* Invalid event for a port multiplier */
+			(void) sprintf(buf2, err_msg_evnt_2,
+			    event & SATA_EVNT_DEVICE_RESET);
+			mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport)));
+			goto event_info;
+		}
+
+		pmultinfo = SATA_PMULT_INFO(sata_hba_inst, cport);
+
+		if (event & SATA_EVNT_DEVICE_RESET) {
+
+			SATADBG1(SATA_DBG_PMULT, sata_hba_inst,
+			    "[Reset] port-mult on cport %d", cport);
+			pmultinfo->pmult_event_flags |=
+			    SATA_EVNT_DEVICE_RESET;
+			(void) strlcat(buf1, "pmult reset, ",
+			    SATA_EVENT_MAX_MSG_LENGTH);
+		}
+
+		if (event & SATA_EVNT_PMULT_LINK_CHANGED) {
+
+			SATADBG1(SATA_DBG_PMULT, sata_hba_inst,
+			    "pmult link changed on cport %d", cport);
+			pmultinfo->pmult_event_flags |=
+			    SATA_EVNT_PMULT_LINK_CHANGED;
+			(void) strlcat(buf1, "pmult link changed, ",
+			    SATA_EVENT_MAX_MSG_LENGTH);
+		}
+		mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport)));
+
 	} else {
 		if (saddr->qual != SATA_ADDR_NULL) {
 			/* Wrong address qualifier */
@@ -15113,7 +16639,7 @@
  *
  * NOTE: At the moment, device event processing is limited to hard disks
  * only.
- * cports only are supported - no pmports.
+ * Port multiplier is supported now.
  */
 static void
 sata_process_controller_events(sata_hba_inst_t *sata_hba_inst)
@@ -15122,6 +16648,7 @@
 	uint32_t event_flags;
 	sata_address_t *saddr;
 	sata_cport_info_t *cportinfo;
+	sata_pmult_info_t *pmultinfo;
 
 	SATADBG1(SATA_DBG_EVENTS_CNTRL, sata_hba_inst,
 	    "Processing controller %d event(s)",
@@ -15236,7 +16763,38 @@
 				    sata_hba_inst, saddr);
 			}
 		}
+
+
+		/*
+		 * Scan port multiplier and all its sub-ports event flags.
+		 * The events are marked by
+		 * (1) sata_pmult_info.pmult_event_flags
+		 * (2) sata_pmport_info.pmport_event_flags
+		 */
 		mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, ncport)));
+		if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) {
+			/*
+			 * There should be another extra check: this
+			 * port multiplier still exists?
+			 */
+			pmultinfo = SATA_PMULT_INFO(sata_hba_inst,
+			    ncport);
+
+			if (pmultinfo != NULL) {
+				mutex_exit(&(SATA_CPORT_MUTEX(
+				    sata_hba_inst, ncport)));
+				sata_process_pmult_events(
+				    sata_hba_inst, ncport);
+				mutex_enter(&(SATA_CPORT_MUTEX(
+				    sata_hba_inst, ncport)));
+			} else {
+				SATADBG1(SATA_DBG_PMULT, sata_hba_inst,
+				    "Port-multiplier is gone. "
+				    "Ignore all sub-device events "
+				    "at port %d.", ncport);
+			}
+		}
+
 		if ((SATA_CPORT_DEV_TYPE(sata_hba_inst, ncport) !=
 		    SATA_DTYPE_NONE) &&
 		    (SATA_CPORT_DRV_INFO(sata_hba_inst, ncport) != NULL)) {
@@ -15258,6 +16816,190 @@
 }
 
 /*
+ * Specific port multiplier instance event processing. At the moment, device
+ * event processing is limited to link/attach event only.
+ *
+ * NOTE: power management event is not supported yet.
+ */
+static void
+sata_process_pmult_events(sata_hba_inst_t *sata_hba_inst, uint8_t cport)
+{
+	sata_cport_info_t *cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
+	sata_pmult_info_t *pmultinfo;
+	sata_pmport_info_t *pmportinfo;
+	sata_address_t *saddr;
+	sata_device_t sata_device;
+	uint32_t event_flags;
+	int npmport;
+	int rval;
+
+	SATADBG2(SATA_DBG_EVENTS_CNTRL|SATA_DBG_PMULT, sata_hba_inst,
+	    "Processing pmult event(s) on cport %d of controller %d",
+	    cport, ddi_get_instance(SATA_DIP(sata_hba_inst)));
+
+	/* First process events on port multiplier */
+	mutex_enter(&cportinfo->cport_mutex);
+	pmultinfo = SATA_PMULT_INFO(sata_hba_inst, cport);
+	event_flags = pmultinfo->pmult_event_flags;
+
+	/*
+	 * Reset event (of port multiplier) has higher priority because the
+	 * port multiplier itself might be failed or removed after reset.
+	 */
+	if (event_flags & SATA_EVNT_DEVICE_RESET) {
+		/*
+		 * The status of the sub-links are uncertain,
+		 * so mark all sub-ports as RESET
+		 */
+		for (npmport = 0; npmport < SATA_NUM_PMPORTS(
+		    sata_hba_inst, cport); npmport ++) {
+			pmportinfo = SATA_PMPORT_INFO(sata_hba_inst,
+			    cport, npmport);
+			if (pmportinfo == NULL) {
+				/* That's weird. */
+				SATA_LOG_D((sata_hba_inst, CE_WARN,
+				    "sata_hba_event_notify: "
+				    "invalid/un-implemented "
+				    "port %d:%d (%d ports), ",
+				    cport, npmport, SATA_NUM_PMPORTS(
+				    sata_hba_inst, cport)));
+				continue;
+			}
+
+			mutex_enter(&pmportinfo->pmport_mutex);
+
+			/* Mark all pmport to unknow state. */
+			pmportinfo->pmport_state = SATA_STATE_UNKNOWN;
+			/* Mark all pmports with link events. */
+			pmportinfo->pmport_event_flags =
+			    (SATA_EVNT_LINK_ESTABLISHED|SATA_EVNT_LINK_LOST);
+			mutex_exit(&pmportinfo->pmport_mutex);
+		}
+
+	} else if (event_flags & SATA_EVNT_PMULT_LINK_CHANGED) {
+		/*
+		 * We need probe the port multiplier to know what has
+		 * happened.
+		 */
+		bzero(&sata_device, sizeof (sata_device_t));
+		sata_device.satadev_rev = SATA_DEVICE_REV;
+		sata_device.satadev_addr.cport = cport;
+		sata_device.satadev_addr.pmport = SATA_PMULT_HOSTPORT;
+		sata_device.satadev_addr.qual = SATA_ADDR_PMULT;
+
+		mutex_exit(&cportinfo->cport_mutex);
+		rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
+		    (SATA_DIP(sata_hba_inst), &sata_device);
+		mutex_enter(&cportinfo->cport_mutex);
+		if (rval != SATA_SUCCESS) {
+			/* Something went wrong? Fail the port */
+			cportinfo->cport_state = SATA_PSTATE_FAILED;
+			mutex_exit(&cportinfo->cport_mutex);
+			SATA_LOG_D((sata_hba_inst, CE_WARN,
+			    "SATA port %d probing failed", cport));
+
+			/* PMult structure must be released.  */
+			sata_free_pmult(sata_hba_inst, &sata_device);
+			return;
+		}
+
+		sata_update_port_info(sata_hba_inst, &sata_device);
+
+		/*
+		 * Sanity check - Port is active? Is the link active?
+		 * The device is still a port multiplier?
+		 */
+		if ((cportinfo->cport_state &
+		    (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) ||
+		    ((cportinfo->cport_scr.sstatus &
+		    SATA_PORT_DEVLINK_UP_MASK) != SATA_PORT_DEVLINK_UP) ||
+		    (cportinfo->cport_dev_type != SATA_DTYPE_PMULT)) {
+			mutex_exit(&cportinfo->cport_mutex);
+
+			/* PMult structure must be released.  */
+			sata_free_pmult(sata_hba_inst, &sata_device);
+			return;
+		}
+
+		/* Probed succeed, set port ready. */
+		cportinfo->cport_state |=
+		    SATA_STATE_PROBED | SATA_STATE_READY;
+	}
+
+	/* Release port multiplier event flags. */
+	pmultinfo->pmult_event_flags &=
+	    ~(SATA_EVNT_DEVICE_RESET|SATA_EVNT_PMULT_LINK_CHANGED);
+	mutex_exit(&cportinfo->cport_mutex);
+
+	/*
+	 * Check all sub-links.
+	 */
+	for (npmport = 0; npmport < SATA_NUM_PMPORTS(sata_hba_inst, cport);
+	    npmport ++) {
+		pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, npmport);
+		mutex_enter(&pmportinfo->pmport_mutex);
+		event_flags = pmportinfo->pmport_event_flags;
+		mutex_exit(&pmportinfo->pmport_mutex);
+		saddr = &pmportinfo->pmport_addr;
+
+		if ((event_flags &
+		    (SATA_EVNT_PORT_EVENTS | SATA_EVNT_DRIVE_EVENTS)) != 0) {
+			/*
+			 * Got port multiplier port event.
+			 * We need some hierarchy of event processing as they
+			 * are affecting each other:
+			 * 1. device detached/attached
+			 * 2. link events - link events may trigger device
+			 *    detached or device attached events in some
+			 *    circumstances.
+			 */
+			if (event_flags & SATA_EVNT_DEVICE_DETACHED) {
+				sata_process_pmdevice_detached(sata_hba_inst,
+				    saddr);
+			}
+			if (event_flags & SATA_EVNT_DEVICE_ATTACHED) {
+				sata_process_pmdevice_attached(sata_hba_inst,
+				    saddr);
+			}
+			if (event_flags & SATA_EVNT_LINK_ESTABLISHED ||
+			    event_flags & SATA_EVNT_LINK_LOST) {
+				sata_process_pmport_link_events(sata_hba_inst,
+				    saddr);
+			}
+			if (event_flags & SATA_EVNT_TARGET_NODE_CLEANUP) {
+				sata_process_target_node_cleanup(
+				    sata_hba_inst, saddr);
+			}
+		}
+
+		/* Checking drive event(s). */
+		mutex_enter(&pmportinfo->pmport_mutex);
+		if (pmportinfo->pmport_dev_type != SATA_DTYPE_NONE &&
+		    pmportinfo->pmport_sata_drive != NULL) {
+			event_flags = pmportinfo->pmport_sata_drive->
+			    satadrv_event_flags;
+			if (event_flags & (SATA_EVNT_DEVICE_RESET |
+			    SATA_EVNT_INPROC_DEVICE_RESET)) {
+
+				/* Have device event */
+				sata_process_pmdevice_reset(sata_hba_inst,
+				    saddr);
+			}
+		}
+		mutex_exit(&pmportinfo->pmport_mutex);
+
+		/* Release PORT_BUSY flag */
+		mutex_enter(&cportinfo->cport_mutex);
+		cportinfo->cport_event_flags &= ~SATA_EVNT_LOCK_PORT_BUSY;
+		mutex_exit(&cportinfo->cport_mutex);
+	}
+
+	SATADBG2(SATA_DBG_EVENTS_CNTRL|SATA_DBG_PMULT, sata_hba_inst,
+	    "[DONE] pmult event(s) on cport %d of controller %d",
+	    cport, ddi_get_instance(SATA_DIP(sata_hba_inst)));
+}
+
+/*
  * Process HBA power level change reported by HBA driver.
  * Not implemented at this time - event is ignored.
  */
@@ -15355,6 +17097,16 @@
 		return;
 	}
 
+	if ((SATA_CPORT_DEV_TYPE(sata_hba_inst, saddr->cport) ==
+	    SATA_DTYPE_PMULT)) {
+		/*
+		 * Should not happened: this is already handled in
+		 * sata_hba_event_notify()
+		 */
+		mutex_exit(&cportinfo->cport_mutex);
+		goto done;
+	}
+
 	if ((SATA_CPORT_DEV_TYPE(sata_hba_inst, saddr->cport) &
 	    SATA_VALID_DEV_TYPE) == 0) {
 		/*
@@ -15570,6 +17322,243 @@
 
 
 /*
+ * Port Multiplier Port Device Reset Event processing.
+ *
+ * NOTE: This function has to be entered with pmport mutex held. It exits with
+ * mutex held as well, but can release mutex during the processing.
+ */
+static void
+sata_process_pmdevice_reset(sata_hba_inst_t *sata_hba_inst,
+    sata_address_t *saddr)
+{
+	sata_drive_info_t old_sdinfo; /* local copy of the drive info */
+	sata_drive_info_t *sdinfo = NULL;
+	sata_cport_info_t *cportinfo = NULL;
+	sata_pmport_info_t *pmportinfo = NULL;
+	sata_pmult_info_t *pminfo = NULL;
+	sata_device_t sata_device;
+	uint8_t cport = saddr->cport;
+	uint8_t pmport = saddr->pmport;
+	int rval;
+
+	SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+	    "Processing drive reset at port %d:%d", cport, pmport);
+
+	cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport);
+	pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
+	sdinfo = SATA_PMPORT_DRV_INFO(sata_hba_inst, cport, pmport);
+
+	/*
+	 * If the port is in SHUTDOWN or FAILED state, or device is in FAILED
+	 * state, ignore reset event.
+	 */
+	if (((cportinfo->cport_state &
+	    (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) != 0) ||
+	    (sdinfo->satadrv_state & SATA_DSTATE_FAILED) != 0) {
+		sdinfo->satadrv_event_flags &=
+		    ~(SATA_EVNT_DEVICE_RESET | SATA_EVNT_INPROC_DEVICE_RESET);
+		return;
+	}
+
+	if ((pmportinfo->pmport_dev_type & SATA_VALID_DEV_TYPE) == 0) {
+		/*
+		 * This should not happen - coding error.
+		 * But we can recover, so do not panic, just clean up
+		 * and if in debug mode, log the message.
+		 */
+#ifdef SATA_DEBUG
+		sata_log(sata_hba_inst, CE_WARN,
+		    "sata_process_pmdevice_reset: "
+		    "Invalid device type with sdinfo!", NULL);
+#endif
+		sdinfo->satadrv_event_flags = 0;
+		return;
+	}
+
+#ifdef SATA_DEBUG
+	if ((sdinfo->satadrv_event_flags &
+	    (SATA_EVNT_DEVICE_RESET | SATA_EVNT_INPROC_DEVICE_RESET)) == 0) {
+		/* Nothing to do */
+		/* Something is weird - why we are processing dev reset? */
+		SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+		    "No device reset event!!!!", NULL);
+
+		return;
+	}
+	if ((sdinfo->satadrv_event_flags &
+	    (SATA_EVNT_DEVICE_RESET | SATA_EVNT_INPROC_DEVICE_RESET)) ==
+	    (SATA_EVNT_DEVICE_RESET | SATA_EVNT_INPROC_DEVICE_RESET)) {
+		/* Something is weird - new device reset event */
+		SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+		    "Overlapping device reset events!", NULL);
+	}
+#endif
+	SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+	    "Processing port %d:%d device reset", cport, pmport);
+
+	/* Clear event flag */
+	sdinfo->satadrv_event_flags &= ~SATA_EVNT_DEVICE_RESET;
+
+	/* It seems that we always need to check the port state first */
+	sata_device.satadev_rev = SATA_DEVICE_REV;
+	sata_device.satadev_addr = *saddr;
+	/*
+	 * We have to exit mutex, because the HBA probe port function may
+	 * block on its own mutex.
+	 */
+	mutex_exit(&pmportinfo->pmport_mutex);
+	rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
+	    (SATA_DIP(sata_hba_inst), &sata_device);
+	mutex_enter(&pmportinfo->pmport_mutex);
+
+	sata_update_pmport_info(sata_hba_inst, &sata_device);
+	if (rval != SATA_SUCCESS) {
+		/* Something went wrong? Fail the port */
+		pmportinfo->pmport_state = SATA_PSTATE_FAILED;
+		sdinfo = SATA_PMPORT_DRV_INFO(sata_hba_inst, saddr->cport,
+		    saddr->pmport);
+		if (sdinfo != NULL)
+			sdinfo->satadrv_event_flags = 0;
+		mutex_exit(&pmportinfo->pmport_mutex);
+		SATA_LOG_D((sata_hba_inst, CE_WARN,
+		    "SATA port %d:%d probing failed",
+		    saddr->cport, saddr->pmport));
+		mutex_enter(&pmportinfo->pmport_mutex);
+		return;
+	}
+	if ((sata_device.satadev_scr.sstatus  &
+	    SATA_PORT_DEVLINK_UP_MASK) !=
+	    SATA_PORT_DEVLINK_UP ||
+	    sata_device.satadev_type == SATA_DTYPE_NONE) {
+		/*
+		 * No device to process, anymore. Some other event processing
+		 * would or have already performed port info cleanup.
+		 * To be safe (HBA may need it), request clearing device
+		 * reset condition.
+		 */
+		sdinfo = SATA_PMPORT_DRV_INFO(sata_hba_inst, saddr->cport,
+		    saddr->pmport);
+		if (sdinfo != NULL) {
+			sdinfo->satadrv_event_flags &=
+			    ~SATA_EVNT_INPROC_DEVICE_RESET;
+			/* must clear flags on cport */
+			pminfo = SATA_PMULT_INFO(sata_hba_inst,
+			    saddr->cport);
+			pminfo->pmult_event_flags |=
+			    SATA_EVNT_CLEAR_DEVICE_RESET;
+		}
+		return;
+	}
+
+	sdinfo = SATA_PMPORT_DRV_INFO(sata_hba_inst, saddr->cport,
+	    saddr->pmport);
+	if (sdinfo == NULL) {
+		return;
+	}
+	if ((sdinfo->satadrv_event_flags &
+	    SATA_EVNT_INPROC_DEVICE_RESET) == 0) {
+		/*
+		 * Start tracking time for device feature restoration and
+		 * identification. Save current time (lbolt value).
+		 */
+		sdinfo->satadrv_reset_time = ddi_get_lbolt();
+	}
+	/* Mark device reset processing as active */
+	sdinfo->satadrv_event_flags |= SATA_EVNT_INPROC_DEVICE_RESET;
+
+	old_sdinfo = *sdinfo;	/* local copy of the drive info */
+	mutex_exit(&pmportinfo->pmport_mutex);
+
+	if (sata_set_drive_features(sata_hba_inst, &old_sdinfo, 1) ==
+	    SATA_FAILURE) {
+		/*
+		 * Restoring drive setting failed.
+		 * Probe the port first, to check if the port state has changed
+		 */
+		sata_device.satadev_rev = SATA_DEVICE_REV;
+		sata_device.satadev_addr = *saddr;
+		sata_device.satadev_addr.qual = SATA_ADDR_PMPORT;
+
+		/* probe port */
+		rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
+		    (SATA_DIP(sata_hba_inst), &sata_device);
+		mutex_enter(&pmportinfo->pmport_mutex);
+		if (rval == SATA_SUCCESS &&
+		    (sata_device.satadev_state &
+		    (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) == 0 &&
+		    (sata_device.satadev_scr.sstatus  &
+		    SATA_PORT_DEVLINK_UP_MASK) == SATA_PORT_DEVLINK_UP &&
+		    sata_device.satadev_type != SATA_DTYPE_NONE) {
+			/*
+			 * We may retry this a bit later - in-process reset
+			 * condition should be already set.
+			 * Track retry time for device identification.
+			 */
+			if ((pmportinfo->pmport_dev_type &
+			    SATA_VALID_DEV_TYPE) != 0 &&
+			    SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL &&
+			    sdinfo->satadrv_reset_time != 0) {
+				clock_t cur_time = ddi_get_lbolt();
+				/*
+				 * If the retry time limit was not
+				 * exceeded, retry.
+				 */
+				if ((cur_time - sdinfo->satadrv_reset_time) <
+				    drv_usectohz(SATA_DEV_REPROBE_TIMEOUT)) {
+					mutex_enter(
+					    &sata_hba_inst->satahba_mutex);
+					sata_hba_inst->satahba_event_flags |=
+					    SATA_EVNT_MAIN;
+					mutex_exit(
+					    &sata_hba_inst->satahba_mutex);
+					mutex_enter(&sata_mutex);
+					sata_event_pending |= SATA_EVNT_MAIN;
+					mutex_exit(&sata_mutex);
+					return;
+				}
+			}
+			/* Fail the drive */
+			sdinfo->satadrv_state = SATA_DSTATE_FAILED;
+
+			sata_log(sata_hba_inst, CE_WARN,
+			    "SATA device at port %d:%d - device failed",
+			    saddr->cport, saddr->pmport);
+		} else {
+			/*
+			 * No point of retrying - some other event processing
+			 * would or already did port info cleanup.
+			 * To be safe (HBA may need it),
+			 * request clearing device reset condition.
+			 */
+			sdinfo->satadrv_event_flags |=
+			    SATA_EVNT_CLEAR_DEVICE_RESET;
+		}
+		sdinfo->satadrv_event_flags &= ~SATA_EVNT_INPROC_DEVICE_RESET;
+		sdinfo->satadrv_reset_time = 0;
+		return;
+	}
+	/*
+	 * Raise the flag indicating that the next sata command could
+	 * be sent with SATA_CLEAR_DEV_RESET_STATE flag, if no new device
+	 * reset is reported.
+	 */
+	mutex_enter(&pmportinfo->pmport_mutex);
+	if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) {
+		sdinfo->satadrv_reset_time = 0;
+		if (pmportinfo->pmport_dev_type & SATA_VALID_DEV_TYPE) {
+			sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo);
+			sdinfo->satadrv_event_flags &=
+			    ~SATA_EVNT_INPROC_DEVICE_RESET;
+			/* must clear flags on cport */
+			pminfo = SATA_PMULT_INFO(sata_hba_inst,
+			    saddr->cport);
+			pminfo->pmult_event_flags |=
+			    SATA_EVNT_CLEAR_DEVICE_RESET;
+		}
+	}
+}
+
+/*
  * Port Link Events processing.
  * Every link established event may involve device reset (due to
  * COMRESET signal, equivalent of the hard reset) so arbitrarily
@@ -15590,7 +17579,8 @@
  * device detached event processing after link lost timeout.
  * Else, the event is ignored.
  *
- * NOTE: Only cports are processed for now, i.e. no port multiplier ports
+ * NOTE: Port multiplier ports events are handled by
+ * sata_process_pmport_link_events();
  */
 static void
 sata_process_port_link_events(sata_hba_inst_t *sata_hba_inst,
@@ -15674,8 +17664,7 @@
 		 * For the sanity sake check if a device is attached - check
 		 * return state of a port probing.
 		 */
-		if (sata_device.satadev_type != SATA_DTYPE_NONE &&
-		    sata_device.satadev_type != SATA_DTYPE_PMULT) {
+		if (sata_device.satadev_type != SATA_DTYPE_NONE) {
 			/*
 			 * HBA port probe indicated that there is a device
 			 * attached. Check if the framework had device info
@@ -15795,22 +17784,242 @@
 }
 
 /*
+ * Port Multiplier Port Link Events processing.
+ */
+static void
+sata_process_pmport_link_events(sata_hba_inst_t *sata_hba_inst,
+    sata_address_t *saddr)
+{
+	sata_device_t sata_device;
+	sata_pmport_info_t *pmportinfo = NULL;
+	sata_drive_info_t *sdinfo = NULL;
+	uint32_t event_flags;
+	uint8_t cport = saddr->cport;
+	uint8_t pmport = saddr->pmport;
+	int rval;
+
+	SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+	    "Processing port %d:%d link event(s)",
+	    cport, pmport);
+
+	pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
+	mutex_enter(&pmportinfo->pmport_mutex);
+	event_flags = pmportinfo->pmport_event_flags;
+
+	/* Reset event flags first */
+	pmportinfo->pmport_event_flags &=
+	    ~(SATA_EVNT_LINK_ESTABLISHED | SATA_EVNT_LINK_LOST);
+
+	/* If the port is in SHUTDOWN or FAILED state, ignore link events. */
+	if ((pmportinfo->pmport_state &
+	    (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) != 0) {
+		mutex_exit(&pmportinfo->pmport_mutex);
+		return;
+	}
+
+	/*
+	 * For the sanity sake get current port state.
+	 * Set device address only. Other sata_device fields should be
+	 * set by HBA driver.
+	 */
+	sata_device.satadev_rev = SATA_DEVICE_REV;
+	sata_device.satadev_addr = *saddr;
+	/*
+	 * We have to exit mutex, because the HBA probe port function may
+	 * block on its own mutex.
+	 */
+	mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, saddr->cport,
+	    saddr->pmport));
+	rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
+	    (SATA_DIP(sata_hba_inst), &sata_device);
+	mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst, saddr->cport,
+	    saddr->pmport));
+	sata_update_pmport_info(sata_hba_inst, &sata_device);
+	if (rval != SATA_SUCCESS) {
+		/* Something went wrong? Fail the port */
+		pmportinfo->pmport_state = SATA_PSTATE_FAILED;
+		mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, saddr->cport,
+		    saddr->pmport));
+		SATA_LOG_D((sata_hba_inst, CE_WARN,
+		    "SATA port %d:%d probing failed",
+		    saddr->cport, saddr->pmport));
+		/*
+		 * We may want to release device info structure, but
+		 * it is not necessary.
+		 */
+		return;
+	} else {
+		/* port probed successfully */
+		pmportinfo->pmport_state |=
+		    SATA_STATE_PROBED | SATA_STATE_READY;
+	}
+	mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst,
+	    saddr->cport, saddr->pmport));
+	mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst,
+	    saddr->cport, saddr->pmport));
+	if (event_flags & SATA_EVNT_LINK_ESTABLISHED) {
+
+		if ((sata_device.satadev_scr.sstatus &
+		    SATA_PORT_DEVLINK_UP_MASK) != SATA_PORT_DEVLINK_UP) {
+			/* Ignore event */
+			SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+			    "Ignoring port %d:%d link established event - "
+			    "link down",
+			    saddr->cport, saddr->pmport);
+			goto linklost;
+		}
+
+		SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+		    "Processing port %d:%d link established event",
+		    cport, pmport);
+
+		/*
+		 * For the sanity sake check if a device is attached - check
+		 * return state of a port probing.
+		 */
+		if (sata_device.satadev_type != SATA_DTYPE_NONE &&
+		    sata_device.satadev_type != SATA_DTYPE_PMULT) {
+			/*
+			 * HBA port probe indicated that there is a device
+			 * attached. Check if the framework had device info
+			 * structure attached for this device.
+			 */
+			if (pmportinfo->pmport_dev_type != SATA_DTYPE_NONE) {
+				ASSERT(SATA_PMPORTINFO_DRV_INFO(pmportinfo) !=
+				    NULL);
+
+				sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo);
+				if ((sdinfo->satadrv_type &
+				    SATA_VALID_DEV_TYPE) != 0) {
+					/*
+					 * Dev info structure is present.
+					 * If dev_type is set to known type in
+					 * the framework's drive info struct
+					 * then the device existed before and
+					 * the link was probably lost
+					 * momentarily - in such case
+					 * we may want to check device
+					 * identity.
+					 * Identity check is not supported now.
+					 *
+					 * Link established event
+					 * triggers device reset event.
+					 */
+					(SATA_PMPORTINFO_DRV_INFO(pmportinfo))->
+					    satadrv_event_flags |=
+					    SATA_EVNT_DEVICE_RESET;
+				}
+			} else if (pmportinfo->pmport_dev_type ==
+			    SATA_DTYPE_NONE) {
+				/*
+				 * We got new device attached! If HBA does not
+				 * generate device attached events, trigger it
+				 * here.
+				 */
+				if (!(SATA_FEATURES(sata_hba_inst) &
+				    SATA_CTLF_HOTPLUG)) {
+					pmportinfo->pmport_event_flags |=
+					    SATA_EVNT_DEVICE_ATTACHED;
+				}
+			}
+			/* Reset link lost timeout */
+			pmportinfo->pmport_link_lost_time = 0;
+		}
+	}
+linklost:
+	if (event_flags & SATA_EVNT_LINK_LOST) {
+#ifdef SATA_DEBUG
+		if (pmportinfo->pmport_link_lost_time == 0) {
+			SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+			    "Processing port %d:%d link lost event",
+			    saddr->cport, saddr->pmport);
+		}
+#endif
+		if ((sata_device.satadev_scr.sstatus &
+		    SATA_PORT_DEVLINK_UP_MASK) == SATA_PORT_DEVLINK_UP) {
+			/* Ignore event */
+			SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+			    "Ignoring port %d:%d link lost event - link is up",
+			    saddr->cport, saddr->pmport);
+			goto done;
+		}
+		/*
+		 * When HBA cannot generate device attached/detached events,
+		 * we need to track link lost time and eventually generate
+		 * device detach event.
+		 */
+		if (!(SATA_FEATURES(sata_hba_inst) & SATA_CTLF_HOTPLUG)) {
+			/* We are tracking link lost time */
+			if (pmportinfo->pmport_link_lost_time == 0) {
+				/* save current time (lbolt value) */
+				pmportinfo->pmport_link_lost_time =
+				    ddi_get_lbolt();
+				/* just keep link lost event */
+				pmportinfo->pmport_event_flags |=
+				    SATA_EVNT_LINK_LOST;
+			} else {
+				clock_t cur_time = ddi_get_lbolt();
+				if ((cur_time -
+				    pmportinfo->pmport_link_lost_time) >=
+				    drv_usectohz(
+				    SATA_EVNT_LINK_LOST_TIMEOUT)) {
+					/* trigger device detach event */
+					pmportinfo->pmport_event_flags |=
+					    SATA_EVNT_DEVICE_DETACHED;
+					pmportinfo->pmport_link_lost_time = 0;
+					SATADBG2(SATA_DBG_EVENTS,
+					    sata_hba_inst,
+					    "Triggering port %d:%d "
+					    "device detached event",
+					    saddr->cport, saddr->pmport);
+				} else {
+					/* keep link lost event */
+					pmportinfo->pmport_event_flags |=
+					    SATA_EVNT_LINK_LOST;
+				}
+			}
+		}
+		/*
+		 * We could change port state to disable/delay access to
+		 * the attached device until the link is recovered.
+		 */
+	}
+done:
+	event_flags = pmportinfo->pmport_event_flags;
+	mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, saddr->cport,
+	    saddr->pmport));
+	if (event_flags != 0) {
+		mutex_enter(&sata_hba_inst->satahba_mutex);
+		sata_hba_inst->satahba_event_flags |= SATA_EVNT_MAIN;
+		mutex_exit(&sata_hba_inst->satahba_mutex);
+		mutex_enter(&sata_mutex);
+		sata_event_pending |= SATA_EVNT_MAIN;
+		mutex_exit(&sata_mutex);
+	}
+}
+
+/*
  * Device Detached Event processing.
  * Port is probed to find if a device is really gone. If so,
  * the device info structure is detached from the SATA port info structure
  * and released.
  * Port status is updated.
  *
- * NOTE: Process cports event only, no port multiplier ports.
+ * NOTE: Port multiplier ports events are handled by
+ * sata_process_pmdevice_detached()
  */
 static void
 sata_process_device_detached(sata_hba_inst_t *sata_hba_inst,
     sata_address_t *saddr)
 {
 	sata_cport_info_t *cportinfo;
+	sata_pmport_info_t *pmportinfo;
 	sata_drive_info_t *sdevinfo;
 	sata_device_t sata_device;
-	dev_info_t *tdip;
+	sata_address_t pmport_addr;
+	char name[16];
+	uint8_t cport = saddr->cport;
+	int npmport;
 	int rval;
 
 	SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst,
@@ -15878,58 +18087,205 @@
 	/*
 	 * We need to detach and release device info structure here
 	 */
-	if (SATA_CPORTINFO_DRV_INFO(cportinfo) != NULL) {
-		sdevinfo = SATA_CPORTINFO_DRV_INFO(cportinfo);
-		SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL;
+	if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) {
+		/*
+		 * A port-multiplier is removed.
+		 *
+		 * Calling sata_process_pmdevice_detached() does not work
+		 * here. The port multiplier is gone, so we cannot probe
+		 * sub-port any more and all pmult-related data structure must
+		 * be de-allocated immediately. Following structure of every
+		 * implemented sub-port behind the pmult are required to
+		 * released.
+		 *
+		 *   - attachment point
+		 *   - target node
+		 *   - sata_drive_info
+		 *   - sata_pmport_info
+		 */
+		for (npmport = 0; npmport < SATA_NUM_PMPORTS(sata_hba_inst,
+		    cport); npmport ++) {
+			SATADBG2(SATA_DBG_PMULT|SATA_DBG_EVENTS_PROC,
+			    sata_hba_inst,
+			    "Detaching target node at port %d:%d",
+			    cport, npmport);
+
+			mutex_exit(&SATA_CPORT_MUTEX(sata_hba_inst, cport));
+
+			/* Remove attachment point. */
+			name[0] = '\0';
+			(void) sprintf(name, "%d.%d", cport, npmport);
+			ddi_remove_minor_node(SATA_DIP(sata_hba_inst), name);
+			sata_log(sata_hba_inst, CE_NOTE,
+			    "Remove attachment point of port %d:%d",
+			    cport, npmport);
+
+			/* Remove target node */
+			pmport_addr.cport = cport;
+			pmport_addr.pmport = (uint8_t)npmport;
+			pmport_addr.qual = SATA_ADDR_PMPORT;
+			sata_remove_target_node(sata_hba_inst, &pmport_addr);
+
+			mutex_enter(&SATA_CPORT_MUTEX(sata_hba_inst, cport));
+
+			/* Release sata_pmport_info & sata_drive_info. */
+			pmportinfo = SATA_PMPORT_INFO(sata_hba_inst,
+			    cport, npmport);
+			ASSERT(pmportinfo != NULL);
+
+			sdevinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo);
+			if (sdevinfo != NULL) {
+				(void) kmem_free((void *) sdevinfo,
+				    sizeof (sata_drive_info_t));
+			}
+
+			/* Release sata_pmport_info at last */
+			(void) kmem_free((void *) pmportinfo,
+			    sizeof (sata_pmport_info_t));
+		}
+
+		/* Finally, release sata_pmult_info */
+		(void) kmem_free((void *)
+		    SATA_CPORTINFO_PMULT_INFO(cportinfo),
+		    sizeof (sata_pmult_info_t));
+		SATA_CPORTINFO_PMULT_INFO(cportinfo) = NULL;
+
+		sata_log(sata_hba_inst, CE_WARN,
+		    "SATA port-multiplier detached at port %d", cport);
+
+		cportinfo->cport_dev_type = SATA_DTYPE_NONE;
+		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
+		    saddr->cport)->cport_mutex);
+	} else {
+		if (SATA_CPORTINFO_DRV_INFO(cportinfo) != NULL) {
+			sdevinfo = SATA_CPORTINFO_DRV_INFO(cportinfo);
+			SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL;
+			(void) kmem_free((void *)sdevinfo,
+			    sizeof (sata_drive_info_t));
+		}
+		sata_log(sata_hba_inst, CE_WARN,
+		    "SATA device detached at port %d", cport);
+
+		cportinfo->cport_dev_type = SATA_DTYPE_NONE;
+		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
+		    saddr->cport)->cport_mutex);
+
+		/*
+		 * Try to offline a device and remove target node
+		 * if it still exists
+		 */
+		sata_remove_target_node(sata_hba_inst, saddr);
+	}
+
+
+	/*
+	 * Generate sysevent - EC_DR / ESC_DR_AP_STATE_CHANGE
+	 * with the hint: SE_HINT_REMOVE
+	 */
+	sata_gen_sysevent(sata_hba_inst, saddr, SE_HINT_REMOVE);
+}
+
+/*
+ * Port Multiplier Port Device Deattached Event processing.
+ *
+ * NOTE: No Mutex should be hold.
+ */
+static void
+sata_process_pmdevice_detached(sata_hba_inst_t *sata_hba_inst,
+    sata_address_t *saddr)
+{
+	sata_pmport_info_t *pmportinfo;
+	sata_drive_info_t *sdevinfo;
+	sata_device_t sata_device;
+	int rval;
+	uint8_t cport, pmport;
+
+	cport = saddr->cport;
+	pmport = saddr->pmport;
+
+	SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+	    "Processing port %d:%d device detached",
+	    cport, pmport);
+
+	pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
+	mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport));
+
+	/* Clear event flag */
+	pmportinfo->pmport_event_flags &= ~SATA_EVNT_DEVICE_DETACHED;
+
+	/* If the port is in SHUTDOWN or FAILED state, ignore detach event. */
+	if ((pmportinfo->pmport_state &
+	    (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) != 0) {
+		mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport));
+		return;
+	}
+	/* For sanity, re-probe the port */
+	sata_device.satadev_rev = SATA_DEVICE_REV;
+	sata_device.satadev_addr = *saddr;
+
+	/*
+	 * We have to exit mutex, because the HBA probe port function may
+	 * block on its own mutex.
+	 */
+	mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport));
+	rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
+	    (SATA_DIP(sata_hba_inst), &sata_device);
+	mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport));
+	sata_update_pmport_info(sata_hba_inst, &sata_device);
+	if (rval != SATA_SUCCESS) {
+		/* Something went wrong? Fail the port */
+		pmportinfo->pmport_state = SATA_PSTATE_FAILED;
+		mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport));
+		SATA_LOG_D((sata_hba_inst, CE_WARN,
+		    "SATA port %d:%d probing failed",
+		    saddr->pmport));
+		/*
+		 * We may want to release device info structure, but
+		 * it is not necessary.
+		 */
+		return;
+	} else {
+		/* port probed successfully */
+		pmportinfo->pmport_state |=
+		    SATA_STATE_PROBED | SATA_STATE_READY;
+	}
+	/*
+	 * Check if a device is still attached. For sanity, check also
+	 * link status - if no link, there is no device.
+	 */
+	if ((sata_device.satadev_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) ==
+	    SATA_PORT_DEVLINK_UP && sata_device.satadev_type !=
+	    SATA_DTYPE_NONE) {
+		/*
+		 * Device is still attached - ignore detach event.
+		 */
+		mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport));
+		SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+		    "Ignoring detach - device still attached to port %d",
+		    sata_device.satadev_addr.pmport);
+		return;
+	}
+	/*
+	 * We need to detach and release device info structure here
+	 */
+	if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) {
+		sdevinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo);
+		SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL;
 		(void) kmem_free((void *)sdevinfo,
 		    sizeof (sata_drive_info_t));
 	}
-	cportinfo->cport_dev_type = SATA_DTYPE_NONE;
+	pmportinfo->pmport_dev_type = SATA_DTYPE_NONE;
 	/*
 	 * Device cannot be reached anymore, even if the target node may be
 	 * still present.
 	 */
-
-	mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_mutex);
-	sata_log(sata_hba_inst, CE_WARN, "SATA device detached at port %d",
-	    sata_device.satadev_addr.cport);
+	mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport));
 
 	/*
 	 * Try to offline a device and remove target node if it still exists
 	 */
-	tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), saddr->cport);
-	if (tdip != NULL) {
-		/*
-		 * Target node exists.  Unconfigure device then remove
-		 * the target node (one ndi operation).
-		 */
-		if (ndi_devi_offline(tdip, NDI_DEVI_REMOVE) != NDI_SUCCESS) {
-			/*
-			 * PROBLEM - no device, but target node remained
-			 * This happens when the file was open or node was
-			 * waiting for resources.
-			 */
-			SATA_LOG_D((sata_hba_inst, CE_WARN,
-			    "sata_process_device_detached: "
-			    "Failed to remove target node for "
-			    "detached SATA device."));
-			/*
-			 * Set target node state to DEVI_DEVICE_REMOVED.
-			 * But re-check first that the node still exists.
-			 */
-			tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst),
-			    saddr->cport);
-			if (tdip != NULL) {
-				sata_set_device_removed(tdip);
-				/*
-				 * Instruct event daemon to retry the
-				 * cleanup later.
-				 */
-				sata_set_target_node_cleanup(sata_hba_inst,
-				    &sata_device.satadev_addr);
-			}
-		}
-	}
+	sata_remove_target_node(sata_hba_inst, saddr);
+
 	/*
 	 * Generate sysevent - EC_DR / ESC_DR_AP_STATE_CHANGE
 	 * with the hint: SE_HINT_REMOVE
@@ -15951,18 +18307,22 @@
  *
  * This function cannot be called in interrupt context (it may sleep).
  *
- * NOTE: Process cports event only, no port multiplier ports.
+ * NOTE: Port multiplier ports events are handled by
+ * sata_process_pmdevice_attached()
  */
 static void
 sata_process_device_attached(sata_hba_inst_t *sata_hba_inst,
     sata_address_t *saddr)
 {
-	sata_cport_info_t *cportinfo;
-	sata_drive_info_t *sdevinfo;
+	sata_cport_info_t *cportinfo = NULL;
+	sata_drive_info_t *sdevinfo = NULL;
+	sata_pmult_info_t *pmultinfo = NULL;
+	sata_pmport_info_t *pmportinfo = NULL;
 	sata_device_t sata_device;
 	dev_info_t *tdip;
-	uint32_t event_flags;
+	uint32_t event_flags = 0, pmult_event_flags = 0;
 	int rval;
+	int npmport;
 
 	SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst,
 	    "Processing port %d device attached", saddr->cport);
@@ -16103,6 +18463,40 @@
 				cportinfo->cport_event_flags |=
 				    SATA_EVNT_DEVICE_ATTACHED;
 			}
+		} else if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) {
+			cportinfo->cport_dev_attach_time = 0;
+			sata_log(sata_hba_inst, CE_NOTE,
+			    "SATA port-multiplier detected at port %d",
+			    saddr->cport);
+
+			if (SATA_CPORTINFO_PMULT_INFO(cportinfo) != NULL) {
+				/* Log the info of new port multiplier */
+				sata_show_pmult_info(sata_hba_inst,
+				    &sata_device);
+			}
+
+			ASSERT(SATA_CPORTINFO_PMULT_INFO(cportinfo) != NULL);
+			pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo);
+			for (npmport = 0; npmport <
+			    pmultinfo->pmult_num_dev_ports; npmport++) {
+				pmportinfo = SATA_PMPORT_INFO(sata_hba_inst,
+				    saddr->cport, npmport);
+				ASSERT(pmportinfo != NULL);
+
+				mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
+				    saddr->cport)->cport_mutex);
+				mutex_enter(&pmportinfo->pmport_mutex);
+				/* Marked all pmports with link events. */
+				pmportinfo->pmport_event_flags =
+				    SATA_EVNT_LINK_ESTABLISHED;
+				pmult_event_flags |=
+				    pmportinfo->pmport_event_flags;
+				mutex_exit(&pmportinfo->pmport_mutex);
+				mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
+				    saddr->cport)->cport_mutex);
+			}
+			/* Auto-online is not available for PMult now. */
+
 		} else {
 			/*
 			 * If device was successfully attached, the subsequent
@@ -16138,7 +18532,7 @@
 			 * device was detached.
 			 */
 			tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst),
-			    saddr->cport);
+			    saddr->cport, saddr->pmport);
 			mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
 			    saddr->cport)->cport_mutex);
 			if (tdip != NULL) {
@@ -16203,6 +18597,269 @@
 
 	event_flags = cportinfo->cport_event_flags;
 	mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_mutex);
+	if (event_flags != 0 || pmult_event_flags != 0) {
+		mutex_enter(&sata_hba_inst->satahba_mutex);
+		sata_hba_inst->satahba_event_flags |= SATA_EVNT_MAIN;
+		mutex_exit(&sata_hba_inst->satahba_mutex);
+		mutex_enter(&sata_mutex);
+		sata_event_pending |= SATA_EVNT_MAIN;
+		mutex_exit(&sata_mutex);
+	}
+}
+
+/*
+ * Port Multiplier Port Device Attached Event processing.
+ *
+ * NOTE: No Mutex should be hold.
+ */
+static void
+sata_process_pmdevice_attached(sata_hba_inst_t *sata_hba_inst,
+    sata_address_t *saddr)
+{
+	sata_pmport_info_t *pmportinfo;
+	sata_drive_info_t *sdinfo;
+	sata_device_t sata_device;
+	dev_info_t *tdip;
+	uint32_t event_flags;
+	uint8_t cport = saddr->cport;
+	uint8_t pmport = saddr->pmport;
+	int rval;
+
+	SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+	    "Processing port %d:%d device attached", cport, pmport);
+
+	pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport);
+
+	mutex_enter(&pmportinfo->pmport_mutex);
+
+	/* Clear attach event flag first */
+	pmportinfo->pmport_event_flags &= ~SATA_EVNT_DEVICE_ATTACHED;
+
+	/* If the port is in SHUTDOWN or FAILED state, ignore event. */
+	if ((pmportinfo->pmport_state &
+	    (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) != 0) {
+		pmportinfo->pmport_dev_attach_time = 0;
+		mutex_exit(&pmportinfo->pmport_mutex);
+		return;
+	}
+
+	/*
+	 * If the sata_drive_info structure is found attached to the port info,
+	 * despite the fact the device was removed and now it is re-attached,
+	 * the old drive info structure was not removed.
+	 * Arbitrarily release device info structure.
+	 */
+	if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) {
+		sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo);
+		SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL;
+		(void) kmem_free((void *)sdinfo,
+		    sizeof (sata_drive_info_t));
+		SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+		    "Arbitrarily detaching old device info.", NULL);
+	}
+	pmportinfo->pmport_dev_type = SATA_DTYPE_NONE;
+
+	/* For sanity, re-probe the port */
+	sata_device.satadev_rev = SATA_DEVICE_REV;
+	sata_device.satadev_addr = *saddr;
+
+	/*
+	 * We have to exit mutex, because the HBA probe port function may
+	 * block on its own mutex.
+	 */
+	mutex_exit(&pmportinfo->pmport_mutex);
+	rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst))
+	    (SATA_DIP(sata_hba_inst), &sata_device);
+	mutex_enter(&pmportinfo->pmport_mutex);
+
+	sata_update_pmport_info(sata_hba_inst, &sata_device);
+	if (rval != SATA_SUCCESS) {
+		/* Something went wrong? Fail the port */
+		pmportinfo->pmport_state = SATA_PSTATE_FAILED;
+		pmportinfo->pmport_dev_attach_time = 0;
+		mutex_exit(&pmportinfo->pmport_mutex);
+		SATA_LOG_D((sata_hba_inst, CE_WARN,
+		    "SATA port %d:%d probing failed", cport, pmport));
+		return;
+	} else {
+		/* pmport probed successfully */
+		pmportinfo->pmport_state |=
+		    SATA_STATE_PROBED | SATA_STATE_READY;
+	}
+	/*
+	 * Check if a device is still attached. For sanity, check also
+	 * link status - if no link, there is no device.
+	 */
+	if ((sata_device.satadev_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) !=
+	    SATA_PORT_DEVLINK_UP || sata_device.satadev_type ==
+	    SATA_DTYPE_NONE) {
+		/*
+		 * No device - ignore attach event.
+		 */
+		pmportinfo->pmport_dev_attach_time = 0;
+		mutex_exit(&pmportinfo->pmport_mutex);
+		SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst,
+		    "Ignoring attach - no device connected to port %d:%d",
+		    cport, pmport);
+		return;
+	}
+
+	mutex_exit(&pmportinfo->pmport_mutex);
+	/*
+	 * Generate sysevent - EC_DR / ESC_DR_AP_STATE_CHANGE
+	 * with the hint: SE_HINT_INSERT
+	 */
+	sata_gen_sysevent(sata_hba_inst, saddr, SE_HINT_INSERT);
+
+	/*
+	 * Port reprobing will take care of the creation of the device
+	 * info structure and determination of the device type.
+	 */
+	sata_device.satadev_addr = *saddr;
+	(void) sata_reprobe_port(sata_hba_inst, &sata_device,
+	    SATA_DEV_IDENTIFY_NORETRY);
+
+	mutex_enter(&pmportinfo->pmport_mutex);
+	if ((pmportinfo->pmport_state & SATA_STATE_READY) &&
+	    (pmportinfo->pmport_dev_type != SATA_DTYPE_NONE)) {
+		/* Some device is attached to the port */
+		if (pmportinfo->pmport_dev_type == SATA_DTYPE_UNKNOWN) {
+			/*
+			 * A device was not successfully attached.
+			 * Track retry time for device identification.
+			 */
+			if (pmportinfo->pmport_dev_attach_time != 0) {
+				clock_t cur_time = ddi_get_lbolt();
+				/*
+				 * If the retry time limit was not exceeded,
+				 * reinstate attach event.
+				 */
+				if ((cur_time -
+				    pmportinfo->pmport_dev_attach_time) <
+				    drv_usectohz(
+				    SATA_DEV_IDENTIFY_TIMEOUT)) {
+					/* OK, restore attach event */
+					pmportinfo->pmport_event_flags |=
+					    SATA_EVNT_DEVICE_ATTACHED;
+				} else {
+					/* Timeout - cannot identify device */
+					pmportinfo->pmport_dev_attach_time = 0;
+					sata_log(sata_hba_inst, CE_WARN,
+					    "Could not identify SATA device "
+					    "at port %d:%d",
+					    cport, pmport);
+				}
+			} else {
+				/*
+				 * Start tracking time for device
+				 * identification.
+				 * Save current time (lbolt value).
+				 */
+				pmportinfo->pmport_dev_attach_time =
+				    ddi_get_lbolt();
+				/* Restore attach event */
+				pmportinfo->pmport_event_flags |=
+				    SATA_EVNT_DEVICE_ATTACHED;
+			}
+		} else {
+			/*
+			 * If device was successfully attached, the subsequent
+			 * action depends on a state of the
+			 * sata_auto_online variable. If it is set to zero.
+			 * an explicit 'configure' command will be needed to
+			 * configure it. If its value is non-zero, we will
+			 * attempt to online (configure) the device.
+			 * First, log the message indicating that a device
+			 * was attached.
+			 */
+			pmportinfo->pmport_dev_attach_time = 0;
+			sata_log(sata_hba_inst, CE_WARN,
+			    "SATA device detected at port %d:%d",
+			    cport, pmport);
+
+			if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) {
+				sata_drive_info_t new_sdinfo;
+
+				/* Log device info data */
+				new_sdinfo = *(SATA_PMPORTINFO_DRV_INFO(
+				    pmportinfo));
+				sata_show_drive_info(sata_hba_inst,
+				    &new_sdinfo);
+			}
+
+			mutex_exit(&pmportinfo->pmport_mutex);
+
+			/*
+			 * Make sure that there is no target node for that
+			 * device. If so, release it. It should not happen,
+			 * unless we had problem removing the node when
+			 * device was detached.
+			 */
+			tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst),
+			    saddr->cport, saddr->pmport);
+			mutex_enter(&pmportinfo->pmport_mutex);
+			if (tdip != NULL) {
+
+#ifdef SATA_DEBUG
+				if ((pmportinfo->pmport_event_flags &
+				    SATA_EVNT_TARGET_NODE_CLEANUP) == 0)
+					sata_log(sata_hba_inst, CE_WARN,
+					    "sata_process_device_attached: "
+					    "old device target node exists!");
+#endif
+				/*
+				 * target node exists - try to unconfigure
+				 * device and remove the node.
+				 */
+				mutex_exit(&pmportinfo->pmport_mutex);
+				rval = ndi_devi_offline(tdip,
+				    NDI_DEVI_REMOVE);
+				mutex_enter(&pmportinfo->pmport_mutex);
+
+				if (rval == NDI_SUCCESS) {
+					pmportinfo->pmport_event_flags &=
+					    ~SATA_EVNT_TARGET_NODE_CLEANUP;
+					pmportinfo->pmport_tgtnode_clean =
+					    B_TRUE;
+				} else {
+					/*
+					 * PROBLEM - the target node remained
+					 * and it belongs to a previously
+					 * attached device.
+					 * This happens when the file was open
+					 * or the node was waiting for
+					 * resources at the time the
+					 * associated device was removed.
+					 * Instruct event daemon to retry the
+					 * cleanup later.
+					 */
+					sata_log(sata_hba_inst,
+					    CE_WARN,
+					    "Application(s) accessing "
+					    "previously attached SATA "
+					    "device have to release "
+					    "it before newly inserted "
+					    "device can be made accessible."
+					    "at port %d:%d",
+					    cport, pmport);
+					pmportinfo->pmport_event_flags |=
+					    SATA_EVNT_TARGET_NODE_CLEANUP;
+					pmportinfo->pmport_tgtnode_clean =
+					    B_FALSE;
+				}
+			}
+			if (sata_auto_online != 0) {
+				pmportinfo->pmport_event_flags |=
+				    SATA_EVNT_AUTOONLINE_DEVICE;
+			}
+
+		}
+	} else {
+		pmportinfo->pmport_dev_attach_time = 0;
+	}
+
+	event_flags = pmportinfo->pmport_event_flags;
+	mutex_exit(&pmportinfo->pmport_mutex);
 	if (event_flags != 0) {
 		mutex_enter(&sata_hba_inst->satahba_mutex);
 		sata_hba_inst->satahba_event_flags |= SATA_EVNT_MAIN;
@@ -16211,8 +18868,19 @@
 		sata_event_pending |= SATA_EVNT_MAIN;
 		mutex_exit(&sata_mutex);
 	}
-}
-
+
+	/* clear the reset_in_progress events */
+	if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) {
+		if (pmportinfo->pmport_dev_type & SATA_VALID_DEV_TYPE) {
+			/* must clear flags on cport */
+			sata_pmult_info_t *pminfo =
+			    SATA_PMULT_INFO(sata_hba_inst,
+			    saddr->cport);
+			pminfo->pmult_event_flags |=
+			    SATA_EVNT_CLEAR_DEVICE_RESET;
+		}
+	}
+}
 
 /*
  * Device Target Node Cleanup Event processing.
@@ -16241,7 +18909,8 @@
 	 * Check if there is target node for that device and it is in the
 	 * DEVI_DEVICE_REMOVED state. If so, release it.
 	 */
-	tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), saddr->cport);
+	tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), saddr->cport,
+	    saddr->pmport);
 	if (tdip != NULL) {
 		/*
 		 * target node exists - check if it is target node of
@@ -16275,12 +18944,32 @@
 			mutex_exit(&sata_mutex);
 		}
 	} else {
-		mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
-		    saddr->cport)->cport_mutex);
-		cportinfo->cport_event_flags &=
-		    ~SATA_EVNT_TARGET_NODE_CLEANUP;
-		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
-		    saddr->cport)->cport_mutex);
+		if (saddr->qual == SATA_ADDR_CPORT ||
+		    saddr->qual == SATA_ADDR_DCPORT) {
+			mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
+			    saddr->cport)->cport_mutex);
+			cportinfo->cport_event_flags &=
+			    ~SATA_EVNT_TARGET_NODE_CLEANUP;
+			mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
+			    saddr->cport)->cport_mutex);
+		} else {
+			/* sanity check */
+			if (SATA_CPORT_DEV_TYPE(sata_hba_inst, saddr->cport) !=
+			    SATA_DTYPE_PMULT || SATA_PMULT_INFO(sata_hba_inst,
+			    saddr->cport) == NULL)
+				return;
+			if (SATA_PMPORT_INFO(sata_hba_inst, saddr->cport,
+			    saddr->pmport) == NULL)
+				return;
+
+			mutex_enter(&SATA_PMPORT_INFO(sata_hba_inst,
+			    saddr->cport, saddr->pmport)->pmport_mutex);
+			SATA_PMPORT_INFO(sata_hba_inst, saddr->cport,
+			    saddr->pmport)->pmport_event_flags &=
+			    ~SATA_EVNT_TARGET_NODE_CLEANUP;
+			mutex_exit(&SATA_PMPORT_INFO(sata_hba_inst,
+			    saddr->cport, saddr->pmport)->pmport_mutex);
+		}
 	}
 }
 
@@ -16327,7 +19016,8 @@
 	 * DEVI_DEVICE_REMOVED state. If so, abort onlining but keep
 	 * the event for later processing.
 	 */
-	tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), saddr->cport);
+	tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), saddr->cport,
+	    saddr->pmport);
 	if (tdip != NULL) {
 		/*
 		 * target node exists - check if it is target node of
@@ -16490,12 +19180,26 @@
 sata_set_target_node_cleanup(sata_hba_inst_t *sata_hba_inst,
     sata_address_t *saddr)
 {
-	mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_mutex);
-	SATA_CPORT_EVENT_FLAGS(sata_hba_inst, saddr->cport) |=
-	    SATA_EVNT_TARGET_NODE_CLEANUP;
-	SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_tgtnode_clean =
-	    B_FALSE;
-	mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_mutex);
+	if (saddr->qual == SATA_ADDR_CPORT ||
+	    saddr->qual == SATA_ADDR_DCPORT) {
+		mutex_enter(&SATA_CPORT_INFO(sata_hba_inst,
+		    saddr->cport)->cport_mutex);
+		SATA_CPORT_EVENT_FLAGS(sata_hba_inst, saddr->cport) |=
+		    SATA_EVNT_TARGET_NODE_CLEANUP;
+		SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->
+		    cport_tgtnode_clean = B_FALSE;
+		mutex_exit(&SATA_CPORT_INFO(sata_hba_inst,
+		    saddr->cport)->cport_mutex);
+	} else {
+		mutex_enter(&SATA_PMPORT_INFO(sata_hba_inst,
+		    saddr->cport, saddr->pmport)->pmport_mutex);
+		SATA_PMPORT_EVENT_FLAGS(sata_hba_inst, saddr->cport,
+		    saddr->pmport) |= SATA_EVNT_TARGET_NODE_CLEANUP;
+		SATA_PMPORT_INFO(sata_hba_inst, saddr->cport, saddr->pmport)->
+		    pmport_tgtnode_clean = B_FALSE;
+		mutex_exit(&SATA_PMPORT_INFO(sata_hba_inst,
+		    saddr->cport, saddr->pmport)->pmport_mutex);
+	}
 	mutex_enter(&sata_hba_inst->satahba_mutex);
 	sata_hba_inst->satahba_event_flags |= SATA_EVNT_MAIN;
 	mutex_exit(&sata_hba_inst->satahba_mutex);
@@ -16512,8 +19216,6 @@
  *
  * Returns B_TRUE if the target node is in DEVI_DEVICE_REMOVED state,
  * B_FALSE otherwise.
- *
- * NOTE: No port multiplier support.
  */
 static boolean_t
 sata_check_device_removed(dev_info_t *tdip)
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_conn.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_conn.c	Mon Aug 17 09:14:35 2009 -0400
@@ -478,13 +478,14 @@
 
 		/*
 		 * This logic assumes that the IDM login-snooping code
-		 * and the initiator login code will agree on whether
-		 * the connection is in FFP.  The reason we do this
-		 * is that we don't want to process CN_FFP_DISABLED until
-		 * CN_FFP_ENABLED has been full handled.
+		 * and the initiator login code will agree to go when
+		 * the connection is in FFP or final error received.
+		 * The reason we do this is that we don't want to process
+		 * CN_FFP_DISABLED until CN_FFP_ENABLED has been full handled.
 		 */
 		mutex_enter(&icp->conn_login_mutex);
-		while (icp->conn_login_state != LOGIN_FFP) {
+		while ((icp->conn_login_state != LOGIN_FFP) &&
+		    (icp->conn_login_state != LOGIN_ERROR)) {
 			cv_wait(&icp->conn_login_cv, &icp->conn_login_mutex);
 		}
 		mutex_exit(&icp->conn_login_mutex);
@@ -524,8 +525,13 @@
 
 		case FD_CONN_FAIL:
 		default:
-			iscsi_conn_update_state_locked(icp,
-			    ISCSI_CONN_STATE_FAILED);
+			if (icp->conn_state == ISCSI_CONN_STATE_IN_LOGIN) {
+				iscsi_conn_update_state_locked(icp,
+				    ISCSI_CONN_STATE_FREE);
+			} else {
+				iscsi_conn_update_state_locked(icp,
+				    ISCSI_CONN_STATE_FAILED);
+			}
 			break;
 		}
 
@@ -544,7 +550,8 @@
 		 * what CN_CONNECT_LOST means to us.
 		 */
 		in_login = (boolean_t)data;
-		if (in_login) {
+		if (in_login ||
+		    (icp->conn_prev_state == ISCSI_CONN_STATE_IN_LOGIN)) {
 			mutex_enter(&icp->conn_state_mutex);
 
 			icp->conn_state_idm_connected = B_FALSE;
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c	Mon Aug 17 09:14:35 2009 -0400
@@ -57,6 +57,14 @@
 
 #define	ISCSI_LOGIN_RETRY_DELAY		5	/* seconds */
 
+#define	ISCSI_LOGIN_TRANSIT_FFP(flags) \
+	(!(flags & ISCSI_FLAG_LOGIN_CONTINUE) && \
+	(flags & ISCSI_FLAG_LOGIN_TRANSIT) && \
+	(ISCSI_LOGIN_CURRENT_STAGE(flags) == \
+	ISCSI_OP_PARMS_NEGOTIATION_STAGE) && \
+	(ISCSI_LOGIN_NEXT_STAGE(flags) == \
+	ISCSI_FULL_FEATURE_PHASE))
+
 /*
  * +--------------------------------------------------------------------+
  * | External Login Interface						|
@@ -310,7 +318,11 @@
 				}
 			}
 			break;
+		case ISCSI_CONN_STATE_FREE:
+			mutex_exit(&icp->conn_state_mutex);
+			break;
 		default:
+			mutex_exit(&icp->conn_state_mutex);
 			ASSERT(0);
 			break;
 		}
@@ -535,6 +547,18 @@
 			    icp->conn_login_max_data_length);
 			/* pass back whatever error we discovered */
 			if (!ISCSI_SUCCESS(rval)) {
+				if (ISCSI_LOGIN_TRANSIT_FFP(ilrhp->flags)) {
+					/*
+					 * iSCSI connection transit to next
+					 * FFP stage while iscsi params
+					 * ngeotiate error, LOGIN_ERROR
+					 * marked so CN_FFP_ENABLED can
+					 * be fully handled before
+					 * CN_FFP_DISABLED can be processed.
+					 */
+					iscsi_login_update_state(icp,
+					    LOGIN_ERROR);
+				}
 				goto iscsi_login_done;
 			}
 
--- a/usr/src/uts/common/io/scsi/adapters/scsi_vhci/fops/tpgs_tape.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/scsi_vhci/fops/tpgs_tape.c	Mon Aug 17 09:14:35 2009 -0400
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -96,7 +96,11 @@
 tpgs_tape_device_probe(struct scsi_device *sd, struct scsi_inquiry *inq,
     void **ctpriv)
 {
-	int		mode, state, xlf, preferred = 0;
+	int mode;
+	int state;
+	int xlf;
+	int preferred = 0;
+	int support;
 
 	VHCI_DEBUG(6, (CE_NOTE, NULL, "tpgs_tape_device_probe: vidpid %s\n",
 	    inq->inq_vid));
@@ -104,48 +108,58 @@
 	if (inq->inq_tpgs == TPGS_FAILOVER_NONE) {
 		VHCI_DEBUG(4, (CE_WARN, NULL,
 		    "!tpgs_tape_device_probe: not a standard tpgs device"));
-		return (SFO_DEVICE_PROBE_PHCI);
-	}
-
-	if (inq->inq_dtype != DTYPE_SEQUENTIAL) {
+		support = SFO_DEVICE_PROBE_PHCI;
+	} else if (inq->inq_dtype != DTYPE_SEQUENTIAL) {
 		VHCI_DEBUG(4, (CE_NOTE, NULL,
 		    "!tpgs_tape_device_probe: Detected a "
 		    "Standard Asymmetric device "
 		    "not yet supported\n"));
-		return (SFO_DEVICE_PROBE_PHCI);
-	}
-
-	if (vhci_tpgs_get_target_fo_mode(sd, &mode, &state, &xlf, &preferred)) {
+		support = SFO_DEVICE_PROBE_PHCI;
+	} else if (vhci_tpgs_get_target_fo_mode(sd, &mode, &state, &xlf,
+	    &preferred)) {
 		VHCI_DEBUG(4, (CE_WARN, NULL, "!unable to fetch fo "
 		    "mode: sd(%p)", (void *) sd));
-		return (SFO_DEVICE_PROBE_PHCI);
-	}
-
-	if (inq->inq_tpgs == TPGS_FAILOVER_IMPLICIT) {
+		support = SFO_DEVICE_PROBE_PHCI;
+	} else if (inq->inq_tpgs == TPGS_FAILOVER_IMPLICIT) {
 		VHCI_DEBUG(1, (CE_NOTE, NULL,
 		    "!tpgs_tape_device_probe: Detected a "
 		    "Standard Asymmetric device "
 		    "with implicit failover\n"));
-		return (SFO_DEVICE_PROBE_VHCI);
-	}
-	if (inq->inq_tpgs == TPGS_FAILOVER_EXPLICIT) {
+		support = SFO_DEVICE_PROBE_VHCI;
+	} else if (inq->inq_tpgs == TPGS_FAILOVER_EXPLICIT) {
 		VHCI_DEBUG(1, (CE_NOTE, NULL,
 		    "!tpgs_tape_device_probe: Detected a "
 		    "Standard Asymmetric device "
 		    "with explicit failover\n"));
-		return (SFO_DEVICE_PROBE_VHCI);
-	}
-	if (inq->inq_tpgs == TPGS_FAILOVER_BOTH) {
+		support = SFO_DEVICE_PROBE_VHCI;
+	} else if (inq->inq_tpgs == TPGS_FAILOVER_BOTH) {
 		VHCI_DEBUG(1, (CE_NOTE, NULL,
 		    "!tpgs_tape_device_probe: Detected a "
 		    "Standard Asymmetric device "
 		    "which supports both implicit and explicit failover\n"));
-		return (SFO_DEVICE_PROBE_VHCI);
+		support = SFO_DEVICE_PROBE_VHCI;
+	} else {
+		VHCI_DEBUG(1, (CE_WARN, NULL,
+		    "!tpgs_tape_device_probe: "
+		    "Unknown tpgs_bits: %x", inq->inq_tpgs));
+		support = SFO_DEVICE_PROBE_PHCI;
 	}
-	VHCI_DEBUG(1, (CE_WARN, NULL,
-	    "!tpgs_tape_device_probe: "
-	    "Unknown tpgs_bits: %x", inq->inq_tpgs));
-	return (SFO_DEVICE_PROBE_PHCI);
+
+	if (support == SFO_DEVICE_PROBE_VHCI) {
+		/*
+		 * Policy only applies to 'client' probe, not
+		 * vhci_is_dev_supported() pHCI probe. Detect difference
+		 * based on ctpriv.
+		 */
+		if (ctpriv &&
+		    (mdi_set_lb_policy(sd->sd_dev, LOAD_BALANCE_NONE) !=
+		    MDI_SUCCESS)) {
+			VHCI_DEBUG(6, (CE_NOTE, NULL, "!fail load balance none"
+			    ": %s\n", inq->inq_vid));
+			support = SFO_DEVICE_PROBE_PHCI;
+		}
+	}
+	return (support);
 }
 
 static void
--- a/usr/src/uts/common/io/scsi/targets/sd.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/scsi/targets/sd.c	Mon Aug 17 09:14:35 2009 -0400
@@ -879,6 +879,7 @@
 #define	sd_blank_cmp			ssd_blank_cmp
 #define	sd_chk_vers1_data		ssd_chk_vers1_data
 #define	sd_set_vers1_properties		ssd_set_vers1_properties
+#define	sd_check_solid_state		ssd_check_solid_state
 
 #define	sd_get_physical_geometry	ssd_get_physical_geometry
 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
@@ -1271,6 +1272,7 @@
 static int   sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled);
 static void  sd_get_nv_sup(sd_ssc_t *ssc);
 static dev_t sd_make_device(dev_info_t *devi);
+static void  sd_check_solid_state(sd_ssc_t *ssc);
 
 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
 	uint64_t capacity);
@@ -5667,8 +5669,8 @@
  * Description: This routine sends an inquiry command with the EVPD bit set and
  *		a page code of 0x00 to the device. It is used to determine which
  *		vital product pages are available to find the devid. We are
- *		looking for pages 0x83 or 0x80.  If we return a negative 1, the
- *		device does not support that command.
+ *		looking for pages 0x83 0x80 or 0xB1.  If we return a negative 1,
+ *		the device does not support that command.
  *
  *   Arguments: un  - driver soft state (unit) structure
  *
@@ -5724,7 +5726,7 @@
 		 * Pages are returned in ascending order, and 0x83 is what we
 		 * are hoping for.
 		 */
-		while ((page_list[counter] <= 0x86) &&
+		while ((page_list[counter] <= 0xB1) &&
 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
 		    VPD_HEAD_OFFSET))) {
 			/*
@@ -5751,6 +5753,9 @@
 			case 0x86:
 				un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG;
 				break;
+			case 0xB1:
+				un->un_vpd_page_mask |= SD_VPD_DEV_CHARACTER_PG;
+				break;
 			}
 			counter++;
 		}
@@ -8092,6 +8097,11 @@
 
 	un->un_mediastate = DKIO_NONE;
 
+	/*
+	 * Check if this is a SSD(Solid State Drive).
+	 */
+	sd_check_solid_state(ssc);
+
 	cmlb_alloc_handle(&un->un_cmlbhandle);
 
 #if defined(__i386) || defined(__amd64)
@@ -30828,6 +30838,8 @@
 		mutex_enter(SD_MUTEX(un));
 		((tg_attribute_t *)arg)->media_is_writable =
 		    un->un_f_mmc_writable_media;
+		((tg_attribute_t *)arg)->media_is_solid_state =
+		    un->un_f_is_solid_state;
 		mutex_exit(SD_MUTEX(un));
 		return (0);
 	default:
@@ -31289,3 +31301,73 @@
 		xp->xb_ena = fm_ena_generate(0, FM_ENA_FMT1);
 	ssc->ssc_uscsi_info->ui_ena = xp->xb_ena;
 }
+
+
+/*
+ *     Function: sd_check_solid_state
+ *
+ * Description: Query the optional INQUIRY VPD page 0xb1. If the device
+ *              supports VPD page 0xb1, sd examines the MEDIUM ROTATION
+ *              RATE. If the MEDIUM ROTATION RATE is 1, sd assumes the
+ *              device is a solid state drive.
+ *
+ *     Context: Kernel thread or interrupt context.
+ */
+
+static void
+sd_check_solid_state(sd_ssc_t *ssc)
+{
+	int		rval		= 0;
+	uchar_t		*inqb1		= NULL;
+	size_t		inqb1_len	= MAX_INQUIRY_SIZE;
+	size_t		inqb1_resid	= 0;
+	struct sd_lun	*un;
+
+	ASSERT(ssc != NULL);
+	un = ssc->ssc_un;
+	ASSERT(un != NULL);
+	ASSERT(!mutex_owned(SD_MUTEX(un)));
+
+	mutex_enter(SD_MUTEX(un));
+	un->un_f_is_solid_state = FALSE;
+
+	if (ISCD(un)) {
+		mutex_exit(SD_MUTEX(un));
+		return;
+	}
+
+	if (sd_check_vpd_page_support(ssc) == 0 &&
+	    un->un_vpd_page_mask & SD_VPD_DEV_CHARACTER_PG) {
+		mutex_exit(SD_MUTEX(un));
+		/* collect page b1 data */
+		inqb1 = kmem_zalloc(inqb1_len, KM_SLEEP);
+
+		rval = sd_send_scsi_INQUIRY(ssc, inqb1, inqb1_len,
+		    0x01, 0xB1, &inqb1_resid);
+
+		if (rval == 0 && (inqb1_len - inqb1_resid > 5)) {
+			SD_TRACE(SD_LOG_COMMON, un,
+			    "sd_check_solid_state: \
+			    successfully get VPD page: %x \
+			    PAGE LENGTH: %x BYTE 4: %x \
+			    BYTE 5: %x", inqb1[1], inqb1[3], inqb1[4],
+			    inqb1[5]);
+
+			mutex_enter(SD_MUTEX(un));
+			/*
+			 * Check the MEDIUM ROTATION RATE. If it is set
+			 * to 1, the device is a solid state drive.
+			 */
+			if (inqb1[4] == 0 && inqb1[5] == 1) {
+				un->un_f_is_solid_state = TRUE;
+			}
+			mutex_exit(SD_MUTEX(un));
+		} else if (rval != 0) {
+			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
+		}
+
+		kmem_free(inqb1, inqb1_len);
+	} else {
+		mutex_exit(SD_MUTEX(un));
+	}
+}
--- a/usr/src/uts/common/io/srn.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/srn.c	Mon Aug 17 09:14:35 2009 -0400
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -78,6 +78,7 @@
 	int		srn_type[SRN_MAX_CLONE]; /* type of handshake */
 	int		srn_delivered[SRN_MAX_CLONE];
 	srn_event_info_t srn_pending[SRN_MAX_CLONE];
+	int		srn_fault[SRN_MAX_CLONE];
 } srn = { NULL, -1};
 typedef struct srnstate *srn_state_t;
 
@@ -85,6 +86,9 @@
 uint_t		srn_poll_cnt[SRN_MAX_CLONE];	/* count of events for poll */
 int		srn_apm_count;
 int		srn_autosx_count;
+/* Number of seconds to wait for clients to ack a poll */
+int		srn_timeout = 10;
+
 struct pollhead	srn_pollhead[SRN_MAX_CLONE];
 
 static int	srn_open(dev_t *, int, int, cred_t *);
@@ -251,7 +255,7 @@
 srn_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
 	struct pollhead **phpp)
 {
-	extern struct pollhead srn_pollhead[];	/* common/os/sunpm.c */
+	extern struct pollhead srn_pollhead[];
 	int	clone;
 
 	clone = SRN_MINOR_TO_CLONE(getminor(dev));
@@ -342,6 +346,7 @@
 	crfree(srn.srn_cred[clone]);
 	srn.srn_cred[clone] = 0;
 	srn_poll_cnt[clone] = 0;
+	srn.srn_fault[clone] = 0;
 	if (srn.srn_pending[clone].ae_type || srn.srn_delivered[clone]) {
 		srn.srn_pending[clone].ae_type = 0;
 		srn.srn_delivered[clone] = 0;
@@ -407,6 +412,7 @@
 		}
 		ASSERT(srn.srn_type[clone] == SRN_TYPE_APM);
 		srn.srn_type[clone] = SRN_TYPE_AUTOSX;
+		srn.srn_fault[clone] = 0;
 		srn_apm_count--;
 		ASSERT(srn_apm_count >= 0);
 		ASSERT(srn_autosx_count >= 0);
@@ -423,6 +429,11 @@
 		 * then wake up the kernel thread sleeping for the delivery
 		 */
 		PMD(PMD_SX, ("SRN_IOC_NEXTEVENT entered\n"))
+		if (srn.srn_fault[clone]) {
+			PMD(PMD_SX, ("SRN_IOC_NEXTEVENT clone %d fault "
+			    "cleared\n", clone))
+			srn.srn_fault[clone] = 0;
+		}
 		mutex_enter(&srn_clone_lock);
 		if (srn_poll_cnt[clone] == 0) {
 			mutex_exit(&srn_clone_lock);
@@ -450,6 +461,11 @@
 	case SRN_IOC_SUSPEND:
 		/* ack suspend */
 		PMD(PMD_SX, ("SRN_IOC_SUSPEND entered clone %d\n", clone))
+		if (srn.srn_fault[clone]) {
+			PMD(PMD_SX, ("SRN_IOC_SUSPEND clone %d fault "
+			    "cleared\n", clone))
+			srn.srn_fault[clone] = 0;
+		}
 		mutex_enter(&srn_clone_lock);
 		if (srn.srn_delivered[clone] != SRN_SUSPEND_REQ) {
 			mutex_exit(&srn_clone_lock);
@@ -467,6 +483,11 @@
 	case SRN_IOC_RESUME:
 		/* ack resume */
 		PMD(PMD_SX, ("SRN_IOC_RESUME entered clone %d\n", clone))
+		if (srn.srn_fault[clone]) {
+			PMD(PMD_SX, ("SRN_IOC_RESUME clone %d fault "
+			    "cleared\n", clone))
+			srn.srn_fault[clone] = 0;
+		}
 		mutex_enter(&srn_clone_lock);
 		if (srn.srn_delivered[clone] != SRN_NORMAL_RESUME) {
 			mutex_exit(&srn_clone_lock);
@@ -522,11 +543,13 @@
 	PMD(PMD_SX, ("count %d\n", count))
 	for (clone = 0; clone < SRN_MAX_CLONE; clone++) {
 		if (srn.srn_type[clone] == type) {
+#ifdef DEBUG
 			if (type == SRN_TYPE_APM) {
 				ASSERT(srn.srn_pending[clone].ae_type == 0);
 				ASSERT(srn_poll_cnt[clone] == 0);
 				ASSERT(srn.srn_delivered[clone] == 0);
 			}
+#endif
 			srn.srn_pending[clone].ae_type = event;
 			srn_poll_cnt[clone] = 1;
 			PMD(PMD_SX, ("pollwake %d\n", clone))
@@ -544,7 +567,7 @@
 	/* otherwise wait for acks */
 restart:
 	/*
-	 * We wait untill all of the pending events are cleared.
+	 * We wait until all of the pending events are cleared.
 	 * We have to start over every time we do a cv_wait because
 	 * we give up the mutex and can be re-entered
 	 */
@@ -552,10 +575,22 @@
 		if (srn.srn_clones[clone] == 0 ||
 		    srn.srn_type[clone] != SRN_TYPE_APM)
 			continue;
-		if (srn.srn_pending[clone].ae_type) {
+		if (srn.srn_pending[clone].ae_type && !srn.srn_fault[clone]) {
 			PMD(PMD_SX, ("srn_notify waiting for ack for clone %d, "
 			    "event %x\n", clone, event))
-			cv_wait(&srn_clones_cv[clone], &srn_clone_lock);
+			if (cv_timedwait(&srn_clones_cv[clone],
+			    &srn_clone_lock, ddi_get_lbolt() +
+			    drv_usectohz(srn_timeout * 1000000)) == -1) {
+				/*
+				 * Client didn't respond, mark it as faulted
+				 * and continue as if a regular signal.
+				 */
+				PMD(PMD_SX, ("srn_notify: clone %d did not "
+				    "ack event %x\n", clone, event))
+				cmn_err(CE_WARN, "srn_notify: clone %d did "
+				    "not ack event %x\n", clone, event);
+				srn.srn_fault[clone] = 1;
+			}
 			goto restart;
 		}
 	}
--- a/usr/src/uts/common/io/usb/usba/usba.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/usb/usba/usba.c	Mon Aug 17 09:14:35 2009 -0400
@@ -73,8 +73,11 @@
 /*
  * compatible name handling
  */
-/* allowing for 15 compat names, plus one force bind name */
-#define	USBA_MAX_COMPAT_NAMES		16
+/*
+ * allowing for 15 compat names, plus one force bind name and
+ * one possible specified client driver name
+ */
+#define	USBA_MAX_COMPAT_NAMES		17
 #define	USBA_MAX_COMPAT_NAME_LEN	64
 
 /* double linked list for usba_devices */
@@ -1841,7 +1844,7 @@
 	usb_dev_descr_t	*usb_dev_descr;
 	uint_t		n_cfgs;	/* number of configs */
 	uint_t		n_ifs;	/* number of interfaces */
-	uint_t		port;
+	uint_t		port, bus_num;
 	size_t		usb_config_length;
 	uchar_t 	*usb_config;
 	int		reg[1];
@@ -1867,7 +1870,7 @@
 	usb_dev_descr = usba_device->usb_dev_descr;
 	n_cfgs = usba_device->usb_n_cfgs;
 	n_ifs = usba_device->usb_n_ifs;
-
+	bus_num = usba_device->usb_addr;
 
 	if (address != ROOT_HUB_ADDR) {
 		size = usb_parse_if_descr(
@@ -1974,6 +1977,35 @@
 		    USBA_MAX_COMPAT_NAME_LEN);
 	}
 
+	/*
+	 * If the callback function of specified driver is registered,
+	 * it will be called here to check whether to take over the device.
+	 */
+	if (usb_cap.usba_dev_driver_cb != NULL) {
+		char		*dev_drv = NULL;
+		usb_dev_str_t	dev_str;
+		char		*pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+		dev_str.usb_mfg = usba_device->usb_mfg_str;
+		dev_str.usb_product = usba_device->usb_product_str;
+		dev_str.usb_serialno = usba_device->usb_serialno_str;
+
+		(void) ddi_pathname(child_dip, pathname);
+
+		if ((usb_cap.usba_dev_driver_cb(usb_dev_descr, &dev_str,
+		    pathname, bus_num, port, &dev_drv, NULL) == USB_SUCCESS) &&
+		    (dev_drv != NULL)) {
+			USB_DPRINTF_L3(DPRINT_MASK_USBA, usba_log_handle,
+			    "usba_ready_device_node: dev_driver=%s, port =%d,"
+			    "bus =%d, path=%s\n\t",
+			    dev_drv, port, bus_num, pathname);
+
+			(void) strncpy(usba_name[n++], dev_drv,
+			    USBA_MAX_COMPAT_NAME_LEN);
+		}
+		kmem_free(pathname, MAXPATHLEN);
+	}
+
 	/* create compatible names */
 	if (combined_node) {
 
--- a/usr/src/uts/common/io/usb/usba/usbai.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/usb/usba/usbai.c	Mon Aug 17 09:14:35 2009 -0400
@@ -18,7 +18,7 @@
  *
  * CDDL HEADER END
  *
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -82,7 +82,6 @@
  */
 int usb_force_enable_pm = 0;
 
-
 /* USBA framework initializations */
 void
 usba_usbai_initialization()
@@ -1288,3 +1287,38 @@
 {
 	return (usba_hubdi_reset_device(dip, reset_level));
 }
+
+/*
+ * usb device driver registration
+ */
+int
+usb_register_dev_driver(dev_info_t *dip, usb_dev_driver_callback_t cb)
+{
+	USB_DPRINTF_L4(DPRINT_MASK_USBAI, usbai_log_handle,
+	    "usb_register_dev_driver: register the specified driver "
+	    "in usba: dip = 0x%p", (void *)dip);
+
+	if (cb != NULL) {
+		usb_cap.dip = dip;
+		usb_cap.usba_dev_driver_cb = cb;
+
+		return (USB_SUCCESS);
+	}
+
+	return (USB_FAILURE);
+}
+
+/*
+ * usb device driver unregistration
+ */
+void
+usb_unregister_dev_driver(dev_info_t *dip)
+{
+	USB_DPRINTF_L4(DPRINT_MASK_USBAI, usbai_log_handle,
+	    "usb_unregister_dev_driver: unregister the registered "
+	    "driver: dip =0x%p", (void *)dip);
+
+	ASSERT(dip == usb_cap.dip);
+	usb_cap.dip = NULL;
+	usb_cap.usba_dev_driver_cb = NULL;
+}
--- a/usr/src/uts/common/io/warlock/hid_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/hid_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -86,6 +86,8 @@
 root	usb_get_current_cfgidx
 root	usb_register_client
 root	usb_reset_device
+root	usb_register_dev_driver
+root	usb_unregister_dev_driver
 
 root	usba_common_power
 root	usba_common_register_events
--- a/usr/src/uts/common/io/warlock/scsa2usb_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/scsa2usb_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -101,6 +101,8 @@
 root	usb_register_client
 root	usb_ugen_power
 root	usb_reset_device
+root	usb_register_dev_driver
+root	usb_unregister_dev_driver
 
 root	usba_common_power
 root	usba_common_register_events
--- a/usr/src/uts/common/io/warlock/ugen_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/ugen_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -75,6 +75,8 @@
 root	usb_is_pm_enabled
 root    usb_register_client
 root	usb_reset_device
+root	usb_register_dev_driver
+root	usb_unregister_dev_driver
 
 root	usba_common_power
 root	usba_common_register_events
--- a/usr/src/uts/common/io/warlock/usb_ia_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/usb_ia_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -128,7 +128,8 @@
 root	usb_ugen_reconnect_ev_cb
 root	usb_ugen_write
 root	usba_ready_interface_association_node
-
+root	usb_register_dev_driver
+root	usb_unregister_dev_driver
 
 root    hubd_root_hub_cleanup_thread
 root	hubd_restore_state_cb
--- a/usr/src/uts/common/io/warlock/usb_mid_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/usb_mid_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -120,6 +120,8 @@
 root    usb_register_client
 root	usb_reset_device
 root	usb_ugen_power
+root	usb_register_dev_driver
+root	usb_unregister_dev_driver
 
 root    hubd_root_hub_cleanup_thread
 root	hubd_restore_state_cb
--- a/usr/src/uts/common/io/warlock/usbftdi_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/usbftdi_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -95,6 +95,8 @@
 root usb_get_current_cfgidx
 root usb_register_client
 root usb_reset_device
+root usb_register_dev_driver
+root usb_unregister_dev_driver
 
 root usb_ugen_attach
 root usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbprn_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/usbprn_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -89,6 +89,8 @@
 root    usb_register_client
 root	usb_reset_device
 root	usb_ugen_power
+root	usb_register_dev_driver
+root	usb_unregister_dev_driver
 
 root	hcdi_autoclearing
 root	hcdi_cb_thread
--- a/usr/src/uts/common/io/warlock/usbsacm_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/usbsacm_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -17,10 +17,9 @@
 #
 # CDDL HEADER END
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
 
 one ohci_state
 one ehci_state
@@ -99,6 +98,8 @@
 root usb_get_current_cfgidx
 root usb_register_client
 root usb_reset_device
+root usb_register_dev_driver
+root usb_unregister_dev_driver
 
 root usb_ugen_attach
 root usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbser_edge_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/usbser_edge_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -79,6 +79,8 @@
 root	usb_get_current_cfgidx
 root    usb_register_client
 root	usb_reset_device
+root	usb_register_dev_driver
+root	usb_unregister_dev_driver
 
 root    usb_ugen_attach
 root    usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbser_keyspan_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/usbser_keyspan_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -100,6 +100,8 @@
 root	usb_get_current_cfgidx
 root    usb_register_client
 root	usb_reset_device
+root	usb_register_dev_driver
+root	usb_unregister_dev_driver
 
 root    usb_ugen_attach
 root    usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbskel_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/usbskel_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -84,6 +84,8 @@
 root	usb_get_current_cfgidx
 root    usb_register_client
 root	usb_reset_device
+root	usb_register_dev_driver
+root	usb_unregister_dev_driver
 
 root    usb_ugen_attach
 root    usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbsprl_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/usbsprl_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -94,6 +94,8 @@
 root usb_get_current_cfgidx
 root usb_register_client
 root usb_reset_device
+root usb_register_dev_driver
+root usb_unregister_dev_driver
 
 root usb_ugen_attach
 root usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbvc_with_usba.wlcmd	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/io/warlock/usbvc_with_usba.wlcmd	Mon Aug 17 09:14:35 2009 -0400
@@ -100,7 +100,8 @@
 root	usb_ugen_read
 root	usb_ugen_reconnect_ev_cb
 root	usb_ugen_write
-
+root	usb_register_dev_driver
+root	usb_unregister_dev_driver
 
 root	hcdi_autoclearing
 root	hcdi_cb_thread
--- a/usr/src/uts/common/os/sunpm.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/os/sunpm.c	Mon Aug 17 09:14:35 2009 -0400
@@ -5222,7 +5222,7 @@
 	ASSERT(keeper != kept);
 	if (PM_GET_PM_INFO(keeper) == NULL) {
 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
-		    "%s@%s(%s#%d), but the latter is not power managed",
+		    "%s@%s(%s#%d), but the former is not power managed",
 		    PM_DEVICE(keeper), PM_DEVICE(kept));
 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
 		    "power managed\n", pmf, PM_DEVICE(keeper)))
--- a/usr/src/uts/common/sys/cmlb.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/cmlb.h	Mon Aug 17 09:14:35 2009 -0400
@@ -49,6 +49,7 @@
 
 typedef struct tg_attribute {
 	int media_is_writable;
+	int media_is_solid_state;
 } tg_attribute_t;
 
 
--- a/usr/src/uts/common/sys/fs/zfs.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/fs/zfs.h	Mon Aug 17 09:14:35 2009 -0400
@@ -116,6 +116,7 @@
 	ZFS_PROP_STMF_SHAREINFO,	/* not exposed to the user */
 	ZFS_PROP_DEFER_DESTROY,
 	ZFS_PROP_USERREFS,
+	ZFS_PROP_LOGBIAS,
 	ZFS_NUM_PROPS
 } zfs_prop_t;
 
@@ -245,6 +246,11 @@
 	ZFS_CANMOUNT_NOAUTO = 2
 } zfs_canmount_type_t;
 
+typedef enum {
+	ZFS_LOGBIAS_LATENCY = 0,
+	ZFS_LOGBIAS_THROUGHPUT = 1
+} zfs_logbias_op_t;
+
 typedef enum zfs_share_op {
 	ZFS_SHARE_NFS = 0,
 	ZFS_UNSHARE_NFS = 1,
--- a/usr/src/uts/common/sys/ib/clients/ibd/ibd.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/ib/clients/ibd/ibd.h	Mon Aug 17 09:14:35 2009 -0400
@@ -297,7 +297,6 @@
 	uint16_t		id_pkix;
 	uint8_t			id_port;
 	ibt_mcg_info_t		*id_mcinfo;
-	boolean_t		id_bgroup_created;
 
 	mac_handle_t		id_mh;
 	mac_resource_handle_t	id_rh;
@@ -363,6 +362,9 @@
 	int			id_hca_res_lkey_capab;
 	ibt_lkey_t		id_res_lkey;
 
+	boolean_t		id_bgroup_created;
+	kmutex_t		id_macst_lock;
+	kcondvar_t		id_macst_cv;
 	uint32_t		id_mac_state;
 } ibd_state_t;
 
--- a/usr/src/uts/common/sys/mac_client_priv.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/mac_client_priv.h	Mon Aug 17 09:14:35 2009 -0400
@@ -120,7 +120,7 @@
 extern void	mac_rx_client_restart(mac_client_handle_t);
 extern void	mac_srs_perm_quiesce(mac_client_handle_t, boolean_t);
 extern int	mac_hwrings_get(mac_client_handle_t, mac_group_handle_t *,
-		    mac_ring_handle_t *);
+		    mac_ring_handle_t *, mac_ring_type_t);
 extern void	mac_hwring_setup(mac_ring_handle_t, mac_resource_handle_t);
 extern void	mac_hwring_teardown(mac_ring_handle_t);
 extern int	mac_hwring_disable_intr(mac_ring_handle_t);
--- a/usr/src/uts/common/sys/mac_impl.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/mac_impl.h	Mon Aug 17 09:14:35 2009 -0400
@@ -262,7 +262,7 @@
 #define	MAC_RING_TX_DEFAULT(mip, mp)			\
 	((mip->mi_default_tx_ring == NULL) ?		\
 	mip->mi_tx(mip->mi_driver, mp) :		\
-	mac_ring_tx(mip->mi_default_tx_ring, mp))
+	mac_hwring_tx(mip->mi_default_tx_ring, mp))
 
 #define	MAC_TX(mip, ring, mp, mcip) {					\
 	/*								\
@@ -275,7 +275,7 @@
 	    (ring == NULL))						\
 		mp = MAC_RING_TX_DEFAULT(mip, mp);			\
 	else								\
-		mp = mac_ring_tx(ring, mp);				\
+		mp = mac_hwring_tx(ring, mp);				\
 }
 
 /* mci_tx_flag */
@@ -585,7 +585,7 @@
 extern int mac_group_remmac(mac_group_t *, const uint8_t *);
 extern int mac_rx_group_add_flow(mac_client_impl_t *, flow_entry_t *,
     mac_group_t *);
-extern mblk_t *mac_ring_tx(mac_ring_handle_t, mblk_t *);
+extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);
 extern mac_ring_t *mac_reserve_tx_ring(mac_impl_t *, mac_ring_t *);
 extern void mac_release_tx_ring(mac_ring_handle_t);
 extern mac_group_t *mac_reserve_tx_group(mac_impl_t *, mac_share_handle_t);
--- a/usr/src/uts/common/sys/mac_soft_ring.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/mac_soft_ring.h	Mon Aug 17 09:14:35 2009 -0400
@@ -131,6 +131,9 @@
 	void		*st_arg1;
 	void		*st_arg2;
 	mac_group_t	*st_group;	/* TX group for share */
+	uint32_t	st_ring_count;	/* no. of tx rings */
+	mac_ring_handle_t	*st_rings;
+
 	boolean_t	st_woken_up;
 
 	/*
--- a/usr/src/uts/common/sys/multiboot.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/multiboot.h	Mon Aug 17 09:14:35 2009 -0400
@@ -35,8 +35,13 @@
  * Definitions of structures/data for using a multiboot compliant OS loader.
  */
 #define	MB_HEADER_MAGIC		 0x1BADB002	/* magic */
-#define	MB_HEADER_FLAGS		 0x00010003	/* flags we use */
-#define	MB_HEADER_CHECKSUM	-0x1BAEB005	/* -(magic + flag) */
+
+/* The 32-bit kernel does not require the use of the AOUT kludge */
+#define	MB_HEADER_FLAGS_32	 0x00000003	/* flags we use */
+#define	MB_HEADER_CHECKSUM_32	-0x1BADB005	/* -(magic + flag) */
+
+#define	MB_HEADER_FLAGS_64	 0x00010003	/* flags we use */
+#define	MB_HEADER_CHECKSUM_64	-0x1BAEB005	/* -(magic + flag) */
 
 /*
  * passed by boot loader to kernel
--- a/usr/src/uts/common/sys/nxge/nxge_hio.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/nxge/nxge_hio.h	Mon Aug 17 09:14:35 2009 -0400
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -35,9 +35,6 @@
 #include <nxge_ipp.h>
 #include <nxge_fflp.h>
 #include <sys/mac_provider.h>
-#if defined(sun4v)
-#include <sys/vnet_res.h>
-#endif
 
 #define	isLDOMservice(nxge) \
 	(nxge->environs == SOLARIS_SERVICE_DOMAIN)
@@ -46,6 +43,9 @@
 #define	isLDOMs(nxge) \
 	(isLDOMservice(nxge) || isLDOMguest(nxge))
 
+#define	NXGE_HIO_SHARE_MIN_CHANNELS	2
+#define	NXGE_HIO_SHARE_MAX_CHANNELS	2
+
 /* ------------------------------------------------------------------ */
 typedef uint8_t nx_rdc_t;
 typedef uint8_t nx_tdc_t;
@@ -88,37 +88,19 @@
 	dc_getinfo	getinfo;
 } nxhv_dc_fp_t;
 
-#if defined(sun4v)
-typedef struct {
-	vio_net_resource_reg_t	__register;
-	vio_net_resource_unreg_t unregister;
-
-	vio_net_callbacks_t	cb;
-
-} nx_vio_fp_t;
-#endif
-
 typedef struct {
 	boolean_t	ldoms;
-
 	nxhv_vr_fp_t	vr;
 	nxhv_dc_fp_t	tx;
 	nxhv_dc_fp_t	rx;
-
-#if defined(sun4v)
-	nx_vio_fp_t	vio;
-#endif
-
 } nxhv_fp_t;
 
 /* ------------------------------------------------------------------ */
 #define	NXGE_VR_SR_MAX		8 /* There are 8 subregions (SR). */
 
 typedef enum {
-
 	NXGE_HIO_TYPE_SERVICE,	/* We are a service domain driver. */
 	NXGE_HIO_TYPE_GUEST	/* We are a guest domain driver. */
-
 } nxge_hio_type_t;
 
 typedef enum {
@@ -130,7 +112,6 @@
 	FUNC2_VIR = 0x5000000,
 	FUNC3_MNT = 0x6000000,
 	FUNC3_VIR = 0x7000000
-
 } vr_base_address_t;
 
 #define	VR_STEP		0x2000000
@@ -146,7 +127,6 @@
 	FUNC3_VIR0,
 	FUNC3_VIR1,
 	FUNC_VIR_MAX
-
 } vr_region_t;
 
 typedef enum {
@@ -159,13 +139,11 @@
 	VP_CHANNEL_6,
 	VP_CHANNEL_7,
 	VP_CHANNEL_MAX
-
 } vp_channel_t;
 
 typedef enum {
 	VP_BOUND_TX = 1,
 	VP_BOUND_RX
-
 } vpc_type_t;
 
 #define	VP_VC_OFFSET(channel)	(channel << 10)
@@ -254,9 +232,6 @@
 	ether_addr_t	altmac;	/* The alternate MAC address. */
 	int		slot;	/* According to nxge_m_mmac_add(). */
 
-#if defined(sun4v)
-	vio_net_handle_t vhp;	/* The handle given to us by the vnet. */
-#endif
 	nxge_grp_t	rx_group;
 	nxge_grp_t	tx_group;
 
@@ -273,7 +248,6 @@
 	uint64_t	map;	/* Currently unused */
 
 	int		vector;	/* The DDI vector number (index) */
-
 } hio_ldg_t;
 
 /*
--- a/usr/src/uts/common/sys/sata/adapters/ahci/ahcireg.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/sata/adapters/ahci/ahcireg.h	Mon Aug 17 09:14:35 2009 -0400
@@ -212,7 +212,17 @@
 #define	AHCI_TFD_ERR_SHIFT	8
 #define	AHCI_TFD_ERR_SGS	(0x1 << 0) /* DDR1: Send_good_status */
 
+/* FIS-Based Switching Control Register */
+#define	AHCI_FBS_SWE_MASK	(0xf << 16)	/* Device With Error */
+#define	AHCI_FBS_ADO_MASK	(0xf << 12)	/* Active Device Optimization */
+#define	AHCI_FBS_DEV_MASK	(0xf << 8)	/* Device To Issue */
+#define	AHCI_FBS_SDE		(0x1 << 2)	/* Single Device Error */
+#define	AHCI_FBS_DEC		(0x1 << 1)	/* Device Error Clear */
+#define	AHCI_FBS_EN		(0x1 << 0)	/* Enable */
+
+/* Sxxx Registers */
 #define	AHCI_SERROR_CLEAR_ALL			0xffffffff
+#define	AHCI_SNOTIF_CLEAR_ALL			0xffffffff
 
 /* per port registers offset */
 #define	AHCI_PORT_OFFSET(ahci_ctlp, port)			\
@@ -262,6 +272,9 @@
 	/* SNotification */
 #define	AHCI_PORT_PxSNTF(ahci_ctlp, port)			\
 			(AHCI_PORT_OFFSET(ahci_ctlp, port) + 0x3c)
+	/* FIS-Based Switching Control */
+#define	AHCI_PORT_PxFBS(ahci_ctlp, port)			\
+			(AHCI_PORT_OFFSET(ahci_ctlp, port) + 0x40)
 
 #define	AHCI_SLOT_MASK(ahci_ctlp)				\
 	((ahci_ctlp->ahcictl_num_cmd_slots == AHCI_PORT_MAX_CMD_SLOTS) ? \
@@ -269,19 +282,14 @@
 #define	AHCI_NCQ_SLOT_MASK(ahci_portp)				\
 	((ahci_portp->ahciport_max_ncq_tags == AHCI_PORT_MAX_CMD_SLOTS) ? \
 	0xffffffff : ((0x1 << ahci_portp->ahciport_max_ncq_tags) - 1))
+#define	AHCI_PMPORT_MASK(ahci_portp)				\
+	((0x1 << ahci_portp->ahciport_pmult_info->ahcipmi_num_dev_ports) - 1)
 
 /* Device signatures */
 #define	AHCI_SIGNATURE_PORT_MULTIPLIER	0x96690101
 #define	AHCI_SIGNATURE_ATAPI		0xeb140101
 #define	AHCI_SIGNATURE_DISK		0x00000101
 
-/*
- * The address of the control port for the port multiplier, which is
- * used for control and status communication with the port multiplier
- * itself.
- */
-#define	AHCI_PORTMULT_CONTROL_PORT	0x0f
-
 #define	AHCI_H2D_REGISTER_FIS_TYPE	0x27
 #define	AHCI_H2D_REGISTER_FIS_LENGTH	5
 
--- a/usr/src/uts/common/sys/sata/adapters/ahci/ahcivar.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/sata/adapters/ahci/ahcivar.h	Mon Aug 17 09:14:35 2009 -0400
@@ -32,18 +32,105 @@
 extern "C" {
 #endif
 
+/*
+ * AHCI address qualifier flags (in qual field of ahci_addr struct).
+ */
+#define	AHCI_ADDR_NULL		0x00
+#define	AHCI_ADDR_PORT		0x01
+#define	AHCI_ADDR_PMPORT	0x02
+#define	AHCI_ADDR_PMULT		0x04
+#define	AHCI_ADDR_VALID		(AHCI_ADDR_PORT | \
+				AHCI_ADDR_PMULT | \
+				AHCI_ADDR_PMPORT)
+
+/*
+ * AHCI address structure.
+ */
+struct ahci_addr {
+
+	/* HBA port number */
+	uint8_t			aa_port;
+
+	/* Port multiplier port number */
+	uint8_t			aa_pmport;
+
+	/*
+	 * AHCI_ADDR_NULL
+	 * AHCI_ADDR_PORT
+	 * AHCI_ADDR_PMPORT
+	 * AHCI_ADDR_PMULT
+	 */
+	uint8_t			aa_qual;
+};
+typedef struct ahci_addr ahci_addr_t;
+
+_NOTE(SCHEME_PROTECTS_DATA("unshared data", ahci_addr))
+
+#define	AHCI_ADDR_IS_PORT(addrp)					\
+	((addrp)->aa_qual & AHCI_ADDR_PORT)
+#define	AHCI_ADDR_IS_PMPORT(addrp)					\
+	((addrp)->aa_qual & AHCI_ADDR_PMPORT)
+#define	AHCI_ADDR_IS_PMULT(addrp)					\
+	((addrp)->aa_qual & AHCI_ADDR_PMULT)
+#define	AHCI_ADDR_IS_VALID(addrp)					\
+	((addrp)->aa_port < SATA_MAX_CPORTS) &&				\
+	((addrp)->aa_pmport < SATA_MAX_PMPORTS) &&			\
+	((addrp)->aa_qual & AHCI_ADDR_VALID)
+
+#define	AHCI_ADDR_SET(addrp, port, pmport, qual)			\
+	{								\
+		(addrp)->aa_port = port;				\
+		(addrp)->aa_pmport = pmport;				\
+		(addrp)->aa_qual = qual;				\
+	}
+#define	AHCI_ADDR_SET_PORT(addrp, port)					\
+	AHCI_ADDR_SET(addrp, port, 0, AHCI_ADDR_PORT)
+#define	AHCI_ADDR_SET_PMPORT(addrp, port, pmport)			\
+	AHCI_ADDR_SET(addrp, port, pmport, AHCI_ADDR_PMPORT)
+#define	AHCI_ADDR_SET_PMULT(addrp, port)				\
+	AHCI_ADDR_SET(addrp, port, SATA_PMULT_HOSTPORT, AHCI_ADDR_PMULT)
+
 /* Type for argument of event handler */
 typedef	struct ahci_event_arg {
 	void		*ahciea_ctlp;
 	void		*ahciea_portp;
+	void		*ahciea_addrp;
 	uint32_t	ahciea_event;
 } ahci_event_arg_t;
 
 /* Warlock annotation */
 _NOTE(DATA_READABLE_WITHOUT_LOCK(ahci_event_arg_t::ahciea_ctlp))
 _NOTE(DATA_READABLE_WITHOUT_LOCK(ahci_event_arg_t::ahciea_portp))
+_NOTE(DATA_READABLE_WITHOUT_LOCK(ahci_event_arg_t::ahciea_addrp))
 _NOTE(DATA_READABLE_WITHOUT_LOCK(ahci_event_arg_t::ahciea_event))
 
+
+/*
+ * ahci_pmult_info stores the information of a port multiplier and its
+ * sub-devices in case a port multiplier is attached to an HBA port.
+ */
+struct ahci_pmult_info {
+
+	/* Number of the device ports */
+	int			ahcipmi_num_dev_ports;
+
+	/* Device type of the sub-devices of the port multipler */
+	uint8_t			ahcipmi_device_type[SATA_MAX_PMPORTS];
+
+	/* State of port multiplier port */
+	uint32_t		ahcipmi_port_state[SATA_MAX_PMPORTS];
+
+	/*
+	 * Port multiplier port on which there is outstanding NCQ
+	 * commands. Only make sense in command based switching mode.
+	 */
+	uint8_t			ahcipmi_ncq_pmport;
+
+	/* Pending asynchronous notification events tags */
+	uint32_t		ahcipmi_snotif_tags;
+};
+typedef struct ahci_pmult_info ahci_pmult_info_t;
+
 /*
  * flags for ahciport_flags
  *
@@ -69,14 +156,31 @@
  *
  * AHCI_PORT_FLAG_NODEV: this flag will be set when a device is found gone
  * during ahci_restart_port_wait_till_ready process.
+ *
+ * AHCI_PORT_FLAG_RDWR_PMULT: this flags will be set when a READ/WRITE
+ * PORTMULT command is being executed.
+ *
+ * AHCI_PORT_FLAG_IGNORE_IPMS: this flags will be set when enumerating a port
+ * multiplier. According AHCI spec, IPMS error should be ignore during
+ * enumeration of port multiplier.
+ *
+ * AHCI_PORT_FLAG_PMULT_SNTF: this flags will be set when the a asynchronous
+ * notification event on the port multiplier is being handled.
+ *
+ * AHCI_PORT_FLAG_HOTPLUG: this flags will be set when a hot plug event is
+ * being handled.
  */
-#define	AHCI_PORT_FLAG_SPINUP	0x01
-#define	AHCI_PORT_FLAG_MOPPING	0x02
-#define	AHCI_PORT_FLAG_POLLING	0x04
-#define	AHCI_PORT_FLAG_RQSENSE	0x08
-#define	AHCI_PORT_FLAG_STARTED	0x10
-#define	AHCI_PORT_FLAG_RDLOGEXT	0x20
-#define	AHCI_PORT_FLAG_NODEV	0x40
+#define	AHCI_PORT_FLAG_SPINUP		0x01
+#define	AHCI_PORT_FLAG_MOPPING		0x02
+#define	AHCI_PORT_FLAG_POLLING		0x04
+#define	AHCI_PORT_FLAG_RQSENSE		0x08
+#define	AHCI_PORT_FLAG_STARTED		0x10
+#define	AHCI_PORT_FLAG_RDLOGEXT		0x20
+#define	AHCI_PORT_FLAG_NODEV		0x40
+#define	AHCI_PORT_FLAG_RDWR_PMULT	0x80
+#define	AHCI_PORT_FLAG_IGNORE_IPMS	0x100
+#define	AHCI_PORT_FLAG_PMULT_SNTF	0x200
+#define	AHCI_PORT_FLAG_HOTPLUG		0x400
 
 typedef struct ahci_port {
 	/* The physical port number */
@@ -87,6 +191,9 @@
 	/* State of the port */
 	uint32_t		ahciport_port_state;
 
+	/* Port multiplier struct */
+	ahci_pmult_info_t	*ahciport_pmult_info;
+
 	/*
 	 * AHCI_PORT_FLAG_SPINUP
 	 * AHCI_PORT_FLAG_MOPPING
@@ -95,6 +202,10 @@
 	 * AHCI_PORT_FLAG_STARTED
 	 * AHCI_PORT_FLAG_RDLOGEXT
 	 * AHCI_PORT_FLAG_NODEV
+	 * AHCI_PORT_FLAG_RDWR_PMULT
+	 * AHCI_PORT_FLAG_IGNORE_IPMS
+	 * AHCI_PORT_FLAG_PMULT_SNTF
+	 * AHCI_PORT_FLAG_HOTPLUG
 	 */
 	int			ahciport_flags;
 
@@ -146,6 +257,9 @@
 	/* Keep the error retrieval sata packet */
 	sata_pkt_t		*ahciport_err_retri_pkt;
 
+	/* Keep the read/write port multiplier packet */
+	sata_pkt_t		*ahciport_rdwr_pmult_pkt;
+
 	/*
 	 * SATA HBA driver is supposed to remember and maintain device
 	 * reset state. While the reset is in progress, it doesn't accept
@@ -185,6 +299,50 @@
 _NOTE(MUTEX_PROTECTS_DATA(ahci_port_t::ahciport_mutex,
 				    ahci_port_t::ahciport_event_taskq))
 
+#define	AHCI_NUM_PORTS(ctlp)						\
+	(ctlp)->ahcictl_num_ports
+
+#define	AHCIPORT_NUM_PMPORTS(portp)					\
+	(portp)->ahciport_pmult_info->ahcipmi_num_dev_ports
+
+#define	AHCIPORT_NCQ_PMPORT(ahci_portp)					\
+	(ahci_portp->ahciport_pmult_info->ahcipmi_ncq_pmport)
+
+#define	AHCIPORT_DEV_TYPE(portp, addrp)					\
+	(portp)->ahciport_device_type
+
+#define	AHCIPORT_PMDEV_TYPE(portp, addrp)				\
+	(portp)->ahciport_pmult_info->ahcipmi_device_type		\
+	[(addrp)->aa_pmport]
+
+#define	AHCIPORT_GET_DEV_TYPE(portp, addrp)				\
+	(AHCI_ADDR_IS_PORT(addrp) | AHCI_ADDR_IS_PMULT(addrp) ?		\
+	AHCIPORT_DEV_TYPE(portp, addrp) :				\
+	AHCIPORT_PMDEV_TYPE(portp, addrp))
+
+#define	AHCIPORT_SET_DEV_TYPE(portp, addrp, type)			\
+	if (AHCI_ADDR_IS_PORT(addrp) | AHCI_ADDR_IS_PMULT(addrp))	\
+		AHCIPORT_DEV_TYPE(portp, addrp) = type;			\
+	else								\
+		AHCIPORT_PMDEV_TYPE(portp, addrp) = type;
+
+#define	AHCIPORT_STATE(portp, addrp)					\
+	(portp)->ahciport_port_state
+
+#define	AHCIPORT_PMSTATE(portp, addrp)					\
+	(portp)->ahciport_pmult_info->ahcipmi_port_state		\
+	[(addrp)->aa_pmport]
+
+#define	AHCIPORT_GET_STATE(portp, addrp)				\
+	(AHCI_ADDR_IS_PORT(addrp) | AHCI_ADDR_IS_PMULT(addrp) ?		\
+	AHCIPORT_STATE(portp, addrp) : AHCIPORT_PMSTATE(portp, addrp))
+
+#define	AHCIPORT_SET_STATE(portp, addrp, state)				\
+	if (AHCI_ADDR_IS_PORT(addrp) | AHCI_ADDR_IS_PMULT(addrp))	\
+		AHCIPORT_STATE(portp, addrp) = state;			\
+	else								\
+		AHCIPORT_PMSTATE(portp, addrp) = state;
+
 typedef struct ahci_ctl {
 	dev_info_t		*ahcictl_dip;
 	/* To map port number to cport number */
@@ -213,6 +371,10 @@
 	 * AHCI_CAP_PM
 	 * AHCI_CAP_32BIT_DMA
 	 * AHCI_CAP_SCLO
+	 * AHCI_CAP_INIT_PORT_RESET
+	 * AHCI_CAP_SNTF
+	 * AHCI_CAP_PMULT_CBSS
+	 * AHCI_CAP_PMULT_FBSS
 	 */
 	int			ahcictl_cap;
 
@@ -291,7 +453,12 @@
 #define	AHCI_CAP_COMMU_32BIT_DMA	0x40
 /* Port reset is needed for initialization */
 #define	AHCI_CAP_INIT_PORT_RESET	0x80
-
+/* Port Asychronous Notification */
+#define	AHCI_CAP_SNTF			0x100
+/* Port Multiplier Command-Based Switching Support (PMULT_CBSS) */
+#define	AHCI_CAP_PMULT_CBSS		0x200
+/* Port Multiplier FIS-Based Switching Support (PMULT_FBSS) */
+#define	AHCI_CAP_PMULT_FBSS		0x400
 
 /* Flags controlling the restart port behavior */
 #define	AHCI_PORT_RESET		0x0001	/* Reset the port */
@@ -301,6 +468,10 @@
 	(ahci_portp->ahciport_flags &			\
 	(AHCI_PORT_FLAG_RQSENSE|AHCI_PORT_FLAG_RDLOGEXT))
 
+#define	RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)		\
+	(ahci_portp->ahciport_flags &			\
+	AHCI_PORT_FLAG_RDWR_PMULT)
+
 #define	NON_NCQ_CMD_IN_PROGRESS(ahci_portp)		\
 	(!ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) &&	\
 	ahci_portp->ahciport_pending_tags != 0 &&	\
@@ -314,6 +485,7 @@
 #define	AHCI_NON_NCQ_CMD	0x0
 #define	AHCI_NCQ_CMD		0x1
 #define	AHCI_ERR_RETRI_CMD	0x2
+#define	AHCI_RDWR_PMULT_CMD	0x4
 
 /* State values for ahci_attach */
 #define	AHCI_ATTACH_STATE_NONE			(0x1 << 0)
@@ -374,6 +546,7 @@
 #define	AHCIDBG_PM		0x8000
 #define	AHCIDBG_UNDERFLOW	0x10000
 #define	AHCIDBG_MSI		0x20000
+#define	AHCIDBG_PMULT		0x40000
 
 extern uint32_t ahci_debug_flags;
 
--- a/usr/src/uts/common/sys/sata/impl/sata.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/sata/impl/sata.h	Mon Aug 17 09:14:35 2009 -0400
@@ -244,8 +244,6 @@
 /* Port Multiplier & host port info and state */
 struct sata_pmult_info {
 	sata_address_t	pmult_addr;		/* this PMult SATA Address */
-	kmutex_t	pmult_mutex;		/* pmult (host port) mutex */
-
 						/*
 						 * PMult state flags
 						 * SATA_STATE_UNKNOWN
@@ -256,13 +254,17 @@
 						 */
 	uint32_t	pmult_state;
 	uint32_t	pmult_event_flags;	/* Undefined for now */
-	struct sata_port_scr pmult_scr;		/* Host port SCR block */
+	struct sata_pmult_gscr pmult_gscr;	/* PMult GSCR block */
 	uint32_t	pmult_num_dev_ports; 	/* Number of data ports */
 	struct sata_pmport_info	*pmult_dev_port[SATA_MAX_PMPORTS - 1];
 };
 
 typedef	struct sata_pmult_info sata_pmult_info_t;
 
+_NOTE(SCHEME_PROTECTS_DATA("unshared data", sata_pmult_info))
+_NOTE(MUTEX_PROTECTS_DATA(sata_cport_info::cport_mutex, \
+    sata_pmult_info::pmult_dev_port))
+
 /* Port Multiplier's device port info & state */
 struct sata_pmport_info {
 	sata_address_t	pmport_addr;		/* this SATA port address */
@@ -414,6 +416,7 @@
 					SATA_EVNT_DEVICE_DETACHED | \
 					SATA_EVNT_LINK_LOST | \
 					SATA_EVNT_LINK_ESTABLISHED | \
+					SATA_EVNT_PMULT_LINK_CHANGED | \
 					SATA_EVNT_PORT_FAILED | \
 					SATA_EVNT_TARGET_NODE_CLEANUP | \
 					SATA_EVNT_AUTOONLINE_DEVICE)
@@ -618,6 +621,10 @@
 	sata_hba_inst->satahba_dev_port[cport]->\
 	cport_devp.cport_sata_pmult->pmult_num_dev_ports
 
+#define	SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport) \
+	sata_hba_inst->satahba_dev_port[cport]->\
+	cport_devp.cport_sata_pmult->pmult_dev_port[pmport]->pmport_mutex
+
 #define	SATA_PMPORT_INFO(sata_hba_inst, cport, pmport) \
 	sata_hba_inst->satahba_dev_port[cport]->\
 	cport_devp.cport_sata_pmult->pmult_dev_port[pmport]
@@ -639,6 +646,11 @@
 	sata_hba_inst->satahba_dev_port[cport]->\
 	cport_devp.cport_sata_pmult->pmult_dev_port[pmport]->pmport_dev_type
 
+#define	SATA_PMPORT_EVENT_FLAGS(sata_hba_inst, cport, pmport) \
+	sata_hba_inst->satahba_dev_port[cport]->\
+	cport_devp.cport_sata_pmult->pmult_dev_port[pmport]->\
+	pmport_event_flags
+
 #define	SATA_PMPORTINFO_DRV_TYPE(pmportinfo) \
 	pmportinfo->pmport_dev_type
 
@@ -651,6 +663,12 @@
 #define	SATA_TXLT_CPORT(spx) \
 	spx->txlt_sata_pkt->satapkt_device.satadev_addr.cport
 
+#define	SATA_TXLT_PMPORT(spx) \
+	spx->txlt_sata_pkt->satapkt_device.satadev_addr.pmport
+
+#define	SATA_TXLT_QUAL(spx) \
+	spx->txlt_sata_pkt->satapkt_device.satadev_addr.qual
+
 #define	SATA_TXLT_CPORT_MUTEX(spx) \
 	spx->txlt_sata_hba_inst->\
 	satahba_dev_port[spx->txlt_sata_pkt->\
@@ -785,6 +803,7 @@
 #define	SATA_DBG_ATAPI		0x1000
 #define	SATA_DBG_ATAPI_PACKET	0x8000
 #define	SATA_DBG_INTR_CTX	0x10000
+#define	SATA_DBG_PMULT		0x20000
 
 typedef struct sata_atapi_cmd {
 	uint8_t acdb[SATA_ATAPI_MAX_CDB_LEN];
@@ -816,7 +835,7 @@
 
 #endif
 
-/* sata_rev_tag 1.43 */
+/* sata_rev_tag 1.44 */
 
 #ifdef	__cplusplus
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/sata/sata_blacklist.h	Mon Aug 17 09:14:35 2009 -0400
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SATA_BLACKLIST_H
+#define	_SATA_BLACKLIST_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * SATA port multiplier blacklist
+ *
+ * The number of the device ports is indicated by GSCR2[3:0]. These port
+ * multipliers got faulty values in GSCR2 (w/ pseudo port) by vendor
+ * configuration.
+ *
+ * Following is a list of some black-listed port multipliers with the actual
+ * number of device ports.
+ */
+struct sata_pmult_bl {
+	uint32_t	bl_gscr0;
+	uint32_t	bl_gscr1;
+	uint32_t	bl_gscr2;
+	uint32_t	bl_flags;
+};
+
+typedef struct sata_pmult_bl sata_pmult_bl_t;
+
+sata_pmult_bl_t sata_pmult_blacklist[] = {
+	{0x37261095, 0x0, 0x6, 0x5}, /* Silicon Image 3726, 5 ports. */
+	{0x47261095, 0x0, 0x7, 0x5}, /* Silicon Image 4726, 5 ports. */
+	{0x47231095, 0x0, 0x4, 0x2}, /* Silicon Image 4723, 2 ports. */
+	NULL
+};
+
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SATA_BLACKLIST_H */
--- a/usr/src/uts/common/sys/sata/sata_defs.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/sata/sata_defs.h	Mon Aug 17 09:14:35 2009 -0400
@@ -91,6 +91,12 @@
 
 #define	SATA_LOG_PAGE_10	0x10	/* log page 0x10 - SATA error */
 /*
+ * Port Multiplier Commands
+ */
+#define	SATAC_READ_PORTMULT	0xe4	/* read port multiplier */
+#define	SATAC_WRITE_PORTMULT	0xe8	/* write port multiplier */
+
+/*
  * Power Managment Commands (subset)
  */
 #define	SATAC_CHECK_POWER_MODE	0xe5	/* check power mode */
@@ -658,6 +664,41 @@
 #define	MODEPAGE_ACOUSTIC_MANAG 0x30
 
 /*
+ * Port Multiplier registers' offsets
+ */
+#define	SATA_PMULT_GSCR0		0x0
+#define	SATA_PMULT_GSCR1		0x1
+#define	SATA_PMULT_GSCR2		0x2
+#define	SATA_PMULT_GSCR32		0x20
+#define	SATA_PMULT_GSCR33		0x21
+#define	SATA_PMULT_GSCR64		0x40
+#define	SATA_PMULT_GSCR96		0x60
+
+#define	SATA_PMULT_PORTNUM_MASK		0xf
+
+#define	SATA_PMULT_PSCR0		0x0
+#define	SATA_PMULT_PSCR1		0x1
+#define	SATA_PMULT_PSCR2		0x2
+#define	SATA_PMULT_PSCR3		0x3
+#define	SATA_PMULT_PSCR4		0x4
+
+#define	SATA_PMULT_REG_SSTS		(SATA_PMULT_PSCR0)
+#define	SATA_PMULT_REG_SERR		(SATA_PMULT_PSCR1)
+#define	SATA_PMULT_REG_SCTL		(SATA_PMULT_PSCR2)
+#define	SATA_PMULT_REG_SACT		(SATA_PMULT_PSCR3)
+#define	SATA_PMULT_REG_SNTF		(SATA_PMULT_PSCR4)
+
+/*
+ * Port Multiplier capabilities
+ * (Indicated by GSCR64, and enabled by GSCR96)
+ */
+#define	SATA_PMULT_CAP_BIST		(1 << 0)
+#define	SATA_PMULT_CAP_PMREQ		(1 << 1)
+#define	SATA_PMULT_CAP_SSC		(1 << 2)
+#define	SATA_PMULT_CAP_SNOTIF		(1 << 3)
+#define	SATA_PMULT_CAP_PHYEVENT		(1 << 4)
+
+/*
  * sstatus field definitions
  */
 #define	SSTATUS_DET_SHIFT	0
--- a/usr/src/uts/common/sys/sata/sata_hba.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/sata/sata_hba.h	Mon Aug 17 09:14:35 2009 -0400
@@ -52,7 +52,6 @@
 /* SATA Framework definitions */
 
 #define	SATA_MAX_CPORTS		32	/* Max number of controller ports */
-
 					/* Multiplier (PMult) */
 #define	SATA_MAX_PMPORTS	16	/* Maximum number of ports on PMult */
 #define	SATA_PMULT_HOSTPORT	0xf	/* Port Multiplier host port number */
@@ -105,6 +104,21 @@
 typedef struct sata_port_scr sata_port_scr_t;
 
 /*
+ * SATA Port Multiplier general status and control register block.
+ * The gscr0, gscr1, gscr2 are the copyies of the register on port multiplier.
+ * GSCR[0], GSCR[1], GSCR[2] are defined in SATA defined by Port Multiplier
+ * 1.0/1.1/1.2 spec.
+ */
+struct sata_pmult_gscr {
+	uint32_t	gscr0;		/* Product Identifier register */
+	uint32_t	gscr1;		/* Resrved Information register */
+	uint32_t	gscr2;		/* Port Information register */
+	uint32_t	gscr64;		/* Feature register */
+};
+
+typedef struct sata_pmult_gscr sata_pmult_gscr_t;
+
+/*
  * SATA Device Structure (rev 1)
  * Used to request/return state of the controller, port, port multiplier
  * or an attached drive:
@@ -122,7 +136,8 @@
  *	a value specific for a reported event.
  */
 #define	SATA_DEVICE_REV_1	1
-#define	SATA_DEVICE_REV		SATA_DEVICE_REV_1
+#define	SATA_DEVICE_REV_2	2
+#define	SATA_DEVICE_REV		SATA_DEVICE_REV_2
 
 struct sata_device
 {
@@ -131,6 +146,9 @@
 	uint32_t	satadev_state;		/* Port or device state */
 	uint32_t	satadev_type;		/* Attached device type */
 	struct sata_port_scr satadev_scr; 	/* Port status and ctrl regs */
+	struct sata_pmult_gscr satadev_gscr;	/* Port multiplier specific */
+						/* global status and control */
+						/* registers */
 	uint32_t	satadev_add_info;	/* additional information, */
 						/* function specific */
 };
@@ -162,6 +180,7 @@
 #define	SATA_DSTATE_PWR_IDLE		0x000200
 #define	SATA_DSTATE_PWR_STANDBY		0x000400
 #define	SATA_DSTATE_RESET		0x001000
+#define	SATA_DSTATE_PMULT_INIT		0x002000
 #define	SATA_DSTATE_FAILED		0x008000
 
 /* Mask for drive power states */
@@ -547,6 +566,12 @@
 #define	SATA_ERR_RETR_PKT_TYPE_ATAPI	2
 
 /*
+ * Read/write port multiplier packet types
+ */
+#define	SATA_RDWR_PMULT_PKT_TYPE_READ	1
+#define	SATA_RDWR_PMULT_PKT_TYPE_WRITE	2
+
+/*
  * Hoplug functions vector structure (rev 1)
  */
 #define	SATA_TRAN_HOTPLUG_OPS_REV_1	1
@@ -602,7 +627,8 @@
  */
 #define	SATA_TRAN_HBA_REV_1	1
 #define	SATA_TRAN_HBA_REV_2	2
-#define	SATA_TRAN_HBA_REV	SATA_TRAN_HBA_REV_2
+#define	SATA_TRAN_HBA_REV_3	3
+#define	SATA_TRAN_HBA_REV	SATA_TRAN_HBA_REV_3
 
 struct sata_hba_tran {
 	int		sata_tran_hba_rev;	/* version */
@@ -643,6 +669,7 @@
 #define	SATA_CTLF_ASN			0x040 /* Asynchronous Event Support */
 #define	SATA_CTLF_QCMD			0x080 /* Queued commands support */
 #define	SATA_CTLF_NCQ			0x100 /* NCQ support */
+#define	SATA_CTLF_PMULT_FBS		0x200 /* FIS-based switching support */
 
 /*
  * sata_tran_start() return values.
@@ -693,6 +720,9 @@
  * SATA_EVNT_PWR_LEVEL_CHANGED
  * A port or entire SATA controller power level has changed
  *
+ * SATA_EVNT_PMULT_LINK_CHANGED
+ * Port multiplier detect change on a link of its device port
+ *
  */
 #define	SATA_EVNT_DEVICE_ATTACHED	0x01
 #define	SATA_EVNT_DEVICE_DETACHED	0x02
@@ -701,6 +731,7 @@
 #define	SATA_EVNT_PORT_FAILED		0x10
 #define	SATA_EVNT_DEVICE_RESET		0x20
 #define	SATA_EVNT_PWR_LEVEL_CHANGED	0x40
+#define	SATA_EVNT_PMULT_LINK_CHANGED	0x80
 
 /*
  * SATA Framework interface entry points
@@ -712,6 +743,10 @@
 void 	sata_hba_event_notify(dev_info_t *, sata_device_t *, int);
 sata_pkt_t *sata_get_error_retrieval_pkt(dev_info_t *, sata_device_t *, int);
 void	sata_free_error_retrieval_pkt(sata_pkt_t *);
+sata_pkt_t *sata_get_rdwr_pmult_pkt(dev_info_t *, sata_device_t *, uint8_t,
+    uint32_t, uint32_t);
+void	sata_free_rdwr_pmult_pkt(sata_pkt_t *);
+int	sata_check_pmult_blacklist(sata_device_t *);
 void	sata_free_dma_resources(sata_pkt_t *);
 
 /*
--- a/usr/src/uts/common/sys/scsi/targets/sddef.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/scsi/targets/sddef.h	Mon Aug 17 09:14:35 2009 -0400
@@ -453,7 +453,8 @@
 						/* field by hardware */
 	    un_f_pm_log_sense_smart	:1,	/* log sense support SMART */
 						/* feature attribute */
-	    un_f_reserved		:7;
+	    un_f_is_solid_state		:1,	/* has solid state media */
+	    un_f_reserved		:6;
 
 	/* Ptr to table of strings for ASC/ASCQ error message printing */
 	struct scsi_asq_key_strings	*un_additional_codes;
@@ -2357,6 +2358,7 @@
 #define	SD_VPD_ASCII_OP_PG	0x08	/* 0x82 - ASCII Op Defs */
 #define	SD_VPD_DEVID_WWN_PG	0x10	/* 0x83 - Device Identification */
 #define	SD_VPD_EXTENDED_DATA_PG	0x80	/* 0x86 - Extended data about the lun */
+#define	SD_VPD_DEV_CHARACTER_PG	0x400	/* 0xB1 - Device Characteristics */
 
 /*
  * Non-volatile cache support
--- a/usr/src/uts/common/sys/usb/usba/usba_impl.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/usb/usba/usba_impl.h	Mon Aug 17 09:14:35 2009 -0400
@@ -435,6 +435,15 @@
 /* free request wrappers */
 void	usba_req_wrapper_free(usba_req_wrapper_t *);
 
+/* usb device capture for the specific client driver */
+typedef struct usb_dev_cap {
+	dev_info_t			*dip;
+	usb_dev_driver_callback_t	usba_dev_driver_cb;
+} usb_dev_cap_t;
+
+usb_dev_cap_t usb_cap;
+_NOTE(SCHEME_PROTECTS_DATA("unique device capture data", usb_cap))
+
 #ifdef __cplusplus
 }
 #endif
--- a/usr/src/uts/common/sys/usb/usbai.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/common/sys/usb/usbai.h	Mon Aug 17 09:14:35 2009 -0400
@@ -2555,6 +2555,65 @@
 
 
 /*
+ * **************************************************************************
+ * USB device driver registration and callback functions remaining
+ * Contracted Project Private (for VirtualBox USB Device Capture)
+ * **************************************************************************
+ */
+
+/*
+ * getting the device strings of manufacturer, product and serial number
+ */
+typedef struct usb_dev_str {
+	char	*usb_mfg;	/* manufacturer string */
+	char	*usb_product;	/* product string */
+	char	*usb_serialno;	/* serial number string */
+} usb_dev_str_t;
+
+/*
+ * It is the callback function type for capture driver.
+ * Arguments:
+ *	dev_descr	- pointer to device descriptor
+ *	dev_str		- pointer to device strings
+ *	path		- pointer to device physical path
+ *	bus		- USB bus address
+ *	port		- USB port number
+ *	drv		- capture driver name.
+ *			  It is returned by the callback func.
+ * Return Values:
+ *      USB_SUCCESS     - VirtualBox will capture the device
+ *      USB_FAILURE     - VirtualBox will not capture the device
+ */
+typedef int (*usb_dev_driver_callback_t)(
+	usb_dev_descr_t	*dev_descr,
+	usb_dev_str_t	*dev_str,
+	char		*path,
+	int		bus,
+	int		port,
+	char		**drv,
+	void		*reserved);
+
+/*
+ * Register the callback function in the usba.
+ * Argument:
+ *	dip		- client driver's devinfo pointer
+ *	cb		- callback function
+ *
+ * Return Values:
+ *	USB_SUCCESS	- the registeration was successful
+ *	USB_FAILURE	- the registeration failed
+ */
+int usb_register_dev_driver(
+	dev_info_t			*dip,
+	usb_dev_driver_callback_t	cb);
+
+/*
+ * Unregister the callback function in the usba.
+ */
+void usb_unregister_dev_driver(dev_info_t *dip);
+
+
+/*
  * ***************************************************************************
  * USB Device and interface class, subclass and protocol codes
  * ***************************************************************************
--- a/usr/src/uts/i86pc/dboot/dboot_grub.s	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/i86pc/dboot/dboot_grub.s	Mon Aug 17 09:14:35 2009 -0400
@@ -21,7 +21,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -57,13 +57,20 @@
 	.globl	mb_header
 mb_header:
 	.long	MB_HEADER_MAGIC	/* magic number */
-	.long	MB_HEADER_FLAGS	/* flags */
-	.long	MB_HEADER_CHECKSUM	/* checksum */
-	.long	0x11111111	/* header_addr: patched by elfpatch */
-	.long	0x100000	/* load_addr: patched by elfpatch */
+#if defined(_BOOT_TARGET_i386)
+	.long	MB_HEADER_FLAGS_32	/* flags */
+	.long	MB_HEADER_CHECKSUM_32	/* checksum */
+#elif defined (_BOOT_TARGET_amd64)
+	.long	MB_HEADER_FLAGS_64	/* flags */
+	.long	MB_HEADER_CHECKSUM_64	/* checksum */
+#else
+#error No architecture defined
+#endif
+	.long	0x11111111	/* header_addr: patched by mbh_patch */
+	.long	0x100000	/* load_addr: patched by mbh_patch */
 	.long	0		/* load_end_addr - 0 means entire file */
 	.long	0		/* bss_end_addr */
-	.long	0x2222222	/* entry_addr: patched by elfpatch */
+	.long	0x2222222	/* entry_addr: patched by mbh_patch */
 	.long	0		/* video mode.. */
 	.long	0		/* width 0 == don't care */
 	.long	0		/* height 0 == don't care */
--- a/usr/src/uts/i86pc/dboot/dboot_startkern.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/i86pc/dboot/dboot_startkern.c	Mon Aug 17 09:14:35 2009 -0400
@@ -799,6 +799,10 @@
 	DBG_MSG("Entered init_mem_alloc()\n");
 	DBG((uintptr_t)mb_info);
 
+	if (mb_info->mods_count > MAX_MODULES) {
+		dboot_panic("Too many modules (%d) -- the maximum is %d.",
+		    mb_info->mods_count, MAX_MODULES);
+	}
 	/*
 	 * search the modules to find the last used address
 	 * we'll build the module list while we're walking through here
@@ -814,7 +818,11 @@
 			    (ulong_t)mod->mod_start, (ulong_t)mod->mod_end);
 		}
 		modules[i].bm_addr = mod->mod_start;
-		modules[i].bm_size = mod->mod_end;
+		if (mod->mod_start > mod->mod_end) {
+			dboot_panic("module[%d]: Invalid module start address "
+			    "(0x%llx)", i, (uint64_t)mod->mod_start);
+		}
+		modules[i].bm_size = mod->mod_end - mod->mod_start;
 
 		check_higher(mod->mod_end);
 	}
--- a/usr/src/uts/i86pc/os/ddi_impl.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/i86pc/os/ddi_impl.c	Mon Aug 17 09:14:35 2009 -0400
@@ -2514,6 +2514,10 @@
 void
 impl_setup_ddi(void)
 {
+#if !defined(__xpv)
+	extern void startup_bios_disk(void);
+	extern int post_fastreboot;
+#endif
 	dev_info_t *xdip, *isa_dip;
 	rd_existing_t rd_mem_prop;
 	int err;
@@ -2555,11 +2559,15 @@
 	 */
 	get_boot_properties();
 
+	/* not framebuffer should be enumerated, if present */
+	get_vga_properties();
+
+#if !defined(__xpv)
+	if (!post_fastreboot)
+		startup_bios_disk();
+#endif
 	/* do bus dependent probes. */
 	impl_bus_initialprobe();
-
-	/* not framebuffer should be enumerated, if present */
-	get_vga_properties();
 }
 
 dev_t
--- a/usr/src/uts/i86pc/os/fakebop.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/i86pc/os/fakebop.c	Mon Aug 17 09:14:35 2009 -0400
@@ -1087,9 +1087,9 @@
  * Fast Reboot.
  */
 static void
-save_boot_info(multiboot_info_t *mbi)
+save_boot_info(multiboot_info_t *mbi, struct xboot_info *xbi)
 {
-	mb_module_t *mbp;
+	struct boot_modules *modp;
 	int i;
 
 	bcopy(mbi, &saved_mbi, sizeof (multiboot_info_t));
@@ -1115,12 +1115,14 @@
 	/*
 	 * Current file sizes.  Used by fastboot.c to figure out how much
 	 * memory to reserve for panic reboot.
+	 * Use the module list from the dboot-constructed xboot_info
+	 * instead of the list referenced by the multiboot structure
+	 * because that structure may not be addressable now.
 	 */
 	saved_file_size[FASTBOOT_NAME_UNIX] = FOUR_MEG - PAGESIZE;
-	for (i = 0, mbp = (mb_module_t *)(uintptr_t)mbi->mods_addr;
-	    i < mbi->mods_count; i++, mbp += sizeof (mb_module_t)) {
-		saved_file_size[FASTBOOT_NAME_BOOTARCHIVE] +=
-		    mbp->mod_end - mbp->mod_start;
+	for (i = 0, modp = (struct boot_modules *)(uintptr_t)xbi->bi_modules;
+	    i < xbi->bi_module_cnt; i++, modp++) {
+		saved_file_size[FASTBOOT_NAME_BOOTARCHIVE] += modp->bm_size;
 	}
 
 }
@@ -1379,7 +1381,7 @@
 	/*
 	 * Save various boot information for Fast Reboot
 	 */
-	save_boot_info(mbi);
+	save_boot_info(mbi, xbootp);
 
 	if (mbi != NULL && mbi->flags & 0x2) {
 		boot_device = mbi->boot_device >> 24;
--- a/usr/src/uts/i86pc/os/startup.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/i86pc/os/startup.c	Mon Aug 17 09:14:35 2009 -0400
@@ -668,7 +668,6 @@
 startup(void)
 {
 #if !defined(__xpv)
-	extern void startup_bios_disk(void);
 	extern void startup_pci_bios(void);
 	extern int post_fastreboot;
 #endif
@@ -706,10 +705,7 @@
 	startup_xen_mca();
 #endif
 	startup_modules();
-#if !defined(__xpv)
-	if (!post_fastreboot)
-		startup_bios_disk();
-#endif
+
 	startup_end();
 	progressbar_start();
 }
--- a/usr/src/uts/intel/io/pci/pci_boot.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/intel/io/pci/pci_boot.c	Mon Aug 17 09:14:35 2009 -0400
@@ -600,7 +600,7 @@
 	int rv, cap_ptr, physhi;
 	dev_info_t *dip;
 	uint16_t cmd_reg;
-	struct memlist *list;
+	struct memlist *list, *scratch_list;
 
 	/* skip root (peer) PCI busses */
 	if (pci_bus_res[secbus].par_bus == (uchar_t)-1)
@@ -785,13 +785,13 @@
 	io_limit = ((io_limit & 0xf0) << 8) | 0xfff;
 
 	/* Form list of all resources passed (avail + used) */
-	list = memlist_dup(pci_bus_res[secbus].io_avail);
-	memlist_merge(&pci_bus_res[secbus].io_used, &list);
+	scratch_list = memlist_dup(pci_bus_res[secbus].io_avail);
+	memlist_merge(&pci_bus_res[secbus].io_used, &scratch_list);
 
 	if ((pci_bus_res[parbus].io_reprogram ||
 	    (io_base > io_limit) ||
 	    (!(cmd_reg & PCI_COMM_IO))) &&
-	    !list_is_vga_only(list, IO)) {
+	    !list_is_vga_only(scratch_list, IO)) {
 		if (pci_bus_res[secbus].io_used) {
 			memlist_subsume(&pci_bus_res[secbus].io_used,
 			    &pci_bus_res[secbus].io_avail);
@@ -857,6 +857,7 @@
 			    bus, dev, func, io_base, io_limit);
 		}
 	}
+	memlist_free_all(&scratch_list);
 
 	/*
 	 * Check memory space as we did I/O space.
@@ -866,13 +867,13 @@
 	mem_limit = (uint_t)pci_getw(bus, dev, func, PCI_BCNF_MEM_LIMIT);
 	mem_limit = ((mem_limit & 0xfff0) << 16) | 0xfffff;
 
-	list = memlist_dup(pci_bus_res[secbus].mem_avail);
-	memlist_merge(&pci_bus_res[secbus].mem_used, &list);
+	scratch_list = memlist_dup(pci_bus_res[secbus].mem_avail);
+	memlist_merge(&pci_bus_res[secbus].mem_used, &scratch_list);
 
 	if ((pci_bus_res[parbus].mem_reprogram ||
 	    (mem_base > mem_limit) ||
 	    (!(cmd_reg & PCI_COMM_MAE))) &&
-	    !list_is_vga_only(list, MEM)) {
+	    !list_is_vga_only(scratch_list, MEM)) {
 		if (pci_bus_res[secbus].mem_used) {
 			memlist_subsume(&pci_bus_res[secbus].mem_used,
 			    &pci_bus_res[secbus].mem_avail);
@@ -959,6 +960,7 @@
 			    bus, dev, func, mem_base, mem_limit);
 		}
 	}
+	memlist_free_all(&scratch_list);
 
 cmd_enable:
 	if (pci_bus_res[secbus].io_avail)
--- a/usr/src/uts/sun/io/eri/eri.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/sun/io/eri/eri.c	Mon Aug 17 09:14:35 2009 -0400
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -2894,6 +2894,9 @@
 		    ETX_COMPLETION_MASK);
 
 		macupdate |= eri_reclaim(erip, erip->tx_completion);
+		if (macupdate)
+			erip->wantw = B_FALSE;
+
 		mutex_exit(&erip->xmitlock);
 	}
 
@@ -2961,8 +2964,6 @@
 		erip->rx_completion = rmdi;
 	}
 
-	erip->wantw = B_FALSE;
-
 	mutex_exit(&erip->intrlock);
 
 	if (head)
@@ -3454,7 +3455,6 @@
 	uint_t		start_offset = 0;
 	uint_t		stuff_offset = 0;
 	uint_t		flags = 0;
-	boolean_t	macupdate = B_FALSE;
 
 	caddr_t	ptr;
 	uint32_t	offset;
@@ -3514,9 +3514,16 @@
 	if (i > (ERI_TPENDING - 4))
 		goto notmds;
 
-	if (i >= (ERI_TPENDING >> 1) && !(erip->starts & 0x7))
+	if (i >= (ERI_TPENDING >> 1) && !(erip->starts & 0x7)) {
 		int_me = ERI_TMD_INTME;
 
+		if (!erip->tx_int_me) {
+			PUT_GLOBREG(intmask, GET_GLOBREG(intmask) &
+			    ~(ERI_G_MASK_TX_INT_ME));
+			erip->tx_int_me = 1;
+		}
+	}
+
 	i = tmdp - tbasep; /* index */
 
 	offset = (i * ERI_BUFSIZE);
@@ -3562,36 +3569,18 @@
 	if (erip->tx_cur_cnt >= tx_interrupt_rate) {
 		erip->tx_completion = (uint32_t)(GET_ETXREG(tx_completion) &
 		    ETX_COMPLETION_MASK);
-		macupdate |= eri_reclaim(erip, erip->tx_completion);
+		(void) eri_reclaim(erip, erip->tx_completion);
 	}
 	mutex_exit(&erip->xmitlock);
 
-	if (macupdate)
-		mac_tx_update(erip->mh);
-
 	return (B_TRUE);
 
 notmds:
 	HSTAT(erip, notmds);
 	erip->wantw = B_TRUE;
 
-	if (!erip->tx_int_me) {
-		PUT_GLOBREG(intmask, GET_GLOBREG(intmask) &
-		    ~(ERI_G_MASK_TX_INT_ME));
-		erip->tx_int_me = 1;
-	}
-
-	if (erip->tx_cur_cnt >= tx_interrupt_rate) {
-		erip->tx_completion = (uint32_t)(GET_ETXREG(tx_completion) &
-		    ETX_COMPLETION_MASK);
-		macupdate |= eri_reclaim(erip, erip->tx_completion);
-	}
-
 	mutex_exit(&erip->xmitlock);
 
-	if (macupdate)
-		mac_tx_update(erip->mh);
-
 	return (B_FALSE);
 }
 
--- a/usr/src/uts/sun4v/io/vnet.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/sun4v/io/vnet.c	Mon Aug 17 09:14:35 2009 -0400
@@ -40,6 +40,8 @@
 #include <sys/dlpi.h>
 #include <net/if.h>
 #include <sys/mac_provider.h>
+#include <sys/mac_client.h>
+#include <sys/mac_client_priv.h>
 #include <sys/mac_ether.h>
 #include <sys/ddi.h>
 #include <sys/sunddi.h>
@@ -75,11 +77,38 @@
 #ifdef	VNET_IOC_DEBUG
 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
 #endif
+static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
+static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
+	const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
+static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
+	mac_group_info_t *infop, mac_group_handle_t handle);
+static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
+static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
+static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
+static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
+static int vnet_ring_enable_intr(void *arg);
+static int vnet_ring_disable_intr(void *arg);
+static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
+static int vnet_addmac(void *arg, const uint8_t *mac_addr);
+static int vnet_remmac(void *arg, const uint8_t *mac_addr);
 
 /* vnet internal functions */
 static int vnet_unattach(vnet_t *vnetp);
+static void vnet_ring_grp_init(vnet_t *vnetp);
+static void vnet_ring_grp_uninit(vnet_t *vnetp);
 static int vnet_mac_register(vnet_t *);
 static int vnet_read_mac_address(vnet_t *vnetp);
+static int vnet_bind_vgenring(vnet_res_t *vresp);
+static void vnet_unbind_vgenring(vnet_res_t *vresp);
+static int vnet_bind_hwrings(vnet_t *vnetp);
+static void vnet_unbind_hwrings(vnet_t *vnetp);
+static int vnet_bind_rings(vnet_res_t *vresp);
+static void vnet_unbind_rings(vnet_res_t *vresp);
+static int vnet_hio_stat(void *, uint_t, uint64_t *);
+static int vnet_hio_start(void *);
+static void vnet_hio_stop(void *);
+static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type);
+mblk_t *vnet_hio_tx(void *, mblk_t *);
 
 /* Forwarding database (FDB) routines */
 static void vnet_fdb_create(vnet_t *vnetp);
@@ -98,6 +127,8 @@
 static void vnet_dispatch_res_task(vnet_t *vnetp);
 static void vnet_res_start_task(void *arg);
 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
+static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
+static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
 
 /* Exported to vnet_gen */
 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
@@ -112,15 +143,21 @@
 
 /* Exported to to vnet_dds */
 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
+int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
+void vnet_hio_mac_cleanup(vnet_t *vnetp);
 
 /* Externs that are imported from vnet_gen */
 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
     const uint8_t *macaddr, void **vgenhdl);
+extern int vgen_init_mdeg(void *arg);
 extern void vgen_uninit(void *arg);
 extern int vgen_dds_tx(void *arg, void *dmsg);
 extern void vgen_mod_init(void);
 extern int vgen_mod_cleanup(void);
 extern void vgen_mod_fini(void);
+extern int vgen_enable_intr(void *arg);
+extern int vgen_disable_intr(void *arg);
+extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
 
 /* Externs that are imported from vnet_dds */
 extern void vdds_mod_init(void);
@@ -131,6 +168,9 @@
 extern void vdds_cleanup_hybrid_res(void *arg);
 extern void vdds_cleanup_hio(vnet_t *vnetp);
 
+/* Externs imported from mac_impl */
+extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);
+
 #define	DRV_NAME	"vnet"
 #define	VNET_FDBE_REFHOLD(p)						\
 {									\
@@ -145,9 +185,9 @@
 }
 
 #ifdef	VNET_IOC_DEBUG
-#define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL)
+#define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
 #else
-#define	VNET_M_CALLBACK_FLAGS	(0)
+#define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
 #endif
 
 static mac_callbacks_t vnet_m_callbacks = {
@@ -157,9 +197,23 @@
 	vnet_m_stop,
 	vnet_m_promisc,
 	vnet_m_multicst,
-	vnet_m_unicst,
-	vnet_m_tx,
+	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
+	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
 	vnet_m_ioctl,
+	vnet_m_capab,
+	NULL
+};
+
+static mac_callbacks_t vnet_hio_res_callbacks = {
+	0,
+	vnet_hio_stat,
+	vnet_hio_start,
+	vnet_hio_stop,
+	NULL,
+	NULL,
+	NULL,
+	vnet_hio_tx,
+	NULL,
 	NULL,
 	NULL
 };
@@ -176,6 +230,9 @@
 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
 
+/* Configure tx serialization in mac layer for the vnet device */
+boolean_t vnet_mac_tx_serialize = B_TRUE;
+
 /*
  * Set this to non-zero to enable additional internal receive buffer pools
  * based on the MTU of the device for better performance at the cost of more
@@ -206,6 +263,11 @@
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 };
 
+/* mac_open() retry delay in usec */
+uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */
+
+/* max # of mac_open() retries */
+uint32_t vnet_mac_open_retries = 100;
 
 /*
  * Property names
@@ -375,6 +437,9 @@
 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
 	attach_progress |= AST_vnet_alloc;
 
+	vnet_ring_grp_init(vnetp);
+	attach_progress |= AST_ring_init;
+
 	status = vdds_init(vnetp);
 	if (status != 0) {
 		goto vnet_attach_fail;
@@ -419,10 +484,19 @@
 	attach_progress |= AST_vnet_list;
 
 	/*
-	 * Initialize the generic vnet plugin which provides
-	 * communication via sun4v LDC (logical domain channel) based
-	 * resources. It will register the LDC resources as and when
-	 * they become available.
+	 * Initialize the generic vnet plugin which provides communication via
+	 * sun4v LDC (logical domain channel) based resources. This involves 2
+	 * steps; first, vgen_init() is invoked to read the various properties
+	 * of the vnet device from its MD node (including its mtu which is
+	 * needed to mac_register()) and obtain a handle to the vgen layer.
+	 * After mac_register() is done and we have a mac handle, we then
+	 * invoke vgen_init_mdeg() which registers with the the MD event
+	 * generator (mdeg) framework to allow LDC resource notifications.
+	 * Note: this sequence also allows us to report the correct default #
+	 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
+	 * in the context of mac_register(); and avoids conflicting with
+	 * dynamic pseudo rx rings which get added/removed as a result of mdeg
+	 * events in vgen.
 	 */
 	status = vgen_init(vnetp, reg, vnetp->dip,
 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
@@ -432,15 +506,19 @@
 	}
 	attach_progress |= AST_vgen_init;
 
-	/* register with MAC layer */
 	status = vnet_mac_register(vnetp);
 	if (status != DDI_SUCCESS) {
 		goto vnet_attach_fail;
 	}
 	vnetp->link_state = LINK_STATE_UNKNOWN;
-
 	attach_progress |= AST_macreg;
 
+	status = vgen_init_mdeg(vnetp->vgenhdl);
+	if (status != DDI_SUCCESS) {
+		goto vnet_attach_fail;
+	}
+	attach_progress |= AST_init_mdeg;
+
 	vnetp->attach_progress = attach_progress;
 
 	DBG1(NULL, "instance(%d) exit\n", instance);
@@ -503,21 +581,25 @@
 	attach_progress = vnetp->attach_progress;
 
 	/*
-	 * Unregister from the gldv3 subsystem. This can fail, in particular
-	 * if there are still any open references to this mac device; in which
-	 * case we just return failure without continuing to detach further.
+	 * Disable the mac device in the gldv3 subsystem. This can fail, in
+	 * particular if there are still any open references to this mac
+	 * device; in which case we just return failure without continuing to
+	 * detach further.
+	 * If it succeeds, we then invoke vgen_uninit() which should unregister
+	 * any pseudo rings registered with the mac layer. Note we keep the
+	 * AST_macreg flag on, so we can unregister with the mac layer at
+	 * the end of this routine.
 	 */
 	if (attach_progress & AST_macreg) {
-		if (mac_unregister(vnetp->mh) != 0) {
+		if (mac_disable(vnetp->mh) != 0) {
 			return (1);
 		}
-		attach_progress &= ~AST_macreg;
 	}
 
 	/*
-	 * Now that we have unregistered from gldv3, we must finish all other
-	 * steps and successfully return from this function; otherwise we will
-	 * end up leaving the device in a broken/unusable state.
+	 * Now that we have disabled the device, we must finish all other steps
+	 * and successfully return from this function; otherwise we will end up
+	 * leaving the device in a broken/unusable state.
 	 *
 	 * First, release any hybrid resources assigned to this vnet device.
 	 */
@@ -530,9 +612,10 @@
 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
 	 * device and/or its ports; and detaches any existing ports.
 	 */
-	if (attach_progress & AST_vgen_init) {
+	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
 		vgen_uninit(vnetp->vgenhdl);
 		attach_progress &= ~AST_vgen_init;
+		attach_progress &= ~AST_init_mdeg;
 	}
 
 	/* Destroy the taskq. */
@@ -563,6 +646,17 @@
 		attach_progress &= ~AST_vnet_list;
 	}
 
+	if (attach_progress & AST_ring_init) {
+		vnet_ring_grp_uninit(vnetp);
+		attach_progress &= ~AST_ring_init;
+	}
+
+	if (attach_progress & AST_macreg) {
+		VERIFY(mac_unregister(vnetp->mh) == 0);
+		vnetp->mh = NULL;
+		attach_progress &= ~AST_macreg;
+	}
+
 	if (attach_progress & AST_vnet_alloc) {
 		rw_destroy(&vnetp->vrwlock);
 		rw_destroy(&vnetp->vsw_fp_rw);
@@ -683,8 +777,9 @@
  * external hosts.
  */
 mblk_t *
-vnet_m_tx(void *arg, mblk_t *mp)
+vnet_tx_ring_send(void *arg, mblk_t *mp)
 {
+	vnet_pseudo_tx_ring_t	*tx_ringp;
 	vnet_t			*vnetp;
 	vnet_res_t		*vresp;
 	mblk_t			*next;
@@ -694,8 +789,10 @@
 	boolean_t		is_unicast;
 	boolean_t		is_pvid;	/* non-default pvid ? */
 	boolean_t		hres;		/* Hybrid resource ? */
-
-	vnetp = (vnet_t *)arg;
+	void			*tx_arg;
+
+	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+	vnetp = (vnet_t *)tx_ringp->vnetp;
 	DBG1(vnetp, "enter\n");
 	ASSERT(mp != NULL);
 
@@ -790,10 +887,14 @@
 					}
 
 				}
+
+				macp = &vresp->macreg;
+				tx_arg = tx_ringp;
+			} else {
+				macp = &vresp->macreg;
+				tx_arg = macp->m_driver;
 			}
-
-			macp = &vresp->macreg;
-			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
+			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);
 
 			/* tx done; now release ref on fdb entry */
 			VNET_FDBE_REFRELE(vresp);
@@ -848,6 +949,124 @@
 	return (0);
 }
 
+static void
+vnet_ring_grp_init(vnet_t *vnetp)
+{
+	vnet_pseudo_rx_group_t	*rx_grp;
+	vnet_pseudo_rx_ring_t	*rx_ringp;
+	vnet_pseudo_tx_group_t	*tx_grp;
+	vnet_pseudo_tx_ring_t	*tx_ringp;
+	int			i;
+
+	tx_grp = &vnetp->tx_grp[0];
+	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
+	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
+	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
+		tx_ringp[i].state |= VNET_TXRING_SHARED;
+	}
+	tx_grp->rings = tx_ringp;
+	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
+
+	rx_grp = &vnetp->rx_grp[0];
+	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
+	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
+	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
+	    rx_grp->max_ring_cnt, KM_SLEEP);
+
+	/*
+	 * Setup the first 3 Pseudo RX Rings that are reserved;
+	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
+	 */
+	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
+	rx_ringp[0].index = 0;
+	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
+	rx_ringp[1].index = 1;
+	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
+	rx_ringp[2].index = 2;
+
+	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
+	rx_grp->rings = rx_ringp;
+
+	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
+	    i < rx_grp->max_ring_cnt; i++) {
+		rx_ringp = &rx_grp->rings[i];
+		rx_ringp->state = VNET_RXRING_FREE;
+		rx_ringp->index = i;
+	}
+}
+
+static void
+vnet_ring_grp_uninit(vnet_t *vnetp)
+{
+	vnet_pseudo_rx_group_t	*rx_grp;
+	vnet_pseudo_tx_group_t	*tx_grp;
+
+	tx_grp = &vnetp->tx_grp[0];
+	if (tx_grp->rings != NULL) {
+		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
+		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
+		    tx_grp->ring_cnt);
+		tx_grp->rings = NULL;
+	}
+
+	rx_grp = &vnetp->rx_grp[0];
+	if (rx_grp->rings != NULL) {
+		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
+		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
+		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
+		    rx_grp->max_ring_cnt);
+		rx_grp->rings = NULL;
+	}
+}
+
+static vnet_pseudo_rx_ring_t *
+vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
+{
+	vnet_pseudo_rx_group_t  *rx_grp;
+	vnet_pseudo_rx_ring_t	*rx_ringp;
+	int			index;
+
+	rx_grp = &vnetp->rx_grp[0];
+	WRITE_ENTER(&rx_grp->lock);
+
+	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
+		/* no rings available */
+		RW_EXIT(&rx_grp->lock);
+		return (NULL);
+	}
+
+	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
+	    index < rx_grp->max_ring_cnt; index++) {
+		rx_ringp = &rx_grp->rings[index];
+		if (rx_ringp->state == VNET_RXRING_FREE) {
+			rx_ringp->state |= VNET_RXRING_INUSE;
+			rx_grp->ring_cnt++;
+			break;
+		}
+	}
+
+	RW_EXIT(&rx_grp->lock);
+	return (rx_ringp);
+}
+
+static void
+vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
+{
+	vnet_pseudo_rx_group_t  *rx_grp;
+
+	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
+	rx_grp = &vnetp->rx_grp[0];
+	WRITE_ENTER(&rx_grp->lock);
+
+	if (ringp->state != VNET_RXRING_FREE) {
+		ringp->state = VNET_RXRING_FREE;
+		ringp->handle = NULL;
+		rx_grp->ring_cnt--;
+	}
+
+	RW_EXIT(&rx_grp->lock);
+}
+
 /* wrapper function for mac_register() */
 static int
 vnet_mac_register(vnet_t *vnetp)
@@ -867,6 +1086,15 @@
 	macp->m_margin = VLAN_TAGSZ;
 
 	/*
+	 * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to
+	 * workaround tx lock contention issues in nxge.
+	 */
+	macp->m_v12n = MAC_VIRT_LEVEL1;
+	if (vnet_mac_tx_serialize == B_TRUE) {
+		macp->m_v12n |= MAC_VIRT_SERIALIZE;
+	}
+
+	/*
 	 * Finally, we're ready to register ourselves with the MAC layer
 	 * interface; if this succeeds, we're all ready to start()
 	 */
@@ -1116,42 +1344,57 @@
 static void
 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
 {
-	vnet_res_t	*vresp = (vnet_res_t *)vrh;
-	vnet_t		*vnetp = vresp->vnetp;
+	vnet_res_t		*vresp = (vnet_res_t *)vrh;
+	vnet_t			*vnetp = vresp->vnetp;
+	vnet_pseudo_rx_ring_t	*ringp;
 
 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
 		freemsgchain(mp);
 		return;
 	}
 
-	/*
-	 * Packets received over a hybrid resource need additional processing
-	 * to remove the tag, for the pvid case. The underlying resource is
-	 * not aware of the vnet's pvid and thus packets are received with the
-	 * vlan tag in the header; unlike packets that are received over a ldc
-	 * channel in which case the peer vnet/vsw would have already removed
-	 * the tag.
-	 */
-	if (vresp->type == VIO_NET_RES_HYBRID &&
-	    vnetp->pvid != vnetp->default_vlan_id) {
-
-		vnet_rx_frames_untag(vnetp->pvid, &mp);
-		if (mp == NULL) {
-			return;
-		}
-	}
-
-	mac_rx(vnetp->mh, NULL, mp);
+	ringp = vresp->rx_ringp;
+	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
 }
 
 void
 vnet_tx_update(vio_net_handle_t vrh)
 {
-	vnet_res_t *vresp = (vnet_res_t *)vrh;
-	vnet_t *vnetp = vresp->vnetp;
-
-	if ((vnetp != NULL) && (vnetp->mh != NULL)) {
-		mac_tx_update(vnetp->mh);
+	vnet_res_t		*vresp = (vnet_res_t *)vrh;
+	vnet_t			*vnetp = vresp->vnetp;
+	vnet_pseudo_tx_ring_t	*tx_ringp;
+	vnet_pseudo_tx_group_t	*tx_grp;
+	int			i;
+
+	if (vnetp == NULL || vnetp->mh == NULL) {
+		return;
+	}
+
+	/*
+	 * Currently, the tx hwring API (used to access rings that belong to
+	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
+	 * update; also the pseudo rings are shared by the ports/ldcs in the
+	 * vgen layer. Thus we can't figure out which pseudo ring is being
+	 * re-enabled for transmits. To work around this, when we get a tx
+	 * restart notification from below, we simply propagate that to all
+	 * the tx pseudo rings registered with the mac layer above.
+	 *
+	 * There are a couple of side effects with this approach, but they are
+	 * not harmful, as outlined below:
+	 *
+	 * A) We might send an invalid ring_update() for a ring that is not
+	 * really flow controlled. This will not have any effect in the mac
+	 * layer and packets will continue to be transmitted on that ring.
+	 *
+	 * B) We might end up clearing the flow control in the mac layer for
+	 * a ring that is still flow controlled in the underlying resource.
+	 * This will result in the mac layer restarting	transmit, only to be
+	 * flow controlled again on that ring.
+	 */
+	tx_grp = &vnetp->tx_grp[0];
+	for (i = 0; i < tx_grp->ring_cnt; i++) {
+		tx_ringp = &tx_grp->rings[i];
+		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
 	}
 }
 
@@ -1233,8 +1476,8 @@
     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
     vio_net_callbacks_t *vcb)
 {
-	vnet_t	*vnetp;
-	vnet_res_t *vresp;
+	vnet_t		*vnetp;
+	vnet_res_t	*vresp;
 
 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
 	ether_copy(local_macaddr, vresp->local_macaddr);
@@ -1260,11 +1503,7 @@
 					    vnetp->instance);
 				}
 			}
-
-			WRITE_ENTER(&vnetp->vrwlock);
-			vresp->nextp = vnetp->vres_list;
-			vnetp->vres_list = vresp;
-			RW_EXIT(&vnetp->vrwlock);
+			vnet_add_resource(vnetp, vresp);
 			break;
 		}
 		vnetp = vnetp->nextp;
@@ -1281,6 +1520,14 @@
 	vcb->vio_net_tx_update = vnet_tx_update;
 	vcb->vio_net_report_err = vnet_handle_res_err;
 
+	/* Bind the resource to pseudo ring(s) */
+	if (vnet_bind_rings(vresp) != 0) {
+		(void) vnet_rem_resource(vnetp, vresp);
+		vnet_hio_destroy_kstats(vresp->ksp);
+		KMEM_FREE(vresp);
+		return (1);
+	}
+
 	/* Dispatch a task to start resources */
 	vnet_dispatch_res_task(vnetp);
 	return (0);
@@ -1294,8 +1541,6 @@
 {
 	vnet_res_t	*vresp = (vnet_res_t *)vhp;
 	vnet_t		*vnetp = vresp->vnetp;
-	vnet_res_t	*vrp;
-	kstat_t		*ksp = NULL;
 
 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
 
@@ -1306,7 +1551,29 @@
 	 */
 	vnet_fdbe_del(vnetp, vresp);
 
+	vnet_unbind_rings(vresp);
+
 	/* Now remove the resource from the list */
+	(void) vnet_rem_resource(vnetp, vresp);
+
+	vnet_hio_destroy_kstats(vresp->ksp);
+	KMEM_FREE(vresp);
+}
+
+static void
+vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
+{
+	WRITE_ENTER(&vnetp->vrwlock);
+	vresp->nextp = vnetp->vres_list;
+	vnetp->vres_list = vresp;
+	RW_EXIT(&vnetp->vrwlock);
+}
+
+static vnet_res_t *
+vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
+{
+	vnet_res_t	*vrp;
+
 	WRITE_ENTER(&vnetp->vrwlock);
 	if (vresp == vnetp->vres_list) {
 		vnetp->vres_list = vresp->nextp;
@@ -1320,15 +1587,12 @@
 			vrp = vrp->nextp;
 		}
 	}
-
-	ksp = vresp->ksp;
-	vresp->ksp = NULL;
-
 	vresp->vnetp = NULL;
 	vresp->nextp = NULL;
+
 	RW_EXIT(&vnetp->vrwlock);
-	vnet_hio_destroy_kstats(ksp);
-	KMEM_FREE(vresp);
+
+	return (vresp);
 }
 
 /*
@@ -1710,6 +1974,1024 @@
 	}
 }
 
+static boolean_t
+vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
+{
+	vnet_t	*vnetp = (vnet_t *)arg;
+
+	if (vnetp == NULL) {
+		return (0);
+	}
+
+	switch (cap) {
+
+	case MAC_CAPAB_RINGS: {
+
+		mac_capab_rings_t *cap_rings = cap_data;
+		/*
+		 * Rings Capability Notes:
+		 * We advertise rings to make use of the rings framework in
+		 * gldv3 mac layer, to improve the performance. This is
+		 * specifically needed when a Hybrid resource (with multiple
+		 * tx/rx hardware rings) is assigned to a vnet device. We also
+		 * leverage this for the normal case when no Hybrid resource is
+		 * assigned.
+		 *
+		 * Ring Allocation:
+		 * - TX path:
+		 * We expose a pseudo ring group with 2 pseudo tx rings (as
+		 * currently HybridIO exports only 2 rings) In the normal case,
+		 * transmit traffic that comes down to the driver through the
+		 * mri_tx (vnet_tx_ring_send()) entry point goes through the
+		 * distributed switching algorithm in vnet and gets transmitted
+		 * over a port/LDC in the vgen layer to either the vswitch or a
+		 * peer vnet. If and when a Hybrid resource is assigned to the
+		 * vnet, we obtain the tx ring information of the Hybrid device
+		 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
+		 * Traffic being sent over the Hybrid resource by the mac layer
+		 * gets spread across both hw rings, as they are mapped to the
+		 * 2 pseudo tx rings in vnet.
+		 *
+		 * - RX path:
+		 * We expose a pseudo ring group with 3 pseudo rx rings (static
+		 * rings) initially. The first (default) pseudo rx ring is
+		 * reserved for the resource that connects to the vswitch
+		 * service. The next 2 rings are reserved for a Hybrid resource
+		 * that may be assigned to the vnet device. If and when a
+		 * Hybrid resource is assigned to the vnet, we obtain the rx
+		 * ring information of the Hybrid device (nxge) and map these
+		 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
+		 * resource that connects to a peer vnet, we dynamically
+		 * allocate a pseudo rx ring and map it to that resource, when
+		 * the resource gets added; and the pseudo rx ring is
+		 * dynamically registered with the upper mac layer. We do the
+		 * reverse and unregister the ring with the mac layer when
+		 * the resource gets removed.
+		 *
+		 * Synchronization notes:
+		 * We don't need any lock to protect members of ring structure,
+		 * specifically ringp->hw_rh, in either the TX or the RX ring,
+		 * as explained below.
+		 * - TX ring:
+		 * ring->hw_rh is initialized only when a Hybrid resource is
+		 * associated; and gets referenced only in vnet_hio_tx(). The
+		 * Hybrid resource itself is available in fdb only after tx
+		 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
+		 * we call vnet_bind_rings() first and then call
+		 * vnet_start_resources() which adds an entry to fdb. For
+		 * traffic going over LDC resources, we don't reference
+		 * ring->hw_rh at all.
+		 * - RX ring:
+		 * For rings mapped to Hybrid resource ring->hw_rh is
+		 * initialized and only then do we add the rx callback for
+		 * the underlying Hybrid resource; we disable callbacks before
+		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
+		 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
+		 * (vio_net_resource_unreg()).
+		 */
+
+		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
+			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
+
+			/*
+			 * The ring_cnt for rx grp is initialized in
+			 * vnet_ring_grp_init(). Later, the ring_cnt gets
+			 * updated dynamically whenever LDC resources are added
+			 * or removed.
+			 */
+			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
+			cap_rings->mr_rget = vnet_get_ring;
+
+			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
+			cap_rings->mr_gget = vnet_get_group;
+			cap_rings->mr_gaddring = NULL;
+			cap_rings->mr_gremring = NULL;
+		} else {
+			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
+
+			/*
+			 * The ring_cnt for tx grp is initialized in
+			 * vnet_ring_grp_init() and remains constant, as we
+			 * do not support dymanic tx rings for now.
+			 */
+			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
+			cap_rings->mr_rget = vnet_get_ring;
+
+			/*
+			 * Transmit rings are not grouped; i.e, the number of
+			 * transmit ring groups advertised should be set to 0.
+			 */
+			cap_rings->mr_gnum = 0;
+
+			cap_rings->mr_gget = vnet_get_group;
+			cap_rings->mr_gaddring = NULL;
+			cap_rings->mr_gremring = NULL;
+		}
+		return (B_TRUE);
+
+	}
+
+	default:
+		break;
+
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Callback funtion for MAC layer to get ring information.
+ */
+static void
+vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
+    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
+{
+	vnet_t	*vnetp = arg;
+
+	switch (rtype) {
+
+	case MAC_RING_TYPE_RX: {
+
+		vnet_pseudo_rx_group_t	*rx_grp;
+		vnet_pseudo_rx_ring_t	*rx_ringp;
+		mac_intr_t		*mintr;
+
+		/* We advertised only one RX group */
+		ASSERT(g_index == 0);
+		rx_grp = &vnetp->rx_grp[g_index];
+
+		/* Check the current # of rings in the rx group */
+		ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
+
+		/* Get the ring based on the index */
+		rx_ringp = &rx_grp->rings[r_index];
+
+		rx_ringp->handle = r_handle;
+		/*
+		 * Note: we don't need to save the incoming r_index in rx_ring,
+		 * as vnet_ring_grp_init() would have initialized the index for
+		 * each ring in the array.
+		 */
+		rx_ringp->grp = rx_grp;
+		rx_ringp->vnetp = vnetp;
+
+		mintr = &infop->mri_intr;
+		mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
+		mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
+		mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
+
+		infop->mri_driver = (mac_ring_driver_t)rx_ringp;
+		infop->mri_start = vnet_rx_ring_start;
+		infop->mri_stop = vnet_rx_ring_stop;
+
+		/* Set the poll function, as this is an rx ring */
+		infop->mri_poll = vnet_rx_poll;
+
+		break;
+	}
+
+	case MAC_RING_TYPE_TX: {
+		vnet_pseudo_tx_group_t	*tx_grp;
+		vnet_pseudo_tx_ring_t	*tx_ringp;
+
+		/*
+		 * No need to check grp index; mac layer passes -1 for it.
+		 */
+		tx_grp = &vnetp->tx_grp[0];
+
+		/* Check the # of rings in the tx group */
+		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
+
+		/* Get the ring based on the index */
+		tx_ringp = &tx_grp->rings[r_index];
+
+		tx_ringp->handle = r_handle;
+		tx_ringp->index = r_index;
+		tx_ringp->grp = tx_grp;
+		tx_ringp->vnetp = vnetp;
+
+		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
+		infop->mri_start = vnet_tx_ring_start;
+		infop->mri_stop = vnet_tx_ring_stop;
+
+		/* Set the transmit function, as this is a tx ring */
+		infop->mri_tx = vnet_tx_ring_send;
+
+		break;
+	}
+
+	default:
+		break;
+	}
+}
+
+/*
+ * Callback funtion for MAC layer to get group information.
+ */
+static void
+vnet_get_group(void *arg, mac_ring_type_t type, const int index,
+	mac_group_info_t *infop, mac_group_handle_t handle)
+{
+	vnet_t	*vnetp = (vnet_t *)arg;
+
+	switch (type) {
+
+	case MAC_RING_TYPE_RX:
+	{
+		vnet_pseudo_rx_group_t	*rx_grp;
+
+		/* We advertised only one RX group */
+		ASSERT(index == 0);
+
+		rx_grp = &vnetp->rx_grp[index];
+		rx_grp->handle = handle;
+		rx_grp->index = index;
+		rx_grp->vnetp = vnetp;
+
+		infop->mgi_driver = (mac_group_driver_t)rx_grp;
+		infop->mgi_start = NULL;
+		infop->mgi_stop = NULL;
+		infop->mgi_addmac = vnet_addmac;
+		infop->mgi_remmac = vnet_remmac;
+		infop->mgi_count = rx_grp->ring_cnt;
+
+		break;
+	}
+
+	case MAC_RING_TYPE_TX:
+	{
+		vnet_pseudo_tx_group_t	*tx_grp;
+
+		/* We advertised only one TX group */
+		ASSERT(index == 0);
+
+		tx_grp = &vnetp->tx_grp[index];
+		tx_grp->handle = handle;
+		tx_grp->index = index;
+		tx_grp->vnetp = vnetp;
+
+		infop->mgi_driver = (mac_group_driver_t)tx_grp;
+		infop->mgi_start = NULL;
+		infop->mgi_stop = NULL;
+		infop->mgi_addmac = NULL;
+		infop->mgi_remmac = NULL;
+		infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
+
+		break;
+	}
+
+	default:
+		break;
+
+	}
+}
+
+static int
+vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
+{
+	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+	int			err;
+
+	/*
+	 * If this ring is mapped to a LDC resource, simply mark the state to
+	 * indicate the ring is started and return.
+	 */
+	if ((rx_ringp->state &
+	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
+		rx_ringp->gen_num = mr_gen_num;
+		rx_ringp->state |= VNET_RXRING_STARTED;
+		return (0);
+	}
+
+	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+
+	/*
+	 * This must be a ring reserved for a hwring. If the hwring is not
+	 * bound yet, simply mark the state to indicate the ring is started and
+	 * return. If and when a hybrid resource is activated for this vnet
+	 * device, we will bind the hwring and start it then. If a hwring is
+	 * already bound, start it now.
+	 */
+	if (rx_ringp->hw_rh == NULL) {
+		rx_ringp->gen_num = mr_gen_num;
+		rx_ringp->state |= VNET_RXRING_STARTED;
+		return (0);
+	}
+
+	err = mac_hwring_start(rx_ringp->hw_rh);
+	if (err == 0) {
+		rx_ringp->gen_num = mr_gen_num;
+		rx_ringp->state |= VNET_RXRING_STARTED;
+	} else {
+		err = ENXIO;
+	}
+
+	return (err);
+}
+
+static void
+vnet_rx_ring_stop(mac_ring_driver_t arg)
+{
+	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+
+	/*
+	 * If this ring is mapped to a LDC resource, simply mark the state to
+	 * indicate the ring is now stopped and return.
+	 */
+	if ((rx_ringp->state &
+	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
+		rx_ringp->state &= ~VNET_RXRING_STARTED;
+	}
+
+	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+
+	/*
+	 * This must be a ring reserved for a hwring. If the hwring is not
+	 * bound yet, simply mark the state to indicate the ring is stopped and
+	 * return. If a hwring is already bound, stop it now.
+	 */
+	if (rx_ringp->hw_rh == NULL) {
+		rx_ringp->state &= ~VNET_RXRING_STARTED;
+		return;
+	}
+
+	mac_hwring_stop(rx_ringp->hw_rh);
+	rx_ringp->state &= ~VNET_RXRING_STARTED;
+}
+
+/* ARGSUSED */
+static int
+vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
+{
+	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+
+	tx_ringp->state |= VNET_TXRING_STARTED;
+	return (0);
+}
+
+static void
+vnet_tx_ring_stop(mac_ring_driver_t arg)
+{
+	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+
+	tx_ringp->state &= ~VNET_TXRING_STARTED;
+}
+
+/*
+ * Disable polling for a ring and enable its interrupt.
+ */
+static int
+vnet_ring_enable_intr(void *arg)
+{
+	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+	vnet_res_t		*vresp;
+
+	if (rx_ringp->hw_rh == NULL) {
+		/*
+		 * Ring enable intr func is being invoked, but the ring is
+		 * not bound to any underlying resource ? This must be a ring
+		 * reserved for Hybrid resource and no such resource has been
+		 * assigned to this vnet device yet. We simply return success.
+		 */
+		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+		return (0);
+	}
+
+	/*
+	 * The rx ring has been bound to either a LDC or a Hybrid resource.
+	 * Call the appropriate function to enable interrupts for the ring.
+	 */
+	if (rx_ringp->state & VNET_RXRING_HYBRID) {
+		return (mac_hwring_enable_intr(rx_ringp->hw_rh));
+	} else {
+		vresp = (vnet_res_t *)rx_ringp->hw_rh;
+		return (vgen_enable_intr(vresp->macreg.m_driver));
+	}
+}
+
+/*
+ * Enable polling for a ring and disable its interrupt.
+ */
+static int
+vnet_ring_disable_intr(void *arg)
+{
+	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+	vnet_res_t		*vresp;
+
+	if (rx_ringp->hw_rh == NULL) {
+		/*
+		 * Ring disable intr func is being invoked, but the ring is
+		 * not bound to any underlying resource ? This must be a ring
+		 * reserved for Hybrid resource and no such resource has been
+		 * assigned to this vnet device yet. We simply return success.
+		 */
+		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+		return (0);
+	}
+
+	/*
+	 * The rx ring has been bound to either a LDC or a Hybrid resource.
+	 * Call the appropriate function to disable interrupts for the ring.
+	 */
+	if (rx_ringp->state & VNET_RXRING_HYBRID) {
+		return (mac_hwring_disable_intr(rx_ringp->hw_rh));
+	} else {
+		vresp = (vnet_res_t *)rx_ringp->hw_rh;
+		return (vgen_disable_intr(vresp->macreg.m_driver));
+	}
+}
+
+/*
+ * Poll 'bytes_to_pickup' bytes of message from the rx ring.
+ */
+static mblk_t *
+vnet_rx_poll(void *arg, int bytes_to_pickup)
+{
+	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+	mblk_t			*mp = NULL;
+	vnet_res_t		*vresp;
+	vnet_t			*vnetp = rx_ringp->vnetp;
+
+	if (rx_ringp->hw_rh == NULL) {
+		return (NULL);
+	}
+
+	if (rx_ringp->state & VNET_RXRING_HYBRID) {
+		mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
+		/*
+		 * Packets received over a hybrid resource need additional
+		 * processing to remove the tag, for the pvid case. The
+		 * underlying resource is not aware of the vnet's pvid and thus
+		 * packets are received with the vlan tag in the header; unlike
+		 * packets that are received over a ldc channel in which case
+		 * the peer vnet/vsw would have already removed the tag.
+		 */
+		if (vnetp->pvid != vnetp->default_vlan_id) {
+			vnet_rx_frames_untag(vnetp->pvid, &mp);
+		}
+	} else {
+		vresp = (vnet_res_t *)rx_ringp->hw_rh;
+		mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup);
+	}
+	return (mp);
+}
+
+/* ARGSUSED */
+void
+vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+	boolean_t loopback)
+{
+	vnet_t			*vnetp = (vnet_t *)arg;
+	vnet_pseudo_rx_ring_t	*ringp = (vnet_pseudo_rx_ring_t *)mrh;
+
+	/*
+	 * Packets received over a hybrid resource need additional processing
+	 * to remove the tag, for the pvid case. The underlying resource is
+	 * not aware of the vnet's pvid and thus packets are received with the
+	 * vlan tag in the header; unlike packets that are received over a ldc
+	 * channel in which case the peer vnet/vsw would have already removed
+	 * the tag.
+	 */
+	if (vnetp->pvid != vnetp->default_vlan_id) {
+		vnet_rx_frames_untag(vnetp->pvid, &mp);
+		if (mp == NULL) {
+			return;
+		}
+	}
+	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
+}
+
+static int
+vnet_addmac(void *arg, const uint8_t *mac_addr)
+{
+	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
+	vnet_t			*vnetp;
+
+	vnetp = rx_grp->vnetp;
+
+	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
+		return (0);
+	}
+
+	cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
+	    vnetp->instance, __func__);
+	return (EINVAL);
+}
+
+static int
+vnet_remmac(void *arg, const uint8_t *mac_addr)
+{
+	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
+	vnet_t			*vnetp;
+
+	vnetp = rx_grp->vnetp;
+
+	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
+		return (0);
+	}
+
+	cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
+	    vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
+	return (EINVAL);
+}
+
+int
+vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
+{
+	mac_handle_t		mh;
+	mac_client_handle_t	mch = NULL;
+	mac_unicast_handle_t	muh = NULL;
+	mac_diag_t		diag;
+	mac_register_t		*macp;
+	char			client_name[MAXNAMELEN];
+	int			rv;
+	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
+	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
+	vio_net_callbacks_t	vcb;
+	ether_addr_t		rem_addr =
+		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+	uint32_t		retries = 0;
+
+	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
+		return (EAGAIN);
+	}
+
+	do {
+		rv = mac_open_by_linkname(ifname, &mh);
+		if (rv == 0) {
+			break;
+		}
+		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
+			mac_free(macp);
+			return (rv);
+		}
+		drv_usecwait(vnet_mac_open_delay);
+	} while (rv == ENOENT);
+
+	vnetp->hio_mh = mh;
+
+	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
+	    ifname);
+	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
+	if (rv != 0) {
+		goto fail;
+	}
+	vnetp->hio_mch = mch;
+
+	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
+	    &diag);
+	if (rv != 0) {
+		goto fail;
+	}
+	vnetp->hio_muh = muh;
+
+	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
+	macp->m_driver = vnetp;
+	macp->m_dip = NULL;
+	macp->m_src_addr = NULL;
+	macp->m_callbacks = &vnet_hio_res_callbacks;
+	macp->m_min_sdu = 0;
+	macp->m_max_sdu = ETHERMTU;
+
+	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
+	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
+	if (rv != 0) {
+		goto fail;
+	}
+	mac_free(macp);
+
+	/* add the recv callback */
+	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);
+
+	/* add the notify callback - only tx updates for now */
+	vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb,
+	    vnetp);
+
+	return (0);
+
+fail:
+	mac_free(macp);
+	vnet_hio_mac_cleanup(vnetp);
+	return (1);
+}
+
+void
+vnet_hio_mac_cleanup(vnet_t *vnetp)
+{
+	if (vnetp->hio_mnh != NULL) {
+		(void) mac_notify_remove(vnetp->hio_mnh, B_TRUE);
+		vnetp->hio_mnh = NULL;
+	}
+
+	if (vnetp->hio_vhp != NULL) {
+		vio_net_resource_unreg(vnetp->hio_vhp);
+		vnetp->hio_vhp = NULL;
+	}
+
+	if (vnetp->hio_muh != NULL) {
+		mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
+		vnetp->hio_muh = NULL;
+	}
+
+	if (vnetp->hio_mch != NULL) {
+		mac_client_close(vnetp->hio_mch, 0);
+		vnetp->hio_mch = NULL;
+	}
+
+	if (vnetp->hio_mh != NULL) {
+		mac_close(vnetp->hio_mh);
+		vnetp->hio_mh = NULL;
+	}
+}
+
+/* Bind pseudo rings to hwrings */
+static int
+vnet_bind_hwrings(vnet_t *vnetp)
+{
+	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
+	mac_perim_handle_t	mph1;
+	vnet_pseudo_rx_group_t	*rx_grp;
+	vnet_pseudo_rx_ring_t	*rx_ringp;
+	vnet_pseudo_tx_group_t	*tx_grp;
+	vnet_pseudo_tx_ring_t	*tx_ringp;
+	int			hw_ring_cnt;
+	int			i;
+	int			rv;
+
+	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
+
+	/* Get the list of the underlying RX rings. */
+	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
+	    MAC_RING_TYPE_RX);
+
+	/* We expect the the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
+	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
+		cmn_err(CE_WARN,
+		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
+		    vnetp->instance, hw_ring_cnt);
+		goto fail;
+	}
+
+	if (vnetp->rx_hwgh != NULL) {
+		/*
+		 * Quiesce the HW ring and the mac srs on the ring. Note
+		 * that the HW ring will be restarted when the pseudo ring
+		 * is started. At that time all the packets will be
+		 * directly passed up to the pseudo RX ring and handled
+		 * by mac srs created over the pseudo RX ring.
+		 */
+		mac_rx_client_quiesce(vnetp->hio_mch);
+		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
+	}
+
+	/*
+	 * Bind the pseudo rings to the hwrings and start the hwrings.
+	 * Note we don't need to register these with the upper mac, as we have
+	 * statically exported these pseudo rxrings which are reserved for
+	 * rxrings of Hybrid resource.
+	 */
+	rx_grp = &vnetp->rx_grp[0];
+	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
+		/* Pick the rxrings reserved for Hybrid resource */
+		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
+
+		/* Store the hw ring handle */
+		rx_ringp->hw_rh = hw_rh[i];
+
+		/* Bind the pseudo ring to the underlying hwring */
+		mac_hwring_setup(rx_ringp->hw_rh,
+		    (mac_resource_handle_t)rx_ringp);
+
+		/* Start the hwring if needed */
+		if (rx_ringp->state & VNET_RXRING_STARTED) {
+			rv = mac_hwring_start(rx_ringp->hw_rh);
+			if (rv != 0) {
+				mac_hwring_teardown(rx_ringp->hw_rh);
+				rx_ringp->hw_rh = NULL;
+				goto fail;
+			}
+		}
+	}
+
+	/* Get the list of the underlying TX rings. */
+	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
+	    MAC_RING_TYPE_TX);
+
+	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
+	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
+		cmn_err(CE_WARN,
+		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
+		    vnetp->instance, hw_ring_cnt);
+		goto fail;
+	}
+
+	/*
+	 * Now map the pseudo txrings to the hw txrings. Note we don't need
+	 * to register these with the upper mac, as we have statically exported
+	 * these rings. Note that these rings will continue to be used for LDC
+	 * resources to peer vnets and vswitch (shared ring).
+	 */
+	tx_grp = &vnetp->tx_grp[0];
+	for (i = 0; i < tx_grp->ring_cnt; i++) {
+		tx_ringp = &tx_grp->rings[i];
+		tx_ringp->hw_rh = hw_rh[i];
+		tx_ringp->state |= VNET_TXRING_HYBRID;
+	}
+
+	mac_perim_exit(mph1);
+	return (0);
+
+fail:
+	mac_perim_exit(mph1);
+	vnet_unbind_hwrings(vnetp);
+	return (1);
+}
+
+/* Unbind pseudo rings from hwrings */
+static void
+vnet_unbind_hwrings(vnet_t *vnetp)
+{
+	mac_perim_handle_t	mph1;
+	vnet_pseudo_rx_ring_t	*rx_ringp;
+	vnet_pseudo_rx_group_t	*rx_grp;
+	vnet_pseudo_tx_group_t	*tx_grp;
+	vnet_pseudo_tx_ring_t	*tx_ringp;
+	int			i;
+
+	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
+
+	tx_grp = &vnetp->tx_grp[0];
+	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
+		tx_ringp = &tx_grp->rings[i];
+		if (tx_ringp->state & VNET_TXRING_HYBRID) {
+			tx_ringp->state &= ~VNET_TXRING_HYBRID;
+			tx_ringp->hw_rh = NULL;
+		}
+	}
+
+	rx_grp = &vnetp->rx_grp[0];
+	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
+		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
+		if (rx_ringp->hw_rh != NULL) {
+			/* Stop the hwring */
+			mac_hwring_stop(rx_ringp->hw_rh);
+
+			/* Teardown the hwring */
+			mac_hwring_teardown(rx_ringp->hw_rh);
+			rx_ringp->hw_rh = NULL;
+		}
+	}
+
+	if (vnetp->rx_hwgh != NULL) {
+		vnetp->rx_hwgh = NULL;
+		/*
+		 * First clear the permanent-quiesced flag of the RX srs then
+		 * restart the HW ring and the mac srs on the ring.
+		 */
+		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
+		mac_rx_client_restart(vnetp->hio_mch);
+	}
+
+	mac_perim_exit(mph1);
+}
+
+/* Bind pseudo ring to a LDC resource */
+static int
+vnet_bind_vgenring(vnet_res_t *vresp)
+{
+	vnet_t			*vnetp;
+	vnet_pseudo_rx_group_t	*rx_grp;
+	vnet_pseudo_rx_ring_t	*rx_ringp;
+	mac_perim_handle_t	mph1;
+	int			rv;
+	int			type;
+
+	vnetp = vresp->vnetp;
+	type = vresp->type;
+	rx_grp = &vnetp->rx_grp[0];
+
+	if (type == VIO_NET_RES_LDC_SERVICE) {
+		/*
+		 * Ring Index 0 is the default ring in the group and is
+		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
+		 * is allocated statically and is reported to the mac layer
+		 * in vnet_m_capab(). So, all we need to do here, is save a
+		 * reference to the associated vresp.
+		 */
+		rx_ringp = &rx_grp->rings[0];
+		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
+		vresp->rx_ringp = (void *)rx_ringp;
+		return (0);
+	}
+	ASSERT(type == VIO_NET_RES_LDC_GUEST);
+
+	mac_perim_enter_by_mh(vnetp->mh, &mph1);
+
+	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
+	if (rx_ringp == NULL) {
+		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
+		    vnetp->instance);
+		goto fail;
+	}
+
+	/* Store the LDC resource itself as the ring handle */
+	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
+
+	/*
+	 * Save a reference to the ring in the resource for lookup during
+	 * unbind. Note this is only done for LDC resources. We don't need this
+	 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
+	 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
+	 */
+	vresp->rx_ringp = (void *)rx_ringp;
+	rx_ringp->state |= VNET_RXRING_LDC_GUEST;
+
+	/* Register the pseudo ring with upper-mac */
+	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
+	if (rv != 0) {
+		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
+		rx_ringp->hw_rh = NULL;
+		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
+		goto fail;
+	}
+
+	mac_perim_exit(mph1);
+	return (0);
+fail:
+	mac_perim_exit(mph1);
+	return (1);
+}
+
+/* Unbind pseudo ring from a LDC resource */
+static void
+vnet_unbind_vgenring(vnet_res_t *vresp)
+{
+	vnet_t			*vnetp;
+	vnet_pseudo_rx_group_t	*rx_grp;
+	vnet_pseudo_rx_ring_t	*rx_ringp;
+	mac_perim_handle_t	mph1;
+	int			type;
+
+	vnetp = vresp->vnetp;
+	type = vresp->type;
+	rx_grp = &vnetp->rx_grp[0];
+
+	if (vresp->rx_ringp == NULL) {
+		return;
+	}
+
+	if (type == VIO_NET_RES_LDC_SERVICE) {
+		/*
+		 * Ring Index 0 is the default ring in the group and is
+		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
+		 * is allocated statically and is reported to the mac layer
+		 * in vnet_m_capab(). So, all we need to do here, is remove its
+		 * reference to the associated vresp.
+		 */
+		rx_ringp = &rx_grp->rings[0];
+		rx_ringp->hw_rh = NULL;
+		vresp->rx_ringp = NULL;
+		return;
+	}
+	ASSERT(type == VIO_NET_RES_LDC_GUEST);
+
+	mac_perim_enter_by_mh(vnetp->mh, &mph1);
+
+	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
+	vresp->rx_ringp = NULL;
+
+	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
+		/* Unregister the pseudo ring with upper-mac */
+		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);
+
+		rx_ringp->hw_rh = NULL;
+		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
+
+		/* Free the pseudo rx ring */
+		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
+	}
+
+	mac_perim_exit(mph1);
+}
+
+static void
+vnet_unbind_rings(vnet_res_t *vresp)
+{
+	switch (vresp->type) {
+
+	case VIO_NET_RES_LDC_SERVICE:
+	case VIO_NET_RES_LDC_GUEST:
+		vnet_unbind_vgenring(vresp);
+		break;
+
+	case VIO_NET_RES_HYBRID:
+		vnet_unbind_hwrings(vresp->vnetp);
+		break;
+
+	default:
+		break;
+
+	}
+}
+
+static int
+vnet_bind_rings(vnet_res_t *vresp)
+{
+	int	rv;
+
+	switch (vresp->type) {
+
+	case VIO_NET_RES_LDC_SERVICE:
+	case VIO_NET_RES_LDC_GUEST:
+		rv = vnet_bind_vgenring(vresp);
+		break;
+
+	case VIO_NET_RES_HYBRID:
+		rv = vnet_bind_hwrings(vresp->vnetp);
+		break;
+
+	default:
+		rv = 1;
+		break;
+
+	}
+
+	return (rv);
+}
+
+/* ARGSUSED */
+int
+vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
+{
+	vnet_t	*vnetp = (vnet_t *)arg;
+
+	*val = mac_stat_get(vnetp->hio_mh, stat);
+	return (0);
+}
+
+/*
+ * The start() and stop() routines for the Hybrid resource below, are just
+ * dummy functions. This is provided to avoid resource type specific code in
+ * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
+ * of the Hybrid resource happens in the context of the mac_client interfaces
+ * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
+ */
+/* ARGSUSED */
+static int
+vnet_hio_start(void *arg)
+{
+	return (0);
+}
+
+/* ARGSUSED */
+static void
+vnet_hio_stop(void *arg)
+{
+}
+
+mblk_t *
+vnet_hio_tx(void *arg, mblk_t *mp)
+{
+	vnet_pseudo_tx_ring_t	*tx_ringp;
+	mblk_t			*nextp;
+	mblk_t			*ret_mp;
+
+	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+	for (;;) {
+		nextp = mp->b_next;
+		mp->b_next = NULL;
+
+		ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
+		if (ret_mp != NULL) {
+			ret_mp->b_next = nextp;
+			mp = ret_mp;
+			break;
+		}
+
+		if ((mp = nextp) == NULL)
+			break;
+	}
+	return (mp);
+}
+
+static void
+vnet_hio_notify_cb(void *arg, mac_notify_type_t type)
+{
+	vnet_t			*vnetp = (vnet_t *)arg;
+	mac_perim_handle_t	mph;
+
+	mac_perim_enter_by_mh(vnetp->hio_mh, &mph);
+	switch (type) {
+	case MAC_NOTE_TX:
+		vnet_tx_update(vnetp->hio_vhp);
+		break;
+
+	default:
+		break;
+	}
+	mac_perim_exit(mph);
+}
+
 #ifdef	VNET_IOC_DEBUG
 
 /*
--- a/usr/src/uts/sun4v/io/vnet_dds.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/sun4v/io/vnet_dds.c	Mon Aug 17 09:14:35 2009 -0400
@@ -113,6 +113,8 @@
 
 /* Functions imported from other files */
 extern int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
+extern int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
+extern void vnet_hio_mac_cleanup(vnet_t *vnetp);
 
 /* HV functions that are used in this file */
 extern uint64_t vdds_hv_niu_vr_getinfo(uint32_t hvcookie,
@@ -412,7 +414,31 @@
 		} else {
 			vdds->hio_dip = dip;
 			vdds->hio_cookie = hio_cookie;
-			(void) vdds_send_dds_resp_msg(vnetp, dmsg, B_TRUE);
+			sprintf(vdds->hio_ifname, "%s%d", ddi_driver_name(dip),
+			    ddi_get_instance(dip));
+
+			rv = vnet_hio_mac_init(vnetp, vdds->hio_ifname);
+			if (rv != 0) {
+				/* failed - cleanup, send failed DDS message */
+				DERR(vdds, "HIO mac init failed, cleaning up");
+				rv = vdds_destroy_niu_node(dip, hio_cookie);
+				if (rv == 0) {
+					/* use DERR to print by default */
+					DERR(vdds, "Successfully destroyed"
+					    " Hybrid node");
+				} else {
+					cmn_err(CE_WARN, "vnet%d:Failed to "
+					    "destroy Hybrid node",
+					    vnetp->instance);
+				}
+				vdds->hio_dip = NULL;
+				vdds->hio_cookie = 0;
+				(void) vdds_send_dds_resp_msg(vnetp,
+				    dmsg, B_FALSE);
+			} else {
+				(void) vdds_send_dds_resp_msg(vnetp,
+				    dmsg, B_TRUE);
+			}
 			/* DERR used only print by default */
 			DERR(vdds, "Successfully created HIO node");
 		}
@@ -424,6 +450,7 @@
 			DBG2(vdds, "NACK: No HIO device destroy");
 			(void) vdds_send_dds_resp_msg(vnetp, dmsg, B_FALSE);
 		} else {
+			vnet_hio_mac_cleanup(vnetp);
 			rv = vdds_destroy_niu_node(vnetp->vdds_info.hio_dip,
 			    vdds->hio_cookie);
 			if (rv == 0) {
@@ -444,6 +471,7 @@
 	case VNET_DDS_TASK_REL_SHARE:
 		DBG2(vdds, "REL_SHARE task...");
 		if (vnetp->vdds_info.hio_dip != NULL) {
+			vnet_hio_mac_cleanup(vnetp);
 			rv = vdds_destroy_niu_node(vnetp->vdds_info.hio_dip,
 			    vdds->hio_cookie);
 			if (rv == 0) {
--- a/usr/src/uts/sun4v/io/vnet_gen.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/sun4v/io/vnet_gen.c	Mon Aug 17 09:14:35 2009 -0400
@@ -73,11 +73,15 @@
 /* vgen proxy entry points */
 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
     const uint8_t *macaddr, void **vgenhdl);
+int vgen_init_mdeg(void *arg);
 void vgen_uninit(void *arg);
 int vgen_dds_tx(void *arg, void *dmsg);
 void vgen_mod_init(void);
 int vgen_mod_cleanup(void);
 void vgen_mod_fini(void);
+int vgen_enable_intr(void *arg);
+int vgen_disable_intr(void *arg);
+mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
 static int vgen_start(void *arg);
 static void vgen_stop(void *arg);
 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
@@ -151,6 +155,7 @@
 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
 static void vgen_ldc_watchdog(void *arg);
+static mblk_t *vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup);
 
 /* vgen handshake functions */
 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
@@ -200,7 +205,7 @@
 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
 static void vgen_ldc_rcv_worker(void *arg);
 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
-static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
+static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt);
 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
@@ -536,13 +541,6 @@
 	if (rv != 0) {
 		goto vgen_init_fail;
 	}
-
-	/* register with MD event generator */
-	rv = vgen_mdeg_reg(vgenp);
-	if (rv != DDI_SUCCESS) {
-		goto vgen_init_fail;
-	}
-
 	*vgenhdl = (void *)vgenp;
 
 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
@@ -562,6 +560,15 @@
 	return (DDI_FAILURE);
 }
 
+int
+vgen_init_mdeg(void *arg)
+{
+	vgen_t	*vgenp = (vgen_t *)arg;
+
+	/* register with MD event generator */
+	return (vgen_mdeg_reg(vgenp));
+}
+
 /*
  * Called by vnet to undo the initializations done by vgen_init().
  * The handle provided by generic transport during vgen_init() is the argument.
@@ -2094,13 +2101,21 @@
 static void
 vgen_mdeg_unreg(vgen_t *vgenp)
 {
-	(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
-	(void) mdeg_unregister(vgenp->mdeg_port_hdl);
-	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
-	KMEM_FREE(vgenp->mdeg_parentp);
-	vgenp->mdeg_parentp = NULL;
-	vgenp->mdeg_dev_hdl = NULL;
-	vgenp->mdeg_port_hdl = NULL;
+	if (vgenp->mdeg_dev_hdl != NULL) {
+		(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
+		vgenp->mdeg_dev_hdl = NULL;
+	}
+	if (vgenp->mdeg_port_hdl != NULL) {
+		(void) mdeg_unregister(vgenp->mdeg_port_hdl);
+		vgenp->mdeg_port_hdl = NULL;
+	}
+
+	if (vgenp->mdeg_parentp != NULL) {
+		kmem_free(vgenp->mdeg_parentp->specp,
+		    sizeof (vgen_prop_template));
+		KMEM_FREE(vgenp->mdeg_parentp);
+		vgenp->mdeg_parentp = NULL;
+	}
 }
 
 /* mdeg callback function for the port node */
@@ -2907,6 +2922,7 @@
 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
 
 	attach_state |= AST_mutex_init;
 
@@ -3032,6 +3048,7 @@
 		mutex_destroy(&ldcp->cblock);
 		mutex_destroy(&ldcp->wrlock);
 		mutex_destroy(&ldcp->rxlock);
+		mutex_destroy(&ldcp->pollq_lock);
 	}
 	if (attach_state & AST_ldc_alloc) {
 		KMEM_FREE(ldcp);
@@ -3100,6 +3117,7 @@
 		mutex_destroy(&ldcp->cblock);
 		mutex_destroy(&ldcp->wrlock);
 		mutex_destroy(&ldcp->rxlock);
+		mutex_destroy(&ldcp->pollq_lock);
 
 		/* unlink it from the list */
 		*prev_ldcp = ldcp->nextp;
@@ -6278,7 +6296,7 @@
 			 */
 			if (bp != NULL) {
 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
-				vgen_rx(ldcp, bp);
+				vgen_rx(ldcp, bp, bpt);
 				count = 0;
 				bp = bpt = NULL;
 			}
@@ -6459,7 +6477,7 @@
 
 		if (count++ > vgen_chain_len) {
 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
-			vgen_rx(ldcp, bp);
+			vgen_rx(ldcp, bp, bpt);
 			count = 0;
 			bp = bpt = NULL;
 		}
@@ -6512,7 +6530,7 @@
 	/* send up packets received so far */
 	if (bp != NULL) {
 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
-		vgen_rx(ldcp, bp);
+		vgen_rx(ldcp, bp, bpt);
 		bp = bpt = NULL;
 	}
 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
@@ -6996,18 +7014,57 @@
  * Send received packets up the stack.
  */
 static void
-vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
+vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt)
 {
 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
+	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
 
 	if (ldcp->rcv_thread != NULL) {
 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
-		mutex_exit(&ldcp->rxlock);
 	} else {
 		ASSERT(MUTEX_HELD(&ldcp->cblock));
+	}
+
+	mutex_enter(&ldcp->pollq_lock);
+
+	if (ldcp->polling_on == B_TRUE) {
+		/*
+		 * If we are in polling mode, simply queue
+		 * the packets onto the poll queue and return.
+		 */
+		if (ldcp->pollq_headp == NULL) {
+			ldcp->pollq_headp = bp;
+			ldcp->pollq_tailp = bpt;
+		} else {
+			ldcp->pollq_tailp->b_next = bp;
+			ldcp->pollq_tailp = bpt;
+		}
+
+		mutex_exit(&ldcp->pollq_lock);
+		return;
+	}
+
+	/*
+	 * Prepend any pending mblks in the poll queue, now that we
+	 * are in interrupt mode, before sending up the chain of pkts.
+	 */
+	if (ldcp->pollq_headp != NULL) {
+		DBG2(vgenp, ldcp, "vgen_rx(%lx), pending pollq_headp\n",
+		    (uintptr_t)ldcp);
+		ldcp->pollq_tailp->b_next = bp;
+		bp = ldcp->pollq_headp;
+		ldcp->pollq_headp = ldcp->pollq_tailp = NULL;
+	}
+
+	mutex_exit(&ldcp->pollq_lock);
+
+	if (ldcp->rcv_thread != NULL) {
+		mutex_exit(&ldcp->rxlock);
+	} else {
 		mutex_exit(&ldcp->cblock);
 	}
 
+	/* Send up the packets */
 	vrx_cb(ldcp->portp->vhp, bp);
 
 	if (ldcp->rcv_thread != NULL) {
@@ -7233,6 +7290,145 @@
 	vgen_handshake_retry(ldcp);
 }
 
+int
+vgen_enable_intr(void *arg)
+{
+	vgen_port_t		*portp = (vgen_port_t *)arg;
+	vgen_ldclist_t		*ldclp;
+	vgen_ldc_t		*ldcp;
+
+	ldclp = &portp->ldclist;
+	READ_ENTER(&ldclp->rwlock);
+	/*
+	 * NOTE: for now, we will assume we have a single channel.
+	 */
+	if (ldclp->headp == NULL) {
+		RW_EXIT(&ldclp->rwlock);
+		return (1);
+	}
+	ldcp = ldclp->headp;
+
+	mutex_enter(&ldcp->pollq_lock);
+	ldcp->polling_on = B_FALSE;
+	mutex_exit(&ldcp->pollq_lock);
+
+	RW_EXIT(&ldclp->rwlock);
+
+	return (0);
+}
+
+int
+vgen_disable_intr(void *arg)
+{
+	vgen_port_t		*portp = (vgen_port_t *)arg;
+	vgen_ldclist_t		*ldclp;
+	vgen_ldc_t		*ldcp;
+
+	ldclp = &portp->ldclist;
+	READ_ENTER(&ldclp->rwlock);
+	/*
+	 * NOTE: for now, we will assume we have a single channel.
+	 */
+	if (ldclp->headp == NULL) {
+		RW_EXIT(&ldclp->rwlock);
+		return (1);
+	}
+	ldcp = ldclp->headp;
+
+
+	mutex_enter(&ldcp->pollq_lock);
+	ldcp->polling_on = B_TRUE;
+	mutex_exit(&ldcp->pollq_lock);
+
+	RW_EXIT(&ldclp->rwlock);
+
+	return (0);
+}
+
+mblk_t *
+vgen_poll(void *arg, int bytes_to_pickup)
+{
+	vgen_port_t		*portp = (vgen_port_t *)arg;
+	vgen_ldclist_t		*ldclp;
+	vgen_ldc_t		*ldcp;
+	mblk_t			*mp = NULL;
+
+	ldclp = &portp->ldclist;
+	READ_ENTER(&ldclp->rwlock);
+	/*
+	 * NOTE: for now, we will assume we have a single channel.
+	 */
+	if (ldclp->headp == NULL) {
+		RW_EXIT(&ldclp->rwlock);
+		return (NULL);
+	}
+	ldcp = ldclp->headp;
+
+	mp = vgen_ldc_poll(ldcp, bytes_to_pickup);
+
+	RW_EXIT(&ldclp->rwlock);
+	return (mp);
+}
+
+static mblk_t *
+vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup)
+{
+	mblk_t	*bp = NULL;
+	mblk_t	*bpt = NULL;
+	mblk_t	*mp = NULL;
+	size_t	mblk_sz = 0;
+	size_t	sz = 0;
+	uint_t	count = 0;
+
+	mutex_enter(&ldcp->pollq_lock);
+
+	bp = ldcp->pollq_headp;
+	while (bp != NULL) {
+		/* get the size of this packet */
+		mblk_sz = msgdsize(bp);
+
+		/* if adding this pkt, exceeds the size limit, we are done. */
+		if (sz + mblk_sz >  bytes_to_pickup) {
+			break;
+		}
+
+		/* we have room for this packet */
+		sz += mblk_sz;
+
+		/* increment the # of packets being sent up */
+		count++;
+
+		/* track the last processed pkt */
+		bpt = bp;
+
+		/* get the next pkt */
+		bp = bp->b_next;
+	}
+
+	if (count != 0) {
+		/*
+		 * picked up some packets; save the head of pkts to be sent up.
+		 */
+		mp = ldcp->pollq_headp;
+
+		/* move the pollq_headp to skip over the pkts being sent up */
+		ldcp->pollq_headp = bp;
+
+		/* picked up all pending pkts in the queue; reset tail also */
+		if (ldcp->pollq_headp == NULL) {
+			ldcp->pollq_tailp = NULL;
+		}
+
+		/* terminate the tail of pkts to be sent up */
+		bpt->b_next = NULL;
+	}
+
+	mutex_exit(&ldcp->pollq_lock);
+
+	DTRACE_PROBE1(vgen_poll_pkts, uint_t, count);
+	return (mp);
+}
+
 #if DEBUG
 
 /*
--- a/usr/src/uts/sun4v/os/fillsysinfo.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/sun4v/os/fillsysinfo.c	Mon Aug 17 09:14:35 2009 -0400
@@ -774,9 +774,6 @@
 		}
 	}
 
-	cmn_err(CE_WARN, "mmu-#ra-bits property not found in MD");
-	cmn_err(CE_WARN, "Memory DR disabled");
-
 	memnodes = md_alloc_scan_dag(mdp,
 	    md_root_node(mdp), "memory", "fwd", &mem_list);
 
--- a/usr/src/uts/sun4v/promif/promif_emul.c	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/sun4v/promif/promif_emul.c	Mon Aug 17 09:14:35 2009 -0400
@@ -233,7 +233,7 @@
 unmap_prom_mappings(struct translation *transroot, size_t ntransroot)
 {
 	int i, j, rv;
-	int npgs, nunmapped, nfreed, nskipped;
+	int npgs, nunmapped, nfreed, nskipped, nskipped_io;
 	char *p;
 	tte_t tte;
 	pfn_t pfn;
@@ -256,6 +256,7 @@
 	nfreed = 0;
 	nunmapped = 0;
 	nskipped = 0;
+	nskipped_io = 0;
 
 	for (i = 0, promt = transroot; i < ntransroot; i++, promt++) {
 		ASSERT(promt->tte_hi != 0);
@@ -292,34 +293,41 @@
 			ASSERT(TTE_IS_8K(&tte));
 
 			/*
-			 * Unload the current mapping for the page and
-			 * if it is the last mapping, free the page.
+			 * Unload the current mapping for the pfn and
+			 * if it is the last mapping for a memory page,
+			 * free the page.
 			 */
-			pp = page_numtopp_nolock(pfn);
-			PMFREE_DEBUG("unmap vaddr=0x%lx pfn=0x%lx pp=0x%p",
-			    vaddr, pfn, (void *)pp);
-			ASSERT(pp);
-			ASSERT(PAGE_EXCL(pp));
-			ASSERT(PP_ISNORELOC(pp));
-			ASSERT(!PP_ISFREE(pp));
-			ASSERT(page_find(&prom_ppages, pfn));
-			ASSERT(page_get_pagecnt(pp->p_szc) == 1);
+			PMFREE_DEBUG("unmap vaddr=0x%lx pfn=0x%lx", vaddr, pfn);
 
 			hat_unload(kas.a_hat, (caddr_t)vaddr, PAGESIZE,
 			    HAT_UNLOAD_UNLOCK);
 
-			if (pp->p_mapping) {
-				PMFREE_DEBUG(" skip\n");
+			if (pf_is_memory(pfn)) {
+				pp = page_numtopp_nolock(pfn);
+				PMFREE_DEBUG(" pp=0x%p", (void *)pp);
+				ASSERT(pp);
+				ASSERT(PAGE_EXCL(pp));
+				ASSERT(PP_ISNORELOC(pp));
+				ASSERT(!PP_ISFREE(pp));
+				ASSERT(page_find(&prom_ppages, pfn));
+				ASSERT(page_get_pagecnt(pp->p_szc) == 1);
+
+				if (pp->p_mapping) {
+					PMFREE_DEBUG(" skip\n");
+				} else {
+					PP_CLRNORELOC(pp);
+					page_destroy(pp, 0);
+					memlist_write_lock();
+					rv = memlist_add_span(pfn << PAGESHIFT,
+					    PAGESIZE, &phys_avail);
+					ASSERT(rv == MEML_SPANOP_OK);
+					memlist_write_unlock();
+					PMFREE_DEBUG(" free\n");
+					nfreed++;
+				}
 			} else {
-				PP_CLRNORELOC(pp);
-				page_destroy(pp, 0);
-				memlist_write_lock();
-				rv = memlist_add_span(pfn << PAGESHIFT,
-				    PAGESIZE, &phys_avail);
-				ASSERT(rv == MEML_SPANOP_OK);
-				memlist_write_unlock();
-				PMFREE_DEBUG(" free\n");
-				nfreed++;
+				nskipped_io++;
+				PMFREE_DEBUG(" skip IO\n");
 			}
 			nunmapped++;
 			vaddr += PAGESIZE;
@@ -327,8 +335,9 @@
 	}
 
 	if (transroot) {
-		PMFREE_DEBUG("nunmapped=%d nfreed=%d nskipped=%d\n",
-		    nunmapped, nfreed, nskipped);
+		PMFREE_DEBUG(
+		    "nunmapped=%d nfreed=%d nskipped=%d nskipped_io=%d\n",
+		    nunmapped, nfreed, nskipped, nskipped_io);
 		kmem_free(transroot, ntransroot * sizeof (*transroot));
 	}
 
--- a/usr/src/uts/sun4v/sys/vnet.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/sun4v/sys/vnet.h	Mon Aug 17 09:14:35 2009 -0400
@@ -34,6 +34,8 @@
 #include <sys/vnet_res.h>
 #include <sys/vnet_mailbox.h>
 #include <sys/modhash.h>
+#include <net/if.h>
+#include <sys/mac_client.h>
 
 #define	VNET_SUCCESS		(0)	/* successful return */
 #define	VNET_FAILURE		(-1)	/* unsuccessful return */
@@ -117,6 +119,7 @@
 	uint32_t		refcnt;		/* reference count */
 	struct	vnet		*vnetp;		/* back pointer to vnet */
 	kstat_t			*ksp;		/* hio kstats */
+	void			*rx_ringp;	/* assoc pseudo rx ring */
 } vnet_res_t;
 
 #define	VNET_DDS_TASK_ADD_SHARE		0x01
@@ -131,6 +134,7 @@
 	vio_dds_msg_t	dmsg;		/* Pending DDS message */
 	dev_info_t	*hio_dip;	/* Hybrid device's dip */
 	uint64_t	hio_cookie;	/* Hybrid device's cookie */
+	char		hio_ifname[LIFNAMSIZ];  /* Hybrid interface name */
 	ddi_taskq_t	*dds_taskqp;	/* Taskq's used for DDS */
 	struct vnet	*vnetp;		/* Back pointer to vnetp */
 } vnet_dds_info_t;
@@ -155,12 +159,103 @@
 
 typedef enum {
 		AST_init = 0x0, AST_vnet_alloc = 0x1,
-		AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
-		AST_vgen_init = 0x8, AST_fdbh_alloc = 0x10,
-		AST_vdds_init = 0x20, AST_taskq_create = 0x40,
-		AST_vnet_list = 0x80, AST_macreg = 0x100
+		AST_ring_init = 0x2, AST_vdds_init = 0x4,
+		AST_read_macaddr = 0x8, AST_fdbh_alloc = 0x10,
+		AST_taskq_create = 0x20, AST_vnet_list = 0x40,
+		AST_vgen_init = 0x80, AST_macreg = 0x100,
+		AST_init_mdeg = 0x200
 } vnet_attach_progress_t;
 
+#define	VNET_NUM_PSEUDO_GROUPS		1	/* # of pseudo ring grps */
+#define	VNET_NUM_HYBRID_RINGS		2	/* # of Hybrid tx/rx rings */
+#define	VNET_HYBRID_RXRING_INDEX	1	/* Hybrid rx ring start index */
+
+/*
+ * # of Pseudo TX Rings is defined based on the possible
+ * # of TX Hardware Rings from a Hybrid resource.
+ */
+#define	VNET_NUM_PSEUDO_TXRINGS		VNET_NUM_HYBRID_RINGS
+
+/*
+ * # of Pseudo RX Rings that are reserved and exposed by default.
+ * 1 for LDC resource to vsw + 2 for RX rings of Hybrid resource.
+ */
+#define	VNET_NUM_PSEUDO_RXRINGS_DEFAULT	(VNET_NUM_HYBRID_RINGS + 1)
+
+/* Pseudo RX Ring States */
+typedef enum {
+	VNET_RXRING_FREE = 0x0,		/* Free */
+	VNET_RXRING_INUSE = 0x1,	/* In use */
+	VNET_RXRING_LDC_SERVICE = 0x2,	/* Mapped to vswitch */
+	VNET_RXRING_LDC_GUEST = 0x4,	/* Mapped to a peer vnet */
+	VNET_RXRING_HYBRID = 0x8,	/* Mapped to Hybrid resource */
+	VNET_RXRING_STARTED = 0x10	/* Started */
+} vnet_rxring_state_t;
+
+/* Pseudo TX Ring States */
+typedef enum {
+	VNET_TXRING_FREE = 0x0,		/* Free */
+	VNET_TXRING_INUSE = 0x1,	/* In use */
+	VNET_TXRING_SHARED = 0x2,	/* Shared among LDCs */
+	VNET_TXRING_HYBRID = 0x4,	/* Shared among LDCs, Hybrid resource */
+	VNET_TXRING_STARTED = 0x8	/* Started */
+} vnet_txring_state_t;
+
+/*
+ * Psuedo TX Ring
+ */
+typedef struct vnet_pseudo_tx_ring {
+	uint_t			index;		/* ring index */
+	vnet_txring_state_t	state;		/* ring state */
+	void			*grp;		/* grp associated */
+	void			*vnetp;		/* vnet associated */
+	mac_ring_handle_t	handle;		/* ring handle in mac layer */
+	mac_ring_handle_t	hw_rh;	/* Resource type dependent, internal */
+					/* ring handle. Hybrid res: ring hdl */
+					/* of hardware rx ring; LDC res: hdl */
+					/* to the res itself (vnet_res_t)    */
+} vnet_pseudo_tx_ring_t;
+
+/*
+ * Psuedo RX Ring
+ */
+typedef struct vnet_pseudo_rx_ring {
+	uint_t			index;		/* ring index */
+	vnet_rxring_state_t	state;		/* ring state */
+	void			*grp;		/* grp associated */
+	void			*vnetp;		/* vnet associated */
+	mac_ring_handle_t	handle;		/* ring handle in mac layer */
+	mac_ring_handle_t	hw_rh;	/* Resource type dependent, internal */
+					/* ring handle. Hybrid res: ring hdl */
+					/* of hardware tx ring; otherwise    */
+					/* NULL */
+	uint64_t		gen_num;	/* Mac layer gen_num */
+} vnet_pseudo_rx_ring_t;
+
+/*
+ * Psuedo TX Ring Group
+ */
+typedef struct vnet_pseudo_tx_group {
+	uint_t			index;		/* group index */
+	void			*vnetp;		/* vnet associated */
+	mac_group_handle_t	handle;		/* grp handle in mac layer */
+	uint_t			ring_cnt;	/* total # of rings in grp */
+	vnet_pseudo_tx_ring_t	*rings;		/* array of rings */
+} vnet_pseudo_tx_group_t;
+
+/*
+ * Psuedo RX Ring Group
+ */
+typedef struct vnet_pseudo_rx_group {
+	krwlock_t		lock;		/* sync rings access in grp */
+	int			index;		/* group index */
+	void			*vnetp;		/* vnet this grp belongs to */
+	mac_group_handle_t	handle;		/* grp handle in mac layer */
+	uint_t			max_ring_cnt;	/* total # of rings in grp */
+	uint_t			ring_cnt;	/* # of rings in use */
+	vnet_pseudo_rx_ring_t	*rings;		/* array of rings */
+} vnet_pseudo_rx_group_t;
+
 /*
  * vnet instance state information
  */
@@ -194,6 +289,18 @@
 	vnet_dds_info_t		vdds_info;	/* DDS related info */
 	krwlock_t		vrwlock;	/* Resource list lock */
 	ddi_taskq_t		*taskqp;	/* Resource taskq */
+
+	/* pseudo ring groups */
+	vnet_pseudo_rx_group_t	rx_grp[VNET_NUM_PSEUDO_GROUPS];
+	vnet_pseudo_tx_group_t	tx_grp[VNET_NUM_PSEUDO_GROUPS];
+
+	vio_net_handle_t	hio_vhp;	/* HIO resource hdl */
+	mac_handle_t		hio_mh;		/* HIO mac hdl */
+	mac_client_handle_t	hio_mch;	/* HIO mac client hdl */
+	mac_unicast_handle_t	hio_muh;	/* HIO mac unicst hdl */
+	mac_notify_handle_t	hio_mnh;	/* HIO notify cb hdl */
+	mac_group_handle_t	rx_hwgh;	/* HIO rx ring-group hdl */
+	mac_group_handle_t	tx_hwgh;	/* HIO tx ring-group hdl */
 } vnet_t;
 
 #ifdef DEBUG
--- a/usr/src/uts/sun4v/sys/vnet_gen.h	Mon Aug 17 09:11:10 2009 -0400
+++ b/usr/src/uts/sun4v/sys/vnet_gen.h	Mon Aug 17 09:14:35 2009 -0400
@@ -180,6 +180,7 @@
 	kmutex_t		tclock;		/* tx reclaim lock */
 	kmutex_t		wrlock;		/* sync transmits */
 	kmutex_t		rxlock;		/* sync reception */
+	kmutex_t		pollq_lock;	/* sync polling and rxworker */
 
 	/* channel info from ldc layer */
 	uint64_t		ldc_id;		/* channel number */
@@ -248,6 +249,11 @@
 	kmutex_t		rcv_thr_lock;	/* lock for receive thread */
 	kcondvar_t		rcv_thr_cv;	/* cond.var for recv thread */
 
+	/* receive polling fields */
+	boolean_t		polling_on;	/* polling enabled ? */
+	mblk_t			*pollq_headp;	/* head of pkts in pollq */
+	mblk_t			*pollq_tailp;	/* tail of pkts in pollq */
+
 	/* channel statistics */
 	vgen_stats_t		stats;		/* channel statistics */
 	kstat_t			*ksp;		/* channel kstats */