changeset 10053:79ff8cfc9153

PSARC/2009/340 Interrupt affinity interfaces and PCITool enhancements 6796906 Interfaces needed for querying and re-targetting msi-x interrupts 6805710 px driver should support re-targetting MSI-X interrupts 6849547 PCITool enhancements 6851623 ddi_intr_enable() and ddi_intr_disable() fails randomly for dup interrupt.
author Evan Yan <Evan.Yan@Sun.COM>
date Wed, 08 Jul 2009 12:59:05 +0800
parents 86e6866e392d
children 443f0eb740ec
files usr/src/cmd/mdb/sparc/modules/intr/intr.c usr/src/cmd/pcitool/pcitool.1m usr/src/cmd/pcitool/pcitool.c usr/src/cmd/pcitool/pcitool_ui.c usr/src/cmd/pcitool/pcitool_ui.h usr/src/cmd/pcitool/pcitool_usage.c usr/src/cmd/perl/contrib/Sun/Solaris/Intrs/Intrs.xs usr/src/uts/common/io/pci_intr_lib.c usr/src/uts/common/os/ddi_intr.c usr/src/uts/common/sys/ddi_intr.h usr/src/uts/common/sys/ddi_intr_impl.h usr/src/uts/common/sys/pci_intr_lib.h usr/src/uts/common/sys/pci_tools.h usr/src/uts/i86pc/io/pci/pci_common.c usr/src/uts/i86pc/io/pci/pci_tools.c usr/src/uts/i86pc/io/pcplusmp/apic_introp.c usr/src/uts/sun4/io/px/px.c usr/src/uts/sun4/io/px/px_ib.c usr/src/uts/sun4/io/px/px_ib.h usr/src/uts/sun4/io/px/px_intr.c usr/src/uts/sun4/io/px/px_intr.h usr/src/uts/sun4/io/px/px_msiq.c usr/src/uts/sun4/io/px/px_msiq.h usr/src/uts/sun4/io/px/px_pec.c usr/src/uts/sun4/io/px/px_tools.c usr/src/uts/sun4/os/ddi_impl.c usr/src/uts/sun4u/io/pci/pci.c usr/src/uts/sun4u/io/pci/pci_ib.c usr/src/uts/sun4u/io/pci/pci_intr.c usr/src/uts/sun4u/io/pci/pci_tools.c usr/src/uts/sun4u/sys/pci/pci_ib.h usr/src/uts/sun4v/io/px/px_lib4v.c
diffstat 32 files changed, 1455 insertions(+), 646 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/mdb/sparc/modules/intr/intr.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/cmd/mdb/sparc/modules/intr/intr.c	Wed Jul 08 12:59:05 2009 +0800
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/mdb_modapi.h>
 #include <mdb/mdb_ks.h>
 #include <sys/async.h>		/* ecc_flt for pci_ecc.h */
@@ -289,13 +287,13 @@
 	do { /* ino_next_p loop */
 		if (mdb_vread(&ipil, sizeof (px_ino_pil_t),
 		    (uintptr_t)ino.ino_ipil_p) == -1) {
-			return;
+			continue;
 		}
 
 		do { /* ipil_next_p loop */
 			if (mdb_vread(&ih, sizeof (px_ih_t),
 			    (uintptr_t)ipil.ipil_ih_start) == -1) {
-				return;
+				continue;
 			}
 
 			count = 0;
@@ -358,11 +356,12 @@
 
 			} while (count < ipil.ipil_ih_size);
 
-		} while (mdb_vread(&ipil, sizeof (px_ino_pil_t),
-		    (uintptr_t)ipil.ipil_next_p) != -1);
+		} while ((ipil.ipil_next_p != NULL) &&
+		    (mdb_vread(&ipil, sizeof (px_ino_pil_t),
+		    (uintptr_t)ipil.ipil_next_p) != -1));
 
-	} while (mdb_vread(&ino, sizeof (px_ino_t),
-	    (uintptr_t)ino.ino_next_p) != -1);
+	} while ((ino.ino_next_p != NULL) && (mdb_vread(&ino, sizeof (px_ino_t),
+	    (uintptr_t)ino.ino_next_p) != -1));
 }
 
 static char *
--- a/usr/src/cmd/pcitool/pcitool.1m	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/cmd/pcitool/pcitool.1m	Wed Jul 08 12:59:05 2009 +0800
@@ -24,20 +24,32 @@
 pcitool \- interrupt routing tool
 .SH "SYNOPSIS"
 .PP
-/usr/sbin/pcitool \fIPCI_nexus_node\fR -i ino=\fIino\fR [ -r [ -c ] | -w cpu=\fICPU\fR [ -g ] ] [ -v ] [ -q ]
+/usr/sbin/pcitool \fIpci@<unit-address>\fR -i \fI<ino#> | all\fR [ -r [ -c ] | -w \fI<cpu#>\fR [ -g ] ] [ -v ] [ -q ]
+
+/usr/sbin/pcitool \fIpci@<unit-address>\fR -m \fI<msi#> | all\fR [ -r [ -c ] | -w \fI<cpu#>\fR [ -g ] ] [ -v ] [ -q ]
 
 /usr/sbin/pcitool [ -h ]
 
 .SH "DESCRIPTION"
 .PP
-PCItool is a low-level tool which provides a facility for getting and setting interrupt routing information.
+PCItool is a low-level tool which provides a facility for getting and setting
+interrupt routing information.
 
 .SS \fIInterrupt Routing\fR
 
 The \fIpcitool -i\fR command displays device and CPU routing information for
-all inos on a given nexus, and allows rerouting of a given ino or ino group
-to a specific CPU.
-
+INOs on a given nexus, and allows rerouting of a given INO or INO group to a
+specific CPU. Use the \fIpcitool -m\fR command to retrieve and reroute MSI/Xs.
+  
+On SPARC platforms, the INO is mapped to an interrupt mondo, whereas one or
+more MSI/Xs are mapped to an INO. So, INO and MSI/Xs are individually
+retargetable. Use the "-i" option to retrieve or reroute a given INO, and
+use the "-m" option for MSI/Xs.
+   
+On x86 platforms, both INOs and MSI/Xs are mapped to the same interrupt vectors.
+Use the "-i" option to retrieve and reroute any interrupt vector (both INOs and
+MSI/Xs).  The "-m" option is therefore not required on x86 platforms, and it is
+not supported there.
 
 \fIRequired privileges\fR
 
@@ -52,41 +64,59 @@
 
 -r [ -c ]
 
-Display device and CPU routing information for inos on a given nexus.
-The device path and instance number of each device for each displayed ino will
-be shown.  On some platforms (e.g. Fire) interrupts dedicated to the root
-complex are indicated with "(Internal)" appended to their pathname.
+Display device and CPU routing information for INOs on a given nexus.
+The device path and instance number of each device for each displayed INO will
+be shown.  On some platforms, interrupts dedicated to the root complex are
+indicated with "(Internal)" appended to their pathname.
 
 Dump interrupt controller information with -c.
 
 If neither -r nor -w are provided on the commandline, -r is assumed.
 
-The command for showing all inos on /pci@8,700000 is:
-
-  # pcitool /pci@8,700000 -i
-
-The command for showing ino 0x23 on the same root nexus, along with sample
+The command for showing all INOs on /pci@1e,600000 is:
+   
+  # pcitool /pci@1e,600000 -i all
+   
+The command for showing INO 0x19 on the same root nexus, along with sample
 output, is:
-
-  # pcitool /pci@8,700000 -i ino=23
+   
+  # pcitool /pci@1e,600000 -i 19
+   
+  ino 0x19 mapped to cpu 0x0
+  Device: /pci@1e,600000/pci@0/pci@9/pci@0/scsi@1
+    Driver: mpt, instance 0
+   
+  ino 0x19 mapped to cpu 0x0
+  Device: /pci@1e,600000/pci@0/pci@2/pci@0/network@4
+    Driver: bge, instance 0
+   
+The command for showing MSI 0x1 on the same root nexus,
+along with sample output, is:
+   
+  # pcitool /pci@1e,600000 -m 1
+   
+  msi 0x1 mapped to cpu 0x0
+  Device: /pci@1e,600000/pci@0/pci@9/pci@0/scsi@1
+    Driver: mpt, instance 0
 
-  ino 23 on ctlr 0 mapped to cpu 0
-  Device: /pci@8,700000/ebus@5/i2c@1,30
-    Driver: pcf8584, instance 1
-  Device: /pci@8,700000/ebus@5/i2c@1,2e
-    Driver: pcf8584, instance 0
-
--w cpu=\fIhex_CPU\fR [ -g ]
+-w \fI<cpu#>\fR [ -g ]
 
-Route the given ino to the given CPU.  Display the new and original routing
-information.  The ino must be specified.
-
-Successful rerouting ino 23 above from cpu 0 to cpu 1 gives the following
+Route the given INO or MSI/X to the given CPU. Display the new and original
+routing information. The INO or MSI/X must be specified.
+  
+Successful rerouting ino 19 above from cpu 0 to cpu 1 gives the following
 output:
-
-  # pcitool /pci@8,700000 -i ino=23 -w cpu=1
-
-  Interrupts on ino 23 reassigned: Old cpu:0, New cpu:1
+   
+  # pcitool /pci@1e,600000 -i 19 -w 1
+   
+  Interrupts on ino 0x19 reassigned: Old cpu: 0x0, New cpu: 0x1
+   
+Successful rerouting msi 1 above from cpu 1 to cpu 0 gives the following
+output:
+  
+  # pcitool /pci@1e,600000 -m 1 -w 0
+  
+  Interrupts on msi 0x1 reassigned: Old cpu: 0x1, New cpu: 0x0
 
 On some platforms (such as X86) multiple MSI interrupts of a single function
 need to be rerouted together.  Use -g to do this.  -g works only on supported
@@ -94,12 +124,12 @@
 When -g is used, the vector provided must be the lowest-numbered vector of the
 group.  The size of the group is determined internally.
 
-Successful rerouting a group of inos starting at 60 from cpu 0 to cpu 1 gives
+Successful rerouting a group of INOs starting at 60 from cpu 0 to cpu 1 gives
 the following output:
 
-  # pcitool /pci@0,0 -i ino=60 -w cpu=1 -g
+  # pcitool /pci@0,0 -i 60 -w 1 -g
 
-  Interrupts on ino group starting at ino 60 reassigned: Old cpu:0, New cpu:1
+  Interrupts on ino group starting at ino 0x60 reassigned: Old cpu: 0x0, New cpu: 0x1
 
 -v
 
--- a/usr/src/cmd/pcitool/pcitool.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/cmd/pcitool/pcitool.c	Wed Jul 08 12:59:05 2009 +0800
@@ -94,6 +94,8 @@
 		"CPU is non-existent or not online" },
 	{ PCITOOL_INVALID_INO,
 		"INO is out of range or invalid" },
+	{ PCITOOL_INVALID_MSI,
+		"MSI is out of range or invalid" },
 	{ PCITOOL_PENDING_INTRTIMEOUT,
 		"Timeout waiting for pending interrupts to clear" },
 	{ PCITOOL_REGPROP_NOTWELLFORMED,
@@ -1290,8 +1292,13 @@
 {
 	int i;
 
-	(void) printf("\nino %x mapped to cpu %x\n",
-	    iget_p->ino,  iget_p->cpu_id);
+	if (iget_p->flags & PCITOOL_INTR_FLAG_GET_MSI)
+		(void) printf("\nmsi 0x%x mapped to cpu 0x%x\n",
+		    iget_p->msi,  iget_p->cpu_id);
+	else
+		(void) printf("\nino 0x%x mapped to cpu 0x%x\n",
+		    iget_p->ino,  iget_p->cpu_id);
+
 	for (i = 0; i < iget_p->num_devs; i++) {
 		(void) printf("Device: %s\n", iget_p->dev[i].path);
 		(void) printf("  Driver: %s, instance %d\n",
@@ -1310,29 +1317,38 @@
     pcitool_uiargs_t *input_args_p)
 {
 	pcitool_intr_get_t *iget_p = *iget_pp;
-	uint32_t ino = iget_p->ino;
+	const char	*str_type = NULL;
+	uint32_t	intr;
+
+	if (input_args_p->flags & MSI_SPEC_FLAG) {
+		intr = input_args_p->intr_msi;
+		str_type = "msi";
+	} else {
+		intr = input_args_p->intr_ino;
+		str_type = "ino";
+	}
 
 	/*
-	 * Check if interrupts are active on this ino.  Get as much
-	 * device info as there is room for at the moment.  If there
+	 * Check if interrupts are active on this ino/msi. Get as much
+	 * device info as there is room for at the moment. If there
 	 * is not enough room for all devices, will call again with a
 	 * larger buffer.
 	 */
 	if (ioctl(fd, PCITOOL_DEVICE_GET_INTR, iget_p) != 0) {
-
 		/*
 		 * Let EIO errors silently slip through, as
 		 * some inos may not be viewable by design.
 		 * We don't want to stop or print an error for these.
 		 */
-
 		if (errno == EIO) {
 			return (SUCCESS);
 		}
 
 		if (!(IS_QUIET(input_args_p->flags))) {
-			(void) fprintf(stderr, "Ioctl to get interrupt "
-			    "%d info failed %s\n", ino, strerror(errno));
+			(void) fprintf(stderr, "Ioctl to get %s 0x%x "
+			    "info failed: %s\n", str_type, intr,
+			    strerror(errno));
+
 			if (errno != EFAULT) {
 				(void) fprintf(stderr, "Pcitool status: %s\n",
 				    strstatus(iget_p->status));
@@ -1351,11 +1367,12 @@
 		iget_p = *iget_pp =
 		    realloc(iget_p, PCITOOL_IGET_SIZE(iget_p->num_devs));
 		iget_p->num_devs_ret = iget_p->num_devs;
+
 		if (ioctl(fd, PCITOOL_DEVICE_GET_INTR, iget_p) != 0) {
 			if (!(IS_QUIET(input_args_p->flags))) {
-				(void) fprintf(stderr, "Ioctl to get interrupt "
-				    "%d device info failed %s\n",
-				    ino, strerror(errno));
+				(void) fprintf(stderr, "Ioctl to get %s 0x%x "
+				    "device info failed: %s\n", str_type,
+				    intr, strerror(errno));
 				if (errno != EFAULT) {
 					(void) fprintf(stderr,
 					    "Pcitool status: %s\n",
@@ -1386,24 +1403,57 @@
 	iget_p->num_devs_ret = INIT_NUM_DEVS;
 	iget_p->user_version = PCITOOL_VERSION;
 
-	/* Explicit ino requested. */
-	if (input_args_p->flags &  INO_SPEC_FLAG) {
-		iget_p->ino = input_args_p->intr_ino;
+	/* Explicit MSI requested. */
+	if (input_args_p->flags & MSI_SPEC_FLAG) {
+		iget_p->msi = input_args_p->intr_msi;
+		iget_p->flags = PCITOOL_INTR_FLAG_GET_MSI;
 		rval = get_single_interrupt(fd, &iget_p, input_args_p);
-
-	} else {	/* Return all inos. */
-
+		/* Return all MSIs. */
+	} else if (input_args_p->flags & MSI_ALL_FLAG) {
 		pcitool_intr_info_t intr_info;
+		intr_info.flags = PCITOOL_INTR_FLAG_GET_MSI;
 
 		if (ioctl(fd, PCITOOL_SYSTEM_INTR_INFO, &intr_info) != 0) {
 			if (!(IS_QUIET(input_args_p->flags))) {
 				(void) fprintf(stderr,
-				    "intr info ioctl failed:%s\n",
+				    "intr info ioctl failed: %s\n",
 				    strerror(errno));
 			}
-
 		} else {
+			int msi;
 
+			/*
+			 * Search through all interrupts.
+			 * Display info on enabled ones.
+			 */
+			for (msi = 0;
+			    ((msi < intr_info.num_intr) && (rval == SUCCESS));
+			    msi++) {
+				bzero(iget_p, sizeof (pcitool_intr_get_t));
+				iget_p->num_devs_ret = INIT_NUM_DEVS;
+				iget_p->user_version = PCITOOL_VERSION;
+				iget_p->flags = PCITOOL_INTR_FLAG_GET_MSI;
+				iget_p->msi = msi;
+				rval = get_single_interrupt(
+				    fd, &iget_p, input_args_p);
+			}
+		}
+		/* Explicit INO requested. */
+	} else if (input_args_p->flags & INO_SPEC_FLAG) {
+		iget_p->ino = input_args_p->intr_ino;
+		rval = get_single_interrupt(fd, &iget_p, input_args_p);
+		/* Return all INOs. */
+	} else if (input_args_p->flags & INO_ALL_FLAG) {
+		pcitool_intr_info_t intr_info;
+		intr_info.flags = 0;
+
+		if (ioctl(fd, PCITOOL_SYSTEM_INTR_INFO, &intr_info) != 0) {
+			if (!(IS_QUIET(input_args_p->flags))) {
+				(void) fprintf(stderr,
+				    "intr info ioctl failed: %s\n",
+				    strerror(errno));
+			}
+		} else {
 			int ino;
 
 			/*
@@ -1413,6 +1463,9 @@
 			for (ino = 0;
 			    ((ino < intr_info.num_intr) && (rval == SUCCESS));
 			    ino++) {
+				bzero(iget_p, sizeof (pcitool_intr_get_t));
+				iget_p->num_devs_ret = INIT_NUM_DEVS;
+				iget_p->user_version = PCITOOL_VERSION;
 				iget_p->ino = ino;
 				rval = get_single_interrupt(
 				    fd, &iget_p, input_args_p);
@@ -1433,6 +1486,7 @@
 	char *ctlr_type = NULL;
 	int rval = SUCCESS;
 
+	intr_info.flags = 0;
 	if (ioctl(fd, PCITOOL_SYSTEM_INTR_INFO, &intr_info) != 0) {
 		if (!(IS_QUIET(input_args_p->flags))) {
 			(void) perror("Ioctl to get intr ctlr info failed");
@@ -1488,36 +1542,46 @@
 static int
 set_interrupts(int fd, pcitool_uiargs_t *input_args_p)
 {
-	int rval = SUCCESS;	/* Return status. */
-
-	pcitool_intr_set_t iset;
+	pcitool_intr_set_t	iset;
+	const char		*str_type = NULL;
+	uint32_t		intr;
+	int			rval = SUCCESS;	/* Return status. */
 
 	/* Load interrupt number and cpu from commandline. */
-	iset.ino = input_args_p->intr_ino;
+	if (input_args_p->flags & MSI_SPEC_FLAG) {
+		iset.msi = intr = input_args_p->intr_msi;
+		iset.flags = PCITOOL_INTR_FLAG_SET_MSI;
+		str_type = "msi";
+	} else {
+		iset.ino = intr = input_args_p->intr_ino;
+		iset.flags = 0;
+		str_type = "ino";
+	}
+
 	iset.cpu_id = input_args_p->intr_cpu;
 	iset.user_version = PCITOOL_VERSION;
-	iset.flags = (input_args_p->flags & SETGRP_FLAG) ?
-	    PCITOOL_INTR_SET_FLAG_GROUP : 0;
+	iset.flags |= (input_args_p->flags & SETGRP_FLAG) ?
+	    PCITOOL_INTR_FLAG_SET_GROUP : 0;
 
 	/* Do the deed. */
 	if (ioctl(fd, PCITOOL_DEVICE_SET_INTR, &iset) != 0) {
 		if (!(IS_QUIET(input_args_p->flags))) {
 			(void) fprintf(stderr,
-			    "Ioctl to set intr 0x%x failed: %s\n",
-			    input_args_p->intr_ino, strerror(errno));
+			    "Ioctl to set %s 0x%x failed: %s\n",
+			    str_type, intr, strerror(errno));
 			(void) fprintf(stderr, "pcitool status: %s\n",
 			    strstatus(iset.status));
 		}
 		rval = errno;
 	} else {
 		if (input_args_p->flags & SETGRP_FLAG) {
-			(void) printf("\nInterrupts on ino %x reassigned:",
-			    iset.ino);
+			(void) printf("\nInterrupts on %s group starting "
+			    "at %s 0x%x reassigned:", str_type, str_type, intr);
 		} else {
-			(void) printf("\nInterrupts on ino group starting "
-			    "at ino %x reassigned:", iset.ino);
+			(void) printf("\nInterrupts on %s 0x%x reassigned:",
+			    str_type, intr);
 		}
-		(void) printf(" Old cpu:%x, New cpu:%x\n", iset.cpu_id,
+		(void) printf(" Old cpu: 0x%x, New cpu: 0x%x\n", iset.cpu_id,
 		    input_args_p->intr_cpu);
 	}
 
--- a/usr/src/cmd/pcitool/pcitool_ui.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/cmd/pcitool/pcitool_ui.c	Wed Jul 08 12:59:05 2009 +0800
@@ -67,11 +67,8 @@
 /*
  * This defines which main options can be specified by the user.
  * Options with colons after them require arguments.
- *
- * First : means to return : if option is missing.  This is used to handle
- * the optional argument to -i.
  */
-static char *opt_string = ":n:d:i:p:rw:o:s:e:b:vaqlcxgy";
+static char *opt_string = ":n:d:i:m:p:rw:o:s:e:b:vaqlcxgy";
 
 /* This defines options used singly and only by themselves (no nexus). */
 static char *no_dev_opt_string = "ahpqv";
@@ -88,7 +85,8 @@
 static int parse_device_opts(char *input, uint64_t *flags_arg,
     uint8_t *bus_arg, uint8_t *device_arg, uint8_t *func_arg,
     uint8_t *bank_arg);
-static int parse_intr_opts(char *input, uint64_t *flags_arg, uint8_t *ino_arg);
+static int parse_ino_opts(char *input, uint64_t *flags_arg, uint8_t *ino_arg);
+static int parse_msi_opts(char *input, uint64_t *flags_arg, uint16_t *msi_arg);
 static int parse_intr_set_opts(char *input, uint64_t *flags_arg,
     uint32_t *cpu_arg);
 static int parse_probeone_opts(char *input, uint64_t *flags_arg,
@@ -187,7 +185,6 @@
 		}
 
 		if (error) {
-
 			print_bad_option(argv, optopt, optarg);
 			return (FAILURE);
 		}
@@ -251,21 +248,15 @@
 		case 'i':
 			if (parsed_args->flags & (LEAF_FLAG |
 			    NEXUS_FLAG | INTR_FLAG | PROBE_FLAGS)) {
-				(void) fprintf(stderr, "%s: -i set with "
+				(void) fprintf(stderr, "%s: -i set with -m, "
 				    "-n, -d or -p or is set twice\n", argv[0]);
 				error = B_TRUE;
 				break;
 			}
 			parsed_args->flags |= INTR_FLAG;
 
-			/* Process, say, -i -r */
-			if (optarg[0] == '-') {
-				optind--;
-				continue;
-			}
-
 			/* parse input to get ino value. */
-			if (parse_intr_opts(optarg, &parsed_args->flags,
+			if (parse_ino_opts(optarg, &parsed_args->flags,
 			    &parsed_args->intr_ino) != SUCCESS) {
 				(void) fprintf(stderr,
 				    "%s: Error parsing interrupt options\n",
@@ -273,7 +264,26 @@
 				error = B_TRUE;
 			}
 			break;
+		/* Interrupt */
+		case 'm':
+			if (parsed_args->flags & (LEAF_FLAG |
+			    NEXUS_FLAG | INTR_FLAG | PROBE_FLAGS)) {
+				(void) fprintf(stderr, "%s: -m set with -i, "
+				    "-n, -d or -p or is set twice\n", argv[0]);
+				error = B_TRUE;
+				break;
+			}
+			parsed_args->flags |= INTR_FLAG;
 
+			/* parse input to get msi value. */
+			if (parse_msi_opts(optarg, &parsed_args->flags,
+			    &parsed_args->intr_msi) != SUCCESS) {
+				(void) fprintf(stderr,
+				    "%s: Error parsing interrupt options\n",
+				    argv[0]);
+				error = B_TRUE;
+			}
+			break;
 		/* Probe */
 		case 'p':
 			if (parsed_args->flags & (LEAF_FLAG |
@@ -573,10 +583,6 @@
 		/* Option without operand. */
 		case ':':
 			switch (optopt) {
-			case 'i':
-				/* Allow -i without ino=. */
-				parsed_args->flags |= INTR_FLAG;
-				break;
 			case 'p':
 				/* Allow -p without bdf spec. */
 				parsed_args->flags |=
@@ -638,22 +644,23 @@
 			if (parsed_args->flags &
 			    ~(INTR_FLAG | VERBOSE_FLAG | QUIET_FLAG |
 			    READ_FLAG | WRITE_FLAG | SHOWCTLR_FLAG |
-			    SETGRP_FLAG | INO_SPEC_FLAG | CPU_SPEC_FLAG)) {
+			    SETGRP_FLAG | INO_ALL_FLAG | INO_SPEC_FLAG |
+			    MSI_ALL_FLAG | MSI_SPEC_FLAG | CPU_SPEC_FLAG)) {
 				(void) fprintf(stderr, "%s: -v, -q, -r, -w, -c "
-				    "and -g are only options options allowed.\n"
-				    "with interrupt command.\n", argv[0]);
+				    "and -g are the only options allowed "
+				    "with interrupt command.\n", argv[0]);
 				error = B_TRUE;
 			}
 
 			/* Need cpu and ino values for interrupt set command. */
 			if ((parsed_args->flags & WRITE_FLAG) &&
-			    (parsed_args->flags &
-			    (CPU_SPEC_FLAG | INO_SPEC_FLAG)) !=
-			    (CPU_SPEC_FLAG | INO_SPEC_FLAG)) {
+			    (!(parsed_args->flags & CPU_SPEC_FLAG) ||
+			    !(parsed_args->flags &
+			    (INO_SPEC_FLAG | MSI_SPEC_FLAG)))) {
 				(void) fprintf(stderr,
-				    "%s: Both cpu and ino must be specified "
-				    "explicitly for interrupt set command.\n",
-				    argv[0]);
+				    "%s: Both cpu and ino/msi must be "
+				    "specified explicitly for interrupt "
+				    "set command.\n", argv[0]);
 				error = B_TRUE;
 			}
 
@@ -1270,59 +1277,69 @@
 
 
 /*
- * Parse interrupt options.  This includes:
- *   ino=number
+ * Parse INO options.  This includes:
+ *   ino#  | all
  *
  * input is the string of options to parse.  flags_arg returns modified with
  * specified options set.  Other args return their respective values.
  */
 static int
-parse_intr_opts(char *input, uint64_t *flags_arg, uint8_t *ino_arg)
+parse_ino_opts(char *input, uint64_t *flags_arg, uint8_t *ino_arg)
 {
-	typedef enum {
-		ino = 0
-	} intr_opts_index_t;
+	uint64_t	value;
+	int		rval = SUCCESS;
+
+	if (strcmp(input, "all") == 0) {
+		*flags_arg |= INO_ALL_FLAG;
+	} else if ((rval = get_value64(input, &value, HEX_ONLY)) == SUCCESS) {
+		*ino_arg = (uint8_t)value;
 
-	static char *intr_opts[] = {
-		"ino",
-		NULL
-	};
+		if (*ino_arg != value) {
+			(void) fprintf(stderr,
+			    "ino argument must fit into 8 bits.\n");
+			rval = FAILURE;
+		} else {
+			*flags_arg |= INO_SPEC_FLAG;
+		}
+	} else {
+		(void) fprintf(stderr,
+		    "Unrecognized option for -i\n");
+		rval = FAILURE;
+	}
 
-	char *value;
-	uint64_t	recv64;
+	return (rval);
+}
 
-	int rval = SUCCESS;
-
-	while ((*input != '\0') && (rval == SUCCESS)) {
-		switch (getsubopt(&input, intr_opts, &value)) {
 
-		/* ino=number */
-		case ino:
-			if (value == NULL) {
-				(void) fprintf(stderr, "Missing ino value.\n");
-				rval = FAILURE;
-				break;
-			}
-			if ((rval = get_value64(value, &recv64, HEX_ONLY)) !=
-			    SUCCESS) {
-				break;
-			}
-			*ino_arg = (uint8_t)recv64;
-			if (*ino_arg != recv64) {
-				(void) fprintf(stderr,
-				    "Ino argument must fit into 8 bits.\n");
-				rval = FAILURE;
-				break;
-			}
-			*flags_arg |= INO_SPEC_FLAG;
-			break;
+/*
+ * Parse MSI options.  This includes:
+ *   msi#  | all
+ *
+ * input is the string of options to parse.  flags_arg returns modified with
+ * specified options set.  Other args return their respective values.
+ */
+static int
+parse_msi_opts(char *input, uint64_t *flags_arg, uint16_t *msi_arg)
+{
+	uint64_t	value;
+	int		rval = SUCCESS;
 
-		default:
+	if (strcmp(input, "all") == 0) {
+		*flags_arg |= MSI_ALL_FLAG;
+	} else if ((rval = get_value64(input, &value, HEX_ONLY)) == SUCCESS) {
+		*msi_arg = (uint16_t)value;
+
+		if (*msi_arg != value) {
 			(void) fprintf(stderr,
-			    "Unrecognized option for -i\n");
+			    "msi argument must fit into 16 bits.\n");
 			rval = FAILURE;
-			break;
+		} else {
+			*flags_arg |= MSI_SPEC_FLAG;
 		}
+	} else {
+		(void) fprintf(stderr,
+		    "Unrecognized option for -m\n");
+		rval = FAILURE;
 	}
 
 	return (rval);
@@ -1339,50 +1356,23 @@
 static int
 parse_intr_set_opts(char *input, uint64_t *flags_arg, uint32_t *cpu_arg)
 {
-	typedef enum {
-		cpu = 0
-	} intr_set_opts_index_t;
+	uint64_t	value;
+	int		rval = SUCCESS;
 
-	static char *intr_set_opts[] = {
-		"cpu",
-		NULL
-	};
-
-	char *value;
-	uint64_t	recv64;
-
-	int rval = SUCCESS;
-
-	while ((*input != '\0') && (rval == SUCCESS)) {
-		switch (getsubopt(&input, intr_set_opts, &value)) {
+	if ((rval = get_value64(input, &value, HEX_ONLY)) == SUCCESS) {
 
-		/* cpu=value */
-		case cpu:
-			if (value == NULL) {
-				(void) fprintf(stderr, "Missing cpu value.\n");
-				rval = FAILURE;
-				break;
-			}
-			if ((rval = get_value64(value, &recv64, HEX_ONLY)) !=
-			    SUCCESS) {
-				break;
-			}
-			if ((long)recv64 > sysconf(_SC_CPUID_MAX)) {
-				(void) fprintf(stderr, "Cpu argument "
-				    "exceeds maximum for this system type.\n");
-				rval = FAILURE;
-				break;
-			}
-			*cpu_arg = (uint32_t)recv64;
+		if ((long)value > sysconf(_SC_CPUID_MAX)) {
+			(void) fprintf(stderr, "Cpu argument "
+			    "exceeds maximum for this system type.\n");
+			rval = FAILURE;
+		} else {
+			*cpu_arg = (uint32_t)value;
 			*flags_arg |= CPU_SPEC_FLAG;
-			break;
-
-		default:
-			(void) fprintf(stderr,
-			    "Unrecognized option for -i -w\n");
+		}
+	} else {
+		(void) fprintf(stderr,
+		    "Unrecognized option for -i -m -w\n");
 			rval = FAILURE;
-			break;
-		}
 	}
 
 	return (rval);
--- a/usr/src/cmd/pcitool/pcitool_ui.h	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/cmd/pcitool/pcitool_ui.h	Wed Jul 08 12:59:05 2009 +0800
@@ -43,7 +43,7 @@
  */
 #define	NEXUS_FLAG	0x1
 #define	LEAF_FLAG	0x2
-#define	INTR_FLAG	0x4
+#define	INTR_FLAG	0x4		/* Either -i or -m specified */
 #define	PROBEDEV_FLAG	0x8		/* Probe a specific device */
 #define	PROBETREE_FLAG	0x10		/* Probe all devs on a tree */
 #define	PROBEALL_FLAG	0x20		/* Probe devs on all trees */
@@ -72,8 +72,11 @@
 #define	BUS_SPEC_FLAG	(0x40000ULL << 32)
 #define	DEV_SPEC_FLAG	(0x80000ULL << 32)
 #define	FUNC_SPEC_FLAG	(0x100000ULL << 32)
-#define	CPU_SPEC_FLAG	(0x200000ULL << 32)
-#define	INO_SPEC_FLAG	(0x400000ULL << 32)
+#define	CPU_SPEC_FLAG	(0x200000ULL << 32)	/* -w <cpu#> */
+#define	INO_ALL_FLAG	(0x400000ULL << 32)	/* -i all */
+#define	INO_SPEC_FLAG	(0x800000ULL << 32)	/* -i <#ino> */
+#define	MSI_ALL_FLAG	(0x1000000ULL << 32)	/* -m all */
+#define	MSI_SPEC_FLAG	(0x2000000ULL << 32)	/* -m <#msi> */
 
 /* Macros for a few heavily-used flags. */
 #define	IS_VERBOSE(flags)	(flags & VERBOSE_FLAG)
@@ -96,6 +99,7 @@
 	uint8_t		size;
 	uint8_t		bank;
 	uint8_t		intr_ino;
+	uint16_t	intr_msi;
 	boolean_t	big_endian;
 } pcitool_uiargs_t;
 
--- a/usr/src/cmd/pcitool/pcitool_usage.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/cmd/pcitool/pcitool_usage.c	Wed Jul 08 12:59:05 2009 +0800
@@ -148,13 +148,14 @@
 static char *pcitool_usage_intr[] = {
 "Usage:",
 "Interrupt mode:",
-" %s <PCI nexus node> -i [ ino=<ino> ] [ -r [ -c ] | -w cpu=<CPU> ]",
-"	[ -v ] [ -q ]",
-" (only on applicable platforms)",
+" %s pci@<unit-address> -i <ino#> | all [ -r [ -c ] | -w <cpu#> [ -g ] ]",
+"       [ -v ] [ -q ]",
+" %s pci@<unit-address> -m <msi#> | all [ -r [ -c ] | -w <cpu#> [ -g ] ]",
+"       [ -v ] [ -q ]",
 "",
 "where",
 "",
-"<PCI nexus node> is a node from /devices, with \"/devices\" stripped off.",
+"pci@<unit-address> is a node from /devices, with \"/devices\" stripped off.",
 "For example: /pci@1e,600000",
 "",
 "-v gives verbose output for all modes.",
@@ -170,26 +171,36 @@
 "Interrupt mode",
 "--------------",
 "",
-"-i [ ino=<ino> ] changes or retrieves current CPU for interrupts of given",
-"nexus and optionally given ino.  Ino must be selected if -w specified.",
-"If no ino is selected (as for displaying), all will be selected.",
+"-i <ino#> changes or retrieves current CPU for interrupts of given nexus",
+"   and given INO. The special value of 'all' can be used to select all INOs.",
+"",
+"-m <msi#> changes or retrieves current CPU for interrupts of given nexus",
+"   and given MSI/X. The special value of 'all' can be used to select all",
+"   MSI/Xs.",
 "",
-"-w cpu=<CPU> [ -g ] to change an ino<->CPU binding.",
+"   Note: On x86 platforms, both INOs and MSI/Xs are mapped to the same",
+"   interrupt vectors. Use -i option to retrieve and reroute any interrupt",
+"   vectors (both INO and MSI/Xs).  So, -m option is not required on x86",
+"   platforms. Hence it is not supported.",
+"",
+"   A specific INO or MSI/X must be selected if -w specified.",
+"",
+"-w <cpu#> [ -g ] to change an INO or MSI/X <->CPU binding.",
 "",
 "   Note: On certain platforms (e.g. X86), multiple MSI interrupts of a single",
 "   function need to be moved together.  Use -g to do this.  -g works only on",
 "   supported platforms and only for groups of MSI interrupts.  When -g is",
-"   used, ino must be the lowest-numbered vector of the group.  (Use the mdb",
+"   used, INO must be the lowest-numbered vector of the group.  (Use the mdb",
 "   ::interrupts dcmd to discover groups of MSI vectors.)  The size of the",
 "   group is determined internally.  (\"Groups\" of size 1 are accepted.)",
 "",
-"-r [ -c ] for displaying ino<->CPU bindings of all selected inos on a given",
-"   nexus.  -c optionally dumps controller information.",
+"-r [ -c ] for displaying ino or msi <->CPU bindings of all selected INO/MSIs",
+"   on a given nexus.  -c optionally dumps controller information.",
 "",
-"   All relevant enabled inos supporting non-nexus device interrupts will be",
-"   printed.  For each printed ino, all supported devices and their CPU",
-"   binding will be displayed.  On some platforms, inos dedicated to the root",
-"   nexus will be shown and marked with \"(Internal)\".",
+"   All relevant enabled INO/MSI/Xs supporting non-nexus device interrupts",
+"   will be printed.  For each printed INO/MSI/X, all supported devices and",
+"   their CPU binding will be displayed.  On some platforms, INOs dedicated",
+"   to the root nexus will be shown and marked with \"(Internal)\".",
 "",
 "When neither -r nor -w are specified, -r is the default.",
 NULL
--- a/usr/src/cmd/perl/contrib/Sun/Solaris/Intrs/Intrs.xs	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/cmd/perl/contrib/Sun/Solaris/Intrs/Intrs.xs	Wed Jul 08 12:59:05 2009 +0800
@@ -20,12 +20,10 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/pci.h>
@@ -72,7 +70,7 @@
 	}
 	iset.ino = ino;
 	iset.cpu_id = cpu;
-	iset.flags = (num_ino > 1) ? PCITOOL_INTR_SET_FLAG_GROUP : 0;
+	iset.flags = (num_ino > 1) ? PCITOOL_INTR_FLAG_SET_GROUP : 0;
 	iset.user_version = PCITOOL_VERSION;
 
 	ret = ioctl(fd, PCITOOL_DEVICE_SET_INTR, &iset);
--- a/usr/src/uts/common/io/pci_intr_lib.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/common/io/pci_intr_lib.c	Wed Jul 08 12:59:05 2009 +0800
@@ -424,21 +424,13 @@
  * interrupt can be disabled.
  */
 int
-pci_msi_disable_mode(dev_info_t *rdip, int type, uint_t flags)
+pci_msi_disable_mode(dev_info_t *rdip, int type)
 {
 	ushort_t		caps_ptr, msi_ctrl;
 	ddi_acc_handle_t	cfg_hdle;
 
-	DDI_INTR_NEXDBG((CE_CONT, "pci_msi_disable_mode: rdip = 0x%p "
-	    "flags = 0x%x\n", (void *)rdip, flags));
-
-	/*
-	 * Do not turn off the master enable bit if other interrupts are
-	 * still active.
-	 */
-	if ((flags != DDI_INTR_FLAG_BLOCK) &&
-	    (i_ddi_intr_get_current_nenables(rdip) > 1))
-		return (DDI_SUCCESS);
+	DDI_INTR_NEXDBG((CE_CONT, "pci_msi_disable_mode: rdip = 0x%p\n",
+	    (void *)rdip));
 
 	if (pci_get_msi_ctrl(rdip, type, &msi_ctrl,
 	    &caps_ptr, &cfg_hdle) != DDI_SUCCESS)
--- a/usr/src/uts/common/os/ddi_intr.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/common/os/ddi_intr.c	Wed Jul 08 12:59:05 2009 +0800
@@ -728,6 +728,69 @@
 }
 
 /*
+ * Interrupt target get/set functions
+ */
+int
+ddi_intr_get_affinity(ddi_intr_handle_t h, ddi_intr_target_t *tgt_p)
+{
+	ddi_intr_handle_impl_t	*hdlp = (ddi_intr_handle_impl_t *)h;
+	int			ret;
+
+	DDI_INTR_APIDBG((CE_CONT, "ddi_intr_get_affinity: hdlp = %p\n",
+	    (void *)hdlp));
+
+	if ((hdlp == NULL) || (tgt_p == NULL))
+		return (DDI_EINVAL);
+
+	rw_enter(&hdlp->ih_rwlock, RW_READER);
+	if (hdlp->ih_state != DDI_IHDL_STATE_ENABLE) {
+		rw_exit(&hdlp->ih_rwlock);
+		return (DDI_EINVAL);
+	}
+
+	ret = i_ddi_intr_ops(hdlp->ih_dip, hdlp->ih_dip,
+	    DDI_INTROP_GETTARGET, hdlp, (void *)tgt_p);
+
+	DDI_INTR_APIDBG((CE_CONT, "ddi_intr_get_affinity: target %x\n",
+	    *tgt_p));
+
+	if (ret == DDI_SUCCESS)
+		hdlp->ih_target = *tgt_p;
+
+	rw_exit(&hdlp->ih_rwlock);
+	return (ret);
+}
+
+int
+ddi_intr_set_affinity(ddi_intr_handle_t h, ddi_intr_target_t tgt)
+{
+	ddi_intr_handle_impl_t	*hdlp = (ddi_intr_handle_impl_t *)h;
+	int			ret;
+
+	DDI_INTR_APIDBG((CE_CONT, "ddi_intr_set_affinity: hdlp = %p "
+	    "target %x\n", (void *)hdlp, tgt));
+
+	if (hdlp == NULL)
+		return (DDI_EINVAL);
+
+	rw_enter(&hdlp->ih_rwlock, RW_WRITER);
+	if ((hdlp->ih_state != DDI_IHDL_STATE_ENABLE) ||
+	    !(hdlp->ih_cap & DDI_INTR_FLAG_RETARGETABLE)) {
+		rw_exit(&hdlp->ih_rwlock);
+		return (DDI_EINVAL);
+	}
+
+	ret = i_ddi_intr_ops(hdlp->ih_dip, hdlp->ih_dip,
+	    DDI_INTROP_SETTARGET, hdlp, &tgt);
+
+	if (ret == DDI_SUCCESS)
+		hdlp->ih_target = tgt;
+
+	rw_exit(&hdlp->ih_rwlock);
+	return (ret);
+}
+
+/*
  * Interrupt enable/disable/block_enable/block_disable handlers
  */
 int
@@ -979,6 +1042,21 @@
 }
 
 /*
+ * Set the number of interrupts requested from IRM
+ */
+int
+ddi_intr_set_nreq(dev_info_t *dip, int nreq)
+{
+	DDI_INTR_APIDBG((CE_CONT, "ddi_intr_set_nreq: dip %p, nreq %d\n",
+	    (void *)dip, nreq));
+
+	if (dip == NULL)
+		return (DDI_EINVAL);
+
+	return (i_ddi_irm_modify(dip, nreq));
+}
+
+/*
  * Soft interrupt handlers
  */
 /*
@@ -1142,21 +1220,6 @@
 }
 
 /*
- * Set the number of interrupts requested from IRM
- */
-int
-ddi_intr_set_nreq(dev_info_t *dip, int nreq)
-{
-	DDI_INTR_APIDBG((CE_CONT, "ddi_intr_set_nreq: dip %p, nreq %d\n",
-	    (void *)dip, nreq));
-
-	if (dip == NULL)
-		return (DDI_EINVAL);
-
-	return (i_ddi_irm_modify(dip, nreq));
-}
-
-/*
  * Old DDI interrupt framework
  *
  * The following DDI interrupt interfaces are obsolete.
--- a/usr/src/uts/common/sys/ddi_intr.h	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/common/sys/ddi_intr.h	Wed Jul 08 12:59:05 2009 +0800
@@ -32,6 +32,7 @@
 
 #include <sys/ddipropdefs.h>
 #include <sys/rwlock.h>
+#include <sys/processor.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -85,7 +86,8 @@
 #define	DDI_INTR_FLAG_MASKABLE	0x0010	/* (RO) maskable */
 #define	DDI_INTR_FLAG_PENDING	0x0020	/* (RO) int pending supported */
 #define	DDI_INTR_FLAG_BLOCK	0x0100	/* (RO) requires block enable */
-#define	DDI_INTR_FLAG_MSI64	0x0200	/* (R0) MSI/X supports 64 bit addr */
+#define	DDI_INTR_FLAG_MSI64	0x0200	/* (RO) MSI/X supports 64 bit addr */
+#define	DDI_INTR_FLAG_RETARGETABLE	0x0400	/* (RO) retargetable */
 
 /*
  * Macro to be used while passing interrupt priority
@@ -100,6 +102,11 @@
 typedef struct __ddi_softint_handle *ddi_softint_handle_t;
 
 /*
+ * Typedef for interrupt target
+ */
+typedef	processorid_t ddi_intr_target_t;
+
+/*
  * Definition for behavior flag which is used with ddi_intr_alloc(9f).
  */
 #define	DDI_INTR_ALLOC_NORMAL	0	/* Non-strict alloc */
@@ -177,6 +184,12 @@
 int	ddi_intr_remove_handler(ddi_intr_handle_t h);
 
 /*
+ * Interrupt get/set affinity functions
+ */
+int	ddi_intr_get_affinity(ddi_intr_handle_t h, ddi_intr_target_t *tgt_p);
+int	ddi_intr_set_affinity(ddi_intr_handle_t h, ddi_intr_target_t tgt);
+
+/*
  * Interrupt enable/disable/block_enable/block_disable functions
  */
 int	ddi_intr_enable(ddi_intr_handle_t h);
@@ -196,6 +209,11 @@
 int	ddi_intr_get_pending(ddi_intr_handle_t h, int *pendingp);
 
 /*
+ * Interrupt resource management function
+ */
+int	ddi_intr_set_nreq(dev_info_t *dip, int nreq);
+
+/*
  * Soft interrupt functions
  */
 int	ddi_intr_add_softint(dev_info_t *dip, ddi_softint_handle_t *h,
@@ -206,11 +224,6 @@
 int	ddi_intr_set_softint_pri(ddi_softint_handle_t h, uint_t soft_pri);
 
 /*
- * Interrupt resource management function
- */
-int	ddi_intr_set_nreq(dev_info_t *dip, int nreq);
-
-/*
  * Old DDI interrupt interfaces.
  *
  * The following DDI interrupt interfaces are obsolete.
--- a/usr/src/uts/common/sys/ddi_intr_impl.h	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/common/sys/ddi_intr_impl.h	Wed Jul 08 12:59:05 2009 +0800
@@ -62,7 +62,9 @@
 	DDI_INTROP_CLRMASK,		/* 17 clear mask */
 	DDI_INTROP_GETPENDING,		/* 18 get pending interrupt */
 	DDI_INTROP_NAVAIL,		/* 19 get num of available interrupts */
-	DDI_INTROP_GETPOOL		/* 20 get resource management pool */
+	DDI_INTROP_GETPOOL,		/* 20 get resource management pool */
+	DDI_INTROP_GETTARGET,		/* 21 get target for a given intr(s) */
+	DDI_INTROP_SETTARGET		/* 22 set target for a given intr(s) */
 } ddi_intr_op_t;
 
 /* Version number used in the handles */
@@ -112,6 +114,14 @@
 	void			*ih_private;	/* Platform specific data */
 	uint_t			ih_scratch1;	/* Scratch1: #interrupts */
 	void			*ih_scratch2;	/* Scratch2: flag/h_array */
+
+	/*
+	 * The ih_target field may not reflect the actual target that is
+	 * currently being used for the given interrupt. This field is just a
+	 * snapshot taken either during ddi_intr_add_handler() or
+	 * ddi_intr_get/set_affinity() calls.
+	 */
+	ddi_intr_target_t	ih_target;	/* Target ID */
 } ddi_intr_handle_impl_t;
 
 /* values for ih_state (strictly for interrupt handle) */
--- a/usr/src/uts/common/sys/pci_intr_lib.h	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/common/sys/pci_intr_lib.h	Wed Jul 08 12:59:05 2009 +0800
@@ -42,7 +42,7 @@
 extern	int	pci_msi_unconfigure(dev_info_t *rdip, int type, int inum);
 extern	int	pci_is_msi_enabled(dev_info_t *rdip, int type);
 extern	int	pci_msi_enable_mode(dev_info_t *rdip, int type);
-extern	int	pci_msi_disable_mode(dev_info_t *rdip, int type, uint_t flags);
+extern	int	pci_msi_disable_mode(dev_info_t *rdip, int type);
 extern	int	pci_msi_set_mask(dev_info_t *rdip, int type, int inum);
 extern	int	pci_msi_clr_mask(dev_info_t *rdip, int type, int inum);
 extern	int	pci_msi_get_pending(dev_info_t *rdip, int type, int inum,
--- a/usr/src/uts/common/sys/pci_tools.h	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/common/sys/pci_tools.h	Wed Jul 08 12:59:05 2009 +0800
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef _SYS_PCI_TOOLS_H
 #define	_SYS_PCI_TOOLS_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/modctl.h>
 
 #ifdef	__cplusplus
@@ -107,6 +105,7 @@
 	PCITOOL_SUCCESS = 0x0,
 	PCITOOL_INVALID_CPUID,
 	PCITOOL_INVALID_INO,
+	PCITOOL_INVALID_MSI,
 	PCITOOL_PENDING_INTRTIMEOUT,
 	PCITOOL_REGPROP_NOTWELLFORMED,
 	PCITOOL_INVALID_ADDRESS,
@@ -127,16 +126,18 @@
 	uint16_t user_version;	/* Userland program version - to krnl */
 	uint16_t drvr_version;	/* Driver version - from kernel */
 	uint32_t ino;		/* interrupt to set - to kernel */
+	uint32_t msi;		/* Specific MSI to set - to kernel */
 	uint32_t cpu_id;	/* to: cpu to set / from: old cpu returned */
+	uint32_t flags;		/* to kernel */
 	pcitool_errno_t status;	/* from kernel */
-	uint32_t flags;		/* to kernel */
 } pcitool_intr_set_t;
 
 /*
- * flags for pcitool_intr_set_t
+ * Flags for pcitool_intr_get/set_t/info_t
  */
-#define	PCITOOL_INTR_SET_FLAG_GROUP	0x1
-
+#define	PCITOOL_INTR_FLAG_SET_GROUP	0x1
+#define	PCITOOL_INTR_FLAG_GET_MSI	0x2
+#define	PCITOOL_INTR_FLAG_SET_MSI	0x4
 
 /*
  * PCITOOL_DEVICE_GET_INTR ioctl data structure to dump out the
@@ -153,6 +154,7 @@
 	uint16_t user_version;		/* Userland program version - to krnl */
 	uint16_t drvr_version;		/* Driver version - from kernel */
 	uint32_t	ino;		/* interrupt number - to kernel */
+	uint32_t	msi;		/* MSI number - to kernel */
 	uint8_t		num_devs_ret;	/* room for this # of devs to be */
 					/* returned - to kernel */
 					/* # devs returned - from kernel */
@@ -160,6 +162,7 @@
 					/* intrs enabled for devs if > 0 */
 	uint8_t		ctlr;		/* controller number - from kernel */
 	uint32_t	cpu_id;		/* cpu of interrupt - from kernel */
+	uint32_t	flags;		/* to kernel */
 	pcitool_errno_t status;		/* returned status - from kernel */
 	pcitool_intr_dev_t	dev[1];	/* start of variable device list */
 					/* from kernel */
@@ -177,6 +180,7 @@
 typedef struct pcitool_intr_info {
 	uint16_t user_version;		/* Userland program version - to krnl */
 	uint16_t drvr_version;		/* Driver version - from kernel */
+	uint32_t flags;			/* to kernel */
 	uint32_t num_intr;		/* Number of intrs suppt by nexus */
 	uint32_t ctlr_version;		/* Intr ctlr HW version - from kernel */
 	uchar_t	ctlr_type;		/* A PCITOOL_CTLR_TYPE - from kernel */
--- a/usr/src/uts/i86pc/io/pci/pci_common.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/i86pc/io/pci/pci_common.c	Wed Jul 08 12:59:05 2009 +0800
@@ -681,6 +681,30 @@
 		DDI_INTR_NEXDBG((CE_CONT, "pci: GETPENDING returned = %x\n",
 		    *(int *)result));
 		break;
+	case DDI_INTROP_GETTARGET:
+		DDI_INTR_NEXDBG((CE_CONT, "pci_common_intr_ops: GETTARGET\n"));
+
+		/* Note hdlp->ih_vector is actually an irq */
+		if ((rv = pci_get_cpu_from_vecirq(hdlp->ih_vector, IS_IRQ)) ==
+		    -1)
+			return (DDI_FAILURE);
+		*(int *)result = rv;
+		DDI_INTR_NEXDBG((CE_CONT, "pci_common_intr_ops: GETTARGET "
+		    "vector = 0x%x, cpu = 0x%x\n", hdlp->ih_vector, rv));
+		break;
+	case DDI_INTROP_SETTARGET:
+		DDI_INTR_NEXDBG((CE_CONT, "pci_common_intr_ops: SETTARGET\n"));
+
+		/* hdlp->ih_vector is actually an irq */
+		tmp_hdl.ih_vector = hdlp->ih_vector;
+		tmp_hdl.ih_flags = PSMGI_INTRBY_IRQ;
+		tmp_hdl.ih_private = (void *)(uintptr_t)*(int *)result;
+		psm_rval = (*psm_intr_ops)(rdip, &tmp_hdl, PSM_INTR_OP_SET_CPU,
+		    &psm_status);
+
+		if (psm_rval != PSM_SUCCESS)
+			return (DDI_FAILURE);
+		break;
 	default:
 		return (i_ddi_intr_ops(pdip, rdip, intr_op, hdlp, result));
 	}
--- a/usr/src/uts/i86pc/io/pci/pci_tools.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/i86pc/io/pci/pci_tools.c	Wed Jul 08 12:59:05 2009 +0800
@@ -156,6 +156,12 @@
 		goto done_set_intr;
 	}
 
+	if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
+		rval = ENOTSUP;
+		iset.status = PCITOOL_IO_ERROR;
+		goto done_set_intr;
+	}
+
 	if (iset.ino > APIC_MAX_VECTOR) {
 		rval = EINVAL;
 		iset.status = PCITOOL_INVALID_INO;
@@ -179,13 +185,14 @@
 	 */
 	info_hdl.ih_vector = iset.ino;
 	info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
+	info_hdl.ih_flags = PSMGI_INTRBY_VEC;
 	if (pcitool_debug)
 		prom_printf("user version:%d, flags:0x%x\n",
 		    iset.user_version, iset.flags);
 
 	result = ENOTSUP;
 	if ((iset.user_version >= PCITOOL_V2) &&
-	    (iset.flags & PCITOOL_INTR_SET_FLAG_GROUP)) {
+	    (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP)) {
 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
 		    &result);
 	} else {
@@ -259,6 +266,13 @@
 	    DDI_SUCCESS)
 		return (EFAULT);
 
+	if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) {
+		partial_iget.status = PCITOOL_IO_ERROR;
+		partial_iget.num_devs_ret = 0;
+		rval = ENOTSUP;
+		goto done_get_intr;
+	}
+
 	/* Validate argument. */
 	if (partial_iget.ino > APIC_MAX_VECTOR) {
 		partial_iget.status = PCITOOL_INVALID_INO;
@@ -388,6 +402,9 @@
 		return (EFAULT);
 	}
 
+	if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
+		return (ENOTSUP);
+
 	/* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
 	    PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
--- a/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c	Wed Jul 08 12:59:05 2009 +0800
@@ -596,7 +596,7 @@
 #if !defined(__xpv)
 
 static int
-apic_set_cpu(uint32_t vector, int cpu, int *result)
+apic_set_cpu(int irqno, int cpu, int *result)
 {
 	apic_irq_t *irqp;
 	ulong_t iflag;
@@ -604,9 +604,8 @@
 
 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
 
-	/* Convert the vector to the irq using vector_to_irq table. */
 	mutex_enter(&airq_mutex);
-	irqp = apic_irq_table[apic_vector_to_irq[vector]];
+	irqp = apic_irq_table[irqno];
 	mutex_exit(&airq_mutex);
 
 	if (irqp == NULL) {
@@ -633,12 +632,17 @@
 		*result = EIO;
 		return (PSM_FAILURE);
 	}
+	/*
+	 * keep tracking the default interrupt cpu binding
+	 */
+	irqp->airq_cpu = cpu;
+
 	*result = 0;
 	return (PSM_SUCCESS);
 }
 
 static int
-apic_grp_set_cpu(uint32_t vector, int new_cpu, int *result)
+apic_grp_set_cpu(int irqno, int new_cpu, int *result)
 {
 	dev_info_t *orig_dip;
 	uint32_t orig_cpu;
@@ -651,6 +655,7 @@
 	uint32_t msi_pvm;
 	ddi_acc_handle_t handle;
 	int num_vectors = 0;
+	uint32_t vector;
 
 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
 
@@ -659,15 +664,16 @@
 	 * us while we're playing with it.
 	 */
 	mutex_enter(&airq_mutex);
-	irqps[0] = apic_irq_table[apic_vector_to_irq[vector]];
+	irqps[0] = apic_irq_table[irqno];
 	orig_cpu = irqps[0]->airq_temp_cpu;
 	orig_dip = irqps[0]->airq_dip;
 	num_vectors = irqps[0]->airq_intin_no;
+	vector = irqps[0]->airq_vector;
 
 	/* A "group" of 1 */
 	if (num_vectors == 1) {
 		mutex_exit(&airq_mutex);
-		return (apic_set_cpu(vector, new_cpu, result));
+		return (apic_set_cpu(irqno, new_cpu, result));
 	}
 
 	*result = ENXIO;
@@ -748,8 +754,12 @@
 	if (apic_rebind_all(irqps[0], new_cpu))
 		(void) apic_rebind_all(irqps[0], orig_cpu);
 	else {
-		for (i = 1; i < num_vectors; i++)
+		irqps[0]->airq_cpu = new_cpu;
+
+		for (i = 1; i < num_vectors; i++) {
 			(void) apic_rebind_all(irqps[i], new_cpu);
+			irqps[i]->airq_cpu = new_cpu;
+		}
 		*result = 0;	/* SUCCESS */
 	}
 
@@ -986,6 +996,8 @@
 		cap = DDI_INTR_FLAG_PENDING;
 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
 			cap |= DDI_INTR_FLAG_MASKABLE;
+		else if (hdlp->ih_type == DDI_INTR_TYPE_MSIX)
+			cap |= DDI_INTR_FLAG_RETARGETABLE;
 		*result = cap;
 		break;
 	case PSM_INTR_OP_GET_SHARED:
@@ -1036,6 +1048,15 @@
 			*result = EINVAL;
 			return (PSM_FAILURE);
 		}
+		if (hdlp->ih_vector > APIC_MAX_VECTOR) {
+			DDI_INTR_IMPLDBG((CE_CONT,
+			    "[grp_]set_cpu: vector out of range: %d\n",
+			    hdlp->ih_vector));
+			*result = EINVAL;
+			return (PSM_FAILURE);
+		}
+		if (!(hdlp->ih_flags & PSMGI_INTRBY_IRQ))
+			hdlp->ih_vector = apic_vector_to_irq[hdlp->ih_vector];
 		if (intr_op == PSM_INTR_OP_SET_CPU) {
 			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
 			    PSM_SUCCESS)
--- a/usr/src/uts/sun4/io/px/px.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4/io/px/px.c	Wed Jul 08 12:59:05 2009 +0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -599,7 +599,8 @@
 	/* Add PME_TO_ACK message handler */
 	hdl.ih_cb_func = (ddi_intr_handler_t *)px_pmeq_intr;
 	if (px_add_msiq_intr(dip, dip, &hdl, MSG_REC,
-	    (msgcode_t)PCIE_PME_ACK_MSG, &px_p->px_pm_msiq_id) != DDI_SUCCESS) {
+	    (msgcode_t)PCIE_PME_ACK_MSG, -1,
+	    &px_p->px_pm_msiq_id) != DDI_SUCCESS) {
 		DBG(DBG_PWR, dip, "px_pwr_setup: couldn't add "
 		    " PME_TO_ACK intr\n");
 		goto pwr_setup_err1;
--- a/usr/src/uts/sun4/io/px/px_ib.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4/io/px/px_ib.c	Wed Jul 08 12:59:05 2009 +0800
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * PX Interrupt Block implementation
  */
@@ -311,10 +309,9 @@
 
 	/* Redistribute device interrupts */
 	mutex_enter(&ib_p->ib_ino_lst_mutex);
+	px_msiq_redist(px_p);
 
 	for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next_p) {
-		uint32_t orig_cpuid;
-
 		/*
 		 * Recomputes the sum of interrupt weights of devices that
 		 * share the same ino upon first call marked by
@@ -348,12 +345,31 @@
 		if ((weight == ino_p->ino_intr_weight) ||
 		    ((weight >= weight_max) &&
 		    (ino_p->ino_intr_weight >= weight_max))) {
-			orig_cpuid = ino_p->ino_cpuid;
+			uint32_t orig_cpuid = ino_p->ino_cpuid;
+
 			if (cpu[orig_cpuid] == NULL)
 				orig_cpuid = CPU->cpu_id;
 
-			/* select cpuid to target and mark ino established */
-			ino_p->ino_cpuid = intr_dist_cpuid();
+			DBG(DBG_IB, dip, "px_ib_intr_redist: sysino 0x%llx "
+			    "current cpuid 0x%x current default cpuid 0x%x\n",
+			    ino_p->ino_sysino, ino_p->ino_cpuid,
+			    ino_p->ino_default_cpuid);
+
+			/* select target cpuid and mark ino established */
+			if (ino_p->ino_default_cpuid == -1)
+				ino_p->ino_cpuid = ino_p->ino_default_cpuid =
+				    intr_dist_cpuid();
+			else if ((ino_p->ino_cpuid !=
+			    ino_p->ino_default_cpuid) &&
+			    (cpu_intr_on(cpu[ino_p->ino_default_cpuid])))
+				ino_p->ino_cpuid = ino_p->ino_default_cpuid;
+			else if (!cpu_intr_on(cpu[ino_p->ino_cpuid]))
+				ino_p->ino_cpuid = intr_dist_cpuid();
+
+			DBG(DBG_IB, dip, "px_ib_intr_redist: sysino 0x%llx "
+			    "new cpuid 0x%x new default cpuid 0x%x\n",
+			    ino_p->ino_sysino, ino_p->ino_cpuid,
+			    ino_p->ino_default_cpuid);
 
 			/* Add device weight to targeted cpu. */
 			for (ipil_p = ino_p->ino_ipil_p; ipil_p;
@@ -436,30 +452,41 @@
 	return (ino_p);
 }
 
+px_ino_t *
+px_ib_alloc_ino(px_ib_t *ib_p, devino_t ino_num)
+{
+	sysino_t	sysino;
+	px_ino_t	*ino_p;
+	/* Create a listed px_ino_t for ino_num; NULL if it has no sysino */
+	if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip,
+	    ino_num, &sysino) != DDI_SUCCESS)
+		return (NULL);
+
+	ino_p = kmem_zalloc(sizeof (px_ino_t), KM_SLEEP);
+	/* NOTE(review): list is updated unlocked here - caller's lock? */
+	ino_p->ino_next_p = ib_p->ib_ino_lst;
+	ib_p->ib_ino_lst = ino_p;
+	/* No PIL or target CPU has been assigned yet: lopil 0, cpuids -1 */
+	ino_p->ino_ino = ino_num;
+	ino_p->ino_sysino = sysino;
+	ino_p->ino_ib_p = ib_p;
+	ino_p->ino_unclaimed_intrs = 0;
+	ino_p->ino_lopil = 0;
+	ino_p->ino_cpuid = ino_p->ino_default_cpuid = (cpuid_t)-1;
+
+	return (ino_p);
+}
+
 px_ino_pil_t *
 px_ib_new_ino_pil(px_ib_t *ib_p, devino_t ino_num, uint_t pil, px_ih_t *ih_p)
 {
 	px_ino_pil_t	*ipil_p = kmem_zalloc(sizeof (px_ino_pil_t), KM_SLEEP);
 	px_ino_t	*ino_p;
 
-	if ((ino_p = px_ib_locate_ino(ib_p, ino_num)) == NULL) {
-		sysino_t	sysino;
-
-		if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip,
-		    ino_num, &sysino) != DDI_SUCCESS)
-			return (NULL);
-
-		ino_p = kmem_zalloc(sizeof (px_ino_t), KM_SLEEP);
+	if ((ino_p = px_ib_locate_ino(ib_p, ino_num)) == NULL)
+		ino_p = px_ib_alloc_ino(ib_p, ino_num);
 
-		ino_p->ino_next_p = ib_p->ib_ino_lst;
-		ib_p->ib_ino_lst = ino_p;
-
-		ino_p->ino_ino = ino_num;
-		ino_p->ino_sysino = sysino;
-		ino_p->ino_ib_p = ib_p;
-		ino_p->ino_unclaimed_intrs = 0;
-		ino_p->ino_lopil = pil;
-	}
+	ASSERT(ino_p != NULL);
 
 	ih_p->ih_next = ih_p;
 	ipil_p->ipil_pil = pil;
@@ -473,7 +500,7 @@
 	ino_p->ino_ipil_p = ipil_p;
 	ino_p->ino_ipil_size++;
 
-	if (ino_p->ino_lopil > pil)
+	if ((ino_p->ino_lopil == 0) || (ino_p->ino_lopil > pil))
 		ino_p->ino_lopil = pil;
 
 	return (ipil_p);
@@ -508,6 +535,7 @@
 			if (pil > next->ipil_pil)
 				pil = next->ipil_pil;
 		}
+
 		/*
 		 * Value stored in pil should be the lowest pil.
 		 */
@@ -517,6 +545,11 @@
 	if (ino_p->ino_ipil_size)
 		return;
 
+	ino_p->ino_lopil = 0;
+
+	if (ino_p->ino_msiq_p)
+		return;
+
 	if (ib_p->ib_ino_lst == ino_p)
 		ib_p->ib_ino_lst = ino_p->ino_next_p;
 	else {
@@ -819,6 +852,242 @@
 }
 
 
+/*
+ * Get interrupt CPU for a given ino.
+ * Return info only for inos which are already mapped to devices.
+ */
+/*ARGSUSED*/
+int
+px_ib_get_intr_target(px_t *px_p, devino_t ino, cpuid_t *cpu_id_p)
+{
+	dev_info_t	*dip = px_p->px_dip;
+	sysino_t	sysino;
+	int		ret;
+
+	DBG(DBG_IB, px_p->px_dip, "px_ib_get_intr_target: devino %x\n", ino);
+
+	/* Convert leaf-wide intr to system-wide intr */
+	if (px_lib_intr_devino_to_sysino(dip, ino, &sysino) != DDI_SUCCESS)
+		return (DDI_FAILURE);
+	/* The px_lib call reports the target through cpu_id_p */
+	ret = px_lib_intr_gettarget(dip, sysino, cpu_id_p);
+	/* NOTE(review): *cpu_id_p is logged even if the call above failed */
+	DBG(DBG_IB, px_p->px_dip, "px_ib_get_intr_target: cpu_id %x\n",
+	    *cpu_id_p);
+
+	return (ret);
+}
+
+
+/*
+ * Associate a new CPU with a given ino; returns a DDI_* status code.
+ * Operate only on INOs which are already mapped to devices.
+ */
+int
+px_ib_set_intr_target(px_t *px_p, devino_t ino, cpuid_t cpu_id)
+{
+	dev_info_t		*dip = px_p->px_dip;
+	cpuid_t			old_cpu_id;
+	sysino_t		sysino;
+	int			ret = DDI_SUCCESS;
+	extern const int	_ncpu;
+	extern cpu_t		*cpu[];
+
+	DBG(DBG_IB, px_p->px_dip, "px_ib_set_intr_target: devino %x "
+	    "cpu_id %x\n", ino, cpu_id);
+
+	mutex_enter(&cpu_lock);
+
+	/* Convert leaf-wide intr to system-wide intr */
+	if (px_lib_intr_devino_to_sysino(dip, ino, &sysino) != DDI_SUCCESS) {
+		ret = DDI_FAILURE;
+		goto done;
+	}
+
+	if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) != DDI_SUCCESS) {
+		ret = DDI_FAILURE;
+		goto done;
+	}
+
+	/*
+	 * Validate cpu (both bounds; cpu_lock is already held) and write it.
+	 */
+	if ((cpu_id >= 0) && (cpu_id < _ncpu) && cpu[cpu_id] &&
+	    cpu_is_online(cpu[cpu_id])) {
+		DBG(DBG_IB, dip, "px_ib_set_intr_target: Enabling CPU %d\n",
+		    cpu_id);
+		px_ib_intr_dist_en(dip, cpu_id, ino, B_TRUE);
+		px_ib_log_new_cpu(px_p->px_ib_p, old_cpu_id, cpu_id, ino);
+	} else {	/* Invalid cpu */
+		DBG(DBG_IB, dip, "px_ib_set_intr_target: Invalid cpuid %x\n",
+		    cpu_id);
+		ret = DDI_EINVAL;
+	}
+done:
+	mutex_exit(&cpu_lock);
+	return (ret);
+}
+
+hrtime_t px_ib_msix_retarget_timeout = 120ll * NANOSEC;	/* 120 seconds */
+
+/*
+ * Associate a new CPU with a given MSI/X; returns a DDI_* status code.
+ * Operate only on MSI/Xs which are already mapped to devices.
+ */
+int
+px_ib_set_msix_target(px_t *px_p, ddi_intr_handle_impl_t *hdlp,
+    msinum_t msi_num, cpuid_t cpu_id)
+{
+	px_ib_t			*ib_p = px_p->px_ib_p;
+	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
+	dev_info_t		*dip = px_p->px_dip;
+	dev_info_t		*rdip = hdlp->ih_dip;
+	msiqid_t		msiq_id, old_msiq_id;
+	pci_msi_state_t		msi_state;
+	msiq_rec_type_t		msiq_rec_type;
+	msi_type_t		msi_type;
+	px_ino_t		*ino_p;
+	px_ih_t			*ih_p, *old_ih_p;
+	cpuid_t			old_cpu_id;
+	hrtime_t		start_time, end_time;
+	int			ret = DDI_SUCCESS;
+	extern const int	_ncpu;
+	extern cpu_t		*cpu[];
+
+	DBG(DBG_IB, dip, "px_ib_set_msix_target: msi_num %x new cpu_id %x\n",
+	    msi_num, cpu_id);
+
+	mutex_enter(&cpu_lock);
+
+	/* Check for MSI64 support */
+	if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) {
+		msiq_rec_type = MSI64_REC;
+		msi_type = MSI64_TYPE;
+	} else {
+		msiq_rec_type = MSI32_REC;
+		msi_type = MSI32_TYPE;
+	}
+
+	if ((ret = px_lib_msi_getmsiq(dip, msi_num,
+	    &old_msiq_id)) != DDI_SUCCESS) {
+		/* The MSIQ currently used by this MSI is unknown */
+		mutex_exit(&cpu_lock);
+		return (ret);
+	}
+
+	DBG(DBG_IB, dip, "px_ib_set_msix_target: current msiq 0x%x\n",
+	    old_msiq_id);
+
+	if ((ret = px_ib_get_intr_target(px_p,
+	    px_msiqid_to_devino(px_p, old_msiq_id),
+	    &old_cpu_id)) != DDI_SUCCESS) {
+		/* The CPU currently targeted by that MSIQ is unknown */
+		mutex_exit(&cpu_lock);
+		return (ret);
+	}
+
+	DBG(DBG_IB, dip, "px_ib_set_msix_target: current cpuid 0x%x\n",
+	    old_cpu_id);
+
+	if (cpu_id == old_cpu_id) {
+		/* Already targeted at the requested CPU: nothing to do */
+		mutex_exit(&cpu_lock);
+		return (DDI_SUCCESS);
+	}
+
+	/*
+	 * Validate cpu (both bounds; cpu_lock is already held) before use.
+	 */
+	if (!((cpu_id >= 0) && (cpu_id < _ncpu) && cpu[cpu_id] &&
+	    cpu_is_online(cpu[cpu_id]))) {
+		/* Invalid cpu */
+		DBG(DBG_IB, dip, "px_ib_set_msix_target: Invalid cpuid %x\n",
+		    cpu_id);
+
+		mutex_exit(&cpu_lock);
+		return (DDI_EINVAL);
+	}
+
+	DBG(DBG_IB, dip, "px_ib_set_msix_target: Enabling CPU %d\n", cpu_id);
+	/* Register the handler again, targeted at cpu_id; yields msiq_id */
+	if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
+	    msiq_rec_type, msi_num, cpu_id, &msiq_id)) != DDI_SUCCESS) {
+		DBG(DBG_IB, dip, "px_ib_set_msix_target: Add MSI handler "
+		    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
+
+		mutex_exit(&cpu_lock);
+		return (ret);
+	}
+	/* Switch the MSI to the new MSIQ; undo the registration on failure */
+	if ((ret = px_lib_msi_setmsiq(dip, msi_num,
+	    msiq_id, msi_type)) != DDI_SUCCESS) {
+		(void) px_rem_msiq_intr(dip, rdip,
+		    hdlp, msiq_rec_type, msi_num, msiq_id);
+
+		mutex_exit(&cpu_lock);
+		return (ret);
+	}
+	/* NOTE(review): this failure leaves the MSI on the new msiq_id */
+	if ((ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
+	    px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri,
+	    PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num)) != DDI_SUCCESS) {
+		(void) px_rem_msiq_intr(dip, rdip,
+		    hdlp, msiq_rec_type, msi_num, msiq_id);
+
+		mutex_exit(&cpu_lock);
+		return (ret);
+	}
+
+	mutex_exit(&cpu_lock);
+	mutex_enter(&ib_p->ib_ino_lst_mutex);
+	/* Mark the old and the new handler entries as being retargeted */
+	ino_p = px_ib_locate_ino(ib_p, px_msiqid_to_devino(px_p, old_msiq_id));
+	old_ih_p = px_ib_intr_locate_ih(px_ib_ino_locate_ipil(ino_p,
+	    hdlp->ih_pri), rdip, hdlp->ih_inum, msiq_rec_type, msi_num);
+	old_ih_p->ih_retarget_flag = B_TRUE;
+
+	ino_p = px_ib_locate_ino(ib_p, px_msiqid_to_devino(px_p, msiq_id));
+	ih_p = px_ib_intr_locate_ih(px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri),
+	    rdip, hdlp->ih_inum, msiq_rec_type, msi_num);
+	ih_p->ih_retarget_flag = B_TRUE;
+
+	if ((ret = px_lib_msi_getstate(dip, msi_num,
+	    &msi_state)) != DDI_SUCCESS) {
+		/* Release the list lock before the removal */
+		mutex_exit(&ib_p->ib_ino_lst_mutex);
+		(void) px_rem_msiq_intr(dip, rdip,
+		    hdlp, msiq_rec_type, msi_num, msiq_id);
+		return (ret);
+	}
+
+	if (msi_state == PCI_MSI_STATE_IDLE)
+		ih_p->ih_retarget_flag = B_FALSE;
+	/* Wait until either entry's flag is cleared, or until the timeout */
+	start_time = gethrtime();
+	while ((ih_p->ih_retarget_flag == B_TRUE) &&
+	    (old_ih_p->ih_retarget_flag == B_TRUE)) {
+		if ((end_time = (gethrtime() - start_time)) >
+		    px_ib_msix_retarget_timeout) {
+			cmn_err(CE_WARN, "MSIX retarget %x is not completed, "
+			    "even after waiting %lld nsec\n",
+			    msi_num, end_time);
+
+			break;
+		}
+
+		/* Wait for one second */
+		delay(drv_usectohz(1000000));
+	}
+
+	mutex_exit(&ib_p->ib_ino_lst_mutex);
+	/* Finally remove the handler registration from the old MSIQ */
+	ret = px_rem_msiq_intr(dip, rdip,
+	    hdlp, msiq_rec_type, msi_num, old_msiq_id);
+
+	return (ret);
+}
+
+
 static void
 px_fill_in_intr_devs(pcitool_intr_dev_t *dev, char *driver_name,
     char *path_name, int instance)
@@ -841,8 +1110,8 @@
  * the px node and (Internal) when it finds no other devices (and *devs_ret > 0)
  */
 uint8_t
-pxtool_ib_get_ino_devs(
-    px_t *px_p, uint32_t ino, uint8_t *devs_ret, pcitool_intr_dev_t *devs)
+pxtool_ib_get_ino_devs(px_t *px_p, uint32_t ino, uint32_t msi_num,
+    uint8_t *devs_ret, pcitool_intr_dev_t *devs)
 {
 	px_ib_t		*ib_p = px_p->px_ib_p;
 	px_ino_t	*ino_p;
@@ -863,7 +1132,17 @@
 			    ((i < ipil_p->ipil_ih_size) && (i < *devs_ret));
 			    i++, j++, ih_p = ih_p->ih_next) {
 				(void) ddi_pathname(ih_p->ih_dip, pathname);
-				px_fill_in_intr_devs(&devs[i],
+
+				if (ih_p->ih_msg_code == msi_num) {
+					num_devs = *devs_ret = 1;
+					px_fill_in_intr_devs(&devs[0],
+					    (char *)ddi_driver_name(
+					    ih_p->ih_dip), pathname,
+					    ddi_get_instance(ih_p->ih_dip));
+					goto done;
+				}
+
+				px_fill_in_intr_devs(&devs[j],
 				    (char *)ddi_driver_name(ih_p->ih_dip),
 				    pathname, ddi_get_instance(ih_p->ih_dip));
 			}
@@ -879,14 +1158,60 @@
 		num_devs = *devs_ret = 1;
 	}
 
+done:
 	mutex_exit(&ib_p->ib_ino_lst_mutex);
 
 	return (num_devs);
 }
 
 
+int
+pxtool_ib_get_msi_info(px_t *px_p, devino_t ino, msinum_t msi_num,
+    ddi_intr_handle_impl_t *hdlp)
+{
+	px_ib_t		*ib_p = px_p->px_ib_p;
+	px_ino_t	*ino_p;
+	px_ino_pil_t	*ipil_p;
+	px_ih_t 	*ih_p;
+	int		i;
+	/* Fill *hdlp from the handler registered for msi_num on this ino */
+	mutex_enter(&ib_p->ib_ino_lst_mutex);
+
+	if ((ino_p = px_ib_locate_ino(ib_p, ino)) == NULL) {
+		mutex_exit(&ib_p->ib_ino_lst_mutex);
+		return (DDI_FAILURE);
+	}
+	/* Walk each PIL's handler list, bounded by its recorded size */
+	for (ipil_p = ino_p->ino_ipil_p; ipil_p;
+	    ipil_p = ipil_p->ipil_next_p) {
+		for (i = 0, ih_p = ipil_p->ipil_ih_head;
+		    ((i < ipil_p->ipil_ih_size) && ih_p);
+		    i++, ih_p = ih_p->ih_next) {
+			/* NOTE(review): ih_rec_type is not checked here */
+			if (ih_p->ih_msg_code != msi_num)
+				continue;
+			/* Match: copy its identity into the handle */
+			hdlp->ih_dip = ih_p->ih_dip;
+			hdlp->ih_inum = ih_p->ih_inum;
+			hdlp->ih_cb_func = ih_p->ih_handler;
+			hdlp->ih_cb_arg1 = ih_p->ih_handler_arg1;
+			hdlp->ih_cb_arg2 = ih_p->ih_handler_arg2;
+			if (ih_p->ih_rec_type == MSI64_REC)
+				hdlp->ih_cap = DDI_INTR_FLAG_MSI64;
+			hdlp->ih_pri = ipil_p->ipil_pil;
+			hdlp->ih_ver = DDI_INTR_VERSION;
+			/* NOTE(review): ih_cap is left as-is for non-MSI64 */
+			mutex_exit(&ib_p->ib_ino_lst_mutex);
+			return (DDI_SUCCESS);
+		}
+	}
+	/* msi_num has no handler on this ino */
+	mutex_exit(&ib_p->ib_ino_lst_mutex);
+	return (DDI_FAILURE);
+}
+
 void
-px_ib_log_new_cpu(px_ib_t *ib_p, uint32_t old_cpu_id, uint32_t new_cpu_id,
+px_ib_log_new_cpu(px_ib_t *ib_p, cpuid_t old_cpu_id, cpuid_t new_cpu_id,
     uint32_t ino)
 {
 	px_ino_t	*ino_p;
--- a/usr/src/uts/sun4/io/px/px_ib.h	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4/io/px/px_ib.h	Wed Jul 08 12:59:05 2009 +0800
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_PX_IB_H
 #define	_SYS_PX_IB_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -70,6 +68,7 @@
 	uint_t		ih_intr_state;	/* only used for fixed interrupts */
 	msiq_rec_type_t	ih_rec_type;	/* MSI or PCIe record type */
 	msgcode_t	ih_msg_code;	/* MSI number or PCIe message code */
+	boolean_t	ih_retarget_flag; /* MSI/X retarget flag */
 	px_ih_t		*ih_next;	/* Next entry in list */
 	uint64_t	ih_ticks;	/* ticks spent in this handler */
 	uint64_t	ih_nsec;	/* nsec spent in this handler */
@@ -103,7 +102,8 @@
 	px_ib_t		*ino_ib_p;	/* link back to interrupt block state */
 	uint_t		ino_unclaimed_intrs; /* number of unclaimed intrs */
 	clock_t		ino_spurintr_begin; /* begin time of spurious intr */
-	cpuid_t		ino_cpuid;	/* cpu that ino is targeting */
+	cpuid_t		ino_cpuid;	/* current cpu for this ino */
+	cpuid_t		ino_default_cpuid; /* default cpu for this ino */
 	int32_t		ino_intr_weight; /* intr wt of devices sharing ino */
 	ushort_t	ino_ipil_size;	/* no of px_ino_pil_t sharing ino */
 	ushort_t	ino_lopil;	/* lowest pil sharing ino */
@@ -134,6 +134,7 @@
 extern void px_ib_free_ino_all(px_ib_t *ib_p);
 
 extern px_ino_pil_t *px_ib_ino_locate_ipil(px_ino_t *ino_p, uint_t pil);
+extern px_ino_t *px_ib_alloc_ino(px_ib_t *ib_p, devino_t ino_num);
 extern px_ino_pil_t *px_ib_new_ino_pil(px_ib_t *ib_p, devino_t ino_num,
     uint_t pil, px_ih_t *ih_p);
 extern void px_ib_delete_ino_pil(px_ib_t *ib_p, px_ino_pil_t *ipil_p);
@@ -150,12 +151,16 @@
 extern int px_ib_update_intr_state(px_t *px_p, dev_info_t *rdip, uint_t inum,
 	devino_t ino, uint_t pil, uint_t new_intr_state,
 	msiq_rec_type_t rec_type, msgcode_t msg_code);
-
+extern int px_ib_get_intr_target(px_t *px_p, devino_t ino, cpuid_t *cpu_id_p);
+extern int px_ib_set_intr_target(px_t *px_p, devino_t ino, cpuid_t cpu_id);
+extern int px_ib_set_msix_target(px_t *px_p, ddi_intr_handle_impl_t *hdlp,
+	msinum_t msi_num, cpuid_t cpuid);
 extern uint8_t pxtool_ib_get_ino_devs(px_t *px_p, uint32_t ino,
-	uint8_t *devs_ret, pcitool_intr_dev_t *devs);
-extern void px_ib_log_new_cpu(px_ib_t *ib_p, uint32_t old_cpu_id,
-	uint32_t new_cpu_id, uint32_t ino);
-
+	uint32_t msi_num, uint8_t *devs_ret, pcitool_intr_dev_t *devs);
+extern int pxtool_ib_get_msi_info(px_t *px_p, devino_t ino, msinum_t msi_num,
+	ddi_intr_handle_impl_t *hdlp);
+extern void px_ib_log_new_cpu(px_ib_t *ib_p, cpuid_t old_cpu_id,
+	cpuid_t new_cpu_id, uint32_t ino);
 
 #ifdef	__cplusplus
 }
--- a/usr/src/uts/sun4/io/px/px_intr.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4/io/px/px_intr.c	Wed Jul 08 12:59:05 2009 +0800
@@ -365,6 +365,8 @@
 			DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
 			    void *, handler, caddr_t, arg1, caddr_t, arg2);
 
+			ih_p->ih_retarget_flag = B_FALSE;
+
 			/*
 			 * Special case for PCIE Error Messages.
 			 * The current frame work doesn't fit PCIE Err Msgs
@@ -496,6 +498,13 @@
 	case DDI_INTROP_REMISR:
 		ret = px_rem_intx_intr(dip, rdip, hdlp);
 		break;
+	case DDI_INTROP_GETTARGET:
+		ret = px_ib_get_intr_target(px_p, hdlp->ih_vector,
+		    (cpuid_t *)result);
+		break;
+	case DDI_INTROP_SETTARGET:
+		ret = DDI_ENOTSUP;
+		break;
 	case DDI_INTROP_ENABLE:
 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_ENABLE, 0, 0);
@@ -538,7 +547,7 @@
 	msinum_t		msi_num;
 	msiqid_t		msiq_id;
 	uint_t			nintrs;
-	int			i, ret = DDI_SUCCESS;
+	int			ret = DDI_SUCCESS;
 
 	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
@@ -554,9 +563,15 @@
 		msi_addr = msi_state_p->msi_addr32;
 	}
 
+	(void) px_msi_get_msinum(px_p, hdlp->ih_dip,
+	    (hdlp->ih_flags & DDI_INTR_MSIX_DUP) ? hdlp->ih_main->ih_inum :
+	    hdlp->ih_inum, &msi_num);
+
 	switch (intr_op) {
 	case DDI_INTROP_GETCAP:
 		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
+		if (ret == DDI_SUCCESS)
+			*(int *)result |= DDI_INTR_FLAG_RETARGETABLE;
 		break;
 	case DDI_INTROP_SETCAP:
 		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
@@ -624,12 +639,8 @@
 	case DDI_INTROP_SETPRI:
 		break;
 	case DDI_INTROP_ADDISR:
-		if ((ret = px_msi_get_msinum(px_p, hdlp->ih_dip,
-		    hdlp->ih_inum, &msi_num)) != DDI_SUCCESS)
-			return (ret);
-
 		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
-		    msiq_rec_type, msi_num, &msiq_id)) != DDI_SUCCESS) {
+		    msiq_rec_type, msi_num, -1, &msiq_id)) != DDI_SUCCESS) {
 			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
 			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
 			return (ret);
@@ -651,7 +662,14 @@
 			return (ret);
 		}
 
-		hdlp->ih_vector = msi_num;
+		if ((ret = px_lib_msi_setvalid(dip, msi_num,
+		    PCI_MSI_VALID)) != DDI_SUCCESS)
+			return (ret);
+
+		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
+		    px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri,
+		    PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num);
+
 		break;
 	case DDI_INTROP_DUPVEC:
 		DBG(DBG_INTROPS, dip, "px_msix_ops: dupisr - inum: %x, "
@@ -661,12 +679,20 @@
 		    hdlp->ih_scratch1);
 		break;
 	case DDI_INTROP_REMISR:
-		msi_num = hdlp->ih_vector;
-
 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
 		    &msiq_id)) != DDI_SUCCESS)
 			return (ret);
 
+		if ((ret = px_ib_update_intr_state(px_p, rdip,
+		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
+		    hdlp->ih_pri, PX_INTR_STATE_DISABLE, msiq_rec_type,
+		    msi_num)) != DDI_SUCCESS)
+			return (ret);
+
+		if ((ret = px_lib_msi_setvalid(dip, msi_num,
+		    PCI_MSI_INVALID)) != DDI_SUCCESS)
+			return (ret);
+
 		if ((ret = px_lib_msi_setstate(dip, msi_num,
 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS)
 			return (ret);
@@ -674,125 +700,78 @@
 		ret = px_rem_msiq_intr(dip, rdip,
 		    hdlp, msiq_rec_type, msi_num, msiq_id);
 
-		hdlp->ih_vector = 0;
 		break;
-	case DDI_INTROP_ENABLE:
-		msi_num = hdlp->ih_vector;
-
-		if ((ret = px_lib_msi_setvalid(dip, msi_num,
-		    PCI_MSI_VALID)) != DDI_SUCCESS)
+	case DDI_INTROP_GETTARGET:
+		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
+		    &msiq_id)) != DDI_SUCCESS)
 			return (ret);
 
-		if ((pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) ||
-		    (hdlp->ih_type == DDI_INTR_TYPE_MSIX)) {
-			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
+		ret = px_ib_get_intr_target(px_p,
+		    px_msiqid_to_devino(px_p, msiq_id), (cpuid_t *)result);
+		break;
+	case DDI_INTROP_SETTARGET:
+		ret = px_ib_set_msix_target(px_p, hdlp, msi_num,
+		    *(cpuid_t *)result);
+		break;
+	case DDI_INTROP_ENABLE:
+		/*
+		 * curr_nenables will be greater than 0 if rdip is using
+		 * MSI-X and also, if it is using DUP interface. If
+		 * curr_nenables is > 0, return after clearing the mask bit.
+		 */
+		if ((pci_is_msi_enabled(rdip, hdlp->ih_type) == DDI_SUCCESS) &&
+		    (i_ddi_intr_get_current_nenables(rdip) > 0)) {
+			return (pci_msi_clr_mask(rdip, hdlp->ih_type,
+			    hdlp->ih_inum));
+		}
 
-			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
-			    nintrs, hdlp->ih_inum, msi_addr,
-			    hdlp->ih_type == DDI_INTR_TYPE_MSIX ?
-			    msi_num : msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
-				return (ret);
+		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
 
-			if ((ret = pci_msi_enable_mode(rdip, hdlp->ih_type))
-			    != DDI_SUCCESS)
-				return (ret);
-		}
+		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
+		    nintrs, hdlp->ih_inum, msi_addr,
+		    hdlp->ih_type == DDI_INTR_TYPE_MSIX ? msi_num :
+		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
+			return (ret);
+
+		if ((ret = pci_msi_enable_mode(rdip,
+		    hdlp->ih_type)) != DDI_SUCCESS)
+			return (ret);
 
 		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
 		    hdlp->ih_inum)) != DDI_SUCCESS)
 			return (ret);
 
-		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
-			break;
-
-		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
-		    &msiq_id)) != DDI_SUCCESS)
-			return (ret);
-
-		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
-		    px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri,
-		    PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num);
-
 		break;
 	case DDI_INTROP_DISABLE:
-		msi_num = hdlp->ih_vector;
-
-		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
-		    hdlp->ih_cap & DDI_INTR_FLAG_BLOCK)) != DDI_SUCCESS)
-			return (ret);
-
 		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
 		    hdlp->ih_inum)) != DDI_SUCCESS)
 			return (ret);
 
-		if ((ret = px_lib_msi_setvalid(dip, msi_num,
-		    PCI_MSI_INVALID)) != DDI_SUCCESS)
-			return (ret);
-
-		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
-			break;
+		/*
+		 * curr_nenables will be greater than 1 if rdip is using
+		 * MSI-X and also, if it is using DUP interface. If
+		 * curr_nenables is > 1, return after setting the mask bit.
+		 */
+		if (i_ddi_intr_get_current_nenables(rdip) > 1)
+			return (DDI_SUCCESS);
 
-		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
-		    &msiq_id)) != DDI_SUCCESS)
+		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type))
+		    != DDI_SUCCESS)
 			return (ret);
 
-		ret = px_ib_update_intr_state(px_p, rdip,
-		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
-		    hdlp->ih_pri, PX_INTR_STATE_DISABLE, msiq_rec_type,
-		    msi_num);
-
 		break;
 	case DDI_INTROP_BLOCKENABLE:
 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
-		msi_num = hdlp->ih_vector;
 
 		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
 		    nintrs, hdlp->ih_inum, msi_addr,
 		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
 			return (ret);
 
-		for (i = 0; i < nintrs; i++, msi_num++) {
-			if ((ret = px_lib_msi_setvalid(dip, msi_num,
-			    PCI_MSI_VALID)) != DDI_SUCCESS)
-				return (ret);
-
-			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
-			    &msiq_id)) != DDI_SUCCESS)
-				return (ret);
-
-			if ((ret = px_ib_update_intr_state(px_p, rdip,
-			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
-			    msiq_id), hdlp->ih_pri, PX_INTR_STATE_ENABLE,
-			    msiq_rec_type, msi_num)) != DDI_SUCCESS)
-				return (ret);
-		}
-
 		ret = pci_msi_enable_mode(rdip, hdlp->ih_type);
 		break;
 	case DDI_INTROP_BLOCKDISABLE:
-		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
-		msi_num = hdlp->ih_vector;
-
-		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
-		    hdlp->ih_cap & DDI_INTR_FLAG_BLOCK)) != DDI_SUCCESS)
-			return (ret);
-
-		for (i = 0; i < nintrs; i++, msi_num++) {
-			if ((ret = px_lib_msi_setvalid(dip, msi_num,
-			    PCI_MSI_INVALID)) != DDI_SUCCESS)
-				return (ret);
-
-			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
-			    &msiq_id)) != DDI_SUCCESS)
-				return (ret);
-
-			if ((ret = px_ib_update_intr_state(px_p, rdip,
-			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
-			    msiq_id), hdlp->ih_pri, PX_INTR_STATE_DISABLE,
-			    msiq_rec_type, msi_num)) != DDI_SUCCESS)
-				return (ret);
-		}
-
+		ret = pci_msi_disable_mode(rdip, hdlp->ih_type);
 		break;
 	case DDI_INTROP_SETMASK:
 		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
@@ -1030,13 +1009,16 @@
 
 	/* Select cpu, saving it for sharing and removal */
 	if (ipil_list == NULL) {
-		ino_p->ino_cpuid = intr_dist_cpuid();
+		if (ino_p->ino_cpuid == -1)
+			ino_p->ino_cpuid = intr_dist_cpuid();
 
 		/* Enable interrupt */
 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
 	}
 
 ino_done:
+	hdlp->ih_target = ino_p->ino_cpuid;
+
 	/* Add weight to the cpu that we are already targeting */
 	weight = pci_class_to_intr_weight(rdip);
 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
@@ -1133,7 +1115,7 @@
 int
 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
-    msgcode_t msg_code, msiqid_t *msiq_id_p)
+    msgcode_t msg_code, cpuid_t cpu_id, msiqid_t *msiq_id_p)
 {
 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
 	px_ib_t		*ib_p = px_p->px_ib_p;
@@ -1145,23 +1127,27 @@
 	int32_t		weight;
 	int		ret = DDI_SUCCESS;
 
-	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=%x "
-	    "arg1=%x arg2=%x\n", ddi_driver_name(rdip), ddi_get_instance(rdip),
-	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
-
-	if ((ret = px_msiq_alloc(px_p, rec_type, msiq_id_p)) != DDI_SUCCESS) {
-		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
-		    "msiq allocation failed\n");
-		return (ret);
-	}
-
-	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
+	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=0x%x "
+	    "arg1=0x%x arg2=0x%x cpu=0x%x\n", ddi_driver_name(rdip),
+	    ddi_get_instance(rdip), hdlp->ih_cb_func, hdlp->ih_cb_arg1,
+	    hdlp->ih_cb_arg2, cpu_id);
 
 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
 
 	mutex_enter(&ib_p->ib_ino_lst_mutex);
 
+	ret = (cpu_id == -1) ? px_msiq_alloc(px_p, rec_type, msiq_id_p) :
+	    px_msiq_alloc_based_on_cpuid(px_p, rec_type, cpu_id, msiq_id_p);
+
+	if (ret != DDI_SUCCESS) {
+		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
+		    "msiq allocation failed\n");
+		goto fail;
+	}
+
+	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
+
 	ino_p = px_ib_locate_ino(ib_p, ino);
 	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
 
@@ -1221,17 +1207,20 @@
 
 	/* Select cpu, saving it for sharing and removal */
 	if (ipil_list == NULL) {
-		ino_p->ino_cpuid = intr_dist_cpuid();
-
 		/* Enable MSIQ */
 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
 
+		if (ino_p->ino_cpuid == -1)
+			ino_p->ino_cpuid = intr_dist_cpuid();
+
 		/* Enable interrupt */
 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
 	}
 
 ino_done:
+	hdlp->ih_target = ino_p->ino_cpuid;
+
 	/* Add weight to the cpu that we are already targeting */
 	weight = pci_class_to_intr_weight(rdip);
 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
@@ -1249,6 +1238,8 @@
 fail2:
 	px_ib_delete_ino_pil(ib_p, ipil_p);
 fail1:
+	(void) px_msiq_free(px_p, *msiq_id_p);
+fail:
 	if (ih_p->ih_config_handle)
 		pci_config_teardown(&ih_p->ih_config_handle);
 
@@ -1309,13 +1300,11 @@
 		if (ino_p->ino_ipil_size == 0)
 			px_lib_msiq_setvalid(dip,
 			    px_devino_to_msiqid(px_p, ino), PCI_MSIQ_INVALID);
-
-		(void) px_msiq_free(px_p, msiq_id);
 	}
 
-	if (ino_p->ino_ipil_size == 0) {
-		kmem_free(ino_p, sizeof (px_ino_t));
-	} else {
+	(void) px_msiq_free(px_p, msiq_id);
+
+	if (ino_p->ino_ipil_size) {
 		/* Re-enable interrupt only if mapping register still shared */
 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
 	}
--- a/usr/src/uts/sun4/io/px/px_intr.h	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4/io/px/px_intr.h	Wed Jul 08 12:59:05 2009 +0800
@@ -44,7 +44,7 @@
 
 extern int px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
 	ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
-	msgcode_t msg_code, msiqid_t *msiq_id_p);
+	msgcode_t msg_code, cpuid_t cpuid, msiqid_t *msiq_id_p);
 extern int px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
 	ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
 	msgcode_t msg_code, msiqid_t msiq_id);
--- a/usr/src/uts/sun4/io/px/px_msiq.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4/io/px/px_msiq.c	Wed Jul 08 12:59:05 2009 +0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -32,6 +32,7 @@
 #include <sys/conf.h>
 #include <sys/ddi.h>
 #include <sys/sunddi.h>
+#include <sys/machsystm.h>	/* intr_dist_add */
 #include <sys/modctl.h>
 #include <sys/disp.h>
 #include <sys/stat.h>
@@ -46,7 +47,8 @@
 int
 px_msiq_attach(px_t *px_p)
 {
-	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
+	px_ib_t		*ib_p = px_p->px_ib_p;
+	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
 	int		i, ret = DDI_SUCCESS;
 
 	DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_attach\n");
@@ -68,11 +70,8 @@
 	    msiq_state_p->msiq_msg_qcnt;
 
 	msiq_state_p->msiq_1st_msi_qid = msiq_state_p->msiq_1st_msiq_id;
-	msiq_state_p->msiq_next_msi_qid = msiq_state_p->msiq_1st_msi_qid;
-
 	msiq_state_p->msiq_1st_msg_qid = msiq_state_p->msiq_1st_msiq_id +
 	    msiq_state_p->msiq_msi_qcnt;
-	msiq_state_p->msiq_next_msg_qid = msiq_state_p->msiq_1st_msg_qid;
 
 	mutex_init(&msiq_state_p->msiq_mutex, NULL, MUTEX_DRIVER, NULL);
 	msiq_state_p->msiq_p = kmem_zalloc(msiq_state_p->msiq_cnt *
@@ -83,11 +82,14 @@
 		    msiq_state_p->msiq_1st_msiq_id + i;
 		msiq_state_p->msiq_p[i].msiq_refcnt = 0;
 		msiq_state_p->msiq_p[i].msiq_state = MSIQ_STATE_FREE;
+		(void) px_ib_alloc_ino(ib_p, px_msiqid_to_devino(px_p,
+		    msiq_state_p->msiq_p[i].msiq_id));
 	}
 
 	if ((ret = px_lib_msiq_init(px_p->px_dip)) != DDI_SUCCESS)
 		px_msiq_detach(px_p);
 
+	msiq_state_p->msiq_redist_flag = B_TRUE;
 	return (ret);
 }
 
@@ -110,11 +112,11 @@
 	kmem_free(msiq_state_p->msiq_p,
 	    msiq_state_p->msiq_cnt * sizeof (px_msiq_t));
 
-	bzero(&px_p->px_ib_p->ib_msiq_state, sizeof (px_msiq_state_t));
+	bzero(msiq_state_p, sizeof (px_msiq_state_t));
 }
 
 /*
- * px_msiq_detach()
+ * px_msiq_resume()
  */
 void
 px_msiq_resume(px_t *px_p)
@@ -123,7 +125,8 @@
 	int		i;
 
 	for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
-		(void) px_lib_msiq_gethead(px_p->px_dip, i,
+		(void) px_lib_msiq_gethead(px_p->px_dip,
+		    msiq_state_p->msiq_p[i].msiq_id,
 		    &msiq_state_p->msiq_p[i].msiq_curr_head_index);
 		msiq_state_p->msiq_p[i].msiq_new_head_index = 0;
 		msiq_state_p->msiq_p[i].msiq_recs2process = 0;
@@ -136,53 +139,128 @@
 int
 px_msiq_alloc(px_t *px_p, msiq_rec_type_t rec_type, msiqid_t *msiq_id_p)
 {
-	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
-	msiqid_t	first_msiq_id, *next_msiq_index;
+	px_ib_t		*ib_p = px_p->px_ib_p;
+	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
+	msiqid_t	first_msiq_id;
 	uint_t		msiq_cnt;
+	ushort_t	least_refcnt;
 	int		i;
 
 	DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_alloc\n");
 
+	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
+	mutex_enter(&msiq_state_p->msiq_mutex);
+
+	if (rec_type == MSG_REC) {
+		msiq_cnt = msiq_state_p->msiq_msg_qcnt;
+		first_msiq_id = msiq_state_p->msiq_1st_msg_qid;
+	} else {
+		msiq_cnt = msiq_state_p->msiq_msi_qcnt;
+		first_msiq_id = msiq_state_p->msiq_1st_msi_qid;
+	}
+
+	*msiq_id_p = first_msiq_id;
+	least_refcnt = msiq_state_p->msiq_p[first_msiq_id].msiq_refcnt;
+
+	/* Allocate MSIQs */
+	for (i = first_msiq_id; i < (first_msiq_id + msiq_cnt); i++) {
+		if (msiq_state_p->msiq_p[i].msiq_state == MSIQ_STATE_FREE) {
+			msiq_state_p->msiq_p[i].msiq_state = MSIQ_STATE_INUSE;
+			(void) px_lib_msiq_gethead(px_p->px_dip, i,
+			    &msiq_state_p->msiq_p[i].msiq_curr_head_index);
+			*msiq_id_p = msiq_state_p->msiq_p[i].msiq_id;
+			break;
+		}
+
+		if (least_refcnt > msiq_state_p->msiq_p[i].msiq_refcnt) {
+			*msiq_id_p = msiq_state_p->msiq_p[i].msiq_id;
+			least_refcnt = msiq_state_p->msiq_p[i].msiq_refcnt;
+		}
+	}
+
+	msiq_state_p->msiq_p[*msiq_id_p].msiq_refcnt++;
+
+	DBG(DBG_MSIQ, px_p->px_dip,
+	    "px_msiq_alloc: msiq_id 0x%x\n", *msiq_id_p);
+
+	mutex_exit(&msiq_state_p->msiq_mutex);
+	return (DDI_SUCCESS);
+}
+
+/*
+ * px_msiq_alloc_based_on_cpuid()
+ */
+int
+px_msiq_alloc_based_on_cpuid(px_t *px_p, msiq_rec_type_t rec_type,
+    cpuid_t cpuid, msiqid_t *msiq_id_p)
+{
+	px_ib_t		*ib_p = px_p->px_ib_p;
+	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
+	msiqid_t	first_msiq_id, free_msiq_id;
+	uint_t		msiq_cnt;
+	ushort_t	least_refcnt;
+	px_ino_t	*ino_p;
+	int		i;
+
+	DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_alloc_based_on_cpuid: "
+	    "cpuid 0x%x\n", cpuid);
+
+	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
+
 	mutex_enter(&msiq_state_p->msiq_mutex);
 
 	if (rec_type == MSG_REC) {
 		msiq_cnt = msiq_state_p->msiq_msg_qcnt;
 		first_msiq_id = msiq_state_p->msiq_1st_msg_qid;
-		next_msiq_index = &msiq_state_p->msiq_next_msg_qid;
 	} else {
 		msiq_cnt = msiq_state_p->msiq_msi_qcnt;
 		first_msiq_id = msiq_state_p->msiq_1st_msi_qid;
-		next_msiq_index = &msiq_state_p->msiq_next_msi_qid;
 	}
 
+	*msiq_id_p = free_msiq_id = (msiqid_t)-1;
+	least_refcnt = (ushort_t)-1;
+
 	/* Allocate MSIQs */
 	for (i = first_msiq_id; i < (first_msiq_id + msiq_cnt); i++) {
-		if (msiq_state_p->msiq_p[i].msiq_state == MSIQ_STATE_FREE) {
-			msiq_state_p->msiq_p[i].msiq_state = MSIQ_STATE_INUSE;
-			msiq_state_p->msiq_p[i].msiq_refcnt = 1;
-			(void) px_lib_msiq_gethead(px_p->px_dip, i,
-			    &msiq_state_p->msiq_p[i].msiq_curr_head_index);
-			break;
+		ino_p = px_ib_locate_ino(ib_p, px_msiqid_to_devino(px_p, i));
+
+		if ((ino_p->ino_cpuid == cpuid) &&
+		    (least_refcnt > msiq_state_p->msiq_p[i].msiq_refcnt)) {
+			*msiq_id_p = msiq_state_p->msiq_p[i].msiq_id;
+			least_refcnt = msiq_state_p->msiq_p[i].msiq_refcnt;
 		}
+
+		if ((*msiq_id_p == -1) && (free_msiq_id == -1) &&
+		    (msiq_state_p->msiq_p[i].msiq_state == MSIQ_STATE_FREE))
+			free_msiq_id = msiq_state_p->msiq_p[i].msiq_id;
 	}
 
-	/*
-	 * There are no free MSIQ.
-	 * Use next available MSIQ.
-	 */
-	if (i >= (first_msiq_id + msiq_cnt)) {
-		i = *next_msiq_index;
-		msiq_state_p->msiq_p[i].msiq_refcnt++;
+	if (*msiq_id_p == -1) {
+		if (free_msiq_id == -1) {
+			DBG(DBG_MSIQ, px_p->px_dip,
+			    "px_msiq_alloc_based_on_cpuid: No EQ is available "
+			    "for CPU 0x%x\n", cpuid);
+
+			mutex_exit(&msiq_state_p->msiq_mutex);
+			return (DDI_EINVAL);
+		}
+
+		*msiq_id_p = free_msiq_id;
+		ino_p = px_ib_locate_ino(ib_p,
+		    px_msiqid_to_devino(px_p, *msiq_id_p));
+		ino_p->ino_cpuid = ino_p->ino_default_cpuid = cpuid;
 	}
 
-	*msiq_id_p = msiq_state_p->msiq_p[i].msiq_id;
-	DBG(DBG_MSIQ, px_p->px_dip,
-	    "px_msiq_alloc: msiq_id 0x%x\n", *msiq_id_p);
+	if (msiq_state_p->msiq_p[*msiq_id_p].msiq_state == MSIQ_STATE_FREE) {
+		msiq_state_p->msiq_p[*msiq_id_p].msiq_state = MSIQ_STATE_INUSE;
+		(void) px_lib_msiq_gethead(px_p->px_dip, *msiq_id_p,
+		    &msiq_state_p->msiq_p[*msiq_id_p].msiq_curr_head_index);
+	}
 
-	(*next_msiq_index)++;
+	msiq_state_p->msiq_p[*msiq_id_p].msiq_refcnt++;
 
-	if (*next_msiq_index >= (first_msiq_id + msiq_cnt))
-		*next_msiq_index = first_msiq_id;
+	DBG(DBG_MSIQ, px_p->px_dip,
+	    "px_msiq_alloc_based_on_cpuid: msiq_id 0x%x\n", *msiq_id_p);
 
 	mutex_exit(&msiq_state_p->msiq_mutex);
 	return (DDI_SUCCESS);
@@ -194,16 +272,20 @@
 int
 px_msiq_free(px_t *px_p, msiqid_t msiq_id)
 {
-	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
+	px_ib_t		*ib_p = px_p->px_ib_p;
+	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
 
 	DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_free: msiq_id 0x%x", msiq_id);
 
+	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
 	mutex_enter(&msiq_state_p->msiq_mutex);
 
 	if ((msiq_id < msiq_state_p->msiq_1st_msiq_id) || (msiq_id >=
 	    (msiq_state_p->msiq_1st_msiq_id + msiq_state_p->msiq_cnt))) {
 		DBG(DBG_MSIQ, px_p->px_dip,
 		    "px_msiq_free: Invalid msiq_id 0x%x", msiq_id);
+
+		mutex_exit(&msiq_state_p->msiq_mutex);
 		return (DDI_FAILURE);
 	}
 
@@ -215,6 +297,45 @@
 }
 
 /*
+ * px_msiq_redist()
+ */
+void
+px_msiq_redist(px_t *px_p)
+{
+	px_ib_t		*ib_p = px_p->px_ib_p;
+	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
+	px_ino_t	*ino_p;
+	int		i;
+
+	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
+
+	mutex_enter(&msiq_state_p->msiq_mutex);
+
+	if (msiq_state_p->msiq_redist_flag == B_FALSE) {
+		mutex_exit(&msiq_state_p->msiq_mutex);
+		return;
+	}
+
+	for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
+		ino_p = px_ib_locate_ino(ib_p,
+		    px_msiqid_to_devino(px_p, msiq_state_p->msiq_p[i].msiq_id));
+
+		if (ino_p) {
+			ino_p->ino_cpuid = ino_p->ino_default_cpuid =
+			    intr_dist_cpuid();
+
+			DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_redist: "
+			    "sysino 0x%llx current cpuid 0x%x "
+			    "default cpuid 0x%x\n", ino_p->ino_sysino,
+			    ino_p->ino_cpuid, ino_p->ino_default_cpuid);
+		}
+	}
+
+	msiq_state_p->msiq_redist_flag = B_FALSE;
+	mutex_exit(&msiq_state_p->msiq_mutex);
+}
+
+/*
  * px_msiqid_to_devino()
  */
 devino_t
--- a/usr/src/uts/sun4/io/px/px_msiq.h	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4/io/px/px_msiq.h	Wed Jul 08 12:59:05 2009 +0800
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_PX_MSIQ_H
 #define	_SYS_PX_MSIQ_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -59,16 +57,15 @@
 	uint_t		msiq_rec_cnt;	/* # of records per MSIQ */
 	msiqid_t	msiq_1st_msiq_id; /* First MSIQ ID */
 	devino_t	msiq_1st_devino; /* First devino */
+	boolean_t	msiq_redist_flag; /* Flag to redist MSIQs */
 
 	/* MSIQs specific reserved for MSI/Xs */
 	uint_t		msiq_msi_qcnt;	/* # of MSIQs for MSI/Xs */
 	msiqid_t	msiq_1st_msi_qid; /* First MSIQ ID for MSI/Xs */
-	msiqid_t	msiq_next_msi_qid; /* Next MSIQ index for MSI/Xs */
 
 	/* MSIQs specific reserved for PCIe messages */
 	uint_t		msiq_msg_qcnt;	/* # of MSIQs for PCIe msgs */
 	msiqid_t	msiq_1st_msg_qid; /* First MSIQ ID for PCIe msgs */
-	msiqid_t	msiq_next_msg_qid; /* Next MSIQ index for PCIe msgs */
 
 	px_msiq_t	*msiq_p;	/* Pointer to MSIQs array */
 	void		*msiq_buf_p; /* Pointer to MSIQs array */
@@ -98,7 +95,11 @@
 
 extern	int	px_msiq_alloc(px_t *px_p, msiq_rec_type_t rec_type,
 		    msiqid_t *msiq_id_p);
+extern	int	px_msiq_alloc_based_on_cpuid(px_t *px_p,
+		    msiq_rec_type_t rec_type, cpuid_t cpuid,
+		    msiqid_t *msiq_id_p);
 extern	int	px_msiq_free(px_t *px_p, msiqid_t msiq_id);
+extern	void	px_msiq_redist(px_t *px_p);
 
 extern  devino_t px_msiqid_to_devino(px_t *px_p, msiqid_t msiq_id);
 extern  msiqid_t px_devino_to_msiqid(px_t *px_p, devino_t devino);
--- a/usr/src/uts/sun4/io/px/px_pec.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4/io/px/px_pec.c	Wed Jul 08 12:59:05 2009 +0800
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * PCI Express PEC implementation:
  *	initialization
@@ -181,7 +179,7 @@
 	hdl.ih_pri = PX_ERR_LOW_PIL;
 
 	if ((ret = px_add_msiq_intr(dip, dip, &hdl,
-	    MSG_REC, (msgcode_t)PCIE_CORR_MSG,
+	    MSG_REC, (msgcode_t)PCIE_CORR_MSG, -1,
 	    &pec_p->pec_corr_msg_msiq_id)) != DDI_SUCCESS) {
 		DBG(DBG_MSG, px_p->px_dip,
 		    "PCIE_CORR_MSG registration failed\n");
@@ -204,7 +202,7 @@
 	hdl.ih_pri = PX_ERR_PIL;
 
 	if ((ret = px_add_msiq_intr(dip, dip, &hdl,
-	    MSG_REC, (msgcode_t)PCIE_NONFATAL_MSG,
+	    MSG_REC, (msgcode_t)PCIE_NONFATAL_MSG, -1,
 	    &pec_p->pec_non_fatal_msg_msiq_id)) != DDI_SUCCESS) {
 		DBG(DBG_MSG, px_p->px_dip,
 		    "PCIE_NONFATAL_MSG registration failed\n");
@@ -228,7 +226,7 @@
 	hdl.ih_pri = PX_ERR_PIL;
 
 	if ((ret = px_add_msiq_intr(dip, dip, &hdl,
-	    MSG_REC, (msgcode_t)PCIE_FATAL_MSG,
+	    MSG_REC, (msgcode_t)PCIE_FATAL_MSG, -1,
 	    &pec_p->pec_fatal_msg_msiq_id)) != DDI_SUCCESS) {
 		DBG(DBG_MSG, px_p->px_dip,
 		    "PCIE_FATAL_MSG registration failed\n");
--- a/usr/src/uts/sun4/io/px/px_tools.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4/io/px/px_tools.c	Wed Jul 08 12:59:05 2009 +0800
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/cpuvar.h>
@@ -68,26 +66,13 @@
 
 int	pci_num_bars = sizeof (pci_bars) / sizeof (pci_bars[0]);
 
-/*
- * Validate the cpu_id passed in.
- * A value of 1 will be returned for success and zero for failure.
- */
-static int
-pxtool_validate_cpuid(uint32_t cpuid)
-{
-	extern const int _ncpu;
-	extern cpu_t	*cpu[];
-
-	ASSERT(mutex_owned(&cpu_lock));
-
-	return ((cpuid < _ncpu) && (cpu[cpuid] && cpu_is_online(cpu[cpuid])));
-}
-
 
 /*ARGSUSED*/
 static int
 pxtool_intr_info(dev_info_t *dip, void *arg, int mode)
 {
+	px_t *px_p = DIP_TO_STATE(dip);
+	px_msi_state_t	*msi_state_p = &px_p->px_ib_p->ib_msi_state;
 	pcitool_intr_info_t intr_info;
 	int rval = SUCCESS;
 
@@ -99,7 +84,10 @@
 
 	intr_info.ctlr_version = 0;	/* XXX how to get real version? */
 	intr_info.ctlr_type = PCITOOL_CTLR_TYPE_RISC;
-	intr_info.num_intr = pxtool_num_inos;
+	if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
+		intr_info.num_intr = msi_state_p->msi_cnt;
+	else
+		intr_info.num_intr = pxtool_num_inos;
 
 	intr_info.drvr_version = PCITOOL_VERSION;
 	if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
@@ -125,44 +113,65 @@
 {
 	/* Array part isn't used here, but oh well... */
 	pcitool_intr_get_t partial_iget;
-	uint32_t ino;
-	uint8_t num_devs_ret;
+	pcitool_intr_get_t *iget = &partial_iget;
 	int copyout_rval;
 	sysino_t sysino;
 	intr_valid_state_t intr_valid_state;
 	cpuid_t old_cpu_id;
 	px_t *px_p = DIP_TO_STATE(dip);
-	pcitool_intr_get_t *iget = &partial_iget;
 	size_t	iget_kmem_alloc_size = 0;
-	int rval = SUCCESS;
+	int rval = EIO;
 
 	/* Read in just the header part, no array section. */
 	if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
 	    DDI_SUCCESS)
 		return (EFAULT);
 
-	ino = partial_iget.ino;
-	num_devs_ret = partial_iget.num_devs_ret;
+	iget->status = PCITOOL_IO_ERROR;
+
+	if (iget->flags & PCITOOL_INTR_FLAG_GET_MSI) {
+		px_msi_state_t	*msi_state_p = &px_p->px_ib_p->ib_msi_state;
+		pci_msi_valid_state_t	msi_state;
+		msiqid_t	msiq_id;
 
-	partial_iget.num_devs_ret = 0;		/* Assume error for now. */
-	partial_iget.status = PCITOOL_INVALID_INO;
-	rval = EINVAL;
+		if ((iget->msi < msi_state_p->msi_1st_msinum) ||
+		    (iget->msi >= (msi_state_p->msi_1st_msinum +
+		    msi_state_p->msi_cnt))) {
+			iget->status = PCITOOL_INVALID_MSI;
+			rval = EINVAL;
+			goto done_get_intr;
+		}
+
+		if ((px_lib_msi_getvalid(dip, iget->msi,
+		    &msi_state) != DDI_SUCCESS) ||
+		    (msi_state != PCI_MSI_VALID))
+			goto done_get_intr;
+
+		if (px_lib_msi_getmsiq(dip, iget->msi,
+		    &msiq_id) != DDI_SUCCESS)
+			goto done_get_intr;
+
+		iget->ino = px_msiqid_to_devino(px_p, msiq_id);
+	} else {
+		iget->msi = (uint32_t)-1;
+	}
 
 	/* Validate argument. */
-	if (partial_iget.ino > pxtool_num_inos) {
+	if (iget->ino > pxtool_num_inos) {
+		iget->status = PCITOOL_INVALID_INO;
+		rval = EINVAL;
 		goto done_get_intr;
 	}
 
 	/* Caller wants device information returned. */
-	if (num_devs_ret > 0) {
-
+	if (iget->num_devs_ret > 0) {
 		/*
 		 * Allocate room.
 		 * Note if num_devs == 0 iget remains pointing to
 		 * partial_iget.
 		 */
-		iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
-		iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
+		iget_kmem_alloc_size = PCITOOL_IGET_SIZE(iget->num_devs_ret);
+		iget = kmem_zalloc(iget_kmem_alloc_size, KM_SLEEP);
 
 		/* Read in whole structure to verify there's room. */
 		if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
@@ -175,21 +184,17 @@
 		}
 	}
 
-	bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
-	iget->ino = ino;
-	iget->num_devs_ret = num_devs_ret;
-
 	/* Convert leaf-wide intr to system-wide intr */
-	if (px_lib_intr_devino_to_sysino(dip, iget->ino, &sysino) ==
-	    DDI_FAILURE) {
+	if (px_lib_intr_devino_to_sysino(dip, iget->ino, &sysino) !=
+	    DDI_SUCCESS) {
 		iget->status = PCITOOL_IO_ERROR;
 		rval = EIO;
 		goto done_get_intr;
 	}
 
 	/* Operate only on inos which are already enabled. */
-	if (px_lib_intr_getvalid(dip, sysino, &intr_valid_state) ==
-	    DDI_FAILURE) {
+	if (px_lib_intr_getvalid(dip, sysino, &intr_valid_state) !=
+	    DDI_SUCCESS) {
 		iget->status = PCITOOL_IO_ERROR;
 		rval = EIO;
 		goto done_get_intr;
@@ -200,20 +205,20 @@
 	 * as well as those mapped to devices.
 	 */
 	if (intr_valid_state == INTR_VALID) {
-
 		/*
 		 * The following looks up the px_ino and returns
 		 * info of devices mapped to this ino.
 		 */
-		iget->num_devs = pxtool_ib_get_ino_devs(
-		    px_p, ino, &iget->num_devs_ret, iget->dev);
+		iget->num_devs = pxtool_ib_get_ino_devs(px_p, iget->ino,
+		    iget->msi, &iget->num_devs_ret, iget->dev);
 
-		if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) ==
-		    DDI_FAILURE) {
+		if (px_ib_get_intr_target(px_p, iget->ino,
+		    &old_cpu_id) != DDI_SUCCESS) {
 			iget->status = PCITOOL_IO_ERROR;
 			rval = EIO;
 			goto done_get_intr;
 		}
+
 		iget->cpu_id = old_cpu_id;
 	}
 
@@ -223,7 +228,7 @@
 done_get_intr:
 	iget->drvr_version = PCITOOL_VERSION;
 	copyout_rval =
-	    ddi_copyout(iget, arg, PCITOOL_IGET_SIZE(num_devs_ret), mode);
+	    ddi_copyout(iget, arg, PCITOOL_IGET_SIZE(iget->num_devs_ret), mode);
 
 	if (iget_kmem_alloc_size > 0)
 		kmem_free(iget, iget_kmem_alloc_size);
@@ -246,10 +251,11 @@
 	pcitool_intr_set_t iset;
 	cpuid_t old_cpu_id;
 	sysino_t sysino;
+	intr_valid_state_t intr_valid_state;
 	px_t *px_p = DIP_TO_STATE(dip);
-	px_ib_t *ib_p = px_p->px_ib_p;
-	uint8_t zero = 0;
-	int rval = SUCCESS;
+	msiqid_t msiq_id;
+	int rval = EIO;
+	int ret = DDI_SUCCESS;
 	size_t copyinout_size;
 
 	bzero(&iset, sizeof (pcitool_intr_set_t));
@@ -276,57 +282,108 @@
 		goto done_set_intr;
 	}
 
-	if (iset.flags & PCITOOL_INTR_SET_FLAG_GROUP) {
+	if (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP) {
 		iset.status = PCITOOL_IO_ERROR;
 		rval = ENOTSUP;
 		goto done_set_intr;
 	}
 
-	iset.status = PCITOOL_INVALID_INO;
-	rval = EINVAL;
+	iset.status = PCITOOL_IO_ERROR;
+
+	if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
+		px_msi_state_t	*msi_state_p = &px_p->px_ib_p->ib_msi_state;
+		pci_msi_valid_state_t	msi_state;
+
+		if ((iset.msi < msi_state_p->msi_1st_msinum) ||
+		    (iset.msi >= (msi_state_p->msi_1st_msinum +
+		    msi_state_p->msi_cnt))) {
+			iset.status = PCITOOL_INVALID_MSI;
+			rval = EINVAL;
+			goto done_set_intr;
+		}
+
+		if ((px_lib_msi_getvalid(dip, iset.msi,
+		    &msi_state) != DDI_SUCCESS) ||
+		    (msi_state != PCI_MSI_VALID))
+			goto done_set_intr;
+
+		if (px_lib_msi_getmsiq(dip, iset.msi,
+		    &msiq_id) != DDI_SUCCESS)
+			goto done_set_intr;
+
+		iset.ino = px_msiqid_to_devino(px_p, msiq_id);
+	} else {
+		iset.msi = (uint32_t)-1;
+	}
 
 	/* Validate input argument. */
-	if (iset.ino > pxtool_num_inos)
+	if (iset.ino > pxtool_num_inos) {
+		iset.status = PCITOOL_INVALID_INO;
+		rval = EINVAL;
+		goto done_set_intr;
+	}
+
+	/* Convert leaf-wide intr to system-wide intr */
+	if (px_lib_intr_devino_to_sysino(dip, iset.ino, &sysino) !=
+	    DDI_SUCCESS)
 		goto done_set_intr;
 
-	/* Validate that ino given belongs to a device. */
-	if (pxtool_ib_get_ino_devs(px_p, iset.ino, &zero, NULL) == 0)
+	/* Operate only on inos which are already enabled. */
+	if ((px_lib_intr_getvalid(dip, sysino, &intr_valid_state) !=
+	    DDI_SUCCESS) || (intr_valid_state == INTR_NOTVALID))
 		goto done_set_intr;
 
 	/*
-	 * Get lock, validate cpu and write new mapreg value.
-	 * Return original cpu value to caller via iset.cpu.
+	 * Consider all valid inos: those mapped to the root complex itself
+	 * as well as those mapped to devices.
 	 */
-	mutex_enter(&cpu_lock);
-	if (pxtool_validate_cpuid(iset.cpu_id)) {
+	if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) != DDI_SUCCESS)
+		goto done_set_intr;
 
-		DBG(DBG_TOOLS, dip, "Enabling CPU %d\n", iset.cpu_id);
+	if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
+		ddi_intr_handle_impl_t	hdle;
 
-		if (px_lib_intr_devino_to_sysino(dip, iset.ino, &sysino) ==
-		    DDI_FAILURE)
-			goto done_set_intr;
-
-		if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) ==
-		    DDI_FAILURE)
+		bzero(&hdle, sizeof (ddi_intr_handle_impl_t));
+		if (pxtool_ib_get_msi_info(px_p, iset.ino, iset.msi,
+		    &hdle) != DDI_SUCCESS) {
+			iset.status = PCITOOL_INVALID_MSI;
+			rval = EINVAL;
 			goto done_set_intr;
-
-		px_ib_intr_dist_en(dip, iset.cpu_id, iset.ino, B_TRUE);
-
-		px_ib_log_new_cpu(ib_p, old_cpu_id, iset.cpu_id, iset.ino);
+		}
 
-		iset.cpu_id = old_cpu_id;
-		iset.status = PCITOOL_SUCCESS;
-		rval = SUCCESS;
+		if ((ret = px_ib_set_msix_target(px_p, &hdle, iset.msi,
+		    iset.cpu_id)) == DDI_SUCCESS) {
+			(void) px_lib_msi_getmsiq(dip, iset.msi, &msiq_id);
+			iset.ino = px_msiqid_to_devino(px_p, msiq_id);
+			iset.cpu_id = old_cpu_id;
+			iset.status = PCITOOL_SUCCESS;
+			rval = SUCCESS;
+			goto done_set_intr;
+		}
+	} else {
+		if ((ret = px_ib_set_intr_target(px_p, iset.ino,
+		    iset.cpu_id)) == DDI_SUCCESS) {
+			iset.cpu_id = old_cpu_id;
+			iset.status = PCITOOL_SUCCESS;
+			rval = SUCCESS;
+			goto done_set_intr;
+		}
+	}
 
-	} else {	/* Invalid cpu.  Restore original register image. */
-
-		DBG(DBG_TOOLS, dip,
-		    "Invalid cpuid: writing orig mapreg value\n");
-
+	switch (ret) {
+	case DDI_EPENDING:
+		iset.status = PCITOOL_PENDING_INTRTIMEOUT;
+		rval = ETIME;
+		break;
+	case DDI_EINVAL:
 		iset.status = PCITOOL_INVALID_CPUID;
 		rval = EINVAL;
+		break;
+	default:
+		iset.status = PCITOOL_IO_ERROR;
+		rval = EIO;
+		break;
 	}
-	mutex_exit(&cpu_lock);
 
 done_set_intr:
 	iset.drvr_version = PCITOOL_VERSION;
--- a/usr/src/uts/sun4/os/ddi_impl.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4/os/ddi_impl.c	Wed Jul 08 12:59:05 2009 +0800
@@ -743,6 +743,8 @@
 	switch (op) {
 	case DDI_INTROP_ADDISR:
 	case DDI_INTROP_REMISR:
+	case DDI_INTROP_GETTARGET:
+	case DDI_INTROP_SETTARGET:
 	case DDI_INTROP_ENABLE:
 	case DDI_INTROP_DISABLE:
 	case DDI_INTROP_BLOCKENABLE:
--- a/usr/src/uts/sun4u/io/pci/pci.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4u/io/pci/pci.c	Wed Jul 08 12:59:05 2009 +0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1298,8 +1298,8 @@
 pci_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
     ddi_intr_handle_impl_t *hdlp, void *result)
 {
-	pci_t		*pci_p = get_pci_soft_state(
-	    ddi_get_instance(dip));
+	pci_t		*pci_p = get_pci_soft_state(ddi_get_instance(dip));
+	ib_ino_t	ino;
 	int		ret = DDI_SUCCESS;
 
 	switch (intr_op) {
@@ -1327,6 +1327,14 @@
 	case DDI_INTROP_REMISR:
 		ret = pci_remove_intr(dip, rdip, hdlp);
 		break;
+	case DDI_INTROP_GETTARGET:
+		ino = IB_MONDO_TO_INO(pci_xlate_intr(dip, rdip,
+		    pci_p->pci_ib_p, IB_MONDO_TO_INO(hdlp->ih_vector)));
+		ret = ib_get_intr_target(pci_p, ino, (int *)result);
+		break;
+	case DDI_INTROP_SETTARGET:
+		ret = DDI_ENOTSUP;
+		break;
 	case DDI_INTROP_ENABLE:
 		ret = ib_update_intr_state(pci_p, rdip, hdlp,
 		    PCI_INTR_STATE_ENABLE);
--- a/usr/src/uts/sun4u/io/pci/pci_ib.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4u/io/pci/pci_ib.c	Wed Jul 08 12:59:05 2009 +0800
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * PCI Interrupt Block (RISCx) implementation
  *	initialization
@@ -895,6 +893,122 @@
 }
 
 /*
+ * Report the CPU currently programmed into the mapping register of an ino.
+ * Intended for inos already mapped to devices; the register's valid bit is
+ * not checked here.  Stores the CPU id in *cpu_id_p, returns DDI_SUCCESS.
+ */
+/*ARGSUSED*/
+int
+ib_get_intr_target(pci_t *pci_p, ib_ino_t ino, int *cpu_id_p)
+{
+	dev_info_t		*dip = pci_p->pci_dip;
+	volatile uint64_t	*map_regp;
+	uint64_t		map_val;
+
+	DEBUG1(DBG_IB, dip, "ib_get_intr_target: ino %x\n", ino);
+
+	/* Read the mapping register once, then extract its CPU id. */
+	map_regp = ib_intr_map_reg_addr(pci_p->pci_ib_p, ino);
+	map_val = *map_regp;
+	*cpu_id_p = ib_map_reg_get_cpu(map_val);
+
+	DEBUG1(DBG_IB, dip, "ib_get_intr_target: cpu_id %x\n", *cpu_id_p);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Associate a new CPU with a given ino.
+ * Operate only on inos which are already mapped to devices.
+ *
+ * Returns DDI_SUCCESS once the ino targets cpu_id (including the noop
+ * case), DDI_FAILURE if the mapping register is not valid, DDI_EPENDING
+ * if pending interrupts do not clear within pci_intrpend_timeout, and
+ * DDI_EINVAL if cpu_id is out of range or is not an online CPU.
+ */
+int
+ib_set_intr_target(pci_t *pci_p, ib_ino_t ino, int cpu_id)
+{
+	dev_info_t		*dip = pci_p->pci_dip;
+	ib_t			*ib_p = pci_p->pci_ib_p;
+	int			ret = DDI_SUCCESS;
+	uint32_t		old_cpu_id;
+	hrtime_t		start_time;
+	uint64_t		imregval;
+	uint64_t		new_imregval;
+	volatile uint64_t	*imregp;
+	volatile uint64_t	*idregp;
+	extern const int	_ncpu;
+	extern cpu_t		*cpu[];
+
+	DEBUG2(DBG_IB, dip, "ib_set_intr_target: ino %x cpu_id %x\n",
+	    ino, cpu_id);
+
+	imregp = (uint64_t *)ib_intr_map_reg_addr(ib_p, ino);
+	idregp = IB_INO_INTR_STATE_REG(ib_p, ino);
+
+	/* Save original mapreg value. */
+	imregval = *imregp;
+	DEBUG1(DBG_IB, dip, "ib_set_intr_target: orig mapreg value: 0x%llx\n",
+	    imregval);
+
+	/* Operate only on inos which are already enabled. */
+	if (!(imregval & COMMON_INTR_MAP_REG_VALID))
+		return (DDI_FAILURE);
+
+	/* Is this request a noop? */
+	if ((old_cpu_id = ib_map_reg_get_cpu(imregval)) == cpu_id)
+		return (DDI_SUCCESS);
+
+	/* Clear the interrupt valid/enable bit for particular ino. */
+	DEBUG0(DBG_IB, dip, "Clearing intr_enabled...\n");
+	*imregp = imregval & ~COMMON_INTR_MAP_REG_VALID;
+
+	/* Wait until there are no more pending interrupts. */
+	start_time = gethrtime();
+	DEBUG0(DBG_IB, dip, "About to check for pending interrupts...\n");
+	while (IB_INO_INTR_PENDING(idregp, ino)) {
+		DEBUG0(DBG_IB, dip, "Waiting for pending ints to clear\n");
+		if ((gethrtime() - start_time) >= pci_intrpend_timeout) {
+			/* Timed out; the valid bit stays cleared (review). */
+			DEBUG0(DBG_IB, dip, "Timed out waiting \n");
+			return (DDI_EPENDING);
+		}
+	}
+
+	new_imregval = *imregp;
+	DEBUG1(DBG_IB, dip,
+	    "after disabling intr, mapreg value: 0x%llx\n", new_imregval);
+
+	/*
+	 * Get lock, validate cpu and write new mapreg value.  cpu_id is
+	 * signed, so reject negative ids before they index cpu[].
+	 */
+	mutex_enter(&cpu_lock);
+	if ((cpu_id >= 0) && (cpu_id < _ncpu) && cpu[cpu_id] &&
+	    cpu_is_online(cpu[cpu_id])) {
+		/* Prepare new mapreg value with intr enabled and new cpu_id. */
+		new_imregval &=
+		    COMMON_INTR_MAP_REG_IGN | COMMON_INTR_MAP_REG_INO;
+		new_imregval = ib_get_map_reg(new_imregval, cpu_id);
+
+		DEBUG1(DBG_IB, dip, "Writing new mapreg value:0x%llx\n",
+		    new_imregval);
+		*imregp = new_imregval;
+		ib_log_new_cpu(ib_p, old_cpu_id, cpu_id, ino);
+	} else {	/* Invalid cpu.  Restore original register image. */
+		DEBUG0(DBG_IB, dip,
+		    "Invalid cpuid: writing orig mapreg value\n");
+		*imregp = imregval;
+		ret = DDI_EINVAL;
+	}
+	mutex_exit(&cpu_lock);
+
+	return (ret);
+}
+
+
+/*
  * Return the dips or number of dips associated with a given interrupt block.
  * Size of dips array arg is passed in as dips_ret arg.
  * Number of dips returned is returned in dips_ret arg.
--- a/usr/src/uts/sun4u/io/pci/pci_intr.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4u/io/pci/pci_intr.c	Wed Jul 08 12:59:05 2009 +0800
@@ -588,6 +588,7 @@
 		*ino_p->ino_map_reg;
 	}
 ino_done:
+	hdlp->ih_target = ino_p->ino_cpuid;
 	ih_p->ih_ipil_p = ipil_p;
 	ih_p->ih_ksp = kstat_create("pci_intrs",
 	    atomic_inc_32_nv(&pciintr_ks_instance), "config", "interrupts",
--- a/usr/src/uts/sun4u/io/pci/pci_tools.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4u/io/pci/pci_tools.c	Wed Jul 08 12:59:05 2009 +0800
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/stat.h>
 #include <sys/sunddi.h>
 #include <sys/param.h>
@@ -104,7 +102,6 @@
     uint64_t paddr, uint64_t *value_p);
 static int pcitool_phys_poke(pci_t *pci_p, boolean_t type, size_t size,
     uint64_t paddr, uint64_t value);
-static boolean_t pcitool_validate_cpuid(uint32_t cpu_id);
 static int pcitool_access(pci_t *pci_p, uint64_t phys_addr, uint64_t max_addr,
     uint64_t *data, uint8_t size, boolean_t write, boolean_t endian,
     uint32_t *pcitool_status);
@@ -251,22 +248,6 @@
 }
 
 
-/*
- * Validate the cpu_id passed in.
- * A value of B_TRUE will be returned for success.
- */
-static boolean_t
-pcitool_validate_cpuid(uint32_t cpuid)
-{
-	extern const int _ncpu;
-	extern cpu_t	*cpu[];
-
-	ASSERT(mutex_owned(&cpu_lock));
-
-	return ((cpuid < _ncpu) && (cpu[cpuid] && cpu_is_online(cpu[cpuid])));
-}
-
-
 /*ARGSUSED*/
 static int
 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
@@ -280,6 +261,9 @@
 		return (EFAULT);
 	}
 
+	if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
+		return (ENOTSUP);
+
 	intr_info.ctlr_version = 0;	/* XXX how to get real version? */
 	intr_info.ctlr_type = PCITOOL_CTLR_TYPE_RISC;
 	intr_info.num_intr = PCI_MAX_INO;
@@ -314,6 +298,7 @@
 	uint64_t imregval;
 	uint32_t ino;
 	uint8_t num_devs_ret;
+	int cpu_id;
 	int copyout_rval;
 	int rval = SUCCESS;
 
@@ -324,6 +309,13 @@
 		return (EFAULT);
 	}
 
+	if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) {
+		partial_iget.status = PCITOOL_IO_ERROR;
+		partial_iget.num_devs_ret = 0;
+		rval = ENOTSUP;
+		goto done_get_intr;
+	}
+
 	ino = partial_iget.ino;
 	num_devs_ret = partial_iget.num_devs_ret;
 
@@ -369,7 +361,6 @@
 	 * This bit happens to be the same on Fire and Tomatillo.
 	 */
 	if (imregval & COMMON_INTR_MAP_REG_VALID) {
-
 		/*
 		 * The following looks up the ib_ino_info and returns
 		 * info of devices mapped to this ino.
@@ -377,18 +368,23 @@
 		iget->num_devs = ib_get_ino_devs(
 		    ib_p, ino, &iget->num_devs_ret, iget->dev);
 
+		if (ib_get_intr_target(pci_p, ino, &cpu_id) != DDI_SUCCESS) {
+			iget->status = PCITOOL_IO_ERROR;
+			rval = EIO;
+			goto done_get_intr;
+		}
+
 		/*
 		 * Consider only inos mapped to devices (as opposed to
 		 * inos mapped to the bridge itself.
 		 */
 		if (iget->num_devs > 0) {
-
 			/*
 			 * These 2 items are platform specific,
 			 * extracted from the bridge.
 			 */
 			iget->ctlr = 0;
-			iget->cpu_id = ib_map_reg_get_cpu(imregval);
+			iget->cpu_id = cpu_id;
 		}
 	}
 done_get_intr:
@@ -417,16 +413,14 @@
 {
 	ib_t *ib_p = pci_p->pci_ib_p;
 	int rval = SUCCESS;
-
+	int ret = DDI_SUCCESS;
 	uint8_t zero = 0;
 	pcitool_intr_set_t iset;
-	uint32_t old_cpu_id;
-	hrtime_t start_time;
+	volatile uint64_t *imregp;
 	uint64_t imregval;
-	uint64_t new_imregval;
-	volatile uint64_t *imregp;
-	volatile uint64_t *idregp;
+
 	size_t copyinout_size;
+	int old_cpu_id;
 
 	bzero(&iset, sizeof (pcitool_intr_set_t));
 
@@ -452,7 +446,8 @@
 		goto done_set_intr;
 	}
 
-	if (iset.flags & PCITOOL_INTR_SET_FLAG_GROUP) {
+	if ((iset.flags & PCITOOL_INTR_FLAG_SET_GROUP) ||
+	    (iset.flags & PCITOOL_INTR_FLAG_SET_MSI)) {
 		iset.status = PCITOOL_IO_ERROR;
 		rval = ENOTSUP;
 		goto done_set_intr;
@@ -467,21 +462,7 @@
 	}
 
 	imregp = (uint64_t *)ib_intr_map_reg_addr(ib_p, iset.ino);
-	idregp = IB_INO_INTR_STATE_REG(ib_p, iset.ino);
-
-	DEBUG4(DBG_TOOLS, dip, "set_intr: cpu:%d, ino:0x%x, mapreg @ "
-	    "0x%llx, intr_stat @ 0x%llx\n",
-	    iset.cpu_id, iset.ino, imregp, idregp);
-
-	/* Save original mapreg value. */
 	imregval = *imregp;
-	DEBUG1(DBG_TOOLS, dip, "orig mapreg value: 0x%llx\n", imregval);
-
-	/* Is this request a noop? */
-	if ((old_cpu_id = ib_map_reg_get_cpu(imregval)) == iset.cpu_id) {
-		iset.status = PCITOOL_SUCCESS;
-		goto done_set_intr;
-	}
 
 	/* Operate only on inos which are already enabled. */
 	if (!(imregval & COMMON_INTR_MAP_REG_VALID)) {
@@ -490,66 +471,32 @@
 		goto done_set_intr;
 	}
 
-	/* Clear the interrupt valid/enable bit for particular ino. */
-	DEBUG0(DBG_TOOLS, dip, "Clearing intr_enabled...\n");
-	*imregp = imregval & ~COMMON_INTR_MAP_REG_VALID;
-
-	/* Wait until there are no more pending interrupts. */
-	start_time = gethrtime();
-
-	DEBUG0(DBG_TOOLS, dip, "About to check for pending interrupts...\n");
+	if (ib_get_intr_target(pci_p, iset.ino, &old_cpu_id) != DDI_SUCCESS) {
+		iset.status = PCITOOL_INVALID_INO;
+		rval = EINVAL;
+		goto done_set_intr;
+	}
 
-	while (IB_INO_INTR_PENDING(idregp, iset.ino)) {
-
-		DEBUG0(DBG_TOOLS, dip, "Waiting for pending ints to clear\n");
-		if ((gethrtime() - start_time) < pci_intrpend_timeout)
-			continue;
-
-		else {	/* Timed out waiting. */
-			iset.status = PCITOOL_PENDING_INTRTIMEOUT;
-			rval = ETIME;
-			goto done_set_intr;
-		}
+	if ((ret = ib_set_intr_target(pci_p, iset.ino,
+	    iset.cpu_id)) == DDI_SUCCESS) {
+		iset.cpu_id = old_cpu_id;
+		iset.status = PCITOOL_SUCCESS;
+		goto done_set_intr;
 	}
 
-	new_imregval = *imregp;
-
-	DEBUG1(DBG_TOOLS, dip,
-	    "after disabling intr, mapreg value: 0x%llx\n", new_imregval);
-
-	/*
-	 * Get lock, validate cpu and write new mapreg value.
-	 * Return original cpu value to caller via iset.cpu_id.
-	 */
-	mutex_enter(&cpu_lock);
-	if (pcitool_validate_cpuid(iset.cpu_id)) {
-
-		/* Prepare new mapreg value with intr enabled and new cpu_id. */
-		new_imregval &=
-		    COMMON_INTR_MAP_REG_IGN | COMMON_INTR_MAP_REG_INO;
-		new_imregval = ib_get_map_reg(new_imregval, iset.cpu_id);
-
-		DEBUG1(DBG_TOOLS, dip, "Writing new mapreg value:0x%llx\n",
-		    new_imregval);
-
-		*imregp = new_imregval;
-
-		ib_log_new_cpu(ib_p, old_cpu_id, iset.cpu_id, iset.ino);
-
-		mutex_exit(&cpu_lock);
-
-		iset.cpu_id = old_cpu_id;
-		iset.status = PCITOOL_SUCCESS;
-
-	} else {	/* Invalid cpu.  Restore original register image. */
-
-		DEBUG0(DBG_TOOLS, dip,
-		    "Invalid cpuid: writing orig mapreg value\n");
-
-		*imregp = imregval;
-		mutex_exit(&cpu_lock);
+	switch (ret) {
+	case DDI_EPENDING:
+		iset.status = PCITOOL_PENDING_INTRTIMEOUT;
+		rval = ETIME;
+		break;
+	case DDI_EINVAL:
 		iset.status = PCITOOL_INVALID_CPUID;
 		rval = EINVAL;
+		break;
+	default:
+		iset.status = PCITOOL_INVALID_INO;
+		rval = EINVAL;
+		break;
 	}
 done_set_intr:
 	iset.drvr_version = PCITOOL_VERSION;
--- a/usr/src/uts/sun4u/sys/pci/pci_ib.h	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4u/sys/pci/pci_ib.h	Wed Jul 08 12:59:05 2009 +0800
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_PCI_IB_H
 #define	_SYS_PCI_IB_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -218,8 +216,6 @@
     uint_t (*int_handler)(caddr_t int_handler_arg1, caddr_t int_handler_arg2),
     caddr_t int_handler_arg1, caddr_t int_handler_arg2);
 extern void ib_free_ih(ih_t *ih_p);
-extern int ib_update_intr_state(pci_t *pci_p, dev_info_t *rdip,
-    ddi_intr_handle_impl_t *hdlp, uint_t new_intr_state);
 extern void ib_ino_map_reg_share(ib_t *ib_p, ib_ino_t ino,
     ib_ino_info_t *ino_p);
 extern int ib_ino_map_reg_unshare(ib_t *ib_p, ib_ino_t ino,
@@ -231,6 +227,10 @@
     volatile uint64_t *imr_p);
 extern void ib_intr_dist_all(void *arg, int32_t max_weight, int32_t weight);
 extern void ib_cpu_ticks_to_ih_nsec(ib_t *ib_p, ih_t *ih_p, uint32_t cpu_id);
+extern int ib_update_intr_state(pci_t *pci_p, dev_info_t *rdip,
+    ddi_intr_handle_impl_t *hdlp, uint_t new_intr_state);
+extern int ib_get_intr_target(pci_t *pci_p, ib_ino_t ino, int *cpu_id_p);
+extern int ib_set_intr_target(pci_t *pci_p, ib_ino_t ino, int cpu_id);
 extern uint8_t ib_get_ino_devs(ib_t *ib_p, uint32_t ino, uint8_t *devs_ret,
     pcitool_intr_dev_t *devs);
 extern void ib_log_new_cpu(ib_t *ib_p, uint32_t old_cpu_id, uint32_t new_cpu_id,
--- a/usr/src/uts/sun4v/io/px/px_lib4v.c	Tue Jul 07 11:23:28 2009 -0700
+++ b/usr/src/uts/sun4v/io/px/px_lib4v.c	Wed Jul 08 12:59:05 2009 +0800
@@ -268,7 +268,7 @@
 		return (DDI_FAILURE);
 	}
 
-	DBG(DBG_LIB_INT, dip, "px_lib_intr_gettarget: cpuid 0x%x\n", cpuid);
+	DBG(DBG_LIB_INT, dip, "px_lib_intr_gettarget: cpuid 0x%x\n", *cpuid);
 
 	return (DDI_SUCCESS);
 }