changeset 7798:2a682532f0ca

6749646 cpuid_pass2 stomps on cpi_xmaxeax 6751751 race in xc_common() & xc_serv() since WRMSR can not be used as a serializing instruction in x2apic
author Saurabh Misra <Saurabh.Mishra@Sun.COM>
date Fri, 10 Oct 2008 10:17:05 -0700
parents 048276b7fd9a
children 05fc7b266484
files usr/src/uts/i86pc/io/pcplusmp/apic.c usr/src/uts/i86pc/io/pcplusmp/apic_regops.c usr/src/uts/i86pc/os/cpuid.c usr/src/uts/i86pc/sys/apic.h
diffstat 4 files changed, 95 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/i86pc/io/pcplusmp/apic.c	Fri Oct 10 09:59:19 2008 -0700
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic.c	Fri Oct 10 10:17:05 2008 -0700
@@ -561,14 +561,6 @@
 	uint_t nlvt;
 	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
 
-	/*
-	 * On BSP we would have enabled x2apic, if supported by processor,
-	 * in acpi_probe(), but on AP we do it here.
-	 */
-	if (apic_detect_x2apic()) {
-		apic_enable_x2apic();
-	}
-
 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
 
 	if (apic_mode == LOCAL_APIC) {
@@ -821,19 +813,38 @@
 	outb(CMOS_ADDR, SSB);
 	outb(CMOS_DATA, BIOS_SHUTDOWN);
 
-	while (apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
-		apic_ret();
+	/*
+	 * According to x2APIC specification in section '2.3.5.1' of
+	 * Interrupt Command Register Semantics, the semantics of
+	 * programming the Interrupt Command Register to dispatch an interrupt
+	 * is simplified. A single MSR write to the 64-bit ICR is required
+	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
+	 * interface to ICR, system software is not required to check the
+	 * status of the delivery status bit prior to writing to the ICR
+	 * to send an IPI. With the removal of the Delivery Status bit,
+	 * system software no longer has a reason to read the ICR. It remains
+	 * readable only to aid in debugging.
+	 */
+#ifdef	DEBUG
+	APIC_AV_PENDING_SET();
+#else
+	if (apic_mode == LOCAL_APIC) {
+		APIC_AV_PENDING_SET();
+	}
+#endif /* DEBUG */
 
 	/* for integrated - make sure there is one INIT IPI in buffer */
 	/* for external - it will wake up the cpu */
 	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_ASSERT | AV_RESET);
 
 	/* If only 1 CPU is installed, PENDING bit will not go low */
-	for (loop_count = 0x1000; loop_count; loop_count--)
-		if (apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
+	for (loop_count = 0x1000; loop_count; loop_count--) {
+		if (apic_mode == LOCAL_APIC &&
+		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
 			apic_ret();
 		else
 			break;
+	}
 
 	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_DEASSERT | AV_RESET);
 
@@ -1105,8 +1116,7 @@
 
 	flag = intr_clear();
 
-	while (apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
-		apic_ret();
+	APIC_AV_PENDING_SET();
 
 	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
 	    vector);
@@ -1213,8 +1223,12 @@
 	if (cpun == apic_cpus[0].aci_local_id) {
 		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
 	} else {
-		while (apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
-			apic_ret();
+#ifdef	DEBUG
+		APIC_AV_PENDING_SET();
+#else
+		if (apic_mode == LOCAL_APIC)
+			APIC_AV_PENDING_SET();
+#endif /* DEBUG */
 
 		apic_reg_ops->apic_write_int_cmd(
 		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
@@ -1322,6 +1336,15 @@
 {
 	int cpun;
 
+	/*
+	 * On BSP we would have enabled X2APIC, if supported by processor,
+	 * in acpi_probe(), but on AP we do it here.
+	 */
+	if (apic_detect_x2apic()) {
+		apic_enable_x2apic();
+	}
+
+	splx(ipltospl(LOCK_LEVEL));
 	apic_init_intr();
 
 	/*
@@ -1330,8 +1353,12 @@
 	 */
 	setcr0(getcr0() & ~(CR0_CD | CR0_NW));
 
-	while (apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
-		apic_ret();
+#ifdef	DEBUG
+	APIC_AV_PENDING_SET();
+#else
+	if (apic_mode == LOCAL_APIC)
+		APIC_AV_PENDING_SET();
+#endif	/* DEBUG */
 
 	/*
 	 * We may be booting, or resuming from suspend; aci_status will
@@ -1572,8 +1599,12 @@
 
 	/* Send NMI to all CPUs except self to do per processor shutdown */
 	iflag = intr_clear();
-	while (apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
-		apic_ret();
+#ifdef	DEBUG
+	APIC_AV_PENDING_SET();
+#else
+	if (apic_mode == LOCAL_APIC)
+		APIC_AV_PENDING_SET();
+#endif /* DEBUG */
 	apic_shutdown_processors = 1;
 	apic_reg_ops->apic_write(APIC_INT_CMD1,
 	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);
--- a/usr/src/uts/i86pc/io/pcplusmp/apic_regops.c	Fri Oct 10 09:59:19 2008 -0700
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic_regops.c	Fri Oct 10 10:17:05 2008 -0700
@@ -170,7 +170,7 @@
 static int
 get_local_x2apic_pri(void)
 {
-	return (rdmsr(REG_X2APIC_BASE_MSR + (APIC_TASK_REG) >> 2));
+	return (rdmsr(REG_X2APIC_BASE_MSR + (APIC_TASK_REG >> 2)));
 }
 
 static void
@@ -257,14 +257,42 @@
 {
 	int vector;
 	ulong_t flag;
+
 	ASSERT(apic_mode == LOCAL_X2APIC);
 
+	/*
+	 * With X2APIC, Intel relaxed the semantics of the
+	 * WRMSR instruction such that references to the X2APIC
+	 * MSR registers are no longer serializing instructions.
+	 * The code that initiates IPIs assumes that some sort
+	 * of memory serialization occurs. The old APIC code
+	 * did a write to uncachable memory mapped registers.
+	 * Any reference to uncached memory is a serializing
+	 * operation. To mimic those semantics here, we do an
+	 * atomic operation, which translates to a LOCK OR instruction,
+	 * which is serializing.
+	 */
+	atomic_or_ulong(&flag, 1);
+
 	vector = apic_resv_vector[ipl];
 
 	flag = intr_clear();
 
-	while (apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
-		apic_ret();
+	/*
+	 * According to x2APIC specification in section '2.3.5.1' of
+	 * Interrupt Command Register Semantics, the semantics of
+	 * programming Interrupt Command Register to dispatch an interrupt
+	 * is simplified. A single MSR write to the 64-bit ICR is required
+	 * for dispatching an interrupt. Specifically with the 64-bit MSR
+	 * interface to ICR, system software is not required to check the
+	 * status of the delivery status bit prior to writing to the ICR
+	 * to send an IPI. With the removal of the Delivery Status bit,
+	 * system software no longer has a reason to read the ICR. It remains
+	 * readable only to aid in debugging.
+	 */
+#ifdef	DEBUG
+	APIC_AV_PENDING_SET();
+#endif	/* DEBUG */
 
 	if ((cpun == psm_get_cpu_id()))
 		apic_reg_ops->apic_write(X2APIC_SELF_IPI, vector);
--- a/usr/src/uts/i86pc/os/cpuid.c	Fri Oct 10 09:59:19 2008 -0700
+++ b/usr/src/uts/i86pc/os/cpuid.c	Fri Oct 10 10:17:05 2008 -0700
@@ -1305,8 +1305,7 @@
 			    !ISP2(mwait_size)) {
 #if DEBUG
 				cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
-				    "size %ld",
-				    cpu->cpu_id, (long)mwait_size);
+				    "size %ld", cpu->cpu_id, (long)mwait_size);
 #endif
 				break;
 			}
@@ -1327,8 +1326,11 @@
 	}
 
 	if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
+		struct cpuid_regs regs;
+
+		cp = &regs;
 		cp->cp_eax = 0xB;
-		cp->cp_ecx = 0;
+		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
 
 		(void) __cpuid_insn(cp);
 
@@ -1373,6 +1375,9 @@
 			cpi->cpi_coreid = x2apic_id >> coreid_shift;
 			cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
 		}
+
+		/* Make cp NULL so that we don't stumble on others */
+		cp = NULL;
 	}
 
 	if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
--- a/usr/src/uts/i86pc/sys/apic.h	Fri Oct 10 09:59:19 2008 -0700
+++ b/usr/src/uts/i86pc/sys/apic.h	Fri Oct 10 10:17:05 2008 -0700
@@ -106,7 +106,7 @@
 #define	LOCAL_APIC		0x2
 #define	LOCAL_X2APIC		0x3
 
-/* x2APIC SELF IPI Reguster */
+/* x2APIC SELF IPI Register */
 #define	X2APIC_SELF_IPI		0x100
 
 /* General x2APIC constants used at various places */
@@ -639,6 +639,10 @@
 	if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \
 		cmn_err fmt;
 
+#define	APIC_AV_PENDING_SET() \
+	while (apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING) \
+		apic_ret();
+
 #define	APIC_VERBOSE_IRQ(fmt) \
 	if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \
 		cmn_err fmt;