changeset 5045:75a798a98460

6577948 mach_alloc_mwait leaks memory when a CPU fails to start 6588054 panic() in mach_alloc_mwait() should be changed to degraded operation... 6596141 Solaris should not use an unmodified MWAIT idle loop on AMD 10h due to increased power consumption
author bholler
date Thu, 13 Sep 2007 19:59:30 -0700
parents 366fa13eb032
children 66c87e13529d
files usr/src/uts/i86pc/os/cpuid.c usr/src/uts/i86pc/os/mp_machdep.c usr/src/uts/i86pc/os/mp_startup.c usr/src/uts/intel/sys/x86_archext.h
diffstat 4 files changed, 114 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/i86pc/os/cpuid.c	Thu Sep 13 15:17:59 2007 -0700
+++ b/usr/src/uts/i86pc/os/cpuid.c	Thu Sep 13 19:59:30 2007 -0700
@@ -40,6 +40,7 @@
 #include <sys/sunndi.h>
 #include <sys/cpuvar.h>
 #include <sys/processor.h>
+#include <sys/sysmacros.h>
 #include <sys/pg.h>
 #include <sys/fp.h>
 #include <sys/controlregs.h>
@@ -116,10 +117,17 @@
 
 /*
  * monitor/mwait info.
+ *
+ * size_actual and buf_actual are the real address and size allocated to get
+ * proper mwait_buf alignement.  buf_actual and size_actual should be passed
+ * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
+ * processor cache-line alignment, but this is not guarantied in the furture.
  */
 struct mwait_info {
 	size_t		mon_min;	/* min size to avoid missed wakeups */
 	size_t		mon_max;	/* size to avoid false wakeups */
+	size_t		size_actual;	/* size actually allocated */
+	void		*buf_actual;	/* memory actually allocated */
 	uint32_t	support;	/* processor support of monitor/mwait */
 };
 
@@ -465,6 +473,7 @@
 	struct cpuid_info *cpi;
 	struct cpuid_regs *cp;
 	int xcpuid;
+	extern int idle_cpu_prefer_mwait;
 
 
 	/*
@@ -653,6 +662,16 @@
 		 */
 		if (cpi->cpi_maxeax < 5)
 			mask_ecx &= ~CPUID_INTC_ECX_MON;
+
+		/*
+		 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
+		 * processors.  AMD does not intend MWAIT to be used in the cpu
+		 * idle loop on current and future processors.  10h and future
+		 * AMD processors use more power in MWAIT than HLT.
+		 * Pre-family-10h Opterons do not have the MWAIT instruction.
+		 */
+		idle_cpu_prefer_mwait = 0;
+
 		break;
 	case X86_VENDOR_TM:
 		/*
@@ -1197,6 +1216,8 @@
 			break;
 
 		case 5:	/* Monitor/Mwait parameters */
+		{
+			size_t mwait_size;
 
 			/*
 			 * check cpi_mwait.support which was set in cpuid_pass1
@@ -1204,8 +1225,23 @@
 			if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
 				break;
 
+			/*
+			 * Protect ourself from insane mwait line size.
+			 * Workaround for incomplete hardware emulator(s).
+			 */
+			mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
+			if (mwait_size < sizeof (uint32_t) ||
+			    !ISP2(mwait_size)) {
+#if DEBUG
+				cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
+				    "size %ld",
+				    cpu->cpu_id, (long)mwait_size);
+#endif
+				break;
+			}
+
 			cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
-			cpi->cpi_mwait.mon_max = (size_t)MWAIT_SIZE_MAX(cpi);
+			cpi->cpi_mwait.mon_max = mwait_size;
 			if (MWAIT_EXTENSION(cpi)) {
 				cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
 				if (MWAIT_INT_ENABLE(cpi))
@@ -1213,6 +1249,7 @@
 					    MWAIT_ECX_INT_ENABLE;
 			}
 			break;
+		}
 		default:
 			break;
 		}
@@ -3405,9 +3442,59 @@
 	return (l2i->l2i_ret);
 }
 
-size_t
-cpuid_get_mwait_size(cpu_t *cpu)
+uint32_t *
+cpuid_mwait_alloc(cpu_t *cpu)
+{
+	uint32_t	*ret;
+	size_t		mwait_size;
+
+	ASSERT(cpuid_checkpass(cpu, 2));
+
+	mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
+	if (mwait_size == 0)
+		return (NULL);
+
+	/*
+	 * kmem_alloc() returns cache line size aligned data for mwait_size
+	 * allocations.  mwait_size is currently cache line sized.  Neither
+	 * of these implementation details are guarantied to be true in the
+	 * future.
+	 *
+	 * First try allocating mwait_size as kmem_alloc() currently returns
+	 * correctly aligned memory.  If kmem_alloc() does not return
+	 * mwait_size aligned memory, then use mwait_size ROUNDUP.
+	 *
+	 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
+	 * decide to free this memory.
+	 */
+	ret = kmem_zalloc(mwait_size, KM_SLEEP);
+	if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
+		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
+		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
+		*ret = MWAIT_RUNNING;
+		return (ret);
+	} else {
+		kmem_free(ret, mwait_size);
+		ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
+		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
+		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
+		ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
+		*ret = MWAIT_RUNNING;
+		return (ret);
+	}
+}
+
+void
+cpuid_mwait_free(cpu_t *cpu)
 {
 	ASSERT(cpuid_checkpass(cpu, 2));
-	return (cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max);
+
+	if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
+	    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
+		kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
+		    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
+	}
+
+	cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
+	cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
 }
--- a/usr/src/uts/i86pc/os/mp_machdep.c	Thu Sep 13 15:17:59 2007 -0700
+++ b/usr/src/uts/i86pc/os/mp_machdep.c	Thu Sep 13 19:59:30 2007 -0700
@@ -822,13 +822,26 @@
 	/*
 	 * Initialize the dispatcher's function hooks
 	 * to enable CPU halting when idle.
-	 * Do not use monitor/mwait if idle_cpu_use_hlt is not set(spin idle).
+	 * Do not use monitor/mwait if idle_cpu_use_hlt is not set(spin idle)
+	 * or idle_cpu_prefer_mwait is not set.
 	 * Allocate monitor/mwait buffer for cpu0.
 	 */
 	if (idle_cpu_use_hlt) {
 		if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) {
-			CPU->cpu_m.mcpu_mwait = mach_alloc_mwait(CPU);
-			idle_cpu = cpu_idle_mwait;
+			CPU->cpu_m.mcpu_mwait = cpuid_mwait_alloc(CPU);
+			/*
+			 * Protect ourself from insane mwait size.
+			 */
+			if (CPU->cpu_m.mcpu_mwait == NULL) {
+#ifdef DEBUG
+				cmn_err(CE_NOTE, "Using hlt idle.  Cannot "
+				    "handle cpu 0 mwait size.");
+#endif
+				idle_cpu_prefer_mwait = 0;
+				idle_cpu = cpu_idle;
+			} else {
+				idle_cpu = cpu_idle_mwait;
+			}
 		} else {
 			idle_cpu = cpu_idle;
 		}
@@ -837,42 +850,6 @@
 	mach_smpinit();
 }
 
-/*
- * Return a pointer to memory suitable for monitor/mwait use.  Memory must be
- * aligned as specified by cpuid (a cache line size).
- */
-uint32_t *
-mach_alloc_mwait(cpu_t *cp)
-{
-	size_t		mwait_size = cpuid_get_mwait_size(cp);
-	uint32_t	*ret;
-
-	if (mwait_size < sizeof (uint32_t) || !ISP2(mwait_size))
-		panic("Can't handle mwait size %ld", (long)mwait_size);
-
-	/*
-	 * kmem_alloc() returns cache line size aligned data for mwait_size
-	 * allocations.  mwait_size is currently cache line sized.  Neither
-	 * of these implementation details are guarantied to be true in the
-	 * future.
-	 *
-	 * First try allocating mwait_size as kmem_alloc() currently returns
-	 * correctly aligned memory.  If kmem_alloc() does not return
-	 * mwait_size aligned memory, then use mwait_size ROUNDUP.
-	 */
-	ret = kmem_zalloc(mwait_size, KM_SLEEP);
-	if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
-		*ret = MWAIT_RUNNING;
-		return (ret);
-	} else {
-		kmem_free(ret, mwait_size);
-		ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
-		ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
-		*ret = MWAIT_RUNNING;
-		return (ret);
-	}
-}
-
 static void
 mach_smpinit(void)
 {
--- a/usr/src/uts/i86pc/os/mp_startup.c	Thu Sep 13 15:17:59 2007 -0700
+++ b/usr/src/uts/i86pc/os/mp_startup.c	Thu Sep 13 19:59:30 2007 -0700
@@ -238,6 +238,7 @@
 	kthread_id_t tp;
 	caddr_t	sp;
 	proc_t *procp;
+	extern int idle_cpu_prefer_mwait;
 	extern void idle();
 
 #ifdef TRAPTRACE
@@ -247,8 +248,8 @@
 	ASSERT(cpun < NCPU && cpu[cpun] == NULL);
 
 	cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
-	if (x86_feature & X86_MWAIT)
-		cp->cpu_m.mcpu_mwait = mach_alloc_mwait(CPU);
+	if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait)
+		cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(CPU);
 
 	procp = curthread->t_procp;
 
@@ -499,6 +500,8 @@
 	disp_cpu_fini(cp);
 	mutex_exit(&cpu_lock);
 
+	if (cp->cpu_m.mcpu_mwait != NULL)
+		cpuid_mwait_free(cp);
 	kmem_free(cp, sizeof (*cp));
 }
 
--- a/usr/src/uts/intel/sys/x86_archext.h	Thu Sep 13 15:17:59 2007 -0700
+++ b/usr/src/uts/intel/sys/x86_archext.h	Thu Sep 13 19:59:30 2007 -0700
@@ -589,7 +589,8 @@
 
 extern void cpuid_get_addrsize(struct cpu *, uint_t *, uint_t *);
 extern uint_t cpuid_get_dtlb_nent(struct cpu *, size_t);
-extern size_t cpuid_get_mwait_size(struct cpu *cpu);
+extern uint32_t *cpuid_mwait_alloc(struct cpu *);
+extern void cpuid_mwait_free(struct cpu *);
 
 struct cpu_ucode_info;