changeset 6691:f8848c7acc9e

6671130 Shanghai provides better TLB management for 1GB pages 6679225 erratum 298 detection needed 6692442 errata updates needed for griffin processors (family 0x11)
author kchow
date Thu, 22 May 2008 13:30:16 -0700
parents 19b5b95523e1
children a6d8ea2756e6
files usr/src/uts/i86pc/Makefile.workarounds usr/src/uts/i86pc/os/cpuid.c usr/src/uts/i86pc/os/mp_startup.c usr/src/uts/i86pc/vm/hat_i86.c usr/src/uts/intel/sys/controlregs.h usr/src/uts/intel/sys/x86_archext.h
diffstat 6 files changed, 232 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/i86pc/Makefile.workarounds	Thu May 22 12:51:35 2008 -0700
+++ b/usr/src/uts/i86pc/Makefile.workarounds	Thu May 22 13:30:16 2008 -0700
@@ -18,7 +18,7 @@
 #
 # CDDL HEADER END
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 # ident	"%Z%%M%	%I%	%E% SMI"
@@ -111,3 +111,9 @@
 # Some Registered DIMMs incompatible with address parity feature
 #
 WORKAROUND_DEFS += -DOPTERON_ERRATUM_172
+
+#
+# L2 Eviction May Occur During Processor Operation To Set
+# Accessed or Dirty Bit.
+#
+WORKAROUND_DEFS += -DOPTERON_ERRATUM_298
--- a/usr/src/uts/i86pc/os/cpuid.c	Thu May 22 12:51:35 2008 -0700
+++ b/usr/src/uts/i86pc/os/cpuid.c	Thu May 22 13:30:16 2008 -0700
@@ -2676,6 +2676,14 @@
 			    SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
 			    DH_E6(eax) || JH_E6(eax))
 
+#define	DR_AX(eax)	(eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
+#define	DR_B0(eax)	(eax == 0x100f20)
+#define	DR_B1(eax)	(eax == 0x100f21)
+#define	DR_BA(eax)	(eax == 0x100f2a)
+#define	DR_B2(eax)	(eax == 0x100f22)
+#define	DR_B3(eax)	(eax == 0x100f23)
+#define	RB_C0(eax)	(eax == 0x100f40)
+
 	switch (erratum) {
 	case 1:
 		return (cpi->cpi_family < 0x10);
@@ -2684,11 +2692,11 @@
 	case 52:
 		return (B(eax));
 	case 57:
-		return (cpi->cpi_family <= 0x10);
+		return (cpi->cpi_family <= 0x11);
 	case 58:
 		return (B(eax));
 	case 60:
-		return (cpi->cpi_family <= 0x10);
+		return (cpi->cpi_family <= 0x11);
 	case 61:
 	case 62:
 	case 63:
@@ -2709,7 +2717,7 @@
 	case 76:
 		return (B(eax));
 	case 77:
-		return (cpi->cpi_family <= 0x10);
+		return (cpi->cpi_family <= 0x11);
 	case 78:
 		return (B(eax) || SH_C0(eax));
 	case 79:
@@ -2791,7 +2799,7 @@
 	case 121:
 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
 	case 122:
-		return (cpi->cpi_family < 0x10);
+		return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
 	case 123:
 		return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
 	case 131:
@@ -2812,6 +2820,81 @@
 		return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
 		    (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
 
+	case 6671130:
+		/*
+		 * check for processors (pre-Shanghai) that do not provide
+		 * optimal management of 1gb ptes in its tlb.
+		 */
+		return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
+
+	case 298:
+		return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
+		    DR_B2(eax) || RB_C0(eax));
+
+	default:
+		return (-1);
+
+	}
+}
+
+/*
+ * Determine if specified erratum is present via OSVW (OS Visible Workaround).
+ * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
+ */
+int
+osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
+{
+	struct cpuid_info	*cpi;
+	uint_t			osvwid;
+	static int		osvwfeature = -1;
+	uint64_t		osvwlength;
+
+
+	cpi = cpu->cpu_m.mcpu_cpi;
+
+	/* confirm OSVW supported */
+	if (osvwfeature == -1) {
+		osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
+	} else {
+		/* assert that osvw feature setting is consistent on all cpus */
+		ASSERT(osvwfeature ==
+		    (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
+	}
+	if (!osvwfeature)
+		return (-1);
+
+	osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
+
+	switch (erratum) {
+	case 298:	/* osvwid is 0 */
+		osvwid = 0;
+		if (osvwlength <= (uint64_t)osvwid) {
+			/* osvwid 0 is unknown */
+			return (-1);
+		}
+
+		/*
+		 * Check the OSVW STATUS MSR to determine the state
+		 * of the erratum where:
+		 *   0 - fixed by HW
+		 *   1 - BIOS has applied the workaround when BIOS
+		 *   workaround is available. (Or for other errata,
+		 *   OS workaround is required.)
+		 * For a value of 1, caller will confirm that the
+		 * erratum 298 workaround has indeed been applied by BIOS.
+		 *
+		 * A 1 may be set in cpus that have a HW fix
+		 * in a mixed cpu system. Regarding erratum 298:
+		 *   In a multiprocessor platform, the workaround above
+		 *   should be applied to all processors regardless of
+		 *   silicon revision when an affected processor is
+		 *   present.
+		 */
+
+		return (rdmsr(MSR_AMD_OSVW_STATUS +
+		    (osvwid / OSVW_ID_CNT_PER_MSR)) &
+		    (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
+
 	default:
 		return (-1);
 	}
--- a/usr/src/uts/i86pc/os/mp_startup.c	Thu May 22 12:51:35 2008 -0700
+++ b/usr/src/uts/i86pc/os/mp_startup.c	Thu May 22 13:30:16 2008 -0700
@@ -592,6 +592,10 @@
 int opteron_workaround_6323525;	/* if non-zero -> at least one cpu has it */
 #endif
 
+#if defined(OPTERON_ERRATUM_298)
+int opteron_erratum_298;	/* non-zero -> BIOS workaround not detected */
+#endif
+
 static void
 workaround_warning(cpu_t *cp, uint_t erratum)
 {
@@ -677,6 +681,57 @@
 #endif
 
 uint_t
+do_erratum_298(struct cpu *cpu)
+{
+	extern int	osvw_opteron_erratum(cpu_t *, uint_t);
+	static int	osvw_state = -3;	/* -3: not yet queried */
+	int		suspect;
+
+	/*
+	 * Erratum 298: L2 Eviction May Occur During Processor Operation
+	 * To Set Accessed or Dirty Bit.
+	 *
+	 * The OSVW answer is fetched once and cached; every later cpu is
+	 * expected to report the same answer.
+	 */
+	if (osvw_state != -3) {
+		ASSERT(osvw_state == osvw_opteron_erratum(cpu, 298));
+	} else {
+		osvw_state = osvw_opteron_erratum(cpu, 298);
+	}
+
+	/*
+	 * Decide whether the BIOS workaround has to be verified on this cpu:
+	 *    1 - OSVW reports the erratum (BIOS workaround expected)
+	 *   -1 - OSVW cannot tell, so fall back to the cpuid check
+	 *   anything else - erratum not present, nothing to verify
+	 */
+	if (osvw_state == 1)
+		suspect = 1;
+	else if (osvw_state == -1)
+		suspect = (cpuid_opteron_erratum(cpu, 298) > 0);
+	else
+		suspect = 0;
+
+	if (!suspect)
+		return (0);
+
+	/* the workaround is in place only when both MSR bits are set */
+	if (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) != 0) &&
+	    ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) != 0))
+		return (0);
+
+	/* workaround missing: count it if compiled in, else warn now */
+#if defined(OPTERON_ERRATUM_298)
+	opteron_erratum_298++;
+	return (0);
+#else
+	workaround_warning(cpu, 298);
+	return (1);
+#endif
+}
+
+uint_t
 workaround_errata(struct cpu *cpu)
 {
 	uint_t missing = 0;
@@ -1041,7 +1096,7 @@
 #if defined(OPTERON_WORKAROUND_6323525)
 		/*
 		 * This problem only occurs with 2 or more cores. If bit in
-		 * MSR_BU_CFG set, then not applicable. The workaround
+		 * MSR_AMD_BU_CFG set, then not applicable. The workaround
 		 * is to patch the semaphone routines with the lfence
 		 * instruction to provide necessary load memory barrier with
 		 * possible subsequent read-modify-write ops.
@@ -1072,7 +1127,7 @@
 #else	/* __xpv */
 		} else if ((x86_feature & X86_SSE2) && ((opteron_get_nnodes() *
 		    cpuid_get_ncpu_per_chip(cpu)) > 1)) {
-			if ((xrdmsr(MSR_BU_CFG) & 0x02) == 0)
+			if ((xrdmsr(MSR_AMD_BU_CFG) & 0x02) == 0)
 				opteron_workaround_6323525++;
 #endif	/* __xpv */
 		}
@@ -1082,6 +1137,8 @@
 #endif
 	}
 
+	missing += do_erratum_298(cpu);
+
 #ifdef __xpv
 	return (0);
 #else
@@ -1162,6 +1219,16 @@
 	if (opteron_workaround_6323525)
 		workaround_applied(6323525);
 #endif
+#if defined(OPTERON_ERRATUM_298)
+	if (opteron_erratum_298) {
+		cmn_err(CE_WARN,
+		    "BIOS microcode patch for AMD 64/Opteron(tm)"
+		    " processor\nerratum 298 was not detected; updating your"
+		    " system's BIOS to a version\ncontaining this"
+		    " microcode patch is HIGHLY recommended or erroneous"
+		    " system\noperation may occur.\n");
+	}
+#endif
 }
 
 static cpuset_t procset;
--- a/usr/src/uts/i86pc/vm/hat_i86.c	Thu May 22 12:51:35 2008 -0700
+++ b/usr/src/uts/i86pc/vm/hat_i86.c	Thu May 22 13:30:16 2008 -0700
@@ -130,9 +130,23 @@
 uint_t use_boot_reserve = 1;	/* cleared after early boot process */
 uint_t can_steal_post_boot = 0;	/* set late in boot to enable stealing */
 
-/* export 1g page size to user applications if set */
+/*
+ * enable_1gpg: controls 1g page support for user applications.
+ * By default, 1g pages are exported to user applications. enable_1gpg can
+ * be set to 0 to not export.
+ */
 int	enable_1gpg = 1;
 
+/*
+ * AMD shanghai processors provide better management of 1gb ptes in their tlb.
+ * By default, 1g page support will be disabled for pre-shanghai AMD
+ * processors that don't have optimal tlb support for the 1g page size.
+ * chk_optimal_1gtlb can be set to 0 to force 1g page support on sub-optimal
+ * processors.
+ */
+int	chk_optimal_1gtlb = 1;
+
+
 #ifdef DEBUG
 uint_t	map1gcnt;
 #endif
@@ -461,6 +475,36 @@
 }
 
 /*
+ * Pick the largest page level for the kernel (mmu.max_page_level) and for
+ * user applications (mmu.umax_page_level); 0 if !kbm_largepage_support.
+ */
+static void
+set_max_page_level()
+{
+	level_t lvl;
+
+	if (!kbm_largepage_support) {
+		lvl = 0;	/* must not be overridden by the checks below */
+	} else if (x86_feature & X86_1GPG) {
+		lvl = 2;
+		if (chk_optimal_1gtlb && cpuid_opteron_erratum(CPU, 6671130)) {
+			lvl = 1;	/* sub-optimal 1g tlb handling */
+		}
+		if (plat_mnode_xcheck(LEVEL_SIZE(2) >> LEVEL_SHIFT(0))) {
+			lvl = 1;
+		}
+	} else {
+		lvl = 1;
+	}
+	mmu.max_page_level = lvl;
+
+	if ((lvl == 2) && (enable_1gpg == 0))
+		mmu.umax_page_level = 1;	/* 1g pages not exported */
+	else
+		mmu.umax_page_level = lvl;
+}
+
+/*
  * Initialize hat data structures based on processor MMU information.
  */
 void
@@ -571,24 +615,8 @@
 		mmu.level_mask[i] = ~mmu.level_offset[i];
 	}
 
-	/*
-	 * Initialize parameters based on the 64 or 32 bit kernels and
-	 * for the 32 bit kernel decide if we should use PAE.
-	 */
-	if (kbm_largepage_support) {
-
-		if ((x86_feature & X86_1GPG) &&
-		    plat_mnode_xcheck((LEVEL_SIZE(2) >> LEVEL_SHIFT(0))) == 0) {
-			mmu.max_page_level = 2;
-			mmu.umax_page_level = (enable_1gpg) ? 2 : 1;
-		} else {
-			mmu.max_page_level = 1;
-			mmu.umax_page_level = 1;
-		}
-	} else {
-		mmu.max_page_level = 0;
-		mmu.umax_page_level = 0;
-	}
+	set_max_page_level();
+
 	mmu_page_sizes = mmu.max_page_level + 1;
 	mmu_exported_page_sizes = mmu.umax_page_level + 1;
 
--- a/usr/src/uts/intel/sys/controlregs.h	Thu May 22 12:51:35 2008 -0700
+++ b/usr/src/uts/intel/sys/controlregs.h	Thu May 22 13:30:16 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -196,6 +196,7 @@
 
 #define	MSR_AMD_HWCR	0xc0010015
 
+#define	AMD_HWCR_TLBCACHEDIS		(UINT64_C(1) << 3)
 #define	AMD_HWCR_FFDIS			0x00040	/* disable TLB Flush Filter */
 #define	AMD_HWCR_MCI_STATUS_WREN	0x40000	/* enable write of MCi_STATUS */
 
@@ -203,11 +204,21 @@
 
 #define	MSR_AMD_NB_CFG	0xc001001f
 
-#define	MSR_BU_CFG	0xc0011023
-
 #define	AMD_NB_CFG_SRQ_HEARTBEAT	(UINT64_C(1) << 20)
 #define	AMD_NB_CFG_SRQ_SPR		(UINT64_C(1) << 32)
 
+#define	MSR_AMD_BU_CFG	0xc0011023
+
+#define	AMD_BU_CFG_E298			(UINT64_C(1) << 1)
+
+/* AMD's osvw (OS Visible Workaround) MSRs */
+#define	MSR_AMD_OSVW_ID_LEN		0xc0010140
+#define	MSR_AMD_OSVW_STATUS		0xc0010141
+
+
+#define	OSVW_ID_LEN_MASK		0xffffULL	/* length bits of ID_LEN */
+#define	OSVW_ID_CNT_PER_MSR		64	/* osvw ids per STATUS msr */
+
 /*
  * Enable PCI Extended Configuration Space (ECS) on Greyhound
  */
--- a/usr/src/uts/intel/sys/x86_archext.h	Thu May 22 12:51:35 2008 -0700
+++ b/usr/src/uts/intel/sys/x86_archext.h	Thu May 22 13:30:16 2008 -0700
@@ -168,9 +168,17 @@
 #define	CPUID_AMD_ECX_CR8D	0x00000010	/* AMD: 32-bit mov %cr8 */
 #define	CPUID_AMD_ECX_LZCNT	0x00000020	/* AMD: LZCNT insn */
 #define	CPUID_AMD_ECX_SSE4A	0x00000040	/* AMD: SSE4A insns */
+#define	CPUID_AMD_ECX_MAS	0x00000080	/* AMD: MisAlignSse mode */
+#define	CPUID_AMD_ECX_3DNP	0x00000100	/* AMD: 3DNowPrefetch */
+#define	CPUID_AMD_ECX_OSVW	0x00000200	/* AMD: OSVW */
+#define	CPUID_AMD_ECX_IBS	0x00000400	/* AMD: IBS */
+#define	CPUID_AMD_ECX_SSE5	0x00000800	/* AMD: SSE5 */
+#define	CPUID_AMD_ECX_SKINIT	0x00001000	/* AMD: SKINIT */
+#define	CPUID_AMD_ECX_WDT	0x00002000	/* AMD: WDT */
 
 #define	FMT_CPUID_AMD_ECX					\
 	"\20"							\
+	"\16wdt\15skinit\14sse5\13ibs\12osvw\0113dnp\10mas" /* octal bit #s */ \
 	"\7sse4a\6lzcnt\5cr8d\3svm\2lcmplgcy\1ahf64"
 
 /*