Mercurial > illumos > illumos-gate
changeset 6691:f8848c7acc9e
6671130 Shanghai provides better TLB management for 1GB pages
6679225 erratum 298 detection needed
6692442 errata updates needed for griffin processors (family 0x11)
author | kchow |
---|---|
date | Thu, 22 May 2008 13:30:16 -0700 |
parents | 19b5b95523e1 |
children | a6d8ea2756e6 |
files | usr/src/uts/i86pc/Makefile.workarounds usr/src/uts/i86pc/os/cpuid.c usr/src/uts/i86pc/os/mp_startup.c usr/src/uts/i86pc/vm/hat_i86.c usr/src/uts/intel/sys/controlregs.h usr/src/uts/intel/sys/x86_archext.h |
diffstat | 6 files changed, 232 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/i86pc/Makefile.workarounds Thu May 22 12:51:35 2008 -0700 +++ b/usr/src/uts/i86pc/Makefile.workarounds Thu May 22 13:30:16 2008 -0700 @@ -18,7 +18,7 @@ # # CDDL HEADER END # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -111,3 +111,9 @@ # Some Registered DIMMs incompatible with address parity feature # WORKAROUND_DEFS += -DOPTERON_ERRATUM_172 + +# +# L2 Eviction May Occur During Processor Operation To Set +# Accessed or Dirty Bit. +# +WORKAROUND_DEFS += -DOPTERON_ERRATUM_298
--- a/usr/src/uts/i86pc/os/cpuid.c Thu May 22 12:51:35 2008 -0700 +++ b/usr/src/uts/i86pc/os/cpuid.c Thu May 22 13:30:16 2008 -0700 @@ -2676,6 +2676,14 @@ SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ DH_E6(eax) || JH_E6(eax)) +#define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) +#define DR_B0(eax) (eax == 0x100f20) +#define DR_B1(eax) (eax == 0x100f21) +#define DR_BA(eax) (eax == 0x100f2a) +#define DR_B2(eax) (eax == 0x100f22) +#define DR_B3(eax) (eax == 0x100f23) +#define RB_C0(eax) (eax == 0x100f40) + switch (erratum) { case 1: return (cpi->cpi_family < 0x10); @@ -2684,11 +2692,11 @@ case 52: return (B(eax)); case 57: - return (cpi->cpi_family <= 0x10); + return (cpi->cpi_family <= 0x11); case 58: return (B(eax)); case 60: - return (cpi->cpi_family <= 0x10); + return (cpi->cpi_family <= 0x11); case 61: case 62: case 63: @@ -2709,7 +2717,7 @@ case 76: return (B(eax)); case 77: - return (cpi->cpi_family <= 0x10); + return (cpi->cpi_family <= 0x11); case 78: return (B(eax) || SH_C0(eax)); case 79: @@ -2791,7 +2799,7 @@ case 121: return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); case 122: - return (cpi->cpi_family < 0x10); + return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); case 123: return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); case 131: @@ -2812,6 +2820,81 @@ return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); + case 6671130: + /* + * check for processors (pre-Shanghai) that do not provide + * optimal management of 1gb ptes in its tlb. + */ + return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); + + case 298: + return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || + DR_B2(eax) || RB_C0(eax)); + + default: + return (-1); + + } +} + +/* + * Determine if specified erratum is present via OSVW (OS Visible Workaround). + * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 
+ * Only erratum 298 (OSVW id 0) is handled here; any other erratum yields -1. + */ +int +osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) +{ + struct cpuid_info *cpi; + uint_t osvwid; + static int osvwfeature = -1; /* -1: OSVW cpuid bit not yet sampled */ + uint64_t osvwlength; + + cpi = cpu->cpu_m.mcpu_cpi; + + /* confirm OSVW supported */ + if (osvwfeature == -1) { + osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; + } else { + /* assert that osvw feature setting is consistent on all cpus */ + ASSERT(osvwfeature == + (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); + } + if (!osvwfeature) + return (-1); + + osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; + + switch (erratum) { + case 298: /* osvwid is 0 */ + osvwid = 0; + if (osvwlength <= (uint64_t)osvwid) { + /* osvwid 0 is unknown */ + return (-1); + } + + /* + * Check the OSVW STATUS MSR to determine the state + * of the erratum where: + * 0 - fixed by HW + * 1 - BIOS has applied the workaround when BIOS + * workaround is available. (Or for other errata, + * OS workaround is required.) + * For a value of 1, caller will confirm that the + * erratum 298 workaround has indeed been applied by BIOS. + * + * A 1 may be set in cpus that have a HW fix + * in a mixed cpu system. Regarding erratum 298: + * In a multiprocessor platform, the workaround above + * should be applied to all processors regardless of + * silicon revision when an affected processor is + * present. + */ + + return ((rdmsr(MSR_AMD_OSVW_STATUS + + (osvwid / OSVW_ID_CNT_PER_MSR)) & + (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))) != 0); + default: return (-1); }
--- a/usr/src/uts/i86pc/os/mp_startup.c Thu May 22 12:51:35 2008 -0700 +++ b/usr/src/uts/i86pc/os/mp_startup.c Thu May 22 13:30:16 2008 -0700 @@ -592,6 +592,10 @@ int opteron_workaround_6323525; /* if non-zero -> at least one cpu has it */ #endif +#if defined(OPTERON_ERRATUM_298) +int opteron_erratum_298; /* if non-zero -> BIOS workaround missing on a cpu */ +#endif + static void workaround_warning(cpu_t *cp, uint_t erratum) { @@ -677,6 +681,57 @@ #endif uint_t +do_erratum_298(struct cpu *cpu) +{ + static int osvwrc = -3; /* -3: osvw not yet queried */ + extern int osvw_opteron_erratum(cpu_t *, uint_t); + + /* + * L2 Eviction May Occur During Processor Operation To Set + * Accessed or Dirty Bit. + */ + if (osvwrc == -3) { + osvwrc = osvw_opteron_erratum(cpu, 298); + } else { + /* osvw return codes should be consistent for all cpus */ + ASSERT(osvwrc == osvw_opteron_erratum(cpu, 298)); + } + + switch (osvwrc) { + case 0: /* erratum is not present: do nothing */ + break; + case 1: /* erratum is present: BIOS workaround applied */ + /* + * check if workaround is actually in place and issue warning + * if not. + */ + if (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) == 0) || + ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) == 0)) { +#if defined(OPTERON_ERRATUM_298) + opteron_erratum_298++; /* reported later in one summary warning */ +#else + workaround_warning(cpu, 298); + return (1); /* counted as a missing workaround by the caller */ +#endif + } + break; + case -1: /* cannot determine via osvw: check cpuid */ + if ((cpuid_opteron_erratum(cpu, 298) > 0) && + (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) == 0) || + ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) == 0))) { +#if defined(OPTERON_ERRATUM_298) + opteron_erratum_298++; /* reported later in one summary warning */ +#else + workaround_warning(cpu, 298); + return (1); /* counted as a missing workaround by the caller */ +#endif + } + break; + } + return (0); /* not affected, workaround in place, or only counted above */ +} + +uint_t workaround_errata(struct cpu *cpu) { uint_t missing = 0; @@ -1041,7 +1096,7 @@ #if defined(OPTERON_WORKAROUND_6323525) /* * This problem only occurs with 2 or more cores. If bit in - * MSR_BU_CFG set, then not applicable. The workaround + * MSR_AMD_BU_CFG set, then not applicable. 
The workaround * is to patch the semaphone routines with the lfence * instruction to provide necessary load memory barrier with * possible subsequent read-modify-write ops. @@ -1072,7 +1127,7 @@ #else /* __xpv */ } else if ((x86_feature & X86_SSE2) && ((opteron_get_nnodes() * cpuid_get_ncpu_per_chip(cpu)) > 1)) { - if ((xrdmsr(MSR_BU_CFG) & 0x02) == 0) + if ((xrdmsr(MSR_AMD_BU_CFG) & 0x02) == 0) opteron_workaround_6323525++; #endif /* __xpv */ } @@ -1082,6 +1137,8 @@ #endif } + missing += do_erratum_298(cpu); + #ifdef __xpv return (0); #else @@ -1162,6 +1219,16 @@ if (opteron_workaround_6323525) workaround_applied(6323525); #endif +#if defined(OPTERON_ERRATUM_298) + if (opteron_erratum_298) { + cmn_err(CE_WARN, + "BIOS microcode patch for AMD 64/Opteron(tm)" + " processor\nerratum 298 was not detected; updating your" + " system's BIOS to a version\ncontaining this" + " microcode patch is HIGHLY recommended or erroneous" + " system\noperation may occur.\n"); + } +#endif } static cpuset_t procset;
--- a/usr/src/uts/i86pc/vm/hat_i86.c Thu May 22 12:51:35 2008 -0700 +++ b/usr/src/uts/i86pc/vm/hat_i86.c Thu May 22 13:30:16 2008 -0700 @@ -130,9 +130,23 @@ uint_t use_boot_reserve = 1; /* cleared after early boot process */ uint_t can_steal_post_boot = 0; /* set late in boot to enable stealing */ -/* export 1g page size to user applications if set */ +/* + * enable_1gpg: controls 1g page support for user applications. + * By default, 1g pages are exported to user applications. enable_1gpg can + * be set to 0 to not export. + */ int enable_1gpg = 1; +/* + * AMD shanghai processors provide better management of 1gb ptes in their tlb. + * By default, 1g page support will be disabled for pre-shanghai AMD + * processors that don't have optimal tlb support for the 1g page size. + * chk_optimal_1gtlb can be set to 0 to force 1g page support on sub-optimal + * processors. + */ +int chk_optimal_1gtlb = 1; + + #ifdef DEBUG uint_t map1gcnt; #endif @@ -461,6 +475,36 @@ } /* + * Set mmu.max_page_level and mmu.umax_page_level. Level 2 (1g pages) needs + * X86_1GPG, no erratum 6671130 veto and plat_mnode_xcheck() returning 0. + */ +static void +set_max_page_level(void) +{ + level_t lvl; + + if (!kbm_largepage_support) { + lvl = 0; /* no large pages: must not be raised below */ + } else if (x86_feature & X86_1GPG) { + lvl = 2; + if (chk_optimal_1gtlb && cpuid_opteron_erratum(CPU, 6671130)) { + lvl = 1; + } + if (plat_mnode_xcheck(LEVEL_SIZE(2) >> LEVEL_SHIFT(0))) { + lvl = 1; + } + } else { + lvl = 1; + } + mmu.max_page_level = lvl; + + if ((lvl == 2) && (enable_1gpg == 0)) + mmu.umax_page_level = 1; + else + mmu.umax_page_level = lvl; +} + +/* * Initialize hat data structures based on processor MMU information. */ void @@ -571,24 +615,8 @@ mmu.level_mask[i] = ~mmu.level_offset[i]; } - /* - * Initialize parameters based on the 64 or 32 bit kernels and - * for the 32 bit kernel decide if we should use PAE. - */ - if (kbm_largepage_support) { - - if ((x86_feature & X86_1GPG) && - plat_mnode_xcheck((LEVEL_SIZE(2) >> LEVEL_SHIFT(0))) == 0) { - mmu.max_page_level = 2; - mmu.umax_page_level = (enable_1gpg) ? 
2 : 1; - } else { - mmu.max_page_level = 1; - mmu.umax_page_level = 1; - } - } else { - mmu.max_page_level = 0; - mmu.umax_page_level = 0; - } + set_max_page_level(); + mmu_page_sizes = mmu.max_page_level + 1; mmu_exported_page_sizes = mmu.umax_page_level + 1;
--- a/usr/src/uts/intel/sys/controlregs.h Thu May 22 12:51:35 2008 -0700 +++ b/usr/src/uts/intel/sys/controlregs.h Thu May 22 13:30:16 2008 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -196,6 +196,7 @@ #define MSR_AMD_HWCR 0xc0010015 +#define AMD_HWCR_TLBCACHEDIS (UINT64_C(1) << 3) /* checked for erratum 298 workaround */ #define AMD_HWCR_FFDIS 0x00040 /* disable TLB Flush Filter */ #define AMD_HWCR_MCI_STATUS_WREN 0x40000 /* enable write of MCi_STATUS */ @@ -203,11 +204,21 @@ #define MSR_AMD_NB_CFG 0xc001001f -#define MSR_BU_CFG 0xc0011023 - #define AMD_NB_CFG_SRQ_HEARTBEAT (UINT64_C(1) << 20) #define AMD_NB_CFG_SRQ_SPR (UINT64_C(1) << 32) +#define MSR_AMD_BU_CFG 0xc0011023 + +#define AMD_BU_CFG_E298 (UINT64_C(1) << 1) /* checked for erratum 298 workaround */ + +/* AMD's osvw (OS Visible Workaround) MSRs */ +#define MSR_AMD_OSVW_ID_LEN 0xc0010140 +#define MSR_AMD_OSVW_STATUS 0xc0010141 + + +#define OSVW_ID_LEN_MASK 0xffffULL /* mask applied to MSR_AMD_OSVW_ID_LEN */ +#define OSVW_ID_CNT_PER_MSR 64 /* osvw ids (status bits) per status MSR */ + /* * Enable PCI Extended Configuration Space (ECS) on Greyhound */
--- a/usr/src/uts/intel/sys/x86_archext.h Thu May 22 12:51:35 2008 -0700 +++ b/usr/src/uts/intel/sys/x86_archext.h Thu May 22 13:30:16 2008 -0700 @@ -168,9 +168,17 @@ #define CPUID_AMD_ECX_CR8D 0x00000010 /* AMD: 32-bit mov %cr8 */ #define CPUID_AMD_ECX_LZCNT 0x00000020 /* AMD: LZCNT insn */ #define CPUID_AMD_ECX_SSE4A 0x00000040 /* AMD: SSE4A insns */ +#define CPUID_AMD_ECX_MAS 0x00000080 /* AMD: MisAlignSse mode */ +#define CPUID_AMD_ECX_3DNP 0x00000100 /* AMD: 3DNowPrefetch */ +#define CPUID_AMD_ECX_OSVW 0x00000200 /* AMD: OSVW */ +#define CPUID_AMD_ECX_IBS 0x00000400 /* AMD: IBS */ +#define CPUID_AMD_ECX_SSE5 0x00000800 /* AMD: SSE5 */ +#define CPUID_AMD_ECX_SKINIT 0x00001000 /* AMD: SKINIT */ +#define CPUID_AMD_ECX_WDT 0x00002000 /* AMD: WDT */ #define FMT_CPUID_AMD_ECX \ "\20" \ + "\16wdt\15skinit\14sse5\13ibs\12osvw\0113dnp\10mas" /* octal, 1-based bit #s; \011 keeps "3dnp" separate */ \ "\7sse4a\6lzcnt\5cr8d\3svm\2lcmplgcy\1ahf64" /*