Mercurial > illumos > illumos-gate
changeset 13681:73253247f9e5
2650 AMD family 0x15 PG support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Albert Lee <trisk@nexenta.com>
author | Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> |
---|---|
date | Thu, 03 May 2012 15:56:05 +0200 |
parents | 2bd022a765e2 |
children | e7836650181b |
files | usr/src/uts/i86pc/os/cpuid.c usr/src/uts/i86pc/os/mp_machdep.c usr/src/uts/intel/sys/x86_archext.h |
diffstat | 3 files changed, 71 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/usr/src/uts/i86pc/os/cpuid.c	Thu May 03 05:49:19 2012 -0700
+++ b/usr/src/uts/i86pc/os/cpuid.c	Thu May 03 15:56:05 2012 +0200
@@ -160,7 +160,8 @@
 	"xsave",
 	"avx",
 	"vmx",
-	"svm"
+	"svm",
+	"topoext"
 };
 
 boolean_t
@@ -269,7 +270,7 @@
  */
 
 #define	NMAX_CPI_STD	6		/* eax = 0 .. 5 */
-#define	NMAX_CPI_EXTD	0x1c		/* eax = 0x80000000 .. 0x8000001b */
+#define	NMAX_CPI_EXTD	0x1f		/* eax = 0x80000000 .. 0x8000001e */
 
 /*
  * Some terminology needs to be explained:
@@ -283,6 +284,8 @@
  *    memory controllers, PCI configuration spaces. They are connected
  *    inside the package with Hypertransport links. On single-node
  *    processors, processor node is equivalent to chip/socket/package.
+ *  - Compute Unit: Some AMD processors pair cores in "compute units" that
+ *    share the FPU and the I$ and L2 caches.
  */
 
 struct cpuid_info {
@@ -343,6 +346,8 @@
 	uint_t cpi_procnodeid;		/* AMD: nodeID on HT, Intel: chipid */
 	uint_t cpi_procnodes_per_pkg;	/* AMD: # of nodes in the package */
 					/* Intel: 1 */
+	uint_t cpi_compunitid;		/* AMD: ComputeUnit ID, Intel: coreid */
+	uint_t cpi_cores_per_compunit;	/* AMD: # of cores in the ComputeUnit */
 	struct xsave_info cpi_xsave;	/* fn D: xsave/xrestor info */
 };
 
@@ -727,6 +732,7 @@
 		cpi->cpi_pkgcoreid = 0;
 	}
 	cpi->cpi_procnodeid = cpi->cpi_chipid;
+	cpi->cpi_compunitid = cpi->cpi_coreid;
 }
 
 static void
@@ -736,6 +742,7 @@
 	uint32_t nb_caps_reg;
 	uint_t node2_1;
 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
+	struct cpuid_regs *cp;
 
 	/*
 	 * AMD CMP chips currently have a single thread per core.
@@ -753,9 +760,15 @@
 	 * from 0 regardless of how many or which are disabled, and there
 	 * is no way for operating system to discover the real core id when some
 	 * are disabled.
+	 *
+	 * In family 0x15, the cores come in pairs called compute units. They
+	 * share I$ and L2 caches and the FPU. Enumeration of this feature is
+	 * simplified by the new topology extensions CPUID leaf, indicated by
+	 * the X86 feature X86FSET_TOPOEXT.
 	 */
 
 	cpi->cpi_coreid = cpu->cpu_id;
+	cpi->cpi_compunitid = cpu->cpu_id;
 
 	if (cpi->cpi_xmaxeax >= 0x80000008) {
 
@@ -784,10 +797,21 @@
 	    cpi->cpi_apicid & ((1<<coreidsz) - 1);
 	cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
 
-	/* Get nodeID */
-	if (cpi->cpi_family == 0xf) {
+	/* Get node ID, compute unit ID */
+	if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
+	    cpi->cpi_xmaxeax >= 0x8000001e) {
+		cp = &cpi->cpi_extd[0x1e];
+		cp->cp_eax = 0x8000001e;
+		(void) __cpuid_insn(cp);
+
+		cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
+		cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
+		cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
+		cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
+		    + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
+		    * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
+	} else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
 		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
-		cpi->cpi_chipid = cpi->cpi_procnodeid;
 	} else if (cpi->cpi_family == 0x10) {
 		/*
 		 * See if we are a multi-node processor.
@@ -798,7 +822,6 @@
 			/* Single-node */
 			cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
 			    coreidsz);
-			cpi->cpi_chipid = cpi->cpi_procnodeid;
 		} else {
 
 			/*
@@ -813,7 +836,6 @@
 			if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
 				/* We are BSP */
 				cpi->cpi_procnodeid = (first_half ? 0 : 1);
-				cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
 			} else {
 
 				/* We are AP */
@@ -833,17 +855,14 @@
 				else
 					cpi->cpi_procnodeid = node2_1 +
 					    first_half;
-
-				cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
 			}
 		}
-	} else if (cpi->cpi_family >= 0x11) {
-		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
-		cpi->cpi_chipid = cpi->cpi_procnodeid;
 	} else {
 		cpi->cpi_procnodeid = 0;
-		cpi->cpi_chipid = cpi->cpi_procnodeid;
 	}
+
+	cpi->cpi_chipid =
+	    cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
 }
 
 /*
@@ -1437,6 +1456,10 @@
 			if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
 				add_x86_feature(featureset, X86FSET_SVM);
 			}
+
+			if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
+				add_x86_feature(featureset, X86FSET_TOPOEXT);
+			}
 			break;
 		default:
 			break;
@@ -1545,6 +1568,7 @@
 
 	cpi->cpi_apicid = CPI_APIC_ID(cpi);
 	cpi->cpi_procnodes_per_pkg = 1;
+	cpi->cpi_cores_per_compunit = 1;
 	if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
 	    is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
 		/*
@@ -1571,6 +1595,7 @@
 			cpi->cpi_coreid = cpi->cpi_chipid;
 			cpi->cpi_pkgcoreid = 0;
 			cpi->cpi_procnodeid = cpi->cpi_chipid;
+			cpi->cpi_compunitid = cpi->cpi_chipid;
 		}
 	}
 
@@ -3004,6 +3029,20 @@
 	return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
 }
 
+uint_t
+cpuid_get_compunitid(cpu_t *cpu)
+{
+	ASSERT(cpuid_checkpass(cpu, 1));
+	return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
+}
+
+uint_t
+cpuid_get_cores_per_compunit(cpu_t *cpu)
+{
+	ASSERT(cpuid_checkpass(cpu, 1));
+	return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
+}
+
 /*ARGSUSED*/
 int
 cpuid_have_cr8access(cpu_t *cpu)
```
```diff
--- a/usr/src/uts/i86pc/os/mp_machdep.c	Thu May 03 05:49:19 2012 -0700
+++ b/usr/src/uts/i86pc/os/mp_machdep.c	Thu May 03 15:56:05 2012 +0200
@@ -245,6 +245,11 @@
 		} else {
 			return (0);
 		}
+	case PGHW_FPU:
+		if (cpuid_get_cores_per_compunit(cp) > 1)
+			return (1);
+		else
+			return (0);
 	case PGHW_PROCNODE:
 		if (cpuid_get_procnodes_per_pkg(cp) > 1)
 			return (1);
@@ -306,6 +311,8 @@
 		return (cpuid_get_coreid(cpu));
 	case PGHW_CACHE:
 		return (cpuid_get_last_lvl_cacheid(cpu));
+	case PGHW_FPU:
+		return (cpuid_get_compunitid(cpu));
 	case PGHW_PROCNODE:
 		return (cpuid_get_procnodeid(cpu));
 	case PGHW_CHIP:
@@ -331,6 +338,7 @@
 static pghw_type_t hw_hier[] = {
 	PGHW_IPIPE,
 	PGHW_CACHE,
+	PGHW_FPU,
 	PGHW_PROCNODE,
 	PGHW_CHIP,
 	PGHW_POW_IDLE,
@@ -361,8 +369,13 @@
 	/*
 	 * For shared caches, also load balance across them to
 	 * maximize aggregate cache capacity
+	 *
+	 * On AMD family 0x15 CPUs, cores come in pairs called
+	 * compute units, sharing the FPU and the I$ and L2
+	 * caches. Use balancing and cache affinity.
 	 */
 	switch (hw) {
+	case PGHW_FPU:
 	case PGHW_CACHE:
 		return (CMT_BALANCE|CMT_AFFINITY);
 	default:
```
```diff
--- a/usr/src/uts/intel/sys/x86_archext.h	Thu May 03 05:49:19 2012 -0700
+++ b/usr/src/uts/intel/sys/x86_archext.h	Thu May 03 15:56:05 2012 +0200
@@ -187,9 +187,11 @@
 #define	CPUID_AMD_ECX_SSE5	0x00000800	/* AMD: SSE5 */
 #define	CPUID_AMD_ECX_SKINIT	0x00001000	/* AMD: SKINIT */
 #define	CPUID_AMD_ECX_WDT	0x00002000	/* AMD: WDT */
+#define	CPUID_AMD_ECX_TOPOEXT	0x00400000	/* AMD: Topology Extensions */
 
 #define	FMT_CPUID_AMD_ECX					\
 	"\20"							\
+	"\22topoext"						\
 	"\14wdt\13skinit\12sse5\11ibs\10osvw\93dnp\8mas"	\
 	"\7sse4a\6lzcnt\5cr8d\3svm\2lcmplgcy\1ahf64"
 
@@ -368,6 +370,7 @@
 #define	X86FSET_AVX		34
 #define	X86FSET_VMX		35
 #define	X86FSET_SVM		36
+#define	X86FSET_TOPOEXT		37
 
 /*
  * flags to patch tsc_read routine.
@@ -591,7 +594,7 @@
 
 #if defined(_KERNEL) || defined(_KMEMUSER)
 
-#define	NUM_X86_FEATURES	37
+#define	NUM_X86_FEATURES	38
 extern uchar_t x86_featureset[];
 
 extern void free_x86_featureset(void *featureset);
@@ -676,6 +679,8 @@
 extern uint32_t cpuid_get_apicid(struct cpu *);
 extern uint_t cpuid_get_procnodeid(struct cpu *cpu);
 extern uint_t cpuid_get_procnodes_per_pkg(struct cpu *cpu);
+extern uint_t cpuid_get_compunitid(struct cpu *cpu);
+extern uint_t cpuid_get_cores_per_compunit(struct cpu *cpu);
 extern int cpuid_is_cmt(struct cpu *);
 extern int cpuid_syscall32_insn(struct cpu *);
 extern int getl2cacheinfo(struct cpu *, int *, int *, int *);
```