changeset 13681:73253247f9e5

2650 AMD family 0x15 PG support Reviewed by: Robert Mustacchi <rm@joyent.com> Reviewed by: Richard Lowe <richlowe@richlowe.net> Approved by: Albert Lee <trisk@nexenta.com>
author Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
date Thu, 03 May 2012 15:56:05 +0200
parents 2bd022a765e2
children e7836650181b
files usr/src/uts/i86pc/os/cpuid.c usr/src/uts/i86pc/os/mp_machdep.c usr/src/uts/intel/sys/x86_archext.h
diffstat 3 files changed, 71 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/i86pc/os/cpuid.c	Thu May 03 05:49:19 2012 -0700
+++ b/usr/src/uts/i86pc/os/cpuid.c	Thu May 03 15:56:05 2012 +0200
@@ -160,7 +160,8 @@
 	"xsave",
 	"avx",
 	"vmx",
-	"svm"
+	"svm",
+	"topoext"
 };
 
 boolean_t
@@ -269,7 +270,7 @@
  */
 
 #define	NMAX_CPI_STD	6		/* eax = 0 .. 5 */
-#define	NMAX_CPI_EXTD	0x1c		/* eax = 0x80000000 .. 0x8000001b */
+#define	NMAX_CPI_EXTD	0x1f		/* eax = 0x80000000 .. 0x8000001e */
 
 /*
  * Some terminology needs to be explained:
@@ -283,6 +284,8 @@
  *    memory controllers, PCI configuration spaces. They are connected
  *    inside the package with Hypertransport links. On single-node
  *    processors, processor node is equivalent to chip/socket/package.
+ *  - Compute Unit: Some AMD processors pair cores in "compute units" that
+ *    share the FPU and the I$ and L2 caches.
  */
 
 struct cpuid_info {
@@ -343,6 +346,8 @@
 	uint_t cpi_procnodeid;		/* AMD: nodeID on HT, Intel: chipid */
 	uint_t cpi_procnodes_per_pkg;	/* AMD: # of nodes in the package */
 					/* Intel: 1 */
+	uint_t cpi_compunitid;		/* AMD: ComputeUnit ID, Intel: coreid */
+	uint_t cpi_cores_per_compunit;	/* AMD: # of cores in the ComputeUnit */
 
 	struct xsave_info cpi_xsave;	/* fn D: xsave/xrestor info */
 };
@@ -727,6 +732,7 @@
 		cpi->cpi_pkgcoreid = 0;
 	}
 	cpi->cpi_procnodeid = cpi->cpi_chipid;
+	cpi->cpi_compunitid = cpi->cpi_coreid;
 }
 
 static void
@@ -736,6 +742,7 @@
 	uint32_t nb_caps_reg;
 	uint_t node2_1;
 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
+	struct cpuid_regs *cp;
 
 	/*
 	 * AMD CMP chips currently have a single thread per core.
@@ -753,9 +760,15 @@
 	 * from 0 regardless of how many or which are disabled, and there
 	 * is no way for operating system to discover the real core id when some
 	 * are disabled.
+	 *
+	 * In family 0x15, the cores come in pairs called compute units. They
+	 * share I$ and L2 caches and the FPU. Enumeration of this feature is
+	 * simplified by the new topology extensions CPUID leaf, indicated by
+	 * the X86 feature X86FSET_TOPOEXT.
 	 */
 
 	cpi->cpi_coreid = cpu->cpu_id;
+	cpi->cpi_compunitid = cpu->cpu_id;
 
 	if (cpi->cpi_xmaxeax >= 0x80000008) {
 
@@ -784,10 +797,21 @@
 	    cpi->cpi_apicid & ((1<<coreidsz) - 1);
 	cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
 
-	/* Get nodeID */
-	if (cpi->cpi_family == 0xf) {
+	/* Get node ID, compute unit ID */
+	if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
+	    cpi->cpi_xmaxeax >= 0x8000001e) {
+		cp = &cpi->cpi_extd[0x1e];
+		cp->cp_eax = 0x8000001e;
+		(void) __cpuid_insn(cp);
+
+		cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
+		cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
+		cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
+		cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
+		    + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
+		    * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
+	} else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
 		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
-		cpi->cpi_chipid = cpi->cpi_procnodeid;
 	} else if (cpi->cpi_family == 0x10) {
 		/*
 		 * See if we are a multi-node processor.
@@ -798,7 +822,6 @@
 			/* Single-node */
 			cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
 			    coreidsz);
-			cpi->cpi_chipid = cpi->cpi_procnodeid;
 		} else {
 
 			/*
@@ -813,7 +836,6 @@
 			if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
 				/* We are BSP */
 				cpi->cpi_procnodeid = (first_half ? 0 : 1);
-				cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
 			} else {
 
 				/* We are AP */
@@ -833,17 +855,14 @@
 				else
 					cpi->cpi_procnodeid = node2_1 +
 					    first_half;
-
-				cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
 			}
 		}
-	} else if (cpi->cpi_family >= 0x11) {
-		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
-		cpi->cpi_chipid = cpi->cpi_procnodeid;
 	} else {
 		cpi->cpi_procnodeid = 0;
-		cpi->cpi_chipid = cpi->cpi_procnodeid;
 	}
+
+	cpi->cpi_chipid =
+	    cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
 }
 
 /*
@@ -1437,6 +1456,10 @@
 			if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
 				add_x86_feature(featureset, X86FSET_SVM);
 			}
+
+			if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
+				add_x86_feature(featureset, X86FSET_TOPOEXT);
+			}
 			break;
 		default:
 			break;
@@ -1545,6 +1568,7 @@
 
 	cpi->cpi_apicid = CPI_APIC_ID(cpi);
 	cpi->cpi_procnodes_per_pkg = 1;
+	cpi->cpi_cores_per_compunit = 1;
 	if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
 	    is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
 		/*
@@ -1571,6 +1595,7 @@
 			cpi->cpi_coreid = cpi->cpi_chipid;
 			cpi->cpi_pkgcoreid = 0;
 			cpi->cpi_procnodeid = cpi->cpi_chipid;
+			cpi->cpi_compunitid = cpi->cpi_chipid;
 		}
 	}
 
@@ -3004,6 +3029,20 @@
 	return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
 }
 
+uint_t
+cpuid_get_compunitid(cpu_t *cpu)
+{
+	ASSERT(cpuid_checkpass(cpu, 1));
+	return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
+}
+
+uint_t
+cpuid_get_cores_per_compunit(cpu_t *cpu)
+{
+	ASSERT(cpuid_checkpass(cpu, 1));
+	return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
+}
+
 /*ARGSUSED*/
 int
 cpuid_have_cr8access(cpu_t *cpu)
--- a/usr/src/uts/i86pc/os/mp_machdep.c	Thu May 03 05:49:19 2012 -0700
+++ b/usr/src/uts/i86pc/os/mp_machdep.c	Thu May 03 15:56:05 2012 +0200
@@ -245,6 +245,11 @@
 		} else {
 			return (0);
 		}
+	case PGHW_FPU:
+		if (cpuid_get_cores_per_compunit(cp) > 1)
+			return (1);
+		else
+			return (0);
 	case PGHW_PROCNODE:
 		if (cpuid_get_procnodes_per_pkg(cp) > 1)
 			return (1);
@@ -306,6 +311,8 @@
 		return (cpuid_get_coreid(cpu));
 	case PGHW_CACHE:
 		return (cpuid_get_last_lvl_cacheid(cpu));
+	case PGHW_FPU:
+		return (cpuid_get_compunitid(cpu));
 	case PGHW_PROCNODE:
 		return (cpuid_get_procnodeid(cpu));
 	case PGHW_CHIP:
@@ -331,6 +338,7 @@
 	static pghw_type_t hw_hier[] = {
 		PGHW_IPIPE,
 		PGHW_CACHE,
+		PGHW_FPU,
 		PGHW_PROCNODE,
 		PGHW_CHIP,
 		PGHW_POW_IDLE,
@@ -361,8 +369,13 @@
 	/*
 	 * For shared caches, also load balance across them to
 	 * maximize aggregate cache capacity
+	 *
+	 * On AMD family 0x15 CPUs, cores come in pairs called
+	 * compute units, sharing the FPU and the I$ and L2
+	 * caches. Use balancing and cache affinity.
 	 */
 	switch (hw) {
+	case PGHW_FPU:
 	case PGHW_CACHE:
 		return (CMT_BALANCE|CMT_AFFINITY);
 	default:
--- a/usr/src/uts/intel/sys/x86_archext.h	Thu May 03 05:49:19 2012 -0700
+++ b/usr/src/uts/intel/sys/x86_archext.h	Thu May 03 15:56:05 2012 +0200
@@ -187,9 +187,11 @@
 #define	CPUID_AMD_ECX_SSE5	0x00000800	/* AMD: SSE5 */
 #define	CPUID_AMD_ECX_SKINIT	0x00001000	/* AMD: SKINIT */
 #define	CPUID_AMD_ECX_WDT	0x00002000	/* AMD: WDT */
+#define	CPUID_AMD_ECX_TOPOEXT	0x00400000	/* AMD: Topology Extensions */
 
 #define	FMT_CPUID_AMD_ECX					\
 	"\20"							\
+	"\27topoext"		/* bit 22: 1-based, octal */	\
 	"\14wdt\13skinit\12sse5\11ibs\10osvw\93dnp\8mas"	\
 	"\7sse4a\6lzcnt\5cr8d\3svm\2lcmplgcy\1ahf64"
 
@@ -368,6 +370,7 @@
 #define	X86FSET_AVX		34
 #define	X86FSET_VMX		35
 #define	X86FSET_SVM		36
+#define	X86FSET_TOPOEXT		37
 
 /*
  * flags to patch tsc_read routine.
@@ -591,7 +594,7 @@
 
 #if defined(_KERNEL) || defined(_KMEMUSER)
 
-#define	NUM_X86_FEATURES	37
+#define	NUM_X86_FEATURES	38
 extern uchar_t x86_featureset[];
 
 extern void free_x86_featureset(void *featureset);
@@ -676,6 +679,8 @@
 extern uint32_t cpuid_get_apicid(struct cpu *);
 extern uint_t cpuid_get_procnodeid(struct cpu *cpu);
 extern uint_t cpuid_get_procnodes_per_pkg(struct cpu *cpu);
+extern uint_t cpuid_get_compunitid(struct cpu *cpu);
+extern uint_t cpuid_get_cores_per_compunit(struct cpu *cpu);
 extern int cpuid_is_cmt(struct cpu *);
 extern int cpuid_syscall32_insn(struct cpu *);
 extern int getl2cacheinfo(struct cpu *, int *, int *, int *);