changeset 13146:8315ff49e22e

6958308 XSAVE/XRSTOR mechanism to save and restore processor state
Contributed by Lejun Zhu <lejun.zhu@intel.com>
author Kuriakose Kuruvilla <kuriakose.kuruvilla@oracle.com>
date Mon, 16 Aug 2010 19:36:08 -0700
parents 83213fd85699
children 9f04f32f7f45
files usr/src/common/elfcap/elfcap.c usr/src/common/elfcap/elfcap.h usr/src/uts/common/disp/thread.c usr/src/uts/common/sys/auxv_386.h usr/src/uts/i86pc/ml/genassym.c usr/src/uts/i86pc/ml/offsets.in usr/src/uts/i86pc/os/cpr_impl.c usr/src/uts/i86pc/os/cpuid.c usr/src/uts/i86pc/os/fpu_subr.c usr/src/uts/i86pc/os/mp_startup.c usr/src/uts/i86pc/os/startup.c usr/src/uts/intel/ia32/ml/exception.s usr/src/uts/intel/ia32/ml/float.s usr/src/uts/intel/ia32/ml/i86_subr.s usr/src/uts/intel/ia32/os/archdep.c usr/src/uts/intel/ia32/os/fpu.c usr/src/uts/intel/ia32/os/sysi86.c usr/src/uts/intel/sys/archsystm.h usr/src/uts/intel/sys/controlregs.h usr/src/uts/intel/sys/fp.h usr/src/uts/intel/sys/pcb.h usr/src/uts/intel/sys/regset.h usr/src/uts/intel/sys/x86_archext.h
diffstat 23 files changed, 942 insertions(+), 191 deletions(-)
--- a/usr/src/common/elfcap/elfcap.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/common/elfcap/elfcap.c	Mon Aug 16 19:36:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /* LINTLIBRARY */
@@ -288,6 +287,14 @@
 	{						/* 0x08000000 */
 		AV_386_PCLMULQDQ, STRDESC("AV_386_PCLMULQDQ"),
 		STRDESC("PCLMULQDQ"), STRDESC("pclmulqdq"),
+	},
+	{						/* 0x10000000 */
+		AV_386_XSAVE, STRDESC("AV_386_XSAVE"),
+		STRDESC("XSAVE"), STRDESC("xsave"),
+	},
+	{						/* 0x20000000 */
+		AV_386_AVX, STRDESC("AV_386_AVX"),
+		STRDESC("AVX"), STRDESC("avx"),
 	}
 };
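The two new capability strings above name bits that user code can test without running cpuid itself: on Solaris, getisax(2) returns the AV_386_* word (the bit values are defined in auxv_386.h below). A minimal user-land sketch, assuming a libc that provides getisax(2):

	#include <sys/auxv.h>
	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint32_t hw = 0;

		(void) getisax(&hw, 1);		/* first AV_386_* word */
		if (hw & AV_386_XSAVE)
			(void) printf("OS manages FPU state with xsave/xrstor\n");
		if (hw & AV_386_AVX)
			(void) printf("AVX (%%ymm) state is available\n");
		return (0);
	}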
 
--- a/usr/src/common/elfcap/elfcap.h	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/common/elfcap/elfcap.h	Mon Aug 16 19:36:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _ELFCAP_DOT_H
@@ -114,7 +113,7 @@
  */
 #define	ELFCAP_NUM_SF1			3
 #define	ELFCAP_NUM_HW1_SPARC		17
-#define	ELFCAP_NUM_HW1_386		28
+#define	ELFCAP_NUM_HW1_386		30
 
 
 /*
--- a/usr/src/uts/common/disp/thread.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/common/disp/thread.c	Mon Aug 16 19:36:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -186,11 +185,11 @@
 
 	/*
 	 * "struct _klwp" includes a "struct pcb", which includes a
-	 * "struct fpu", which needs to be 16-byte aligned on amd64
-	 * (and even on i386 for fxsave/fxrstor).
+	 * "struct fpu", which needs to be 64-byte aligned on amd64
+	 * (and even on i386) for xsave/xrstor.
 	 */
 	lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
-	    16, NULL, NULL, NULL, NULL, NULL, 0);
+	    64, NULL, NULL, NULL, NULL, NULL, 0);
 #else
 	/*
 	 * Allocate thread structures from static_arena.  This prevents
--- a/usr/src/uts/common/sys/auxv_386.h	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/common/sys/auxv_386.h	Mon Aug 16 19:36:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_AUXV_386_H
@@ -68,9 +67,12 @@
 #define	AV_386_MOVBE		0x2000000 /* Intel MOVBE insns */
 #define	AV_386_AES		0x4000000 /* Intel AES insns */
 #define	AV_386_PCLMULQDQ	0x8000000 /* Intel PCLMULQDQ insn */
+#define	AV_386_XSAVE		0x10000000 /* Intel XSAVE/XRSTOR insns */
+#define	AV_386_AVX		0x20000000 /* Intel AVX insns */
 
 #define	FMT_AV_386							\
 	"\20"								\
+	"\36avx\35xsave"						\
 	"\34pclmulqdq\33aes"						\
 	"\32movbe\31sse4.2"						\
 	"\30sse4.1\27ssse3\26amd_lzcnt\25popcnt"			\
--- a/usr/src/uts/i86pc/ml/genassym.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/i86pc/ml/genassym.c	Mon Aug 16 19:36:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_GENASSYM
@@ -123,6 +122,10 @@
 	printf("#define\tFP_387 0x%x\n", FP_387);
 	printf("#define\t__FP_SSE 0x%x\n", __FP_SSE);
 
+	printf("#define\tFP_FNSAVE 0x%x\n", FP_FNSAVE);
+	printf("#define\tFP_FXSAVE 0x%x\n", FP_FXSAVE);
+	printf("#define\tFP_XSAVE 0x%x\n", FP_XSAVE);
+
 	printf("#define\tAV_INT_SPURIOUS 0x%x\n", AV_INT_SPURIOUS);
 
 	printf("#define\tCPU_READY 0x%x\n", CPU_READY);
--- a/usr/src/uts/i86pc/ml/offsets.in	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/i86pc/ml/offsets.in	Mon Aug 16 19:36:08 2010 -0700
@@ -165,6 +165,7 @@
 fpu_ctx
 	fpu_regs		FPU_CTX_FPU_REGS
 	fpu_flags		FPU_CTX_FPU_FLAGS
+	fpu_xsave_mask		FPU_CTX_FPU_XSAVE_MASK
 
 fxsave_state	FXSAVE_STATE_SIZE
 	fx_fsw			FXSAVE_STATE_FSW
--- a/usr/src/uts/i86pc/os/cpr_impl.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/i86pc/os/cpr_impl.c	Mon Aug 16 19:36:08 2010 -0700
@@ -65,6 +65,7 @@
 #include <sys/reboot.h>
 #include <sys/acpi/acpi.h>
 #include <sys/acpica.h>
+#include <sys/fp.h>
 
 #define	AFMT	"%lx"
 
@@ -944,6 +945,13 @@
 		pat_sync();
 
 	/*
+	 * If we use XSAVE, we need to restore the XFEATURE_ENABLED_MASK
+	 * register.
+	 */
+	if (fp_save_mech == FP_XSAVE) {
+		setup_xfem();
+	}
+
+	/*
 	 * Initialize this CPU's syscall handlers
 	 */
 	init_cpu_syscall(cp);
--- a/usr/src/uts/i86pc/os/cpuid.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/i86pc/os/cpuid.c	Mon Aug 16 19:36:08 2010 -0700
@@ -118,7 +118,7 @@
 uint_t pentiumpro_bug4046376;
 uint_t pentiumpro_bug4064495;
 
-#define	NUM_X86_FEATURES	33
+#define	NUM_X86_FEATURES	35
 void    *x86_featureset;
 ulong_t x86_featureset0[BT_SIZEOFMAP(NUM_X86_FEATURES)];
 
@@ -155,7 +155,9 @@
 	"clfsh",
 	"64",
 	"aes",
-	"pclmulqdq" };
+	"pclmulqdq",
+	"xsave",
+	"avx" };
 
 static void *
 init_x86_featureset(void)
@@ -217,6 +219,11 @@
 }
 
 uint_t enable486;
+
+static size_t xsave_state_size = 0;
+uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
+boolean_t xsave_force_disable = B_FALSE;
+
 /*
  * This is set to platform type Solaris is running on.
  */
@@ -247,6 +254,23 @@
 };
 
 /*
+ * xsave/xrstor info.
+ *
+ * This structure contains HW feature bits and the size of the xsave save
+ * area. Note: the kernel will use the maximum size required for all
+ * hardware features. It is not optimized for potential memory savings if
+ * features at the end of the save area are not enabled.
+ */
+struct xsave_info {
+	uint32_t	xsav_hw_features_low;   /* Supported HW features */
+	uint32_t	xsav_hw_features_high;  /* Supported HW features */
+	size_t		xsav_max_size;  /* max size save area for HW features */
+	size_t		ymm_size;	/* AVX: size of ymm save area */
+	size_t		ymm_offset;	/* AVX: offset for ymm save area */
+};
+
+
+/*
  * These constants determine how many of the elements of the
  * cpuid we cache in the cpuid_info data structure; the
  * remaining elements are accessible via the cpuid instruction.
@@ -327,6 +351,8 @@
 	uint_t cpi_procnodeid;		/* AMD: nodeID on HT, Intel: chipid */
 	uint_t cpi_procnodes_per_pkg;	/* AMD: # of nodes in the package */
 					/* Intel: 1 */
+
+	struct xsave_info cpi_xsave;	/* fn D: xsave/xrstor info */
 };
 
 
@@ -429,6 +455,12 @@
 	BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
 
 /*
+ * XSAVE leaf 0xD enumeration
+ */
+#define	CPUID_LEAFD_2_YMM_OFFSET	576
+#define	CPUID_LEAFD_2_YMM_SIZE		256
+
+/*
 * Functions we consume from cpuid_subr.c;  don't publish these in a header
  * file to try and keep people using the expected cpuid_* interfaces.
  */
@@ -815,6 +847,27 @@
 	}
 }
 
+/*
+ * Set up the XFEATURE_ENABLED_MASK register; required by the xsave feature.
+ */
+void
+setup_xfem(void)
+{
+	uint64_t flags = XFEATURE_LEGACY_FP;
+
+	ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
+
+	if (is_x86_feature(x86_featureset, X86FSET_SSE))
+		flags |= XFEATURE_SSE;
+
+	if (is_x86_feature(x86_featureset, X86FSET_AVX))
+		flags |= XFEATURE_AVX;
+
+	set_xcr(XFEATURE_ENABLED_MASK, flags);
+
+	xsave_bv_all = flags;
+}
+
 void *
 cpuid_pass1(cpu_t *cpu)
 {
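setup_xfem() above programs XCR0 (the XFEATURE_ENABLED_MASK) with the state components the OS agrees to manage. A C sketch of the value it computes; xfem_value() and its have_* arguments are illustrative stand-ins for the is_x86_feature() tests in the hunk, not kernel code:

	#include <stdint.h>

	#define	XFEATURE_LEGACY_FP	0x1	/* values from x86_archext.h */
	#define	XFEATURE_SSE		0x2
	#define	XFEATURE_AVX		0x4

	static uint64_t
	xfem_value(int have_sse, int have_avx)
	{
		uint64_t flags = XFEATURE_LEGACY_FP;	/* x87 always managed */

		if (have_sse)
			flags |= XFEATURE_SSE;
		if (have_avx)
			flags |= XFEATURE_AVX;
		return (flags);		/* 0x7 on an AVX-capable CPU */
	}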
@@ -827,7 +880,6 @@
 	extern int idle_cpu_prefer_mwait;
 #endif
 
-
 #if !defined(__xpv)
 	determine_platform();
 #endif
@@ -1082,8 +1134,18 @@
 	 * Do not support MONITOR/MWAIT under a hypervisor
 	 */
 	mask_ecx &= ~CPUID_INTC_ECX_MON;
+	/*
+	 * Do not support XSAVE under a hypervisor for now
+	 */
+	xsave_force_disable = B_TRUE;
+
 #endif	/* __xpv */
 
+	if (xsave_force_disable) {
+		mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
+		mask_ecx &= ~CPUID_INTC_ECX_AVX;
+	}
+
 	/*
 	 * Now we've figured out the masks that determine
 	 * which bits we choose to believe, apply the masks
@@ -1180,6 +1242,15 @@
 			if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
 				add_x86_feature(featureset, X86FSET_PCLMULQDQ);
 			}
+
+			if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
+				add_x86_feature(featureset, X86FSET_XSAVE);
+				/* We only test AVX when there is XSAVE */
+				if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
+					add_x86_feature(featureset,
+					    X86FSET_AVX);
+				}
+			}
 		}
 	}
 	if (cp->cp_edx & CPUID_INTC_EDX_DE) {
@@ -1724,6 +1795,92 @@
 		cp = NULL;
 	}
 
+	/*
+	 * XSAVE enumeration
+	 */
+	if (cpi->cpi_maxeax >= 0xD && cpi->cpi_vendor == X86_VENDOR_Intel) {
+		struct cpuid_regs regs;
+		boolean_t cpuid_d_valid = B_TRUE;
+
+		cp = &regs;
+		cp->cp_eax = 0xD;
+		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
+
+		(void) __cpuid_insn(cp);
+
+		/*
+		 * Sanity checks for debug
+		 */
+		if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
+		    (cp->cp_eax & XFEATURE_SSE) == 0) {
+			cpuid_d_valid = B_FALSE;
+		}
+
+		cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
+		cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
+		cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
+
+		/*
+		 * If the hw supports AVX, get the size and offset in the save
+		 * area for the ymm state.
+		 */
+		if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
+			cp->cp_eax = 0xD;
+			cp->cp_ecx = 2;
+			cp->cp_edx = cp->cp_ebx = 0;
+
+			(void) __cpuid_insn(cp);
+
+			if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
+			    cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
+				cpuid_d_valid = B_FALSE;
+			}
+
+			cpi->cpi_xsave.ymm_size = cp->cp_eax;
+			cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
+		}
+
+		if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
+			xsave_state_size = 0;
+		} else if (cpuid_d_valid) {
+			xsave_state_size = cpi->cpi_xsave.xsav_max_size;
+		} else {
+			/* Broken CPUID 0xD, probably in HVM */
+			cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
+			    "value: hw_low = %d, hw_high = %d, xsave_size = %d"
+			    ", ymm_size = %d, ymm_offset = %d\n",
+			    cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
+			    cpi->cpi_xsave.xsav_hw_features_high,
+			    (int)cpi->cpi_xsave.xsav_max_size,
+			    (int)cpi->cpi_xsave.ymm_size,
+			    (int)cpi->cpi_xsave.ymm_offset);
+
+			if (xsave_state_size != 0) {
+				/*
+				 * This must be a non-boot CPU. We cannot
+				 * continue, because boot cpu has already
+				 * enabled XSAVE.
+				 */
+				ASSERT(cpu->cpu_id != 0);
+				cmn_err(CE_PANIC, "cpu%d: we have already "
+				    "enabled XSAVE on boot cpu, cannot "
+				    "continue.", cpu->cpu_id);
+			} else {
+				/*
+				 * Must be from boot CPU, OK to disable XSAVE.
+				 */
+				ASSERT(cpu->cpu_id == 0);
+				remove_x86_feature(x86_featureset,
+				    X86FSET_XSAVE);
+				remove_x86_feature(x86_featureset, X86FSET_AVX);
+				CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
+				CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
+				xsave_force_disable = B_TRUE;
+			}
+		}
+	}
+
+
 	if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
 		goto pass2_done;
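The leaf-0xD enumeration above can be reproduced from user level with a raw cpuid. A hedged sketch assuming GCC-style inline asm (cpuid_leaf() is an illustrative helper, not a kernel or libc interface); sub-leaf 0 reports the supported feature bits and the maximum save-area size, sub-leaf 2 the ymm region that the code above checks against 576/256:

	#include <stdint.h>
	#include <stdio.h>

	static void
	cpuid_leaf(uint32_t leaf, uint32_t subleaf, uint32_t r[4])
	{
		__asm__ volatile("cpuid"
		    : "=a" (r[0]), "=b" (r[1]), "=c" (r[2]), "=d" (r[3])
		    : "a" (leaf), "c" (subleaf));
	}

	int
	main(void)
	{
		uint32_t r[4];

		cpuid_leaf(0xD, 0, r);
		(void) printf("features 0x%x, max save size %u\n", r[0], r[2]);
		if (r[0] & 0x4) {			/* XFEATURE_AVX */
			cpuid_leaf(0xD, 2, r);
			(void) printf("ymm: %u bytes at offset %u\n",
			    r[0], r[1]);
		}
		return (0);
	}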
 
@@ -2386,6 +2543,11 @@
 				*ecx &= ~CPUID_INTC_ECX_AES;
 			if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
 				*ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
+			if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
+				*ecx &= ~(CPUID_INTC_ECX_XSAVE |
+				    CPUID_INTC_ECX_OSXSAVE);
+			if (!is_x86_feature(x86_featureset, X86FSET_AVX))
+				*ecx &= ~CPUID_INTC_ECX_AVX;
 		}
 
 		/*
@@ -2419,6 +2581,9 @@
 				hwcap_flags |= AV_386_AES;
 			if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
 				hwcap_flags |= AV_386_PCLMULQDQ;
+			if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
+			    (*ecx & CPUID_INTC_ECX_OSXSAVE))
+				hwcap_flags |= AV_386_XSAVE;
 		}
 		if (*ecx & CPUID_INTC_ECX_POPCNT)
 			hwcap_flags |= AV_386_POPCNT;
@@ -4273,6 +4438,31 @@
 }
 
 /*
+ * Set up the necessary registers to enable the XSAVE feature on this
+ * processor. This function needs to be called early enough that no
+ * xsave/xrstor instructions execute on the processor before the MSRs
+ * are properly set up.
+ *
+ * The current implementation has the following assumptions:
+ * - cpuid_pass1() is done, so that X86 features are known.
+ * - fpu_probe() is done, so that fp_save_mech is chosen.
+ */
+void
+xsave_setup_msr(cpu_t *cpu)
+{
+	ASSERT(fp_save_mech == FP_XSAVE);
+	ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
+
+	/* Enable OSXSAVE in CR4. */
+	setcr4(getcr4() | CR4_OSXSAVE);
+	/*
+	 * Update the SW copy of ECX, so that /dev/cpu/self/cpuid will
+	 * report the correct value.
+	 */
+	cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
+	setup_xfem();
+}
+
+/*
  * Starting with the Westmere processor the local
  * APIC timer will continue running in all C-states,
  * including the deepest C-states.
--- a/usr/src/uts/i86pc/os/fpu_subr.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/i86pc/os/fpu_subr.c	Mon Aug 16 19:36:08 2010 -0700
@@ -48,6 +48,15 @@
 int fp_kind = FP_387;
 
 /*
+ * Mechanism to save FPU state.
+ */
+#if defined(__amd64)
+int fp_save_mech = FP_FXSAVE;
+#elif defined(__i386)
+int fp_save_mech = FP_FNSAVE;
+#endif
+
+/*
  * The variable fpu_ignored is provided to allow other code to
  * determine whether emulation is being done because there is
  * no FPU or because of an override requested via /etc/system.
@@ -141,8 +150,20 @@
 		 */
 		if (is_x86_feature(x86_featureset, X86FSET_SSE) &&
 		    is_x86_feature(x86_featureset, X86FSET_SSE2)) {
-			fp_kind = __FP_SSE;
+			fp_kind |= __FP_SSE;
 			ENABLE_SSE();
+
+			if (is_x86_feature(x86_featureset, X86FSET_AVX)) {
+				ASSERT(is_x86_feature(x86_featureset,
+				    X86FSET_XSAVE));
+				fp_kind |= __FP_AVX;
+			}
+
+			if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
+				fp_save_mech = FP_XSAVE;
+				fpsave_ctxt = xsave_ctxt;
+				patch_xsave();
+			}
 		}
 #elif defined(__i386)
 		/*
@@ -150,15 +171,37 @@
 		 * code to exploit it when present.
 		 */
 		if (is_x86_feature(x86_featureset, X86FSET_SSE)) {
-			fp_kind = __FP_SSE;
+			fp_kind |= __FP_SSE;
+			ENABLE_SSE();
+			fp_save_mech = FP_FXSAVE;
 			fpsave_ctxt = fpxsave_ctxt;
-			patch_sse();
-			if (is_x86_feature(x86_featureset, X86FSET_SSE2))
+
+			if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
 				patch_sse2();
-			ENABLE_SSE();
+			}
+
+			if (is_x86_feature(x86_featureset, X86FSET_AVX)) {
+				ASSERT(is_x86_feature(x86_featureset,
+				    X86FSET_XSAVE));
+				fp_kind |= __FP_AVX;
+			}
+
+			if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
+				fp_save_mech = FP_XSAVE;
+				fpsave_ctxt = xsave_ctxt;
+				patch_xsave();
+			} else {
+				patch_sse();	/* use fxrstor */
+			}
 		} else {
 			remove_x86_feature(x86_featureset, X86FSET_SSE2);
 			/*
+			 * We are unlikely to see a chip with AVX but not
+			 * SSE, but to be safe we disable AVX if SSE is not
+			 * enabled.
+			 */
+			remove_x86_feature(x86_featureset, X86FSET_AVX);
+			/*
 			 * (Just in case the BIOS decided we wanted SSE
 			 * enabled when we didn't. See 4965674.)
 			 */
@@ -169,7 +212,7 @@
 			use_sse_pagecopy = use_sse_pagezero = use_sse_copy = 1;
 		}
 
-		if (fp_kind == __FP_SSE) {
+		if (fp_kind & __FP_SSE) {
 			struct fxsave_state *fx;
 			uint8_t fxsave_state[sizeof (struct fxsave_state) +
 			    XMM_ALIGN];
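Condensed, the probe changes above reduce to a three-way choice of save mechanism. A schematic sketch (not the kernel's code) using the FP_FNSAVE/FP_FXSAVE/FP_XSAVE values this changeset adds to fp.h; the have_* flags stand in for the is_x86_feature() tests:

	/* i386 shown; amd64 starts at FP_FXSAVE since SSE is baseline there. */
	static int
	pick_save_mech(int have_sse, int have_xsave)
	{
		int mech = 1;			/* FP_FNSAVE: fnsave/frstor */

		if (have_sse) {
			mech = 2;		/* FP_FXSAVE: fxsave/fxrstor */
			if (have_xsave)
				mech = 3;	/* FP_XSAVE: xsave/xrstor */
		}
		return (mech);
	}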
--- a/usr/src/uts/i86pc/os/mp_startup.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/i86pc/os/mp_startup.c	Mon Aug 16 19:36:08 2010 -0700
@@ -1711,6 +1711,13 @@
 	 */
 	cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);
 
+	/*
+	 * Setup this processor for XSAVE.
+	 */
+	if (fp_save_mech == FP_XSAVE) {
+		xsave_setup_msr(cp);
+	}
+
 	cpuid_pass2(cp);
 	cpuid_pass3(cp);
 	(void) cpuid_pass4(cp);
--- a/usr/src/uts/i86pc/os/startup.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/i86pc/os/startup.c	Mon Aug 16 19:36:08 2010 -0700
@@ -2193,6 +2193,13 @@
 	PRM_POINT("configure() done");
 
 	/*
+	 * We can now set up for XSAVE because fpu_probe() is done in
+	 * configure().
+	 */
+	if (fp_save_mech == FP_XSAVE) {
+		xsave_setup_msr(CPU);
+	}
+
+	/*
 	 * Set the isa_list string to the defined instruction sets we
 	 * support.
 	 */
--- a/usr/src/uts/intel/ia32/ml/exception.s	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/ia32/ml/exception.s	Mon Aug 16 19:36:08 2010 -0700
@@ -1,6 +1,5 @@
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -637,30 +636,36 @@
 	 * after a context switch -- we do the frequent path in ndptrap_frstor
 	 * below; for all other cases, we let the trap code handle it
 	 */
-	LOADCPU(%rbx)			/* swapgs handled in hypervisor */
+	LOADCPU(%rax)			/* swapgs handled in hypervisor */
 	cmpl	$0, fpu_exists(%rip)
 	je	.handle_in_trap		/* let trap handle no fp case */
-	movq	CPU_THREAD(%rbx), %r15	/* %r15 = curthread */
-	movl	$FPU_EN, %ebx
-	movq	T_LWP(%r15), %r15	/* %r15 = lwp */
-	testq	%r15, %r15
+	movq	CPU_THREAD(%rax), %rbx	/* %rbx = curthread */
+	movl	$FPU_EN, %eax
+	movq	T_LWP(%rbx), %rbx	/* %rbx = lwp */
+	testq	%rbx, %rbx
 	jz	.handle_in_trap		/* should not happen? */
 #if LWP_PCB_FPU	!= 0
-	addq	$LWP_PCB_FPU, %r15	/* &lwp->lwp_pcb.pcb_fpu */
+	addq	$LWP_PCB_FPU, %rbx	/* &lwp->lwp_pcb.pcb_fpu */
 #endif
-	testl	%ebx, PCB_FPU_FLAGS(%r15)
+	testl	%eax, PCB_FPU_FLAGS(%rbx)
 	jz	.handle_in_trap		/* must be the first fault */
 	CLTS
-	andl	$_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%r15)
+	andl	$_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx)
 #if FPU_CTX_FPU_REGS != 0
-	addq	$FPU_CTX_FPU_REGS, %r15
+	addq	$FPU_CTX_FPU_REGS, %rbx
 #endif
+
+	movl	FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax	/* for xrstor */
+	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx	/* for xrstor */
+
 	/*
 	 * the label below is used in trap.c to detect FP faults in
 	 * kernel due to user fault.
 	 */
 	ALTENTRY(ndptrap_frstor)
-	FXRSTORQ	((%r15))
+	.globl  _patch_xrstorq_rbx
+_patch_xrstorq_rbx:
+	FXRSTORQ	((%rbx))
 	cmpw	$KCS_SEL, REGOFF_CS(%rsp)
 	je	.return_to_kernel
 
@@ -694,42 +699,56 @@
 	pushq	%rbx
 	cmpw    $KCS_SEL, 24(%rsp)	/* did we come from kernel mode? */
 	jne     1f
-	LOADCPU(%rbx)			/* if yes, don't swapgs */
+	LOADCPU(%rax)			/* if yes, don't swapgs */
 	jmp	2f
-1:	
+1:
 	SWAPGS				/* if from user, need swapgs */
-	LOADCPU(%rbx)
+	LOADCPU(%rax)
 	SWAPGS
-2:				
+2:	
+	/*
+	 * xrstor takes the high half of its feature mask in %edx, so %rdx
+	 * must be preserved here.
+	 * NOTE: %rdx has to be pushed after the "cmpw ...24(%rsp)" above,
+	 * otherwise %rsp+$24 would no longer point to CS.
+	 */
+	pushq	%rdx
 	cmpl	$0, fpu_exists(%rip)
 	je	.handle_in_trap		/* let trap handle no fp case */
-	movq	CPU_THREAD(%rbx), %rax	/* %rax = curthread */
-	movl	$FPU_EN, %ebx
-	movq	T_LWP(%rax), %rax	/* %rax = lwp */
-	testq	%rax, %rax
+	movq	CPU_THREAD(%rax), %rbx	/* %rbx = curthread */
+	movl	$FPU_EN, %eax
+	movq	T_LWP(%rbx), %rbx	/* %rbx = lwp */
+	testq	%rbx, %rbx
 	jz	.handle_in_trap		/* should not happen? */
 #if LWP_PCB_FPU	!= 0
-	addq	$LWP_PCB_FPU, %rax	/* &lwp->lwp_pcb.pcb_fpu */
+	addq	$LWP_PCB_FPU, %rbx	/* &lwp->lwp_pcb.pcb_fpu */
 #endif
-	testl	%ebx, PCB_FPU_FLAGS(%rax)
+	testl	%eax, PCB_FPU_FLAGS(%rbx)
 	jz	.handle_in_trap		/* must be the first fault */
 	clts
-	andl	$_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rax)
+	andl	$_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx)
 #if FPU_CTX_FPU_REGS != 0
-	addq	$FPU_CTX_FPU_REGS, %rax
+	addq	$FPU_CTX_FPU_REGS, %rbx
 #endif
+
+	movl	FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax	/* for xrstor */
+	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx	/* for xrstor */
+
 	/*
 	 * the label below is used in trap.c to detect FP faults in
 	 * kernel due to user fault.
 	 */
 	ALTENTRY(ndptrap_frstor)
-	FXRSTORQ	((%rax))
+	.globl  _patch_xrstorq_rbx
+_patch_xrstorq_rbx:
+	FXRSTORQ	((%rbx))
+	popq	%rdx
 	popq	%rbx
 	popq	%rax
 	IRET
 	/*NOTREACHED*/
 
 .handle_in_trap:
+	popq	%rdx
 	popq	%rbx
 	popq	%rax
 	TRAP_NOERR(T_NOEXTFLT)	/* $7 */
@@ -749,6 +768,7 @@
 	 */
 	pushl	%eax
 	pushl	%ebx
+	pushl	%edx			/* for xrstor */
 	pushl	%ds
 	pushl	%gs
 	movl	$KDS_SEL, %ebx
@@ -773,17 +793,24 @@
 #if FPU_CTX_FPU_REGS != 0
 	addl	$FPU_CTX_FPU_REGS, %ebx
 #endif
+
+	movl	FPU_CTX_FPU_XSAVE_MASK(%ebx), %eax	/* for xrstor */
+	movl	FPU_CTX_FPU_XSAVE_MASK+4(%ebx), %edx	/* for xrstor */
+
 	/*
 	 * the label below is used in trap.c to detect FP faults in kernel
 	 * due to user fault.
 	 */
 	ALTENTRY(ndptrap_frstor)
-	.globl	_patch_fxrstor_ebx
+	.globl  _patch_fxrstor_ebx
 _patch_fxrstor_ebx:
+	.globl  _patch_xrstor_ebx
+_patch_xrstor_ebx:
 	frstor	(%ebx)		/* may be patched to fxrstor */
 	nop			/* (including this byte) */
 	popl	%gs
 	popl	%ds
+	popl	%edx
 	popl	%ebx
 	popl	%eax
 	IRET
@@ -791,6 +818,7 @@
 .handle_in_trap:
 	popl	%gs
 	popl	%ds
+	popl	%edx
 	popl	%ebx
 	popl	%eax
 	TRAP_NOERR(T_NOEXTFLT)	/* $7 */
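The FPU_CTX_FPU_XSAVE_MASK loads above exist because xrstor, unlike fxrstor, takes a feature mask in EDX:EAX selecting which state components to restore; that is also why %edx now has to be preserved across the handler. A C-level sketch of the convention, assuming a toolchain whose assembler knows the xrstor mnemonic (the patch itself hand-encodes the opcode bytes because its assembler did not):

	#include <stdint.h>

	/* Illustrative only: restore 'mask' components from an xsave area. */
	static inline void
	xrstor_sketch(void *save_area, uint64_t mask)
	{
		uint32_t lo = (uint32_t)mask;
		uint32_t hi = (uint32_t)(mask >> 32);

		__asm__ volatile("xrstor (%0)"
		    : : "r" (save_area), "a" (lo), "d" (hi) : "memory");
	}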
--- a/usr/src/uts/intel/ia32/ml/float.s	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/ia32/ml/float.s	Mon Aug 16 19:36:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -31,7 +30,10 @@
 /*      Copyright (c) 1987, 1988 Microsoft Corporation  */
 /*        All Rights Reserved   */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
 
 #include <sys/asm_linkage.h>
 #include <sys/asm_misc.h>
@@ -152,6 +154,10 @@
 patch_sse2(void)
 {}
 
+void
+patch_xsave(void)
+{}
+
 #else	/* __lint */
 
 	ENTRY_NP(patch_sse)
@@ -188,10 +194,74 @@
 	ret
 	SET_SIZE(patch_sse2)
 
+	/*
+	 * Patch lazy fp restore instructions in the trap handler
+	 * to use xrstor instead of frstor
+	 */
+	ENTRY_NP(patch_xsave)
+	_HOT_PATCH_PROLOG
+	/
+	/	frstor (%ebx); nop	-> xrstor (%ebx)
+	/
+	_HOT_PATCH(_xrstor_ebx_insn, _patch_xrstor_ebx, 3)
+	_HOT_PATCH_EPILOG
+	ret
+_xrstor_ebx_insn:			/ see ndptrap_frstor()
+	#xrstor (%ebx)
+	.byte	0x0f, 0xae, 0x2b
+	SET_SIZE(patch_xsave)
+
 #endif	/* __lint */
 #endif	/* __i386 */
 
+#if defined(__amd64)
+#if defined(__lint)
+
+void
+patch_xsave(void)
+{}
+
+#else	/* __lint */
+
+	/*
+	 * Patch lazy fp restore instructions in the trap handler
+	 * to use xrstor instead of fxrstorq
+	 */
+	ENTRY_NP(patch_xsave)
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r15
+	/
+	/	FXRSTORQ (%rbx);	-> xrstor (%rbx)
+	/ hot_patch(_xrstor_rbx_insn, _patch_xrstorq_rbx, 4)
+	/
+	leaq	_patch_xrstorq_rbx(%rip), %rbx
+	leaq	_xrstor_rbx_insn(%rip), %rbp
+	movq	$4, %r15
+1:
+	movq	%rbx, %rdi			/* patch address */
+	movzbq	(%rbp), %rsi			/* instruction byte */
+	movq	$1, %rdx			/* count */
+	call	hot_patch_kernel_text
+	addq	$1, %rbx
+	addq	$1, %rbp
+	subq	$1, %r15
+	jnz	1b
 	
+	popq	%r15
+	popq	%rbp
+	popq	%rbx
+	ret
+
+_xrstor_rbx_insn:			/ see ndptrap_frstor()
+	#rex.W=1 (.byte 0x48)
+	#xrstor (%rbx)
+	.byte	0x48, 0x0f, 0xae, 0x2b
+	SET_SIZE(patch_xsave)
+
+#endif	/* __lint */
+#endif	/* __amd64 */
+
 /*
  * One of these routines is called from any lwp with floating
  * point context as part of the prolog of a context switch.
@@ -201,6 +271,11 @@
 
 /*ARGSUSED*/
 void
+xsave_ctxt(void *arg)
+{}
+
+/*ARGSUSED*/
+void
 fpxsave_ctxt(void *arg)
 {}
 
@@ -242,6 +317,33 @@
 			/* AMD Software Optimization Guide - Section 6.2 */
 	SET_SIZE(fpxsave_ctxt)
 
+	ENTRY_NP(xsave_ctxt)
+	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
+	jne	1f
+	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+	/*
+	 * Setup xsave flags in EDX:EAX
+	 */
+	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
+	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+	leaq	FPU_CTX_FPU_REGS(%rdi), %rsi
+	#xsave	(%rsi)
+	.byte	0x0f, 0xae, 0x26
+	
+	/*
+	 * (see notes above about "exception pointers")
+	 * TODO: does it apply to any machine that uses xsave?
+	 */
+	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
+	jnc	0f				/* jump if ES = 0 */
+	fnclex		/* clear pending x87 exceptions */
+0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
+	fildl	.fpzero_const(%rip)
+			/* dummy load changes all exception pointers */
+	STTS(%rsi)	/* trap on next fpu touch */
+1:	ret
+	SET_SIZE(xsave_ctxt)
+
 #elif defined(__i386)
 
 	ENTRY_NP(fpnsave_ctxt)
@@ -276,6 +378,32 @@
 			/* AMD Software Optimization Guide - Section 6.2 */
 	SET_SIZE(fpxsave_ctxt)
 
+	ENTRY_NP(xsave_ctxt)
+	movl	4(%esp), %ecx		/* a struct fpu_ctx */
+	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%ecx)
+	jne	1f
+	
+	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx)
+	movl	FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax
+	movl	FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx
+	leal	FPU_CTX_FPU_REGS(%ecx), %ecx
+	#xsave	(%ecx)
+	.byte	0x0f, 0xae, 0x21
+	
+	/*
+	 * (see notes above about "exception pointers")
+	 * TODO: does it apply to any machine that uses xsave?
+	 */
+	btw	$7, FXSAVE_STATE_FSW(%ecx)	/* Test saved ES bit */
+	jnc	0f				/* jump if ES = 0 */
+	fnclex		/* clear pending x87 exceptions */
+0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
+	fildl	.fpzero_const
+			/* dummy load changes all exception pointers */
+	STTS(%edx)	/* trap on next fpu touch */
+1:	ret
+	SET_SIZE(xsave_ctxt)
+
 #endif	/* __i386 */
 
 	.align	8
@@ -298,6 +426,11 @@
 fpxsave(struct fxsave_state *f)
 {}
 
+/*ARGSUSED*/
+void
+xsave(struct xsave_state *f, uint64_t m)
+{}
+
 #else	/* __lint */
 
 #if defined(__amd64)
@@ -310,6 +443,19 @@
 	ret
 	SET_SIZE(fpxsave)
 
+	ENTRY_NP(xsave)
+	CLTS
+	movl	%esi, %eax		/* bv mask */
+	movq	%rsi, %rdx
+	shrq	$32, %rdx
+	#xsave	(%rdi)
+	.byte	0x0f, 0xae, 0x27
+	
+	fninit				/* clear exceptions, init x87 tags */
+	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
+	ret
+	SET_SIZE(xsave)
+
 #elif defined(__i386)
 
 	ENTRY_NP(fpsave)
@@ -329,6 +475,19 @@
 	ret
 	SET_SIZE(fpxsave)
 
+	ENTRY_NP(xsave)
+	CLTS
+	movl	4(%esp), %ecx
+	movl	8(%esp), %eax
+	movl	12(%esp), %edx
+	#xsave	(%ecx)
+	.byte	0x0f, 0xae, 0x21
+	
+	fninit				/* clear exceptions, init x87 tags */
+	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
+	ret
+	SET_SIZE(xsave)
+
 #endif	/* __i386 */
 #endif	/* __lint */
 
@@ -344,6 +503,11 @@
 fpxrestore(struct fxsave_state *f)
 {}
 
+/*ARGSUSED*/
+void
+xrestore(struct xsave_state *f, uint64_t m)
+{}
+
 #else	/* __lint */
 
 #if defined(__amd64)
@@ -354,6 +518,16 @@
 	ret
 	SET_SIZE(fpxrestore)
 
+	ENTRY_NP(xrestore)
+	CLTS
+	movl	%esi, %eax		/* bv mask */
+	movq	%rsi, %rdx
+	shrq	$32, %rdx
+	#xrstor	(%rdi)
+	.byte	0x0f, 0xae, 0x2f
+	ret
+	SET_SIZE(xrestore)
+
 #elif defined(__i386)
 
 	ENTRY_NP(fprestore)
@@ -370,6 +544,16 @@
 	ret
 	SET_SIZE(fpxrestore)
 
+	ENTRY_NP(xrestore)
+	CLTS
+	movl	4(%esp), %ecx
+	movl	8(%esp), %eax
+	movl	12(%esp), %edx
+	#xrstor	(%ecx)
+	.byte	0x0f, 0xae, 0x29
+	ret
+	SET_SIZE(xrestore)
+
 #endif	/* __i386 */
 #endif	/* __lint */
 
@@ -418,26 +602,56 @@
 
 	ENTRY_NP(fpinit)
 	CLTS
+	cmpl	$FP_XSAVE, fp_save_mech
+	je	1f
+
+	/* fxsave */
 	leaq	sse_initial(%rip), %rax
 	FXRSTORQ	((%rax))		/* load clean initial state */
 	ret
+
+1:	/* xsave */
+	leaq	avx_initial(%rip), %rcx
+	xorl	%edx, %edx
+	movl	$XFEATURE_AVX, %eax
+	bt	$X86FSET_AVX, x86_featureset
+	cmovael	%edx, %eax
+	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
+	/* xrstor (%rcx) */
+	.byte	0x0f, 0xae, 0x29		/* load clean initial state */
+	ret
 	SET_SIZE(fpinit)
 
 #elif defined(__i386)
 
 	ENTRY_NP(fpinit)
 	CLTS
-	cmpl	$__FP_SSE, fp_kind
+	cmpl	$FP_FXSAVE, fp_save_mech
 	je	1f
+	cmpl	$FP_XSAVE, fp_save_mech
+	je	2f
 
+	/* fnsave */
 	fninit
 	movl	$x87_initial, %eax
 	frstor	(%eax)			/* load clean initial state */
 	ret
-1:
+
+1:	/* fxsave */
 	movl	$sse_initial, %eax
 	fxrstor	(%eax)			/* load clean initial state */
 	ret
+
+2:	/* xsave */
+	movl	$avx_initial, %ecx
+	xorl	%edx, %edx
+	movl	$XFEATURE_AVX, %eax
+	bt	$X86FSET_AVX, x86_featureset
+	cmovael	%edx, %eax
+	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
+	/* xrstor (%ecx) */
+	.byte	0x0f, 0xae, 0x29	/* load clean initial state */
+	ret
 	SET_SIZE(fpinit)
 
 #endif	/* __i386 */
--- a/usr/src/uts/intel/ia32/ml/i86_subr.s	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/ia32/ml/i86_subr.s	Mon Aug 16 19:36:08 2010 -0700
@@ -30,6 +30,11 @@
  */
 
 /*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
  * General assembly language routines.
  * It is the intent of this file to contain routines that are
  * independent of the specific kernel architecture, and those that are
@@ -2867,6 +2872,16 @@
 invalidate_cache(void)
 {}
 
+/*ARGSUSED*/
+uint64_t
+get_xcr(uint_t r)
+{ return (0); }
+
+/*ARGSUSED*/
+void
+set_xcr(uint_t r, const uint64_t val)
+{}
+
 #else  /* __lint */
 
 #define	XMSR_ACCESS_VAL		$0x9c5a203a
@@ -2914,7 +2929,26 @@
 	leave
 	ret
 	SET_SIZE(xwrmsr)
-	
+
+	ENTRY(get_xcr)
+	movl	%edi, %ecx
+	#xgetbv
+	.byte	0x0f,0x01,0xd0
+	shlq	$32, %rdx
+	orq	%rdx, %rax
+	ret
+	SET_SIZE(get_xcr)
+
+	ENTRY(set_xcr)
+	movq	%rsi, %rdx
+	shrq	$32, %rdx
+	movl	%esi, %eax
+	movl	%edi, %ecx
+	#xsetbv
+	.byte	0x0f,0x01,0xd1
+	ret
+	SET_SIZE(set_xcr)
+
 #elif defined(__i386)
 
 	ENTRY(rdmsr)
@@ -2957,6 +2991,22 @@
 	ret
 	SET_SIZE(xwrmsr)
 
+	ENTRY(get_xcr)
+	movl	4(%esp), %ecx
+	#xgetbv
+	.byte	0x0f,0x01,0xd0
+	ret
+	SET_SIZE(get_xcr)
+
+	ENTRY(set_xcr)
+	movl	4(%esp), %ecx
+	movl	8(%esp), %eax
+	movl	12(%esp), %edx
+	#xsetbv
+	.byte	0x0f,0x01,0xd1
+	ret
+	SET_SIZE(set_xcr)
+
 #endif	/* __i386 */
 
 	ENTRY(invalidate_cache)
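get_xcr()/set_xcr() above hand-encode xgetbv (0f 01 d0) and xsetbv (0f 01 d1), again because the build's assembler predates the mnemonics. A user-land sketch of get_xcr(0), assuming GCC-style inline asm and that CPUID.1:ECX.OSXSAVE has already been verified; read_xcr0() is an illustrative name:

	#include <stdint.h>

	static uint64_t
	read_xcr0(void)
	{
		uint32_t eax, edx;

		/* xgetbv: %ecx selects the XCR; 0 is XFEATURE_ENABLED_MASK. */
		__asm__ volatile(".byte 0x0f, 0x01, 0xd0"
		    : "=a" (eax), "=d" (edx)
		    : "c" (0));
		return (((uint64_t)edx << 32) | eax);
	}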
--- a/usr/src/uts/intel/ia32/os/archdep.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/ia32/os/archdep.c	Mon Aug 16 19:36:08 2010 -0700
@@ -62,6 +62,7 @@
 #include <sys/dtrace.h>
 #include <sys/brand.h>
 #include <sys/machbrand.h>
+#include <sys/cmn_err.h>
 
 extern const struct fnsave_state x87_initial;
 extern const struct fxsave_state sse_initial;
@@ -278,41 +279,43 @@
 			 */
 			fp_free(fpu, 0);
 		}
-#if !defined(__amd64)
-		if (fp_kind == __FP_SSE) {
-#endif
-			fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx);
-			fpu->fpu_regs.kfpu_xstatus =
-			    fp->fp_reg_set.fpchip_state.xstatus;
-#if !defined(__amd64)
-		} else
-			bcopy(fp, &fpu->fpu_regs.kfpu_u.kfpu_fn,
-			    sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn));
+	}
+	/*
+	 * Else: if we are trying to change the FPU state of a thread which
+	 * hasn't yet initialized floating point, store the state in
+	 * the pcb and indicate that the state is valid.  When the
+	 * thread enables floating point, it will use this state instead
+	 * of the default state.
+	 */
+
+	switch (fp_save_mech) {
+#if defined(__i386)
+	case FP_FNSAVE:
+		bcopy(fp, &fpu->fpu_regs.kfpu_u.kfpu_fn,
+		    sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn));
+		break;
 #endif
-		fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status;
-		fpu->fpu_flags |= FPU_VALID;
-	} else {
-		/*
-		 * If we are trying to change the FPU state of a thread which
-		 * hasn't yet initialized floating point, store the state in
-		 * the pcb and indicate that the state is valid.  When the
-		 * thread enables floating point, it will use this state instead
-		 * of the default state.
-		 */
-#if !defined(__amd64)
-		if (fp_kind == __FP_SSE) {
-#endif
-			fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx);
-			fpu->fpu_regs.kfpu_xstatus =
-			    fp->fp_reg_set.fpchip_state.xstatus;
-#if !defined(__amd64)
-		} else
-			bcopy(fp, &fpu->fpu_regs.kfpu_u.kfpu_fn,
-			    sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn));
-#endif
-		fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status;
-		fpu->fpu_flags |= FPU_VALID;
+	case FP_FXSAVE:
+		fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx);
+		fpu->fpu_regs.kfpu_xstatus =
+		    fp->fp_reg_set.fpchip_state.xstatus;
+		break;
+
+	case FP_XSAVE:
+		fpregset_to_fxsave(fp,
+		    &fpu->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave);
+		fpu->fpu_regs.kfpu_xstatus =
+		    fp->fp_reg_set.fpchip_state.xstatus;
+		fpu->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |=
+		    (XFEATURE_LEGACY_FP | XFEATURE_SSE);
+		break;
+	default:
+		panic("Invalid fp_save_mech");
+		/*NOTREACHED*/
 	}
+
+	fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status;
+	fpu->fpu_flags |= FPU_VALID;
 }
 
 /*
@@ -349,32 +352,54 @@
 		/*
 		 * Cases 1 and 3.
 		 */
-#if !defined(__amd64)
-		if (fp_kind == __FP_SSE) {
+		switch (fp_save_mech) {
+#if defined(__i386)
+		case FP_FNSAVE:
+			bcopy(&fpu->fpu_regs.kfpu_u.kfpu_fn, fp,
+			    sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn));
+			break;
 #endif
+		case FP_FXSAVE:
 			fxsave_to_fpregset(&fpu->fpu_regs.kfpu_u.kfpu_fx, fp);
 			fp->fp_reg_set.fpchip_state.xstatus =
 			    fpu->fpu_regs.kfpu_xstatus;
-#if !defined(__amd64)
-		} else
-			bcopy(&fpu->fpu_regs.kfpu_u.kfpu_fn, fp,
-			    sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn));
-#endif
+			break;
+		case FP_XSAVE:
+			fxsave_to_fpregset(
+			    &fpu->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave, fp);
+			fp->fp_reg_set.fpchip_state.xstatus =
+			    fpu->fpu_regs.kfpu_xstatus;
+			break;
+		default:
+			panic("Invalid fp_save_mech");
+			/*NOTREACHED*/
+		}
 		fp->fp_reg_set.fpchip_state.status = fpu->fpu_regs.kfpu_status;
 	} else {
 		/*
 		 * Case 2.
 		 */
-#if !defined(__amd64)
-		if (fp_kind == __FP_SSE) {
+		switch (fp_save_mech) {
+#if defined(__i386)
+		case FP_FNSAVE:
+			bcopy(&x87_initial, fp, sizeof (x87_initial));
+			break;
 #endif
+		case FP_FXSAVE:
+		case FP_XSAVE:
+			/*
+			 * For now, we don't have any AVX-specific fields in
+			 * the ABI. If we add any in the future, we need to
+			 * initialize them as well.
+			 */
 			fxsave_to_fpregset(&sse_initial, fp);
 			fp->fp_reg_set.fpchip_state.xstatus =
 			    fpu->fpu_regs.kfpu_xstatus;
-#if !defined(__amd64)
-		} else
-			bcopy(&x87_initial, fp, sizeof (x87_initial));
-#endif
+			break;
+		default:
+			panic("Invalid fp_save_mech");
+			/*NOTREACHED*/
+		}
 		fp->fp_reg_set.fpchip_state.status = fpu->fpu_regs.kfpu_status;
 	}
 	kpreempt_enable();
--- a/usr/src/uts/intel/ia32/os/fpu.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/ia32/os/fpu.c	Mon Aug 16 19:36:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -30,7 +29,10 @@
 /*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
 /*		All Rights Reserved				*/
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -56,6 +58,10 @@
 #include <sys/debug.h>
 #include <sys/x86_archext.h>
 #include <sys/sysmacros.h>
+#include <sys/cmn_err.h>
+
+/* Legacy fxsave layout + xsave header + ymm */
+#define	AVX_XSAVE_SIZE		(512 + 64 + 256)
 
 /*CSTYLED*/
 #pragma	align 16 (sse_initial)
@@ -83,6 +89,45 @@
 	/* rest of structure is zero */
 };
 
+/*CSTYLED*/
+#pragma	align 64 (avx_initial)
+
+/*
+ * Initial kfpu state for AVX used by fpinit()
+ */
+const struct xsave_state avx_initial = {
+	/*
+	 * The definition below needs to be identical to sse_initial
+	 * defined above.
+	 */
+	{
+		FPU_CW_INIT,	/* fx_fcw */
+		0,		/* fx_fsw */
+		0,		/* fx_fctw */
+		0,		/* fx_fop */
+#if defined(__amd64)
+		0,		/* fx_rip */
+		0,		/* fx_rdp */
+#else
+		0,		/* fx_eip */
+		0,		/* fx_cs */
+		0,		/* __fx_ign0 */
+		0,		/* fx_dp */
+		0,		/* fx_ds */
+		0,		/* __fx_ign1 */
+#endif /* __amd64 */
+		SSE_MXCSR_INIT	/* fx_mxcsr */
+		/* rest of structure is zero */
+	},
+	/*
+	 * bit0 = 1 in XSTATE_BV to indicate that the legacy fields are
+	 * valid, and that the CPU should initialize XMM/YMM.
+	 */
+	1,
+	{0, 0}	/* These two 64-bit words must be zero */
+	/* rest of structure is zero */
+};
+
 /*
  * mxcsr_mask value (possibly reset in fpu_probe); used to avoid
  * the #gp exception caused by setting unsupported bits in the
@@ -103,11 +148,16 @@
 };
 
 #if defined(__amd64)
-#define	fpsave_ctxt	fpxsave_ctxt
+/*
+ * This vector is patched to xsave_ctxt() if we discover we have an
+ * XSAVE-capable chip in fpu_probe.
+ */
+void (*fpsave_ctxt)(void *) = fpxsave_ctxt;
 #elif defined(__i386)
 /*
- * This vector is patched to fpxsave_ctxt() if we discover
- * we have an SSE-capable chip in fpu_probe().
+ * This vector is patched to fpxsave_ctxt() if we discover we have an
+ * SSE-capable chip in fpu_probe(). It is patched to xsave_ctxt
+ * if we discover we have an XSAVE-capable chip in fpu_probe.
  */
 void (*fpsave_ctxt)(void *) = fpnsave_ctxt;
 #endif
@@ -129,6 +179,10 @@
 	struct fpu_ctx *fp;		/* parent fpu context */
 	struct fpu_ctx *cfp;		/* new fpu context */
 	struct fxsave_state *fx, *cfx;
+#if defined(__i386)
+	struct fnsave_state *fn, *cfn;
+#endif
+	struct xsave_state *cxs;
 
 	ASSERT(fp_kind != FP_NO);
 
@@ -145,27 +199,41 @@
 	cfp->fpu_regs.kfpu_status = 0;
 	cfp->fpu_regs.kfpu_xstatus = 0;
 
-#if defined(__amd64)
-	fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
-	cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
-	bcopy(&sse_initial, cfx, sizeof (*cfx));
-	cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
-	cfx->fx_fcw = fx->fx_fcw;
-#else
-	if (fp_kind == __FP_SSE) {
+	switch (fp_save_mech) {
+#if defined(__i386)
+	case FP_FNSAVE:
+		fn = &fp->fpu_regs.kfpu_u.kfpu_fn;
+		cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn;
+		bcopy(&x87_initial, cfn, sizeof (*cfn));
+		cfn->f_fcw = fn->f_fcw;
+		break;
+#endif
+	case FP_FXSAVE:
 		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
 		cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
 		bcopy(&sse_initial, cfx, sizeof (*cfx));
 		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
 		cfx->fx_fcw = fx->fx_fcw;
-	} else {
-		struct fnsave_state *fn = &fp->fpu_regs.kfpu_u.kfpu_fn;
-		struct fnsave_state *cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn;
+		break;
+
+	case FP_XSAVE:
+		cfp->fpu_xsave_mask = fp->fpu_xsave_mask;
+
+		fx = &fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave;
+		cxs = &cfp->fpu_regs.kfpu_u.kfpu_xs;
+		cfx = &cxs->xs_fxsave;
 
-		bcopy(&x87_initial, cfn, sizeof (*cfn));
-		cfn->f_fcw = fn->f_fcw;
+		bcopy(&avx_initial, cxs, sizeof (*cxs));
+		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
+		cfx->fx_fcw = fx->fx_fcw;
+		cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) &
+		    XFEATURE_FP_ALL);
+		break;
+	default:
+		panic("Invalid fp_save_mech");
+		/*NOTREACHED*/
 	}
-#endif
+
 	installctx(ct, cfp,
 	    fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
 	/*
@@ -212,7 +280,7 @@
 	if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) {
 		/* Clear errors if any to prevent frstor from complaining */
 		(void) fperr_reset();
-		if (fp_kind == __FP_SSE)
+		if (fp_kind & __FP_SSE)
 			(void) fpxerr_reset();
 		fpdisable();
 	}
@@ -234,18 +302,24 @@
 	}
 	ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu);
 
-#if defined(__amd64)
-	fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
-#else
-	switch (fp_kind) {
-	case __FP_SSE:
+	switch (fp_save_mech) {
+#if defined(__i386)
+	case FP_FNSAVE:
+		fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn);
+		break;
+#endif
+	case FP_FXSAVE:
 		fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
 		break;
-	default:
-		fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn);
+
+	case FP_XSAVE:
+		xsave(&fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
 		break;
+	default:
+		panic("Invalid fp_save_mech");
+		/*NOTREACHED*/
 	}
-#endif
+
 	fp->fpu_flags |= FPU_VALID;
 	kpreempt_enable();
 }
@@ -259,15 +333,24 @@
 void
 fp_restore(struct fpu_ctx *fp)
 {
-#if defined(__amd64)
-	fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
-#else
-	/* case 2 */
-	if (fp_kind == __FP_SSE)
+	switch (fp_save_mech) {
+#if defined(__i386)
+	case FP_FNSAVE:
+		fprestore(&fp->fpu_regs.kfpu_u.kfpu_fn);
+		break;
+#endif
+	case FP_FXSAVE:
 		fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
-	else
-		fprestore(&fp->fpu_regs.kfpu_u.kfpu_fn);
-#endif
+		break;
+
+	case FP_XSAVE:
+		xrestore(&fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
+		break;
+	default:
+		panic("Invalid fp_save_mech");
+		/*NOTREACHED*/
+	}
+
 	fp->fpu_flags &= ~FPU_VALID;
 }
 
@@ -289,6 +372,11 @@
 	/*
 	 * Always initialize a new context and initialize the hardware.
 	 */
+	if (fp_save_mech == FP_XSAVE) {
+		fp->fpu_xsave_mask = get_xcr(XFEATURE_ENABLED_MASK) &
+		    XFEATURE_FP_ALL;
+	}
+
 	installctx(curthread, fp,
 	    fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
 	fpinit();
@@ -324,6 +412,9 @@
 	ASSERT(sizeof (struct fxsave_state) == 512 &&
 	    sizeof (struct fnsave_state) == 108);
 	ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
+
+	ASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE);
+
 #if defined(__i386)
 	ASSERT(sizeof (struct fpu) == sizeof (struct __old_fpu));
 #endif	/* __i386 */
@@ -375,8 +466,9 @@
 	 * configured to enable fully fledged (%xmm) fxsave/fxrstor on
 	 * this CPU.  For the non-SSE case, ensure that it isn't.
 	 */
-	ASSERT((fp_kind == __FP_SSE && (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) ||
-	    (fp_kind != __FP_SSE &&
+	ASSERT(((fp_kind & __FP_SSE) &&
+	    (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) ||
+	    (!(fp_kind & __FP_SSE) &&
 	    (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0));
 #endif
 
@@ -451,25 +543,36 @@
 	fp_save(fp);
 
 	/* clear exception flags in saved state, as if by fnclex */
-#if defined(__amd64)
-	fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
-	fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
-	fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
-#else
-		switch (fp_kind) {
-		case __FP_SSE:
-			fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
-			fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
-			fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
-			break;
-		default:
-			fpsw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw;
-			fpcw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw;
-			fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS;
-			break;
-		}
+	switch (fp_save_mech) {
+#if defined(__i386)
+	case FP_FNSAVE:
+		fpsw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw;
+		fpcw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw;
+		fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS;
+		break;
 #endif
 
+	case FP_FXSAVE:
+		fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
+		fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
+		fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
+		break;
+
+	case FP_XSAVE:
+		fpsw = fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fsw;
+		fpcw = fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fcw;
+		fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fsw &= ~FPS_SW_EFLAGS;
+		/*
+		 * Always set LEGACY_FP as it may have been cleared by the
+		 * XSAVE instruction.
+		 */
+		fp->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |= XFEATURE_LEGACY_FP;
+		break;
+	default:
+		panic("Invalid fp_save_mech");
+		/*NOTREACHED*/
+	}
+
 	fp->fpu_regs.kfpu_status = fpsw;
 
 	if ((fpsw & FPS_ES) == 0)
@@ -493,7 +596,7 @@
 	uint32_t mxcsr, xmask;
 	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
 
-	ASSERT(fp_kind == __FP_SSE);
+	ASSERT(fp_kind & __FP_SSE);
 
 	/*
 	 * NOTE: Interrupts are disabled during execution of this
@@ -625,20 +728,30 @@
 	 */
 	fp_save(fp);
 
-#if defined(__amd64)
-	fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
-	fx->fx_fcw = fcw;
-	fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
-#else
-	switch (fp_kind) {
-	case __FP_SSE:
+	switch (fp_save_mech) {
+#if defined(__i386)
+	case FP_FNSAVE:
+		fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw;
+		break;
+#endif
+	case FP_FXSAVE:
 		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
 		fx->fx_fcw = fcw;
 		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
 		break;
-	default:
-		fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw;
+
+	case FP_XSAVE:
+		fx = &fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave;
+		fx->fx_fcw = fcw;
+		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
+		/*
+		 * Always set LEGACY_FP as it may have been cleared by the
+		 * XSAVE instruction.
+		 */
+		fp->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |= XFEATURE_LEGACY_FP;
 		break;
+	default:
+		panic("Invalid fp_save_mech");
+		/*NOTREACHED*/
 	}
-#endif
 }
--- a/usr/src/uts/intel/ia32/os/sysi86.c	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/ia32/os/sysi86.c	Mon Aug 16 19:36:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
@@ -30,8 +29,6 @@
 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
 /*	  All Rights Reserved	*/
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/sysmacros.h>
@@ -170,7 +167,7 @@
 			break;
 		}
 		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
-		return (fp_kind == __FP_SSE ? 1 : 0);
+		return ((fp_kind & __FP_SSE) ? 1 : 0);
 
 	/* real time clock management commands */
 
--- a/usr/src/uts/intel/sys/archsystm.h	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/sys/archsystm.h	Mon Aug 16 19:36:08 2010 -0700
@@ -58,6 +58,8 @@
 extern void patch_sse2(void);
 #endif
 
+extern void patch_xsave(void);
+
 extern void cli(void);
 extern void sti(void);
 
@@ -193,6 +195,7 @@
 #if defined(__amd64) && !defined(__xpv)
 extern void patch_memops(uint_t);
 #endif	/* defined(__amd64) && !defined(__xpv) */
+extern void setup_xfem(void);
 #define	cpr_dprintf prom_printf
 #define	IN_XPV_PANIC() (__lintzero)
 #endif
--- a/usr/src/uts/intel/sys/controlregs.h	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/sys/controlregs.h	Mon Aug 16 19:36:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_CONTROLREGS_H
@@ -108,9 +107,10 @@
 					/* 0x1000 reserved */
 #define	CR4_VMXE	0x2000
 #define	CR4_SMXE	0x4000
+#define	CR4_OSXSAVE	0x40000		/* OS xsave/xrstor support	*/
 
 #define	FMT_CR4							\
-	"\20\17smxe\16vmxe\13xmme\12fxsr\11pce\10pge"		\
+	"\20\23osxsav\17smxe\16vmxe\13xmme\12fxsr\11pce\10pge"		\
 	"\7mce\6pae\5pse\4de\3tsd\2pvi\1vme"
 
 /*
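User code cannot read %cr4 directly; the hardware mirrors CR4_OSXSAVE into CPUID.1:ECX bit 27 (CPUID_INTC_ECX_OSXSAVE in x86_archext.h below). A minimal sketch of that check, assuming GCC-style inline asm; os_supports_xsave() is an illustrative name:

	#include <stdint.h>

	static int
	os_supports_xsave(void)
	{
		uint32_t eax, ebx, ecx, edx;

		__asm__ volatile("cpuid"
		    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
		    : "a" (1), "c" (0));
		return ((ecx & 0x08000000) != 0); /* CPUID_INTC_ECX_OSXSAVE */
	}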
--- a/usr/src/uts/intel/sys/fp.h	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/sys/fp.h	Mon Aug 16 19:36:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
@@ -30,8 +29,6 @@
 #ifndef _SYS_FP_H
 #define	_SYS_FP_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -50,7 +47,20 @@
 #define	FP_387	3	/* 80387 chip present				*/
 #define	FP_487	6	/* 80487 chip present				*/
 #define	FP_486	6	/* 80486 chip present				*/
-#define	__FP_SSE 0x103	/* x87 plus SSE-capable CPU			*/
+/*
+ * The following values are bit flags instead of actual values.
+ * E.g. to know if we are using SSE, test (value & __FP_SSE) instead
+ * of (value == __FP_SSE).
+ */
+#define	__FP_SSE	0x100	/* .. plus SSE-capable CPU		*/
+#define	__FP_AVX	0x200	/* .. plus AVX-capable CPU		*/
+
+/*
+ * values that go into fp_save_mech
+ */
+#define	FP_FNSAVE	1	/* fnsave/frstor instructions		*/
+#define	FP_FXSAVE	2	/* fxsave/fxrstor instructions		*/
+#define	FP_XSAVE	3	/* xsave/xrstor instructions		*/
 
 /*
  * masks for 80387 control word
@@ -159,6 +169,7 @@
 	"\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie"
 
 extern int fp_kind;		/* kind of fp support			*/
+extern int fp_save_mech;	/* fp save/restore mechanism		*/
 extern int fpu_exists;		/* FPU hw exists			*/
 
 #ifdef _KERNEL
@@ -174,15 +185,19 @@
 
 extern void fpnsave_ctxt(void *);
 extern void fpxsave_ctxt(void *);
+extern void xsave_ctxt(void *);
 extern void (*fpsave_ctxt)(void *);
 
 struct fnsave_state;
 struct fxsave_state;
+struct xsave_state;
 extern void fxsave_insn(struct fxsave_state *);
 extern void fpsave(struct fnsave_state *);
 extern void fprestore(struct fnsave_state *);
 extern void fpxsave(struct fxsave_state *);
 extern void fpxrestore(struct fxsave_state *);
+extern void xsave(struct xsave_state *, uint64_t);
+extern void xrestore(struct xsave_state *, uint64_t);
 
 extern void fpenable(void);
 extern void fpdisable(void);
--- a/usr/src/uts/intel/sys/pcb.h	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/sys/pcb.h	Mon Aug 16 19:36:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_PCB_H
@@ -37,6 +36,10 @@
 #ifndef _ASM
 typedef struct fpu_ctx {
 	kfpu_t		fpu_regs;	/* kernel save area for FPU */
+	uint64_t	fpu_xsave_mask;	/* xsave mask for FPU/SSE/AVX */
+#if defined(__i386)
+	uint64_t	fpu_padding;	/* fix 32bit libmicro regression */
+#endif
 	uint_t		fpu_flags;	/* FPU state flags */
 } fpu_ctx_t;
 
--- a/usr/src/uts/intel/sys/regset.h	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/sys/regset.h	Mon Aug 16 19:36:08 2010 -0700
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,10 +19,8 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  */
-
 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
 
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T		*/
@@ -32,8 +29,6 @@
 #ifndef	_SYS_REGSET_H
 #define	_SYS_REGSET_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/feature_tests.h>
 
 #if !defined(_ASM)
@@ -246,6 +241,18 @@
 #endif
 };	/* 512 bytes */
 
+/*
+ * This structure is written to memory by an 'xsave' instruction.
+ * The first 512 bytes are compatible with the format of an 'fxsave' area.
+ */
+struct xsave_state {
+	struct fxsave_state	xs_fxsave;
+	uint64_t		xs_xstate_bv;	/* 512 */
+	uint64_t		xs_rsv_mbz[2];
+	uint64_t		xs_reserved[5];
+	upad128_t		xs_ymm[16];	/* avx - 576 */
+};	/* 832 bytes, asserted in fpnoextflt() */
+
 #if defined(__amd64)
 
 typedef struct fpu {
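The new layout can be sanity-checked at compile time: 512 legacy bytes plus the 64-byte xsave header plus 256 ymm bytes gives the 832-byte total noted above, and the ymm offset matches CPUID_LEAFD_2_YMM_OFFSET (576) checked in cpuid.c. A hedged C11 sketch, assuming a build environment that provides this <sys/regset.h> (the kernel itself relies on a runtime ASSERT in fpnoextflt()):

	#include <stddef.h>
	#include <sys/regset.h>

	_Static_assert(offsetof(struct xsave_state, xs_xstate_bv) == 512,
	    "xsave header must follow the 512-byte fxsave image");
	_Static_assert(offsetof(struct xsave_state, xs_ymm) == 576,
	    "ymm area must sit at CPUID_LEAFD_2_YMM_OFFSET");
	_Static_assert(sizeof (struct xsave_state) == 832,
	    "832 = 512 legacy + 64 header + 256 ymm");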
@@ -352,6 +359,7 @@
 #if defined(__i386)
 		struct fnsave_state kfpu_fn;
 #endif
+		struct xsave_state kfpu_xs;
 	} kfpu_u;
 	uint32_t kfpu_status;		/* saved at #mf exception */
 	uint32_t kfpu_xstatus;		/* saved at #xm exception */
--- a/usr/src/uts/intel/sys/x86_archext.h	Mon Aug 16 18:05:48 2010 -0700
+++ b/usr/src/uts/intel/sys/x86_archext.h	Mon Aug 16 19:36:08 2010 -0700
@@ -112,9 +112,13 @@
 #define	CPUID_INTC_ECX_MOVBE	0x00400000	/* MOVBE insn */
 #define	CPUID_INTC_ECX_POPCNT	0x00800000	/* POPCNT insn */
 #define	CPUID_INTC_ECX_AES	0x02000000	/* AES insns */
+#define	CPUID_INTC_ECX_XSAVE	0x04000000	/* XSAVE/XRSTOR insns */
+#define	CPUID_INTC_ECX_OSXSAVE	0x08000000	/* OS supports XSAVE insns */
+#define	CPUID_INTC_ECX_AVX	0x10000000	/* AVX supported */
 
 #define	FMT_CPUID_INTC_ECX					\
 	"\20"							\
+	"\35avx\34osxsav\33xsave"				\
 	"\32aes"						\
 	"\30popcnt\27movbe\25sse4.2\24sse4.1\23dca"		\
 	"\20\17etprd\16cx16\13cid\12ssse3\11tm2"		\
@@ -356,6 +360,8 @@
 #define	X86FSET_64		30
 #define	X86FSET_AES		31
 #define	X86FSET_PCLMULQDQ	32
+#define	X86FSET_XSAVE		33
+#define	X86FSET_AVX		34
 
 /*
  * flags to patch tsc_read routine.
@@ -561,6 +567,20 @@
 #define	X86_SOCKET_ASB2		_X86_SOCKET_MKVAL(X86_VENDOR_AMD, 0x001000)
 #define	X86_SOCKET_C32		_X86_SOCKET_MKVAL(X86_VENDOR_AMD, 0x002000)
 
+/*
+ * xgetbv/xsetbv support
+ */
+
+#define	XFEATURE_ENABLED_MASK	0x0
+/*
+ * XFEATURE_ENABLED_MASK values (eax)
+ */
+#define	XFEATURE_LEGACY_FP	0x1
+#define	XFEATURE_SSE		0x2
+#define	XFEATURE_AVX		0x4
+#define	XFEATURE_MAX		XFEATURE_AVX
+#define	XFEATURE_FP_ALL		(XFEATURE_LEGACY_FP|XFEATURE_SSE|XFEATURE_AVX)
+
 #if !defined(_ASM)
 
 #if defined(_KERNEL) || defined(_KMEMUSER)
@@ -601,6 +621,13 @@
 	uint32_t	cp_edx;
 };
 
+/*
+ * Utility functions to get/set extended control registers (XCR).
+ * Initial use is to get/set the contents of the XFEATURE_ENABLED_MASK.
+ */
+extern uint64_t get_xcr(uint_t);
+extern void set_xcr(uint_t, uint64_t);
+
 extern uint64_t rdmsr(uint_t);
 extern void wrmsr(uint_t, const uint64_t);
 extern uint64_t xrdmsr(uint_t);
@@ -732,6 +759,8 @@
 extern int get_hwenv(void);
 extern int is_controldom(void);
 
+extern void xsave_setup_msr(struct cpu *);
+
 /*
  * Defined hardware environments
  */