changeset 25251:cb2fa6ac1b0c

[illumos-gate merge]
commit 5c1597bcad8dabd41c0a919fb8acfabd8054e924
    12992 loader: logo-illumos.4th needs to have mode resets
commit abe1e6b305e672d0eedeb6b52003acac658c8371
    12982 ambiguous instructions require an explicit suffix
commit c3ae3afa3e57b39e29577c4b791adbe511bb7e3f
    12916 bhyve should be able to limit vmx capabilities
    12917 bhyve should always use Unrestricted Guest
commit 22e68029451fec16a660c22ed1304b05ed67f43d
    12973 add support for PCI-E rge cards
author Jerry Jelinek <jerry.jelinek@joyent.com>
date Wed, 29 Jul 2020 11:35:17 +0000
parents eda8bc751f09 (current diff) f4d7d7636089 (diff)
children c6fb4943e10c
files usr/src/cmd/bhyve/bhyverun.c usr/src/cmd/bhyve/spinup_ap.c usr/src/lib/libvmmapi/common/vmmapi.c usr/src/uts/i86pc/io/vmm/amd/svm.c usr/src/uts/i86pc/io/vmm/intel/vmx.c usr/src/uts/i86pc/io/vmm/intel/vmx.h usr/src/uts/i86pc/ml/kpti_trampolines.s usr/src/uts/i86pc/ml/locore.s usr/src/uts/i86pc/sys/vmm.h usr/src/uts/intel/brand/common/brand_asm.h usr/src/uts/intel/ia32/ml/float.s usr/src/uts/intel/os/driver_aliases
diffstat 18 files changed, 206 insertions(+), 534 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/boot/sys/boot/forth/logo-illumos.4th	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/boot/sys/boot/forth/logo-illumos.4th	Wed Jul 29 11:35:17 2020 +0000
@@ -38,25 +38,25 @@
 
 	0 0 0 0 0 s" /boot/illumos.png" fb-putimage if 2drop exit then
 
-	s"     @[33m,                             " logo+
-	s"    @[33m,./% @[31m&                         " logo+
-	s"    @[33m(****@[31m*(                        " logo+
-	s"      @[33m*/*@[31m//                        " logo+
-	s"      @[33m*,//@[31m/((                      " logo+
-	s"        @[33m,*/@[31m/((/%                   " logo+
-	s"          @[33m//@[31m/((((%                 " logo+
-	s"           @[33m,*@[31m/(((((%       @[33m&@[31m#///((&" logo+
-	s"            @[33m./@[31m//((((((%  %/(((/    " logo+
-	s"             @[33m./@[31m///(((((///((,      " logo+
-	s"             @[33m.*//@[31m//((((((((((      " logo+
-	s"                  ./((((((((/      " logo+
-	s"                   (/(((((((       " logo+
-	s"                   ,,((((((/       " logo+
-	s"                     /((((         " logo+
-	s"                  %/((((           " logo+
-	s"              @[33m&@[31m%#/((((.            " logo+
-	s"            @[33m,@[31m( ,/ /(/              " logo+
-	s"                ,/@[m                 " logo+
+	s"     @[33m,@[m                             " logo+
+	s"    @[33m,./% @[31m&@[m                         " logo+
+	s"    @[33m(****@[31m*(@[m                        " logo+
+	s"      @[33m*/*@[31m//@[m                        " logo+
+	s"      @[33m*,//@[31m/((@[m                      " logo+
+	s"        @[33m,*/@[31m/((/%@[m                   " logo+
+	s"          @[33m//@[31m/((((%@[m                 " logo+
+	s"           @[33m,*@[31m/(((((%@[m       @[33m&@[31m#///((&@[m" logo+
+	s"            @[33m./@[31m//((((((%@[m  @[31m%/(((/@[m    " logo+
+	s"             @[33m./@[31m///(((((///((,@[m      " logo+
+	s"             @[33m.*//@[31m//((((((((((@[m      " logo+
+	s"                  @[31m./((((((((/@[m      " logo+
+	s"                   @[31m(/(((((((@[m       " logo+
+	s"                   @[31m,,((((((/@[m       " logo+
+	s"                     @[31m/((((@[m         " logo+
+	s"                  @[31m%/((((@[m           " logo+
+	s"              @[33m&@[31m%#/((((.@[m            " logo+
+	s"            @[33m,@[31m(@[m @[31m,/@[m @[31m/(/@[m              " logo+
+	s"                @[31m,/@[m                 " logo+
 
 	2drop
 ;
--- a/usr/src/boot/sys/boot/i386/cdboot/cdboot.S	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/boot/sys/boot/i386/cdboot/cdboot.S	Wed Jul 29 11:35:17 2020 +0000
@@ -34,7 +34,7 @@
 #
 # Basically, we first create a set of boot arguments to pass to the loaded
 # binary.  Then we attempt to load /boot/loader from the CD we were booted
-# off of. 
+# from.
 #
 
 #include <bootargs.h>
@@ -136,7 +136,7 @@
 		stosl				#  to zero
 		mov drive,%dl			# Store BIOS boot device
 		mov %dl,0x4(%bx)		#  in kargs->bootdev
-		or $KARGS_FLAGS_CD,0x8(%bx)	# kargs->bootflags |=
+		orb $KARGS_FLAGS_CD,0x8(%bx)	# kargs->bootflags |=
 						#  KARGS_FLAGS_CD
 #
 # Load Volume Descriptor
@@ -494,7 +494,7 @@
 # legacy-free and simply doesn't have a keyboard controller.
 # Thus, the A20 line is already enabled.
 #
-seta20: 	cli				# Disable interrupts
+seta20:		cli				# Disable interrupts
 		xor %cx,%cx			# Clear
 seta20.1:	inc %cx				# Increment, overflow?
 		jz seta20.3			# Yes
@@ -518,7 +518,7 @@
 		shrb $0x4,%al			# Do upper
 		call hex8.1			#  4
 		popl %eax			# Restore
-hex8.1: 	andb $0xf,%al			# Get lower 4
+hex8.1:		andb $0xf,%al			# Get lower 4
 		cmpb $0xa,%al			# Convert
 		sbbb $0x69,%al			#  to hex
 		das				#  digit
--- a/usr/src/cmd/bhyve/bhyverun.c	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/cmd/bhyve/bhyverun.c	Wed Jul 29 11:35:17 2020 +0000
@@ -39,6 +39,7 @@
  *
  * Copyright 2015 Pluribus Networks Inc.
  * Copyright 2018 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
  */
 
 #include <sys/cdefs.h>
@@ -952,6 +953,7 @@
 static int
 num_vcpus_allowed(struct vmctx *ctx)
 {
+#ifdef __FreeBSD__
 	int tmp, error;
 
 	error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp);
@@ -964,6 +966,10 @@
 		return (VM_MAXCPU);
 	else
 		return (1);
+#else
+	/* Unrestricted Guest is always enabled on illumos */
+	return (VM_MAXCPU);
+#endif /* __FreeBSD__ */
 }
 
 void
@@ -1340,11 +1346,15 @@
 	vga_init(1);
 
 	if (lpc_bootrom()) {
+#ifdef __FreeBSD__
 		if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
 			fprintf(stderr, "ROM boot failed: unrestricted guest "
 			    "capability not available\n");
 			exit(4);
 		}
+#else
+		/* Unrestricted Guest is always enabled on illumos */
+#endif
 		error = vcpu_reset(ctx, BSP);
 		assert(error == 0);
 	}
--- a/usr/src/cmd/bhyve/spinup_ap.c	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/cmd/bhyve/spinup_ap.c	Wed Jul 29 11:35:17 2020 +0000
@@ -27,6 +27,18 @@
  *
  * $FreeBSD$
  */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
@@ -89,6 +101,7 @@
 
 	fbsdrun_set_capabilities(ctx, newcpu);
 
+#ifdef __FreeBSD__
 	/*
 	 * Enable the 'unrestricted guest' mode for 'newcpu'.
 	 *
@@ -97,6 +110,9 @@
 	 */
 	error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
 	assert(error == 0);
+#else
+	/* Unrestricted Guest is always enabled on illumos */
+#endif
 
 	spinup_ap_realmode(ctx, newcpu, &rip);
 
--- a/usr/src/lib/libvmmapi/common/vmmapi.c	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c	Wed Jul 29 11:35:17 2020 +0000
@@ -39,6 +39,7 @@
  *
  * Copyright 2015 Pluribus Networks Inc.
  * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
  */
 
 #include <sys/cdefs.h>
@@ -1004,7 +1005,9 @@
 	[VM_CAP_HALT_EXIT]  = "hlt_exit",
 	[VM_CAP_MTRAP_EXIT] = "mtrap_exit",
 	[VM_CAP_PAUSE_EXIT] = "pause_exit",
+#ifdef __FreeBSD__
 	[VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest",
+#endif
 	[VM_CAP_ENABLE_INVPCID] = "enable_invpcid",
 	[VM_CAP_BPT_EXIT] = "bpt_exit",
 };
--- a/usr/src/pkg/manifests/driver-network-rge.mf	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/pkg/manifests/driver-network-rge.mf	Wed Jul 29 11:35:17 2020 +0000
@@ -47,7 +47,9 @@
     alias=pci10ec,8168 \
     alias=pci10ec,8169 \
     alias=pci16ec,116 \
-    alias=pciex10ec,8136
+    alias=pciex10ec,8136 \
+    alias=pciex10ec,8168 \
+    alias=pciex10ec,8169
 file path=kernel/drv/$(ARCH64)/rge group=sys
 file path=usr/share/man/man7d/rge.7d
 legacy pkg=SUNWrge desc="Realtek Gigabit Ethernet Network Adapter Driver" \
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c	Wed Jul 29 11:35:17 2020 +0000
@@ -2342,11 +2342,6 @@
 		svm_set_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
 		    VMCB_INTCPT_PAUSE, val);
 		break;
-	case VM_CAP_UNRESTRICTED_GUEST:
-		/* Unrestricted guest execution cannot be disabled in SVM */
-		if (val == 0)
-			error = EINVAL;
-		break;
 	default:
 		error = ENOENT;
 		break;
@@ -2372,9 +2367,6 @@
 		*retval = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
 		    VMCB_INTCPT_PAUSE);
 		break;
-	case VM_CAP_UNRESTRICTED_GUEST:
-		*retval = 1;	/* unrestricted guest is always enabled */
-		break;
 	default:
 		error = ENOENT;
 		break;
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c	Wed Jul 29 11:35:17 2020 +0000
@@ -134,7 +134,14 @@
 	PROCBASED_CR3_STORE_EXITING |	\
 	PROCBASED_IO_BITMAPS)
 
-#define	PROCBASED_CTLS2_ONE_SETTING	PROCBASED2_ENABLE_EPT
+/*
+ * EPT and Unrestricted Guest are considered necessities.  The latter is not a
+ * requirement on FreeBSD, where grub2-bhyve is used to load guests directly
+ * without a bootrom starting in real mode.
+ */
+#define	PROCBASED_CTLS2_ONE_SETTING		\
+	(PROCBASED2_ENABLE_EPT |		\
+	PROCBASED2_UNRESTRICTED_GUEST)
 #define	PROCBASED_CTLS2_ZERO_SETTING	0
 
 #define	VM_EXIT_CTLS_ONE_SETTING					\
@@ -206,10 +213,6 @@
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, pause_exit, CTLFLAG_RD, &cap_pause_exit,
     0, "PAUSE triggers a VM-exit");
 
-static int cap_unrestricted_guest;
-SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, unrestricted_guest, CTLFLAG_RD,
-    &cap_unrestricted_guest, 0, "Unrestricted guests");
-
 static int cap_monitor_trap;
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, monitor_trap, CTLFLAG_RD,
     &cap_monitor_trap, 0, "Monitor trap flag");
@@ -218,17 +221,8 @@
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, invpcid, CTLFLAG_RD, &cap_invpcid,
     0, "Guests are allowed to use INVPCID");
 
-static int tpr_shadowing;
-SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, tpr_shadowing, CTLFLAG_RD,
-    &tpr_shadowing, 0, "TPR shadowing support");
-
-static int virtual_interrupt_delivery;
-SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD,
-    &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery support");
-
-static int posted_interrupts;
-SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, posted_interrupts, CTLFLAG_RD,
-    &posted_interrupts, 0, "APICv posted interrupt support");
+/* Extra capabilities (VMX_CAP_*) beyond the minimum */
+static enum vmx_caps vmx_capabilities;
 
 static int pirvec = -1;
 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupt_vector, CTLFLAG_RD,
@@ -601,94 +595,6 @@
 	}
 }
 
-#ifdef __FreeBSD__
-static void
-vpid_init(void)
-{
-	/*
-	 * VPID 0 is required when the "enable VPID" execution control is
-	 * disabled.
-	 *
-	 * VPIDs [1,VM_MAXCPU] are used as the "overflow namespace" when the
-	 * unit number allocator does not have sufficient unique VPIDs to
-	 * satisfy the allocation.
-	 *
-	 * The remaining VPIDs are managed by the unit number allocator.
-	 */
-	vpid_unr = new_unrhdr(VM_MAXCPU + 1, 0xffff, NULL);
-}
-
-static void
-vmx_disable(void *arg __unused)
-{
-	struct invvpid_desc invvpid_desc = { 0 };
-	struct invept_desc invept_desc = { 0 };
-
-	if (vmxon_enabled[curcpu]) {
-		/*
-		 * See sections 25.3.3.3 and 25.3.3.4 in Intel Vol 3b.
-		 *
-		 * VMXON or VMXOFF are not required to invalidate any TLB
-		 * caching structures. This prevents potential retention of
-		 * cached information in the TLB between distinct VMX episodes.
-		 */
-		invvpid(INVVPID_TYPE_ALL_CONTEXTS, invvpid_desc);
-		invept(INVEPT_TYPE_ALL_CONTEXTS, invept_desc);
-		vmxoff();
-	}
-	load_cr4(rcr4() & ~CR4_VMXE);
-}
-
-static int
-vmx_cleanup(void)
-{
-
-	if (pirvec >= 0)
-		lapic_ipi_free(pirvec);
-
-	if (vpid_unr != NULL) {
-		delete_unrhdr(vpid_unr);
-		vpid_unr = NULL;
-	}
-
-	if (nmi_flush_l1d_sw == 1)
-		nmi_flush_l1d_sw = 0;
-
-	smp_rendezvous(NULL, vmx_disable, NULL, NULL);
-
-	return (0);
-}
-
-static void
-vmx_enable(void *arg __unused)
-{
-	int error;
-	uint64_t feature_control;
-
-	feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
-	if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 0 ||
-	    (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) {
-		wrmsr(MSR_IA32_FEATURE_CONTROL,
-		    feature_control | IA32_FEATURE_CONTROL_VMX_EN |
-		    IA32_FEATURE_CONTROL_LOCK);
-	}
-
-	load_cr4(rcr4() | CR4_VMXE);
-
-	*(uint32_t *)vmxon_region[curcpu] = vmx_revision();
-	error = vmxon(vmxon_region[curcpu]);
-	if (error == 0)
-		vmxon_enabled[curcpu] = 1;
-}
-
-static void
-vmx_restore(void)
-{
-
-	if (vmxon_enabled[curcpu])
-		vmxon(vmxon_region[curcpu]);
-}
-#else /* __FreeBSD__ */
 static int
 vmx_cleanup(void)
 {
@@ -701,48 +607,14 @@
 {
 	/* No-op on illumos */
 }
-#endif /* __FreeBSD__ */
 
 static int
 vmx_init(int ipinum)
 {
 	int error;
-#ifdef __FreeBSD__
-	uint64_t basic, fixed0, fixed1, feature_control;
-#else
 	uint64_t fixed0, fixed1;
-#endif
-	uint32_t tmp, procbased2_vid_bits;
-
-#ifdef __FreeBSD__
-	/* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */
-	if (!(cpu_feature2 & CPUID2_VMX)) {
-		printf("vmx_init: processor does not support VMX operation\n");
-		return (ENXIO);
-	}
-
-	/*
-	 * Verify that MSR_IA32_FEATURE_CONTROL lock and VMXON enable bits
-	 * are set (bits 0 and 2 respectively).
-	 */
-	feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
-	if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 1 &&
-	    (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) {
-		printf("vmx_init: VMX operation disabled by BIOS\n");
-		return (ENXIO);
-	}
-
-	/*
-	 * Verify capabilities MSR_VMX_BASIC:
-	 * - bit 54 indicates support for INS/OUTS decoding
-	 */
-	basic = rdmsr(MSR_VMX_BASIC);
-	if ((basic & (1UL << 54)) == 0) {
-		printf("vmx_init: processor does not support desired basic "
-		    "capabilities\n");
-		return (EINVAL);
-	}
-#endif /* __FreeBSD__ */
+	uint32_t tmp;
+	enum vmx_caps avail_caps = VMX_CAP_NONE;
 
 	/* Check support for primary processor-based VM-execution controls */
 	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
@@ -826,93 +698,53 @@
 					 PROCBASED_PAUSE_EXITING, 0,
 					 &tmp) == 0);
 
-	cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
-					MSR_VMX_PROCBASED_CTLS2,
-					PROCBASED2_UNRESTRICTED_GUEST, 0,
-				        &tmp) == 0);
-
 	cap_invpcid = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
 	    MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0,
 	    &tmp) == 0);
 
-	/*
-	 * Check support for TPR shadow.
-	 */
-	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
-	    MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0,
-	    &tmp);
-	if (error == 0) {
-		tpr_shadowing = 1;
-		TUNABLE_INT_FETCH("hw.vmm.vmx.use_tpr_shadowing",
-		    &tpr_shadowing);
-	}
-
-	if (tpr_shadowing) {
-		procbased_ctls |= PROCBASED_USE_TPR_SHADOW;
-		procbased_ctls &= ~PROCBASED_CR8_LOAD_EXITING;
-		procbased_ctls &= ~PROCBASED_CR8_STORE_EXITING;
-	}
-
-	/*
-	 * Check support for virtual interrupt delivery.
+	/*
+	 * Check for APIC virtualization capabilities: TPR shadowing, full
+	 * APICv (with or without x2APIC support), and posted interrupt
+	 * handling.
 	 */
-	procbased2_vid_bits = (PROCBASED2_VIRTUALIZE_APIC_ACCESSES |
-	    PROCBASED2_VIRTUALIZE_X2APIC_MODE |
-	    PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
-	    PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
-
-	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2,
-	    procbased2_vid_bits, 0, &tmp);
-	if (error == 0 && tpr_shadowing) {
-		virtual_interrupt_delivery = 1;
-		TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid",
-		    &virtual_interrupt_delivery);
-	}
-
-	if (virtual_interrupt_delivery) {
-		procbased_ctls |= PROCBASED_USE_TPR_SHADOW;
-		procbased_ctls2 |= procbased2_vid_bits;
-		procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE;
-
-		/*
-		 * Check for Posted Interrupts only if Virtual Interrupt
-		 * Delivery is enabled.
-		 */
-		error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS,
-		    MSR_VMX_TRUE_PINBASED_CTLS, PINBASED_POSTED_INTERRUPT, 0,
-		    &tmp);
-		if (error == 0) {
-#ifdef __FreeBSD__
-			pirvec = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) :
-			    &IDTVEC(justreturn));
-			if (pirvec < 0) {
-				if (bootverbose) {
-					printf("vmx_init: unable to allocate "
-					    "posted interrupt vector\n");
+	if (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS,
+	    PROCBASED_USE_TPR_SHADOW, 0, &tmp) == 0) {
+		avail_caps |= VMX_CAP_TPR_SHADOW;
+
+		const uint32_t apicv_bits =
+		    PROCBASED2_VIRTUALIZE_APIC_ACCESSES |
+		    PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
+		    PROCBASED2_VIRTUALIZE_X2APIC_MODE |
+		    PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY;
+		if (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
+		    MSR_VMX_PROCBASED_CTLS2, apicv_bits, 0, &tmp) == 0) {
+			avail_caps |= VMX_CAP_APICV;
+
+			/*
+			 * It may make sense in the future to differentiate
+			 * hardware (or software) configurations with APICv but
+			 * no support for accelerating x2APIC mode.
+			 */
+			avail_caps |= VMX_CAP_APICV_X2APIC;
+
+			error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS,
+			    MSR_VMX_TRUE_PINBASED_CTLS,
+			    PINBASED_POSTED_INTERRUPT, 0, &tmp);
+			if (error == 0) {
+				/*
+				 * If the PSM-provided interfaces for requesting
+				 * and using a PIR IPI vector are present, use
+				 * them for posted interrupts.
+				 */
+				if (psm_get_pir_ipivect != NULL &&
+				    psm_send_pir_ipi != NULL) {
+					pirvec = psm_get_pir_ipivect();
+					avail_caps |= VMX_CAP_APICV_PIR;
 				}
-			} else {
-				posted_interrupts = 1;
-				TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_pir",
-				    &posted_interrupts);
 			}
-#else
-			/*
-			 * If the PSM-provided interfaces for requesting and
-			 * using a PIR IPI vector are present, use them for
-			 * posted interrupts.
-			 */
-			if (psm_get_pir_ipivect != NULL &&
-			    psm_send_pir_ipi != NULL) {
-				pirvec = psm_get_pir_ipivect();
-				posted_interrupts = 1;
-			}
-#endif
 		}
 	}
 
-	if (posted_interrupts)
-		    pinbased_ctls |= PINBASED_POSTED_INTERRUPT;
-
 	/* Initialize EPT */
 	error = ept_init(ipinum);
 	if (error) {
@@ -960,11 +792,10 @@
 	cr0_zeros_mask = ~fixed0 & ~fixed1;
 
 	/*
-	 * CR0_PE and CR0_PG can be set to zero in VMX non-root operation
-	 * if unrestricted guest execution is allowed.
+	 * Since Unrestricted Guest was already verified present, CR0_PE and
+	 * CR0_PG are allowed to be set to zero in VMX non-root operation.
 	 */
-	if (cap_unrestricted_guest)
-		cr0_ones_mask &= ~(CR0_PG | CR0_PE);
+	cr0_ones_mask &= ~(CR0_PG | CR0_PE);
 
 	/*
 	 * Do not allow the guest to set CR0_NW or CR0_CD.
@@ -976,17 +807,9 @@
 	cr4_ones_mask = fixed0 & fixed1;
 	cr4_zeros_mask = ~fixed0 & ~fixed1;
 
-#ifdef __FreeBSD__
-	vpid_init();
-#endif
-
 	vmx_msr_init();
 
-#ifdef __FreeBSD__
-	/* enable VMX operation */
-	smp_rendezvous(NULL, vmx_enable, NULL, NULL);
-#endif
-
+	vmx_capabilities = avail_caps;
 	vmx_initialized = 1;
 
 	return (0);
@@ -1063,6 +886,7 @@
 	struct vmcs *vmcs;
 	uint32_t exc_bitmap;
 	uint16_t maxcpus;
+	uint32_t proc_ctls, proc2_ctls, pin_ctls;
 
 	vmx = malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO);
 	if ((uintptr_t)vmx & PAGE_MASK) {
@@ -1117,16 +941,38 @@
 
 	vpid_alloc(vpid, VM_MAXCPU);
 
-	if (virtual_interrupt_delivery) {
+	/* Grab the established defaults */
+	proc_ctls = procbased_ctls;
+	proc2_ctls = procbased_ctls2;
+	pin_ctls = pinbased_ctls;
+	/* For now, default to the available capabilities */
+	vmx->vmx_caps = vmx_capabilities;
+
+	if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) {
+		proc_ctls |= PROCBASED_USE_TPR_SHADOW;
+		proc_ctls &= ~PROCBASED_CR8_LOAD_EXITING;
+		proc_ctls &= ~PROCBASED_CR8_STORE_EXITING;
+	}
+	if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
+		ASSERT(vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW));
+
+		proc2_ctls |= (PROCBASED2_VIRTUALIZE_APIC_ACCESSES |
+		    PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
+		    PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
+
 		error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE,
 		    APIC_ACCESS_ADDRESS);
 		/* XXX this should really return an error to the caller */
 		KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error));
 	}
+	if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
+		ASSERT(vmx_cap_en(vmx, VMX_CAP_APICV));
+
+		pin_ctls |= PINBASED_POSTED_INTERRUPT;
+	}
 
 	maxcpus = vm_get_maxcpus(vm);
 	for (i = 0; i < maxcpus; i++) {
-#ifndef __FreeBSD__
 		/*
 		 * Cache physical address lookups for various components which
 		 * may be required inside the critical_enter() section implied
@@ -1135,13 +981,10 @@
 		vm_paddr_t msr_bitmap_pa = vtophys(vmx->msr_bitmap);
 		vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]);
 		vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]);
-#endif /* __FreeBSD__ */
 
 		vmcs = &vmx->vmcs[i];
 		vmcs->identifier = vmx_revision();
-#ifndef __FreeBSD__
 		vmcs->vmcs_pa = (uint64_t)vtophys(vmcs);
-#endif
 		error = vmclear(vmcs);
 		if (error != 0) {
 			panic("vmx_vminit: vmclear error %d on vcpu %d\n",
@@ -1155,25 +998,14 @@
 
 		VMPTRLD(vmcs);
 		error = 0;
-#ifdef __FreeBSD__
-		/*
-		 * The illumos vmx_enter_guest implementation avoids some of
-		 * the %rsp-manipulation games which are present in the stock
-		 * one from FreeBSD.
-		 */
-		error += vmwrite(VMCS_HOST_RSP, (u_long)&vmx->ctx[i]);
-#endif
+
 		error += vmwrite(VMCS_EPTP, vmx->eptp);
-		error += vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls);
-		error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls);
-		error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2);
+		error += vmwrite(VMCS_PIN_BASED_CTLS, pin_ctls);
+		error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, proc_ctls);
+		error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls);
 		error += vmwrite(VMCS_EXIT_CTLS, exit_ctls);
 		error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
-#ifdef __FreeBSD__
-		error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap));
-#else
 		error += vmwrite(VMCS_MSR_BITMAP, msr_bitmap_pa);
-#endif
 		error += vmwrite(VMCS_VPID, vpid[i]);
 
 		if (guest_l1d_flush && !guest_l1d_flush_sw) {
@@ -1195,37 +1027,27 @@
 		vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1;
 		error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1);
 
-		if (tpr_shadowing) {
-#ifdef __FreeBSD__
-			error += vmwrite(VMCS_VIRTUAL_APIC,
-			    vtophys(&vmx->apic_page[i]));
-#else
+		if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) {
 			error += vmwrite(VMCS_VIRTUAL_APIC, apic_page_pa);
-#endif
 		}
 
-		if (virtual_interrupt_delivery) {
+		if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
 			error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
 			error += vmwrite(VMCS_EOI_EXIT0, 0);
 			error += vmwrite(VMCS_EOI_EXIT1, 0);
 			error += vmwrite(VMCS_EOI_EXIT2, 0);
 			error += vmwrite(VMCS_EOI_EXIT3, 0);
 		}
-		if (posted_interrupts) {
+		if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
 			error += vmwrite(VMCS_PIR_VECTOR, pirvec);
-#ifdef __FreeBSD__
-			error += vmwrite(VMCS_PIR_DESC,
-			    vtophys(&vmx->pir_desc[i]));
-#else
 			error += vmwrite(VMCS_PIR_DESC, pir_desc_pa);
-#endif
 		}
 		VMCLEAR(vmcs);
 		KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs"));
 
 		vmx->cap[i].set = 0;
-		vmx->cap[i].proc_ctls = procbased_ctls;
-		vmx->cap[i].proc_ctls2 = procbased_ctls2;
+		vmx->cap[i].proc_ctls = proc_ctls;
+		vmx->cap[i].proc_ctls2 = proc2_ctls;
 		vmx->cap[i].exc_bitmap = exc_bitmap;
 
 		vmx->state[i].nextrip = ~0;
@@ -1534,7 +1356,6 @@
 #endif
 }
 
-#ifndef __FreeBSD__
 static void
 vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
     uint64_t guestrip)
@@ -1638,7 +1459,7 @@
 		 */
 		KASSERT(vector >= 0 && vector <= 255,
 		    ("invalid vector %d from INTR", vector));
-	} else if (!virtual_interrupt_delivery) {
+	} else if (!vmx_cap_en(vmx, VMX_CAP_APICV)) {
 		/* Ask the local apic for a vector to inject */
 		if (!vlapic_pending_intr(vlapic, &vector))
 			return;
@@ -1713,197 +1534,6 @@
 	 */
 	vmx_set_int_window_exiting(vmx, vcpu);
 }
-#else
-static void
-vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
-    uint64_t guestrip)
-{
-	int vector, need_nmi_exiting, extint_pending;
-	uint64_t rflags, entryinfo;
-	uint32_t gi, info;
-
-	vlapic_tmr_update(vlapic);
-
-	if (vmx->state[vcpu].nextrip != guestrip) {
-		gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
-		if (gi & HWINTR_BLOCKING) {
-			VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking "
-			    "cleared due to rip change: %#lx/%#lx",
-			    vmx->state[vcpu].nextrip, guestrip);
-			gi &= ~HWINTR_BLOCKING;
-			vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
-		}
-	}
-
-	if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
-		KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
-		    "intinfo is not valid: %#lx", __func__, entryinfo));
-
-		info = vmcs_read(VMCS_ENTRY_INTR_INFO);
-		KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject "
-		     "pending exception: %#lx/%#x", __func__, entryinfo, info));
-
-		info = entryinfo;
-		vector = info & 0xff;
-		if (vector == IDT_BP || vector == IDT_OF) {
-			/*
-			 * VT-x requires #BP and #OF to be injected as software
-			 * exceptions.
-			 */
-			info &= ~VMCS_INTR_T_MASK;
-			info |= VMCS_INTR_T_SWEXCEPTION;
-		}
-
-		if (info & VMCS_INTR_DEL_ERRCODE)
-			vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, entryinfo >> 32);
-
-		vmcs_write(VMCS_ENTRY_INTR_INFO, info);
-	}
-
-	if (vm_nmi_pending(vmx->vm, vcpu)) {
-		/*
-		 * If there are no conditions blocking NMI injection then
-		 * inject it directly here otherwise enable "NMI window
-		 * exiting" to inject it as soon as we can.
-		 *
-		 * We also check for STI_BLOCKING because some implementations
-		 * don't allow NMI injection in this case. If we are running
-		 * on a processor that doesn't have this restriction it will
-		 * immediately exit and the NMI will be injected in the
-		 * "NMI window exiting" handler.
-		 */
-		need_nmi_exiting = 1;
-		gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
-		if ((gi & (HWINTR_BLOCKING | NMI_BLOCKING)) == 0) {
-			info = vmcs_read(VMCS_ENTRY_INTR_INFO);
-			if ((info & VMCS_INTR_VALID) == 0) {
-				vmx_inject_nmi(vmx, vcpu);
-				need_nmi_exiting = 0;
-			} else {
-				VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI "
-				    "due to VM-entry intr info %#x", info);
-			}
-		} else {
-			VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI due to "
-			    "Guest Interruptibility-state %#x", gi);
-		}
-
-		if (need_nmi_exiting)
-			vmx_set_nmi_window_exiting(vmx, vcpu);
-	}
-
-	extint_pending = vm_extint_pending(vmx->vm, vcpu);
-
-	if (!extint_pending && virtual_interrupt_delivery) {
-		vmx_inject_pir(vlapic);
-		return;
-	}
-
-	/*
-	 * If interrupt-window exiting is already in effect then don't bother
-	 * checking for pending interrupts. This is just an optimization and
-	 * not needed for correctness.
-	 */
-	if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0) {
-		VCPU_CTR0(vmx->vm, vcpu, "Skip interrupt injection due to "
-		    "pending int_window_exiting");
-		return;
-	}
-
-	if (!extint_pending) {
-		/* Ask the local apic for a vector to inject */
-		if (!vlapic_pending_intr(vlapic, &vector))
-			return;
-
-		/*
-		 * From the Intel SDM, Volume 3, Section "Maskable
-		 * Hardware Interrupts":
-		 * - maskable interrupt vectors [16,255] can be delivered
-		 *   through the local APIC.
-		*/
-		KASSERT(vector >= 16 && vector <= 255,
-		    ("invalid vector %d from local APIC", vector));
-	} else {
-		/* Ask the legacy pic for a vector to inject */
-		vatpic_pending_intr(vmx->vm, &vector);
-
-		/*
-		 * From the Intel SDM, Volume 3, Section "Maskable
-		 * Hardware Interrupts":
-		 * - maskable interrupt vectors [0,255] can be delivered
-		 *   through the INTR pin.
-		 */
-		KASSERT(vector >= 0 && vector <= 255,
-		    ("invalid vector %d from INTR", vector));
-	}
-
-	/* Check RFLAGS.IF and the interruptibility state of the guest */
-	rflags = vmcs_read(VMCS_GUEST_RFLAGS);
-	if ((rflags & PSL_I) == 0) {
-		VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
-		    "rflags %#lx", vector, rflags);
-		goto cantinject;
-	}
-
-	gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
-	if (gi & HWINTR_BLOCKING) {
-		VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
-		    "Guest Interruptibility-state %#x", vector, gi);
-		goto cantinject;
-	}
-
-	info = vmcs_read(VMCS_ENTRY_INTR_INFO);
-	if (info & VMCS_INTR_VALID) {
-		/*
-		 * This is expected and could happen for multiple reasons:
-		 * - A vectoring VM-entry was aborted due to astpending
-		 * - A VM-exit happened during event injection.
-		 * - An exception was injected above.
-		 * - An NMI was injected above or after "NMI window exiting"
-		 */
-		VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
-		    "VM-entry intr info %#x", vector, info);
-		goto cantinject;
-	}
-
-	/* Inject the interrupt */
-	info = VMCS_INTR_T_HWINTR | VMCS_INTR_VALID;
-	info |= vector;
-	vmcs_write(VMCS_ENTRY_INTR_INFO, info);
-
-	if (!extint_pending) {
-		/* Update the Local APIC ISR */
-		vlapic_intr_accepted(vlapic, vector);
-	} else {
-		vm_extint_clear(vmx->vm, vcpu);
-		vatpic_intr_accepted(vmx->vm, vector);
-
-		/*
-		 * After we accepted the current ExtINT the PIC may
-		 * have posted another one.  If that is the case, set
-		 * the Interrupt Window Exiting execution control so
-		 * we can inject that one too.
-		 *
-		 * Also, interrupt window exiting allows us to inject any
-		 * pending APIC vector that was preempted by the ExtINT
-		 * as soon as possible. This applies both for the software
-		 * emulated vlapic and the hardware assisted virtual APIC.
-		 */
-		vmx_set_int_window_exiting(vmx, vcpu);
-	}
-
-	VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector);
-
-	return;
-
-cantinject:
-	/*
-	 * Set the Interrupt Window Exiting execution control so we can inject
-	 * the interrupt as soon as blocking condition goes away.
-	 */
-	vmx_set_int_window_exiting(vmx, vcpu);
-}
-#endif /* __FreeBSD__ */
 
 /*
  * If the Virtual NMIs execution control is '1' then the logical processor
@@ -2828,11 +2458,12 @@
 		SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpu, vmexit);
 		vmexit->exitcode = VM_EXITCODE_HLT;
 		vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS);
-		if (virtual_interrupt_delivery)
+		if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
 			vmexit->u.hlt.intr_status =
 			    vmcs_read(VMCS_GUEST_INTR_STATUS);
-		else
+		} else {
 			vmexit->u.hlt.intr_status = 0;
+		}
 		break;
 	case EXIT_REASON_MTF:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
@@ -3334,22 +2965,16 @@
 		 *
 		 * The same reasoning applies to the IPI generated by
 		 * pmap_invalidate_ept().
-		 */
-#ifdef __FreeBSD__
-		disable_intr();
-		vmx_inject_interrupts(vmx, vcpu, vlapic, rip);
-#else
-		/*
+		 *
 		 * The bulk of guest interrupt injection is done without
 		 * interrupts disabled on the host CPU.  This is necessary
 		 * since contended mutexes might force the thread to sleep.
 		 */
 		vmx_inject_interrupts(vmx, vcpu, vlapic, rip);
 		disable_intr();
-		if (virtual_interrupt_delivery) {
+		if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
 			vmx_inject_pir(vlapic);
 		}
-#endif /* __FreeBSD__ */
 
 		/*
 		 * Check for vcpu suspension after injecting events because
@@ -3447,12 +3072,15 @@
 #endif
 
 		/*
-		 * If TPR Shadowing is enabled, the TPR Threshold
-		 * must be updated right before entering the guest.
+		 * If TPR Shadowing is enabled, the TPR Threshold must be
+		 * updated right before entering the guest.
 		 */
-		if (tpr_shadowing && !virtual_interrupt_delivery) {
-			if ((vmx->cap[vcpu].proc_ctls & PROCBASED_USE_TPR_SHADOW) != 0) {
-				vmcs_write(VMCS_TPR_THRESHOLD, vlapic_get_cr8(vlapic));
+		if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW) &&
+		    !vmx_cap_en(vmx, VMX_CAP_APICV)) {
+			if ((vmx->cap[vcpu].proc_ctls &
+			    PROCBASED_USE_TPR_SHADOW) != 0) {
+				vmcs_write(VMCS_TPR_THRESHOLD,
+				    vlapic_get_cr8(vlapic));
 			}
 		}
 
@@ -3810,10 +3438,6 @@
 		if (cap_monitor_trap)
 			ret = 0;
 		break;
-	case VM_CAP_UNRESTRICTED_GUEST:
-		if (cap_unrestricted_guest)
-			ret = 0;
-		break;
 	case VM_CAP_ENABLE_INVPCID:
 		if (cap_invpcid)
 			ret = 0;
@@ -3874,15 +3498,6 @@
 			reg = VMCS_PRI_PROC_BASED_CTLS;
 		}
 		break;
-	case VM_CAP_UNRESTRICTED_GUEST:
-		if (cap_unrestricted_guest) {
-			retval = 0;
-			pptr = &vmx->cap[vcpu].proc_ctls2;
-			baseval = *pptr;
-			flag = PROCBASED2_UNRESTRICTED_GUEST;
-			reg = VMCS_SEC_PROC_BASED_CTLS;
-		}
-		break;
 	case VM_CAP_ENABLE_INVPCID:
 		if (cap_invpcid) {
 			retval = 0;
@@ -4307,21 +3922,21 @@
 	vlapic_vtx->pir_desc = &vmx->pir_desc[vcpuid];
 	vlapic_vtx->vmx = vmx;
 
-	if (tpr_shadowing) {
+	if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) {
 		vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_ts;
 	}
-
-	if (virtual_interrupt_delivery) {
+	if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
 		vlapic->ops.set_intr_ready = vmx_set_intr_ready;
 		vlapic->ops.pending_intr = vmx_pending_intr;
 		vlapic->ops.intr_accepted = vmx_intr_accepted;
 		vlapic->ops.set_tmr = vmx_set_tmr;
 		vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_vid;
+
+		if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
+			vlapic->ops.post_intr = vmx_post_intr;
+		}
 	}
 
-	if (posted_interrupts)
-		vlapic->ops.post_intr = vmx_post_intr;
-
 	vlapic_init(vlapic);
 
 	return (vlapic);
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h	Wed Jul 29 11:35:17 2020 +0000
@@ -29,7 +29,17 @@
  */
 
 /*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
  * Copyright 2018 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
  */
 
 #ifndef _VMX_H_
@@ -151,6 +161,7 @@
 	struct vmxcap	cap[VM_MAXCPU];
 	struct vmxstate	state[VM_MAXCPU];
 	uint64_t	eptp;
+	enum vmx_caps	vmx_caps;
 	struct vm	*vm;
 	long		eptgen[MAXCPU];		/* cached pmap->pm_eptgen */
 };
@@ -158,6 +169,12 @@
 CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0);
 CTASSERT((offsetof(struct vmx, pir_desc[0]) & 63) == 0);
 
+static __inline bool
+vmx_cap_en(const struct vmx *vmx, enum vmx_caps cap)
+{
+	return ((vmx->vmx_caps & cap) == cap);	/* true iff all cap bits set */
+}
+
 #define	VMX_GUEST_VMEXIT	0
 #define	VMX_VMRESUME_ERROR	1
 #define	VMX_VMLAUNCH_ERROR	2
--- a/usr/src/uts/i86pc/ml/cpr_wakecode.s	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/i86pc/ml/cpr_wakecode.s	Wed Jul 29 11:35:17 2020 +0000
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
  */
 
 #include <sys/asm_linkage.h>
@@ -583,7 +584,7 @@
 	 * Before proceeding, enable usage of the page table NX bit if
 	 * that's how the page tables are set up.
 	 */
-	bt      $X86FSET_NX, x86_featureset(%rip)
+	btl     $X86FSET_NX, x86_featureset(%rip)
 	jnc     1f
 	movl    $MSR_AMD_EFER, %ecx
 	rdmsr
--- a/usr/src/uts/i86pc/ml/kpti_trampolines.s	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/i86pc/ml/kpti_trampolines.s	Wed Jul 29 11:35:17 2020 +0000
@@ -10,6 +10,7 @@
  */
 /*
  * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
  */
 
 /*
@@ -256,7 +257,7 @@
 	pushq	%r14;				\
 	subq	$KPTI_R14, %rsp;		\
 	/* Check for clobbering */		\
-	cmp	$0, KPTI_FLAG(%rsp);		\
+	cmpq	$0, KPTI_FLAG(%rsp);		\
 	je	1f;				\
 	/* Don't worry, this totally works */	\
 	int	$8;				\
--- a/usr/src/uts/i86pc/ml/locore.s	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/i86pc/ml/locore.s	Wed Jul 29 11:35:17 2020 +0000
@@ -24,6 +24,7 @@
  */
 /*
  * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
  */
 
 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
@@ -184,7 +185,7 @@
 	/*
 	 * (We just assert this works by virtue of being here)
 	 */
-	bts	$X86FSET_CPUID, x86_featureset(%rip)
+	btsl	$X86FSET_CPUID, x86_featureset(%rip)
 
 	/*
 	 * mlsetup() gets called with a struct regs as argument, while
--- a/usr/src/uts/i86pc/ml/mpcore.s	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/i86pc/ml/mpcore.s	Wed Jul 29 11:35:17 2020 +0000
@@ -26,6 +26,7 @@
  * All rights reserved.
  *
  * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
  */
 
 #include <sys/asm_linkage.h>
@@ -275,7 +276,7 @@
 	 * Before going any further, enable usage of page table NX bit if
 	 * that's how our page tables are set up.
 	 */
-	bt	$X86FSET_NX, x86_featureset(%rip)
+	btl	$X86FSET_NX, x86_featureset(%rip)
 	jnc	1f
 	movl	$MSR_AMD_EFER, %ecx
 	rdmsr
--- a/usr/src/uts/i86pc/sys/vmm.h	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/i86pc/sys/vmm.h	Wed Jul 29 11:35:17 2020 +0000
@@ -39,6 +39,7 @@
  *
  * Copyright 2015 Pluribus Networks Inc.
  * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
  */
 
 #ifndef _VMM_H_
@@ -165,12 +166,19 @@
 	VM_CAP_HALT_EXIT,
 	VM_CAP_MTRAP_EXIT,
 	VM_CAP_PAUSE_EXIT,
-	VM_CAP_UNRESTRICTED_GUEST,
 	VM_CAP_ENABLE_INVPCID,
 	VM_CAP_BPT_EXIT,
 	VM_CAP_MAX
 };
 
+enum vmx_caps {	/* bit flags; tested with vmx_cap_en() */
+	VMX_CAP_NONE		= 0,		/* no capability bits set */
+	VMX_CAP_TPR_SHADOW	= (1UL << 0),	/* TPR shadowing */
+	VMX_CAP_APICV		= (1UL << 1),	/* virtual intr delivery */
+	VMX_CAP_APICV_X2APIC	= (1UL << 2),	/* x2APIC? TODO confirm */
+	VMX_CAP_APICV_PIR	= (1UL << 3),	/* posted interrupts */
+};
+
 enum vm_intr_trigger {
 	EDGE_TRIGGER,
 	LEVEL_TRIGGER
--- a/usr/src/uts/intel/brand/common/brand_asm.h	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/intel/brand/common/brand_asm.h	Wed Jul 29 11:35:17 2020 +0000
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
  */
 
 #ifndef _COMMON_BRAND_ASM_H
@@ -65,7 +66,7 @@
  *      24 | saved stack pointer		|
  *    | 16 | lwp pointer			|
  *    v  8 | user return address		|
- *       0 | BRAND_CALLBACK()'s return addr 	|
+ *       0 | BRAND_CALLBACK()'s return addr	|
  *         --------------------------------------
  */
 
@@ -182,7 +183,7 @@
 	GET_P_BRAND_DATA(SP_REG, 0, scr);	/* get p_brand_data */	\
 	cmp	$0, scr;						\
 	je	9f;							\
-	cmp	$0, handler(scr);		/* check handler */	\
+	cmpq	$0, handler(scr);		/* check handler */	\
 	je	9f
 
 /*
--- a/usr/src/uts/intel/ia32/ml/float.s	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/intel/ia32/ml/float.s	Wed Jul 29 11:35:17 2020 +0000
@@ -22,6 +22,7 @@
 /*
  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
  */
 
 /*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -262,7 +263,7 @@
  */
 
 	ENTRY_NP(fpdisable)
-	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */ 
+	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
 	ret
 	SET_SIZE(fpdisable)
 
@@ -284,7 +285,7 @@
 	leaq	avx_initial(%rip), %rcx
 	xorl	%edx, %edx
 	movl	$XFEATURE_AVX, %eax
-	bt	$X86FSET_AVX, x86_featureset
+	btl	$X86FSET_AVX, x86_featureset
 	cmovael	%edx, %eax
 	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
 	xrstor (%rcx)
--- a/usr/src/uts/intel/kdi/kdi_idthdl.s	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/intel/kdi/kdi_idthdl.s	Wed Jul 29 11:35:17 2020 +0000
@@ -23,6 +23,7 @@
  * Use is subject to license terms.
  *
  * Copyright 2018 Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
  */
 
 /*
@@ -71,7 +72,7 @@
 	pushq	%r14;				\
 	subq	$KPTI_R14, %rsp;		\
 	/* Check for clobbering */		\
-	cmp	$0, KPTI_FLAG(%rsp);		\
+	cmpq	$0, KPTI_FLAG(%rsp);		\
 	je	1f;				\
 	/* Don't worry, this totally works */	\
 	int	$8;				\
--- a/usr/src/uts/intel/os/driver_aliases	Tue Jul 28 11:25:41 2020 +0000
+++ b/usr/src/uts/intel/os/driver_aliases	Wed Jul 29 11:35:17 2020 +0000
@@ -1331,6 +1331,8 @@
 rge "pci10ec,8169"
 rge "pci16ec,116"
 rge "pciex10ec,8136"
+rge "pciex10ec,8168"
+rge "pciex10ec,8169"
 rtls "pci10ec,8139"
 rtls "pci1113,1211"
 rtls "pci1186,1300"