changeset 25214:5b4f2c1d6e77

[illumos-gate merge] commit eb8e4816e886312ff129c1a9213ad86d0fe5ffb6 12935 loader: setting vdev size based on label asize is not working commit b38fc3ed80ecaf16c46433e690e6f17495c5e179 12922 bhyve vmm ops could have stronger guardrail commit 88628b1bc8bd723915686a0f84bd0461ec80e590 12913 i40e want configurable num_rx_groups
author Jerry Jelinek <jerry.jelinek@joyent.com>
date Tue, 14 Jul 2020 11:45:08 +0000
parents c854183fa07c (current diff) 721689034089 (diff)
children 91529019146f
files usr/src/boot/lib/libstand/zfs/zfsimpl.c usr/src/man/man7d/i40e.7d usr/src/uts/common/io/i40e/i40e_gld.c usr/src/uts/common/io/i40e/i40e_main.c usr/src/uts/common/io/i40e/i40e_sw.h usr/src/uts/i86pc/io/vmm/vmm.c
diffstat 7 files changed, 148 insertions(+), 112 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/boot/Makefile.version	Mon Jul 13 14:13:11 2020 +0000
+++ b/usr/src/boot/Makefile.version	Tue Jul 14 11:45:08 2020 +0000
@@ -33,4 +33,4 @@
 # Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes.
 # The version is processed from left to right, the version number can only
 # be increased.
-BOOT_VERSION = $(LOADER_VERSION)-2020.06.26.1
+BOOT_VERSION = $(LOADER_VERSION)-2020.07.07.1
--- a/usr/src/boot/lib/libstand/zfs/zfsimpl.c	Mon Jul 13 14:13:11 2020 +0000
+++ b/usr/src/boot/lib/libstand/zfs/zfsimpl.c	Tue Jul 14 11:45:08 2020 +0000
@@ -1835,6 +1835,55 @@
 	return (vdev_read_phys(vd, &bp, buf, off, size));
 }
 
+/*
+ * Return the device size implied by the label config in nvl: the top-level
+ * vdev asize (per child for raidz) plus both label areas, or 0 if unknown.
+ */
+static uint64_t
+vdev_get_label_asize(unsigned char *nvl)
+{
+	unsigned char *vdevs;
+	uint64_t asize = 0;
+	const char *type;
+	int len;
+
+	if (nvlist_find(nvl, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
+	    NULL, &vdevs, NULL) != 0)
+		return (asize);
+
+	/*
+	 * Get vdev type. We will calculate asize for raidz, mirror and disk.
+	 * For raidz, the asize is raw size of all children.
+	 */
+	if (nvlist_find(vdevs, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING,
+	    NULL, &type, &len) != 0)
+		return (asize);
+
+	if (memcmp(type, VDEV_TYPE_MIRROR, len) != 0 &&
+	    memcmp(type, VDEV_TYPE_DISK, len) != 0 &&
+	    memcmp(type, VDEV_TYPE_RAIDZ, len) != 0)
+		return (asize);
+
+	if (nvlist_find(vdevs, ZPOOL_CONFIG_ASIZE, DATA_TYPE_UINT64,
+	    NULL, &asize, NULL) != 0)
+		return (asize);
+
+	if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0) {
+		unsigned char *kids;
+		int nkids;
+
+		/* A missing or empty child list must not divide by zero. */
+		if (nvlist_find(vdevs, ZPOOL_CONFIG_CHILDREN,
+		    DATA_TYPE_NVLIST_ARRAY, &nkids, &kids, NULL) != 0 ||
+		    nkids <= 0)
+			return (0);
+		asize /= nkids;
+	}
+
+	asize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
+	return (asize);
+}
+
 static unsigned char *
 vdev_label_read_config(vdev_t *vd, uint64_t txg)
 {
@@ -1882,10 +1931,9 @@
 			 * Use asize from pool config. We need this
 			 * because we can get bad value from BIOS.
 			 */
-			if (nvlist_find(nvlist, ZPOOL_CONFIG_ASIZE,
-			    DATA_TYPE_UINT64, NULL, &asize, NULL) == 0) {
-				vd->v_psize = asize +
-				    VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
+			asize = vdev_get_label_asize(nvl);
+			if (asize != 0) {
+				vd->v_psize = asize;
 			}
 		}
 	}
--- a/usr/src/man/man7d/i40e.7d	Mon Jul 13 14:13:11 2020 +0000
+++ b/usr/src/man/man7d/i40e.7d	Tue Jul 14 11:45:08 2020 +0000
@@ -10,8 +10,9 @@
 .\"
 .\"
 .\" Copyright (c) 2018 Joyent, Inc.
+.\" Copyright 2020 Ryan Zezeski
 .\"
-.Dd April 15, 2020
+.Dd July 1, 2020
 .Dt I40E 7D
 .Os
 .Sh NAME
@@ -157,6 +158,25 @@
 The default is always to enable them.
 It is not recommended to to disable them.
 .Ed
+.It Sy rx_num_groups
+.Bd -filled -compact
+Minimum:
+.Sy 1 |
+Maximum:
+.Sy 32
+.Ed
+.Bd -filled
+The
+.Sy rx_num_groups
+property determines the number of receive MAC groups provided by the driver.
+Each group can handle all unicast traffic for a single MAC address; more groups
+mean more unicast traffic that can be steered by hardware.
+However, more groups also means more demand for kernel memory.
+If you are not making heavy use of VNICs, or do not need the efficiency gains
+of hardware steering, then reducing this number can reduce kernel memory
+taken by
+.Nm i40e .
+.Ed
 .It Sy rx_ring_size
 .Bd -filled -compact
 Minimum:
--- a/usr/src/uts/common/io/i40e/i40e_gld.c	Mon Jul 13 14:13:11 2020 +0000
+++ b/usr/src/uts/common/io/i40e/i40e_gld.c	Tue Jul 14 11:45:08 2020 +0000
@@ -13,6 +13,7 @@
  * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
  * Copyright (c) 2018, Joyent, Inc.
  * Copyright 2017 Tegile Systems, Inc.  All rights reserved.
+ * Copyright 2020 Ryan Zezeski
  */
 
 /*
@@ -565,7 +566,7 @@
 	infop->mgi_addmac = i40e_group_add_mac;
 	infop->mgi_remmac = i40e_group_remove_mac;
 
-	ASSERT(i40e->i40e_num_rx_groups <= I40E_GROUP_MAX);
+	ASSERT3U(i40e->i40e_num_rx_groups, <=, I40E_MAX_NUM_RX_GROUPS);
 	infop->mgi_count = i40e->i40e_num_trqpairs_per_vsi;
 }
 
--- a/usr/src/uts/common/io/i40e/i40e_main.c	Mon Jul 13 14:13:11 2020 +0000
+++ b/usr/src/uts/common/io/i40e/i40e_main.c	Tue Jul 14 11:45:08 2020 +0000
@@ -14,6 +14,7 @@
  * Copyright 2019 Joyent, Inc.
  * Copyright 2017 Tegile Systems, Inc.  All rights reserved.
  * Copyright 2020 RackTop Systems, Inc.
+ * Copyright 2020 Ryan Zezeski
  */
 
 /*
@@ -1220,7 +1221,7 @@
 	}
 
 	if (i40e->i40e_num_rx_groups == 0) {
-		i40e->i40e_num_rx_groups = I40E_GROUP_MAX;
+		i40e->i40e_num_rx_groups = I40E_DEF_NUM_RX_GROUPS;
 	}
 }
 
@@ -1589,6 +1590,10 @@
 	    i40e->i40e_tx_ring_size - I40E_TX_MAX_COOKIE,
 	    I40E_DEF_TX_BLOCK_THRESH);
 
+	i40e->i40e_num_rx_groups = i40e_get_prop(i40e, "rx_num_groups",
+	    I40E_MIN_NUM_RX_GROUPS, I40E_MAX_NUM_RX_GROUPS,
+	    I40E_DEF_NUM_RX_GROUPS);
+
 	i40e->i40e_rx_ring_size = i40e_get_prop(i40e, "rx_ring_size",
 	    I40E_MIN_RX_RING_SIZE, I40E_MAX_RX_RING_SIZE,
 	    I40E_DEF_RX_RING_SIZE);
@@ -1776,7 +1781,6 @@
 	}
 
 	i40e->i40e_intr_type = 0;
-	i40e->i40e_num_rx_groups = I40E_GROUP_MAX;
 
 	/*
 	 * We need to determine the number of queue pairs per traffic
--- a/usr/src/uts/common/io/i40e/i40e_sw.h	Mon Jul 13 14:13:11 2020 +0000
+++ b/usr/src/uts/common/io/i40e/i40e_sw.h	Tue Jul 14 11:45:08 2020 +0000
@@ -13,6 +13,7 @@
  * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
  * Copyright 2019 Joyent, Inc.
  * Copyright 2017 Tegile Systems, Inc.  All rights reserved.
+ * Copyright 2020 Ryan Zezeski
  */
 
 /*
@@ -89,6 +90,16 @@
 #define	I40E_MAX_TX_RING_SIZE	4096
 #define	I40E_DEF_TX_RING_SIZE	1024
 
+/*
+ * Place an artificial limit on the max number of groups. The X710
+ * series supports up to 384 VSIs to be partitioned across PFs as the
+ * driver sees fit. But until we support more interrupts this seems
+ * like a good place to start.
+ */
+#define	I40E_MIN_NUM_RX_GROUPS	1
+#define	I40E_MAX_NUM_RX_GROUPS	32
+#define	I40E_DEF_NUM_RX_GROUPS	16
+
 #define	I40E_MIN_RX_RING_SIZE	64
 #define	I40E_MAX_RX_RING_SIZE	4096
 #define	I40E_DEF_RX_RING_SIZE	1024
@@ -269,14 +280,6 @@
  */
 #define	I40E_DDI_PROP_LEN	64
 
-/*
- * Place an artificial limit on the max number of groups. The X710
- * series supports up to 384 VSIs to be partitioned across PFs as the
- * driver sees fit. But until we support more interrupts this seems
- * like a good place to start.
- */
-#define	I40E_GROUP_MAX		32
-
 #define	I40E_GROUP_NOMSIX	1
 #define	I40E_TRQPAIR_NOMSIX	1
 
@@ -834,7 +837,7 @@
 	/*
 	 * Device state, switch information, and resources.
 	 */
-	i40e_vsi_t		i40e_vsis[I40E_GROUP_MAX];
+	i40e_vsi_t		i40e_vsis[I40E_MAX_NUM_RX_GROUPS];
 	uint16_t		i40e_mac_seid;	 /* SEID of physical MAC */
 	uint16_t		i40e_veb_seid;	 /* switch atop MAC (SEID) */
 	uint16_t		i40e_vsi_avail;	 /* VSIs avail to this PF */
--- a/usr/src/uts/i86pc/io/vmm/vmm.c	Mon Jul 13 14:13:11 2020 +0000
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c	Tue Jul 14 11:45:08 2020 +0000
@@ -39,6 +39,7 @@
  *
  * Copyright 2015 Pluribus Networks Inc.
  * Copyright 2018 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
  */
 
 #include <sys/cdefs.h>
@@ -203,35 +204,56 @@
 
 static int vmm_initialized;
 
-static struct vmm_ops *ops;
-#define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
-#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
-#define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)
-
-#define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
+
+static void
+nullop_panic(void)
+{
+	panic("null vmm operation call");
+}
+
+/* Do not allow use of an un-set `ops` to do anything but panic */
+static struct vmm_ops vmm_ops_null = {
+	.init		= (vmm_init_func_t)nullop_panic,
+	.cleanup	= (vmm_cleanup_func_t)nullop_panic,
+	.resume		= (vmm_resume_func_t)nullop_panic,
+	.vminit		= (vmi_init_func_t)nullop_panic,
+	.vmrun		= (vmi_run_func_t)nullop_panic,
+	.vmcleanup	= (vmi_cleanup_func_t)nullop_panic,
+	.vmgetreg	= (vmi_get_register_t)nullop_panic,
+	.vmsetreg	= (vmi_set_register_t)nullop_panic,
+	.vmgetdesc	= (vmi_get_desc_t)nullop_panic,
+	.vmsetdesc	= (vmi_set_desc_t)nullop_panic,
+	.vmgetcap	= (vmi_get_cap_t)nullop_panic,
+	.vmsetcap	= (vmi_set_cap_t)nullop_panic,
+	.vmspace_alloc	= (vmi_vmspace_alloc)nullop_panic,
+	.vmspace_free	= (vmi_vmspace_free)nullop_panic,
+	.vlapic_init	= (vmi_vlapic_init)nullop_panic,
+	.vlapic_cleanup	= (vmi_vlapic_cleanup)nullop_panic,
+	.vmsavectx	= (vmi_savectx)nullop_panic,
+	.vmrestorectx	= (vmi_restorectx)nullop_panic,
+};
+
+static struct vmm_ops *ops = &vmm_ops_null;
+
+#define	VMM_INIT(num)			((*ops->init)(num))
+#define	VMM_CLEANUP()			((*ops->cleanup)())
+#define	VMM_RESUME()			((*ops->resume)())
+
+#define	VMINIT(vm, pmap)		((*ops->vminit)(vm, pmap))
 #define	VMRUN(vmi, vcpu, rip, pmap, evinfo) \
-	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO)
-#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
-#define	VMSPACE_ALLOC(min, max) \
-	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
-#define	VMSPACE_FREE(vmspace) \
-	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
-#define	VMGETREG(vmi, vcpu, num, retval)		\
-	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
-#define	VMSETREG(vmi, vcpu, num, val)		\
-	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
-#define	VMGETDESC(vmi, vcpu, num, desc)		\
-	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
-#define	VMSETDESC(vmi, vcpu, num, desc)		\
-	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
-#define	VMGETCAP(vmi, vcpu, num, retval)	\
-	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
-#define	VMSETCAP(vmi, vcpu, num, val)		\
-	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
-#define	VLAPIC_INIT(vmi, vcpu)			\
-	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
-#define	VLAPIC_CLEANUP(vmi, vlapic)		\
-	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
+	((*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo))
+#define	VMCLEANUP(vmi)			((*ops->vmcleanup)(vmi))
+#define	VMSPACE_ALLOC(min, max)		((*ops->vmspace_alloc)(min, max))
+#define	VMSPACE_FREE(vmspace)		((*ops->vmspace_free)(vmspace))
+
+#define	VMGETREG(vmi, vcpu, num, rv)	((*ops->vmgetreg)(vmi, vcpu, num, rv))
+#define	VMSETREG(vmi, vcpu, num, val)	((*ops->vmsetreg)(vmi, vcpu, num, val))
+#define	VMGETDESC(vmi, vcpu, num, dsc)	((*ops->vmgetdesc)(vmi, vcpu, num, dsc))
+#define	VMSETDESC(vmi, vcpu, num, dsc)	((*ops->vmsetdesc)(vmi, vcpu, num, dsc))
+#define	VMGETCAP(vmi, vcpu, num, rv)	((*ops->vmgetcap)(vmi, vcpu, num, rv))
+#define	VMSETCAP(vmi, vcpu, num, val)	((*ops->vmsetcap)(vmi, vcpu, num, val))
+#define	VLAPIC_INIT(vmi, vcpu)		((*ops->vlapic_init)(vmi, vcpu))
+#define	VLAPIC_CLEANUP(vmi, vlapic)	((*ops->vlapic_cleanup)(vmi, vlapic))
 
 #define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
 #define	fpu_stop_emulating()	clts()
@@ -380,14 +402,6 @@
 	return (&vcpu->exitinfo);
 }
 
-#ifdef __FreeBSD__
-static void
-vmm_resume(void)
-{
-	VMM_RESUME();
-}
-#endif
-
 static int
 vmm_init(void)
 {
@@ -423,67 +437,13 @@
 	return (VMM_INIT(vmm_ipinum));
 }
 
-#ifdef __FreeBSD__
-
-static int
-vmm_handler(module_t mod, int what, void *arg)
-{
-	int error;
-
-	switch (what) {
-	case MOD_LOAD:
-		vmmdev_init();
-		error = vmm_init();
-		if (error == 0)
-			vmm_initialized = 1;
-		break;
-	case MOD_UNLOAD:
-		error = vmmdev_cleanup();
-		if (error == 0) {
-			vmm_resume_p = NULL;
-			iommu_cleanup();
-#ifdef __FreeBSD__
-			if (vmm_ipinum != IPI_AST)
-				lapic_ipi_free(vmm_ipinum);
-#endif
-			error = VMM_CLEANUP();
-			/*
-			 * Something bad happened - prevent new
-			 * VMs from being created
-			 */
-			if (error)
-				vmm_initialized = 0;
-		}
-		break;
-	default:
-		error = 0;
-		break;
-	}
-	return (error);
-}
-
-static moduledata_t vmm_kmod = {
-	"vmm",
-	vmm_handler,
-	NULL
-};
-
-/*
- * vmm initialization has the following dependencies:
- *
- * - VT-x initialization requires smp_rendezvous() and therefore must happen
- *   after SMP is fully functional (after SI_SUB_SMP).
- */
-DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
-MODULE_VERSION(vmm, 1);
-
-#else /* __FreeBSD__ */
-
 int
 vmm_mod_load()
 {
 	int	error;
 
+	VERIFY(vmm_initialized == 0);
+
 	error = vmm_init();
 	if (error == 0)
 		vmm_initialized = 1;
@@ -496,6 +456,8 @@
 {
 	int	error;
 
+	VERIFY(vmm_initialized == 1);
+
 	iommu_cleanup();
 	error = VMM_CLEANUP();
 	if (error)
@@ -505,8 +467,6 @@
 	return (0);
 }
 
-#endif /* __FreeBSD__ */
-
 static void
 vm_init(struct vm *vm, bool create)
 {