changeset 9746:5f004f9c2aec

6821402 Need support for PGs that span lgroups
author Eric Saxe <Eric.Saxe@Sun.COM>
date Thu, 28 May 2009 14:24:11 -0700
parents 9b8942c27ac5
children 2d2e4e27c35c
files usr/src/uts/common/disp/cmt.c
diffstat 1 files changed, 63 insertions(+), 16 deletions(-)
--- a/usr/src/uts/common/disp/cmt.c	Thu May 28 11:48:32 2009 -0700
+++ b/usr/src/uts/common/disp/cmt.c	Thu May 28 14:24:11 2009 -0700
@@ -74,12 +74,15 @@
  * each PG can have at most one parent, and siblings are the group of PGs
  * sharing the same parent.
  *
- * On NUMA systems, the CMT load balancing algorithm balances across the
- * CMT PGs within their respective lgroups. On UMA based system, there
- * exists a top level group of PGs to balance across. On NUMA systems multiple
- * top level groups are instantiated, where the top level balancing begins by
- * balancng across the CMT PGs within their respective (per lgroup) top level
- * groups.
+ * On UMA based systems, the CMT load balancing algorithm begins by balancing
+ * load across the group of top level PGs in the system hierarchy.
+ * On NUMA systems, the CMT load balancing algorithm balances load across the
+ * group of top level PGs in each leaf lgroup, but for root homed threads it
+ * is willing to balance across all the top level PGs in the system.
+ *
+ * Groups of top level PGs are maintained to implement the above, one for each
+ * leaf lgroup (containing the top level PGs in that lgroup), and one (for the
+ * root lgroup) that contains all the top level PGs in the system.
  */
 static cmt_lgrp_t	*cmt_lgrps = NULL;	/* cmt_lgrps list head */
 static cmt_lgrp_t	*cpu0_lgrp = NULL;	/* boot CPU's initial lgrp */
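
A minimal sketch of the scheme described in the comment above. The toy_* names are illustrative assumptions, not cmt.c interfaces; only the cl_pgs/cl_npgs field names are borrowed from this patch. One set of top level PGs is kept per leaf lgroup, plus one root set holding every top level PG in the system.

/*
 * Simplified illustration only; not the kernel's real types or helpers.
 */
struct toy_pg_set {
	void	**pgs;
	int	npgs;
};

struct toy_lgrp {
	struct toy_pg_set	cl_pgs;		/* top level PGs */
	int			cl_npgs;
};

static struct toy_lgrp toy_root;	/* spans all top level PGs */

/*
 * Root homed threads may balance system wide; everything else balances
 * within its leaf lgroup's set of top level PGs.
 */
static struct toy_pg_set *
toy_balancing_set(struct toy_lgrp *leaf, int root_homed)
{
	return (root_homed ? &toy_root.cl_pgs : &leaf->cl_pgs);
}
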
@@ -90,7 +93,8 @@
 
 /*
  * Array of hardware sharing relationships that are blacklisted.
- * PGs won't be instantiated for blacklisted hardware sharing relationships.
+ * CMT scheduling optimizations won't be performed for blacklisted sharing
+ * relationships.
  */
 static int		cmt_hw_blacklisted[PGHW_NUM_COMPONENTS];
 
@@ -296,6 +300,13 @@
 static void
 cmt_callback_init(pg_t *pg)
 {
+	/*
+	 * Stick with the default callbacks if no CMT thread placement
+	 * optimizations are going to be implemented.
+	 */
+	if (((pg_cmt_t *)pg)->cmt_policy == CMT_NO_POLICY)
+		return;
+
 	switch (((pghw_t *)pg)->pghw_hw) {
 	case PGHW_POW_ACTIVE:
 		pg->pg_cb.thread_swtch = cmt_ev_thread_swtch_pwr;
@@ -515,12 +526,12 @@
 			continue;
 
 		/*
-		 * Continue if the hardware sharing relationship has been
-		 * blacklisted.
+		 * We will still create the PGs for hardware sharing
+		 * relationships that have been blacklisted, but won't
+		 * implement CMT thread placement optimizations against them.
 		 */
-		if (cmt_hw_blacklisted[hw]) {
-			continue;
-		}
+		if (cmt_hw_blacklisted[hw] == 1)
+			policy = CMT_NO_POLICY;
 
 		/*
 		 * Find (or create) the PG associated with
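
A rough sketch of the decision made in the hunk above, using hypothetical toy_* names rather than the kernel's own. A blacklisted sharing relationship still gets its PG created; only the CMT placement policy is suppressed, which in turn leaves the default (no-op) callbacks installed for that PG.

/*
 * Illustrative sketch only; names and types are assumptions.
 */
#define	TOY_NO_POLICY	0

static int	toy_blacklisted[16];	/* indexed by hw sharing type */

static int
toy_policy_for_hw(int hw, int default_policy)
{
	/* PG is still created; only the placement policy is suppressed */
	return (toy_blacklisted[hw] ? TOY_NO_POLICY : default_policy);
}
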
@@ -1391,7 +1402,7 @@
 
 /*
  * Prune PG, and all other instances of PG's hardware sharing relationship
- * from the PG hierarchy.
+ * from the CMT PG hierarchy.
  *
  * This routine operates on the CPU specific processor group data (for the CPUs
  * in the PG being pruned), and may be invoked from a context where one CPU's
@@ -1442,7 +1453,13 @@
 	hwset = pghw_set_lookup(hw);
 
 	/*
-	 * Blacklist the hardware so that future groups won't be created.
+	 * Blacklist the hardware so future processor groups of this type won't
+	 * participate in CMT thread placement.
+	 *
+	 * XXX
+	 * For heterogeneous system configurations, this might be overkill.
+	 * We may only need to blacklist the illegal PGs, and other instances
+	 * of this hardware sharing relationship may be ok.
 	 */
 	cmt_hw_blacklisted[hw] = 1;
 
@@ -1472,6 +1489,7 @@
 			    pg->cmt_siblings != &cmt_root->cl_pgs) {
 				group_expand(&cmt_root->cl_pgs,
 				    GROUP_SIZE(&cmt_root->cl_pgs) + cap_needed);
+				cmt_root->cl_npgs += cap_needed;
 			}
 		}
 	}
@@ -1500,6 +1518,13 @@
 			(void) group_remove(&cmt_root->cl_pgs, pg,
 			    GRP_NORESIZE);
 		}
+
+		/*
+		 * Indicate that no CMT policy will be implemented across
+		 * this PG.
+		 */
+		pg->cmt_policy = CMT_NO_POLICY;
+
 		/*
 		 * Move PG's children from its children set to its parent's
 		 * children set. Note that the parent's children set, and PG's
@@ -1520,6 +1545,14 @@
 					r = group_add(pg->cmt_siblings, child,
 					    GRP_NORESIZE);
 					ASSERT(r == 0);
+
+					if (pg->cmt_parent == NULL &&
+					    pg->cmt_siblings !=
+					    &cmt_root->cl_pgs) {
+						r = group_add(&cmt_root->cl_pgs,
+						    child, GRP_NORESIZE);
+						ASSERT(r == 0);
+					}
 				}
 			}
 			group_empty(pg->cmt_children);
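
A simplified model of the reparenting step in the hunk above: each child of a pruned PG is added to the pruned PG's sibling set, and also to the root lgroup's set of top level PGs when the pruned PG had no parent. The toy_* types and helpers below are hypothetical stand-ins for pg_cmt_t and the group_*() routines; capacity management and locking are omitted.

struct toy_pg;

struct toy_set {
	struct toy_pg	**pgs;
	int		npgs;
};

struct toy_pg {
	struct toy_pg	*parent;
	struct toy_set	children;
	struct toy_set	*siblings;	/* set this PG is balanced within */
};

static void
toy_set_add(struct toy_set *s, struct toy_pg *p)
{
	s->pgs[s->npgs++] = p;	/* capacity assumed reserved up front */
}

static void
toy_prune_reparent(struct toy_pg *pg, struct toy_set *root_pgs)
{
	int i;

	for (i = 0; i < pg->children.npgs; i++) {
		struct toy_pg *child = pg->children.pgs[i];

		child->parent = pg->parent;
		toy_set_add(pg->siblings, child);

		/*
		 * Child is now a top level PG: also track it in the
		 * root lgroup's set of top level PGs.
		 */
		if (pg->parent == NULL && pg->siblings != root_pgs)
			toy_set_add(root_pgs, child);
	}
	pg->children.npgs = 0;
}
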
@@ -1564,8 +1597,10 @@
 
 			/*
 			 * Update the CPU's lineages
+			 *
+			 * Remove the PG from the CPU's group used for CMT
+			 * scheduling.
 			 */
-			(void) group_remove(&cpd->pgs, pg, GRP_NORESIZE);
 			(void) group_remove(&cpd->cmt_pgs, pg, GRP_NORESIZE);
 		}
 	}
@@ -1770,8 +1805,20 @@
 		 * to do CMT thread placement across lgroups, as this would
 		 * conflict with policies implementing MPO thread affinity.
 		 *
-		 * The handling for this falls through to the next case.
+		 * If the PG is of a sharing relationship type known to
+		 * legitimately span lgroups, specify that no CMT thread
+		 * placement policy should be implemented, and prune the PG
+		 * from the existing CMT PG hierarchy.
+		 *
+		 * Otherwise, fall through to the case below for handling.
 		 */
+		if (((pghw_t *)pg)->pghw_hw == PGHW_CHIP) {
+			if (pg_cmt_prune(pg, lineage, sz, pgdata) == 0) {
+				cmt_lineage_status = CMT_LINEAGE_REPAIRED;
+				goto revalidate;
+			}
+		}
+		/*LINTED*/
 	case CMT_LINEAGE_NON_PROMOTABLE:
 		/*
 		 * We've detected a PG that already exists in another CPU's