Mercurial > illumos > illumos-gate
changeset 9036:49811247ffb0
6812782 additional cmt lineage validation logic needed to defend against buggy _PSDs
author | Eric Saxe <Eric.Saxe@Sun.COM> |
---|---|
date | Fri, 13 Mar 2009 16:23:41 -0700 |
parents | cba817a83719 |
children | 6ea398b88b25 |
files | usr/src/uts/common/disp/cmt.c |
diffstat | 1 files changed, 224 insertions(+), 75 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/disp/cmt.c Fri Mar 13 17:12:59 2009 -0400 +++ b/usr/src/uts/common/disp/cmt.c Fri Mar 13 16:23:41 2009 -0700 @@ -100,6 +100,36 @@ */ int cmt_sched_disabled = 0; +/* + * Status codes for CMT lineage validation + * See pg_cmt_lineage_validate() below + */ +typedef enum cmt_lineage_validation { + CMT_LINEAGE_VALID, + CMT_LINEAGE_NON_CONCENTRIC, + CMT_LINEAGE_PG_SPANS_LGRPS, + CMT_LINEAGE_NON_PROMOTABLE, + CMT_LINEAGE_REPAIRED, + CMT_LINEAGE_UNRECOVERABLE +} cmt_lineage_validation_t; + +/* + * Status of the current lineage under construction. + * One must be holding cpu_lock to change this. + */ +cmt_lineage_validation_t cmt_lineage_status = CMT_LINEAGE_VALID; + +/* + * Power domain definitions (on x86) are defined by ACPI, and + * therefore may be subject to BIOS bugs. + */ +#define PG_CMT_HW_SUSPECT(hw) PGHW_IS_PM_DOMAIN(hw) + +/* + * Macro to test if PG is managed by the CMT PG class + */ +#define IS_CMT_PG(pg) (((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id) + static pg_cid_t pg_cmt_class_id; /* PG class id */ static pg_t *pg_cmt_alloc(); @@ -117,40 +147,13 @@ static int pg_cmt_hw(pghw_type_t); static cmt_lgrp_t *pg_cmt_find_lgrp(lgrp_handle_t); static cmt_lgrp_t *pg_cmt_lgrp_create(lgrp_handle_t); -static int pg_cmt_lineage_validate(pg_cmt_t **, int *); static void cmt_ev_thread_swtch(pg_t *, cpu_t *, hrtime_t, kthread_t *, kthread_t *); static void cmt_ev_thread_swtch_pwr(pg_t *, cpu_t *, hrtime_t, kthread_t *, kthread_t *); static void cmt_ev_thread_remain_pwr(pg_t *, cpu_t *, kthread_t *); - -/* - * Macro to test if PG is managed by the CMT PG class - */ -#define IS_CMT_PG(pg) (((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id) +static cmt_lineage_validation_t pg_cmt_lineage_validate(pg_cmt_t **, int *); -/* - * Status codes for CMT lineage validation - * See cmt_lineage_validate() below - */ -typedef enum cmt_lineage_validation { - CMT_LINEAGE_VALID, - CMT_LINEAGE_NON_CONCENTRIC, - CMT_LINEAGE_REPAIRED, - CMT_LINEAGE_UNRECOVERABLE -} cmt_lineage_validation_t; - -/* - * Status of the current lineage under construction. - * One must be holding cpu_lock to change this. - */ -static cmt_lineage_validation_t cmt_lineage_status = CMT_LINEAGE_VALID; - -/* - * Power domain definitions (on x86) are defined by ACPI, and - * therefore may be subject to BIOS bugs. - */ -#define PG_CMT_HW_SUSPECT(hw) PGHW_IS_PM_DOMAIN(hw) /* * CMT PG ops @@ -457,6 +460,7 @@ pg_cmt_t *cpu_cmt_hier[PGHW_NUM_COMPONENTS]; lgrp_handle_t lgrp_handle; cmt_lgrp_t *lgrp; + cmt_lineage_validation_t lineage_status; ASSERT(MUTEX_HELD(&cpu_lock)); @@ -578,10 +582,12 @@ * This routine will also try to fix the lineage along with the * rest of the PG hierarchy should it detect an issue. * - * If it returns -1, an unrecoverable error has happened and we - * need to return. + * If it returns anything other than VALID or REPAIRED, an + * unrecoverable error has occurred, and we cannot proceed. */ - if (pg_cmt_lineage_validate(cpu_cmt_hier, &levels) < 0) + lineage_status = pg_cmt_lineage_validate(cpu_cmt_hier, &levels); + if ((lineage_status != CMT_LINEAGE_VALID) && + (lineage_status != CMT_LINEAGE_REPAIRED)) return; /* @@ -1451,7 +1457,14 @@ GRP_NORESIZE); } /* - * Add PGs children to it's group of siblings. + * Move PG's children from it's children set to it's parent's + * children set. Note that the parent's children set, and PG's + * siblings set are the same thing. + * + * Because we are iterating over the same group that we are + * operating on (removing the children), first add all of PG's + * children to the parent's children set, and once we are done + * iterating, empty PG's children set. */ if (pg->cmt_children != NULL) { children = pg->cmt_children; @@ -1459,19 +1472,13 @@ group_iter_init(&child_iter); while ((child = group_iterate(children, &child_iter)) != NULL) { - /* - * Transplant child from it's siblings set to - * PGs. - */ - if (pg->cmt_siblings != NULL && - child->cmt_siblings != NULL && - group_remove(child->cmt_siblings, child, - GRP_NORESIZE) != -1) { + if (pg->cmt_siblings != NULL) { r = group_add(pg->cmt_siblings, child, GRP_NORESIZE); ASSERT(r == 0); } } + group_empty(pg->cmt_children); } /* @@ -1536,64 +1543,203 @@ cmn_err(CE_NOTE, "!CMT thread placement optimizations unavailable"); } -static int +/* + * CMT lineage validation + * + * This routine is invoked by pg_cmt_cpu_init() to validate the integrity + * of the PGs in a CPU's lineage. This is necessary because it's possible that + * some groupings (power domain groupings in particular) may be defined by + * sources that are buggy (e.g. BIOS bugs). In such cases, it may not be + * possible to integrate those groupings into the CMT PG hierarchy, if doing + * so would violate the subset invariant of the hierarchy, which says that + * a PG must be subset of its parent (if it has one). + * + * pg_cmt_lineage_validate()'s purpose is to detect grouping definitions that + * would result in a violation of this invariant. If a violation is found, + * and the PG is of a grouping type who's definition is known to originate from + * suspect sources (BIOS), then pg_cmt_prune() will be invoked to prune the + * PG (and all other instances PG's sharing relationship type) from the + * hierarchy. Further, future instances of that sharing relationship type won't + * be instantiated. If the grouping definition doesn't originate from suspect + * sources, then pg_cmt_disable() will be invoked to log an error, and disable + * CMT scheduling altogether. + * + * This routine is invoked after the CPU has been added to the PGs in which + * it belongs, but before those PGs have been added to (or had their place + * adjusted in) the CMT PG hierarchy. + * + * The first argument is the CPUs PG lineage (essentially an array of PGs in + * which the CPU belongs) that has already been sorted in ascending order + * by CPU count. Some of the PGs in the CPUs lineage may already have other + * CPUs in them, and have already been integrated into the CMT hierarchy. + * + * The addition of this new CPU to these pre-existing PGs means that those + * PGs may need to be promoted up in the hierarchy to satisfy the subset + * invariant. In additon to testing the subset invariant for the lineage, + * this routine also verifies that the addition of the new CPU to the + * existing PGs wouldn't cause the subset invariant to be violated in + * the exiting lineages. + * + * This routine will normally return one of the following: + * CMT_LINEAGE_VALID - There were no problems detected with the lineage. + * CMT_LINEAGE_REPAIRED - Problems were detected, but repaired via pruning. + * + * Otherwise, this routine will return a value indicating which error it + * was unable to recover from (and set cmt_lineage_status along the way). + */ +static cmt_lineage_validation_t pg_cmt_lineage_validate(pg_cmt_t **lineage, int *sz) { - int i, size; - pg_cmt_t *pg, *parent, *pg_bad; + int i, j, size; + pg_cmt_t *pg, *pg_next, *pg_bad, *pg_tmp; cpu_t *cp; pg_cpu_itr_t cpu_iter; + lgrp_handle_t lgrp; ASSERT(MUTEX_HELD(&cpu_lock)); revalidate: size = *sz; pg_bad = NULL; - for (i = 0; i < size - 1; i++) { + lgrp = LGRP_NULL_HANDLE; + for (i = 0; i < size; i++) { pg = lineage[i]; - parent = lineage[i + 1]; + if (i < size - 1) + pg_next = lineage[i + 1]; + else + pg_next = NULL; /* * We assume that the lineage has already been sorted * by the number of CPUs. In fact, we depend on it. */ - ASSERT(PG_NUM_CPUS((pg_t *)pg) <= PG_NUM_CPUS((pg_t *)parent)); + ASSERT(pg_next == NULL || + (PG_NUM_CPUS((pg_t *)pg) <= PG_NUM_CPUS((pg_t *)pg_next))); /* - * Walk each of the CPUs in the PGs group, and verify that - * the next larger PG contains at least the CPUs in this one. + * Check to make sure that the existing parent of PG (if any) + * is either in the PG's lineage, or the PG has more CPUs than + * its existing parent and can and should be promoted above its + * parent. + * + * Since the PG topology is in the middle of being changed, we + * need to check whether the PG's existing parent (if any) is + * part of its lineage (and therefore should contain the new + * CPU). If not, it means that the addition of the new CPU + * should have made this PG have more CPUs than its parent, and + * this PG should be promoted to be above its existing parent + * now. We need to verify all of this to defend against a buggy + * BIOS giving bad power domain CPU groupings. Sigh. + */ + if (pg->cmt_parent) { + /* + * Determine if cmt_parent is in this lineage + */ + for (j = 0; j < size; j++) { + pg_tmp = lineage[j]; + if (pg_tmp == pg->cmt_parent) + break; + } + if (pg_tmp != pg->cmt_parent) { + /* + * cmt_parent is not in the lineage, verify + * it is a proper subset of PG. + */ + if (PG_NUM_CPUS((pg_t *)pg->cmt_parent) >= + PG_NUM_CPUS((pg_t *)pg)) { + /* + * Not a proper subset if pg has less + * CPUs than cmt_parent... + */ + cmt_lineage_status = + CMT_LINEAGE_NON_PROMOTABLE; + goto handle_error; + } + } + } + + /* + * Walk each of the CPUs in the PGs group and perform + * consistency checks along the way. */ PG_CPU_ITR_INIT((pg_t *)pg, cpu_iter); while ((cp = pg_cpu_next(&cpu_iter)) != NULL) { - if (pg_cpu_find((pg_t *)parent, cp) == B_FALSE) { + /* + * Verify that there aren't any CPUs contained in PG + * that the next PG in the lineage (which is larger + * or same size) doesn't also contain. + */ + if (pg_next != NULL && + pg_cpu_find((pg_t *)pg_next, cp) == B_FALSE) { cmt_lineage_status = CMT_LINEAGE_NON_CONCENTRIC; goto handle_error; } + + /* + * Verify that all the CPUs in the PG are in the same + * lgroup. + */ + if (lgrp == LGRP_NULL_HANDLE) { + lgrp = lgrp_plat_cpu_to_hand(cp->cpu_id); + } else if (lgrp_plat_cpu_to_hand(cp->cpu_id) != lgrp) { + cmt_lineage_status = CMT_LINEAGE_PG_SPANS_LGRPS; + goto handle_error; + } } } handle_error: + /* + * Some of these validation errors can result when the CPU grouping + * information is derived from buggy sources (for example, incorrect + * ACPI tables on x86 systems). + * + * We'll try to recover in such cases by pruning out the illegal + * groupings from the PG hierarchy, which means that we won't optimize + * for those levels, but we will for the remaining ones. + */ switch (cmt_lineage_status) { case CMT_LINEAGE_VALID: case CMT_LINEAGE_REPAIRED: break; + case CMT_LINEAGE_PG_SPANS_LGRPS: + /* + * We've detected a PG whose CPUs span lgroups. + * + * This isn't supported, as the dispatcher isn't allowed to + * to do CMT thread placement across lgroups, as this would + * conflict with policies implementing MPO thread affinity. + * + * The handling for this falls through to the next case. + */ + case CMT_LINEAGE_NON_PROMOTABLE: + /* + * We've detected a PG that already exists in another CPU's + * lineage that cannot cannot legally be promoted into place + * without breaking the invariants of the hierarchy. + */ + if (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw)) { + if (pg_cmt_prune(pg, lineage, sz) == 0) { + cmt_lineage_status = CMT_LINEAGE_REPAIRED; + goto revalidate; + } + } + /* + * Something went wrong trying to prune out the bad level. + * Disable CMT scheduling altogether. + */ + pg_cmt_disable(); + break; case CMT_LINEAGE_NON_CONCENTRIC: /* - * We've detected a non-concentric PG lineage. - * - * This can happen when some of the CPU grouping information - * is derived from buggy sources (for example, incorrect ACPI - * tables on x86 systems). + * We've detected a non-concentric PG lineage, which means that + * there's a PG in the lineage that has CPUs that the next PG + * over in the lineage (which is the same size or larger) + * doesn't have. * - * We attempt to recover from this by pruning out the - * illegal groupings from the PG hierarchy, which means that - * we won't optimize for those levels, but we will for the - * remaining ones. - * - * If a given level has CPUs not found in it's parent, then - * we examine the PG and it's parent to see if either grouping - * is enumerated from potentially buggy sources. + * In this case, we examine the two PGs to see if either + * grouping is defined by potentially buggy sources. * * If one has less CPUs than the other, and contains CPUs * not found in the parent, and it is an untrusted enumeration, @@ -1603,15 +1749,15 @@ * This process repeats until we have a concentric lineage, * or we would have to prune out level derived from what we * thought was a reliable source, in which case CMT scheduling - * is disabled all together. + * is disabled altogether. */ - if ((PG_NUM_CPUS((pg_t *)pg) < PG_NUM_CPUS((pg_t *)parent)) && + if ((PG_NUM_CPUS((pg_t *)pg) < PG_NUM_CPUS((pg_t *)pg_next)) && (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw))) { pg_bad = pg; } else if (PG_NUM_CPUS((pg_t *)pg) == - PG_NUM_CPUS((pg_t *)parent)) { - if (PG_CMT_HW_SUSPECT(((pghw_t *)parent)->pghw_hw)) { - pg_bad = parent; + PG_NUM_CPUS((pg_t *)pg_next)) { + if (PG_CMT_HW_SUSPECT(((pghw_t *)pg_next)->pghw_hw)) { + pg_bad = pg_next; } else if (PG_CMT_HW_SUSPECT(((pghw_t *)pg)->pghw_hw)) { pg_bad = pg; } @@ -1622,17 +1768,20 @@ goto revalidate; } } - /*FALLTHROUGH*/ + /* + * Something went wrong trying to identify and/or prune out + * the bad level. Disable CMT scheduling altogether. + */ + pg_cmt_disable(); + break; default: /* - * If we're here, something has gone wrong in trying to - * recover from a illegal PG hierarchy, or we've encountered - * a validation error for which we don't know how to recover. - * In this case, disable CMT scheduling all together. + * If we're here, we've encountered a validation error for + * which we don't know how to recover. In this case, disable + * CMT scheduling altogether. */ + cmt_lineage_status = CMT_LINEAGE_UNRECOVERABLE; pg_cmt_disable(); - cmt_lineage_status = CMT_LINEAGE_UNRECOVERABLE; - return (-1); } - return (0); + return (cmt_lineage_status); }