Mercurial > illumos > illumos-gate
changeset 11713:03615b084875
6892591 per-MMU context id domains for sun4v
author | Pavel Tatashin <Pavel.Tatashin@Sun.COM> |
---|---|
date | Fri, 19 Feb 2010 10:18:21 -0800 |
parents | 3b88ce606c90 |
children | c68907917e95 |
files | usr/src/uts/sfmmu/ml/sfmmu_asm.s usr/src/uts/sfmmu/vm/hat_sfmmu.c usr/src/uts/sfmmu/vm/hat_sfmmu.h usr/src/uts/sun4v/os/fillsysinfo.c usr/src/uts/sun4v/os/mach_descrip.c usr/src/uts/sun4v/os/mach_startup.c usr/src/uts/sun4v/os/suspend.c usr/src/uts/sun4v/sys/mach_descrip.h |
diffstat | 8 files changed, 511 insertions(+), 65 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/sfmmu/ml/sfmmu_asm.s Fri Feb 19 10:41:19 2010 -0700 +++ b/usr/src/uts/sfmmu/ml/sfmmu_asm.s Fri Feb 19 10:18:21 2010 -0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -635,6 +635,13 @@ ! load global mmu_ctxp info ldx [%o2 + CPU_MMU_CTXP], %o3 ! %o3 = mmu_ctx_t ptr + +#ifdef sun4v + /* During suspend on sun4v, context domains can be temporarily removed */ + brz,a,pn %o3, 0f + nop +#endif + lduw [%o2 + CPU_MMU_IDX], %g2 ! %g2 = mmu index ! load global mmu_ctxp gnum @@ -687,6 +694,13 @@ ! (invalid HAT cnum) && (allocflag == 1) ba,pt %icc, 2f nop +#ifdef sun4v +0: + set INVALID_CONTEXT, %o1 + membar #LoadStore|#StoreStore + ba,pt %icc, 8f + mov %g0, %g4 ! %g4 = ret = 0 +#endif 1: ! valid HAT cnum, check gnum cmp %g5, %o4
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c Fri Feb 19 10:41:19 2010 -0700 +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c Fri Feb 19 10:18:21 2010 -0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -532,7 +532,7 @@ extern void sfmmu_setup_tsbinfo(sfmmu_t *); extern void sfmmu_clear_utsbinfo(void); -static void sfmmu_ctx_wrap_around(mmu_ctx_t *); +static void sfmmu_ctx_wrap_around(mmu_ctx_t *, boolean_t); extern int vpm_enable; @@ -1112,19 +1112,11 @@ * a set_platform_defaults() or does not choose to modify * max_mmu_ctxdoms, it gets one MMU context domain for every CPU. * - * For sun4v, there will be one global context domain, this is to - * avoid the ldom cpu substitution problem. - * * For all platforms that have CPUs sharing MMUs, this * value must be defined. */ - if (max_mmu_ctxdoms == 0) { -#ifndef sun4v + if (max_mmu_ctxdoms == 0) max_mmu_ctxdoms = max_ncpus; -#else /* sun4v */ - max_mmu_ctxdoms = 1; -#endif /* sun4v */ - } size = max_mmu_ctxdoms * sizeof (mmu_ctx_t *); mmu_ctxs_tbl = kmem_zalloc(size, KM_SLEEP); @@ -1611,26 +1603,16 @@ * specify that interface, then the function below is used instead to return * default information. The defaults are as follows: * - * - For sun4u systems there's one MMU context domain per CPU. - * This default is used by all sun4u systems except OPL. OPL systems - * provide platform specific interface to map CPU ids to MMU ids - * because on OPL more than 1 CPU shares a single MMU. - * Note that on sun4v, there is one global context domain for - * the entire system. This is to avoid running into potential problem - * with ldom physical cpu substitution feature. * - The number of MMU context IDs supported on any CPU in the * system is 8K. + * - There is one MMU context domain per CPU. 
*/ /*ARGSUSED*/ static void sfmmu_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *infop) { infop->mmu_nctxs = nctxs; -#ifndef sun4v infop->mmu_idx = cpu[cpuid]->cpu_seqid; -#else /* sun4v */ - infop->mmu_idx = 0; -#endif /* sun4v */ } /* @@ -1676,6 +1658,7 @@ mmu_ctxs_tbl[info.mmu_idx] = mmu_ctxp; } else { ASSERT(mmu_ctxp->mmu_idx == info.mmu_idx); + ASSERT(mmu_ctxp->mmu_nctxs <= info.mmu_nctxs); } /* @@ -1693,6 +1676,24 @@ mutex_exit(&mmu_ctxp->mmu_lock); } +static void +sfmmu_ctxdom_free(mmu_ctx_t *mmu_ctxp) +{ + ASSERT(MUTEX_HELD(&cpu_lock)); + ASSERT(!MUTEX_HELD(&mmu_ctxp->mmu_lock)); + + mutex_destroy(&mmu_ctxp->mmu_lock); + + if (mmu_ctxp->mmu_kstat) + kstat_delete(mmu_ctxp->mmu_kstat); + + /* mmu_saved_gnum is protected by the cpu_lock. */ + if (mmu_saved_gnum < mmu_ctxp->mmu_gnum) + mmu_saved_gnum = mmu_ctxp->mmu_gnum; + + kmem_cache_free(mmuctxdom_cache, mmu_ctxp); +} + /* * Called to perform MMU context-related cleanup for a CPU. */ @@ -1718,23 +1719,165 @@ if (--mmu_ctxp->mmu_ncpus == 0) { mmu_ctxs_tbl[mmu_ctxp->mmu_idx] = NULL; mutex_exit(&mmu_ctxp->mmu_lock); - mutex_destroy(&mmu_ctxp->mmu_lock); - - if (mmu_ctxp->mmu_kstat) - kstat_delete(mmu_ctxp->mmu_kstat); - - /* mmu_saved_gnum is protected by the cpu_lock. */ - if (mmu_saved_gnum < mmu_ctxp->mmu_gnum) - mmu_saved_gnum = mmu_ctxp->mmu_gnum; - - kmem_cache_free(mmuctxdom_cache, mmu_ctxp); - + sfmmu_ctxdom_free(mmu_ctxp); return; } mutex_exit(&mmu_ctxp->mmu_lock); } +uint_t +sfmmu_ctxdom_nctxs(int idx) +{ + return (mmu_ctxs_tbl[idx]->mmu_nctxs); +} + +#ifdef sun4v +/* + * sfmmu_ctxdoms_* is an interface provided to help keep context domains + * consistant after suspend/resume on system that can resume on a different + * hardware than it was suspended. + * + * sfmmu_ctxdom_lock(void) locks all context domains and prevents new contexts + * from being allocated. 
It acquires all hat_locks, which blocks most access to + * context data, except for a few cases that are handled separately or are + * harmless. It wraps each domain to increment gnum and invalidate on-CPU + * contexts, and forces cnum to its max. As a result of this call all user + * threads that are running on CPUs trap and try to perform wrap around but + * can't because hat_locks are taken. Threads that were not on CPUs but started + * by scheduler go to sfmmu_alloc_ctx() to acquire context without checking + * hat_lock, but fail, because cnum == nctxs, and therefore also trap and block + * on hat_lock trying to wrap. sfmmu_ctxdoms_lock() must be called before CPUs + * are paused, else it could deadlock acquiring locks held by paused CPUs. + * + * sfmmu_ctxdoms_remove() removes context domains from every CPU and records + * the CPUs that had them. It must be called after CPUs have been paused. This + * ensures that no threads are in sfmmu_alloc_ctx() accessing domain data, + * because pause_cpus sends a mondo interrupt to every CPU, and sfmmu_alloc_ctx + * runs with interrupts disabled. When CPUs are later resumed, they may enter + * sfmmu_alloc_ctx, but it will check for CPU_MMU_CTXP = NULL and immediately + * return failure. Or, they will be blocked trying to acquire hat_lock. Thus + * after sfmmu_ctxdoms_remove returns, we are guaranteed that no one is + * accessing the old context domains. + * + * sfmmu_ctxdoms_update(void) frees space used by old context domains and + * allocates new context domains based on hardware layout. It initializes + * every CPU that had context domain before migration to have one again. + * sfmmu_ctxdoms_update must be called after CPUs are resumed, else it + * could deadlock acquiring locks held by paused CPUs. + * + * sfmmu_ctxdoms_unlock(void) releases all hat_locks after which user threads + * acquire new context ids and continue execution. 
+ * + * Therefore functions should be called in the following order: + * suspend_routine() + * sfmmu_ctxdoms_lock() + * pause_cpus() + * suspend() + * if (suspend failed) + * sfmmu_ctxdoms_unlock() + * ... + * sfmmu_ctxdoms_remove() + * resume_cpus() + * sfmmu_ctxdoms_update() + * sfmmu_ctxdoms_unlock() + */ +static cpuset_t sfmmu_ctxdoms_pset; + +void +sfmmu_ctxdoms_remove() +{ + processorid_t id; + cpu_t *cp; + + /* + * Record the CPUs that have domains in sfmmu_ctxdoms_pset, so they can + * be restored post-migration. A CPU may be powered off and not have a + * domain, for example. + */ + CPUSET_ZERO(sfmmu_ctxdoms_pset); + + for (id = 0; id < NCPU; id++) { + if ((cp = cpu[id]) != NULL && CPU_MMU_CTXP(cp) != NULL) { + CPUSET_ADD(sfmmu_ctxdoms_pset, id); + CPU_MMU_CTXP(cp) = NULL; + } + } +} + +void +sfmmu_ctxdoms_lock(void) +{ + int idx; + mmu_ctx_t *mmu_ctxp; + + sfmmu_hat_lock_all(); + + /* + * At this point, no thread can be in sfmmu_ctx_wrap_around, because + * hat_lock is always taken before calling it. + * + * For each domain, set mmu_cnum to max so no more contexts can be + * allocated, and wrap to flush on-CPU contexts and force threads to + * acquire a new context when we later drop hat_lock after migration. + * Setting mmu_cnum may race with sfmmu_alloc_ctx which also sets cnum, + * but the latter uses CAS and will miscompare and not overwrite it. 
+ */ + kpreempt_disable(); /* required by sfmmu_ctx_wrap_around */ + for (idx = 0; idx < max_mmu_ctxdoms; idx++) { + if ((mmu_ctxp = mmu_ctxs_tbl[idx]) != NULL) { + mutex_enter(&mmu_ctxp->mmu_lock); + mmu_ctxp->mmu_cnum = mmu_ctxp->mmu_nctxs; + /* make sure updated cnum visible */ + membar_enter(); + mutex_exit(&mmu_ctxp->mmu_lock); + sfmmu_ctx_wrap_around(mmu_ctxp, B_FALSE); + } + } + kpreempt_enable(); +} + +void +sfmmu_ctxdoms_unlock(void) +{ + sfmmu_hat_unlock_all(); +} + +void +sfmmu_ctxdoms_update(void) +{ + processorid_t id; + cpu_t *cp; + uint_t idx; + mmu_ctx_t *mmu_ctxp; + + /* + * Free all context domains. As side effect, this increases + * mmu_saved_gnum to the maximum gnum over all domains, which is used to + * init gnum in the new domains, which therefore will be larger than the + * sfmmu gnum for any process, guaranteeing that every process will see + * a new generation and allocate a new context regardless of what new + * domain it runs in. + */ + mutex_enter(&cpu_lock); + + for (idx = 0; idx < max_mmu_ctxdoms; idx++) { + if (mmu_ctxs_tbl[idx] != NULL) { + mmu_ctxp = mmu_ctxs_tbl[idx]; + mmu_ctxs_tbl[idx] = NULL; + sfmmu_ctxdom_free(mmu_ctxp); + } + } + + for (id = 0; id < NCPU; id++) { + if (CPU_IN_SET(sfmmu_ctxdoms_pset, id) && + (cp = cpu[id]) != NULL) + sfmmu_cpu_init(cp); + } + mutex_exit(&cpu_lock); +} +#endif + /* * Hat_setup, makes an address space context the current active one. * In sfmmu this translates to setting the secondary context with the @@ -9745,7 +9888,7 @@ * Do a wrap-around if cnum reaches the max # cnum supported by a MMU. */ if (mmu_ctxp->mmu_cnum == mmu_ctxp->mmu_nctxs) - sfmmu_ctx_wrap_around(mmu_ctxp); + sfmmu_ctx_wrap_around(mmu_ctxp, B_TRUE); /* * Let the MMU set up the page sizes to use for @@ -9786,7 +9929,7 @@ * next generation and start from 2. 
*/ static void -sfmmu_ctx_wrap_around(mmu_ctx_t *mmu_ctxp) +sfmmu_ctx_wrap_around(mmu_ctx_t *mmu_ctxp, boolean_t reset_cnum) { /* caller must have disabled the preemption */ @@ -9820,7 +9963,7 @@ /* xcall to others on the same MMU to invalidate ctx */ cpuset = mmu_ctxp->mmu_cpuset; - ASSERT(CPU_IN_SET(cpuset, CPU->cpu_id)); + ASSERT(CPU_IN_SET(cpuset, CPU->cpu_id) || !reset_cnum); CPUSET_DEL(cpuset, CPU->cpu_id); CPUSET_AND(cpuset, cpu_ready_set); @@ -9857,7 +10000,8 @@ } /* reset mmu cnum, skips cnum 0 and 1 */ - mmu_ctxp->mmu_cnum = NUM_LOCKED_CTXS; + if (reset_cnum == B_TRUE) + mmu_ctxp->mmu_cnum = NUM_LOCKED_CTXS; done: mutex_exit(&mmu_ctxp->mmu_lock);
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h Fri Feb 19 10:41:19 2010 -0700 +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h Fri Feb 19 10:18:21 2010 -0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -554,10 +554,10 @@ * is protected via CAS. * mmu_nctxs * The max number of context IDs supported on every CPU in this - * MMU context domain. It is 8K except for Rock where it is 64K. - * This is needed here in case the system supports mixed type of - * processors/MMUs. It also helps to make ctx switch code access - * fewer cache lines i.e. no need to retrieve it from some global nctxs. + * MMU context domain. This is needed here in case the system supports + * mixed type of processors/MMUs. It also helps to make ctx switch code + * access fewer cache lines i.e. no need to retrieve it from some global + * nctxs. * mmu_lock * The mutex spin lock used to serialize context ID wrap around * mmu_idx @@ -599,6 +599,15 @@ extern void sfmmu_cpu_init(cpu_t *); extern void sfmmu_cpu_cleanup(cpu_t *); +extern uint_t sfmmu_ctxdom_nctxs(int); + +#ifdef sun4v +extern void sfmmu_ctxdoms_remove(void); +extern void sfmmu_ctxdoms_lock(void); +extern void sfmmu_ctxdoms_unlock(void); +extern void sfmmu_ctxdoms_update(void); +#endif + /* * The following structure is used to get MMU context domain information for * a CPU from the platform. 
@@ -607,7 +616,6 @@ * The MMU context domain index within the global array mmu_ctxs * mmu_nctxs * The number of context IDs supported in the MMU context domain - * (64K for Rock) */ typedef struct mmu_ctx_info { uint_t mmu_idx; @@ -2575,7 +2583,11 @@ #define SFMMU_STAT_ADD(stat, amount) sfmmu_global_stat.stat += (amount) #define SFMMU_STAT_SET(stat, count) sfmmu_global_stat.stat = (count) -#define SFMMU_MMU_STAT(stat) CPU->cpu_m.cpu_mmu_ctxp->stat++ +#define SFMMU_MMU_STAT(stat) { \ + mmu_ctx_t *ctx = CPU->cpu_m.cpu_mmu_ctxp; \ + if (ctx) \ + ctx->stat++; \ +} #endif /* !_ASM */
--- a/usr/src/uts/sun4v/os/fillsysinfo.c Fri Feb 19 10:41:19 2010 -0700 +++ b/usr/src/uts/sun4v/os/fillsysinfo.c Fri Feb 19 10:18:21 2010 -0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1050,3 +1050,244 @@ md_free_scan_dag(mdp, &platlist); } + +/* + * Number of bits forming a valid context for use in a sun4v TTE and the MMU + * context registers. Sun4v defines the minimum default value to be 13 if this + * property is not specified in a cpu node in machine descriptor graph. + */ +#define MMU_INFO_CTXBITS_MIN 13 + +/* Convert context bits to number of contexts */ +#define MMU_INFO_BNCTXS(nbits) ((uint_t)(1u<<(nbits))) + +/* + * Read machine descriptor and load TLB to CPU mappings. + * Returned values: cpuid2pset[NCPU], nctxs[NCPU], md_gen + * - cpuid2pset is initialized so it can convert cpuids to processor set of CPUs + * that are shared between TLBs. + * - nctxs is initialized to number of contexts for each CPU + * - md_gen is set to generation number of machine descriptor from which this + * data was read. + * Return: zero on success. 
+ */ +static int +load_tlb_cpu_mappings(cpuset_t **cpuid2pset, uint_t *nctxs, uint64_t *md_gen) +{ + mde_str_cookie_t cpu_sc, bck_sc; + int tlbs_idx, cp_idx; + mde_cookie_t root; + md_t *mdp = NULL; + mde_cookie_t *tlbs = NULL; + mde_cookie_t *cp = NULL; + uint64_t *cpids = NULL; + uint64_t nbit; + int ntlbs; + int ncp; + int retval = 1; + cpuset_t *ppset; + + /* get MD handle, and string cookies for cpu and back nodes */ + if ((mdp = md_get_handle()) == NULL || + (cpu_sc = md_find_name(mdp, "cpu")) == MDE_INVAL_STR_COOKIE || + (bck_sc = md_find_name(mdp, "back")) == MDE_INVAL_STR_COOKIE) + goto cleanup; + + /* set generation number of current MD handle */ + *md_gen = md_get_gen(mdp); + + /* Find root element, and search for all TLBs in MD */ + if ((root = md_root_node(mdp)) == MDE_INVAL_ELEM_COOKIE || + (ntlbs = md_alloc_scan_dag(mdp, root, "tlb", "fwd", &tlbs)) <= 0) + goto cleanup; + + cp = kmem_alloc(sizeof (mde_cookie_t) * NCPU, KM_SLEEP); + cpids = kmem_alloc(sizeof (uint64_t) * NCPU, KM_SLEEP); + + /* + * Build processor sets, one per possible context domain. For each tlb, + * search for connected CPUs. If any CPU is already in a set, then add + * all the TLB's CPUs to that set. Otherwise, create and populate a new + * pset. Thus, a single pset is built to represent multiple TLBs if + * they have CPUs in common. + */ + for (tlbs_idx = 0; tlbs_idx < ntlbs; tlbs_idx++) { + ncp = md_scan_dag(mdp, tlbs[tlbs_idx], cpu_sc, bck_sc, cp); + if (ncp < 0) + goto cleanup; + else if (ncp == 0) + continue; + + /* Get the id and number of contexts for each cpu */ + for (cp_idx = 0; cp_idx < ncp; cp_idx++) { + mde_cookie_t c = cp[cp_idx]; + + if (md_get_prop_val(mdp, c, "id", &cpids[cp_idx])) + goto cleanup; + if (md_get_prop_val(mdp, c, "mmu-#context-bits", &nbit)) + nbit = MMU_INFO_CTXBITS_MIN; + nctxs[cpids[cp_idx]] = MMU_INFO_BNCTXS(nbit); + } + + /* + * If a CPU is already in a set as shown by cpuid2pset[], then + * use that set. 
+ */ + for (cp_idx = 0; cp_idx < ncp; cp_idx++) { + ASSERT(cpids[cp_idx] < NCPU); + ppset = cpuid2pset[cpids[cp_idx]]; + if (ppset != NULL) + break; + } + + /* No CPU has a set. Create a new one. */ + if (ppset == NULL) { + ppset = kmem_alloc(sizeof (cpuset_t), KM_SLEEP); + CPUSET_ZERO(*ppset); + } + + /* Add every CPU to the set, and record the set assignment. */ + for (cp_idx = 0; cp_idx < ncp; cp_idx++) { + cpuid2pset[cpids[cp_idx]] = ppset; + CPUSET_ADD(*ppset, cpids[cp_idx]); + } + } + + retval = 0; + +cleanup: + if (tlbs != NULL) + md_free_scan_dag(mdp, &tlbs); + if (cp != NULL) + kmem_free(cp, sizeof (mde_cookie_t) * NCPU); + if (cpids != NULL) + kmem_free(cpids, sizeof (uint64_t) * NCPU); + if (mdp != NULL) + (void) md_fini_handle(mdp); + + return (retval); +} + +/* + * Return MMU info based on cpuid. + * + * Algorithm: + * Read machine descriptor and find all CPUs that share the same TLB with CPU + * specified by cpuid. Go through found CPUs and see if any one of them already + * has MMU index, if so, set index based on that value. If CPU does not share + * TLB with any other CPU or if none of those CPUs has mmu_ctx pointer, find the + * smallest available MMU index and give it to current CPU. If no available + * domain, perform a round robin, and start assigning from the beginning. + * + * For optimization reasons, this function uses a cache to store all TLB to CPU + * mappings, and updates them only when machine descriptor graph is changed. + * Because of this, and because we search MMU table for smallest index id, this + * function needs to be serialized which is protected by cpu_lock. 
+ */ +void +plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info) +{ + static cpuset_t **cpuid2pset = NULL; + static uint_t *nctxs; + static uint_t next_domain = 0; + static uint64_t md_gen = MDESC_INVAL_GEN; + uint64_t current_gen; + int idx; + cpuset_t cpuid_pset; + processorid_t id; + cpu_t *cp; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + current_gen = md_get_current_gen(); + + /* + * Load TLB CPU mappings only if MD generation has changed, FW that do + * not provide generation number, always return MDESC_INVAL_GEN, and as + * result MD is read here only once on such machines: when cpuid2pset is + * NULL + */ + if (current_gen != md_gen || cpuid2pset == NULL) { + if (cpuid2pset == NULL) { + cpuid2pset = kmem_zalloc(sizeof (cpuset_t *) * NCPU, + KM_SLEEP); + nctxs = kmem_alloc(sizeof (uint_t) * NCPU, KM_SLEEP); + } else { + /* clean cpuid2pset[NCPU], before loading new values */ + for (idx = 0; idx < NCPU; idx++) { + cpuset_t *pset = cpuid2pset[idx]; + + if (pset != NULL) { + for (;;) { + CPUSET_FIND(*pset, id); + if (id == CPUSET_NOTINSET) + break; + CPUSET_DEL(*pset, id); + ASSERT(id < NCPU); + cpuid2pset[id] = NULL; + } + ASSERT(cpuid2pset[idx] == NULL); + kmem_free(pset, sizeof (cpuset_t)); + } + } + } + + if (load_tlb_cpu_mappings(cpuid2pset, nctxs, &md_gen)) + goto error_panic; + } + + info->mmu_nctxs = nctxs[cpuid]; + + if (cpuid2pset[cpuid] == NULL) + goto error_panic; + + cpuid_pset = *cpuid2pset[cpuid]; + CPUSET_DEL(cpuid_pset, cpuid); + + /* Search for a processor in the same TLB pset with MMU context */ + for (;;) { + CPUSET_FIND(cpuid_pset, id); + + if (id == CPUSET_NOTINSET) + break; + + ASSERT(id < NCPU); + cp = cpu[id]; + if (cp != NULL && CPU_MMU_CTXP(cp) != NULL) { + info->mmu_idx = CPU_MMU_IDX(cp); + + return; + } + CPUSET_DEL(cpuid_pset, id); + } + + /* + * No CPU in the TLB pset has a context domain yet. + * Use next_domain if available, or search for an unused domain, or + * overload next_domain, in that order. 
Overloading is necessary when + * the number of TLB psets is greater than max_mmu_ctxdoms. + */ + idx = next_domain; + + if (mmu_ctxs_tbl[idx] != NULL) { + for (idx = 0; idx < max_mmu_ctxdoms; idx++) + if (mmu_ctxs_tbl[idx] == NULL) + break; + if (idx == max_mmu_ctxdoms) { + /* overload next_domain */ + idx = next_domain; + + if (info->mmu_nctxs < sfmmu_ctxdom_nctxs(idx)) + cmn_err(CE_PANIC, "max_mmu_ctxdoms is too small" + " to support CPUs with different nctxs"); + } + } + + info->mmu_idx = idx; + next_domain = (idx + 1) % max_mmu_ctxdoms; + + return; + +error_panic: + cmn_err(CE_PANIC, "!cpu%d: failed to get MMU CTX domain index", cpuid); +}
--- a/usr/src/uts/sun4v/os/mach_descrip.c Fri Feb 19 10:41:19 2010 -0700 +++ b/usr/src/uts/sun4v/os/mach_descrip.c Fri Feb 19 10:18:21 2010 -0800 @@ -20,12 +20,10 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Kernel Machine Description (MD) * @@ -861,3 +859,23 @@ mdp->freep(*list, sizeof (mde_cookie_t) * mdp->node_count); } + +/* + * Return generation number of current machine descriptor. Can be used for + * performance purposes to avoid requesting new md handle just to see if graph + * was updated. + */ +uint64_t +md_get_current_gen(void) +{ + uint64_t gen = MDESC_INVAL_GEN; + + mutex_enter(&curr_mach_descrip_lock); + + if (curr_mach_descrip != NULL) + gen = (curr_mach_descrip->gen); + + mutex_exit(&curr_mach_descrip_lock); + + return (gen); +}
--- a/usr/src/uts/sun4v/os/mach_startup.c Fri Feb 19 10:41:19 2010 -0700 +++ b/usr/src/uts/sun4v/os/mach_startup.c Fri Feb 19 10:18:21 2010 -0800 @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -527,3 +527,15 @@ /* virtual console concentrator */ (void) i_ddi_attach_hw_nodes("vcc"); } + +void +set_platform_defaults(void) +{ + /* + * Allow at most one context domain per 8 CPUs, which is ample for + * good performance. Do not make this too large, because it + * increases the space consumed in the per-process sfmmu structure. + */ + if (max_mmu_ctxdoms == 0) + max_mmu_ctxdoms = (NCPU + 7) / 8; +}
--- a/usr/src/uts/sun4v/os/suspend.c Fri Feb 19 10:41:19 2010 -0700 +++ b/usr/src/uts/sun4v/os/suspend.c Fri Feb 19 10:18:21 2010 -0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,6 +44,7 @@ #include <sys/sunddi.h> #include <sys/cpupart.h> #include <sys/hsvc.h> +#include <vm/hat_sfmmu.h> /* * Sun4v OS Suspend @@ -125,10 +126,9 @@ boolean_t tick_stick_emulation_active = B_FALSE; /* - * Controls whether or not MD information is refreshed after a - * successful suspend and resume. When non-zero, after a successful - * suspend and resume, the MD will be downloaded, cpunodes updated, - * and processor grouping information recalculated. + * When non-zero, after a successful suspend and resume, cpunodes, CPU HW + * sharing data structures, and processor groups will be updated using + * information from the updated MD. */ static int suspend_update_cpu_mappings = 1; @@ -243,15 +243,8 @@ md_t *mdp; processorid_t id; cpu_t *cp; - int rv; cpu_pg_t *pgps[NCPU]; - /* Download the latest MD */ - if ((rv = mach_descrip_update()) != 0) { - DBG("suspend: mach_descrip_update error: %d", rv); - return; - } - if ((mdp = md_get_handle()) == NULL) { DBG("suspend: md_get_handle failed"); return; @@ -491,6 +484,8 @@ ASSERT(suspend_supported()); DBG("suspend: %s", __func__); + sfmmu_ctxdoms_lock(); + mutex_enter(&cpu_lock); /* Suspend the watchdog */ @@ -535,6 +530,7 @@ start_cpus(); watchdog_resume(); mutex_exit(&cpu_lock); + sfmmu_ctxdoms_unlock(); DBG("suspend: failed, rv: %ld\n", rv); return (rv); } @@ -561,6 +557,8 @@ tick_stick_emulation_active = B_TRUE; } + sfmmu_ctxdoms_remove(); + /* Resume cyclics, unpause CPUs */ cyclic_resume(); start_cpus(); @@ -575,6 +573,14 @@ mutex_exit(&cpu_lock); + /* Download the latest MD */ + if ((rv = mach_descrip_update()) != 0) + cmn_err(CE_PANIC, "suspend: mach_descrip_update failed: 
%ld", + rv); + + sfmmu_ctxdoms_update(); + sfmmu_ctxdoms_unlock(); + /* Get new MD, update CPU mappings/relationships */ if (suspend_update_cpu_mappings) update_cpu_mappings();
--- a/usr/src/uts/sun4v/sys/mach_descrip.h Fri Feb 19 10:41:19 2010 -0700 +++ b/usr/src/uts/sun4v/sys/mach_descrip.h Fri Feb 19 10:18:21 2010 -0800 @@ -20,15 +20,13 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _MACH_DESCRIP_H #define _MACH_DESCRIP_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -85,6 +83,7 @@ extern int md_alloc_scan_dag(md_t *, mde_cookie_t, char *, char *, mde_cookie_t **); extern void md_free_scan_dag(md_t *, mde_cookie_t **); +extern uint64_t md_get_current_gen(void); #ifdef __cplusplus }