changeset 5075:199eb2ec2c2c

6597746 Don't create srd_cache, region_cache and scd_cache on systems that don't use shared context
6597751 SFMMU_HASH_UNLOCK() call is missing in hat_unlock_region()
6600745 shared context code needs to be cleaned up
6603269 assertion failed: sfhme->hme_tte.ll != 0, file: ../../sfmmu/vm/hat_sfmmu.c, line: 11613
author paulsan
date Mon, 17 Sep 2007 15:08:19 -0700
parents 320193a797e9
children 342323d1ccaa
files usr/src/uts/common/vm/seg_vn.c usr/src/uts/i86pc/vm/hat_i86.c usr/src/uts/sfmmu/vm/hat_sfmmu.c usr/src/uts/sfmmu/vm/hat_sfmmu.h usr/src/uts/sun4/vm/sfmmu.c usr/src/uts/sun4u/vm/mach_kpm.c usr/src/uts/sun4v/os/fillsysinfo.c usr/src/uts/sun4v/sys/mmu.h usr/src/uts/sun4v/vm/mach_sfmmu.c
diffstat 9 files changed, 172 insertions(+), 128 deletions(-)
--- a/usr/src/uts/common/vm/seg_vn.c	Mon Sep 17 14:46:54 2007 -0700
+++ b/usr/src/uts/common/vm/seg_vn.c	Mon Sep 17 15:08:19 2007 -0700
@@ -1628,8 +1628,8 @@
 
 
 /*
- * callback function used by segvn_unmap to invoke free_vp_pages() for only
- * those pages actually processed by the HAT
+ * callback function to invoke free_vp_pages() for only those pages actually
+ * processed by the HAT when a shared region is destroyed.
  */
 extern int free_pages;
 
@@ -1657,6 +1657,10 @@
 	free_vp_pages(vp, off, len);
 }
 
+/*
+ * callback function used by segvn_unmap to invoke free_vp_pages() for only
+ * those pages actually processed by the HAT
+ */
 static void
 segvn_hat_unload_callback(hat_callback_t *cb)
 {
--- a/usr/src/uts/i86pc/vm/hat_i86.c	Mon Sep 17 14:46:54 2007 -0700
+++ b/usr/src/uts/i86pc/vm/hat_i86.c	Mon Sep 17 15:08:19 2007 -0700
@@ -1722,7 +1722,7 @@
 
 /* ARGSUSED */
 void
-hat_unlock_region(struct hat *sfmmup, caddr_t addr, size_t len,
+hat_unlock_region(struct hat *hat, caddr_t addr, size_t len,
     hat_region_cookie_t rcookie)
 {
 	panic("No shared region support on x86");
@@ -3877,13 +3877,13 @@
 
 /* ARGSUSED */
 void
-hat_join_srd(struct hat *sfmmup, vnode_t *evp)
+hat_join_srd(struct hat *hat, vnode_t *evp)
 {
 }
 
 /* ARGSUSED */
 hat_region_cookie_t
-hat_join_region(struct hat *sfmmup,
+hat_join_region(struct hat *hat,
     caddr_t r_saddr,
     size_t r_size,
     void *r_obj,
@@ -3899,14 +3899,14 @@
 
 /* ARGSUSED */
 void
-hat_leave_region(struct hat *sfmmup, hat_region_cookie_t rcookie, uint_t flags)
+hat_leave_region(struct hat *hat, hat_region_cookie_t rcookie, uint_t flags)
 {
 	panic("No shared region support on x86");
 }
 
 /* ARGSUSED */
 void
-hat_dup_region(struct hat *sfmmup, hat_region_cookie_t rcookie)
+hat_dup_region(struct hat *hat, hat_region_cookie_t rcookie)
 {
 	panic("No shared region support on x86");
 }
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c	Mon Sep 17 14:46:54 2007 -0700
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c	Mon Sep 17 15:08:19 2007 -0700
@@ -823,10 +823,7 @@
  * highbit() - 1) to get the size code for the smallest TSB that can represent
  * all of physical memory, while erring on the side of too much.
  *
- * If the computed size code is less than the current tsb_max_growsize, we set
- * tsb_max_growsize to the computed size code.  In the case where the computed
- * size code is greater than tsb_max_growsize, we have these restrictions that
- * apply to increasing tsb_max_growsize:
+ * Restrict tsb_max_growsize to make sure that:
  *	1) TSBs can't grow larger than the TSB slab size
  *	2) TSBs can't grow larger than UTSB_MAX_SZCODE.
  */
@@ -1381,25 +1378,33 @@
 	mutex_init(&kpr_mutex, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&kpr_suspendlock, NULL, MUTEX_SPIN, (void *)PIL_MAX);
 
-	srd_buckets = kmem_zalloc(SFMMU_MAX_SRD_BUCKETS *
-	    sizeof (srd_buckets[0]), KM_SLEEP);
-	for (i = 0; i < SFMMU_MAX_SRD_BUCKETS; i++) {
-		mutex_init(&srd_buckets[i].srdb_lock, NULL, MUTEX_DEFAULT,
-		    NULL);
-	}
-	/*
-	 * 64 byte alignment is required in order to isolate certain field
-	 * into its own cacheline.
-	 */
-	srd_cache = kmem_cache_create("srd_cache", sizeof (sf_srd_t), 64,
-	    sfmmu_srdcache_constructor, sfmmu_srdcache_destructor,
-	    NULL, NULL, NULL, 0);
-	region_cache = kmem_cache_create("region_cache",
-	    sizeof (sf_region_t), 0, sfmmu_rgncache_constructor,
-	    sfmmu_rgncache_destructor, NULL, NULL, NULL, 0);
-	scd_cache = kmem_cache_create("scd_cache", sizeof (sf_scd_t), 0,
-	    sfmmu_scdcache_constructor,  sfmmu_scdcache_destructor,
-	    NULL, NULL, NULL, 0);
+	/*
+	 * If shared context support is disabled via /etc/system, set
+	 * shctx_on to 0 here in case it was set to 1 earlier in the boot
+	 * sequence by the cpu module initialization code.
+	 */
+	if (shctx_on && disable_shctx) {
+		shctx_on = 0;
+	}
+
+	if (shctx_on) {
+		srd_buckets = kmem_zalloc(SFMMU_MAX_SRD_BUCKETS *
+		    sizeof (srd_buckets[0]), KM_SLEEP);
+		for (i = 0; i < SFMMU_MAX_SRD_BUCKETS; i++) {
+			mutex_init(&srd_buckets[i].srdb_lock, NULL,
+			    MUTEX_DEFAULT, NULL);
+		}
+
+		srd_cache = kmem_cache_create("srd_cache", sizeof (sf_srd_t),
+		    0, sfmmu_srdcache_constructor, sfmmu_srdcache_destructor,
+		    NULL, NULL, NULL, 0);
+		region_cache = kmem_cache_create("region_cache",
+		    sizeof (sf_region_t), 0, sfmmu_rgncache_constructor,
+		    sfmmu_rgncache_destructor, NULL, NULL, NULL, 0);
+		scd_cache = kmem_cache_create("scd_cache", sizeof (sf_scd_t),
+		    0, sfmmu_scdcache_constructor,  sfmmu_scdcache_destructor,
+		    NULL, NULL, NULL, 0);
+	}
 
 	/*
 	 * Pre-allocate hrm_hashtab before enabling the collection of
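To put the new conditional in context: shared context support can be disabled administratively before hat_init() runs, in which case none of the three kmem caches above are created (bug 6597746). A minimal sketch of the /etc/system entry, assuming the disable_shctx variable needs no module prefix because the sfmmu HAT is linked into the SPARC unix module:

    * /etc/system (sketch)
    set disable_shctx = 1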
@@ -3716,7 +3721,8 @@
 }
 
 /*
- * update *eaddrp only if hmeblk was unloaded.
+ * Search for an hmeblk which maps addr; if one is found, unload that
+ * mapping and update *eaddrp.
  */
 static void
 sfmmu_unload_hmeregion_va(sf_srd_t *srdp, uint_t rid, caddr_t addr,
@@ -3760,13 +3766,6 @@
 	sfmmu_hblks_list_purge(&list);
 }
 
-/*
- * This routine can be optimized to eliminate scanning areas of smaller page
- * size bitmaps when a corresponding bit is set in the bitmap for a bigger
- * page size. For now assume the region will usually only have the primary
- * size mappings so we'll scan only one bitmap anyway by checking rgn_hmeflags
- * first.
- */
 static void
 sfmmu_unload_hmeregion(sf_srd_t *srdp, sf_region_t *rgnp)
 {
@@ -3972,6 +3971,7 @@
 			HME_HASH_SEARCH_PREV(hmebp, hblktag, hmeblkp, hblkpa,
 			    pr_hblk, prevpa, &list);
 			if (hmeblkp == NULL) {
+				SFMMU_HASH_UNLOCK(hmebp);
 				ttesz--;
 				continue;
 			}
@@ -7407,9 +7407,11 @@
 		 * from the list.
 		 */
 		tmphme = sfhme->hme_next;
+		if (IS_PAHME(sfhme))
+			continue;
 		/*
 		 * If we are looking for large mappings and this hme doesn't
-		 * reach the range we are seeking, just ignore its.
+		 * reach the range we are seeking, just ignore it.
 		 */
 		hmeblkp = sfmmu_hmetohblk(sfhme);
 		if (hmeblkp->hblk_xhat_bit)
@@ -7917,7 +7919,7 @@
 }
 
 /*
- * This routine will return both pfn and tte for the addr.
+ * This routine will return both pfn and tte for the vaddr.
  */
 static pfn_t
 sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup, tte_t *ttep)
@@ -8129,7 +8131,7 @@
 }
 
 /*
- * Return 1 the number of mappings exceeds sh_thresh. Return 0
+ * Return 1 if the number of mappings exceeds sh_thresh. Return 0
  * otherwise. Count shared hmeblks by region's refcnt.
  */
 int
@@ -8156,10 +8158,23 @@
 again:
 	for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
 		tmphme = sfhme->hme_next;
-		if (hme_size(sfhme) != sz) {
+		if (IS_PAHME(sfhme)) {
 			continue;
 		}
+
 		hmeblkp = sfmmu_hmetohblk(sfhme);
+		if (hmeblkp->hblk_xhat_bit) {
+		    cnt++;
+		    if (cnt > sh_thresh) {
+			sfmmu_mlist_exit(pml);
+			return (1);
+		    }
+		    continue;
+		}
+		if (hme_size(sfhme) != sz) {
+		    continue;
+		}
+
 		if (hmeblkp->hblk_shared) {
 			sf_srd_t *srdp = hblktosrd(hmeblkp);
 			uint_t rid = hmeblkp->hblk_tag.htag_rid;
@@ -8238,6 +8253,7 @@
 
 	ASSERT(PAGE_EXCL(pp));
 	ASSERT(!PP_ISFREE(pp));
+	ASSERT(!PP_ISKAS(pp));
 	ASSERT(page_szc_lock_assert(pp));
 	pml = sfmmu_mlist_enter(pp);
 
@@ -8264,6 +8280,7 @@
 		rootpp = PP_GROUPLEADER(pp, sz);
 		for (sfhme = rootpp->p_mapping; sfhme; sfhme = tmphme) {
 			tmphme = sfhme->hme_next;
+			ASSERT(!IS_PAHME(sfhme));
 			hmeblkp = sfmmu_hmetohblk(sfhme);
 			if (hme_size(sfhme) != sz) {
 				continue;
@@ -8649,10 +8666,7 @@
 	 * sfmmu_check_page_sizes at the end of this routine.
 	 */
 	old_scdp = sfmmup->sfmmu_scdp;
-	/*
-	 * Call hat_join_region without the hat lock, because it's
-	 * used in hat_join_region.
-	 */
+
 	rcookie = hat_join_region(sfmmup, addr, len, (void *)ism_hatid, 0,
 	    PROT_ALL, ismszc, NULL, HAT_REGION_ISM);
 	if (rcookie != HAT_INVALID_REGION_COOKIE) {
@@ -8810,8 +8824,8 @@
 
 		/*
 		 * After hat_leave_region, the sfmmup may leave SCD,
-		 * in which case, we want to grow the private tsb size
-		 * when call sfmmu_check_page_sizes at the end of the routine.
+		 * in which case, we want to grow the private tsb size when
+		 * calling sfmmu_check_page_sizes at the end of the routine.
 		 */
 		old_scdp = sfmmup->sfmmu_scdp;
 		/*
@@ -9195,6 +9209,8 @@
 	 */
 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
 		tmphme = sfhmep->hme_next;
+		if (IS_PAHME(sfhmep))
+			continue;
 		hmeblkp = sfmmu_hmetohblk(sfhmep);
 		if (hmeblkp->hblk_xhat_bit)
 			continue;
@@ -9221,6 +9237,8 @@
 
 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
 		tmphme = sfhmep->hme_next;
+		if (IS_PAHME(sfhmep))
+			continue;
 		hmeblkp = sfmmu_hmetohblk(sfhmep);
 		if (hmeblkp->hblk_xhat_bit)
 			continue;
@@ -9368,6 +9386,8 @@
 		}
 
 		for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
+			if (IS_PAHME(sfhme))
+				continue;
 			hmeblkp = sfmmu_hmetohblk(sfhme);
 			if (hmeblkp->hblk_xhat_bit)
 				continue;
@@ -9514,6 +9534,8 @@
 
 	for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
 
+		if (IS_PAHME(sfhme))
+			continue;
 		hmeblkp = sfmmu_hmetohblk(sfhme);
 
 		if (hmeblkp->hblk_xhat_bit)
@@ -10064,7 +10086,10 @@
 	sfmmu_hat_exit(hatlockp);
 }
 
-/* Update scd_rttecnt for shme rgns in the SCD */
+/*
+ * The scd_rttecnt field in the SCD must be updated to take account of the
+ * regions which it contains.
+ */
 static void
 sfmmu_set_scd_rttecnt(sf_srd_t *srdp, sf_scd_t *scdp)
 {
@@ -11030,6 +11055,13 @@
 				    size, flags);
 		}
 	} else if (SFMMU_IS_SHMERID_VALID(rid)) {
+		/*
+		 * Shared hmes use per region bitmaps in rgn_hmeflag
+		 * rather than shadow hmeblks to keep track of the
+		 * mapping sizes which have been allocated for the region.
+		 * Here we clean up old invalid hmeblks with this rid,
+		 * which may be left around by pageunload().
+		 */
 		int ttesz;
 		caddr_t va;
 		caddr_t	eva = vaddr + TTEBYTES(size);
@@ -11317,11 +11349,6 @@
 	hmeblkp->hblk_nextpa = hblkpa;
 	hmeblkp->hblk_shw_bit = 0;
 
-	/*
-	 * Clear ttebit map in the region this hmeblk belongs to. The region
-	 * must exist as long as any of its hmeblks exist. This invariant
-	 * holds because before region is freed all its hmeblks are removed.
-	 */
 	if (hmeblkp->hblk_shared) {
 		sf_srd_t	*srdp;
 		sf_region_t	*rgnp;
@@ -11334,7 +11361,6 @@
 		ASSERT(rid < SFMMU_MAX_HME_REGIONS);
 		rgnp = srdp->srd_hmergnp[rid];
 		ASSERT(rgnp != NULL);
-		vaddr = (caddr_t)get_hblk_base(hmeblkp);
 		SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
 		hmeblkp->hblk_shared = 0;
 	}
@@ -11572,11 +11598,6 @@
 	 */
 	hmeblkp->hblk_shw_bit = 0;
 
-	/*
-	 * Clear ttebit map in the region this hmeblk belongs to. The region
-	 * must exist as long as any of its hmeblks exist. This invariant
-	 * holds because before region is freed all its hmeblks are removed.
-	 */
 	if (hmeblkp->hblk_shared) {
 		sf_srd_t	*srdp;
 		sf_region_t	*rgnp;
@@ -11589,7 +11610,6 @@
 		ASSERT(rid < SFMMU_MAX_HME_REGIONS);
 		rgnp = srdp->srd_hmergnp[rid];
 		ASSERT(rgnp != NULL);
-		vaddr = (caddr_t)get_hblk_base(hmeblkp);
 		SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
 		hmeblkp->hblk_shared = 0;
 	}
@@ -12087,6 +12107,12 @@
 	return (rcpuset);
 }
 
+/*
+ * This routine takes an sfmmu pointer and the va of an address in an
+ * ISM region as input and returns the corresponding region id in ism_rid.
+ * A return value of 1 indicates that a region has been found and ism_rid
+ * is valid; otherwise 0 is returned.
+ */
 static int
 find_ism_rid(sfmmu_t *sfmmup, sfmmu_t *ism_sfmmup, caddr_t va, uint_t *ism_rid)
 {
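A hedged caller-side sketch of the interface the new comment describes (locking and error handling around the call are omitted):

    uint_t ism_rid;

    if (find_ism_rid(sfmmup, ism_sfmmup, va, &ism_rid)) {
            /* ism_rid is valid: it names the ISM region that maps va */
    } else {
            /* va is not covered by any ISM region of this hat */
    }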
@@ -13321,6 +13347,8 @@
 	 */
 
 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = sfhmep->hme_next) {
+		if (IS_PAHME(sfhmep))
+			continue;
 		hmeblkp = sfmmu_hmetohblk(sfhmep);
 		if (hmeblkp->hblk_xhat_bit)
 			continue;
@@ -13357,7 +13385,7 @@
 	case	HAT_VMODSORT:
 		return (1);
 	case	HAT_SHARED_REGIONS:
-		if (!disable_shctx && shctx_on)
+		if (shctx_on)
 			return (1);
 		else
 			return (0);
@@ -13679,7 +13707,7 @@
 	ASSERT(sfmmup != ksfmmup);
 	ASSERT(sfmmup->sfmmu_srdp == NULL);
 
-	if (disable_shctx || !shctx_on) {
+	if (!shctx_on) {
 		return;
 	}
 
@@ -13901,7 +13929,6 @@
 	sf_region_t **rarrp;
 	uint16_t *busyrgnsp;
 	ulong_t rttecnt;
-	int rkmalloc = 0;
 	uchar_t tteflag;
 	uchar_t r_type = flags & HAT_REGION_TYPE_MASK;
 	int text = (r_type == HAT_REGION_TEXT);
@@ -14088,27 +14115,22 @@
 
 	ASSERT(MUTEX_HELD(&srdp->srd_mutex));
 	if (*freelistp != NULL) {
-		new_rgnp = *freelistp;
-		*freelistp = new_rgnp->rgn_next;
-		ASSERT(new_rgnp->rgn_id < *nextidp);
-		ASSERT(new_rgnp->rgn_id < maxids);
-		ASSERT(new_rgnp->rgn_flags & SFMMU_REGION_FREE);
-		ASSERT((new_rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK)
+		rgnp = *freelistp;
+		*freelistp = rgnp->rgn_next;
+		ASSERT(rgnp->rgn_id < *nextidp);
+		ASSERT(rgnp->rgn_id < maxids);
+		ASSERT(rgnp->rgn_flags & SFMMU_REGION_FREE);
+		ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK)
 		    == r_type);
-		ASSERT(rarrp[new_rgnp->rgn_id] == new_rgnp);
-
-		ASSERT(new_rgnp->rgn_hmeflags == 0);
-	}
-
-	if (new_rgnp == NULL) {
+		ASSERT(rarrp[rgnp->rgn_id] == rgnp);
+		ASSERT(rgnp->rgn_hmeflags == 0);
+	} else {
 		/*
 		 * release local locks before memory allocation.
 		 */
 		mutex_exit(&srdp->srd_mutex);
-		if (new_rgnp == NULL) {
-			rkmalloc = 1;
-			new_rgnp = kmem_cache_alloc(region_cache, KM_SLEEP);
-		}
+
+		new_rgnp = kmem_cache_alloc(region_cache, KM_SLEEP);
 
 		mutex_enter(&srdp->srd_mutex);
 		for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL;
@@ -14123,34 +14145,19 @@
 			}
 		}
 		if (rgnp != NULL) {
-			if (!rkmalloc) {
-				ASSERT(new_rgnp->rgn_flags &
-				    SFMMU_REGION_FREE);
-				new_rgnp->rgn_next = *freelistp;
-				*freelistp = new_rgnp;
-				new_rgnp = NULL;
-			}
 			goto rfound;
 		}
 
-		if (rkmalloc) {
-			if (*nextidp >= maxids) {
-				mutex_exit(&srdp->srd_mutex);
-				goto fail;
-			}
-			rgnp = new_rgnp;
-			new_rgnp = NULL;
-			rgnp->rgn_id = (*nextidp)++;
-			ASSERT(rgnp->rgn_id < maxids);
-			ASSERT(rarrp[rgnp->rgn_id] == NULL);
-			rarrp[rgnp->rgn_id] = rgnp;
-		} else {
-			rgnp = new_rgnp;
-			new_rgnp = NULL;
-		}
-	} else {
+		if (*nextidp >= maxids) {
+			mutex_exit(&srdp->srd_mutex);
+			goto fail;
+		}
 		rgnp = new_rgnp;
 		new_rgnp = NULL;
+		rgnp->rgn_id = (*nextidp)++;
+		ASSERT(rgnp->rgn_id < maxids);
+		ASSERT(rarrp[rgnp->rgn_id] == NULL);
+		rarrp[rgnp->rgn_id] = rgnp;
 	}
 
 	ASSERT(rgnp->rgn_sfmmu_head == NULL);
@@ -14177,14 +14184,7 @@
 
 fail:
 	ASSERT(new_rgnp != NULL);
-	if (rkmalloc) {
-		kmem_cache_free(region_cache, new_rgnp);
-	} else {
-		/* put it back on the free list. */
-		ASSERT(new_rgnp->rgn_flags & SFMMU_REGION_FREE);
-		new_rgnp->rgn_next = *freelistp;
-		*freelistp = new_rgnp;
-	}
+	kmem_cache_free(region_cache, new_rgnp);
 	return (HAT_INVALID_REGION_COOKIE);
 }
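Condensed view of the allocation flow that remains after removing rkmalloc (schematic, assembled from the hunks above; not a patch to apply):

    if (*freelistp != NULL) {
            rgnp = *freelistp;                      /* reuse a region from the free list */
            *freelistp = rgnp->rgn_next;
    } else {
            mutex_exit(&srdp->srd_mutex);           /* drop the lock around allocation */
            new_rgnp = kmem_cache_alloc(region_cache, KM_SLEEP);
            mutex_enter(&srdp->srd_mutex);
            /* re-check the hash: another thread may have created the region */
            if (rgnp != NULL)
                    goto rfound;
            if (*nextidp >= maxids)
                    goto fail;                      /* fail: kmem_cache_free(new_rgnp) */
            rgnp = new_rgnp;
            new_rgnp = NULL;
            rgnp->rgn_id = (*nextidp)++;
            rarrp[rgnp->rgn_id] = rgnp;
    }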
 
@@ -14543,7 +14543,7 @@
 #endif /* DEBUG */
 
 /*
- * Removes an sfmmu from the start of the queue.
+ * Removes an sfmmu from the SCD sfmmu list.
  */
 static void
 sfmmu_from_scd_list(sfmmu_t **headp, sfmmu_t *sfmmup)
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h	Mon Sep 17 14:46:54 2007 -0700
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h	Mon Sep 17 15:08:19 2007 -0700
@@ -426,9 +426,9 @@
 	sf_region_t		*srd_hmergnfree;
 	/* pointer to the next free ism region */
 	sf_region_t		*srd_ismrgnfree;
-	/* id of next ism rgn created */
+	/* id of next ism region created */
 	uint16_t		srd_next_ismrid;
-	/* pointer of next hme region created */
+	/* id of next hme region created */
 	uint16_t		srd_next_hmerid;
 	uint16_t		srd_ismbusyrgns; /* # of ism rgns in use */
 	uint16_t		srd_hmebusyrgns; /* # of hme rgns in use */
@@ -468,6 +468,8 @@
  * This macro grabs hat lock and allocates level 2 hat chain
  * associated with a shme rgn. In the majority of cases, the macro
  * is called with alloc = 0, and lock = 0.
+ * A pointer to the level 2 sf_rgn_link_t structure is returned in the lnkp
+ * parameter.
  */
 #define	SFMMU_HMERID2RLINKP(sfmmup, rid, lnkp, alloc, lock)		\
 {									\
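A hedged sketch of a typical invocation, based on the lnkp behaviour documented above (variable names are illustrative; the NULL check reflects the alloc == 0 case):

    sf_rgn_link_t *rlink;

    /* look up only: no level 2 chain allocation, no hat lock */
    SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 0, 0);
    if (rlink != NULL) {
            /* walk or update the per-region sfmmu link */
    }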
@@ -619,8 +621,23 @@
  * tte counts should be protected by cas.
  * cpuset is protected by cas.
  *
+ * ttecnt accounting for mappings which do not use shared hmes is carried out
+ * during pagefault handling. In the shared hme case, only the first process
+ * to access a mapping generates a pagefault; subsequent processes simply
+ * find the shared hme entry during trap handling, and therefore there is no
+ * corresponding event to initiate ttecnt accounting. Currently, as shared
+ * hmes are only used for text segments, when joining a region we assume the
+ * worst case and add the number of ttes required to map the entire region
+ * to the ttecnt corresponding to the region pagesize. However, if the region
+ * has a 4M pagesize and memory is low, the allocation of 4M pages may fail;
+ * 8K pages will then be allocated instead, and the first TSB which stores 8K
+ * mappings will potentially be undersized. To compensate for the potential
+ * underaccounting in this case we always add 1/4 of the region size to the 8K
+ * ttecnt.
+ *
  * Note that sfmmu_xhat_provider MUST be the first element.
  */
+
 struct hat {
 	void		*sfmmu_xhat_provider;	/* NULL for CPU hat */
 	cpuset_t	sfmmu_cpusran;	/* cpu bit mask for efficient xcalls */
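A rough sketch of the accounting described above (illustrative only; rttecnt, TTEBYTES() and sfmmu_ttecnt[] appear elsewhere in this HAT code, but parameter names such as r_size and r_pgszc, and the exact statements in the region-join path, are assumptions):

    /* worst case: the whole region mapped at its primary pagesize */
    rttecnt = r_size / TTEBYTES(r_pgszc);
    sfmmup->sfmmu_ttecnt[r_pgszc] += rttecnt;       /* updated atomically in practice */

    /* guard against 4M allocation failure: credit 1/4 of the region to 8K ttecnt */
    sfmmup->sfmmu_ttecnt[TTE8K] += (r_size / TTEBYTES(TTE8K)) >> 2;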
@@ -704,9 +721,6 @@
 #define	scd_hmeregion_map	scd_region_map.h_rmap_s.hmeregion_map
 #define	scd_ismregion_map	scd_region_map.h_rmap_s.ismregion_map
 
-#define	scd_hmeregion_map	scd_region_map.h_rmap_s.hmeregion_map
-#define	scd_ismregion_map	scd_region_map.h_rmap_s.ismregion_map
-
 extern int disable_shctx;
 extern int shctx_on;
 
--- a/usr/src/uts/sun4/vm/sfmmu.c	Mon Sep 17 14:46:54 2007 -0700
+++ b/usr/src/uts/sun4/vm/sfmmu.c	Mon Sep 17 15:08:19 2007 -0700
@@ -152,7 +152,7 @@
 	if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID)
 		return ((uint64_t)-1);
 	return (((uint64_t)pfn << MMU_PAGESHIFT) |
-		((uint64_t)vaddr & MMU_PAGEOFFSET));
+	    ((uint64_t)vaddr & MMU_PAGEOFFSET));
 }
 
 void
@@ -195,7 +195,7 @@
 		}
 	}
 
-	if (!shctx_on || disable_shctx) {
+	if (!shctx_on) {
 		sfmmu_patch_shctx();
 	}
 
@@ -891,7 +891,7 @@
 	if (enable_bigktsb) {
 		ASSERT((max_nucuhme_buckets + max_nuckhme_buckets) *
 		    sizeof (struct hmehash_bucket) <=
-			TSB_BYTES(TSB_1M_SZCODE));
+		    TSB_BYTES(TSB_1M_SZCODE));
 
 		max_nucuhme_buckets *= 2;
 		max_nuckhme_buckets *= 2;
@@ -906,7 +906,7 @@
 	 * physical memory only.
 	 */
 	hme_buckets = (npages * HMEHASH_FACTOR) /
-		(HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT));
+	    (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT));
 
 	uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS);
 
@@ -926,7 +926,7 @@
 	khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS);
 
 	if ((khmehash_num > max_nuckhme_buckets) ||
-		(uhmehash_num > max_nucuhme_buckets)) {
+	    (uhmehash_num > max_nucuhme_buckets)) {
 		khme_hash = NULL;
 		uhme_hash = NULL;
 	} else {
--- a/usr/src/uts/sun4u/vm/mach_kpm.c	Mon Sep 17 14:46:54 2007 -0700
+++ b/usr/src/uts/sun4u/vm/mach_kpm.c	Mon Sep 17 15:08:19 2007 -0700
@@ -1618,6 +1618,8 @@
 	 */
 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
 		tmphme = sfhmep->hme_next;
+		if (IS_PAHME(sfhmep))
+			continue;
 		hmeblkp = sfmmu_hmetohblk(sfhmep);
 		if (hmeblkp->hblk_xhat_bit)
 			continue;
@@ -1641,6 +1643,8 @@
 
 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
 		tmphme = sfhmep->hme_next;
+		if (IS_PAHME(sfhmep))
+			continue;
 		hmeblkp = sfmmu_hmetohblk(sfhmep);
 		if (hmeblkp->hblk_xhat_bit)
 			continue;
--- a/usr/src/uts/sun4v/os/fillsysinfo.c	Mon Sep 17 14:46:54 2007 -0700
+++ b/usr/src/uts/sun4v/os/fillsysinfo.c	Mon Sep 17 15:08:19 2007 -0700
@@ -521,9 +521,7 @@
 }
 
 /*
- * Get the number of shared contexts from MD. This property more accurately
- * describes the total number of contexts available, not just "shared contexts".
- * If absent the default value is 1,
+ * Get the number of shared contexts from MD. If absent, the default value
+ * is 0.
  *
  */
 static uint64_t
--- a/usr/src/uts/sun4v/sys/mmu.h	Mon Sep 17 14:46:54 2007 -0700
+++ b/usr/src/uts/sun4v/sys/mmu.h	Mon Sep 17 15:08:19 2007 -0700
@@ -152,8 +152,8 @@
 #define	MAX_NCTXS	(1ull << MAX_NCTXS_BITS)
 
 /*
- * MIN_NCONTEXTS and MIN_NTSBS are the minimum number of contexts and tsbs
- * necessary for shared context support.
+ * MIN_NSHCONTEXTS and MIN_NTSBS are the minimum number of shared contexts
+ * and tsbs necessary for shared context support.
  */
 #define	MIN_NSHCONTEXTS			1
 #define	MIN_NTSBS			4
--- a/usr/src/uts/sun4v/vm/mach_sfmmu.c	Mon Sep 17 14:46:54 2007 -0700
+++ b/usr/src/uts/sun4v/vm/mach_sfmmu.c	Mon Sep 17 15:08:19 2007 -0700
@@ -322,7 +322,31 @@
 }
 
 /*
- * Set machine specific TSB information
+ * The tsbord[] array is set up to translate from the order of tsbs in the
+ * sfmmu list to the order of tsbs in the tsb descriptor array passed to the
+ * hv, which is the search order used during Hardware Table Walk.
+ * So, the tsb with index i in the sfmmu list will have search order tsbord[i].
+ *
+ * The order of tsbs in the sfmmu list will be as follows:
+ *
+ *              0 8K - 512K private TSB
+ *              1 4M - 256M private TSB
+ *              2 8K - 512K shared TSB
+ *              3 4M - 256M shared TSB
+ *
+ * Shared TSBs are only used if a process is part of an SCD.
+ *
+ * So, e.g. tsbord[3] = 1 corresponds to searching the shared 4M TSB second.
+ *
+ * The search order is selected so that the 8K-512K private TSB is always
+ * searched first. Currently shared context is not expected to map many
+ * 8K-512K pages that cause TLB misses, so we order the shared TSB for
+ * 4M-256M pages in front of the shared TSB for 8K-512K pages. We also
+ * expect more TLB misses against private context mappings than against
+ * shared context mappings, and therefore place private TSBs ahead of shared
+ * TSBs in descriptor order. The shtsb4m_first /etc/system tunable can be
+ * used to change the default ordering of private and shared TSBs for
+ * 4M-256M pages.
  */
 void
 sfmmu_setup_tsbinfo(sfmmu_t *sfmmup)
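To make the ordering concrete, a hedged illustration of the resulting tsbord[] contents, derived from the comment above rather than copied from the function body:

    /* default: private 8K-512K, private 4M-256M, shared 4M-256M, shared 8K-512K */
    tsbord[0] = 0;      /* private 8K-512K TSB searched first */
    tsbord[1] = 1;      /* private 4M-256M TSB searched second */
    tsbord[2] = 3;      /* shared 8K-512K TSB searched last */
    tsbord[3] = 2;      /* shared 4M-256M TSB searched third */

    /*
     * With shtsb4m_first set, the shared 4M-256M TSB moves ahead of the
     * private one, i.e. tsbord[3] = 1, as in the example in the comment above.
     */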