changeset 4004:027a924f9ee3

6539804 panic[cpu0]/thread=fffffffffbc27f40: boot_mapin(): No pp for pfnum = 1ff849
6540208 assertion failed: (((threadp()))->t_hatdepth & (~(1 << 7))) < 64, file: ../../i86pc/vm/hat_i86.c
author josephb
date Tue, 10 Apr 2007 12:46:10 -0700
parents 70e1c9a81b40
children 7f159f75c1e1
files usr/src/uts/i86pc/os/fakebop.c usr/src/uts/i86pc/os/startup.c usr/src/uts/i86pc/vm/hat_i86.c usr/src/uts/i86pc/vm/hat_i86.h usr/src/uts/i86pc/vm/hment.c usr/src/uts/i86pc/vm/htable.c
diffstat 6 files changed, 116 insertions(+), 167 deletions(-)
--- a/usr/src/uts/i86pc/os/fakebop.c	Tue Apr 10 11:17:42 2007 -0700
+++ b/usr/src/uts/i86pc/os/fakebop.c	Tue Apr 10 12:46:10 2007 -0700
@@ -110,6 +110,8 @@
 
 static void build_firmware_properties(void);
 
+static int early_allocation = 1;
+
 /*
  * Allocate aligned physical memory at boot time. This allocator allocates
  * from the highest possible addresses. This avoids exhausting memory that
@@ -129,19 +131,18 @@
 	 * space we can treat physmem as a pfn (not just a pgcnt) and
 	 * get a conservative upper limit.
 	 */
-	extern pgcnt_t physmem;
 	if (physmem != 0 && high_phys > pfn_to_pa(physmem))
 		high_phys = pfn_to_pa(physmem);
 
 	/*
-	 * find the highest available memory in physinstalled
+	 * find the lowest or highest available memory in physinstalled
 	 */
 	size = P2ROUNDUP(size, align);
 	for (; ml; ml = ml->next) {
-		start = ml->address;
-		end = P2ALIGN(start + ml->size, align);
+		start = P2ROUNDUP(ml->address, align);
+		end = P2ALIGN(ml->address + ml->size, align);
 		if (start < next_phys)
-			start = next_phys;
+			start = P2ROUNDUP(next_phys, align);
 		if (end > high_phys)
 			end = P2ALIGN(high_phys, align);
 
@@ -150,11 +151,23 @@
 		if (end - start < size)
 			continue;
 
-		if (end - size > pa)
-			pa = end - size;
+		/*
+		 * Early allocations need to use low memory, since
+		 * physmem might be further limited by bootenv.rc
+		 */
+		if (early_allocation) {
+			if (pa == 0 || start < pa)
+				pa = start;
+		} else {
+			if (end - size > pa)
+				pa = end - size;
+		}
 	}
 	if (pa != 0) {
-		high_phys = pa;
+		if (early_allocation)
+			next_phys = pa + size;
+		else
+			high_phys = pa;
 		return (pa);
 	}
 	panic("do_bop_phys_alloc(0x%" PRIx64 ", 0x%" PRIx64 ") Out of memory\n",
@@ -412,6 +425,55 @@
 }
 
 /*
+ * Parse numeric value from a string. Understands decimal, hex, octal, - and ~
+ */
+static int
+parse_value(char *p, uint64_t *retval)
+{
+	int adjust = 0;
+	uint64_t tmp = 0;
+	int digit;
+	int radix = 10;
+
+	*retval = 0;
+	if (*p == '-' || *p == '~')
+		adjust = *p++;
+
+	if (*p == '0') {
+		++p;
+		if (*p == 0)
+			return (0);
+		if (*p == 'x' || *p == 'X') {
+			radix = 16;
+			++p;
+		} else {
+			radix = 8;
+			++p;
+		}
+	}
+	while (*p) {
+		if ('0' <= *p && *p <= '9')
+			digit = *p - '0';
+		else if ('a' <= *p && *p <= 'f')
+			digit = 10 + *p - 'a';
+		else if ('A' <= *p && *p <= 'F')
+			digit = 10 + *p - 'A';
+		else
+			return (-1);
+		if (digit >= radix)
+			return (-1);
+		tmp = tmp * radix + digit;
+		++p;
+	}
+	if (adjust == '-')
+		tmp = -tmp;
+	else if (adjust == '~')
+		tmp = ~tmp;
+	*retval = tmp;
+	return (0);
+}
+
+/*
  * 2nd part of building the table of boot properties. This includes:
  * - values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values)
  *
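parse_value() is a boot-time stand-in for the kobj_getvalue() call this change removes from startup.c (below). A few illustrative inputs, assuming the function exactly as written above:

	uint64_t v;

	(void) parse_value("262144", &v);	/* decimal: v == 262144 */
	(void) parse_value("0x40000", &v);	/* hex: v == 0x40000 */
	(void) parse_value("-1", &v);		/* '-' negates: v == ~0ULL */
	(void) parse_value("4g", &v);		/* returns -1: no size suffixes */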
@@ -438,6 +500,7 @@
 	char *inputdev;	/* these override the command line if serial ports */
 	char *outputdev;
 	char *consoledev;
+	uint64_t lvalue;
 
 	DBG_MSG("Opening /boot/solaris/bootenv.rc\n");
 	fd = BRD_OPEN(bfs_ops, "/boot/solaris/bootenv.rc", 0);
@@ -538,6 +601,19 @@
 		BRD_CLOSE(bfs_ops, fd);
 
 	/*
+	 * Check if we have to limit the boot time allocator
+	 */
+	if (do_bsys_getproplen(NULL, "physmem") != -1 &&
+	    do_bsys_getprop(NULL, "physmem", line) >= 0 &&
+	    parse_value(line, &lvalue) != -1) {
+		if (0 < lvalue && (lvalue < physmem || physmem == 0)) {
+			physmem = (pgcnt_t)lvalue;
+			DBG(physmem);
+		}
+	}
+	early_allocation = 0;
+
+	/*
 	 * check to see if we have to override the default value of the console
 	 */
 	inputdev = line;
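The property itself is whatever eeprom(1M) has stored in /boot/solaris/bootenv.rc; a line such as the following (the value here is only an example) now caps the boot-time allocator as well as the post-boot physmem:

	setprop physmem 0x40000

Previously the cap was applied only in startup.c, long after do_bop_phys_alloc() could have allocated pages above it; pages above the cap end up with no page_t, which is the "No pp for pfnum" failure in synopsis 6539804.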
@@ -799,7 +875,7 @@
 	}
 
 	/*
-	 * Values forcibly set boot propertiex on the command line via -B.
+	 * Values forcibly set boot properties on the command line via -B.
 	 * Allow use of quotes in values. Other stuff goes on kernel
 	 * command line.
 	 */
--- a/usr/src/uts/i86pc/os/startup.c	Tue Apr 10 11:17:42 2007 -0700
+++ b/usr/src/uts/i86pc/os/startup.c	Tue Apr 10 12:46:10 2007 -0700
@@ -2393,11 +2393,7 @@
 		segmapfreelists = (int)lvalue;
 	}
 
-	if ((BOP_GETPROPLEN(bootops, "physmem") <= sizeof (prop)) &&
-	    (BOP_GETPROP(bootops, "physmem", prop) >= 0) &&
-	    (kobj_getvalue(prop, &lvalue) != -1)) {
-		physmem = (uintptr_t)lvalue;
-	}
+	/* physmem used to be here, but moved much earlier to fakebop.c */
 }
 
 /*
--- a/usr/src/uts/i86pc/vm/hat_i86.c	Tue Apr 10 11:17:42 2007 -0700
+++ b/usr/src/uts/i86pc/vm/hat_i86.c	Tue Apr 10 12:46:10 2007 -0700
@@ -139,13 +139,6 @@
 
 #endif
 
-/*
- * Locks, etc. to control use of the hat reserves when recursively
- * allocating pagetables for the hat data structures.
- */
-static kmutex_t hat_reserves_lock;
-static kcondvar_t hat_reserves_cv;
-kthread_t *hat_reserves_thread;
 uint_t use_boot_reserve = 1;	/* cleared after early boot process */
 uint_t can_steal_post_boot = 0;	/* set late in boot to enable stealing */
 
@@ -1287,115 +1280,6 @@
 }
 
 /*
- * The t_hatdepth field is an 8-bit counter.  We use the lower seven bits
- * to track exactly how deep we are in the memload->kmem_alloc recursion.
- * If the depth is greater than 1, that indicates that we are performing a
- * hat operation to satisfy another hat operation.  To prevent infinite
- * recursion, we switch over to using pre-allocated "reserves" of htables
- * and hments.
- *
- * The uppermost bit is used to indicate that we are transitioning away
- * from being the reserves thread.  See hati_reserves_exit() for the
- * details.
- */
-#define	EXITING_FLAG		(1 << 7)
-#define	DEPTH_MASK		(~EXITING_FLAG)
-#define	HAT_DEPTH(t)		((t)->t_hatdepth & DEPTH_MASK)
-#define	EXITING_RESERVES(t)	((t)->t_hatdepth & EXITING_FLAG)
-
-/*
- * Access to reserves for HAT_NO_KALLOC is single threaded.
- * If someone else is in the reserves, we'll politely wait for them
- * to finish. This keeps normal hat_memload()s from eating up
- * the mappings needed to replenish the reserve.
- */
-static void
-hati_reserves_enter(uint_t kmem_for_hat)
-{
-	/*
-	 * 64 is an arbitrary number to catch serious problems.  I'm not
-	 * sure what the absolute maximum depth is, but it should be
-	 * substantially less than this.
-	 */
-	ASSERT(HAT_DEPTH(curthread) < 64);
-
-	/*
-	 * If we are doing a memload to satisfy a kmem operation, we enter
-	 * the reserves immediately; we don't wait to recurse to a second
-	 * level of memload.
-	 */
-	ASSERT(kmem_for_hat < 2);
-	curthread->t_hatdepth += (1 + kmem_for_hat);
-
-	if (hat_reserves_thread == curthread || use_boot_reserve)
-		return;
-
-	if (HAT_DEPTH(curthread) > 1 || hat_reserves_thread != NULL) {
-		mutex_enter(&hat_reserves_lock);
-		while (hat_reserves_thread != NULL)
-			cv_wait(&hat_reserves_cv, &hat_reserves_lock);
-
-		if (HAT_DEPTH(curthread) > 1)
-			hat_reserves_thread = curthread;
-
-		mutex_exit(&hat_reserves_lock);
-	}
-}
-
-/*
- * If we are the reserves_thread and we've finally finished with all our
- * memloads (ie. no longer doing hat slabs), we can release our use of the
- * reserve.
- */
-static void
-hati_reserves_exit(uint_t kmem_for_hat)
-{
-	ASSERT(kmem_for_hat < 2);
-	curthread->t_hatdepth -= (1 + kmem_for_hat);
-
-	/*
-	 * Simple case: either we are not the reserves thread, or we are
-	 * the reserves thread and we are nested deeply enough that we
-	 * should still be the reserves thread.
-	 *
-	 * Note: we may not become the reserves thread after we recursively
-	 * enter our second HAT routine, but we don't stop being the
-	 * reserves thread until we exit the toplevel HAT routine.  This is
-	 * to work around vmem's inability to determine when an allocation
-	 * should be satisfied from the hat_memload arena, which can lead
-	 * to an infinite loop of memload->vmem_populate->memload->.
-	 */
-	if (curthread != hat_reserves_thread || HAT_DEPTH(curthread) > 0 ||
-	    use_boot_reserve)
-		return;
-
-	mutex_enter(&hat_reserves_lock);
-	ASSERT(hat_reserves_thread == curthread);
-	hat_reserves_thread = NULL;
-	cv_broadcast(&hat_reserves_cv);
-	mutex_exit(&hat_reserves_lock);
-
-	/*
-	 * As we leave the reserves, we want to be sure the reserve lists
-	 * aren't overstocked.  Freeing excess reserves requires that we
-	 * call kmem_free(), which may require additional allocations,
-	 * causing us to re-enter the reserves.  To avoid infinite
-	 * recursion, we only try to adjust reserves at the very top level.
-	 */
-	if (!kmem_for_hat && !EXITING_RESERVES(curthread)) {
-		curthread->t_hatdepth |= EXITING_FLAG;
-		htable_adjust_reserve();
-		hment_adjust_reserve();
-		curthread->t_hatdepth &= (~EXITING_FLAG);
-	}
-
-	/*
-	 * just in case something went wrong in doing adjust reserves
-	 */
-	ASSERT(hat_reserves_thread != curthread);
-}
-
-/*
  * Internal routine to load a single page table entry. This only fails if
  * we attempt to overwrite a page table link with a large page.
  */
@@ -1412,9 +1296,15 @@
 	htable_t	*ht;
 	uint_t		entry;
 	x86pte_t	pte;
-	uint_t		kmem_for_hat = (flags & HAT_NO_KALLOC) ? 1 : 0;
 	int		rv = 0;
 
+	/*
+	 * The number 16 is arbitrary and here to catch a recursion problem
+	 * early before we blow out the kernel stack.
+	 */
+	++curthread->t_hatdepth;
+	ASSERT(curthread->t_hatdepth < 16);
+
 	ASSERT(hat == kas.a_hat ||
 	    AS_LOCK_HELD(hat->hat_as, &hat->hat_as->a_lock));
 
@@ -1427,23 +1317,9 @@
 	ht = htable_lookup(hat, va, level);
 
 	/*
-	 * All threads go through hati_reserves_enter() to at least wait
-	 * for any existing reserves user to finish. This helps reduce
-	 * pressure on the reserves. In addition, if this thread needs
-	 * to become the new reserve user it will.
+	 * We must have HAT_LOAD_NOCONSIST if page_t is NULL.
 	 */
-	hati_reserves_enter(kmem_for_hat);
-
-	ASSERT(HAT_DEPTH(curthread) == 1 || va >= kernelbase);
-
-	/*
-	 * Kernel memloads for HAT data should never use hments!
-	 * If it did that would seriously complicate the reserves system, since
-	 * hment_alloc() would need to know about HAT_NO_KALLOC.
-	 *
-	 * We also must have HAT_LOAD_NOCONSIST if page_t is NULL.
-	 */
-	if (HAT_DEPTH(curthread) > 1 || pp == NULL)
+	if (pp == NULL)
 		flags |= HAT_LOAD_NOCONSIST;
 
 	if (ht == NULL) {
@@ -1478,7 +1354,7 @@
 	 * release the htable and any reserves
 	 */
 	htable_release(ht);
-	hati_reserves_exit(kmem_for_hat);
+	--curthread->t_hatdepth;
 	return (rv);
 }
 
@@ -1518,7 +1394,10 @@
 	ht = mmu.kmap_htables[(va - mmu.kmap_htables[0]->ht_vaddr) >>
 	    LEVEL_SHIFT(1)];
 	entry = htable_va2entry(va, ht);
+	++curthread->t_hatdepth;
+	ASSERT(curthread->t_hatdepth < 16);
 	(void) hati_pte_map(ht, entry, pp, pte, flags, pte_ptr);
+	--curthread->t_hatdepth;
 }
 
 /*
@@ -3568,17 +3447,16 @@
 	uint_t		entry;
 	x86pte_t	oldpte;
 	hat_mempte_t	p;
-	uint_t		created = 0;
 
 	ASSERT(IS_PAGEALIGNED(va));
 	ASSERT(!IN_VA_HOLE(va));
+	++curthread->t_hatdepth;
 	ht = htable_getpte(kas.a_hat, va, &entry, &oldpte, 0);
 	if (ht == NULL) {
 		ht = htable_create(kas.a_hat, va, 0, NULL);
 		entry = htable_va2entry(va, ht);
 		ASSERT(ht->ht_level == 0);
 		oldpte = x86pte_get(ht, entry);
-		created = 1;
 	}
 	if (PTE_ISVALID(oldpte))
 		panic("hat_mempte_setup(): address already mapped"
@@ -3594,8 +3472,7 @@
 	 */
 	htable_release(ht);
 	p = PT_INDEX_PHYSADDR(pfn_to_pa(ht->ht_pfn), entry);
-	if (created)
-		hati_reserves_exit(0);
+	--curthread->t_hatdepth;
 	return (p);
 }
 
--- a/usr/src/uts/i86pc/vm/hat_i86.h	Tue Apr 10 11:17:42 2007 -0700
+++ b/usr/src/uts/i86pc/vm/hat_i86.h	Tue Apr 10 12:46:10 2007 -0700
@@ -185,9 +185,8 @@
  */
 extern uint_t can_steal_post_boot;
 extern uint_t use_boot_reserve;
-extern kthread_t *hat_reserves_thread;
-#define	USE_HAT_RESERVES()						\
-	(use_boot_reserve || curthread == hat_reserves_thread ||	\
+#define	USE_HAT_RESERVES()					\
+	(use_boot_reserve || curthread->t_hatdepth > 1 ||	\
 	panicstr != NULL || vmem_is_populator())
 
 /*
--- a/usr/src/uts/i86pc/vm/hment.c	Tue Apr 10 11:17:42 2007 -0700
+++ b/usr/src/uts/i86pc/vm/hment.c	Tue Apr 10 12:46:10 2007 -0700
@@ -78,7 +78,6 @@
 kmutex_t hment_reserve_mutex;
 uint_t	hment_reserve_count;
 hment_t	*hment_reserve_pool;
-extern  kthread_t *hat_reserves_thread;
 
 /*
  * Possible performance RFE: we might need to make this dynamic, perhaps
@@ -241,10 +240,12 @@
 #endif
 	HATSTAT_INC(hs_hm_free);
 	if (USE_HAT_RESERVES() ||
-	    hment_reserve_count < hment_reserve_amount)
+	    hment_reserve_count < hment_reserve_amount) {
 		hment_put_reserve(hm);
-	else
+	} else {
 		kmem_cache_free(hment_cache, hm);
+		hment_adjust_reserve();
+	}
 }
 
 int
@@ -587,12 +588,12 @@
 	/*
 	 * Free up any excess reserves
 	 */
-	while (hment_reserve_count > hment_reserve_amount) {
-		ASSERT(curthread != hat_reserves_thread);
+	while (hment_reserve_count > hment_reserve_amount &&
+	    !USE_HAT_RESERVES()) {
 		hm = hment_get_reserve();
 		if (hm == NULL)
 			return;
-		hment_free(hm);
+		kmem_cache_free(hment_cache, hm);
 	}
 }
 
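These hment.c hunks (and the matching htable.c ones below) replace the trimming that hati_reserves_exit() used to do: excess reserves are now pruned opportunistically at free time. Two details matter. The trim loop stops whenever USE_HAT_RESERVES() becomes true, since a nested allocation may need the pool at that moment; and it frees straight to kmem_cache_free() rather than back through hment_free(), which would only re-stock the reserve being shrunk. A self-contained toy of the free-side pattern (all names here are invented for illustration):

#include <stdlib.h>

#define	RESERVE_TARGET	8
#define	POOL_MAX	64

static void	*reserve_pool[POOL_MAX];
static int	reserve_count;
static int	in_recursive_alloc;	/* stands in for USE_HAT_RESERVES() */

static void
obj_free(void *o)
{
	if ((in_recursive_alloc || reserve_count < RESERVE_TARGET) &&
	    reserve_count < POOL_MAX) {
		/* keep the pool stocked for recursive allocations */
		reserve_pool[reserve_count++] = o;
	} else {
		free(o);
		/* trim any excess right away, as hment_free() now does */
		while (reserve_count > RESERVE_TARGET && !in_recursive_alloc)
			free(reserve_pool[--reserve_count]);
	}
}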
--- a/usr/src/uts/i86pc/vm/htable.c	Tue Apr 10 11:17:42 2007 -0700
+++ b/usr/src/uts/i86pc/vm/htable.c	Tue Apr 10 12:46:10 2007 -0700
@@ -276,12 +276,11 @@
 {
 	htable_t *ht;
 
-	ASSERT(curthread != hat_reserves_thread);
-
 	/*
 	 * Free any excess htables in the reserve list
 	 */
-	while (htable_reserve_cnt > htable_reserve_amount) {
+	while (htable_reserve_cnt > htable_reserve_amount &&
+	    !USE_HAT_RESERVES()) {
 		ht = htable_get_reserve();
 		if (ht == NULL)
 			return;
@@ -627,7 +626,6 @@
 			 * Donate successful htable allocations to the reserve.
 			 */
 			for (;;) {
-				ASSERT(curthread != hat_reserves_thread);
 				ht = kmem_cache_alloc(htable_cache, kmflags);
 				if (ht == NULL)
 					break;
@@ -777,10 +775,12 @@
 	/*
 	 * Free htables or put into reserves.
 	 */
-	if (USE_HAT_RESERVES() || htable_reserve_cnt < htable_reserve_amount)
+	if (USE_HAT_RESERVES() || htable_reserve_cnt < htable_reserve_amount) {
 		htable_put_reserve(ht);
-	else
+	} else {
 		kmem_cache_free(htable_cache, ht);
+		htable_adjust_reserve();
+	}
 }