Mercurial > illumos > illumos-gate
changeset 4004:027a924f9ee3
6539804 panic[cpu0]/thread=fffffffffbc27f40: boot_mapin(): No pp for pfnum = 1ff849
6540208 assertion failed: (((threadp()))->t_hatdepth & (~(1 << 7))) < 64, file: ../../i86pc/vm/hat_i86.c
author | josephb |
---|---|
date | Tue, 10 Apr 2007 12:46:10 -0700 |
parents | 70e1c9a81b40 |
children | 7f159f75c1e1 |
files | usr/src/uts/i86pc/os/fakebop.c usr/src/uts/i86pc/os/startup.c usr/src/uts/i86pc/vm/hat_i86.c usr/src/uts/i86pc/vm/hat_i86.h usr/src/uts/i86pc/vm/hment.c usr/src/uts/i86pc/vm/htable.c |
diffstat | 6 files changed, 116 insertions(+), 167 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/i86pc/os/fakebop.c Tue Apr 10 11:17:42 2007 -0700 +++ b/usr/src/uts/i86pc/os/fakebop.c Tue Apr 10 12:46:10 2007 -0700 @@ -110,6 +110,8 @@ static void build_firmware_properties(void); +static int early_allocation = 1; + /* * Allocate aligned physical memory at boot time. This allocator allocates * from the highest possible addresses. This avoids exhausting memory that @@ -129,19 +131,18 @@ * space we can treat physmem as a pfn (not just a pgcnt) and * get a conservative upper limit. */ - extern pgcnt_t physmem; if (physmem != 0 && high_phys > pfn_to_pa(physmem)) high_phys = pfn_to_pa(physmem); /* - * find the highest available memory in physinstalled + * find the lowest or highest available memory in physinstalled */ size = P2ROUNDUP(size, align); for (; ml; ml = ml->next) { - start = ml->address; - end = P2ALIGN(start + ml->size, align); + start = P2ROUNDUP(ml->address, align); + end = P2ALIGN(ml->address + ml->size, align); if (start < next_phys) - start = next_phys; + start = P2ROUNDUP(next_phys, align); if (end > high_phys) end = P2ALIGN(high_phys, align); @@ -150,11 +151,23 @@ if (end - start < size) continue; - if (end - size > pa) - pa = end - size; + /* + * Early allocations need to use low memory, since + * physmem might be further limited by bootenv.rc + */ + if (early_allocation) { + if (pa == 0 || start < pa) + pa = start; + } else { + if (end - size > pa) + pa = end - size; + } } if (pa != 0) { - high_phys = pa; + if (early_allocation) + next_phys = pa + size; + else + high_phys = pa; return (pa); } panic("do_bop_phys_alloc(0x%" PRIx64 ", 0x%" PRIx64 ") Out of memory\n", @@ -412,6 +425,55 @@ } /* + * Parse numeric value from a string. 
Understands decimal, hex, octal, - and ~ + */ +static int +parse_value(char *p, uint64_t *retval) +{ + int adjust = 0; + uint64_t tmp = 0; + int digit; + int radix = 10; + + *retval = 0; + if (*p == '-' || *p == '~') + adjust = *p++; + + if (*p == '0') { + ++p; + if (*p == 0) + return (0); + if (*p == 'x' || *p == 'X') { + radix = 16; + ++p; + } else { + radix = 8; + ++p; + } + } + while (*p) { + if ('0' <= *p && *p <= '9') + digit = *p - '0'; + else if ('a' <= *p && *p <= 'f') + digit = 10 + *p - 'a'; + else if ('A' <= *p && *p <= 'F') + digit = 10 + *p - 'A'; + else + return (-1); + if (digit >= radix) + return (-1); + tmp = tmp * radix + digit; + ++p; + } + if (adjust == '-') + tmp = -tmp; + else if (adjust == '~') + tmp = ~tmp; + *retval = tmp; + return (0); +} + +/* * 2nd part of building the table of boot properties. This includes: * - values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values) * @@ -438,6 +500,7 @@ char *inputdev; /* these override the comand line if serial ports */ char *outputdev; char *consoledev; + uint64_t lvalue; DBG_MSG("Opening /boot/solaris/bootenv.rc\n"); fd = BRD_OPEN(bfs_ops, "/boot/solaris/bootenv.rc", 0); @@ -538,6 +601,19 @@ BRD_CLOSE(bfs_ops, fd); /* + * Check if we have to limit the boot time allocator + */ + if (do_bsys_getproplen(NULL, "physmem") != -1 && + do_bsys_getprop(NULL, "physmem", line) >= 0 && + parse_value(line, &lvalue) != -1) { + if (0 < lvalue && (lvalue < physmem || physmem == 0)) { + physmem = (pgcnt_t)lvalue; + DBG(physmem); + } + } + early_allocation = 0; + + /* * check to see if we have to override the default value of the console */ inputdev = line; @@ -799,7 +875,7 @@ } /* - * Values forcibly set boot propertiex on the command line via -B. + * Values forcibly set boot properties on the command line via -B. * Allow use of quotes in values. Other stuff goes on kernel * command line. */
--- a/usr/src/uts/i86pc/os/startup.c Tue Apr 10 11:17:42 2007 -0700
+++ b/usr/src/uts/i86pc/os/startup.c Tue Apr 10 12:46:10 2007 -0700
@@ -2393,11 +2393,7 @@
 		segmapfreelists = (int)lvalue;
 	}
 
-	if ((BOP_GETPROPLEN(bootops, "physmem") <= sizeof (prop)) &&
-	    (BOP_GETPROP(bootops, "physmem", prop) >= 0) &&
-	    (kobj_getvalue(prop, &lvalue) != -1)) {
-		physmem = (uintptr_t)lvalue;
-	}
+	/* physmem used to be here, but moved much earlier to fakebop.c */
 }
 
 /*
--- a/usr/src/uts/i86pc/vm/hat_i86.c Tue Apr 10 11:17:42 2007 -0700 +++ b/usr/src/uts/i86pc/vm/hat_i86.c Tue Apr 10 12:46:10 2007 -0700 @@ -139,13 +139,6 @@ #endif -/* - * Locks, etc. to control use of the hat reserves when recursively - * allocating pagetables for the hat data structures. - */ -static kmutex_t hat_reserves_lock; -static kcondvar_t hat_reserves_cv; -kthread_t *hat_reserves_thread; uint_t use_boot_reserve = 1; /* cleared after early boot process */ uint_t can_steal_post_boot = 0; /* set late in boot to enable stealing */ @@ -1287,115 +1280,6 @@ } /* - * The t_hatdepth field is an 8-bit counter. We use the lower seven bits - * to track exactly how deep we are in the memload->kmem_alloc recursion. - * If the depth is greater than 1, that indicates that we are performing a - * hat operation to satisfy another hat operation. To prevent infinite - * recursion, we switch over to using pre-allocated "reserves" of htables - * and hments. - * - * The uppermost bit is used to indicate that we are transitioning away - * from being the reserves thread. See hati_reserves_exit() for the - * details. - */ -#define EXITING_FLAG (1 << 7) -#define DEPTH_MASK (~EXITING_FLAG) -#define HAT_DEPTH(t) ((t)->t_hatdepth & DEPTH_MASK) -#define EXITING_RESERVES(t) ((t)->t_hatdepth & EXITING_FLAG) - -/* - * Access to reserves for HAT_NO_KALLOC is single threaded. - * If someone else is in the reserves, we'll politely wait for them - * to finish. This keeps normal hat_memload()s from eating up - * the mappings needed to replenish the reserve. - */ -static void -hati_reserves_enter(uint_t kmem_for_hat) -{ - /* - * 64 is an arbitrary number to catch serious problems. I'm not - * sure what the absolute maximum depth is, but it should be - * substantially less than this. - */ - ASSERT(HAT_DEPTH(curthread) < 64); - - /* - * If we are doing a memload to satisfy a kmem operation, we enter - * the reserves immediately; we don't wait to recurse to a second - * level of memload. 
- */ - ASSERT(kmem_for_hat < 2); - curthread->t_hatdepth += (1 + kmem_for_hat); - - if (hat_reserves_thread == curthread || use_boot_reserve) - return; - - if (HAT_DEPTH(curthread) > 1 || hat_reserves_thread != NULL) { - mutex_enter(&hat_reserves_lock); - while (hat_reserves_thread != NULL) - cv_wait(&hat_reserves_cv, &hat_reserves_lock); - - if (HAT_DEPTH(curthread) > 1) - hat_reserves_thread = curthread; - - mutex_exit(&hat_reserves_lock); - } -} - -/* - * If we are the reserves_thread and we've finally finished with all our - * memloads (ie. no longer doing hat slabs), we can release our use of the - * reserve. - */ -static void -hati_reserves_exit(uint_t kmem_for_hat) -{ - ASSERT(kmem_for_hat < 2); - curthread->t_hatdepth -= (1 + kmem_for_hat); - - /* - * Simple case: either we are not the reserves thread, or we are - * the reserves thread and we are nested deeply enough that we - * should still be the reserves thread. - * - * Note: we may not become the reserves thread after we recursively - * enter our second HAT routine, but we don't stop being the - * reserves thread until we exit the toplevel HAT routine. This is - * to work around vmem's inability to determine when an allocation - * should be satisfied from the hat_memload arena, which can lead - * to an infinite loop of memload->vmem_populate->memload->. - */ - if (curthread != hat_reserves_thread || HAT_DEPTH(curthread) > 0 || - use_boot_reserve) - return; - - mutex_enter(&hat_reserves_lock); - ASSERT(hat_reserves_thread == curthread); - hat_reserves_thread = NULL; - cv_broadcast(&hat_reserves_cv); - mutex_exit(&hat_reserves_lock); - - /* - * As we leave the reserves, we want to be sure the reserve lists - * aren't overstocked. Freeing excess reserves requires that we - * call kmem_free(), which may require additional allocations, - * causing us to re-enter the reserves. To avoid infinite - * recursion, we only try to adjust reserves at the very top level. 
- */ - if (!kmem_for_hat && !EXITING_RESERVES(curthread)) { - curthread->t_hatdepth |= EXITING_FLAG; - htable_adjust_reserve(); - hment_adjust_reserve(); - curthread->t_hatdepth &= (~EXITING_FLAG); - } - - /* - * just in case something went wrong in doing adjust reserves - */ - ASSERT(hat_reserves_thread != curthread); -} - -/* * Internal routine to load a single page table entry. This only fails if * we attempt to overwrite a page table link with a large page. */ @@ -1412,9 +1296,15 @@ htable_t *ht; uint_t entry; x86pte_t pte; - uint_t kmem_for_hat = (flags & HAT_NO_KALLOC) ? 1 : 0; int rv = 0; + /* + * The number 16 is arbitrary and here to catch a recursion problem + * early before we blow out the kernel stack. + */ + ++curthread->t_hatdepth; + ASSERT(curthread->t_hatdepth < 16); + ASSERT(hat == kas.a_hat || AS_LOCK_HELD(hat->hat_as, &hat->hat_as->a_lock)); @@ -1427,23 +1317,9 @@ ht = htable_lookup(hat, va, level); /* - * All threads go through hati_reserves_enter() to at least wait - * for any existing reserves user to finish. This helps reduce - * pressure on the reserves. In addition, if this thread needs - * to become the new reserve user it will. + * We must have HAT_LOAD_NOCONSIST if page_t is NULL. */ - hati_reserves_enter(kmem_for_hat); - - ASSERT(HAT_DEPTH(curthread) == 1 || va >= kernelbase); - - /* - * Kernel memloads for HAT data should never use hments! - * If it did that would seriously complicate the reserves system, since - * hment_alloc() would need to know about HAT_NO_KALLOC. - * - * We also must have HAT_LOAD_NOCONSIST if page_t is NULL. 
- */ - if (HAT_DEPTH(curthread) > 1 || pp == NULL) + if (pp == NULL) flags |= HAT_LOAD_NOCONSIST; if (ht == NULL) { @@ -1478,7 +1354,7 @@ * release the htable and any reserves */ htable_release(ht); - hati_reserves_exit(kmem_for_hat); + --curthread->t_hatdepth; return (rv); } @@ -1518,7 +1394,10 @@ ht = mmu.kmap_htables[(va - mmu.kmap_htables[0]->ht_vaddr) >> LEVEL_SHIFT(1)]; entry = htable_va2entry(va, ht); + ++curthread->t_hatdepth; + ASSERT(curthread->t_hatdepth < 16); (void) hati_pte_map(ht, entry, pp, pte, flags, pte_ptr); + --curthread->t_hatdepth; } /* @@ -3568,17 +3447,16 @@ uint_t entry; x86pte_t oldpte; hat_mempte_t p; - uint_t created = 0; ASSERT(IS_PAGEALIGNED(va)); ASSERT(!IN_VA_HOLE(va)); + ++curthread->t_hatdepth; ht = htable_getpte(kas.a_hat, va, &entry, &oldpte, 0); if (ht == NULL) { ht = htable_create(kas.a_hat, va, 0, NULL); entry = htable_va2entry(va, ht); ASSERT(ht->ht_level == 0); oldpte = x86pte_get(ht, entry); - created = 1; } if (PTE_ISVALID(oldpte)) panic("hat_mempte_setup(): address already mapped" @@ -3594,8 +3472,7 @@ */ htable_release(ht); p = PT_INDEX_PHYSADDR(pfn_to_pa(ht->ht_pfn), entry); - if (created) - hati_reserves_exit(0); + --curthread->t_hatdepth; return (p); }
--- a/usr/src/uts/i86pc/vm/hat_i86.h Tue Apr 10 11:17:42 2007 -0700
+++ b/usr/src/uts/i86pc/vm/hat_i86.h Tue Apr 10 12:46:10 2007 -0700
@@ -185,9 +185,8 @@
  */
 extern uint_t can_steal_post_boot;
 extern uint_t use_boot_reserve;
-extern kthread_t *hat_reserves_thread;
-#define USE_HAT_RESERVES() \
-	(use_boot_reserve || curthread == hat_reserves_thread || \
+#define USE_HAT_RESERVES() \
+	(use_boot_reserve || curthread->t_hatdepth > 1 || \
 	panicstr != NULL || vmem_is_populator())
 
 /*
--- a/usr/src/uts/i86pc/vm/hment.c Tue Apr 10 11:17:42 2007 -0700
+++ b/usr/src/uts/i86pc/vm/hment.c Tue Apr 10 12:46:10 2007 -0700
@@ -78,7 +78,6 @@
 kmutex_t hment_reserve_mutex;
 uint_t hment_reserve_count;
 hment_t *hment_reserve_pool;
-extern kthread_t *hat_reserves_thread;
 
 /*
  * Possible performance RFE: we might need to make this dynamic, perhaps
@@ -241,10 +240,12 @@
 #endif
 	HATSTAT_INC(hs_hm_free);
 	if (USE_HAT_RESERVES() ||
-	    hment_reserve_count < hment_reserve_amount)
+	    hment_reserve_count < hment_reserve_amount) {
 		hment_put_reserve(hm);
-	else
+	} else {
 		kmem_cache_free(hment_cache, hm);
+		hment_adjust_reserve();
+	}
 }
 
 int
@@ -587,12 +588,12 @@
 	/*
 	 * Free up any excess reserves
 	 */
-	while (hment_reserve_count > hment_reserve_amount) {
-		ASSERT(curthread != hat_reserves_thread);
+	while (hment_reserve_count > hment_reserve_amount &&
+	    !USE_HAT_RESERVES()) {
 		hm = hment_get_reserve();
 		if (hm == NULL)
 			return;
-		hment_free(hm);
+		kmem_cache_free(hment_cache, hm);
 	}
 }
 
--- a/usr/src/uts/i86pc/vm/htable.c Tue Apr 10 11:17:42 2007 -0700
+++ b/usr/src/uts/i86pc/vm/htable.c Tue Apr 10 12:46:10 2007 -0700
@@ -276,12 +276,11 @@
 {
 	htable_t *ht;
 
-	ASSERT(curthread != hat_reserves_thread);
-
 	/*
 	 * Free any excess htables in the reserve list
 	 */
-	while (htable_reserve_cnt > htable_reserve_amount) {
+	while (htable_reserve_cnt > htable_reserve_amount &&
+	    !USE_HAT_RESERVES()) {
 		ht = htable_get_reserve();
 		if (ht == NULL)
 			return;
@@ -627,7 +626,6 @@
 	 * Donate successful htable allocations to the reserve.
 	 */
 	for (;;) {
-		ASSERT(curthread != hat_reserves_thread);
 		ht = kmem_cache_alloc(htable_cache, kmflags);
 		if (ht == NULL)
 			break;
@@ -777,10 +775,12 @@
 	/*
 	 * Free htables or put into reserves.
 	 */
-	if (USE_HAT_RESERVES() || htable_reserve_cnt < htable_reserve_amount)
+	if (USE_HAT_RESERVES() || htable_reserve_cnt < htable_reserve_amount) {
 		htable_put_reserve(ht);
-	else
+	} else {
 		kmem_cache_free(htable_cache, ht);
+		htable_adjust_reserve();
+	}
 }
 
 