changeset 3764:74844940a161
6510878 Kernel Boot time data structures can be mapped with Large pages to improve performance
author   | dp78419
date     | Mon, 05 Mar 2007 13:11:00 -0800
parents  | fb3428ef36ce
children | e36fc8d4b665
files    | usr/src/uts/common/vm/seg_kmem.c usr/src/uts/sfmmu/vm/hat_sfmmu.c usr/src/uts/sfmmu/vm/hat_sfmmu.h usr/src/uts/sun4/os/mp_startup.c usr/src/uts/sun4/os/startup.c usr/src/uts/sun4/vm/sfmmu.c usr/src/uts/sun4/vm/vm_dep.h usr/src/uts/sun4u/cpu/us3_cheetah.c usr/src/uts/sun4u/lw8/Makefile.lw8 usr/src/uts/sun4u/serengeti/Makefile.serengeti.shared usr/src/uts/sun4u/vm/mach_vm_dep.c usr/src/uts/sun4v/vm/mach_vm_dep.c
diffstat | 12 files changed, 437 insertions(+), 226 deletions(-)
--- a/usr/src/uts/common/vm/seg_kmem.c	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/common/vm/seg_kmem.c	Mon Mar 05 13:11:00 2007 -0800
@@ -227,12 +227,22 @@
 	size_t heap_size;
 	vmem_t *heaptext_parent;
 	size_t heap_lp_size = 0;
+#ifdef __sparc
+	size_t kmem64_sz = kmem64_aligned_end - kmem64_base;
+#endif	/* __sparc */
 
 	kernelheap = heap_start;
 	ekernelheap = heap_end;
 
 #ifdef __sparc
 	heap_lp_size = (((uintptr_t)heap_end - (uintptr_t)heap_start) / 4);
+	/*
+	 * Bias heap_lp start address by kmem64_sz to reduce collisions
+	 * in 4M kernel TSB between kmem64 area and heap_lp
+	 */
+	kmem64_sz = P2ROUNDUP(kmem64_sz, MMU_PAGESIZE256M);
+	if (kmem64_sz <= heap_lp_size / 2)
+		heap_lp_size -= kmem64_sz;
 	heap_lp_base = ekernelheap - heap_lp_size;
 	heap_lp_end = heap_lp_base + heap_lp_size;
 #endif	/* __sparc */
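The biasing above is easy to misread: the kernel's 4M TSB is indexed by virtual-address bits, so if kmem64 and heap_lp sit at the same offsets relative to their bases they contend for the same TSB slots. Pulling heap_lp_base down by kmem64's size, rounded to a 256M boundary, staggers the two ranges; the guard keeps a pathological kmem64 from eating most of the large-page heap. A minimal user-land sketch of the same arithmetic, assuming an LP64 build, with hypothetical addresses and P2ROUNDUP reproduced from <sys/sysmacros.h>:

    /*
     * Standalone sketch of the heap_lp bias above; the addresses are
     * hypothetical and P2ROUNDUP is reproduced from <sys/sysmacros.h>.
     */
    #include <stdio.h>
    #include <stdint.h>

    #define P2ROUNDUP(x, align)     (-(-(x) & -(align)))
    #define MMU_PAGESIZE256M        (256ULL << 20)

    int
    main(void)
    {
            uintptr_t heap_start = 0x2a100000000;   /* hypothetical */
            uintptr_t heap_end = 0x2b900000000;     /* hypothetical */
            size_t heap_lp_size = (heap_end - heap_start) / 4;
            size_t kmem64_sz = 0x1c000000;          /* hypothetical: 448M */

            /* round the kmem64 span up to a 256M boundary, as the patch does */
            kmem64_sz = P2ROUNDUP(kmem64_sz, MMU_PAGESIZE256M);

            /* apply the bias only while it stays below half of heap_lp */
            if (kmem64_sz <= heap_lp_size / 2)
                    heap_lp_size -= kmem64_sz;

            printf("heap_lp_base = 0x%lx\n",
                (unsigned long)(heap_end - heap_lp_size));
            return (0);
    }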
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c	Mon Mar 05 13:11:00 2007 -0800
@@ -1245,8 +1245,6 @@
 	    NULL);
 }
 
-extern caddr_t kmem64_base, kmem64_end;
-
 #define	SFMMU_KERNEL_MAXVA \
 	(kmem64_base ? (uintptr_t)kmem64_end : (SYSLIMIT))
@@ -9961,7 +9959,7 @@
 		 * make sure that we are calculating the
 		 * number of hblk8's that we need correctly.
 		 */
-		panic("no nucleus hblk8 to allocate");
+		prom_panic("no nucleus hblk8 to allocate");
 	}
 	hmeblkp = (struct hme_blk *)&nucleus_hblk8.list[index];
@@ -9971,12 +9969,12 @@
 	index = nucleus_hblk1.index;
 	if (nucleus_hblk1.index >= nucleus_hblk1.len) {
 		/*
-		 * If we panic here, see startup_modules()
-		 * and H8TOH1; most likely you need to
-		 * update the calculation of the number
-		 * of hblk1's the kernel needs to boot.
+		 * If we panic here, see startup_modules().
+		 * Most likely you need to update the
+		 * calculation of the number of hblk1 elements
+		 * that the kernel needs to boot.
 		 */
-		panic("no nucleus hblk1 to allocate");
+		prom_panic("no nucleus hblk1 to allocate");
 	}
 	hmeblkp = (struct hme_blk *)&nucleus_hblk1.list[index];
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h	Mon Mar 05 13:11:00 2007 -0800
@@ -925,7 +925,6 @@
 #define	HME8BLK_SZ	(sizeof (struct hme_blk) + \
 			((NHMENTS - 1) * sizeof (struct sf_hment)))
 #define	HME1BLK_SZ	(sizeof (struct hme_blk))
-#define	H8TOH1		(MMU_PAGESIZE4M / MMU_PAGESIZE)
 #define	H1MIN		(2 + MAX_BIGKTSB_TTES)	/* nucleus text+data, ktsb */
 
 /*
--- a/usr/src/uts/sun4/os/mp_startup.c	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sun4/os/mp_startup.c	Mon Mar 05 13:11:00 2007 -0800
@@ -84,8 +84,9 @@
 
 #ifdef TRAPTRACE
 /*
- * This function bop allocs traptrace buffers for all cpus
+ * This function sets traptrace buffers for all cpus
  * other than boot cpu.
+ * Note that the memory at base will be allocated later.
  */
 caddr_t
 trap_trace_alloc(caddr_t base)
@@ -97,10 +98,8 @@
 		return (base);
 	}
 
-	if ((vaddr = (caddr_t)BOP_ALLOC(bootops, base, (TRAP_TSIZE *
-	    (max_ncpus - 1)), TRAP_TSIZE)) == NULL) {
-		panic("traptrace_alloc: can't bop alloc");
-	}
+	vaddr = (caddr_t)base;
+
 	ttrace_buf = vaddr;
 	PRM_DEBUG(ttrace_buf);
 	return (vaddr + (TRAP_TSIZE * (max_ncpus - 1)));
--- a/usr/src/uts/sun4/os/startup.c	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sun4/os/startup.c	Mon Mar 05 13:11:00 2007 -0800
@@ -159,6 +159,10 @@
 caddr_t kmem64_base;	/* base of kernel mem segment in 64-bit space */
 caddr_t kmem64_end;	/* end of kernel mem segment in 64-bit space */
+caddr_t kmem64_aligned_end;	/* end of large page, overmaps 64-bit space */
+int	kmem64_alignsize;	/* page size for mem segment in 64-bit space */
+int	kmem64_szc;		/* page size code */
+uint64_t kmem64_pabase = (uint64_t)-1;	/* physical address of kmem64_base */
 uintptr_t shm_alignment;	/* VAC address consistency modulus */
 struct memlist *phys_install;	/* Total installed physical memory */
@@ -251,7 +255,6 @@
 static caddr_t hblk_base;
 uint_t hblk_alloc_dynamic = 0;
 uint_t hblk1_min = H1MIN;
-uint_t hblk8_min;
 
 /*
@@ -415,6 +418,10 @@
  *    0x000007FF.00000000  -|-----------------------|- hole_start -----
  *                          :                       :                ^
  *                          :                       :                |
+ *    0x00000XXX.XXX00000  -|-----------------------|- kmem64_       |
+ *                          |  overmapped area      |  aligned_end   |
+ *                          |  (kmem64_alignsize    |                |
+ *                          |   boundary)           |                |
  *    0x00000XXX.XXXXXXXX  -|-----------------------|- kmem64_end    |
  *                          |                       |                |
  *                          |   64-bit kernel ONLY  |                |
@@ -733,6 +740,85 @@
 	(MAX_RSVD_IV * sizeof (intr_vec_t)) + \
 	(MAX_RSVD_IVX * sizeof (intr_vecx_t)))
 
+#if !defined(C_OBP)
+/*
+ * Install a temporary tte handler in OBP for kmem64 area.
+ *
+ * We map kmem64 area with large pages before the trap table is taken
+ * over. Since OBP makes 8K mappings, it can create 8K tlb entries in
+ * the same area. Duplicate tlb entries with different page sizes
+ * cause unpredictable behavior. To avoid this, we don't create
+ * kmem64 mappings via BOP_ALLOC (ends up as prom_alloc() call to
+ * OBP). Instead, we manage translations with a temporary va>tte-data
+ * handler (kmem64-tte). This handler is replaced by unix-tte when
+ * the trap table is taken over.
+ *
+ * The temporary handler knows the physical address of the kmem64
+ * area. It uses the prom's pgmap@ Forth word for other addresses.
+ *
+ * We have to use the BOP_ALLOC() method for C-OBP platforms because
+ * pgmap@ is not defined in C-OBP. C-OBP is only used on serengeti
+ * sun4u platforms. On sun4u we flush the tlb after the trap table is
+ * taken over if we use large pages for kernel heap and kmem64. Since
+ * the sun4u prom (unlike sun4v) calls va>tte-data first for client
+ * address translation, the prom's ttes for kmem64 can't get into the
+ * TLB even if we later switch to the prom's trap table again. C-OBP
+ * uses 4M pages for client mappings when possible, so on all
+ * platforms we get the benefit from large mappings for the kmem64
+ * area immediately during boot.
+ *
+ * pseudo code:
+ * if (context != 0) {
+ *	return false
+ * } else if (miss_va in range[kmem64_base, kmem64_end)) {
+ *	tte = tte_template +
+ *		(((miss_va & pagemask) - kmem64_base));
+ *	return tte, true
+ * } else {
+ *	return pgmap@ result
+ * }
+ */
+char kmem64_obp_str[] =
+	"h# %lx constant kmem64_base "
+	"h# %lx constant kmem64_end "
+	"h# %lx constant kmem64_pagemask "
+	"h# %lx constant kmem64_template "
+
+	": kmem64-tte ( addr cnum -- false | tte-data true ) "
+	"    if                                       ( addr ) "
+	"       drop false exit then                  ( false ) "
+	"    dup  kmem64_base kmem64_end  within  if  ( addr ) "
+	"       kmem64_pagemask and                   ( addr' ) "
+	"       kmem64_base -                         ( addr' ) "
+	"       kmem64_template +                     ( tte ) "
+	"       true                                  ( tte true ) "
+	"    else                                     ( addr ) "
+	"       pgmap@                                ( tte ) "
+	"       dup 0< if true else drop false then   ( tte true | false ) "
+	"    then                                     ( tte true | false ) "
+	"; "
+
+	"' kmem64-tte is va>tte-data "
+;
+
+void
+install_kmem64_tte()
+{
+	char b[sizeof (kmem64_obp_str) + (4 * 16)];
+	tte_t tte;
+
+	PRM_DEBUG(kmem64_pabase);
+	PRM_DEBUG(kmem64_szc);
+	sfmmu_memtte(&tte, kmem64_pabase >> MMU_PAGESHIFT,
+	    PROC_DATA | HAT_NOSYNC, kmem64_szc);
+	PRM_DEBUG(tte.ll);
+	(void) sprintf(b, kmem64_obp_str,
+	    kmem64_base, kmem64_end, TTE_PAGEMASK(kmem64_szc), tte.ll);
+	ASSERT(strlen(b) < sizeof (b));
+	prom_interpret(b, 0, 0, 0, 0, 0);
+}
+#endif	/* !C_OBP */
+
 /*
  * As OBP takes up some RAM when the system boots, pages will already be "lost"
  * to the system and reflected in npages by the time we see it.
@@ -768,7 +854,8 @@
 	struct memlist *cur;
 	size_t syslimit = (size_t)SYSLIMIT;
 	size_t sysbase = (size_t)SYSBASE;
-	int alloc_alignsize = MMU_PAGESIZE;
+	int alloc_alignsize = ecache_alignsize;
+	int i;
 	extern void page_coloring_init(void);
 	extern void page_set_colorequiv_arr(void);
@@ -833,7 +920,7 @@
 	PRM_DEBUG(e_text);
 	modtext = (caddr_t)roundup((uintptr_t)e_text, MMU_PAGESIZE);
 	if (((uintptr_t)modtext & MMU_PAGEMASK4M) != (uintptr_t)s_text)
-		panic("nucleus text overflow");
+		prom_panic("nucleus text overflow");
 	modtext_sz = (caddr_t)roundup((uintptr_t)modtext, MMU_PAGESIZE4M) -
 	    modtext;
 	PRM_DEBUG(modtext);
@@ -995,37 +1082,6 @@
 	}
 
 	/*
-	 * If we have enough memory, use 4M pages for alignment because it
-	 * greatly reduces the number of TLB misses we take albeit at the cost
-	 * of possible RAM wastage (degenerate case of 4 MB - MMU_PAGESIZE per
-	 * allocation.)  Still, the speedup on large memory systems (e.g. > 64
-	 * GB) is quite noticeable, so it is worth the effort to do if we can.
-	 *
-	 * Note, however, that this speedup will only occur if the boot PROM
-	 * uses the largest possible MMU page size possible to map memory
-	 * requests that are properly aligned and sized (for example, a request
-	 * for a multiple of 4MB of memory aligned to a 4MB boundary will
-	 * result in a mapping using a 4MB MMU page.)
-	 *
-	 * Even then, the large page mappings will only speed things up until
-	 * the startup process proceeds a bit further, as when
-	 * sfmmu_map_prom_mappings() copies page mappings from the PROM to the
-	 * kernel it remaps everything but the TSBs using 8K pages anyway...
-	 *
-	 * At some point in the future, sfmmu_map_prom_mappings() will be
-	 * rewritten to copy memory mappings to the kernel using the same MMU
-	 * page sizes the PROM used.  When that occurs, if the PROM did use
-	 * large MMU pages to map memory, the alignment/sizing work we're
-	 * doing now should give us a nice extra performance boost, albeit at
-	 * the cost of greater RAM usage...
-	 */
-	alloc_alignsize = ((npages >= tune_npages) ? MMU_PAGESIZE4M :
-	    MMU_PAGESIZE);
-
-	PRM_DEBUG(tune_npages);
-	PRM_DEBUG(alloc_alignsize);
-
-	/*
 	 * Save off where the contiguous allocations to date have ended
 	 * in econtig32.
 	 */
@@ -1038,10 +1094,7 @@
 	/*
 	 * To avoid memory allocation collisions in the 32-bit virtual address
 	 * space, make allocations from this point forward in 64-bit virtual
-	 * address space starting at syslimit and working up.  Also use the
-	 * alignment specified by alloc_alignsize, as we may be able to save
-	 * ourselves TLB misses by using larger page sizes if they're
-	 * available.
+	 * address space starting at syslimit and working up.
 	 *
 	 * All this is needed because on large memory systems, the default
 	 * Solaris allocations will collide with SYSBASE32, which is hard
@@ -1058,7 +1111,11 @@
 	kmem64_base = (caddr_t)syslimit;
 	PRM_DEBUG(kmem64_base);
 
-	alloc_base = (caddr_t)roundup((uintptr_t)kmem64_base, alloc_alignsize);
+	/*
+	 * Allocate addresses, but not physical memory. None of these locations
+	 * can be touched until physical memory is allocated below.
+	 */
+	alloc_base = kmem64_base;
 
 	/*
 	 * If KHME and/or UHME hash buckets won't fit in the nucleus, allocate
@@ -1084,23 +1141,11 @@
 	 */
 	if (max_mem_nodes > 1) {
 		int mnode;
-		caddr_t alloc_start = alloc_base;
 
 		for (mnode = 1; mnode < max_mem_nodes; mnode++) {
 			alloc_base = alloc_page_freelists(mnode, alloc_base,
 			    ecache_alignsize);
 		}
-
-		if (alloc_base > alloc_start) {
-			alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
-			    alloc_alignsize);
-			if ((caddr_t)BOP_ALLOC(bootops, alloc_start,
-			    alloc_base - alloc_start,
-			    alloc_alignsize) != alloc_start)
-				cmn_err(CE_PANIC,
-				    "Unable to alloc page freelists\n");
-		}
-
 		PRM_DEBUG(alloc_base);
 	}
 
@@ -1115,11 +1160,7 @@
 		alloc_sz = roundup(mmltable_sz, alloc_alignsize);
 		alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
 		    alloc_alignsize);
-
-		if ((mml_table = (kmutex_t *)BOP_ALLOC(bootops, alloc_base,
-		    alloc_sz, alloc_alignsize)) != (kmutex_t *)alloc_base)
-			panic("mml_table alloc failure");
-
+		mml_table = (kmutex_t *)alloc_base;
 		alloc_base += alloc_sz;
 		PRM_DEBUG(mml_table);
 		PRM_DEBUG(alloc_base);
@@ -1141,11 +1182,7 @@
 		alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
 		    alloc_alignsize);
-		table = BOP_ALLOC(bootops, alloc_base, alloc_sz,
-		    alloc_alignsize);
-
-		if (table != alloc_base)
-			panic("kpmp_table or kpmp_stable alloc failure");
+		table = alloc_base;
 
 		if (kpm_smallpages == 0) {
 			kpmp_table = (kpm_hlk_t *)table;
@@ -1246,13 +1283,9 @@
 	if (pp_base == NULL) {
 		alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
 		    alloc_alignsize);
-
 		alloc_sz = roundup(pp_sz, alloc_alignsize);
-		if ((pp_base = (struct page *)BOP_ALLOC(bootops,
-		    alloc_base, alloc_sz, alloc_alignsize)) !=
-		    (struct page *)alloc_base)
-			panic("page alloc failure");
+		pp_base = (struct page *)alloc_base;
 		alloc_base += alloc_sz;
 	}
 
@@ -1309,8 +1342,8 @@
 		 */
 		kpm_npages_setup(memblocks + 4);
 		kpm_pp_sz = (kpm_smallpages == 0) ?
-				kpm_npages * sizeof (kpm_page_t):
-				kpm_npages * sizeof (kpm_spage_t);
+		    kpm_npages * sizeof (kpm_page_t):
+		    kpm_npages * sizeof (kpm_spage_t);
 
 		kpm_pp_base = (uintptr_t)ndata_alloc(&ndata, kpm_pp_sz,
 		    ecache_alignsize);
@@ -1331,9 +1364,7 @@
 		alloc_sz = roundup(alloc_sz, alloc_alignsize);
 
-		if ((bop_base = (uintptr_t)BOP_ALLOC(bootops, alloc_base,
-		    alloc_sz, alloc_alignsize)) != (uintptr_t)alloc_base)
-			panic("system page struct alloc failure");
+		bop_base = (uintptr_t)alloc_base;
 		alloc_base += alloc_sz;
 
@@ -1364,12 +1395,6 @@
 		ASSERT(bop_base <= (uintptr_t)alloc_base);
 	}
 
-	/*
-	 * Initialize per page size free list counters.
-	 */
-	ctrs_end = page_ctrs_alloc(ctrs_base);
-	ASSERT(ctrs_base + ctrs_sz >= ctrs_end);
-
 	PRM_DEBUG(page_hash);
 	PRM_DEBUG(memseg_base);
 	PRM_DEBUG(kpm_pp_base);
@@ -1379,41 +1404,112 @@
 	PRM_DEBUG(alloc_base);
 
 #ifdef TRAPTRACE
-	/*
-	 * Allocate trap trace buffer last so as not to affect
-	 * the 4M alignments of the allocations above on V9 SPARCs...
-	 */
 	alloc_base = trap_trace_alloc(alloc_base);
 	PRM_DEBUG(alloc_base);
 #endif	/* TRAPTRACE */
 
-	if (kmem64_base) {
-		/*
-		 * Set the end of the kmem64 segment for V9 SPARCs, if
-		 * appropriate...
-		 */
-		kmem64_end = (caddr_t)roundup((uintptr_t)alloc_base,
-		    alloc_alignsize);
-
-		PRM_DEBUG(kmem64_base);
-		PRM_DEBUG(kmem64_end);
+	/*
+	 * In theory it's possible that kmem64 chunk is 0 sized
+	 * (on very small machines). Check for that.
+	 */
+	if (alloc_base == kmem64_base) {
+		kmem64_base = NULL;
+		kmem64_end = NULL;
+		kmem64_aligned_end = NULL;
+		goto kmem64_alloced;
 	}
 
 	/*
+	 * Allocate kmem64 memory.
+	 * Round up to end of large page and overmap.
+	 * kmem64_end..kmem64_aligned_end is added to memory list for reuse
+	 */
+	kmem64_end = (caddr_t)roundup((uintptr_t)alloc_base,
+	    MMU_PAGESIZE);
+
+	/*
+	 * Make one large memory alloc after figuring out the 64-bit size. This
+	 * will enable use of the largest page size appropriate for the system
+	 * architecture.
+	 */
+	ASSERT(mmu_exported_pagesize_mask & (1 << TTE8K));
+	ASSERT(IS_P2ALIGNED(kmem64_base, TTEBYTES(max_bootlp_tteszc)));
+	for (i = max_bootlp_tteszc; i >= TTE8K; i--) {
+		size_t asize;
+#if !defined(C_OBP)
+		unsigned long long pa;
+#endif	/* !C_OBP */
+
+		if ((mmu_exported_pagesize_mask & (1 << i)) == 0)
+			continue;
+		kmem64_alignsize = TTEBYTES(i);
+		kmem64_szc = i;
+
+		/* limit page size for small memory */
+		if (mmu_btop(kmem64_alignsize) > (npages >> 2))
+			continue;
+
+		kmem64_aligned_end = (caddr_t)roundup((uintptr_t)kmem64_end,
+		    kmem64_alignsize);
+		asize = kmem64_aligned_end - kmem64_base;
+#if !defined(C_OBP)
+		if (prom_allocate_phys(asize, kmem64_alignsize, &pa) == 0) {
+			if (prom_claim_virt(asize, kmem64_base) !=
+			    (caddr_t)-1) {
+				kmem64_pabase = pa;
+				install_kmem64_tte();
+				break;
+			} else {
+				prom_free_phys(asize, pa);
+			}
+		}
+#else	/* !C_OBP */
+		if ((caddr_t)BOP_ALLOC(bootops, kmem64_base, asize,
+		    kmem64_alignsize) == kmem64_base) {
+			kmem64_pabase = va_to_pa(kmem64_base);
+			break;
+		}
+#endif	/* !C_OBP */
+		if (i == TTE8K) {
+			prom_panic("kmem64 allocation failure");
+		}
+	}
+
+	PRM_DEBUG(kmem64_base);
+	PRM_DEBUG(kmem64_end);
+	PRM_DEBUG(kmem64_aligned_end);
+	PRM_DEBUG(kmem64_alignsize);
+
+	/*
+	 * Now set pa using saved va from above.
+	 */
+	if (&ecache_init_scrub_flush_area) {
+		(void) ecache_init_scrub_flush_area(NULL);
+	}
+
+kmem64_alloced:
+
+	/*
+	 * Initialize per page size free list counters.
+	 */
+	ctrs_end = page_ctrs_alloc(ctrs_base);
+	ASSERT(ctrs_base + ctrs_sz >= ctrs_end);
+
+	/*
 	 * Allocate space for the interrupt vector table and also for the
 	 * reserved interrupt vector data structures.
 	 */
 	memspace = (caddr_t)BOP_ALLOC(bootops, (caddr_t)intr_vec_table,
 	    IVSIZE, MMU_PAGESIZE);
 	if (memspace != (caddr_t)intr_vec_table)
-		panic("interrupt vector table allocation failure");
+		prom_panic("interrupt vector table allocation failure");
 
 	/*
 	 * The memory lists from boot are allocated from the heap arena
 	 * so that later they can be freed and/or reallocated.
 	 */
 	if (BOP_GETPROP(bootops, "extent", &memlist_sz) == -1)
-		panic("could not retrieve property \"extent\"");
+		prom_panic("could not retrieve property \"extent\"");
 
 	/*
 	 * Between now and when we finish copying in the memory lists,
@@ -1479,6 +1575,20 @@
 	    &memlist, 0, 0);
 
 	/*
+	 * Add any unused kmem64 memory from overmapped page
+	 * (Note: va_to_pa does not work for kmem64_end)
+	 */
+	if (kmem64_end < kmem64_aligned_end) {
+		uint64_t overlap_size = kmem64_aligned_end - kmem64_end;
+		uint64_t overlap_pa = kmem64_pabase +
+		    (kmem64_end - kmem64_base);
+
+		PRM_DEBUG(overlap_pa);
+		PRM_DEBUG(overlap_size);
+		memlist_add(overlap_pa, overlap_size, &memlist, &phys_avail);
+	}
+
+	/*
 	 * Add any extra memory after e_text to the phys_avail list, as long
 	 * as there's at least a page to add.
 	 */
@@ -1505,7 +1615,7 @@
 	PRM_DEBUG(memspace);
 
 	if ((caddr_t)memlist > (memspace + memlist_sz))
-		panic("memlist overflow");
+		prom_panic("memlist overflow");
 
 	PRM_DEBUG(pp_base);
 	PRM_DEBUG(memseg_base);
@@ -1563,7 +1673,7 @@
 {
 	int proplen, nhblk1, nhblk8;
 	size_t  nhblksz;
-	pgcnt_t hblk_pages, pages_per_hblk;
+	pgcnt_t pages_per_hblk;
 	size_t hme8blk_sz, hme1blk_sz;
 
 	/*
@@ -1709,49 +1819,35 @@
 	    &boot_physavail, &boot_physavail_len,
 	    &boot_virtavail, &boot_virtavail_len);
 
-	bop_alloc_pages = size_virtalloc(boot_virtavail, boot_virtavail_len);
-
 	/*
 	 * Calculation and allocation of hmeblks needed to remap
-	 * the memory allocated by PROM till now:
-	 *
-	 * (1)  calculate how much virtual memory has been bop_alloc'ed.
-	 * (2)  roundup this memory to span of hme8blk, i.e. 64KB
-	 * (3)  calculate number of hme8blk's needed to remap this memory
-	 * (4)  calculate amount of memory that's consumed by these hme8blk's
-	 * (5)  add memory calculated in steps (2) and (4) above.
-	 * (6)  roundup this memory to span of hme8blk, i.e. 64KB
-	 * (7)  calculate number of hme8blk's needed to remap this memory
-	 * (8)  calculate amount of memory that's consumed by these hme8blk's
-	 * (9)  allocate additional hme1blk's to hold large mappings.
-	 *      H8TOH1 determines this.  The current SWAG gives enough hblk1's
-	 *      to remap everything with 4M mappings.
-	 * (10) account for partially used hblk8's due to non-64K aligned
-	 *      PROM mapping entries.
-	 * (11) add memory calculated in steps (8), (9), and (10) above.
-	 * (12) kmem_zalloc the memory calculated in (11); since segkmem
-	 *      is not ready yet, this gets bop_alloc'ed.
-	 * (13) there will be very few bop_alloc's after this point before
-	 *      trap table takes over
+	 * the memory allocated by PROM till now.
+	 * Overestimate the number of hblk1 elements by assuming
+	 * worst case of TTE64K mappings.
+	 * sfmmu_hblk_alloc will panic if this calculation is wrong.
 	 */
-
-	/* sfmmu_init_nucleus_hblks expects properly aligned data structures. */
+	bop_alloc_pages = btopr(kmem64_end - kmem64_base);
+	pages_per_hblk = btop(HMEBLK_SPAN(TTE64K));
+	bop_alloc_pages = roundup(bop_alloc_pages, pages_per_hblk);
+	nhblk1 = bop_alloc_pages / pages_per_hblk + hblk1_min;
+
+	bop_alloc_pages = size_virtalloc(boot_virtavail, boot_virtavail_len);
+
+	/* sfmmu_init_nucleus_hblks expects properly aligned data structures */
 	hme8blk_sz = roundup(HME8BLK_SZ, sizeof (int64_t));
 	hme1blk_sz = roundup(HME1BLK_SZ, sizeof (int64_t));
 
+	bop_alloc_pages += btopr(nhblk1 * hme1blk_sz);
+
 	pages_per_hblk = btop(HMEBLK_SPAN(TTE8K));
-	bop_alloc_pages = roundup(bop_alloc_pages, pages_per_hblk);
-	nhblk8 = bop_alloc_pages / pages_per_hblk;
-	nhblk1 = roundup(nhblk8, H8TOH1) / H8TOH1;
-	hblk_pages = btopr(nhblk8 * hme8blk_sz + nhblk1 * hme1blk_sz);
-	bop_alloc_pages += hblk_pages;
-	bop_alloc_pages = roundup(bop_alloc_pages, pages_per_hblk);
-	nhblk8 = bop_alloc_pages / pages_per_hblk;
-	nhblk1 = roundup(nhblk8, H8TOH1) / H8TOH1;
-	if (nhblk1 < hblk1_min)
-		nhblk1 = hblk1_min;
-	if (nhblk8 < hblk8_min)
-		nhblk8 = hblk8_min;
+	nhblk8 = 0;
+	while (bop_alloc_pages > 1) {
+		bop_alloc_pages = roundup(bop_alloc_pages, pages_per_hblk);
+		nhblk8 += bop_alloc_pages /= pages_per_hblk;
+		bop_alloc_pages *= hme8blk_sz;
+		bop_alloc_pages = btopr(bop_alloc_pages);
+	}
+	nhblk8 += 2;
 
 	/*
 	 * Since hblk8's can hold up to 64k of mappings aligned on a 64k
@@ -1834,6 +1930,9 @@
 startup_fixup_physavail(void)
 {
 	struct memlist *cur;
+	size_t kmem64_overmap_size = kmem64_aligned_end - kmem64_end;
+
+	PRM_DEBUG(kmem64_overmap_size);
 
 	/*
 	 * take the most current snapshot we can by calling mem-update
@@ -1852,6 +1951,16 @@
 	    &memlist, 0, 0);
 
 	/*
+	 * Add any unused kmem64 memory from overmapped page
+	 * (Note: va_to_pa does not work for kmem64_end)
+	 */
+	if (kmem64_overmap_size) {
+		memlist_add(kmem64_pabase + (kmem64_end - kmem64_base),
+		    kmem64_overmap_size,
+		    &memlist, &cur);
+	}
+
+	/*
 	 * Add any extra memory after e_text we added to the phys_avail list
 	 * back to the old list.
 	 */
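The new hblk8 sizing replaces the old two-pass estimate with a loop that runs to a fixed point: the hme blocks needed to remap the boot-allocated pages themselves occupy pages that must be remapped in turn, so each pass feeds the memory the previous pass consumed back in, and the residue shrinks geometrically. A standalone sketch of that loop; PAGESIZE, HBLK8_SPAN and the 312-byte HME8BLK_SZ are hypothetical stand-ins for the kernel's btop()/btopr()/HMEBLK_SPAN(TTE8K) machinery:

    #include <stdio.h>

    #define PAGESIZE        8192UL
    #define HBLK8_SPAN      (64UL * 1024)   /* one hblk8 remaps 64K */
    #define HME8BLK_SZ      312UL           /* hypothetical struct size */

    int
    main(void)
    {
            unsigned long pages = 100000;   /* pages bop-allocated so far */
            unsigned long pages_per_hblk = HBLK8_SPAN / PAGESIZE;   /* 8 */
            unsigned long nhblk8 = 0;

            /*
             * Each pass converts the not-yet-remapped pages into hblk8's,
             * then converts the bytes those hblk8's occupy back into pages,
             * since that memory must be remapped too.  The residue shrinks
             * geometrically, so only a few passes run.
             */
            while (pages > 1) {
                    unsigned long n =
                        (pages + pages_per_hblk - 1) / pages_per_hblk;

                    nhblk8 += n;
                    pages = (n * HME8BLK_SZ + PAGESIZE - 1) / PAGESIZE;
            }
            nhblk8 += 2;    /* same slack the patch adds */

            printf("nhblk8 = %lu\n", nhblk8);       /* 12563 for this input */
            return (0);
    }

For 100000 pages the loop settles after four passes, which is why the patch can drop the old hblk8_min floor entirely.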
--- a/usr/src/uts/sun4/vm/sfmmu.c	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sun4/vm/sfmmu.c	Mon Mar 05 13:11:00 2007 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -124,6 +124,15 @@
 	if (tba_taken_over)
 		return (hat_getpfnum(kas.a_hat, (caddr_t)vaddr));
 
+#if !defined(C_OBP)
+	if ((caddr_t)vaddr >= kmem64_base && (caddr_t)vaddr < kmem64_end) {
+		if (kmem64_pabase == (uint64_t)-1)
+			prom_panic("va_to_pfn: kmem64_pabase not init");
+		physaddr = kmem64_pabase + ((caddr_t)vaddr - kmem64_base);
+		return ((pfn_t)physaddr >> MMU_PAGESHIFT);
+	}
+#endif	/* !C_OBP */
+
 	if ((prom_translate_virt(vaddr, &valid, &physaddr, &mode) != -1) &&
 	    (valid == -1)) {
 		return ((pfn_t)(physaddr >> MMU_PAGESHIFT));
@@ -169,7 +178,6 @@
 	PRM_DEBUG(ktsb_pbase);
 	PRM_DEBUG(ktsb4m_pbase);
 
-	sfmmu_setup_4lp();
 	sfmmu_patch_ktsb();
 	sfmmu_patch_utsb();
 	sfmmu_patch_mmu_asi(ktsb_phys);
@@ -222,6 +230,12 @@
 #define	COMBINE(hi, lo) (((uint64_t)(uint32_t)(hi) << 32) | (uint32_t)(lo))
 
 /*
+ * Track large pages used.
+ * Provides observability for this feature on non-debug kernels.
+ */
+ulong_t map_prom_lpcount[MMU_PAGE_SIZES];
+
+/*
  * This function traverses the prom mapping list and creates equivalent
  * mappings in the sfmmu mapping hash.
  */
@@ -243,10 +257,12 @@
 		ASSERT(promt->tte_hi != 0);
 		ASSERT32(promt->virt_hi == 0 && promt->size_hi == 0);
 
+		vaddr = (caddr_t)COMBINE(promt->virt_hi, promt->virt_lo);
+
 		/*
 		 * hack until we get rid of map-for-unix
 		 */
-		if (COMBINE(promt->virt_hi, promt->virt_lo) < KERNELBASE)
+		if (vaddr < (caddr_t)KERNELBASE)
 			continue;
 
 		ttep->tte_inthi = promt->tte_hi;
@@ -301,6 +317,34 @@
 				    "i/o page without side-effect");
 			}
 		}
+
+		/*
+		 * skip kmem64 area
+		 */
+		if (vaddr >= kmem64_base &&
+		    vaddr < kmem64_aligned_end) {
+#if !defined(C_OBP)
+			cmn_err(CE_PANIC,
+			    "unexpected kmem64 prom mapping\n");
+#else	/* !C_OBP */
+			size_t mapsz;
+
+			if (ptob(pfn) !=
+			    kmem64_pabase + (vaddr - kmem64_base)) {
+				cmn_err(CE_PANIC,
+				    "unexpected kmem64 prom mapping\n");
+			}
+
+			mapsz = kmem64_aligned_end - vaddr;
+			if (mapsz >= size) {
+				break;
+			}
+			size -= mapsz;
+			offset += mapsz;
+			continue;
+#endif	/* !C_OBP */
+		}
+
 		oldpfn = sfmmu_vatopfn(vaddr, KHATID, &oldtte);
 		ASSERT(oldpfn != PFN_SUSPENDED);
 		ASSERT(page_relocate_ready == 0);
@@ -336,6 +380,37 @@
 			offset += MMU_PAGESIZE;
 		}
 	}
+
+	/*
+	 * We claimed kmem64 from prom, so now we need to load tte.
+	 */
+	if (kmem64_base != NULL) {
+		pgcnt_t pages;
+		size_t psize;
+		int pszc;
+
+		pszc = kmem64_szc;
+#ifdef sun4u
+		if (pszc > TTE8K) {
+			pszc = segkmem_lpszc;
+		}
+#endif	/* sun4u */
+		psize = TTEBYTES(pszc);
+		pages = btop(psize);
+		basepfn = kmem64_pabase >> MMU_PAGESHIFT;
+		vaddr = kmem64_base;
+		while (vaddr < kmem64_end) {
+			sfmmu_memtte(ttep, basepfn,
+			    PROC_DATA | HAT_NOSYNC, pszc);
+			sfmmu_tteload(kas.a_hat, ttep, vaddr, NULL,
+			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
+			vaddr += psize;
+			basepfn += pages;
+		}
+		map_prom_lpcount[pszc] =
+		    ((caddr_t)P2ROUNDUP((uintptr_t)kmem64_end, psize) -
+		    kmem64_base) >> TTE_PAGE_SHIFT(pszc);
+	}
 }
 
 #undef COMBINE	/* local to previous routine */
@@ -714,13 +789,18 @@
 	 * We choose the TSB to hold kernel 4M mappings to have twice
 	 * the reach as the primary kernel TSB since this TSB will
 	 * potentially (currently) be shared by both mappings to all of
-	 * physical memory plus user TSBs.  Since the current
-	 * limit on primary kernel TSB size is 16MB this will top out
-	 * at 64K which we can certainly afford.
+	 * physical memory plus user TSBs. If this TSB has to be in nucleus
+	 * (only for Spitfire and Cheetah) limit its size to 64K.
 	 */
-	ktsb4m_szcode = ktsb_szcode - (MMU_PAGESHIFT4M - MMU_PAGESHIFT) + 1;
-	if (ktsb4m_szcode < TSB_MIN_SZCODE)
-		ktsb4m_szcode = TSB_MIN_SZCODE;
+	ktsb4m_szcode = highbit((2 * npages) / TTEPAGES(TTE4M) - 1);
+	ktsb4m_szcode -= TSB_START_SIZE;
+	ktsb4m_szcode = MAX(ktsb4m_szcode, TSB_MIN_SZCODE);
+	ktsb4m_szcode = MIN(ktsb4m_szcode, TSB_SOFTSZ_MASK);
+	if ((enable_bigktsb == 0 || ktsb_phys == 0) && ktsb4m_szcode >
+	    TSB_64K_SZCODE) {
+		ktsb4m_szcode = TSB_64K_SZCODE;
+		max_bootlp_tteszc = TTE8K;
+	}
 
 	ktsb_sz = TSB_BYTES(ktsb_szcode);	/* kernel 8K tsb size */
 	ktsb4m_sz = TSB_BYTES(ktsb4m_szcode);	/* kernel 4M tsb size */
@@ -734,6 +814,11 @@
 ndata_alloc_tsbs(struct memlist *ndata, pgcnt_t npages)
 {
 	/*
+	 * Set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS.
+	 */
+	sfmmu_setup_4lp();
+
+	/*
 	 * Size the kernel TSBs based upon the amount of physical
 	 * memory in the system.
 	 */
@@ -755,13 +840,17 @@
 	/*
 	 * Next, allocate 4M kernel TSB from the nucleus since it's small.
 	 */
-	if ((ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz)) == NULL)
-		return (-1);
-	ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1)));
+	if (ktsb4m_szcode <= TSB_64K_SZCODE) {
 
-	PRM_DEBUG(ktsb4m_base);
-	PRM_DEBUG(ktsb4m_sz);
-	PRM_DEBUG(ktsb4m_szcode);
+		ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz);
+		if (ktsb4m_base == NULL)
+			return (-1);
+		ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1)));
+
+		PRM_DEBUG(ktsb4m_base);
+		PRM_DEBUG(ktsb4m_sz);
+		PRM_DEBUG(ktsb4m_szcode);
+	}
 
 	return (0);
 }
@@ -927,42 +1016,31 @@
 	return (0);
 }
 
+/*
+ * Allocate virtual addresses at base with given alignment.
+ * Note that there is no physical memory behind the address yet.
+ */
 caddr_t
-alloc_hme_buckets(caddr_t base, int pagesize)
+alloc_hme_buckets(caddr_t base, int alignsize)
 {
 	size_t hmehash_sz = (uhmehash_num + khmehash_num) *
-	sizeof (struct hmehash_bucket);
+	    sizeof (struct hmehash_bucket);
 
 	ASSERT(khme_hash == NULL);
 	ASSERT(uhme_hash == NULL);
 
-	/* If no pagesize specified, use default MMU pagesize */
-	if (!pagesize)
-		pagesize = MMU_PAGESIZE;
+	base = (caddr_t)roundup((uintptr_t)base, alignsize);
+	hmehash_sz = roundup(hmehash_sz, alignsize);
 
-	/*
-	 * If we start aligned and ask for a multiple of a pagesize, and OBP
-	 * supports large pages, we will then use mappings of the largest size
-	 * possible for the BOP_ALLOC, possibly saving us tens of thousands of
-	 * TLB miss-induced traversals of the TSBs and/or the HME hashes...
-	 */
-	base = (caddr_t)roundup((uintptr_t)base, pagesize);
-	hmehash_sz = roundup(hmehash_sz, pagesize);
-
-	khme_hash = (struct hmehash_bucket *)BOP_ALLOC(bootops, base,
-	    hmehash_sz, pagesize);
-
-	if ((caddr_t)khme_hash != base)
-		cmn_err(CE_PANIC, "Cannot bop_alloc hme hash buckets.");
-
+	khme_hash = (struct hmehash_bucket *)base;
 	uhme_hash = (struct hmehash_bucket *)((caddr_t)khme_hash +
-	khmehash_num * sizeof (struct hmehash_bucket));
+	    khmehash_num * sizeof (struct hmehash_bucket));
 	base += hmehash_sz;
 	return (base);
 }
 
 /*
- * This function bop allocs the kernel TSB.
+ * This function bop allocs kernel TSBs.
 */
 caddr_t
 sfmmu_ktsb_alloc(caddr_t tsbbase)
@@ -975,12 +1053,26 @@
 		    ktsb_sz);
 		if (vaddr != ktsb_base)
 			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
-			    " bigktsb");
+			    " 8K bigktsb");
 		ktsb_base = vaddr;
 		tsbbase = ktsb_base + ktsb_sz;
 		PRM_DEBUG(ktsb_base);
 		PRM_DEBUG(tsbbase);
 	}
+
+	if (ktsb4m_szcode > TSB_64K_SZCODE) {
+		ASSERT(ktsb_phys && enable_bigktsb);
+		ktsb4m_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb4m_sz);
+		vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb4m_base, ktsb4m_sz,
+		    ktsb4m_sz);
+		if (vaddr != ktsb4m_base)
+			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
+			    " 4M bigktsb");
+		ktsb4m_base = vaddr;
+		tsbbase = ktsb4m_base + ktsb4m_sz;
+		PRM_DEBUG(ktsb4m_base);
+		PRM_DEBUG(tsbbase);
+	}
 	return (tsbbase);
 }
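The reworked ktsb4m_szcode computation sizes the 4M-page TSB directly from physical memory: it asks for twice as many entries as there are 4M pages in the system, then clamps the size code to the supported range. A runnable worked example; highbit() mirrors the kernel's 1-based highest-bit finder, TTEPAGES(TTE4M) is 512 because a 4M page spans 512 8K pages, and TSB_START_SIZE = 9 (a size-code-0 TSB holds 2^9 = 512 entries of 16 bytes, i.e. 8K) and TSB_SOFTSZ_MASK = 7 are assumed values for illustration only:

    #include <stdio.h>

    #define TTEPAGES_TTE4M  512     /* 8K pages per 4M page */
    #define TSB_START_SIZE  9       /* assumption: szcode 0 = 512 entries */
    #define TSB_MIN_SZCODE  0       /* assumption */
    #define TSB_SOFTSZ_MASK 7       /* assumption */

    static int
    highbit(unsigned long v)        /* 1-based index of highest set bit */
    {
            int h = 0;

            while (v != 0) {
                    v >>= 1;
                    h++;
            }
            return (h);
    }

    int
    main(void)
    {
            unsigned long npages = 8UL * 1024 * 1024;  /* 64 GB in 8K pages */
            int szc = highbit((2 * npages) / TTEPAGES_TTE4M - 1) -
                TSB_START_SIZE;

            if (szc < TSB_MIN_SZCODE)
                    szc = TSB_MIN_SZCODE;
            if (szc > TSB_SOFTSZ_MASK)
                    szc = TSB_SOFTSZ_MASK;

            /* szcode 6: the 8K base TSB scaled by 2^6, i.e. 512K */
            printf("ktsb4m_szcode = %d (%lu bytes)\n",
                szc, (8UL << 10) << szc);
            return (0);
    }

For a 64 GB machine this yields size code 6, a 512K TSB with 32768 entries — twice the 16384 4M pages needed to cover memory, matching the "twice the reach" goal in the comment.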
--- a/usr/src/uts/sun4/vm/vm_dep.h	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sun4/vm/vm_dep.h	Mon Mar 05 13:11:00 2007 -0800
@@ -19,7 +19,7 @@
 * CDDL HEADER END
 */
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
 
@@ -425,6 +425,14 @@
 extern int vac_shift;
 
 /*
+ * Kernel mem segment in 64-bit space
+ */
+extern caddr_t kmem64_base, kmem64_end, kmem64_aligned_end;
+extern int kmem64_alignsize, kmem64_szc;
+extern uint64_t kmem64_pabase;
+extern int max_bootlp_tteszc;
+
+/*
 * Maximum and default values for user heap, stack, private and shared
 * anonymous memory, and user text and initialized data.
 *
--- a/usr/src/uts/sun4u/cpu/us3_cheetah.c	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sun4u/cpu/us3_cheetah.c	Mon Mar 05 13:11:00 2007 -0800
@@ -19,7 +19,7 @@
 * CDDL HEADER END
 */
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
 
@@ -151,6 +151,7 @@
 	max_privmap_lpsize = MMU_PAGESIZE;
 	max_utext_lpsize = MMU_PAGESIZE;
 	max_shm_lpsize = MMU_PAGESIZE;
+	max_bootlp_tteszc = TTE8K;
 }
 
 void
@@ -663,53 +664,36 @@
 
 /*
 * Allocate and initialize the exclusive displacement flush area.
- * Must be called before startup_bop_gone().
+ * Called twice. The first time allocates virtual address. The second
+ * call looks up the physical address.
 */
caddr_t
ecache_init_scrub_flush_area(caddr_t alloc_base)
{
-	unsigned size = 2 * CH_ECACHE_8M_SIZE;
-	caddr_t tmp_alloc_base = alloc_base;
-	caddr_t flush_alloc_base =
-	    (caddr_t)roundup((uintptr_t)alloc_base, size);
-	caddr_t ecache_tl1_virtaddr;
+	static caddr_t ecache_tl1_virtaddr;
 
-	/*
-	 * Allocate the physical memory for the exclusive flush area
-	 *
-	 * Need to allocate an exclusive flush area that is twice the
-	 * largest supported E$ size, physically contiguous, and
-	 * aligned on twice the largest E$ size boundary.
-	 *
-	 * Memory allocated via BOP_ALLOC is included in the "cage"
-	 * from the DR perspective and due to this, its physical
-	 * address will never change and the memory will not be
-	 * removed.
-	 *
-	 * BOP_ALLOC takes 4 arguments: bootops, virtual address hint,
-	 * size of the area to allocate, and alignment of the area to
-	 * allocate. It returns zero if the allocation fails, or the
-	 * virtual address for a successful allocation. Memory BOP_ALLOC'd
-	 * is physically contiguous.
-	 */
-	if ((ecache_tl1_virtaddr = (caddr_t)BOP_ALLOC(bootops,
-	    flush_alloc_base, size, size)) != NULL) {
+	if (alloc_base != NULL) {
+		/*
+		 * Need to allocate an exclusive flush area that is twice the
+		 * largest supported E$ size, physically contiguous, and
+		 * aligned on twice the largest E$ size boundary.
+		 */
+		unsigned size = 2 * CH_ECACHE_8M_SIZE;
+		caddr_t va = (caddr_t)roundup((uintptr_t)alloc_base, size);
 
-		tmp_alloc_base =
-		    (caddr_t)roundup((uintptr_t)(ecache_tl1_virtaddr + size),
-		    ecache_alignsize);
-
-		/*
-		 * get the physical address of the exclusive flush area
-		 */
-		ecache_tl1_flushaddr = va_to_pa(ecache_tl1_virtaddr);
+		ecache_tl1_virtaddr = va;
+		alloc_base = va + size;
 	} else {
-		ecache_tl1_virtaddr = (caddr_t)-1;
-		cmn_err(CE_NOTE, "!ecache_init_scrub_flush_area failed\n");
+		/*
+		 * Get the physical address of the exclusive flush area.
+		 */
+		ASSERT(ecache_tl1_virtaddr != NULL);
+		ecache_tl1_flushaddr = va_to_pa(ecache_tl1_virtaddr);
+		ASSERT(ecache_tl1_flushaddr != ((uint64_t)-1));
	}
 
-	return (tmp_alloc_base);
+	return (alloc_base);
}
 
 /*
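The rewritten ecache_init_scrub_flush_area() follows the same discipline as the rest of this change: phase one only carves an aligned virtual range out of the bump allocator, and phase two — the ecache_init_scrub_flush_area(NULL) call added in startup.c above — resolves the physical address once memory actually backs it. A minimal, self-contained sketch of that two-call pattern; resolve_pa() is a hypothetical stand-in for va_to_pa():

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FLUSH_AREA_SIZE (2UL * 8 * 1024 * 1024) /* twice the largest E$ */

    static char *flush_va;          /* carried between the two calls */
    static uint64_t flush_pa;

    static uint64_t
    resolve_pa(char *va)
    {
            /* stand-in for va_to_pa(); the sketch just identity-maps */
            return ((uint64_t)(uintptr_t)va);
    }

    static char *
    flush_area_setup(char *alloc_base)
    {
            if (alloc_base != NULL) {
                    /* phase 1: carve an aligned VA range, no memory yet */
                    uintptr_t va =
                        ((uintptr_t)alloc_base + FLUSH_AREA_SIZE - 1) &
                        ~(FLUSH_AREA_SIZE - 1);

                    flush_va = (char *)va;
                    return ((char *)va + FLUSH_AREA_SIZE);
            }
            /* phase 2: backing memory exists, look up the PA */
            assert(flush_va != NULL);
            flush_pa = resolve_pa(flush_va);
            return (NULL);
    }

    int
    main(void)
    {
            char *next = flush_area_setup((char *)(uintptr_t)0x10000000);
            (void) flush_area_setup(NULL);
            printf("va=%p pa=%llx next=%p\n", (void *)flush_va,
                (unsigned long long)flush_pa, (void *)next);
            return (0);
    }

The static local replaces the old single-shot BOP_ALLOC because, with kmem64 now claimed lazily from the PROM, no physical page exists behind the flush area at the time its address is chosen.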
--- a/usr/src/uts/sun4u/lw8/Makefile.lw8	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sun4u/lw8/Makefile.lw8	Mon Mar 05 13:11:00 2007 -0800
@@ -20,7 +20,7 @@
 #
 #
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 # Use is subject to license terms.
 #
 #ident	"%Z%%M%	%I%	%E% SMI"
@@ -112,6 +112,7 @@
 MACHINE_DEFS += -DLOCKED_DTLB_ENTRIES=6
 # Max IOSRAM TOC major version number supported
 MACHINE_DEFS += -DMAX_IOSRAM_TOC_VER=0x1
+MACHINE_DEFS += -DC_OBP
 
 # Define for inline pre-processing since
 # cpp not smart about v9 yet.
--- a/usr/src/uts/sun4u/serengeti/Makefile.serengeti.shared	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sun4u/serengeti/Makefile.serengeti.shared	Mon Mar 05 13:11:00 2007 -0800
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 # Use is subject to license terms.
 #
 #ident	"%Z%%M%	%I%	%E% SMI"
@@ -128,6 +128,7 @@
 MACHINE_DEFS += -DLOCKED_DTLB_ENTRIES=6
 # Max IOSRAM TOC major version number supported
 MACHINE_DEFS += -DMAX_IOSRAM_TOC_VER=0x1
+MACHINE_DEFS += -DC_OBP
 
 # Define for inline pre-processing since
 # cpp is not smart about v9 yet.
--- a/usr/src/uts/sun4u/vm/mach_vm_dep.c	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sun4u/vm/mach_vm_dep.c	Mon Mar 05 13:11:00 2007 -0800
@@ -19,7 +19,7 @@
 * CDDL HEADER END
 */
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
 
@@ -95,6 +95,11 @@
 };
 
 /*
+ * Maximum page size used to map 64-bit memory segment kmem64_base..kmem64_end
+ */
+int max_bootlp_tteszc = TTE4M;
+
+/*
 * use_text_pgsz64k and use_text_pgsz512k allow the user to turn on these
 * additional text page sizes for USIII-IV+ and OPL by changing the default
 * values via /etc/system.
--- a/usr/src/uts/sun4v/vm/mach_vm_dep.c	Mon Mar 05 09:14:01 2007 -0800
+++ b/usr/src/uts/sun4v/vm/mach_vm_dep.c	Mon Mar 05 13:11:00 2007 -0800
@@ -19,7 +19,7 @@
 * CDDL HEADER END
 */
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
 
@@ -93,6 +93,11 @@
 };
 
 /*
+ * Maximum page size used to map 64-bit memory segment kmem64_base..kmem64_end
+ */
+int max_bootlp_tteszc = TTE256M;
+
+/*
 * Maximum and default segment size tunables for user heap, stack, private
 * and shared anonymous memory, and user text and initialized data.
 */