Mercurial > illumos > illumos-gate
changeset 2991:4b13d6c49c6b
6254029 memcntl() MC_HAT_ADVISE with page size 0 may cause segment page sizes to be demoted
6325885 map_pgszstk() uses p->p_brkpageszc rather than p->p_stkpageszc
6371967 assign large pages to anon segment created using mmap /dev/zero
6483208 unify and cleanup OOB (out of the box) large pagesize selection code
6483216 use intermediate pagesizes to map the beginning of bss/heap and stack when it may help performance
6483226 bss size is not properly taken into account by LP OOB policy at exec() time
6483230 grow_internal() doesn't properly align stack bottom for large pages
6483231 memcntl.c: ASSERT(IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz));
6483233 provide a mechanism to enable the use of 32M text pages on OPL by default
6485171 memcntl() shouldn't silently fail when stack space is unavailable with requested pagesize
line wrap: on
line diff
--- a/usr/src/uts/common/exec/aout/aout.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/exec/aout/aout.c Thu Oct 26 16:44:53 2006 -0700 @@ -226,6 +226,7 @@ edp.ux_bsize, edp.ux_doffset, dataprot, pagedata, 0)) goto done; + exenv.ex_bssbase = (caddr_t)edp.ux_datorg; exenv.ex_brkbase = (caddr_t)edp.ux_datorg; exenv.ex_brksize = edp.ux_dsize + edp.ux_bsize; exenv.ex_magic = edp.ux_mag;
--- a/usr/src/uts/common/exec/elf/elf.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/exec/elf/elf.c Thu Oct 26 16:44:53 2006 -0700 @@ -1096,6 +1096,7 @@ off_t offset; int hsize = ehdr->e_phentsize; caddr_t mintmp = (caddr_t)-1; + extern int use_brk_lpg; if (ehdr->e_type == ET_DYN) { /* @@ -1145,47 +1146,41 @@ page = 0; } + /* + * Set the heap pagesize for OOB when the bss size + * is known and use_brk_lpg is not 0. + */ + if (brksize != NULL && use_brk_lpg && + zfodsz != 0 && phdr == dataphdrp && + (prot & PROT_WRITE)) { + size_t tlen = P2NPHASE((uintptr_t)addr + + phdr->p_filesz, PAGESIZE); + + if (zfodsz > tlen) { + curproc->p_brkpageszc = + page_szc(map_pgsz(MAPPGSZ_HEAP, + curproc, addr + phdr->p_filesz + + tlen, zfodsz - tlen, 0)); + } + } + if (curproc->p_brkpageszc != 0 && phdr == dataphdrp && (prot & PROT_WRITE)) { - /* - * segvn only uses large pages for segments - * that have the requested large page size - * aligned base and size. To insure the part - * of bss that starts at heap large page size - * boundary gets mapped by large pages create - * 2 bss segvn segments which is accomplished - * by calling execmap twice. First execmap - * will create the bss segvn segment that is - * before the large page boundary and it will - * be mapped with base pages. If bss start is - * already large page aligned only 1 bss - * segment will be created. The second bss - * segment's size is large page size aligned - * so that segvn uses large pages for that - * segment and it also makes the heap that - * starts right after bss to start at large - * page boundary. 
- */ uint_t szc = curproc->p_brkpageszc; size_t pgsz = page_get_pagesize(szc); - caddr_t zaddr = addr + phdr->p_filesz; - size_t zlen = P2NPHASE((uintptr_t)zaddr, pgsz); + caddr_t ebss = addr + phdr->p_memsz; + size_t extra_zfodsz; ASSERT(pgsz > PAGESIZE); + extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz); + if (error = execmap(vp, addr, phdr->p_filesz, - zlen, phdr->p_offset, prot, page, szc)) + zfodsz + extra_zfodsz, phdr->p_offset, + prot, page, szc)) goto bad; - if (zfodsz > zlen) { - zfodsz -= zlen; - zaddr += zlen; - zlen = P2ROUNDUP(zfodsz, pgsz); - if (error = execmap(vp, zaddr, 0, zlen, - phdr->p_offset, prot, page, szc)) - goto bad; - } if (brksize != NULL) - *brksize = zlen - zfodsz; + *brksize = extra_zfodsz; } else { if (error = execmap(vp, addr, phdr->p_filesz, zfodsz, phdr->p_offset, prot, page, 0))
--- a/usr/src/uts/common/os/exec.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/os/exec.c Thu Oct 26 16:44:53 2006 -0700 @@ -89,7 +89,6 @@ uint_t auxv_hwcap32 = 0; /* 32-bit version of auxv_hwcap */ #endif -int exec_lpg_disable = 0; #define PSUIDFLAGS (SNOCD|SUGID) /* @@ -1114,7 +1113,23 @@ error = ENOMEM; goto bad; } - crargs.szc = szc; + if (szc > 0) { + /* + * ASSERT alignment because the mapelfexec() + * caller for the szc > 0 case extended zfod + * so it's end is pgsz aligned. + */ + size_t pgsz = page_get_pagesize(szc); + ASSERT(IS_P2ALIGNED(zfodbase + zfodlen, pgsz)); + + if (IS_P2ALIGNED(zfodbase, pgsz)) { + crargs.szc = szc; + } else { + crargs.szc = AS_MAP_HEAP; + } + } else { + crargs.szc = AS_MAP_NO_LPOOB; + } if (error = as_map(p->p_as, (caddr_t)zfodbase, zfodlen, segvn_create, &crargs)) goto bad; @@ -1555,11 +1570,6 @@ return (0); } -#ifdef DEBUG -int mpss_brkpgszsel = 0; -int mpss_stkpgszsel = 0; -#endif - /* * Initialize a new user stack with the specified arguments and environment. 
* The initial user stack layout is as follows: @@ -1614,6 +1624,7 @@ rctl_entity_p_t e; struct as *as; + extern int use_stk_lpg; args->from_model = p->p_model; if (p->p_model == DATAMODEL_NATIVE) { @@ -1751,7 +1762,9 @@ p->p_brkbase = NULL; p->p_brksize = 0; + p->p_brkpageszc = 0; p->p_stksize = 0; + p->p_stkpageszc = 0; p->p_model = args->to_model; p->p_usrstack = usrstack; p->p_stkprot = args->stk_prot; @@ -1766,51 +1779,14 @@ e.rcep_t = RCENTITY_PROCESS; rctl_set_reset(p->p_rctls, p, &e); - if (exec_lpg_disable == 0) { -#ifdef DEBUG - uint_t pgsizes = page_num_pagesizes(); - uint_t szc; -#endif - p->p_brkpageszc = args->brkpageszc; - p->p_stkpageszc = args->stkpageszc; - - if (p->p_brkpageszc == 0) { - p->p_brkpageszc = page_szc(map_pgsz(MAPPGSZ_HEAP, - p, 0, 0, NULL)); - } - if (p->p_stkpageszc == 0) { - p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, - p, 0, 0, NULL)); - } + /* Too early to call map_pgsz for the heap */ + if (use_stk_lpg) { + p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, p, 0, 0, 0)); + } -#ifdef DEBUG - if (mpss_brkpgszsel != 0) { - if (mpss_brkpgszsel == -1) { - szc = ((uint_t)gethrtime() >> 8) % pgsizes; - } else { - szc = mpss_brkpgszsel % pgsizes; - } - p->p_brkpageszc = szc; - } - - if (mpss_stkpgszsel != 0) { - if (mpss_stkpgszsel == -1) { - szc = ((uint_t)gethrtime() >> 7) % pgsizes; - } else { - szc = mpss_stkpgszsel % pgsizes; - } - p->p_stkpageszc = szc; - } - -#endif - mutex_enter(&p->p_lock); - p->p_flag |= SAUTOLPG; /* kernel controls page sizes */ - mutex_exit(&p->p_lock); - - } else { - p->p_brkpageszc = 0; - p->p_stkpageszc = 0; - } + mutex_enter(&p->p_lock); + p->p_flag |= SAUTOLPG; /* kernel controls page sizes */ + mutex_exit(&p->p_lock); exec_set_sp(size);
--- a/usr/src/uts/common/os/grow.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/os/grow.c Thu Oct 26 16:44:53 2006 -0700 @@ -60,7 +60,6 @@ int use_brk_lpg = 1; int use_stk_lpg = 1; -int use_zmap_lpg = 1; static int brk_lpg(caddr_t nva); static int grow_lpg(caddr_t sp); @@ -96,12 +95,11 @@ { struct proc *p = curproc; size_t pgsz, len; - caddr_t addr; + caddr_t addr, brkend; caddr_t bssbase = p->p_bssbase; caddr_t brkbase = p->p_brkbase; int oszc, szc; int err; - int remap = 0; oszc = p->p_brkpageszc; @@ -115,7 +113,7 @@ len = nva - bssbase; - pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, &remap); + pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0); szc = page_szc(pgsz); /* @@ -133,28 +131,6 @@ return (err); } - if (remap == 0) { - /* - * Map from the current brk end up to the new page size - * alignment using the current page size. - */ - addr = brkbase + p->p_brksize; - addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); - if (addr < nva) { - err = brk_internal(addr, oszc); - /* - * In failure case, try again if oszc is not base page - * size, then return err. - */ - if (err != 0) { - if (oszc != 0) { - err = brk_internal(nva, 0); - } - return (err); - } - } - } - err = brk_internal(nva, szc); /* If using szc failed, map with base page size and return. */ if (err != 0) { @@ -164,16 +140,18 @@ return (err); } - if (remap != 0) { - /* - * Round up brk base to a large page boundary and remap - * anything in the segment already faulted in beyond that - * point. - */ - addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz); - len = (brkbase + p->p_brksize) - addr; - /* advisory, so ignore errors */ + /* + * Round up brk base to a large page boundary and remap + * anything in the segment already faulted in beyond that + * point. + */ + addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz); + brkend = brkbase + p->p_brksize; + len = brkend - addr; + /* Check that len is not negative. Update page size code for heap. 
*/ + if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) { (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE); + p->p_brkpageszc = szc; } ASSERT(err == 0); @@ -272,8 +250,26 @@ /* * Add new zfod mapping to extend UNIX data segment + * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies + * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate + * page sizes if ova is not aligned to szc's pgsz. */ - crargs.szc = szc; + if (szc > 0) { + caddr_t rbss; + + rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, + pgsz); + if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) { + crargs.szc = p->p_brkpageszc ? p->p_brkpageszc : + AS_MAP_NO_LPOOB; + } else if (ova == rbss) { + crargs.szc = szc; + } else { + crargs.szc = AS_MAP_HEAP; + } + } else { + crargs.szc = AS_MAP_NO_LPOOB; + } crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP; error = as_map(as, ova, (size_t)(nva - ova), segvn_create, &crargs); @@ -288,7 +284,6 @@ (void) as_unmap(as, nva, (size_t)(ova - nva)); } p->p_brksize = size; - p->p_brkpageszc = szc; return (0); } @@ -300,6 +295,9 @@ grow(caddr_t sp) { struct proc *p = curproc; + struct as *as = p->p_as; + size_t oldsize = p->p_stksize; + size_t newsize; int err; /* @@ -307,13 +305,24 @@ * This also serves as the lock protecting p_stksize * and p_stkpageszc. */ - as_rangelock(p->p_as); + as_rangelock(as); if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) { err = grow_lpg(sp); } else { err = grow_internal(sp, p->p_stkpageszc); } - as_rangeunlock(p->p_as); + as_rangeunlock(as); + + if (err == 0 && (newsize = p->p_stksize) > oldsize) { + ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE)); + ASSERT(IS_P2ALIGNED(newsize, PAGESIZE)); + /* + * Set up translations so the process doesn't have to fault in + * the stack pages we just gave it. + */ + (void) as_fault(as->a_hat, as, p->p_usrstack - newsize, + newsize - oldsize, F_INVAL, S_WRITE); + } return ((err == 0 ? 
1 : 0)); } @@ -328,15 +337,15 @@ struct proc *p = curproc; size_t pgsz; size_t len, newsize; - caddr_t addr, oldsp; + caddr_t addr, saddr; + caddr_t growend; int oszc, szc; int err; - int remap = 0; newsize = p->p_usrstack - sp; oszc = p->p_stkpageszc; - pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, &remap); + pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0); szc = page_szc(pgsz); /* @@ -357,30 +366,8 @@ /* * We've grown sufficiently to switch to a new page size. - * If we're not going to remap the whole segment with the new - * page size, split the grow into two operations: map to the new - * page size alignment boundary with the existing page size, then - * map the rest with the new page size. + * So we are going to remap the whole segment with the new page size. */ - err = 0; - if (remap == 0) { - oldsp = p->p_usrstack - p->p_stksize; - addr = (caddr_t)P2ALIGN((uintptr_t)oldsp, pgsz); - if (addr > sp) { - err = grow_internal(addr, oszc); - /* - * In this case, grow with oszc failed, so grow all the - * way to sp with base page size. - */ - if (err != 0) { - if (oszc != 0) { - err = grow_internal(sp, 0); - } - return (err); - } - } - } - err = grow_internal(sp, szc); /* The grow with szc failed, so fall back to base page size. */ if (err != 0) { @@ -390,22 +377,21 @@ return (err); } - if (remap) { - /* - * Round up stack pointer to a large page boundary and remap - * any pgsz pages in the segment already faulted in beyond that - * point. - */ - addr = p->p_usrstack - p->p_stksize; - addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); - len = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz) - addr; - /* advisory, so ignore errors */ + /* + * Round up stack pointer to a large page boundary and remap + * any pgsz pages in the segment already faulted in beyond that + * point. 
+ */ + saddr = p->p_usrstack - p->p_stksize; + addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz); + growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz); + len = growend - addr; + /* Check that len is not negative. Update page size code for stack. */ + if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) { (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE); + p->p_stkpageszc = szc; } - /* Update page size code for stack. */ - p->p_stkpageszc = szc; - ASSERT(err == 0); return (err); /* should always be 0 */ } @@ -418,8 +404,7 @@ grow_internal(caddr_t sp, uint_t growszc) { struct proc *p = curproc; - struct as *as = p->p_as; - size_t newsize = p->p_usrstack - sp; + size_t newsize; size_t oldsize; int error; size_t pgsz; @@ -427,6 +412,7 @@ struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL); ASSERT(sp < p->p_usrstack); + sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE); /* * grow to growszc alignment but use current p->p_stkpageszc for @@ -437,7 +423,7 @@ if ((szc = growszc) != 0) { pgsz = page_get_pagesize(szc); ASSERT(pgsz > PAGESIZE); - newsize = P2ROUNDUP(newsize, pgsz); + newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz); if (newsize > (size_t)p->p_stk_ctl) { szc = 0; pgsz = PAGESIZE; @@ -445,6 +431,7 @@ } } else { pgsz = PAGESIZE; + newsize = p->p_usrstack - sp; } if (newsize > (size_t)p->p_stk_ctl) { @@ -455,7 +442,6 @@ } oldsize = p->p_stksize; - newsize = P2ROUNDUP(newsize, pgsz); ASSERT(P2PHASE(oldsize, PAGESIZE) == 0); if (newsize <= oldsize) { /* prevent the stack from shrinking */ @@ -466,13 +452,31 @@ crargs.prot &= ~PROT_EXEC; } /* - * extend stack with the p_stkpageszc. growszc is different than - * p_stkpageszc only on a memcntl to increase the stack pagesize. + * extend stack with the proposed new growszc, which is different + * than p_stkpageszc only on a memcntl to increase the stack pagesize. + * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via + * map_pgszcvec(). 
Use AS_MAP_STACK to get intermediate page sizes + * if not aligned to szc's pgsz. */ - crargs.szc = p->p_stkpageszc; + if (szc > 0) { + caddr_t oldsp = p->p_usrstack - oldsize; + caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, + pgsz); + + if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) { + crargs.szc = p->p_stkpageszc ? p->p_stkpageszc : + AS_MAP_NO_LPOOB; + } else if (oldsp == austk) { + crargs.szc = szc; + } else { + crargs.szc = AS_MAP_STACK; + } + } else { + crargs.szc = AS_MAP_NO_LPOOB; + } crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN; - if ((error = as_map(as, p->p_usrstack - newsize, newsize - oldsize, + if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize, segvn_create, &crargs)) != 0) { if (error == EAGAIN) { cmn_err(CE_WARN, "Sorry, no swap space to grow stack " @@ -481,15 +485,6 @@ return (error); } p->p_stksize = newsize; - - - /* - * Set up translations so the process doesn't have to fault in - * the stack pages we just gave it. - */ - (void) as_fault(as->a_hat, as, - p->p_usrstack - newsize, newsize - oldsize, F_INVAL, S_WRITE); - return (0); } @@ -500,13 +495,7 @@ zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags, offset_t pos) { - struct segvn_crargs a, b; - struct proc *p = curproc; - int err; - size_t pgsz; - size_t l0, l1, l2, l3, l4; /* 0th through 5th chunks */ - caddr_t ruaddr, ruaddr0; /* rounded up addresses */ - extern size_t auto_lpg_va_default; + struct segvn_crargs vn_a; if (((PROT_ALL & uprot) != uprot)) return (EACCES); @@ -549,130 +538,18 @@ * Use the seg_vn segment driver; passing in the NULL amp * gives the desired "cloning" effect. 
*/ - a.vp = NULL; - a.offset = 0; - a.type = flags & MAP_TYPE; - a.prot = uprot; - a.maxprot = PROT_ALL; - a.flags = flags & ~MAP_TYPE; - a.cred = CRED(); - a.amp = NULL; - a.szc = 0; - a.lgrp_mem_policy_flags = 0; - - /* - * Call arch-specific map_pgsz routine to pick best page size to map - * this segment, and break the mapping up into parts if required. - * - * The parts work like this: - * - * addr --------- - * | | l0 - * --------- - * | | l1 - * --------- - * | | l2 - * --------- - * | | l3 - * --------- - * | | l4 - * --------- - * addr+len - * - * Starting from the middle, l2 is the number of bytes mapped by the - * selected large page. l1 and l3 are mapped by auto_lpg_va_default - * page size pages, and l0 and l4 are mapped by base page size pages. - * If auto_lpg_va_default is the base page size, then l0 == l4 == 0. - * If the requested address or length are aligned to the selected large - * page size, l1 or l3 may also be 0. - */ - if (use_zmap_lpg && a.type == MAP_PRIVATE) { - - pgsz = map_pgsz(MAPPGSZ_VA, p, *addrp, len, NULL); - if (pgsz <= PAGESIZE || len < pgsz) { - return (as_map(as, *addrp, len, segvn_create, &a)); - } + vn_a.vp = NULL; + vn_a.offset = 0; + vn_a.type = flags & MAP_TYPE; + vn_a.prot = uprot; + vn_a.maxprot = PROT_ALL; + vn_a.flags = flags & ~MAP_TYPE; + vn_a.cred = CRED(); + vn_a.amp = NULL; + vn_a.szc = 0; + vn_a.lgrp_mem_policy_flags = 0; - ruaddr = (caddr_t)P2ROUNDUP((uintptr_t)*addrp, pgsz); - if (auto_lpg_va_default != MMU_PAGESIZE) { - ruaddr0 = (caddr_t)P2ROUNDUP((uintptr_t)*addrp, - auto_lpg_va_default); - l0 = ruaddr0 - *addrp; - } else { - l0 = 0; - ruaddr0 = *addrp; - } - l1 = ruaddr - ruaddr0; - l3 = P2PHASE(len - l0 - l1, pgsz); - if (auto_lpg_va_default == MMU_PAGESIZE) { - l4 = 0; - } else { - l4 = P2PHASE(l3, auto_lpg_va_default); - l3 -= l4; - } - l2 = len - l0 - l1 - l3 - l4; - - if (l0) { - b = a; - err = as_map(as, *addrp, l0, segvn_create, &b); - if (err) { - return (err); - } - } - - if (l1) { - b = a; - b.szc 
= page_szc(auto_lpg_va_default); - err = as_map(as, ruaddr0, l1, segvn_create, &b); - if (err) { - goto error1; - } - } - - if (l2) { - b = a; - b.szc = page_szc(pgsz); - err = as_map(as, ruaddr, l2, segvn_create, &b); - if (err) { - goto error2; - } - } - - if (l3) { - b = a; - b.szc = page_szc(auto_lpg_va_default); - err = as_map(as, ruaddr + l2, l3, segvn_create, &b); - if (err) { - goto error3; - } - } - if (l4) { - err = as_map(as, ruaddr + l2 + l3, l4, segvn_create, - &a); - if (err) { -error3: - if (l3) { - (void) as_unmap(as, ruaddr + l2, l3); - } -error2: - if (l2) { - (void) as_unmap(as, ruaddr, l2); - } -error1: - if (l1) { - (void) as_unmap(as, ruaddr0, l1); - } - if (l0) { - (void) as_unmap(as, *addrp, l0); - } - return (err); - } - } - - return (0); - } - - return (as_map(as, *addrp, len, segvn_create, &a)); + return (as_map(as, *addrp, len, segvn_create, &vn_a)); } static int
--- a/usr/src/uts/common/os/shm.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/os/shm.c Thu Oct 26 16:44:53 2006 -0700 @@ -341,8 +341,7 @@ * [D]ISM segment, then use the previously selected page size. */ if (!isspt(sp)) { - share_size = map_pgsz(MAPPGSZ_ISM, - pp, addr, size, NULL); + share_size = map_pgsz(MAPPGSZ_ISM, pp, addr, size, 0); if (share_size == 0) { as_rangeunlock(as); error = EINVAL;
--- a/usr/src/uts/common/sys/exec.h Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/sys/exec.h Thu Oct 26 16:44:53 2006 -0700 @@ -101,8 +101,6 @@ size_t from_ptrsize; size_t ncargs; struct execsw *execswp; - uint_t stkpageszc; - uint_t brkpageszc; uintptr_t entry; uintptr_t thrptr; char *emulator;
--- a/usr/src/uts/common/sys/vmsystm.h Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/sys/vmsystm.h Thu Oct 26 16:44:53 2006 -0700 @@ -105,6 +105,14 @@ #define MAPPGSZ_HEAP 0x04 #define MAPPGSZ_ISM 0x08 +/* + * Flags for map_pgszcvec + */ +#define MAPPGSZC_SHM 0x01 +#define MAPPGSZC_PRIVM 0x02 +#define MAPPGSZC_STACK 0x04 +#define MAPPGSZC_HEAP 0x08 + struct as; struct page; struct anon; @@ -118,10 +126,10 @@ int dir); extern int valid_usr_range(caddr_t, size_t, uint_t, struct as *, caddr_t); extern int useracc(void *, size_t, int); -extern size_t map_pgsz(int maptype, struct proc *p, caddr_t addr, - size_t len, int *remap); -extern uint_t map_execseg_pgszcvec(int, caddr_t, size_t); -extern uint_t map_shm_pgszcvec(caddr_t, size_t, uintptr_t); +extern size_t map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, + int memcntl); +extern uint_t map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, + int type, int memcntl); extern void map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags); extern int map_addr_vacalign_check(caddr_t, u_offset_t);
--- a/usr/src/uts/common/syscall/memcntl.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/syscall/memcntl.c Thu Oct 26 16:44:53 2006 -0700 @@ -199,31 +199,36 @@ else type = MAPPGSZ_STK; - pgsz = map_pgsz(type, p, 0, 0, NULL); + pgsz = map_pgsz(type, p, 0, 0, 1); } } else { /* + * addr and len must be valid for range specified. + */ + if (valid_usr_range(addr, len, 0, as, + as->a_userlimit) != RANGE_OKAY) { + return (set_errno(ENOMEM)); + } + /* * Note that we don't disable automatic large page * selection for anon segments based on use of * memcntl(). */ if (pgsz == 0) { - pgsz = map_pgsz(MAPPGSZ_VA, p, addr, len, - NULL); + error = as_set_default_lpsize(as, addr, len); + if (error) { + (void) set_errno(error); + } + return (error); } /* * addr and len must be prefered page size aligned - * and valid for range specified. */ if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) { return (set_errno(EINVAL)); } - if (valid_usr_range(addr, len, 0, as, - as->a_userlimit) != RANGE_OKAY) { - return (set_errno(ENOMEM)); - } } szc = mem_getpgszc(pgsz); @@ -257,10 +262,17 @@ return (set_errno(error)); } } + /* + * It is possible for brk_internal to silently fail to + * promote the heap size, so don't panic or ASSERT. + */ + if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) { + as_rangeunlock(as); + return (set_errno(ENOMEM)); + } oszc = p->p_brkpageszc; p->p_brkpageszc = szc; - ASSERT(IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)); addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz); len = (p->p_brkbase + p->p_brksize) - addr; @@ -292,17 +304,24 @@ } if (szc > p->p_stkpageszc) { - error = grow_internal(p->p_usrstack - - p->p_stksize, szc); + error = grow_internal(p->p_usrstack - + p->p_stksize, szc); if (error) { as_rangeunlock(as); return (set_errno(error)); } } + /* + * It is possible for grow_internal to silently fail to + * promote the stack size, so don't panic or ASSERT. 
+ */ + if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) { + as_rangeunlock(as); + return (set_errno(ENOMEM)); + } oszc = p->p_stkpageszc; p->p_stkpageszc = szc; - ASSERT(IS_P2ALIGNED(p->p_usrstack, pgsz)); addr = p->p_usrstack - p->p_stksize; len = p->p_stksize;
--- a/usr/src/uts/common/vm/as.h Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/vm/as.h Thu Oct 26 16:44:53 2006 -0700 @@ -153,6 +153,13 @@ (((as)->a_userlimit > (caddr_t)UINT32_MAX) ? 1 : 0) /* + * Flags for as_map/as_map_ansegs + */ +#define AS_MAP_NO_LPOOB ((uint_t)-1) +#define AS_MAP_HEAP ((uint_t)-2) +#define AS_MAP_STACK ((uint_t)-3) + +/* * The as_callback is the basic structure which supports the ability to * inform clients of specific events pertaining to address space management. * A user calls as_add_callback to register an address space callback @@ -274,6 +281,7 @@ size_t size, enum seg_rw rw); int as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc, boolean_t wait); +int as_set_default_lpsize(struct as *as, caddr_t addr, size_t size); void as_setwatch(struct as *as); void as_clearwatch(struct as *as); int as_getmemid(struct as *, caddr_t, memid_t *);
--- a/usr/src/uts/common/vm/hat.h Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/vm/hat.h Thu Oct 26 16:44:53 2006 -0700 @@ -345,7 +345,7 @@ * hat layer data structures. This flag forces hat layer * to tap its reserves in order to prevent infinite * recursion. - * HAT_LOAD_AUTOLPG Get MMU specific disable_auto_large_pages + * HAT_LOAD_TEXT A flag to hat_memload() to indicate loading text pages. */ /* @@ -362,7 +362,15 @@ #define HAT_RELOAD_SHARE 0x100 #define HAT_NO_KALLOC 0x200 #define HAT_LOAD_TEXT 0x400 -#define HAT_LOAD_AUTOLPG 0x800 + +/* + * Flags for initializing disable_*large_pages. + * + * HAT_AUTO_TEXT Get MMU specific disable_auto_text_large_pages + * HAT_AUTO_DATA Get MMU specific disable_auto_data_large_pages + */ +#define HAT_AUTO_TEXT 0x800 +#define HAT_AUTO_DATA 0x1000 /* * Attributes for hat_memload/hat_devload/hat_*attr
--- a/usr/src/uts/common/vm/seg_vn.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/vm/seg_vn.c Thu Oct 26 16:44:53 2006 -0700 @@ -395,7 +395,7 @@ a->flags &= ~MAP_NORESERVE; if (a->szc != 0) { - if (segvn_lpg_disable != 0 || + if (segvn_lpg_disable != 0 || (a->szc == AS_MAP_NO_LPOOB) || (a->amp != NULL && a->type == MAP_PRIVATE) || (a->flags & MAP_NORESERVE) || seg->s_as == &kas) { a->szc = 0; @@ -5270,8 +5270,9 @@ err = segvn_demote_range(seg, addr, len, SDR_END, 0); } else { - uint_t szcvec = map_shm_pgszcvec(seg->s_base, - pgsz, (uintptr_t)seg->s_base); + uint_t szcvec = map_pgszcvec(seg->s_base, + pgsz, (uintptr_t)seg->s_base, + (svd->flags & MAP_TEXT), MAPPGSZC_SHM, 0); err = segvn_demote_range(seg, addr, len, SDR_END, szcvec); } @@ -6267,7 +6268,8 @@ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - return (svd->type | (svd->flags & MAP_NORESERVE)); + return (svd->type | (svd->flags & (MAP_NORESERVE | MAP_TEXT | + MAP_INITDATA))); } /*ARGSUSED*/
--- a/usr/src/uts/common/vm/seg_vn.h Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/vm/seg_vn.h Thu Oct 26 16:44:53 2006 -0700 @@ -137,16 +137,18 @@ #define SEGVN_ZFOD_ARGS(prot, max) \ { NULL, NULL, 0, MAP_PRIVATE, prot, max, 0, NULL, 0, 0 } -#define AS_MAP_VNSEGS_USELPGS(crfp, argsp) \ +#define AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp) \ ((crfp) == (int (*)())segvn_create && \ (((struct segvn_crargs *)(argsp))->flags & \ (MAP_TEXT | MAP_INITDATA)) && \ - ((struct segvn_crargs *)(argsp))->vp != NULL && \ - ((struct segvn_crargs *)(argsp))->amp == NULL) + ((struct segvn_crargs *)(argsp))->szc == 0 && \ + ((struct segvn_crargs *)(argsp))->vp != NULL) -#define AS_MAP_SHAMP(crfp, argsp) \ +#define AS_MAP_CHECK_ANON_LPOOB(crfp, argsp) \ ((crfp) == (int (*)())segvn_create && \ - ((struct segvn_crargs *)(argsp))->type == MAP_SHARED && \ + (((struct segvn_crargs *)(argsp))->szc == 0 || \ + ((struct segvn_crargs *)(argsp))->szc == AS_MAP_HEAP || \ + ((struct segvn_crargs *)(argsp))->szc == AS_MAP_STACK) && \ ((struct segvn_crargs *)(argsp))->vp == NULL) extern void segvn_init(void);
--- a/usr/src/uts/common/vm/vm_as.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/common/vm/vm_as.c Thu Oct 26 16:44:53 2006 -0700 @@ -1573,8 +1573,10 @@ as_map_vnsegs(struct as *as, caddr_t addr, size_t size, int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated) { - int text = vn_a->flags & MAP_TEXT; - uint_t szcvec = map_execseg_pgszcvec(text, addr, size); + uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA); + int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM; + uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags, + type, 0); int error; struct seg *seg; struct vattr va; @@ -1616,7 +1618,8 @@ save_size = size; size = va.va_size - (vn_a->offset & PAGEMASK); size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t); - szcvec = map_execseg_pgszcvec(text, addr, size); + szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags, + type, 0); if (szcvec <= 1) { size = save_size; goto again; @@ -1637,14 +1640,32 @@ return (0); } +/* + * as_map_ansegs: shared or private anonymous memory. Note that the flags + * passed to map_pgszvec cannot be MAP_INITDATA, for anon. + */ static int -as_map_sham(struct as *as, caddr_t addr, size_t size, +as_map_ansegs(struct as *as, caddr_t addr, size_t size, int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated) { - uint_t szcvec = map_shm_pgszcvec(addr, size, - vn_a->amp == NULL ? (uintptr_t)addr : - (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE)); - + uint_t szcvec; + uchar_t type; + + ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE); + if (vn_a->type == MAP_SHARED) { + type = MAPPGSZC_SHM; + } else if (vn_a->type == MAP_PRIVATE) { + if (vn_a->szc == AS_MAP_HEAP) { + type = MAPPGSZC_HEAP; + } else if (vn_a->szc == AS_MAP_STACK) { + type = MAPPGSZC_STACK; + } else { + type = MAPPGSZC_PRIVM; + } + } + szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ? 
+ (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE), + (vn_a->flags & MAP_TEXT), type, 0); ASSERT(AS_WRITE_HELD(as, &as->a_lock)); ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); ASSERT(IS_P2ALIGNED(size, PAGESIZE)); @@ -1669,6 +1690,7 @@ caddr_t raddr; /* rounded down addr */ size_t rsize; /* rounded up size */ int error; + int unmap = 0; struct proc *p = curproc; raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); @@ -1695,15 +1717,19 @@ return (ENOMEM); } - if (AS_MAP_VNSEGS_USELPGS(crfp, argsp) || AS_MAP_SHAMP(crfp, argsp)) { - int unmap = 0; - if (AS_MAP_SHAMP(crfp, argsp)) { - error = as_map_sham(as, raddr, rsize, crfp, - (struct segvn_crargs *)argsp, &unmap); - } else { - error = as_map_vnsegs(as, raddr, rsize, crfp, - (struct segvn_crargs *)argsp, &unmap); + if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) { + error = as_map_vnsegs(as, raddr, rsize, crfp, + (struct segvn_crargs *)argsp, &unmap); + if (error != 0) { + AS_LOCK_EXIT(as, &as->a_lock); + if (unmap) { + (void) as_unmap(as, addr, size); + } + return (error); } + } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) { + error = as_map_ansegs(as, raddr, rsize, crfp, + (struct segvn_crargs *)argsp, &unmap); if (error != 0) { AS_LOCK_EXIT(as, &as->a_lock); if (unmap) { @@ -2741,6 +2767,377 @@ } /* + * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments + * in its chunk where s_szc is less than the szc we want to set. 
+ */ +static int +as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, + int *retry) +{ + struct seg *seg; + size_t ssize; + int error; + + seg = as_segat(as, raddr); + if (seg == NULL) { + panic("as_iset3_default_lpsize: no seg"); + } + + for (; rsize != 0; rsize -= ssize, raddr += ssize) { + if (raddr >= seg->s_base + seg->s_size) { + seg = AS_SEGNEXT(as, seg); + if (seg == NULL || raddr != seg->s_base) { + panic("as_iset3_default_lpsize: as changed"); + } + } + if ((raddr + rsize) > (seg->s_base + seg->s_size)) { + ssize = seg->s_base + seg->s_size - raddr; + } else { + ssize = rsize; + } + + if (szc > seg->s_szc) { + error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); + /* Only retry on EINVAL segments that have no vnode. */ + if (error == EINVAL) { + vnode_t *vp = NULL; + if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) && + (SEGOP_GETVP(seg, raddr, &vp) != 0 || + vp == NULL)) { + *retry = 1; + } else { + *retry = 0; + } + } + if (error) { + return (error); + } + } + } + return (0); +} + +/* + * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the + * pagesize on each segment in its range, but if any fails with EINVAL, + * then it reduces the pagesizes to the next size in the bitmap and + * retries as_iset3_default_lpsize(). The reason why the code retries + * smaller allowed sizes on EINVAL is because (a) the anon offset may not + * match the bigger sizes, and (b) it's hard to get this offset (to begin + * with) to pass to map_pgszcvec(). 
+ */ +static int +as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc, + uint_t szcvec) +{ + int error; + int retry; + + for (;;) { + error = as_iset3_default_lpsize(as, addr, size, szc, &retry); + if (error == EINVAL && retry) { + szcvec &= ~(1 << szc); + if (szcvec <= 1) { + return (EINVAL); + } + szc = highbit(szcvec) - 1; + } else { + return (error); + } + } +} + +/* + * as_iset1_default_lpsize() breaks its chunk into areas where existing + * segments have a smaller szc than we want to set. For each such area, + * it calls as_iset2_default_lpsize() + */ +static int +as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, + uint_t szcvec) +{ + struct seg *seg; + size_t ssize; + caddr_t setaddr = raddr; + size_t setsize = 0; + int set; + int error; + + ASSERT(AS_WRITE_HELD(as, &as->a_lock)); + + seg = as_segat(as, raddr); + if (seg == NULL) { + panic("as_iset1_default_lpsize: no seg"); + } + if (seg->s_szc < szc) { + set = 1; + } else { + set = 0; + } + + for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) { + if (raddr >= seg->s_base + seg->s_size) { + seg = AS_SEGNEXT(as, seg); + if (seg == NULL || raddr != seg->s_base) { + panic("as_iset1_default_lpsize: as changed"); + } + if (seg->s_szc >= szc && set) { + ASSERT(setsize != 0); + error = as_iset2_default_lpsize(as, + setaddr, setsize, szc, szcvec); + if (error) { + return (error); + } + set = 0; + } else if (seg->s_szc < szc && !set) { + setaddr = raddr; + setsize = 0; + set = 1; + } + } + if ((raddr + rsize) > (seg->s_base + seg->s_size)) { + ssize = seg->s_base + seg->s_size - raddr; + } else { + ssize = rsize; + } + } + error = 0; + if (set) { + ASSERT(setsize != 0); + error = as_iset2_default_lpsize(as, setaddr, setsize, + szc, szcvec); + } + return (error); +} + +/* + * as_iset_default_lpsize() breaks its chunk according to the size code bitmap + * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each + * chunk to 
as_iset1_default_lpsize(). + */ +static int +as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags, + int type) +{ + int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM; + uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, + flags, rtype, 1); + uint_t szc; + uint_t nszc; + int error; + caddr_t a; + caddr_t eaddr; + size_t segsize; + size_t pgsz; + uint_t save_szcvec; + + ASSERT(AS_WRITE_HELD(as, &as->a_lock)); + ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); + ASSERT(IS_P2ALIGNED(size, PAGESIZE)); + + szcvec &= ~1; + if (szcvec <= 1) { /* skip if base page size */ + return (0); + } + + /* Get the pagesize of the first larger page size. */ + szc = lowbit(szcvec) - 1; + pgsz = page_get_pagesize(szc); + eaddr = addr + size; + addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); + eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); + + save_szcvec = szcvec; + szcvec >>= (szc + 1); + nszc = szc; + while (szcvec) { + if ((szcvec & 0x1) == 0) { + nszc++; + szcvec >>= 1; + continue; + } + nszc++; + pgsz = page_get_pagesize(nszc); + a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); + if (a != addr) { + ASSERT(szc > 0); + ASSERT(a < eaddr); + segsize = a - addr; + error = as_iset1_default_lpsize(as, addr, segsize, szc, + save_szcvec); + if (error) { + return (error); + } + addr = a; + } + szc = nszc; + szcvec >>= 1; + } + + ASSERT(addr < eaddr); + szcvec = save_szcvec; + while (szcvec) { + a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); + ASSERT(a >= addr); + if (a != addr) { + ASSERT(szc > 0); + segsize = a - addr; + error = as_iset1_default_lpsize(as, addr, segsize, szc, + save_szcvec); + if (error) { + return (error); + } + addr = a; + } + szcvec &= ~(1 << szc); + if (szcvec) { + szc = highbit(szcvec) - 1; + pgsz = page_get_pagesize(szc); + } + } + ASSERT(addr == eaddr); + + return (0); +} + +/* + * Set the default large page size for the range. Called via memcntl with + * page size set to 0. 
as_set_default_lpsize breaks the range down into
+ * chunks with the same type/flags, ignores non-segvn segments, and passes
+ * each chunk to as_iset_default_lpsize().
+ */
+int
+as_set_default_lpsize(struct as *as, caddr_t addr, size_t size)
+{
+	struct seg *seg;
+	caddr_t raddr;
+	size_t rsize;
+	size_t ssize;
+	int rtype, rflags;
+	int stype, sflags;
+	int error;
+	caddr_t setaddr;
+	size_t setsize;
+	int segvn;
+
+	if (size == 0)
+		return (0);
+
+	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
+again:
+	error = 0;
+
+	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
+	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
+	    (size_t)raddr;
+
+	if (raddr + rsize < raddr) {		/* check for wraparound */
+		AS_LOCK_EXIT(as, &as->a_lock);
+		return (ENOMEM);
+	}
+	as_clearwatchprot(as, raddr, rsize);
+	seg = as_segat(as, raddr);
+	if (seg == NULL) {
+		as_setwatch(as);
+		AS_LOCK_EXIT(as, &as->a_lock);
+		return (ENOMEM);
+	}
+	if (seg->s_ops == &segvn_ops) {
+		rtype = SEGOP_GETTYPE(seg, addr);
+		rflags = rtype & (MAP_TEXT | MAP_INITDATA);
+		rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
+		segvn = 1;
+	} else {
+		segvn = 0;
+	}
+	setaddr = raddr;
+	setsize = 0;
+
+	for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
+		if (raddr >= (seg->s_base + seg->s_size)) {
+			seg = AS_SEGNEXT(as, seg);
+			if (seg == NULL || raddr != seg->s_base) {
+				error = ENOMEM;
+				break;
+			}
+			if (seg->s_ops == &segvn_ops) {
+				stype = SEGOP_GETTYPE(seg, raddr);
+				sflags = stype & (MAP_TEXT | MAP_INITDATA);
+				stype &= (MAP_SHARED | MAP_PRIVATE);
+				if (segvn && (rflags != sflags ||
+				    rtype != stype)) {
+					/*
+					 * The next segment is also segvn but
+					 * has different flags and/or type.
+ */ + ASSERT(setsize != 0); + error = as_iset_default_lpsize(as, + setaddr, setsize, rflags, rtype); + if (error) { + break; + } + rflags = sflags; + rtype = stype; + setaddr = raddr; + setsize = 0; + } else if (!segvn) { + rflags = sflags; + rtype = stype; + setaddr = raddr; + setsize = 0; + segvn = 1; + } + } else if (segvn) { + /* The next segment is not segvn. */ + ASSERT(setsize != 0); + error = as_iset_default_lpsize(as, + setaddr, setsize, rflags, rtype); + if (error) { + break; + } + segvn = 0; + } + } + if ((raddr + rsize) > (seg->s_base + seg->s_size)) { + ssize = seg->s_base + seg->s_size - raddr; + } else { + ssize = rsize; + } + } + if (error == 0 && segvn) { + /* The last chunk when rsize == 0. */ + ASSERT(setsize != 0); + error = as_iset_default_lpsize(as, setaddr, setsize, + rflags, rtype); + } + + if (error == IE_RETRY) { + goto again; + } else if (error == IE_NOMEM) { + error = EAGAIN; + } else if (error == ENOTSUP) { + error = EINVAL; + } else if (error == EAGAIN) { + mutex_enter(&as->a_contents); + if (AS_ISUNMAPWAIT(as) == 0) { + cv_broadcast(&as->a_cv); + } + AS_SETUNMAPWAIT(as); + AS_LOCK_EXIT(as, &as->a_lock); + while (AS_ISUNMAPWAIT(as)) { + cv_wait(&as->a_cv, &as->a_contents); + } + mutex_exit(&as->a_contents); + AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); + goto again; + } + + as_setwatch(as); + AS_LOCK_EXIT(as, &as->a_lock); + return (error); +} + +/* * Setup all of the uninitialized watched pages that we can. */ void
--- a/usr/src/uts/i86pc/os/startup.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/i86pc/os/startup.c Thu Oct 26 16:44:53 2006 -0700 @@ -1475,8 +1475,7 @@ extern void hat_kern_setup(void); pgcnt_t pages_left; - extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg; - extern pgcnt_t auto_lpg_min_physmem; + extern int use_brk_lpg, use_stk_lpg; PRM_POINT("startup_vm() starting..."); @@ -1729,11 +1728,21 @@ * disable automatic large pages for small memory systems or * when the disable flag is set. */ - if (physmem < auto_lpg_min_physmem || auto_lpg_disable) { - exec_lpg_disable = 1; + if (!auto_lpg_disable && mmu.max_page_level > 0) { + max_uheap_lpsize = LEVEL_SIZE(1); + max_ustack_lpsize = LEVEL_SIZE(1); + max_privmap_lpsize = LEVEL_SIZE(1); + max_uidata_lpsize = LEVEL_SIZE(1); + max_utext_lpsize = LEVEL_SIZE(1); + max_shm_lpsize = LEVEL_SIZE(1); + } + if (physmem < privm_lpg_min_physmem || mmu.max_page_level == 0 || + auto_lpg_disable) { use_brk_lpg = 0; use_stk_lpg = 0; - use_zmap_lpg = 0; + } + if (mmu.max_page_level > 0) { + mcntl0_lpsize = LEVEL_SIZE(1); } PRM_POINT("Calling hat_init_finish()...");
--- a/usr/src/uts/i86pc/vm/vm_dep.h Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/i86pc/vm/vm_dep.h Thu Oct 26 16:44:53 2006 -0700 @@ -569,6 +569,29 @@ #define PGI_MT_RANGE (PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G) /* + * Maximum and default values for user heap, stack, private and shared + * anonymous memory, and user text and initialized data. + * Used by map_pgsz*() routines. + */ +extern size_t max_uheap_lpsize; +extern size_t default_uheap_lpsize; +extern size_t max_ustack_lpsize; +extern size_t default_ustack_lpsize; +extern size_t max_privmap_lpsize; +extern size_t max_uidata_lpsize; +extern size_t max_utext_lpsize; +extern size_t max_shm_lpsize; +extern size_t mcntl0_lpsize; + +/* + * Sanity control. Don't use large pages regardless of user + * settings if there's less than priv or shm_lpg_min_physmem memory installed. + * The units for this variable are 8K pages. + */ +extern pgcnt_t privm_lpg_min_physmem; +extern pgcnt_t shm_lpg_min_physmem; + +/* * hash as and addr to get a bin. */
--- a/usr/src/uts/i86pc/vm/vm_machdep.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/i86pc/vm/vm_machdep.c Thu Oct 26 16:44:53 2006 -0700 @@ -55,6 +55,7 @@ #include <sys/exec.h> #include <sys/exechdr.h> #include <sys/debug.h> +#include <sys/vmsystm.h> #include <vm/hat.h> #include <vm/as.h> @@ -122,39 +123,80 @@ /* How many page sizes the users can see */ uint_t mmu_exported_page_sizes; -size_t auto_lpg_va_default = MMU_PAGESIZE; /* used by zmap() */ /* * Number of pages in 1 GB. Don't enable automatic large pages if we have * fewer than this many pages. */ -pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); +pgcnt_t shm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); +pgcnt_t privm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); + +/* + * Maximum and default segment size tunables for user private + * and shared anon memory, and user text and initialized data. + * These can be patched via /etc/system to allow large pages + * to be used for mapping application private and shared anon memory. 
+ */ +size_t mcntl0_lpsize = MMU_PAGESIZE; +size_t max_uheap_lpsize = MMU_PAGESIZE; +size_t default_uheap_lpsize = MMU_PAGESIZE; +size_t max_ustack_lpsize = MMU_PAGESIZE; +size_t default_ustack_lpsize = MMU_PAGESIZE; +size_t max_privmap_lpsize = MMU_PAGESIZE; +size_t max_uidata_lpsize = MMU_PAGESIZE; +size_t max_utext_lpsize = MMU_PAGESIZE; +size_t max_shm_lpsize = MMU_PAGESIZE; /* * Return the optimum page size for a given mapping */ /*ARGSUSED*/ size_t -map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap) +map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl) { - level_t l; + level_t l = 0; + size_t pgsz = MMU_PAGESIZE; + size_t max_lpsize; + uint_t mszc; - if (remap) - *remap = 0; + ASSERT(maptype != MAPPGSZ_VA); + + if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) { + return (MMU_PAGESIZE); + } switch (maptype) { - + case MAPPGSZ_HEAP: case MAPPGSZ_STK: - case MAPPGSZ_HEAP: - case MAPPGSZ_VA: + max_lpsize = memcntl ? mcntl0_lpsize : (maptype == + MAPPGSZ_HEAP ? max_uheap_lpsize : max_ustack_lpsize); + if (max_lpsize == MMU_PAGESIZE) { + return (MMU_PAGESIZE); + } + if (len == 0) { + len = (maptype == MAPPGSZ_HEAP) ? p->p_brkbase + + p->p_brksize - p->p_bssbase : p->p_stksize; + } + len = (maptype == MAPPGSZ_HEAP) ? MAX(len, + default_uheap_lpsize) : MAX(len, default_ustack_lpsize); + /* * use the pages size that best fits len */ for (l = mmu.max_page_level; l > 0; --l) { - if (len < LEVEL_SIZE(l)) + if (LEVEL_SIZE(l) > max_lpsize || len < LEVEL_SIZE(l)) { continue; + } else { + pgsz = LEVEL_SIZE(l); + } break; } - return (LEVEL_SIZE(l)); + + mszc = (maptype == MAPPGSZ_HEAP ? p->p_brkpageszc : + p->p_stkpageszc); + if (addr == 0 && (pgsz < hw_page_array[mszc].hp_size)) { + pgsz = hw_page_array[mszc].hp_size; + } + return (pgsz); /* * for ISM use the 1st large page size. 
@@ -164,65 +206,96 @@ return (MMU_PAGESIZE); return (LEVEL_SIZE(1)); } - return (0); + return (pgsz); } -/* - * This can be patched via /etc/system to allow large pages - * to be used for mapping application and libraries text segments. - */ -int use_text_largepages = 0; -int use_shm_largepages = 0; +static uint_t +map_szcvec(caddr_t addr, size_t size, uintptr_t off, size_t max_lpsize, + size_t min_physmem) +{ + caddr_t eaddr = addr + size; + uint_t szcvec = 0; + caddr_t raddr; + caddr_t readdr; + size_t pgsz; + int i; + + if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) { + return (0); + } + + for (i = mmu_page_sizes - 1; i > 0; i--) { + pgsz = page_get_pagesize(i); + if (pgsz > max_lpsize) { + continue; + } + raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); + readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); + if (raddr < addr || raddr >= readdr) { + continue; + } + if (P2PHASE((uintptr_t)addr ^ off, pgsz)) { + continue; + } + /* + * Set szcvec to the remaining page sizes. + */ + szcvec = ((1 << (i + 1)) - 1) & ~1; + break; + } + return (szcvec); +} /* * Return a bit vector of large page size codes that * can be used to map [addr, addr + len) region. 
*/ - /*ARGSUSED*/ uint_t -map_execseg_pgszcvec(int text, caddr_t addr, size_t len) +map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type, + int memcntl) { - size_t pgsz; - caddr_t a; + size_t max_lpsize = mcntl0_lpsize; - if (!text || !use_text_largepages || - mmu.max_page_level == 0) + if (mmu.max_page_level == 0) return (0); - pgsz = LEVEL_SIZE(1); - a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); - if (a < addr || a >= addr + len) { - return (0); - } - len -= (a - addr); - if (len < pgsz) { - return (0); - } - return (1 << 1); -} + if (flags & MAP_TEXT) { + if (!memcntl) + max_lpsize = max_utext_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + shm_lpg_min_physmem)); + + } else if (flags & MAP_INITDATA) { + if (!memcntl) + max_lpsize = max_uidata_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + privm_lpg_min_physmem)); + + } else if (type == MAPPGSZC_SHM) { + if (!memcntl) + max_lpsize = max_shm_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + shm_lpg_min_physmem)); -uint_t -map_shm_pgszcvec(caddr_t addr, size_t len, uintptr_t off) -{ - size_t pgsz; - caddr_t a; - - if (!use_shm_largepages || mmu.max_page_level == 0) { - return (0); - } + } else if (type == MAPPGSZC_HEAP) { + if (!memcntl) + max_lpsize = max_uheap_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + privm_lpg_min_physmem)); - pgsz = LEVEL_SIZE(1); - a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); - if (a < addr || a >= addr + len || - P2PHASE((uintptr_t)addr ^ off, pgsz)) { - return (0); + } else if (type == MAPPGSZC_STACK) { + if (!memcntl) + max_lpsize = max_ustack_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + privm_lpg_min_physmem)); + + } else { + if (!memcntl) + max_lpsize = max_privmap_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + privm_lpg_min_physmem)); } - len -= (a - addr); - if (len < pgsz) { - return (0); - } - return (1 << 1); } /*
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c Thu Oct 26 16:44:53 2006 -0700 @@ -139,14 +139,21 @@ #define LARGE_PAGES_OFF 0x1 /* - * WARNING: 512K pages MUST be disabled for ISM/DISM. If not - * a process would page fault indefinitely if it tried to - * access a 512K page. - */ -int disable_ism_large_pages = (1 << TTE512K); -int disable_large_pages = 0; -int disable_auto_large_pages = 0; -int disable_shm_large_pages = 0; + * The disable_large_pages and disable_ism_large_pages variables control + * hat_memload_array and the page sizes to be used by ISM and the kernel. + * + * The disable_auto_data_large_pages and disable_auto_text_large_pages variables + * are only used to control which OOB pages to use at upper VM segment creation + * time, and are set in hat_init_pagesizes and used in the map_pgsz* routines. + * Their values may come from platform or CPU specific code to disable page + * sizes that should not be used. + * + * WARNING: 512K pages are currently not supported for ISM/DISM. 
+ */ +uint_t disable_large_pages = 0; +uint_t disable_ism_large_pages = (1 << TTE512K); +uint_t disable_auto_data_large_pages = 0; +uint_t disable_auto_text_large_pages = 0; /* * Private sfmmu data structures for hat management @@ -891,17 +898,12 @@ mmu_exported_page_sizes = 0; for (i = TTE8K; i < max_mmu_page_sizes; i++) { - extern int disable_text_largepages; - extern int disable_initdata_largepages; szc_2_userszc[i] = (uint_t)-1; userszc_2_szc[i] = (uint_t)-1; if ((mmu_exported_pagesize_mask & (1 << i)) == 0) { disable_large_pages |= (1 << i); - disable_ism_large_pages |= (1 << i); - disable_text_largepages |= (1 << i); - disable_initdata_largepages |= (1 << i); } else { szc_2_userszc[i] = mmu_exported_page_sizes; userszc_2_szc[mmu_exported_page_sizes] = i; @@ -909,7 +911,9 @@ } } - disable_auto_large_pages = disable_large_pages; + disable_ism_large_pages |= disable_large_pages; + disable_auto_data_large_pages = disable_large_pages; + disable_auto_text_large_pages = disable_large_pages; /* * Initialize mmu-specific large page sizes. @@ -918,11 +922,11 @@ disable_large_pages |= mmu_large_pages_disabled(HAT_LOAD); disable_ism_large_pages |= mmu_large_pages_disabled(HAT_LOAD_SHARE); - disable_auto_large_pages |= - mmu_large_pages_disabled(HAT_LOAD_AUTOLPG); - } - - disable_shm_large_pages = disable_auto_large_pages; + disable_auto_data_large_pages |= + mmu_large_pages_disabled(HAT_AUTO_DATA); + disable_auto_text_large_pages |= + mmu_large_pages_disabled(HAT_AUTO_TEXT); + } } /* @@ -1993,7 +1997,7 @@ pgcnt_t numpg, npgs; tte_t tte; page_t *pp; - int large_pages_disable; + uint_t large_pages_disable; ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h Thu Oct 26 16:44:53 2006 -0700 @@ -1782,7 +1782,7 @@ #pragma weak mmu_set_ctx_page_sizes #pragma weak mmu_check_page_sizes -extern int mmu_large_pages_disabled(uint_t); +extern uint_t mmu_large_pages_disabled(uint_t); extern void mmu_set_ctx_page_sizes(sfmmu_t *); extern void mmu_check_page_sizes(sfmmu_t *, uint64_t *); @@ -1822,6 +1822,11 @@ extern vmem_t *kmem_tsb_default_arena[]; extern int tsb_lgrp_affinity; +extern uint_t disable_large_pages; +extern uint_t disable_ism_large_pages; +extern uint_t disable_auto_data_large_pages; +extern uint_t disable_auto_text_large_pages; + /* kpm externals */ extern pfn_t sfmmu_kpm_vatopfn(caddr_t); extern void sfmmu_kpm_patch_tlbm(void);
--- a/usr/src/uts/sun4/os/startup.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sun4/os/startup.c Thu Oct 26 16:44:53 2006 -0700 @@ -1878,7 +1878,7 @@ pgcnt_t max_phys_segkp; int mnode; - extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg; + extern int use_brk_lpg, use_stk_lpg; /* * get prom's mappings, create hments for them and switch @@ -1974,12 +1974,12 @@ avmem = (uint64_t)freemem << PAGESHIFT; cmn_err(CE_CONT, "?avail mem = %lld\n", (unsigned long long)avmem); - /* For small memory systems disable automatic large pages. */ - if (physmem < auto_lpg_min_physmem) { - exec_lpg_disable = 1; + /* + * For small memory systems disable automatic large pages. + */ + if (physmem < privm_lpg_min_physmem) { use_brk_lpg = 0; use_stk_lpg = 0; - use_zmap_lpg = 0; } /*
--- a/usr/src/uts/sun4/vm/vm_dep.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sun4/vm/vm_dep.c Thu Oct 26 16:44:53 2006 -0700 @@ -97,8 +97,6 @@ caddr_t errata57_limit; #endif -extern int disable_auto_large_pages; /* used by map_pgsz*() routines */ - extern void page_relocate_hash(page_t *, page_t *); /* @@ -467,89 +465,56 @@ } } -#define MAP_PGSZ_COMMON(pgsz, n, upper, lower, len) \ - for ((n) = (upper); (n) > (lower); (n)--) { \ - if (disable_auto_large_pages & (1 << (n))) \ - continue; \ - if (hw_page_array[(n)].hp_size <= (len)) { \ - (pgsz) = hw_page_array[(n)].hp_size; \ - break; \ - } \ +/* + * Return non 0 value if the address may cause a VAC alias with KPM mappings. + * KPM selects an address such that it's equal offset modulo shm_alignment and + * assumes it can't be in VAC conflict with any larger than PAGESIZE mapping. + */ +int +map_addr_vacalign_check(caddr_t addr, u_offset_t off) +{ + if (vac) { + return (((uintptr_t)addr ^ off) & shm_alignment - 1); + } else { + return (0); } - - -/*ARGSUSED*/ -static size_t -map_pgszva(struct proc *p, caddr_t addr, size_t len) -{ - size_t pgsz = MMU_PAGESIZE; - int n, upper; +} - /* - * Select the best fit page size within the constraints of - * auto_lpg_{min,max}szc. - * - * Note that we also take the heap size into account when - * deciding if we've crossed the threshold at which we should - * increase the page size. This isn't perfect since the heap - * may not have reached its full size yet, but it's better than - * not considering it at all. - */ - len += p->p_brksize; - if (ptob(auto_lpg_tlb_threshold) <= len) { - - upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc); - - /* - * Use auto_lpg_minszc - 1 as the limit so we never drop - * below auto_lpg_minszc. We don't have a size code to refer - * to like we have for bss and stack, so we assume 0. - * auto_lpg_minszc should always be >= 0. Using - * auto_lpg_minszc cuts off the loop. 
- */ - MAP_PGSZ_COMMON(pgsz, n, upper, auto_lpg_minszc - 1, len); - } - - return (pgsz); -} +/* + * Sanity control. Don't use large pages regardless of user + * settings if there's less than priv or shm_lpg_min_physmem memory installed. + * The units for this variable is 8K pages. + */ +pgcnt_t shm_lpg_min_physmem = 131072; /* 1GB */ +pgcnt_t privm_lpg_min_physmem = 131072; /* 1GB */ static size_t map_pgszheap(struct proc *p, caddr_t addr, size_t len) { - size_t pgsz; - int n, upper, lower; + size_t pgsz = MMU_PAGESIZE; + int szc; /* * If len is zero, retrieve from proc and don't demote the page size. + * Use atleast the default pagesize. */ if (len == 0) { - len = p->p_brksize; + len = p->p_brkbase + p->p_brksize - p->p_bssbase; + } + len = MAX(len, default_uheap_lpsize); + + for (szc = mmu_page_sizes - 1; szc >= 0; szc--) { + pgsz = hw_page_array[szc].hp_size; + if ((disable_auto_data_large_pages & (1 << szc)) || + pgsz > max_uheap_lpsize) + continue; + if (len >= pgsz) { + break; + } } /* - * Still zero? Then we don't have a heap yet, so pick the default - * heap size. - */ - if (len == 0) { - pgsz = auto_lpg_heap_default; - } else { - pgsz = hw_page_array[p->p_brkpageszc].hp_size; - } - - if ((pgsz * auto_lpg_tlb_threshold) <= len) { - /* - * We're past the threshold, so select the best fit - * page size within the constraints of - * auto_lpg_{min,max}szc and the minimum required - * alignment. - */ - upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc); - lower = MAX(auto_lpg_minszc - 1, p->p_brkpageszc); - MAP_PGSZ_COMMON(pgsz, n, upper, lower, len); - } - - /* - * If addr == 0 we were called by memcntl() or exec_args() when the + * If addr == 0 we were called by memcntl() when the * size code is 0. Don't set pgsz less than current size. 
*/ if (addr == 0 && (pgsz < hw_page_array[p->p_brkpageszc].hp_size)) { @@ -562,36 +527,26 @@ static size_t map_pgszstk(struct proc *p, caddr_t addr, size_t len) { - size_t pgsz; - int n, upper, lower; + size_t pgsz = MMU_PAGESIZE; + int szc; /* * If len is zero, retrieve from proc and don't demote the page size. + * Use atleast the default pagesize. */ if (len == 0) { len = p->p_stksize; } - - /* - * Still zero? Then we don't have a heap yet, so pick the default - * stack size. - */ - if (len == 0) { - pgsz = auto_lpg_stack_default; - } else { - pgsz = hw_page_array[p->p_stkpageszc].hp_size; - } + len = MAX(len, default_ustack_lpsize); - if ((pgsz * auto_lpg_tlb_threshold) <= len) { - /* - * We're past the threshold, so select the best fit - * page size within the constraints of - * auto_lpg_{min,max}szc and the minimum required - * alignment. - */ - upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc); - lower = MAX(auto_lpg_minszc - 1, p->p_brkpageszc); - MAP_PGSZ_COMMON(pgsz, n, upper, lower, len); + for (szc = mmu_page_sizes - 1; szc >= 0; szc--) { + pgsz = hw_page_array[szc].hp_size; + if ((disable_auto_data_large_pages & (1 << szc)) || + pgsz > max_ustack_lpsize) + continue; + if (len >= pgsz) { + break; + } } /* @@ -610,7 +565,6 @@ { uint_t szc; size_t pgsz; - extern int disable_ism_large_pages; for (szc = mmu_page_sizes - 1; szc >= TTE4M; szc--) { if (disable_ism_large_pages & (1 << szc)) @@ -620,234 +574,69 @@ if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) return (pgsz); } + return (DEFAULT_ISM_PAGESIZE); } /* * Suggest a page size to be used to map a segment of type maptype and length * len. Returns a page size (not a size code). - * If remap is non-NULL, fill in a value suggesting whether or not to remap - * this segment. 
*/ +/* ARGSUSED */ size_t -map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap) +map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl) { - size_t pgsz = 0; + size_t pgsz = MMU_PAGESIZE; + + ASSERT(maptype != MAPPGSZ_VA); - if (remap != NULL) - *remap = (len > auto_lpg_remap_threshold); + if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) { + return (MMU_PAGESIZE); + } switch (maptype) { case MAPPGSZ_ISM: pgsz = map_pgszism(addr, len); break; - case MAPPGSZ_VA: - pgsz = map_pgszva(p, addr, len); - break; - case MAPPGSZ_STK: - pgsz = map_pgszstk(p, addr, len); + if (max_ustack_lpsize > MMU_PAGESIZE) { + pgsz = map_pgszstk(p, addr, len); + } break; case MAPPGSZ_HEAP: - pgsz = map_pgszheap(p, addr, len); + if (max_uheap_lpsize > MMU_PAGESIZE) { + pgsz = map_pgszheap(p, addr, len); + } break; } return (pgsz); } -/* - * Return non 0 value if the address may cause a VAC alias with KPM mappings. - * KPM selects an address such that it's equal offset modulo shm_alignment and - * assumes it can't be in VAC conflict with any larger than PAGESIZE mapping. - */ -int -map_addr_vacalign_check(caddr_t addr, u_offset_t off) -{ - if (vac) { - return (((uintptr_t)addr ^ off) & shm_alignment - 1); - } else { - return (0); - } -} - -/* - * use_text_pgsz64k, use_initdata_pgsz64k and use_text_pgsz4m - * can be set in platform or CPU specific code but user can change the - * default values via /etc/system. - * - * Initial values are defined in architecture specific mach_vm_dep.c file. - */ -extern int use_text_pgsz64k; -extern int use_text_pgsz4m; -extern int use_initdata_pgsz64k; - -/* - * disable_text_largepages and disable_initdata_largepages bitmaks are set in - * platform or CPU specific code to disable page sizes that should not be - * used. These variables normally shouldn't be changed via /etc/system. 
A - * particular page size for text or inititialized data will be used by default - * if both one of use_* variables is set to 1 AND this page size is not - * disabled in the corresponding disable_* bitmask variable. - * - * Initial values are defined in architecture specific mach_vm_dep.c file. - */ -extern int disable_text_largepages; -extern int disable_initdata_largepages; - -/* - * Minimum segment size tunables before 64K or 4M large pages - * should be used to map it. - * - * Initial values are defined in architecture specific mach_vm_dep.c file. - */ -extern size_t text_pgsz64k_minsize; -extern size_t text_pgsz4m_minsize; -extern size_t initdata_pgsz64k_minsize; - -/* - * Sanity control. Don't use large pages regardless of user - * settings if there's less than execseg_lpg_min_physmem memory installed. - * The units for this variable is 8K pages. - */ -pgcnt_t execseg_lpg_min_physmem = 131072; /* 1GB */ - -extern int disable_shm_large_pages; -pgcnt_t shm_lpg_min_physmem = 131072; /* 1GB */ -extern size_t max_shm_lpsize; - /* assumes TTE8K...TTE4M == szc */ static uint_t -map_text_pgsz4m(caddr_t addr, size_t len) -{ - caddr_t a; - - if (len < text_pgsz4m_minsize) { - return (0); - } - - a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE4M, uintptr_t); - if (a < addr || a >= addr + len) { - return (0); - } - len -= (a - addr); - if (len < MMU_PAGESIZE4M) { - return (0); - } - - return (1 << TTE4M); -} - -static uint_t -map_text_pgsz64k(caddr_t addr, size_t len) -{ - caddr_t a; - size_t svlen = len; - - if (len < text_pgsz64k_minsize) { - return (0); - } - - a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE64K, uintptr_t); - if (a < addr || a >= addr + len) { - return (0); - } - len -= (a - addr); - if (len < MMU_PAGESIZE64K) { - return (0); - } - if (!use_text_pgsz4m || - disable_text_largepages & (1 << TTE4M)) { - return (1 << TTE64K); - } - if (svlen < text_pgsz4m_minsize) { - return (1 << TTE64K); - } - addr = a; - a = (caddr_t)P2ROUNDUP_TYPED(addr, 
MMU_PAGESIZE4M, uintptr_t); - if (a < addr || a >= addr + len) { - return (1 << TTE64K); - } - len -= (a - addr); - if (len < MMU_PAGESIZE4M) { - return (1 << TTE64K); - } - return ((1 << TTE4M) | (1 << TTE64K)); -} - -static uint_t -map_initdata_pgsz64k(caddr_t addr, size_t len) -{ - caddr_t a; - - if (len < initdata_pgsz64k_minsize) { - return (0); - } - - a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE64K, uintptr_t); - if (a < addr || a >= addr + len) { - return (0); - } - len -= (a - addr); - if (len < MMU_PAGESIZE64K) { - return (0); - } - return (1 << TTE64K); -} - -/* - * Return a bit vector of large page size codes that - * can be used to map [addr, addr + len) region. - */ -uint_t -map_execseg_pgszcvec(int text, caddr_t addr, size_t len) -{ - uint_t ret = 0; - - if (physmem < execseg_lpg_min_physmem) { - return (0); - } - - if (text) { - if (use_text_pgsz64k && - !(disable_text_largepages & (1 << TTE64K))) { - ret = map_text_pgsz64k(addr, len); - } else if (use_text_pgsz4m && - !(disable_text_largepages & (1 << TTE4M))) { - ret = map_text_pgsz4m(addr, len); - } - } else if (use_initdata_pgsz64k && - !(disable_initdata_largepages & (1 << TTE64K))) { - ret = map_initdata_pgsz64k(addr, len); - } - - return (ret); -} - -uint_t -map_shm_pgszcvec(caddr_t addr, size_t size, uintptr_t off) +map_szcvec(caddr_t addr, size_t size, uintptr_t off, int disable_lpgs, + size_t max_lpsize, size_t min_physmem) { caddr_t eaddr = addr + size; uint_t szcvec = 0; - int i; caddr_t raddr; caddr_t readdr; size_t pgsz; + int i; - if (physmem < shm_lpg_min_physmem || mmu_page_sizes <= 1 || - max_shm_lpsize <= MMU_PAGESIZE) { + if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) { return (0); } - for (i = mmu_page_sizes - 1; i > 0; i--) { - if (disable_shm_large_pages & (1 << i)) { + if (disable_lpgs & (1 << i)) { continue; } pgsz = page_get_pagesize(i); - if (pgsz > max_shm_lpsize) { + if (pgsz > max_lpsize) { continue; } raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); @@ 
-862,7 +651,7 @@ /* * And or in the remaining enabled page sizes. */ - szcvec |= P2PHASE(~disable_shm_large_pages, (1 << i)); + szcvec |= P2PHASE(~disable_lpgs, (1 << i)); szcvec &= ~1; /* no need to return 8K pagesize */ break; } @@ -870,6 +659,41 @@ } /* + * Return a bit vector of large page size codes that + * can be used to map [addr, addr + len) region. + */ +/* ARGSUSED */ +uint_t +map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type, + int memcntl) +{ + if (flags & MAP_TEXT) { + return (map_szcvec(addr, size, off, disable_auto_text_large_pages, + max_utext_lpsize, shm_lpg_min_physmem)); + + } else if (flags & MAP_INITDATA) { + return (map_szcvec(addr, size, off, disable_auto_data_large_pages, + max_uidata_lpsize, privm_lpg_min_physmem)); + + } else if (type == MAPPGSZC_SHM) { + return (map_szcvec(addr, size, off, disable_auto_data_large_pages, + max_shm_lpsize, shm_lpg_min_physmem)); + + } else if (type == MAPPGSZC_HEAP) { + return (map_szcvec(addr, size, off, disable_auto_data_large_pages, + max_uheap_lpsize, privm_lpg_min_physmem)); + + } else if (type == MAPPGSZC_STACK) { + return (map_szcvec(addr, size, off, disable_auto_data_large_pages, + max_ustack_lpsize, privm_lpg_min_physmem)); + + } else { + return (map_szcvec(addr, size, off, disable_auto_data_large_pages, + max_privmap_lpsize, privm_lpg_min_physmem)); + } +} + +/* * Anchored in the table below are counters used to keep track * of free contiguous physical memory. Each element of the table contains * the array of counters, the size of array which is allocated during @@ -1240,7 +1064,6 @@ size_t memtotal = physmem * PAGESIZE; size_t mmusz; uint_t szc; - extern int disable_large_pages; if (memtotal < segkmem_lpminphysmem) return (PAGESIZE);
--- a/usr/src/uts/sun4/vm/vm_dep.h Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sun4/vm/vm_dep.h Thu Oct 26 16:44:53 2006 -0700 @@ -423,18 +423,33 @@ extern int vac_shift; /* - * Auto large page selection support variables. Some CPU - * implementations may differ from the defaults and will need - * to change these. + * Maximum and default values for user heap, stack, private and shared + * anonymous memory, and user text and initialized data. + * + * Initial values are defined in architecture specific mach_vm_dep.c file. + * Used by map_pgsz*() routines. */ -extern int auto_lpg_tlb_threshold; -extern int auto_lpg_minszc; -extern int auto_lpg_maxszc; -extern size_t auto_lpg_heap_default; -extern size_t auto_lpg_stack_default; -extern size_t auto_lpg_va_default; -extern size_t auto_lpg_remap_threshold; -extern pgcnt_t auto_lpg_min_physmem; +extern size_t max_uheap_lpsize; +extern size_t default_uheap_lpsize; +extern size_t max_ustack_lpsize; +extern size_t default_ustack_lpsize; +extern size_t max_privmap_lpsize; +extern size_t max_uidata_lpsize; +extern size_t max_utext_lpsize; +extern size_t max_shm_lpsize; + +/* + * For adjusting the default lpsize, for DTLB-limited page sizes. + */ +extern void adjust_data_maxlpsize(size_t ismpagesize); + +/* + * Sanity control. Don't use large pages regardless of user + * settings if there's less than priv or shm_lpg_min_physmem memory installed. + * The units for this variable are 8K pages. + */ +extern pgcnt_t privm_lpg_min_physmem; +extern pgcnt_t shm_lpg_min_physmem; /* * AS_2_BIN macro controls the page coloring policy.
--- a/usr/src/uts/sun4u/cpu/opl_olympus.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sun4u/cpu/opl_olympus.c Thu Oct 26 16:44:53 2006 -0700 @@ -494,8 +494,6 @@ extern int at_flags; extern int disable_delay_tlb_flush, delay_tlb_flush; extern int cpc_has_overflow_intr; - extern int disable_text_largepages; - extern int use_text_pgsz4m; uint64_t cpu0_log; extern uint64_t opl_cpu0_err_log; @@ -590,16 +588,6 @@ * fpRAS. */ fpras_implemented = 0; - - /* - * Enable 4M pages to be used for mapping user text by default. Don't - * use large pages for initialized data segments since we may not know - * at exec() time what should be the preferred large page size for DTLB - * programming. - */ - use_text_pgsz4m = 1; - disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) | - (1 << TTE32M) | (1 << TTE256M); } /* @@ -700,11 +688,14 @@ * */ int init_mmu_page_sizes = 0; -static int mmu_disable_ism_large_pages = ((1 << TTE64K) | + +static uint_t mmu_disable_large_pages = 0; +static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); -static int mmu_disable_auto_large_pages = ((1 << TTE64K) | +static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); -static int mmu_disable_large_pages = 0; +static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) | + (1 << TTE512K)); /* * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support. @@ -721,7 +712,6 @@ mmu_page_sizes = MMU_PAGE_SIZES; mmu_hashcnt = MAX_HASHCNT; mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE; - auto_lpg_maxszc = TTE4M; mmu_exported_pagesize_mask = (1 << TTE8K) | (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M) | (1 << TTE256M); @@ -747,19 +737,30 @@ /* * The function returns the mmu-specific values for the * hat's disable_large_pages, disable_ism_large_pages, and - * disable_auto_large_pages variables. 
+ * disable_auto_data_large_pages and + * disable_auto_text_large_pages variables. */ -int +uint_t mmu_large_pages_disabled(uint_t flag) { - int pages_disable = 0; + uint_t pages_disable = 0; + extern int use_text_pgsz64K; + extern int use_text_pgsz512K; if (flag == HAT_LOAD) { pages_disable = mmu_disable_large_pages; } else if (flag == HAT_LOAD_SHARE) { pages_disable = mmu_disable_ism_large_pages; - } else if (flag == HAT_LOAD_AUTOLPG) { - pages_disable = mmu_disable_auto_large_pages; + } else if (flag == HAT_AUTO_DATA) { + pages_disable = mmu_disable_auto_data_large_pages; + } else if (flag == HAT_AUTO_TEXT) { + pages_disable = mmu_disable_auto_text_large_pages; + if (use_text_pgsz512K) { + pages_disable &= ~(1 << TTE512K); + } + if (use_text_pgsz64K) { + pages_disable &= ~(1 << TTE64K); + } } return (pages_disable); } @@ -779,23 +780,22 @@ case MMU_PAGESIZE4M: mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); - mmu_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); - auto_lpg_maxszc = TTE4M; break; case MMU_PAGESIZE32M: mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE256M)); - mmu_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); - auto_lpg_maxszc = TTE32M; + adjust_data_maxlpsize(ism_pagesize); break; case MMU_PAGESIZE256M: mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M)); - mmu_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); - auto_lpg_maxszc = TTE256M; + adjust_data_maxlpsize(ism_pagesize); break; default: cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
--- a/usr/src/uts/sun4u/cpu/spitfire.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sun4u/cpu/spitfire.c Thu Oct 26 16:44:53 2006 -0700 @@ -34,6 +34,7 @@ #include <sys/elf_SPARC.h> #include <vm/hat_sfmmu.h> #include <vm/page.h> +#include <vm/vm_dep.h> #include <sys/cpuvar.h> #include <sys/spitregs.h> #include <sys/async.h> @@ -431,9 +432,6 @@ #if defined(SF_ERRATA_57) extern caddr_t errata57_limit; #endif - extern int disable_text_largepages; - extern int disable_initdata_largepages; - cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1; @@ -514,14 +512,10 @@ #endif /* - * Allow only 8K, 64K and 4M pages for text by default. - * Allow only 8K and 64K page for initialized data segments by - * default. + * Disable text by default. + * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c. */ - disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) | - (1 << TTE256M); - disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) | - (1 << TTE32M) | (1 << TTE256M); + max_utext_lpsize = MMU_PAGESIZE; } static int @@ -4490,27 +4484,6 @@ { } -static int mmu_disable_ism_large_pages = ((1 << TTE512K) | - (1 << TTE32M) | (1 << TTE256M)); -static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); - -/* - * The function returns the US_II mmu-specific values for the - * hat's disable_large_pages and disable_ism_large_pages variables. - */ -int -mmu_large_pages_disabled(uint_t flag) -{ - int pages_disable = 0; - - if (flag == HAT_LOAD) { - pages_disable = mmu_disable_large_pages; - } else if (flag == HAT_LOAD_SHARE) { - pages_disable = mmu_disable_ism_large_pages; - } - return (pages_disable); -} - /*ARGSUSED*/ void mmu_init_kernel_pgsz(struct hat *hat)
--- a/usr/src/uts/sun4u/cpu/us3_cheetah.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sun4u/cpu/us3_cheetah.c Thu Oct 26 16:44:53 2006 -0700 @@ -69,6 +69,11 @@ #endif /* CHEETAHPLUS_ERRATUM_25 */ /* + * Note that 'Cheetah PRM' refers to: + * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III + */ + +/* * Setup trap handlers. */ void @@ -122,10 +127,6 @@ "ecache-associativity", &ecache_associativity, CH_ECACHE_NWAY }; - extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg; - extern size_t max_shm_lpsize; - - for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) *prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval); @@ -143,11 +144,12 @@ /* * Cheetah's large page support has problems with large numbers of * large pages, so just disable large pages out-of-the-box. + * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c. */ - exec_lpg_disable = 1; - use_brk_lpg = 0; - use_stk_lpg = 0; - use_zmap_lpg = 0; + max_uheap_lpsize = MMU_PAGESIZE; + max_ustack_lpsize = MMU_PAGESIZE; + max_privmap_lpsize = MMU_PAGESIZE; + max_utext_lpsize = MMU_PAGESIZE; max_shm_lpsize = MMU_PAGESIZE; }
--- a/usr/src/uts/sun4u/cpu/us3_common.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sun4u/cpu/us3_common.c Thu Oct 26 16:44:53 2006 -0700 @@ -475,8 +475,6 @@ extern int at_flags; extern int disable_delay_tlb_flush, delay_tlb_flush; extern int cpc_has_overflow_intr; - extern int disable_text_largepages; - extern int use_text_pgsz4m; /* * Setup chip-specific trap handlers. @@ -574,16 +572,6 @@ fpras_implemented = 1; /* - * Enable 4M pages to be used for mapping user text by default. Don't - * use large pages for initialized data segments since we may not know - * at exec() time what should be the preferred large page size for DTLB - * programming. - */ - use_text_pgsz4m = 1; - disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) | - (1 << TTE32M) | (1 << TTE256M); - - /* * Setup CE lookup table */ CE_INITDISPTBL_POPULATE(ce_disp_table);
--- a/usr/src/uts/sun4u/cpu/us3_common_mmu.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sun4u/cpu/us3_common_mmu.c Thu Oct 26 16:44:53 2006 -0700 @@ -42,60 +42,58 @@ #include <sys/panic.h> /* - * Note that 'Cheetah PRM' refers to: - * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III - */ - -/* * pan_disable_ism_large_pages and pan_disable_large_pages are the Panther- * specific versions of disable_ism_large_pages and disable_large_pages, * and feed back into those two hat variables at hat initialization time, * for Panther-only systems. * - * chpjag_disable_ism_large_pages is the Ch/Jaguar-specific version of - * disable_ism_large_pages. Ditto for chjag_disable_large_pages. + * chjag_disable_large_pages is the Ch/Jaguar-specific version of + * disable_large_pages. Ditto for pan_disable_large_pages. + * Note that the Panther and Ch/Jaguar ITLB do not support 32M/256M pages. */ static int panther_only = 0; -static int pan_disable_ism_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); -static int pan_disable_large_pages = (1 << TTE256M); -static int pan_disable_auto_large_pages = ((1 << TTE64K) | +static uint_t pan_disable_large_pages = (1 << TTE256M); +static uint_t chjag_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); + +static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); - -static int chjag_disable_ism_large_pages = ((1 << TTE64K) | +static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); -static int chjag_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); -static int chjag_disable_auto_large_pages = ((1 << TTE64K) | +static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); /* - * The function returns the USIII-IV mmu-specific values for the + * The function returns the USIII+(i)-IV+ mmu-specific values for the * hat's 
disable_large_pages and disable_ism_large_pages variables. * Currently the hat's disable_large_pages and disable_ism_large_pages * already contain the generic sparc 4 page size info, and the return * values are or'd with those values. */ -int +uint_t mmu_large_pages_disabled(uint_t flag) { - int pages_disable = 0; + uint_t pages_disable = 0; + extern int use_text_pgsz64K; + extern int use_text_pgsz512K; - if (panther_only) { - if (flag == HAT_LOAD) { + if (flag == HAT_LOAD) { + if (panther_only) { pages_disable = pan_disable_large_pages; - } else if (flag == HAT_LOAD_SHARE) { - pages_disable = pan_disable_ism_large_pages; - } else if (flag == HAT_LOAD_AUTOLPG) { - pages_disable = pan_disable_auto_large_pages; + } else { + pages_disable = chjag_disable_large_pages; } - } else { - if (flag == HAT_LOAD) { - pages_disable = chjag_disable_large_pages; - } else if (flag == HAT_LOAD_SHARE) { - pages_disable = chjag_disable_ism_large_pages; - } else if (flag == HAT_LOAD_AUTOLPG) { - pages_disable = chjag_disable_auto_large_pages; + } else if (flag == HAT_LOAD_SHARE) { + pages_disable = mmu_disable_ism_large_pages; + } else if (flag == HAT_AUTO_DATA) { + pages_disable = mmu_disable_auto_data_large_pages; + } else if (flag == HAT_AUTO_TEXT) { + pages_disable = mmu_disable_auto_text_large_pages; + if (use_text_pgsz512K) { + pages_disable &= ~(1 << TTE512K); + } + if (use_text_pgsz64K) { + pages_disable &= ~(1 << TTE64K); } } return (pages_disable); @@ -141,7 +139,7 @@ * since it would be bad form to panic due * to a user typo. * - * The function re-initializes the pan_disable_ism_large_pages and + * The function re-initializes the disable_ism_large_pages and * pan_disable_large_pages variables, which are closely related. * Aka, if 32M is the desired [D]ISM page sizes, then 256M cannot be allowed * for non-ISM large page usage, or DTLB conflict will occur. 
Please see the @@ -151,37 +149,37 @@ mmu_init_large_pages(size_t ism_pagesize) { if (cpu_impl_dual_pgsz == 0) { /* disable_dual_pgsz flag */ - pan_disable_ism_large_pages = ((1 << TTE64K) | + pan_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); + mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); - pan_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); - auto_lpg_maxszc = TTE4M; + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | + (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); return; } switch (ism_pagesize) { case MMU_PAGESIZE4M: - pan_disable_ism_large_pages = ((1 << TTE64K) | + pan_disable_large_pages = (1 << TTE256M); + mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); - pan_disable_large_pages = (1 << TTE256M); - pan_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); - auto_lpg_maxszc = TTE4M; break; case MMU_PAGESIZE32M: - pan_disable_ism_large_pages = ((1 << TTE64K) | + pan_disable_large_pages = (1 << TTE256M); + mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE256M)); - pan_disable_large_pages = (1 << TTE256M); - pan_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); - auto_lpg_maxszc = TTE32M; + adjust_data_maxlpsize(ism_pagesize); break; case MMU_PAGESIZE256M: - pan_disable_ism_large_pages = ((1 << TTE64K) | + pan_disable_large_pages = (1 << TTE32M); + mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M)); - pan_disable_large_pages = (1 << TTE32M); - pan_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); - auto_lpg_maxszc = TTE256M; + adjust_data_maxlpsize(ism_pagesize); break; default: cmn_err(CE_WARN, 
"Unrecognized mmu_ism_pagesize value 0x%lx", @@ -211,7 +209,6 @@ (1 << TTE32M) | (1 << TTE256M); panther_dtlb_restrictions = 1; panther_only = 1; - auto_lpg_maxszc = TTE4M; } else if (npanther > 0) { panther_dtlb_restrictions = 1; }
--- a/usr/src/uts/sun4u/vm/mach_vm_dep.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sun4u/vm/mach_vm_dep.c Thu Oct 26 16:44:53 2006 -0700 @@ -95,55 +95,42 @@ }; /* - * use_text_pgsz64k, use_initdata_pgsz64k and use_text_pgsz4m - * can be set in platform or CPU specific code but user can change the - * default values via /etc/system. + * use_text_pgsz64k and use_text_pgsz512k allow the user to turn on these + * additional text page sizes for USIII-IV+ and OPL by changing the default + * values via /etc/system. */ - -int use_text_pgsz64k = 0; -int use_text_pgsz4m = 0; -int use_initdata_pgsz64k = 0; - -/* - * disable_text_largepages and disable_initdata_largepages bitmaks are set in - * platform or CPU specific code to disable page sizes that should not be - * used. These variables normally shouldn't be changed via /etc/system. A - * particular page size for text or inititialized data will be used by default - * if both one of use_* variables is set to 1 AND this page size is not - * disabled in the corresponding disable_* bitmask variable. - */ - -int disable_text_largepages = (1 << TTE4M) | (1 << TTE64K); -int disable_initdata_largepages = (1 << TTE64K); +int use_text_pgsz64K = 0; +int use_text_pgsz512K = 0; /* - * Minimum segment size tunables before 64K or 4M large pages - * should be used to map it. + * Maximum and default segment size tunables for user heap, stack, private + * and shared anonymous memory, and user text and initialized data. 
*/ -size_t text_pgsz64k_minsize = MMU_PAGESIZE64K; -size_t text_pgsz4m_minsize = MMU_PAGESIZE4M; -size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K; - -size_t max_shm_lpsize = ULONG_MAX; +size_t max_uheap_lpsize = MMU_PAGESIZE4M; +size_t default_uheap_lpsize = MMU_PAGESIZE; +size_t max_ustack_lpsize = MMU_PAGESIZE4M; +size_t default_ustack_lpsize = MMU_PAGESIZE; +size_t max_privmap_lpsize = MMU_PAGESIZE4M; +size_t max_uidata_lpsize = MMU_PAGESIZE; +size_t max_utext_lpsize = MMU_PAGESIZE4M; +size_t max_shm_lpsize = MMU_PAGESIZE4M; -/* - * Platforms with smaller or larger TLBs may wish to change this. Most - * sun4u platforms can hold 1024 8K entries by default and most processes - * are observed to be < 6MB on these machines, so we decide to move up - * here to give ourselves some wiggle room for other, smaller segments. - */ -int auto_lpg_tlb_threshold = 768; -int auto_lpg_minszc = TTE4M; -int auto_lpg_maxszc = TTE4M; -size_t auto_lpg_heap_default = MMU_PAGESIZE; -size_t auto_lpg_stack_default = MMU_PAGESIZE; -size_t auto_lpg_va_default = MMU_PAGESIZE; -size_t auto_lpg_remap_threshold = 0; -/* - * Number of pages in 1 GB. Don't enable automatic large pages if we have - * fewer than this many pages. - */ -pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); +void +adjust_data_maxlpsize(size_t ismpagesize) +{ + if (max_uheap_lpsize == MMU_PAGESIZE4M) { + max_uheap_lpsize = ismpagesize; + } + if (max_ustack_lpsize == MMU_PAGESIZE4M) { + max_ustack_lpsize = ismpagesize; + } + if (max_privmap_lpsize == MMU_PAGESIZE4M) { + max_privmap_lpsize = ismpagesize; + } + if (max_shm_lpsize == MMU_PAGESIZE4M) { + max_shm_lpsize = ismpagesize; + } +} /* * map_addr_proc() is the routine called when the system is to
--- a/usr/src/uts/sun4v/vm/mach_vm_dep.c Thu Oct 26 16:33:33 2006 -0700 +++ b/usr/src/uts/sun4v/vm/mach_vm_dep.c Thu Oct 26 16:44:53 2006 -0700 @@ -91,63 +91,18 @@ }; /* - * Enable usage of 64k/4M pages for text and 64k pages for initdata for - * all sun4v platforms. These variables can be overwritten by the platmod - * or the CPU module. User can also change the setting via /etc/system. - */ - -int use_text_pgsz64k = 1; -int use_text_pgsz4m = 1; -int use_initdata_pgsz64k = 1; - -/* - * disable_text_largepages and disable_initdata_largepages bitmaks reflect - * both unconfigured and undesirable page sizes. Current implementation - * supports 64K and 4M page sizes for text and only 64K for data. Rest of - * the page sizes are not currently supported, hence disabled below. In - * future, when support is added for any other page size, it should be - * reflected below. - * - * Note that these bitmask can be set in platform or CPU specific code to - * disable page sizes that should not be used. These variables normally - * shouldn't be changed via /etc/system. - * - * These bitmasks are also updated within hat_init to reflect unsupported - * page sizes on a sun4v processor per mmu_exported_pagesize_mask global - * variable. + * Maximum and default segment size tunables for user heap, stack, private + * and shared anonymous memory, and user text and initialized data. */ - -int disable_text_largepages = - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M) | (1 << TTE2G) | - (1 << TTE16G); -int disable_initdata_largepages = - (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M) | (1 << TTE256M) | - (1 << TTE2G) | (1 << TTE16G); - -/* - * Minimum segment size tunables before 64K or 4M large pages - * should be used to map it. 
- */ -size_t text_pgsz64k_minsize = MMU_PAGESIZE64K; -size_t text_pgsz4m_minsize = MMU_PAGESIZE4M; -size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K; - +size_t max_uheap_lpsize = MMU_PAGESIZE64K; +size_t default_uheap_lpsize = MMU_PAGESIZE64K; +size_t max_ustack_lpsize = MMU_PAGESIZE64K; +size_t default_ustack_lpsize = MMU_PAGESIZE64K; +size_t max_privmap_lpsize = MMU_PAGESIZE64K; +size_t max_uidata_lpsize = MMU_PAGESIZE64K; +size_t max_utext_lpsize = MMU_PAGESIZE4M; size_t max_shm_lpsize = MMU_PAGESIZE4M; -/* Auto large page tunables. */ -int auto_lpg_tlb_threshold = 32; -int auto_lpg_minszc = TTE64K; -int auto_lpg_maxszc = TTE64K; -size_t auto_lpg_heap_default = MMU_PAGESIZE64K; -size_t auto_lpg_stack_default = MMU_PAGESIZE64K; -size_t auto_lpg_va_default = MMU_PAGESIZE64K; -size_t auto_lpg_remap_threshold = 0; /* always remap */ -/* - * Number of pages in 1 GB. Don't enable automatic large pages if we have - * fewer than this many pages. - */ -pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); - /* * map_addr_proc() is the routine called when the system is to * choose an address for the user. We will pick an address