Mercurial > illumos > illumos-gate
changeset 12293:2286b8b6d071
backout 6535949: needs more work
author | jmcp <James.McPherson@Sun.COM> |
---|---|
date | Mon, 03 May 2010 04:17:29 -0700 |
parents | 4c8e89f9481c |
children | 2a74b443e6b1 |
files | usr/src/uts/common/os/clock.c usr/src/uts/common/os/kflt_mem_stubs.c usr/src/uts/common/sys/kflt_mem.h usr/src/uts/common/sys/mem_cage.h usr/src/uts/common/vm/page.h usr/src/uts/common/vm/vm_page.c usr/src/uts/common/vm/vm_pagelist.c usr/src/uts/i86pc/Makefile.files usr/src/uts/i86pc/os/startup.c usr/src/uts/i86pc/vm/kflt_mem.c usr/src/uts/i86pc/vm/vm_dep.h usr/src/uts/i86pc/vm/vm_machdep.c usr/src/uts/i86xpv/Makefile.files usr/src/uts/sun4/vm/vm_dep.c usr/src/uts/sun4/vm/vm_dep.h usr/src/uts/sun4u/Makefile.files usr/src/uts/sun4u/starfire/os/starfire.c usr/src/uts/sun4v/Makefile.files |
diffstat | 18 files changed, 421 insertions(+), 2494 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/os/clock.c Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/common/os/clock.c Mon May 03 04:17:29 2010 -0700 @@ -54,7 +54,6 @@ #include <sys/disp.h> #include <sys/msacct.h> #include <sys/mem_cage.h> -#include <sys/kflt_mem.h> #include <vm/page.h> #include <vm/anon.h> @@ -630,13 +629,8 @@ /* * Wakeup the cageout thread waiters once per second. */ - if (one_sec) { - if (kcage_on) { - kcage_tick(); - } else if (kflt_on) { - kflt_tick(); - } - } + if (one_sec) + kcage_tick(); if (one_sec) {
--- a/usr/src/uts/common/os/kflt_mem_stubs.c Sun May 02 21:47:48 2010 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2010, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#include <sys/types.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/debug.h> -#include <vm/page.h> -#include <sys/mem_config.h> -#include <sys/kflt_mem.h> - -/* These should be in a platform stubs file. */ - -int kflt_on; -pgcnt_t kflt_freemem; -pgcnt_t kflt_throttlefree; -pgcnt_t kflt_minfree; -pgcnt_t kflt_desfree; -pgcnt_t kflt_needfree; -pgcnt_t kflt_lotsfree; - -/*ARGSUSED*/ -int -kflt_create_throttle(pgcnt_t npages, int flags) -{ - return (0); -} - -void -kflt_init(void) -{ -} - -void -kflt_evict_wakeup(void) -{ -} - -void -kflt_tick(void) -{ -}
--- a/usr/src/uts/common/sys/kflt_mem.h Sun May 02 21:47:48 2010 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,107 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2010, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#ifndef _KFLT_MEM_H -#define _KFLT_MEM_H - -#include <sys/types.h> -#include <sys/memlist.h> - -/* - * Kernel memory freelist interfaces. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -#define KFT_FAILURE 0 -#define KFT_CRIT 1 -#define KFT_NONCRIT 2 - -#define KFLT_EXPAND_RETRIES 10 -#define KFLT_PAGESIZE 1 - -extern pgcnt_t kflt_freemem; -extern pgcnt_t kflt_desfree; -extern pgcnt_t kflt_minfree; -extern pgcnt_t kflt_lotsfree; -extern pgcnt_t kflt_needfree; -extern pgcnt_t kflt_user_alloc; -extern pgcnt_t kflt_user_threshhold; -extern pgcnt_t kflt_throttlefree; -extern pgcnt_t kflt_reserve; -extern kthread_id_t kflt_evict_thread; -extern int kflt_on; - -extern void kflt_evict_wakeup(void); -extern void kflt_freemem_add(pgcnt_t); -extern void kflt_freemem_sub(pgcnt_t); -extern int kflt_create_throttle(pgcnt_t, int); -extern void kflt_expand(void); -extern void kflt_init(void); -extern void kflt_tick(void); -#pragma weak kflt_expand - -#if defined(__amd64) && !defined(__xpv) -/* Macros to throttle memory allocations from the kernel page freelist. */ - -#define KERNEL_THROTTLE_NONCRIT(npages, flags) \ - (kflt_create_throttle(npages, flags) == KFT_NONCRIT) - -#define KERNEL_THROTTLE(npages, flags) \ - if (((flags) & PG_KFLT) && \ - (kflt_freemem < (kflt_throttlefree + (npages)))) { \ - (void) kflt_create_throttle(npages, flags); \ - } - -#define KERNEL_THROTTLE_PGCREATE(npages, flags, cond) \ - ((((flags) & (PG_KFLT|(cond)) == (PG_KFLT|(cond))) && \ - (kflt_freemem < (kflt_throttlefree + (npages))) && \ - (kflt_create_throttle(npages, flags) == KFT_FAILURE)) ? \ - 1 : 0) - -#define KERNEL_NOT_THROTTLED(flags) (!kflt_on || !(flags & PG_KFLT)) - -#elif !defined(__sparc) - -#define KERNEL_THROTTLE_NONCRIT(npages, flags) 0 - -#define KERNEL_THROTTLE(npages, flags) - -#define KERNEL_THROTTLE_PGCREATE(npages, flags, cond) 0 - -#define KERNEL_NOT_THROTTLED(flags) 1 - -#endif - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _KFLT_MEM_H */
--- a/usr/src/uts/common/sys/mem_cage.h Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/common/sys/mem_cage.h Mon May 03 04:17:29 2010 -0700 @@ -19,12 +19,15 @@ * CDDL HEADER END */ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ #ifndef _SYS_MEM_CAGE_H #define _SYS_MEM_CAGE_H +#pragma ident "%Z%%M% %I% %E% SMI" + #include <sys/types.h> #include <sys/memlist.h> @@ -82,28 +85,6 @@ extern kcage_dir_t kcage_startup_dir; -#if defined(__sparc) -/* Macros to throttle memory allocations from the kernel cage. */ - -#define KERNEL_THROTTLE_NONCRIT(npages, flags) \ - (kcage_create_throttle(1, flags) == KCT_NONCRIT) - -#define KERNEL_THROTTLE(npages, flags) \ - if (((flags) & PG_NORELOC) && \ - (kcage_freemem < (kcage_throttlefree + (npages)))) { \ - (void) kcage_create_throttle(npages, flags); \ - } - - -#define KERNEL_THROTTLE_PGCREATE(npages, flags, cond) \ - ((((flags) & (PG_NORELOC|(cond)) == (PG_NORELOC|(cond))) && \ - (kcage_freemem < (kcage_throttlefree + (npages))) && \ - (kcage_create_throttle(npages, flags) == KCT_FAILURE)) ? \ - 1 : 0) - -#define KERNEL_NOT_THROTTLED(flags) (!kcage_on || !((flags) & PG_NORELOC)) -#endif /* __sparc */ - #endif /* _KERNEL */ #ifdef __cplusplus
--- a/usr/src/uts/common/vm/page.h Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/common/vm/page.h Mon May 03 04:17:29 2010 -0700 @@ -659,8 +659,6 @@ #define PG_LOCAL 0x0080 /* alloc from given lgrp only */ #define PG_NORMALPRI 0x0100 /* PG_WAIT like priority, but */ /* non-blocking */ -#define PG_KFLT 0x0200 /* alloc from kernel page freelist */ - /* * When p_selock has the SE_EWANTED bit set, threads waiting for SE_EXCL * access are given priority over all other waiting threads. @@ -942,8 +940,6 @@ #define P_SWAP 0x10 /* belongs to vnode that is V_ISSWAP */ #define P_BOOTPAGES 0x08 /* member of bootpages list */ #define P_RAF 0x04 /* page retired at free */ -#define P_KFLT 0x02 /* allocated from kernel free list */ -#define P_USERKFLT 0x01 /* user pages from kernel free list */ #define PP_ISFREE(pp) ((pp)->p_state & P_FREE) #define PP_ISAGED(pp) (((pp)->p_state & P_FREE) && \ @@ -955,8 +951,6 @@ #define PP_ISSWAP(pp) ((pp)->p_state & P_SWAP) #define PP_ISBOOTPAGES(pp) ((pp)->p_state & P_BOOTPAGES) #define PP_ISRAF(pp) ((pp)->p_state & P_RAF) -#define PP_ISKFLT(pp) ((pp)->p_state & P_KFLT) -#define PP_ISUSERKFLT(pp) ((pp)->p_state & P_USERKFLT) #define PP_SETFREE(pp) ((pp)->p_state = ((pp)->p_state & ~P_MIGRATE) \ | P_FREE) @@ -966,8 +960,6 @@ #define PP_SETSWAP(pp) ((pp)->p_state |= P_SWAP) #define PP_SETBOOTPAGES(pp) ((pp)->p_state |= P_BOOTPAGES) #define PP_SETRAF(pp) ((pp)->p_state |= P_RAF) -#define PP_SETKFLT(pp) ((pp)->p_state |= P_KFLT) -#define PP_SETUSERKFLT(pp) ((pp)->p_state |= P_USERKFLT) #define PP_CLRFREE(pp) ((pp)->p_state &= ~P_FREE) #define PP_CLRAGED(pp) ASSERT(!PP_ISAGED(pp)) @@ -976,8 +968,6 @@ #define PP_CLRSWAP(pp) ((pp)->p_state &= ~P_SWAP) #define PP_CLRBOOTPAGES(pp) ((pp)->p_state &= ~P_BOOTPAGES) #define PP_CLRRAF(pp) ((pp)->p_state &= ~P_RAF) -#define PP_CLRKFLT(pp) ((pp)->p_state &= ~P_KFLT) -#define PP_CLRUSERKFLT(pp) ((pp)->p_state &= ~P_USERKFLT) /* * Flags for page_t p_toxic, for tracking memory hardware errors.
--- a/usr/src/uts/common/vm/vm_page.c Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/common/vm/vm_page.c Mon May 03 04:17:29 2010 -0700 @@ -59,7 +59,6 @@ #include <sys/condvar_impl.h> #include <sys/mem_config.h> #include <sys/mem_cage.h> -#include <sys/kflt_mem.h> #include <sys/kmem.h> #include <sys/atomic.h> #include <sys/strlog.h> @@ -130,7 +129,7 @@ * The free list contains those pages that should be reused first. * * The implementation of the lists is machine dependent. - * PAGE_GET_FREELISTS(), page_get_cachelist(), + * page_get_freelist(), page_get_cachelist(), * page_list_sub(), and page_list_add() * form the interface to the machine dependent implementation. * @@ -508,13 +507,10 @@ static void page_init_mem_config(void) { -#ifdef DEBUG - ASSERT(kphysm_setup_func_register(&page_mem_config_vec, - (void *)NULL) == 0); -#else /* !DEBUG */ - (void) kphysm_setup_func_register(&page_mem_config_vec, (void *)NULL); -#endif /* !DEBUG */ - + int ret; + + ret = kphysm_setup_func_register(&page_mem_config_vec, (void *)NULL); + ASSERT(ret == 0); } /* @@ -1559,9 +1555,9 @@ ASSERT(!kcage_on ? !(flags & PG_NORELOC) : 1); checkagain: - - /* Throttle kernel memory allocations if necessary */ - KERNEL_THROTTLE(npages, flags); + if ((flags & PG_NORELOC) && + kcage_freemem < kcage_throttlefree + npages) + (void) kcage_create_throttle(npages, flags); if (freemem < npages + throttlefree) if (!page_create_throttle(npages, flags)) @@ -1786,7 +1782,7 @@ */ flags |= PG_PANIC; - if ((flags & (PG_NORELOC|PG_KFLT)) != 0) { + if ((flags & PG_NORELOC) != 0) { VM_STAT_ADD(pcgs_entered_noreloc); /* * Requests for free pages from critical threads @@ -1805,8 +1801,7 @@ * kcage_freemem won't fall below minfree prior to grabbing * pages from the freelists. */ - /* LINTED */ - if (KERNEL_THROTTLE_NONCRIT(1, flags)) { + if (kcage_create_throttle(1, flags) == KCT_NONCRIT) { mutex_enter(&pcgs_cagelock); cagelocked = 1; VM_STAT_ADD(pcgs_cagelocked); @@ -1863,8 +1858,8 @@ lgrp = lgrp_mem_choose(seg, vaddr, PAGESIZE); - for (count = 0; kcage_on || kflt_on || count < MAX_PCGS; count++) { - PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, PAGESIZE, + for (count = 0; kcage_on || count < MAX_PCGS; count++) { + pp = page_get_freelist(vp, off, seg, vaddr, PAGESIZE, flags, lgrp); if (pp == NULL) { pp = page_get_cachelist(vp, off, seg, vaddr, @@ -1874,7 +1869,7 @@ /* * Serialize. Don't fight with other pcgs(). */ - if (!locked && KERNEL_NOT_THROTTLED(flags)) { + if (!locked && (!kcage_on || !(flags & PG_NORELOC))) { mutex_enter(&pcgs_lock); VM_STAT_ADD(pcgs_locked); locked = 1; @@ -2053,16 +2048,14 @@ while (npgs && szc) { lgrp = lgrp_mem_choose(seg, addr, pgsz); if (pgflags == PG_LOCAL) { - PAGE_GET_FREELISTS(pp, vp, 0, seg, addr, pgsz, + pp = page_get_freelist(vp, 0, seg, addr, pgsz, pgflags, lgrp); if (pp == NULL) { - /* LINTED */ - PAGE_GET_FREELISTS(pp, vp, 0, seg, addr, pgsz, + pp = page_get_freelist(vp, 0, seg, addr, pgsz, 0, lgrp); } } else { - /* LINTED */ - PAGE_GET_FREELISTS(pp, vp, 0, seg, addr, pgsz, + pp = page_get_freelist(vp, 0, seg, addr, pgsz, 0, lgrp); } if (pp != NULL) { @@ -2163,24 +2156,10 @@ npages = btop(bytes); - if (kflt_on && ((flags & PG_NORELOC) || VN_ISKAS(vp)) && - !panicstr) { - /* - * If the kernel freelist is active, and this is a - * kernel page or one that is non-relocatable because it - * is locked then set the PG_KFLT flag so that this page - * will be allocated from the kernel freelist and therefore - * will not fragment memory - */ - flags |= PG_KFLT; - } - if (!kcage_on || panicstr) { /* - * If the cage is off, we turn off the PG_NORELOC flag - * however if the kernel freelist is active we will use - * this to prevent memory fragmentation instead. - * In panic do not use the cage or the kernel freelist. + * Cage is OFF, or we are single threaded in + * panic, so make everything a RELOC request. */ flags &= ~PG_NORELOC; } @@ -2195,13 +2174,22 @@ } /* - * If cage or kernel freelist is on, dampen draw from cage when - * available cage space is low. + * If cage is on, dampen draw from cage when available + * cage space is low. */ - /* LINTED */ - if (KERNEL_THROTTLE_PGCREATE(npages, flags, PG_WAIT)) { - VM_STAT_ADD(page_create_large_cnt[2]); - return (NULL); + if ((flags & (PG_NORELOC | PG_WAIT)) == (PG_NORELOC | PG_WAIT) && + kcage_freemem < kcage_throttlefree + npages) { + + /* + * The cage is on, the caller wants PG_NORELOC + * pages and available cage memory is very low. + * Call kcage_create_throttle() to attempt to + * control demand on the cage. + */ + if (kcage_create_throttle(npages, flags) == KCT_FAILURE) { + VM_STAT_ADD(page_create_large_cnt[2]); + return (NULL); + } } if (!pcf_decrement_bucket(npages) && @@ -2222,9 +2210,8 @@ else lgrp = lgrp_mem_choose(seg, vaddr, bytes); - PAGE_GET_FREELISTS(rootpp, &kvp, off, seg, vaddr, - bytes, flags & ~PG_MATCH_COLOR, lgrp); - if (rootpp == NULL) { + if ((rootpp = page_get_freelist(&kvp, off, seg, vaddr, + bytes, flags & ~PG_MATCH_COLOR, lgrp)) == NULL) { page_create_putback(npages); VM_STAT_ADD(page_create_large_cnt[5]); return (NULL); @@ -2322,41 +2309,33 @@ } } - if (kflt_on && ((flags & PG_NORELOC) || VN_ISKAS(vp)) && - !panicstr) { - /* - * If the kernel freelist is active, and this is a - * kernel page or one that is non-relocatable because it - * is locked then set the PG_KFLT flag so that this page - * will be allocated from the kernel freelist and therefore - * will not fragment memory - */ - flags |= PG_KFLT; - } - if (!kcage_on || panicstr) { /* - * If the cage is off, we turn off the PG_NORELOC flag - * however if the kernel freelist is active we will use - * this to prevent memory fragmentation instead. - * In panic do not use the cage or the kernel freelist. + * Cage is OFF, or we are single threaded in + * panic, so make everything a RELOC request. */ flags &= ~PG_NORELOC; } - if ((freemem <= throttlefree + npages) && - (!page_create_throttle(npages, flags))) { + if (freemem <= throttlefree + npages) + if (!page_create_throttle(npages, flags)) return (NULL); - } /* - * If cage or kernel freelist is on, dampen draw from cage when - * available cage space is low. + * If cage is on, dampen draw from cage when available + * cage space is low. */ - - /* LINTED */ - if (KERNEL_THROTTLE_PGCREATE(npages, flags, 0)) { - return (NULL); + if ((flags & PG_NORELOC) && + kcage_freemem < kcage_throttlefree + npages) { + + /* + * The cage is on, the caller wants PG_NORELOC + * pages and available cage memory is very low. + * Call kcage_create_throttle() to attempt to + * control demand on the cage. + */ + if (kcage_create_throttle(npages, flags) == KCT_FAILURE) + return (NULL); } VM_STAT_ADD(page_create_cnt[0]); @@ -2431,7 +2410,7 @@ * the physical memory */ lgrp = lgrp_mem_choose(seg, vaddr, PAGESIZE); - PAGE_GET_FREELISTS(npp, vp, off, seg, vaddr, PAGESIZE, + npp = page_get_freelist(vp, off, seg, vaddr, PAGESIZE, flags | PG_MATCH_COLOR, lgrp); if (npp == NULL) { npp = page_get_cachelist(vp, off, seg, @@ -2578,6 +2557,7 @@ npp->p_offset = (u_offset_t)-1; page_list_add(npp, PG_FREE_LIST | PG_LIST_TAIL); page_unlock(npp); + } ASSERT(pages_req >= found_on_free); @@ -2626,9 +2606,7 @@ { page_t *tpp; pgcnt_t i, pgcnt = page_get_pagecnt(rootpp->p_szc); -#ifdef DEBUG uint_t szc = rootpp->p_szc; -#endif for (i = 0, tpp = rootpp; i < pgcnt; i++, tpp = tpp->p_next) { ASSERT(tpp->p_szc == szc); @@ -2802,9 +2780,7 @@ page_t *tpp, *rootpp = NULL; pgcnt_t pgcnt = page_get_pagecnt(pp->p_szc); pgcnt_t i; -#ifdef DEBUG uint_t szc = pp->p_szc; -#endif VM_STAT_ADD(pagecnt.pc_free_pages); TRACE_1(TR_FAC_VM, TR_PAGE_FREE_FREE, @@ -3148,9 +3124,7 @@ page_t *tpp, *rootpp = NULL; pgcnt_t pgcnt = page_get_pagecnt(pp->p_szc); pgcnt_t i, pglcks = 0; -#ifdef DEBUG uint_t szc = pp->p_szc; -#endif ASSERT(pp->p_szc != 0 && pp->p_szc < page_num_pagesizes()); @@ -3277,9 +3251,7 @@ * large pages left lying around. */ if (opp->p_szc != 0) { -#ifdef DEBUG vnode_t *ovp = opp->p_vnode; -#endif ASSERT(ovp != NULL); ASSERT(!IS_SWAPFSVP(ovp)); ASSERT(!VN_ISKAS(ovp)); @@ -3517,11 +3489,8 @@ mutex_exit(vphm); if (hold == NULL) mutex_exit(phm); -#ifdef VM_STATS - if (rc == 0) { + if (rc == 0) VM_STAT_ADD(hashin_already); - } -#endif return (rc); } @@ -5229,10 +5198,8 @@ page_t *tpp, *rootpp = pp; pfn_t pfn = page_pptonum(pp); spgcnt_t i, npgs; + uint_t szc = pp->p_szc; vnode_t *vp = pp->p_vnode; -#ifdef DEBUG - uint_t szc = pp->p_szc; -#endif ASSERT(PAGE_EXCL(pp));
--- a/usr/src/uts/common/vm/vm_pagelist.c Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/common/vm/vm_pagelist.c Mon May 03 04:17:29 2010 -0700 @@ -57,7 +57,6 @@ #include <sys/mem_config.h> #include <sys/callb.h> #include <sys/mem_cage.h> -#include <sys/kflt_mem.h> #include <sys/sdt.h> #include <sys/dumphdr.h> #include <sys/swap.h> @@ -122,18 +121,9 @@ if (++pgcpfailcnt[szc] >= PGCPFAILMAX) \ pgcpfailcnt[szc] = PGCPFAILMAX / 2; -/* - * There are two page freelist types that are supported, flt_user, the user - * page freelist type and flt_kern, the kernel page freelist type. - */ - -page_freelist_type_t flt_user; -page_freelist_type_t flt_kern; -page_freelist_type_t *ufltp = &flt_user; -page_freelist_type_t *kfltp = &flt_kern; - #ifdef VM_STATS struct vmm_vmstats_str vmm_vmstats; + #endif /* VM_STATS */ #if defined(__sparc) @@ -245,9 +235,6 @@ page_t *page_freelist_split(uchar_t, uint_t, int, int, pfn_t, pfn_t, page_list_walker_t *); page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int); -static page_t *page_get_flist(page_freelist_type_t *, uint_t, int, - uchar_t, uint_t, struct lgrp *); - static int page_trylock_cons(page_t *pp, se_t se); /* @@ -365,6 +352,7 @@ */ krwlock_t page_ctrs_rwlock[MAX_MEM_NODES]; + /* * initialize cpu_vm_data to point at cache aligned vm_cpu_data_t. */ @@ -1422,8 +1410,7 @@ * threaded), add a page to the free list and add to the * the free region counters w/o any locking */ - ASSERT(!PP_ISKFLT(pp)); - ppp = PAGE_FREELISTP(PFLT_USER, mnode, 0, bin, mtype); + ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype); /* inline version of page_add() */ if (*ppp != NULL) { @@ -1437,13 +1424,13 @@ page_ctr_add_internal(mnode, mtype, pp, flags); VM_STAT_ADD(vmm_vmstats.pladd_free[0]); } else { - pcm = PC_BIN_MUTEX(PP_ISKFLT(pp), mnode, bin, flags); + pcm = PC_BIN_MUTEX(mnode, bin, flags); if (flags & PG_FREE_LIST) { VM_STAT_ADD(vmm_vmstats.pladd_free[0]); ASSERT(PP_ISAGED(pp)); - ppp = PAGE_FREELISTP(PP_ISKFLT(pp), mnode, 0, - bin, mtype); + ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype); + } else { VM_STAT_ADD(vmm_vmstats.pladd_cache); ASSERT(pp->p_vnode); @@ -1468,16 +1455,7 @@ if (PP_ISNORELOC(pp)) { kcage_freemem_add(1); } -#elif defined(__amd64) && !defined(__xpv) - if (PP_ISKFLT(pp)) { - kflt_freemem_add(1); - if (PP_ISUSERKFLT(pp)) { - ASSERT(kflt_user_alloc > 0); - atomic_add_long(&kflt_user_alloc, -1); - PP_CLRUSERKFLT(pp); - } - } -#endif /* __sparc */ +#endif /* * It is up to the caller to unlock the page! */ @@ -1517,8 +1495,7 @@ ASSERT(pp->p_szc == 0); if (PP_ISAGED(pp)) { - ASSERT(!PP_ISKFLT(pp)); - ppp = PAGE_FREELISTP(PFLT_USER, mnode, 0, bin, mtype); + ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype); flags |= PG_FREE_LIST; } else { ppp = &PAGE_CACHELISTS(mnode, bin, mtype); @@ -1556,8 +1533,7 @@ * Get new list for page. */ if (PP_ISAGED(pp)) { - ASSERT(!PP_ISKFLT(pp)); - ppp = PAGE_FREELISTP(PFLT_USER, mnode, 0, bin, mtype); + ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype); } else { ppp = &PAGE_CACHELISTS(mnode, bin, mtype); } @@ -1615,31 +1591,25 @@ if (flags & PG_LIST_ISINIT) { ASSERT(pp->p_szc == mmu_page_sizes - 1); - page_vpadd(PAGE_FREELISTP(PFLT_USER, mnode, pp->p_szc, - bin, mtype), pp); + page_vpadd(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp); ASSERT(!PP_ISNORELOC(pp)); PLCNT_INCR(pp, mnode, mtype, pp->p_szc, flags); } else { ASSERT(pp->p_szc != 0 && pp->p_szc < mmu_page_sizes); - pcm = PC_BIN_MUTEX(PFLT_USER, mnode, bin, PG_FREE_LIST); + pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST); mutex_enter(pcm); - ASSERT(!PP_ISKFLT(pp)); - page_vpadd(PAGE_FREELISTP(PFLT_USER, mnode, pp->p_szc, - bin, mtype), pp); + page_vpadd(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp); page_ctr_add(mnode, mtype, pp, PG_FREE_LIST); mutex_exit(pcm); pgcnt = page_get_pagecnt(pp->p_szc); #if defined(__sparc) - if (PP_ISNORELOC(pp)) { + if (PP_ISNORELOC(pp)) kcage_freemem_add(pgcnt); - } -#elif defined(__amd64) && !defined(__xpv) - ASSERT(!PP_ISKFLT(pp)); -#endif /* __sparc */ +#endif for (i = 0; i < pgcnt; i++, pp++) page_unlock_nocapture(pp); } @@ -1697,7 +1667,7 @@ try_again: bin = PP_2_BIN(pp); mnode = PP_2_MEM_NODE(pp); - pcm = PC_BIN_MUTEX(PP_ISKFLT(pp), mnode, bin, flags); + pcm = PC_BIN_MUTEX(mnode, bin, flags); mutex_enter(pcm); if (PP_2_BIN(pp) != bin) { mutex_exit(pcm); @@ -1708,8 +1678,7 @@ if (flags & PG_FREE_LIST) { VM_STAT_ADD(vmm_vmstats.plsub_free[0]); ASSERT(PP_ISAGED(pp)); - ppp = PAGE_FREELISTP(PP_ISKFLT(pp), mnode, pp->p_szc, - bin, mtype); + ppp = &PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype); } else { VM_STAT_ADD(vmm_vmstats.plsub_cache); ASSERT(!PP_ISAGED(pp)); @@ -1736,11 +1705,7 @@ if (PP_ISNORELOC(pp)) { kcage_freemem_sub(1); } -#elif defined(__amd64) && !defined(__xpv) - if (PP_ISKFLT(pp)) { - kflt_freemem_sub(1); - } -#endif /* __sparc */ +#endif return; } @@ -1775,16 +1740,14 @@ ASSERT(PP_ISAGED(pp)); ASSERT(pp->p_szc == 0); - /* Large pages on the kernel freelist are not supported. */ - ASSERT(!PP_ISKFLT(pp)); - /* * Subtract counters before releasing pcm mutex * to avoid race with page_freelist_coalesce. */ bin = PP_2_BIN(pp); mtype = PP_2_MTYPE(pp); - ppp = PAGE_FREELISTP(PFLT_USER, mnode, pp->p_szc, bin, mtype); + ppp = &PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype); + page_sub(ppp, pp); page_ctr_sub(mnode, mtype, pp, flags); page_freelist_unlock(mnode); @@ -1793,7 +1756,7 @@ if (PP_ISNORELOC(pp)) { kcage_freemem_sub(1); } -#endif /* __sparc */ +#endif } void @@ -1813,7 +1776,7 @@ try_again: bin = PP_2_BIN(pp); mnode = PP_2_MEM_NODE(pp); - pcm = PC_BIN_MUTEX(PP_ISKFLT(pp), mnode, bin, PG_FREE_LIST); + pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST); mutex_enter(pcm); if (PP_2_BIN(pp) != bin) { mutex_exit(pcm); @@ -1842,19 +1805,16 @@ ASSERT(PP_ISAGED(pp)); ASSERT(pp->p_szc <= szc); ASSERT(pp == PP_PAGEROOT(pp)); - ASSERT(!PP_ISKFLT(pp)); VM_STAT_ADD(vmm_vmstats.plsub_free[pp->p_szc]); mtype = PP_2_MTYPE(pp); if (pp->p_szc != 0) { - page_vpsub(PAGE_FREELISTP(PFLT_USER, mnode, pp->p_szc, - bin, mtype), pp); + page_vpsub(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp); CHK_LPG(pp, pp->p_szc); } else { VM_STAT_ADD(vmm_vmstats.plsubpages_szc0); - page_sub(PAGE_FREELISTP(PFLT_USER, mnode, pp->p_szc, - bin, mtype), pp); + page_sub(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp); } page_ctr_sub(mnode, mtype, pp, PG_FREE_LIST); @@ -1871,7 +1831,7 @@ pgcnt = page_get_pagecnt(pp->p_szc); kcage_freemem_sub(pgcnt); } -#endif /* __sparc */ +#endif } /* @@ -1945,7 +1905,7 @@ static uint_t page_promote_err; static uint_t page_promote_noreloc_err; -static uint_t page_promote_kflt_err; + /* * Create a single larger page (of szc new_szc) from smaller contiguous pages * for the given mnode starting at pfnum. Pages involved are on the freelist @@ -1957,9 +1917,6 @@ * caller and put the large page on the freelist instead. * If flags is PC_FREE, then the large page will be placed on the freelist, * and NULL will be returned. - * If the PC_KFLT_EXPORT flag is set, the large page will be returned to the - * caller unlocked, as the caller is going to put it on the user page - * freelist * The caller is responsible for locking the freelist as well as any other * accounting which needs to be done for a returned page. * @@ -2047,17 +2004,6 @@ page_promote_err++; return (NULL); } - - /* - * page promote() can only legitimately be called for - * pages from the kernel freelist from the kflt_export() - * routine which sets the PC_KFLT_EXPORT flag. - */ - if (PP_ISKFLT(pp) && !(flags & PC_KFLT_EXPORT)) { - page_promote_kflt_err++; - page_promote_err++; - return (NULL); - } } pages_left = new_npgs; @@ -2079,13 +2025,11 @@ * PG_FREE_LIST */ if (pp->p_szc) { - page_vpsub(PAGE_FREELISTP(PFLT_USER, mnode, + page_vpsub(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp); } else { - ASSERT(!PP_ISKFLT(pp) || - (flags & PC_KFLT_EXPORT)); - mach_page_sub(PAGE_FREELISTP(PP_ISKFLT(pp), - mnode, 0, bin, mtype), pp); + mach_page_sub(&PAGE_FREELISTS(mnode, 0, + bin, mtype), pp); } which_list = PG_FREE_LIST; } else { @@ -2148,16 +2092,7 @@ * return the page to the user if requested * in the properly locked state. */ - if ((flags & PC_ALLOC) && (page_trylock_cons(pplist, SE_EXCL))) { - return (pplist); - } - - /* - * If the PC_KFLT_EXPORT flag is set, kflt_export() is just going to - * return this large page to the user page freelist, so there is no - * need to lock it. - */ - if (flags & PC_KFLT_EXPORT) { + if (flags == PC_ALLOC && (page_trylock_cons(pplist, SE_EXCL))) { return (pplist); } @@ -2167,8 +2102,7 @@ bin = PP_2_BIN(pplist); mnode = PP_2_MEM_NODE(pplist); mtype = PP_2_MTYPE(pplist); - page_vpadd(PAGE_FREELISTP(PFLT_USER, mnode, new_szc, - bin, mtype), pplist); + page_vpadd(&PAGE_FREELISTS(mnode, new_szc, bin, mtype), pplist); page_ctr_add(mnode, mtype, pplist, PG_FREE_LIST); return (NULL); @@ -2189,9 +2123,7 @@ pp->p_szc = 0; bin = PP_2_BIN(pp); mtype = PP_2_MTYPE(pp); - ASSERT(!PP_ISKFLT(pp)); - mach_page_add(PAGE_FREELISTP(PFLT_USER, mnode, - 0, bin, mtype), pp); + mach_page_add(&PAGE_FREELISTS(mnode, 0, bin, mtype), pp); page_ctr_add(mnode, mtype, pp, PG_FREE_LIST); } return (NULL); @@ -2227,13 +2159,11 @@ ASSERT(pplist != NULL); ASSERT(pplist->p_szc == cur_szc); - ASSERT(!PP_ISKFLT(pplist)); bin = PP_2_BIN(pplist); ASSERT(mnode == PP_2_MEM_NODE(pplist)); mtype = PP_2_MTYPE(pplist); - page_vpsub(PAGE_FREELISTP(PFLT_USER, mnode, cur_szc, - bin, mtype), pplist); + page_vpsub(&PAGE_FREELISTS(mnode, cur_szc, bin, mtype), pplist); CHK_LPG(pplist, cur_szc); page_ctr_sub(mnode, mtype, pplist, PG_FREE_LIST); @@ -2266,9 +2196,8 @@ ret_pp = pp; } else { mtype = PP_2_MTYPE(pp); - ASSERT(!PP_ISKFLT(pp)); - mach_page_add(PAGE_FREELISTP(PFLT_USER, mnode, - 0, bin, mtype), pp); + mach_page_add(&PAGE_FREELISTS(mnode, 0, bin, + mtype), pp); page_ctr_add(mnode, mtype, pp, PG_FREE_LIST); } } else { @@ -2313,8 +2242,8 @@ ret_pp = try_to_return_this_page; } else { mtype = PP_2_MTYPE(pp); - page_vpadd(PAGE_FREELISTP(PFLT_USER, mnode, - new_szc, bin, mtype), pplist); + page_vpadd(&PAGE_FREELISTS(mnode, new_szc, + bin, mtype), pplist); page_ctr_add(mnode, mtype, pplist, PG_FREE_LIST); @@ -2348,6 +2277,7 @@ #if defined(__sparc) pfn_t pfnum0, nlo, nhi; #endif + if (mpss_coalesce_disable) { ASSERT(szc < MMU_PAGE_SIZES); VM_STAT_ADD(vmm_vmstats.page_ctrs_coalesce[szc][0]); @@ -2505,21 +2435,11 @@ npgs = page_get_pagecnt(ret_pp->p_szc); kcage_freemem_sub(npgs); } -#elif defined(__amd64) && !defined(__xpv) - /* - * Only a single page size is supported on - * the kernel freelist. This will need to - * be changed to increase the availability - * of more than one large page size. - */ - ASSERT(!PP_ISKFLT(ret_pp)); -#endif /* __sparc */ +#endif return (ret_pp); } -#ifdef VM_STATS } else { VM_STAT_ADD(vmm_vmstats.page_ctrs_changed[r][mrange]); -#endif } page_freelist_unlock(mnode); @@ -2685,10 +2605,9 @@ /* * If page found then demote it. */ - if (PAGE_FREELISTS(PFLT_USER, mnode, nszc, bin, mtype)) { + if (PAGE_FREELISTS(mnode, nszc, bin, mtype)) { page_freelist_lock(mnode); - firstpp = pp = PAGE_FREELISTS(PFLT_USER, mnode, - nszc, bin, mtype); + firstpp = pp = PAGE_FREELISTS(mnode, nszc, bin, mtype); /* * If pfnhi is not PFNNULL, look for large page below @@ -2732,9 +2651,7 @@ ret_pp->p_szc); kcage_freemem_sub(npgs); } -#elif defined(__amd64) && !defined(__xpv) - ASSERT(!PP_ISKFLT(pp)); -#endif /* __sparc */ +#endif return (ret_pp); } } @@ -2892,42 +2809,6 @@ plw->plw_bins[1] = 0; plw->plw_ceq_mask[1] = INVALID_MASK; } - ASSERT(bin < plw->plw_colors); -} - -/* - * Walker variables for the kernel freelist are initialized so that all - * kernel page colors are treated as equivalent. This mimimizes the amount - * of memory used by the the kernel freelist. - */ -/* ARGSUSED */ -void -page_kflt_walk_init(uchar_t szc, uint_t flags, uint_t bin, int can_split, - int use_ceq, page_list_walker_t *plw) -{ - /* - * Note that the following values are only valid for pages with - * szc == 0. - */ - ASSERT(szc == 0); - - /* The number of colors for kernel pages */ - plw->plw_colors = KFLT_PAGE_COLORS; - plw->plw_color_mask = KFLT_PAGE_COLORS - 1; - - /* The marker indicates when at all the bins have been processed */ - plw->plw_bin_marker = plw->plw_bin0 = bin; - plw->plw_bin_split_prev = bin; - - /* Add plw_bin_step to get the next bin to process */ - plw->plw_bin_step = vac_colors; - - /* There is only 1 color group i.e. all colors are equivalent */ - plw->plw_ceq_dif = 1; - plw->plw_ceq_mask[0] = 0; - plw->plw_do_split = 0; - - ASSERT(bin < plw->plw_colors); } /* @@ -3030,8 +2911,8 @@ } page_t * -page_get_mnode_freelist(page_freelist_type_t *fp, int mnode, uint_t bin, - int mtype, uchar_t szc, uint_t flags) +page_get_mnode_freelist(int mnode, uint_t bin, int mtype, uchar_t szc, + uint_t flags) { kmutex_t *pcm; page_t *pp, *first_pp; @@ -3049,6 +2930,7 @@ return (NULL); } try_again: + plw_initialized = 0; plw.plw_ceq_dif = 1; @@ -3061,19 +2943,14 @@ plw.plw_count < plw.plw_ceq_dif; plw.plw_count++) { sbin = bin; do { - if (!PAGE_FREELISTS(PC_ISKFLT(fp), mnode, szc, - bin, mtype)) { + if (!PAGE_FREELISTS(mnode, szc, bin, mtype)) goto bin_empty_1; - } - - pcm = PC_BIN_MUTEX(PC_ISKFLT(fp), mnode, bin, - PG_FREE_LIST); + + pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST); mutex_enter(pcm); - pp = PAGE_FREELISTS(PC_ISKFLT(fp), mnode, szc, - bin, mtype); - if (pp == NULL) { + pp = PAGE_FREELISTS(mnode, szc, bin, mtype); + if (pp == NULL) goto bin_empty_0; - } /* * These were set before the page @@ -3125,10 +3002,10 @@ ASSERT(mtype == PP_2_MTYPE(pp)); ASSERT(pp->p_szc == szc); if (szc == 0) { - page_sub(PAGE_FREELISTP(PC_ISKFLT(fp), mnode, + page_sub(&PAGE_FREELISTS(mnode, szc, bin, mtype), pp); } else { - page_vpsub(PAGE_FREELISTP(PC_ISKFLT(fp), mnode, + page_vpsub(&PAGE_FREELISTS(mnode, szc, bin, mtype), pp); CHK_LPG(pp, szc); } @@ -3144,12 +3021,7 @@ if (PP_ISNORELOC(pp)) kcage_freemem_sub(page_get_pagecnt(szc)); -#elif defined(__amd64) && !defined(__xpv) - if (PP_ISKFLT(pp)) { - ASSERT(szc == 0); - kflt_freemem_sub(1); - } -#endif /* __sparc */ +#endif VM_STAT_ADD(vmm_vmstats.pgmf_allocok[szc]); return (pp); @@ -3157,7 +3029,7 @@ mutex_exit(pcm); bin_empty_1: if (plw_initialized == 0) { - PAGE_LIST_WALK_INIT(fp, szc, flags, bin, 1, 1, + page_list_walk_init(szc, flags, bin, 1, 1, &plw); plw_initialized = 1; ASSERT(plw.plw_colors <= @@ -3171,7 +3043,6 @@ /* calculate the next bin with equivalent color */ bin = ADD_MASKED(bin, plw.plw_bin_step, plw.plw_ceq_mask[szc], plw.plw_color_mask); - } while (sbin != bin); /* @@ -3193,7 +3064,7 @@ return (pp); if (plw.plw_ceq_dif > 1) - bin = PAGE_LIST_WALK_NEXT(fp, szc, bin, &plw); + bin = page_list_walk_next_bin(szc, bin, &plw); } /* if allowed, cycle through additional mtypes */ @@ -3320,17 +3191,6 @@ } return (0); } - if (PP_ISKFLT(pp)) { - VM_STAT_ADD(vmm_vmstats.ptcpfailkflt[szc]); - ASSERT(i == 0); - while (i != (pgcnt_t)-1) { - pp = &spp[i]; - ASSERT(PAGE_EXCL(pp)); - page_unlock_nocapture(pp); - i--; - } - return (0); - } } VM_STAT_ADD(vmm_vmstats.ptcpok[szc]); return (1); @@ -3355,7 +3215,6 @@ while (pgcnt) { ASSERT(PAGE_EXCL(pp)); ASSERT(!PP_ISNORELOC(pp)); - ASSERT(!PP_ISKFLT(pp)); if (PP_ISFREE(pp)) { /* * If this is a PG_FREE_LIST page then its @@ -3457,7 +3316,6 @@ ASSERT(PAGE_EXCL(targpp)); ASSERT(!PP_ISFREE(targpp)); ASSERT(!PP_ISNORELOC(targpp)); - ASSERT(!PP_ISKFLT(targpp)); PP_SETFREE(targpp); ASSERT(PP_ISAGED(targpp)); ASSERT(targpp->p_szc < szc || (szc == 0 && @@ -3484,7 +3342,6 @@ * Trim kernel cage from pfnlo-pfnhi and store result in lo-hi. Return code * of 0 means nothing left after trim. */ -/* LINTED */ int trimkcage(struct memseg *mseg, pfn_t *lo, pfn_t *hi, pfn_t pfnlo, pfn_t pfnhi) { @@ -3547,12 +3404,14 @@ * * 'pfnflag' specifies the subset of the pfn range to search. */ + static page_t * page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags, pfn_t pfnlo, pfn_t pfnhi, pgcnt_t pfnflag) { struct memseg *mseg; pgcnt_t szcpgcnt = page_get_pagecnt(szc); + pgcnt_t szcpgmask = szcpgcnt - 1; pfn_t randpfn; page_t *pp, *randpp, *endpp; uint_t colors, ceq_mask; @@ -3561,16 +3420,13 @@ pfn_t hi, lo; uint_t skip; MEM_NODE_ITERATOR_DECL(it); -#ifdef DEBUG - pgcnt_t szcpgmask = szcpgcnt - 1; -#endif ASSERT(szc != 0 || (flags & PGI_PGCPSZC0)); + pfnlo = P2ROUNDUP(pfnlo, szcpgcnt); - if ((pfnhi - pfnlo) + 1 < szcpgcnt || pfnlo >= pfnhi) { + if ((pfnhi - pfnlo) + 1 < szcpgcnt || pfnlo >= pfnhi) return (NULL); - } ASSERT(szc < mmu_page_sizes); @@ -3611,9 +3467,8 @@ szcpages = ((pfnhi - pfnlo) + 1) / szcpgcnt; slotlen = howmany(szcpages, slots); /* skip if 'slotid' slot is empty */ - if (slotid * slotlen >= szcpages) { + if (slotid * slotlen >= szcpages) return (NULL); - } pfnlo = pfnlo + (((slotid * slotlen) % szcpages) * szcpgcnt); ASSERT(pfnlo < pfnhi); if (pfnhi > pfnlo + (slotlen * szcpgcnt)) @@ -3716,12 +3571,6 @@ ASSERT(!(pp->p_pagenum & szcpgmask)); ASSERT(((PP_2_BIN(pp) ^ bin) & ceq_mask) == 0); - /* Skip over pages on the kernel freelist */ - if (PP_ISKFLT(pp)) { - pp += skip; - goto skip_contig; - } - if (page_trylock_contig_pages(mnode, pp, szc, flags)) { /* pages unlocked by page_claim on failure */ if (page_claim_contig_pages(pp, szc, flags)) { @@ -3744,7 +3593,6 @@ (pfn - mseg->pages_base); } } -skip_contig: if (pp >= endpp) { /* start from the beginning */ MEM_NODE_ITERATOR_INIT(lo, mnode, szc, &it); @@ -3758,6 +3606,7 @@ return (NULL); } + /* * controlling routine that searches through physical memory in an attempt to * claim a large page based on the input parameters. @@ -3773,10 +3622,9 @@ * for PGI_PGCPSZC0 requests, page_get_contig_pages will relocate a base * pagesize page that satisfies mtype. */ -/* ARGSUSED */ page_t * -page_get_contig_pages(page_freelist_type_t *fp, int mnode, uint_t bin, - int mtype, uchar_t szc, uint_t flags) +page_get_contig_pages(int mnode, uint_t bin, int mtype, uchar_t szc, + uint_t flags) { pfn_t pfnlo, pfnhi; /* contig pages pfn range */ page_t *pp; @@ -3809,6 +3657,7 @@ do { /* get pfn range based on mnode and mtype */ MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi); + ASSERT(pfnhi >= pfnlo); pp = page_geti_contig_pages(mnode, bin, szc, flags, @@ -3872,10 +3721,137 @@ page_get_freelist(struct vnode *vp, u_offset_t off, struct seg *seg, caddr_t vaddr, size_t size, uint_t flags, struct lgrp *lgrp) { - page_t *pp; - - PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, size, flags, lgrp); - return (pp); + struct as *as = seg->s_as; + page_t *pp = NULL; + ulong_t bin; + uchar_t szc; + int mnode; + int mtype; + page_t *(*page_get_func)(int, uint_t, int, uchar_t, uint_t); + lgrp_mnode_cookie_t lgrp_cookie; + + page_get_func = page_get_mnode_freelist; + + /* + * If we aren't passed a specific lgroup, or passed a freed lgrp + * assume we wish to allocate near to the current thread's home. + */ + if (!LGRP_EXISTS(lgrp)) + lgrp = lgrp_home_lgrp(); + + if (kcage_on) { + if ((flags & (PG_NORELOC | PG_PANIC)) == PG_NORELOC && + kcage_freemem < kcage_throttlefree + btop(size) && + curthread != kcage_cageout_thread) { + /* + * Set a "reserve" of kcage_throttlefree pages for + * PG_PANIC and cageout thread allocations. + * + * Everybody else has to serialize in + * page_create_get_something() to get a cage page, so + * that we don't deadlock cageout! + */ + return (NULL); + } + } else { + flags &= ~PG_NORELOC; + flags |= PGI_NOCAGE; + } + + /* LINTED */ + MTYPE_INIT(mtype, vp, vaddr, flags, size); + + /* + * Convert size to page size code. + */ + if ((szc = page_szc(size)) == (uchar_t)-1) + panic("page_get_freelist: illegal page size request"); + ASSERT(szc < mmu_page_sizes); + + VM_STAT_ADD(vmm_vmstats.pgf_alloc[szc]); + + /* LINTED */ + AS_2_BIN(as, seg, vp, vaddr, bin, szc); + + ASSERT(bin < PAGE_GET_PAGECOLORS(szc)); + + /* + * Try to get a local page first, but try remote if we can't + * get a page of the right color. + */ +pgretry: + LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_LOCAL); + while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) { + pp = page_get_func(mnode, bin, mtype, szc, flags); + if (pp != NULL) { + VM_STAT_ADD(vmm_vmstats.pgf_allocok[szc]); + DTRACE_PROBE4(page__get, + lgrp_t *, lgrp, + int, mnode, + ulong_t, bin, + uint_t, flags); + return (pp); + } + } + ASSERT(pp == NULL); + + /* + * for non-SZC0 PAGESIZE requests, check cachelist before checking + * remote free lists. Caller expected to call page_get_cachelist which + * will check local cache lists and remote free lists. + */ + if (szc == 0 && ((flags & PGI_PGCPSZC0) == 0)) { + VM_STAT_ADD(vmm_vmstats.pgf_allocdeferred); + return (NULL); + } + + ASSERT(szc > 0 || (flags & PGI_PGCPSZC0)); + + lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1); + + if (!(flags & PG_LOCAL)) { + /* + * Try to get a non-local freelist page. + */ + LGRP_MNODE_COOKIE_UPGRADE(lgrp_cookie); + while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) { + pp = page_get_func(mnode, bin, mtype, szc, flags); + if (pp != NULL) { + DTRACE_PROBE4(page__get, + lgrp_t *, lgrp, + int, mnode, + ulong_t, bin, + uint_t, flags); + VM_STAT_ADD(vmm_vmstats.pgf_allocokrem[szc]); + return (pp); + } + } + ASSERT(pp == NULL); + } + + /* + * when the cage is off chances are page_get_contig_pages() will fail + * to lock a large page chunk therefore when the cage is off it's not + * called by default. this can be changed via /etc/system. + * + * page_get_contig_pages() also called to acquire a base pagesize page + * for page_create_get_something(). + */ + if (!(flags & PG_NORELOC) && (pg_contig_disable == 0) && + (kcage_on || pg_lpgcreate_nocage || szc == 0) && + (page_get_func != page_get_contig_pages)) { + + VM_STAT_ADD(vmm_vmstats.pgf_allocretry[szc]); + page_get_func = page_get_contig_pages; + goto pgretry; + } + + if (!(flags & PG_LOCAL) && pgcplimitsearch && + page_get_func == page_get_contig_pages) + SETPGCPFAILCNT(szc); + + VM_STAT_ADD(vmm_vmstats.pgf_allocfailed[szc]); + return (NULL); } /* @@ -3929,7 +3905,7 @@ } /* LINTED */ - AS_2_BIN(PFLT_USER, as, seg, vp, vaddr, bin, 0); + AS_2_BIN(as, seg, vp, vaddr, bin, 0); ASSERT(bin < PAGE_GET_PAGECOLORS(0)); @@ -3964,7 +3940,7 @@ */ LGRP_MNODE_COOKIE_UPGRADE(lgrp_cookie); while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) { - pp = page_get_mnode_freelist(ufltp, mnode, bin, mtype, + pp = page_get_mnode_freelist(mnode, bin, mtype, 0, flags); if (pp != NULL) { VM_STAT_ADD(vmm_vmstats.pgc_allocokdeferred); @@ -4027,16 +4003,7 @@ if (!PAGE_CACHELISTS(mnode, bin, mtype)) goto bin_empty_1; - /* - * The first parameter is irrelevant here as the flags - * parameter to this macro decides which mutex to lock. - * With the PG_CACHE_LIST flag, we lock the cpc_mutex[]. - * - * User pages from the kernel page freelist may be - * on the cachelist. - */ - pcm = PC_BIN_MUTEX(PFLT_USER, mnode, bin, - PG_CACHE_LIST); + pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST); mutex_enter(pcm); pp = PAGE_CACHELISTS(mnode, bin, mtype); if (pp == NULL) @@ -4096,11 +4063,7 @@ if (PP_ISNORELOC(pp)) { kcage_freemem_sub(1); } -#elif defined(__amd64) && !defined(__xpv) - if (PP_ISKFLT(pp)) { - kflt_freemem_sub(1); - } -#endif /* __sparc */ +#endif VM_STAT_ADD(vmm_vmstats. pgmc_allocok); return (pp); } @@ -4239,8 +4202,8 @@ (mnode = lgrp_memnode_choose(&lgrp_cookie)) != -1) { pplist = - page_get_mnode_freelist(ufltp, - mnode, bin, mtype, szc, flags); + page_get_mnode_freelist(mnode, bin, + mtype, szc, flags); } /* @@ -4280,7 +4243,7 @@ * First try the local freelist... */ mnode = PP_2_MEM_NODE(like_pp); - pplist = page_get_mnode_freelist(ufltp, mnode, bin, + pplist = page_get_mnode_freelist(mnode, bin, mtype, szc, flags); if (pplist != NULL) break; @@ -4319,13 +4282,14 @@ (mem_node_config[mnode].exists == 0)) continue; - pplist = page_get_mnode_freelist(ufltp, mnode, + pplist = page_get_mnode_freelist(mnode, bin, mtype, szc, flags); } if (pplist != NULL) break; + /* Now try remote cachelists */ LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_HIER); @@ -4373,7 +4337,7 @@ lgrp_memnode_choose(&lgrp_cookie)) != -1) { pplist = page_get_contig_pages( - ufltp, mnode, bin, mtype, szc, + mnode, bin, mtype, szc, flags | PGI_PGCPHIPRI); } break; @@ -4482,481 +4446,3 @@ } } } - -/* - * The freelist type data structures allow freelist type specific allocation - * and policy routines to be configured. There are two freelist types currently - * defined, one for kernel memory allocation and the the other for user memory. - * The page_get_uflt() routine is called by the PAGE_GET_FREELISTS() macro to - * allocate memory from the user freelist type. - */ - -/* ARGSUSED */ -page_t * -page_get_uflt(struct vnode *vp, u_offset_t off, struct seg *seg, caddr_t vaddr, - size_t size, uint_t flags, struct lgrp *lgrp) -{ - struct as *as = seg->s_as; - ulong_t bin; - uchar_t szc; - int mtype; - - /* - * If we aren't passed a specific lgroup, or passed a freed lgrp - * assume we wish to allocate near the current thread's home. - */ - if (!LGRP_EXISTS(lgrp)) - lgrp = lgrp_home_lgrp(); - - if (kcage_on) { - if ((flags & (PG_NORELOC | PG_PANIC)) == PG_NORELOC && - kcage_freemem < kcage_throttlefree + btop(size) && - curthread != kcage_cageout_thread) { - /* - * Set a "reserve" of kcage_throttlefree pages for - * PG_PANIC and cageout thread allocations. - * - * Everybody else has to serialize in - * page_create_get_something() to get a cage page, so - * that we don't deadlock cageout! - */ - return (NULL); - } - } else { - flags &= ~PG_NORELOC; - flags |= PGI_NOCAGE; - } - - /* LINTED */ - MTYPE_INIT(mtype, vp, vaddr, flags, size); - - /* - * Convert size to page size code. - */ - if ((szc = page_szc(size)) == (uchar_t)-1) - panic("page_get_uflt: illegal page size request"); - ASSERT(szc < mmu_page_sizes); - - VM_STAT_ADD(vmm_vmstats.pgf_alloc[szc][ufltp->pflt_type]); - - /* LINTED */ - AS_2_BIN(PFLT_USER, as, seg, vp, vaddr, bin, szc); - - ASSERT(bin < PAGE_GET_PAGECOLORS(szc)); - - return (page_get_flist(ufltp, bin, mtype, szc, flags, lgrp)); -} - -/* - * This routine is passed a page color and inital mtype, and calls the page - * freelist type policy routines which actually do the allocations, first - * trying the local and then remote lgroups. The policy routines for user - * page allocations are currently configured to be: - * - * x64 systems support two freelist types, user and kernel. - * - * The user freelist has 3 policy routines. - * - * 1. page_get_mnode_freelist to allocate a page from the user freelists. - * 2. page_user_alloc_kflt to allocate a page from the kernel freelists - * 3. page_get_contig_pages to search for a large page in physical memory. - * - * The kernel freelist has only 1 policy routine. - * - * 1. page_get_mnode_freelist to allocate a page from the kernel freelists. - * - * Sparc, x32 and Xen, systems support only the user freelist type. - * - * The user freelist has 2 policy routines. - * - * 1. page_get_mnode_freelist to allocate a page from the user freelists. - * 2. page_get_contig_pages to search for a large page in physical memory. - * - */ -page_t * -page_get_flist(page_freelist_type_t *fltp, uint_t bin, int mtype, - uchar_t szc, uint_t flags, struct lgrp *lgrp) -{ - page_t *pp = NULL; - page_t *(*page_get_func)(page_freelist_type_t *, - int, uint_t, int, uchar_t, uint_t); - lgrp_mnode_cookie_t lgrp_cookie; - int i; - int mnode; - - for (i = 0; i < fltp->pflt_num_policies; i++) { - page_get_func = PAGE_GET_FREELISTS_POLICY(fltp, i); - - /* - * when the cage and the kernel freelist are off chances are - * that page_get_contig_pages() will fail to lock a large - * page chunk therefore in this case it's not called by - * default. This can be changed via /etc/system. - * - * page_get_contig_pages() also called to acquire a base - * pagesize page for page_create_get_something(). - */ - if (page_get_func == page_get_contig_pages) { - if ((flags & PG_NORELOC) || - (pg_contig_disable != 0) || - (!kcage_on && !kflt_on && - !pg_lpgcreate_nocage && szc != 0)) { - continue; -#ifdef VM_STATS - } else { - VM_STAT_ADD( - vmm_vmstats. - pgf_allocretry[szc][fltp->pflt_type]); -#endif - } - } - - /* - * Try to get a local page first, but try remote if we can't - * get a page of the right color. - */ - LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_LOCAL); - while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) { - - pp = page_get_func(fltp, mnode, bin, mtype, szc, - flags); - if (pp != NULL) { -#ifdef VM_STATS - VM_STAT_ADD( - vmm_vmstats. - pgf_allocok[szc][fltp->pflt_type]); -#endif - DTRACE_PROBE4(page__get__page, - lgrp_t *, lgrp, - int, mnode, - ulong_t, bin, - uint_t, flags); - return (pp); - } - } - ASSERT(pp == NULL); - - /* - * for non-PGI_PGCPSZC0 PAGESIZE requests, check cachelist - * before checking remote free lists. Caller expected to call - * page_get_cachelist which will check local cache lists - * and remote free lists. - */ - if (!PC_ISKFLT(fltp) && szc == 0 && - ((flags & PGI_PGCPSZC0) == 0)) { - VM_STAT_ADD(vmm_vmstats.pgf_allocdeferred); - return (NULL); - } - - ASSERT(PC_ISKFLT(fltp) || szc > 0 || (flags & PGI_PGCPSZC0)); - - lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1); - - if (!(flags & PG_LOCAL)) { - /* - * Try to get a non-local freelist page. - */ - LGRP_MNODE_COOKIE_UPGRADE(lgrp_cookie); - while ((mnode = - lgrp_memnode_choose(&lgrp_cookie)) >= 0) { - pp = page_get_func(fltp, mnode, bin, mtype, - szc, flags); - if (pp != NULL) { - DTRACE_PROBE4(page__get, - lgrp_t *, lgrp, - int, mnode, - ulong_t, bin, - uint_t, flags); -#ifdef VM_STATS - VM_STAT_ADD(vmm_vmstats. - pgf_allocokrem[szc] - [fltp->pflt_type]); -#endif - return (pp); - } - } - ASSERT(pp == NULL); - } - - if (!(flags & PG_LOCAL) && pgcplimitsearch && - page_get_func == page_get_contig_pages) - SETPGCPFAILCNT(szc); - } - -#ifdef VM_STATS - VM_STAT_ADD(vmm_vmstats.pgf_allocfailed[szc][fltp->pflt_type]); -#endif - - return (NULL); -} -#if defined(__amd64) && !defined(__xpv) -/* - * The page_get_kflt() routine is called by the PAGE_GET_FREELISTS() macro to - * allocate memory from the kernel freelist type. - */ -/* ARGSUSED */ -page_t * -page_get_kflt(struct vnode *vp, u_offset_t off, struct seg *seg, caddr_t vaddr, - size_t size, uint_t flags, struct lgrp *lgrp) -{ - struct as *as = seg->s_as; - page_t *pp = NULL; - ulong_t bin; - uchar_t szc; - int mtype; - - ASSERT(!kcage_on); - ASSERT(kflt_on); - ASSERT((flags & PG_KFLT) == PG_KFLT); - - flags &= ~PG_NORELOC; - flags |= PGI_NOCAGE; - - if ((flags & PG_PANIC) == 0 && - kflt_freemem < kflt_throttlefree + btop(size) && - curthread != kflt_evict_thread) { - return (NULL); - } - - /* LINTED */ - MTYPE_INIT(mtype, vp, vaddr, flags, size); - - /* - * If we aren't passed a specific lgroup, or passed a freed lgrp - * assume we wish to allocate near to the current thread's home. - */ - if (!LGRP_EXISTS(lgrp)) - lgrp = lgrp_home_lgrp(); - - /* - * Convert size to page size code. - */ - if ((szc = page_szc(size)) == (uchar_t)-1) - panic("page_get_kflt: illegal page size request"); - ASSERT(szc == 0); - ASSERT(!(flags & PG_LOCAL)); - - VM_STAT_ADD(vmm_vmstats.pgf_alloc[szc][kfltp->pflt_type]); - - /* LINTED */ - AS_2_BIN(PFLT_KMEM, as, seg, vp, vaddr, bin, szc); - - ASSERT(bin < PAGE_GET_PAGECOLORS(szc)); - ASSERT(bin < KFLT_PAGE_COLORS); - -retry: - pp = page_get_flist(kfltp, bin, mtype, szc, flags, lgrp); - - if (pp != NULL) { - return (pp); - } - -#if defined(__amd64) - if (kernel_page_update_flags_x86(&flags)) { - goto retry; - } -#endif - /* - * Import memory from user page freelists. - */ - - /* LINTED: constant in conditional context */ - AS_2_BIN(PFLT_USER, as, seg, vp, vaddr, bin, KFLT_PAGESIZE); - - ASSERT(bin < PAGE_GET_PAGECOLORS(KFLT_PAGESIZE)); - - if ((pp = page_import_kflt(kfltp, bin, mtype, szc, - flags | PGI_NOPGALLOC | PGI_PGCPHIPRI, NULL)) != NULL) { - VM_STAT_ADD(vmm_vmstats.pgf_allocok[szc][kfltp->pflt_type]); - return (pp); - } - - VM_STAT_ADD(vmm_vmstats.pgf_allocfailed[szc][kfltp->pflt_type]); - return (NULL); -} - -/* - * This is the policy routine used to allocate user memory on the kernel - * freelist. - */ -/* ARGSUSED */ -page_t * -page_user_alloc_kflt(page_freelist_type_t *fp, int mnode, uint_t bin, int mtype, - uchar_t szc, uint_t flags) -{ - page_t *pp; - - if (szc != 0) - return (NULL); - - if (kflt_freemem < kflt_desfree) { - kflt_evict_wakeup(); - } - flags &= ~PG_MATCH_COLOR; - - bin = USER_2_KMEM_BIN(bin); - - if ((pp = page_get_mnode_freelist(kfltp, mnode, - bin, mtype, szc, flags)) != NULL) { - VM_STAT_ADD(vmm_vmstats.puak_allocok); - atomic_add_long(&kflt_user_alloc, 1); - PP_SETUSERKFLT(pp); - return (pp); - } - - VM_STAT_ADD(vmm_vmstats.puak_allocfailed); - return (NULL); -} - -/* - * This routine is called in order to allocate a large page from the user page - * freelist and split this into small pages which are then placed on the kernel - * freelist. If it is is called from kflt_expand() routine the PGI_NOPGALLOC - * flag is set to indicate that all pages should be placed on the freelist, - * otherwise a page of the requested type and color will be returned. - */ -/* ARGSUSED */ -page_t * -page_import_kflt(page_freelist_type_t *fp, uint_t bin, int mtype, - uchar_t szc, uint_t flags, int *np) -{ - page_t *pp, *pplist; - uint_t alloc_szc = KFLT_PAGESIZE; - kmutex_t *pcm; - page_t *ret_pp = NULL; - uint_t req_bin = bin; - int req_mtype = mtype; - int pgcnt = 0; - int pgalloc; - int mnode; - struct lgrp *lgrp; - - ASSERT(szc == 0); - - flags &= ~(PG_LOCAL|PG_MATCH_COLOR); - lgrp = lgrp_home_lgrp(); - - pgalloc = ((flags & PGI_NOPGALLOC) == 0); - - /* Allocate a large page from the user pagelist */ - if ((pplist = page_get_flist(ufltp, bin, mtype, alloc_szc, - flags, lgrp)) != NULL) { - - VM_STAT_ADD(vmm_vmstats.pgik_allocok); - CHK_LPG(pplist, alloc_szc); - mnode = PP_2_MEM_NODE(pplist); - /* - * Split up the large page and put the constituent pages - * on the kernel freelist. - */ - while (pplist) { - pgcnt++; - pp = pplist; - ASSERT(pp->p_szc == alloc_szc); - ASSERT(PP_ISFREE(pp)); - mach_page_sub(&pplist, pp); - - pp->p_szc = 0; - PP_SETKFLT(pp); - mtype = PP_2_MTYPE(pp); - bin = PP_2_BIN(pp); - if (pgalloc && (ret_pp == NULL) && - ((bin == req_bin && mtype == req_mtype))) { - ret_pp = pp; - } else { - pcm = PC_BIN_MUTEX(PFLT_KMEM, mnode, bin, - PG_FREE_LIST); - ASSERT(mtype == PP_2_MTYPE(pp)); - mutex_enter(pcm); - mach_page_add(PAGE_FREELISTP(PFLT_KMEM, mnode, - 0, bin, mtype), pp); - page_ctr_add(mnode, mtype, pp, PG_FREE_LIST); - mutex_exit(pcm); - page_unlock(pp); - } - } - - if (np != NULL) - *np = pgcnt; - - if (ret_pp == NULL) { - kflt_freemem_add(pgcnt); - } else { - kflt_freemem_add(pgcnt - 1); - } - return (ret_pp); - - } else { - - VM_STAT_ADD(vmm_vmstats.pgik_allocfailed); - return (NULL); - } -} - -/* - * This routine is called from the kflt_user_evict() thread when kernel - * memory is low and the thread has not managed to increase it by freeing up - * user pages - */ -void -kflt_expand() -{ - ulong_t bin; - int mtype; - uint_t flags; - spgcnt_t wanted; - caddr_t vaddr; - int np; - int lpallocated = 0; - int retries; - - ASSERT(kflt_on); - vaddr = 0; - flags = PGI_NOPGALLOC | PGI_PGCPHIPRI; - - wanted = MAX(kflt_lotsfree, kflt_throttlefree + kflt_needfree) - - kflt_freemem; - - if (wanted <= 0) { - return; - } - - /* LINTED */ - MTYPE_INIT(mtype, &kvp, vaddr, flags, KFLT_PAGESIZE); - -#if defined(__amd64) - (void) kernel_page_update_flags_x86(&flags); -#endif - /* LINTED */ - AS_2_BIN(PFLT_USER, &kas, NULL, &kvp, vaddr, bin, 1); - - retries = 0; - while (kflt_on && wanted > 0) { - (void) page_import_kflt(kfltp, bin, mtype, 0, - flags, &np); - - if (np == 0) { - if (lpallocated == 0 && - retries < KFLT_EXPAND_RETRIES) { - retries++; - ASSERT((flags & (PGI_NOPGALLOC | PGI_PGCPHIPRI)) - == (PGI_NOPGALLOC | PGI_PGCPHIPRI)); - continue; - } - break; - } else { - wanted -= np; - lpallocated = 1; - } - - } - -#ifdef DEBUG - if (lpallocated) { - VM_STAT_ADD(vmm_vmstats.pgkx_allocok); - } else { - VM_STAT_ADD(vmm_vmstats.pgkx_allocfailed); - } -#endif -} -#endif /* __amd64 && !__xpv */
--- a/usr/src/uts/i86pc/Makefile.files Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/i86pc/Makefile.files Mon May 03 04:17:29 2010 -0700 @@ -75,7 +75,6 @@ kdi_idt.o \ kdi_idthdl.o \ kdi_asm.o \ - kflt_mem.o \ lgrpplat.o \ mach_kdi.o \ mach_sysconfig.o \
--- a/usr/src/uts/i86pc/os/startup.c Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/i86pc/os/startup.c Mon May 03 04:17:29 2010 -0700 @@ -119,7 +119,6 @@ #include <sys/ddi_timer.h> #include <sys/systeminfo.h> #include <sys/multiboot.h> -#include <sys/kflt_mem.h> #ifdef __xpv @@ -234,14 +233,6 @@ caddr_t rm_platter_va = 0; uint32_t rm_platter_pa; -/* - * On 64 bit systems enable the kernel page freelist - */ -#if defined(__amd64) && !defined(__xpv) -int kflt_disable = 0; -#else -int kflt_disable = 1; -#endif /* __amd64 && !__xpv */ int auto_lpg_disable = 1; /* @@ -2196,13 +2187,6 @@ #endif /* - * Create the kernel page freelist management thread for x64 systems. - */ - if (!kflt_disable) { - kflt_init(); - } - - /* * Configure the system. */ PRM_POINT("Calling configure()...");
--- a/usr/src/uts/i86pc/vm/kflt_mem.c Sun May 02 21:47:48 2010 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,990 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2010, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/thread.h> -#include <sys/proc.h> -#include <sys/callb.h> -#include <sys/vnode.h> -#include <sys/debug.h> -#include <sys/systm.h> /* for bzero */ -#include <sys/memlist.h> -#include <sys/cmn_err.h> -#include <sys/sysmacros.h> -#include <sys/vmsystm.h> /* for NOMEMWAIT() */ -#include <sys/atomic.h> /* used to update kflt_freemem */ -#include <sys/kmem.h> /* for kmem_reap */ -#include <sys/errno.h> -#include <sys/kflt_mem.h> -#include <vm/seg_kmem.h> -#include <vm/page.h> -#include <vm/hat.h> -#include <vm/vm_dep.h> -#include <sys/mem_config.h> -#include <sys/lgrp.h> -#include <sys/rwlock.h> -#include <sys/cpupart.h> - -#ifdef DEBUG -#define KFLT_STATS -#endif - -#ifdef KFLT_STATS - -#define KFLT_STATS_VERSION 1 /* can help report generators */ -#define KFLT_STATS_NSCANS 256 /* depth of scan statistics buffer */ - -struct kflt_stats_scan { - /* managed by KFLT_STAT_* macros */ - clock_t scan_lbolt; - uint_t scan_id; - - /* set in kflt_user_evict() */ - uint_t kt_passes; - clock_t kt_ticks; - pgcnt_t kt_kflt_freemem_start; - pgcnt_t kt_kflt_freemem_end; - pgcnt_t kt_kflt_user_alloc_start; - pgcnt_t kt_kflt_user_alloc_end; - pgcnt_t kt_pfn_start; - pgcnt_t kt_pfn_end; - pgcnt_t kt_mnode_start; - pgcnt_t kt_mnode_end; - uint_t kt_examined; - uint_t kt_cantlock; - uint_t kt_skiplevel; - uint_t kt_skipshared; - uint_t kt_skiprefd; - uint_t kt_destroy; - - /* set in kflt_invalidate_page() */ - uint_t kip_reloclocked; - uint_t kip_relocmod; - uint_t kip_destroy; - uint_t kip_nomem; - uint_t kip_demotefailed; - - /* set in kflt_export */ - uint_t kex_lp; - uint_t kex_err; - uint_t kex_scan; -}; - -struct kflt_stats { - /* managed by KFLT_STAT_* macros */ - uint_t version; - uint_t size; - - /* set in kflt_evict_thread */ - uint_t kt_wakeups; - uint_t kt_scans; - uint_t kt_evict_break; - - /* set in kflt_create_throttle */ - uint_t kft_calls; - uint_t kft_user_evict; - uint_t kft_critical; - uint_t kft_exempt; - uint_t kft_wait; - uint_t kft_progress; - uint_t kft_noprogress; - uint_t kft_timeout; - - /* managed by KFLT_STAT_* macros */ - uint_t scan_array_size; - uint_t scan_index; - struct kflt_stats_scan scans[KFLT_STATS_NSCANS]; -}; - -static struct kflt_stats kflt_stats; -static struct kflt_stats_scan kflt_stats_scan_zero; - -/* - * No real need for atomics here. For the most part the incs and sets are - * done by the kernel freelist thread. There are a few that are done by any - * number of other threads. Those cases are noted by comments. - */ -#define KFLT_STAT_INCR(m) kflt_stats.m++ - -#define KFLT_STAT_NINCR(m, v) kflt_stats.m += (v) - -#define KFLT_STAT_INCR_SCAN(m) \ - KFLT_STAT_INCR(scans[kflt_stats.scan_index].m) - -#define KFLT_STAT_NINCR_SCAN(m, v) \ - KFLT_STAT_NINCR(scans[kflt_stats.scan_index].m, v) - -#define KFLT_STAT_SET(m, v) kflt_stats.m = (v) - -#define KFLT_STAT_SETZ(m, v) \ - if (kflt_stats.m == 0) kflt_stats.m = (v) - -#define KFLT_STAT_SET_SCAN(m, v) \ - KFLT_STAT_SET(scans[kflt_stats.scan_index].m, v) - -#define KFLT_STAT_SETZ_SCAN(m, v) \ - KFLT_STAT_SETZ(scans[kflt_stats.scan_index].m, v) - -#define KFLT_STAT_INC_SCAN_INDEX \ - KFLT_STAT_SET_SCAN(scan_lbolt, ddi_get_lbolt()); \ - KFLT_STAT_SET_SCAN(scan_id, kflt_stats.scan_index); \ - kflt_stats.scan_index = \ - (kflt_stats.scan_index + 1) % KFLT_STATS_NSCANS; \ - kflt_stats.scans[kflt_stats.scan_index] = kflt_stats_scan_zero - -#define KFLT_STAT_INIT_SCAN_INDEX \ - kflt_stats.version = KFLT_STATS_VERSION; \ - kflt_stats.size = sizeof (kflt_stats); \ - kflt_stats.scan_array_size = KFLT_STATS_NSCANS; \ - kflt_stats.scan_index = 0 - -#else /* KFLT_STATS */ - -#define KFLT_STAT_INCR(v) -#define KFLT_STAT_NINCR(m, v) -#define KFLT_STAT_INCR_SCAN(v) -#define KFLT_STAT_NINCR_SCAN(m, v) -#define KFLT_STAT_SET(m, v) -#define KFLT_STAT_SETZ(m, v) -#define KFLT_STAT_SET_SCAN(m, v) -#define KFLT_STAT_SETZ_SCAN(m, v) -#define KFLT_STAT_INC_SCAN_INDEX -#define KFLT_STAT_INIT_SCAN_INDEX - -#endif /* KFLT_STATS */ - -/* Internal Routines */ -void kflt_init(void); -void kflt_evict_wakeup(void); -static boolean_t kflt_evict_cpr(void *, int); -static void kflt_thread_init(void); -static pfn_t kflt_get_next_pfn(int *, pfn_t); -static void kflt_user_evict(void); -static int kflt_invalidate_page(page_t *, pgcnt_t *); -static int kflt_relocate_page(page_t *, pgcnt_t *); - -extern mnoderange_t *mnoderanges; -extern int mnoderangecnt; -void wakeup_pcgs(void); - -page_t *page_promote(int, pfn_t, uchar_t, int, int); - -static kcondvar_t kflt_evict_cv; /* evict thread naps here */ -static kmutex_t kflt_evict_mutex; /* protects cv and ready flag */ -static int kflt_evict_ready; /* nonzero when evict thread ready */ -kthread_id_t kflt_evict_thread; /* to aid debugging */ -static kmutex_t kflt_throttle_mutex; /* protects kflt_throttle_cv */ -static kcondvar_t kflt_throttle_cv; - -/* - * Statistics used to drive the behavior of the evict demon. - */ -pgcnt_t kflt_freemem; /* free memory on kernel freelist */ -pgcnt_t kflt_needfree; /* memory requirement for throttled threads */ -pgcnt_t kflt_lotsfree; /* export free kernel memory if > lotsfree */ -pgcnt_t kflt_desfree; /* wakeup evict thread if freemem < desfree */ -pgcnt_t kflt_minfree; /* keep scanning if freemem < minfree */ -pgcnt_t kflt_user_alloc; /* user memory allocated on kernel freelist */ -pgcnt_t kflt_throttlefree; /* throttle non-critical threads */ -pgcnt_t kflt_reserve; /* don't throttle real time if > reserve */ - /* time in seconds to check on throttled threads */ -int kflt_maxwait = 10; - -int kflt_on = 0; /* indicates evict thread is initialised */ - -/* - * This is called before a CPR suspend and after a CPR resume. We have to - * turn off kflt_evict before a suspend, and turn it back on after a - * restart. - */ -/*ARGSUSED*/ -static boolean_t -kflt_evict_cpr(void *arg, int code) -{ - if (code == CB_CODE_CPR_CHKPT) { - ASSERT(kflt_evict_ready); - kflt_evict_ready = 0; - return (B_TRUE); - } else if (code == CB_CODE_CPR_RESUME) { - ASSERT(kflt_evict_ready == 0); - kflt_evict_ready = 1; - return (B_TRUE); - } - return (B_FALSE); -} - -/* - * Sets up kernel freelist related statistics and starts the evict thread. - */ -void -kflt_init(void) -{ - ASSERT(!kflt_on); - - if (kflt_disable) { - return; - } - - mutex_init(&kflt_evict_mutex, NULL, MUTEX_DEFAULT, NULL); - cv_init(&kflt_evict_cv, NULL, CV_DEFAULT, NULL); - - if (kflt_lotsfree == 0) - kflt_lotsfree = MAX(32, total_pages / 128); - - if (kflt_minfree == 0) - kflt_minfree = MAX(32, kflt_lotsfree / 4); - - if (kflt_desfree == 0) - kflt_desfree = MAX(32, kflt_minfree); - - if (kflt_throttlefree == 0) - kflt_throttlefree = MAX(32, kflt_minfree / 2); - - if (kflt_reserve == 0) - kflt_reserve = MAX(32, kflt_throttlefree / 2); - - (void) callb_add(kflt_evict_cpr, NULL, CB_CL_CPR_POST_KERNEL, - "kflt_evict_thread"); - - kflt_on = 1; - kflt_thread_init(); -} - -/* - * Wakeup kflt_user_evict thread and throttle waiting for the number of pages - * requested to become available. For non-critical requests, a - * timeout is added, since freemem accounting is separate from kflt - * freemem accounting: it's possible for us to get stuck and not make - * forward progress even though there was sufficient freemem before - * arriving here. - */ -int -kflt_create_throttle(pgcnt_t npages, int flags) -{ - int niter = 0; - pgcnt_t lastfree; - int enough = kflt_freemem > kflt_throttlefree + npages; - - KFLT_STAT_INCR(kft_calls); /* unprotected incr. */ - - kflt_evict_wakeup(); /* just to be sure */ - KFLT_STAT_INCR(kft_user_evict); /* unprotected incr. */ - - /* - * Obviously, we can't throttle the evict thread since - * we depend on it. We also can't throttle the panic thread. - */ - if (curthread == kflt_evict_thread || - !kflt_evict_ready || panicstr) { - KFLT_STAT_INCR(kft_user_evict); /* unprotected incr. */ - return (KFT_CRIT); - } - - /* - * Don't throttle threads which are critical for proper - * vm management if we're above kfLt_throttlefree or - * if freemem is very low. - */ - if (NOMEMWAIT()) { - if (enough) { - KFLT_STAT_INCR(kft_exempt); /* unprotected incr. */ - return (KFT_CRIT); - } else if (freemem < minfree) { - KFLT_STAT_INCR(kft_critical); /* unprotected incr. */ - return (KFT_CRIT); - } - } - - /* - * Don't throttle real-time threads if kflt_freemem > kflt_reserve. - */ - if (DISP_PRIO(curthread) > maxclsyspri && - kflt_freemem > kflt_reserve) { - KFLT_STAT_INCR(kft_exempt); /* unprotected incr. */ - return (KFT_CRIT); - } - - /* - * Cause all other threads (which are assumed to not be - * critical to kflt_user_evict) to wait here until their request - * can be satisfied. Be a little paranoid and wake the - * kernel evict thread on each loop through this logic. - */ - while (kflt_freemem < kflt_throttlefree + npages) { - ASSERT(kflt_on); - - lastfree = kflt_freemem; - - if (kflt_evict_ready) { - mutex_enter(&kflt_throttle_mutex); - - kflt_needfree += npages; - KFLT_STAT_INCR(kft_wait); - - kflt_evict_wakeup(); - KFLT_STAT_INCR(kft_user_evict); - - cv_wait(&kflt_throttle_cv, &kflt_throttle_mutex); - - kflt_needfree -= npages; - - mutex_exit(&kflt_throttle_mutex); - } else { - /* - * NOTE: atomics are used just in case we enter - * mp operation before the evict thread is ready. - */ - atomic_add_long(&kflt_needfree, npages); - - kflt_evict_wakeup(); - KFLT_STAT_INCR(kft_user_evict); /* unprotected incr. */ - - atomic_add_long(&kflt_needfree, -npages); - } - - if ((flags & PG_WAIT) == 0) { - if (kflt_freemem > lastfree) { - KFLT_STAT_INCR(kft_progress); - niter = 0; - } else { - KFLT_STAT_INCR(kft_noprogress); - if (++niter >= kflt_maxwait) { - KFLT_STAT_INCR(kft_timeout); - return (KFT_FAILURE); - } - } - } - - if (NOMEMWAIT() && freemem < minfree) { - return (KFT_CRIT); - } - - } - return (KFT_NONCRIT); -} -/* - * Creates the kernel freelist evict thread. - */ -static void -kflt_thread_init(void) -{ - if (kflt_on) { - if (thread_create(NULL, 0, kflt_user_evict, - NULL, 0, &p0, TS_RUN, maxclsyspri - 1) == NULL) { - kflt_on = 0; - } - } -} - -/* - * This routine is used by the kernel freelist evict thread to iterate over the - * pfns. - */ -static pfn_t -kflt_get_next_pfn(int *mnode, pfn_t pfn) -{ - ASSERT((*mnode >= 0) && (*mnode <= mnoderangecnt)); - ASSERT((pfn == PFN_INVALID) || (pfn >= mnoderanges[*mnode].mnr_pfnlo)); - - if (pfn == PFN_INVALID) { - *mnode = 0; - pfn = mnoderanges[0].mnr_pfnlo; - return (pfn); - } - - pfn++; - if (pfn > mnoderanges[*mnode].mnr_pfnhi) { - (*mnode)++; - if (*mnode >= mnoderangecnt) { - return (PFN_INVALID); - } - pfn = mnoderanges[*mnode].mnr_pfnlo; - } - return (pfn); -} -/* - * Locks all the kernel page freelist mutexes before promoting a group of pages - * and returning the large page to the user page freelist. - */ -void -page_kflt_lock(int mnode) -{ - int i; - for (i = 0; i < NPC_MUTEX; i++) { - mutex_enter(KFPC_MUTEX(mnode, i)); - } -} - -/* - * Unlocks all the kernel page freelist mutexes after promoting a group of pages - * and returning the large page to the user page freelist. - */ -void -page_kflt_unlock(int mnode) -{ - int i; - for (i = 0; i < NPC_MUTEX; i++) { - mutex_exit(KFPC_MUTEX(mnode, i)); - } -} - -/* - * This routine is called by the kflt_user_evict() thread whenever a free page - * is found on the kernel page freelist and there is an excess of free memory on - * the kernel freelist. It determines whether it is possible to promote groups - * of small free pages into a large page which can then be returned to the - * user page freelist. - */ -static int -kflt_export(page_t *pp, int init_state) -{ - static page_t *lp_base = 0; - static pfn_t lp_base_page_num = 0; - static pgcnt_t lp_count = 0; - page_t *tpp; - page_t *lpp; - pfn_t lp_page_num; - int mtype; - int mnode; - int bin; - pgcnt_t pages_left, npgs; - uchar_t new_szc = KFLT_PAGESIZE; - int ret; - kmutex_t *pcm; - - - /* - * We're not holding any locks yet, so pp state may change. - */ - if (init_state || !PP_ISFREE(pp) || !PP_ISKFLT(pp)) { - lp_base = NULL; - lp_base_page_num = 0; - lp_count = 0; - return (0); - } - - ret = 0; - npgs = page_get_pagecnt(new_szc); - lp_page_num = PFN_BASE(pp->p_pagenum, new_szc); - - /* Count pages with the same large page base */ - if (lp_page_num == lp_base_page_num) { - ASSERT((pp->p_pagenum - lp_base_page_num) < npgs); - ASSERT(lp_count < npgs); - lp_count++; - if (lp_count == npgs) { - KFLT_STAT_INCR_SCAN(kex_lp); - ASSERT(lp_base != NULL); - mnode = PP_2_MEM_NODE(pp); - page_kflt_lock(mnode); - - /* - * Check that all pages are still free and on the kernel - * freelist. - */ - for (tpp = lp_base, pages_left = npgs; pages_left; - tpp++, pages_left--) { - if (!PP_ISFREE(tpp) || !PP_ISKFLT(tpp)) { - page_kflt_unlock(mnode); - KFLT_STAT_INCR_SCAN(kex_err); - goto out; - } - } - - lpp = page_promote(PP_2_MEM_NODE(lp_base), - lp_base_page_num, new_szc, PC_KFLT_EXPORT, - PP_2_MTYPE(lp_base)); - page_kflt_unlock(mnode); - -#ifdef KFLT_STATS - if (lpp == NULL) - VM_STAT_ADD(vmm_vmstats.pgexportfail); -#endif - if (lpp != NULL) { - VM_STAT_ADD(vmm_vmstats.pgexportok); - /* clear kflt bit in each page */ - tpp = lpp; - do { - ASSERT(PP_ISKFLT(tpp)); - ASSERT(PP_ISFREE(tpp)); - PP_CLRKFLT(tpp); - tpp = tpp->p_next; - } while (tpp != lpp); - - /* - * Return large page to the user page - * freelist - */ - atomic_add_long(&kflt_freemem, -npgs); - bin = PP_2_BIN(lpp); - mnode = PP_2_MEM_NODE(lpp); - mtype = PP_2_MTYPE(lpp); - pcm = PC_FREELIST_BIN_MUTEX(PFLT_USER, mnode, - bin, 0); - mutex_enter(pcm); - page_vpadd(PAGE_FREELISTP(PFLT_USER, mnode, - new_szc, bin, mtype), lpp); - mutex_exit(pcm); - ret = 1; - } - } - } else { -out: - lp_base = pp; - lp_base_page_num = lp_page_num; - lp_count = 1; - } - return (ret); -} - -/* - * This thread is woken up whenever pages are added or removed from the kernel - * page freelist and free memory on this list is low, or when there is excess - * memory on the kernel freelist. It iterates over the physical pages in the - * system and has two main tasks: - * - * 1) Relocate user pages which have been allocated on the kernel page freelist - * wherever this is possible. - * - * 2) Identify groups of free pages on the kernel page freelist which can be - * promoted to large pages and then exported to the user page freelist. - */ -static void -kflt_user_evict(void) -{ - pfn_t pfn; - int mnode; - page_t *pp = NULL; - callb_cpr_t cprinfo; - int pass; - int last_pass; - int did_something; - int scan_again; - int pages_skipped; - int shared_skipped; - ulong_t shared_level = 8; - pgcnt_t nfreed; - int prm; - pfn_t start_pfn; - int pages_scanned; - int pages_skipped_thresh = 20; - int shared_skipped_thresh = 20; - clock_t kflt_export_scan_start = 0; - int kflt_export_scan; - clock_t scan_start; - int kflt_min_scan_delay = (hz * 60); - int kflt_max_scan_delay = kflt_min_scan_delay * 5; - int kflt_scan_delay = kflt_min_scan_delay; - - ASSERT(kflt_on); - CALLB_CPR_INIT(&cprinfo, &kflt_evict_mutex, - callb_generic_cpr, "kflt_user_evict"); - - mutex_enter(&kflt_evict_mutex); - kflt_evict_thread = curthread; - - pfn = PFN_INVALID; /* force scan reset */ - start_pfn = PFN_INVALID; /* force init with 1st pfn */ - mnode = 0; - kflt_evict_ready = 1; - -loop: - CALLB_CPR_SAFE_BEGIN(&cprinfo); - cv_wait(&kflt_evict_cv, &kflt_evict_mutex); - CALLB_CPR_SAFE_END(&cprinfo, &kflt_evict_mutex); - - scan_start = ddi_get_lbolt(); - kflt_export_scan = 0; - if (kflt_freemem > kflt_lotsfree) { - /* Force a delay between kflt export scans */ - if ((scan_start - kflt_export_scan_start) > - kflt_scan_delay) { - kflt_export_scan = 1; - kflt_export_scan_start = scan_start; - KFLT_STAT_SET_SCAN(kex_scan, 1); - } - } - - KFLT_STAT_INCR(kt_wakeups); - KFLT_STAT_SET_SCAN(kt_kflt_user_alloc_start, kflt_user_alloc); - KFLT_STAT_SET_SCAN(kt_pfn_start, pfn); - KFLT_STAT_SET_SCAN(kt_kflt_freemem_start, kflt_freemem); - KFLT_STAT_SET_SCAN(kt_mnode_start, mnode); - pass = 0; - last_pass = 0; - - -again: - did_something = 0; - pages_skipped = 0; - shared_skipped = 0; - pages_scanned = 0; - - KFLT_STAT_INCR(kt_scans); - KFLT_STAT_INCR_SCAN(kt_passes); - - /* - * There are two conditions which drive the loop - - * - * 1. If we have too much free memory then it may be possible to - * export some large pages back to the user page freelist. - * - * 2. If a large number of user pages have been allocated from the - * kernel freelist then we try to relocate them. - */ - - while ((kflt_export_scan || kflt_needfree || - (kflt_freemem < kflt_lotsfree && kflt_user_alloc)) && - ((pfn = kflt_get_next_pfn(&mnode, pfn)) != PFN_INVALID)) { - if (start_pfn == PFN_INVALID) { - start_pfn = pfn; - } else if (start_pfn == pfn) { - last_pass = pass; - pass += 1; - - /* initialize internal state in kflt_export() */ - (void) kflt_export(pp, 1); - /* - * Did a complete walk of kernel freelist, but didn't - * free any pages. - */ - if (cp_default.cp_ncpus == 1 && did_something == 0) { - KFLT_STAT_INCR(kt_evict_break); - break; - } - did_something = 0; - } - pages_scanned = 1; - - pp = page_numtopp_nolock(pfn); - if (pp == NULL) { - continue; - } - - KFLT_STAT_INCR_SCAN(kt_examined); - - if (!PP_ISKFLT(pp)) - continue; - - if (kflt_export_scan) { - if (PP_ISFREE(pp) && kflt_export(pp, 0)) { - did_something = 1; - } - continue; - } - - if (!kflt_user_alloc) { - continue; - } - - if (PP_ISKAS(pp) || !page_trylock(pp, SE_EXCL)) { - KFLT_STAT_INCR_SCAN(kt_cantlock); - continue; - } - - /* Check that the page is in the same state after locking */ - if (PP_ISFREE(pp) || PP_ISKAS(pp)) { - page_unlock(pp); - continue; - } - - KFLT_STAT_SET_SCAN(kt_skiplevel, shared_level); - if (hat_page_checkshare(pp, shared_level)) { - page_unlock(pp); - pages_skipped++; - shared_skipped++; - KFLT_STAT_INCR_SCAN(kt_skipshared); - continue; - } - - prm = hat_pagesync(pp, - HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD); - - /* On first pass ignore ref'd pages */ - if (pass <= 1 && (prm & P_REF)) { - page_unlock(pp); - KFLT_STAT_INCR_SCAN(kt_skiprefd); - continue; - } - - /* On pass 2, VN_DISPOSE if mod bit is not set */ - if (pass <= 2) { - if (pp->p_szc != 0 || (prm & P_MOD) || - pp->p_lckcnt || pp->p_cowcnt) { - page_unlock(pp); - } else { - /* - * unload the mappings before - * checking if mod bit is set - */ - (void) hat_pageunload(pp, - HAT_FORCE_PGUNLOAD); - - /* - * skip this page if modified - */ - if (hat_ismod(pp)) { - pages_skipped++; - page_unlock(pp); - continue; - } - - /* LINTED: constant in conditional context */ - VN_DISPOSE(pp, B_INVAL, 0, kcred); - KFLT_STAT_INCR_SCAN(kt_destroy); - did_something = 1; - } - continue; - } - - if (kflt_invalidate_page(pp, &nfreed) == 0) { - did_something = 1; - } - - /* - * No need to drop the page lock here. - * kflt_invalidate_page has done that for us - * either explicitly or through a page_free. - */ - } - - /* - * Scan again if we need more memory from the kernel - * freelist or user memory allocations from the kernel freelist - * are too high. - */ - scan_again = 0; - if (kflt_freemem < kflt_minfree || kflt_needfree) { - if (pass <= 3 && kflt_user_alloc && pages_scanned && - pages_skipped > pages_skipped_thresh) { - scan_again = 1; - } else { - /* - * We need to allocate more memory to the kernel - * freelist. - */ - kflt_expand(); - } - } else if (kflt_freemem < kflt_lotsfree && kflt_user_alloc) { - ASSERT(pages_scanned); - if (pass <= 2 && pages_skipped > pages_skipped_thresh) - scan_again = 1; - if (pass == last_pass || did_something) - scan_again = 1; - else if (shared_skipped > shared_skipped_thresh && - shared_level < (8<<24)) { - shared_level <<= 1; - scan_again = 1; - } - } else if (kflt_export_scan) { - /* - * The delay between kflt export scans varies between a minimum - * of 60 secs and a maximum of 5 mins. The delay is set to the - * minimum if a page is promoted during a scan and increased - * otherwise. - */ - if (did_something) { - kflt_scan_delay = kflt_min_scan_delay; - } else if (kflt_scan_delay < kflt_max_scan_delay) { - kflt_scan_delay += kflt_min_scan_delay; - } - } - - if (scan_again && cp_default.cp_ncpus > 1) { - goto again; - } else { - if (shared_level > 8) - shared_level >>= 1; - - KFLT_STAT_SET_SCAN(kt_pfn_end, pfn); - KFLT_STAT_SET_SCAN(kt_mnode_end, mnode); - KFLT_STAT_SET_SCAN(kt_kflt_user_alloc_end, kflt_user_alloc); - KFLT_STAT_SET_SCAN(kt_kflt_freemem_end, kflt_freemem); - KFLT_STAT_SET_SCAN(kt_ticks, ddi_get_lbolt() - scan_start); - KFLT_STAT_INC_SCAN_INDEX; - goto loop; - } - -} - -/* - * Relocate page opp (Original Page Pointer) from kernel page freelist to page - * rpp * (Replacement Page Pointer) on the user page freelist. Page opp will be - * freed if relocation is successful, otherwise it is only unlocked. - * On entry, page opp must be exclusively locked and not free. - * *nfreedp: number of pages freed. - */ -static int -kflt_relocate_page(page_t *pp, pgcnt_t *nfreedp) -{ - page_t *opp = pp; - page_t *rpp = NULL; - spgcnt_t npgs; - int result; - - ASSERT(!PP_ISFREE(opp)); - ASSERT(PAGE_EXCL(opp)); - - result = page_relocate(&opp, &rpp, 1, 1, &npgs, NULL); - *nfreedp = npgs; - if (result == 0) { - while (npgs-- > 0) { - page_t *tpp; - - ASSERT(rpp != NULL); - tpp = rpp; - page_sub(&rpp, tpp); - page_unlock(tpp); - } - - ASSERT(rpp == NULL); - - return (0); /* success */ - } - - page_unlock(opp); - return (result); -} - -/* - * Based on page_invalidate_pages() - * - * Kflt_invalidate_page() uses page_relocate() twice. Both instances - * of use must be updated to match the new page_relocate() when it - * becomes available. - * - * Return result of kflt_relocate_page or zero if page was directly freed. - * *nfreedp: number of pages freed. - */ -static int -kflt_invalidate_page(page_t *pp, pgcnt_t *nfreedp) -{ - int result; - - ASSERT(!PP_ISFREE(pp)); - ASSERT(PAGE_EXCL(pp)); - - /* - * Is this page involved in some I/O? shared? - * The page_struct_lock need not be acquired to - * examine these fields since the page has an - * "exclusive" lock. - */ - if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { - result = kflt_relocate_page(pp, nfreedp); -#ifdef KFLT_STATS - if (result == 0) - KFLT_STAT_INCR_SCAN(kip_reloclocked); - else if (result == ENOMEM) - KFLT_STAT_INCR_SCAN(kip_nomem); -#endif - return (result); - } - - ASSERT(pp->p_vnode->v_type != VCHR); - - /* - * Unload the mappings and check if mod bit is set. - */ - (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); - - if (hat_ismod(pp)) { - result = kflt_relocate_page(pp, nfreedp); -#ifdef KFLT_STATS - if (result == 0) - KFLT_STAT_INCR_SCAN(kip_relocmod); - else if (result == ENOMEM) - KFLT_STAT_INCR_SCAN(kip_nomem); -#endif - return (result); - } - - if (!page_try_demote_pages(pp)) { - KFLT_STAT_INCR_SCAN(kip_demotefailed); - page_unlock(pp); - return (EAGAIN); - } - - /* LINTED: constant in conditional context */ - VN_DISPOSE(pp, B_INVAL, 0, kcred); - KFLT_STAT_INCR_SCAN(kip_destroy); - *nfreedp = 1; - return (0); -} - -void -kflt_evict_wakeup(void) -{ - if (mutex_tryenter(&kflt_evict_mutex)) { - if (kflt_evict_ready && (kflt_freemem > kflt_lotsfree || - (kflt_freemem < kflt_desfree && kflt_user_alloc) || - kflt_needfree)) { - cv_signal(&kflt_evict_cv); - } - mutex_exit(&kflt_evict_mutex); - } - /* else, kflt thread is already running */ -} - -void -kflt_freemem_sub(pgcnt_t npages) -{ - atomic_add_long(&kflt_freemem, -npages); - - ASSERT(kflt_freemem >= 0); - - if (kflt_evict_ready && - (kflt_freemem > kflt_lotsfree || - kflt_freemem < kflt_desfree || kflt_needfree)) { - kflt_evict_wakeup(); - } -} - -void -kflt_freemem_add(pgcnt_t npages) -{ - atomic_add_long(&kflt_freemem, npages); - - wakeup_pcgs(); /* wakeup threads in pcgs() */ - - if (kflt_evict_ready && kflt_needfree && - kflt_freemem >= (kflt_throttlefree + kflt_needfree)) { - mutex_enter(&kflt_throttle_mutex); - cv_broadcast(&kflt_throttle_cv); - mutex_exit(&kflt_throttle_mutex); - } -} - -void -kflt_tick() -{ - /* - * Once per second we wake up all the threads throttled - * waiting for kernel freelist memory, in case we've become stuck - * and haven't made forward progress expanding the kernel freelist. - */ - if (kflt_on && kflt_evict_ready) - cv_broadcast(&kflt_throttle_cv); -}
--- a/usr/src/uts/i86pc/vm/vm_dep.h Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/i86pc/vm/vm_dep.h Mon May 03 04:17:29 2010 -0700 @@ -19,7 +19,8 @@ * CDDL HEADER END */ /* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ /* * Copyright (c) 2010, Intel Corporation. @@ -56,7 +57,6 @@ extern hrtime_t randtick(); extern uint_t page_create_update_flags_x86(uint_t); -extern int kernel_page_update_flags_x86(uint_t *); extern size_t plcnt_sz(size_t); #define PLCNT_SZ(ctrs_sz) (ctrs_sz = plcnt_sz(ctrs_sz)) @@ -99,36 +99,6 @@ #define MNODE_MAX_MRANGE(mnode) memrange_num(mem_node_config[mnode].physbase) /* - * combined memory ranges from mnode and memranges[] to manage single - * mnode/mtype dimension in the page lists. - */ -typedef struct { - pfn_t mnr_pfnlo; - pfn_t mnr_pfnhi; - int mnr_mnode; - int mnr_memrange; /* index into memranges[] */ - int mnr_next; /* next lower PA mnoderange */ - int mnr_exists; - /* maintain page list stats */ - pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */ - pgcnt_t mnr_mt_flpgcnt[MMU_PAGE_SIZES]; /* free list cnt per szc */ - pgcnt_t mnr_mt_totcnt; /* sum of cache and free lists */ -#ifdef DEBUG - struct mnr_mts { /* mnode/mtype szc stats */ - pgcnt_t mnr_mts_pgcnt; - int mnr_mts_colors; - pgcnt_t *mnr_mtsc_pgcnt; - } *mnr_mts; -#endif -} mnoderange_t; - -#define MEMRANGEHI(mtype) \ - (((mtype) > 0) ? memranges[(mtype) - 1] - 1: physmax) -#define MEMRANGELO(mtype) (memranges[(mtype)]) - -#define MTYPE_FREEMEM(mt) (mnoderanges[(mt)].mnr_mt_totcnt) - -/* * This was really badly defined, it implicitly uses mnode_maxmrange[] * which is a static in vm_pagelist.c */ @@ -137,127 +107,16 @@ (mnode_maxmrange[mnode] - mtype_2_mrange(mtype)) /* - * this structure is used for walking free page lists, it - * controls when to split large pages into smaller pages, - * and when to coalesce smaller pages into larger pages - */ -typedef struct page_list_walker { - uint_t plw_colors; /* num of colors for szc */ - uint_t plw_color_mask; /* colors-1 */ - uint_t plw_bin_step; /* next bin: 1 or 2 */ - uint_t plw_count; /* loop count */ - uint_t plw_bin0; /* starting bin */ - uint_t plw_bin_marker; /* bin after initial jump */ - uint_t plw_bin_split_prev; /* last bin we tried to split */ - uint_t plw_do_split; /* set if OK to split */ - uint_t plw_split_next; /* next bin to split */ - uint_t plw_ceq_dif; /* number of different color groups */ - /* to check */ - uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */ - uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */ -} page_list_walker_t; - -/* - * Page freelists are organized as two freelist types user and kernel, with - * their own policy and allocation routines. The definitions related to the - * freelist type structure are grouped below. - * - * The page size free lists themselves are allocated dynamically with + * Per page size free lists. Allocated dynamically. * dimensions [mtype][mmu_page_sizes][colors] * * mtype specifies a physical memory range with a unique mnode. */ -#define MAX_PFLT_POLICIES 3 -#define MAX_PFLT_TYPE 2 -enum freelist_types {PFLT_USER, PFLT_KMEM}; - -/* - * The kernel only needs a small number of page colors, far fewer than user - * programs. - */ -#define KFLT_PAGE_COLORS 16 - -typedef struct page_freelist_type page_freelist_type_t; -extern page_freelist_type_t flt_user; -extern page_freelist_type_t flt_kern; -extern page_freelist_type_t *ufltp; -extern page_freelist_type_t *kfltp; - -void page_flt_init(page_freelist_type_t *, page_freelist_type_t *); -page_t *page_get_uflt(struct vnode *, u_offset_t, struct seg *, caddr_t, - size_t, uint_t, struct lgrp *); -page_t *page_get_kflt(struct vnode *, u_offset_t, struct seg *, caddr_t, - size_t, uint_t, struct lgrp *); -void page_kflt_walk_init(uchar_t, uint_t, uint_t, int, int, - page_list_walker_t *); -uint_t page_kflt_walk_next_bin(uchar_t, uint_t, page_list_walker_t *); -page_t *page_import_kflt(page_freelist_type_t *, uint_t, int, uchar_t, - uint_t, int *); -page_t *page_user_alloc_kflt(page_freelist_type_t *, int, uint_t, int, uchar_t, - uint_t); -void kflt_expand(void); - -typedef page_t *(*pflt_get_func_p) (struct vnode *, u_offset_t, struct seg *, - caddr_t, size_t, uint_t, lgrp_t *); -typedef page_t *(*pflt_policy_func_p)(page_freelist_type_t *, int, uint_t, int, - uchar_t, uint_t); -typedef void (*pflt_list_walk_init_func_p)(uchar_t, uint_t, uint_t, int, int, - page_list_walker_t *); -typedef uint_t (*pflt_list_walk_next_func_p)(uchar_t, uint_t, - page_list_walker_t *); +extern page_t ****page_freelists; -struct page_freelist_type { - int pflt_type; /* type is user or kernel */ - pflt_get_func_p pflt_get_free; /* top-level alloc routine */ - pflt_list_walk_init_func_p pflt_walk_init; /* walker routines */ - pflt_list_walk_next_func_p pflt_walk_next; - int pflt_num_policies; /* the number of policy routines */ - /* - * the policy routines are called by the allocator routine - * to implement the actual allocation policies. - */ - pflt_policy_func_p pflt_policy[MAX_PFLT_POLICIES]; - page_t ****pflt_freelists; /* the page freelist arrays */ -}; - -#if defined(__amd64) && !defined(__xpv) -#define PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype) \ - ((is_kflt) ? \ - (page_t **)(kfltp->pflt_freelists[mtype] + (color)) : \ - ((ufltp->pflt_freelists[mtype][szc] + (color)))) - -#define PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, size, flags, lgrp) \ - { \ - if (kflt_on && (((flags) & PG_KFLT) == PG_KFLT)) { \ - pp = kfltp->pflt_get_free(vp, off, seg, vaddr, size, \ - flags, lgrp); \ - } else { \ - pp = ufltp->pflt_get_free(vp, off, seg, vaddr, size, \ - flags, lgrp); \ - } \ - } -#else /* __amd64 && ! __xpv */ -#define PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype) \ - ((ufltp->pflt_freelists[mtype][szc] + (color))) - -#define PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, size, flags, lgrp) \ - pp = ufltp->pflt_get_free(vp, off, seg, vaddr, size, \ - flags, lgrp); -#endif /* __amd64 && ! __xpv */ - -#define PAGE_FREELISTS(is_kflt, mnode, szc, color, mtype) \ - (*(PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype))) - -#define PAGE_GET_FREELISTS_POLICY(fp, i) \ - (fp->pflt_policy[i]) - -#define PAGE_LIST_WALK_INIT(fp, szc, flags, bin, can_split, use_ceq, plw) \ - fp->pflt_walk_init(szc, flags, bin, can_split, use_ceq, plw) - -#define PAGE_LIST_WALK_NEXT(fp, szc, bin, plw) \ - fp->pflt_walk_next(szc, bin, plw) - +#define PAGE_FREELISTS(mnode, szc, color, mtype) \ + (*(page_freelists[mtype][szc] + (color))) /* * For now there is only a single size cache list. Allocated dynamically. @@ -271,7 +130,7 @@ (*(page_cachelists[mtype] + (color))) /* - * There are mutexes for the user page freelist, the kernel page freelist + * There are mutexes for both the page freelist * and the page cachelist. We want enough locks to make contention * reasonable, but not too many -- otherwise page_freelist_lock() gets * so expensive that it becomes the bottleneck! @@ -279,32 +138,11 @@ #define NPC_MUTEX 16 -/* - * The kflt_disable variable is used to determine whether the kernel freelist - * is supported on this platform. - */ -extern int kflt_disable; - extern kmutex_t *fpc_mutex[NPC_MUTEX]; -extern kmutex_t *kfpc_mutex[NPC_MUTEX]; extern kmutex_t *cpc_mutex[NPC_MUTEX]; -#define PC_ISKFLT(fltp) (fltp->pflt_type == PFLT_KMEM) - /* flag used by the kflt_export function when calling page_promote */ -#define PC_KFLT_EXPORT 0x4 - -extern page_t *page_get_mnode_freelist(page_freelist_type_t *, int, uint_t, - int, uchar_t, uint_t); +extern page_t *page_get_mnode_freelist(int, uint_t, int, uchar_t, uint_t); extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int); -extern page_t *page_get_contig_pages(page_freelist_type_t *, int, uint_t, int, - uchar_t, uint_t); -extern void page_list_walk_init(uchar_t, uint_t, uint_t, int, int, - page_list_walker_t *); -extern uint_t page_list_walk_next_bin(uchar_t, uint_t, page_list_walker_t *); - -extern void kflt_evict_wakeup(); -extern void kflt_freemem_add(pgcnt_t); -extern void kflt_freemem_sub(pgcnt_t); /* mem node iterator is not used on x86 */ #define MEM_NODE_ITERATOR_DECL(it) @@ -391,8 +229,6 @@ } \ } -#define USER_2_KMEM_BIN(bin) ((bin) & (KFLT_PAGE_COLORS - 1)) - /* get the color equivalency mask for the next szc */ #define PAGE_GET_NSZ_MASK(szc, mask) \ ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) @@ -404,9 +240,7 @@ /* Find the bin for the given page if it was of size szc */ #define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc, NULL)) -#define PP_2_BIN(pp) ((PP_ISKFLT(pp)) ? \ - USER_2_KMEM_BIN(PP_2_BIN_SZC(pp, pp->p_szc)) : \ - (PP_2_BIN_SZC(pp, pp->p_szc))) +#define PP_2_BIN(pp) (PP_2_BIN_SZC(pp, pp->p_szc)) #define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum)) #define PP_2_MTYPE(pp) (pfn_2_mtype(pp->p_pagenum)) @@ -415,6 +249,27 @@ #define SZCPAGES(szc) (1 << PAGE_BSZS_SHIFT(szc)) #define PFN_BASE(pfnum, szc) (pfnum & ~(SZCPAGES(szc) - 1)) +/* + * this structure is used for walking free page lists + * controls when to split large pages into smaller pages, + * and when to coalesce smaller pages into larger pages + */ +typedef struct page_list_walker { + uint_t plw_colors; /* num of colors for szc */ + uint_t plw_color_mask; /* colors-1 */ + uint_t plw_bin_step; /* next bin: 1 or 2 */ + uint_t plw_count; /* loop count */ + uint_t plw_bin0; /* starting bin */ + uint_t plw_bin_marker; /* bin after initial jump */ + uint_t plw_bin_split_prev; /* last bin we tried to split */ + uint_t plw_do_split; /* set if OK to split */ + uint_t plw_split_next; /* next bin to split */ + uint_t plw_ceq_dif; /* number of different color groups */ + /* to check */ + uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */ + uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */ +} page_list_walker_t; + void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, int can_split, int use_ceq, page_list_walker_t *plw); @@ -460,19 +315,12 @@ #define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) \ mnodetype_2_pfn(mnode, mtype, &pfnlo, &pfnhi) -#define PC_FREELIST_BIN_MUTEX(is_kflt, mnode, bin, flags) \ - ((is_kflt) ? \ - (&kfpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) : \ - (&fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode])) - -#define PC_BIN_MUTEX(is_kflt, mnode, bin, flags) \ - ((flags & PG_FREE_LIST) ? \ - PC_FREELIST_BIN_MUTEX(is_kflt, mnode, bin, flags): \ +#define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \ + &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \ &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) #define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode]) #define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode]) -#define KFPC_MUTEX(mnode, i) (&kfpc_mutex[i][mnode]) #ifdef DEBUG #define CHK_LPG(pp, szc) chk_lpg(pp, szc) @@ -560,8 +408,6 @@ #define PGI_MT_NEXT 0x8000000 /* get next mtype */ #define PGI_MT_RANGE (PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G) -/* Flag to avoid allocating a page in page_import_kflt() */ -#define PGI_NOPGALLOC 0x10000000 /* * Maximum and default values for user heap, stack, private and shared @@ -590,17 +436,11 @@ * hash as and addr to get a bin. */ -#define AS_2_USER_BIN(as, seg, vp, addr, bin, szc) \ +#define AS_2_BIN(as, seg, vp, addr, bin, szc) \ bin = (((((uintptr_t)(addr) >> PAGESHIFT) + ((uintptr_t)(as) >> 4)) \ & page_colors_mask) >> \ (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)) -#define AS_2_BIN(is_kflt, as, seg, vp, addr, bin, szc) { \ - AS_2_USER_BIN(as, seg, vp, addr, bin, szc); \ - if (is_kflt) { \ - bin = USER_2_KMEM_BIN(bin); \ - } \ -} /* * cpu private vm data - accessed thru CPU->cpu_vm_data * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock() @@ -643,22 +483,12 @@ #ifdef VM_STATS struct vmm_vmstats_str { - /* page_get_uflt and page_get_kflt */ - ulong_t pgf_alloc[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; - ulong_t pgf_allocok[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; - ulong_t pgf_allocokrem[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; - ulong_t pgf_allocfailed[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; + ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */ + ulong_t pgf_allocok[MMU_PAGE_SIZES]; + ulong_t pgf_allocokrem[MMU_PAGE_SIZES]; + ulong_t pgf_allocfailed[MMU_PAGE_SIZES]; ulong_t pgf_allocdeferred; - ulong_t pgf_allocretry[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; - ulong_t pgik_allocok; /* page_import_kflt */ - ulong_t pgik_allocfailed; - ulong_t pgkx_allocok; /* kflt_expand */ - ulong_t pgkx_allocfailed; - ulong_t puak_allocok; /* page_user_alloc_kflt */ - ulong_t puak_allocfailed; - ulong_t pgexportok; /* kflt_export */ - ulong_t pgexportfail; - ulong_t pgkflt_disable; /* kflt_user_evict */ + ulong_t pgf_allocretry[MMU_PAGE_SIZES]; ulong_t pgc_alloc; /* page_get_cachelist */ ulong_t pgc_allocok; ulong_t pgc_allocokrem; @@ -673,7 +503,6 @@ ulong_t ptcpfailexcl[MMU_PAGE_SIZES]; ulong_t ptcpfailszc[MMU_PAGE_SIZES]; ulong_t ptcpfailcage[MMU_PAGE_SIZES]; - ulong_t ptcpfailkflt[MMU_PAGE_SIZES]; ulong_t ptcpok[MMU_PAGE_SIZES]; ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */ ulong_t pgmf_allocfailed[MMU_PAGE_SIZES];
--- a/usr/src/uts/i86pc/vm/vm_machdep.c Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/i86pc/vm/vm_machdep.c Mon May 03 04:17:29 2010 -0700 @@ -87,7 +87,6 @@ #include <sys/stack.h> #include <util/qsort.h> #include <sys/taskq.h> -#include <sys/kflt_mem.h> #ifdef __xpv @@ -138,6 +137,36 @@ extern int use_sse_pagecopy, use_sse_pagezero; /* + * combined memory ranges from mnode and memranges[] to manage single + * mnode/mtype dimension in the page lists. + */ +typedef struct { + pfn_t mnr_pfnlo; + pfn_t mnr_pfnhi; + int mnr_mnode; + int mnr_memrange; /* index into memranges[] */ + int mnr_next; /* next lower PA mnoderange */ + int mnr_exists; + /* maintain page list stats */ + pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */ + pgcnt_t mnr_mt_flpgcnt[MMU_PAGE_SIZES]; /* free list cnt per szc */ + pgcnt_t mnr_mt_totcnt; /* sum of cache and free lists */ +#ifdef DEBUG + struct mnr_mts { /* mnode/mtype szc stats */ + pgcnt_t mnr_mts_pgcnt; + int mnr_mts_colors; + pgcnt_t *mnr_mtsc_pgcnt; + } *mnr_mts; +#endif +} mnoderange_t; + +#define MEMRANGEHI(mtype) \ + ((mtype > 0) ? memranges[mtype - 1] - 1: physmax) +#define MEMRANGELO(mtype) (memranges[mtype]) + +#define MTYPE_FREEMEM(mt) (mnoderanges[mt].mnr_mt_totcnt) + +/* * As the PC architecture evolved memory up was clumped into several * ranges for various historical I/O devices to do DMA. * < 16Meg - ISA bus @@ -315,9 +344,8 @@ */ hw_pagesize_t hw_page_array[MAX_NUM_LEVEL + 1]; -kmutex_t *fpc_mutex[NPC_MUTEX]; /* user page freelist mutexes */ -kmutex_t *kfpc_mutex[NPC_MUTEX]; /* kernel page freelist mutexes */ -kmutex_t *cpc_mutex[NPC_MUTEX]; /* page cachelist mutexes */ +kmutex_t *fpc_mutex[NPC_MUTEX]; +kmutex_t *cpc_mutex[NPC_MUTEX]; /* Lock to protect mnoderanges array for memory DR operations. */ static kmutex_t mnoderange_lock; @@ -1145,9 +1173,7 @@ if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) { startpfn = pfn; CONTIG_UNLOCK(); -#ifdef DEBUG check_dma(mattr, pplist, *pgcnt); -#endif return (pplist); } minctg = howmany(*pgcnt, sgllen); @@ -1182,9 +1208,7 @@ if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) { startpfn = pfn; CONTIG_UNLOCK(); -#ifdef DEBUG check_dma(mattr, pplist, *pgcnt); -#endif return (pplist); } minctg = howmany(*pgcnt, sgllen); @@ -1784,15 +1808,6 @@ } colorsz = mnoderangecnt * sizeof (mnoderange_t); - if (!kflt_disable) { - /* size for kernel page freelists */ - colorsz += mnoderangecnt * sizeof (page_t ***); - colorsz += (mnoderangecnt * KFLT_PAGE_COLORS * - sizeof (page_t *)); - - /* size for kfpc_mutex */ - colorsz += (max_mem_nodes * sizeof (kmutex_t) * NPC_MUTEX); - } /* size for fpc_mutex and cpc_mutex */ colorsz += (2 * max_mem_nodes * sizeof (kmutex_t) * NPC_MUTEX); @@ -1842,44 +1857,28 @@ fpc_mutex[k] = (kmutex_t *)addr; addr += (max_mem_nodes * sizeof (kmutex_t)); } - if (!kflt_disable) { - for (k = 0; k < NPC_MUTEX; k++) { - kfpc_mutex[k] = (kmutex_t *)addr; - addr += (max_mem_nodes * sizeof (kmutex_t)); - } - } for (k = 0; k < NPC_MUTEX; k++) { cpc_mutex[k] = (kmutex_t *)addr; addr += (max_mem_nodes * sizeof (kmutex_t)); } - ufltp->pflt_freelists = (page_t ****)addr; + page_freelists = (page_t ****)addr; addr += (mnoderangecnt * sizeof (page_t ***)); page_cachelists = (page_t ***)addr; addr += (mnoderangecnt * sizeof (page_t **)); for (i = 0; i < mnoderangecnt; i++) { - ufltp->pflt_freelists[i] = (page_t ***)addr; + page_freelists[i] = (page_t ***)addr; addr += (mmu_page_sizes * sizeof (page_t **)); for (j = 0; j < mmu_page_sizes; j++) { colors = page_get_pagecolors(j); - ufltp->pflt_freelists[i][j] = (page_t **)addr; + page_freelists[i][j] = (page_t **)addr; addr += (colors * sizeof (page_t *)); } page_cachelists[i] = (page_t **)addr; addr += (page_colors * sizeof (page_t *)); } - - if (!kflt_disable) { - kfltp->pflt_freelists = (page_t ****)addr; - addr += (mnoderangecnt * sizeof (page_t ***)); - for (i = 0; i < mnoderangecnt; i++) { - kfltp->pflt_freelists[i] = (page_t ***)addr; - addr += (KFLT_PAGE_COLORS * sizeof (page_t *)); - } - } - page_flt_init(ufltp, kfltp); } #if defined(__xpv) @@ -1957,30 +1956,6 @@ return (flags); } -int -kernel_page_update_flags_x86(uint_t *flags) -{ - /* - * page_get_kflt() calls this after walking the kernel pagelists and - * not finding a free page to allocate. If the PGI_MT_RANGE4G flag is - * set then we only walk mnodes in the greater than 4g range, so if we - * didn't find a page there must be free kernel memory below this range. - * - * kflt_expand() calls this before trying to allocate large pages for - * kernel memory. - */ - if (physmax4g) { - if (*flags & PGI_MT_RANGE4G) { - *flags &= ~PGI_MT_RANGE4G; - *flags |= PGI_MT_RANGE0; - return (1); - } else { - return (0); - } - } - return (0); -} - /*ARGSUSED*/ int bp_color(struct buf *bp) @@ -2925,9 +2900,7 @@ goto fail; off += minctg * MMU_PAGESIZE; } -#ifdef DEBUG check_dma(mattr, mcpl, minctg); -#endif /* * Here with a minctg run of contiguous pages, add them to the * list we will return for this request. @@ -3131,14 +3104,12 @@ for (plw.plw_count = 0; plw.plw_count < page_colors; plw.plw_count++) { - if (PAGE_FREELISTS(PFLT_USER, mnode, szc, bin, mtype) - == NULL) + if (PAGE_FREELISTS(mnode, szc, bin, mtype) == NULL) goto nextfreebin; - pcm = PC_FREELIST_BIN_MUTEX(PFLT_USER, mnode, bin, - PG_FREE_LIST); + pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST); mutex_enter(pcm); - pp = PAGE_FREELISTS(PFLT_USER, mnode, szc, bin, mtype); + pp = PAGE_FREELISTS(mnode, szc, bin, mtype); first_pp = pp; while (pp != NULL) { if (page_trylock(pp, SE_EXCL) == 0) { @@ -3176,8 +3147,8 @@ ASSERT(pp->p_szc == 0); /* found a page with specified DMA attributes */ - page_sub(PAGE_FREELISTP(PFLT_USER, mnode, szc, - bin, mtype), pp); + page_sub(&PAGE_FREELISTS(mnode, szc, bin, + mtype), pp); page_ctr_sub(mnode, mtype, pp, PG_FREE_LIST); if ((PP_ISFREE(pp) == 0) || @@ -3187,9 +3158,7 @@ } mutex_exit(pcm); -#ifdef DEBUG check_dma(dma_attr, pp, 1); -#endif VM_STAT_ADD(pga_vmstats.pgma_allocok); return (pp); } @@ -3208,9 +3177,7 @@ mmu_btop(dma_attr->dma_attr_addr_hi + 1), &plw); if (pp != NULL) { -#ifdef DEBUG check_dma(dma_attr, pp, 1); -#endif return (pp); } } @@ -3234,8 +3201,7 @@ for (i = 0; i <= page_colors; i++) { if (PAGE_CACHELISTS(mnode, bin, mtype) == NULL) goto nextcachebin; - pcm = PC_BIN_MUTEX(PFLT_USER, mnode, bin, - PG_CACHE_LIST); + pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST); mutex_enter(pcm); pp = PAGE_CACHELISTS(mnode, bin, mtype); first_pp = pp; @@ -3279,9 +3245,7 @@ mutex_exit(pcm); ASSERT(pp->p_vnode); ASSERT(PP_ISAGED(pp) == 0); -#ifdef DEBUG check_dma(dma_attr, pp, 1); -#endif VM_STAT_ADD(pga_vmstats.pgma_allocok); return (pp); } @@ -3337,7 +3301,7 @@ lgrp = lgrp_home_lgrp(); /* LINTED */ - AS_2_BIN(PFLT_USER, as, seg, vp, vaddr, bin, 0); + AS_2_BIN(as, seg, vp, vaddr, bin, 0); /* * Only hold one freelist or cachelist lock at a time, that way we @@ -3391,7 +3355,7 @@ mtype = m; do { if (fullrange != 0) { - pp = page_get_mnode_freelist(ufltp, mnode, + pp = page_get_mnode_freelist(mnode, bin, mtype, szc, flags); if (pp == NULL) { pp = page_get_mnode_cachelist( @@ -3403,9 +3367,7 @@ } if (pp != NULL) { VM_STAT_ADD(pga_vmstats.pga_allocok); -#ifdef DEBUG check_dma(dma_attr, pp, 1); -#endif return (pp); } } while (mtype != n && @@ -3517,9 +3479,7 @@ } while (pp != plist); if (!npages) { -#ifdef DEBUG check_dma(mattr, plist, pages_req); -#endif return (plist); } else { vaddr += (pages_req - npages) << MMU_PAGESHIFT; @@ -3682,9 +3642,7 @@ vaddr += MMU_PAGESIZE; } -#ifdef DEBUG check_dma(mattr, plist, pages_req); -#endif return (plist); fail: @@ -4026,33 +3984,3 @@ } return (pp); } - -/* - * Initializes the user and kernel page freelist type structures. - */ -/* ARGSUSED */ -void -page_flt_init(page_freelist_type_t *ufp, page_freelist_type_t *kfp) -{ - ufp->pflt_type = PFLT_USER; - ufp->pflt_get_free = &page_get_uflt; - ufp->pflt_walk_init = page_list_walk_init; - ufp->pflt_walk_next = page_list_walk_next_bin; - ufp->pflt_policy[0] = page_get_mnode_freelist; - ufp->pflt_policy[1] = page_get_contig_pages; - ufp->pflt_num_policies = 2; -#if defined(__amd64) && !defined(__xpv) - if (!kflt_disable) { - ufp->pflt_num_policies = 3; - ufp->pflt_policy[1] = page_user_alloc_kflt; - ufp->pflt_policy[2] = page_get_contig_pages; - - kfp->pflt_type = PFLT_KMEM; - kfp->pflt_get_free = &page_get_kflt; - kfp->pflt_walk_init = page_kflt_walk_init; - kfp->pflt_walk_next = page_list_walk_next_bin; - kfp->pflt_num_policies = 1; - kfp->pflt_policy[0] = page_get_mnode_freelist; - } -#endif /* __amd64 && !__xpv */ -}
--- a/usr/src/uts/i86xpv/Makefile.files Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/i86xpv/Makefile.files Mon May 03 04:17:29 2010 -0700 @@ -20,7 +20,8 @@ # # -# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. # # This Makefile defines file modules in the directory uts/i86xpv @@ -74,7 +75,6 @@ mach_sysconfig.o \ machdep.o \ mem_config_stubs.o \ - kflt_mem_stubs.o \ memnode.o \ microcode.o \ mlsetup.o \
--- a/usr/src/uts/sun4/vm/vm_dep.c Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/sun4/vm/vm_dep.c Mon May 03 04:17:29 2010 -0700 @@ -19,7 +19,8 @@ * CDDL HEADER END */ /* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ /* @@ -91,8 +92,6 @@ caddr_t errata57_limit; #endif -static void page_flt_init(page_freelist_type_t *); - extern void page_relocate_hash(page_t *, page_t *); /* @@ -725,6 +724,41 @@ } /* + * Anchored in the table below are counters used to keep track + * of free contiguous physical memory. Each element of the table contains + * the array of counters, the size of array which is allocated during + * startup based on physmax and a shift value used to convert a pagenum + * into a counter array index or vice versa. The table has page size + * for rows and region size for columns: + * + * page_counters[page_size][region_size] + * + * page_size: TTE size code of pages on page_size freelist. + * + * region_size: TTE size code of a candidate larger page made up + * made up of contiguous free page_size pages. + * + * As you go across a page_size row increasing region_size each + * element keeps track of how many (region_size - 1) size groups + * made up of page_size free pages can be coalesced into a + * regsion_size page. Yuck! Lets try an example: + * + * page_counters[1][3] is the table element used for identifying + * candidate 4M pages from contiguous pages off the 64K free list. + * Each index in the page_counters[1][3].array spans 4M. Its the + * number of free 512K size (regsion_size - 1) groups of contiguous + * 64K free pages. So when page_counters[1][3].counters[n] == 8 + * we know we have a candidate 4M page made up of 512K size groups + * of 64K free pages. + */ + +/* + * Per page size free lists. 3rd (max_mem_nodes) and 4th (page coloring bins) + * dimensions are allocated dynamically. + */ +page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES]; + +/* * For now there is only a single size cache list. * Allocated dynamically. */ @@ -787,11 +821,10 @@ for (szc = 0; szc < mmu_page_sizes; szc++) { clrs = page_get_pagecolors(szc); for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) { - ufltp->pflt_freelists[szc][mtype] = - (page_t ***)alloc_base; + page_freelists[szc][mtype] = (page_t ***)alloc_base; alloc_base += (max_mem_nodes * sizeof (page_t **)); for (mnode = 0; mnode < max_mem_nodes; mnode++) { - ufltp->pflt_freelists[szc][mtype][mnode] = + page_freelists[szc][mtype][mnode] = (page_t **)alloc_base; alloc_base += (clrs * (sizeof (page_t *))); } @@ -799,8 +832,6 @@ } alloc_base = page_ctrs_alloc(alloc_base); - - page_flt_init(ufltp); return (alloc_base); } @@ -1045,17 +1076,3 @@ } return (PAGESIZE); } -/* - * Initializes the user page freelist type structures. - */ -static void -page_flt_init(page_freelist_type_t *ufp) -{ - ufp->pflt_type = PFLT_USER; - ufp->pflt_get_free = &page_get_uflt; - ufp->pflt_walk_init = page_list_walk_init; - ufp->pflt_walk_next = page_list_walk_next_bin; - ufp->pflt_num_policies = 2; - ufp->pflt_policy[0] = page_get_mnode_freelist; - ufp->pflt_policy[1] = page_get_contig_pages; -}
--- a/usr/src/uts/sun4/vm/vm_dep.h Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/sun4/vm/vm_dep.h Mon May 03 04:17:29 2010 -0700 @@ -19,7 +19,8 @@ * CDDL HEADER END */ /* - * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ /* @@ -84,8 +85,12 @@ #define PGI_MT_RANGE 0x1000000 /* mtype range */ #define PGI_MT_NEXT 0x2000000 /* get next mtype */ +extern page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES]; extern page_t ***page_cachelists[MAX_MEM_TYPES]; +#define PAGE_FREELISTS(mnode, szc, color, mtype) \ + (*(page_freelists[szc][mtype][mnode] + (color))) + #define PAGE_CACHELISTS(mnode, color, mtype) \ (*(page_cachelists[mtype][mnode] + (color))) @@ -286,8 +291,8 @@ #define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum)) -#define PC_BIN_MUTEX(iskflt, mnode, bin, flags) ((flags & PG_FREE_LIST) ? \ - &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \ +#define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \ + &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \ &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) #define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode]) @@ -319,88 +324,6 @@ void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, int can_split, int use_ceq, page_list_walker_t *plw); -/* - * Page freelists have a single freelist type, the user page freelist. The - * kernel page freelist is disabled on SPARC platforms. The definitions related - * to the freelist type structure are grouped below. - */ - -#define MAX_PFLT_POLICIES 3 -#define MAX_PFLT_TYPE 2 -enum freelist_types {PFLT_USER, PFLT_KMEM}; - -/* - * The kernel only needs a small number of page colors, far fewer than user - * programs. - */ -#define KFLT_PAGE_COLORS 16 - /* flag used by the kflt_export function when calling page_promote */ -#define PC_KFLT_EXPORT 0x4 -#define PC_ISKFLT(fltp) (fltp->pflt_type == PFLT_KMEM) - -typedef struct page_freelist_type page_freelist_type_t; -extern page_freelist_type_t flt_user; -extern page_freelist_type_t *ufltp; - -typedef page_t *(*pflt_get_func_p) (struct vnode *, u_offset_t, struct seg *, - caddr_t, size_t, uint_t, lgrp_t *); -typedef page_t *(*pflt_policy_func_p)(page_freelist_type_t *, int, uint_t, int, - uchar_t, uint_t); -typedef void (*pflt_list_walk_init_func_p)(uchar_t, uint_t, uint_t, int, int, - page_list_walker_t *); -typedef uint_t (*pflt_list_walk_next_func_p)(uchar_t, uint_t, - page_list_walker_t *); - -page_t *page_get_uflt(struct vnode *, u_offset_t, struct seg *, caddr_t, - size_t, uint_t, struct lgrp *); -extern page_t *page_get_mnode_freelist(page_freelist_type_t *, int, uint_t, - int, uchar_t, uint_t); -extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int); -extern page_t *page_get_contig_pages(page_freelist_type_t *, int, uint_t, int, - uchar_t, uint_t); -extern void page_list_walk_init(uchar_t, uint_t, uint_t, int, int, - page_list_walker_t *); -extern uint_t page_list_walk_next_bin(uchar_t, uint_t, page_list_walker_t *); - -/* - * Page freelists are organized as freelist types, on Sparc systems there - * is only a single user freelist type as the kernel cage provides a - * similar function to kernel freelist in that it prevents memory - * fragmentation. - * - * The page freelists have fixed page size and memory type dimensions. - * the 3rd (max_mem_nodes) and 4th (page coloring bins) dimensions are - * allocated dynamically. - */ -struct page_freelist_type { - int pflt_type; - pflt_get_func_p pflt_get_free; - pflt_list_walk_init_func_p pflt_walk_init; - pflt_list_walk_next_func_p pflt_walk_next; - int pflt_num_policies; - pflt_policy_func_p pflt_policy[MAX_PFLT_POLICIES]; - page_t ***pflt_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES]; -}; - -#define PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype) \ - ((ufltp->pflt_freelists[szc][mtype][mnode] + (color))) - -#define PAGE_FREELISTS(is_kflt, mnode, szc, color, mtype) \ - (*(ufltp->pflt_freelists[szc][mtype][mnode] + (color))) - -#define PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, size, flags, lgrp) \ - pp = ufltp->pflt_get_free(vp, off, seg, vaddr, size, \ - flags, lgrp); - -#define PAGE_GET_FREELISTS_POLICY(fp, i) \ - (fp->pflt_policy[i]) - -#define PAGE_LIST_WALK_INIT(fp, szc, flags, bin, can_split, use_ceq, plw) \ - fp->pflt_walk_init(szc, flags, bin, can_split, use_ceq, plw) - -#define PAGE_LIST_WALK_NEXT(fp, szc, bin, plw) \ - fp->pflt_walk_next(szc, bin, plw) - typedef char hpmctr_t; #ifdef DEBUG @@ -700,7 +623,7 @@ * 1 virtual=paddr * 2 bin hopping */ -#define AS_2_BIN(kflt, as, seg, vp, addr, bin, szc) \ +#define AS_2_BIN(as, seg, vp, addr, bin, szc) \ switch (consistent_coloring) { \ default: \ cmn_err(CE_WARN, \ @@ -826,26 +749,16 @@ #ifdef VM_STATS struct vmm_vmstats_str { - /* page_get_uflt and page_get_kflt */ - ulong_t pgf_alloc[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; - ulong_t pgf_allocok[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; - ulong_t pgf_allocokrem[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; - ulong_t pgf_allocfailed[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; - ulong_t pgf_allocdeferred; - ulong_t pgf_allocretry[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; - ulong_t pgik_allocok; /* page_import_kflt */ - ulong_t pgik_allocfailed; - ulong_t pgkx_allocok; /* kflt_expand */ - ulong_t pgkx_allocfailed; - ulong_t puak_allocok; /* page_user_alloc_kflt */ - ulong_t puak_allocfailed; - ulong_t pgexportok; /* kflt_export */ - ulong_t pgexportfail; - ulong_t pgkflt_disable; /* kflt_user_evict */ + ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */ + ulong_t pgf_allocok[MMU_PAGE_SIZES]; + ulong_t pgf_allocokrem[MMU_PAGE_SIZES]; + ulong_t pgf_allocfailed[MMU_PAGE_SIZES]; + ulong_t pgf_allocdeferred; + ulong_t pgf_allocretry[MMU_PAGE_SIZES]; ulong_t pgc_alloc; /* page_get_cachelist */ ulong_t pgc_allocok; ulong_t pgc_allocokrem; - ulong_t pgc_allocokdeferred; + ulong_t pgc_allocokdeferred; ulong_t pgc_allocfailed; ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */ ulong_t pgcp_allocfailed[MMU_PAGE_SIZES]; @@ -856,7 +769,6 @@ ulong_t ptcpfailexcl[MMU_PAGE_SIZES]; ulong_t ptcpfailszc[MMU_PAGE_SIZES]; ulong_t ptcpfailcage[MMU_PAGE_SIZES]; - ulong_t ptcpfailkflt[MMU_PAGE_SIZES]; ulong_t ptcpok[MMU_PAGE_SIZES]; ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */ ulong_t pgmf_allocfailed[MMU_PAGE_SIZES]; @@ -875,24 +787,24 @@ ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */ ulong_t pfs_demote[MMU_PAGE_SIZES]; ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; - ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */ + ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */ + ulong_t ppr_relocok[MMU_PAGE_SIZES]; ulong_t ppr_relocnoroot[MMU_PAGE_SIZES]; ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES]; ulong_t ppr_relocnolock[MMU_PAGE_SIZES]; ulong_t ppr_relocnomem[MMU_PAGE_SIZES]; - ulong_t ppr_relocok[MMU_PAGE_SIZES]; ulong_t ppr_krelocfail[MMU_PAGE_SIZES]; ulong_t ppr_copyfail; /* page coalesce counter */ - ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; + ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; /* candidates useful */ - ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; + ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; /* ctrs changed after locking */ - ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; + ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; /* page_freelist_coalesce failed */ - ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; - ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */ - ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */ + ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; + ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */ + ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */ }; extern struct vmm_vmstats_str vmm_vmstats; #endif /* VM_STATS */
--- a/usr/src/uts/sun4u/Makefile.files Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/sun4u/Makefile.files Mon May 03 04:17:29 2010 -0700 @@ -20,7 +20,8 @@ # # -# Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. # # This Makefile defines all file modules for the directory uts/sun4u # and it's children. These are the source files which are sun4u @@ -56,7 +57,6 @@ CORE_OBJS += mach_vm_dep.o CORE_OBJS += mach_xc.o CORE_OBJS += mem_cage.o -CORE_OBJS += kflt_mem_stubs.o CORE_OBJS += mem_config.o CORE_OBJS += memlist_new.o CORE_OBJS += memscrub.o
--- a/usr/src/uts/sun4u/starfire/os/starfire.c Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/sun4u/starfire/os/starfire.c Mon May 03 04:17:29 2010 -0700 @@ -20,7 +20,8 @@ */ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ #include <sys/param.h> @@ -294,15 +295,14 @@ } /* find freelist */ - freelist = &PAGE_FREELISTS(PLT_USER, mnode, - size, color, mtype); + freelist = &PAGE_FREELISTS(mnode, size, + color, mtype); if (*freelist == NULL) continue; /* acquire locks */ - pcm = PC_BIN_MUTEX(PLT_USER, mnode, color, - PG_FREE_LIST); + pcm = PC_BIN_MUTEX(mnode, color, PG_FREE_LIST); mutex_enter(pcm); /*
--- a/usr/src/uts/sun4v/Makefile.files Sun May 02 21:47:48 2010 -0700 +++ b/usr/src/uts/sun4v/Makefile.files Mon May 03 04:17:29 2010 -0700 @@ -20,7 +20,8 @@ # # -# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. # # This Makefile defines all file modules for the directory uts/sun4v # and it's children. These are the source files which are sun4v @@ -58,7 +59,6 @@ CORE_OBJS += mach_vm_dep.o CORE_OBJS += mach_xc.o CORE_OBJS += mem_cage.o -CORE_OBJS += kflt_mem_stubs.o CORE_OBJS += mem_config.o CORE_OBJS += memlist_new.o CORE_OBJS += memseg.o