changeset 5349:01422ec04372

6453272 ctfmerge uses the largest pagesize from getpagesizes() which can be bad on systems with giant pages 6543997 divide by 0 panic in page_geti_contig_pages during 1g page testing 6587615 1g pagesize support on AMD family 0x10 processors 6588824 throttling large page coalescing needs to be revisited for gigantic pages 6613824 legacy applications (java) should be prevented from inadvertently using 1g pagesize
author kchow
date Sun, 28 Oct 2007 10:20:15 -0700
parents f6993989d2bf
children b0b296494991
files deleted_files/usr/src/lib/libc/common/sys/getpagesizes.s usr/src/lib/common/inc/c_synonyms.h usr/src/lib/libc/amd64/Makefile usr/src/lib/libc/common/sys/getpagesizes.s usr/src/lib/libc/i386/Makefile.com usr/src/lib/libc/inc/synonyms.h usr/src/lib/libc/port/mapfile-vers usr/src/lib/libc/port/sys/getpagesizes.c usr/src/lib/libc/sparc/Makefile usr/src/lib/libc/sparcv9/Makefile usr/src/tools/ctf/cvt/ctfmerge.c usr/src/uts/common/os/sysent.c usr/src/uts/common/sys/mman.h usr/src/uts/common/sys/syscall.h usr/src/uts/common/syscall/getpagesizes.c usr/src/uts/common/vm/page.h usr/src/uts/common/vm/vm_pagelist.c usr/src/uts/i86pc/os/cpuid.c usr/src/uts/i86pc/os/startup.c usr/src/uts/i86pc/sys/machparam.h usr/src/uts/i86pc/vm/hat_i86.c usr/src/uts/i86pc/vm/hat_pte.h usr/src/uts/i86pc/vm/htable.c usr/src/uts/i86pc/vm/i86_mmu.c usr/src/uts/i86pc/vm/kboot_mmu.c usr/src/uts/i86pc/vm/vm_dep.h usr/src/uts/i86pc/vm/vm_machdep.c usr/src/uts/intel/sys/x86_archext.h usr/src/uts/sun4/io/trapstat.c usr/src/uts/sun4/vm/vm_dep.h
diffstat 30 files changed, 270 insertions(+), 120 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deleted_files/usr/src/lib/libc/common/sys/getpagesizes.s	Sun Oct 28 10:20:15 2007 -0700
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2001 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+.ident	"%Z%%M%	%I%	%E% SMI"
+
+/* C library -- getpagesizes	*/
+/* uint_t getpagesizes(size_t buf[], uint_t nelem)		*/
+
+	.file	"getpagesizes.s"
+
+#include <sys/asm_linkage.h>
+
+	ANSI_PRAGMA_WEAK(getpagesizes,function)
+
+#include "SYS.h"
+
+	SYSCALL(getpagesizes)
+	RET
+	SET_SIZE(getpagesizes)
--- a/usr/src/lib/common/inc/c_synonyms.h	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/lib/common/inc/c_synonyms.h	Sun Oct 28 10:20:15 2007 -0700
@@ -359,6 +359,8 @@
 #define	getopt				_getopt
 #define	getopt_long			_getopt_long
 #define	getopt_long_only		_getopt_long_only
+#define	getpagesizes			_getpagesizes
+#define	getpagesizes2			_getpagesizes2
 #define	getpass				_getpass
 #define	getpeerucred			_getpeerucred
 #define	getpflags			_getpflags
--- a/usr/src/lib/libc/amd64/Makefile	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/lib/libc/amd64/Makefile	Sun Oct 28 10:20:15 2007 -0700
@@ -206,7 +206,6 @@
 	gethrtime.o		\
 	getitimer.o		\
 	getmsg.o		\
-	getpagesizes.o		\
 	getpid.o		\
 	getpmsg.o		\
 	getppid.o		\
@@ -798,6 +797,7 @@
 	fsmisc.o		\
 	fstatat.o		\
 	fsync.o			\
+	getpagesizes.o		\
 	getpeerucred.o		\
 	inst_sync.o		\
 	issetugid.o		\
--- a/usr/src/lib/libc/common/sys/getpagesizes.s	Fri Oct 26 15:37:43 2007 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2001 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-.ident	"%Z%%M%	%I%	%E% SMI"
-
-/* C library -- getpagesizes	*/
-/* uint_t getpagesizes(size_t buf[], uint_t nelem)		*/
-
-	.file	"getpagesizes.s"
-
-#include <sys/asm_linkage.h>
-
-	ANSI_PRAGMA_WEAK(getpagesizes,function)
-
-#include "SYS.h"
-
-	SYSCALL(getpagesizes)
-	RET
-	SET_SIZE(getpagesizes)
--- a/usr/src/lib/libc/i386/Makefile.com	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/lib/libc/i386/Makefile.com	Sun Oct 28 10:20:15 2007 -0700
@@ -231,7 +231,6 @@
 	gethrtime.o		\
 	getitimer.o		\
 	getmsg.o		\
-	getpagesizes.o		\
 	getpid.o		\
 	getpmsg.o		\
 	getppid.o		\
@@ -838,6 +837,7 @@
 	fsmisc.o		\
 	fstatat.o		\
 	fsync.o			\
+	getpagesizes.o		\
 	getpeerucred.o		\
 	inst_sync.o		\
 	issetugid.o		\
--- a/usr/src/lib/libc/inc/synonyms.h	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/lib/libc/inc/synonyms.h	Sun Oct 28 10:20:15 2007 -0700
@@ -419,6 +419,7 @@
 #define	getopt_long		_getopt_long
 #define	getopt_long_only	_getopt_long_only
 #define	getpagesizes		_getpagesizes
+#define	getpagesizes2		_getpagesizes2
 #define	getpass			_getpass
 #define	getpassphrase		_getpassphrase
 #define	getpeerucred		_getpeerucred
--- a/usr/src/lib/libc/port/mapfile-vers	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/lib/libc/port/mapfile-vers	Sun Oct 28 10:20:15 2007 -0700
@@ -69,6 +69,8 @@
 	forkx;
 	fsetattr;
 	getattrat;
+	getpagesizes2;
+	_getpagesizes2;
 	htonl;
 	htons;
 	lio_listio;
@@ -515,6 +517,7 @@
 	gethomelgroup;
 	_gethomelgroup =		NODYNSORT;
 	getpagesizes;
+	_getpagesizes;
 	getrctl;
 	_getrctl;
 	issetugid;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/libc/port/sys/getpagesizes.c	Sun Oct 28 10:20:15 2007 -0700
@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#pragma weak getpagesizes = _getpagesizes
+#pragma weak getpagesizes2 = _getpagesizes2
+
+#include "synonyms.h"
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+/*
+ * mman.h contains "#pragma redefine_extname getpagesizes getpagesizes2".
+ * Applications that are still calling getpagesizes() instead of
+ * getpagesizes2() are 'legacy' applications that have not been recompiled
+ * since the #pragma redefine_extname change.
+ *
+ * Depending on the platform, 'legacy' applications may not be given the full
+ * set of supported page sizes to prevent them from inadvertantly using 'new'
+ * large pagesizes that might cause application failure or low system memory
+ * conditions.
+ *
+ * The first parameter to the SYS_getpagesizes syscall is effectively
+ * a 'legacy' boolean flag used as such in the kernel.
+ */
+int
+getpagesizes(size_t pagesize[], int nelem)
+{
+	return (syscall(SYS_getpagesizes, 1, pagesize, nelem));
+}
+
+int
+getpagesizes2(size_t pagesize[], int nelem)
+{
+	return (syscall(SYS_getpagesizes, 0, pagesize, nelem));
+}
--- a/usr/src/lib/libc/sparc/Makefile	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/lib/libc/sparc/Makefile	Sun Oct 28 10:20:15 2007 -0700
@@ -248,7 +248,6 @@
 	gethrtime.o		\
 	getitimer.o		\
 	getmsg.o		\
-	getpagesizes.o		\
 	getpid.o		\
 	getpmsg.o		\
 	getppid.o		\
@@ -864,6 +863,7 @@
 	fsmisc.o		\
 	fstatat.o		\
 	fsync.o			\
+	getpagesizes.o		\
 	getpeerucred.o		\
 	inst_sync.o		\
 	issetugid.o		\
--- a/usr/src/lib/libc/sparcv9/Makefile	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/lib/libc/sparcv9/Makefile	Sun Oct 28 10:20:15 2007 -0700
@@ -231,7 +231,6 @@
 	gethrtime.o		\
 	getitimer.o		\
 	getmsg.o		\
-	getpagesizes.o		\
 	getpid.o		\
 	getpmsg.o		\
 	getppid.o		\
@@ -809,6 +808,7 @@
 	fsmisc.o		\
 	fstatat.o		\
 	fsync.o			\
+	getpagesizes.o		\
 	getpeerucred.o		\
 	inst_sync.o		\
 	issetugid.o		\
--- a/usr/src/tools/ctf/cvt/ctfmerge.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/tools/ctf/cvt/ctfmerge.c	Sun Oct 28 10:20:15 2007 -0700
@@ -206,6 +206,8 @@
 static char *tmpname = NULL;
 static int dynsym;
 int debug_level = DEBUG_LEVEL;
+static size_t maxpgsize = 0x400000;
+
 
 void
 usage(void)
@@ -228,7 +230,7 @@
 bigheap(void)
 {
 	size_t big, *size;
-	int sizes, i;
+	int sizes;
 	struct memcntl_mha mha;
 
 	/*
@@ -237,12 +239,16 @@
 	if ((sizes = getpagesizes(NULL, 0)) == -1)
 		return;
 
-	if ((size = alloca(sizeof (size_t) * sizes)) == NULL)
+	if (sizes == 1 || (size = alloca(sizeof (size_t) * sizes)) == NULL)
 		return;
 
-	if (getpagesizes(size, sizes) == -1 || sizes == 1)
+	if (getpagesizes(size, sizes) == -1)
 		return;
 
+	while (size[sizes - 1] > maxpgsize)
+		sizes--;
+
+	/* set big to the largest allowed page size */
 	big = size[sizes - 1];
 	if (big & (big - 1)) {
 		/*
@@ -259,21 +265,13 @@
 		return;
 
 	/*
-	 * Finally, set our heap to use the largest page size for which the
-	 * MC_HAT_ADVISE doesn't return EAGAIN.
+	 * set the preferred page size for the heap
 	 */
 	mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
 	mha.mha_flags = 0;
-
-	for (i = sizes - 1; i >= 0; i--) {
-		mha.mha_pagesize = size[i];
+	mha.mha_pagesize = big;
 
-		if (memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0) != -1)
-			break;
-
-		if (errno != EAGAIN)
-			break;
-	}
+	(void) memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0);
 }
 
 static void
--- a/usr/src/uts/common/os/sysent.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/common/os/sysent.c	Sun Oct 28 10:20:15 2007 -0700
@@ -520,7 +520,7 @@
 	/* 70 */ SYSENT_CI("tasksys",		tasksys,	5),
 	/* 71 */ SYSENT_LOADABLE(),		/* acctctl */
 	/* 72 */ SYSENT_LOADABLE(),		/* exacct */
-	/* 73 */ SYSENT_CI("getpagesizes",	getpagesizes,	2),
+	/* 73 */ SYSENT_CI("getpagesizes",	getpagesizes,	3),
 	/* 74 */ SYSENT_CI("rctlsys",		rctlsys,	6),
 	/* 75 */ SYSENT_2CI("sidsys",		sidsys,		4),
 	/* 76 */ IF_LP64(
@@ -921,7 +921,7 @@
 	/* 70 */ SYSENT_CI("tasksys",		tasksys,	5),
 	/* 71 */ SYSENT_LOADABLE32(),		/* acctctl */
 	/* 72 */ SYSENT_LOADABLE32(),		/* exacct */
-	/* 73 */ SYSENT_CI("getpagesizes",	getpagesizes32,	2),
+	/* 73 */ SYSENT_CI("getpagesizes",	getpagesizes32,	3),
 	/* 74 */ SYSENT_CI("rctlsys",		rctlsys,	6),
 	/* 75 */ SYSENT_2CI("sidsys",		sidsys,		4),
 	/* 76 */ SYSENT_CI("fsat",		fsat32,		6),
--- a/usr/src/uts/common/sys/mman.h	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/common/sys/mman.h	Sun Oct 28 10:20:15 2007 -0700
@@ -143,6 +143,12 @@
 #endif
 #endif	/* _LP64 && _LARGEFILE64_SOURCE */
 
+#ifdef __PRAGMA_REDEFINE_EXTNAME
+#pragma redefine_extname	getpagesizes	getpagesizes2
+#else
+#define	getpagesizes	getpagesizes2
+#endif
+
 /*
  * Except for old binaries mmap() will return the resultant
  * address of mapping on success and (caddr_t)-1 on error.
@@ -174,6 +180,7 @@
 extern int madvise(caddr_t, size_t, int);
 #if !defined(__XOPEN_OR_POSIX) || defined(__EXTENSIONS__)
 extern int getpagesizes(size_t *, int);
+extern int getpagesizes2(size_t *, int);
 /* guard visibility of uint64_t */
 #if defined(_INT64_TYPE)
 extern int meminfo(const uint64_t *, int, const uint_t *, int, uint64_t *,
@@ -205,6 +212,7 @@
 extern int msync();
 extern int madvise();
 extern int getpagesizes();
+extern int getpagesizes2();
 extern int mlock();
 extern int mlockall();
 extern int munlock();
--- a/usr/src/uts/common/sys/syscall.h	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/common/sys/syscall.h	Sun Oct 28 10:20:15 2007 -0700
@@ -195,6 +195,11 @@
 	 * 	wracct(...) :: exacct(2, ...)
 	 */
 #define	SYS_getpagesizes	73
+	/*
+	 * subcodes:
+	 *	getpagesizes2(...)	:: getpagesizes(0, ...)
+	 * 	getpagesizes(...)	:: getpagesizes(1, ...) legacy
+	 */
 #define	SYS_rctlsys	74
 	/*
 	 * subcodes:
--- a/usr/src/uts/common/syscall/getpagesizes.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/common/syscall/getpagesizes.c	Sun Oct 28 10:20:15 2007 -0700
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -35,9 +34,9 @@
  * Return supported page sizes.
  */
 int
-getpagesizes(size_t *buf, int nelem)
+getpagesizes(int legacy, size_t *buf, int nelem)
 {
-	int i, pagesizes = page_num_user_pagesizes();
+	int i, pagesizes = page_num_user_pagesizes(legacy);
 	size_t *pgsza;
 
 	if (nelem < 0) {
@@ -74,9 +73,9 @@
  * a 32-bit address space.
  */
 int
-getpagesizes32(size32_t *buf, int nelem)
+getpagesizes32(int legacy, size32_t *buf, int nelem)
 {
-	int i, pagesizes = page_num_user_pagesizes();
+	int i, pagesizes = page_num_user_pagesizes(legacy);
 	size32_t *pgsza32;
 	size_t pgsz;
 	int rc;
@@ -116,7 +115,8 @@
 	}
 	rc = nelem;
 done:
-	kmem_free(pgsza32, sizeof (*pgsza32) * page_num_user_pagesizes());
+	kmem_free(pgsza32, sizeof (*pgsza32) *
+	    page_num_user_pagesizes(legacy));
 	return (rc);
 }
 #endif
--- a/usr/src/uts/common/vm/page.h	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/common/vm/page.h	Sun Oct 28 10:20:15 2007 -0700
@@ -834,7 +834,7 @@
 extern uchar_t		colorequivszc[];
 
 uint_t	page_num_pagesizes(void);
-uint_t	page_num_user_pagesizes(void);
+uint_t	page_num_user_pagesizes(int);
 size_t	page_get_pagesize(uint_t);
 size_t	page_get_user_pagesize(uint_t n);
 pgcnt_t	page_get_pagecnt(uint_t);
--- a/usr/src/uts/common/vm/vm_pagelist.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/common/vm/vm_pagelist.c	Sun Oct 28 10:20:15 2007 -0700
@@ -419,10 +419,18 @@
  * Return how many page sizes are available for the user to use.  This is
  * what the hardware supports and not based upon how the OS implements the
  * support of different page sizes.
+ *
+ * If legacy is non-zero, return the number of pagesizes available to legacy
+ * applications. The number of legacy page sizes might be less than the
+ * exported user page sizes. This is to prevent legacy applications that
+ * use the largest page size returned from getpagesizes(3c) from inadvertently
+ * using the 'new' large pagesizes.
  */
 uint_t
-page_num_user_pagesizes(void)
+page_num_user_pagesizes(int legacy)
 {
+	if (legacy)
+		return (mmu_legacy_page_sizes);
 	return (mmu_exported_page_sizes);
 }
 
@@ -3311,7 +3319,6 @@
  * 'pfnflag' specifies the subset of the pfn range to search.
  */
 
-
 static page_t *
 page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags,
     pfn_t pfnlo, pfn_t pfnhi, pgcnt_t pfnflag)
@@ -3330,7 +3337,9 @@
 
 	ASSERT(szc != 0 || (flags & PGI_PGCPSZC0));
 
-	if ((pfnhi - pfnlo) + 1 < szcpgcnt)
+	pfnlo = P2ROUNDUP(pfnlo, szcpgcnt);
+
+	if ((pfnhi - pfnlo) + 1 < szcpgcnt || pfnlo >= pfnhi)
 		return (NULL);
 
 	ASSERT(szc < mmu_page_sizes);
@@ -3368,15 +3377,16 @@
 		pgcnt_t	szcpages;
 		int	slotlen;
 
-		pfnlo = P2ROUNDUP(pfnlo, szcpgcnt);
-		pfnhi = pfnhi & ~(szcpgcnt - 1);
-
+		pfnhi = P2ALIGN((pfnhi + 1), szcpgcnt) - 1;
 		szcpages = ((pfnhi - pfnlo) + 1) / szcpgcnt;
 		slotlen = howmany(szcpages, slots);
+		/* skip if 'slotid' slot is empty */
+		if (slotid * slotlen >= szcpages)
+			return (NULL);
 		pfnlo = pfnlo + (((slotid * slotlen) % szcpages) * szcpgcnt);
 		ASSERT(pfnlo < pfnhi);
 		if (pfnhi > pfnlo + (slotlen * szcpgcnt))
-			pfnhi = pfnlo + (slotlen * szcpgcnt);
+			pfnhi = pfnlo + (slotlen * szcpgcnt) - 1;
 	}
 
 	memsegs_lock(0);
@@ -3406,8 +3416,9 @@
 
 		/* round to szcpgcnt boundaries */
 		lo = P2ROUNDUP(lo, szcpgcnt);
+
 		MEM_NODE_ITERATOR_INIT(lo, mnode, &it);
-		hi = hi & ~(szcpgcnt - 1);
+		hi = P2ALIGN((hi + 1), szcpgcnt) - 1;
 
 		if (hi <= lo)
 			continue;
@@ -3449,7 +3460,7 @@
 		ASSERT(randpp->p_pagenum == randpfn);
 
 		pp = randpp;
-		endpp =  mseg->pages + (hi - mseg->pages_base);
+		endpp =  mseg->pages + (hi - mseg->pages_base) + 1;
 
 		ASSERT(randpp + szcpgcnt <= endpp);
 
--- a/usr/src/uts/i86pc/os/cpuid.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/i86pc/os/cpuid.c	Sun Oct 28 10:20:15 2007 -0700
@@ -1025,6 +1025,12 @@
 			if (cp->cp_edx & CPUID_AMD_EDX_NX)
 				feature |= X86_NX;
 
+#if defined(__amd64)
+			/* 1 GB large page - enable only for 64 bit kernel */
+			if (cp->cp_edx & CPUID_AMD_EDX_1GPG)
+				feature |= X86_1GPG;
+#endif
+
 			if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
 			    (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
 			    (cp->cp_ecx & CPUID_AMD_ECX_SSE4A))
--- a/usr/src/uts/i86pc/os/startup.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/i86pc/os/startup.c	Sun Oct 28 10:20:15 2007 -0700
@@ -1823,6 +1823,8 @@
 	/*
 	 * disable automatic large pages for small memory systems or
 	 * when the disable flag is set.
+	 *
+	 * Do not yet consider page sizes larger than 2m/4m.
 	 */
 	if (!auto_lpg_disable && mmu.max_page_level > 0) {
 		max_uheap_lpsize = LEVEL_SIZE(1);
@@ -1837,9 +1839,7 @@
 		use_brk_lpg = 0;
 		use_stk_lpg = 0;
 	}
-	if (mmu.max_page_level > 0) {
-		mcntl0_lpsize = LEVEL_SIZE(1);
-	}
+	mcntl0_lpsize = LEVEL_SIZE(mmu.umax_page_level);
 
 	PRM_POINT("Calling hat_init_finish()...");
 	hat_init_finish();
--- a/usr/src/uts/i86pc/sys/machparam.h	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/i86pc/sys/machparam.h	Sun Oct 28 10:20:15 2007 -0700
@@ -77,7 +77,7 @@
  */
 
 /* supported page sizes */
-#define	MMU_PAGE_SIZES	2
+#define	MMU_PAGE_SIZES	3
 
 /*
  * MMU_PAGES* describes the physical page size used by the mapping hardware.
--- a/usr/src/uts/i86pc/vm/hat_i86.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/i86pc/vm/hat_i86.c	Sun Oct 28 10:20:15 2007 -0700
@@ -130,6 +130,14 @@
 uint_t use_boot_reserve = 1;	/* cleared after early boot process */
 uint_t can_steal_post_boot = 0;	/* set late in boot to enable stealing */
 
+/* export 1g page size to user applications if set */
+int	enable_1gpg;
+
+#ifdef DEBUG
+uint_t	map1gcnt;
+#endif
+
+
 /*
  * A cpuset for all cpus. This is used for kernel address cross calls, since
  * the kernel addresses apply to all cpus.
@@ -525,12 +533,25 @@
 	 * Initialize parameters based on the 64 or 32 bit kernels and
 	 * for the 32 bit kernel decide if we should use PAE.
 	 */
-	if (kbm_largepage_support)
-		mmu.max_page_level = 1;
-	else
+	if (kbm_largepage_support) {
+		if (x86_feature & X86_1GPG) {
+			mmu.max_page_level = 2;
+			mmu.umax_page_level = (enable_1gpg) ? 2 : 1;
+		} else {
+			mmu.max_page_level = 1;
+			mmu.umax_page_level = 1;
+		}
+	} else {
 		mmu.max_page_level = 0;
+		mmu.umax_page_level = 0;
+	}
 	mmu_page_sizes = mmu.max_page_level + 1;
-	mmu_exported_page_sizes = mmu_page_sizes;
+	mmu_exported_page_sizes = mmu.umax_page_level + 1;
+
+	/* restrict legacy applications from using pagesizes 1g and above */
+	mmu_legacy_page_sizes =
+	    (mmu_exported_page_sizes > 2) ? 2 : mmu_exported_page_sizes;
+
 
 #if defined(__amd64)
 
@@ -1296,6 +1317,13 @@
 		} else {
 			ASSERT(flags & HAT_LOAD_NOCONSIST);
 		}
+#if defined(__amd64)
+		if (ht->ht_flags & HTABLE_VLP) {
+			cpu_t *cpu = CPU;
+			x86pte_t *vlpptep = cpu->cpu_hat_info->hci_vlp_l2ptes;
+			VLP_COPY(hat->hat_vlp_ptes, vlpptep);
+		}
+#endif
 		HTABLE_INC(ht->ht_valid_cnt);
 		PGCNT_INC(hat, l);
 		return (rv);
@@ -1616,8 +1644,13 @@
 					ASSERT(pages[pgindx] + i ==
 					    pages[pgindx + i]);
 				}
-				if (i == mmu_btop(pgsize))
+				if (i == mmu_btop(pgsize)) {
+#ifdef DEBUG
+					if (level == 2)
+						map1gcnt++;
+#endif
 					break;
+				}
 			}
 		}
 
@@ -1720,8 +1753,13 @@
 				break;
 			if (IS_P2ALIGNED(va, pgsize) &&
 			    (eva - va) >= pgsize &&
-			    IS_P2ALIGNED(pfn, mmu_btop(pgsize)))
+			    IS_P2ALIGNED(pfn, mmu_btop(pgsize))) {
+#ifdef DEBUG
+				if (level == 2)
+					map1gcnt++;
+#endif
 				break;
+			}
 		}
 
 		/*
--- a/usr/src/uts/i86pc/vm/hat_pte.h	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/i86pc/vm/hat_pte.h	Sun Oct 28 10:20:15 2007 -0700
@@ -139,7 +139,7 @@
  *		...
  */
 #define	MAX_NUM_LEVEL		4
-#define	MAX_PAGE_LEVEL		1			/* for now.. sigh */
+#define	MAX_PAGE_LEVEL		2
 typedef	int8_t level_t;
 #define	LEVEL_SHIFT(l)	(mmu.level_shift[l])
 #define	LEVEL_SIZE(l)	(mmu.level_size[l])
@@ -173,6 +173,7 @@
 	uint_t num_level;	/* number of page table levels in use */
 	uint_t max_level;	/* just num_level - 1 */
 	uint_t max_page_level;	/* maximum level at which we can map a page */
+	uint_t umax_page_level; /* max user page map level */
 	uint_t ptes_per_table;	/* # of entries in lower level page tables */
 	uint_t top_level_count;	/* # of entries in top most level page table */
 
--- a/usr/src/uts/i86pc/vm/htable.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/i86pc/vm/htable.c	Sun Oct 28 10:20:15 2007 -0700
@@ -1655,7 +1655,7 @@
 	 * Find the level of the largest pagesize used by this HAT.
 	 */
 	if (hat->hat_ism_pgcnt > 0) {
-		max_mapped_level = mmu.max_page_level;
+		max_mapped_level = mmu.umax_page_level;
 	} else {
 		max_mapped_level = 0;
 		for (l = 1; l <= mmu.max_page_level; ++l)
@@ -2175,7 +2175,7 @@
 	x86pte_t	found;
 
 	ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN));
-	ASSERT(ht->ht_level != VLP_LEVEL);
+	ASSERT(ht->ht_level <= mmu.max_page_level);
 
 	if (pte_ptr != NULL)
 		ptep = pte_ptr;
@@ -2241,7 +2241,7 @@
 
 	ASSERT(new != 0);
 	ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN));
-	ASSERT(ht->ht_level != VLP_LEVEL);
+	ASSERT(ht->ht_level <= mmu.max_page_level);
 
 	ptep = x86pte_access_pagetable(ht, entry);
 	XPV_ALLOW_PAGETABLE_UPDATES();
--- a/usr/src/uts/i86pc/vm/i86_mmu.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/i86pc/vm/i86_mmu.c	Sun Oct 28 10:20:15 2007 -0700
@@ -337,11 +337,13 @@
 			paddr = pmem->address;
 			psize = pmem->size;
 			while (psize >= MMU_PAGESIZE) {
-				if ((paddr & LEVEL_OFFSET(lpagel)) == 0 &&
-				    psize > LEVEL_SIZE(lpagel))
-					l = lpagel;
-				else
-					l = 0;
+				/* find the largest page size */
+				for (l = lpagel; l > 0; l--) {
+					if ((paddr & LEVEL_OFFSET(l)) == 0 &&
+					    psize > LEVEL_SIZE(l))
+						break;
+				}
+
 #if defined(__xpv)
 				/*
 				 * Create read/only mappings to avoid
@@ -410,10 +412,12 @@
 		 * increment table_cnt. We can stop at the 1st level where
 		 * they are in the same htable.
 		 */
-		if (size == MMU_PAGESIZE)
-			start_level = 0;
-		else
-			start_level = 1;
+		start_level = 0;
+		while (start_level <= mmu.max_page_level) {
+			if (size == LEVEL_SIZE(start_level))
+				break;
+			start_level++;
+		}
 
 		for (l = start_level; l < mmu.max_level; ++l) {
 			if (va >> LEVEL_SHIFT(l + 1) ==
@@ -422,7 +426,8 @@
 			++table_cnt;
 		}
 		last_va = va;
-		va = (va & LEVEL_MASK(1)) + LEVEL_SIZE(1);
+		l = (start_level == 0) ? 1 : start_level;
+		va = (va & LEVEL_MASK(l)) + LEVEL_SIZE(l);
 	}
 
 	/*
--- a/usr/src/uts/i86pc/vm/kboot_mmu.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/i86pc/vm/kboot_mmu.c	Sun Oct 28 10:20:15 2007 -0700
@@ -167,7 +167,7 @@
 		panic("kbm_map() called too late");
 
 	pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
-	if (level == 1)
+	if (level >= 1)
 		pteval |= PT_PAGESIZE;
 	if (kbm_pge_support && is_kernel)
 		pteval |= PT_GLOBAL;
--- a/usr/src/uts/i86pc/vm/vm_dep.h	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/i86pc/vm/vm_dep.h	Sun Oct 28 10:20:15 2007 -0700
@@ -323,6 +323,12 @@
  */
 extern uint_t mmu_page_sizes;
 extern uint_t mmu_exported_page_sizes;
+/*
+ * page sizes that legacy applications can see via getpagesizes(3c).
+ * Used to prevent legacy applications from inadvertently using the
+ * 'new' large pagesizes (1g and above).
+ */
+extern uint_t mmu_legacy_page_sizes;
 
 /* For x86, userszc is the same as the kernel's szc */
 #define	USERSZC_2_SZC(userszc)	(userszc)
--- a/usr/src/uts/i86pc/vm/vm_machdep.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/i86pc/vm/vm_machdep.c	Sun Oct 28 10:20:15 2007 -0700
@@ -283,6 +283,9 @@
 /* How many page sizes the users can see */
 uint_t mmu_exported_page_sizes;
 
+/* page sizes that legacy applications can see */
+uint_t mmu_legacy_page_sizes;
+
 /*
  * Number of pages in 1 GB.  Don't enable automatic large pages if we have
  * fewer than this many pages.
@@ -383,7 +386,7 @@
 		/*
 		 * use the pages size that best fits len
 		 */
-		for (l = mmu.max_page_level; l > 0; --l) {
+		for (l = mmu.umax_page_level; l > 0; --l) {
 			if (LEVEL_SIZE(l) > max_lpsize || len < LEVEL_SIZE(l)) {
 				continue;
 			} else {
@@ -399,13 +402,12 @@
 		}
 		return (pgsz);
 
-	/*
-	 * for ISM use the 1st large page size.
-	 */
 	case MAPPGSZ_ISM:
-		if (mmu.max_page_level == 0)
-			return (MMU_PAGESIZE);
-		return (LEVEL_SIZE(1));
+		for (l = mmu.umax_page_level; l > 0; --l) {
+			if (len >= LEVEL_SIZE(l))
+				return (LEVEL_SIZE(l));
+		}
+		return (LEVEL_SIZE(0));
 	}
 	return (pgsz);
 }
@@ -425,7 +427,7 @@
 		return (0);
 	}
 
-	for (i = mmu_page_sizes - 1; i > 0; i--) {
+	for (i = mmu_exported_page_sizes - 1; i > 0; i--) {
 		pgsz = page_get_pagesize(i);
 		if (pgsz > max_lpsize) {
 			continue;
@@ -719,7 +721,7 @@
 		 */
 		align_amount = ELF_386_MAXPGSZ;
 	} else {
-		int l = mmu.max_page_level;
+		int l = mmu.umax_page_level;
 
 		while (l && len < LEVEL_SIZE(l))
 			--l;
@@ -1482,6 +1484,8 @@
 	memranges += i;
 	nranges -= i;
 
+	ASSERT(mmu_page_sizes <= MMU_PAGE_SIZES);
+
 	ASSERT(ISP2(l2_sz));
 	ASSERT(ISP2(l2_linesz));
 	ASSERT(l2_sz > MMU_PAGESIZE);
--- a/usr/src/uts/intel/sys/x86_archext.h	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/intel/sys/x86_archext.h	Sun Oct 28 10:20:15 2007 -0700
@@ -147,7 +147,7 @@
 #define	CPUID_AMD_EDX_MMX	0x00800000	/* MMX instructions */
 #define	CPUID_AMD_EDX_FXSR	0x01000000	/* fxsave and fxrstor */
 #define	CPUID_AMD_EDX_FFXSR	0x02000000	/* fast fxsave/fxrstor */
-				/* 0x04000000 - reserved */
+#define	CPUID_AMD_EDX_1GPG	0x04000000	/* 1GB page */
 #define	CPUID_AMD_EDX_TSCP	0x08000000	/* rdtscp instruction */
 				/* 0x10000000 - reserved */
 #define	CPUID_AMD_EDX_LM	0x20000000	/* AMD: long mode */
@@ -329,6 +329,7 @@
 #define	X86_SSSE3	0x02000000
 #define	X86_SSE4_1	0x04000000
 #define	X86_SSE4_2	0x08000000
+#define	X86_1GPG	0x10000000
 
 /*
  * flags to patch tsc_read routine.
--- a/usr/src/uts/sun4/io/trapstat.c	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/sun4/io/trapstat.c	Sun Oct 28 10:20:15 2007 -0700
@@ -2350,7 +2350,7 @@
 	tstat_devi = devi;
 
 	tstat_pgszs = page_num_pagesizes();
-	tstat_user_pgszs = page_num_user_pagesizes();
+	tstat_user_pgszs = page_num_user_pagesizes(0);
 	tstat_data_t_size = sizeof (tstat_data_t) +
 	    (tstat_pgszs - 1) * sizeof (tstat_pgszdata_t);
 	tstat_data_t_exported_size = sizeof (tstat_data_t) +
--- a/usr/src/uts/sun4/vm/vm_dep.h	Fri Oct 26 15:37:43 2007 -0700
+++ b/usr/src/uts/sun4/vm/vm_dep.h	Sun Oct 28 10:20:15 2007 -0700
@@ -789,6 +789,7 @@
 extern uint_t szc_2_userszc[];
 extern uint_t userszc_2_szc[];
 
+#define	mmu_legacy_page_sizes	mmu_exported_page_sizes
 #define	USERSZC_2_SZC(userszc)	(userszc_2_szc[userszc])
 #define	SZC_2_USERSZC(szc)	(szc_2_userszc[szc])