changeset 10607:da3cc66100c3

6881217 32bit stack frames should be aligned on 16-byte boundaries (for sse2 code)
author Roger A. Faulkner <Roger.Faulkner@Sun.COM>
date Tue, 22 Sep 2009 09:27:56 -0700
parents 438ded3d6917
children 3ce7fe6573e8
files usr/src/lib/common/i386/crt1.s usr/src/lib/libc/i386/threads/asm_subr.s usr/src/lib/libc/i386/threads/machdep.c
diffstat 3 files changed, 39 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/lib/common/i386/crt1.s	Tue Sep 22 10:23:49 2009 -0400
+++ b/usr/src/lib/common/i386/crt1.s	Tue Sep 22 09:27:56 2009 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -161,7 +161,8 @@
 	leal	16(%ebp,%eax,4),%edx	/* envp */
 	movl	%edx,_environ		/* copy to _environ */
 1:
-	andl	$-16,%esp
+	andl	$-16,%esp	/* make main() and exit() be called with */
+	subl	$4,%esp		/* a 16-byte aligned stack pointer */
 	pushl	%edx
 	leal	12(%ebp),%edx	/* argv */
 	movl	%edx,___Argv
@@ -171,13 +172,12 @@
 	call	__fsr		/* support for ftrap/fround/fprecision  */
 	call	_init
 	call	main		/* main(argc,argv,envp) */
-	addl	$12,%esp
-	pushl	%eax		/* return value from main */
-	pushl	%eax		/* push it again (for _exit(), below) */
+	movl	%eax,(%esp)	/* return value from main, for exit() */
+	movl	%eax,4(%esp)	/* remember it for _exit(), below */
 	call	exit
-	addl	$4,%esp
-	call	_exit		/* if user redefined exit, call _exit */
-	addl	$4,%esp
+	movl	4(%esp),%eax	/* if user redefined exit, call _exit */
+	movl	%eax,(%esp)
+	call	_exit
 	hlt
 	.size	_start, .-_start
 
--- a/usr/src/lib/libc/i386/threads/asm_subr.s	Tue Sep 22 10:23:49 2009 -0400
+++ b/usr/src/lib/libc/i386/threads/asm_subr.s	Tue Sep 22 09:27:56 2009 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -86,18 +86,21 @@
 /*
  * __sighndlr(int sig, siginfo_t *si, ucontext_t *uc, void (*hndlr)())
  *
- * This is called from sigacthandler() for the entire purpose of
- * communicating the ucontext to java's stack tracing functions.
+ * This is called from sigacthandler() for the purpose of
+ * communicating the ucontext to java's stack tracing functions
+ * and to ensure a 16-byte aligned stack pointer for the benefit
+ * of gcc-compiled floating point code.
  */
 	ENTRY(__sighndlr)
 	.globl	__sighndlrend
 	pushl	%ebp
 	movl	%esp, %ebp
+	andl	$-16,%esp	/ make sure handler is called with
+	subl	$4,%esp		/ a 16-byte aligned stack pointer
 	pushl	16(%ebp)
 	pushl	12(%ebp)
 	pushl	8(%ebp)
 	call	*20(%ebp)
-	addl	$12, %esp
 	leave
 	ret
 __sighndlrend:
--- a/usr/src/lib/libc/i386/threads/machdep.c	Tue Sep 22 10:23:49 2009 -0400
+++ b/usr/src/lib/libc/i386/threads/machdep.c	Tue Sep 22 09:27:56 2009 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -29,6 +29,19 @@
 #include <ucontext.h>
 #include <setjmp.h>
 
+/*
+ * The i386 ABI says that the stack pointer need be only 4-byte aligned
+ * before a function call (STACK_ALIGN == 4).  We use a 16-byte stack
+ * alignment for the benefit of floating point code compiled using sse2.
+ * Even though the i386 ABI doesn't require it, both cc and gcc
+ * assume this alignment on entry to a function and maintain it
+ * for calls made from that function.  If the stack is initially
+ * aligned on a 16-byte boundary, it will continue to be so aligned.
+ * If it is not initially so aligned, it will never become so aligned.
+ */
+#undef	STACK_ALIGN
+#define	STACK_ALIGN	16
+
 extern int getlwpstatus(thread_t, lwpstatus_t *);
 extern int putlwpregs(thread_t, prgregset_t);
 
@@ -39,13 +52,18 @@
 	struct {
 		uint32_t	rpc;
 		uint32_t	arg;
+		uint32_t	pad;
 		uint32_t	fp;
 		uint32_t	pc;
 	} frame;
 
 	/*
 	 * Top-of-stack must be rounded down to STACK_ALIGN and
-	 * there must be a minimum frame.
+	 * there must be a minimum frame.  Note: 'frame' is not a true
+	 * stack frame (see <sys/frame.h>) but a construction made here to
+	 * make it look like _lwp_start called the thread start function
+	 * with a 16-byte aligned stack pointer (the address of frame.arg
+	 * is the address that must be aligned on a 16-byte boundary).
 	 */
 	stack = (uint32_t *)(((uintptr_t)stk + stksize) & ~(STACK_ALIGN-1));
 
@@ -55,9 +73,10 @@
 	 * thr_create(), pthread_create() or pthread_attr_setstack()
 	 * to fail, passing the problem up to the application.
 	 */
-	stack -= 4;
+	stack -= 5;	/* make the address of frame.arg be 16-byte aligned */
 	frame.pc = 0;
-	frame.fp = 0;
+	frame.fp = 0;	/* initial address for %ebp (see EBP below) */
+	frame.pad = 0;
 	frame.arg = (uint32_t)ulwp;
 	frame.rpc = (uint32_t)_lwp_start;
 	if (uucopy(&frame, (void *)stack, sizeof (frame)) == 0)
@@ -118,7 +137,7 @@
 	ucp->uc_flags |= UC_CPU;
 	ucp->uc_mcontext.gregs[EIP] = (greg_t)func;
 	ucp->uc_mcontext.gregs[UESP] = (greg_t)stack;
-	ucp->uc_mcontext.gregs[EBP] = (greg_t)(stack + 2);
+	ucp->uc_mcontext.gregs[EBP] = (greg_t)(stack + 3);
 
 	return (0);
 }