comparison usr/src/cmd/sgs/rtld/amd64/boot_elf.s @ 13675:a9ae30c28ee4
2413 %ymm* need to be preserved on way through PLT
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: Joshua M. Clulow <josh@sysmgr.org>
Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
Approved by: Albert Lee <trisk@nexenta.com>
author		Robert Mustacchi <rm@joyent.com>
date		Wed, 25 Apr 2012 00:27:21 -0400
parents		ae3aa141e3fa
children
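The change in brief: on AVX hardware the vector argument registers are %ymm0-%ymm7, of which %xmm0-%xmm7 are only the low 128 bits, so rtld's PLT trampolines must save and restore the full 256-bit registers or the upper halves of in-flight arguments are silently destroyed. A minimal C sketch of the failure mode (illustrative only, not part of the commit; sum4 is a hypothetical lazily-bound library function, built with -mavx):

	/* Hypothetical caller; sum4() lives in some lazily-bound library. */
	#include <immintrin.h>
	#include <stdio.h>

	extern __m256d sum4(__m256d, __m256d);	/* resolved on first call */

	int
	main(void)
	{
		__m256d a = _mm256_set1_pd(1.0);
		__m256d b = _mm256_set1_pd(2.0);

		/*
		 * The first call detours through the PLT into elf_rtbndr
		 * with a and b live in %ymm0/%ymm1.  If rtld saves and
		 * restores only %xmm0/%xmm1, the high 128 bits of each
		 * argument reach sum4() as garbage.
		 */
		__m256d r = sum4(a, b);
		(void) printf("lane 3: %f\n", ((double *)&r)[3]);
		return (0);
	}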
13674:181ba6c41bee | 13675:a9ae30c28ee4
20 */ | 20 */ |
21 | 21 |
22 /* | 22 /* |
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. | 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
24 * Use is subject to license terms. | 24 * Use is subject to license terms. |
25 * Copyright (c) 2012 Joyent, Inc. All rights reserved. | |
25 */ | 26 */ |
26 | |
27 #pragma ident "%Z%%M% %I% %E% SMI" | |
28 | 27 |
29 #if defined(lint) | 28 #if defined(lint) |
30 | 29 |
31 #include <sys/types.h> | 30 #include <sys/types.h> |
32 #include <_rtld.h> | 31 #include <_rtld.h> |
33 #include <_audit.h> | 32 #include <_audit.h> |
34 #include <_elf.h> | 33 #include <_elf.h> |
35 #include <sys/regset.h> | 34 #include <sys/regset.h> |
35 #include <sys/auxv_386.h> | |
36 | 36 |
37 /* ARGSUSED0 */ | 37 /* ARGSUSED0 */ |
38 int | 38 int |
39 elf_plt_trace() | 39 elf_plt_trace() |
40 { | 40 { |
43 #else | 43 #else |
44 | 44 |
45 #include <link.h> | 45 #include <link.h> |
46 #include <_audit.h> | 46 #include <_audit.h> |
47 #include <sys/asm_linkage.h> | 47 #include <sys/asm_linkage.h> |
48 #include <sys/auxv_386.h> | |
48 | 49 |
49 .file "boot_elf.s" | 50 .file "boot_elf.s" |
50 .text | 51 .text |
51 | 52 |
52 /* | 53 /* |
104 * %r9 8 | 105 * %r9 8 |
105 * %r10 8 | 106 * %r10 8 |
106 * %r11 8 | 107 * %r11 8 |
107 * %rax 8 | 108 * %rax 8 |
108 * ======= | 109 * ======= |
109 * Subtotal: 144 (16byte aligned) | 110 * Subtotal: 144 (32byte aligned) |
110 * | 111 * |
111 * Saved Media Regs (used to pass floating point args): | 112 * Saved Media Regs (used to pass floating point args): |
112 * %xmm0 - %xmm7 16 * 8: 128 | 113 * %xmm0 - %xmm7 32 * 8: 256 |
113 * ======= | 114 * ======= |
114 * Total: 272 (16byte aligned) | 115 * Total: 400 (32byte aligned) |
115 * | 116 * |
116 * So - will subtract the following to create enough space | 117 * So - will subtract the following to create enough space |
117 * | 118 * |
118 * -8(%rbp) store dyndata ptr | 119 * -8(%rbp) store dyndata ptr |
119 * -16(%rbp) store call destination | 120 * -16(%rbp) store call destination |
129 * -128(%rbp) entering %r8 | 130 * -128(%rbp) entering %r8 |
130 * -136(%rbp) entering %r9 | 131 * -136(%rbp) entering %r9 |
131 * -144(%rbp) entering %r10 | 132 * -144(%rbp) entering %r10 |
132 * -152(%rbp) entering %r11 | 133 * -152(%rbp) entering %r11 |
133 * -160(%rbp) entering %rax | 134 * -160(%rbp) entering %rax |
134 * -176(%rbp) entering %xmm0 | 135 * -192(%rbp) entering %xmm0 |
135 * -192(%rbp) entering %xmm1 | 136 * -224(%rbp) entering %xmm1 |
136 * -208(%rbp) entering %xmm2 | 137 * -256(%rbp) entering %xmm2 |
137 * -224(%rbp) entering %xmm3 | 138 * -288(%rbp) entering %xmm3 |
138 * -240(%rbp) entering %xmm4 | 139 * -320(%rbp) entering %xmm4 |
139 * -256(%rbp) entering %xmm5 | 140 * -384(%rbp) entering %xmm5 |
140 * -272(%rbp) entering %xmm6 | 141 * -416(%rbp) entering %xmm6 |
141 * -288(%rbp) entering %xmm7 | 142 * -448(%rbp) entering %xmm7 |
142 * | 143 * |
143 */ | 144 */ |
144 #define SPDYNOFF -8 | 145 #define SPDYNOFF -8 |
145 #define SPDESTOFF -16 | 146 #define SPDESTOFF -16 |
146 #define SPLAREGOFF -80 | 147 #define SPLAREGOFF -80 |
147 #define SPPRVSTKOFF -88 | 148 #define SPPRVSTKOFF -88 |
148 | 149 |
149 /* | 150 /* |
150 * The next set of offsets are relative to %rsp. | 151 * The next set of offsets are relative to %rsp. |
151 * We guarantee %rsp is ABI compliant 16-byte aligned. This guarantees the | 152 * We guarantee %rsp is ABI compliant 32-byte aligned. This guarantees the |
152 * xmm registers are saved to 16-byte aligned addresses. | 153 * ymm registers are saved to 32-byte aligned addresses. |
153 * %rbp may only be 8 byte aligned if we came in from non-ABI compliant code. | 154 * %rbp may only be 8 byte aligned if we came in from non-ABI compliant code. |
154 */ | 155 */ |
155 #define SPRDIOFF 192 | 156 #define SPRDIOFF 320 |
156 #define SPRSIOFF 184 | 157 #define SPRSIOFF 312 |
157 #define SPRDXOFF 176 | 158 #define SPRDXOFF 304 |
158 #define SPRCXOFF 168 | 159 #define SPRCXOFF 296 |
159 #define SPR8OFF 160 | 160 #define SPR8OFF 288 |
160 #define SPR9OFF 152 | 161 #define SPR9OFF 280 |
161 #define SPR10OFF 144 | 162 #define SPR10OFF 272 |
162 #define SPR11OFF 136 | 163 #define SPR11OFF 264 |
163 #define SPRAXOFF 128 | 164 #define SPRAXOFF 256 |
164 #define SPXMM0OFF 112 | 165 #define SPXMM0OFF 224 |
165 #define SPXMM1OFF 96 | 166 #define SPXMM1OFF 192 |
166 #define SPXMM2OFF 80 | 167 #define SPXMM2OFF 160 |
167 #define SPXMM3OFF 64 | 168 #define SPXMM3OFF 128 |
168 #define SPXMM4OFF 48 | 169 #define SPXMM4OFF 96 |
169 #define SPXMM5OFF 32 | 170 #define SPXMM5OFF 64 |
170 #define SPXMM6OFF 16 | 171 #define SPXMM6OFF 32 |
171 #define SPXMM7OFF 0 | 172 #define SPXMM7OFF 0 |
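A quick consistency check on the new offsets (an illustrative C sketch, not from the commit): the save slots are now 32 bytes apart, so once the preamble's andq has forced %rsp to a 32-byte boundary, every vmovdqa below lands on an aligned address, which matters because vmovdqa faults on misaligned accesses.

	/* Mirror the SPXMM offsets above and assert the layout they imply. */
	#define	SPXMM0OFF	224
	#define	SPXMM1OFF	192
	#define	SPXMM7OFF	0

	_Static_assert(SPXMM0OFF % 32 == 0, "slot 0 is 32-byte aligned");
	_Static_assert(SPXMM0OFF - SPXMM1OFF == 32, "slots are one ymm apart");
	_Static_assert(SPXMM0OFF - SPXMM7OFF == 7 * 32, "eight 32-byte slots");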
172 | 173 |
174 /* See elf_rtbndr for explanation behind org_scapset */ | |
175 .extern org_scapset | |
173 .globl elf_plt_trace | 176 .globl elf_plt_trace |
174 .type elf_plt_trace,@function | 177 .type elf_plt_trace,@function |
175 .align 16 | 178 .align 16 |
176 elf_plt_trace: | 179 elf_plt_trace: |
177 /* | 180 /* |
178 * Enforce ABI 16-byte stack alignment here. | 181 * Enforce ABI 32-byte stack alignment here. |
179 * The next andq instruction does this pseudo code: | 182 * The next andq instruction does this pseudo code: |
180 * If %rsp is 8 byte aligned then subtract 8 from %rsp. | 183 * If %rsp is not 32-byte aligned, round it down to a 32-byte boundary. |
181 */ | 184 */ |
182 andq $-16, %rsp /* enforce ABI 16-byte stack alignment */ | 185 andq $-32, %rsp /* enforce ABI 32-byte stack alignment */ |
183 subq $272,%rsp / create some local storage | 186 subq $400,%rsp / create some local storage |
184 | 187 |
185 movq %rdi, SPRDIOFF(%rsp) | 188 movq %rdi, SPRDIOFF(%rsp) |
186 movq %rsi, SPRSIOFF(%rsp) | 189 movq %rsi, SPRSIOFF(%rsp) |
187 movq %rdx, SPRDXOFF(%rsp) | 190 movq %rdx, SPRDXOFF(%rsp) |
188 movq %rcx, SPRCXOFF(%rsp) | 191 movq %rcx, SPRCXOFF(%rsp) |
189 movq %r8, SPR8OFF(%rsp) | 192 movq %r8, SPR8OFF(%rsp) |
190 movq %r9, SPR9OFF(%rsp) | 193 movq %r9, SPR9OFF(%rsp) |
191 movq %r10, SPR10OFF(%rsp) | 194 movq %r10, SPR10OFF(%rsp) |
192 movq %r11, SPR11OFF(%rsp) | 195 movq %r11, SPR11OFF(%rsp) |
193 movq %rax, SPRAXOFF(%rsp) | 196 movq %rax, SPRAXOFF(%rsp) |
197 | |
198 movq org_scapset@GOTPCREL(%rip),%r9 | |
199 movq (%r9),%r9 | |
200 movl (%r9),%edx | |
201 testl $AV_386_AVX,%edx | |
202 jne .trace_save_ymm | |
203 | |
204 .trace_save_xmm: | |
194 movdqa %xmm0, SPXMM0OFF(%rsp) | 205 movdqa %xmm0, SPXMM0OFF(%rsp) |
195 movdqa %xmm1, SPXMM1OFF(%rsp) | 206 movdqa %xmm1, SPXMM1OFF(%rsp) |
196 movdqa %xmm2, SPXMM2OFF(%rsp) | 207 movdqa %xmm2, SPXMM2OFF(%rsp) |
197 movdqa %xmm3, SPXMM3OFF(%rsp) | 208 movdqa %xmm3, SPXMM3OFF(%rsp) |
198 movdqa %xmm4, SPXMM4OFF(%rsp) | 209 movdqa %xmm4, SPXMM4OFF(%rsp) |
199 movdqa %xmm5, SPXMM5OFF(%rsp) | 210 movdqa %xmm5, SPXMM5OFF(%rsp) |
200 movdqa %xmm6, SPXMM6OFF(%rsp) | 211 movdqa %xmm6, SPXMM6OFF(%rsp) |
201 movdqa %xmm7, SPXMM7OFF(%rsp) | 212 movdqa %xmm7, SPXMM7OFF(%rsp) |
213 jmp .trace_save_finish | |
214 | |
215 .trace_save_ymm: | |
216 vmovdqa %ymm0, SPXMM0OFF(%rsp) | |
217 vmovdqa %ymm1, SPXMM1OFF(%rsp) | |
218 vmovdqa %ymm2, SPXMM2OFF(%rsp) | |
219 vmovdqa %ymm3, SPXMM3OFF(%rsp) | |
220 vmovdqa %ymm4, SPXMM4OFF(%rsp) | |
221 vmovdqa %ymm5, SPXMM5OFF(%rsp) | |
222 vmovdqa %ymm6, SPXMM6OFF(%rsp) | |
223 vmovdqa %ymm7, SPXMM7OFF(%rsp) | |
224 | |
225 .trace_save_finish: | |
202 | 226 |
203 movq SPDYNOFF(%rbp), %rax / %rax = dyndata | 227 movq SPDYNOFF(%rbp), %rax / %rax = dyndata |
204 testb $LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax) / <link.h> | 228 testb $LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax) / <link.h> |
205 je .start_pltenter | 229 je .start_pltenter |
206 movq SYMDEF_VALUE_OFF(%rax), %rdi | 230 movq SYMDEF_VALUE_OFF(%rax), %rdi |
271 movq %r11, 0(%rbp) / store destination at top | 295 movq %r11, 0(%rbp) / store destination at top |
272 | 296 |
273 / | 297 / |
274 / Restore registers | 298 / Restore registers |
275 / | 299 / |
300 movq org_scapset@GOTPCREL(%rip),%r9 | |
301 movq (%r9),%r9 | |
302 movl (%r9),%edx | |
303 testl $AV_386_AVX,%edx | |
304 jne .trace_restore_ymm | |
305 | |
306 .trace_restore_xmm: | |
307 movdqa SPXMM0OFF(%rsp), %xmm0 | |
308 movdqa SPXMM1OFF(%rsp), %xmm1 | |
309 movdqa SPXMM2OFF(%rsp), %xmm2 | |
310 movdqa SPXMM3OFF(%rsp), %xmm3 | |
311 movdqa SPXMM4OFF(%rsp), %xmm4 | |
312 movdqa SPXMM5OFF(%rsp), %xmm5 | |
313 movdqa SPXMM6OFF(%rsp), %xmm6 | |
314 movdqa SPXMM7OFF(%rsp), %xmm7 | |
315 jmp .trace_restore_finish | |
316 | |
317 .trace_restore_ymm: | |
318 vmovdqa SPXMM0OFF(%rsp), %ymm0 | |
319 vmovdqa SPXMM1OFF(%rsp), %ymm1 | |
320 vmovdqa SPXMM2OFF(%rsp), %ymm2 | |
321 vmovdqa SPXMM3OFF(%rsp), %ymm3 | |
322 vmovdqa SPXMM4OFF(%rsp), %ymm4 | |
323 vmovdqa SPXMM5OFF(%rsp), %ymm5 | |
324 vmovdqa SPXMM6OFF(%rsp), %ymm6 | |
325 vmovdqa SPXMM7OFF(%rsp), %ymm7 | |
326 | |
327 .trace_restore_finish: | |
276 movq SPRDIOFF(%rsp), %rdi | 328 movq SPRDIOFF(%rsp), %rdi |
277 movq SPRSIOFF(%rsp), %rsi | 329 movq SPRSIOFF(%rsp), %rsi |
278 movq SPRDXOFF(%rsp), %rdx | 330 movq SPRDXOFF(%rsp), %rdx |
279 movq SPRCXOFF(%rsp), %rcx | 331 movq SPRCXOFF(%rsp), %rcx |
280 movq SPR8OFF(%rsp), %r8 | 332 movq SPR8OFF(%rsp), %r8 |
281 movq SPR9OFF(%rsp), %r9 | 333 movq SPR9OFF(%rsp), %r9 |
282 movq SPR10OFF(%rsp), %r10 | 334 movq SPR10OFF(%rsp), %r10 |
283 movq SPR11OFF(%rsp), %r11 | 335 movq SPR11OFF(%rsp), %r11 |
284 movq SPRAXOFF(%rsp), %rax | 336 movq SPRAXOFF(%rsp), %rax |
285 movdqa SPXMM0OFF(%rsp), %xmm0 | |
286 movdqa SPXMM1OFF(%rsp), %xmm1 | |
287 movdqa SPXMM2OFF(%rsp), %xmm2 | |
288 movdqa SPXMM3OFF(%rsp), %xmm3 | |
289 movdqa SPXMM4OFF(%rsp), %xmm4 | |
290 movdqa SPXMM5OFF(%rsp), %xmm5 | |
291 movdqa SPXMM6OFF(%rsp), %xmm6 | |
292 movdqa SPXMM7OFF(%rsp), %xmm7 | |
293 | 337 |
294 subq $8, %rbp / adjust %rbp for 'ret' | 338 subq $8, %rbp / adjust %rbp for 'ret' |
295 movq %rbp, %rsp / | 339 movq %rbp, %rsp / |
296 /* | 340 /* |
297 * At this point, after a little doctoring, we should | 341 * At this point, after a little doctoring, we should |
363 .end_while: | 407 .end_while: |
364 / | 408 / |
365 / Restore registers using %r11 which contains our old %rsp value | 409 / Restore registers using %r11 which contains our old %rsp value |
366 / before growing the stack. | 410 / before growing the stack. |
367 / | 411 / |
412 | |
413 / Yes, we have to do this dance again. Sorry. | |
414 movq org_scapset@GOTPCREL(%rip),%r9 | |
415 movq (%r9),%r9 | |
416 movl (%r9),%edx | |
417 testl $AV_386_AVX,%edx | |
418 jne .trace_r2_ymm | |
419 | |
420 .trace_r2_xmm: | |
421 movdqa SPXMM0OFF(%r11), %xmm0 | |
422 movdqa SPXMM1OFF(%r11), %xmm1 | |
423 movdqa SPXMM2OFF(%r11), %xmm2 | |
424 movdqa SPXMM3OFF(%r11), %xmm3 | |
425 movdqa SPXMM4OFF(%r11), %xmm4 | |
426 movdqa SPXMM5OFF(%r11), %xmm5 | |
427 movdqa SPXMM6OFF(%r11), %xmm6 | |
428 movdqa SPXMM7OFF(%r11), %xmm7 | |
429 jmp .trace_r2_finish | |
430 | |
431 .trace_r2_ymm: | |
432 vmovdqa SPXMM0OFF(%r11), %ymm0 | |
433 vmovdqa SPXMM1OFF(%r11), %ymm1 | |
434 vmovdqa SPXMM2OFF(%r11), %ymm2 | |
435 vmovdqa SPXMM3OFF(%r11), %ymm3 | |
436 vmovdqa SPXMM4OFF(%r11), %ymm4 | |
437 vmovdqa SPXMM5OFF(%r11), %ymm5 | |
438 vmovdqa SPXMM6OFF(%r11), %ymm6 | |
439 vmovdqa SPXMM7OFF(%r11), %ymm7 | |
440 | |
441 .trace_r2_finish: | |
368 movq SPRDIOFF(%r11), %rdi | 442 movq SPRDIOFF(%r11), %rdi |
369 movq SPRSIOFF(%r11), %rsi | 443 movq SPRSIOFF(%r11), %rsi |
370 movq SPRDXOFF(%r11), %rdx | 444 movq SPRDXOFF(%r11), %rdx |
371 movq SPRCXOFF(%r11), %rcx | 445 movq SPRCXOFF(%r11), %rcx |
372 movq SPR8OFF(%r11), %r8 | 446 movq SPR8OFF(%r11), %r8 |
373 movq SPR9OFF(%r11), %r9 | 447 movq SPR9OFF(%r11), %r9 |
374 movq SPR10OFF(%r11), %r10 | 448 movq SPR10OFF(%r11), %r10 |
375 movq SPRAXOFF(%r11), %rax | 449 movq SPRAXOFF(%r11), %rax |
376 movdqa SPXMM0OFF(%r11), %xmm0 | |
377 movdqa SPXMM1OFF(%r11), %xmm1 | |
378 movdqa SPXMM2OFF(%r11), %xmm2 | |
379 movdqa SPXMM3OFF(%r11), %xmm3 | |
380 movdqa SPXMM4OFF(%r11), %xmm4 | |
381 movdqa SPXMM5OFF(%r11), %xmm5 | |
382 movdqa SPXMM6OFF(%r11), %xmm6 | |
383 movdqa SPXMM7OFF(%r11), %xmm7 | |
384 movq SPR11OFF(%r11), %r11 / restore %r11 last | 450 movq SPR11OFF(%r11), %r11 / restore %r11 last |
385 | 451 |
386 /* | 452 /* |
387 * Call to destination function - we'll return here | 453 * Call to destination function - we'll return here |
388 * for pltexit monitoring. | 454 * for pltexit monitoring. |
491 /* | 557 /* |
492 * Possible arguments for the resolved function are in registers as per | 558 * Possible arguments for the resolved function are in registers as per |
493 * the AMD64 ABI. We must save on the local stack all possible register | 559 * the AMD64 ABI. We must save on the local stack all possible register |
494 * arguments before interposing functions to resolve the called function. | 560 * arguments before interposing functions to resolve the called function. |
495 * Possible arguments must be restored before invoking the resolved function. | 561 * Possible arguments must be restored before invoking the resolved function. |
496 * | 562 * |
563 * Before the AVX instruction set enhancements to AMD64 there were no changes in | |
564 * the set of registers and their sizes across different processors. With AVX, | |
565 * the xmm registers became the lower 128 bits of the ymm registers. Because of | |
566 * this, we need to conditionally save 256 bits instead of 128 bits. Regardless | |
567 * of whether we have ymm registers or not, we're always going to push the stack | |
568 * space assuming that we do to simplify the code. | |
569 * | |
497 * Local stack space storage for elf_rtbndr is allocated as follows: | 570 * Local stack space storage for elf_rtbndr is allocated as follows: |
498 * | 571 * |
499 * Saved regs: | 572 * Saved regs: |
500 * %rax 8 | 573 * %rax 8 |
501 * %rdi 8 | 574 * %rdi 8 |
504 * %rcx 8 | 577 * %rcx 8 |
505 * %r8 8 | 578 * %r8 8 |
506 * %r9 8 | 579 * %r9 8 |
507 * %r10 8 | 580 * %r10 8 |
508 * ======= | 581 * ======= |
509 * Subtotal: 64 (16byte aligned) | 582 * Subtotal: 64 (32byte aligned) |
510 * | 583 * |
511 * Saved Media Regs (used to pass floating point args): | 584 * Saved Media Regs (used to pass floating point args): |
512 * %xmm0 - %xmm7 16 * 8: 128 | 585 * %ymm0 - %ymm7 32 * 8 256 |
513 * ======= | 586 * ======= |
514 * Total: 192 (16byte aligned) | 587 * Total: 320 (32byte aligned) |
515 * | 588 * |
516 * So - will subtract the following to create enough space | 589 * So - will subtract the following to create enough space |
517 * | 590 * |
518 * 0(%rsp) save %rax | 591 * 0(%rsp) save %rax |
519 * 8(%rsp) save %rdi | 592 * 8(%rsp) save %rdi |
521 * 24(%rsp) save %rdx | 594 * 24(%rsp) save %rdx |
522 * 32(%rsp) save %rcx | 595 * 32(%rsp) save %rcx |
523 * 40(%rsp) save %r8 | 596 * 40(%rsp) save %r8 |
524 * 48(%rsp) save %r9 | 597 * 48(%rsp) save %r9 |
525 * 56(%rsp) save %r10 | 598 * 56(%rsp) save %r10 |
526 * 64(%rsp) save %xmm0 | 599 * 64(%rsp) save %ymm0 |
527 * 80(%rsp) save %xmm1 | 600 * 96(%rsp) save %ymm1 |
528 * 96(%rsp) save %xmm2 | 601 * 128(%rsp) save %ymm2 |
529 * 112(%rsp) save %xmm3 | 602 * 160(%rsp) save %ymm3 |
530 * 128(%rsp) save %xmm4 | 603 * 192(%rsp) save %ymm4 |
531 * 144(%rsp) save %xmm5 | 604 * 224(%rsp) save %ymm5 |
532 * 160(%rsp) save %xmm6 | 605 * 256(%rsp) save %ymm6 |
533 * 176(%rsp) save %xmm7 | 606 * 288(%rsp) save %ymm7 |
534 * | 607 * |
535 * Note: Some callers may use 8-byte stack alignment instead of the | 608 * Note: Some callers may use 8-byte stack alignment instead of the |
536 * ABI required 16-byte alignment. We use %rsp offsets to save/restore | 609 * ABI required 16-byte alignment. We use %rsp offsets to save/restore |
537 * registers because %rbp may not be 16-byte aligned. We guarantee %rsp | 610 * registers because %rbp may not be 16-byte aligned. We guarantee %rsp |
538 * is 16-byte aligned in the function preamble. | 611 * is 32-byte aligned in the function preamble. |
539 */ | 612 */ |
540 #define LS_SIZE $192 /* local stack space to save all possible arguments */ | 613 /* |
614 * As the registers may either be xmm or ymm, we've left the name as xmm, but | |
615 * increased the offset between them to always cover the xmm and ymm cases. | |
616 */ | |
617 #define LS_SIZE $320 /* local stack space to save all possible arguments */ | |
541 #define LSRAXOFF 0 /* for SSE register count */ | 618 #define LSRAXOFF 0 /* for SSE register count */ |
542 #define LSRDIOFF 8 /* arg 0 ... */ | 619 #define LSRDIOFF 8 /* arg 0 ... */ |
543 #define LSRSIOFF 16 | 620 #define LSRSIOFF 16 |
544 #define LSRDXOFF 24 | 621 #define LSRDXOFF 24 |
545 #define LSRCXOFF 32 | 622 #define LSRCXOFF 32 |
546 #define LSR8OFF 40 | 623 #define LSR8OFF 40 |
547 #define LSR9OFF 48 | 624 #define LSR9OFF 48 |
548 #define LSR10OFF 56 /* ... arg 5 */ | 625 #define LSR10OFF 56 /* ... arg 5 */ |
549 #define LSXMM0OFF 64 /* SSE arg 0 ... */ | 626 #define LSXMM0OFF 64 /* SSE arg 0 ... */ |
550 #define LSXMM1OFF 80 | 627 #define LSXMM1OFF 96 |
551 #define LSXMM2OFF 96 | 628 #define LSXMM2OFF 128 |
552 #define LSXMM3OFF 112 | 629 #define LSXMM3OFF 160 |
553 #define LSXMM4OFF 128 | 630 #define LSXMM4OFF 192 |
554 #define LSXMM5OFF 144 | 631 #define LSXMM5OFF 224 |
555 #define LSXMM6OFF 160 | 632 #define LSXMM6OFF 256 |
556 #define LSXMM7OFF 176 /* ... SSE arg 7 */ | 633 #define LSXMM7OFF 288 /* ... SSE arg 7 */ |
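The same save area, seen as a C struct (an illustrative sketch, not in the commit): every vector slot is reserved at full ymm width whether or not AVX is present, which is what lets LS_SIZE stay a single constant for both paths; the SSE path simply leaves the top 16 bytes of each slot unused. Alignment comes from the andq of %rsp in the preamble, not from the types.

	#include <stdint.h>

	/* One save slot: an %xmm image on SSE hardware, %ymm with AVX. */
	typedef union {
		uint8_t	v_xmm[16];
		uint8_t	v_ymm[32];
	} vecslot_t;

	typedef struct {
		uint64_t	ls_rax;		/* 0(%rsp), SSE register count */
		uint64_t	ls_rdi;		/* 8(%rsp), arg 0 ... */
		uint64_t	ls_rsi;
		uint64_t	ls_rdx;
		uint64_t	ls_rcx;
		uint64_t	ls_r8;
		uint64_t	ls_r9;		/* ... arg 5 */
		uint64_t	ls_r10;		/* call chain reg */
		vecslot_t	ls_vec[8];	/* 64(%rsp) ... 288(%rsp) */
	} lsave_t;

	_Static_assert(sizeof (lsave_t) == 320, "matches LS_SIZE");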
557 | 634 |
635 /* | |
636 * The org_scapset is a global variable that is a part of rtld. It | |
637 * contains the capabilities that the kernel has told us are supported | |
638 * (auxv_hwcap). This is necessary for determining whether or not we | |
639 * need to save and restore AVX registers or simple SSE registers. Note, | |
640 * that the field we care about is currently at offset 0, if that | |
641 * changes, this code will have to be updated. | |
642 */ | |
643 .extern org_scapset | |
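For reference, ordinary userland code can make the same capability test that this assembly performs against org_scapset by asking the kernel directly; a minimal sketch using getisax(2), assuming an illumos/Solaris build environment:

	#include <sys/types.h>
	#include <sys/auxv.h>		/* getisax() */
	#include <sys/auxv_386.h>	/* AV_386_AVX */
	#include <stdio.h>

	int
	main(void)
	{
		uint32_t av = 0;

		(void) getisax(&av, 1);	/* first word of AV_386_* bits */
		(void) printf("AVX %ssupported\n",
		    (av & AV_386_AVX) ? "" : "not ");
		return (0);
	}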
558 .weak _elf_rtbndr | 644 .weak _elf_rtbndr |
559 _elf_rtbndr = elf_rtbndr | 645 _elf_rtbndr = elf_rtbndr |
560 | 646 |
561 ENTRY(elf_rtbndr) | 647 ENTRY(elf_rtbndr) |
562 | 648 |
567 * Some libraries may (incorrectly) use non-ABI compliant 8-byte stack | 653 * Some libraries may (incorrectly) use non-ABI compliant 8-byte stack |
568 * alignment. Enforce ABI 16-byte stack alignment here. | 654 * alignment. Enforce ABI 32-byte stack alignment here. |
569 * The next andq instruction does this pseudo code: | 655 * The next andq instruction does this pseudo code: |
570 * If %rsp is 8 byte aligned then subtract 8 from %rsp. | 656 * If %rsp is not 32-byte aligned, round it down to a 32-byte boundary. |
571 */ | 657 */ |
572 andq $-16, %rsp /* enforce ABI 16-byte stack alignment */ | 658 andq $-32, %rsp /* enforce ABI 32-byte stack alignment */ |
573 | 659 |
574 subq LS_SIZE, %rsp /* save all ABI defined argument registers */ | 660 subq LS_SIZE, %rsp /* save all ABI defined argument registers */ |
575 | 661 |
576 movq %rax, LSRAXOFF(%rsp) /* for SSE register count */ | 662 movq %rax, LSRAXOFF(%rsp) /* for SSE register count */ |
577 movq %rdi, LSRDIOFF(%rsp) /* arg 0 .. */ | 663 movq %rdi, LSRDIOFF(%rsp) /* arg 0 .. */ |
580 movq %rcx, LSRCXOFF(%rsp) | 666 movq %rcx, LSRCXOFF(%rsp) |
581 movq %r8, LSR8OFF(%rsp) | 667 movq %r8, LSR8OFF(%rsp) |
582 movq %r9, LSR9OFF(%rsp) /* .. arg 5 */ | 668 movq %r9, LSR9OFF(%rsp) /* .. arg 5 */ |
583 movq %r10, LSR10OFF(%rsp) /* call chain reg */ | 669 movq %r10, LSR10OFF(%rsp) /* call chain reg */ |
584 | 670 |
671 /* | |
672 * Our xmm registers could secretly be ymm registers in disguise. |
673 */ | |
674 movq org_scapset@GOTPCREL(%rip),%r9 | |
675 movq (%r9),%r9 | |
676 movl (%r9),%edx | |
677 testl $AV_386_AVX,%edx | |
678 jne .save_ymm | |
679 | |
680 .save_xmm: | |
585 movdqa %xmm0, LSXMM0OFF(%rsp) /* SSE arg 0 ... */ | 681 movdqa %xmm0, LSXMM0OFF(%rsp) /* SSE arg 0 ... */ |
586 movdqa %xmm1, LSXMM1OFF(%rsp) | 682 movdqa %xmm1, LSXMM1OFF(%rsp) |
587 movdqa %xmm2, LSXMM2OFF(%rsp) | 683 movdqa %xmm2, LSXMM2OFF(%rsp) |
588 movdqa %xmm3, LSXMM3OFF(%rsp) | 684 movdqa %xmm3, LSXMM3OFF(%rsp) |
589 movdqa %xmm4, LSXMM4OFF(%rsp) | 685 movdqa %xmm4, LSXMM4OFF(%rsp) |
590 movdqa %xmm5, LSXMM5OFF(%rsp) | 686 movdqa %xmm5, LSXMM5OFF(%rsp) |
591 movdqa %xmm6, LSXMM6OFF(%rsp) | 687 movdqa %xmm6, LSXMM6OFF(%rsp) |
592 movdqa %xmm7, LSXMM7OFF(%rsp) /* ... SSE arg 7 */ | 688 movdqa %xmm7, LSXMM7OFF(%rsp) /* ... SSE arg 7 */ |
593 | 689 jmp .save_finish |
690 | |
691 .save_ymm: | |
692 vmovdqa %ymm0, LSXMM0OFF(%rsp) /* SSE arg 0 ... */ | |
693 vmovdqa %ymm1, LSXMM1OFF(%rsp) | |
694 vmovdqa %ymm2, LSXMM2OFF(%rsp) | |
695 vmovdqa %ymm3, LSXMM3OFF(%rsp) | |
696 vmovdqa %ymm4, LSXMM4OFF(%rsp) | |
697 vmovdqa %ymm5, LSXMM5OFF(%rsp) | |
698 vmovdqa %ymm6, LSXMM6OFF(%rsp) | |
699 vmovdqa %ymm7, LSXMM7OFF(%rsp) /* ... SSE arg 7 */ | |
700 | |
701 .save_finish: | |
594 movq LBPLMPOFF(%rbp), %rdi /* arg1 - *lmp */ | 702 movq LBPLMPOFF(%rbp), %rdi /* arg1 - *lmp */ |
595 movq LBPRELOCOFF(%rbp), %rsi /* arg2 - reloc index */ | 703 movq LBPRELOCOFF(%rbp), %rsi /* arg2 - reloc index */ |
596 movq LBRPCOFF(%rbp), %rdx /* arg3 - pc of caller */ | 704 movq LBRPCOFF(%rbp), %rdx /* arg3 - pc of caller */ |
597 call elf_bndr@PLT /* call elf_rtbndr(lmp, relndx, pc) */ | 705 call elf_bndr@PLT /* call elf_rtbndr(lmp, relndx, pc) */ |
598 movq %rax, LBPRELOCOFF(%rbp) /* store final destination */ | 706 movq %rax, LBPRELOCOFF(%rbp) /* store final destination */ |
599 | 707 |
600 /* restore possible arguments before invoking resolved function */ | 708 /* |
709 * Restore possible arguments before invoking resolved function. We | |
710 * check the xmm vs. ymm regs first so we can use the others. | |
711 */ | |
712 movq org_scapset@GOTPCREL(%rip),%r9 | |
713 movq (%r9),%r9 | |
714 movl (%r9),%edx | |
715 testl $AV_386_AVX,%edx | |
716 jne .restore_ymm | |
717 | |
718 .restore_xmm: | |
719 movdqa LSXMM0OFF(%rsp), %xmm0 | |
720 movdqa LSXMM1OFF(%rsp), %xmm1 | |
721 movdqa LSXMM2OFF(%rsp), %xmm2 | |
722 movdqa LSXMM3OFF(%rsp), %xmm3 | |
723 movdqa LSXMM4OFF(%rsp), %xmm4 | |
724 movdqa LSXMM5OFF(%rsp), %xmm5 | |
725 movdqa LSXMM6OFF(%rsp), %xmm6 | |
726 movdqa LSXMM7OFF(%rsp), %xmm7 | |
727 jmp .restore_finish | |
728 | |
729 .restore_ymm: | |
730 vmovdqa LSXMM0OFF(%rsp), %ymm0 | |
731 vmovdqa LSXMM1OFF(%rsp), %ymm1 | |
732 vmovdqa LSXMM2OFF(%rsp), %ymm2 | |
733 vmovdqa LSXMM3OFF(%rsp), %ymm3 | |
734 vmovdqa LSXMM4OFF(%rsp), %ymm4 | |
735 vmovdqa LSXMM5OFF(%rsp), %ymm5 | |
736 vmovdqa LSXMM6OFF(%rsp), %ymm6 | |
737 vmovdqa LSXMM7OFF(%rsp), %ymm7 | |
738 | |
739 .restore_finish: | |
601 movq LSRAXOFF(%rsp), %rax | 740 movq LSRAXOFF(%rsp), %rax |
602 movq LSRDIOFF(%rsp), %rdi | 741 movq LSRDIOFF(%rsp), %rdi |
603 movq LSRSIOFF(%rsp), %rsi | 742 movq LSRSIOFF(%rsp), %rsi |
604 movq LSRDXOFF(%rsp), %rdx | 743 movq LSRDXOFF(%rsp), %rdx |
605 movq LSRCXOFF(%rsp), %rcx | 744 movq LSRCXOFF(%rsp), %rcx |
606 movq LSR8OFF(%rsp), %r8 | 745 movq LSR8OFF(%rsp), %r8 |
607 movq LSR9OFF(%rsp), %r9 | 746 movq LSR9OFF(%rsp), %r9 |
608 movq LSR10OFF(%rsp), %r10 | 747 movq LSR10OFF(%rsp), %r10 |
609 | 748 |
610 movdqa LSXMM0OFF(%rsp), %xmm0 | |
611 movdqa LSXMM1OFF(%rsp), %xmm1 | |
612 movdqa LSXMM2OFF(%rsp), %xmm2 | |
613 movdqa LSXMM3OFF(%rsp), %xmm3 | |
614 movdqa LSXMM4OFF(%rsp), %xmm4 | |
615 movdqa LSXMM5OFF(%rsp), %xmm5 | |
616 movdqa LSXMM6OFF(%rsp), %xmm6 | |
617 movdqa LSXMM7OFF(%rsp), %xmm7 | |
618 | |
619 movq %rbp, %rsp | 749 movq %rbp, %rsp |
620 popq %rbp | 750 popq %rbp |
621 | 751 |
622 addq $8, %rsp /* pop 1st plt-pushed args */ | 752 addq $8, %rsp /* pop 1st plt-pushed args */ |
623 /* the second argument is used */ | 753 /* the second argument is used */