Mercurial > illumos > illumos-gate
annotate usr/src/uts/intel/ia32/os/archdep.c @ 3446:5903aece022d
PSARC 2006/469 EOF and removal of eeprom -I
PSARC 2006/568 direct boot (dboot) for x86
6219282 interrupt service routine called twice
6223946 potential infinite loop in fbt.c
6228819 tsc_gethrtimeunscaled resets to 0 on suspend
6342201 hat_unload() and cross call usage are needlessly slow on x86, x64
6378723 ctfconvert can't handle GCC empty struct C extension
6379124 ctfconvert can't handle C99 flexible array members
6401187 merging ip's CTF data into genunix introduces a race
6437553 shmat(2) fails on platforms that don't support large pages
6449286 eeprom -I should be torched
6464072 need support for firmware properties
6465816 need a debug record page
6467491 64-bit processes must point %fs or %gs to null selector to utilize [fs|gs]base
6475880 vestiges of old boot code on i386/amd64 should be purged
6475956 Interrupt handling code on x86 platforms should be easier to understand
6477828 pcplusmp psm module should be rearranged to allow multiple platforms to share the same apic code
6477867 x86 KDI should belong in the kernel
6477871 fix for 6232859 also needed for kmdb
6477872 MDB MMU commands need improvements
6477873 cpr debugging can be improved
6477877 mstate accounting should be reset when gethrtimef() is changed
6477915 32-bit x86 kernel should use interrupt gates for all traps
6477963 _update_sregs should be written in C
6477976 no longer need to capture [fs,gs]base in struct regs on every exception
6478642 Solaris needs to support multiple x86 platforms
6478648 ON could use an new & unencumbered xsvc driver
6478734 kernel lint not in C99 mode
6478826 swrand should have framework to lock individual pages
6480763 if interrupts can't be disabled, re-onlining an offline cpu fails
6481824 /dev/fb is not created if installation is done over tty
6483747 clock-tick processing should re-include threads waiting for I/O
6485872 use PTOU macro to access user area instead of old "u" from user.h
6486263 need way to extend kernel core dumps with pages that don't have page_t's
6486435 eeprom, prtdiag should only be platform specific where necessary
6486436 genunix mdb module makefiles duplicate list of files
6486437 intr_common.c should be common
6486445 /dev/mem needs support for page_t-less pages
6486451 cpu_pause() routine should be mach-specific
6486456 SIMULATOR_SUPPORT should die
6486832 x86 platform will need memlist/memseg locking
6486911 amd64 port not finished
6486972 platform TOD setting code may not have real TOD hdw to set
6492647 Better trap trace support for x-calls
6493613 apic_disable_intr() needs round robin fix
6497633 ctfmerge could allow for no ctf sections
6500637 floating point context switching is needlessly slow
6500656 move floating point configuration to C
6500666 need a means to override the setting of uname -m in the kernel build
6500669 update x86 defines
6502790 Files in the boot archive should be individually compressed
6503792 live upgrade environment + bfu + alternate root == incorrect menu.lst file
6504373 kmdb promif shouldn't needlessly busy-wait
6504374 ::interrupts could show level/edge
6506305 opteron erratum 123 should be enabled
6507629 porting x86 platform code to Xen
6510847 kobj.c is unaware of sparc hole between nucleus text and data (from dtrace)
author | mrj |
---|---|
date | Fri, 19 Jan 2007 08:10:06 -0800 |
parents | f74a135872bc |
children | 8cf697a3acff |
rev | line source |
---|---|
0 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
2712
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
5 * Common Development and Distribution License (the "License"). |
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
6 * You may not use this file except in compliance with the License. |
0 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
21 /* | |
3446 | 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. |
0 | 23 * Use is subject to license terms. |
24 */ | |
25 | |
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ | |
27 /* All Rights Reserved */ | |
28 | |
29 | |
30 #pragma ident "%Z%%M% %I% %E% SMI" | |
31 | |
32 #include <sys/param.h> | |
33 #include <sys/types.h> | |
34 #include <sys/vmparam.h> | |
35 #include <sys/systm.h> | |
36 #include <sys/signal.h> | |
37 #include <sys/stack.h> | |
38 #include <sys/regset.h> | |
39 #include <sys/privregs.h> | |
40 #include <sys/frame.h> | |
41 #include <sys/proc.h> | |
42 #include <sys/psw.h> | |
43 #include <sys/siginfo.h> | |
44 #include <sys/cpuvar.h> | |
45 #include <sys/asm_linkage.h> | |
46 #include <sys/kmem.h> | |
47 #include <sys/errno.h> | |
48 #include <sys/bootconf.h> | |
49 #include <sys/archsystm.h> | |
50 #include <sys/debug.h> | |
51 #include <sys/elf.h> | |
52 #include <sys/spl.h> | |
53 #include <sys/time.h> | |
54 #include <sys/atomic.h> | |
55 #include <sys/sysmacros.h> | |
56 #include <sys/cmn_err.h> | |
57 #include <sys/modctl.h> | |
58 #include <sys/kobj.h> | |
59 #include <sys/panic.h> | |
60 #include <sys/reboot.h> | |
61 #include <sys/time.h> | |
62 #include <sys/fp.h> | |
63 #include <sys/x86_archext.h> | |
64 #include <sys/auxv.h> | |
65 #include <sys/auxv_386.h> | |
66 #include <sys/dtrace.h> | |
2712
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
67 #include <sys/brand.h> |
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
68 #include <sys/machbrand.h> |
0 | 69 |
70 extern const struct fnsave_state x87_initial; | |
71 extern const struct fxsave_state sse_initial; | |
72 | |
73 /* | |
74 * Map an fnsave-formatted save area into an fxsave-formatted save area. | |
75 * | |
76 * Most fields are the same width, content and semantics. However | |
77 * the tag word is compressed. | |
78 */ | |
79 static void | |
80 fnsave_to_fxsave(const struct fnsave_state *fn, struct fxsave_state *fx) | |
81 { | |
82 uint_t i, tagbits; | |
83 | |
84 fx->fx_fcw = fn->f_fcw; | |
85 fx->fx_fsw = fn->f_fsw; | |
86 | |
87 /* | |
88 * copy element by element (because of holes) | |
89 */ | |
90 for (i = 0; i < 8; i++) | |
91 bcopy(&fn->f_st[i].fpr_16[0], &fx->fx_st[i].fpr_16[0], | |
92 sizeof (fn->f_st[0].fpr_16)); /* 80-bit x87-style floats */ | |
93 | |
94 /* | |
95 * synthesize compressed tag bits | |
96 */ | |
97 fx->fx_fctw = 0; | |
98 for (tagbits = fn->f_ftw, i = 0; i < 8; i++, tagbits >>= 2) | |
99 if ((tagbits & 3) != 3) | |
100 fx->fx_fctw |= (1 << i); | |
101 | |
102 fx->fx_fop = fn->f_fop; | |
103 | |
104 #if defined(__amd64) | |
105 fx->fx_rip = (uint64_t)fn->f_eip; | |
106 fx->fx_rdp = (uint64_t)fn->f_dp; | |
107 #else | |
108 fx->fx_eip = fn->f_eip; | |
109 fx->fx_cs = fn->f_cs; | |
110 fx->__fx_ign0 = 0; | |
111 fx->fx_dp = fn->f_dp; | |
112 fx->fx_ds = fn->f_ds; | |
113 fx->__fx_ign1 = 0; | |
114 #endif | |
115 } | |
116 | |
117 /* | |
118 * Map from an fxsave-format save area to an fnsave-format save area. | |
119 */ | |
120 static void | |
121 fxsave_to_fnsave(const struct fxsave_state *fx, struct fnsave_state *fn) | |
122 { | |
123 uint_t i, top, tagbits; | |
124 | |
125 fn->f_fcw = fx->fx_fcw; | |
126 fn->__f_ign0 = 0; | |
127 fn->f_fsw = fx->fx_fsw; | |
128 fn->__f_ign1 = 0; | |
129 | |
130 top = (fx->fx_fsw & FPS_TOP) >> 11; | |
131 | |
132 /* | |
133 * copy element by element (because of holes) | |
134 */ | |
135 for (i = 0; i < 8; i++) | |
136 bcopy(&fx->fx_st[i].fpr_16[0], &fn->f_st[i].fpr_16[0], | |
137 sizeof (fn->f_st[0].fpr_16)); /* 80-bit x87-style floats */ | |
138 | |
139 /* | |
140 * synthesize uncompressed tag bits | |
141 */ | |
142 fn->f_ftw = 0; | |
143 for (tagbits = fx->fx_fctw, i = 0; i < 8; i++, tagbits >>= 1) { | |
144 uint_t ibit, expo; | |
145 const uint16_t *fpp; | |
146 static const uint16_t zero[5] = { 0, 0, 0, 0, 0 }; | |
147 | |
148 if ((tagbits & 1) == 0) { | |
149 fn->f_ftw |= 3 << (i << 1); /* empty */ | |
150 continue; | |
151 } | |
152 | |
153 /* | |
154 * (tags refer to *physical* registers) | |
155 */ | |
156 fpp = &fx->fx_st[(i - top + 8) & 7].fpr_16[0]; | |
157 ibit = fpp[3] >> 15; | |
158 expo = fpp[4] & 0x7fff; | |
159 | |
160 if (ibit && expo != 0 && expo != 0x7fff) | |
161 continue; /* valid fp number */ | |
162 | |
163 if (bcmp(fpp, &zero, sizeof (zero))) | |
164 fn->f_ftw |= 2 << (i << 1); /* NaN */ | |
165 else | |
166 fn->f_ftw |= 1 << (i << 1); /* fp zero */ | |
167 } | |
168 | |
169 fn->f_fop = fx->fx_fop; | |
170 | |
171 fn->__f_ign2 = 0; | |
172 #if defined(__amd64) | |
173 fn->f_eip = (uint32_t)fx->fx_rip; | |
174 fn->f_cs = U32CS_SEL; | |
175 fn->f_dp = (uint32_t)fx->fx_rdp; | |
176 fn->f_ds = UDS_SEL; | |
177 #else | |
178 fn->f_eip = fx->fx_eip; | |
179 fn->f_cs = fx->fx_cs; | |
180 fn->f_dp = fx->fx_dp; | |
181 fn->f_ds = fx->fx_ds; | |
182 #endif | |
183 fn->__f_ign3 = 0; | |
184 } | |
185 | |
186 /* | |
187 * Map from an fpregset_t into an fxsave-format save area | |
188 */ | |
189 static void | |
190 fpregset_to_fxsave(const fpregset_t *fp, struct fxsave_state *fx) | |
191 { | |
192 #if defined(__amd64) | |
193 bcopy(fp, fx, sizeof (*fx)); | |
194 #else | |
195 const struct fpchip_state *fc = &fp->fp_reg_set.fpchip_state; | |
196 | |
197 fnsave_to_fxsave((const struct fnsave_state *)fc, fx); | |
198 fx->fx_mxcsr = fc->mxcsr; | |
199 bcopy(&fc->xmm[0], &fx->fx_xmm[0], sizeof (fc->xmm)); | |
200 #endif | |
201 /* | |
202 * avoid useless #gp exceptions - mask reserved bits | |
203 */ | |
204 fx->fx_mxcsr &= sse_mxcsr_mask; | |
205 } | |
206 | |
207 /* | |
208 * Map from an fxsave-format save area into a fpregset_t | |
209 */ | |
210 static void | |
211 fxsave_to_fpregset(const struct fxsave_state *fx, fpregset_t *fp) | |
212 { | |
213 #if defined(__amd64) | |
214 bcopy(fx, fp, sizeof (*fx)); | |
215 #else | |
216 struct fpchip_state *fc = &fp->fp_reg_set.fpchip_state; | |
217 | |
218 fxsave_to_fnsave(fx, (struct fnsave_state *)fc); | |
219 fc->mxcsr = fx->fx_mxcsr; | |
220 bcopy(&fx->fx_xmm[0], &fc->xmm[0], sizeof (fc->xmm)); | |
221 #endif | |
222 } | |
223 | |
224 #if defined(_SYSCALL32_IMPL) | |
225 static void | |
226 fpregset32_to_fxsave(const fpregset32_t *fp, struct fxsave_state *fx) | |
227 { | |
228 const struct fpchip32_state *fc = &fp->fp_reg_set.fpchip_state; | |
229 | |
230 fnsave_to_fxsave((const struct fnsave_state *)fc, fx); | |
231 /* | |
232 * avoid useless #gp exceptions - mask reserved bits | |
233 */ | |
234 fx->fx_mxcsr = sse_mxcsr_mask & fc->mxcsr; | |
235 bcopy(&fc->xmm[0], &fx->fx_xmm[0], sizeof (fc->xmm)); | |
236 } | |
237 | |
238 static void | |
239 fxsave_to_fpregset32(const struct fxsave_state *fx, fpregset32_t *fp) | |
240 { | |
241 struct fpchip32_state *fc = &fp->fp_reg_set.fpchip_state; | |
242 | |
243 fxsave_to_fnsave(fx, (struct fnsave_state *)fc); | |
244 fc->mxcsr = fx->fx_mxcsr; | |
245 bcopy(&fx->fx_xmm[0], &fc->xmm[0], sizeof (fc->xmm)); | |
246 } | |
247 | |
248 static void | |
249 fpregset_nto32(const fpregset_t *src, fpregset32_t *dst) | |
250 { | |
251 fxsave_to_fpregset32((struct fxsave_state *)src, dst); | |
252 dst->fp_reg_set.fpchip_state.status = | |
253 src->fp_reg_set.fpchip_state.status; | |
254 dst->fp_reg_set.fpchip_state.xstatus = | |
255 src->fp_reg_set.fpchip_state.xstatus; | |
256 } | |
257 | |
258 static void | |
259 fpregset_32ton(const fpregset32_t *src, fpregset_t *dst) | |
260 { | |
261 fpregset32_to_fxsave(src, (struct fxsave_state *)dst); | |
262 dst->fp_reg_set.fpchip_state.status = | |
263 src->fp_reg_set.fpchip_state.status; | |
264 dst->fp_reg_set.fpchip_state.xstatus = | |
265 src->fp_reg_set.fpchip_state.xstatus; | |
266 } | |
267 #endif | |
268 | |
269 /* | |
270 * Set floating-point registers from a native fpregset_t. | |
271 */ | |
272 void | |
273 setfpregs(klwp_t *lwp, fpregset_t *fp) | |
274 { | |
275 struct fpu_ctx *fpu = &lwp->lwp_pcb.pcb_fpu; | |
276 | |
277 if (fpu->fpu_flags & FPU_EN) { | |
278 if (!(fpu->fpu_flags & FPU_VALID)) { | |
279 /* | |
280 * FPU context is still active, release the | |
281 * ownership. | |
282 */ | |
283 fp_free(fpu, 0); | |
284 } | |
285 #if !defined(__amd64) | |
286 if (fp_kind == __FP_SSE) { | |
287 #endif | |
288 fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx); | |
289 fpu->fpu_regs.kfpu_xstatus = | |
290 fp->fp_reg_set.fpchip_state.xstatus; | |
291 #if !defined(__amd64) | |
292 } else | |
293 bcopy(fp, &fpu->fpu_regs.kfpu_u.kfpu_fn, | |
294 sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn)); | |
295 #endif | |
296 fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status; | |
297 fpu->fpu_flags |= FPU_VALID; | |
298 } else { | |
299 /* | |
300 * If we are trying to change the FPU state of a thread which | |
301 * hasn't yet initialized floating point, store the state in | |
302 * the pcb and indicate that the state is valid. When the | |
303 * thread enables floating point, it will use this state instead | |
304 * of the default state. | |
305 */ | |
306 #if !defined(__amd64) | |
307 if (fp_kind == __FP_SSE) { | |
308 #endif | |
309 fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx); | |
310 fpu->fpu_regs.kfpu_xstatus = | |
311 fp->fp_reg_set.fpchip_state.xstatus; | |
312 #if !defined(__amd64) | |
313 } else | |
314 bcopy(fp, &fpu->fpu_regs.kfpu_u.kfpu_fn, | |
315 sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn)); | |
316 #endif | |
317 fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status; | |
318 fpu->fpu_flags |= FPU_VALID; | |
319 } | |
320 } | |
321 | |
322 /* | |
323 * Get floating-point registers into a native fpregset_t. | |
324 */ | |
325 void | |
326 getfpregs(klwp_t *lwp, fpregset_t *fp) | |
327 { | |
328 struct fpu_ctx *fpu = &lwp->lwp_pcb.pcb_fpu; | |
329 | |
330 kpreempt_disable(); | |
331 if (fpu->fpu_flags & FPU_EN) { | |
332 /* | |
333 * If we have FPU hw and the thread's pcb doesn't have | |
334 * a valid FPU state then get the state from the hw. | |
335 */ | |
336 if (fpu_exists && ttolwp(curthread) == lwp && | |
337 !(fpu->fpu_flags & FPU_VALID)) | |
338 fp_save(fpu); /* get the current FPU state */ | |
339 } | |
340 | |
341 /* | |
342 * There are 3 possible cases we have to be aware of here: | |
343 * | |
344 * 1. FPU is enabled. FPU state is stored in the current LWP. | |
345 * | |
346 * 2. FPU is not enabled, and there have been no intervening /proc | |
347 * modifications. Return initial FPU state. | |
348 * | |
349 * 3. FPU is not enabled, but a /proc consumer has modified FPU state. | |
350 * FPU state is stored in the current LWP. | |
351 */ | |
352 if ((fpu->fpu_flags & FPU_EN) || (fpu->fpu_flags & FPU_VALID)) { | |
353 /* | |
354 * Cases 1 and 3. | |
355 */ | |
356 #if !defined(__amd64) | |
357 if (fp_kind == __FP_SSE) { | |
358 #endif | |
359 fxsave_to_fpregset(&fpu->fpu_regs.kfpu_u.kfpu_fx, fp); | |
360 fp->fp_reg_set.fpchip_state.xstatus = | |
361 fpu->fpu_regs.kfpu_xstatus; | |
362 #if !defined(__amd64) | |
363 } else | |
364 bcopy(&fpu->fpu_regs.kfpu_u.kfpu_fn, fp, | |
365 sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn)); | |
366 #endif | |
367 fp->fp_reg_set.fpchip_state.status = fpu->fpu_regs.kfpu_status; | |
368 } else { | |
369 /* | |
370 * Case 2. | |
371 */ | |
372 #if !defined(__amd64) | |
373 if (fp_kind == __FP_SSE) { | |
374 #endif | |
375 fxsave_to_fpregset(&sse_initial, fp); | |
376 fp->fp_reg_set.fpchip_state.xstatus = | |
377 fpu->fpu_regs.kfpu_xstatus; | |
378 #if !defined(__amd64) | |
379 } else | |
380 bcopy(&x87_initial, fp, sizeof (x87_initial)); | |
381 #endif | |
382 fp->fp_reg_set.fpchip_state.status = fpu->fpu_regs.kfpu_status; | |
383 } | |
384 kpreempt_enable(); | |
385 } | |
386 | |
387 #if defined(_SYSCALL32_IMPL) | |
388 | |
389 /* | |
390 * Set floating-point registers from an fpregset32_t. | |
391 */ | |
392 void | |
393 setfpregs32(klwp_t *lwp, fpregset32_t *fp) | |
394 { | |
395 fpregset_t fpregs; | |
396 | |
397 fpregset_32ton(fp, &fpregs); | |
398 setfpregs(lwp, &fpregs); | |
399 } | |
400 | |
401 /* | |
402 * Get floating-point registers into an fpregset32_t. | |
403 */ | |
404 void | |
405 getfpregs32(klwp_t *lwp, fpregset32_t *fp) | |
406 { | |
407 fpregset_t fpregs; | |
408 | |
409 getfpregs(lwp, &fpregs); | |
410 fpregset_nto32(&fpregs, fp); | |
411 } | |
412 | |
413 #endif /* _SYSCALL32_IMPL */ | |
414 | |
415 /* | |
416 * Return the general registers | |
417 */ | |
418 void | |
419 getgregs(klwp_t *lwp, gregset_t grp) | |
420 { | |
421 struct regs *rp = lwptoregs(lwp); | |
422 #if defined(__amd64) | |
423 struct pcb *pcb = &lwp->lwp_pcb; | |
424 int thisthread = lwptot(lwp) == curthread; | |
425 | |
426 grp[REG_RDI] = rp->r_rdi; | |
427 grp[REG_RSI] = rp->r_rsi; | |
428 grp[REG_RDX] = rp->r_rdx; | |
429 grp[REG_RCX] = rp->r_rcx; | |
430 grp[REG_R8] = rp->r_r8; | |
431 grp[REG_R9] = rp->r_r9; | |
432 grp[REG_RAX] = rp->r_rax; | |
433 grp[REG_RBX] = rp->r_rbx; | |
434 grp[REG_RBP] = rp->r_rbp; | |
435 grp[REG_R10] = rp->r_r10; | |
436 grp[REG_R11] = rp->r_r11; | |
437 grp[REG_R12] = rp->r_r12; | |
438 grp[REG_R13] = rp->r_r13; | |
439 grp[REG_R14] = rp->r_r14; | |
440 grp[REG_R15] = rp->r_r15; | |
441 grp[REG_FSBASE] = pcb->pcb_fsbase; | |
442 grp[REG_GSBASE] = pcb->pcb_gsbase; | |
443 if (thisthread) | |
444 kpreempt_disable(); | |
445 if (pcb->pcb_flags & RUPDATE_PENDING) { | |
446 grp[REG_DS] = pcb->pcb_ds; | |
447 grp[REG_ES] = pcb->pcb_es; | |
448 grp[REG_FS] = pcb->pcb_fs; | |
449 grp[REG_GS] = pcb->pcb_gs; | |
450 } else { | |
451 grp[REG_DS] = rp->r_ds; | |
452 grp[REG_ES] = rp->r_es; | |
453 grp[REG_FS] = rp->r_fs; | |
454 grp[REG_GS] = rp->r_gs; | |
455 } | |
456 if (thisthread) | |
457 kpreempt_enable(); | |
458 grp[REG_TRAPNO] = rp->r_trapno; | |
459 grp[REG_ERR] = rp->r_err; | |
460 grp[REG_RIP] = rp->r_rip; | |
461 grp[REG_CS] = rp->r_cs; | |
462 grp[REG_SS] = rp->r_ss; | |
463 grp[REG_RFL] = rp->r_rfl; | |
464 grp[REG_RSP] = rp->r_rsp; | |
465 #else | |
466 bcopy(&rp->r_gs, grp, sizeof (gregset_t)); | |
467 #endif | |
468 } | |
469 | |
470 #if defined(_SYSCALL32_IMPL) | |
471 | |
472 void | |
473 getgregs32(klwp_t *lwp, gregset32_t grp) | |
474 { | |
475 struct regs *rp = lwptoregs(lwp); | |
476 struct pcb *pcb = &lwp->lwp_pcb; | |
477 int thisthread = lwptot(lwp) == curthread; | |
478 | |
479 if (thisthread) | |
480 kpreempt_disable(); | |
481 if (pcb->pcb_flags & RUPDATE_PENDING) { | |
482 grp[GS] = (uint16_t)pcb->pcb_gs; | |
483 grp[FS] = (uint16_t)pcb->pcb_fs; | |
484 grp[DS] = (uint16_t)pcb->pcb_ds; | |
485 grp[ES] = (uint16_t)pcb->pcb_es; | |
486 } else { | |
487 grp[GS] = (uint16_t)rp->r_gs; | |
488 grp[FS] = (uint16_t)rp->r_fs; | |
489 grp[DS] = (uint16_t)rp->r_ds; | |
490 grp[ES] = (uint16_t)rp->r_es; | |
491 } | |
492 if (thisthread) | |
493 kpreempt_enable(); | |
494 grp[EDI] = (greg32_t)rp->r_rdi; | |
495 grp[ESI] = (greg32_t)rp->r_rsi; | |
496 grp[EBP] = (greg32_t)rp->r_rbp; | |
497 grp[ESP] = 0; | |
498 grp[EBX] = (greg32_t)rp->r_rbx; | |
499 grp[EDX] = (greg32_t)rp->r_rdx; | |
500 grp[ECX] = (greg32_t)rp->r_rcx; | |
501 grp[EAX] = (greg32_t)rp->r_rax; | |
502 grp[TRAPNO] = (greg32_t)rp->r_trapno; | |
503 grp[ERR] = (greg32_t)rp->r_err; | |
504 grp[EIP] = (greg32_t)rp->r_rip; | |
505 grp[CS] = (uint16_t)rp->r_cs; | |
506 grp[EFL] = (greg32_t)rp->r_rfl; | |
507 grp[UESP] = (greg32_t)rp->r_rsp; | |
508 grp[SS] = (uint16_t)rp->r_ss; | |
509 } | |
510 | |
511 void | |
512 ucontext_32ton(const ucontext32_t *src, ucontext_t *dst) | |
513 { | |
514 mcontext_t *dmc = &dst->uc_mcontext; | |
515 const mcontext32_t *smc = &src->uc_mcontext; | |
516 | |
517 bzero(dst, sizeof (*dst)); | |
518 dst->uc_flags = src->uc_flags; | |
519 dst->uc_link = (ucontext_t *)(uintptr_t)src->uc_link; | |
520 | |
521 bcopy(&src->uc_sigmask, &dst->uc_sigmask, sizeof (dst->uc_sigmask)); | |
522 | |
523 dst->uc_stack.ss_sp = (void *)(uintptr_t)src->uc_stack.ss_sp; | |
524 dst->uc_stack.ss_size = (size_t)src->uc_stack.ss_size; | |
525 dst->uc_stack.ss_flags = src->uc_stack.ss_flags; | |
526 | |
527 dmc->gregs[REG_GS] = (greg_t)(uint32_t)smc->gregs[GS]; | |
528 dmc->gregs[REG_FS] = (greg_t)(uint32_t)smc->gregs[FS]; | |
529 dmc->gregs[REG_ES] = (greg_t)(uint32_t)smc->gregs[ES]; | |
530 dmc->gregs[REG_DS] = (greg_t)(uint32_t)smc->gregs[DS]; | |
531 dmc->gregs[REG_RDI] = (greg_t)(uint32_t)smc->gregs[EDI]; | |
532 dmc->gregs[REG_RSI] = (greg_t)(uint32_t)smc->gregs[ESI]; | |
533 dmc->gregs[REG_RBP] = (greg_t)(uint32_t)smc->gregs[EBP]; | |
534 dmc->gregs[REG_RBX] = (greg_t)(uint32_t)smc->gregs[EBX]; | |
535 dmc->gregs[REG_RDX] = (greg_t)(uint32_t)smc->gregs[EDX]; | |
536 dmc->gregs[REG_RCX] = (greg_t)(uint32_t)smc->gregs[ECX]; | |
537 dmc->gregs[REG_RAX] = (greg_t)(uint32_t)smc->gregs[EAX]; | |
538 dmc->gregs[REG_TRAPNO] = (greg_t)(uint32_t)smc->gregs[TRAPNO]; | |
539 dmc->gregs[REG_ERR] = (greg_t)(uint32_t)smc->gregs[ERR]; | |
540 dmc->gregs[REG_RIP] = (greg_t)(uint32_t)smc->gregs[EIP]; | |
541 dmc->gregs[REG_CS] = (greg_t)(uint32_t)smc->gregs[CS]; | |
542 dmc->gregs[REG_RFL] = (greg_t)(uint32_t)smc->gregs[EFL]; | |
543 dmc->gregs[REG_RSP] = (greg_t)(uint32_t)smc->gregs[UESP]; | |
544 dmc->gregs[REG_SS] = (greg_t)(uint32_t)smc->gregs[SS]; | |
545 | |
546 /* | |
547 * A valid fpregs is only copied in if uc.uc_flags has UC_FPU set | |
548 * otherwise there is no guarantee that anything in fpregs is valid. | |
549 */ | |
550 if (src->uc_flags & UC_FPU) | |
551 fpregset_32ton(&src->uc_mcontext.fpregs, | |
552 &dst->uc_mcontext.fpregs); | |
553 } | |
554 | |
555 #endif /* _SYSCALL32_IMPL */ | |
556 | |
557 /* | |
558 * Return the user-level PC. | |
559 * If in a system call, return the address of the syscall trap. | |
560 */ | |
561 greg_t | |
562 getuserpc() | |
563 { | |
564 greg_t upc = lwptoregs(ttolwp(curthread))->r_pc; | |
565 uint32_t insn; | |
566 | |
567 if (curthread->t_sysnum == 0) | |
568 return (upc); | |
569 | |
570 /* | |
571 * We might've gotten here from sysenter (0xf 0x34), | |
572 * syscall (0xf 0x5) or lcall (0x9a 0 0 0 0 0x27 0). | |
573 * | |
574 * Go peek at the binary to figure it out.. | |
575 */ | |
576 if (fuword32((void *)(upc - 2), &insn) != -1 && | |
577 (insn & 0xffff) == 0x340f || (insn & 0xffff) == 0x050f) | |
578 return (upc - 2); | |
579 return (upc - 7); | |
580 } | |
581 | |
582 /* | |
583 * Protect segment registers from non-user privilege levels and GDT selectors | |
584 * other than USER_CS, USER_DS and lwp FS and GS values. If the segment | |
585 * selector is non-null and not USER_CS/USER_DS, we make sure that the | |
586 * TI bit is set to point into the LDT and that the RPL is set to 3. | |
587 * | |
588 * Since struct regs stores each 16-bit segment register as a 32-bit greg_t, we | |
589 * also explicitly zero the top 16 bits since they may be coming from the | |
590 * user's address space via setcontext(2) or /proc. | |
591 */ | |
592 | |
593 /*ARGSUSED*/ | |
594 static greg_t | |
595 fix_segreg(greg_t sr, model_t datamodel) | |
596 { | |
2712
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
597 kthread_t *t = curthread; |
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
598 |
0 | 599 switch (sr &= 0xffff) { |
600 #if defined(__amd64) | |
601 /* | |
602 * If lwp attempts to switch data model then force their | |
603 * code selector to be null selector. | |
604 */ | |
605 case U32CS_SEL: | |
606 if (datamodel == DATAMODEL_NATIVE) | |
607 return (0); | |
608 else | |
609 return (sr); | |
610 | |
611 case UCS_SEL: | |
612 if (datamodel == DATAMODEL_ILP32) | |
613 return (0); | |
614 #elif defined(__i386) | |
615 case UCS_SEL: | |
616 #endif | |
617 /*FALLTHROUGH*/ | |
618 case UDS_SEL: | |
619 case LWPFS_SEL: | |
620 case LWPGS_SEL: | |
621 case 0: | |
622 return (sr); | |
623 default: | |
624 break; | |
625 } | |
626 | |
627 /* | |
2712
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
628 * Allow this process's brand to do any necessary segment register |
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
629 * manipulation. |
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
630 */ |
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
631 if (PROC_IS_BRANDED(t->t_procp) && BRMOP(t->t_procp)->b_fixsegreg) |
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
632 return (BRMOP(t->t_procp)->b_fixsegreg(sr, datamodel)); |
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
633 |
f74a135872bc
PSARC/2005/471 BrandZ: Support for non-native zones
nn35248
parents:
1217
diff
changeset
|
634 /* |
1217 | 635 * Force it into the LDT in ring 3 for 32-bit processes, which by |
636 * default do not have an LDT, so that any attempt to use an invalid | |
637 * selector will reference the (non-existant) LDT, and cause a #gp fault | |
638 * for the process. | |
639 * | |
0 | 640 * 64-bit processes get the null gdt selector since they |
641 * are not allowed to have a private LDT. | |
642 */ | |
643 #if defined(__amd64) | |
644 return (datamodel == DATAMODEL_ILP32 ? (sr | SEL_TI_LDT | SEL_UPL) : 0); | |
645 #elif defined(__i386) | |
646 return (sr | SEL_TI_LDT | SEL_UPL); | |
647 #endif | |
648 } | |
649 | |
650 /* | |
651 * Set general registers. | |
652 */ | |
653 void | |
654 setgregs(klwp_t *lwp, gregset_t grp) | |
655 { | |
656 struct regs *rp = lwptoregs(lwp); | |
657 model_t datamodel = lwp_getdatamodel(lwp); | |
658 | |
659 #if defined(__amd64) | |
660 struct pcb *pcb = &lwp->lwp_pcb; | |
661 int thisthread = lwptot(lwp) == curthread; | |
662 | |
663 if (datamodel == DATAMODEL_NATIVE) { | |
664 | |
665 if (thisthread) | |
666 (void) save_syscall_args(); /* copy the args */ | |
667 | |
668 rp->r_rdi = grp[REG_RDI]; | |
669 rp->r_rsi = grp[REG_RSI]; | |
670 rp->r_rdx = grp[REG_RDX]; | |
671 rp->r_rcx = grp[REG_RCX]; | |
672 rp->r_r8 = grp[REG_R8]; | |
673 rp->r_r9 = grp[REG_R9]; | |
674 rp->r_rax = grp[REG_RAX]; | |
675 rp->r_rbx = grp[REG_RBX]; | |
676 rp->r_rbp = grp[REG_RBP]; | |
677 rp->r_r10 = grp[REG_R10]; | |
678 rp->r_r11 = grp[REG_R11]; | |
679 rp->r_r12 = grp[REG_R12]; | |
680 rp->r_r13 = grp[REG_R13]; | |
681 rp->r_r14 = grp[REG_R14]; | |
682 rp->r_r15 = grp[REG_R15]; | |
683 rp->r_trapno = grp[REG_TRAPNO]; | |
684 rp->r_err = grp[REG_ERR]; | |
685 rp->r_rip = grp[REG_RIP]; | |
686 /* | |
687 * Setting %cs or %ss to anything else is quietly but | |
688 * quite definitely forbidden! | |
689 */ | |
690 rp->r_cs = UCS_SEL; | |
691 rp->r_ss = UDS_SEL; | |
692 rp->r_rsp = grp[REG_RSP]; | |
693 | |
694 if (thisthread) | |
695 kpreempt_disable(); | |
696 | |
697 pcb->pcb_ds = UDS_SEL; | |
698 pcb->pcb_es = UDS_SEL; | |
699 | |
700 /* | |
701 * 64-bit processes -are- allowed to set their fsbase/gsbase | |
702 * values directly, but only if they're using the segment | |
703 * selectors that allow that semantic. | |
704 * | |
705 * (32-bit processes must use lwp_set_private().) | |
706 */ | |
707 pcb->pcb_fsbase = grp[REG_FSBASE]; | |
708 pcb->pcb_gsbase = grp[REG_GSBASE]; | |
709 pcb->pcb_fs = fix_segreg(grp[REG_FS], datamodel); | |
710 pcb->pcb_gs = fix_segreg(grp[REG_GS], datamodel); | |
711 | |
712 /* | |
713 * Ensure that we go out via update_sregs | |
714 */ | |
715 pcb->pcb_flags |= RUPDATE_PENDING; | |
716 lwptot(lwp)->t_post_sys = 1; | |
717 if (thisthread) | |
718 kpreempt_enable(); | |
719 #if defined(_SYSCALL32_IMPL) | |
720 } else { | |
721 rp->r_rdi = (uint32_t)grp[REG_RDI]; | |
722 rp->r_rsi = (uint32_t)grp[REG_RSI]; | |
723 rp->r_rdx = (uint32_t)grp[REG_RDX]; | |
724 rp->r_rcx = (uint32_t)grp[REG_RCX]; | |
725 rp->r_rax = (uint32_t)grp[REG_RAX]; | |
726 rp->r_rbx = (uint32_t)grp[REG_RBX]; | |
727 rp->r_rbp = (uint32_t)grp[REG_RBP]; | |
728 rp->r_trapno = (uint32_t)grp[REG_TRAPNO]; | |
729 rp->r_err = (uint32_t)grp[REG_ERR]; | |
730 rp->r_rip = (uint32_t)grp[REG_RIP]; | |
731 | |
732 /* | |
733 * The kernel uses %cs to determine if it is dealing with | |
734 * another part of the kernel or with a userland application. | |
735 * Specifically, it tests the privilege bits. For this reason, | |
736 * we must prevent user apps from ending up with a NULL selector | |
737 * in %cs. Instead, we'll use index 0 into the GDT but with the | |
738 * privilege bits set to usermode. | |
739 */ | |
740 rp->r_cs = fix_segreg(grp[REG_CS], datamodel) | SEL_UPL; | |
741 rp->r_ss = fix_segreg(grp[REG_DS], datamodel); | |
742 | |
743 rp->r_rsp = (uint32_t)grp[REG_RSP]; | |
744 | |
745 if (thisthread) | |
746 kpreempt_disable(); | |
747 | |
748 pcb->pcb_ds = fix_segreg(grp[REG_DS], datamodel); | |
749 pcb->pcb_es = fix_segreg(grp[REG_ES], datamodel); | |
750 | |
751 /* | |
752 * (See fsbase/gsbase commentary above) | |
753 */ | |
754 pcb->pcb_fs = fix_segreg(grp[REG_FS], datamodel); | |
755 pcb->pcb_gs = fix_segreg(grp[REG_GS], datamodel); | |
756 | |
757 /* | |
758 * Ensure that we go out via update_sregs | |
759 */ | |
760 pcb->pcb_flags |= RUPDATE_PENDING; | |
761 lwptot(lwp)->t_post_sys = 1; | |
762 if (thisthread) | |
763 kpreempt_enable(); | |
764 #endif | |
765 } | |
766 | |
767 /* | |
768 * Only certain bits of the flags register can be modified. | |
769 */ | |
770 rp->r_rfl = (rp->r_rfl & ~PSL_USERMASK) | | |
771 (grp[REG_RFL] & PSL_USERMASK); | |
772 | |
773 #elif defined(__i386) | |
774 | |
775 /* | |
776 * Only certain bits of the flags register can be modified. | |
777 */ | |
778 grp[EFL] = (rp->r_efl & ~PSL_USERMASK) | (grp[EFL] & PSL_USERMASK); | |
779 | |
780 /* | |
781 * Copy saved registers from user stack. | |
782 */ | |
783 bcopy(grp, &rp->r_gs, sizeof (gregset_t)); | |
784 | |
785 rp->r_cs = fix_segreg(rp->r_cs, datamodel); | |
786 rp->r_ss = fix_segreg(rp->r_ss, datamodel); | |
787 rp->r_ds = fix_segreg(rp->r_ds, datamodel); | |
788 rp->r_es = fix_segreg(rp->r_es, datamodel); | |
789 rp->r_fs = fix_segreg(rp->r_fs, datamodel); | |
790 rp->r_gs = fix_segreg(rp->r_gs, datamodel); | |
791 | |
792 #endif /* __i386 */ | |
793 } | |
794 | |
795 /* | |
796 * Determine whether eip is likely to have an interrupt frame | |
797 * on the stack. We do this by comparing the address to the | |
798 * range of addresses spanned by several well-known routines. | |
799 */ | |
800 extern void _interrupt(); | |
801 extern void _allsyscalls(); | |
802 extern void _cmntrap(); | |
803 extern void fakesoftint(); | |
804 | |
805 extern size_t _interrupt_size; | |
806 extern size_t _allsyscalls_size; | |
807 extern size_t _cmntrap_size; | |
808 extern size_t _fakesoftint_size; | |
809 | |
810 /* | |
811 * Get a pc-only stacktrace. Used for kmem_alloc() buffer ownership tracking. | |
812 * Returns MIN(current stack depth, pcstack_limit). | |
813 */ | |
814 int | |
815 getpcstack(pc_t *pcstack, int pcstack_limit) | |
816 { | |
817 struct frame *fp = (struct frame *)getfp(); | |
818 struct frame *nextfp, *minfp, *stacktop; | |
819 int depth = 0; | |
820 int on_intr; | |
821 uintptr_t pc; | |
822 | |
823 if ((on_intr = CPU_ON_INTR(CPU)) != 0) | |
824 stacktop = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME)); | |
825 else | |
826 stacktop = (struct frame *)curthread->t_stk; | |
827 minfp = fp; | |
828 | |
829 pc = ((struct regs *)fp)->r_pc; | |
830 | |
831 while (depth < pcstack_limit) { | |
832 nextfp = (struct frame *)fp->fr_savfp; | |
833 pc = fp->fr_savpc; | |
834 if (nextfp <= minfp || nextfp >= stacktop) { | |
835 if (on_intr) { | |
836 /* | |
837 * Hop from interrupt stack to thread stack. | |
838 */ | |
839 stacktop = (struct frame *)curthread->t_stk; | |
840 minfp = (struct frame *)curthread->t_stkbase; | |
841 on_intr = 0; | |
842 continue; | |
843 } | |
844 break; | |
845 } | |
846 pcstack[depth++] = (pc_t)pc; | |
847 fp = nextfp; | |
848 minfp = fp; | |
849 } | |
850 return (depth); | |
851 } | |
852 | |
/*
 * The following ELF header fields are defined as processor-specific
 * in the ABI:
 *
 *	e_ident[EI_DATA]	encoding of the processor-specific
 *				data in the object file
 *	e_machine		processor identification
 *	e_flags			processor-specific flags associated
 *				with the file
 */
863 | |
864 /* | |
865 * The value of at_flags reflects a platform's cpu module support. | |
866 * at_flags is used to check for allowing a binary to execute and | |
867 * is passed as the value of the AT_FLAGS auxiliary vector. | |
868 */ | |
869 int at_flags = 0; | |
870 | |
871 /* | |
872 * Check the processor-specific fields of an ELF header. | |
873 * | |
874 * returns 1 if the fields are valid, 0 otherwise | |
875 */ | |
876 /*ARGSUSED2*/ | |
877 int | |
878 elfheadcheck( | |
879 unsigned char e_data, | |
880 Elf32_Half e_machine, | |
881 Elf32_Word e_flags) | |
882 { | |
883 if (e_data != ELFDATA2LSB) | |
884 return (0); | |
885 #if defined(__amd64) | |
886 if (e_machine == EM_AMD64) | |
887 return (1); | |
888 #endif | |
889 return (e_machine == EM_386); | |
890 } | |
891 | |
892 uint_t auxv_hwcap_include = 0; /* patch to enable unrecognized features */ | |
893 uint_t auxv_hwcap_exclude = 0; /* patch for broken cpus, debugging */ | |
894 #if defined(_SYSCALL32_IMPL) | |
895 uint_t auxv_hwcap32_include = 0; /* ditto for 32-bit apps */ | |
896 uint_t auxv_hwcap32_exclude = 0; /* ditto for 32-bit apps */ | |
897 #endif | |
898 | |
/*
 * Gather information about the processor and place it into auxv_hwcap
 * so that it can be exported to the linker via the aux vector.
 *
 * We use this seemingly complicated mechanism so that we can ensure
 * that /etc/system can be used to override what the system can or
 * cannot discover for itself.
 */
void
bind_hwcap(void)
{
	/* Hardware capability bits as discovered by the cpuid code. */
	uint_t cpu_hwcap_flags = cpuid_pass4(NULL);

	/*
	 * Published capabilities are the discovered (or explicitly
	 * included) bits minus any explicitly excluded bits; the
	 * include/exclude sets are patchable via /etc/system.
	 */
	auxv_hwcap = (auxv_hwcap_include | cpu_hwcap_flags) &
	    ~auxv_hwcap_exclude;

#if defined(__amd64)
	/*
	 * On AMD processors, sysenter just doesn't work at all
	 * when the kernel is in long mode.  On IA-32e processors
	 * it does, but there's no real point in all the alternate
	 * mechanism when syscall works on both.
	 *
	 * Besides, the kernel's sysenter handler is expecting a
	 * 32-bit lwp ...
	 */
	auxv_hwcap &= ~AV_386_SEP;
#else
	/*
	 * 32-bit processes can -always- use the lahf/sahf instructions
	 */
	auxv_hwcap |= AV_386_AHF;
#endif

	/* Only chatter if an /etc/system override was actually applied. */
	if (auxv_hwcap_include || auxv_hwcap_exclude)
		cmn_err(CE_CONT, "?user ABI extensions: %b\n",
		    auxv_hwcap, FMT_AV_386);

#if defined(_SYSCALL32_IMPL)
	/*
	 * Compute the equivalent capability set advertised to 32-bit
	 * processes, with its own patchable override sets.
	 */
	auxv_hwcap32 = (auxv_hwcap32_include | cpu_hwcap_flags) &
	    ~auxv_hwcap32_exclude;

#if defined(__amd64)
	/*
	 * If this is an amd64 architecture machine from Intel, then
	 * syscall -doesn't- work in compatibility mode, only sysenter does.
	 *
	 * Sigh.
	 */
	if (!cpuid_syscall32_insn(NULL))
		auxv_hwcap32 &= ~AV_386_AMD_SYSC;

	/*
	 * 32-bit processes can -always- use the lahf/sahf instructions
	 */
	auxv_hwcap32 |= AV_386_AHF;
#endif

	if (auxv_hwcap32_include || auxv_hwcap32_exclude)
		cmn_err(CE_CONT, "?32-bit user ABI extensions: %b\n",
		    auxv_hwcap32, FMT_AV_386);
#endif
}
962 | |
963 /* | |
964 * sync_icache() - this is called | |
965 * in proc/fs/prusrio.c. x86 has an unified cache and therefore | |
966 * this is a nop. | |
967 */ | |
968 /* ARGSUSED */ | |
969 void | |
970 sync_icache(caddr_t addr, uint_t len) | |
971 { | |
972 /* Do nothing for now */ | |
973 } | |
974 | |
975 /*ARGSUSED*/ | |
976 void | |
977 sync_data_memory(caddr_t va, size_t len) | |
978 { | |
979 /* Not implemented for this platform */ | |
980 } | |
981 | |
/*
 * C-callable wrapper around the ipltospl() conversion: translate an
 * interrupt priority level into the corresponding spl value.
 */
int
__ipltospl(int ipl)
{
	int spl = ipltospl(ipl);

	return (spl);
}
987 | |
988 /* | |
989 * The panic code invokes panic_saveregs() to record the contents of a | |
990 * regs structure into the specified panic_data structure for debuggers. | |
991 */ | |
992 void | |
993 panic_saveregs(panic_data_t *pdp, struct regs *rp) | |
994 { | |
995 panic_nv_t *pnv = PANICNVGET(pdp); | |
996 | |
997 struct cregs creg; | |
998 | |
999 getcregs(&creg); | |
1000 | |
1001 #if defined(__amd64) | |
1002 PANICNVADD(pnv, "rdi", rp->r_rdi); | |
1003 PANICNVADD(pnv, "rsi", rp->r_rsi); | |
1004 PANICNVADD(pnv, "rdx", rp->r_rdx); | |
1005 PANICNVADD(pnv, "rcx", rp->r_rcx); | |
1006 PANICNVADD(pnv, "r8", rp->r_r8); | |
1007 PANICNVADD(pnv, "r9", rp->r_r9); | |
1008 PANICNVADD(pnv, "rax", rp->r_rax); | |
1009 PANICNVADD(pnv, "rbx", rp->r_rbx); | |
1010 PANICNVADD(pnv, "rbp", rp->r_rbp); | |
1011 PANICNVADD(pnv, "r10", rp->r_r10); | |
1012 PANICNVADD(pnv, "r10", rp->r_r10); | |
1013 PANICNVADD(pnv, "r11", rp->r_r11); | |
1014 PANICNVADD(pnv, "r12", rp->r_r12); | |
1015 PANICNVADD(pnv, "r13", rp->r_r13); | |
1016 PANICNVADD(pnv, "r14", rp->r_r14); | |
1017 PANICNVADD(pnv, "r15", rp->r_r15); | |
3446 | 1018 PANICNVADD(pnv, "fsbase", rdmsr(MSR_AMD_FSBASE)); |
1019 PANICNVADD(pnv, "gsbase", rdmsr(MSR_AMD_GSBASE)); | |
0 | 1020 PANICNVADD(pnv, "ds", rp->r_ds); |
1021 PANICNVADD(pnv, "es", rp->r_es); | |
1022 PANICNVADD(pnv, "fs", rp->r_fs); | |
1023 PANICNVADD(pnv, "gs", rp->r_gs); | |
1024 PANICNVADD(pnv, "trapno", rp->r_trapno); | |
1025 PANICNVADD(pnv, "err", rp->r_err); | |
1026 PANICNVADD(pnv, "rip", rp->r_rip); | |
1027 PANICNVADD(pnv, "cs", rp->r_cs); | |
1028 PANICNVADD(pnv, "rflags", rp->r_rfl); | |
1029 PANICNVADD(pnv, "rsp", rp->r_rsp); | |
1030 PANICNVADD(pnv, "ss", rp->r_ss); | |
1031 PANICNVADD(pnv, "gdt_hi", (uint64_t)(creg.cr_gdt._l[3])); | |
1032 PANICNVADD(pnv, "gdt_lo", (uint64_t)(creg.cr_gdt._l[0])); | |
1033 PANICNVADD(pnv, "idt_hi", (uint64_t)(creg.cr_idt._l[3])); | |
1034 PANICNVADD(pnv, "idt_lo", (uint64_t)(creg.cr_idt._l[0])); | |
1035 #elif defined(__i386) | |
1036 PANICNVADD(pnv, "gs", (uint32_t)rp->r_gs); | |
1037 PANICNVADD(pnv, "fs", (uint32_t)rp->r_fs); | |
1038 PANICNVADD(pnv, "es", (uint32_t)rp->r_es); | |
1039 PANICNVADD(pnv, "ds", (uint32_t)rp->r_ds); | |
1040 PANICNVADD(pnv, "edi", (uint32_t)rp->r_edi); | |
1041 PANICNVADD(pnv, "esi", (uint32_t)rp->r_esi); | |
1042 PANICNVADD(pnv, "ebp", (uint32_t)rp->r_ebp); | |
1043 PANICNVADD(pnv, "esp", (uint32_t)rp->r_esp); | |
1044 PANICNVADD(pnv, "ebx", (uint32_t)rp->r_ebx); | |
1045 PANICNVADD(pnv, "edx", (uint32_t)rp->r_edx); | |
1046 PANICNVADD(pnv, "ecx", (uint32_t)rp->r_ecx); | |
1047 PANICNVADD(pnv, "eax", (uint32_t)rp->r_eax); | |
1048 PANICNVADD(pnv, "trapno", (uint32_t)rp->r_trapno); | |
1049 PANICNVADD(pnv, "err", (uint32_t)rp->r_err); | |
1050 PANICNVADD(pnv, "eip", (uint32_t)rp->r_eip); | |
1051 PANICNVADD(pnv, "cs", (uint32_t)rp->r_cs); | |
1052 PANICNVADD(pnv, "eflags", (uint32_t)rp->r_efl); | |
1053 PANICNVADD(pnv, "uesp", (uint32_t)rp->r_uesp); | |
1054 PANICNVADD(pnv, "ss", (uint32_t)rp->r_ss); | |
1055 PANICNVADD(pnv, "gdt", creg.cr_gdt); | |
1056 PANICNVADD(pnv, "idt", creg.cr_idt); | |
1057 #endif /* __i386 */ | |
1058 | |
1059 PANICNVADD(pnv, "ldt", creg.cr_ldt); | |
1060 PANICNVADD(pnv, "task", creg.cr_task); | |
1061 PANICNVADD(pnv, "cr0", creg.cr_cr0); | |
1062 PANICNVADD(pnv, "cr2", creg.cr_cr2); | |
1063 PANICNVADD(pnv, "cr3", creg.cr_cr3); | |
1064 if (creg.cr_cr4) | |
1065 PANICNVADD(pnv, "cr4", creg.cr_cr4); | |
1066 | |
1067 PANICNVSET(pdp, pnv); | |
1068 } | |
1069 | |
1070 #define TR_ARG_MAX 6 /* Max args to print, same as SPARC */ | |
1071 | |
1072 #if !defined(__amd64) | |
1073 | |
1074 /* | |
1075 * Given a return address (%eip), determine the likely number of arguments | |
1076 * that were pushed on the stack prior to its execution. We do this by | |
1077 * expecting that a typical call sequence consists of pushing arguments on | |
1078 * the stack, executing a call instruction, and then performing an add | |
1079 * on %esp to restore it to the value prior to pushing the arguments for | |
1080 * the call. We attempt to detect such an add, and divide the addend | |
1081 * by the size of a word to determine the number of pushed arguments. | |
1082 * | |
1083 * If we do not find such an add, we punt and return TR_ARG_MAX. It is not | |
1084 * possible to reliably determine if a function took no arguments (i.e. was | |
1085 * void) because assembler routines do not reliably perform an add on %esp | |
1086 * immediately upon returning (eg. _sys_call()), so returning TR_ARG_MAX is | |
1087 * safer than returning 0. | |
1088 */ | |
1089 static ulong_t | |
1090 argcount(uintptr_t eip) | |
1091 { | |
1092 const uint8_t *ins = (const uint8_t *)eip; | |
1093 ulong_t n; | |
1094 | |
1095 enum { | |
1096 M_MODRM_ESP = 0xc4, /* Mod/RM byte indicates %esp */ | |
1097 M_ADD_IMM32 = 0x81, /* ADD imm32 to r/m32 */ | |
1098 M_ADD_IMM8 = 0x83 /* ADD imm8 to r/m32 */ | |
1099 }; | |
1100 | |
1101 if (eip < KERNELBASE || ins[1] != M_MODRM_ESP) | |
1102 return (TR_ARG_MAX); | |
1103 | |
1104 switch (ins[0]) { | |
1105 case M_ADD_IMM32: | |
1106 n = ins[2] + (ins[3] << 8) + (ins[4] << 16) + (ins[5] << 24); | |
1107 break; | |
1108 | |
1109 case M_ADD_IMM8: | |
1110 n = ins[2]; | |
1111 break; | |
1112 | |
1113 default: | |
1114 return (TR_ARG_MAX); | |
1115 } | |
1116 | |
1117 n /= sizeof (long); | |
1118 return (MIN(n, TR_ARG_MAX)); | |
1119 } | |
1120 | |
1121 #endif /* !__amd64 */ | |
1122 | |
1123 /* | |
1124 * Print a stack backtrace using the specified frame pointer. We delay two | |
1125 * seconds before continuing, unless this is the panic traceback. Note | |
1126 * that the frame for the starting stack pointer value is omitted because | |
1127 * the corresponding %eip is not known. | |
1128 */ | |
1129 #if defined(__amd64) | |
1130 | |
void
traceback(caddr_t fpreg)
{
	struct frame *fp = (struct frame *)fpreg;
	struct frame *nextfp;
	uintptr_t pc, nextpc;
	ulong_t off;
	/*
	 * args[] is emitted in each output line but is never filled in
	 * on amd64 (no argcount() here), so every frame prints "()".
	 * NOTE(review): presumably because amd64 passes arguments in
	 * registers, they cannot be scraped off the stack -- confirm.
	 */
	char args[TR_ARG_MAX * 2 + 16], *sym;

	if (!panicstr)
		printf("traceback: %%fp = %p\n", (void *)fp);

	/* A user-space frame pointer means there is nothing to walk. */
	if ((uintptr_t)fp < KERNELBASE)
		goto out;

	/* Skip the starting frame itself; its %rip is not known. */
	pc = fp->fr_savpc;
	fp = (struct frame *)fp->fr_savfp;

	while ((uintptr_t)fp >= KERNELBASE) {
		/*
		 * XX64 Until port is complete tolerate 8-byte aligned
		 * frame pointers but flag with a warning so they can
		 * be fixed.
		 */
		if (((uintptr_t)fp & (STACK_ALIGN - 1)) != 0) {
			if (((uintptr_t)fp & (8 - 1)) == 0) {
				printf("  >> warning! 8-byte"
				    " aligned %%fp = %p\n", (void *)fp);
			} else {
				printf(
				    "  >> mis-aligned %%fp = %p\n", (void *)fp);
				break;
			}
		}

		args[0] = '\0';
		/* Fetch the caller's frame before printing this one. */
		nextpc = (uintptr_t)fp->fr_savpc;
		nextfp = (struct frame *)fp->fr_savfp;
		if ((sym = kobj_getsymname(pc, &off)) != NULL) {
			/* Symbol resolved: print module:symbol+offset. */
			printf("%016lx %s:%s+%lx (%s)\n", (uintptr_t)fp,
			    mod_containing_pc((caddr_t)pc), sym, off, args);
		} else {
			/* No symbol: print the raw pc. */
			printf("%016lx %lx (%s)\n",
			    (uintptr_t)fp, pc, args);
		}

		pc = nextpc;
		fp = nextfp;
	}
out:
	if (!panicstr) {
		printf("end of traceback\n");
		/* Give the operator a chance to read it (2 seconds). */
		DELAY(2 * MICROSEC);
	}
}
1186 | |
1187 #elif defined(__i386) | |
1188 | |
void
traceback(caddr_t fpreg)
{
	struct frame *fp = (struct frame *)fpreg;
	struct frame *nextfp, *minfp, *stacktop;
	uintptr_t pc, nextpc;

	cpu_t *cpu;

	/*
	 * args[] holds TR_ARG_MAX hex long args, plus ", " or '\0'.
	 */
	char args[TR_ARG_MAX * 2 + 8], *p;

	int on_intr;
	ulong_t off;
	char *sym;

	if (!panicstr)
		printf("traceback: %%fp = %p\n", (void *)fp);

	/*
	 * If we are panicking, all high-level interrupt information in
	 * CPU was overwritten. panic_cpu has the correct values.
	 */
	kpreempt_disable();			/* prevent migration */

	cpu = (panicstr && CPU->cpu_id == panic_cpu.cpu_id)? &panic_cpu : CPU;

	/* Bound the walk by the interrupt stack or the thread stack. */
	if ((on_intr = CPU_ON_INTR(cpu)) != 0)
		stacktop = (struct frame *)(cpu->cpu_intr_stack + SA(MINFRAME));
	else
		stacktop = (struct frame *)curthread->t_stk;

	kpreempt_enable();

	/* A user-space frame pointer means there is nothing to walk. */
	if ((uintptr_t)fp < KERNELBASE)
		goto out;

	/* Skip the starting frame itself; its %eip is not known. */
	minfp = fp;	/* Baseline minimum frame pointer */
	pc = fp->fr_savpc;
	fp = (struct frame *)fp->fr_savfp;

	while ((uintptr_t)fp >= KERNELBASE) {
		ulong_t argc;
		long *argv;

		/*
		 * Frame pointers must strictly increase toward stacktop;
		 * anything else means we left the valid stack range.
		 */
		if (fp <= minfp || fp >= stacktop) {
			if (on_intr) {
				/*
				 * Hop from interrupt stack to thread stack.
				 */
				stacktop = (struct frame *)curthread->t_stk;
				minfp = (struct frame *)curthread->t_stkbase;
				on_intr = 0;
				continue;
			}
			break;	/* we're outside of the expected range */
		}

		if ((uintptr_t)fp & (STACK_ALIGN - 1)) {
			printf("  >> mis-aligned %%fp = %p\n", (void *)fp);
			break;
		}

		/*
		 * The arguments for the call that returns to nextpc sit
		 * just above the frame; argcount() decodes how many of
		 * them to print.
		 */
		nextpc = fp->fr_savpc;
		nextfp = (struct frame *)fp->fr_savfp;
		argc = argcount(nextpc);
		argv = (long *)((char *)fp + sizeof (struct frame));

		/* Format up to argc args as a comma-separated hex list. */
		args[0] = '\0';
		p = args;
		while (argc-- > 0 && argv < (long *)stacktop) {
			p += snprintf(p, args + sizeof (args) - p,
			    "%s%lx", (p == args) ? "" : ", ", *argv++);
		}

		if ((sym = kobj_getsymname(pc, &off)) != NULL) {
			/* Symbol resolved: print module:symbol+offset. */
			printf("%08lx %s:%s+%lx (%s)\n", (uintptr_t)fp,
			    mod_containing_pc((caddr_t)pc), sym, off, args);
		} else {
			/* No symbol: print the raw pc. */
			printf("%08lx %lx (%s)\n",
			    (uintptr_t)fp, pc, args);
		}

		minfp = fp;
		pc = nextpc;
		fp = nextfp;
	}
out:
	if (!panicstr) {
		printf("end of traceback\n");
		/* Give the operator a chance to read it (2 seconds). */
		DELAY(2 * MICROSEC);
	}
}
1284 | |
1285 #endif /* __i386 */ | |
1286 | |
1287 /* | |
1288 * Generate a stack backtrace from a saved register set. | |
1289 */ | |
1290 void | |
1291 traceregs(struct regs *rp) | |
1292 { | |
1293 traceback((caddr_t)rp->r_fp); | |
1294 } | |
1295 | |
1296 void | |
1297 exec_set_sp(size_t stksize) | |
1298 { | |
1299 klwp_t *lwp = ttolwp(curthread); | |
1300 | |
1301 lwptoregs(lwp)->r_sp = (uintptr_t)curproc->p_usrstack - stksize; | |
1302 } | |
1303 | |
1304 hrtime_t | |
1305 gethrtime_waitfree(void) | |
1306 { | |
1307 return (dtrace_gethrtime()); | |
1308 } | |
1309 | |
1310 hrtime_t | |
1311 gethrtime(void) | |
1312 { | |
1313 return (gethrtimef()); | |
1314 } | |
1315 | |
1316 hrtime_t | |
1317 gethrtime_unscaled(void) | |
1318 { | |
1319 return (gethrtimeunscaledf()); | |
1320 } | |
1321 | |
/*
 * Scale an unscaled hrtime value in place, via the registered
 * scalehrtimef hook.
 */
void
scalehrtime(hrtime_t *hrt)
{
	scalehrtimef(hrt);
}
1327 | |
/*
 * Fill in *tp with the current wall-clock time, via the registered
 * gethrestimef hook.
 */
void
gethrestime(timespec_t *tp)
{
	gethrestimef(tp);
}
1333 | |
1334 #if defined(__amd64) | |
1335 /* | |
1336 * Part of the implementation of hres_tick(); this routine is | |
1337 * easier in C than assembler .. called with the hres_lock held. | |
1338 * | |
1339 * XX64 Many of these timekeeping variables need to be extern'ed in a header | |
1340 */ | |
1341 | |
1342 #include <sys/time.h> | |
1343 #include <sys/machlock.h> | |
1344 | |
1345 extern int one_sec; | |
1346 extern timestruc_t hrestime; | |
1347 extern int max_hres_adj; | |
1348 | |
void
__adj_hrestime(void)
{
	long long adj;

	/*
	 * Clamp the outstanding adjustment (hrestime_adj) so that no
	 * more than max_hres_adj nanoseconds are applied in either
	 * direction per invocation.
	 */
	if (hrestime_adj == 0)
		adj = 0;
	else if (hrestime_adj > 0) {
		if (hrestime_adj < max_hres_adj)
			adj = hrestime_adj;
		else
			adj = max_hres_adj;
	} else {
		if (hrestime_adj < -max_hres_adj)
			adj = -max_hres_adj;
		else
			adj = hrestime_adj;
	}

	/*
	 * Consume the applied slice from timedelta and republish what
	 * remains as the new outstanding adjustment, then apply the
	 * slice to hrestime.
	 */
	timedelta -= adj;
	hrestime_adj = timedelta;
	hrestime.tv_nsec += adj;

	/*
	 * Carry nanosecond overflow into whole seconds.  one_sec is
	 * incremented per carried second (NOTE(review): its consumer
	 * is not visible in this file -- presumably the clock code).
	 */
	while (hrestime.tv_nsec >= NANOSEC) {
		one_sec++;
		hrestime.tv_sec++;
		hrestime.tv_nsec -= NANOSEC;
	}
}
1378 #endif | |
1379 | |
/*
 * Wrapper functions to maintain backwards compatibility
 */
1383 int | |
1384 xcopyin(const void *uaddr, void *kaddr, size_t count) | |
1385 { | |
1386 return (xcopyin_nta(uaddr, kaddr, count, UIO_COPY_CACHED)); | |
1387 } | |
1388 | |
1389 int | |
1390 xcopyout(const void *kaddr, void *uaddr, size_t count) | |
1391 { | |
1392 return (xcopyout_nta(kaddr, uaddr, count, UIO_COPY_CACHED)); | |
1393 } |