changeset 5322:010e79fdab36

6600939 gethrtime sometimes returns a large time value into the future
author sudheer
date Wed, 24 Oct 2007 14:13:57 -0700
parents 0d8bd7d3764e
children d03f62b6bc23
files usr/src/uts/i86pc/ml/amd64.il usr/src/uts/i86pc/os/cpuid.c usr/src/uts/i86pc/os/mlsetup.c usr/src/uts/intel/asm/clock.h usr/src/uts/intel/ia32/ml/i86_subr.s usr/src/uts/intel/ia32/ml/swtch.s usr/src/uts/intel/sys/archsystm.h usr/src/uts/intel/sys/x86_archext.h
diffstat 8 files changed, 125 insertions(+), 74 deletions(-)
--- a/usr/src/uts/i86pc/ml/amd64.il	Wed Oct 24 13:59:51 2007 -0700
+++ b/usr/src/uts/i86pc/ml/amd64.il	Wed Oct 24 14:13:57 2007 -0700
@@ -149,19 +149,6 @@
 	.end
 
 /*
- * Read Time Stamp Counter
- * uint64_t tsc_read();
- *
- * usage:
- * uint64_t cycles = tsc_read();
- */
-	.inline	tsc_read, 0
-	rdtsc				/ %edx:%eax = RDTSC
-	shlq	$32, %rdx
-	orq	%rdx, %rax
-	.end
-
-/*
  * Call the halt instruction. This will put the CPU to sleep until
  * it is again awoken via an interrupt.
  * This function should be called with interrupts already disabled
--- a/usr/src/uts/i86pc/os/cpuid.c	Wed Oct 24 13:59:51 2007 -0700
+++ b/usr/src/uts/i86pc/os/cpuid.c	Wed Oct 24 14:13:57 2007 -0700
@@ -1059,7 +1059,8 @@
 			if (x86_vendor == X86_VENDOR_AMD)
 				feature &= ~X86_SEP;
 #endif
-			if (cp->cp_edx & CPUID_AMD_EDX_TSCP)
+			if (x86_vendor == X86_VENDOR_AMD &&
+			    cp->cp_edx & CPUID_AMD_EDX_TSCP)
 				feature |= X86_TSCP;
 			break;
 		default:
@@ -3660,4 +3661,27 @@
 	cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
 }
 
+void
+patch_tsc_read(int flag)
+{
+	size_t cnt;
+
+	switch (flag) {
+	case X86_NO_TSC:
+		cnt = &_no_rdtsc_end - &_no_rdtsc_start;
+		memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
+		break;
+	case X86_HAVE_TSCP:
+		cnt = &_tscp_end - &_tscp_start;
+		memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
+		break;
+	case X86_TSC_MFENCE:
+		cnt = &_tsc_mfence_end - &_tsc_mfence_start;
+		memcpy((void *)tsc_read, (void *)&_tsc_mfence_start, cnt);
+		break;
+	default:
+		break;
+	}
+}
+
 #endif	/* !__xpv */
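
The new patch_tsc_read() relies on the replacement instruction sequences assembled in i86_subr.s (below), each bracketed by a pair of _*_start/_*_end labels that x86_archext.h exports as char symbols, so the pointer difference yields the stub length and memcpy() can lay the chosen stub straight over the default tsc_read body. The following is a minimal user-space sketch of that label-bracketed copy pattern, not kernel code: the names target_func, stub_start and stub_end are illustrative, it assumes GCC on an ELF platform, and the mprotect() step exists only because user-space text is read-only, whereas mlsetup() patches tsc_read while the kernel text is still writable.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int	target_func(void);		/* stand-in for tsc_read */
extern char stub_start, stub_end;	/* bracket the stub, like _tscp_start/_tscp_end */

__asm__(
    ".text\n"
    ".globl target_func\n"
    "target_func:\n"
    "	movl	$1, %eax\n"
    "	ret\n"
    ".globl stub_start\n"
    ".globl stub_end\n"
    "stub_start:\n"
    "	movl	$2, %eax\n"
    "	ret\n"
    "stub_end:\n");

int
main(void)
{
	size_t cnt = (size_t)(&stub_end - &stub_start);
	long pg = sysconf(_SC_PAGESIZE);
	void *page = (void *)((uintptr_t)target_func & ~((uintptr_t)pg - 1));

	/*
	 * User space must make the text page writable first (hardened
	 * systems may refuse W+X mappings); the kernel needs no such step.
	 */
	if (mprotect(page, (size_t)pg, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
		return (1);
	(void) memcpy((void *)target_func, (void *)&stub_start, cnt);
	(void) mprotect(page, (size_t)pg, PROT_READ | PROT_EXEC);

	(void) printf("%d\n", target_func());	/* prints 2 after patching */
	return (0);
}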
--- a/usr/src/uts/i86pc/os/mlsetup.c	Wed Oct 24 13:59:51 2007 -0700
+++ b/usr/src/uts/i86pc/os/mlsetup.c	Wed Oct 24 14:13:57 2007 -0700
@@ -159,18 +159,40 @@
 	 */
 	init_desctbls();
 
+#if !defined(__xpv)
+
+	/*
+	 * Patch the tsc_read routine with the appropriate set of
+	 * instructions, depending on the processor family and architecture,
+	 * to read the time-stamp counter while preventing out-of-order
+	 * execution of the read. Patch it while the kernel text is writable.
+	 *
+	 * Note: tsc_read is not patched for Intel processors of family > 6,
+	 * nor for AMD processors of family > 0xf, in the unlikely case that
+	 * they lack the rdtscp instruction; tsc_read then falls back to
+	 * cpuid for serialization. The following code needs to be
+	 * revisited if Intel processors of family >= 0xf retain the
+	 * instruction-serializing behavior of the mfence instruction.
+	 */
+	if (x86_feature & X86_TSCP)
+		patch_tsc_read(X86_HAVE_TSCP);
+	else if (cpuid_getvendor(CPU) == X86_VENDOR_AMD &&
+	    cpuid_getfamily(CPU) <= 0xf)
+		patch_tsc_read(X86_TSC_MFENCE);
+	else if (cpuid_getvendor(CPU) == X86_VENDOR_Intel &&
+	    cpuid_getfamily(CPU) <= 6)
+		patch_tsc_read(X86_TSC_MFENCE);
+
+#endif	/* !__xpv */
 
 #if defined(__i386) && !defined(__xpv)
 	/*
 	 * Some i386 processors do not implement the rdtsc instruction,
-	 * or at least they do not implement it correctly.
-	 *
-	 * For those that do, patch in the rdtsc instructions in
-	 * various parts of the kernel right now while the text is
-	 * still writable.
+	 * or at least they do not implement it correctly. Patch tsc_read
+	 * to return 0 on such processors.
 	 */
-	if (x86_feature & X86_TSC)
-		patch_tsc();
+	if ((x86_feature & X86_TSC) == 0)
+		patch_tsc_read(X86_NO_TSC);
 #endif	/* __i386 && !__xpv */
 
 #if !defined(__xpv)
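
The selection above maps directly onto CPUID data: rdtscp support is advertised in bit 27 of the leaf 0x80000001 EDX word (and, per the cpuid.c change above, X86_TSCP is now set only for AMD), while the vendor and family come from leaves 0 and 1, which cpuid_getvendor() and cpuid_getfamily() return from the kernel's cached cpuid pass. As a rough user-space illustration only, using GCC's <cpuid.h> helper rather than the kernel interfaces, the same decision could be sketched as:

#include <cpuid.h>	/* GCC's __get_cpuid() helper */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int family;
	char vendor[13];

	/* Leaf 0: the vendor string lives in EBX, EDX, ECX (in that order). */
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return (1);
	(void) memcpy(vendor + 0, &ebx, 4);
	(void) memcpy(vendor + 4, &edx, 4);
	(void) memcpy(vendor + 8, &ecx, 4);
	vendor[12] = '\0';

	/* Leaf 1: base family is EAX[11:8]; 0xf means add the extended family. */
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return (1);
	family = (eax >> 8) & 0xf;
	if (family == 0xf)
		family += (eax >> 20) & 0xff;

	/* Extended leaf 0x80000001: EDX bit 27 advertises rdtscp. */
	if (strcmp(vendor, "AuthenticAMD") == 0 &&
	    __get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx) &&
	    (edx & (1u << 27)))
		(void) printf("use rdtscp\n");
	else if ((strcmp(vendor, "AuthenticAMD") == 0 && family <= 0xf) ||
	    (strcmp(vendor, "GenuineIntel") == 0 && family <= 6))
		(void) printf("serialize rdtsc with mfence\n");
	else
		(void) printf("fall back to cpuid-serialized rdtsc\n");
	return (0);
}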
--- a/usr/src/uts/intel/asm/clock.h	Wed Oct 24 13:59:51 2007 -0700
+++ b/usr/src/uts/intel/asm/clock.h	Wed Oct 24 14:13:57 2007 -0700
@@ -71,23 +71,6 @@
 #endif
 }
 
-#else /* __xpv */
-
-/*
- * rdtsc may not exist on 32-bit, so we don't have an inline for it.
- */
-#if defined(__amd64)
-extern __inline__ hrtime_t tsc_read(void)
-{
-	uint32_t lobits, hibits;
-
-	__asm__ __volatile__(
-	    "rdtsc"
-	    : "=a" (lobits), "=d" (hibits));
-	return (lobits | ((hrtime_t)hibits << 32));
-}
-#endif /* __amd64 */
-
 #endif /* __xpv */
 
 #endif	/* !__lint && __GNUC__ */
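
Dropping this __GNUC__-only inline matters for the fix: it open-coded an unserialized rdtsc that could not be patched at boot, so GCC-built callers would have bypassed the sequences installed by patch_tsc_read(). With it removed, every caller resolves to the single tsc_read routine in i86_subr.s. All a caller needs is the ordinary declaration, roughly as below (the header that actually carries it is outside this changeset):

#include <sys/time.h>		/* hrtime_t */

extern hrtime_t tsc_read(void);	/* body is patched at boot by patch_tsc_read() */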
--- a/usr/src/uts/intel/ia32/ml/i86_subr.s	Wed Oct 24 13:59:51 2007 -0700
+++ b/usr/src/uts/intel/ia32/ml/i86_subr.s	Wed Oct 24 14:13:57 2007 -0700
@@ -772,46 +772,77 @@
 	return (0);
 }
 
-void
-patch_tsc(void)
-{}
-
 #else	/* __lint */
 
 #if defined(__amd64)
 
 	ENTRY_NP(tsc_read)
+	movq	%rbx, %r11
+	movl	$0, %eax
+	cpuid
+	rdtsc
+	movq	%r11, %rbx
+	shlq	$32, %rdx
+	orq	%rdx, %rax
+	ret
+	.globl _tsc_mfence_start
+_tsc_mfence_start:
+	mfence
 	rdtsc
 	shlq	$32, %rdx
 	orq	%rdx, %rax
 	ret
+	.globl _tsc_mfence_end
+_tsc_mfence_end:
+	.globl _tscp_start
+_tscp_start:
+	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
+	shlq	$32, %rdx
+	orq	%rdx, %rax
+	ret
+	.globl _tscp_end
+_tscp_end:
+	.globl _no_rdtsc_start
+_no_rdtsc_start:
+	xorl	%edx, %edx
+	xorl	%eax, %eax
+	ret
+	.globl _no_rdtsc_end
+_no_rdtsc_end:
 	SET_SIZE(tsc_read)
 
-#else  /* __i386 */
-
-	/*
-	 * To cope with processors that do not implement the rdtsc instruction,
-	 * we patch the kernel to use rdtsc if that feature is detected on the
-	 * CPU.  On an unpatched kernel, tsc_read() just returns zero.
-	 */
-	ENTRY_NP(patch_tsc)
-	movw	_rdtsc_bytes, %cx
-	movw	%cx, _tsc_patch_point
-	ret
-_rdtsc_bytes:
-	rdtsc
-	SET_SIZE(patch_tsc)
+#else /* __i386 */
 
 	ENTRY_NP(tsc_read)
-	xorl	%eax, %eax
+	pushl	%ebx
+	movl	$0, %eax
+	cpuid
+	rdtsc
+	popl	%ebx
+	ret
+	.globl _tsc_mfence_start
+_tsc_mfence_start:
+	mfence
+	rdtsc
+	ret
+	.globl _tsc_mfence_end
+_tsc_mfence_end:
+	.globl	_tscp_start
+_tscp_start:
+	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
+	ret
+	.globl _tscp_end
+_tscp_end:
+	.globl _no_rdtsc_start
+_no_rdtsc_start:
 	xorl	%edx, %edx
-	.globl _tsc_patch_point
-_tsc_patch_point:
-	nop; nop
+	xorl	%eax, %eax
 	ret
+	.globl _no_rdtsc_end
+_no_rdtsc_end:
 	SET_SIZE(tsc_read)
 
-#endif /* __i386 */
+#endif	/* __i386 */
 
 #endif	/* __lint */
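
The stubs above differ only in how they order the time-stamp read: the default tsc_read body serializes with cpuid (which clobbers %rbx, hence the save/restore), the mfence variant is used where the changeset trusts mfence to order rdtsc (AMD family <= 0xf, Intel family <= 6), rdtscp waits for prior instructions by itself, and the no-TSC variant simply returns 0. The rdtscp opcode is emitted as raw bytes (0x0f 0x01 0xf9), presumably so the assembler need not know the mnemonic, and the i386 flavors need no shift/or because the i386 ABI already returns 64-bit values in %edx:%eax. For reference only, roughly equivalent user-space reads in GCC inline assembly (a sketch, not the kernel stubs) look like this:

#include <stdint.h>
#include <stdio.h>

/* Default flavor: serialize with cpuid, then read the TSC. */
static inline uint64_t
tsc_read_cpuid(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__(
	    "xorl %%eax, %%eax\n\t"
	    "cpuid\n\t"
	    "rdtsc"
	    : "=a" (lo), "=d" (hi) : : "ebx", "ecx", "memory");
	return (((uint64_t)hi << 32) | lo);
}

/* mfence flavor, as patched in on the affected AMD/Intel families. */
static inline uint64_t
tsc_read_mfence(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__(
	    "mfence\n\t"
	    "rdtsc"
	    : "=a" (lo), "=d" (hi) : : "memory");
	return (((uint64_t)hi << 32) | lo);
}

/* rdtscp flavor, emitted as raw bytes; it also loads IA32_TSC_AUX into %ecx. */
static inline uint64_t
tsc_read_tscp(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__(
	    ".byte 0x0f, 0x01, 0xf9"
	    : "=a" (lo), "=d" (hi) : : "ecx", "memory");
	return (((uint64_t)hi << 32) | lo);
}

int
main(void)
{
	(void) printf("cpuid:  %llu\n", (unsigned long long)tsc_read_cpuid());
	(void) printf("mfence: %llu\n", (unsigned long long)tsc_read_mfence());
	(void) printf("rdtscp: %llu\n", (unsigned long long)tsc_read_tscp());
	return (0);
}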
 
--- a/usr/src/uts/intel/ia32/ml/swtch.s	Wed Oct 24 13:59:51 2007 -0700
+++ b/usr/src/uts/intel/ia32/ml/swtch.s	Wed Oct 24 14:13:57 2007 -0700
@@ -128,22 +128,10 @@
 	movq	P_AS(hatp), scratch_reg;		\
 	movq	A_HAT(scratch_reg), hatp
 
-#if defined (__xpv)
-
 #define	TSC_READ()					\
 	call	tsc_read;				\
 	movq	%rax, %r14;
 
-#else
-
-#define	TSC_READ()					\
-	rdtsc;						\
-	shlq	$32, %rdx;				\
-	movl	%eax, %r14d;				\
-	orq	%rdx, %r14
-
-#endif
-
 /*
  * If we are resuming an interrupt thread, store a timestamp in the thread
  * structure.  If an interrupt occurs between tsc_read() and its subsequent
--- a/usr/src/uts/intel/sys/archsystm.h	Wed Oct 24 13:59:51 2007 -0700
+++ b/usr/src/uts/intel/sys/archsystm.h	Wed Oct 24 14:13:57 2007 -0700
@@ -157,7 +157,6 @@
 
 extern void (*kcpc_hw_enable_cpc_intr)(void);
 
-extern void patch_tsc(void);
 extern void init_desctbls(void);
 
 extern user_desc_t *cpu_get_gdt(void);
@@ -188,6 +187,7 @@
 #else
 extern void setup_mca(void);
 extern void pat_sync(void);
+extern void patch_tsc_read(int);
 #define	cpr_dprintf prom_printf
 #define	IN_XPV_PANIC() (__lintzero)
 #endif
--- a/usr/src/uts/intel/sys/x86_archext.h	Wed Oct 24 13:59:51 2007 -0700
+++ b/usr/src/uts/intel/sys/x86_archext.h	Wed Oct 24 14:13:57 2007 -0700
@@ -330,6 +330,13 @@
 #define	X86_SSE4_1	0x04000000
 #define	X86_SSE4_2	0x08000000
 
+/*
+ * Flags selecting how the tsc_read routine is patched.
+ */
+#define	X86_NO_TSC		0x0
+#define	X86_HAVE_TSCP		0x1
+#define	X86_TSC_MFENCE		0x2
+
 #define	FMT_X86_FEATURE						\
 	"\20"							\
 	"\34sse4_2\33sse4_1\32ssse3\31cpuid"			\
@@ -570,6 +577,15 @@
 extern void ucode_free();
 #endif
 
+#if !defined(__xpv)
+extern	char _tsc_mfence_start;
+extern	char _tsc_mfence_end;
+extern	char _tscp_start;
+extern	char _tscp_end;
+extern	char _no_rdtsc_start;
+extern	char _no_rdtsc_end;
+#endif
+
 extern uint_t workaround_errata(struct cpu *);
 
 #if defined(OPTERON_ERRATUM_93)