changeset 4760:3a08865bfd12

6559115 niagara kernel copyin implementation prefetches kernel addresses instead of user addresses
author ae112802
date Mon, 30 Jul 2007 13:59:18 -0700
parents 3a228be89946
children dd784e5678f2
files usr/src/uts/sun4v/cpu/niagara_copy.s
diffstat 1 files changed, 40 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/sun4v/cpu/niagara_copy.s	Mon Jul 30 12:41:05 2007 -0700
+++ b/usr/src/uts/sun4v/cpu/niagara_copy.s	Mon Jul 30 13:59:18 2007 -0700
@@ -3287,7 +3287,7 @@
 	mov	ASI_USER, %asi
 
 	andn	%i1, 0x3f, %l0		! %l0 has block aligned src address
-	prefetch [%l0+0x0], #one_read
+	prefetcha [%l0+0x0]%asi, #one_read
 	andcc	%i1, 0x3f, %g0		! is src 64B aligned
 	bz,pn	%ncc, .ci_blkcpy
 	nop
@@ -3332,8 +3332,8 @@
 	nop
 
 	ldda	[%l0+0x38]%asi, %d14
-	prefetch [%l0+0x40], #one_read
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x40]%asi, #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 7:
 	add	%l0, 0x40, %l0
 	stxa	%g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line
@@ -3346,14 +3346,14 @@
 	subcc	%i3, 0x40, %i3
 	add	%i0, 0x40, %i0
 	bgu,pt	%ncc, 7b
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 	ba	.ci_blkdone
 	membar	#Sync
 
 ci_off7:
 	ldda	[%l0]ASI_BLK_AIUS, %d0
-	prefetch [%l0+0x40], #one_read
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x40]%asi, #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 0:
 	add	%l0, 0x40, %l0
 	stxa	%g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line
@@ -3373,7 +3373,7 @@
 	subcc	%i3, 0x40, %i3
 	add	%i0, 0x40, %i0
 	bgu,pt	%ncc, 0b
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 	ba	.ci_blkdone
 	membar	#Sync
 
@@ -3385,8 +3385,8 @@
 	ldda	[%l0+0x28]%asi, %d10
 	ldda	[%l0+0x30]%asi, %d12
 	ldda	[%l0+0x38]%asi, %d14
-	prefetch [%l0+0x40], #one_read
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x40]%asi, #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 1:
 	add	%l0, 0x40, %l0
 	stxa	%g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line
@@ -3405,7 +3405,7 @@
 	subcc	%i3, 0x40, %i3
 	add	%i0, 0x40, %i0
 	bgu,pt	%ncc, 1b
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 	ba	.ci_blkdone
 	membar	#Sync
 
@@ -3416,8 +3416,8 @@
 	ldda	[%l0+0x28]%asi, %d10
 	ldda	[%l0+0x30]%asi, %d12
 	ldda	[%l0+0x38]%asi, %d14
-	prefetch [%l0+0x40], #one_read
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x40]%asi, #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 2:
 	add	%l0, 0x40, %l0
 	stxa	%g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line
@@ -3435,7 +3435,7 @@
 	subcc	%i3, 0x40, %i3
 	add	%i0, 0x40, %i0
 	bgu,pt	%ncc, 2b
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 	ba	.ci_blkdone
 	membar	#Sync
 
@@ -3445,8 +3445,8 @@
 	ldda	[%l0+0x28]%asi, %d10
 	ldda	[%l0+0x30]%asi, %d12
 	ldda	[%l0+0x38]%asi, %d14
-	prefetch [%l0+0x40], #one_read
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x40]%asi, #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 3:
 	add	%l0, 0x40, %l0
 	stxa	%g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line
@@ -3463,7 +3463,7 @@
 	subcc	%i3, 0x40, %i3
 	add	%i0, 0x40, %i0
 	bgu,pt	%ncc, 3b
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 	ba	.ci_blkdone
 	membar	#Sync
 
@@ -3472,8 +3472,8 @@
 	ldda	[%l0+0x28]%asi, %d10
 	ldda	[%l0+0x30]%asi, %d12
 	ldda	[%l0+0x38]%asi, %d14
-	prefetch [%l0+0x40], #one_read
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x40]%asi, #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 4:
 	add	%l0, 0x40, %l0
 	stxa	%g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line
@@ -3489,7 +3489,7 @@
 	subcc	%i3, 0x40, %i3
 	add	%i0, 0x40, %i0
 	bgu,pt	%ncc, 4b
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 	ba	.ci_blkdone
 	membar	#Sync
 
@@ -3497,8 +3497,8 @@
 	ldda	[%l0+0x28]%asi, %d10
 	ldda	[%l0+0x30]%asi, %d12
 	ldda	[%l0+0x38]%asi, %d14
-	prefetch [%l0+0x40], #one_read
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x40]%asi, #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 5:
 	add	%l0, 0x40, %l0
 	stxa	%g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line
@@ -3513,15 +3513,15 @@
 	subcc	%i3, 0x40, %i3
 	add	%i0, 0x40, %i0
 	bgu,pt	%ncc, 5b
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 	ba	.ci_blkdone
 	membar	#Sync
 
 ci_off55:
 	ldda	[%l0+0x30]%asi, %d12
 	ldda	[%l0+0x38]%asi, %d14
-	prefetch [%l0+0x40], #one_read
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x40]%asi, #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 6:
 	add	%l0, 0x40, %l0
 	stxa	%g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line
@@ -3535,13 +3535,13 @@
 	subcc	%i3, 0x40, %i3
 	add	%i0, 0x40, %i0
 	bgu,pt	%ncc, 6b
-	prefetch [%l0+0x80], #one_read
+	prefetcha [%l0+0x80]%asi, #one_read
 	ba	.ci_blkdone
 	membar	#Sync
 
 .ci_blkcpy:
-	prefetch [%i1+0x40], #one_read
-	prefetch [%i1+0x80], #one_read
+	prefetcha [%i1+0x40]%asi, #one_read
+	prefetcha [%i1+0x80]%asi, #one_read
 8:
 	stxa	%g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line
 	ldda	[%i1]ASI_BLK_AIUS, %d0
@@ -3551,7 +3551,7 @@
 	subcc	%i3, 0x40, %i3
 	add	%i0, 0x40, %i0
 	bgu,pt	%ncc, 8b
-	prefetch [%i1+0x80], #one_read
+	prefetcha [%i1+0x80]%asi, #one_read
 	membar	#Sync
 
 .ci_blkdone:
@@ -3573,13 +3573,14 @@
 
 	sub	%i1, %o2, %i1		! align the src at 16 bytes.
 	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
-	prefetch [%l0+0x0], #one_read
+	prefetcha [%l0]ASI_USER, #one_read
 	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
+	add	%l0, 0x40, %l0
 .ci_loop0:
 	add	%i1, 0x10, %i1
 	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4
 
-	prefetch [%l0+0x40], #one_read
+	prefetcha [%l0]ASI_USER, #one_read
 
 	stxa	%l3, [%i0+0x0]%asi
 	stxa	%l4, [%i0+0x8]%asi
@@ -3617,10 +3618,11 @@
 	mov	0x40, %o1
 	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
 	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
-	prefetch [%l0+0x0], #one_read
+	prefetcha [%l0]ASI_USER, #one_read
 	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2	! partial data in %l2
 							! and %l3 has complete
 							! data
+	add	%l0, 0x40, %l0
 .ci_loop1:
 	add	%i1, 0x10, %i1
 	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4	! %l4 has partial data
@@ -3628,7 +3630,7 @@
 	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)	! merge %l2, %l3 and %l4
 							! into %l2 and %l3
 
-	prefetch [%l0+0x40], #one_read
+	prefetcha [%l0]ASI_USER, #one_read
 
 	stxa	%l2, [%i0+0x0]%asi
 	stxa	%l3, [%i0+0x8]%asi
@@ -3673,17 +3675,18 @@
 	mov	0x40, %o1
 	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
 	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
-	prefetch [%l0+0x0], #one_read
+	prefetcha [%l0]ASI_USER, #one_read
 	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2	! partial data in %l3
 							! for this read and
 							! no data in %l2
+	add	%l0, 0x40, %l0
 .ci_loop2:
 	add	%i1, 0x10, %i1
 	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4	! %l4 has complete data
 							! and %l5 has partial
 	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)	! merge %l3, %l4 and %l5
 							! into %l3 and %l4
-	prefetch [%l0+0x40], #one_read
+	prefetcha [%l0]ASI_USER, #one_read
 
 	stxa	%l3, [%i0+0x0]%asi
 	stxa	%l4, [%i0+0x8]%asi
@@ -3726,14 +3729,15 @@
 .ci_blkcpy:
 
 	andn	%i1, 0x3f, %o0		! %o0 has block aligned source
-	prefetch [%o0+0x0], #one_read
+	prefetcha [%o0]ASI_USER, #one_read
+	add	%o0, 0x40, %o0
 1:
 	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l0
 	add	%i1, 0x10, %i1
 	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
 	add	%i1, 0x10, %i1
 
-	prefetch [%o0+0x40], #one_read
+	prefetcha [%o0]ASI_USER, #one_read
 
 	stxa	%l0, [%i0+0x0]%asi