Mercurial > illumos > illumos-gate
changeset 10024:2213a466547f
6847710 Solaris libc memcpy and memset are ignorant of AMD L3 caches
6688056 amd64 memset.s and memcpy.s are missing AMD comments
author | bostrovs |
---|---|
date | Thu, 02 Jul 2009 10:30:52 -0700 |
parents | 71bf38dba3d6 |
children | 9214f62864a1 |
files | usr/src/lib/libc/amd64/Makefile usr/src/lib/libc/amd64/gen/cache.s usr/src/lib/libc/amd64/gen/memcpy.s usr/src/lib/libc/amd64/gen/memset.s usr/src/lib/libc/amd64/gen/proc64_id.c usr/src/lib/libc/amd64/gen/proc64_id.h usr/src/lib/libc/amd64/gen/proc64_support.s |
diffstat | 7 files changed, 115 insertions(+), 132 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/lib/libc/amd64/Makefile	Thu Jul 02 12:58:38 2009 -0400
+++ b/usr/src/lib/libc/amd64/Makefile	Thu Jul 02 10:30:52 2009 -0700
@@ -104,7 +104,6 @@
 	alloca.o		\
 	attrat.o		\
 	byteorder.o		\
-	cache.o			\
 	cuexit.o		\
 	ecvt.o			\
 	errlst.o		\
--- a/usr/src/lib/libc/amd64/gen/cache.s Thu Jul 02 12:58:38 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,108 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2002 Advanced Micro Devices, Inc. - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the - * following conditions are met: - * - * + Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * - * + Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the - * following disclaimer in the documentation and/or other - * materials provided with the distribution. - * - * + Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or - * promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - * CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES, - * INC. OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE - * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * It is licensee's responsibility to comply with any export - * regulations applicable in licensee's jurisdiction. - */ - - .file "cache.s" - -#include "SYS.h" -#include "cache.h" -#include "proc64_id.h" - - .global .amd64cache1, .amd64cache1half, .amd64cache2, .amd64cache2half - .global .largest_level_cache_size - - .data - - .align 8 - -// defaults to SledgeHammer -.amd64cache1: .quad AMD_DFLT_L1_CACHE_SIZE -.amd64cache1half: .quad AMD_DFLT_L1_HALF_CACHE_SIZE -.amd64cache2: .quad AMD_DFLT_L2_CACHE_SIZE -.amd64cache2half: .quad AMD_DFLT_L2_HALF_CACHE_SIZE -.largest_level_cache_size: - .int AMD_DFLT_L2_CACHE_SIZE - - .text - -// AMD cache size determination - - ENTRY(__amd64id) - - push %rbx - - mov $CPUIDLARGESTFUNCTIONEX, %eax # get highest level of support - cpuid - - cmp $AMDIDL2INFO, %eax # check for support of cache info - jb 1f - - mov $AMDIDL1INFO, %eax # get L1 info - cpuid - - shr $24, %ecx - shl $10, %ecx - mov %rcx, _sref_(.amd64cache1) - - shr $1, %ecx - mov %rcx, _sref_(.amd64cache1half) - - mov $AMDIDL2INFO, %eax # get L2 info - cpuid - - shr $16, %ecx - shl $10, %ecx - mov %rcx, _sref_(.amd64cache2) - mov %ecx, _sref_(.largest_level_cache_size) - - shr $1, %ecx - mov %rcx, _sref_(.amd64cache2half) - -1: - pop %rbx - ret - - SET_SIZE(__amd64id)
--- a/usr/src/lib/libc/amd64/gen/memcpy.s	Thu Jul 02 12:58:38 2009 -0400
+++ b/usr/src/lib/libc/amd64/gen/memcpy.s	Thu Jul 02 10:30:52 2009 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -53,6 +53,11 @@
  *
  * Pseudo code:
  *
+ * NOTE: On AMD NO_SSE is always set. Performance on Opteron did not improve
+ * using 16-byte stores. Setting NO_SSE on AMD should be re-evaluated on
+ * future AMD processors.
+ *
+ *
  * If (size <= 128 bytes) {
  *	do unrolled code (primarily 8-byte loads/stores) regardless of
  *	alignment.
@@ -2274,7 +2279,7 @@
 	mov	.largest_level_cache_size(%rip),%r9d
 	shr	%r9		# take half of it
 	cmp	%r9,%r8
-	jg	L(byte8_nt_top)
+	jge	L(byte8_nt_top)
 	# Find out whether to use rep movsq
 	cmp	$4096,%r8
 	jle	L(byte8_top)
--- a/usr/src/lib/libc/amd64/gen/memset.s Thu Jul 02 12:58:38 2009 -0400 +++ b/usr/src/lib/libc/amd64/gen/memset.s Thu Jul 02 10:30:52 2009 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -29,6 +29,10 @@ * All rights reserved. */ +/* + * Portions Copyright 2009 Advanced Micro Devices, Inc. + */ + .file "memset.s" #include <sys/asm_linkage.h> @@ -47,6 +51,11 @@ * * Pseudo code: * + * NOTE: On AMD NO_SSE is always set. Performance on Opteron did not improve + * using 16-byte stores. Setting NO_SSE on AMD should be re-evaluated on + * future AMD processors. + * + * * If (size <= 144 bytes) { * do unrolled code (primarily 8-byte stores) regardless of alignment. * } else { @@ -914,17 +923,25 @@ .balign 16 L(Loop8byte_nt_move): - lea -0x40(%r8),%r8 # 64 + lea -0x80(%r8),%r8 # 128 movnti %rdx,(%rdi) movnti %rdx,0x8(%rdi) movnti %rdx,0x10(%rdi) movnti %rdx,0x18(%rdi) - cmp $0x40,%r8 movnti %rdx,0x20(%rdi) movnti %rdx,0x28(%rdi) movnti %rdx,0x30(%rdi) movnti %rdx,0x38(%rdi) - lea 0x40(%rdi),%rdi + cmp $0x80,%r8 + movnti %rdx,0x40(%rdi) + movnti %rdx,0x48(%rdi) + movnti %rdx,0x50(%rdi) + movnti %rdx,0x58(%rdi) + movnti %rdx,0x60(%rdi) + movnti %rdx,0x68(%rdi) + movnti %rdx,0x70(%rdi) + movnti %rdx,0x78(%rdi) + lea 0x80(%rdi),%rdi jge L(Loop8byte_nt_move) sfence
--- a/usr/src/lib/libc/amd64/gen/proc64_id.c Thu Jul 02 12:58:38 2009 -0400 +++ b/usr/src/lib/libc/amd64/gen/proc64_id.c Thu Jul 02 10:30:52 2009 -0700 @@ -24,7 +24,9 @@ * All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Portions Copyright 2009 Advanced Micro Devices, Inc. + */ #include <sys/types.h> #include "proc64_id.h" @@ -44,8 +46,6 @@ uint_t edx; }; -extern void __amd64id(void); - /* * get_intel_cache_info() * Get cpu cache sizes for optimized 64-bit libc functions mem* and str*. @@ -103,8 +103,62 @@ } } - __intel_set_cache_sizes(l1_cache_size, l2_cache_size, - largest_level_cache); + __set_cache_sizes(l1_cache_size, l2_cache_size, largest_level_cache); +} + +/* + * get_amd_cache_info() + * Same as get_intel_cache_info() but for AMD processors + */ +static void +get_amd_cache_info(void) +{ + uint_t l1_cache_size = AMD_DFLT_L1_CACHE_SIZE; + uint_t l2_cache_size = AMD_DFLT_L2_CACHE_SIZE; + uint_t l3_cache_size = 0; + uint_t largest_level_cache = 0; + struct cpuid_values cpuid_info; + uint_t maxeax; + int ncores; + + cpuid_info.eax = 0; + __libc_get_cpuid(0x80000000, (uint_t *)&cpuid_info, -1); + maxeax = cpuid_info.eax; + + if (maxeax >= 0x80000005) { /* We have L1D info */ + __libc_get_cpuid(0x80000005, (uint_t *)&cpuid_info, -1); + l1_cache_size = ((cpuid_info.ecx >> 24) & 0xff) * 1024; + } + + if (maxeax >= 0x80000006) { /* We have L2 and L3 info */ + __libc_get_cpuid(0x80000006, (uint_t *)&cpuid_info, -1); + l2_cache_size = ((cpuid_info.ecx >> 16) & 0xffff) * 1024; + l3_cache_size = ((cpuid_info.edx >> 18) & 0x3fff) * 512 * 1024; + } + + /* + * L3 cache is shared between cores on the processor + */ + if (maxeax >= 0x80000008 && l3_cache_size != 0) { + largest_level_cache = l3_cache_size; + + /* + * Divide by number of cores on the processor + */ + __libc_get_cpuid(0x80000008, (uint_t *)&cpuid_info, -1); + ncores = (cpuid_info.ecx & 0xff) + 1; + if (ncores > 1) + largest_level_cache /= ncores; + + /* + * L3 is a victim cache for L2 + */ + 
largest_level_cache += l2_cache_size; + } else + largest_level_cache = l2_cache_size; + + __set_cache_sizes(l1_cache_size, l2_cache_size, + largest_level_cache); } /* @@ -126,7 +180,7 @@ if ((cpuid_info.ebx == 0x68747541) && /* Auth */ (cpuid_info.edx == 0x69746e65) && /* enti */ (cpuid_info.ecx == 0x444d4163)) { /* cAMD */ - __amd64id(); + get_amd_cache_info(); return; } @@ -174,7 +228,7 @@ } __intel_set_memops_method(use_sse); } else { - __intel_set_cache_sizes(INTEL_DFLT_L1_CACHE_SIZE, + __set_cache_sizes(INTEL_DFLT_L1_CACHE_SIZE, INTEL_DFLT_L2_CACHE_SIZE, INTEL_DFLT_LARGEST_CACHE_SIZE); __intel_set_memops_method(use_sse);
--- a/usr/src/lib/libc/amd64/gen/proc64_id.h	Thu Jul 02 12:58:38 2009 -0400
+++ b/usr/src/lib/libc/amd64/gen/proc64_id.h	Thu Jul 02 10:30:52 2009 -0700
@@ -24,11 +24,13 @@
  * All rights reserved.
  */
 
+/*
+ * Portions Copyright 2009 Advanced Micro Devices, Inc.
+ */
+
 #ifndef _PROC64_ID_H
 #define _PROC64_ID_H
 
-#pragma ident "%Z%%M% %I% %E% SMI"
-
 #include <sys/x86_archext.h>
 
 #ifdef __cplusplus
@@ -56,9 +58,7 @@
  * Cache size defaults for AMD SledgeHammer
  */
 #define AMD_DFLT_L1_CACHE_SIZE (64 * 1024)
-#define AMD_DFLT_L1_HALF_CACHE_SIZE (32 * 1024)
 #define AMD_DFLT_L2_CACHE_SIZE (1024 * 1024)
-#define AMD_DFLT_L2_HALF_CACHE_SIZE (512 * 1024)
 
 #ifdef _ASM
 	.extern .memops_method
@@ -66,7 +66,7 @@
 
 void __libc_get_cpuid(int cpuid_function, void *out_reg, int cache_index);
 void __intel_set_memops_method(long sse_level);
-void __intel_set_cache_sizes(long l1_cache_size, long l2_cache_size,
+void __set_cache_sizes(long l1_cache_size, long l2_cache_size,
     long largest_level_cache);
 
 #endif /* _ASM */
--- a/usr/src/lib/libc/amd64/gen/proc64_support.s Thu Jul 02 12:58:38 2009 -0400 +++ b/usr/src/lib/libc/amd64/gen/proc64_support.s Thu Jul 02 10:30:52 2009 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,6 +30,10 @@ */ /* + * Portions Copyright 2009 Advanced Micro Devices, Inc. + */ + +/* * Assembler support routines to getcpuid information used to set * cache size information. Cache information used by memset, strcpy, etc.. */ @@ -40,15 +44,26 @@ #include "proc64_id.h" .global .memops_method + .global .amd64cache1, .amd64cache1half, .amd64cache2, .amd64cache2half + .global .largest_level_cache_size + /* - * Defaults for Core 2 Duo + * Defaults for Core 2 Duo and AMD's SledgeHammer */ .data .balign 8 .memops_method: .int NO_SSE + .balign 8 +.amd64cache1: .quad AMD_DFLT_L1_CACHE_SIZE +.amd64cache1half: .quad AMD_DFLT_L1_CACHE_SIZE/2 +.amd64cache2: .quad AMD_DFLT_L2_CACHE_SIZE +.amd64cache2half: .quad AMD_DFLT_L2_CACHE_SIZE/2 +.largest_level_cache_size: + .int AMD_DFLT_L2_CACHE_SIZE + /* * Get cpuid data. * (void)__libc_get_cpuid(int cpuid_function, void *out_reg, int cache_index ) @@ -80,9 +95,10 @@ /* * Set cache info global variables used by various libc primitives. - * __intel_set_cache_sizes(long l1_cache_size, long l2_cache_size, long largest_level_cache); + * __set_cache_sizes(long l1_cache_size, long l2_cache_size, + * long largest_level_cache); */ - ENTRY(__intel_set_cache_sizes) + ENTRY(__set_cache_sizes) # rdi = l1_cache_size, rsi = l2_cache_size, rdx = largest_level_cache mov %rdi,.amd64cache1(%rip) @@ -95,4 +111,4 @@ mov %rdx,.largest_level_cache_size(%rip) ret - SET_SIZE(__intel_set_cache_sizes) + SET_SIZE(__set_cache_sizes)