--- old/src/os_cpu/linux_x86/vm/linux_x86_32.s	Tue Sep 13 12:29:17 2011
+++ /dev/null	Tue Sep 13 12:28:54 2011
@@ -1,668 +0,0 @@
-#
-# Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-
-
-        # NOTE WELL! The _Copy functions are called directly
-        # from server-compiler-generated code via CallLeafNoFP,
-        # which means that they *must* either not use floating
-        # point or use it in the same manner as does the server
-        # compiler.
-
-        .globl _Copy_conjoint_bytes
-        .globl _Copy_arrayof_conjoint_bytes
-        .globl _Copy_conjoint_jshorts_atomic
-        .globl _Copy_arrayof_conjoint_jshorts
-        .globl _Copy_conjoint_jints_atomic
-        .globl _Copy_arrayof_conjoint_jints
-        .globl _Copy_conjoint_jlongs_atomic
-        .globl _mmx_Copy_arrayof_conjoint_jshorts
-
-        .globl _Atomic_cmpxchg_long
-        .globl _Atomic_move_long
-
-        .text
-
-        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
-        .globl SafeFetchN
-        ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
-        ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
-        ## routine to vet the address. If the address is the faulting LD then
-        ## SafeFetchTriage() would return the resume-at EIP, otherwise null.
-        .type SafeFetch32,@function
-        .p2align 4,,15
-SafeFetch32:
-SafeFetchN:
-        movl    0x8(%esp), %eax
-        movl    0x4(%esp), %ecx
-Fetch32PFI:
-        movl    (%ecx), %eax
-Fetch32Resume:
-        ret
-
-
-        .globl SpinPause
-        .type SpinPause,@function
-        .p2align 4,,15
-SpinPause:
-        rep
-        nop
-        movl    $1, %eax
-        ret
-
-        # Support for void Copy::conjoint_bytes(void* from,
-        #                                       void* to,
-        #                                       size_t count)
-        .p2align 4,,15
-        .type _Copy_conjoint_bytes,@function
-_Copy_conjoint_bytes:
-        pushl   %esi
-        movl    4+12(%esp),%ecx      # count
-        pushl   %edi
-        movl    8+ 4(%esp),%esi      # from
-        movl    8+ 8(%esp),%edi      # to
-        cmpl    %esi,%edi
-        leal    -1(%esi,%ecx),%eax   # from + count - 1
-        jbe     cb_CopyRight
-        cmpl    %eax,%edi
-        jbe     cb_CopyLeft
-        # copy from low to high
-cb_CopyRight:
-        cmpl    $3,%ecx
-        jbe     5f                   # <= 3 bytes
-        # align source address at dword address boundary
-        movl    %ecx,%eax            # original count
-        movl    $4,%ecx
-        subl    %esi,%ecx
-        andl    $3,%ecx              # prefix byte count
-        jz      1f                   # no prefix
-        subl    %ecx,%eax            # byte count less prefix
-        # copy prefix
-        subl    %esi,%edi
-0:      movb    (%esi),%dl
-        movb    %dl,(%edi,%esi,1)
-        addl    $1,%esi
-        subl    $1,%ecx
-        jnz     0b
-        addl    %esi,%edi
-1:      movl    %eax,%ecx            # byte count less prefix
-        shrl    $2,%ecx              # dword count
-        jz      4f                   # no dwords to move
-        cmpl    $32,%ecx
-        jbe     2f                   # <= 32 dwords
-        # copy aligned dwords
-        rep;    smovl
-        jmp     4f
-        # copy aligned dwords
-2:      subl    %esi,%edi
-        .p2align 4,,15
-3:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        addl    $4,%esi
-        subl    $1,%ecx
-        jnz     3b
-        addl    %esi,%edi
-4:      movl    %eax,%ecx            # byte count less prefix
-5:      andl    $3,%ecx              # suffix byte count
-        jz      7f                   # no suffix
-        # copy suffix
-        xorl    %eax,%eax
-6:      movb    (%esi,%eax,1),%dl
-        movb    %dl,(%edi,%eax,1)
-        addl    $1,%eax
-        subl    $1,%ecx
-        jnz     6b
-7:      popl    %edi
-        popl    %esi
-        ret
-        # copy from high to low
-cb_CopyLeft:
-        std
-        leal    -4(%edi,%ecx),%edi   # to + count - 4
-        movl    %eax,%esi            # from + count - 1
-        movl    %ecx,%eax
-        subl    $3,%esi              # from + count - 4
-        cmpl    $3,%ecx
-        jbe     5f                   # <= 3 bytes
-1:      shrl    $2,%ecx              # dword count
-        jz      4f                   # no dwords to move
-        cmpl    $32,%ecx
-        ja      3f                   # > 32 dwords
-        # copy dwords, aligned or not
-        subl    %esi,%edi
-        .p2align 4,,15
-2:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        subl    $4,%esi
-        subl    $1,%ecx
-        jnz     2b
-        addl    %esi,%edi
-        jmp     4f
-        # copy dwords, aligned or not
-3:      rep;    smovl
-4:      movl    %eax,%ecx            # byte count
-5:      andl    $3,%ecx              # suffix byte count
-        jz      7f                   # no suffix
-        # copy suffix
-        subl    %esi,%edi
-        addl    $3,%esi
-6:      movb    (%esi),%dl
-        movb    %dl,(%edi,%esi,1)
-        subl    $1,%esi
-        subl    $1,%ecx
-        jnz     6b
-7:      cld
-        popl    %edi
-        popl    %esi
-        ret
-
-        # Support for void Copy::arrayof_conjoint_bytes(void* from,
-        #                                               void* to,
-        #                                               size_t count)
-        #
-        # Same as _Copy_conjoint_bytes, except no source alignment check.
-        .p2align 4,,15
-        .type _Copy_arrayof_conjoint_bytes,@function
-_Copy_arrayof_conjoint_bytes:
-        pushl   %esi
-        movl    4+12(%esp),%ecx      # count
-        pushl   %edi
-        movl    8+ 4(%esp),%esi      # from
-        movl    8+ 8(%esp),%edi      # to
-        cmpl    %esi,%edi
-        leal    -1(%esi,%ecx),%eax   # from + count - 1
-        jbe     acb_CopyRight
-        cmpl    %eax,%edi
-        jbe     acb_CopyLeft
-        # copy from low to high
-acb_CopyRight:
-        cmpl    $3,%ecx
-        jbe     5f
-1:      movl    %ecx,%eax
-        shrl    $2,%ecx
-        jz      4f
-        cmpl    $32,%ecx
-        ja      3f
-        # copy aligned dwords
-        subl    %esi,%edi
-        .p2align 4,,15
-2:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        addl    $4,%esi
-        subl    $1,%ecx
-        jnz     2b
-        addl    %esi,%edi
-        jmp     4f
-        # copy aligned dwords
-3:      rep;    smovl
-4:      movl    %eax,%ecx
-5:      andl    $3,%ecx
-        jz      7f
-        # copy suffix
-        xorl    %eax,%eax
-6:      movb    (%esi,%eax,1),%dl
-        movb    %dl,(%edi,%eax,1)
-        addl    $1,%eax
-        subl    $1,%ecx
-        jnz     6b
-7:      popl    %edi
-        popl    %esi
-        ret
-acb_CopyLeft:
-        std
-        leal    -4(%edi,%ecx),%edi   # to + count - 4
-        movl    %eax,%esi            # from + count - 1
-        movl    %ecx,%eax
-        subl    $3,%esi              # from + count - 4
-        cmpl    $3,%ecx
-        jbe     5f
-1:      shrl    $2,%ecx
-        jz      4f
-        cmpl    $32,%ecx
-        jbe     2f                   # <= 32 dwords
-        rep;    smovl
-        jmp     4f
-        .=.+8
-2:      subl    %esi,%edi
-        .p2align 4,,15
-3:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        subl    $4,%esi
-        subl    $1,%ecx
-        jnz     3b
-        addl    %esi,%edi
-4:      movl    %eax,%ecx
-5:      andl    $3,%ecx
-        jz      7f
-        subl    %esi,%edi
-        addl    $3,%esi
-6:      movb    (%esi),%dl
-        movb    %dl,(%edi,%esi,1)
-        subl    $1,%esi
-        subl    $1,%ecx
-        jnz     6b
-7:      cld
-        popl    %edi
-        popl    %esi
-        ret
-
-        # Support for void Copy::conjoint_jshorts_atomic(void* from,
-        #                                                void* to,
-        #                                                size_t count)
-        .p2align 4,,15
-        .type _Copy_conjoint_jshorts_atomic,@function
-_Copy_conjoint_jshorts_atomic:
-        pushl   %esi
-        movl    4+12(%esp),%ecx      # count
-        pushl   %edi
-        movl    8+ 4(%esp),%esi      # from
-        movl    8+ 8(%esp),%edi      # to
-        cmpl    %esi,%edi
-        leal    -2(%esi,%ecx,2),%eax # from + count*2 - 2
-        jbe     cs_CopyRight
-        cmpl    %eax,%edi
-        jbe     cs_CopyLeft
-        # copy from low to high
-cs_CopyRight:
-        # align source address at dword address boundary
-        movl    %esi,%eax            # original from
-        andl    $3,%eax              # either 0 or 2
-        jz      1f                   # no prefix
-        # copy prefix
-        subl    $1,%ecx
-        jl      5f                   # zero count
-        movw    (%esi),%dx
-        movw    %dx,(%edi)
-        addl    %eax,%esi            # %eax == 2
-        addl    %eax,%edi
-1:      movl    %ecx,%eax            # word count less prefix
-        sarl    %ecx                 # dword count
-        jz      4f                   # no dwords to move
-        cmpl    $32,%ecx
-        jbe     2f                   # <= 32 dwords
-        # copy aligned dwords
-        rep;    smovl
-        jmp     4f
-        # copy aligned dwords
-2:      subl    %esi,%edi
-        .p2align 4,,15
-3:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        addl    $4,%esi
-        subl    $1,%ecx
-        jnz     3b
-        addl    %esi,%edi
-4:      andl    $1,%eax              # suffix count
-        jz      5f                   # no suffix
-        # copy suffix
-        movw    (%esi),%dx
-        movw    %dx,(%edi)
-5:      popl    %edi
-        popl    %esi
-        ret
-        # copy from high to low
-cs_CopyLeft:
-        std
-        leal    -4(%edi,%ecx,2),%edi # to + count*2 - 4
-        movl    %eax,%esi            # from + count*2 - 2
-        movl    %ecx,%eax
-        subl    $2,%esi              # from + count*2 - 4
-1:      sarl    %ecx                 # dword count
-        jz      4f                   # no dwords to move
-        cmpl    $32,%ecx
-        ja      3f                   # > 32 dwords
-        subl    %esi,%edi
-        .p2align 4,,15
-2:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        subl    $4,%esi
-        subl    $1,%ecx
-        jnz     2b
-        addl    %esi,%edi
-        jmp     4f
-3:      rep;    smovl
-4:      andl    $1,%eax              # suffix count
-        jz      5f                   # no suffix
-        # copy suffix
-        addl    $2,%esi
-        addl    $2,%edi
-        movw    (%esi),%dx
-        movw    %dx,(%edi)
-5:      cld
-        popl    %edi
-        popl    %esi
-        ret
-
-        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
-        #                                                 void* to,
-        #                                                 size_t count)
-        .p2align 4,,15
-        .type _Copy_arrayof_conjoint_jshorts,@function
-_Copy_arrayof_conjoint_jshorts:
-        pushl   %esi
-        movl    4+12(%esp),%ecx      # count
-        pushl   %edi
-        movl    8+ 4(%esp),%esi      # from
-        movl    8+ 8(%esp),%edi      # to
-        cmpl    %esi,%edi
-        leal    -2(%esi,%ecx,2),%eax # from + count*2 - 2
-        jbe     acs_CopyRight
-        cmpl    %eax,%edi
-        jbe     acs_CopyLeft
-acs_CopyRight:
-        movl    %ecx,%eax            # word count
-        sarl    %ecx                 # dword count
-        jz      4f                   # no dwords to move
-        cmpl    $32,%ecx
-        jbe     2f                   # <= 32 dwords
-        # copy aligned dwords
-        rep;    smovl
-        jmp     4f
-        # copy aligned dwords
-        .=.+5
-2:      subl    %esi,%edi
-        .p2align 4,,15
-3:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        addl    $4,%esi
-        subl    $1,%ecx
-        jnz     3b
-        addl    %esi,%edi
-4:      andl    $1,%eax              # suffix count
-        jz      5f                   # no suffix
-        # copy suffix
-        movw    (%esi),%dx
-        movw    %dx,(%edi)
-5:      popl    %edi
-        popl    %esi
-        ret
-acs_CopyLeft:
-        std
-        leal    -4(%edi,%ecx,2),%edi # to + count*2 - 4
-        movl    %eax,%esi            # from + count*2 - 2
-        movl    %ecx,%eax
-        subl    $2,%esi              # from + count*2 - 4
-        sarl    %ecx                 # dword count
-        jz      4f                   # no dwords to move
-        cmpl    $32,%ecx
-        ja      3f                   # > 32 dwords
-        subl    %esi,%edi
-        .p2align 4,,15
-2:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        subl    $4,%esi
-        subl    $1,%ecx
-        jnz     2b
-        addl    %esi,%edi
-        jmp     4f
-3:      rep;    smovl
-4:      andl    $1,%eax              # suffix count
-        jz      5f                   # no suffix
-        # copy suffix
-        addl    $2,%esi
-        addl    $2,%edi
-        movw    (%esi),%dx
-        movw    %dx,(%edi)
-5:      cld
-        popl    %edi
-        popl    %esi
-        ret
-
-        # Support for void Copy::conjoint_jints_atomic(void* from,
-        #                                              void* to,
-        #                                              size_t count)
-        # Equivalent to
-        #   arrayof_conjoint_jints
-        .p2align 4,,15
-        .type _Copy_conjoint_jints_atomic,@function
-        .type _Copy_arrayof_conjoint_jints,@function
-_Copy_conjoint_jints_atomic:
-_Copy_arrayof_conjoint_jints:
-        pushl   %esi
-        movl    4+12(%esp),%ecx      # count
-        pushl   %edi
-        movl    8+ 4(%esp),%esi      # from
-        movl    8+ 8(%esp),%edi      # to
-        cmpl    %esi,%edi
-        leal    -4(%esi,%ecx,4),%eax # from + count*4 - 4
-        jbe     ci_CopyRight
-        cmpl    %eax,%edi
-        jbe     ci_CopyLeft
-ci_CopyRight:
-        cmpl    $32,%ecx
-        jbe     2f                   # <= 32 dwords
-        rep;    smovl
-        popl    %edi
-        popl    %esi
-        ret
-        .=.+10
-2:      subl    %esi,%edi
-        jmp     4f
-        .p2align 4,,15
-3:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        addl    $4,%esi
-4:      subl    $1,%ecx
-        jge     3b
-        popl    %edi
-        popl    %esi
-        ret
-ci_CopyLeft:
-        std
-        leal    -4(%edi,%ecx,4),%edi # to + count*4 - 4
-        cmpl    $32,%ecx
-        ja      4f                   # > 32 dwords
-        subl    %eax,%edi            # eax == from + count*4 - 4
-        jmp     3f
-        .p2align 4,,15
-2:      movl    (%eax),%edx
-        movl    %edx,(%edi,%eax,1)
-        subl    $4,%eax
-3:      subl    $1,%ecx
-        jge     2b
-        cld
-        popl    %edi
-        popl    %esi
-        ret
-4:      movl    %eax,%esi            # from + count*4 - 4
-        rep;    smovl
-        cld
-        popl    %edi
-        popl    %esi
-        ret
-
-        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
-        #                                               jlong* to,
-        #                                               size_t count)
-        #
-        # 32-bit
-        #
-        # count treated as signed
-        #
-        # if (from > to) {
-        #   while (--count >= 0) {
-        #     *to++ = *from++;
-        #   }
-        # } else {
-        #   while (--count >= 0) {
-        #     to[count] = from[count];
-        #   }
-        # }
-        .p2align 4,,15
-        .type _Copy_conjoint_jlongs_atomic,@function
-_Copy_conjoint_jlongs_atomic:
-        movl    4+8(%esp),%ecx       # count
-        movl    4+0(%esp),%eax       # from
-        movl    4+4(%esp),%edx       # to
-        cmpl    %eax,%edx
-        jae     cla_CopyLeft
-cla_CopyRight:
-        subl    %eax,%edx
-        jmp     2f
-        .p2align 4,,15
-1:      fildll  (%eax)
-        fistpll (%edx,%eax,1)
-        addl    $8,%eax
-2:      subl    $1,%ecx
-        jge     1b
-        ret
-        .p2align 4,,15
-3:      fildll  (%eax,%ecx,8)
-        fistpll (%edx,%ecx,8)
-cla_CopyLeft:
-        subl    $1,%ecx
-        jge     3b
-        ret
-
-        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
-        #                                                 void* to,
-        #                                                 size_t count)
-        .p2align 4,,15
-        .type _mmx_Copy_arrayof_conjoint_jshorts,@function
-_mmx_Copy_arrayof_conjoint_jshorts:
-        pushl   %esi
-        movl    4+12(%esp),%ecx
-        pushl   %edi
-        movl    8+ 4(%esp),%esi
-        movl    8+ 8(%esp),%edi
-        cmpl    %esi,%edi
-        leal    -2(%esi,%ecx,2),%eax
-        jbe     mmx_acs_CopyRight
-        cmpl    %eax,%edi
-        jbe     mmx_acs_CopyLeft
-mmx_acs_CopyRight:
-        movl    %ecx,%eax
-        sarl    %ecx
-        je      5f
-        cmpl    $33,%ecx
-        jae     3f
-1:      subl    %esi,%edi
-        .p2align 4,,15
-2:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        addl    $4,%esi
-        subl    $1,%ecx
-        jnz     2b
-        addl    %esi,%edi
-        jmp     5f
-3:      smovl                        # align to 8 bytes, we know we are 4 byte aligned to start
-        subl    $1,%ecx
-4:      .p2align 4,,15
-        movq    0(%esi),%mm0
-        addl    $64,%edi
-        movq    8(%esi),%mm1
-        subl    $16,%ecx
-        movq    16(%esi),%mm2
-        movq    %mm0,-64(%edi)
-        movq    24(%esi),%mm0
-        movq    %mm1,-56(%edi)
-        movq    32(%esi),%mm1
-        movq    %mm2,-48(%edi)
-        movq    40(%esi),%mm2
-        movq    %mm0,-40(%edi)
-        movq    48(%esi),%mm0
-        movq    %mm1,-32(%edi)
-        movq    56(%esi),%mm1
-        movq    %mm2,-24(%edi)
-        movq    %mm0,-16(%edi)
-        addl    $64,%esi
-        movq    %mm1,-8(%edi)
-        cmpl    $16,%ecx
-        jge     4b
-        emms
-        testl   %ecx,%ecx
-        ja      1b
-5:      andl    $1,%eax
-        je      7f
-6:      movw    (%esi),%dx
-        movw    %dx,(%edi)
-7:      popl    %edi
-        popl    %esi
-        ret
-mmx_acs_CopyLeft:
-        std
-        leal    -4(%edi,%ecx,2),%edi
-        movl    %eax,%esi
-        movl    %ecx,%eax
-        subl    $2,%esi
-        sarl    %ecx
-        je      4f
-        cmpl    $32,%ecx
-        ja      3f
-        subl    %esi,%edi
-        .p2align 4,,15
-2:      movl    (%esi),%edx
-        movl    %edx,(%edi,%esi,1)
-        subl    $4,%esi
-        subl    $1,%ecx
-        jnz     2b
-        addl    %esi,%edi
-        jmp     4f
-3:      rep;    smovl
-4:      andl    $1,%eax
-        je      6f
-        addl    $2,%esi
-        addl    $2,%edi
-5:      movw    (%esi),%dx
-        movw    %dx,(%edi)
-6:      cld
-        popl    %edi
-        popl    %esi
-        ret
-
-
-        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
-        #                                   volatile jlong* dest,
-        #                                   jlong compare_value,
-        #                                   bool is_MP)
-        #
-        .p2align 4,,15
-        .type _Atomic_cmpxchg_long,@function
-_Atomic_cmpxchg_long:
-                                     # 8(%esp) : return PC
-        pushl   %ebx                 # 4(%esp) : old %ebx
-        pushl   %edi                 # 0(%esp) : old %edi
-        movl    12(%esp), %ebx       # 12(%esp) : exchange_value (low)
-        movl    16(%esp), %ecx       # 16(%esp) : exchange_value (high)
-        movl    24(%esp), %eax       # 24(%esp) : compare_value (low)
-        movl    28(%esp), %edx       # 28(%esp) : compare_value (high)
-        movl    20(%esp), %edi       # 20(%esp) : dest
-        cmpl    $0, 32(%esp)         # 32(%esp) : is_MP
-        je      1f
-        lock
-1:      cmpxchg8b (%edi)
-        popl    %edi
-        popl    %ebx
-        ret
-
-
-        # Support for jlong Atomic::load and Atomic::store.
-        # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
-        .p2align 4,,15
-        .type _Atomic_move_long,@function
-_Atomic_move_long:
-        movl    4(%esp), %eax        # src
-        fildll  (%eax)
-        movl    8(%esp), %eax        # dest
-        fistpll (%eax)
-        ret
-
--- /dev/null	Tue Sep 13 12:28:54 2011
+++ new/src/os_cpu/bsd_x86/vm/bsd_x86_32.s	Tue Sep 13 12:29:17 2011
@@ -0,0 +1,699 @@
+#
+# Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+
+
+#ifdef __APPLE__
+# Darwin uses _ prefixed global symbols
+#define SYMBOL(s) _ ## s
+#define ELF_TYPE(name, description)
+#else
+#define SYMBOL(s) s
+#define ELF_TYPE(name, description) .type name,description
+#endif
+
+        .globl SYMBOL(fixcw)
+
+        # NOTE WELL! The _Copy functions are called directly
+        # from server-compiler-generated code via CallLeafNoFP,
+        # which means that they *must* either not use floating
+        # point or use it in the same manner as does the server
+        # compiler.
+
+        .globl SYMBOL(_Copy_conjoint_bytes)
+        .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
+        .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
+        .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
+        .globl SYMBOL(_Copy_conjoint_jints_atomic)
+        .globl SYMBOL(_Copy_arrayof_conjoint_jints)
+        .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
+        .globl SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts)
+
+        .globl SYMBOL(_Atomic_cmpxchg_long)
+        .globl SYMBOL(_Atomic_move_long)
+
+        .text
+
+# Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
+# Set fpu to 53 bit precision. This happens too early to use a stub.
+# ported from solaris_x86_32.s
+#ifdef __APPLE__
+        .align 4
+#else
+        .align 16
+#endif
+SYMBOL(fixcw):
+        pushl   $0x27f
+        fldcw   0(%esp)
+        popl    %eax
+        ret
+
+#ifdef __APPLE__
+        .align 4
+#else
+        .align 16
+#endif
+
+        .globl SYMBOL(SafeFetch32), SYMBOL(Fetch32PFI), SYMBOL(Fetch32Resume)
+        .globl SYMBOL(SafeFetchN)
+        ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
+        ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
+        ## routine to vet the address. If the address is the faulting LD then
+        ## SafeFetchTriage() would return the resume-at EIP, otherwise null.
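+        ## Annotation (illustrative sketch, not part of the original build):
+        ## SafeFetch32(adr, errValue) behaves roughly like the C below,
+        ## assuming the platform signal handler resumes a fault at
+        ## Fetch32PFI by redirecting EIP to Fetch32Resume, which leaves
+        ## the preloaded errValue in %eax:
+        ##
+        ##   int SafeFetch32(int* adr, int errValue) {
+        ##     int result = errValue;  // movl 0x8(%esp), %eax
+        ##     result = *adr;          // movl (%ecx), %eax -- may fault
+        ##     return result;          // Fetch32Resume: ret
+        ##   }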
+        ELF_TYPE(SafeFetch32,@function)
+        .p2align 4,,15
+SYMBOL(SafeFetch32):
+SYMBOL(SafeFetchN):
+        movl    0x8(%esp), %eax
+        movl    0x4(%esp), %ecx
+SYMBOL(Fetch32PFI):
+        movl    (%ecx), %eax
+SYMBOL(Fetch32Resume):
+        ret
+
+
+        .globl SYMBOL(SpinPause)
+        ELF_TYPE(SpinPause,@function)
+        .p2align 4,,15
+SYMBOL(SpinPause):
+        rep
+        nop
+        movl    $1, %eax
+        ret
+
+        # Support for void Copy::conjoint_bytes(void* from,
+        #                                       void* to,
+        #                                       size_t count)
+        .p2align 4,,15
+        ELF_TYPE(_Copy_conjoint_bytes,@function)
+SYMBOL(_Copy_conjoint_bytes):
+        pushl   %esi
+        movl    4+12(%esp),%ecx      # count
+        pushl   %edi
+        movl    8+ 4(%esp),%esi      # from
+        movl    8+ 8(%esp),%edi      # to
+        cmpl    %esi,%edi
+        leal    -1(%esi,%ecx),%eax   # from + count - 1
+        jbe     cb_CopyRight
+        cmpl    %eax,%edi
+        jbe     cb_CopyLeft
+        # copy from low to high
+cb_CopyRight:
+        cmpl    $3,%ecx
+        jbe     5f                   # <= 3 bytes
+        # align source address at dword address boundary
+        movl    %ecx,%eax            # original count
+        movl    $4,%ecx
+        subl    %esi,%ecx
+        andl    $3,%ecx              # prefix byte count
+        jz      1f                   # no prefix
+        subl    %ecx,%eax            # byte count less prefix
+        # copy prefix
+        subl    %esi,%edi
+0:      movb    (%esi),%dl
+        movb    %dl,(%edi,%esi,1)
+        addl    $1,%esi
+        subl    $1,%ecx
+        jnz     0b
+        addl    %esi,%edi
+1:      movl    %eax,%ecx            # byte count less prefix
+        shrl    $2,%ecx              # dword count
+        jz      4f                   # no dwords to move
+        cmpl    $32,%ecx
+        jbe     2f                   # <= 32 dwords
+        # copy aligned dwords
+        rep;    smovl
+        jmp     4f
+        # copy aligned dwords
+2:      subl    %esi,%edi
+        .p2align 4,,15
+3:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        addl    $4,%esi
+        subl    $1,%ecx
+        jnz     3b
+        addl    %esi,%edi
+4:      movl    %eax,%ecx            # byte count less prefix
+5:      andl    $3,%ecx              # suffix byte count
+        jz      7f                   # no suffix
+        # copy suffix
+        xorl    %eax,%eax
+6:      movb    (%esi,%eax,1),%dl
+        movb    %dl,(%edi,%eax,1)
+        addl    $1,%eax
+        subl    $1,%ecx
+        jnz     6b
+7:      popl    %edi
+        popl    %esi
+        ret
+        # copy from high to low
+cb_CopyLeft:
+        std
+        leal    -4(%edi,%ecx),%edi   # to + count - 4
+        movl    %eax,%esi            # from + count - 1
+        movl    %ecx,%eax
+        subl    $3,%esi              # from + count - 4
+        cmpl    $3,%ecx
+        jbe     5f                   # <= 3 bytes
+1:      shrl    $2,%ecx              # dword count
+        jz      4f                   # no dwords to move
+        cmpl    $32,%ecx
+        ja      3f                   # > 32 dwords
+        # copy dwords, aligned or not
+        subl    %esi,%edi
+        .p2align 4,,15
+2:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        subl    $4,%esi
+        subl    $1,%ecx
+        jnz     2b
+        addl    %esi,%edi
+        jmp     4f
+        # copy dwords, aligned or not
+3:      rep;    smovl
+4:      movl    %eax,%ecx            # byte count
+5:      andl    $3,%ecx              # suffix byte count
+        jz      7f                   # no suffix
+        # copy suffix
+        subl    %esi,%edi
+        addl    $3,%esi
+6:      movb    (%esi),%dl
+        movb    %dl,(%edi,%esi,1)
+        subl    $1,%esi
+        subl    $1,%ecx
+        jnz     6b
+7:      cld
+        popl    %edi
+        popl    %esi
+        ret
+
+        # Support for void Copy::arrayof_conjoint_bytes(void* from,
+        #                                               void* to,
+        #                                               size_t count)
+        #
+        # Same as _Copy_conjoint_bytes, except no source alignment check.
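+        # Annotation (illustrative C sketch, not part of the original
+        # build): every conjoint copy below picks its direction with the
+        # same overlap test --
+        #
+        #   if (to > from && to <= from + (count - 1))
+        #     copy downward (acb_CopyLeft: std ... cld)  // destructive overlap
+        #   else
+        #     copy upward   (acb_CopyRight)              // rep; smovl friendly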
+        .p2align 4,,15
+        ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
+SYMBOL(_Copy_arrayof_conjoint_bytes):
+        pushl   %esi
+        movl    4+12(%esp),%ecx      # count
+        pushl   %edi
+        movl    8+ 4(%esp),%esi      # from
+        movl    8+ 8(%esp),%edi      # to
+        cmpl    %esi,%edi
+        leal    -1(%esi,%ecx),%eax   # from + count - 1
+        jbe     acb_CopyRight
+        cmpl    %eax,%edi
+        jbe     acb_CopyLeft
+        # copy from low to high
+acb_CopyRight:
+        cmpl    $3,%ecx
+        jbe     5f
+1:      movl    %ecx,%eax
+        shrl    $2,%ecx
+        jz      4f
+        cmpl    $32,%ecx
+        ja      3f
+        # copy aligned dwords
+        subl    %esi,%edi
+        .p2align 4,,15
+2:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        addl    $4,%esi
+        subl    $1,%ecx
+        jnz     2b
+        addl    %esi,%edi
+        jmp     4f
+        # copy aligned dwords
+3:      rep;    smovl
+4:      movl    %eax,%ecx
+5:      andl    $3,%ecx
+        jz      7f
+        # copy suffix
+        xorl    %eax,%eax
+6:      movb    (%esi,%eax,1),%dl
+        movb    %dl,(%edi,%eax,1)
+        addl    $1,%eax
+        subl    $1,%ecx
+        jnz     6b
+7:      popl    %edi
+        popl    %esi
+        ret
+acb_CopyLeft:
+        std
+        leal    -4(%edi,%ecx),%edi   # to + count - 4
+        movl    %eax,%esi            # from + count - 1
+        movl    %ecx,%eax
+        subl    $3,%esi              # from + count - 4
+        cmpl    $3,%ecx
+        jbe     5f
+1:      shrl    $2,%ecx
+        jz      4f
+        cmpl    $32,%ecx
+        jbe     2f                   # <= 32 dwords
+        rep;    smovl
+        jmp     4f
+        .=.+8
+2:      subl    %esi,%edi
+        .p2align 4,,15
+3:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        subl    $4,%esi
+        subl    $1,%ecx
+        jnz     3b
+        addl    %esi,%edi
+4:      movl    %eax,%ecx
+5:      andl    $3,%ecx
+        jz      7f
+        subl    %esi,%edi
+        addl    $3,%esi
+6:      movb    (%esi),%dl
+        movb    %dl,(%edi,%esi,1)
+        subl    $1,%esi
+        subl    $1,%ecx
+        jnz     6b
+7:      cld
+        popl    %edi
+        popl    %esi
+        ret
+
+        # Support for void Copy::conjoint_jshorts_atomic(void* from,
+        #                                                void* to,
+        #                                                size_t count)
+        .p2align 4,,15
+        ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
+SYMBOL(_Copy_conjoint_jshorts_atomic):
+        pushl   %esi
+        movl    4+12(%esp),%ecx      # count
+        pushl   %edi
+        movl    8+ 4(%esp),%esi      # from
+        movl    8+ 8(%esp),%edi      # to
+        cmpl    %esi,%edi
+        leal    -2(%esi,%ecx,2),%eax # from + count*2 - 2
+        jbe     cs_CopyRight
+        cmpl    %eax,%edi
+        jbe     cs_CopyLeft
+        # copy from low to high
+cs_CopyRight:
+        # align source address at dword address boundary
+        movl    %esi,%eax            # original from
+        andl    $3,%eax              # either 0 or 2
+        jz      1f                   # no prefix
+        # copy prefix
+        subl    $1,%ecx
+        jl      5f                   # zero count
+        movw    (%esi),%dx
+        movw    %dx,(%edi)
+        addl    %eax,%esi            # %eax == 2
+        addl    %eax,%edi
+1:      movl    %ecx,%eax            # word count less prefix
+        sarl    %ecx                 # dword count
+        jz      4f                   # no dwords to move
+        cmpl    $32,%ecx
+        jbe     2f                   # <= 32 dwords
+        # copy aligned dwords
+        rep;    smovl
+        jmp     4f
+        # copy aligned dwords
+2:      subl    %esi,%edi
+        .p2align 4,,15
+3:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        addl    $4,%esi
+        subl    $1,%ecx
+        jnz     3b
+        addl    %esi,%edi
+4:      andl    $1,%eax              # suffix count
+        jz      5f                   # no suffix
+        # copy suffix
+        movw    (%esi),%dx
+        movw    %dx,(%edi)
+5:      popl    %edi
+        popl    %esi
+        ret
+        # copy from high to low
+cs_CopyLeft:
+        std
+        leal    -4(%edi,%ecx,2),%edi # to + count*2 - 4
+        movl    %eax,%esi            # from + count*2 - 2
+        movl    %ecx,%eax
+        subl    $2,%esi              # from + count*2 - 4
+1:      sarl    %ecx                 # dword count
+        jz      4f                   # no dwords to move
+        cmpl    $32,%ecx
+        ja      3f                   # > 32 dwords
+        subl    %esi,%edi
+        .p2align 4,,15
+2:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        subl    $4,%esi
+        subl    $1,%ecx
+        jnz     2b
+        addl    %esi,%edi
+        jmp     4f
+3:      rep;    smovl
+4:      andl    $1,%eax              # suffix count
+        jz      5f                   # no suffix
+        # copy suffix
+        addl    $2,%esi
+        addl    $2,%edi
+        movw    (%esi),%dx
+        movw    %dx,(%edi)
+5:      cld
+        popl    %edi
+        popl    %esi
+        ret
+
+        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
+        #                                                 void* to,
+        #                                                 size_t count)
+        .p2align 4,,15
+        ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
+SYMBOL(_Copy_arrayof_conjoint_jshorts):
+        pushl   %esi
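+        # Annotation (not part of the original build): the 4+ / 8+ offset
+        # sums below spell out how many callee-saved bytes sit above the
+        # arguments -- after this push count is at 4+12(%esp), and once
+        # %edi is pushed too, from/to are at 8+4 and 8+8(%esp).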
+        movl    4+12(%esp),%ecx      # count
+        pushl   %edi
+        movl    8+ 4(%esp),%esi      # from
+        movl    8+ 8(%esp),%edi      # to
+        cmpl    %esi,%edi
+        leal    -2(%esi,%ecx,2),%eax # from + count*2 - 2
+        jbe     acs_CopyRight
+        cmpl    %eax,%edi
+        jbe     acs_CopyLeft
+acs_CopyRight:
+        movl    %ecx,%eax            # word count
+        sarl    %ecx                 # dword count
+        jz      4f                   # no dwords to move
+        cmpl    $32,%ecx
+        jbe     2f                   # <= 32 dwords
+        # copy aligned dwords
+        rep;    smovl
+        jmp     4f
+        # copy aligned dwords
+        .=.+5
+2:      subl    %esi,%edi
+        .p2align 4,,15
+3:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        addl    $4,%esi
+        subl    $1,%ecx
+        jnz     3b
+        addl    %esi,%edi
+4:      andl    $1,%eax              # suffix count
+        jz      5f                   # no suffix
+        # copy suffix
+        movw    (%esi),%dx
+        movw    %dx,(%edi)
+5:      popl    %edi
+        popl    %esi
+        ret
+acs_CopyLeft:
+        std
+        leal    -4(%edi,%ecx,2),%edi # to + count*2 - 4
+        movl    %eax,%esi            # from + count*2 - 2
+        movl    %ecx,%eax
+        subl    $2,%esi              # from + count*2 - 4
+        sarl    %ecx                 # dword count
+        jz      4f                   # no dwords to move
+        cmpl    $32,%ecx
+        ja      3f                   # > 32 dwords
+        subl    %esi,%edi
+        .p2align 4,,15
+2:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        subl    $4,%esi
+        subl    $1,%ecx
+        jnz     2b
+        addl    %esi,%edi
+        jmp     4f
+3:      rep;    smovl
+4:      andl    $1,%eax              # suffix count
+        jz      5f                   # no suffix
+        # copy suffix
+        addl    $2,%esi
+        addl    $2,%edi
+        movw    (%esi),%dx
+        movw    %dx,(%edi)
+5:      cld
+        popl    %edi
+        popl    %esi
+        ret
+
+        # Support for void Copy::conjoint_jints_atomic(void* from,
+        #                                              void* to,
+        #                                              size_t count)
+        # Equivalent to
+        #   arrayof_conjoint_jints
+        .p2align 4,,15
+        ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
+        ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
+SYMBOL(_Copy_conjoint_jints_atomic):
+SYMBOL(_Copy_arrayof_conjoint_jints):
+        pushl   %esi
+        movl    4+12(%esp),%ecx      # count
+        pushl   %edi
+        movl    8+ 4(%esp),%esi      # from
+        movl    8+ 8(%esp),%edi      # to
+        cmpl    %esi,%edi
+        leal    -4(%esi,%ecx,4),%eax # from + count*4 - 4
+        jbe     ci_CopyRight
+        cmpl    %eax,%edi
+        jbe     ci_CopyLeft
+ci_CopyRight:
+        cmpl    $32,%ecx
+        jbe     2f                   # <= 32 dwords
+        rep;    smovl
+        popl    %edi
+        popl    %esi
+        ret
+        .=.+10
+2:      subl    %esi,%edi
+        jmp     4f
+        .p2align 4,,15
+3:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        addl    $4,%esi
+4:      subl    $1,%ecx
+        jge     3b
+        popl    %edi
+        popl    %esi
+        ret
+ci_CopyLeft:
+        std
+        leal    -4(%edi,%ecx,4),%edi # to + count*4 - 4
+        cmpl    $32,%ecx
+        ja      4f                   # > 32 dwords
+        subl    %eax,%edi            # eax == from + count*4 - 4
+        jmp     3f
+        .p2align 4,,15
+2:      movl    (%eax),%edx
+        movl    %edx,(%edi,%eax,1)
+        subl    $4,%eax
+3:      subl    $1,%ecx
+        jge     2b
+        cld
+        popl    %edi
+        popl    %esi
+        ret
+4:      movl    %eax,%esi            # from + count*4 - 4
+        rep;    smovl
+        cld
+        popl    %edi
+        popl    %esi
+        ret
+
+        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
+        #                                               jlong* to,
+        #                                               size_t count)
+        #
+        # 32-bit
+        #
+        # count treated as signed
+        #
+        # if (from > to) {
+        #   while (--count >= 0) {
+        #     *to++ = *from++;
+        #   }
+        # } else {
+        #   while (--count >= 0) {
+        #     to[count] = from[count];
+        #   }
+        # }
+        .p2align 4,,15
+        ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
+SYMBOL(_Copy_conjoint_jlongs_atomic):
+        movl    4+8(%esp),%ecx       # count
+        movl    4+0(%esp),%eax       # from
+        movl    4+4(%esp),%edx       # to
+        cmpl    %eax,%edx
+        jae     cla_CopyLeft
+cla_CopyRight:
+        subl    %eax,%edx
+        jmp     2f
+        .p2align 4,,15
+1:      fildll  (%eax)
+        fistpll (%edx,%eax,1)
+        addl    $8,%eax
+2:      subl    $1,%ecx
+        jge     1b
+        ret
+        .p2align 4,,15
+3:      fildll  (%eax,%ecx,8)
+        fistpll (%edx,%ecx,8)
+cla_CopyLeft:
+        subl    $1,%ecx
+        jge     3b
+        ret
+
+        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
+        #                                                 void* to,
+        #                                                 size_t count)
+        .p2align 4,,15
+        ELF_TYPE(_mmx_Copy_arrayof_conjoint_jshorts,@function)
+SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts):
+        pushl   %esi
+        movl    4+12(%esp),%ecx
+        pushl   %edi
+        movl    8+ 4(%esp),%esi
+        movl    8+ 8(%esp),%edi
+        cmpl    %esi,%edi
+        leal    -2(%esi,%ecx,2),%eax
+        jbe     mmx_acs_CopyRight
+        cmpl    %eax,%edi
+        jbe     mmx_acs_CopyLeft
+mmx_acs_CopyRight:
+        movl    %ecx,%eax
+        sarl    %ecx
+        je      5f
+        cmpl    $33,%ecx
+        jae     3f
+1:      subl    %esi,%edi
+        .p2align 4,,15
+2:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        addl    $4,%esi
+        subl    $1,%ecx
+        jnz     2b
+        addl    %esi,%edi
+        jmp     5f
+3:      smovl                        # align to 8 bytes, we know we are 4 byte aligned to start
+        subl    $1,%ecx
+4:      .p2align 4,,15
+        movq    0(%esi),%mm0
+        addl    $64,%edi
+        movq    8(%esi),%mm1
+        subl    $16,%ecx
+        movq    16(%esi),%mm2
+        movq    %mm0,-64(%edi)
+        movq    24(%esi),%mm0
+        movq    %mm1,-56(%edi)
+        movq    32(%esi),%mm1
+        movq    %mm2,-48(%edi)
+        movq    40(%esi),%mm2
+        movq    %mm0,-40(%edi)
+        movq    48(%esi),%mm0
+        movq    %mm1,-32(%edi)
+        movq    56(%esi),%mm1
+        movq    %mm2,-24(%edi)
+        movq    %mm0,-16(%edi)
+        addl    $64,%esi
+        movq    %mm1,-8(%edi)
+        cmpl    $16,%ecx
+        jge     4b
+        emms
+        testl   %ecx,%ecx
+        ja      1b
+5:      andl    $1,%eax
+        je      7f
+6:      movw    (%esi),%dx
+        movw    %dx,(%edi)
+7:      popl    %edi
+        popl    %esi
+        ret
+mmx_acs_CopyLeft:
+        std
+        leal    -4(%edi,%ecx,2),%edi
+        movl    %eax,%esi
+        movl    %ecx,%eax
+        subl    $2,%esi
+        sarl    %ecx
+        je      4f
+        cmpl    $32,%ecx
+        ja      3f
+        subl    %esi,%edi
+        .p2align 4,,15
+2:      movl    (%esi),%edx
+        movl    %edx,(%edi,%esi,1)
+        subl    $4,%esi
+        subl    $1,%ecx
+        jnz     2b
+        addl    %esi,%edi
+        jmp     4f
+3:      rep;    smovl
+4:      andl    $1,%eax
+        je      6f
+        addl    $2,%esi
+        addl    $2,%edi
+5:      movw    (%esi),%dx
+        movw    %dx,(%edi)
+6:      cld
+        popl    %edi
+        popl    %esi
+        ret
+
+
+        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
+        #                                   volatile jlong* dest,
+        #                                   jlong compare_value,
+        #                                   bool is_MP)
+        #
+        .p2align 4,,15
+        ELF_TYPE(_Atomic_cmpxchg_long,@function)
+SYMBOL(_Atomic_cmpxchg_long):
+                                     # 8(%esp) : return PC
+        pushl   %ebx                 # 4(%esp) : old %ebx
+        pushl   %edi                 # 0(%esp) : old %edi
+        movl    12(%esp), %ebx       # 12(%esp) : exchange_value (low)
+        movl    16(%esp), %ecx       # 16(%esp) : exchange_value (high)
+        movl    24(%esp), %eax       # 24(%esp) : compare_value (low)
+        movl    28(%esp), %edx       # 28(%esp) : compare_value (high)
+        movl    20(%esp), %edi       # 20(%esp) : dest
+        cmpl    $0, 32(%esp)         # 32(%esp) : is_MP
+        je      1f
+        lock
+1:      cmpxchg8b (%edi)
+        popl    %edi
+        popl    %ebx
+        ret
+
+
+        # Support for jlong Atomic::load and Atomic::store.
+        # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
+        .p2align 4,,15
+        ELF_TYPE(_Atomic_move_long,@function)
+SYMBOL(_Atomic_move_long):
+        movl    4(%esp), %eax        # src
+        fildll  (%eax)
+        movl    8(%esp), %eax        # dest
+        fistpll (%eax)
+        ret
+
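+        # Annotation (not part of the original build): _Atomic_move_long
+        # relies on fildll/fistpll moving all eight bytes in one memory
+        # access, which is what makes a jlong load/store atomic on IA-32;
+        # in C terms the effect is roughly
+        #
+        #   void _Atomic_move_long(volatile long long* src,
+        #                          volatile long long* dst) {
+        #     *dst = *src;   // one 64-bit access, never two 32-bit halves
+        #   }
+        #
+        # _Atomic_cmpxchg_long likewise skips the lock prefix (je 1f jumps
+        # over it) when is_MP is false, sparing uniprocessors the cost of
+        # a locked bus cycle.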