1 # 
   2 # Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
   3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 #
   5 # This code is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License version 2 only, as
   7 # published by the Free Software Foundation.
   8 #
   9 # This code is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 # version 2 for more details (a copy is included in the LICENSE file that
  13 # accompanied this code).
  14 #
  15 # You should have received a copy of the GNU General Public License version
  16 # 2 along with this work; if not, write to the Free Software Foundation,
  17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 #
  19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 # or visit www.oracle.com if you need additional information or have any
  21 # questions.
  22 #
  23 
  24 #ifdef __APPLE__
  25 # Darwin uses _ prefixed global symbols
  26 #define SYMBOL(s) _ ## s
  27 #define ELF_TYPE(name, description)
  28 #else
  29 #define SYMBOL(s) s
  30 #define ELF_TYPE(name, description) .type name,description
  31 #endif
  32 
  33         # NOTE WELL!  The _Copy functions are called directly
  34         # from server-compiler-generated code via CallLeafNoFP,
  35         # which means that they *must* either not use floating
  36         # point or use it in the same manner as does the server
  37         # compiler.
  38         
  39         .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
  40         .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
  41         .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
  42         .globl SYMBOL(_Copy_arrayof_conjoint_jints)
  43         .globl SYMBOL(_Copy_conjoint_jints_atomic)
  44         .globl SYMBOL(_Copy_arrayof_conjoint_jlongs)
  45         .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
  46 
  47         .text
  48 
  49         # Support for void Copy::arrayof_conjoint_bytes(void* from,
  50         #                                               void* to,
  51         #                                               size_t count)
  52         # rdi - from
  53         # rsi - to
  54         # rdx - count, treated as ssize_t
  55         #
  56         .p2align 4,,15
  57         ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
  58 SYMBOL(_Copy_arrayof_conjoint_bytes):
  59         movq     %rdx,%r8             # byte count
  60         shrq     $3,%rdx              # qword count
  61         cmpq     %rdi,%rsi
  62         leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
  63         jbe      acb_CopyRight
  64         cmpq     %rax,%rsi
  65         jbe      acb_CopyLeft 
  66 acb_CopyRight:
  67         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
  68         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
  69         negq     %rdx
  70         jmp      7f
  71         .p2align 4,,15
  72 1:      movq     8(%rax,%rdx,8),%rsi
  73         movq     %rsi,8(%rcx,%rdx,8)
  74         addq     $1,%rdx
  75         jnz      1b
  76 2:      testq    $4,%r8               # check for trailing dword
  77         jz       3f
  78         movl     8(%rax),%esi         # copy trailing dword
  79         movl     %esi,8(%rcx)
  80         addq     $4,%rax
  81         addq     $4,%rcx              # original %rsi is trashed, so we
  82                                       #  can't use it as a base register
  83 3:      testq    $2,%r8               # check for trailing word
  84         jz       4f
  85         movw     8(%rax),%si          # copy trailing word
  86         movw     %si,8(%rcx)
  87         addq     $2,%rcx
  88 4:      testq    $1,%r8               # check for trailing byte
  89         jz       5f
  90         movb     -1(%rdi,%r8,1),%al   # copy trailing byte
  91         movb     %al,8(%rcx)
  92 5:      ret
  93         .p2align 4,,15
  94 6:      movq     -24(%rax,%rdx,8),%rsi
  95         movq     %rsi,-24(%rcx,%rdx,8)
  96         movq     -16(%rax,%rdx,8),%rsi
  97         movq     %rsi,-16(%rcx,%rdx,8)
  98         movq     -8(%rax,%rdx,8),%rsi
  99         movq     %rsi,-8(%rcx,%rdx,8)
 100         movq     (%rax,%rdx,8),%rsi
 101         movq     %rsi,(%rcx,%rdx,8)
 102 7:      addq     $4,%rdx
 103         jle      6b
 104         subq     $4,%rdx
 105         jl       1b
 106         jmp      2b
 107 acb_CopyLeft:
 108         testq    $1,%r8               # check for trailing byte
 109         jz       1f
 110         movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
 111         movb     %cl,-1(%rsi,%r8,1)
 112         subq     $1,%r8               # adjust for possible trailing word
 113 1:      testq    $2,%r8               # check for trailing word
 114         jz       2f
 115         movw     -2(%rdi,%r8,1),%cx   # copy trailing word
 116         movw     %cx,-2(%rsi,%r8,1)
 117 2:      testq    $4,%r8               # check for trailing dword
 118         jz       5f
 119         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
 120         movl     %ecx,(%rsi,%rdx,8)
 121         jmp      5f
 122         .p2align 4,,15
 123 3:      movq     -8(%rdi,%rdx,8),%rcx
 124         movq     %rcx,-8(%rsi,%rdx,8)
 125         subq     $1,%rdx
 126         jnz      3b
 127         ret
 128         .p2align 4,,15
 129 4:      movq     24(%rdi,%rdx,8),%rcx
 130         movq     %rcx,24(%rsi,%rdx,8)
 131         movq     16(%rdi,%rdx,8),%rcx
 132         movq     %rcx,16(%rsi,%rdx,8)
 133         movq     8(%rdi,%rdx,8),%rcx
 134         movq     %rcx,8(%rsi,%rdx,8)
 135         movq     (%rdi,%rdx,8),%rcx
 136         movq     %rcx,(%rsi,%rdx,8)
 137 5:      subq     $4,%rdx
 138         jge      4b
 139         addq     $4,%rdx
 140         jg       3b
 141         ret
 142 
 143         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 144         #                                                 void* to,
 145         #                                                 size_t count)
 146         # Equivalent to
 147         #   conjoint_jshorts_atomic
 148         #
 149         # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
 150         # let the hardware handle it.  The tow or four words within dwords
 151         # or qwords that span cache line boundaries will still be loaded
 152         # and stored atomically.
 153         #
 154         # rdi - from
 155         # rsi - to
 156         # rdx - count, treated as ssize_t
 157         #
 158         .p2align 4,,15
 159         ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
 160         ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
 161 SYMBOL(_Copy_arrayof_conjoint_jshorts):
 162 SYMBOL(_Copy_conjoint_jshorts_atomic):
 163         movq     %rdx,%r8             # word count
 164         shrq     $2,%rdx              # qword count
 165         cmpq     %rdi,%rsi
 166         leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
 167         jbe      acs_CopyRight
 168         cmpq     %rax,%rsi
 169         jbe      acs_CopyLeft 
 170 acs_CopyRight:
 171         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
 172         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
 173         negq     %rdx
 174         jmp      6f
 175 1:      movq     8(%rax,%rdx,8),%rsi
 176         movq     %rsi,8(%rcx,%rdx,8)
 177         addq     $1,%rdx
 178         jnz      1b
 179 2:      testq    $2,%r8               # check for trailing dword
 180         jz       3f
 181         movl     8(%rax),%esi         # copy trailing dword
 182         movl     %esi,8(%rcx)
 183         addq     $4,%rcx              # original %rsi is trashed, so we
 184                                       #  can't use it as a base register
 185 3:      testq    $1,%r8               # check for trailing word
 186         jz       4f
 187         movw     -2(%rdi,%r8,2),%si   # copy trailing word
 188         movw     %si,8(%rcx)
 189 4:      ret
 190         .p2align 4,,15
 191 5:      movq     -24(%rax,%rdx,8),%rsi
 192         movq     %rsi,-24(%rcx,%rdx,8)
 193         movq     -16(%rax,%rdx,8),%rsi
 194         movq     %rsi,-16(%rcx,%rdx,8)
 195         movq     -8(%rax,%rdx,8),%rsi
 196         movq     %rsi,-8(%rcx,%rdx,8)
 197         movq     (%rax,%rdx,8),%rsi
 198         movq     %rsi,(%rcx,%rdx,8)
 199 6:      addq     $4,%rdx
 200         jle      5b
 201         subq     $4,%rdx
 202         jl       1b
 203         jmp      2b
 204 acs_CopyLeft:
 205         testq    $1,%r8               # check for trailing word
 206         jz       1f
 207         movw     -2(%rdi,%r8,2),%cx   # copy trailing word
 208         movw     %cx,-2(%rsi,%r8,2)
 209 1:      testq    $2,%r8               # check for trailing dword
 210         jz       4f
 211         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
 212         movl     %ecx,(%rsi,%rdx,8)
 213         jmp      4f
 214 2:      movq     -8(%rdi,%rdx,8),%rcx
 215         movq     %rcx,-8(%rsi,%rdx,8)
 216         subq     $1,%rdx
 217         jnz      2b
 218         ret
 219         .p2align 4,,15
 220 3:      movq     24(%rdi,%rdx,8),%rcx
 221         movq     %rcx,24(%rsi,%rdx,8)
 222         movq     16(%rdi,%rdx,8),%rcx
 223         movq     %rcx,16(%rsi,%rdx,8)
 224         movq     8(%rdi,%rdx,8),%rcx
 225         movq     %rcx,8(%rsi,%rdx,8)
 226         movq     (%rdi,%rdx,8),%rcx
 227         movq     %rcx,(%rsi,%rdx,8)
 228 4:      subq     $4,%rdx
 229         jge      3b
 230         addq     $4,%rdx
 231         jg       2b
 232         ret
 233 
 234         # Support for void Copy::arrayof_conjoint_jints(jint* from,
 235         #                                               jint* to,
 236         #                                               size_t count)
 237         # Equivalent to
 238         #   conjoint_jints_atomic
 239         #
 240         # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
 241         # the hardware handle it.  The two dwords within qwords that span
 242         # cache line boundaries will still be loaded and stored atomically.
 243         #
 244         # rdi - from
 245         # rsi - to
 246         # rdx - count, treated as ssize_t
 247         #
 248         .p2align 4,,15
 249         ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
 250         ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
 251 SYMBOL(_Copy_arrayof_conjoint_jints):
 252 SYMBOL(_Copy_conjoint_jints_atomic):
 253         movq     %rdx,%r8             # dword count
 254         shrq     %rdx                 # qword count
 255         cmpq     %rdi,%rsi
 256         leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
 257         jbe      aci_CopyRight
 258         cmpq     %rax,%rsi
 259         jbe      aci_CopyLeft 
 260 aci_CopyRight:
 261         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
 262         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
 263         negq     %rdx
 264         jmp      5f
 265         .p2align 4,,15
 266 1:      movq     8(%rax,%rdx,8),%rsi
 267         movq     %rsi,8(%rcx,%rdx,8)
 268         addq     $1,%rdx
 269         jnz       1b
 270 2:      testq    $1,%r8               # check for trailing dword
 271         jz       3f
 272         movl     8(%rax),%esi         # copy trailing dword
 273         movl     %esi,8(%rcx)
 274 3:      ret
 275         .p2align 4,,15
 276 4:      movq     -24(%rax,%rdx,8),%rsi
 277         movq     %rsi,-24(%rcx,%rdx,8)
 278         movq     -16(%rax,%rdx,8),%rsi
 279         movq     %rsi,-16(%rcx,%rdx,8)
 280         movq     -8(%rax,%rdx,8),%rsi
 281         movq     %rsi,-8(%rcx,%rdx,8)
 282         movq     (%rax,%rdx,8),%rsi
 283         movq     %rsi,(%rcx,%rdx,8)
 284 5:      addq     $4,%rdx
 285         jle      4b
 286         subq     $4,%rdx
 287         jl       1b
 288         jmp      2b
 289 aci_CopyLeft:
 290         testq    $1,%r8               # check for trailing dword
 291         jz       3f
 292         movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
 293         movl     %ecx,-4(%rsi,%r8,4)
 294         jmp      3f
 295 1:      movq     -8(%rdi,%rdx,8),%rcx
 296         movq     %rcx,-8(%rsi,%rdx,8)
 297         subq     $1,%rdx
 298         jnz      1b
 299         ret
 300         .p2align 4,,15
 301 2:      movq     24(%rdi,%rdx,8),%rcx
 302         movq     %rcx,24(%rsi,%rdx,8)
 303         movq     16(%rdi,%rdx,8),%rcx
 304         movq     %rcx,16(%rsi,%rdx,8)
 305         movq     8(%rdi,%rdx,8),%rcx
 306         movq     %rcx,8(%rsi,%rdx,8)
 307         movq     (%rdi,%rdx,8),%rcx
 308         movq     %rcx,(%rsi,%rdx,8)
 309 3:      subq     $4,%rdx
 310         jge      2b
 311         addq     $4,%rdx
 312         jg       1b
 313         ret
 314 
 315         # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
 316         #                                                jlong* to,
 317         #                                                size_t count)
 318         # Equivalent to
 319         #   conjoint_jlongs_atomic
 320         #   arrayof_conjoint_oops
 321         #   conjoint_oops_atomic
 322         #
 323         # rdi - from
 324         # rsi - to
 325         # rdx - count, treated as ssize_t
 326         #
 327         .p2align 4,,15
 328         ELF_TYPE(_Copy_arrayof_conjoint_jlongs,@function)
 329         ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
 330 SYMBOL(_Copy_arrayof_conjoint_jlongs):
 331 SYMBOL(_Copy_conjoint_jlongs_atomic):
 332         cmpq     %rdi,%rsi
 333         leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
 334         jbe      acl_CopyRight
 335         cmpq     %rax,%rsi
 336         jbe      acl_CopyLeft 
 337 acl_CopyRight:
 338         leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
 339         negq     %rdx
 340         jmp      3f
 341 1:      movq     8(%rax,%rdx,8),%rsi
 342         movq     %rsi,8(%rcx,%rdx,8)
 343         addq     $1,%rdx
 344         jnz      1b
 345         ret
 346         .p2align 4,,15
 347 2:      movq     -24(%rax,%rdx,8),%rsi
 348         movq     %rsi,-24(%rcx,%rdx,8)
 349         movq     -16(%rax,%rdx,8),%rsi
 350         movq     %rsi,-16(%rcx,%rdx,8)
 351         movq     -8(%rax,%rdx,8),%rsi
 352         movq     %rsi,-8(%rcx,%rdx,8)
 353         movq     (%rax,%rdx,8),%rsi
 354         movq     %rsi,(%rcx,%rdx,8)
 355 3:      addq     $4,%rdx
 356         jle      2b
 357         subq     $4,%rdx
 358         jl       1b
 359         ret
 360 4:      movq     -8(%rdi,%rdx,8),%rcx
 361         movq     %rcx,-8(%rsi,%rdx,8)
 362         subq     $1,%rdx
 363         jnz      4b
 364         ret
 365         .p2align 4,,15
 366 5:      movq     24(%rdi,%rdx,8),%rcx
 367         movq     %rcx,24(%rsi,%rdx,8)
 368         movq     16(%rdi,%rdx,8),%rcx
 369         movq     %rcx,16(%rsi,%rdx,8)
 370         movq     8(%rdi,%rdx,8),%rcx
 371         movq     %rcx,8(%rsi,%rdx,8)
 372         movq     (%rdi,%rdx,8),%rcx
 373         movq     %rcx,(%rsi,%rdx,8)
 374 acl_CopyLeft:
 375         subq     $4,%rdx
 376         jge      5b
 377         addq     $4,%rdx
 378         jg       4b
 379         ret