1 # 
   2 # Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
   3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 #
   5 # This code is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License version 2 only, as
   7 # published by the Free Software Foundation.
   8 #
   9 # This code is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 # version 2 for more details (a copy is included in the LICENSE file that
  13 # accompanied this code).
  14 #
  15 # You should have received a copy of the GNU General Public License version
  16 # 2 along with this work; if not, write to the Free Software Foundation,
  17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 #
  19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 # or visit www.oracle.com if you need additional information or have any
  21 # questions.
  22 #
  23 
  24 
  25         # NOTE WELL!  The _Copy functions are called directly
  26         # from server-compiler-generated code via CallLeafNoFP,
  27         # which means that they *must* either not use floating
  28         # point or use it in the same manner as does the server
  29         # compiler.
  30         
  31         .globl _Copy_arrayof_conjoint_bytes      # exported entry points; each of
  32         .globl _Copy_arrayof_conjoint_jshorts    #  these symbols is defined as a
  33         .globl _Copy_conjoint_jshorts_atomic     #  label further down in this file
  34         .globl _Copy_arrayof_conjoint_jints
  35         .globl _Copy_conjoint_jints_atomic
  36         .globl _Copy_arrayof_conjoint_jlongs
  37         .globl _Copy_conjoint_jlongs_atomic
  38 
  39         .text                                    # the routines below go into the code section
  40 
  41         # Support for void Copy::arrayof_conjoint_bytes(void* from,
  42         #                                               void* to,
  43         #                                               size_t count)
  44         # rdi - from
  45         # rsi - to
  46         # rdx - count, treated as ssize_t
  47         #
  48         .p2align 4,,15                # align the entry point to 16 bytes
  49         .type    _Copy_arrayof_conjoint_bytes,@function
             # Copy %rdx bytes from %rdi to %rsi; the two regions may overlap.
             #
             # Direction selection (unsigned pointer compares):
             #   to <= from                  -> acb_CopyRight (ascending addresses)
             #   from < to <= from+count-1   -> acb_CopyLeft  (descending addresses)
             #   to >  from+count-1          -> acb_CopyRight (falls through)
             #
             # Only integer registers are used.  %rax, %rcx, %rdx, %rsi and %r8
             # are overwritten; %rdi is only read.
  50 _Copy_arrayof_conjoint_bytes:
  51         movq     %rdx,%r8             # byte count
  52         shrq     $3,%rdx              # qword count
  53         cmpq     %rdi,%rsi            # flags <- to - from
  54         leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
             # leaq does not modify flags, so the jbe below still tests the
             # cmpq two instructions up.
  55         jbe      acb_CopyRight        # to <= from: ascending copy
  56         cmpq     %rax,%rsi            # to vs. address of the last source byte
  57         jbe      acb_CopyLeft         # to lies inside the source: descending copy
  58 acb_CopyRight:
             # Ascending copy.  %rax/%rcx are biased by -8 past the qword part
             # so that, with %rdx counting from -qcount up to -1,
             # 8(%rax,%rdx,8) and 8(%rcx,%rdx,8) walk the qwords first to last.
  59         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
  60         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
  61         negq     %rdx                 # %rdx = -qcount
  62         jmp      7f                   # enter at the loop-control code
  63         .p2align 4,,15                # align the loop head
  64 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per pass (1-3 leftover qwords)
  65         movq     %rsi,8(%rcx,%rdx,8)
  66         addq     $1,%rdx
  67         jnz      1b
             # Tail.  %rdx is 0 whenever control reaches label 2, so 8(%rax) and
             # 8(%rcx) address the first byte after the qword part.  Bits 2, 1
             # and 0 of the byte count select a trailing dword, word and byte.
  68 2:      testq    $4,%r8               # check for trailing dword
  69         jz       3f
  70         movl     8(%rax),%esi         # copy trailing dword
  71         movl     %esi,8(%rcx)
  72         addq     $4,%rax              # step the source cursor past the dword
  73         addq     $4,%rcx              # original %rsi is trashed, so we
  74                                       #  can't use it as a base register
  75 3:      testq    $2,%r8               # check for trailing word
  76         jz       4f
  77         movw     8(%rax),%si          # copy trailing word
  78         movw     %si,8(%rcx)
  79         addq     $2,%rcx              # only the destination cursor is stepped: the
                                           #  byte below is loaded via %rdi and %r8
  80 4:      testq    $1,%r8               # check for trailing byte
  81         jz       5f
  82         movb     -1(%rdi,%r8,1),%al   # copy trailing byte
  83         movb     %al,8(%rcx)          # %rcx was already stepped past any dword/word
  84 5:      ret
  85         .p2align 4,,15                # align the loop head
  86 6:      movq     -24(%rax,%rdx,8),%rsi  # unrolled: 4 qwords per pass, ascending
  87         movq     %rsi,-24(%rcx,%rdx,8)
  88         movq     -16(%rax,%rdx,8),%rsi
  89         movq     %rsi,-16(%rcx,%rdx,8)
  90         movq     -8(%rax,%rdx,8),%rsi
  91         movq     %rsi,-8(%rcx,%rdx,8)
  92         movq     (%rax,%rdx,8),%rsi
  93         movq     %rsi,(%rcx,%rdx,8)
  94 7:      addq     $4,%rdx              # loop control: at least 4 qwords left?
  95         jle      6b                   #  yes: copy 4 more
  96         subq     $4,%rdx              #  no: undo the add
  97         jl       1b                   # 1-3 qwords left
  98         jmp      2b                   # none left: copy the tail
  99 acb_CopyLeft:
             # Descending copy: the tail (highest addresses) is copied first,
             # then the qwords from last to first.  %rsi is left intact on
             # this path; %rcx is the scratch register.
 100         testq    $1,%r8               # check for trailing byte
 101         jz       1f
 102         movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
 103         movb     %cl,-1(%rsi,%r8,1)
 104         subq     $1,%r8               # adjust for possible trailing word
 105 1:      testq    $2,%r8               # check for trailing word
 106         jz       2f
 107         movw     -2(%rdi,%r8,1),%cx   # copy trailing word
 108         movw     %cx,-2(%rsi,%r8,1)
 109 2:      testq    $4,%r8               # check for trailing dword
 110         jz       5f
 111         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
 112         movl     %ecx,(%rsi,%rdx,8)   # it sits directly after the qword part
 113         jmp      5f                   # enter the qword copy at its loop control
 114         .p2align 4,,15                # align the loop head
 115 3:      movq     -8(%rdi,%rdx,8),%rcx # one qword per pass (1-3 leftover), descending
 116         movq     %rcx,-8(%rsi,%rdx,8)
 117         subq     $1,%rdx
 118         jnz      3b
 119         ret
 120         .p2align 4,,15                # align the loop head
 121 4:      movq     24(%rdi,%rdx,8),%rcx # unrolled: 4 qwords per pass, descending
 122         movq     %rcx,24(%rsi,%rdx,8)
 123         movq     16(%rdi,%rdx,8),%rcx
 124         movq     %rcx,16(%rsi,%rdx,8)
 125         movq     8(%rdi,%rdx,8),%rcx
 126         movq     %rcx,8(%rsi,%rdx,8)
 127         movq     (%rdi,%rdx,8),%rcx
 128         movq     %rcx,(%rsi,%rdx,8)
 129 5:      subq     $4,%rdx              # loop control: at least 4 qwords left?
 130         jge      4b                   #  yes: copy 4 more
 131         addq     $4,%rdx              #  no: undo the subtract
 132         jg       3b                   # 1-3 qwords left
 133         ret
 134 
 135         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 136         #                                                 void* to,
 137         #                                                 size_t count)
 138         # Equivalent to
 139         #   conjoint_jshorts_atomic
 140         #
 141         # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
 142         # let the hardware handle it.  The two or four words within dwords
 143         # or qwords that span cache line boundaries will still be loaded
 144         # and stored atomically.
 145         #
 146         # rdi - from
 147         # rsi - to
 148         # rdx - count, treated as ssize_t
 149         #
 150         .p2align 4,,15                # align the entry point to 16 bytes
 151         .type    _Copy_arrayof_conjoint_jshorts,@function
 152         .type    _Copy_conjoint_jshorts_atomic,@function
             # Copy %rdx 2-byte elements from %rdi to %rsi; the regions may
             # overlap.  Both labels below name this same code.
             #
             # Data is moved only with movq, movl and movw, so no load or store
             # is narrower than one element.
             #
             # Direction selection (unsigned pointer compares):
             #   to <= from                          -> acs_CopyRight (ascending)
             #   from < to <= address of last element -> acs_CopyLeft (descending)
             #   otherwise                           -> acs_CopyRight (falls through)
             #
             # Only integer registers are used.  %rax, %rcx, %rdx and %rsi are
             # overwritten; %rdi and %r8 are only read after setup.
 153 _Copy_arrayof_conjoint_jshorts:
 154 _Copy_conjoint_jshorts_atomic:
 155         movq     %rdx,%r8             # word count
 156         shrq     $2,%rdx              # qword count
 157         cmpq     %rdi,%rsi            # flags <- to - from
 158         leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
             # leaq does not modify flags, so the jbe below still tests the
             # cmpq two instructions up.
 159         jbe      acs_CopyRight        # to <= from: ascending copy
 160         cmpq     %rax,%rsi            # to vs. address of the last source element
 161         jbe      acs_CopyLeft         # to lies inside the source: descending copy
 162 acs_CopyRight:
             # Ascending copy; %rdx counts from -qcount up to -1 against bases
             # that point 8 bytes before the end of the qword part.
 163         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
 164         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
 165         negq     %rdx                 # %rdx = -qcount
 166         jmp      6f                   # enter at the loop-control code
 167 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per pass (1-3 leftover qwords)
 168         movq     %rsi,8(%rcx,%rdx,8)
 169         addq     $1,%rdx
 170         jnz      1b
             # Tail.  %rdx is 0 whenever control reaches label 2, so 8(%rax) and
             # 8(%rcx) address the first element after the qword part.  Bit 1 of
             # the element count selects a trailing dword (two elements), bit 0
             # a single trailing element.
 171 2:      testq    $2,%r8               # check for trailing dword
 172         jz       3f
 173         movl     8(%rax),%esi         # copy trailing dword
 174         movl     %esi,8(%rcx)
 175         addq     $4,%rcx              # original %rsi is trashed, so we
 176                                       #  can't use it as a base register
 177 3:      testq    $1,%r8               # check for trailing word
 178         jz       4f
 179         movw     -2(%rdi,%r8,2),%si   # copy trailing word
 180         movw     %si,8(%rcx)          # %rcx was already stepped past any dword
 181 4:      ret
 182         .p2align 4,,15                # align the loop head
 183 5:      movq     -24(%rax,%rdx,8),%rsi  # unrolled: 4 qwords per pass, ascending
 184         movq     %rsi,-24(%rcx,%rdx,8)
 185         movq     -16(%rax,%rdx,8),%rsi
 186         movq     %rsi,-16(%rcx,%rdx,8)
 187         movq     -8(%rax,%rdx,8),%rsi
 188         movq     %rsi,-8(%rcx,%rdx,8)
 189         movq     (%rax,%rdx,8),%rsi
 190         movq     %rsi,(%rcx,%rdx,8)
 191 6:      addq     $4,%rdx              # loop control: at least 4 qwords left?
 192         jle      5b                   #  yes: copy 4 more
 193         subq     $4,%rdx              #  no: undo the add
 194         jl       1b                   # 1-3 qwords left
 195         jmp      2b                   # none left: copy the tail
 196 acs_CopyLeft:
             # Descending copy: the tail (highest addresses) is copied first,
             # then the qwords from last to first.  %rsi is left intact on
             # this path; %rcx is the scratch register.
 197         testq    $1,%r8               # check for trailing word
 198         jz       1f
 199         movw     -2(%rdi,%r8,2),%cx   # copy trailing word
 200         movw     %cx,-2(%rsi,%r8,2)
 201 1:      testq    $2,%r8               # check for trailing dword
 202         jz       4f
 203         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
 204         movl     %ecx,(%rsi,%rdx,8)   # it sits directly after the qword part
 205         jmp      4f                   # enter the qword copy at its loop control
 206 2:      movq     -8(%rdi,%rdx,8),%rcx # one qword per pass (1-3 leftover), descending
 207         movq     %rcx,-8(%rsi,%rdx,8)
 208         subq     $1,%rdx
 209         jnz      2b
 210         ret
 211         .p2align 4,,15                # align the loop head
 212 3:      movq     24(%rdi,%rdx,8),%rcx # unrolled: 4 qwords per pass, descending
 213         movq     %rcx,24(%rsi,%rdx,8)
 214         movq     16(%rdi,%rdx,8),%rcx
 215         movq     %rcx,16(%rsi,%rdx,8)
 216         movq     8(%rdi,%rdx,8),%rcx
 217         movq     %rcx,8(%rsi,%rdx,8)
 218         movq     (%rdi,%rdx,8),%rcx
 219         movq     %rcx,(%rsi,%rdx,8)
 220 4:      subq     $4,%rdx              # loop control: at least 4 qwords left?
 221         jge      3b                   #  yes: copy 4 more
 222         addq     $4,%rdx              #  no: undo the subtract
 223         jg       2b                   # 1-3 qwords left
 224         ret
 225 
 226         # Support for void Copy::arrayof_conjoint_jints(jint* from,
 227         #                                               jint* to,
 228         #                                               size_t count)
 229         # Equivalent to
 230         #   conjoint_jints_atomic
 231         #
 232         # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
 233         # the hardware handle it.  The two dwords within qwords that span
 234         # cache line boundaries will still be loaded and stored atomically.
 235         #
 236         # rdi - from
 237         # rsi - to
 238         # rdx - count, treated as ssize_t
 239         #
 240         .p2align 4,,15                # align the entry point to 16 bytes
 241         .type    _Copy_arrayof_conjoint_jints,@function
 242         .type    _Copy_conjoint_jints_atomic,@function
             # Copy %rdx 4-byte elements from %rdi to %rsi; the regions may
             # overlap.  Both labels below name this same code.
             #
             # Data is moved only with movq and movl, so no load or store is
             # narrower than one element.
             #
             # Direction selection (unsigned pointer compares):
             #   to <= from                          -> aci_CopyRight (ascending)
             #   from < to <= address of last element -> aci_CopyLeft (descending)
             #   otherwise                           -> aci_CopyRight (falls through)
             #
             # Only integer registers are used.  %rax, %rcx, %rdx and %rsi are
             # overwritten; %rdi and %r8 are only read after setup.
 243 _Copy_arrayof_conjoint_jints:
 244 _Copy_conjoint_jints_atomic:
 245         movq     %rdx,%r8             # dword count
 246         shrq     %rdx                 # qword count
 247         cmpq     %rdi,%rsi            # flags <- to - from
 248         leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
             # leaq does not modify flags, so the jbe below still tests the
             # cmpq two instructions up.
 249         jbe      aci_CopyRight        # to <= from: ascending copy
 250         cmpq     %rax,%rsi            # to vs. address of the last source element
 251         jbe      aci_CopyLeft         # to lies inside the source: descending copy
 252 aci_CopyRight:
             # Ascending copy; %rdx counts from -qcount up to -1 against bases
             # that point 8 bytes before the end of the qword part.
 253         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
 254         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
 255         negq     %rdx                 # %rdx = -qcount
 256         jmp      5f                   # enter at the loop-control code
 257         .p2align 4,,15                # align the loop head
 258 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per pass (1-3 leftover qwords)
 259         movq     %rsi,8(%rcx,%rdx,8)
 260         addq     $1,%rdx
 261         jnz       1b
             # %rdx is 0 whenever control reaches label 2, so 8(%rax)/8(%rcx)
             # address the element after the qword part (present if count is odd).
 262 2:      testq    $1,%r8               # check for trailing dword
 263         jz       3f
 264         movl     8(%rax),%esi         # copy trailing dword
 265         movl     %esi,8(%rcx)
 266 3:      ret
 267         .p2align 4,,15                # align the loop head
 268 4:      movq     -24(%rax,%rdx,8),%rsi  # unrolled: 4 qwords per pass, ascending
 269         movq     %rsi,-24(%rcx,%rdx,8)
 270         movq     -16(%rax,%rdx,8),%rsi
 271         movq     %rsi,-16(%rcx,%rdx,8)
 272         movq     -8(%rax,%rdx,8),%rsi
 273         movq     %rsi,-8(%rcx,%rdx,8)
 274         movq     (%rax,%rdx,8),%rsi
 275         movq     %rsi,(%rcx,%rdx,8)
 276 5:      addq     $4,%rdx              # loop control: at least 4 qwords left?
 277         jle      4b                   #  yes: copy 4 more
 278         subq     $4,%rdx              #  no: undo the add
 279         jl       1b                   # 1-3 qwords left
 280         jmp      2b                   # none left: copy the tail
 281 aci_CopyLeft:
             # Descending copy: the odd trailing element (highest address) is
             # copied first, then the qwords from last to first.  %rsi is left
             # intact on this path; %rcx is the scratch register.
 282         testq    $1,%r8               # check for trailing dword
 283         jz       3f
 284         movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
 285         movl     %ecx,-4(%rsi,%r8,4)
 286         jmp      3f                   # enter the qword copy at its loop control
 287 1:      movq     -8(%rdi,%rdx,8),%rcx # one qword per pass (1-3 leftover), descending
 288         movq     %rcx,-8(%rsi,%rdx,8)
 289         subq     $1,%rdx
 290         jnz      1b
 291         ret
 292         .p2align 4,,15                # align the loop head
 293 2:      movq     24(%rdi,%rdx,8),%rcx # unrolled: 4 qwords per pass, descending
 294         movq     %rcx,24(%rsi,%rdx,8)
 295         movq     16(%rdi,%rdx,8),%rcx
 296         movq     %rcx,16(%rsi,%rdx,8)
 297         movq     8(%rdi,%rdx,8),%rcx
 298         movq     %rcx,8(%rsi,%rdx,8)
 299         movq     (%rdi,%rdx,8),%rcx
 300         movq     %rcx,(%rsi,%rdx,8)
 301 3:      subq     $4,%rdx              # loop control: at least 4 qwords left?
 302         jge      2b                   #  yes: copy 4 more
 303         addq     $4,%rdx              #  no: undo the subtract
 304         jg       1b                   # 1-3 qwords left
 305         ret
 306 
 307         # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
 308         #                                                jlong* to,
 309         #                                                size_t count)
 310         # Equivalent to
 311         #   conjoint_jlongs_atomic
 312         #   arrayof_conjoint_oops
 313         #   conjoint_oops_atomic
 314         #
 315         # rdi - from
 316         # rsi - to
 317         # rdx - count, treated as ssize_t
 318         #
 319         .p2align 4,,15                # align the entry point to 16 bytes
 320         .type    _Copy_arrayof_conjoint_jlongs,@function
 321         .type    _Copy_conjoint_jlongs_atomic,@function
             # Copy %rdx 8-byte elements from %rdi to %rsi; the regions may
             # overlap.  Both labels below name this same code.
             #
             # Every element is moved with one movq load and one movq store;
             # there is no sub-qword tail to handle.
             #
             # Direction selection (unsigned pointer compares):
             #   to <= from                          -> acl_CopyRight (ascending)
             #   from < to <= address of last element -> acl_CopyLeft (descending)
             #   otherwise                           -> acl_CopyRight (falls through)
             #
             # Only integer registers are used.  %rax, %rcx, %rdx and %rsi are
             # overwritten; %rdi is only read.
 322 _Copy_arrayof_conjoint_jlongs:
 323 _Copy_conjoint_jlongs_atomic:
 324         cmpq     %rdi,%rsi            # flags <- to - from
 325         leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
             # %rax is both the overlap bound tested below and the source base
             # of the ascending copy.  leaq does not modify flags, so the jbe
             # below still tests the cmpq above.
 326         jbe      acl_CopyRight        # to <= from: ascending copy
 327         cmpq     %rax,%rsi            # to vs. address of the last source element
 328         jbe      acl_CopyLeft         # to lies inside the source: descending copy
 329 acl_CopyRight:
             # Ascending copy; %rdx counts from -count up to -1 against bases
             # that point at the last element.
 330         leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
 331         negq     %rdx                 # %rdx = -count
 332         jmp      3f                   # enter at the loop-control code
 333 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per pass (1-3 leftover qwords)
 334         movq     %rsi,8(%rcx,%rdx,8)
 335         addq     $1,%rdx
 336         jnz      1b
 337         ret
 338         .p2align 4,,15                # align the loop head
 339 2:      movq     -24(%rax,%rdx,8),%rsi  # unrolled: 4 qwords per pass, ascending
 340         movq     %rsi,-24(%rcx,%rdx,8)
 341         movq     -16(%rax,%rdx,8),%rsi
 342         movq     %rsi,-16(%rcx,%rdx,8)
 343         movq     -8(%rax,%rdx,8),%rsi
 344         movq     %rsi,-8(%rcx,%rdx,8)
 345         movq     (%rax,%rdx,8),%rsi
 346         movq     %rsi,(%rcx,%rdx,8)
 347 3:      addq     $4,%rdx              # loop control: at least 4 qwords left?
 348         jle      2b                   #  yes: copy 4 more
 349         subq     $4,%rdx              #  no: undo the add
 350         jl       1b                   # 1-3 qwords left
 351         ret
             # The two loops below belong to the descending copy.  They are
             # reached only through the branches after acl_CopyLeft; the ret
             # above keeps the ascending path from falling into them.
 352 4:      movq     -8(%rdi,%rdx,8),%rcx # one qword per pass (1-3 leftover), descending
 353         movq     %rcx,-8(%rsi,%rdx,8)
 354         subq     $1,%rdx
 355         jnz      4b
 356         ret
 357         .p2align 4,,15                # align the loop head
 358 5:      movq     24(%rdi,%rdx,8),%rcx # unrolled: 4 qwords per pass, descending
 359         movq     %rcx,24(%rsi,%rdx,8)
 360         movq     16(%rdi,%rdx,8),%rcx
 361         movq     %rcx,16(%rsi,%rdx,8)
 362         movq     8(%rdi,%rdx,8),%rcx
 363         movq     %rcx,8(%rsi,%rdx,8)
 364         movq     (%rdi,%rdx,8),%rcx
 365         movq     %rcx,(%rsi,%rdx,8)
             # Descending-copy entry point; it doubles as the loop control that
             # the unrolled loop above falls into.  %rsi is left intact on this
             # path; %rcx is the scratch register.
 366 acl_CopyLeft:
 367         subq     $4,%rdx              # at least 4 qwords left?
 368         jge      5b                   #  yes: copy 4 more
 369         addq     $4,%rdx              #  no: undo the subtract
 370         jg       4b                   # 1-3 qwords left
 371         ret