1 # 
   2 # Copyright (c) 2004, 2007, Oracle and/or its affiliates. All rights reserved.
   3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 #
   5 # This code is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License version 2 only, as
   7 # published by the Free Software Foundation.
   8 #
   9 # This code is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 # version 2 for more details (a copy is included in the LICENSE file that
  13 # accompanied this code).
  14 #
  15 # You should have received a copy of the GNU General Public License version
  16 # 2 along with this work; if not, write to the Free Software Foundation,
  17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 #
  19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 # or visit www.oracle.com if you need additional information or have any
  21 # questions.
  22 #
  23 
  24 
  25         # NOTE WELL!  The _Copy functions are called directly
  26         # from server-compiler-generated code via CallLeafNoFP,
  27         # which means that they *must* either not use floating
  28         # point or use it in the same manner as does the server
  29         # compiler.
  30         
  31         .globl _Copy_arrayof_conjoint_bytes
  32         .globl _Copy_arrayof_conjoint_jshorts
  33         .globl _Copy_conjoint_jshorts_atomic
  34         .globl _Copy_arrayof_conjoint_jints
  35         .globl _Copy_conjoint_jints_atomic
  36         .globl _Copy_arrayof_conjoint_jlongs
  37         .globl _Copy_conjoint_jlongs_atomic
  38 
  39         .text
  40 
  41         .globl SpinPause
  42         .align 16
  43         .type  SpinPause,@function
  44 SpinPause:
  45         rep                        # "rep" + "nop" assemble to F3 90, which is
  46         nop                        #  the encoding of PAUSE (spin-wait hint)
  47         movq   $1, %rax            # return value is always 1
  48         ret
  49 
  50         # Support for void Copy::arrayof_conjoint_bytes(void* from,
  51         #                                               void* to,
  52         #                                               size_t count)
  53         # rdi - from
  54         # rsi - to
  55         # rdx - count, treated as ssize_t
  56         # Scratch: rax, rcx, rdx, rsi, r8 and flags; rdi is never written.
  57         .p2align 4,,15
  58         .type    _Copy_arrayof_conjoint_bytes,@function
  59 _Copy_arrayof_conjoint_bytes:
  60         movq     %rdx,%r8             # byte count
  61         shrq     $3,%rdx              # qword count
  62         cmpq     %rdi,%rsi            # compare 'to' with 'from' (unsigned)
  63         leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1 (lea leaves flags intact)
  64         jbe      acb_CopyRight        # to <= from: ascending copy
  65         cmpq     %rax,%rsi            # compare 'to' with last source byte
  66         jbe      acb_CopyLeft         # 'to' lands inside source: descending copy
  67 acb_CopyRight:                        # ascending: rdx counts from -qcount up to 0
  68         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
  69         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
  70         negq     %rdx                 # rdx = -qcount
  71         jmp      7f                   # enter at the unrolled-loop test
  72         .p2align 4,,15
  73 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per pass (1..3 leftover qwords)
  74         movq     %rsi,8(%rcx,%rdx,8)
  75         addq     $1,%rdx
  76         jnz      1b
  77 2:      testq    $4,%r8               # check for trailing dword
  78         jz       3f
  79         movl     8(%rax),%esi         # copy trailing dword
  80         movl     %esi,8(%rcx)
  81         addq     $4,%rax              # step source base past the dword too
  82         addq     $4,%rcx              # original %rsi is trashed, so we
  83                                       #  can't use it as a base register
  84 3:      testq    $2,%r8               # check for trailing word
  85         jz       4f
  86         movw     8(%rax),%si          # copy trailing word
  87         movw     %si,8(%rcx)
  88         addq     $2,%rcx              # only 'to' base: byte load below uses rdi/r8
  89 4:      testq    $1,%r8               # check for trailing byte
  90         jz       5f
  91         movb     -1(%rdi,%r8,1),%al   # copy trailing byte
  92         movb     %al,8(%rcx)
  93 5:      ret
  94         .p2align 4,,15
  95 6:      movq     -24(%rax,%rdx,8),%rsi # unrolled body: four qwords per pass
  96         movq     %rsi,-24(%rcx,%rdx,8)
  97         movq     -16(%rax,%rdx,8),%rsi
  98         movq     %rsi,-16(%rcx,%rdx,8)
  99         movq     -8(%rax,%rdx,8),%rsi
 100         movq     %rsi,-8(%rcx,%rdx,8)
 101         movq     (%rax,%rdx,8),%rsi
 102         movq     %rsi,(%rcx,%rdx,8)
 103 7:      addq     $4,%rdx              # loop test: result <= 0 means >= 4 qwords left
 104         jle      6b
 105         subq     $4,%rdx              # undo the bias: rdx = -(leftover qwords)
 106         jl       1b                   # 1..3 qwords left
 107         jmp      2b                   # no qwords left: handle the tail
 108 acb_CopyLeft:                         # descending: tail first, then qwords high to low
 109         testq    $1,%r8               # check for trailing byte
 110         jz       1f
 111         movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
 112         movb     %cl,-1(%rsi,%r8,1)
 113         subq     $1,%r8               # adjust for possible trailing word
 114 1:      testq    $2,%r8               # check for trailing word
 115         jz       2f
 116         movw     -2(%rdi,%r8,1),%cx   # copy trailing word
 117         movw     %cx,-2(%rsi,%r8,1)
 118 2:      testq    $4,%r8               # check for trailing dword
 119         jz       5f
 120         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword (at from + qcount*8)
 121         movl     %ecx,(%rsi,%rdx,8)
 122         jmp      5f                   # enter at the unrolled-loop test
 123         .p2align 4,,15
 124 3:      movq     -8(%rdi,%rdx,8),%rcx # one qword per pass (1..3 leftover qwords)
 125         movq     %rcx,-8(%rsi,%rdx,8)
 126         subq     $1,%rdx
 127         jnz      3b
 128         ret
 129         .p2align 4,,15
 130 4:      movq     24(%rdi,%rdx,8),%rcx # unrolled body: four qwords, highest first
 131         movq     %rcx,24(%rsi,%rdx,8)
 132         movq     16(%rdi,%rdx,8),%rcx
 133         movq     %rcx,16(%rsi,%rdx,8)
 134         movq     8(%rdi,%rdx,8),%rcx
 135         movq     %rcx,8(%rsi,%rdx,8)
 136         movq     (%rdi,%rdx,8),%rcx
 137         movq     %rcx,(%rsi,%rdx,8)
 138 5:      subq     $4,%rdx              # loop test: result >= 0 means >= 4 qwords left
 139         jge      4b
 140         addq     $4,%rdx              # undo the bias: rdx = leftover qwords
 141         jg       3b                   # 1..3 qwords left
 142         ret
 143 
 144         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 145         #                                                 void* to,
 146         #                                                 size_t count)
 147         # Equivalent to
 148         #   conjoint_jshorts_atomic
 149         #
 150         # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
 151         # let the hardware handle it.  The two or four words within dwords
 152         # or qwords that span cache line boundaries will still be loaded
 153         # and stored atomically.
 154         #
 155         # rdi - from
 156         # rsi - to
 157         # rdx - count, treated as ssize_t
 158         # Scratch: rax, rcx, rdx, rsi, r8 and flags; rdi is never written.
 159         .p2align 4,,15
 160         .type    _Copy_arrayof_conjoint_jshorts,@function
 161         .type    _Copy_conjoint_jshorts_atomic,@function
 162 _Copy_arrayof_conjoint_jshorts:
 163 _Copy_conjoint_jshorts_atomic:
 164         movq     %rdx,%r8             # word count
 165         shrq     $2,%rdx              # qword count
 166         cmpq     %rdi,%rsi            # compare 'to' with 'from' (unsigned)
 167         leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2 (lea leaves flags intact)
 168         jbe      acs_CopyRight        # to <= from: ascending copy
 169         cmpq     %rax,%rsi            # compare 'to' with last source word
 170         jbe      acs_CopyLeft         # 'to' lands inside source: descending copy
 171 acs_CopyRight:                        # ascending: rdx counts from -qcount up to 0
 172         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
 173         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
 174         negq     %rdx                 # rdx = -qcount
 175         jmp      6f                   # enter at the unrolled-loop test
 176 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per pass (1..3 leftover qwords)
 177         movq     %rsi,8(%rcx,%rdx,8)
 178         addq     $1,%rdx
 179         jnz      1b
 180 2:      testq    $2,%r8               # check for trailing dword
 181         jz       3f
 182         movl     8(%rax),%esi         # copy trailing dword
 183         movl     %esi,8(%rcx)
 184         addq     $4,%rcx              # original %rsi is trashed, so we
 185                                       #  can't use it as a base register
 186 3:      testq    $1,%r8               # check for trailing word
 187         jz       4f
 188         movw     -2(%rdi,%r8,2),%si   # copy trailing word
 189         movw     %si,8(%rcx)
 190 4:      ret
 191         .p2align 4,,15
 192 5:      movq     -24(%rax,%rdx,8),%rsi # unrolled body: four qwords per pass
 193         movq     %rsi,-24(%rcx,%rdx,8)
 194         movq     -16(%rax,%rdx,8),%rsi
 195         movq     %rsi,-16(%rcx,%rdx,8)
 196         movq     -8(%rax,%rdx,8),%rsi
 197         movq     %rsi,-8(%rcx,%rdx,8)
 198         movq     (%rax,%rdx,8),%rsi
 199         movq     %rsi,(%rcx,%rdx,8)
 200 6:      addq     $4,%rdx              # loop test: result <= 0 means >= 4 qwords left
 201         jle      5b
 202         subq     $4,%rdx              # undo the bias: rdx = -(leftover qwords)
 203         jl       1b                   # 1..3 qwords left
 204         jmp      2b                   # no qwords left: handle the tail
 205 acs_CopyLeft:                         # descending: tail first, then qwords high to low
 206         testq    $1,%r8               # check for trailing word
 207         jz       1f
 208         movw     -2(%rdi,%r8,2),%cx   # copy trailing word
 209         movw     %cx,-2(%rsi,%r8,2)
 210 1:      testq    $2,%r8               # check for trailing dword
 211         jz       4f
 212         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword (at from + qcount*8)
 213         movl     %ecx,(%rsi,%rdx,8)
 214         jmp      4f                   # enter at the unrolled-loop test
 215 2:      movq     -8(%rdi,%rdx,8),%rcx # one qword per pass (1..3 leftover qwords)
 216         movq     %rcx,-8(%rsi,%rdx,8)
 217         subq     $1,%rdx
 218         jnz      2b
 219         ret
 220         .p2align 4,,15
 221 3:      movq     24(%rdi,%rdx,8),%rcx # unrolled body: four qwords, highest first
 222         movq     %rcx,24(%rsi,%rdx,8)
 223         movq     16(%rdi,%rdx,8),%rcx
 224         movq     %rcx,16(%rsi,%rdx,8)
 225         movq     8(%rdi,%rdx,8),%rcx
 226         movq     %rcx,8(%rsi,%rdx,8)
 227         movq     (%rdi,%rdx,8),%rcx
 228         movq     %rcx,(%rsi,%rdx,8)
 229 4:      subq     $4,%rdx              # loop test: result >= 0 means >= 4 qwords left
 230         jge      3b
 231         addq     $4,%rdx              # undo the bias: rdx = leftover qwords
 232         jg       2b                   # 1..3 qwords left
 233         ret
 234 
 235         # Support for void Copy::arrayof_conjoint_jints(jint* from,
 236         #                                               jint* to,
 237         #                                               size_t count)
 238         # Equivalent to
 239         #   conjoint_jints_atomic
 240         #
 241         # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
 242         # the hardware handle it.  The two dwords within qwords that span
 243         # cache line boundaries will still be loaded and stored atomically.
 244         #
 245         # rdi - from
 246         # rsi - to
 247         # rdx - count, treated as ssize_t
 248         # Scratch: rax, rcx, rdx, rsi, r8 and flags; rdi is never written.
 249         .p2align 4,,15
 250         .type    _Copy_arrayof_conjoint_jints,@function
 251         .type    _Copy_conjoint_jints_atomic,@function
 252 _Copy_arrayof_conjoint_jints:
 253 _Copy_conjoint_jints_atomic:
 254         movq     %rdx,%r8             # dword count
 255         shrq     %rdx                 # qword count (one-operand form shifts by 1)
 256         cmpq     %rdi,%rsi            # compare 'to' with 'from' (unsigned)
 257         leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4 (lea leaves flags intact)
 258         jbe      aci_CopyRight        # to <= from: ascending copy
 259         cmpq     %rax,%rsi            # compare 'to' with last source dword
 260         jbe      aci_CopyLeft         # 'to' lands inside source: descending copy
 261 aci_CopyRight:                        # ascending: rdx counts from -qcount up to 0
 262         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
 263         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
 264         negq     %rdx                 # rdx = -qcount
 265         jmp      5f                   # enter at the unrolled-loop test
 266         .p2align 4,,15
 267 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per pass (1..3 leftover qwords)
 268         movq     %rsi,8(%rcx,%rdx,8)
 269         addq     $1,%rdx
 270         jnz       1b
 271 2:      testq    $1,%r8               # check for trailing dword
 272         jz       3f
 273         movl     8(%rax),%esi         # copy trailing dword
 274         movl     %esi,8(%rcx)
 275 3:      ret
 276         .p2align 4,,15
 277 4:      movq     -24(%rax,%rdx,8),%rsi # unrolled body: four qwords per pass
 278         movq     %rsi,-24(%rcx,%rdx,8)
 279         movq     -16(%rax,%rdx,8),%rsi
 280         movq     %rsi,-16(%rcx,%rdx,8)
 281         movq     -8(%rax,%rdx,8),%rsi
 282         movq     %rsi,-8(%rcx,%rdx,8)
 283         movq     (%rax,%rdx,8),%rsi
 284         movq     %rsi,(%rcx,%rdx,8)
 285 5:      addq     $4,%rdx              # loop test: result <= 0 means >= 4 qwords left
 286         jle      4b
 287         subq     $4,%rdx              # undo the bias: rdx = -(leftover qwords)
 288         jl       1b                   # 1..3 qwords left
 289         jmp      2b                   # no qwords left: handle the tail
 290 aci_CopyLeft:                         # descending: tail first, then qwords high to low
 291         testq    $1,%r8               # check for trailing dword
 292         jz       3f
 293         movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
 294         movl     %ecx,-4(%rsi,%r8,4)
 295         jmp      3f                   # enter at the unrolled-loop test
 296 1:      movq     -8(%rdi,%rdx,8),%rcx # one qword per pass (1..3 leftover qwords)
 297         movq     %rcx,-8(%rsi,%rdx,8)
 298         subq     $1,%rdx
 299         jnz      1b
 300         ret
 301         .p2align 4,,15
 302 2:      movq     24(%rdi,%rdx,8),%rcx # unrolled body: four qwords, highest first
 303         movq     %rcx,24(%rsi,%rdx,8)
 304         movq     16(%rdi,%rdx,8),%rcx
 305         movq     %rcx,16(%rsi,%rdx,8)
 306         movq     8(%rdi,%rdx,8),%rcx
 307         movq     %rcx,8(%rsi,%rdx,8)
 308         movq     (%rdi,%rdx,8),%rcx
 309         movq     %rcx,(%rsi,%rdx,8)
 310 3:      subq     $4,%rdx              # loop test: result >= 0 means >= 4 qwords left
 311         jge      2b
 312         addq     $4,%rdx              # undo the bias: rdx = leftover qwords
 313         jg       1b                   # 1..3 qwords left
 314         ret
 315 
 316         # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
 317         #                                                jlong* to,
 318         #                                                size_t count)
 319         # Equivalent to
 320         #   conjoint_jlongs_atomic
 321         #   arrayof_conjoint_oops
 322         #   conjoint_oops_atomic
 323         #
 324         # rdi - from
 325         # rsi - to
 326         # rdx - count, treated as ssize_t
 327         # Scratch: rax, rcx, rdx, rsi and flags; rdi is never written.
 328         .p2align 4,,15
 329         .type    _Copy_arrayof_conjoint_jlongs,@function
 330         .type    _Copy_conjoint_jlongs_atomic,@function
 331 _Copy_arrayof_conjoint_jlongs:
 332 _Copy_conjoint_jlongs_atomic:
 333         cmpq     %rdi,%rsi            # compare 'to' with 'from' (unsigned)
 334         leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
 335         jbe      acl_CopyRight        # to <= from: ascending copy (lea kept the flags)
 336         cmpq     %rax,%rsi            # compare 'to' with last source qword
 337         jbe      acl_CopyLeft         # 'to' lands inside source: descending copy
 338 acl_CopyRight:                        # ascending: rax from above is the source base
 339         leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
 340         negq     %rdx                 # rdx = -count, counts up to 0
 341         jmp      3f                   # enter at the unrolled-loop test
 342 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per pass (1..3 leftover qwords)
 343         movq     %rsi,8(%rcx,%rdx,8)
 344         addq     $1,%rdx
 345         jnz      1b
 346         ret
 347         .p2align 4,,15
 348 2:      movq     -24(%rax,%rdx,8),%rsi # unrolled body: four qwords per pass
 349         movq     %rsi,-24(%rcx,%rdx,8)
 350         movq     -16(%rax,%rdx,8),%rsi
 351         movq     %rsi,-16(%rcx,%rdx,8)
 352         movq     -8(%rax,%rdx,8),%rsi
 353         movq     %rsi,-8(%rcx,%rdx,8)
 354         movq     (%rax,%rdx,8),%rsi
 355         movq     %rsi,(%rcx,%rdx,8)
 356 3:      addq     $4,%rdx              # loop test: result <= 0 means >= 4 qwords left
 357         jle      2b
 358         subq     $4,%rdx              # undo the bias: rdx = -(leftover qwords)
 359         jl       1b                   # 1..3 qwords left
 360         ret
 361 4:      movq     -8(%rdi,%rdx,8),%rcx # descending: one qword per pass (1..3 leftover)
 362         movq     %rcx,-8(%rsi,%rdx,8)
 363         subq     $1,%rdx
 364         jnz      4b
 365         ret
 366         .p2align 4,,15
 367 5:      movq     24(%rdi,%rdx,8),%rcx # descending unrolled body: four qwords, highest first
 368         movq     %rcx,24(%rsi,%rdx,8)
 369         movq     16(%rdi,%rdx,8),%rcx
 370         movq     %rcx,16(%rsi,%rdx,8)
 371         movq     8(%rdi,%rdx,8),%rcx
 372         movq     %rcx,8(%rsi,%rdx,8)
 373         movq     (%rdi,%rdx,8),%rcx
 374         movq     %rcx,(%rsi,%rdx,8)
 375 acl_CopyLeft:                         # descending entry point, also the loop test; rdx = count
 376         subq     $4,%rdx              # result >= 0 means >= 4 qwords left
 377         jge      5b
 378         addq     $4,%rdx              # undo the bias: rdx = leftover qwords
 379         jg       4b                   # 1..3 qwords left
 380         ret