1 # 
   2 # Copyright (c) 2004, 2007, Oracle and/or its affiliates. All rights reserved.
   3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 #
   5 # This code is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License version 2 only, as
   7 # published by the Free Software Foundation.
   8 #
   9 # This code is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 # version 2 for more details (a copy is included in the LICENSE file that
  13 # accompanied this code).
  14 #
  15 # You should have received a copy of the GNU General Public License version
  16 # 2 along with this work; if not, write to the Free Software Foundation,
  17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 #
  19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 # or visit www.oracle.com if you need additional information or have any
  21 # questions.
  22 #
  23 
  24 
  25         # NOTE WELL!  The _Copy functions are called directly
  26         # from server-compiler-generated code via CallLeafNoFP,
  27         # which means that they *must* either not use floating
  28         # point or use it in the same manner as does the server
  29         # compiler.
             #
             # Every routine visible in this part of the file works purely
             # with general-purpose integer registers.
  30         
             # Export the copy entry points defined further down so that
             # code in other object files can reference them.
  31         .globl _Copy_arrayof_conjoint_bytes
  32         .globl _Copy_arrayof_conjoint_jshorts
  33         .globl _Copy_conjoint_jshorts_atomic
  34         .globl _Copy_arrayof_conjoint_jints
  35         .globl _Copy_conjoint_jints_atomic
  36         .globl _Copy_arrayof_conjoint_jlongs
  37         .globl _Copy_conjoint_jlongs_atomic
  38 
             # Everything that follows is assembled into the code section.
  39         .text
  40 
  41         .globl SafeFetch32, Fetch32PFI, Fetch32Resume
  42         .align  16
  43         .type   SafeFetch32,@function
  44         // Prototype: int SafeFetch32 (int * Adr, int ErrValue) 
             #
             # Reads the 32-bit value stored at Adr and returns it in %eax.
             #   rdi - Adr      (address that is dereferenced)
             #   esi - ErrValue (copied into %eax before the load is tried)
             #
             # Besides the entry point, two more labels are exported:
             #   Fetch32PFI    - the single instruction that dereferences Adr
             #   Fetch32Resume - the instruction immediately after that load
             # NOTE(review): presumably a fault handler elsewhere recognises a
             # fault at Fetch32PFI and continues at Fetch32Resume, so that the
             # preloaded ErrValue is what gets returned.  That handler is not
             # part of this file -- confirm.  If so, both labels have to stay
             # attached to exactly these instructions.
  45 SafeFetch32:
  46         movl    %esi, %eax              # eax = ErrValue
  47 Fetch32PFI:
  48         movl    (%rdi), %eax            # eax = *Adr (replaces ErrValue)
  49 Fetch32Resume:
  50         ret                             # result is in eax
  51 
  52         .globl SafeFetchN, FetchNPFI, FetchNResume
  53         .align  16
  54         .type   SafeFetchN,@function
  55         // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue) 
             #
             # 64-bit counterpart of SafeFetch32: reads the qword stored at Adr
             # and returns it in %rax.
             #   rdi - Adr      (address that is dereferenced)
             #   rsi - ErrValue (copied into %rax before the load is tried)
             #
             # Besides the entry point, two more labels are exported:
             #   FetchNPFI    - the single instruction that dereferences Adr
             #   FetchNResume - the instruction immediately after that load
             # NOTE(review): presumably a fault handler elsewhere recognises a
             # fault at FetchNPFI and continues at FetchNResume, so that the
             # preloaded ErrValue is what gets returned.  That handler is not
             # part of this file -- confirm.  If so, both labels have to stay
             # attached to exactly these instructions.
  56 SafeFetchN:
  57         movq    %rsi, %rax              # rax = ErrValue
  58 FetchNPFI:
  59         movq    (%rdi), %rax            # rax = *Adr (replaces ErrValue)
  60 FetchNResume:
  61         ret                             # result is in rax
  62 
  63         .globl SpinPause
  64         .align 16
  65         .type  SpinPause,@function
             #
             # SpinPause: executes one spin-wait hint and returns 1 in %rax.
             # No argument register is read.
             #
             # The "rep" prefix followed by "nop" assembles to the byte pair
             # F3 90, which is the encoding of the x86 PAUSE instruction.
             # NOTE(review): what callers do with the constant return value 1
             # cannot be seen from this file.
  66 SpinPause:
  67         rep                             # F3 prefix ...
  68         nop                             # ... + 90 = PAUSE
  69         movq   $1, %rax                 # return value: 1
  70         ret
  71 
  72         # Support for void Copy::arrayof_conjoint_bytes(void* from,
  73         #                                               void* to,
  74         #                                               size_t count)
  75         # rdi - from
  76         # rsi - to
  77         # rdx - count, treated as ssize_t
  78         #
             # Copies 'count' bytes from 'from' to 'to'.  The two ranges may
             # overlap, so the direction is picked first, using unsigned
             # pointer comparisons (jbe):
             #   to <= from                 -> ascending  (acb_CopyRight)
             #   from < to <= from+count-1  -> descending (acb_CopyLeft)
             #   to >  from+count-1         -> ascending  (acb_CopyRight)
             #
             # The bulk is moved as qwords, four per pass while at least four
             # remain and singly after that.  The 0-7 bytes beyond the last
             # whole qword are moved as at most one dword, one word and one
             # byte: after the qwords when ascending, before them when
             # descending.
             #
             # Registers written: rax, rcx, rdx, rsi, r8 and the flags.
             # rdi is never modified.
             #
  79         .p2align 4,,15
  80         .type    _Copy_arrayof_conjoint_bytes,@function
  81 _Copy_arrayof_conjoint_bytes:
  82         movq     %rdx,%r8             # byte count
  83         shrq     $3,%rdx              # qword count
  84         cmpq     %rdi,%rsi            # to vs. from; flags consumed by the jbe below
  85         leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1 (lea leaves the flags alone)
  86         jbe      acb_CopyRight        # to <= from: ascending is safe
  87         cmpq     %rax,%rsi            # to vs. last source byte
  88         jbe      acb_CopyLeft         # to lies inside the source: go descending
  89 acb_CopyRight:
             # Ascending copy.  rdx becomes -qcount and counts up to zero;
             # the qword accesses are indexed by rdx off rax (source) and
             # rcx (destination).
  90         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
  91         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
  92         negq     %rdx                 # rdx = -qcount
  93         jmp      7f                   # enter at the loop test
  94         .p2align 4,,15
             # One qword per pass; reached with 1-3 qwords left (rdx = -3..-1).
  95 1:      movq     8(%rax,%rdx,8),%rsi
  96         movq     %rsi,8(%rcx,%rdx,8)
  97         addq     $1,%rdx
  98         jnz      1b
             # All qwords are done.  8(%rax) / 8(%rcx) are the first source /
             # destination bytes that follow them.
  99 2:      testq    $4,%r8               # check for trailing dword
 100         jz       3f
 101         movl     8(%rax),%esi         # copy trailing dword
 102         movl     %esi,8(%rcx)
 103         addq     $4,%rax              # step the source cursor past the dword
 104         addq     $4,%rcx              # original %rsi is trashed, so we
 105                                       #  can't use it as a base register
 106 3:      testq    $2,%r8               # check for trailing word
 107         jz       4f
 108         movw     8(%rax),%si          # copy trailing word
 109         movw     %si,8(%rcx)
 110         addq     $2,%rcx              # only the destination cursor moves; the byte
                                           #  below is read through rdi/r8 instead
 111 4:      testq    $1,%r8               # check for trailing byte
 112         jz       5f
 113         movb     -1(%rdi,%r8,1),%al   # copy trailing byte (from + bcount - 1)
 114         movb     %al,8(%rcx)
 115 5:      ret
 116         .p2align 4,,15
             # Four qwords per pass.  rdx has already been advanced by 4 at
             # label 7, hence the -24..0 displacements.
 117 6:      movq     -24(%rax,%rdx,8),%rsi
 118         movq     %rsi,-24(%rcx,%rdx,8)
 119         movq     -16(%rax,%rdx,8),%rsi
 120         movq     %rsi,-16(%rcx,%rdx,8)
 121         movq     -8(%rax,%rdx,8),%rsi
 122         movq     %rsi,-8(%rcx,%rdx,8)
 123         movq     (%rax,%rdx,8),%rsi
 124         movq     %rsi,(%rcx,%rdx,8)
 125 7:      addq     $4,%rdx              # claim four qwords
 126         jle      6b                   # still <= 0: at least four were left
 127         subq     $4,%rdx              # undo the claim
 128         jl       1b                   # 1-3 qwords left
 129         jmp      2b                   # none left: handle the tail
 130 acb_CopyLeft:
             # Descending copy.  The tail pieces (highest addresses) go first,
             # then the qwords from the top down.  rdi/rsi stay the original
             # from/to throughout; rcx is the data temporary.
 131         testq    $1,%r8               # check for trailing byte
 132         jz       1f
 133         movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
 134         movb     %cl,-1(%rsi,%r8,1)
 135         subq     $1,%r8               # adjust for possible trailing word
 136 1:      testq    $2,%r8               # check for trailing word
 137         jz       2f
 138         movw     -2(%rdi,%r8,1),%cx   # copy trailing word
 139         movw     %cx,-2(%rsi,%r8,1)
 140 2:      testq    $4,%r8               # check for trailing dword
 141         jz       5f                   # none: go to the qword loops
 142         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
 143         movl     %ecx,(%rsi,%rdx,8)   #  (it sits right above the qwords)
 144         jmp      5f                   # enter at the loop test
 145         .p2align 4,,15
             # One qword per pass, highest first; rdx counts down to zero.
 146 3:      movq     -8(%rdi,%rdx,8),%rcx
 147         movq     %rcx,-8(%rsi,%rdx,8)
 148         subq     $1,%rdx
 149         jnz      3b
 150         ret
 151         .p2align 4,,15
             # Four qwords per pass, highest first.  rdx has already been
             # reduced by 4 at label 5, hence the 24..0 displacements.
 152 4:      movq     24(%rdi,%rdx,8),%rcx
 153         movq     %rcx,24(%rsi,%rdx,8)
 154         movq     16(%rdi,%rdx,8),%rcx
 155         movq     %rcx,16(%rsi,%rdx,8)
 156         movq     8(%rdi,%rdx,8),%rcx
 157         movq     %rcx,8(%rsi,%rdx,8)
 158         movq     (%rdi,%rdx,8),%rcx
 159         movq     %rcx,(%rsi,%rdx,8)
 160 5:      subq     $4,%rdx              # claim four qwords
 161         jge      4b                   # still >= 0: at least four were left
 162         addq     $4,%rdx              # undo the claim
 163         jg       3b                   # 1-3 qwords left
 164         ret
 165 
 166         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 167         #                                                 void* to,
 168         #                                                 size_t count)
 169         # Equivalent to
 170         #   conjoint_jshorts_atomic
 171         #
 172         # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
 173         # let the hardware handle it.  The two or four words within dwords
 174         # or qwords that span cache line boundaries will still be loaded
 175         # and stored atomically.
 176         #
 177         # rdi - from
 178         # rsi - to
 179         # rdx - count, treated as ssize_t
 180         #
             # 'count' is a number of 16-bit words.  The ranges may overlap,
             # so the direction is picked first, using unsigned pointer
             # comparisons (jbe):
             #   to <= from                      -> ascending  (acs_CopyRight)
             #   from < to <= last source word   -> descending (acs_CopyLeft)
             #   to >  last source word          -> ascending  (acs_CopyRight)
             #
             # The bulk is moved as qwords (four words each), four qwords per
             # pass while at least four remain and singly after that.  The
             # 0-3 leftover words are moved as at most one dword and one word.
             #
             # Registers written: rax, rcx, rdx, rsi, r8 and the flags.
             # rdi is never modified.
             #
 181         .p2align 4,,15
 182         .type    _Copy_arrayof_conjoint_jshorts,@function
 183         .type    _Copy_conjoint_jshorts_atomic,@function
 184 _Copy_arrayof_conjoint_jshorts:
 185 _Copy_conjoint_jshorts_atomic:
 186         movq     %rdx,%r8             # word count
 187         shrq     $2,%rdx              # qword count
 188         cmpq     %rdi,%rsi            # to vs. from; flags consumed by the jbe below
 189         leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2 (lea leaves the flags alone)
 190         jbe      acs_CopyRight        # to <= from: ascending is safe
 191         cmpq     %rax,%rsi            # to vs. last source word
 192         jbe      acs_CopyLeft         # to lies inside the source: go descending
 193 acs_CopyRight:
             # Ascending copy.  rdx becomes -qcount and counts up to zero;
             # the qword accesses are indexed by rdx off rax (source) and
             # rcx (destination).
 194         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
 195         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
 196         negq     %rdx                 # rdx = -qcount
 197         jmp      6f                   # enter at the loop test
             # One qword per pass; reached with 1-3 qwords left (rdx = -3..-1).
 198 1:      movq     8(%rax,%rdx,8),%rsi
 199         movq     %rsi,8(%rcx,%rdx,8)
 200         addq     $1,%rdx
 201         jnz      1b
             # All qwords are done.  8(%rax) / 8(%rcx) are the first source /
             # destination bytes that follow them.
 202 2:      testq    $2,%r8               # check for trailing dword
 203         jz       3f
 204         movl     8(%rax),%esi         # copy trailing dword
 205         movl     %esi,8(%rcx)
 206         addq     $4,%rcx              # original %rsi is trashed, so we
 207                                       #  can't use it as a base register
 208 3:      testq    $1,%r8               # check for trailing word
 209         jz       4f
 210         movw     -2(%rdi,%r8,2),%si   # copy trailing word
 211         movw     %si,8(%rcx)
 212 4:      ret
 213         .p2align 4,,15
             # Four qwords per pass.  rdx has already been advanced by 4 at
             # label 6, hence the -24..0 displacements.
 214 5:      movq     -24(%rax,%rdx,8),%rsi
 215         movq     %rsi,-24(%rcx,%rdx,8)
 216         movq     -16(%rax,%rdx,8),%rsi
 217         movq     %rsi,-16(%rcx,%rdx,8)
 218         movq     -8(%rax,%rdx,8),%rsi
 219         movq     %rsi,-8(%rcx,%rdx,8)
 220         movq     (%rax,%rdx,8),%rsi
 221         movq     %rsi,(%rcx,%rdx,8)
 222 6:      addq     $4,%rdx              # claim four qwords
 223         jle      5b                   # still <= 0: at least four were left
 224         subq     $4,%rdx              # undo the claim
 225         jl       1b                   # 1-3 qwords left
 226         jmp      2b                   # none left: handle the tail
 227 acs_CopyLeft:
             # Descending copy.  The tail pieces (highest addresses) go first,
             # then the qwords from the top down.  rdi/rsi stay the original
             # from/to throughout; rcx is the data temporary.
 228         testq    $1,%r8               # check for trailing word
 229         jz       1f
 230         movw     -2(%rdi,%r8,2),%cx   # copy trailing word
 231         movw     %cx,-2(%rsi,%r8,2)
 232 1:      testq    $2,%r8               # check for trailing dword
 233         jz       4f                   # none: go to the qword loops
 234         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
 235         movl     %ecx,(%rsi,%rdx,8)   #  (it sits right above the qwords)
 236         jmp      4f                   # enter at the loop test
             # One qword per pass, highest first; rdx counts down to zero.
 237 2:      movq     -8(%rdi,%rdx,8),%rcx
 238         movq     %rcx,-8(%rsi,%rdx,8)
 239         subq     $1,%rdx
 240         jnz      2b
 241         ret
 242         .p2align 4,,15
             # Four qwords per pass, highest first.  rdx has already been
             # reduced by 4 at label 4, hence the 24..0 displacements.
 243 3:      movq     24(%rdi,%rdx,8),%rcx
 244         movq     %rcx,24(%rsi,%rdx,8)
 245         movq     16(%rdi,%rdx,8),%rcx
 246         movq     %rcx,16(%rsi,%rdx,8)
 247         movq     8(%rdi,%rdx,8),%rcx
 248         movq     %rcx,8(%rsi,%rdx,8)
 249         movq     (%rdi,%rdx,8),%rcx
 250         movq     %rcx,(%rsi,%rdx,8)
 251 4:      subq     $4,%rdx              # claim four qwords
 252         jge      3b                   # still >= 0: at least four were left
 253         addq     $4,%rdx              # undo the claim
 254         jg       2b                   # 1-3 qwords left
 255         ret
 256 
 257         # Support for void Copy::arrayof_conjoint_jints(jint* from,
 258         #                                               jint* to,
 259         #                                               size_t count)
 260         # Equivalent to
 261         #   conjoint_jints_atomic
 262         #
 263         # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
 264         # the hardware handle it.  The two dwords within qwords that span
 265         # cache line boundaries will still be loaded and stored atomically.
 266         #
 267         # rdi - from
 268         # rsi - to
 269         # rdx - count, treated as ssize_t
 270         #
             # 'count' is a number of 32-bit dwords.  The ranges may overlap,
             # so the direction is picked first, using unsigned pointer
             # comparisons (jbe):
             #   to <= from                      -> ascending  (aci_CopyRight)
             #   from < to <= last source dword  -> descending (aci_CopyLeft)
             #   to >  last source dword         -> ascending  (aci_CopyRight)
             #
             # The bulk is moved as qwords (two dwords each), four qwords per
             # pass while at least four remain and singly after that.  An odd
             # count leaves one dword, which is moved on its own.
             #
             # Registers written: rax, rcx, rdx, rsi, r8 and the flags.
             # rdi is never modified.
             #
 271         .p2align 4,,15
 272         .type    _Copy_arrayof_conjoint_jints,@function
 273         .type    _Copy_conjoint_jints_atomic,@function
 274 _Copy_arrayof_conjoint_jints:
 275 _Copy_conjoint_jints_atomic:
 276         movq     %rdx,%r8             # dword count
 277         shrq     %rdx                 # qword count (one-operand form shifts by 1)
 278         cmpq     %rdi,%rsi            # to vs. from; flags consumed by the jbe below
 279         leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4 (lea leaves the flags alone)
 280         jbe      aci_CopyRight        # to <= from: ascending is safe
 281         cmpq     %rax,%rsi            # to vs. last source dword
 282         jbe      aci_CopyLeft         # to lies inside the source: go descending
 283 aci_CopyRight:
             # Ascending copy.  rdx becomes -qcount and counts up to zero;
             # the qword accesses are indexed by rdx off rax (source) and
             # rcx (destination).
 284         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
 285         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
 286         negq     %rdx                 # rdx = -qcount
 287         jmp      5f                   # enter at the loop test
 288         .p2align 4,,15
             # One qword per pass; reached with 1-3 qwords left (rdx = -3..-1).
 289 1:      movq     8(%rax,%rdx,8),%rsi
 290         movq     %rsi,8(%rcx,%rdx,8)
 291         addq     $1,%rdx
 292         jnz       1b
             # All qwords are done.  8(%rax) / 8(%rcx) are the first source /
             # destination bytes that follow them.
 293 2:      testq    $1,%r8               # check for trailing dword
 294         jz       3f
 295         movl     8(%rax),%esi         # copy trailing dword
 296         movl     %esi,8(%rcx)
 297 3:      ret
 298         .p2align 4,,15
             # Four qwords per pass.  rdx has already been advanced by 4 at
             # label 5, hence the -24..0 displacements.
 299 4:      movq     -24(%rax,%rdx,8),%rsi
 300         movq     %rsi,-24(%rcx,%rdx,8)
 301         movq     -16(%rax,%rdx,8),%rsi
 302         movq     %rsi,-16(%rcx,%rdx,8)
 303         movq     -8(%rax,%rdx,8),%rsi
 304         movq     %rsi,-8(%rcx,%rdx,8)
 305         movq     (%rax,%rdx,8),%rsi
 306         movq     %rsi,(%rcx,%rdx,8)
 307 5:      addq     $4,%rdx              # claim four qwords
 308         jle      4b                   # still <= 0: at least four were left
 309         subq     $4,%rdx              # undo the claim
 310         jl       1b                   # 1-3 qwords left
 311         jmp      2b                   # none left: handle the tail
 312 aci_CopyLeft:
             # Descending copy.  The odd trailing dword (highest address) goes
             # first, then the qwords from the top down.  rdi/rsi stay the
             # original from/to throughout; rcx is the data temporary.
 313         testq    $1,%r8               # check for trailing dword
 314         jz       3f                   # none: go to the qword loops
 315         movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
 316         movl     %ecx,-4(%rsi,%r8,4)
 317         jmp      3f                   # enter at the loop test
             # One qword per pass, highest first; rdx counts down to zero.
 318 1:      movq     -8(%rdi,%rdx,8),%rcx
 319         movq     %rcx,-8(%rsi,%rdx,8)
 320         subq     $1,%rdx
 321         jnz      1b
 322         ret
 323         .p2align 4,,15
             # Four qwords per pass, highest first.  rdx has already been
             # reduced by 4 at label 3, hence the 24..0 displacements.
 324 2:      movq     24(%rdi,%rdx,8),%rcx
 325         movq     %rcx,24(%rsi,%rdx,8)
 326         movq     16(%rdi,%rdx,8),%rcx
 327         movq     %rcx,16(%rsi,%rdx,8)
 328         movq     8(%rdi,%rdx,8),%rcx
 329         movq     %rcx,8(%rsi,%rdx,8)
 330         movq     (%rdi,%rdx,8),%rcx
 331         movq     %rcx,(%rsi,%rdx,8)
 332 3:      subq     $4,%rdx              # claim four qwords
 333         jge      2b                   # still >= 0: at least four were left
 334         addq     $4,%rdx              # undo the claim
 335         jg       1b                   # 1-3 qwords left
 336         ret
 337 
 338         # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
 339         #                                                jlong* to,
 340         #                                                size_t count)
 341         # Equivalent to
 342         #   conjoint_jlongs_atomic
 343         #   arrayof_conjoint_oops
 344         #   conjoint_oops_atomic
 345         #
 346         # rdi - from
 347         # rsi - to
 348         # rdx - count, treated as ssize_t
 349         #
             # 'count' is a number of 64-bit qwords, so there is no sub-qword
             # tail to handle.  The ranges may overlap, so the direction is
             # picked first, using unsigned pointer comparisons (jbe):
             #   to <= from                      -> ascending  (acl_CopyRight)
             #   from < to <= last source qword  -> descending (acl_CopyLeft)
             #   to >  last source qword         -> ascending  (acl_CopyRight)
             #
             # Qwords are moved four per pass while at least four remain and
             # singly after that.
             #
             # Registers written: rax, rcx, rdx, rsi and the flags.
             # rdi is never modified.
             #
 350         .p2align 4,,15
 351         .type    _Copy_arrayof_conjoint_jlongs,@function
 352         .type    _Copy_conjoint_jlongs_atomic,@function
 353 _Copy_arrayof_conjoint_jlongs:
 354 _Copy_conjoint_jlongs_atomic:
 355         cmpq     %rdi,%rsi            # to vs. from; flags consumed by the jbe below
 356         leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
 357         jbe      acl_CopyRight        # to <= from: ascending is safe
 358         cmpq     %rax,%rsi            # to vs. last source qword
 359         jbe      acl_CopyLeft         # to lies inside the source: go descending
 360 acl_CopyRight:
             # Ascending copy.  rax (computed above) and rcx address the last
             # source / destination qword; rdx becomes -count and counts up
             # to zero.
 361         leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
 362         negq     %rdx                 # rdx = -count
 363         jmp      3f                   # enter at the loop test
             # One qword per pass; reached with 1-3 qwords left (rdx = -3..-1).
 364 1:      movq     8(%rax,%rdx,8),%rsi
 365         movq     %rsi,8(%rcx,%rdx,8)
 366         addq     $1,%rdx
 367         jnz      1b
 368         ret
 369         .p2align 4,,15
             # Four qwords per pass.  rdx has already been advanced by 4 at
             # label 3, hence the -24..0 displacements.
 370 2:      movq     -24(%rax,%rdx,8),%rsi
 371         movq     %rsi,-24(%rcx,%rdx,8)
 372         movq     -16(%rax,%rdx,8),%rsi
 373         movq     %rsi,-16(%rcx,%rdx,8)
 374         movq     -8(%rax,%rdx,8),%rsi
 375         movq     %rsi,-8(%rcx,%rdx,8)
 376         movq     (%rax,%rdx,8),%rsi
 377         movq     %rsi,(%rcx,%rdx,8)
 378 3:      addq     $4,%rdx              # claim four qwords
 379         jle      2b                   # still <= 0: at least four were left
 380         subq     $4,%rdx              # undo the claim
 381         jl       1b                   # 1-3 qwords left
 382         ret
             # Descending copy, one qword per pass, highest first; rdx counts
             # down to zero.  Entered only from the acl_CopyLeft loop test.
 383 4:      movq     -8(%rdi,%rdx,8),%rcx
 384         movq     %rcx,-8(%rsi,%rdx,8)
 385         subq     $1,%rdx
 386         jnz      4b
 387         ret
 388         .p2align 4,,15
             # Descending copy, four qwords per pass, highest first.  rdx has
             # already been reduced by 4 at acl_CopyLeft, hence the 24..0
             # displacements.
 389 5:      movq     24(%rdi,%rdx,8),%rcx
 390         movq     %rcx,24(%rsi,%rdx,8)
 391         movq     16(%rdi,%rdx,8),%rcx
 392         movq     %rcx,16(%rsi,%rdx,8)
 393         movq     8(%rdi,%rdx,8),%rcx
 394         movq     %rcx,8(%rsi,%rdx,8)
 395         movq     (%rdi,%rdx,8),%rcx
 396         movq     %rcx,(%rsi,%rdx,8)
             # Entry point of the descending copy; it doubles as the loop
             # test for the two loops above.  rdi/rsi are the original
             # from/to and rdx is the number of qwords still to copy.
 397 acl_CopyLeft:
 398         subq     $4,%rdx              # claim four qwords
 399         jge      5b                   # still >= 0: at least four were left
 400         addq     $4,%rdx              # undo the claim
 401         jg       4b                   # 1-3 qwords left
 402         ret