1 # 
   2 # Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
   3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 #
   5 # This code is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License version 2 only, as
   7 # published by the Free Software Foundation.
   8 #
   9 # This code is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 # version 2 for more details (a copy is included in the LICENSE file that
  13 # accompanied this code).
  14 #
  15 # You should have received a copy of the GNU General Public License version
  16 # 2 along with this work; if not, write to the Free Software Foundation,
  17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 #
  19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 # or visit www.oracle.com if you need additional information or have any
  21 # questions.
  22 #
  23 
  24 #ifdef __APPLE__
     # Branch taken when __APPLE__ is defined: SYMBOL(s) pastes a leading
     # underscore onto s, and ELF_TYPE(...) expands to nothing, so no
     # .type directive is emitted.
  25 # Darwin uses _ prefixed global symbols
  26 #define SYMBOL(s) _ ## s
  27 #define ELF_TYPE(name, description)
  28 #else
     # All other targets: SYMBOL(s) is s unchanged, and ELF_TYPE(name,
     # description) emits a .type directive for name.
  29 #define SYMBOL(s) s
  30 #define ELF_TYPE(name, description) .type name,description
  31 #endif
  32 
  33         # NOTE WELL!  The _Copy functions are called directly
  34         # from server-compiler-generated code via CallLeafNoFP,
  35         # which means that they *must* either not use floating
  36         # point or use it in the same manner as does the server
  37         # compiler.
             #
             # The copy routines visible below meet this requirement by
             # using only general-purpose registers.
  38         
             # Export the seven copy entry points.  The jshorts, jints and
             # jlongs routines are each defined under two names (the
             # arrayof_ name and the _atomic name label the same address).
  39         .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
  40         .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
  41         .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
  42         .globl SYMBOL(_Copy_arrayof_conjoint_jints)
  43         .globl SYMBOL(_Copy_conjoint_jints_atomic)
  44         .globl SYMBOL(_Copy_arrayof_conjoint_jlongs)
  45         .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
  46 
             # Switch to the code section for the routines that follow.
  47         .text
  48 
  49         .globl SYMBOL(SpinPause)
             # SpinPause: execute one spin-wait hint and return 1 in rax.
             #
             # Reads no registers and uses no stack; the only register
             # written is rax.
             # NOTE(review): the C prototype and the meaning of the return
             # value are not visible in this file - confirm against the
             # declaration used by the callers.
             #
             # Entry is aligned to 16 bytes (.p2align 4, up to 15 pad bytes).
  50         .p2align 4,,15
  51         ELF_TYPE(SpinPause,@function)
  52 SYMBOL(SpinPause):
  53         rep                           # rep prefix + nop = bytes F3 90,
  54         nop                           #  i.e. the PAUSE instruction
  55         movq   $1, %rax               # return value 1
  56         ret
  57 
  58         # Support for void Copy::arrayof_conjoint_bytes(void* from,
  59         #                                               void* to,
  60         #                                               size_t count)
  61         # rdi - from
  62         # rsi - to
  63         # rdx - count, treated as ssize_t
  64         #
             # Copies count bytes from 'from' to 'to'; the two ranges may
             # overlap.  The copy direction is chosen at entry with two
             # unsigned comparisons (jbe):
             #   to <= from                  -> acb_CopyRight (ascending)
             #   from < to <= from+count-1   -> acb_CopyLeft  (descending)
             #   to >  from+count-1          -> acb_CopyRight (ascending)
             #
             # The bulk is moved as 8-byte qwords: four per pass while at
             # least four remain, then one at a time.  Bits 2, 1 and 0 of
             # the byte count select a trailing dword, word and byte.
             #
             # Registers written: rax, rcx, rdx, rsi, r8 and the flags.
             # rdi is never modified.  No stack is used and no return
             # value is set up.
             #
  65         .p2align 4,,15
  66         ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
  67 SYMBOL(_Copy_arrayof_conjoint_bytes):
  68         movq     %rdx,%r8             # byte count
  69         shrq     $3,%rdx              # qword count
  70         cmpq     %rdi,%rsi            # flags = to - from (leaq keeps them)
  71         leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
  72         jbe      acb_CopyRight        # to <= from: ascending is safe
  73         cmpq     %rax,%rsi            # to vs. address of last source byte
  74         jbe      acb_CopyLeft         # to is inside source: go descending
  75 acb_CopyRight:
             # Ascending copy.  rax and rcx point at the last full qword of
             # source and destination; rdx holds minus the number of qwords
             # still to copy and counts up toward zero.
  76         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
  77         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
  78         negq     %rdx                 # rdx = -qcount
  79         jmp      7f                   # enter at the loop test
  80         .p2align 4,,15
             # Single-qword loop, entered with 1..3 qwords left.
  81 1:      movq     8(%rax,%rdx,8),%rsi
  82         movq     %rsi,8(%rcx,%rdx,8)
  83         addq     $1,%rdx
  84         jnz      1b                   # until rdx reaches zero
             # Tail: 8(%rax) and 8(%rcx) address the first byte after the
             # qwords.
  85 2:      testq    $4,%r8               # check for trailing dword
  86         jz       3f
  87         movl     8(%rax),%esi         # copy trailing dword
  88         movl     %esi,8(%rcx)
  89         addq     $4,%rax              # step both bases past the dword
  90         addq     $4,%rcx              # original %rsi is trashed, so we
  91                                       #  can't use it as a base register
  92 3:      testq    $2,%r8               # check for trailing word
  93         jz       4f
  94         movw     8(%rax),%si          # copy trailing word
  95         movw     %si,8(%rcx)
  96         addq     $2,%rcx              # rax is not stepped: the byte load
                                           #  below addresses via rdi and r8
  97 4:      testq    $1,%r8               # check for trailing byte
  98         jz       5f
  99         movb     -1(%rdi,%r8,1),%al   # copy trailing byte
 100         movb     %al,8(%rcx)          # al is data here; rax base is done
 101 5:      ret
 102         .p2align 4,,15
             # Unrolled loop: four qwords per pass, lowest address first.
 103 6:      movq     -24(%rax,%rdx,8),%rsi
 104         movq     %rsi,-24(%rcx,%rdx,8)
 105         movq     -16(%rax,%rdx,8),%rsi
 106         movq     %rsi,-16(%rcx,%rdx,8)
 107         movq     -8(%rax,%rdx,8),%rsi
 108         movq     %rsi,-8(%rcx,%rdx,8)
 109         movq     (%rax,%rdx,8),%rsi
 110         movq     %rsi,(%rcx,%rdx,8)
 111 7:      addq     $4,%rdx              # claim the next four qwords
 112         jle      6b                   # at least four were left
 113         subq     $4,%rdx              # undo: rdx = -(qwords left), 0..3
 114         jl       1b                   # 1..3 left: single-qword loop
 115         jmp      2b                   # none left: go to the tail
 116 acb_CopyLeft:
             # Descending copy.  The tail pieces sit at the highest
             # addresses, so they are copied first; the qwords follow from
             # the top down.  rdi and rsi stay intact as bases and rcx
             # carries the data.
 117         testq    $1,%r8               # check for trailing byte
 118         jz       1f
 119         movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
 120         movb     %cl,-1(%rsi,%r8,1)
 121         subq     $1,%r8               # adjust for possible trailing word
 122 1:      testq    $2,%r8               # check for trailing word
 123         jz       2f
 124         movw     -2(%rdi,%r8,1),%cx   # copy trailing word
 125         movw     %cx,-2(%rsi,%r8,1)
 126 2:      testq    $4,%r8               # check for trailing dword
 127         jz       5f
 128         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
 129         movl     %ecx,(%rsi,%rdx,8)   #  (it starts right after the qwords)
 130         jmp      5f                   # enter at the loop test
 131         .p2align 4,,15
             # Single-qword loop, entered with 1..3 qwords left; rdx is the
             # number left and the highest remaining qword is copied.
 132 3:      movq     -8(%rdi,%rdx,8),%rcx
 133         movq     %rcx,-8(%rsi,%rdx,8)
 134         subq     $1,%rdx
 135         jnz      3b                   # until rdx reaches zero
 136         ret
 137         .p2align 4,,15
             # Unrolled loop: four qwords per pass, highest address first.
 138 4:      movq     24(%rdi,%rdx,8),%rcx
 139         movq     %rcx,24(%rsi,%rdx,8)
 140         movq     16(%rdi,%rdx,8),%rcx
 141         movq     %rcx,16(%rsi,%rdx,8)
 142         movq     8(%rdi,%rdx,8),%rcx
 143         movq     %rcx,8(%rsi,%rdx,8)
 144         movq     (%rdi,%rdx,8),%rcx
 145         movq     %rcx,(%rsi,%rdx,8)
 146 5:      subq     $4,%rdx              # claim the next four qwords
 147         jge      4b                   # at least four were left
 148         addq     $4,%rdx              # undo: rdx = qwords left, 0..3
 149         jg       3b                   # 1..3 left: single-qword loop
 150         ret
 151 
 152         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 153         #                                                 void* to,
 154         #                                                 size_t count)
 155         # Equivalent to
 156         #   conjoint_jshorts_atomic
 157         #
 158         # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
 159         # let the hardware handle it.  The two or four words within dwords
 160         # or qwords that span cache line boundaries will still be loaded
 161         # and stored atomically.
 162         #
 163         # rdi - from
 164         # rsi - to
 165         # rdx - count, treated as ssize_t
 166         #
             # Copies count 16-bit words; the two ranges may overlap.  The
             # copy direction is chosen at entry with two unsigned
             # comparisons (jbe):
             #   to <= from                    -> acs_CopyRight (ascending)
             #   from < to <= from+count*2-2   -> acs_CopyLeft  (descending)
             #   to >  from+count*2-2          -> acs_CopyRight (ascending)
             #
             # The bulk is moved as qwords (four words each): four qwords
             # per pass while at least four remain, then one at a time.
             # Bits 1 and 0 of the word count select a trailing dword and a
             # trailing word.
             #
             # Registers written: rax, rcx, rdx, rsi, r8 and the flags.
             # rdi is never modified.  No stack is used and no return
             # value is set up.
             #
 167         .p2align 4,,15
 168         ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
 169         ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
 170 SYMBOL(_Copy_arrayof_conjoint_jshorts):
 171 SYMBOL(_Copy_conjoint_jshorts_atomic):
 172         movq     %rdx,%r8             # word count
 173         shrq     $2,%rdx              # qword count
 174         cmpq     %rdi,%rsi            # flags = to - from (leaq keeps them)
 175         leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
 176         jbe      acs_CopyRight        # to <= from: ascending is safe
 177         cmpq     %rax,%rsi            # to vs. address of last source word
 178         jbe      acs_CopyLeft         # to is inside source: go descending
 179 acs_CopyRight:
             # Ascending copy.  rax and rcx point at the last full qword of
             # source and destination; rdx holds minus the number of qwords
             # still to copy and counts up toward zero.
 180         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
 181         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
 182         negq     %rdx                 # rdx = -qcount
 183         jmp      6f                   # enter at the loop test
             # Single-qword loop, entered with 1..3 qwords left.
 184 1:      movq     8(%rax,%rdx,8),%rsi
 185         movq     %rsi,8(%rcx,%rdx,8)
 186         addq     $1,%rdx
 187         jnz      1b                   # until rdx reaches zero
             # Tail: 8(%rax) and 8(%rcx) address the first word after the
             # qwords.
 188 2:      testq    $2,%r8               # check for trailing dword
 189         jz       3f
 190         movl     8(%rax),%esi         # copy trailing dword
 191         movl     %esi,8(%rcx)
 192         addq     $4,%rcx              # original %rsi is trashed, so we
 193                                       #  can't use it as a base register
 194 3:      testq    $1,%r8               # check for trailing word
 195         jz       4f
 196         movw     -2(%rdi,%r8,2),%si   # copy trailing word
 197         movw     %si,8(%rcx)
 198 4:      ret
 199         .p2align 4,,15
             # Unrolled loop: four qwords per pass, lowest address first.
 200 5:      movq     -24(%rax,%rdx,8),%rsi
 201         movq     %rsi,-24(%rcx,%rdx,8)
 202         movq     -16(%rax,%rdx,8),%rsi
 203         movq     %rsi,-16(%rcx,%rdx,8)
 204         movq     -8(%rax,%rdx,8),%rsi
 205         movq     %rsi,-8(%rcx,%rdx,8)
 206         movq     (%rax,%rdx,8),%rsi
 207         movq     %rsi,(%rcx,%rdx,8)
 208 6:      addq     $4,%rdx              # claim the next four qwords
 209         jle      5b                   # at least four were left
 210         subq     $4,%rdx              # undo: rdx = -(qwords left), 0..3
 211         jl       1b                   # 1..3 left: single-qword loop
 212         jmp      2b                   # none left: go to the tail
 213 acs_CopyLeft:
             # Descending copy.  The tail pieces sit at the highest
             # addresses, so they are copied first; the qwords follow from
             # the top down.  rdi and rsi stay intact as bases and rcx
             # carries the data.
 214         testq    $1,%r8               # check for trailing word
 215         jz       1f
 216         movw     -2(%rdi,%r8,2),%cx   # copy trailing word
 217         movw     %cx,-2(%rsi,%r8,2)
 218 1:      testq    $2,%r8               # check for trailing dword
 219         jz       4f
 220         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
 221         movl     %ecx,(%rsi,%rdx,8)   #  (it starts right after the qwords)
 222         jmp      4f                   # enter at the loop test
             # Single-qword loop, entered with 1..3 qwords left; rdx is the
             # number left and the highest remaining qword is copied.
 223 2:      movq     -8(%rdi,%rdx,8),%rcx
 224         movq     %rcx,-8(%rsi,%rdx,8)
 225         subq     $1,%rdx
 226         jnz      2b                   # until rdx reaches zero
 227         ret
 228         .p2align 4,,15
             # Unrolled loop: four qwords per pass, highest address first.
 229 3:      movq     24(%rdi,%rdx,8),%rcx
 230         movq     %rcx,24(%rsi,%rdx,8)
 231         movq     16(%rdi,%rdx,8),%rcx
 232         movq     %rcx,16(%rsi,%rdx,8)
 233         movq     8(%rdi,%rdx,8),%rcx
 234         movq     %rcx,8(%rsi,%rdx,8)
 235         movq     (%rdi,%rdx,8),%rcx
 236         movq     %rcx,(%rsi,%rdx,8)
 237 4:      subq     $4,%rdx              # claim the next four qwords
 238         jge      3b                   # at least four were left
 239         addq     $4,%rdx              # undo: rdx = qwords left, 0..3
 240         jg       2b                   # 1..3 left: single-qword loop
 241         ret
 242 
 243         # Support for void Copy::arrayof_conjoint_jints(jint* from,
 244         #                                               jint* to,
 245         #                                               size_t count)
 246         # Equivalent to
 247         #   conjoint_jints_atomic
 248         #
 249         # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
 250         # the hardware handle it.  The two dwords within qwords that span
 251         # cache line boundaries will still be loaded and stored atomically.
 252         #
 253         # rdi - from
 254         # rsi - to
 255         # rdx - count, treated as ssize_t
 256         #
             # Copies count 32-bit dwords; the two ranges may overlap.  The
             # copy direction is chosen at entry with two unsigned
             # comparisons (jbe):
             #   to <= from                    -> aci_CopyRight (ascending)
             #   from < to <= from+count*4-4   -> aci_CopyLeft  (descending)
             #   to >  from+count*4-4          -> aci_CopyRight (ascending)
             #
             # The bulk is moved as qwords (two dwords each): four qwords
             # per pass while at least four remain, then one at a time.
             # Bit 0 of the dword count selects a single trailing dword.
             #
             # Registers written: rax, rcx, rdx, rsi, r8 and the flags.
             # rdi is never modified.  No stack is used and no return
             # value is set up.
             #
 257         .p2align 4,,15
 258         ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
 259         ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
 260 SYMBOL(_Copy_arrayof_conjoint_jints):
 261 SYMBOL(_Copy_conjoint_jints_atomic):
 262         movq     %rdx,%r8             # dword count
 263         shrq     %rdx                 # qword count (shift right by one)
 264         cmpq     %rdi,%rsi            # flags = to - from (leaq keeps them)
 265         leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
 266         jbe      aci_CopyRight        # to <= from: ascending is safe
 267         cmpq     %rax,%rsi            # to vs. address of last source dword
 268         jbe      aci_CopyLeft         # to is inside source: go descending
 269 aci_CopyRight:
             # Ascending copy.  rax and rcx point at the last full qword of
             # source and destination; rdx holds minus the number of qwords
             # still to copy and counts up toward zero.
 270         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
 271         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
 272         negq     %rdx                 # rdx = -qcount
 273         jmp      5f                   # enter at the loop test
 274         .p2align 4,,15
             # Single-qword loop, entered with 1..3 qwords left.
 275 1:      movq     8(%rax,%rdx,8),%rsi
 276         movq     %rsi,8(%rcx,%rdx,8)
 277         addq     $1,%rdx
 278         jnz       1b                  # until rdx reaches zero
             # Tail: 8(%rax) and 8(%rcx) address the first dword after the
             # qwords.
 279 2:      testq    $1,%r8               # check for trailing dword
 280         jz       3f
 281         movl     8(%rax),%esi         # copy trailing dword
 282         movl     %esi,8(%rcx)
 283 3:      ret
 284         .p2align 4,,15
             # Unrolled loop: four qwords per pass, lowest address first.
 285 4:      movq     -24(%rax,%rdx,8),%rsi
 286         movq     %rsi,-24(%rcx,%rdx,8)
 287         movq     -16(%rax,%rdx,8),%rsi
 288         movq     %rsi,-16(%rcx,%rdx,8)
 289         movq     -8(%rax,%rdx,8),%rsi
 290         movq     %rsi,-8(%rcx,%rdx,8)
 291         movq     (%rax,%rdx,8),%rsi
 292         movq     %rsi,(%rcx,%rdx,8)
 293 5:      addq     $4,%rdx              # claim the next four qwords
 294         jle      4b                   # at least four were left
 295         subq     $4,%rdx              # undo: rdx = -(qwords left), 0..3
 296         jl       1b                   # 1..3 left: single-qword loop
 297         jmp      2b                   # none left: go to the tail
 298 aci_CopyLeft:
             # Descending copy.  The odd trailing dword, when present, is
             # the highest element and is copied first; the qwords follow
             # from the top down.  rdi and rsi stay intact as bases and rcx
             # carries the data.
 299         testq    $1,%r8               # check for trailing dword
 300         jz       3f
 301         movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
 302         movl     %ecx,-4(%rsi,%r8,4)
 303         jmp      3f                   # enter at the loop test
             # Single-qword loop, entered with 1..3 qwords left; rdx is the
             # number left and the highest remaining qword is copied.
 304 1:      movq     -8(%rdi,%rdx,8),%rcx
 305         movq     %rcx,-8(%rsi,%rdx,8)
 306         subq     $1,%rdx
 307         jnz      1b                   # until rdx reaches zero
 308         ret
 309         .p2align 4,,15
             # Unrolled loop: four qwords per pass, highest address first.
 310 2:      movq     24(%rdi,%rdx,8),%rcx
 311         movq     %rcx,24(%rsi,%rdx,8)
 312         movq     16(%rdi,%rdx,8),%rcx
 313         movq     %rcx,16(%rsi,%rdx,8)
 314         movq     8(%rdi,%rdx,8),%rcx
 315         movq     %rcx,8(%rsi,%rdx,8)
 316         movq     (%rdi,%rdx,8),%rcx
 317         movq     %rcx,(%rsi,%rdx,8)
 318 3:      subq     $4,%rdx              # claim the next four qwords
 319         jge      2b                   # at least four were left
 320         addq     $4,%rdx              # undo: rdx = qwords left, 0..3
 321         jg       1b                   # 1..3 left: single-qword loop
 322         ret
 323 
 324         # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
 325         #                                                jlong* to,
 326         #                                                size_t count)
 327         # Equivalent to
 328         #   conjoint_jlongs_atomic
 329         #   arrayof_conjoint_oops
 330         #   conjoint_oops_atomic
 331         #
 332         # rdi - from
 333         # rsi - to
 334         # rdx - count, treated as ssize_t
 335         #
             # Copies count 64-bit qwords; the two ranges may overlap.  The
             # copy direction is chosen at entry with two unsigned
             # comparisons (jbe):
             #   to <= from                    -> acl_CopyRight (ascending)
             #   from < to <= from+count*8-8   -> acl_CopyLeft  (descending)
             #   to >  from+count*8-8          -> acl_CopyRight (ascending)
             #
             # Qwords are moved four per pass while at least four remain,
             # then one at a time.  There is no sub-qword tail.
             #
             # Registers written: rax, rcx, rdx, rsi and the flags.  rdi is
             # never modified.  No stack is used and no return value is
             # set up.
             #
 336         .p2align 4,,15
 337         ELF_TYPE(_Copy_arrayof_conjoint_jlongs,@function)
 338         ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
 339 SYMBOL(_Copy_arrayof_conjoint_jlongs):
 340 SYMBOL(_Copy_conjoint_jlongs_atomic):
 341         cmpq     %rdi,%rsi            # flags = to - from (leaq keeps them)
 342         leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
 343         jbe      acl_CopyRight        # to <= from: ascending is safe
 344         cmpq     %rax,%rsi            # to vs. address of last source qword
 345         jbe      acl_CopyLeft         # to is inside source: go descending
 346 acl_CopyRight:
             # Ascending copy.  rax already points at the last source qword
             # (computed at entry); rcx gets the matching destination
             # address.  rdx holds minus the number of qwords still to copy
             # and counts up toward zero.
 347         leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
 348         negq     %rdx                 # rdx = -count
 349         jmp      3f                   # enter at the loop test
             # Single-qword loop, entered with 1..3 qwords left.
 350 1:      movq     8(%rax,%rdx,8),%rsi
 351         movq     %rsi,8(%rcx,%rdx,8)
 352         addq     $1,%rdx
 353         jnz      1b                   # until rdx reaches zero
 354         ret
 355         .p2align 4,,15
             # Unrolled loop: four qwords per pass, lowest address first.
 356 2:      movq     -24(%rax,%rdx,8),%rsi
 357         movq     %rsi,-24(%rcx,%rdx,8)
 358         movq     -16(%rax,%rdx,8),%rsi
 359         movq     %rsi,-16(%rcx,%rdx,8)
 360         movq     -8(%rax,%rdx,8),%rsi
 361         movq     %rsi,-8(%rcx,%rdx,8)
 362         movq     (%rax,%rdx,8),%rsi
 363         movq     %rsi,(%rcx,%rdx,8)
 364 3:      addq     $4,%rdx              # claim the next four qwords
 365         jle      2b                   # at least four were left
 366         subq     $4,%rdx              # undo: rdx = -(qwords left), 0..3
 367         jl       1b                   # 1..3 left: single-qword loop
 368         ret
             # Descending path.  Its two loop bodies come first; the entry
             # label acl_CopyLeft is further down, at the loop test.
             #
             # Single-qword loop, entered with 1..3 qwords left; rdx is the
             # number left and the highest remaining qword is copied.
 369 4:      movq     -8(%rdi,%rdx,8),%rcx
 370         movq     %rcx,-8(%rsi,%rdx,8)
 371         subq     $1,%rdx
 372         jnz      4b                   # until rdx reaches zero
 373         ret
 374         .p2align 4,,15
             # Unrolled loop: four qwords per pass, highest address first.
 375 5:      movq     24(%rdi,%rdx,8),%rcx
 376         movq     %rcx,24(%rsi,%rdx,8)
 377         movq     16(%rdi,%rdx,8),%rcx
 378         movq     %rcx,16(%rsi,%rdx,8)
 379         movq     8(%rdi,%rdx,8),%rcx
 380         movq     %rcx,8(%rsi,%rdx,8)
 381         movq     (%rdi,%rdx,8),%rcx
 382         movq     %rcx,(%rsi,%rdx,8)
 383 acl_CopyLeft:
             # Entry and loop test of the descending copy: rdx is the
             # number of qwords left; rdi and rsi are the unmodified bases.
 384         subq     $4,%rdx              # claim the next four qwords
 385         jge      5b                   # at least four were left
 386         addq     $4,%rdx              # undo: rdx = qwords left, 0..3
 387         jg       4b                   # 1..3 left: single-qword loop
 388         ret