1 /
   2 / Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
   3 / DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 /
   5 / This code is free software; you can redistribute it and/or modify it
   6 / under the terms of the GNU General Public License version 2 only, as
   7 / published by the Free Software Foundation.
   8 /
   9 / This code is distributed in the hope that it will be useful, but WITHOUT
  10 / ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 / FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 / version 2 for more details (a copy is included in the LICENSE file that
  13 / accompanied this code).
  14 /
  15 / You should have received a copy of the GNU General Public License version
  16 / 2 along with this work; if not, write to the Free Software Foundation,
  17 / Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 /
  19 / Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 / or visit www.oracle.com if you need additional information or have any
  21 / questions.
  22 /
  23 
  24         .globl fs_load
  25         .globl fs_thread
  26 
  27         // NOTE WELL!  The _Copy functions are called directly
  28         // from server-compiler-generated code via CallLeafNoFP,
  29         // which means that they *must* either not use floating
  30         // point or use it in the same manner as does the server
  31         // compiler.
             // The _Copy routines visible in this listing take the first
             // option: they touch general-purpose integer registers only.
  32 
             // The jshort, jint and jlong routines are each exported under two
             // names (arrayof_* and *_atomic) that label the same entry point;
             // the byte routine is exported under a single name.
  33         .globl _Copy_arrayof_conjoint_bytes
  34         .globl _Copy_conjoint_jshorts_atomic
  35         .globl _Copy_arrayof_conjoint_jshorts
  36         .globl _Copy_conjoint_jints_atomic
  37         .globl _Copy_arrayof_conjoint_jints
  38         .globl _Copy_conjoint_jlongs_atomic
  39         .globl _Copy_arrayof_conjoint_jlongs
  40 
             // Section flags: a = allocatable, x = executable.
  41         .section .text,"ax"
  42 
  43         / Fast thread accessors, used by threadLS_solaris_amd64.cpp
  44         .align   16
  45 fs_load:
             // Reads one 64-bit word through the %fs segment and returns it.
             //   in:  %rdi = offset relative to the %fs segment base
             //   out: %rax = quadword found at %fs:(%rdi)
             // No other register and no flag is written.
             // NOTE(review): presumably called from C++ with the offset as the
             // first integer argument (SysV AMD64 passes that in %rdi) --
             // confirm against threadLS_solaris_amd64.cpp.
  46         movq %fs:(%rdi),%rax
  47         ret
  48 
  49         .align   16
  50 fs_thread:
             // Returns the 64-bit word stored at offset 0 of the %fs segment.
             //   in:  none
             //   out: %rax = quadword found at %fs:0
             // No other register and no flag is written.
             // NOTE(review): presumably this word is the current thread
             // pointer on Solaris amd64 -- confirm against the caller.
  51         movq %fs:0x0,%rax
  52         ret
  53 
  54         .globl  SpinPause
  55         .align  16
  56 SpinPause:
             // Executes one spin-wait hint and returns the constant 1 in %rax.
             //   in:  none
             //   out: %rax = 1
             // The rep prefix (0xF3) followed by nop (0x90) is the byte
             // encoding of the PAUSE instruction.
             // NOTE(review): presumably spelled as two mnemonics for
             // assemblers that lack "pause" -- confirm before changing.
             // NOTE(review): what callers do with the return value is not
             // visible in this file.
  57         rep
  58         nop
  59         movq    $1, %rax
  60         ret
  61 
  62 
  63         / Support for void Copy::arrayof_conjoint_bytes(void* from,
  64         /                                               void* to,
  65         /                                               size_t count)
  66         / rdi - from
  67         / rsi - to
  68         / rdx - count, treated as ssize_t
  69         /
  70         .align   16
  71 _Copy_arrayof_conjoint_bytes:
             // Overlap-safe byte copy built from 8-byte moves plus a 4/2/1-byte
             // tail.
             // Register roles once the first two instructions have run:
             //   %rdi = from (never modified)      %rsi = to
             //   %r8  = byte count                 %rdx = count / 8 (qwords)
             // Direction choice (unsigned compares):
             //   to <= from                     -> acb_CopyRight (ascending)
             //   from < to <= from + count - 1  -> acb_CopyLeft  (descending)
             //   to past the last source byte   -> falls into acb_CopyRight
             // Writes %rax, %rcx, %rdx, %r8, the flags and, on the ascending
             // path, %rsi.  Only integer registers are used and nothing is
             // pushed on the stack.  %rax holds no meaningful result on return.
             // Numeric labels are reused between the two halves; "Nb"/"Nf"
             // bind to the nearest label N backward/forward.
  72         movq     %rdx,%r8             / byte count
  73         shrq     $3,%rdx              / qword count
  74         cmpq     %rdi,%rsi
  75         leaq     -1(%rdi,%r8,1),%rax  / from + bcount*1 - 1
             // leaq leaves the flags alone, so this jbe still tests the cmpq
             // above: taken when to <= from.
  76         jbe      acb_CopyRight
             // %rax = address of the last source byte.  Taken when the
             // destination starts inside the source range.
  77         cmpq     %rax,%rsi
  78         jbe      acb_CopyLeft 
  79 acb_CopyRight:
             // Ascending copy.  %rax/%rcx address the last whole qword of the
             // source/destination and %rdx becomes -qcount, so the operand
             // 8(%rax,%rdx,8) starts at 'from' and climbs as %rdx rises to 0.
  80         leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
  81         leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
  82         negq     %rdx
  83         jmp      7f
  84         .align   16
             // One qword per iteration.  Handles the 1..3 qwords the unrolled
             // loop leaves behind; falls into the tail when %rdx reaches 0.
  85 1:      movq     8(%rax,%rdx,8),%rsi
  86         movq     %rsi,8(%rcx,%rdx,8)
  87         addq     $1,%rdx
  88         jnz      1b
             // Tail: bits 2, 1 and 0 of the byte count select a trailing
             // dword, word and byte, stored in ascending address order.
             // Here 8(%rax) / 8(%rcx) are the first bytes after the qwords.
  89 2:      testq    $4,%r8               / check for trailing dword
  90         jz       3f
  91         movl     8(%rax),%esi         / copy trailing dword
  92         movl     %esi,8(%rcx)
  93         addq     $4,%rax
  94         addq     $4,%rcx              / original %rsi is trashed, so we
  95                                       /  can't use it as a base register
  96 3:      testq    $2,%r8               / check for trailing word
  97         jz       4f
  98         movw     8(%rax),%si          / copy trailing word
  99         movw     %si,8(%rcx)
 100         addq     $2,%rcx
             // The last byte is fetched through the untouched %rdi/%r8 pair.
             // Loading it into %al overwrites the low byte of %rax, which is
             // no longer used as a base from here on.
 101 4:      testq    $1,%r8               / check for trailing byte
 102         jz       5f
 103         movb     -1(%rdi,%r8,1),%al   / copy trailing byte
 104         movb     %al,8(%rcx)
 105 5:      ret
 106         .align   16
             // Unrolled by four.  Reached only from label 7, after %rdx has
             // already been advanced by 4, hence the -24..0 displacements.
 107 6:      movq     -24(%rax,%rdx,8),%rsi
 108         movq     %rsi,-24(%rcx,%rdx,8)
 109         movq     -16(%rax,%rdx,8),%rsi
 110         movq     %rsi,-16(%rcx,%rdx,8)
 111         movq     -8(%rax,%rdx,8),%rsi
 112         movq     %rsi,-8(%rcx,%rdx,8)
 113         movq     (%rax,%rdx,8),%rsi
 114         movq     %rsi,(%rcx,%rdx,8)
             // Entry point and loop control of the ascending qword copy:
             //   %rdx + 4 <= 0 -> at least four qwords remain, run block 6
             //   otherwise undo the add; negative -> 1..3 qwords via loop 1,
             //   zero -> go straight to the tail at 2.
 115 7:      addq     $4,%rdx
 116         jle      6b
 117         subq     $4,%rdx
 118         jl       1b
 119         jmp      2b
 120 acb_CopyLeft:
             // Descending copy.  The tail at the highest addresses is moved
             // first, then the whole qwords from the top down, so every source
             // byte is read before an overlapping store can overwrite it.
             // %rdi and %rsi are used directly as bases; %rcx is the data
             // temporary and %rdx counts the remaining qwords down to 0.
 121         testq    $1,%r8               / check for trailing byte
 122         jz       1f
 123         movb     -1(%rdi,%r8,1),%cl   / copy trailing byte
 124         movb     %cl,-1(%rsi,%r8,1)
 125         subq     $1,%r8               / adjust for possible trailing word
 126 1:      testq    $2,%r8               / check for trailing word
 127         jz       2f
 128         movw     -2(%rdi,%r8,1),%cx   / copy trailing word
 129         movw     %cx,-2(%rsi,%r8,1)
             // (%rdi,%rdx,8) = from + qcount*8, the first byte after the
             // whole qwords, which is where a trailing dword sits.
 130 2:      testq    $4,%r8               / check for trailing dword
 131         jz       5f
 132         movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
 133         movl     %ecx,(%rsi,%rdx,8)
 134         jmp      5f
 135         .align   16
             // One qword per iteration, highest remaining qword first; returns
             // when %rdx reaches 0.
 136 3:      movq     -8(%rdi,%rdx,8),%rcx
 137         movq     %rcx,-8(%rsi,%rdx,8)
 138         subq     $1,%rdx
 139         jnz      3b
 140         ret
 141         .align   16
             // Unrolled by four, highest address first.  Reached only from
             // label 5, after %rdx has already been lowered by 4, hence the
             // 24..0 displacements.
 142 4:      movq     24(%rdi,%rdx,8),%rcx
 143         movq     %rcx,24(%rsi,%rdx,8)
 144         movq     16(%rdi,%rdx,8),%rcx
 145         movq     %rcx,16(%rsi,%rdx,8)
 146         movq     8(%rdi,%rdx,8),%rcx
 147         movq     %rcx,8(%rsi,%rdx,8)
 148         movq     (%rdi,%rdx,8),%rcx
 149         movq     %rcx,(%rsi,%rdx,8)
             // Entry point and loop control of the descending qword copy:
             //   %rdx - 4 >= 0 -> at least four qwords remain, run block 4
             //   otherwise undo the subtract; positive -> 1..3 qwords via
             //   loop 3, zero -> done.
 150 5:      subq     $4,%rdx
 151         jge      4b
 152         addq     $4,%rdx
 153         jg       3b
 154         ret
 155 
 156         / Support for void Copy::arrayof_conjoint_jshorts(void* from,
 157         /                                                 void* to,
 158         /                                                 size_t count)
 159         / Equivalent to
 160         /   conjoint_jshorts_atomic
 161         /
 162         / If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
 163         / let the hardware handle it.  The two or four words within dwords
 164         / or qwords that span cache line boundaries will still be loaded
 165         / and stored atomically.
 166         /
 167         / rdi - from
 168         / rsi - to
 169         / rdx - count, treated as ssize_t
 170         /
 171         .align   16
 172 _Copy_arrayof_conjoint_jshorts:
 173 _Copy_conjoint_jshorts_atomic:
             // Overlap-safe copy of 16-bit elements; both labels above name
             // this one entry point.  Data moves as qwords (four elements)
             // plus an optional trailing dword (two elements) and word (one
             // element); no element is ever moved with an access narrower
             // than 16 bits.
             // Register roles once the first two instructions have run:
             //   %rdi = from (never modified)      %rsi = to
             //   %r8  = element count              %rdx = count / 4 (qwords)
             // Direction choice (unsigned compares):
             //   to <= from                        -> acs_CopyRight (ascending)
             //   from < to <= last source element  -> acs_CopyLeft  (descending)
             //   to past the last source element   -> falls into acs_CopyRight
             // Writes %rax, %rcx, %rdx, %r8, the flags and, on the ascending
             // path, %rsi.  Only integer registers are used and nothing is
             // pushed on the stack.
 174         movq     %rdx,%r8             / word count
 175         shrq     $2,%rdx              / qword count
 176         cmpq     %rdi,%rsi
 177         leaq     -2(%rdi,%r8,2),%rax  / from + wcount*2 - 2
             // leaq leaves the flags alone, so this jbe still tests the cmpq
             // above: taken when to <= from.
 178         jbe      acs_CopyRight
             // %rax = address of the last source element.  Taken when the
             // destination starts inside the source range.
 179         cmpq     %rax,%rsi
 180         jbe      acs_CopyLeft 
 181 acs_CopyRight:
             // Ascending copy.  %rax/%rcx address the last whole qword of the
             // source/destination and %rdx becomes -qcount, so the operand
             // 8(%rax,%rdx,8) starts at 'from' and climbs as %rdx rises to 0.
 182         leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
 183         leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
 184         negq     %rdx
 185         jmp      6f
             // One qword per iteration.  Handles the 1..3 qwords the unrolled
             // loop leaves behind; falls into the tail when %rdx reaches 0.
 186 1:      movq     8(%rax,%rdx,8),%rsi
 187         movq     %rsi,8(%rcx,%rdx,8)
 188         addq     $1,%rdx
 189         jnz      1b
             // Tail: bit 1 of the element count selects a trailing dword and
             // bit 0 a trailing word.  8(%rax) / 8(%rcx) are the first bytes
             // after the qwords.
 190 2:      testq    $2,%r8               / check for trailing dword
 191         jz       3f
 192         movl     8(%rax),%esi         / copy trailing dword
 193         movl     %esi,8(%rcx)
 194         addq     $4,%rcx              / original %rsi is trashed, so we
 195                                       /  can't use it as a base register
             // The last element is fetched through the untouched %rdi/%r8
             // pair, so %rax does not need to be advanced past the dword.
 196 3:      testq    $1,%r8               / check for trailing word
 197         jz       4f
 198         movw     -2(%rdi,%r8,2),%si   / copy trailing word
 199         movw     %si,8(%rcx)
 200 4:      ret
 201         .align   16
             // Unrolled by four.  Reached only from label 6, after %rdx has
             // already been advanced by 4, hence the -24..0 displacements.
 202 5:      movq     -24(%rax,%rdx,8),%rsi
 203         movq     %rsi,-24(%rcx,%rdx,8)
 204         movq     -16(%rax,%rdx,8),%rsi
 205         movq     %rsi,-16(%rcx,%rdx,8)
 206         movq     -8(%rax,%rdx,8),%rsi
 207         movq     %rsi,-8(%rcx,%rdx,8)
 208         movq     (%rax,%rdx,8),%rsi
 209         movq     %rsi,(%rcx,%rdx,8)
             // Entry point and loop control of the ascending qword copy:
             //   %rdx + 4 <= 0 -> at least four qwords remain, run block 5
             //   otherwise undo the add; negative -> 1..3 qwords via loop 1,
             //   zero -> go straight to the tail at 2.
 210 6:      addq     $4,%rdx
 211         jle      5b
 212         subq     $4,%rdx
 213         jl       1b
 214         jmp      2b
 215 acs_CopyLeft:
             // Descending copy.  The tail at the highest addresses is moved
             // first, then the whole qwords from the top down, so every source
             // element is read before an overlapping store can overwrite it.
             // %rdi and %rsi are used directly as bases; %rcx is the data
             // temporary and %rdx counts the remaining qwords down to 0.
 216         testq    $1,%r8               / check for trailing word
 217         jz       1f
 218         movw     -2(%rdi,%r8,2),%cx   / copy trailing word
 219         movw     %cx,-2(%rsi,%r8,2)
             // (%rdi,%rdx,8) = from + qcount*8, the first byte after the
             // whole qwords, which is where a trailing dword sits.
 220 1:      testq    $2,%r8               / check for trailing dword
 221         jz       4f
 222         movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
 223         movl     %ecx,(%rsi,%rdx,8)
 224         jmp      4f
             // One qword per iteration, highest remaining qword first; returns
             // when %rdx reaches 0.
 225 2:      movq     -8(%rdi,%rdx,8),%rcx
 226         movq     %rcx,-8(%rsi,%rdx,8)
 227         subq     $1,%rdx
 228         jnz      2b
 229         ret
 230         .align   16
             // Unrolled by four, highest address first.  Reached only from
             // label 4, after %rdx has already been lowered by 4, hence the
             // 24..0 displacements.
 231 3:      movq     24(%rdi,%rdx,8),%rcx
 232         movq     %rcx,24(%rsi,%rdx,8)
 233         movq     16(%rdi,%rdx,8),%rcx
 234         movq     %rcx,16(%rsi,%rdx,8)
 235         movq     8(%rdi,%rdx,8),%rcx
 236         movq     %rcx,8(%rsi,%rdx,8)
 237         movq     (%rdi,%rdx,8),%rcx
 238         movq     %rcx,(%rsi,%rdx,8)
             // Entry point and loop control of the descending qword copy:
             //   %rdx - 4 >= 0 -> at least four qwords remain, run block 3
             //   otherwise undo the subtract; positive -> 1..3 qwords via
             //   loop 2, zero -> done.
 239 4:      subq     $4,%rdx
 240         jge      3b
 241         addq     $4,%rdx
 242         jg       2b
 243         ret
 244 
 245         / Support for void Copy::arrayof_conjoint_jints(jint* from,
 246         /                                               jint* to,
 247         /                                               size_t count)
 248         / Equivalent to
 249         /   conjoint_jints_atomic
 250         /
 251         / If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
 252         / the hardware handle it.  The two dwords within qwords that span
 253         / cache line boundaries will still be loaded and stored atomically.
 254         /
 255         / rdi - from
 256         / rsi - to
 257         / rdx - count, treated as ssize_t
 258         /
 259         .align   16
 260 _Copy_arrayof_conjoint_jints:
 261 _Copy_conjoint_jints_atomic:
             // Overlap-safe copy of 32-bit elements; both labels above name
             // this one entry point.  Data moves as qwords (two elements) plus
             // an optional trailing dword; no element is ever moved with an
             // access narrower than 32 bits.
             // Register roles once the first two instructions have run:
             //   %rdi = from (never modified)      %rsi = to
             //   %r8  = element count              %rdx = count / 2 (qwords)
             // Direction choice (unsigned compares):
             //   to <= from                        -> aci_CopyRight (ascending)
             //   from < to <= last source element  -> aci_CopyLeft  (descending)
             //   to past the last source element   -> falls into aci_CopyRight
             // Writes %rax, %rcx, %rdx, %r8, the flags and, on the ascending
             // path, %rsi.  Only integer registers are used and nothing is
             // pushed on the stack.
 262         movq     %rdx,%r8             / dword count
             // The one-operand form of shrq shifts right by a single bit.
 263         shrq     %rdx                 / qword count
 264         cmpq     %rdi,%rsi
 265         leaq     -4(%rdi,%r8,4),%rax  / from + dcount*4 - 4
             // leaq leaves the flags alone, so this jbe still tests the cmpq
             // above: taken when to <= from.
 266         jbe      aci_CopyRight
             // %rax = address of the last source element.  Taken when the
             // destination starts inside the source range.
 267         cmpq     %rax,%rsi
 268         jbe      aci_CopyLeft 
 269 aci_CopyRight:
             // Ascending copy.  %rax/%rcx address the last whole qword of the
             // source/destination and %rdx becomes -qcount, so the operand
             // 8(%rax,%rdx,8) starts at 'from' and climbs as %rdx rises to 0.
 270         leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
 271         leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
 272         negq     %rdx
 273         jmp      5f
 274         .align   16
             // One qword per iteration.  Handles the 1..3 qwords the unrolled
             // loop leaves behind; falls into the tail when %rdx reaches 0.
 275 1:      movq     8(%rax,%rdx,8),%rsi
 276         movq     %rsi,8(%rcx,%rdx,8)
 277         addq     $1,%rdx
 278         jnz       1b
             // Tail: an odd element count leaves one dword directly after the
             // qwords, at 8(%rax) / 8(%rcx).
 279 2:      testq    $1,%r8               / check for trailing dword
 280         jz       3f
 281         movl     8(%rax),%esi         / copy trailing dword
 282         movl     %esi,8(%rcx)
 283 3:      ret
 284         .align   16
             // Unrolled by four.  Reached only from label 5, after %rdx has
             // already been advanced by 4, hence the -24..0 displacements.
 285 4:      movq     -24(%rax,%rdx,8),%rsi
 286         movq     %rsi,-24(%rcx,%rdx,8)
 287         movq     -16(%rax,%rdx,8),%rsi
 288         movq     %rsi,-16(%rcx,%rdx,8)
 289         movq     -8(%rax,%rdx,8),%rsi
 290         movq     %rsi,-8(%rcx,%rdx,8)
 291         movq     (%rax,%rdx,8),%rsi
 292         movq     %rsi,(%rcx,%rdx,8)
             // Entry point and loop control of the ascending qword copy:
             //   %rdx + 4 <= 0 -> at least four qwords remain, run block 4
             //   otherwise undo the add; negative -> 1..3 qwords via loop 1,
             //   zero -> go straight to the tail at 2.
 293 5:      addq     $4,%rdx
 294         jle      4b
 295         subq     $4,%rdx
 296         jl       1b
 297         jmp      2b
 298 aci_CopyLeft:
             // Descending copy.  The trailing dword (highest address) is moved
             // first, then the whole qwords from the top down, so every source
             // element is read before an overlapping store can overwrite it.
             // %rdi and %rsi are used directly as bases; %rcx is the data
             // temporary and %rdx counts the remaining qwords down to 0.
 299         testq    $1,%r8               / check for trailing dword
 300         jz       3f
 301         movl     -4(%rdi,%r8,4),%ecx  / copy trailing dword
 302         movl     %ecx,-4(%rsi,%r8,4)
 303         jmp      3f
             // One qword per iteration, highest remaining qword first; returns
             // when %rdx reaches 0.
 304 1:      movq     -8(%rdi,%rdx,8),%rcx
 305         movq     %rcx,-8(%rsi,%rdx,8)
 306         subq     $1,%rdx
 307         jnz      1b
 308         ret
 309         .align   16
             // Unrolled by four, highest address first.  Reached only from
             // label 3, after %rdx has already been lowered by 4, hence the
             // 24..0 displacements.
 310 2:      movq     24(%rdi,%rdx,8),%rcx
 311         movq     %rcx,24(%rsi,%rdx,8)
 312         movq     16(%rdi,%rdx,8),%rcx
 313         movq     %rcx,16(%rsi,%rdx,8)
 314         movq     8(%rdi,%rdx,8),%rcx
 315         movq     %rcx,8(%rsi,%rdx,8)
 316         movq     (%rdi,%rdx,8),%rcx
 317         movq     %rcx,(%rsi,%rdx,8)
             // Entry point and loop control of the descending qword copy:
             //   %rdx - 4 >= 0 -> at least four qwords remain, run block 2
             //   otherwise undo the subtract; positive -> 1..3 qwords via
             //   loop 1, zero -> done.
 318 3:      subq     $4,%rdx
 319         jge      2b
 320         addq     $4,%rdx
 321         jg       1b
 322         ret
 323 
 324         / Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
 325         /                                                jlong* to,
 326         /                                                size_t count)
 327         / Equivalent to
 328         /   conjoint_jlongs_atomic
 329         /   arrayof_conjoint_oops
 330         /   conjoint_oops_atomic
 331         /
 332         / rdi - from
 333         / rsi - to
 334         / rdx - count, treated as ssize_t
 335         /
 336         .align   16
 337 _Copy_arrayof_conjoint_jlongs:
 338 _Copy_conjoint_jlongs_atomic:
             // Overlap-safe copy of 64-bit elements; both labels above name
             // this one entry point.  Every element moves with one movq load
             // and one movq store, so there is no tail handling.
             // Register roles:
             //   %rdi = from (never modified)      %rsi = to
             //   %rdx = element count, used directly as the qword count
             // Direction choice (unsigned compares):
             //   to <= from                        -> acl_CopyRight (ascending)
             //   from < to <= last source element  -> acl_CopyLeft  (descending)
             //   to past the last source element   -> falls into acl_CopyRight
             // Writes %rax, %rcx, %rdx, the flags and, on the ascending path,
             // %rsi.  Only integer registers are used and nothing is pushed on
             // the stack.
 339         cmpq     %rdi,%rsi
             // %rax serves twice: as the overlap bound for the second compare
             // and as the source base of the ascending copy.
 340         leaq     -8(%rdi,%rdx,8),%rax / from + count*8 - 8
             // leaq leaves the flags alone, so this jbe still tests the cmpq
             // above: taken when to <= from.
 341         jbe      acl_CopyRight
 342         cmpq     %rax,%rsi
 343         jbe      acl_CopyLeft 
 344 acl_CopyRight:
             // Ascending copy.  %rcx addresses the last destination qword and
             // %rdx becomes -count, so the operand 8(%rax,%rdx,8) starts at
             // 'from' and climbs as %rdx rises to 0.
 345         leaq     -8(%rsi,%rdx,8),%rcx / to + count*8 - 8
 346         negq     %rdx
 347         jmp      3f
             // One qword per iteration.  Handles the 1..3 qwords the unrolled
             // loop leaves behind; returns when %rdx reaches 0.
 348 1:      movq     8(%rax,%rdx,8),%rsi
 349         movq     %rsi,8(%rcx,%rdx,8)
 350         addq     $1,%rdx
 351         jnz      1b
 352         ret
 353         .align   16
             // Unrolled by four.  Reached only from label 3, after %rdx has
             // already been advanced by 4, hence the -24..0 displacements.
 354 2:      movq     -24(%rax,%rdx,8),%rsi
 355         movq     %rsi,-24(%rcx,%rdx,8)
 356         movq     -16(%rax,%rdx,8),%rsi
 357         movq     %rsi,-16(%rcx,%rdx,8)
 358         movq     -8(%rax,%rdx,8),%rsi
 359         movq     %rsi,-8(%rcx,%rdx,8)
 360         movq     (%rax,%rdx,8),%rsi
 361         movq     %rsi,(%rcx,%rdx,8)
             // Entry point and loop control of the ascending copy:
             //   %rdx + 4 <= 0 -> at least four qwords remain, run block 2
             //   otherwise undo the add; negative -> 1..3 qwords via loop 1,
             //   zero -> done.
 362 3:      addq     $4,%rdx
 363         jle      2b
 364         subq     $4,%rdx
 365         jl       1b
 366         ret
             // Descending copy, one qword per iteration, highest remaining
             // qword first; returns when %rdx reaches 0.  Reached from the
             // loop control at acl_CopyLeft.
 367 4:      movq     -8(%rdi,%rdx,8),%rcx
 368         movq     %rcx,-8(%rsi,%rdx,8)
 369         subq     $1,%rdx
 370         jnz      4b
 371         ret
 372         .align   16
             // Descending copy unrolled by four, highest address first.
             // Reached only from acl_CopyLeft, after %rdx has already been
             // lowered by 4, hence the 24..0 displacements.  It falls through
             // into the loop control below.
 373 5:      movq     24(%rdi,%rdx,8),%rcx
 374         movq     %rcx,24(%rsi,%rdx,8)
 375         movq     16(%rdi,%rdx,8),%rcx
 376         movq     %rcx,16(%rsi,%rdx,8)
 377         movq     8(%rdi,%rdx,8),%rcx
 378         movq     %rcx,8(%rsi,%rdx,8)
 379         movq     (%rdi,%rdx,8),%rcx
 380         movq     %rcx,(%rsi,%rdx,8)
 381 acl_CopyLeft:
             // Entry point and loop control of the descending copy.  The
             // label sits on the loop control itself, so the first thing a
             // descending copy does is the count check:
             //   %rdx - 4 >= 0 -> at least four qwords remain, run block 5
             //   otherwise undo the subtract; positive -> 1..3 qwords via
             //   loop 4, zero -> done.
             // %rdi and %rsi are used directly as bases; %rcx is the data
             // temporary.
 382         subq     $4,%rdx
 383         jge      5b
 384         addq     $4,%rdx
 385         jg       4b
 386         ret