1 /
   2 / Copyright (c) 2004, 2005, Oracle and/or its affiliates. All rights reserved.
   3 / DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 /
   5 / This code is free software; you can redistribute it and/or modify it
   6 / under the terms of the GNU General Public License version 2 only, as
   7 / published by the Free Software Foundation.
   8 /
   9 / This code is distributed in the hope that it will be useful, but WITHOUT
  10 / ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 / FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 / version 2 for more details (a copy is included in the LICENSE file that
  13 / accompanied this code).
  14 /
  15 / You should have received a copy of the GNU General Public License version
  16 / 2 along with this work; if not, write to the Free Software Foundation,
  17 / Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 /
  19 / Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 / or visit www.oracle.com if you need additional information or have any
  21 / questions.
  22 /
  23 
  24         .globl fs_load
  25         .globl fs_thread
  26 
  27         // NOTE WELL!  The _Copy functions are called directly
  28         // from server-compiler-generated code via CallLeafNoFP,
  29         // which means that they *must* either not use floating
  30         // point or use it in the same manner as does the server
  31         // compiler.
  32 
  33         .globl _Copy_arrayof_conjoint_bytes
  34         .globl _Copy_conjoint_jshorts_atomic
  35         .globl _Copy_arrayof_conjoint_jshorts
  36         .globl _Copy_conjoint_jints_atomic
  37         .globl _Copy_arrayof_conjoint_jints
  38         .globl _Copy_conjoint_jlongs_atomic
  39         .globl _Copy_arrayof_conjoint_jlongs
  40 
  41         .section .text,"ax"
  42 
  43         / Fast thread accessors, used by threadLS_solaris_amd64.cpp
        .align   16
        / intptr_t fs_load(ptrdiff_t off)
        / Returns the 64-bit word at %fs:off.
        / In:  %rdi = offset from the %fs segment base
        / Out: %rax = loaded value; no other registers modified
fs_load:
        movq %fs:(%rdi),%rax
        ret
  48 
        .align   16
        / intptr_t fs_thread(void)
        / Returns the 64-bit word at %fs:0 (the per-thread base slot used
        / by the fast thread accessor in threadLS_solaris_amd64.cpp).
        / Out: %rax = value at %fs:0; no other registers modified
fs_thread:
        movq %fs:0x0,%rax
        ret
  53 
        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
        .align  16
        // Prototype: int SafeFetch32 (int * Adr, int ErrValue) 
        // Fault-tolerant load: ErrValue is preloaded into the return
        // register BEFORE the potentially-faulting load, so if the load at
        // Fetch32PFI traps, resuming at Fetch32Resume yields ErrValue.
        // Fetch32PFI and Fetch32Resume are exported, presumably so the VM's
        // fault handler can match the faulting PC and redirect execution --
        // NOTE(review): confirm against the signal handler; do not reorder
        // or insert instructions between these labels.
SafeFetch32:
        movl    %esi, %eax            / %eax = ErrValue (survives if load faults)
Fetch32PFI:
        movl    (%rdi), %eax          / the potentially-faulting load
Fetch32Resume:
        ret
  63 
        .globl SafeFetchN, FetchNPFI, FetchNResume
        .align  16
        // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue) 
        // Pointer-width variant of SafeFetch32: ErrValue is preloaded into
        // %rax before the potentially-faulting load at FetchNPFI; a fault
        // handler resuming at FetchNResume returns ErrValue.
        // NOTE(review): FetchNPFI/FetchNResume are exported fault-handler
        // anchors -- do not reorder or insert instructions between them.
SafeFetchN:
        movq    %rsi, %rax            / %rax = ErrValue (survives if load faults)
FetchNPFI:
        movq    (%rdi), %rax          / the potentially-faulting load
FetchNResume:
        ret
  73 
        .globl  SpinPause
        .align  16
        / int SpinPause(void)
        / Spin-wait hint: "rep; nop" is the encoding of the PAUSE
        / instruction (F3 90), written this way for assemblers that do not
        / accept the "pause" mnemonic.  Always returns 1 in %rax.
SpinPause:
        rep                           / rep + nop == PAUSE
        nop
        movq    $1, %rax              / return 1
        ret
  81         
  82 
  83         / Support for void Copy::arrayof_conjoint_bytes(void* from,
  84         /                                               void* to,
  85         /                                               size_t count)
  86         / rdi - from
  87         / rsi - to
  88         / rdx - count, treated as ssize_t
  89         /
        .align   16
        / Overlap-aware byte copy.  Whole qwords are moved in a 4x-unrolled
        / main loop plus a single-qword tail loop; the final 0-7 bytes are
        / finished with at most one dword, one word, and one byte move.
        / Register roles after the prologue:
        /   %r8  = byte count (unchanged on the forward path)
        /   %rdx = qword count, then the (biased) loop index
        /   %rax = source cursor (forward) / scratch (backward)
        /   %rcx = destination cursor (forward) / scratch (backward)
_Copy_arrayof_conjoint_bytes:
        movq     %rdx,%r8             / byte count
        shrq     $3,%rdx              / qword count
        cmpq     %rdi,%rsi
        leaq     -1(%rdi,%r8,1),%rax  / from + bcount*1 - 1
        jbe      acb_CopyRight        / to <= from: forward copy is safe
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft         / to within [from, from+bcount): copy backward
        / else: to > last source byte, regions disjoint, fall into forward copy
acb_CopyRight:
        / Forward copy.  %rdx runs from -qcount toward 0; each element is
        / addressed as (end - 8) + index*8 + 8.
        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
        negq     %rdx                 / %rdx = -qcount
        jmp      7f                   / enter at the unroll test
        .align   16
1:      movq     8(%rax,%rdx,8),%rsi  / single-qword tail loop (1-3 iterations)
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $4,%r8               / check for trailing dword
        jz       3f
        movl     8(%rax),%esi         / copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx              / original %rsi is trashed, so we
                                      /  can't use it as a base register
3:      testq    $2,%r8               / check for trailing word
        jz       4f
        movw     8(%rax),%si          / copy trailing word
        movw     %si,8(%rcx)
        addq     $2,%rcx              / %rax left stale on purpose: the byte
                                      /  case below reloads via %rdi instead
4:      testq    $1,%r8               / check for trailing byte
        jz       5f
        movb     -1(%rdi,%r8,1),%al   / copy trailing byte (from + bcount - 1)
        movb     %al,8(%rcx)
5:      ret
        .align   16
6:      movq     -24(%rax,%rdx,8),%rsi / 4x-unrolled forward main loop
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
7:      addq     $4,%rdx              / at least 4 qwords remaining?
        jle      6b
        subq     $4,%rdx              / undo bias; 1-3 qwords remaining?
        jl       1b
        jmp      2b                   / no whole qwords left: do sub-qword tail
acb_CopyLeft:
        / Backward copy for overlapping regions: sub-qword tail first (high
        / addresses), then qwords from high to low.  Here %rdx is the
        / positive qword count and %rcx is scratch.
        testq    $1,%r8               / check for trailing byte
        jz       1f
        movb     -1(%rdi,%r8,1),%cl   / copy trailing byte
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8               / adjust for possible trailing word
1:      testq    $2,%r8               / check for trailing word
        jz       2f
        movw     -2(%rdi,%r8,1),%cx   / copy trailing word
        movw     %cx,-2(%rsi,%r8,1)
2:      testq    $4,%r8               / check for trailing dword
        jz       5f
        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword (at from + qcount*8)
        movl     %ecx,(%rsi,%rdx,8)
        jmp      5f
        .align   16
3:      movq     -8(%rdi,%rdx,8),%rcx / single-qword tail loop, descending
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      3b
        ret
        .align   16
4:      movq     24(%rdi,%rdx,8),%rcx / 4x-unrolled backward main loop
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
5:      subq     $4,%rdx              / at least 4 qwords remaining?
        jge      4b
        addq     $4,%rdx              / undo bias; 1-3 qwords remaining?
        jg       3b
        ret
 175 
 176         / Support for void Copy::arrayof_conjoint_jshorts(void* from,
 177         /                                                 void* to,
 178         /                                                 size_t count)
 179         / Equivalent to
 180         /   conjoint_jshorts_atomic
 181         /
 182         / If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
        / let the hardware handle it.  The two or four words within dwords
 184         / or qwords that span cache line boundaries will still be loaded
 185         / and stored atomically.
 186         /
 187         / rdi - from
 188         / rsi - to
 189         / rdx - count, treated as ssize_t
 190         /
        .align   16
        / Overlap-aware jshort (16-bit) copy; same structure as the byte
        / copier above, but the tail is at most one dword plus one word,
        / and individual words are never split (loads/stores stay
        / word-atomic).  Register roles after the prologue:
        /   %r8  = word count
        /   %rdx = qword count, then the (biased) loop index
        /   %rax = source cursor (forward) / scratch (backward)
        /   %rcx = destination cursor (forward) / scratch (backward)
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
        movq     %rdx,%r8             / word count
        shrq     $2,%rdx              / qword count
        cmpq     %rdi,%rsi
        leaq     -2(%rdi,%r8,2),%rax  / from + wcount*2 - 2
        jbe      acs_CopyRight        / to <= from: forward copy is safe
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft         / to overlaps source tail: copy backward
acs_CopyRight:
        / Forward copy; %rdx runs from -qcount toward 0.
        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
        negq     %rdx                 / %rdx = -qcount
        jmp      6f                   / enter at the unroll test
1:      movq     8(%rax,%rdx,8),%rsi  / single-qword tail loop (1-3 iterations)
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $2,%r8               / check for trailing dword
        jz       3f
        movl     8(%rax),%esi         / copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rcx              / original %rsi is trashed, so we
                                      /  can't use it as a base register
3:      testq    $1,%r8               / check for trailing word
        jz       4f
        movw     -2(%rdi,%r8,2),%si   / copy trailing word (via %rdi; %rax is stale)
        movw     %si,8(%rcx)
4:      ret
        .align   16
5:      movq     -24(%rax,%rdx,8),%rsi / 4x-unrolled forward main loop
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
6:      addq     $4,%rdx              / at least 4 qwords remaining?
        jle      5b
        subq     $4,%rdx              / undo bias; 1-3 qwords remaining?
        jl       1b
        jmp      2b                   / no whole qwords left: do sub-qword tail
acs_CopyLeft:
        / Backward copy for overlapping regions: tail word/dword first,
        / then qwords high-to-low.  %rdx = positive qword count here.
        testq    $1,%r8               / check for trailing word
        jz       1f
        movw     -2(%rdi,%r8,2),%cx   / copy trailing word
        movw     %cx,-2(%rsi,%r8,2)
1:      testq    $2,%r8               / check for trailing dword
        jz       4f
        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword (at from + qcount*8)
        movl     %ecx,(%rsi,%rdx,8)
        jmp      4f
2:      movq     -8(%rdi,%rdx,8),%rcx / single-qword tail loop, descending
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      2b
        ret
        .align   16
3:      movq     24(%rdi,%rdx,8),%rcx / 4x-unrolled backward main loop
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
4:      subq     $4,%rdx              / at least 4 qwords remaining?
        jge      3b
        addq     $4,%rdx              / undo bias; 1-3 qwords remaining?
        jg       2b
        ret
 264 
 265         / Support for void Copy::arrayof_conjoint_jints(jint* from,
 266         /                                               jint* to,
 267         /                                               size_t count)
 268         / Equivalent to
 269         /   conjoint_jints_atomic
 270         /
 271         / If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
 272         / the hardware handle it.  The two dwords within qwords that span
 273         / cache line boundaries will still be loaded and stored atomically.
 274         /
 275         / rdi - from
 276         / rsi - to
 277         / rdx - count, treated as ssize_t
 278         /
        .align   16
        / Overlap-aware jint (32-bit) copy; same structure as the copiers
        / above, with at most a single trailing dword after the qword loops.
        / Individual dwords are never split (loads/stores stay dword-atomic).
        / Register roles after the prologue:
        /   %r8  = dword count
        /   %rdx = qword count, then the (biased) loop index
        /   %rax = source cursor (forward) / scratch (backward)
        /   %rcx = destination cursor (forward) / scratch (backward)
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
        movq     %rdx,%r8             / dword count
        shrq     %rdx                 / qword count (shift right by 1)
        cmpq     %rdi,%rsi
        leaq     -4(%rdi,%r8,4),%rax  / from + dcount*4 - 4
        jbe      aci_CopyRight        / to <= from: forward copy is safe
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft         / to overlaps source tail: copy backward
aci_CopyRight:
        / Forward copy; %rdx runs from -qcount toward 0.
        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
        negq     %rdx                 / %rdx = -qcount
        jmp      5f                   / enter at the unroll test
        .align   16
1:      movq     8(%rax,%rdx,8),%rsi  / single-qword tail loop (1-3 iterations)
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz       1b
2:      testq    $1,%r8               / check for trailing dword
        jz       3f
        movl     8(%rax),%esi         / copy trailing dword
        movl     %esi,8(%rcx)
3:      ret
        .align   16
4:      movq     -24(%rax,%rdx,8),%rsi / 4x-unrolled forward main loop
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
5:      addq     $4,%rdx              / at least 4 qwords remaining?
        jle      4b
        subq     $4,%rdx              / undo bias; 1-3 qwords remaining?
        jl       1b
        jmp      2b                   / no whole qwords left: trailing dword?
aci_CopyLeft:
        / Backward copy for overlapping regions: trailing dword first, then
        / qwords high-to-low.  %rdx = positive qword count here.
        testq    $1,%r8               / check for trailing dword
        jz       3f
        movl     -4(%rdi,%r8,4),%ecx  / copy trailing dword
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      3f
1:      movq     -8(%rdi,%rdx,8),%rcx / single-qword tail loop, descending
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      1b
        ret
        .align   16
2:      movq     24(%rdi,%rdx,8),%rcx / 4x-unrolled backward main loop
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
3:      subq     $4,%rdx              / at least 4 qwords remaining?
        jge      2b
        addq     $4,%rdx              / undo bias; 1-3 qwords remaining?
        jg       1b
        ret
 343         
 344         / Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
 345         /                                                jlong* to,
 346         /                                                size_t count)
 347         / Equivalent to
 348         /   conjoint_jlongs_atomic
 349         /   arrayof_conjoint_oops
 350         /   conjoint_oops_atomic
 351         /
 352         / rdi - from
 353         / rsi - to
 354         / rdx - count, treated as ssize_t
 355         /
        .align   16
        / Overlap-aware jlong/oop (64-bit) copy.  The element size equals
        / the qword size, so there is no sub-qword tail: just the
        / 4x-unrolled main loop plus a single-qword loop in each direction.
        / Register roles:
        /   %rdx = qword count, then the (biased) loop index
        /   %rax = source cursor (forward path)
        /   %rcx = destination cursor (forward) / scratch (backward)
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
        cmpq     %rdi,%rsi
        leaq     -8(%rdi,%rdx,8),%rax / from + count*8 - 8
        jbe      acl_CopyRight        / to <= from: forward copy is safe
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft         / to overlaps source tail: copy backward
acl_CopyRight:
        / Forward copy; %rdx runs from -count toward 0.
        leaq     -8(%rsi,%rdx,8),%rcx / to + count*8 - 8
        negq     %rdx                 / %rdx = -count
        jmp      3f                   / enter at the unroll test
1:      movq     8(%rax,%rdx,8),%rsi  / single-qword tail loop (1-3 iterations)
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        ret
        .align   16
2:      movq     -24(%rax,%rdx,8),%rsi / 4x-unrolled forward main loop
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
3:      addq     $4,%rdx              / at least 4 qwords remaining?
        jle      2b
        subq     $4,%rdx              / undo bias; 1-3 qwords remaining?
        jl       1b
        ret
4:      movq     -8(%rdi,%rdx,8),%rcx / single-qword tail loop, descending
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      4b
        ret
        .align   16
5:      movq     24(%rdi,%rdx,8),%rcx / 4x-unrolled backward main loop
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
        / Backward entry point: %rdx = positive qword count.
        subq     $4,%rdx              / at least 4 qwords remaining?
        jge      5b
        addq     $4,%rdx              / undo bias; 1-3 qwords remaining?
        jg       4b
        ret