1 / 2 / Copyright (c) 2004, 2005, Oracle and/or its affiliates. All rights reserved. 3 / DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 / 5 / This code is free software; you can redistribute it and/or modify it 6 / under the terms of the GNU General Public License version 2 only, as 7 / published by the Free Software Foundation. 8 / 9 / This code is distributed in the hope that it will be useful, but WITHOUT 10 / ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 / FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 / version 2 for more details (a copy is included in the LICENSE file that 13 / accompanied this code). 14 / 15 / You should have received a copy of the GNU General Public License version 16 / 2 along with this work; if not, write to the Free Software Foundation, 17 / Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 / 19 / Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 / or visit www.oracle.com if you need additional information or have any 21 / questions. 22 / 23 24 .globl fs_load 25 .globl fs_thread 26 27 // NOTE WELL! The _Copy functions are called directly 28 // from server-compiler-generated code via CallLeafNoFP, 29 // which means that they *must* either not use floating 30 // point or use it in the same manner as does the server 31 // compiler. 
/ Exported entry points for the HotSpot copy stubs defined below.
	.globl	_Copy_arrayof_conjoint_bytes
	.globl	_Copy_conjoint_jshorts_atomic
	.globl	_Copy_arrayof_conjoint_jshorts
	.globl	_Copy_conjoint_jints_atomic
	.globl	_Copy_arrayof_conjoint_jints
	.globl	_Copy_conjoint_jlongs_atomic
	.globl	_Copy_arrayof_conjoint_jlongs

	.section .text,"ax"

/ Fast thread accessors, used by threadLS_solaris_amd64.cpp
/
/ fs_load(off): return the qword at offset %rdi from the %fs
/ segment base in %rax.
	.align	16
fs_load:
	movq	%fs:(%rdi),%rax
	ret

/ fs_thread(): return the qword at %fs:0 (the base slot of the
/ %fs segment) in %rax.
	.align	16
fs_thread:
	movq	%fs:0x0,%rax
	ret

	.globl	SafeFetch32, Fetch32PFI, Fetch32Resume
	.align	16
	// Prototype: int SafeFetch32 (int * Adr, int ErrValue)
	// ErrValue is pre-loaded into the return register so that if
	// the load at Fetch32PFI faults, a fault handler can resume
	// at Fetch32Resume with ErrValue already in %eax.
	// NOTE(review): the fault handler itself is not in this file;
	// it presumably keys off the exported PFI/Resume labels.
SafeFetch32:
	movl	%esi, %eax
Fetch32PFI:
	movl	(%rdi), %eax
Fetch32Resume:
	ret

	.globl	SafeFetchN, FetchNPFI, FetchNResume
	.align	16
	// Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue)
	// Pointer-width variant of SafeFetch32; same PFI/Resume scheme.
SafeFetchN:
	movq	%rsi, %rax
FetchNPFI:
	movq	(%rdi), %rax
FetchNResume:
	ret

	.globl	SpinPause
	.align	16
/ Spin-wait hint: "rep; nop" is the encoding of the PAUSE
/ instruction. Always returns 1 in %rax.
SpinPause:
	rep
	nop
	movq	$1, %rax
	ret


/ Support for void Copy::arrayof_conjoint_bytes(void* from,
/                                               void* to,
/                                               size_t count)
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
/ Copies count bytes, choosing the direction so that overlapping
/ regions are handled correctly: ascending (CopyRight) when
/ to <= from or the regions are disjoint, descending (CopyLeft)
/ when to lies inside [from, from+count).
/
	.align	16
_Copy_arrayof_conjoint_bytes:
	movq	%rdx,%r8		/ byte count
	shrq	$3,%rdx			/ qword count
	cmpq	%rdi,%rsi
	leaq	-1(%rdi,%r8,1),%rax	/ from + bcount*1 - 1
	jbe	acb_CopyRight		/ to <= from: ascending copy is safe
	cmpq	%rax,%rsi
	jbe	acb_CopyLeft		/ to inside source: copy descending
acb_CopyRight:
	/ Ascending copy. %rdx is negated so the index runs from
	/ -qcount up to 0; %rax/%rcx point at the last qword of each
	/ buffer, so 8(%rax,%rdx,8) walks forward through the data.
	/ Main loop at 6: is unrolled 4x; 1: handles the remainder
	/ one qword at a time; 2:-4: copy the sub-qword tail.
	leaq	-8(%rdi,%rdx,8),%rax	/ from + qcount*8 - 8
	leaq	-8(%rsi,%rdx,8),%rcx	/ to + qcount*8 - 8
	negq	%rdx
	jmp	7f
	.align	16
1:	movq	8(%rax,%rdx,8),%rsi
	movq	%rsi,8(%rcx,%rdx,8)
	addq	$1,%rdx
	jnz	1b
2:	testq	$4,%r8			/ check for trailing dword
	jz	3f
	movl	8(%rax),%esi		/ copy trailing dword
	movl	%esi,8(%rcx)
	addq	$4,%rax
	addq	$4,%rcx			/ original %rsi is trashed, so we
					/ can't use it as a base register
3:	testq	$2,%r8			/ check for trailing word
	jz	4f
	movw	8(%rax),%si		/ copy trailing word
	movw	%si,8(%rcx)
	addq	$2,%rcx
4:	testq	$1,%r8			/ check for trailing byte
	jz	5f
	movb	-1(%rdi,%r8,1),%al	/ copy trailing byte (addressed via
					/ the original from, %rax is stale)
	movb	%al,8(%rcx)
5:	ret
	.align	16
6:	movq	-24(%rax,%rdx,8),%rsi	/ 4x unrolled ascending qword copy
	movq	%rsi,-24(%rcx,%rdx,8)
	movq	-16(%rax,%rdx,8),%rsi
	movq	%rsi,-16(%rcx,%rdx,8)
	movq	-8(%rax,%rdx,8),%rsi
	movq	%rsi,-8(%rcx,%rdx,8)
	movq	(%rax,%rdx,8),%rsi
	movq	%rsi,(%rcx,%rdx,8)
7:	addq	$4,%rdx			/ at least 4 qwords left? (index < 0)
	jle	6b
	subq	$4,%rdx
	jl	1b			/ 1-3 qwords left: single-qword loop
	jmp	2b			/ none left: do sub-qword tail
acb_CopyLeft:
	/ Descending copy: sub-qword tail first (highest addresses),
	/ then whole qwords from high to low. %rdx = qword count.
	testq	$1,%r8			/ check for trailing byte
	jz	1f
	movb	-1(%rdi,%r8,1),%cl	/ copy trailing byte
	movb	%cl,-1(%rsi,%r8,1)
	subq	$1,%r8			/ adjust for possible trailing word
1:	testq	$2,%r8			/ check for trailing word
	jz	2f
	movw	-2(%rdi,%r8,1),%cx	/ copy trailing word
	movw	%cx,-2(%rsi,%r8,1)
2:	testq	$4,%r8			/ check for trailing dword
	jz	5f
	movl	(%rdi,%rdx,8),%ecx	/ copy trailing dword (sits just
					/ past the last whole qword)
	movl	%ecx,(%rsi,%rdx,8)
	jmp	5f
	.align	16
3:	movq	-8(%rdi,%rdx,8),%rcx	/ single-qword descending loop
	movq	%rcx,-8(%rsi,%rdx,8)
	subq	$1,%rdx
	jnz	3b
	ret
	.align	16
4:	movq	24(%rdi,%rdx,8),%rcx	/ 4x unrolled descending qword copy
	movq	%rcx,24(%rsi,%rdx,8)
	movq	16(%rdi,%rdx,8),%rcx
	movq	%rcx,16(%rsi,%rdx,8)
	movq	8(%rdi,%rdx,8),%rcx
	movq	%rcx,8(%rsi,%rdx,8)
	movq	(%rdi,%rdx,8),%rcx
	movq	%rcx,(%rsi,%rdx,8)
5:	subq	$4,%rdx
	jge	4b
	addq	$4,%rdx
	jg	3b
	ret

/ Support for void Copy::arrayof_conjoint_jshorts(void* from,
/                                                 void* to,
/                                                 size_t count)
/ Equivalent to
/   conjoint_jshorts_atomic
/
/ If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
/ let the hardware handle it. The two or four words within dwords
/ or qwords that span cache line boundaries will still be loaded
/ and stored atomically.
/
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
	.align	16
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
	/ Same overall structure as _Copy_arrayof_conjoint_bytes:
	/ direction chosen by overlap test, bulk moved as qwords with
	/ a negative ascending index, tail handled per element size.
	/ Here count is in 2-byte jshorts, so the tail is at most a
	/ dword plus a word.
	movq	%rdx,%r8		/ word count
	shrq	$2,%rdx			/ qword count
	cmpq	%rdi,%rsi
	leaq	-2(%rdi,%r8,2),%rax	/ from + wcount*2 - 2
	jbe	acs_CopyRight		/ to <= from: ascending copy is safe
	cmpq	%rax,%rsi
	jbe	acs_CopyLeft		/ to inside source: copy descending
acs_CopyRight:
	leaq	-8(%rdi,%rdx,8),%rax	/ from + qcount*8 - 8
	leaq	-8(%rsi,%rdx,8),%rcx	/ to + qcount*8 - 8
	negq	%rdx			/ index runs -qcount .. 0
	jmp	6f
1:	movq	8(%rax,%rdx,8),%rsi
	movq	%rsi,8(%rcx,%rdx,8)
	addq	$1,%rdx
	jnz	1b
2:	testq	$2,%r8			/ check for trailing dword
	jz	3f
	movl	8(%rax),%esi		/ copy trailing dword
	movl	%esi,8(%rcx)
	addq	$4,%rcx			/ original %rsi is trashed, so we
					/ can't use it as a base register
3:	testq	$1,%r8			/ check for trailing word
	jz	4f
	movw	-2(%rdi,%r8,2),%si	/ copy trailing word (addressed via
					/ the original from, %rax is stale)
	movw	%si,8(%rcx)
4:	ret
	.align	16
5:	movq	-24(%rax,%rdx,8),%rsi	/ 4x unrolled ascending qword copy
	movq	%rsi,-24(%rcx,%rdx,8)
	movq	-16(%rax,%rdx,8),%rsi
	movq	%rsi,-16(%rcx,%rdx,8)
	movq	-8(%rax,%rdx,8),%rsi
	movq	%rsi,-8(%rcx,%rdx,8)
	movq	(%rax,%rdx,8),%rsi
	movq	%rsi,(%rcx,%rdx,8)
6:	addq	$4,%rdx			/ at least 4 qwords left? (index < 0)
	jle	5b
	subq	$4,%rdx
	jl	1b			/ 1-3 qwords left: single-qword loop
	jmp	2b			/ none left: do sub-qword tail
acs_CopyLeft:
	/ Descending copy: trailing word, then trailing dword, then
	/ whole qwords from high to low. %rdx = qword count.
	testq	$1,%r8			/ check for trailing word
	jz	1f
	movw	-2(%rdi,%r8,2),%cx	/ copy trailing word
	movw	%cx,-2(%rsi,%r8,2)
1:	testq	$2,%r8			/ check for trailing dword
	jz	4f
	movl	(%rdi,%rdx,8),%ecx	/ copy trailing dword (sits just
					/ past the last whole qword)
	movl	%ecx,(%rsi,%rdx,8)
	jmp	4f
2:	movq	-8(%rdi,%rdx,8),%rcx	/ single-qword descending loop
	movq	%rcx,-8(%rsi,%rdx,8)
	subq	$1,%rdx
	jnz	2b
	ret
	.align	16
3:	movq	24(%rdi,%rdx,8),%rcx	/ 4x unrolled descending qword copy
	movq	%rcx,24(%rsi,%rdx,8)
	movq	16(%rdi,%rdx,8),%rcx
	movq	%rcx,16(%rsi,%rdx,8)
	movq	8(%rdi,%rdx,8),%rcx
	movq	%rcx,8(%rsi,%rdx,8)
	movq	(%rdi,%rdx,8),%rcx
	movq	%rcx,(%rsi,%rdx,8)
4:	subq	$4,%rdx
	jge	3b
	addq	$4,%rdx
	jg	2b
	ret
/ Support for void Copy::arrayof_conjoint_jints(jint* from,
/                                               jint* to,
/                                               size_t count)
/ Equivalent to
/   conjoint_jints_atomic
/
/ If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
/ the hardware handle it. The two dwords within qwords that span
/ cache line boundaries will still be loaded and stored atomically.
/
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
	.align	16
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
	/ Same structure as the jshorts stub; count is in 4-byte
	/ jints, so the tail is at most one dword.
	movq	%rdx,%r8		/ dword count
	shrq	%rdx			/ qword count (shift right by 1)
	cmpq	%rdi,%rsi
	leaq	-4(%rdi,%r8,4),%rax	/ from + dcount*4 - 4
	jbe	aci_CopyRight		/ to <= from: ascending copy is safe
	cmpq	%rax,%rsi
	jbe	aci_CopyLeft		/ to inside source: copy descending
aci_CopyRight:
	leaq	-8(%rdi,%rdx,8),%rax	/ from + qcount*8 - 8
	leaq	-8(%rsi,%rdx,8),%rcx	/ to + qcount*8 - 8
	negq	%rdx			/ index runs -qcount .. 0
	jmp	5f
	.align	16
1:	movq	8(%rax,%rdx,8),%rsi
	movq	%rsi,8(%rcx,%rdx,8)
	addq	$1,%rdx
	jnz	1b
2:	testq	$1,%r8			/ check for trailing dword
	jz	3f
	movl	8(%rax),%esi		/ copy trailing dword
	movl	%esi,8(%rcx)
3:	ret
	.align	16
4:	movq	-24(%rax,%rdx,8),%rsi	/ 4x unrolled ascending qword copy
	movq	%rsi,-24(%rcx,%rdx,8)
	movq	-16(%rax,%rdx,8),%rsi
	movq	%rsi,-16(%rcx,%rdx,8)
	movq	-8(%rax,%rdx,8),%rsi
	movq	%rsi,-8(%rcx,%rdx,8)
	movq	(%rax,%rdx,8),%rsi
	movq	%rsi,(%rcx,%rdx,8)
5:	addq	$4,%rdx			/ at least 4 qwords left? (index < 0)
	jle	4b
	subq	$4,%rdx
	jl	1b			/ 1-3 qwords left: single-qword loop
	jmp	2b			/ none left: do trailing dword
aci_CopyLeft:
	/ Descending copy: trailing dword first, then whole qwords
	/ from high to low. %rdx = qword count.
	testq	$1,%r8			/ check for trailing dword
	jz	3f
	movl	-4(%rdi,%r8,4),%ecx	/ copy trailing dword
	movl	%ecx,-4(%rsi,%r8,4)
	jmp	3f
1:	movq	-8(%rdi,%rdx,8),%rcx	/ single-qword descending loop
	movq	%rcx,-8(%rsi,%rdx,8)
	subq	$1,%rdx
	jnz	1b
	ret
	.align	16
2:	movq	24(%rdi,%rdx,8),%rcx	/ 4x unrolled descending qword copy
	movq	%rcx,24(%rsi,%rdx,8)
	movq	16(%rdi,%rdx,8),%rcx
	movq	%rcx,16(%rsi,%rdx,8)
	movq	8(%rdi,%rdx,8),%rcx
	movq	%rcx,8(%rsi,%rdx,8)
	movq	(%rdi,%rdx,8),%rcx
	movq	%rcx,(%rsi,%rdx,8)
3:	subq	$4,%rdx
	jge	2b
	addq	$4,%rdx
	jg	1b
	ret
/ Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
/                                                jlong* to,
/                                                size_t count)
/ Equivalent to
/   conjoint_jlongs_atomic
/   arrayof_conjoint_oops
/   conjoint_oops_atomic
/
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
	.align	16
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
	/ count is already in 8-byte units, so there is no sub-qword
	/ tail; every element moves with a single atomic qword access.
	cmpq	%rdi,%rsi
	leaq	-8(%rdi,%rdx,8),%rax	/ from + count*8 - 8
	jbe	acl_CopyRight		/ to <= from: ascending copy is safe
	cmpq	%rax,%rsi
	jbe	acl_CopyLeft		/ to inside source: copy descending
acl_CopyRight:
	/ Ascending copy with negative index running -count .. 0;
	/ 4x unrolled main loop at 2:, single-qword cleanup at 1:.
	leaq	-8(%rsi,%rdx,8),%rcx	/ to + count*8 - 8
	negq	%rdx
	jmp	3f
1:	movq	8(%rax,%rdx,8),%rsi
	movq	%rsi,8(%rcx,%rdx,8)
	addq	$1,%rdx
	jnz	1b
	ret
	.align	16
2:	movq	-24(%rax,%rdx,8),%rsi	/ 4x unrolled ascending qword copy
	movq	%rsi,-24(%rcx,%rdx,8)
	movq	-16(%rax,%rdx,8),%rsi
	movq	%rsi,-16(%rcx,%rdx,8)
	movq	-8(%rax,%rdx,8),%rsi
	movq	%rsi,-8(%rcx,%rdx,8)
	movq	(%rax,%rdx,8),%rsi
	movq	%rsi,(%rcx,%rdx,8)
3:	addq	$4,%rdx			/ at least 4 qwords left? (index < 0)
	jle	2b
	subq	$4,%rdx
	jl	1b			/ 1-3 qwords left: single-qword loop
	ret
4:	movq	-8(%rdi,%rdx,8),%rcx	/ single-qword descending loop
	movq	%rcx,-8(%rsi,%rdx,8)
	subq	$1,%rdx
	jnz	4b
	ret
	.align	16
5:	movq	24(%rdi,%rdx,8),%rcx	/ 4x unrolled descending qword copy
	movq	%rcx,24(%rsi,%rdx,8)
	movq	16(%rdi,%rdx,8),%rcx
	movq	%rcx,16(%rsi,%rdx,8)
	movq	8(%rdi,%rdx,8),%rcx
	movq	%rcx,8(%rsi,%rdx,8)
	movq	(%rdi,%rdx,8),%rcx
	movq	%rcx,(%rsi,%rdx,8)
acl_CopyLeft:
	/ Descending copy: whole qwords from high to low, %rdx = count.
	subq	$4,%rdx
	jge	5b
	addq	$4,%rdx
	jg	4b
	ret