1 / 2 / Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. 3 / DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 / 5 / This code is free software; you can redistribute it and/or modify it 6 / under the terms of the GNU General Public License version 2 only, as 7 / published by the Free Software Foundation. 8 / 9 / This code is distributed in the hope that it will be useful, but WITHOUT 10 / ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 / FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 / version 2 for more details (a copy is included in the LICENSE file that 13 / accompanied this code). 14 / 15 / You should have received a copy of the GNU General Public License version 16 / 2 along with this work; if not, write to the Free Software Foundation, 17 / Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 / 19 / Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 / or visit www.oracle.com if you need additional information or have any 21 / questions. 22 / 23 24 .globl fs_load 25 .globl fs_thread 26 27 // NOTE WELL! The _Copy functions are called directly 28 // from server-compiler-generated code via CallLeafNoFP, 29 // which means that they *must* either not use floating 30 // point or use it in the same manner as does the server 31 // compiler. 
	.globl _Copy_arrayof_conjoint_bytes
	.globl _Copy_conjoint_jshorts_atomic
	.globl _Copy_arrayof_conjoint_jshorts
	.globl _Copy_conjoint_jints_atomic
	.globl _Copy_arrayof_conjoint_jints
	.globl _Copy_conjoint_jlongs_atomic
	.globl _Copy_arrayof_conjoint_jlongs

	.section .text,"ax"

/ Fast thread accessors, used by threadLS_solaris_amd64.cpp
/
/ fs_load: load the 64-bit value at offset %rdi from the %fs segment base.
/   rdi - offset into the %fs segment
/   rax - (return) qword read from %fs:[rdi]
	.align 16
fs_load:
	movq %fs:(%rdi),%rax
	ret

/ fs_thread: load the 64-bit value at %fs:0.
/   rax - (return) qword read from %fs:0
/ NOTE(review): presumably the thread self-pointer stored at the base of
/ the thread-local block -- confirm against threadLS_solaris_amd64.cpp.
	.align 16
fs_thread:
	movq %fs:0x0,%rax
	ret

/ SpinPause: a single spin-wait pause.
/ "rep; nop" is the encoding of the PAUSE instruction, which hints to
/ the CPU that this is a spin loop.  Returns 1 in %rax.
	.globl SpinPause
	.align 16
SpinPause:
	rep
	nop
	movq $1, %rax
	ret


/ Support for void Copy::arrayof_conjoint_bytes(void* from,
/                                               void* to,
/                                               size_t count)
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
/ Overlap-safe byte copy (memmove semantics): if 'to' lies inside the
/ source range, the copy runs descending (acb_CopyLeft); otherwise it
/ runs ascending (acb_CopyRight).  The bulk moves as qwords -- a 4x
/ unrolled loop plus a single-qword loop -- and the remaining 0-7
/ bytes are finished as dword/word/byte tails.
/ Clobbers: rax, rcx, rdx, rsi, r8, flags (rsi/rdx are argument
/ registers reused as scratch; caller-saved under the SysV AMD64 ABI).
/
	.align 16
_Copy_arrayof_conjoint_bytes:
	movq %rdx,%r8            / r8  = byte count
	shrq $3,%rdx             / rdx = qword count
	cmpq %rdi,%rsi
	leaq -1(%rdi,%r8,1),%rax / from + bcount*1 - 1 = last source byte
	jbe acb_CopyRight        / to <= from: ascending copy is safe
	cmpq %rax,%rsi
	jbe acb_CopyLeft         / to inside [from, from+count): copy descending
acb_CopyRight:
	/ Ascending copy.  rax/rcx point at the last qword of source and
	/ destination; rdx becomes a negative qword index counting up to 0.
	leaq -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
	leaq -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
	negq %rdx
	jmp 7f                   / enter at the unrolled-loop test
	.align 16
1:	movq 8(%rax,%rdx,8),%rsi / single-qword loop for the last 1-3 qwords
	movq %rsi,8(%rcx,%rdx,8)
	addq $1,%rdx
	jnz 1b
2:	testq $4,%r8             / check for trailing dword
	jz 3f
	movl 8(%rax),%esi        / copy trailing dword
	movl %esi,8(%rcx)
	addq $4,%rax
	addq $4,%rcx             / original %rsi is trashed, so we
	                         / can't use it as a base register
3:	testq $2,%r8             / check for trailing word
	jz 4f
	movw 8(%rax),%si         / copy trailing word
	movw %si,8(%rcx)
	addq $2,%rcx
4:	testq $1,%r8             / check for trailing byte
	jz 5f
	movb -1(%rdi,%r8,1),%al  / copy trailing byte
	movb %al,8(%rcx)
5:	ret
	.align 16
6:	movq -24(%rax,%rdx,8),%rsi / 4x unrolled qword copy, ascending
	movq %rsi,-24(%rcx,%rdx,8)
	movq -16(%rax,%rdx,8),%rsi
	movq %rsi,-16(%rcx,%rdx,8)
	movq -8(%rax,%rdx,8),%rsi
	movq %rsi,-8(%rcx,%rdx,8)
	movq (%rax,%rdx,8),%rsi
	movq %rsi,(%rcx,%rdx,8)
7:	addq $4,%rdx             / at least 4 qwords left?
	jle 6b                   / yes: take another unrolled pass
	subq $4,%rdx             / no: undo the bias ...
	jl 1b                    / ... copy leftover qwords one at a time
	jmp 2b                   / ... then the sub-qword tail
acb_CopyLeft:
	/ Descending copy for overlapping ranges: sub-qword tail first
	/ (from the high end), then qwords downward.
	testq $1,%r8             / check for trailing byte
	jz 1f
	movb -1(%rdi,%r8,1),%cl  / copy trailing byte
	movb %cl,-1(%rsi,%r8,1)
	subq $1,%r8              / adjust for possible trailing word
1:	testq $2,%r8             / check for trailing word
	jz 2f
	movw -2(%rdi,%r8,1),%cx  / copy trailing word
	movw %cx,-2(%rsi,%r8,1)
2:	testq $4,%r8             / check for trailing dword
	jz 5f
	movl (%rdi,%rdx,8),%ecx  / copy trailing dword
	movl %ecx,(%rsi,%rdx,8)
	jmp 5f
	.align 16
3:	movq -8(%rdi,%rdx,8),%rcx / single-qword loop, descending
	movq %rcx,-8(%rsi,%rdx,8)
	subq $1,%rdx
	jnz 3b
	ret
	.align 16
4:	movq 24(%rdi,%rdx,8),%rcx / 4x unrolled qword copy, descending
	movq %rcx,24(%rsi,%rdx,8)
	movq 16(%rdi,%rdx,8),%rcx
	movq %rcx,16(%rsi,%rdx,8)
	movq 8(%rdi,%rdx,8),%rcx
	movq %rcx,8(%rsi,%rdx,8)
	movq (%rdi,%rdx,8),%rcx
	movq %rcx,(%rsi,%rdx,8)
5:	subq $4,%rdx             / at least 4 qwords left?
	jge 4b                   / yes: another unrolled pass
	addq $4,%rdx
	jg 3b                    / copy leftover qwords one at a time
	ret

/ Support for void Copy::arrayof_conjoint_jshorts(void* from,
/                                                 void* to,
/                                                 size_t count)
/ Equivalent to
/   conjoint_jshorts_atomic
/
/ If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
/ let the hardware handle it. The two or four words within dwords
/ or qwords that span cache line boundaries will still be loaded
/ and stored atomically.
/
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
/ Overlap-safe jshort (16-bit element) copy: descending (acs_CopyLeft)
/ when 'to' lies inside the source range, ascending otherwise.  The
/ bulk moves as qwords (4x unrolled loop + single-qword loop); the
/ last 0-3 words are finished as a dword and/or word tail, so a
/ jshort is never split across two accesses.
/ Clobbers: rax, rcx, rdx, rsi, r8, flags.
/
	.align 16
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
	movq %rdx,%r8            / r8  = word count
	shrq $2,%rdx             / rdx = qword count
	cmpq %rdi,%rsi
	leaq -2(%rdi,%r8,2),%rax / from + wcount*2 - 2 = last source word
	jbe acs_CopyRight        / to <= from: ascending copy is safe
	cmpq %rax,%rsi
	jbe acs_CopyLeft         / to inside source range: copy descending
acs_CopyRight:
	/ Ascending copy.  rax/rcx point at the last qword of source and
	/ destination; rdx becomes a negative qword index counting up to 0.
	leaq -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
	leaq -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
	negq %rdx
	jmp 6f                   / enter at the unrolled-loop test
1:	movq 8(%rax,%rdx,8),%rsi / single-qword loop for the last 1-3 qwords
	movq %rsi,8(%rcx,%rdx,8)
	addq $1,%rdx
	jnz 1b
2:	testq $2,%r8             / check for trailing dword
	jz 3f
	movl 8(%rax),%esi        / copy trailing dword
	movl %esi,8(%rcx)
	addq $4,%rcx             / original %rsi is trashed, so we
	                         / can't use it as a base register
3:	testq $1,%r8             / check for trailing word
	jz 4f
	movw -2(%rdi,%r8,2),%si  / copy trailing word
	movw %si,8(%rcx)
4:	ret
	.align 16
5:	movq -24(%rax,%rdx,8),%rsi / 4x unrolled qword copy, ascending
	movq %rsi,-24(%rcx,%rdx,8)
	movq -16(%rax,%rdx,8),%rsi
	movq %rsi,-16(%rcx,%rdx,8)
	movq -8(%rax,%rdx,8),%rsi
	movq %rsi,-8(%rcx,%rdx,8)
	movq (%rax,%rdx,8),%rsi
	movq %rsi,(%rcx,%rdx,8)
6:	addq $4,%rdx             / at least 4 qwords left?
	jle 5b                   / yes: take another unrolled pass
	subq $4,%rdx             / no: undo the bias ...
	jl 1b                    / ... copy leftover qwords one at a time
	jmp 2b                   / ... then the sub-qword tail
acs_CopyLeft:
	/ Descending copy for overlapping ranges: word/dword tail first
	/ (from the high end), then qwords downward.
	testq $1,%r8             / check for trailing word
	jz 1f
	movw -2(%rdi,%r8,2),%cx  / copy trailing word
	movw %cx,-2(%rsi,%r8,2)
1:	testq $2,%r8             / check for trailing dword
	jz 4f
	movl (%rdi,%rdx,8),%ecx  / copy trailing dword
	movl %ecx,(%rsi,%rdx,8)
	jmp 4f
2:	movq -8(%rdi,%rdx,8),%rcx / single-qword loop, descending
	movq %rcx,-8(%rsi,%rdx,8)
	subq $1,%rdx
	jnz 2b
	ret
	.align 16
3:	movq 24(%rdi,%rdx,8),%rcx / 4x unrolled qword copy, descending
	movq %rcx,24(%rsi,%rdx,8)
	movq 16(%rdi,%rdx,8),%rcx
	movq %rcx,16(%rsi,%rdx,8)
	movq 8(%rdi,%rdx,8),%rcx
	movq %rcx,8(%rsi,%rdx,8)
	movq (%rdi,%rdx,8),%rcx
	movq %rcx,(%rsi,%rdx,8)
4:	subq $4,%rdx             / at least 4 qwords left?
	jge 3b                   / yes: another unrolled pass
	addq $4,%rdx
	jg 2b                    / copy leftover qwords one at a time
	ret

/ Support
/ Support for void Copy::arrayof_conjoint_jints(jint* from,
/                                               jint* to,
/                                               size_t count)
/ Equivalent to
/   conjoint_jints_atomic
/
/ If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
/ the hardware handle it. The two dwords within qwords that span
/ cache line boundaries will still be loaded and stored atomically.
/
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
/ Overlap-safe jint (32-bit element) copy: descending (aci_CopyLeft)
/ when 'to' lies inside the source range, ascending otherwise.  The
/ bulk moves as qwords (4x unrolled loop + single-qword loop); an
/ odd final dword is copied separately, so a jint is never split.
/ Clobbers: rax, rcx, rdx, rsi, r8, flags.
/
	.align 16
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
	movq %rdx,%r8            / r8  = dword count
	shrq %rdx                / rdx = qword count (implicit shift by 1)
	cmpq %rdi,%rsi
	leaq -4(%rdi,%r8,4),%rax / from + dcount*4 - 4 = last source dword
	jbe aci_CopyRight        / to <= from: ascending copy is safe
	cmpq %rax,%rsi
	jbe aci_CopyLeft         / to inside source range: copy descending
aci_CopyRight:
	/ Ascending copy.  rax/rcx point at the last qword of source and
	/ destination; rdx becomes a negative qword index counting up to 0.
	leaq -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
	leaq -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
	negq %rdx
	jmp 5f                   / enter at the unrolled-loop test
	.align 16
1:	movq 8(%rax,%rdx,8),%rsi / single-qword loop for the last 1-3 qwords
	movq %rsi,8(%rcx,%rdx,8)
	addq $1,%rdx
	jnz 1b
2:	testq $1,%r8             / check for trailing dword
	jz 3f
	movl 8(%rax),%esi        / copy trailing dword
	movl %esi,8(%rcx)
3:	ret
	.align 16
4:	movq -24(%rax,%rdx,8),%rsi / 4x unrolled qword copy, ascending
	movq %rsi,-24(%rcx,%rdx,8)
	movq -16(%rax,%rdx,8),%rsi
	movq %rsi,-16(%rcx,%rdx,8)
	movq -8(%rax,%rdx,8),%rsi
	movq %rsi,-8(%rcx,%rdx,8)
	movq (%rax,%rdx,8),%rsi
	movq %rsi,(%rcx,%rdx,8)
5:	addq $4,%rdx             / at least 4 qwords left?
	jle 4b                   / yes: take another unrolled pass
	subq $4,%rdx             / no: undo the bias ...
	jl 1b                    / ... copy leftover qwords one at a time
	jmp 2b                   / ... then the trailing dword, if any
aci_CopyLeft:
	/ Descending copy for overlapping ranges: trailing dword first,
	/ then qwords downward.
	testq $1,%r8             / check for trailing dword
	jz 3f
	movl -4(%rdi,%r8,4),%ecx / copy trailing dword
	movl %ecx,-4(%rsi,%r8,4)
	jmp 3f
1:	movq -8(%rdi,%rdx,8),%rcx / single-qword loop, descending
	movq %rcx,-8(%rsi,%rdx,8)
	subq $1,%rdx
	jnz 1b
	ret
	.align 16
2:	movq 24(%rdi,%rdx,8),%rcx / 4x unrolled qword copy, descending
	movq %rcx,24(%rsi,%rdx,8)
	movq 16(%rdi,%rdx,8),%rcx
	movq %rcx,16(%rsi,%rdx,8)
	movq 8(%rdi,%rdx,8),%rcx
	movq %rcx,8(%rsi,%rdx,8)
	movq (%rdi,%rdx,8),%rcx
	movq %rcx,(%rsi,%rdx,8)
3:	subq $4,%rdx             / at least 4 qwords left?
	jge 2b                   / yes: another unrolled pass
	addq $4,%rdx
	jg 1b                    / copy leftover qwords one at a time
	ret

/ Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
/                                                jlong* to,
/                                                size_t count)
/ Equivalent to
/   conjoint_jlongs_atomic
/   arrayof_conjoint_oops
/   conjoint_oops_atomic
/
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
/ Overlap-safe jlong/oop (64-bit element) copy.  Elements are whole
/ qwords, so no sub-qword tail is needed: a 4x unrolled loop plus a
/ single-qword loop, run ascending (acl_CopyRight) or descending
/ (acl_CopyLeft) depending on overlap.
/ Clobbers: rax, rcx, rdx, rsi, flags.
/
	.align 16
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
	cmpq %rdi,%rsi
	leaq -8(%rdi,%rdx,8),%rax / from + count*8 - 8 = last source qword
	jbe acl_CopyRight        / to <= from: ascending copy is safe
	cmpq %rax,%rsi
	jbe acl_CopyLeft         / to inside source range: copy descending
acl_CopyRight:
	/ Ascending copy.  rax/rcx point at the last qword of source and
	/ destination; rdx becomes a negative qword index counting up to 0.
	leaq -8(%rsi,%rdx,8),%rcx / to + count*8 - 8
	negq %rdx
	jmp 3f                   / enter at the unrolled-loop test
1:	movq 8(%rax,%rdx,8),%rsi / single-qword loop for the last 1-3 qwords
	movq %rsi,8(%rcx,%rdx,8)
	addq $1,%rdx
	jnz 1b
	ret
	.align 16
2:	movq -24(%rax,%rdx,8),%rsi / 4x unrolled qword copy, ascending
	movq %rsi,-24(%rcx,%rdx,8)
	movq -16(%rax,%rdx,8),%rsi
	movq %rsi,-16(%rcx,%rdx,8)
	movq -8(%rax,%rdx,8),%rsi
	movq %rsi,-8(%rcx,%rdx,8)
	movq (%rax,%rdx,8),%rsi
	movq %rsi,(%rcx,%rdx,8)
3:	addq $4,%rdx             / at least 4 qwords left?
	jle 2b                   / yes: take another unrolled pass
	subq $4,%rdx             / no: undo the bias ...
	jl 1b                    / ... copy leftover qwords one at a time
	ret
4:	movq -8(%rdi,%rdx,8),%rcx / single-qword loop, descending
	movq %rcx,-8(%rsi,%rdx,8)
	subq $1,%rdx
	jnz 4b
	ret
	.align 16
5:	movq 24(%rdi,%rdx,8),%rcx / 4x unrolled qword copy, descending
	movq %rcx,24(%rsi,%rdx,8)
	movq 16(%rdi,%rdx,8),%rcx
	movq %rcx,16(%rsi,%rdx,8)
	movq 8(%rdi,%rdx,8),%rcx
	movq %rcx,8(%rsi,%rdx,8)
	movq (%rdi,%rdx,8),%rcx
	movq %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
	subq $4,%rdx             / at least 4 qwords left?
	jge 5b                   / yes: another unrolled pass
	addq $4,%rdx
	jg 4b                    / copy leftover qwords one at a time
	ret