#
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#

# Symbol-decoration helpers, selected by the C preprocessor:
#   SYMBOL   - spells a global symbol name.  With __APPLE__ defined the
#              name is pasted onto a leading underscore; otherwise the
#              name is used unchanged.
#   ELF_TYPE - emits a .type directive for a symbol.  With __APPLE__
#              defined it expands to nothing.
#ifdef __APPLE__
# Darwin uses _ prefixed global symbols
#define SYMBOL(s) _ ## s
#define ELF_TYPE(name, description)
#else
#define SYMBOL(s) s
#define ELF_TYPE(name, description) .type name,description
#endif

        # NOTE WELL! The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.

        # Exported entry points.  Every name goes through the SYMBOL
        # macro so that the platform symbol prefix is applied.
        .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
        .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
        .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jints)
        .globl SYMBOL(_Copy_conjoint_jints_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jlongs)
        .globl SYMBOL(_Copy_conjoint_jlongs_atomic)

        .text

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Copies count bytes.  The copy direction is chosen from the
        # pointer values with unsigned compares:
        #   to <= from, or to above the last source byte
        #       -> ascending copy  (acb_CopyRight)
        #   from < to <= address of the last source byte
        #       -> descending copy (acb_CopyLeft)
        # The bulk moves as qwords: four per iteration in the unrolled
        # loops, one per iteration for the remaining one to three.  The
        # count % 8 leftover bytes move as at most one dword, one word
        # and one byte, selected by bits 2, 1 and 0 of the byte count.
        #
        # Working registers:
        #   r8  - byte count
        #   rdx - qword count (count >> 3), used as the loop index
        #   acb_CopyRight: rax/rcx = source/destination base, rsi = data
        #   acb_CopyLeft:  rdi/rsi = source/destination base, rcx = data
        # Only rax, rcx, rdx, rsi, r8 and the flags are written; rdi is
        # never modified and no stack slot is used.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
SYMBOL(_Copy_arrayof_conjoint_bytes):
        movq     %rdx,%r8             # byte count
        shrq     $3,%rdx              # qword count
        cmpq     %rdi,%rsi            # to - from; leaq below leaves flags alone
        leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
        jbe      acb_CopyRight        # to <= from: copy ascending
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft         # to inside source range: copy descending
acb_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx                 # rdx = -qcount, counts up towards zero
        jmp      7f                   # enter at the unrolled-loop test
        .p2align 4,,15
        # Single-qword loop: moves the one to three qwords that the
        # unrolled loop left over; rdx is negative on entry.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        # Tail: rdx is zero here, so 8(%rax) addresses the first byte
        # after the copied qwords.
2:      testq    $4,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx              # original %rsi is trashed, so we
                                      # can't use it as a base register
3:      testq    $2,%r8               # check for trailing word
        jz       4f
        movw     8(%rax),%si          # copy trailing word
        movw     %si,8(%rcx)
        addq     $2,%rcx
4:      testq    $1,%r8               # check for trailing byte
        jz       5f
        movb     -1(%rdi,%r8,1),%al   # copy trailing byte
        movb     %al,8(%rcx)
5:      ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, lowest address first.
6:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
7:      addq     $4,%rdx
        jle      6b                   # four or more qwords were remaining
        subq     $4,%rdx              # undo the step that overshot
        jl       1b                   # one to three qwords remain
        jmp      2b                   # no qwords remain
acb_CopyLeft:
        # Descending copy: the count % 8 tail at the top of the ranges
        # goes first (byte, word, dword), then the qwords from the
        # highest address down.
        testq    $1,%r8               # check for trailing byte
        jz       1f
        movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8               # adjust for possible trailing word
1:      testq    $2,%r8               # check for trailing word
        jz       2f
        movw     -2(%rdi,%r8,1),%cx   # copy trailing word
        movw     %cx,-2(%rsi,%r8,1)
2:      testq    $4,%r8               # check for trailing dword
        jz       5f
        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      5f                   # enter at the unrolled-loop test
        .p2align 4,,15
        # Single-qword loop: rdx holds the one to three qwords left.
3:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      3b
        ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, highest address first.
4:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
5:      subq     $4,%rdx
        jge      4b                   # four or more qwords were remaining
        addq     $4,%rdx              # undo the step that overshot
        jg       3b                   # one to three qwords remain
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        # Equivalent to
        #   conjoint_jshorts_atomic
        #
        # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
        # let the hardware handle it. The two or four words within dwords
        # or qwords that span cache line boundaries will still be loaded
        # and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Both jshorts entry points below label the same code.  The copy
        # direction is chosen from the pointer values with unsigned
        # compares:
        #   to <= from, or to above the last source jshort
        #       -> ascending copy  (acs_CopyRight)
        #   from < to <= address of the last source jshort
        #       -> descending copy (acs_CopyLeft)
        # Data moves as qwords (four jshorts each); the count % 4
        # leftover moves as at most one dword and one word, selected by
        # bits 1 and 0 of the jshort count.
        #
        # Working registers:
        #   r8  - jshort count
        #   rdx - qword count (count >> 2), used as the loop index
        #   acs_CopyRight: rax/rcx = source/destination base, rsi = data
        #   acs_CopyLeft:  rdi/rsi = source/destination base, rcx = data
        # Only rax, rcx, rdx, rsi, r8 and the flags are written.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
        ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jshorts):
SYMBOL(_Copy_conjoint_jshorts_atomic):
        movq     %rdx,%r8             # word count
        shrq     $2,%rdx              # qword count
        cmpq     %rdi,%rsi            # to - from; leaq below leaves flags alone
        leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
        jbe      acs_CopyRight        # to <= from: copy ascending
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft         # to inside source range: copy descending
acs_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx                 # rdx = -qcount, counts up towards zero
        jmp      6f                   # enter at the unrolled-loop test
        # Single-qword loop: moves the one to three qwords that the
        # unrolled loop left over; rdx is negative on entry.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        # Tail: rdx is zero here, so 8(%rax) addresses the first jshort
        # after the copied qwords.
2:      testq    $2,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rcx              # original %rsi is trashed, so we
                                      # can't use it as a base register
3:      testq    $1,%r8               # check for trailing word
        jz       4f
        movw     -2(%rdi,%r8,2),%si   # copy trailing word
        movw     %si,8(%rcx)
4:      ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, lowest address first.
5:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
6:      addq     $4,%rdx
        jle      5b                   # four or more qwords were remaining
        subq     $4,%rdx              # undo the step that overshot
        jl       1b                   # one to three qwords remain
        jmp      2b                   # no qwords remain
acs_CopyLeft:
        # Descending copy: trailing word and dword at the top of the
        # ranges go first, then the qwords from the highest address down.
        testq    $1,%r8               # check for trailing word
        jz       1f
        movw     -2(%rdi,%r8,2),%cx   # copy trailing word
        movw     %cx,-2(%rsi,%r8,2)
1:      testq    $2,%r8               # check for trailing dword
        jz       4f
        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      4f                   # enter at the unrolled-loop test
        # Single-qword loop: rdx holds the one to three qwords left.
2:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      2b
        ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, highest address first.
3:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
4:      subq     $4,%rdx
        jge      3b                   # four or more qwords were remaining
        addq     $4,%rdx              # undo the step that overshot
        jg       2b                   # one to three qwords remain
        ret

        # Support for void Copy::arrayof_conjoint_jints(jint* from,
        #                                               jint* to,
        #                                               size_t count)
        # Equivalent to
        #   conjoint_jints_atomic
        #
        # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
        # the hardware handle it. The two dwords within qwords that span
        # cache line boundaries will still be loaded and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Both jints entry points below label the same code.  The copy
        # direction is chosen from the pointer values with unsigned
        # compares:
        #   to <= from, or to above the last source jint
        #       -> ascending copy  (aci_CopyRight)
        #   from < to <= address of the last source jint
        #       -> descending copy (aci_CopyLeft)
        # Data moves as qwords (two jints each); an odd count adds one
        # dword, selected by bit 0 of the jint count.
        #
        # Working registers:
        #   r8  - jint count
        #   rdx - qword count (count >> 1), used as the loop index
        #   aci_CopyRight: rax/rcx = source/destination base, rsi = data
        #   aci_CopyLeft:  rdi/rsi = source/destination base, rcx = data
        # Only rax, rcx, rdx, rsi, r8 and the flags are written.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
        ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jints):
SYMBOL(_Copy_conjoint_jints_atomic):
        movq     %rdx,%r8             # dword count
        shrq     %rdx                 # qword count
        cmpq     %rdi,%rsi            # to - from; leaq below leaves flags alone
        leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
        jbe      aci_CopyRight        # to <= from: copy ascending
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft         # to inside source range: copy descending
aci_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx                 # rdx = -qcount, counts up towards zero
        jmp      5f                   # enter at the unrolled-loop test
        .p2align 4,,15
        # Single-qword loop: moves the one to three qwords that the
        # unrolled loop left over; rdx is negative on entry.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        # Tail: rdx is zero here, so 8(%rax) addresses the first jint
        # after the copied qwords.
2:      testq    $1,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
3:      ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, lowest address first.
4:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
5:      addq     $4,%rdx
        jle      4b                   # four or more qwords were remaining
        subq     $4,%rdx              # undo the step that overshot
        jl       1b                   # one to three qwords remain
        jmp      2b                   # no qwords remain
aci_CopyLeft:
        # Descending copy: the trailing dword at the top of the ranges
        # goes first, then the qwords from the highest address down.
        testq    $1,%r8               # check for trailing dword
        jz       3f
        movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      3f                   # enter at the unrolled-loop test
        # Single-qword loop: rdx holds the one to three qwords left.
1:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, highest address first.
2:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
3:      subq     $4,%rdx
        jge      2b                   # four or more qwords were remaining
        addq     $4,%rdx              # undo the step that overshot
        jg       1b                   # one to three qwords remain
        ret

        # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
        #                                                jlong* to,
        #                                                size_t count)
        # Equivalent to
        #   conjoint_jlongs_atomic
        #   arrayof_conjoint_oops
        #   conjoint_oops_atomic
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Both jlongs entry points below label the same code.  The copy
        # direction is chosen from the pointer values with unsigned
        # compares:
        #   to <= from, or to above the last source jlong
        #       -> ascending copy  (acl_CopyRight)
        #   from < to <= address of the last source jlong
        #       -> descending copy (acl_CopyLeft)
        # Every element is one qword, so there is no tail handling and
        # rdx is used directly as the loop index.
        #
        # Working registers:
        #   acl_CopyRight: rax/rcx = source/destination base, rsi = data
        #   acl_CopyLeft:  rdi/rsi = source/destination base, rcx = data
        # Only rax, rcx, rdx, rsi and the flags are written.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jlongs,@function)
        ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jlongs):
SYMBOL(_Copy_conjoint_jlongs_atomic):
        cmpq     %rdi,%rsi            # to - from; leaq below leaves flags alone
        leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
        jbe      acl_CopyRight        # to <= from: copy ascending
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft         # to inside source range: copy descending
acl_CopyRight:
        leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
        negq     %rdx                 # rdx = -count, counts up towards zero
        jmp      3f                   # enter at the unrolled-loop test
        # Single-qword loop: moves the one to three qwords that the
        # unrolled loop left over; rdx is negative on entry.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, lowest address first.
2:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
3:      addq     $4,%rdx
        jle      2b                   # four or more qwords were remaining
        subq     $4,%rdx              # undo the step that overshot
        jl       1b                   # one to three qwords remain
        ret
        # Descending single-qword loop: rdx holds the one to three
        # qwords left.
4:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      4b
        ret
        .p2align 4,,15
        # Descending unrolled loop: four qwords per iteration, highest
        # address first.
5:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
        # Entry point of the descending copy, placed at the loop test.
        subq     $4,%rdx
        jge      5b                   # four or more qwords were remaining
        addq     $4,%rdx              # undo the step that overshot
        jg       4b                   # one to three qwords remain
        ret