1 # 2 # Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. 3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 # 5 # This code is free software; you can redistribute it and/or modify it 6 # under the terms of the GNU General Public License version 2 only, as 7 # published by the Free Software Foundation. 8 # 9 # This code is distributed in the hope that it will be useful, but WITHOUT 10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 # version 2 for more details (a copy is included in the LICENSE file that 13 # accompanied this code). 14 # 15 # You should have received a copy of the GNU General Public License version 16 # 2 along with this work; if not, write to the Free Software Foundation, 17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 # 19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 # or visit www.oracle.com if you need additional information or have any 21 # questions. 22 # 23 24 25 # NOTE WELL! The _Copy functions are called directly 26 # from server-compiler-generated code via CallLeafNoFP, 27 # which means that they *must* either not use floating 28 # point or use it in the same manner as does the server 29 # compiler. 
# Exported entry points.  Each routine copies 'count' elements from 'from'
# to 'to' and tolerates overlapping ranges: it copies toward higher
# addresses ("CopyRight") unless 'to' lies inside (from, from + size - 1],
# in which case it copies toward lower addresses ("CopyLeft").
#
# Syntax: GNU as, AT&T operand order.  Arguments arrive in rdi/rsi/rdx.

        .globl   _Copy_arrayof_conjoint_bytes
        .globl   _Copy_arrayof_conjoint_jshorts
        .globl   _Copy_conjoint_jshorts_atomic
        .globl   _Copy_arrayof_conjoint_jints
        .globl   _Copy_conjoint_jints_atomic
        .globl   _Copy_arrayof_conjoint_jlongs
        .globl   _Copy_conjoint_jlongs_atomic

        .text

        #---------------------------------------------------------------
        # void Copy::arrayof_conjoint_bytes(void*  from,
        #                                   void*  to,
        #                                   size_t count)
        #
        # In:    rdi = from
        #        rsi = to
        #        rdx = count in bytes, treated as ssize_t
        # Uses:  rax, rcx, rdx, rsi, r8, flags
        #
        # The bulk is moved as qwords (four per iteration while at least
        # four remain, then one at a time); bits 2, 1 and 0 of the byte
        # count select a trailing dword, word and byte.
        #---------------------------------------------------------------
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        movq     %rdx,%r8                 # r8  = byte count
        shrq     $3,%rdx                  # rdx = qword count
        leaq     -1(%rdi,%r8,1),%rax      # rax = from + bcount - 1 (last source byte)
        cmpq     %rdi,%rsi
        jbe      acb_CopyRight            # to <= from: ascending copy
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft             # from < to <= last source byte: descending copy
acb_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax     # rax = from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx     # rcx = to   + qcount*8 - 8
        negq     %rdx                     # rdx = -qcount, counts up toward zero
        jmp      .Lacb_fwd_enter
        .p2align 4,,15
.Lacb_fwd_q1:                             # one qword per iteration
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Lacb_fwd_q1
.Lacb_fwd_tail4:
        testq    $4,%r8                   # trailing dword present?
        jz       .Lacb_fwd_tail2
        movl     8(%rax),%esi
        movl     %esi,8(%rcx)
        addq     $4,%rax                  # step both cursors past the dword;
        addq     $4,%rcx                  # %rsi no longer holds 'to', so the
                                          # cursors are the only usable bases
.Lacb_fwd_tail2:
        testq    $2,%r8                   # trailing word present?
        jz       .Lacb_fwd_tail1
        movw     8(%rax),%si
        movw     %si,8(%rcx)
        addq     $2,%rcx
.Lacb_fwd_tail1:
        testq    $1,%r8                   # trailing byte present?
        jz       .Lacb_fwd_done
        movb     -1(%rdi,%r8,1),%al       # last source byte, addressed from 'from'
        movb     %al,8(%rcx)
.Lacb_fwd_done:
        ret
        .p2align 4,,15
.Lacb_fwd_q4:                             # four qwords per iteration, ascending
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Lacb_fwd_enter:
        addq     $4,%rdx
        jle      .Lacb_fwd_q4             # at least four qwords still pending
        subq     $4,%rdx                  # undo the probe: rdx = -(qwords left)
        jl       .Lacb_fwd_q1             # one to three qwords left
        jmp      .Lacb_fwd_tail4          # no qwords left, handle the tail
acb_CopyLeft:
        testq    $1,%r8                   # trailing byte present?
        jz       .Lacb_bwd_tail2
        movb     -1(%rdi,%r8,1),%cl
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8                   # so a trailing word is found at r8 - 2
.Lacb_bwd_tail2:
        testq    $2,%r8                   # trailing word present?
        jz       .Lacb_bwd_tail4
        movw     -2(%rdi,%r8,1),%cx
        movw     %cx,-2(%rsi,%r8,1)
.Lacb_bwd_tail4:
        testq    $4,%r8                   # trailing dword present?
        jz       .Lacb_bwd_enter
        movl     (%rdi,%rdx,8),%ecx       # dword sits right after the last qword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      .Lacb_bwd_enter
        .p2align 4,,15
.Lacb_bwd_q1:                             # one qword per iteration, descending
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Lacb_bwd_q1
        ret
        .p2align 4,,15
.Lacb_bwd_q4:                             # four qwords per iteration, descending
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
.Lacb_bwd_enter:
        subq     $4,%rdx
        jge      .Lacb_bwd_q4             # at least four qwords still pending
        addq     $4,%rdx                  # undo the probe: rdx = qwords left
        jg       .Lacb_bwd_q1             # one to three qwords left
        ret

        #---------------------------------------------------------------
        # void Copy::arrayof_conjoint_jshorts(void*  from,
        #                                     void*  to,
        #                                     size_t count)
        # Equivalent to
        #   conjoint_jshorts_atomic
        #
        # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
        # let the hardware handle it.  The two or four words within dwords
        # or qwords that span cache line boundaries will still be loaded
        # and stored atomically.
        #
        # In:    rdi = from
        #        rsi = to
        #        rdx = count in 16-bit words, treated as ssize_t
        # Uses:  rax, rcx, rdx, rsi, r8, flags
        #---------------------------------------------------------------
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
        movq     %rdx,%r8                 # r8  = word count
        shrq     $2,%rdx                  # rdx = qword count
        leaq     -2(%rdi,%r8,2),%rax      # rax = from + wcount*2 - 2 (last source word)
        cmpq     %rdi,%rsi
        jbe      acs_CopyRight            # to <= from: ascending copy
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft             # from < to <= last source word: descending copy
acs_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax     # rax = from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx     # rcx = to   + qcount*8 - 8
        negq     %rdx                     # rdx = -qcount, counts up toward zero
        jmp      .Lacs_fwd_enter
.Lacs_fwd_q1:                             # one qword per iteration
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Lacs_fwd_q1
.Lacs_fwd_tail4:
        testq    $2,%r8                   # trailing dword (two words) present?
        jz       .Lacs_fwd_tail2
        movl     8(%rax),%esi
        movl     %esi,8(%rcx)
        addq     $4,%rcx                  # %rsi no longer holds 'to', so advance
                                          # the destination cursor instead
.Lacs_fwd_tail2:
        testq    $1,%r8                   # trailing word present?
        jz       .Lacs_fwd_done
        movw     -2(%rdi,%r8,2),%si       # last source word, addressed from 'from'
        movw     %si,8(%rcx)
.Lacs_fwd_done:
        ret
        .p2align 4,,15
.Lacs_fwd_q4:                             # four qwords per iteration, ascending
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Lacs_fwd_enter:
        addq     $4,%rdx
        jle      .Lacs_fwd_q4             # at least four qwords still pending
        subq     $4,%rdx                  # undo the probe: rdx = -(qwords left)
        jl       .Lacs_fwd_q1             # one to three qwords left
        jmp      .Lacs_fwd_tail4          # no qwords left, handle the tail
acs_CopyLeft:
        testq    $1,%r8                   # trailing word present?
        jz       .Lacs_bwd_tail4
        movw     -2(%rdi,%r8,2),%cx
        movw     %cx,-2(%rsi,%r8,2)
.Lacs_bwd_tail4:
        testq    $2,%r8                   # trailing dword (two words) present?
        jz       .Lacs_bwd_enter
        movl     (%rdi,%rdx,8),%ecx       # dword sits right after the last qword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      .Lacs_bwd_enter
.Lacs_bwd_q1:                             # one qword per iteration, descending
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Lacs_bwd_q1
        ret
        .p2align 4,,15
.Lacs_bwd_q4:                             # four qwords per iteration, descending
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
.Lacs_bwd_enter:
        subq     $4,%rdx
        jge      .Lacs_bwd_q4             # at least four qwords still pending
        addq     $4,%rdx                  # undo the probe: rdx = qwords left
        jg       .Lacs_bwd_q1             # one to three qwords left
        ret

        #---------------------------------------------------------------
        # void Copy::arrayof_conjoint_jints(jint*  from,
        #                                   jint*  to,
        #                                   size_t count)
        # Equivalent to
        #   conjoint_jints_atomic
        #
        # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
        # the hardware handle it.  The two dwords within qwords that span
        # cache line boundaries will still be loaded and stored atomically.
        #
        # In:    rdi = from
        #        rsi = to
        #        rdx = count in dwords, treated as ssize_t
        # Uses:  rax, rcx, rdx, rsi, r8, flags
        #---------------------------------------------------------------
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jints,@function
        .type    _Copy_conjoint_jints_atomic,@function
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
        movq     %rdx,%r8                 # r8  = dword count
        shrq     $1,%rdx                  # rdx = qword count
        leaq     -4(%rdi,%r8,4),%rax      # rax = from + dcount*4 - 4 (last source dword)
        cmpq     %rdi,%rsi
        jbe      aci_CopyRight            # to <= from: ascending copy
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft             # from < to <= last source dword: descending copy
aci_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax     # rax = from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx     # rcx = to   + qcount*8 - 8
        negq     %rdx                     # rdx = -qcount, counts up toward zero
        jmp      .Laci_fwd_enter
        .p2align 4,,15
.Laci_fwd_q1:                             # one qword per iteration
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Laci_fwd_q1
.Laci_fwd_tail4:
        testq    $1,%r8                   # trailing dword present?
        jz       .Laci_fwd_done
        movl     8(%rax),%esi
        movl     %esi,8(%rcx)
.Laci_fwd_done:
        ret
        .p2align 4,,15
.Laci_fwd_q4:                             # four qwords per iteration, ascending
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Laci_fwd_enter:
        addq     $4,%rdx
        jle      .Laci_fwd_q4             # at least four qwords still pending
        subq     $4,%rdx                  # undo the probe: rdx = -(qwords left)
        jl       .Laci_fwd_q1             # one to three qwords left
        jmp      .Laci_fwd_tail4          # no qwords left, handle the tail
aci_CopyLeft:
        testq    $1,%r8                   # trailing dword present?
        jz       .Laci_bwd_enter
        movl     -4(%rdi,%r8,4),%ecx
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      .Laci_bwd_enter
.Laci_bwd_q1:                             # one qword per iteration, descending
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Laci_bwd_q1
        ret
        .p2align 4,,15
.Laci_bwd_q4:                             # four qwords per iteration, descending
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
.Laci_bwd_enter:
        subq     $4,%rdx
        jge      .Laci_bwd_q4             # at least four qwords still pending
        addq     $4,%rdx                  # undo the probe: rdx = qwords left
        jg       .Laci_bwd_q1             # one to three qwords left
        ret

        #---------------------------------------------------------------
        # void Copy::arrayof_conjoint_jlongs(jlong* from,
        #                                    jlong* to,
        #                                    size_t count)
        # Equivalent to
        #   conjoint_jlongs_atomic
        #   arrayof_conjoint_oops
        #   conjoint_oops_atomic
        #
        # In:    rdi = from
        #        rsi = to
        #        rdx = count in qwords, treated as ssize_t
        # Uses:  rax, rcx, rdx, rsi, flags
        #---------------------------------------------------------------
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jlongs,@function
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
        leaq     -8(%rdi,%rdx,8),%rax     # rax = from + count*8 - 8 (last source qword)
        cmpq     %rdi,%rsi
        jbe      acl_CopyRight            # to <= from: ascending copy
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft             # from < to <= last source qword: descending copy
acl_CopyRight:
        leaq     -8(%rsi,%rdx,8),%rcx     # rcx = to + count*8 - 8
        negq     %rdx                     # rdx = -count, counts up toward zero
        jmp      .Lacl_fwd_enter
.Lacl_fwd_q1:                             # one qword per iteration
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Lacl_fwd_q1
        ret
        .p2align 4,,15
.Lacl_fwd_q4:                             # four qwords per iteration, ascending
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Lacl_fwd_enter:
        addq     $4,%rdx
        jle      .Lacl_fwd_q4             # at least four qwords still pending
        subq     $4,%rdx                  # undo the probe: rdx = -(qwords left)
        jl       .Lacl_fwd_q1             # one to three qwords left
        ret
.Lacl_bwd_q1:                             # one qword per iteration, descending
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Lacl_bwd_q1
        ret
        .p2align 4,,15
.Lacl_bwd_q4:                             # four qwords per iteration, descending
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
        subq     $4,%rdx
        jge      .Lacl_bwd_q4             # at least four qwords still pending
        addq     $4,%rdx                  # undo the probe: rdx = qwords left
        jg       .Lacl_bwd_q1             # one to three qwords left
        ret