1 # 2 # Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. 3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 # 5 # This code is free software; you can redistribute it and/or modify it 6 # under the terms of the GNU General Public License version 2 only, as 7 # published by the Free Software Foundation. 8 # 9 # This code is distributed in the hope that it will be useful, but WITHOUT 10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 # version 2 for more details (a copy is included in the LICENSE file that 13 # accompanied this code). 14 # 15 # You should have received a copy of the GNU General Public License version 16 # 2 along with this work; if not, write to the Free Software Foundation, 17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 # 19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 # or visit www.oracle.com if you need additional information or have any 21 # questions. 22 # 23 24 25 # NOTE WELL! The _Copy functions are called directly 26 # from server-compiler-generated code via CallLeafNoFP, 27 # which means that they *must* either not use floating 28 # point or use it in the same manner as does the server 29 # compiler. 
# Entry points exported from this file.

        .globl _Copy_conjoint_bytes
        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jlongs_atomic
        .globl _mmx_Copy_arrayof_conjoint_jshorts

        .globl _Atomic_cmpxchg_long
        .globl _Atomic_move_long

        .text

        # Conventions shared by the routines below (all visible in the code):
        #   - AT&T operand order (source, destination), 32-bit registers.
        #   - Arguments are read from the stack, just above the return address.
        #   - %esi/%edi (and %ebx in _Atomic_cmpxchg_long) are pushed on entry
        #     and popped before every ret; %eax, %ecx, %edx are scratch.
        #   - The "conjoint" copies pick a direction from the addresses:
        #       to <= from                  -> copy low to high ("CopyRight")
        #       to >  from + size - 1       -> copy low to high ("CopyRight")
        #       otherwise (to inside from)  -> copy high to low ("CopyLeft")
        #     using unsigned compares (jbe).
        #   - Every high-to-low path sets the direction flag with std and
        #     clears it again with cld before returning.
        #   - "rep; smovl" moves %ecx dwords from (%esi) to (%edi), stepping
        #     both pointers up or down by 4 according to the direction flag.
        #   - In the hand-written loops "subl %esi,%edi" turns %edi into the
        #     distance (to - from), so "(%edi,%esi,1)" addresses the
        #     destination and only %esi has to be stepped; "addl %esi,%edi"
        #     afterwards turns %edi back into a real pointer.

        # Support for void Copy::conjoint_bytes(void* from,
        #                                       void* to,
        #                                       size_t count)
        #
        # Byte copy that tolerates overlapping ranges. count is a byte count.
        # Low-to-high path: copies up to 3 prefix bytes until the source is
        # dword aligned, then whole dwords, then up to 3 suffix bytes.
        # High-to-low path: copies whole dwords first (no alignment step),
        # then up to 3 remaining bytes at the low end.
        # Dword runs of more than 32 use "rep; smovl"; shorter runs use a loop.
        .p2align 4,,15
        .type    _Copy_conjoint_bytes,@function
_Copy_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # flags for "to vs from"; leal below keeps them
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      cb_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cb_CopyLeft          # from < to <= from + count - 1: overlap
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        subl     %esi,%edi            # edi = to - from
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi            # edi is a pointer again
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords with the string instruction
        rep;     smovl
        jmp      4f
        # copy aligned dwords with a loop
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi is a pointer again
4:      movl     %eax,%ecx            # byte count less prefix
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        xorl     %eax,%eax            # eax = byte index, counting up
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std                           # string ops now step downwards
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is a pointer again
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        # esi/edi sit 4 below the last dword handled, so the highest
        # remaining byte is at offset 3; walk down from there.
        subl     %esi,%edi            # edi = to - from
        addl     $3,%esi
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        #
        # Same as _Copy_conjoint_bytes, except no source alignment check.
        # The low-to-high path therefore goes straight to the dword copy and
        # finishes with up to 3 suffix bytes.
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # flags for "to vs from"; leal below keeps them
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      acb_CopyRight        # to <= from
        cmpl     %eax,%edi
        jbe      acb_CopyLeft         # from < to <= from + count - 1: overlap
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      movl     %ecx,%eax            # keep the byte count for the suffix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy aligned dwords with a loop
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is a pointer again
        jmp      4f
        # copy aligned dwords with the string instruction
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        xorl     %eax,%eax            # eax = byte index, counting up
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
acb_CopyLeft:
        std                           # string ops now step downwards
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        jmp      4f
        # NOTE(review): 8 bytes of filler; presumably placed to influence
        # where the aligned loop below lands -- confirm before changing.
        .space 8
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi is a pointer again
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix, highest remaining byte first
        subl     %esi,%edi            # edi = to - from
        addl     $3,%esi
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jshorts_atomic(void* from,
        #                                                void* to,
        #                                                size_t count)
        #
        # Copy of 16-bit elements that tolerates overlapping ranges.
        # count is an element count (addresses are scaled by 2).
        # Low-to-high path: moves one leading element with a 16-bit access
        # if the source is not dword aligned, then pairs of elements as
        # dwords, then one trailing element if the remaining count is odd.
        # High-to-low path: dwords first, then one remaining element.
        .p2align 4,,15
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_conjoint_jshorts_atomic:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # flags for "to vs from"; leal below keeps them
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      cs_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cs_CopyLeft          # from < to <= last element: overlap
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax            # original from
        andl     $3,%eax              # either 0 or 2
        jz       1f                   # no prefix
        # copy prefix
        subl     $1,%ecx
        jl       5f                   # zero count
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            # %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax            # word count less prefix
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords with the string instruction
        rep;     smovl
        jmp      4f
        # copy aligned dwords with a loop
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi is a pointer again
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std                           # string ops now step downwards
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the element count for the suffix
        subl     $2,%esi              # from + count*2 - 4
1:      sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is a pointer again
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        # esi/edi sit 4 below the last dword handled; the one element
        # left over is the upper half of that slot, at offset 2.
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Same structure as _Copy_conjoint_jshorts_atomic, but without the
        # step that aligns the source: the low-to-high path goes straight
        # to the dword copy. count is an element count.
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # flags for "to vs from"; leal below keeps them
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      acs_CopyRight        # to <= from
        cmpl     %eax,%edi
        jbe      acs_CopyLeft         # from < to <= last element: overlap
        # copy from low to high
acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords with the string instruction
        rep;     smovl
        jmp      4f
        # copy aligned dwords with a loop
        # NOTE(review): 5 bytes of filler; presumably placed to influence
        # where the aligned loop below lands -- confirm before changing.
        .space 5
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi is a pointer again
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
acs_CopyLeft:
        std                           # string ops now step downwards
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the element count for the suffix
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is a pointer again
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix (the element at offset 2 of the next lower slot)
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jints_atomic(void* from,
        #                                              void* to,
        #                                              size_t count)
        # Equivalent to
        #   arrayof_conjoint_jints
        #
        # Both symbols label the same code. count is a dword count.
        # More than 32 dwords are moved with "rep; smovl", otherwise a loop
        # is used. The loops are entered at their decrement/test, so a
        # count of zero copies nothing.
        .p2align 4,,15
        .type    _Copy_conjoint_jints_atomic,@function
        .type    _Copy_arrayof_conjoint_jints,@function
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # flags for "to vs from"; leal below keeps them
        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
        jbe      ci_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      ci_CopyLeft          # from < to <= last element: overlap
        # copy from low to high
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        # NOTE(review): 10 bytes of filler; presumably placed to influence
        # where the aligned loop below lands -- confirm before changing.
        .space 10
2:      subl     %esi,%edi            # edi = to - from
        jmp      4f                   # test the count before the first move
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
4:      subl     $1,%ecx
        jge      3b                   # signed test: loop while count was > 0
        popl     %edi
        popl     %esi
        ret
        # copy from high to low
ci_CopyLeft:
        std                           # string ops now step downwards
        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                   # > 32 dwords
        subl     %eax,%edi            # eax == from + count*4 - 4
        jmp      3f                   # test the count before the first move
        .p2align 4,,15
2:      movl     (%eax),%edx          # eax walks the source downwards
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b
        cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi            # from + count*4 - 4
        rep;     smovl
        cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
        #                                               jlong* to,
        #                                               size_t count)
        #
        # 32-bit
        #
        # count treated as signed
        #
        # if (from > to) {
        #   while (--count >= 0) {
        #     *to++ = *from++;
        #   }
        # } else {
        #   while (--count >= 0) {
        #     to[count] = from[count];
        #   }
        # }
        #
        # Each element is moved with one 64-bit x87 integer load (fildll)
        # followed by one 64-bit store-and-pop (fistpll), so the x87 stack
        # depth is the same after every element as before it.
        # NOTE(review): presumably the single 8-byte load/store per element
        # is what the "_atomic" in the name relies on -- confirm.
        # Only %eax, %ecx and %edx are used; nothing is pushed.
        .p2align 4,,15
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_conjoint_jlongs_atomic:
        movl     4+8(%esp),%ecx       # count
        movl     4+0(%esp),%eax       # from
        movl     4+4(%esp),%edx       # to
        cmpl     %eax,%edx
        jae      cla_CopyLeft         # to >= from: copy from the top down
cla_CopyRight:
        subl     %eax,%edx            # edx = to - from
        jmp      2f                   # test the count before the first move
        .p2align 4,,15
1:      fildll   (%eax)
        fistpll  (%edx,%eax,1)        # store to from-pointer + (to - from)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b
        ret
        .p2align 4,,15
3:      fildll   (%eax,%ecx,8)        # from[count]
        fistpll  (%edx,%ecx,8)        # to[count]
cla_CopyLeft:                         # entry point is the decrement/test
        subl     $1,%ecx
        jge      3b
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Variant of _Copy_arrayof_conjoint_jshorts that uses MMX registers
        # on the low-to-high path when more than 32 dwords are to be moved:
        # one dword is moved first, then blocks of 16 dwords (64 bytes) via
        # %mm0-%mm2, then emms, then any remaining dwords with the plain
        # loop and finally one trailing element if count is odd.
        # The high-to-low path does not use MMX.
        .p2align 4,,15
        .type    _mmx_Copy_arrayof_conjoint_jshorts,@function
_mmx_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # flags for "to vs from"; leal below keeps them
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      mmx_acs_CopyRight    # to <= from
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft     # from < to <= last element: overlap
        # copy from low to high
mmx_acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        je       5f                   # no dwords to move
        cmpl     $33,%ecx
        jae      3f                   # > 32 dwords: MMX path
        # copy dwords with a loop
1:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is a pointer again
        jmp      5f
3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx              # account for the dword just moved
        # NOTE(review): label 4 is defined before the alignment directive,
        # so "jge 4b" targets the address ahead of any padding that
        # .p2align emits -- confirm this is intended.
4:      .p2align 4,,15
        # One iteration moves 64 bytes; loads are interleaved with the
        # stores of earlier quadwords. edi is advanced early, hence the
        # negative store offsets.
        movq     0(%esi),%mm0
        addl     $64,%edi
        movq     8(%esi),%mm1
        subl     $16,%ecx             # 16 dwords per iteration
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b                   # at least one more full block
        emms                          # leave MMX state before going on
        testl    %ecx,%ecx
        ja       1b                   # dwords left over (testl clears CF, so this is "not zero")
5:      andl     $1,%eax              # suffix count
        je       7f                   # no suffix
        # copy suffix
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
mmx_acs_CopyLeft:
        std                           # string ops now step downwards
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the element count for the suffix
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        je       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is a pointer again
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        je       6f                   # no suffix
        # copy suffix (the element at offset 2 of the next lower slot)
        addl     $2,%esi
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret


        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
        #                                   volatile jlong* dest,
        #                                   jlong compare_value)
        #
        # Loads compare_value into %edx:%eax and exchange_value into
        # %ecx:%ebx, then executes a locked cmpxchg8b on *dest: if the 8
        # bytes at dest equal %edx:%eax they are replaced by %ecx:%ebx,
        # otherwise they are loaded into %edx:%eax. Either way %edx:%eax
        # ends up holding the value that was at dest, and that is what the
        # routine leaves in %edx:%eax on return.
        .p2align 4,,15
        .type    _Atomic_cmpxchg_long,@function
_Atomic_cmpxchg_long:
                                   #  8(%esp) : return PC
        pushl    %ebx              #  4(%esp) : old %ebx
        pushl    %edi              #  0(%esp) : old %edi
        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
        movl     20(%esp), %edi    # 20(%esp) : dest
        lock cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret


        # Support for jlong Atomic::load and Atomic::store.
        # void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst)
        #
        # Moves one 64-bit value from *src to *dst with a single x87 integer
        # load (fildll) and a single store-and-pop (fistpll); the x87 stack
        # depth is unchanged on return. Only %eax is used.
        .p2align 4,,15
        .type    _Atomic_move_long,@function
_Atomic_move_long:
        movl     4(%esp), %eax   # src
        fildll    (%eax)
        movl     8(%esp), %eax   # dst
        fistpll   (%eax)
        ret
