#
# Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


#ifdef __APPLE__
# Darwin uses _ prefixed global symbols
# (SYMBOL pastes a leading underscore onto the name; ELF_TYPE expands to
# nothing on this target)
#define SYMBOL(s) _ ## s
#define ELF_TYPE(name, description)
#else
# Other targets: SYMBOL leaves the name unchanged and ELF_TYPE emits a
# .type directive for the given name
#define SYMBOL(s) s
#define ELF_TYPE(name, description) .type name,description
#endif

        .globl SYMBOL(fixcw)

        # NOTE WELL!  The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.

# Global symbol declarations for the copy and atomic routines of this file.
        .globl SYMBOL(_Copy_conjoint_bytes)
        .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
        .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
        .globl SYMBOL(_Copy_conjoint_jints_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jints)
        .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
        .globl SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts)

        .globl SYMBOL(_Atomic_cmpxchg_long)
        .globl SYMBOL(_Atomic_move_long)

        .text

        # Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
        # Set fpu to 53 bit precision.  This happens too early to use a stub.
        # ported from solaris_x86_32.s
        #
        # Takes no arguments.  The control word image 0x27f is pushed so that
        # fldcw can read it from memory, then the slot is popped again.
        # Modifies eax (only as the pop target).
        .p2align 4,,15
SYMBOL(fixcw):
        pushl    $0x27f               # control word image -> temporary stack slot
        fldcw    0(%esp)              # load the x87 control word from that slot
        popl     %eax                 # release the slot
        ret

        # Support for void Copy::conjoint_bytes(void* from,
        #                                       void* to,
        #                                       size_t count)
        #
        # Byte copy of count bytes where source and destination may overlap.
        # All three arguments are read from the stack.
        #
        # Register roles after the prologue:
        #   esi = from, edi = to, ecx = count, eax = from + count - 1
        # esi and edi are saved on entry and restored before every ret;
        # eax, ecx, edx and the flags are modified.
        #
        # Direction choice:
        #   to <= from                    -> cb_CopyRight (ascending addresses)
        #   from < to <= from + count - 1 -> cb_CopyLeft  (descending addresses)
        #   to >  from + count - 1        -> falls through into cb_CopyRight
        #
        # cb_CopyLeft sets the direction flag (std) and clears it again (cld)
        # before returning.
        # NOTE(review): the ascending rep; smovl path issues no cld of its own,
        # so it presumably relies on the direction flag being clear at entry -
        # confirm against the calling convention.
        .p2align 4,,15
        ELF_TYPE(_Copy_conjoint_bytes,@function)
SYMBOL(_Copy_conjoint_bytes):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1 (leal keeps the cmpl flags)
        jbe      cb_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cb_CopyLeft          # to lies inside the source range
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        subl     %esi,%edi            # edi = to - from, so (%edi,%esi,1) is the destination
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi            # edi back to an absolute destination address
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords with a string move (more than 32 dwords)
        rep;     smovl
        jmp      4f
        # copy aligned dwords with an explicit loop (at most 32 dwords)
2:      subl     %esi,%edi            # edi = to - from for the indexed store below
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi back to an absolute destination address
4:      movl     %eax,%ecx            # byte count less prefix
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        xorl     %eax,%eax            # eax = running byte index
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix step
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi            # edi = to - from for the indexed store below
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi back to an absolute destination address
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl                # descending, because std was issued above
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        subl     %esi,%edi            # edi = to - from
        addl     $3,%esi              # last byte not yet copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # undo the std issued at cb_CopyLeft
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        #
        # Same as _Copy_conjoint_bytes, except no source alignment check.
# _Copy_arrayof_conjoint_bytes: byte copy of possibly overlapping ranges.
# After the prologue esi = from, edi = to, ecx = count and
# eax = from + count - 1.  esi and edi are saved and restored; eax, ecx,
# edx and the flags are modified.  The descending path brackets its work
# with std and cld.
# NOTE(review): the ascending rep; smovl issues no cld of its own, so it
# presumably relies on the direction flag being clear at entry - confirm.
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
SYMBOL(_Copy_arrayof_conjoint_bytes):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1 (leal keeps the cmpl flags)
        jbe      acb_CopyRight        # to <= from
        cmpl     %eax,%edi
        jbe      acb_CopyLeft         # to lies inside the source range
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes: suffix loop only
1:      movl     %ecx,%eax            # keep the byte count for the suffix step
        shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy aligned dwords with an explicit loop
        subl     %esi,%edi            # edi = to - from, so (%edi,%esi,1) is the destination
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi back to an absolute destination address
        jmp      4f
        # copy aligned dwords with a string move
3:      rep;     smovl
4:      movl     %eax,%ecx
5:      andl     $3,%ecx              # suffix byte count
        jz       7f
        # copy suffix
        xorl     %eax,%eax            # eax = running byte index
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
acb_CopyLeft:
        std
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix step
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes: suffix loop only
1:      shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl                # descending, because std was issued above
        jmp      4f
        # 8 filler bytes; never executed because they follow an unconditional jmp
        .space 8
2:      subl     %esi,%edi            # edi = to - from for the indexed store below
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi back to an absolute destination address
4:      movl     %eax,%ecx
5:      andl     $3,%ecx              # suffix byte count
        jz       7f
        subl     %esi,%edi            # edi = to - from
        addl     $3,%esi              # last byte not yet copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # undo the std issued at acb_CopyLeft
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jshorts_atomic(void* from,
        #                                                void* to,
        #                                                size_t count)
        #
        # count is a number of 2-byte elements.  After the prologue
        # esi = from, edi = to, ecx = count, eax = from + count*2 - 2.
        # Data is moved as single words (prefix/suffix) or whole dwords.
        # esi and edi are saved and restored; eax, ecx, edx and the flags
        # are modified.
        .p2align 4,,15
        ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
SYMBOL(_Copy_conjoint_jshorts_atomic):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      cs_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cs_CopyLeft          # to lies inside the source range
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax            # original from
        andl     $3,%eax              # either 0 or 2
        jz       1f                   # no prefix
        # copy prefix
        subl     $1,%ecx
        jl       5f                   # zero count
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            # %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax            # word count less prefix
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords with a string move
        rep;     smovl
        jmp      4f
        # copy aligned dwords with an explicit loop
2:      subl     %esi,%edi            # edi = to - from for the indexed store below
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi back to an absolute destination address
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix step
        subl     $2,%esi              # from + count*2 - 4
1:      sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from for the indexed store below
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi back to an absolute destination address
        jmp      4f
3:      rep;     smovl                # descending, because std was issued above
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi              # step from the dword slot to the remaining word
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # undo the std issued at cs_CopyLeft
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Same structure as _Copy_conjoint_jshorts_atomic but without the
        # prefix step that aligns the source address.
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
SYMBOL(_Copy_arrayof_conjoint_jshorts):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      acs_CopyRight        # to <= from
        cmpl     %eax,%edi
        jbe      acs_CopyLeft         # to lies inside the source range
acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords with a string move
        rep;     smovl
        jmp      4f
        # copy aligned dwords with an explicit loop
        # (5 filler bytes first; never executed because they follow a jmp)
        .space 5
2:      subl     %esi,%edi            # edi = to - from for the indexed store below
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi back to an absolute destination address
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix step
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from for the indexed store below
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi back to an absolute destination address
        jmp      4f
3:      rep;     smovl                # descending, because std was issued above
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi              # step from the dword slot to the remaining word
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # undo the std issued at acs_CopyLeft
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jints_atomic(void* from,
        #                                              void* to,
        #                                              size_t count)
        # Equivalent to
        #   arrayof_conjoint_jints
        #
        # Both entry labels below name the same code.  count is a number of
        # 4-byte elements.  After the prologue esi = from, edi = to,
        # ecx = count, eax = from + count*4 - 4.  The explicit loops test the
        # count with subl/jge, so a zero count copies nothing.
        .p2align 4,,15
        ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
        ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
SYMBOL(_Copy_conjoint_jints_atomic):
SYMBOL(_Copy_arrayof_conjoint_jints):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
        jbe      ci_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      ci_CopyLeft          # to lies inside the source range
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        # 10 filler bytes; never executed because they follow a ret
        .space 10
2:      subl     %esi,%edi            # edi = to - from for the indexed store below
        jmp      4f                   # enter the loop at its count test
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
4:      subl     $1,%ecx
        jge      3b                   # loop while the decremented count is >= 0
        popl     %edi
        popl     %esi
        ret
ci_CopyLeft:
        std
        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                   # > 32 dwords
        subl     %eax,%edi            # eax == from + count*4 - 4
        jmp      3f                   # enter the loop at its count test
        .p2align 4,,15
2:      movl     (%eax),%edx
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b                   # loop while the decremented count is >= 0
        cld                           # undo the std issued at ci_CopyLeft
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi            # from + count*4 - 4
        rep;     smovl                # descending, because std was issued above
        cld                           # undo the std issued at ci_CopyLeft
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
        #                                               jlong* to,
        #                                               size_t count)
        #
        # 32-bit
        #
        # count treated as signed
        #
        # // if (from > to) {
        #      while (--count >= 0) {
        #        *to++ = *from++;
        #      }
        #    } else {
        #      while (--count >= 0) {
        #        to[count] = from[count];
        #      }
        #    }
        #
        # Each 8-byte element is moved with one x87 load (fildll) and one
        # x87 store (fistpll).  Only eax, ecx, edx, the flags and the x87
        # stack are used; nothing is pushed.
        .p2align 4,,15
        ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
SYMBOL(_Copy_conjoint_jlongs_atomic):
        movl     4+8(%esp),%ecx       # count
        movl     4+0(%esp),%eax       # from
        movl     4+4(%esp),%edx       # to
        cmpl     %eax,%edx
        jae      cla_CopyLeft         # to >= from
cla_CopyRight:
        subl     %eax,%edx            # edx = to - from, so (%edx,%eax,1) is the destination
        jmp      2f                   # enter the loop at its count test
        .p2align 4,,15
1:      fildll   (%eax)
        fistpll  (%edx,%eax,1)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b                   # loop while the decremented count is >= 0
        ret
        .p2align 4,,15
3:      fildll   (%eax,%ecx,8)        # from[count]
        fistpll  (%edx,%ecx,8)        # to[count]
cla_CopyLeft:
        subl     $1,%ecx              # entry point is the count test of this loop
        jge      3b
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Variant that uses the MMX registers mm0-mm2 on the ascending path
        # when there are at least 33 dwords: one dword is moved with smovl,
        # then 64 bytes (16 dwords) are moved per iteration, followed by emms.
        # The rest goes through the explicit dword loop at label 1 and the
        # word suffix.  Register roles after the prologue match
        # _Copy_arrayof_conjoint_jshorts: esi = from, edi = to, ecx = count,
        # eax = from + count*2 - 2.
        .p2align 4,,15
        ELF_TYPE(_mmx_Copy_arrayof_conjoint_jshorts,@function)
SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      mmx_acs_CopyRight    # to <= from
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft     # to lies inside the source range
mmx_acs_CopyRight:
        movl     %ecx,%eax            # word count, kept for the suffix step
        sarl     %ecx                 # dword count
        je       5f                   # no dwords to move
        cmpl     $33,%ecx
        jae      3f                   # >= 33 dwords: MMX path
1:      subl     %esi,%edi            # edi = to - from for the indexed store below
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi back to an absolute destination address
        jmp      5f
3:      smovl    # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx              # account for the dword just moved
4:      .p2align 4,,15
        movq     0(%esi),%mm0
        addl     $64,%edi             # stores below use negative offsets from the new edi
        movq     8(%esi),%mm1
        subl     $16,%ecx             # 16 dwords are consumed per iteration
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b                   # another full 16-dword group remains
        emms
        testl    %ecx,%ecx
        ja       1b                   # leftover dwords (testl clears CF, so this is a non-zero test)
5:      andl     $1,%eax              # suffix count
        je       7f                   # no suffix
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
mmx_acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix step
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        je       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from for the indexed store below
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi back to an absolute destination address
        jmp      4f
3:      rep;     smovl                # descending, because std was issued above
4:      andl     $1,%eax              # suffix count
        je       6f                   # no suffix
        addl     $2,%esi              # step from the dword slot to the remaining word
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld                           # undo the std issued at mmx_acs_CopyLeft
        popl     %edi
        popl     %esi
        ret


        # Support for int64_t Atomic::cmpxchg(int64_t exchange_value,
        #                                     volatile int64_t* dest,
        #                                     int64_t compare_value,
        #                                     bool is_MP)
        #
        # Register setup for cmpxchg8b: edx:eax = compare_value,
        # ecx:ebx = exchange_value, edi = dest.  cmpxchg8b stores ecx:ebx to
        # (edi) when the 8 bytes there equal edx:eax, otherwise it loads them
        # into edx:eax; edx:eax is what the caller receives.  ebx and edi are
        # saved and restored.  The stack offsets in the comments below are
        # those in effect after both pushes.
        .p2align 4,,15
        ELF_TYPE(_Atomic_cmpxchg_long,@function)
SYMBOL(_Atomic_cmpxchg_long):
                                   #  8(%esp) : return PC
        pushl    %ebx              #  4(%esp) : old %ebx
        pushl    %edi              #  0(%esp) : old %edi
        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
        movl     20(%esp), %edi    # 20(%esp) : dest
        cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
        je       1f                # is_MP == 0: jump past the lock prefix
        lock
1:      cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret


        # Support for int64_t Atomic::load and Atomic::store.
        # void _Atomic_move_long(const volatile int64_t* src, volatile int64_t* dst)
        #
        # Moves the 8 bytes at src to dst with one x87 load (fildll) and one
        # x87 store (fistpll).  Modifies eax and the x87 stack; nothing is
        # pushed.
        .p2align 4,,15
        ELF_TYPE(_Atomic_move_long,@function)
SYMBOL(_Atomic_move_long):
        movl     4(%esp), %eax   # src
        fildll    (%eax)
        movl     8(%esp), %eax   # dest
        fistpll   (%eax)
        ret
