//
// Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//

        .globl fixcw
        .globl sse_check
        .globl sse_unavailable
        .globl gs_load
        .globl gs_thread
        .globl _Atomic_cmpxchg_long_gcc

        // NOTE WELL!  The _Copy functions are called directly
        // from server-compiler-generated code via CallLeafNoFP,
        // which means that they *must* either not use floating
        // point or use it in the same manner as does the server
        // compiler.

        .globl _Copy_conjoint_bytes
        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jlongs_atomic
        .globl _mmx_Copy_arrayof_conjoint_jshorts

        .section .text,"ax"

        / Conventions shared by the routines in this file:
        /  - Arguments are read from the stack relative to %esp; the
        /    "4+N" / "8+N" displacements account for the return PC and for
        /    registers pushed so far.
        /  - In the copy loops, "subl %esi,%edi" turns %edi into the
        /    difference (to - from), so that (%edi,%esi,1) addresses the
        /    destination while only %esi has to be stepped.  The matching
        /    "addl %esi,%edi" turns %edi back into a real pointer.
        /  - Every path that executes "std" executes "cld" before returning.

        / Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
        / Set fpu to 53 bit precision.  This happens too early to use a stub.
        /
        / Loads the x87 control word from a temporary stack slot holding 0x27f.
        / The popl only releases that slot; %eax is clobbered as a side effect.
        .align   16
fixcw:
        pushl    $0x27f
        fldcw    0(%esp)
        popl     %eax
        ret

        / SpinPause: executes "rep; nop" (the encoding of the pause
        / spin-wait hint) and returns 1 in %eax.
        .align   16
        .globl   SpinPause
SpinPause:
        rep
        nop
        movl     $1, %eax
        ret


        / Test SSE availability, used by os_solaris_i486.cpp
        / Returns 1 in %eax when the xorps below executes normally, and 0
        / when execution resumes at sse_unavailable instead.
        .align   16
sse_check:
        / Fault if SSE not available
        xorps    %xmm0,%xmm0
        / No fault
        movl     $1,%eax
        ret
        / Signal handler continues here if SSE is not available
sse_unavailable:
        xorl     %eax,%eax
        ret

        / Fast thread accessors, used by threadLS_solaris_i486.cpp
        /
        / gs_load: returns in %eax the dword at %gs:(offset), where offset is
        / the single stack argument.
        .align   16
gs_load:
        movl     4(%esp),%ecx
        movl     %gs:(%ecx),%eax
        ret

        / gs_thread: returns in %eax the dword at %gs:0.
        .align   16
gs_thread:
        movl     %gs:0x0,%eax
        ret

        / Support for void Copy::conjoint_bytes(void* from,
        /                                       void* to,
        /                                       size_t count)
        /
        / Direction selection (unsigned compares):
        /   to <= from                  -> copy ascending  (cb_CopyRight)
        /   from < to <= from+count-1   -> copy descending (cb_CopyLeft)
        /   otherwise (no overlap)      -> copy ascending  (cb_CopyRight)
        / Saves and restores %esi and %edi; uses %eax, %ecx and %edx as scratch.
        .align   16
_Copy_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      / count
        pushl    %edi
        movl     8+ 4(%esp),%esi      / from
        movl     8+ 8(%esp),%edi      / to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   / from + count - 1
        jbe      cb_CopyRight
        cmpl     %eax,%edi
        jbe      cb_CopyLeft
        / copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   / <= 3 bytes
        / align source address at dword address boundary
        movl     %ecx,%eax            / original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              / prefix byte count
        jz       1f                   / no prefix
        subl     %ecx,%eax            / byte count less prefix
        / copy prefix
        subl     %esi,%edi
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi
1:      movl     %eax,%ecx            / byte count less prefix
        shrl     $2,%ecx              / dword count
        jz       4f                   / no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   / <= 32 dwords
        / copy aligned dwords
        rep;     smovl
        jmp      4f
        / copy aligned dwords
2:      subl     %esi,%edi
        .align   16
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx            / byte count less prefix
5:      andl     $3,%ecx              / suffix byte count
        jz       7f                   / no suffix
        / copy suffix
        xorl     %eax,%eax
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        / copy from high to low
cb_CopyLeft:
        std
        leal     -4(%edi,%ecx),%edi   / to + count - 4
        movl     %eax,%esi            / from + count - 1
        movl     %ecx,%eax
        subl     $3,%esi              / from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   / <= 3 bytes
1:      shrl     $2,%ecx              / dword count
        jz       4f                   / no dwords to move
        cmpl     $32,%ecx
        ja       3f                   / > 32 dwords
        / copy dwords, aligned or not
        subl     %esi,%edi
        .align   16
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        / copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            / byte count
5:      andl     $3,%ecx              / suffix byte count
        jz       7f                   / no suffix
        / copy suffix
        subl     %esi,%edi
        addl     $3,%esi              / last byte not yet copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld
        popl     %edi
        popl     %esi
        ret

        / Support for void Copy::arrayof_conjoint_bytes(void* from,
        /                                               void* to,
        /                                               size_t count)
        /
        / Same as _Copy_conjoint_bytes, except no source alignment check.
        .align   16
_Copy_arrayof_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      / count
        pushl    %edi
        movl     8+ 4(%esp),%esi      / from
        movl     8+ 8(%esp),%edi      / to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   / from + count - 1
        jbe      acb_CopyRight
        cmpl     %eax,%edi
        jbe      acb_CopyLeft
        / copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   / <= 3 bytes
1:      movl     %ecx,%eax            / byte count
        shrl     $2,%ecx              / dword count
        jz       4f                   / no dwords to move
        cmpl     $32,%ecx
        ja       3f                   / > 32 dwords
        / copy aligned dwords
        subl     %esi,%edi
        .align   16
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        / copy aligned dwords
3:      rep;     smovl
4:      movl     %eax,%ecx            / byte count
5:      andl     $3,%ecx              / suffix byte count
        jz       7f                   / no suffix
        / copy suffix
        xorl     %eax,%eax
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        / copy from high to low
acb_CopyLeft:
        std
        leal     -4(%edi,%ecx),%edi   / to + count - 4
        movl     %eax,%esi            / from + count - 1
        movl     %ecx,%eax
        subl     $3,%esi              / from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   / <= 3 bytes
1:      shrl     $2,%ecx              / dword count
        jz       4f                   / no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   / <= 32 dwords
        rep;     smovl
        jmp      4f
        / Skip 8 bytes of the location counter.
        / NOTE(review): presumably padding to tune code placement - confirm.
        .=.+8
2:      subl     %esi,%edi
        .align   16
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx            / byte count
5:      andl     $3,%ecx              / suffix byte count
        jz       7f                   / no suffix
        / copy suffix
        subl     %esi,%edi
        addl     $3,%esi              / last byte not yet copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld
        popl     %edi
        popl     %esi
        ret

        / Support for void Copy::conjoint_jshorts_atomic(void* from,
        /                                                void* to,
        /                                                size_t count)
        /
        / count is a number of 2-byte elements.  Direction is chosen with the
        / same overlap test as _Copy_conjoint_bytes, using from + count*2 - 2
        / as the address of the last source element.
        .align   16
_Copy_conjoint_jshorts_atomic:
        pushl    %esi
        movl     4+12(%esp),%ecx      / count
        pushl    %edi
        movl     8+ 4(%esp),%esi      / from
        movl     8+ 8(%esp),%edi      / to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax / from + count*2 - 2
        jbe      cs_CopyRight
        cmpl     %eax,%edi
        jbe      cs_CopyLeft
        / copy from low to high
cs_CopyRight:
        / align source address at dword address boundary
        movl     %esi,%eax            / original from
        andl     $3,%eax              / either 0 or 2
        jz       1f                   / no prefix
        / copy prefix
        subl     $1,%ecx
        jl       5f                   / zero count
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            / %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax            / word count less prefix
        sarl     %ecx                 / dword count
        jz       4f                   / no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   / <= 32 dwords
        / copy aligned dwords
        rep;     smovl
        jmp      4f
        / copy aligned dwords
2:      subl     %esi,%edi
        .align   16
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      andl     $1,%eax              / suffix count
        jz       5f                   / no suffix
        / copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        / copy from high to low
cs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi / to + count*2 - 4
        movl     %eax,%esi            / from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              / from + count*2 - 4
1:      sarl     %ecx                 / dword count
        jz       4f                   / no dwords to move
        cmpl     $32,%ecx
        ja       3f                   / > 32 dwords
        subl     %esi,%edi
        .align   16
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              / suffix count
        jz       5f                   / no suffix
        / copy suffix
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld
        popl     %edi
        popl     %esi
        ret

        / Support for void Copy::arrayof_conjoint_jshorts(void* from,
        /                                                 void* to,
        /                                                 size_t count)
        /
        / Like _Copy_conjoint_jshorts_atomic, but without the prefix step
        / that aligns the source address in the ascending copy.
        .align   16
_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      / count
        pushl    %edi
        movl     8+ 4(%esp),%esi      / from
        movl     8+ 8(%esp),%edi      / to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax / from + count*2 - 2
        jbe      acs_CopyRight
        cmpl     %eax,%edi
        jbe      acs_CopyLeft
        / copy from low to high
acs_CopyRight:
        movl     %ecx,%eax            / word count
        sarl     %ecx                 / dword count
        jz       4f                   / no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   / <= 32 dwords
        / copy aligned dwords
        rep;     smovl
        jmp      4f
        / copy aligned dwords
        / Skip 5 bytes of the location counter.
        / NOTE(review): presumably padding to tune code placement - confirm.
        .=.+5
2:      subl     %esi,%edi
        .align   16
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      andl     $1,%eax              / suffix count
        jz       5f                   / no suffix
        / copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        / copy from high to low
acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi / to + count*2 - 4
        movl     %eax,%esi            / from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              / from + count*2 - 4
        sarl     %ecx                 / dword count
        jz       4f                   / no dwords to move
        cmpl     $32,%ecx
        ja       3f                   / > 32 dwords
        subl     %esi,%edi
        .align   16
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              / suffix count
        jz       5f                   / no suffix
        / copy suffix
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld
        popl     %edi
        popl     %esi
        ret

        / Support for void Copy::conjoint_jints_atomic(void* from,
        /                                              void* to,
        /                                              size_t count)
        / Equivalent to
        /   arrayof_conjoint_jints
        /
        / count is a number of 4-byte elements.  Both entry points share one
        / body.  Counts above 32 use rep smovl; smaller counts use an explicit
        / loop that is entered at its decrement so a zero count copies nothing.
        .align   16
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        pushl    %esi
        movl     4+12(%esp),%ecx      / count
        pushl    %edi
        movl     8+ 4(%esp),%esi      / from
        movl     8+ 8(%esp),%edi      / to
        cmpl     %esi,%edi
        leal     -4(%esi,%ecx,4),%eax / from + count*4 - 4
        jbe      ci_CopyRight
        cmpl     %eax,%edi
        jbe      ci_CopyLeft
        / copy from low to high
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   / <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        / Skip 10 bytes of the location counter.
        / NOTE(review): presumably padding to tune code placement - confirm.
        .=.+10
2:      subl     %esi,%edi
        jmp      4f
        .align   16
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
4:      subl     $1,%ecx
        jge      3b
        popl     %edi
        popl     %esi
        ret
        / copy from high to low
ci_CopyLeft:
        std
        leal     -4(%edi,%ecx,4),%edi / to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                   / > 32 dwords
        / short copy: %eax is stepped as the source pointer, %esi is unused
        subl     %eax,%edi            / eax == from + count*4 - 4
        jmp      3f
        .align   16
2:      movl     (%eax),%edx
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b
        cld
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi            / from + count*4 - 4
        rep;     smovl
        cld
        popl     %edi
        popl     %esi
        ret

        / Support for void Copy::conjoint_jlongs_atomic(jlong* from,
        /                                               jlong* to,
        /                                               size_t count)
        /
        / 32-bit
        /
        / count treated as signed
        /
        / if (from > to) {
        /   while (--count >= 0) {
        /     *to++ = *from++;
        /   }
        / } else {
        /   while (--count >= 0) {
        /     to[count] = from[count];
        /   }
        / }
        /
        / Each 8-byte element is moved with one x87 64-bit integer load
        / (fildll) and one 64-bit integer store (fistpll).  No registers are
        / saved; %eax, %ecx and %edx are used as scratch.
        .align   16
_Copy_conjoint_jlongs_atomic:
        movl     4+8(%esp),%ecx       / count
        movl     4+0(%esp),%eax       / from
        movl     4+4(%esp),%edx       / to
        cmpl     %eax,%edx
        jae      cla_CopyLeft
cla_CopyRight:
        subl     %eax,%edx            / %edx = to - from
        jmp      2f
        .align   16
1:      fildll   (%eax)
        fistpll  (%edx,%eax,1)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b
        ret
        .align   16
3:      fildll   (%eax,%ecx,8)
        fistpll  (%edx,%ecx,8)
cla_CopyLeft:
        subl     $1,%ecx
        jge      3b
        ret

        / Support for void Copy::arrayof_conjoint_jshorts(void* from,
        /                                                 void* to,
        /                                                 size_t count)
        /
        / MMX variant.  The ascending copy moves 64 bytes per iteration
        / through %mm0-%mm2 once more than 32 dwords are to be copied, then
        / executes emms and finishes any remaining dwords with the plain dword
        / loop.  The descending copy is the same as in
        / _Copy_arrayof_conjoint_jshorts and does not use MMX.
        .align   16
_mmx_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      / count
        pushl    %edi
        movl     8+ 4(%esp),%esi      / from
        movl     8+ 8(%esp),%edi      / to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax / from + count*2 - 2
        jbe      mmx_acs_CopyRight
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft
        / copy from low to high
mmx_acs_CopyRight:
        movl     %ecx,%eax            / word count
        sarl     %ecx                 / dword count
        je       5f                   / no dwords to move
        cmpl     $33,%ecx
        jae      3f                   / > 32 dwords
1:      subl     %esi,%edi
        .align   16
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      5f
3:      smovl    / align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx
4:      .align   16
        movq     0(%esi),%mm0
        addl     $64,%edi
        movq     8(%esi),%mm1
        subl     $16,%ecx
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b
        emms
        testl    %ecx,%ecx
        ja       1b                   / dwords remain
5:      andl     $1,%eax              / suffix count
        je       7f                   / no suffix
        / copy suffix
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
        / copy from high to low
mmx_acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi / to + count*2 - 4
        movl     %eax,%esi            / from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              / from + count*2 - 4
        sarl     %ecx                 / dword count
        je       4f                   / no dwords to move
        cmpl     $32,%ecx
        ja       3f                   / > 32 dwords
        subl     %esi,%edi
        .align   16
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              / suffix count
        je       6f                   / no suffix
        / copy suffix
        addl     $2,%esi
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld
        popl     %edi
        popl     %esi
        ret


        / Support for jlong Atomic::cmpxchg(jlong exchange_value,
        /                                   volatile jlong* dest,
        /                                   jlong compare_value,
        /                                   bool is_MP)
        / Used only for Solaris/gcc builds
        /
        / cmpxchg8b compares %edx:%eax with the 8 bytes at (%edi) and, when
        / they are equal, stores %ecx:%ebx there; otherwise it loads the
        / memory value into %edx:%eax.  The value left in %edx:%eax is what
        / the caller receives.  The lock prefix is skipped when is_MP is 0.
        .align   16
_Atomic_cmpxchg_long_gcc:
                                   /  8(%esp) : return PC
        pushl    %ebx              /  4(%esp) : old %ebx
        pushl    %edi              /  0(%esp) : old %edi
        movl     12(%esp), %ebx    / 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx    / 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax    / 24(%esp) : compare_value (low)
        movl     28(%esp), %edx    / 28(%esp) : compare_value (high)
        movl     20(%esp), %edi    / 20(%esp) : dest
        cmpl     $0, 32(%esp)      / 32(%esp) : is_MP
        je       1f
        lock
1:      cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret