1758 // bswap lo
1759 emit_opcode(cbuf, 0x0F);
1760 emit_cc(cbuf, 0xC8, destlo);
1761 // bswap hi
1762 emit_opcode(cbuf, 0x0F);
1763 emit_cc(cbuf, 0xC8, desthi);
1764 // xchg lo and hi
1765 emit_opcode(cbuf, 0x87);
1766 emit_rm(cbuf, 0x3, destlo, desthi);
1767 %}
1768
1769 enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ...
// Emit only the ModRM byte (mod=11): reg field = $secondary (the opcode's
// /digit extension), r/m field = the operand register. The opcode byte
// itself is emitted separately via $primary in the instruct's encoding.
1770 emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1771 %}
1772
// CMOVcc: emit the 0x0F prefix ($primary), then the condition-specific
// second opcode byte ($secondary plus the condition-code encoding).
1773 enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1774 $$$emit8$primary;
1775 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1776 %}
1777
// x87 FCMOVcc: both opcode bytes are built arithmetically from the base
// 0xDA00, the condition code, and the source FP-stack slot ($src$$reg-1
// because ST(0) is the implicit destination).
1778 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
1779 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1780 emit_d8(cbuf, op >> 8 );
1781 emit_d8(cbuf, op & 255);
1782 %}
1783
1784 // emulate a CMOV with a conditional branch around a MOV
1785 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1786 // Invert sense of branch from sense of CMOV
// Short Jcc (0x70 family); XOR-ing the cc encoding with 1 flips the
// condition, so the branch skips $brOffs bytes (the following MOV)
// exactly when the CMOV would NOT have moved.
1787 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1788 emit_d8( cbuf, $brOffs$$constant );
1789 %}
1790
1791 enc_class enc_PartialSubtypeCheck( ) %{
1792 Register Redi = as_Register(EDI_enc); // result register
1793 Register Reax = as_Register(EAX_enc); // super class
1794 Register Recx = as_Register(ECX_enc); // killed
1795 Register Resi = as_Register(ESI_enc); // sub class
1796 Label miss;
1797
1798 MacroAssembler _masm(&cbuf);
2046 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2047 %}
2048
2049 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
// ModRM (mod=11) pairing the LOW 32-bit halves of two long registers.
2050 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2051 %}
2052
2053 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
// ModRM pairing the HIGH halves; HIGH_FROM_LOW maps a long's low-half
// register encoding to its paired high-half register encoding.
2054 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2055 %}
2056
2057 enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
// ModRM: int dst register against the HIGH half of the long src.
2058 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2059 %}
2060
2061 enc_class Con32 (immI src) %{ // Con32(storeImmI)
2062 // Output immediate
2063 $$$emit32$src$$constant;
2064 %}
2065
2066 enc_class Con32F_as_bits(immF src) %{ // storeF_imm
2067 // Output Float immediate bits
// Reinterpret the float constant as its raw 32-bit pattern and emit it.
2068 jfloat jf = $src$$constant;
2069 int jf_as_bits = jint_cast( jf );
2070 emit_d32(cbuf, jf_as_bits);
2071 %}
2072
2073 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm
2074 // Output Float immediate bits
// Same as Con32F_as_bits, for the SSE-flavored (immXF) float constant.
2075 jfloat jf = $src$$constant;
2076 int jf_as_bits = jint_cast( jf );
2077 emit_d32(cbuf, jf_as_bits);
2078 %}
2079
2080 enc_class Con16 (immI src) %{ // Con16(storeImmI)
2081 // Output immediate
2082 $$$emit16$src$$constant;
2083 %}
2084
// Emit the int constant as a raw 32-bit datum (no opcode, no ModRM).
2085 enc_class Con_d32(immI src) %{
2086 emit_d32(cbuf,$src$$constant);
2087 %}
2088
2089 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI)
2090 // Output immediate memory reference
// mod=00 with r/m=101 selects disp32-only (absolute) addressing;
// the displacement emitted here is 0.
2091 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2092 emit_d32(cbuf, 0x00);
2093 %}
2266 // jmp dst < src around move
2267 emit_opcode(cbuf,0x7C);
2268 emit_d8(cbuf,2);
2269 // move dst,src
2270 emit_opcode(cbuf,0x8B);
2271 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2272 %}
2273
// MAX as compare + short conditional jump over a MOV (no CMOV needed).
2274 enc_class max_enc (eRegI dst, eRegI src) %{ // MAX
2275 // Compare dst,src
2276 emit_opcode(cbuf,0x3B); // CMP r32,r/m32
2277 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2278 // jmp dst > src around move
2279 emit_opcode(cbuf,0x7F); // JG +2 (skip the 2-byte MOV below)
2280 emit_d8(cbuf,2);
2281 // move dst,src
2282 emit_opcode(cbuf,0x8B); // MOV r32,r/m32
2283 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2284 %}
2285
// Store an x87 FP register to memory. $primary supplies the store opcode;
// reg_encoding is the /digit: 2 = FST (keep), 3 = FSTP (store & pop).
2286 enc_class enc_FP_store(memory mem, regD src) %{
2287 // If src is FPR1, we can just FST to store it.
2288 // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2289 int reg_encoding = 0x2; // Just store
2290 int base = $mem$$base;
2291 int index = $mem$$index;
2292 int scale = $mem$$scale;
2293 int displace = $mem$$disp;
2294 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2295 if( $src$$reg != FPR1L_enc ) {
2296 reg_encoding = 0x3; // Store & pop
2297 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2298 emit_d8( cbuf, 0xC0-1+$src$$reg );
2299 }
2300 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
2301 emit_opcode(cbuf,$primary);
2302 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2303 %}
2304
2305 enc_class neg_reg(eRegI dst) %{
2306 // NEG $dst
2415 // MOV $dst.lo,$dst.hi
2416 emit_opcode( cbuf, 0x8B );
2417 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2418 // SAR $dst.hi,31
2419 emit_opcode(cbuf, 0xC1);
2420 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2421 emit_d8(cbuf, 0x1F );
2422 // small:
2423 // SHRD $dst.lo,$dst.hi,$shift
2424 emit_opcode(cbuf,0x0F);
2425 emit_opcode(cbuf,0xAD);
2426 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2427 // SAR $dst.hi,$shift"
2428 emit_opcode(cbuf,0xD3);
2429 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2430 %}
2431
2432
2433 // ----------------- Encodings for floating point unit -----------------
2434 // May leave result in FPU-TOS or FPU reg depending on opcodes
2435 enc_class OpcReg_F (regF src) %{ // FMUL, FDIV
// Opcode byte from $primary, then ModRM with /digit from $secondary.
2436 $$$emit8$primary;
2437 emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2438 %}
2439
2440 // Pop argument in FPR0 with FSTP ST(0)
2441 enc_class PopFPU() %{
2442 emit_opcode( cbuf, 0xDD ); // FSTP ST(0): discard top of FP stack
2443 emit_d8( cbuf, 0xD8 );
2444 %}
2445
2446 // !!!!! equivalent to Pop_Reg_F
2447 enc_class Pop_Reg_D( regD dst ) %{
2448 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2449 emit_d8( cbuf, 0xD8+$dst$$reg );
2450 %}
2451
// Push FP register dst onto the x87 stack.
2452 enc_class Push_Reg_D( regD dst ) %{
2453 emit_opcode( cbuf, 0xD9 );
2454 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
2455 %}
2456
// strictfp: multiply dst by the subnormal bias1 constant, loaded as an
// 80-bit real from a stub address, per Java strictfp double semantics.
2457 enc_class strictfp_bias1( regD dst ) %{
2458 emit_opcode( cbuf, 0xDB ); // FLD m80real
2459 emit_opcode( cbuf, 0x2D );
2460 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2461 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2462 emit_opcode( cbuf, 0xC8+$dst$$reg );
2463 %}
2464
// strictfp: same pattern with the bias2 constant (undoes bias1 scaling).
2465 enc_class strictfp_bias2( regD dst ) %{
2466 emit_opcode( cbuf, 0xDB ); // FLD m80real
2467 emit_opcode( cbuf, 0x2D );
2468 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2469 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2470 emit_opcode( cbuf, 0xC8+$dst$$reg );
2471 %}
2472
2473 // Special case for moving an integer register to a stack slot.
2474 enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2475 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2476 %}
2477
2478 // Special case for moving a register to a stack slot.
2479 enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2480 // Opcode already emitted
// [ESP+disp32] addressing: mod=10 with r/m=ESP forces a SIB byte.
2481 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte
2482 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
2483 emit_d32(cbuf, $dst$$disp); // Displacement
2484 %}
2485
2486 // Push the integer in stackSlot 'src' onto FP-stack
2487 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
2488 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2489 %}
2490
2491 // Push the float in stackSlot 'src' onto FP-stack
2492 enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src]
2493 store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
2494 %}
2495
2496 // Push the double in stackSlot 'src' onto FP-stack
2497 enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src]
2498 store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
2499 %}
2500
2501 // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2502 enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2503 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2504 %}
2505
2506 // Same as Pop_Mem_F except for opcode
2507 // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2508 enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2509 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2510 %}
2511
// Pop FP TOS into register dst (see Pop_Reg_D — identical encoding).
2512 enc_class Pop_Reg_F( regF dst ) %{
2513 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2514 emit_d8( cbuf, 0xD8+$dst$$reg );
2515 %}
2516
// Push FP register dst onto the x87 stack.
2517 enc_class Push_Reg_F( regF dst ) %{
2518 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2519 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2520 %}
2521
2522 // Push FPU's float to a stack-slot, and pop FPU-stack
2523 enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
// If src is not already at TOS, push it first and use FSTP (/3) instead
// of FST (/2) so the extra copy is popped by the store.
2524 int pop = 0x02;
2525 if ($src$$reg != FPR1L_enc) {
2526 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2527 emit_d8( cbuf, 0xC0-1+$src$$reg );
2528 pop = 0x03;
2529 }
2530 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
2531 %}
2532
2533 // Push FPU's double to a stack-slot, and pop FPU-stack
2534 enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
// Same FST/FSTP selection as Pop_Mem_Reg_F, with the double opcode 0xDD.
2535 int pop = 0x02;
2536 if ($src$$reg != FPR1L_enc) {
2537 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2538 emit_d8( cbuf, 0xC0-1+$src$$reg );
2539 pop = 0x03;
2540 }
2541 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
2542 %}
2543
2544 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2545 enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
// Base 0xD0 selects FST ST(i); 0xD8 selects FSTP ST(i). The -1 adjusts
// for skipping the FLD when src is already at TOS.
2546 int pop = 0xD0 - 1; // -1 since we skip FLD
2547 if ($src$$reg != FPR1L_enc) {
2548 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
2549 emit_d8( cbuf, 0xC0-1+$src$$reg );
2550 pop = 0xD8;
2551 }
2552 emit_opcode( cbuf, 0xDD );
2553 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
2554 %}
2555
2556
// dst = src + src1*src2 on the x87 stack, via MacroAssembler helpers.
2557 enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
2558 MacroAssembler masm(&cbuf);
2559 masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg
2560 masm.fmul( $src2$$reg+0); // value at TOS
2561 masm.fadd( $src$$reg+0); // value at TOS
2562 masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store
2563 %}
2564
2565
// Load dst to TOS, then (if src is not FPR1) rotate the FP stack with
// fincstp/FXCH/fdecstp so that src ends up in FPR1 for the following op.
2566 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
2567 // load dst in FPR0
2568 emit_opcode( cbuf, 0xD9 );
2569 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2570 if ($src$$reg != FPR1L_enc) {
2571 // fincstp
2572 emit_opcode (cbuf, 0xD9);
2573 emit_opcode (cbuf, 0xF7);
2574 // swap src with FPR1:
2575 // FXCH FPR1 with src
2576 emit_opcode(cbuf, 0xD9);
2577 emit_d8(cbuf, 0xC8-1+$src$$reg );
2578 // fdecstp
2579 emit_opcode (cbuf, 0xD9);
2580 emit_opcode (cbuf, 0xF6);
2581 }
2582 %}
2583
// Spill two XMM doubles through [rsp] and push both onto the x87 stack
// (src1 first, so src0 ends up at TOS) for an FPREM-style operation.
2584 enc_class Push_ModD_encoding(regXD src0, regXD src1) %{
2585 MacroAssembler _masm(&cbuf);
2586 __ subptr(rsp, 8);
2587 __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2588 __ fld_d(Address(rsp, 0));
2589 __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2590 __ fld_d(Address(rsp, 0));
2591 %}
2592
// Single-precision variant of Push_ModD_encoding (4-byte scratch slot).
2593 enc_class Push_ModX_encoding(regX src0, regX src1) %{
2594 MacroAssembler _masm(&cbuf);
2595 __ subptr(rsp, 4);
2596 __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2597 __ fld_s(Address(rsp, 0));
2598 __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2599 __ fld_s(Address(rsp, 0));
2600 %}
2601
// Pop the x87 result through [rsp] into an XMM double and release the
// 8-byte scratch slot (pairs with Push_ModD_encoding/push_stack_temp_qword).
2602 enc_class Push_ResultXD(regXD dst) %{
2603 MacroAssembler _masm(&cbuf);
2604 __ fstp_d(Address(rsp, 0));
2605 __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2606 __ addptr(rsp, 8);
2607 %}
2608
// Float variant; the amount of stack to release comes from the d8 operand.
2609 enc_class Push_ResultX(regX dst, immI d8) %{
2610 MacroAssembler _masm(&cbuf);
2611 __ fstp_s(Address(rsp, 0));
2612 __ movflt($dst$$XMMRegister, Address(rsp, 0));
2613 __ addptr(rsp, $d8$$constant);
2614 %}
2615
// Move one XMM double onto the x87 stack via an 8-byte scratch slot.
2616 enc_class Push_SrcXD(regXD src) %{
2617 MacroAssembler _masm(&cbuf);
2618 __ subptr(rsp, 8);
2619 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2620 __ fld_d(Address(rsp, 0));
2621 %}
2622
// Reserve an 8-byte scratch slot on the CPU stack.
2623 enc_class push_stack_temp_qword() %{
2624 MacroAssembler _masm(&cbuf);
2625 __ subptr(rsp, 8);
2626 %}
2627
// Release the 8-byte scratch slot.
2628 enc_class pop_stack_temp_qword() %{
2629 MacroAssembler _masm(&cbuf);
2630 __ addptr(rsp, 8);
2631 %}
2632
// Copy an XMM double to x87 TOS; assumes a scratch slot already exists
// at [rsp] (see push_stack_temp_qword).
2633 enc_class push_xmm_to_fpr1(regXD src) %{
2634 MacroAssembler _masm(&cbuf);
2635 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2636 __ fld_d(Address(rsp, 0));
2637 %}
2638
2639 // Compute X^Y using Intel's fast hardware instructions, if possible.
2640 // Otherwise return a NaN.
2641 enc_class pow_exp_core_encoding %{
2642 // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
2643 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
2644 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
2645 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
2646 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q)
2647 emit_opcode(cbuf,0x1C);
2648 emit_d8(cbuf,0x24);
2649 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
2650 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
2651 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
2652 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q)
2653 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2658 emit_rm(cbuf, 0x3, 0x0, EAX_enc);
2659 emit_d32(cbuf,1023);
2660 emit_opcode(cbuf,0x8B); // mov rbx,eax
2661 emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
2662 emit_opcode(cbuf,0xC1); // shl rax,20 - Slide to exponent position
2663 emit_rm(cbuf,0x3,0x4,EAX_enc);
2664 emit_d8(cbuf,20);
2665 emit_opcode(cbuf,0x85); // test rbx,ecx - check for overflow
2666 emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
2667 emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45); // CMOVne rax,ecx - overflow; stuff NAN into EAX
2668 emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
2669 emit_opcode(cbuf,0x89); // mov [esp+4],eax - Store as part of double word
2670 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
2671 emit_opcode(cbuf,0xC7); // mov [esp+0],0 - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
2672 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2673 emit_d32(cbuf,0);
2674 emit_opcode(cbuf,0xDC); // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
2675 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
2676 %}
2677
2678 // enc_class Pop_Reg_Mod_D( regD dst, regD src)
2679 // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
2680
// After an op that left the result at TOS: if src was not FPR1, rotate the
// FP stack (fincstp/FXCH/fdecstp) to put the result where the following
// Pop_Reg_F / Pop_Mem_F expects it.
2681 enc_class Push_Result_Mod_D( regD src) %{
2682 if ($src$$reg != FPR1L_enc) {
2683 // fincstp
2684 emit_opcode (cbuf, 0xD9);
2685 emit_opcode (cbuf, 0xF7);
2686 // FXCH FPR1 with src
2687 emit_opcode(cbuf, 0xD9);
2688 emit_d8(cbuf, 0xC8-1+$src$$reg );
2689 // fdecstp
2690 emit_opcode (cbuf, 0xD9);
2691 emit_opcode (cbuf, 0xF6);
2692 }
2693 // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2694 // // FSTP FPR$dst$$reg
2695 // emit_opcode( cbuf, 0xDD );
2696 // emit_d8( cbuf, 0xD8+$dst$$reg );
2697 %}
2698
// Copy FP status word to flags, then JNP +5 over a 5-byte parity fixup
// that the surrounding encoding emits next.
2699 enc_class fnstsw_sahf_skip_parity() %{
2700 // fnstsw ax
2701 emit_opcode( cbuf, 0xDF );
2702 emit_opcode( cbuf, 0xE0 );
2703 // sahf
2704 emit_opcode( cbuf, 0x9E );
2705 // jnp ::skip
2706 emit_opcode( cbuf, 0x7B );
2707 emit_opcode( cbuf, 0x05 );
2708 %}
2709
// FPREM loop: fprem only produces a partial remainder, so repeat until
// the C2 status bit (surfaced via fnstsw/sahf as PF) is clear.
2710 enc_class emitModD() %{
2711 // fprem must be iterative
2712 // :: loop
2713 // fprem
2714 emit_opcode( cbuf, 0xD9 );
2715 emit_opcode( cbuf, 0xF8 );
2716 // wait
2717 emit_opcode( cbuf, 0x9b );
2718 // fnstsw ax
2719 emit_opcode( cbuf, 0xDF );
2720 emit_opcode( cbuf, 0xE0 );
2721 // sahf
2722 emit_opcode( cbuf, 0x9E );
2723 // jp ::loop
// Near JP with rel32 = -12, back to the fprem at the loop top.
2724 emit_opcode( cbuf, 0x0F );
2725 emit_opcode( cbuf, 0x8A );
2726 emit_opcode( cbuf, 0xF4 );
2727 emit_opcode( cbuf, 0xFF );
2728 emit_opcode( cbuf, 0xFF );
2729 emit_opcode( cbuf, 0xFF );
2730 %}
3570 %}
3571
3572
// POP EDX (single-byte opcode 0x5A).
3573 enc_class enc_pop_rdx() %{
3574 emit_opcode(cbuf,0x5A);
3575 %}
3576
// Tail-jump to the shared rethrow stub; the rel32 is emitted with
// runtime-call relocation so it stays correct if the code moves.
3577 enc_class enc_rethrow() %{
3578 cbuf.set_insts_mark();
3579 emit_opcode(cbuf, 0xE9); // jmp entry
3580 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
3581 runtime_call_Relocation::spec(), RELOC_IMM32 );
3582 %}
3583
3584
3585 // Convert a double to an int. Java semantics require we do complex
3586 // manglelations in the corner cases. So we set the rounding mode to
3587 // 'zero', store the darned double down as an int, and reset the
3588 // rounding mode to 'nearest'. The hardware throws an exception which
3589 // patches up the correct value directly to the stack.
3590 enc_class D2I_encoding( regD src ) %{
3591 // Flip to round-to-zero mode. We attempted to allow invalid-op
3592 // exceptions here, so that a NAN or other corner-case value will
3593 // throw an exception (but normal values get converted at full speed).
3594 // However, I2C adapters and other float-stack manglers leave pending
3595 // invalid-op exceptions hanging. We would have to clear them before
3596 // enabling them and that is more expensive than just testing for the
3597 // invalid value Intel stores down in the corner cases.
3598 emit_opcode(cbuf,0xD9); // FLDCW trunc
3599 emit_opcode(cbuf,0x2D);
3600 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3601 // Allocate a word
3602 emit_opcode(cbuf,0x83); // SUB ESP,4
3603 emit_opcode(cbuf,0xEC);
3604 emit_d8(cbuf,0x04);
3605 // Encoding assumes a double has been pushed into FPR0.
3606 // Store down the double as an int, popping the FPU stack
3607 emit_opcode(cbuf,0xDB); // FISTP [ESP]
3608 emit_opcode(cbuf,0x1C);
3609 emit_d8(cbuf,0x24);
3610 // Restore the rounding mode; mask the exception
3613 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3614 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3615 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3616
3617 // Load the converted int; adjust CPU stack
3618 emit_opcode(cbuf,0x58); // POP EAX
3619 emit_opcode(cbuf,0x3D); // CMP EAX,imm
3620 emit_d32 (cbuf,0x80000000); // 0x80000000
3621 emit_opcode(cbuf,0x75); // JNE around_slow_call
3622 emit_d8 (cbuf,0x07); // Size of slow_call
3623 // Push src onto stack slow-path
3624 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3625 emit_d8 (cbuf,0xC0-1+$src$$reg );
3626 // CALL directly to the runtime
3627 cbuf.set_insts_mark();
3628 emit_opcode(cbuf,0xE8); // Call into runtime
3629 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3630 // Carry on here...
3631 %}
3632
3633 enc_class D2L_encoding( regD src ) %{
3634 emit_opcode(cbuf,0xD9); // FLDCW trunc
3635 emit_opcode(cbuf,0x2D);
3636 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3637 // Allocate a word
3638 emit_opcode(cbuf,0x83); // SUB ESP,8
3639 emit_opcode(cbuf,0xEC);
3640 emit_d8(cbuf,0x08);
3641 // Encoding assumes a double has been pushed into FPR0.
3642 // Store down the double as a long, popping the FPU stack
3643 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3644 emit_opcode(cbuf,0x3C);
3645 emit_d8(cbuf,0x24);
3646 // Restore the rounding mode; mask the exception
3647 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3648 emit_opcode(cbuf,0x2D);
3649 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3650 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3651 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3652
3653 // Load the converted int; adjust CPU stack
3655 emit_opcode(cbuf,0x5A); // POP EDX
3656 emit_opcode(cbuf,0x81); // CMP EDX,imm
3657 emit_d8 (cbuf,0xFA); // rdx
3658 emit_d32 (cbuf,0x80000000); // 0x80000000
3659 emit_opcode(cbuf,0x75); // JNE around_slow_call
3660 emit_d8 (cbuf,0x07+4); // Size of slow_call
3661 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3662 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3663 emit_opcode(cbuf,0x75); // JNE around_slow_call
3664 emit_d8 (cbuf,0x07); // Size of slow_call
3665 // Push src onto stack slow-path
3666 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3667 emit_d8 (cbuf,0xC0-1+$src$$reg );
3668 // CALL directly to the runtime
3669 cbuf.set_insts_mark();
3670 emit_opcode(cbuf,0xE8); // Call into runtime
3671 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3672 // Carry on here...
3673 %}
3674
3675 enc_class FMul_ST_reg( eRegF src1 ) %{
3676 // Operand was loaded from memory into fp ST (stack top)
3677 // FMUL ST,$src /* D8 C8+i */
3678 emit_opcode(cbuf, 0xD8);
3679 emit_opcode(cbuf, 0xC8 + $src1$$reg);
3680 %}
3681
3682 enc_class FAdd_ST_reg( eRegF src2 ) %{
3683 // FADDP ST,src2 /* D8 C0+i */
3684 emit_opcode(cbuf, 0xD8);
3685 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3686 //could use FADDP src2,fpST /* DE C0+i */
3687 %}
3688
// Reverse form: add TOS into src2 and pop.
3689 enc_class FAddP_reg_ST( eRegF src2 ) %{
3690 // FADDP src2,ST /* DE C0+i */
3691 emit_opcode(cbuf, 0xDE);
3692 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3693 %}
3694
// Fused subtract-then-divide on the value at TOS.
3695 enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
3696 // Operand has been loaded into fp ST (stack top)
3697 // FSUB ST,$src1
3698 emit_opcode(cbuf, 0xD8);
3699 emit_opcode(cbuf, 0xE0 + $src1$$reg);
3700
3701 // FDIV
3702 emit_opcode(cbuf, 0xD8);
3703 emit_opcode(cbuf, 0xF0 + $src2$$reg);
3704 %}
3705
// Fused add-then-multiply on the value at TOS.
3706 enc_class MulFAddF (eRegF src1, eRegF src2) %{
3707 // Operand was loaded from memory into fp ST (stack top)
3708 // FADD ST,$src /* D8 C0+i */
3709 emit_opcode(cbuf, 0xD8);
3710 emit_opcode(cbuf, 0xC0 + $src1$$reg);
3711
3712 // FMUL ST,src2 /* D8 C*+i */
3713 emit_opcode(cbuf, 0xD8);
3714 emit_opcode(cbuf, 0xC8 + $src2$$reg);
3715 %}
3716
3717
// As MulFAddF, but the multiply pops TOS into src2 (FMULP).
3718 enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
3719 // Operand was loaded from memory into fp ST (stack top)
3720 // FADD ST,$src /* D8 C0+i */
3721 emit_opcode(cbuf, 0xD8);
3722 emit_opcode(cbuf, 0xC0 + $src1$$reg);
3723
3724 // FMULP src2,ST /* DE C8+i */
3725 emit_opcode(cbuf, 0xDE);
3726 emit_opcode(cbuf, 0xC8 + $src2$$reg);
3727 %}
3728
3729 // Atomically load the volatile long
3730 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3731 emit_opcode(cbuf,0xDF);
3732 int rm_byte_opcode = 0x05;
3733 int base = $mem$$base;
3734 int index = $mem$$index;
3735 int scale = $mem$$scale;
3736 int displace = $mem$$disp;
3737 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
3738 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
// Long Immediate: exactly the low-32-bit mask 0xFFFFFFFF.
4131 operand immL_32bits() %{
4132 predicate(n->get_long() == 0xFFFFFFFFL);
4133 match(ConL);
4134 op_cost(0);
4135
4136 format %{ %}
4137 interface(CONST_INTER);
4138 %}
4139
4140 // Long Immediate: low 32-bit mask
// (Matches any long whose value fits in a sign-extended 32-bit int.)
4141 operand immL32() %{
4142 predicate(n->get_long() == (int)(n->get_long()));
4143 match(ConL);
4144 op_cost(20);
4145
4146 format %{ %}
4147 interface(CONST_INTER);
4148 %}
4149
4150 //Double Immediate zero
4151 operand immD0() %{
4152 // Do additional (and counter-intuitive) test against NaN to work around VC++
4153 // bug that generates code such that NaNs compare equal to 0.0
4154 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4155 match(ConD);
4156
4157 op_cost(5);
4158 format %{ %}
4159 interface(CONST_INTER);
4160 %}
4161
4162 // Double Immediate one
4163 operand immD1() %{
4164 predicate( UseSSE<=1 && n->getd() == 1.0 );
4165 match(ConD);
4166
4167 op_cost(5);
4168 format %{ %}
4169 interface(CONST_INTER);
4170 %}
4171
4172 // Double Immediate
4173 operand immD() %{
4174 predicate(UseSSE<=1);
4175 match(ConD);
4176
4177 op_cost(5);
4178 format %{ %}
4179 interface(CONST_INTER);
4180 %}
4181
// Double Immediate (SSE2 code paths)
4182 operand immXD() %{
4183 predicate(UseSSE>=2);
4184 match(ConD);
4185
4186 op_cost(5);
4187 format %{ %}
4188 interface(CONST_INTER);
4189 %}
4190
4191 // Double Immediate zero
4192 operand immXD0() %{
4193 // Do additional (and counter-intuitive) test against NaN to work around VC++
4194 // bug that generates code such that NaNs compare equal to 0.0 AND do not
4195 // compare equal to -0.0.
4196 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4197 match(ConD);
4198
4199 format %{ %}
4200 interface(CONST_INTER);
4201 %}
4202
4203 // Float Immediate zero
4204 operand immF0() %{
4205 predicate(UseSSE == 0 && n->getf() == 0.0F);
4206 match(ConF);
4207
4208 op_cost(5);
4209 format %{ %}
4210 interface(CONST_INTER);
4211 %}
4212
4213 // Float Immediate one
4214 operand immF1() %{
4215 predicate(UseSSE == 0 && n->getf() == 1.0F);
4216 match(ConF);
4217
4218 op_cost(5);
4219 format %{ %}
4220 interface(CONST_INTER);
4221 %}
4222
4223 // Float Immediate
4224 operand immF() %{
4225 predicate( UseSSE == 0 );
4226 match(ConF);
4227
4228 op_cost(5);
4229 format %{ %}
4230 interface(CONST_INTER);
4231 %}
4232
4233 // Float Immediate
// (SSE code paths)
4234 operand immXF() %{
4235 predicate(UseSSE >= 1);
4236 match(ConF);
4237
4238 op_cost(5);
4239 format %{ %}
4240 interface(CONST_INTER);
4241 %}
4242
4243 // Float Immediate zero. Zero and not -0.0
4244 operand immXF0() %{
4245 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4246 match(ConF);
4247
4248 op_cost(5);
4249 format %{ %}
4250 interface(CONST_INTER);
4251 %}
4252
4253 // Immediates for special shifts (sign extend)
4254
4255 // Constants for increment
4256 operand immI_16() %{
4257 predicate( n->get_int() == 16 );
4258 match(ConI);
4259
4260 format %{ %}
4261 interface(CONST_INTER);
4262 %}
4263
4264 operand immI_24() %{
// Flags operands specialized for long comparisons; the distinct names
// let instruct rules constrain which long-compare idiom produced them.
4600 operand flagsReg_long_LTGE() %{
4601 constraint(ALLOC_IN_RC(int_flags));
4602 match(RegFlags);
4603 format %{ "FLAGS_LTGE" %}
4604 interface(REG_INTER);
4605 %}
4606 operand flagsReg_long_EQNE() %{
4607 constraint(ALLOC_IN_RC(int_flags));
4608 match(RegFlags);
4609 format %{ "FLAGS_EQNE" %}
4610 interface(REG_INTER);
4611 %}
4612 operand flagsReg_long_LEGT() %{
4613 constraint(ALLOC_IN_RC(int_flags));
4614 match(RegFlags);
4615 format %{ "FLAGS_LEGT" %}
4616 interface(REG_INTER);
4617 %}
4618
4619 // Float register operands
// x87 double register (only when not compiling with SSE2 doubles).
4620 operand regD() %{
4621 predicate( UseSSE < 2 );
4622 constraint(ALLOC_IN_RC(dbl_reg));
4623 match(RegD);
4624 match(regDPR1);
4625 match(regDPR2);
4626 format %{ %}
4627 interface(REG_INTER);
4628 %}
4629
// regD restricted to FPR1 (x87 stack top).
4630 operand regDPR1(regD reg) %{
4631 predicate( UseSSE < 2 );
4632 constraint(ALLOC_IN_RC(dbl_reg0));
4633 match(reg);
4634 format %{ "FPR1" %}
4635 interface(REG_INTER);
4636 %}
4637
// regD restricted to FPR2.
4638 operand regDPR2(regD reg) %{
4639 predicate( UseSSE < 2 );
4640 constraint(ALLOC_IN_RC(dbl_reg1));
4641 match(reg);
4642 format %{ "FPR2" %}
4643 interface(REG_INTER);
4644 %}
4645
// regD excluding FPR1.
4646 operand regnotDPR1(regD reg) %{
4647 predicate( UseSSE < 2 );
4648 constraint(ALLOC_IN_RC(dbl_notreg0));
4649 match(reg);
4650 format %{ %}
4651 interface(REG_INTER);
4652 %}
4653
4654 // XMM Double register operands
4655 operand regXD() %{
4656 predicate( UseSSE>=2 );
4657 constraint(ALLOC_IN_RC(xdb_reg));
4658 match(RegD);
4659 match(regXD6);
4660 match(regXD7);
4661 format %{ %}
4662 interface(REG_INTER);
4663 %}
4664
4665 // XMM6 double register operands
4666 operand regXD6(regXD reg) %{
4667 predicate( UseSSE>=2 );
4668 constraint(ALLOC_IN_RC(xdb_reg6));
4669 match(reg);
4670 format %{ "XMM6" %}
4671 interface(REG_INTER);
4672 %}
4673
4674 // XMM7 double register operands
4675 operand regXD7(regXD reg) %{
4676 predicate( UseSSE>=2 );
4677 constraint(ALLOC_IN_RC(xdb_reg7));
4678 match(reg);
4679 format %{ "XMM7" %}
4680 interface(REG_INTER);
4681 %}
4682
4683 // Float register operands
// x87 float register (non-SSE float code paths).
4684 operand regF() %{
4685 predicate( UseSSE < 2 );
4686 constraint(ALLOC_IN_RC(flt_reg));
4687 match(RegF);
4688 match(regFPR1);
4689 format %{ %}
4690 interface(REG_INTER);
4691 %}
4692
4693 // Float register operands
// regF restricted to FPR1 (x87 stack top).
4694 operand regFPR1(regF reg) %{
4695 predicate( UseSSE < 2 );
4696 constraint(ALLOC_IN_RC(flt_reg0));
4697 match(reg);
4698 format %{ "FPR1" %}
4699 interface(REG_INTER);
4700 %}
4701
4702 // XMM register operands
// XMM single-precision register (SSE1+).
4703 operand regX() %{
4704 predicate( UseSSE>=1 );
4705 constraint(ALLOC_IN_RC(xmm_reg));
4706 match(RegF);
4707 format %{ %}
4708 interface(REG_INTER);
4709 %}
4710
4711
4712 //----------Memory Operands----------------------------------------------------
4713 // Direct Memory Operand
4714 operand direct(immP addr) %{
4715 match(addr);
4716
4717 format %{ "[$addr]" %}
4718 interface(MEMORY_INTER) %{
4719 base(0xFFFFFFFF);
4720 index(0x4);
4721 scale(0x0);
4722 disp($addr);
4723 %}
5427 // Conditional move reg-mem
5428 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
5429 single_instruction;
5430 dst : S4(write);
5431 src : S3(read);
5432 cr : S3(read);
5433 DECODE : S0; // any decoder
5434 MEM : S3;
5435 %}
5436
5437 // Conditional move reg-reg long
5438 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
5439 single_instruction;
5440 dst : S4(write);
5441 src : S3(read);
5442 cr : S3(read);
5443 DECODE : S0(2); // any 2 decoders
5444 %}
5445
5446 // Conditional move double reg-reg
5447 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
5448 single_instruction;
5449 dst : S4(write);
5450 src : S3(read);
5451 cr : S3(read);
5452 DECODE : S0; // any decoder
5453 %}
5454
5455 // Float reg-reg operation
5456 pipe_class fpu_reg(regD dst) %{
5457 instruction_count(2);
5458 dst : S3(read);
5459 DECODE : S0(2); // any 2 decoders
5460 FPU : S3;
5461 %}
5462
5463 // Float reg-reg operation
5464 pipe_class fpu_reg_reg(regD dst, regD src) %{
5465 instruction_count(2);
5466 dst : S4(write);
5467 src : S3(read);
5468 DECODE : S0(2); // any 2 decoders
5469 FPU : S3;
5470 %}
5471
5472 // Float reg-reg operation
5473 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
5474 instruction_count(3);
5475 dst : S4(write);
5476 src1 : S3(read);
5477 src2 : S3(read);
5478 DECODE : S0(3); // any 3 decoders
5479 FPU : S3(2);
5480 %}
5481
5482 // Float reg-reg operation
5483 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
5484 instruction_count(4);
5485 dst : S4(write);
5486 src1 : S3(read);
5487 src2 : S3(read);
5488 src3 : S3(read);
5489 DECODE : S0(4); // any 4 decoders
5490 FPU : S3(2);
5491 %}
5492
5493 // Float reg-reg operation
// (one memory source plus two register sources)
5494 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
5495 instruction_count(4);
5496 dst : S4(write);
5497 src1 : S3(read);
5498 src2 : S3(read);
5499 src3 : S3(read);
5500 DECODE : S1(3); // any 3 decoders
5501 D0 : S0; // Big decoder only
5502 FPU : S3(2);
5503 MEM : S3;
5504 %}
5505
5506 // Float reg-mem operation
5507 pipe_class fpu_reg_mem(regD dst, memory mem) %{
5508 instruction_count(2);
5509 dst : S5(write);
5510 mem : S3(read);
5511 D0 : S0; // big decoder only
5512 DECODE : S1; // any decoder for FPU POP
5513 FPU : S4;
5514 MEM : S3; // any mem
5515 %}
5516
5517 // Float reg-mem operation
5518 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
5519 instruction_count(3);
5520 dst : S5(write);
5521 src1 : S3(read);
5522 mem : S3(read);
5523 D0 : S0; // big decoder only
5524 DECODE : S1(2); // any decoder for FPU POP
5525 FPU : S4;
5526 MEM : S3; // any mem
5527 %}
5528
5529 // Float mem-reg operation
5530 pipe_class fpu_mem_reg(memory mem, regD src) %{
5531 instruction_count(2);
5532 src : S5(read);
5533 mem : S3(read);
5534 DECODE : S0; // any decoder for FPU PUSH
5535 D0 : S1; // big decoder only
5536 FPU : S4;
5537 MEM : S3; // any mem
5538 %}
5539
5540 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
5541 instruction_count(3);
5542 src1 : S3(read);
5543 src2 : S3(read);
5544 mem : S3(read);
5545 DECODE : S0(2); // any decoder for FPU PUSH
5546 D0 : S1; // big decoder only
5547 FPU : S4;
5548 MEM : S3; // any mem
5549 %}
5550
5551 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
5552 instruction_count(3);
5553 src1 : S3(read);
5554 src2 : S3(read);
5555 mem : S4(read);
5556 DECODE : S0; // any decoder for FPU PUSH
5557 D0 : S0(2); // big decoder only
5558 FPU : S4;
5559 MEM : S3(2); // any mem
5560 %}
5561
5562 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5563 instruction_count(2);
5564 src1 : S3(read);
5565 dst : S4(read);
5566 D0 : S0(2); // big decoder only
5567 MEM : S3(2); // any mem
5568 %}
5569
5570 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5571 instruction_count(3);
5572 src1 : S3(read);
5573 src2 : S3(read);
5574 dst : S4(read);
5575 D0 : S0(3); // big decoder only
5576 FPU : S4;
5577 MEM : S3(3); // any mem
5578 %}
5579
5580 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
5581 instruction_count(3);
5582 src1 : S4(read);
5583 mem : S4(read);
5584 DECODE : S0; // any decoder for FPU PUSH
5585 D0 : S0(2); // big decoder only
5586 FPU : S4;
5587 MEM : S3(2); // any mem
5588 %}
5589
5590 // Float load constant
5591 pipe_class fpu_reg_con(regD dst) %{
5592 instruction_count(2);
5593 dst : S5(write);
5594 D0 : S0; // big decoder only for the load
5595 DECODE : S1; // any decoder for FPU POP
5596 FPU : S4;
5597 MEM : S3; // any mem
5598 %}
5599
5600 // Float load constant
5601 pipe_class fpu_reg_reg_con(regD dst, regD src) %{
5602 instruction_count(3);
5603 dst : S5(write);
5604 src : S3(read);
5605 D0 : S0; // big decoder only for the load
5606 DECODE : S1(2); // any decoder for FPU POP
5607 FPU : S4;
5608 MEM : S3; // any mem
5609 %}
5610
5611 // UnConditional branch
5612 pipe_class pipe_jmp( label labl ) %{
5613 single_instruction;
5614 BR : S3;
5615 %}
5616
5617 // Conditional branch
5618 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5619 single_instruction;
5620 cr : S1(read);
5621 BR : S3;
6296 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
6297 %}
6298
6299 ins_pipe(ialu_reg_long_mem);
6300 %}
6301
6302 // Volatile Load Long. Must be atomic, so do 64-bit FILD
6303 // then store it down to the stack and reload on the int
6304 // side.
// x87 variant (UseSSE<=1): FILD/FISTP move 64 bits in one memory access each,
// which is what makes the load atomic on 32-bit x86.
6305 instruct loadL_volatile(stackSlotL dst, memory mem) %{
6306 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
6307 match(Set dst (LoadL mem));
6308
6309 ins_cost(200);
6310 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
6311 "FISTp $dst" %}
6312 ins_encode(enc_loadL_volatile(mem,dst));
6313 ins_pipe( fpu_reg_mem );
6314 %}
6315
// SSE2 variant: a single 64-bit MOVSD keeps the load atomic; tmp is a
// scratch XMM register used to bounce the value to the stack slot.
6316 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
6317 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6318 match(Set dst (LoadL mem));
6319 effect(TEMP tmp);
6320 ins_cost(180);
6321 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6322 "MOVSD $dst,$tmp" %}
6323 ins_encode %{
6324 __ movdbl($tmp$$XMMRegister, $mem$$Address);
6325 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
6326 %}
6327 ins_pipe( pipe_slow );
6328 %}
6329
// SSE2 variant landing directly in a GPR pair: MOVD extracts the low 32 bits,
// PSRLQ shifts the high half down, second MOVD fills the paired high register.
6330 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
6331 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6332 match(Set dst (LoadL mem));
6333 effect(TEMP tmp);
6334 ins_cost(160);
6335 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6336 "MOVD $dst.lo,$tmp\n\t"
6337 "PSRLQ $tmp,32\n\t"
6338 "MOVD $dst.hi,$tmp" %}
6339 ins_encode %{
6340 __ movdbl($tmp$$XMMRegister, $mem$$Address);
6341 __ movdl($dst$$Register, $tmp$$XMMRegister);
6342 __ psrlq($tmp$$XMMRegister, 32);
6343 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
6344 %}
6345 ins_pipe( pipe_slow );
6346 %}
6347
6348 // Load Range
6349 instruct loadRange(eRegI dst, memory mem) %{
6350 match(Set dst (LoadRange mem));
6363
6364 ins_cost(125);
6365 format %{ "MOV $dst,$mem" %}
6366 opcode(0x8B);
6367 ins_encode( OpcP, RegMem(dst,mem));
6368 ins_pipe( ialu_reg_mem );
6369 %}
6370
6371 // Load Klass Pointer
6372 instruct loadKlass(eRegP dst, memory mem) %{
6373 match(Set dst (LoadKlass mem));
6374
6375 ins_cost(125);
6376 format %{ "MOV $dst,$mem" %}
6377 opcode(0x8B);
6378 ins_encode( OpcP, RegMem(dst,mem));
6379 ins_pipe( ialu_reg_mem );
6380 %}
6381
6382 // Load Double
// x87 path (UseSSE<=1): FLD from memory then FSTP into the destination stack reg.
6383 instruct loadD(regD dst, memory mem) %{
6384 predicate(UseSSE<=1);
6385 match(Set dst (LoadD mem));
6386
6387 ins_cost(150);
6388 format %{ "FLD_D ST,$mem\n\t"
6389 "FSTP $dst" %}
6390 opcode(0xDD); /* DD /0 */
6391 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6392 Pop_Reg_D(dst) );
6393 ins_pipe( fpu_reg_mem );
6394 %}
6395
6396 // Load Double to XMM
// MOVSD clears the upper half of the XMM register, hence the
// UseXmmLoadAndClearUpper guard (companion below handles the other setting).
6397 instruct loadXD(regXD dst, memory mem) %{
6398 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
6399 match(Set dst (LoadD mem));
6400 ins_cost(145);
6401 format %{ "MOVSD $dst,$mem" %}
6402 ins_encode %{
6403 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6404 %}
6405 ins_pipe( pipe_slow );
6406 %}
6407
// Partial-register variant when UseXmmLoadAndClearUpper is off; movdbl()
// presumably emits MOVLPD here as the format string shows -- the macro
// assembler selects the encoding from the flag (confirm in assembler source).
6408 instruct loadXD_partial(regXD dst, memory mem) %{
6409 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
6410 match(Set dst (LoadD mem));
6411 ins_cost(145);
6412 format %{ "MOVLPD $dst,$mem" %}
6413 ins_encode %{
6414 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6415 %}
6416 ins_pipe( pipe_slow );
6417 %}
6418
6419 // Load to XMM register (single-precision floating point)
6420 // MOVSS instruction
6421 instruct loadX(regX dst, memory mem) %{
6422 predicate(UseSSE>=1);
6423 match(Set dst (LoadF mem));
6424 ins_cost(145);
6425 format %{ "MOVSS $dst,$mem" %}
6426 ins_encode %{
6427 __ movflt ($dst$$XMMRegister, $mem$$Address);
6428 %}
6429 ins_pipe( pipe_slow );
6430 %}
6431
6432 // Load Float
// x87 path (UseSSE==0): FLD 32-bit real from memory, FSTP to destination.
6433 instruct loadF(regF dst, memory mem) %{
6434 predicate(UseSSE==0);
6435 match(Set dst (LoadF mem));
6436
6437 ins_cost(150);
6438 format %{ "FLD_S ST,$mem\n\t"
6439 "FSTP $dst" %}
6440 opcode(0xD9); /* D9 /0 */
6441 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6442 Pop_Reg_F(dst) );
6443 ins_pipe( fpu_reg_mem );
6444 %}
6445
6446 // Load Aligned Packed Byte to XMM register
6447 instruct loadA8B(regXD dst, memory mem) %{
6448 predicate(UseSSE>=1);
6449 match(Set dst (Load8B mem));
6450 ins_cost(125);
6451 format %{ "MOVQ $dst,$mem\t! packed8B" %}
6452 ins_encode %{
6453 __ movq($dst$$XMMRegister, $mem$$Address);
6454 %}
6455 ins_pipe( pipe_slow );
6456 %}
6457
6458 // Load Aligned Packed Short to XMM register
6459 instruct loadA4S(regXD dst, memory mem) %{
6460 predicate(UseSSE>=1);
6461 match(Set dst (Load4S mem));
6462 ins_cost(125);
6463 format %{ "MOVQ $dst,$mem\t! packed4S" %}
6464 ins_encode %{
6465 __ movq($dst$$XMMRegister, $mem$$Address);
6466 %}
6467 ins_pipe( pipe_slow );
6468 %}
6469
6470 // Load Aligned Packed Char to XMM register
6471 instruct loadA4C(regXD dst, memory mem) %{
6472 predicate(UseSSE>=1);
6473 match(Set dst (Load4C mem));
6474 ins_cost(125);
6475 format %{ "MOVQ $dst,$mem\t! packed4C" %}
6476 ins_encode %{
6477 __ movq($dst$$XMMRegister, $mem$$Address);
6478 %}
6479 ins_pipe( pipe_slow );
6480 %}
6481
6482 // Load Aligned Packed Integer to XMM register
6483 instruct load2IU(regXD dst, memory mem) %{
6484 predicate(UseSSE>=1);
6485 match(Set dst (Load2I mem));
6486 ins_cost(125);
6487 format %{ "MOVQ $dst,$mem\t! packed2I" %}
6488 ins_encode %{
6489 __ movq($dst$$XMMRegister, $mem$$Address);
6490 %}
6491 ins_pipe( pipe_slow );
6492 %}
6493
6494 // Load Aligned Packed Single to XMM
6495 instruct loadA2F(regXD dst, memory mem) %{
6496 predicate(UseSSE>=1);
6497 match(Set dst (Load2F mem));
6498 ins_cost(145);
6499 format %{ "MOVQ $dst,$mem\t! packed2F" %}
6500 ins_encode %{
6501 __ movq($dst$$XMMRegister, $mem$$Address);
6502 %}
6503 ins_pipe( pipe_slow );
6504 %}
6505
6506 // Load Effective Address
// Materializes the address of an 8-bit-displacement operand into a pointer
// register with a single LEA (0x8D).
6507 instruct leaP8(eRegP dst, indOffset8 mem) %{
6508 match(Set dst mem);
6509
6510 ins_cost(110);
6511 format %{ "LEA $dst,$mem" %}
6512 opcode(0x8D);
6513 ins_encode( OpcP, RegMem(dst,mem));
6514 ins_pipe( ialu_reg_reg_fat );
6515 %}
6589 effect(KILL cr);
6590 ins_cost(200);
6591 format %{ "MOV $dst.lo,$src.lo\n\t"
6592 "MOV $dst.hi,$src.hi" %}
6593 opcode(0xB8);
6594 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6595 ins_pipe( ialu_reg_long_fat );
6596 %}
6597
// Long zero constant: XOR each half of the register pair with itself.
// XOR writes EFLAGS, hence the KILL cr effect.
6598 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6599 match(Set dst src);
6600 effect(KILL cr);
6601 ins_cost(150);
6602 format %{ "XOR $dst.lo,$dst.lo\n\t"
6603 "XOR $dst.hi,$dst.hi" %}
6604 opcode(0x33,0x33);
6605 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6606 ins_pipe( ialu_reg_long );
6607 %}
6608
6609 // The instruction usage is guarded by predicate in operand immF().
// Float constant from the constant table via the x87 stack; fstp_d pops ST
// into $dst (x87 stack registers are not 32/64-bit typed -- same pop as loadConD).
6610 instruct loadConF(regF dst, immF con) %{
6611 match(Set dst con);
6612 ins_cost(125);
6613 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6614 "FSTP $dst" %}
6615 ins_encode %{
6616 __ fld_s($constantaddress($con));
6617 __ fstp_d($dst$$reg);
6618 %}
6619 ins_pipe(fpu_reg_con);
6620 %}
6621
6622 // The instruction usage is guarded by predicate in operand immF0().
// 0.0f: FLDZ avoids a constant-table load entirely.
6623 instruct loadConF0(regF dst, immF0 con) %{
6624 match(Set dst con);
6625 ins_cost(125);
6626 format %{ "FLDZ ST\n\t"
6627 "FSTP $dst" %}
6628 ins_encode %{
6629 __ fldz();
6630 __ fstp_d($dst$$reg);
6631 %}
6632 ins_pipe(fpu_reg_con);
6633 %}
6634
6635 // The instruction usage is guarded by predicate in operand immF1().
// 1.0f: FLD1 avoids a constant-table load entirely.
6636 instruct loadConF1(regF dst, immF1 con) %{
6637 match(Set dst con);
6638 ins_cost(125);
6639 format %{ "FLD1 ST\n\t"
6640 "FSTP $dst" %}
6641 ins_encode %{
6642 __ fld1();
6643 __ fstp_d($dst$$reg);
6644 %}
6645 ins_pipe(fpu_reg_con);
6646 %}
6647
6648 // The instruction usage is guarded by predicate in operand immXF().
// SSE float constant: MOVSS straight from the constant table.
6649 instruct loadConX(regX dst, immXF con) %{
6650 match(Set dst con);
6651 ins_cost(125);
6652 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6653 ins_encode %{
6654 __ movflt($dst$$XMMRegister, $constantaddress($con));
6655 %}
6656 ins_pipe(pipe_slow);
6657 %}
6658
6659 // The instruction usage is guarded by predicate in operand immXF0().
// SSE 0.0f: XORPS reg,reg is cheaper than any load.
6660 instruct loadConX0(regX dst, immXF0 src) %{
6661 match(Set dst src);
6662 ins_cost(100);
6663 format %{ "XORPS $dst,$dst\t# float 0.0" %}
6664 ins_encode %{
6665 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6666 %}
6667 ins_pipe(pipe_slow);
6668 %}
6669
6670 // The instruction usage is guarded by predicate in operand immD().
6671 instruct loadConD(regD dst, immD con) %{
6672 match(Set dst con);
6673 ins_cost(125);
6674
6675 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6676 "FSTP $dst" %}
6677 ins_encode %{
6678 __ fld_d($constantaddress($con));
6679 __ fstp_d($dst$$reg);
6680 %}
6681 ins_pipe(fpu_reg_con);
6682 %}
6683
6684 // The instruction usage is guarded by predicate in operand immD0().
6685 instruct loadConD0(regD dst, immD0 con) %{
6686 match(Set dst con);
6687 ins_cost(125);
6688
6689 format %{ "FLDZ ST\n\t"
6690 "FSTP $dst" %}
6691 ins_encode %{
6692 __ fldz();
6693 __ fstp_d($dst$$reg);
6694 %}
6695 ins_pipe(fpu_reg_con);
6696 %}
6697
6698 // The instruction usage is guarded by predicate in operand immD1().
6699 instruct loadConD1(regD dst, immD1 con) %{
6700 match(Set dst con);
6701 ins_cost(125);
6702
6703 format %{ "FLD1 ST\n\t"
6704 "FSTP $dst" %}
6705 ins_encode %{
6706 __ fld1();
6707 __ fstp_d($dst$$reg);
6708 %}
6709 ins_pipe(fpu_reg_con);
6710 %}
6711
6712 // The instruction usage is guarded by predicate in operand immXD().
6713 instruct loadConXD(regXD dst, immXD con) %{
6714 match(Set dst con);
6715 ins_cost(125);
6716 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6717 ins_encode %{
6718 __ movdbl($dst$$XMMRegister, $constantaddress($con));
6719 %}
6720 ins_pipe(pipe_slow);
6721 %}
6722
6723 // The instruction usage is guarded by predicate in operand immXD0().
// SSE 0.0d: XORPD reg,reg is cheaper than any load.
6724 instruct loadConXD0(regXD dst, immXD0 src) %{
6725 match(Set dst src);
6726 ins_cost(100);
6727 format %{ "XORPD $dst,$dst\t# double 0.0" %}
6728 ins_encode %{
6729 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6730 %}
6731 ins_pipe( pipe_slow );
6732 %}
6733
6734 // Load Stack Slot
6735 instruct loadSSI(eRegI dst, stackSlotI src) %{
6736 match(Set dst src);
6737 ins_cost(125);
6738
6739 format %{ "MOV $dst,$src" %}
6740 opcode(0x8B);
6741 ins_encode( OpcP, RegMem(dst,src));
6742 ins_pipe( ialu_reg_mem );
6743 %}
6744
6744
6748 ins_cost(200);
6749 format %{ "MOV $dst,$src.lo\n\t"
6750 "MOV $dst+4,$src.hi" %}
6751 opcode(0x8B, 0x8B);
6752 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6753 ins_pipe( ialu_mem_long_reg );
6754 %}
6755
6756 // Load Stack Slot
6757 instruct loadSSP(eRegP dst, stackSlotP src) %{
6758 match(Set dst src);
6759 ins_cost(125);
6760
6761 format %{ "MOV $dst,$src" %}
6762 opcode(0x8B);
6763 ins_encode( OpcP, RegMem(dst,src));
6764 ins_pipe( ialu_reg_mem );
6765 %}
6766
6767 // Load Stack Slot
// Float from a stack slot via the x87 stack; the _no_oop memory encoding is
// used since a stack slot can never hold an embedded oop relocation.
6768 instruct loadSSF(regF dst, stackSlotF src) %{
6769 match(Set dst src);
6770 ins_cost(125);
6771
6772 format %{ "FLD_S $src\n\t"
6773 "FSTP $dst" %}
6774 opcode(0xD9); /* D9 /0, FLD m32real */
6775 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6776 Pop_Reg_F(dst) );
6777 ins_pipe( fpu_reg_mem );
6778 %}
6779
6780 // Load Stack Slot
6781 instruct loadSSD(regD dst, stackSlotD src) %{
6782 match(Set dst src);
6783 ins_cost(125);
6784
6785 format %{ "FLD_D $src\n\t"
6786 "FSTP $dst" %}
6787 opcode(0xDD); /* DD /0, FLD m64real */
6788 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6789 Pop_Reg_D(dst) );
6790 ins_pipe( fpu_reg_mem );
6791 %}
6792
6793 // Prefetch instructions.
6794 // Must be safe to execute with invalid address (cannot fault).
6795
// No prefetch support: match the node but emit nothing (size 0, cost 0).
6796 instruct prefetchr0( memory mem ) %{
6797 predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
6798 match(PrefetchRead mem);
6799 ins_cost(0);
6800 size(0);
6801 format %{ "PREFETCHR (non-SSE is empty encoding)" %}
6802 ins_encode();
6803 ins_pipe(empty);
6804 %}
6805
6806 instruct prefetchr( memory mem ) %{
6807 predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3);
6808 match(PrefetchRead mem);
6809 ins_cost(100);
7004 ins_pipe(ialu_mem_reg);
7005 %}
7006
7007 // Volatile Store Long. Must be atomic, so move it into
7008 // the FP TOS and then do a 64-bit FIST. Has to probe the
7009 // target address before the store (for null-ptr checks)
7010 // so the memory operand is used twice in the encoding.
// x87 variant: the leading CMP (0x3B) against $mem triggers the implicit null
// check before the FPU touches the address; FILD/FISTP make the 64-bit store atomic.
7011 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
7012 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
7013 match(Set mem (StoreL mem src));
7014 effect( KILL cr );
7015 ins_cost(400);
7016 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7017 "FILD $src\n\t"
7018 "FISTp $mem\t # 64-bit atomic volatile long store" %}
7019 opcode(0x3B);
7020 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7021 ins_pipe( fpu_reg_mem );
7022 %}
7023
// SSE2 variant from a stack slot: bounce through a scratch XMM so the final
// MOVSD writes all 64 bits at once.
7024 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
7025 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7026 match(Set mem (StoreL mem src));
7027 effect( TEMP tmp, KILL cr );
7028 ins_cost(380);
7029 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7030 "MOVSD $tmp,$src\n\t"
7031 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7032 ins_encode %{
7033 __ cmpl(rax, $mem$$Address);
7034 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
7035 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7036 %}
7037 ins_pipe( pipe_slow );
7038 %}
7039
// SSE2 variant from a GPR pair: assemble the 64-bit value in an XMM register
// (MOVD lo, MOVD hi, PUNPCKLDQ to merge) before the single atomic MOVSD.
7040 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
7041 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7042 match(Set mem (StoreL mem src));
7043 effect( TEMP tmp2 , TEMP tmp, KILL cr );
7044 ins_cost(360);
7045 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7046 "MOVD $tmp,$src.lo\n\t"
7047 "MOVD $tmp2,$src.hi\n\t"
7048 "PUNPCKLDQ $tmp,$tmp2\n\t"
7049 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7050 ins_encode %{
7051 __ cmpl(rax, $mem$$Address);
7052 __ movdl($tmp$$XMMRegister, $src$$Register);
7053 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
7054 __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
7055 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7056 %}
7057 ins_pipe( pipe_slow );
7058 %}
7059
7060 // Store Pointer; for storing unknown oops and raw pointers
7098
7099 ins_cost(150);
7100 format %{ "MOV $mem,$src" %}
7101 opcode(0xC7); /* C7 /0 */
7102 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
7103 ins_pipe( ialu_mem_imm );
7104 %}
7105
7106 // Store Byte Immediate
7107 instruct storeImmB(memory mem, immI8 src) %{
7108 match(Set mem (StoreB mem src));
7109
7110 ins_cost(150);
7111 format %{ "MOV8 $mem,$src" %}
7112 opcode(0xC6); /* C6 /0 */
7113 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7114 ins_pipe( ialu_mem_imm );
7115 %}
7116
7117 // Store Aligned Packed Byte XMM register to memory
7118 instruct storeA8B(memory mem, regXD src) %{
7119 predicate(UseSSE>=1);
7120 match(Set mem (Store8B mem src));
7121 ins_cost(145);
7122 format %{ "MOVQ $mem,$src\t! packed8B" %}
7123 ins_encode %{
7124 __ movq($mem$$Address, $src$$XMMRegister);
7125 %}
7126 ins_pipe( pipe_slow );
7127 %}
7128
7129 // Store Aligned Packed Char/Short XMM register to memory
7130 instruct storeA4C(memory mem, regXD src) %{
7131 predicate(UseSSE>=1);
7132 match(Set mem (Store4C mem src));
7133 ins_cost(145);
7134 format %{ "MOVQ $mem,$src\t! packed4C" %}
7135 ins_encode %{
7136 __ movq($mem$$Address, $src$$XMMRegister);
7137 %}
7138 ins_pipe( pipe_slow );
7139 %}
7140
7141 // Store Aligned Packed Integer XMM register to memory
7142 instruct storeA2I(memory mem, regXD src) %{
7143 predicate(UseSSE>=1);
7144 match(Set mem (Store2I mem src));
7145 ins_cost(145);
7146 format %{ "MOVQ $mem,$src\t! packed2I" %}
7147 ins_encode %{
7148 __ movq($mem$$Address, $src$$XMMRegister);
7149 %}
7150 ins_pipe( pipe_slow );
7151 %}
7152
7153 // Store CMS card-mark Immediate
7154 instruct storeImmCM(memory mem, immI8 src) %{
7155 match(Set mem (StoreCM mem src));
7156
7157 ins_cost(150);
7158 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
7159 opcode(0xC6); /* C6 /0 */
7160 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7161 ins_pipe( ialu_mem_imm );
7162 %}
7163
7164 // Store Double
// x87 path: source must already be at FP top-of-stack (regDPR1).
7165 instruct storeD( memory mem, regDPR1 src) %{
7166 predicate(UseSSE<=1);
7167 match(Set mem (StoreD mem src));
7168
7169 ins_cost(100);
7170 format %{ "FST_D $mem,$src" %}
7171 opcode(0xDD); /* DD /2 */
7172 ins_encode( enc_FP_store(mem,src) );
7173 ins_pipe( fpu_mem_reg );
7174 %}
7175
7176 // Store double does rounding on x86
// Same encoding as storeD; matching the RoundDouble node lets the store
// itself perform the required rounding to 64-bit precision.
7177 instruct storeD_rounded( memory mem, regDPR1 src) %{
7178 predicate(UseSSE<=1);
7179 match(Set mem (StoreD mem (RoundDouble src)));
7180
7181 ins_cost(100);
7182 format %{ "FST_D $mem,$src\t# round" %}
7183 opcode(0xDD); /* DD /2 */
7184 ins_encode( enc_FP_store(mem,src) );
7185 ins_pipe( fpu_mem_reg );
7186 %}
7187
7188 // Store XMM register to memory (double-precision floating points)
7189 // MOVSD instruction
7190 instruct storeXD(memory mem, regXD src) %{
7191 predicate(UseSSE>=2);
7192 match(Set mem (StoreD mem src));
7193 ins_cost(95);
7194 format %{ "MOVSD $mem,$src" %}
7195 ins_encode %{
7196 __ movdbl($mem$$Address, $src$$XMMRegister);
7197 %}
7198 ins_pipe( pipe_slow );
7199 %}
7200
7201 // Store XMM register to memory (single-precision floating point)
7202 // MOVSS instruction
7203 instruct storeX(memory mem, regX src) %{
7204 predicate(UseSSE>=1);
7205 match(Set mem (StoreF mem src));
7206 ins_cost(95);
7207 format %{ "MOVSS $mem,$src" %}
7208 ins_encode %{
7209 __ movflt($mem$$Address, $src$$XMMRegister);
7210 %}
7211 ins_pipe( pipe_slow );
7212 %}
7213
7214 // Store Aligned Packed Single Float XMM register to memory
7215 instruct storeA2F(memory mem, regXD src) %{
7216 predicate(UseSSE>=1);
7217 match(Set mem (Store2F mem src));
7218 ins_cost(145);
7219 format %{ "MOVQ $mem,$src\t! packed2F" %}
7220 ins_encode %{
7221 __ movq($mem$$Address, $src$$XMMRegister);
7222 %}
7223 ins_pipe( pipe_slow );
7224 %}
7225
7226 // Store Float
// x87 path: source must already be at FP top-of-stack (regFPR1).
7227 instruct storeF( memory mem, regFPR1 src) %{
7228 predicate(UseSSE==0);
7229 match(Set mem (StoreF mem src));
7230
7231 ins_cost(100);
7232 format %{ "FST_S $mem,$src" %}
7233 opcode(0xD9); /* D9 /2 */
7234 ins_encode( enc_FP_store(mem,src) );
7235 ins_pipe( fpu_mem_reg );
7236 %}
7237
7238 // Store Float does rounding on x86
7239 instruct storeF_rounded( memory mem, regFPR1 src) %{
7240 predicate(UseSSE==0);
7241 match(Set mem (StoreF mem (RoundFloat src)));
7242
7243 ins_cost(100);
7244 format %{ "FST_S $mem,$src\t# round" %}
7245 opcode(0xD9); /* D9 /2 */
7246 ins_encode( enc_FP_store(mem,src) );
7247 ins_pipe( fpu_mem_reg );
7248 %}
7249
7250 // Store Float does rounding on x86
// Folds ConvD2F into the 32-bit store: FST_S narrows the double to float.
7251 instruct storeF_Drounded( memory mem, regDPR1 src) %{
7252 predicate(UseSSE<=1);
7253 match(Set mem (StoreF mem (ConvD2F src)));
7254
7255 ins_cost(100);
7256 format %{ "FST_S $mem,$src\t# D-round" %}
7257 opcode(0xD9); /* D9 /2 */
7258 ins_encode( enc_FP_store(mem,src) );
7259 ins_pipe( fpu_mem_reg );
7260 %}
7261
7262 // Store immediate Float value (it is faster than store from FPU register)
7263 // The instruction usage is guarded by predicate in operand immF().
// Writes the float's raw 32-bit pattern with an integer MOV imm32 (C7 /0).
7264 instruct storeF_imm( memory mem, immF src) %{
7265 match(Set mem (StoreF mem src));
7266
7267 ins_cost(50);
7268 format %{ "MOV $mem,$src\t# store float" %}
7269 opcode(0xC7); /* C7 /0 */
7270 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
7271 ins_pipe( ialu_mem_imm );
7272 %}
7273
7274 // Store immediate Float value (it is faster than store from XMM register)
7275 // The instruction usage is guarded by predicate in operand immXF().
7276 instruct storeX_imm( memory mem, immXF src) %{
7277 match(Set mem (StoreF mem src));
7278
7279 ins_cost(50);
7280 format %{ "MOV $mem,$src\t# store float" %}
7281 opcode(0xC7); /* C7 /0 */
7282 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
7283 ins_pipe( ialu_mem_imm );
7284 %}
7285
7286 // Store Integer to stack slot
7287 instruct storeSSI(stackSlotI dst, eRegI src) %{
7288 match(Set dst src);
7289
7290 ins_cost(100);
7291 format %{ "MOV $dst,$src" %}
7292 opcode(0x89);
7293 ins_encode( OpcPRegSS( dst, src ) );
7294 ins_pipe( ialu_mem_reg );
7295 %}
7296
7297 // Store Integer to stack slot
7298 instruct storeSSP(stackSlotP dst, eRegP src) %{
7299 match(Set dst src);
7300
7301 ins_cost(100);
7302 format %{ "MOV $dst,$src" %}
7560 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7561 // ins_cost(250);
7562 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
7563 // opcode(0x0F,0x40);
7564 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7565 // ins_pipe( pipe_cmov_mem );
7566 //%}
7567 //
7568 //// Conditional move
7569 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
7570 // predicate(VM_Version::supports_cmov() );
7571 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7572 // ins_cost(250);
7573 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
7574 // opcode(0x0F,0x40);
7575 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7576 // ins_pipe( pipe_cmov_mem );
7577 //%}
7578
7579 // Conditional move
// FCMOV exists only for unsigned/unordered conditions (eFlagsRegU), and the
// destination must be FP top-of-stack (regDPR1). Opcode 0xDA + cc encoded
// by enc_cmov_d.
7580 instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
7581 predicate(UseSSE<=1);
7582 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7583 ins_cost(200);
7584 format %{ "FCMOV$cop $dst,$src\t# double" %}
7585 opcode(0xDA);
7586 ins_encode( enc_cmov_d(cop,src) );
7587 ins_pipe( pipe_cmovD_reg );
7588 %}
7589
7590 // Conditional move
// Float flavor shares the double encoding: x87 FCMOV operates on whole
// stack registers regardless of the value's precision.
7591 instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
7592 predicate(UseSSE==0);
7593 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7594 ins_cost(200);
7595 format %{ "FCMOV$cop $dst,$src\t# float" %}
7596 opcode(0xDA);
7597 ins_encode( enc_cmov_d(cop,src) );
7598 ins_pipe( pipe_cmovD_reg );
7599 %}
7600
7601 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So for signed flags, branch around a 4-byte FP reg-to-reg move instead;
// enc_cmov_branch inverts the condition to jump over the move.
7602 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7603 predicate(UseSSE<=1);
7604 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7605 ins_cost(200);
7606 format %{ "Jn$cop skip\n\t"
7607 "MOV $dst,$src\t# double\n"
7608 "skip:" %}
7609 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7610 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
7611 ins_pipe( pipe_cmovD_reg );
7612 %}
7613
7614 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7615 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7616 predicate(UseSSE==0);
7617 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7618 ins_cost(200);
7619 format %{ "Jn$cop skip\n\t"
7620 "MOV $dst,$src\t# float\n"
7621 "skip:" %}
7622 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7623 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
7624 ins_pipe( pipe_cmovD_reg );
7625 %}
7626
7627 // No CMOVE with SSE/SSE2
// Emulate with a short branch over a MOVSS; $cop$$cmpcode^1 flips the
// condition so the branch skips the move when the CMOV would not fire.
7628 instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
7629 predicate (UseSSE>=1);
7630 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7631 ins_cost(200);
7632 format %{ "Jn$cop skip\n\t"
7633 "MOVSS $dst,$src\t# float\n"
7634 "skip:" %}
7635 ins_encode %{
7636 Label skip;
7637 // Invert sense of branch from sense of CMOV
7638 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7639 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7640 __ bind(skip);
7641 %}
7642 ins_pipe( pipe_slow );
7643 %}
7644
7645 // No CMOVE with SSE/SSE2
7646 instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
7647 predicate (UseSSE>=2);
7648 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7649 ins_cost(200);
7650 format %{ "Jn$cop skip\n\t"
7651 "MOVSD $dst,$src\t# float\n"
7652 "skip:" %}
7653 ins_encode %{
7654 Label skip;
7655 // Invert sense of branch from sense of CMOV
7656 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7657 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7658 __ bind(skip);
7659 %}
7660 ins_pipe( pipe_slow );
7661 %}
7662
7663 // unsigned version
7664 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
7665 predicate (UseSSE>=1);
7666 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7667 ins_cost(200);
7668 format %{ "Jn$cop skip\n\t"
7669 "MOVSS $dst,$src\t# float\n"
7670 "skip:" %}
7671 ins_encode %{
7672 Label skip;
7673 // Invert sense of branch from sense of CMOV
7674 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7675 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7676 __ bind(skip);
7677 %}
7678 ins_pipe( pipe_slow );
7679 %}
7680
// Carry-flag-only unsigned compare: defer to the general unsigned form.
7681 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
7682 predicate (UseSSE>=1);
7683 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7684 ins_cost(200);
7685 expand %{
7686 fcmovX_regU(cop, cr, dst, src);
7687 %}
7688 %}
7689
7690 // unsigned version
7691 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
7692 predicate (UseSSE>=2);
7693 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7694 ins_cost(200);
7695 format %{ "Jn$cop skip\n\t"
7696 "MOVSD $dst,$src\t# float\n"
7697 "skip:" %}
7698 ins_encode %{
7699 Label skip;
7700 // Invert sense of branch from sense of CMOV
7701 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7702 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7703 __ bind(skip);
7704 %}
7705 ins_pipe( pipe_slow );
7706 %}
7707
// Carry-flag-only unsigned compare: defer to the general unsigned form.
7708 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
7709 predicate (UseSSE>=2);
7710 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7711 ins_cost(200);
7712 expand %{
7713 fcmovXD_regU(cop, cr, dst, src);
7714 %}
7715 %}
7716
// Long CMOV: one integer CMOV (0F 40+cc) per 32-bit half of the pair.
7717 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7718 predicate(VM_Version::supports_cmov() );
7719 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7720 ins_cost(200);
7721 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7722 "CMOV$cop $dst.hi,$src.hi" %}
7723 opcode(0x0F,0x40);
7724 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7725 ins_pipe( pipe_cmov_reg_long );
7726 %}
7727
7728 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7729 predicate(VM_Version::supports_cmov() );
7730 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7731 ins_cost(200);
7732 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7733 "CMOV$cop $dst.hi,$src.hi" %}
7923
7924 ins_cost(125);
7925 format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
7926 opcode(0x8B);
7927 ins_encode( OpcP, RegMem(dst,mem));
7928 ins_pipe( ialu_reg_mem );
7929 %}
7930
7931 // LoadLong-locked - same as a volatile long load when used with compare-swap
// x87 variant: FILD/FISTP give the atomic 64-bit access (same encoding as
// loadL_volatile above).
7932 instruct loadLLocked(stackSlotL dst, memory mem) %{
7933 predicate(UseSSE<=1);
7934 match(Set dst (LoadLLocked mem));
7935
7936 ins_cost(200);
7937 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
7938 "FISTp $dst" %}
7939 ins_encode(enc_loadL_volatile(mem,dst));
7940 ins_pipe( fpu_reg_mem );
7941 %}
7942
// SSE2 variant: single 64-bit MOVSD through a scratch XMM to the stack slot.
7943 instruct loadLX_Locked(stackSlotL dst, memory mem, regXD tmp) %{
7944 predicate(UseSSE>=2);
7945 match(Set dst (LoadLLocked mem));
7946 effect(TEMP tmp);
7947 ins_cost(180);
7948 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
7949 "MOVSD $dst,$tmp" %}
7950 ins_encode %{
7951 __ movdbl($tmp$$XMMRegister, $mem$$Address);
7952 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
7953 %}
7954 ins_pipe( pipe_slow );
7955 %}
7956
// SSE2 variant landing in a GPR pair: MOVD low half, PSRLQ by 32, MOVD high half.
7957 instruct loadLX_reg_Locked(eRegL dst, memory mem, regXD tmp) %{
7958 predicate(UseSSE>=2);
7959 match(Set dst (LoadLLocked mem));
7960 effect(TEMP tmp);
7961 ins_cost(160);
7962 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
7963 "MOVD $dst.lo,$tmp\n\t"
7964 "PSRLQ $tmp,32\n\t"
7965 "MOVD $dst.hi,$tmp" %}
7966 ins_encode %{
7967 __ movdbl($tmp$$XMMRegister, $mem$$Address);
7968 __ movdl($dst$$Register, $tmp$$XMMRegister);
7969 __ psrlq($tmp$$XMMRegister, 32);
7970 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
7971 %}
7972 ins_pipe( pipe_slow );
7973 %}
7974
7975 // Conditional-store of the updated heap-top.
7976 // Used during allocation of the shared heap.
7977 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
9534 effect(KILL cr);
9535 ins_cost(600);
9536 size(18);
9537 format %{ "TEST $shift,32\n\t"
9538 "JEQ,s small\n\t"
9539 "MOV $dst.lo,$dst.hi\n\t"
9540 "SAR $dst.hi,31\n"
9541 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9542 "SAR $dst.hi,$shift" %}
9543 ins_encode( shift_right_arith_long( dst, shift ) );
9544 ins_pipe( pipe_slow );
9545 %}
9546
9547
9548 //----------Double Instructions------------------------------------------------
9549 // Double Math
9550
9551 // Compare & branch
9552
9553 // P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP sets ZF/PF/CF directly; the fixup (via AH/SAHF, killing EAX)
// forces the unordered (NaN) case to read as "less than".
9554 instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
9555 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9556 match(Set cr (CmpD src1 src2));
9557 effect(KILL rax);
9558 ins_cost(150);
9559 format %{ "FLD $src1\n\t"
9560 "FUCOMIP ST,$src2 // P6 instruction\n\t"
9561 "JNP exit\n\t"
9562 "MOV ah,1 // saw a NaN, set CF\n\t"
9563 "SAHF\n"
9564 "exit:\tNOP // avoid branch to branch" %}
9565 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9566 ins_encode( Push_Reg_D(src1),
9567 OpcP, RegOpc(src2),
9568 cmpF_P6_fixup );
9569 ins_pipe( pipe_slow );
9570 %}
9571
// Carry-flag-only consumer variant: no NaN fixup needed, so EAX survives.
9572 instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
9573 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9574 match(Set cr (CmpD src1 src2));
9575 ins_cost(150);
9576 format %{ "FLD $src1\n\t"
9577 "FUCOMIP ST,$src2 // P6 instruction" %}
9578 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9579 ins_encode( Push_Reg_D(src1),
9580 OpcP, RegOpc(src2));
9581 ins_pipe( pipe_slow );
9582 %}
9583
9584 // Compare & branch
// Pre-P6 fallback: FCOMP leaves status in the FPU status word, so it must be
// pulled into AX (FNSTSW) and transferred to EFLAGS with SAHF; unordered is
// forced to "less than" by the fpu_flags fixup.
9585 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
9586 predicate(UseSSE<=1);
9587 match(Set cr (CmpD src1 src2));
9588 effect(KILL rax);
9589 ins_cost(200);
9590 format %{ "FLD $src1\n\t"
9591 "FCOMp $src2\n\t"
9592 "FNSTSW AX\n\t"
9593 "TEST AX,0x400\n\t"
9594 "JZ,s flags\n\t"
9595 "MOV AH,1\t# unordered treat as LT\n"
9596 "flags:\tSAHF" %}
9597 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9598 ins_encode( Push_Reg_D(src1),
9599 OpcP, RegOpc(src2),
9600 fpu_flags);
9601 ins_pipe( pipe_slow );
9602 %}
9603
9604 // Compare vs zero into -1,0,1
// FTST compares ST against +0.0; CmpF_Result materializes the three-way result.
9605 instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
9606 predicate(UseSSE<=1);
9607 match(Set dst (CmpD3 src1 zero));
9608 effect(KILL cr, KILL rax);
9609 ins_cost(280);
9610 format %{ "FTSTD $dst,$src1" %}
9611 opcode(0xE4, 0xD9);
9612 ins_encode( Push_Reg_D(src1),
9613 OpcS, OpcP, PopFPU,
9614 CmpF_Result(dst));
9615 ins_pipe( pipe_slow );
9616 %}
9617
9618 // Compare into -1,0,1
9619 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
9620 predicate(UseSSE<=1);
9621 match(Set dst (CmpD3 src1 src2));
9622 effect(KILL cr, KILL rax);
9623 ins_cost(300);
9624 format %{ "FCMPD $dst,$src1,$src2" %}
9625 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9626 ins_encode( Push_Reg_D(src1),
9627 OpcP, RegOpc(src2),
9628 CmpF_Result(dst));
9629 ins_pipe( pipe_slow );
9630 %}
9631
9632 // float compare and set condition codes in EFLAGS by XMM regs
// UCOMISD sets ZF/PF/CF; emit_cmpfp_fixup rewrites the flags on the stack so
// an unordered (NaN) compare reads as "less than" (CF set).
9633 instruct cmpXD_cc(eFlagsRegU cr, regXD src1, regXD src2) %{
9634 predicate(UseSSE>=2);
9635 match(Set cr (CmpD src1 src2));
9636 ins_cost(145);
9637 format %{ "UCOMISD $src1,$src2\n\t"
9638 "JNP,s exit\n\t"
9639 "PUSHF\t# saw NaN, set CF\n\t"
9640 "AND [rsp], #0xffffff2b\n\t"
9641 "POPF\n"
9642 "exit:" %}
9643 ins_encode %{
9644 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9645 emit_cmpfp_fixup(_masm);
9646 %}
9647 ins_pipe( pipe_slow );
9648 %}
9649
// Carry-flag-only consumer variant: raw UCOMISD, no NaN fixup required.
9650 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD src1, regXD src2) %{
9651 predicate(UseSSE>=2);
9652 match(Set cr (CmpD src1 src2));
9653 ins_cost(100);
9654 format %{ "UCOMISD $src1,$src2" %}
9655 ins_encode %{
9656 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9657 %}
9658 ins_pipe( pipe_slow );
9659 %}
9660
9661 // float compare and set condition codes in EFLAGS by XMM regs
// Same as cmpXD_cc but with the second operand folded from memory.
9662 instruct cmpXD_ccmem(eFlagsRegU cr, regXD src1, memory src2) %{
9663 predicate(UseSSE>=2);
9664 match(Set cr (CmpD src1 (LoadD src2)));
9665 ins_cost(145);
9666 format %{ "UCOMISD $src1,$src2\n\t"
9667 "JNP,s exit\n\t"
9668 "PUSHF\t# saw NaN, set CF\n\t"
9669 "AND [rsp], #0xffffff2b\n\t"
9670 "POPF\n"
9671 "exit:" %}
9672 ins_encode %{
9673 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9674 emit_cmpfp_fixup(_masm);
9675 %}
9676 ins_pipe( pipe_slow );
9677 %}
9678
9679 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD src1, memory src2) %{
9680 predicate(UseSSE>=2);
9681 match(Set cr (CmpD src1 (LoadD src2)));
9682 ins_cost(100);
9683 format %{ "UCOMISD $src1,$src2" %}
9684 ins_encode %{
9685 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9686 %}
9687 ins_pipe( pipe_slow );
9688 %}
9689
9690 // Compare into -1,0,1 in XMM
9691 instruct cmpXD_reg(xRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
9692 predicate(UseSSE>=2);
9693 match(Set dst (CmpD3 src1 src2));
9694 effect(KILL cr);
9695 ins_cost(255);
9696 format %{ "UCOMISD $src1, $src2\n\t"
9697 "MOV $dst, #-1\n\t"
9698 "JP,s done\n\t"
9699 "JB,s done\n\t"
9700 "SETNE $dst\n\t"
9701 "MOVZB $dst, $dst\n"
9702 "done:" %}
9703 ins_encode %{
9704 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9705 emit_cmpfp3(_masm, $dst$$Register);
9706 %}
9707 ins_pipe( pipe_slow );
9708 %}
9709
9710 // Compare into -1,0,1 in XMM and memory
9711 instruct cmpXD_regmem(xRegI dst, regXD src1, memory src2, eFlagsReg cr) %{
9712 predicate(UseSSE>=2);
9713 match(Set dst (CmpD3 src1 (LoadD src2)));
9714 effect(KILL cr);
9715 ins_cost(275);
9716 format %{ "UCOMISD $src1, $src2\n\t"
9717 "MOV $dst, #-1\n\t"
9718 "JP,s done\n\t"
9719 "JB,s done\n\t"
9720 "SETNE $dst\n\t"
9721 "MOVZB $dst, $dst\n"
9722 "done:" %}
9723 ins_encode %{
9724 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9725 emit_cmpfp3(_masm, $dst$$Register);
9726 %}
9727 ins_pipe( pipe_slow );
9728 %}
9729
9730
9731 instruct subD_reg(regD dst, regD src) %{
9732 predicate (UseSSE <=1);
9733 match(Set dst (SubD dst src));
9734
9735 format %{ "FLD $src\n\t"
9736 "DSUBp $dst,ST" %}
9737 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
9738 ins_cost(150);
9739 ins_encode( Push_Reg_D(src),
9740 OpcP, RegOpc(dst) );
9741 ins_pipe( fpu_reg_reg );
9742 %}
9743
9744 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
9745 predicate (UseSSE <=1);
9746 match(Set dst (RoundDouble (SubD src1 src2)));
9747 ins_cost(250);
9748
9749 format %{ "FLD $src2\n\t"
9750 "DSUB ST,$src1\n\t"
9751 "FSTP_D $dst\t# D-round" %}
9752 opcode(0xD8, 0x5);
9753 ins_encode( Push_Reg_D(src2),
9754 OpcP, RegOpc(src1), Pop_Mem_D(dst) );
9755 ins_pipe( fpu_mem_reg_reg );
9756 %}
9757
9758
9759 instruct subD_reg_mem(regD dst, memory src) %{
9760 predicate (UseSSE <=1);
9761 match(Set dst (SubD dst (LoadD src)));
9762 ins_cost(150);
9763
9764 format %{ "FLD $src\n\t"
9765 "DSUBp $dst,ST" %}
9766 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9767 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9768 OpcP, RegOpc(dst) );
9769 ins_pipe( fpu_reg_mem );
9770 %}
9771
9772 instruct absD_reg(regDPR1 dst, regDPR1 src) %{
9773 predicate (UseSSE<=1);
9774 match(Set dst (AbsD src));
9775 ins_cost(100);
9776 format %{ "FABS" %}
9777 opcode(0xE1, 0xD9);
9778 ins_encode( OpcS, OpcP );
9779 ins_pipe( fpu_reg_reg );
9780 %}
9781
9782 instruct absXD_reg( regXD dst ) %{
9783 predicate(UseSSE>=2);
9784 match(Set dst (AbsD dst));
9785 ins_cost(150);
9786 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
9787 ins_encode %{
9788 __ andpd($dst$$XMMRegister,
9789 ExternalAddress((address)double_signmask_pool));
9790 %}
9791 ins_pipe( pipe_slow );
9792 %}
9793
9794 instruct negD_reg(regDPR1 dst, regDPR1 src) %{
9795 predicate(UseSSE<=1);
9796 match(Set dst (NegD src));
9797 ins_cost(100);
9798 format %{ "FCHS" %}
9799 opcode(0xE0, 0xD9);
9800 ins_encode( OpcS, OpcP );
9801 ins_pipe( fpu_reg_reg );
9802 %}
9803
9804 instruct negXD_reg( regXD dst ) %{
9805 predicate(UseSSE>=2);
9806 match(Set dst (NegD dst));
9807 ins_cost(150);
9808 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
9809 ins_encode %{
9810 __ xorpd($dst$$XMMRegister,
9811 ExternalAddress((address)double_signflip_pool));
9812 %}
9813 ins_pipe( pipe_slow );
9814 %}
9815
9816 instruct addD_reg(regD dst, regD src) %{
9817 predicate(UseSSE<=1);
9818 match(Set dst (AddD dst src));
9819 format %{ "FLD $src\n\t"
9820 "DADD $dst,ST" %}
9821 size(4);
9822 ins_cost(150);
9823 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9824 ins_encode( Push_Reg_D(src),
9825 OpcP, RegOpc(dst) );
9826 ins_pipe( fpu_reg_reg );
9827 %}
9828
9829
9830 instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
9831 predicate(UseSSE<=1);
9832 match(Set dst (RoundDouble (AddD src1 src2)));
9833 ins_cost(250);
9834
9835 format %{ "FLD $src2\n\t"
9836 "DADD ST,$src1\n\t"
9837 "FSTP_D $dst\t# D-round" %}
9838 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9839 ins_encode( Push_Reg_D(src2),
9840 OpcP, RegOpc(src1), Pop_Mem_D(dst) );
9841 ins_pipe( fpu_mem_reg_reg );
9842 %}
9843
9844
9845 instruct addD_reg_mem(regD dst, memory src) %{
9846 predicate(UseSSE<=1);
9847 match(Set dst (AddD dst (LoadD src)));
9848 ins_cost(150);
9849
9850 format %{ "FLD $src\n\t"
9851 "DADDp $dst,ST" %}
9852 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9853 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9854 OpcP, RegOpc(dst) );
9855 ins_pipe( fpu_reg_mem );
9856 %}
9857
9858 // add-to-memory
9859 instruct addD_mem_reg(memory dst, regD src) %{
9860 predicate(UseSSE<=1);
9861 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9862 ins_cost(150);
9863
9864 format %{ "FLD_D $dst\n\t"
9865 "DADD ST,$src\n\t"
9866 "FST_D $dst" %}
9867 opcode(0xDD, 0x0);
9868 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9869 Opcode(0xD8), RegOpc(src),
9870 set_instruction_start,
9871 Opcode(0xDD), RMopc_Mem(0x03,dst) );
9872 ins_pipe( fpu_reg_mem );
9873 %}
9874
9875 instruct addD_reg_imm1(regD dst, immD1 con) %{
9876 predicate(UseSSE<=1);
9877 match(Set dst (AddD dst con));
9878 ins_cost(125);
9879 format %{ "FLD1\n\t"
9880 "DADDp $dst,ST" %}
9881 ins_encode %{
9882 __ fld1();
9883 __ faddp($dst$$reg);
9884 %}
9885 ins_pipe(fpu_reg);
9886 %}
9887
9888 instruct addD_reg_imm(regD dst, immD con) %{
9889 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9890 match(Set dst (AddD dst con));
9891 ins_cost(200);
9892 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9893 "DADDp $dst,ST" %}
9894 ins_encode %{
9895 __ fld_d($constantaddress($con));
9896 __ faddp($dst$$reg);
9897 %}
9898 ins_pipe(fpu_reg_mem);
9899 %}
9900
9901 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
9902 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9903 match(Set dst (RoundDouble (AddD src con)));
9904 ins_cost(200);
9905 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9906 "DADD ST,$src\n\t"
9907 "FSTP_D $dst\t# D-round" %}
9908 ins_encode %{
9909 __ fld_d($constantaddress($con));
9910 __ fadd($src$$reg);
9911 __ fstp_d(Address(rsp, $dst$$disp));
9912 %}
9913 ins_pipe(fpu_mem_reg_con);
9914 %}
9915
9916 // Add two double precision floating point values in xmm
9917 instruct addXD_reg(regXD dst, regXD src) %{
9918 predicate(UseSSE>=2);
9919 match(Set dst (AddD dst src));
9920 format %{ "ADDSD $dst,$src" %}
9921 ins_encode %{
9922 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
9923 %}
9924 ins_pipe( pipe_slow );
9925 %}
9926
9927 instruct addXD_imm(regXD dst, immXD con) %{
9928 predicate(UseSSE>=2);
9929 match(Set dst (AddD dst con));
9930 format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9931 ins_encode %{
9932 __ addsd($dst$$XMMRegister, $constantaddress($con));
9933 %}
9934 ins_pipe(pipe_slow);
9935 %}
9936
9937 instruct addXD_mem(regXD dst, memory mem) %{
9938 predicate(UseSSE>=2);
9939 match(Set dst (AddD dst (LoadD mem)));
9940 format %{ "ADDSD $dst,$mem" %}
9941 ins_encode %{
9942 __ addsd($dst$$XMMRegister, $mem$$Address);
9943 %}
9944 ins_pipe( pipe_slow );
9945 %}
9946
9947 // Sub two double precision floating point values in xmm
9948 instruct subXD_reg(regXD dst, regXD src) %{
9949 predicate(UseSSE>=2);
9950 match(Set dst (SubD dst src));
9951 ins_cost(150);
9952 format %{ "SUBSD $dst,$src" %}
9953 ins_encode %{
9954 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
9955 %}
9956 ins_pipe( pipe_slow );
9957 %}
9958
9959 instruct subXD_imm(regXD dst, immXD con) %{
9960 predicate(UseSSE>=2);
9961 match(Set dst (SubD dst con));
9962 ins_cost(150);
9963 format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9964 ins_encode %{
9965 __ subsd($dst$$XMMRegister, $constantaddress($con));
9966 %}
9967 ins_pipe(pipe_slow);
9968 %}
9969
9970 instruct subXD_mem(regXD dst, memory mem) %{
9971 predicate(UseSSE>=2);
9972 match(Set dst (SubD dst (LoadD mem)));
9973 ins_cost(150);
9974 format %{ "SUBSD $dst,$mem" %}
9975 ins_encode %{
9976 __ subsd($dst$$XMMRegister, $mem$$Address);
9977 %}
9978 ins_pipe( pipe_slow );
9979 %}
9980
9981 // Mul two double precision floating point values in xmm
9982 instruct mulXD_reg(regXD dst, regXD src) %{
9983 predicate(UseSSE>=2);
9984 match(Set dst (MulD dst src));
9985 format %{ "MULSD $dst,$src" %}
9986 ins_encode %{
9987 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
9988 %}
9989 ins_pipe( pipe_slow );
9990 %}
9991
9992 instruct mulXD_imm(regXD dst, immXD con) %{
9993 predicate(UseSSE>=2);
9994 match(Set dst (MulD dst con));
9995 format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9996 ins_encode %{
9997 __ mulsd($dst$$XMMRegister, $constantaddress($con));
9998 %}
9999 ins_pipe(pipe_slow);
10000 %}
10001
10002 instruct mulXD_mem(regXD dst, memory mem) %{
10003 predicate(UseSSE>=2);
10004 match(Set dst (MulD dst (LoadD mem)));
10005 format %{ "MULSD $dst,$mem" %}
10006 ins_encode %{
10007 __ mulsd($dst$$XMMRegister, $mem$$Address);
10008 %}
10009 ins_pipe( pipe_slow );
10010 %}
10011
10012 // Div two double precision floating point values in xmm
10013 instruct divXD_reg(regXD dst, regXD src) %{
10014 predicate(UseSSE>=2);
10015 match(Set dst (DivD dst src));
10016 format %{ "DIVSD $dst,$src" %}
10017 opcode(0xF2, 0x0F, 0x5E);
10018 ins_encode %{
10019 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
10020 %}
10021 ins_pipe( pipe_slow );
10022 %}
10023
10024 instruct divXD_imm(regXD dst, immXD con) %{
10025 predicate(UseSSE>=2);
10026 match(Set dst (DivD dst con));
10027 format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10028 ins_encode %{
10029 __ divsd($dst$$XMMRegister, $constantaddress($con));
10030 %}
10031 ins_pipe(pipe_slow);
10032 %}
10033
10034 instruct divXD_mem(regXD dst, memory mem) %{
10035 predicate(UseSSE>=2);
10036 match(Set dst (DivD dst (LoadD mem)));
10037 format %{ "DIVSD $dst,$mem" %}
10038 ins_encode %{
10039 __ divsd($dst$$XMMRegister, $mem$$Address);
10040 %}
10041 ins_pipe( pipe_slow );
10042 %}
10043
10044
10045 instruct mulD_reg(regD dst, regD src) %{
10046 predicate(UseSSE<=1);
10047 match(Set dst (MulD dst src));
10048 format %{ "FLD $src\n\t"
10049 "DMULp $dst,ST" %}
10050 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10051 ins_cost(150);
10052 ins_encode( Push_Reg_D(src),
10053 OpcP, RegOpc(dst) );
10054 ins_pipe( fpu_reg_reg );
10055 %}
10056
10057 // Strict FP instruction biases argument before multiply then
10058 // biases result to avoid double rounding of subnormals.
10059 //
10060 // scale arg1 by multiplying arg1 by 2^(-15360)
10061 // load arg2
10062 // multiply scaled arg1 by arg2
10063 // rescale product by 2^(15360)
10064 //
10065 instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
10066 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10067 match(Set dst (MulD dst src));
10068 ins_cost(1); // Select this instruction for all strict FP double multiplies
10069
10070 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10071 "DMULp $dst,ST\n\t"
10072 "FLD $src\n\t"
10073 "DMULp $dst,ST\n\t"
10074 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10075 "DMULp $dst,ST\n\t" %}
10076 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10077 ins_encode( strictfp_bias1(dst),
10078 Push_Reg_D(src),
10079 OpcP, RegOpc(dst),
10080 strictfp_bias2(dst) );
10081 ins_pipe( fpu_reg_reg );
10082 %}
10083
10084 instruct mulD_reg_imm(regD dst, immD con) %{
10085 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10086 match(Set dst (MulD dst con));
10087 ins_cost(200);
10088 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10089 "DMULp $dst,ST" %}
10090 ins_encode %{
10091 __ fld_d($constantaddress($con));
10092 __ fmulp($dst$$reg);
10093 %}
10094 ins_pipe(fpu_reg_mem);
10095 %}
10096
10097
10098 instruct mulD_reg_mem(regD dst, memory src) %{
10099 predicate( UseSSE<=1 );
10100 match(Set dst (MulD dst (LoadD src)));
10101 ins_cost(200);
10102 format %{ "FLD_D $src\n\t"
10103 "DMULp $dst,ST" %}
10104 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
10105 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10106 OpcP, RegOpc(dst) );
10107 ins_pipe( fpu_reg_mem );
10108 %}
10109
10110 //
10111 // Cisc-alternate to reg-reg multiply
10112 instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
10113 predicate( UseSSE<=1 );
10114 match(Set dst (MulD src (LoadD mem)));
10115 ins_cost(250);
10116 format %{ "FLD_D $mem\n\t"
10117 "DMUL ST,$src\n\t"
10118 "FSTP_D $dst" %}
10119 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
10120 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
10121 OpcReg_F(src),
10122 Pop_Reg_D(dst) );
10123 ins_pipe( fpu_reg_reg_mem );
10124 %}
10125
10126
10127 // MACRO3 -- addD a mulD
10128 // This instruction is a '2-address' instruction in that the result goes
10129 // back to src2. This eliminates a move from the macro; possibly the
10130 // register allocator will have to add it back (and maybe not).
10131 instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
10132 predicate( UseSSE<=1 );
10133 match(Set src2 (AddD (MulD src0 src1) src2));
10134 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10135 "DMUL ST,$src1\n\t"
10136 "DADDp $src2,ST" %}
10137 ins_cost(250);
10138 opcode(0xDD); /* LoadD DD /0 */
10139 ins_encode( Push_Reg_F(src0),
10140 FMul_ST_reg(src1),
10141 FAddP_reg_ST(src2) );
10142 ins_pipe( fpu_reg_reg_reg );
10143 %}
10144
10145
10146 // MACRO3 -- subD a mulD
10147 instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
10148 predicate( UseSSE<=1 );
10149 match(Set src2 (SubD (MulD src0 src1) src2));
10150 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10151 "DMUL ST,$src1\n\t"
10152 "DSUBRp $src2,ST" %}
10153 ins_cost(250);
10154 ins_encode( Push_Reg_F(src0),
10155 FMul_ST_reg(src1),
10156 Opcode(0xDE), Opc_plus(0xE0,src2));
10157 ins_pipe( fpu_reg_reg_reg );
10158 %}
10159
10160
10161 instruct divD_reg(regD dst, regD src) %{
10162 predicate( UseSSE<=1 );
10163 match(Set dst (DivD dst src));
10164
10165 format %{ "FLD $src\n\t"
10166 "FDIVp $dst,ST" %}
10167 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10168 ins_cost(150);
10169 ins_encode( Push_Reg_D(src),
10170 OpcP, RegOpc(dst) );
10171 ins_pipe( fpu_reg_reg );
10172 %}
10173
10174 // Strict FP instruction biases argument before division then
10175 // biases result, to avoid double rounding of subnormals.
10176 //
10177 // scale dividend by multiplying dividend by 2^(-15360)
10178 // load divisor
10179 // divide scaled dividend by divisor
10180 // rescale quotient by 2^(15360)
10181 //
// Strict-mode x87 double divide: bias the dividend by 2^(-15360), divide,
// then rescale the quotient by 2^(15360) to avoid double rounding of
// subnormals (see the comment block above).
// FIX(review): the original block carried TWO predicate() clauses — a bare
// predicate(UseSSE<=1) plus the strict-mode predicate below. The bare one
// is subsumed by (and conflicts with) the strict test and is removed here,
// matching the single-predicate form of strictfp_mulD_reg. ins_cost(01)
// was also rewritten as ins_cost(1) to drop the octal-looking literal and
// match the sibling instruction.
10182 instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
10183 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10184 match(Set dst (DivD dst src));
10186 ins_cost(1); // Select this instruction for all strict FP double divides
10187
10188 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10189 "DMULp $dst,ST\n\t"
10190 "FLD $src\n\t"
10191 "FDIVp $dst,ST\n\t"
10192 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10193 "DMULp $dst,ST\n\t" %}
10194 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10195 ins_encode( strictfp_bias1(dst),
10196 Push_Reg_D(src),
10197 OpcP, RegOpc(dst),
10198 strictfp_bias2(dst) );
10199 ins_pipe( fpu_reg_reg );
10200 %}
10201
10202 instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10203 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10204 match(Set dst (RoundDouble (DivD src1 src2)));
10205
10206 format %{ "FLD $src1\n\t"
10207 "FDIV ST,$src2\n\t"
10208 "FSTP_D $dst\t# D-round" %}
10209 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10210 ins_encode( Push_Reg_D(src1),
10211 OpcP, RegOpc(src2), Pop_Mem_D(dst) );
10212 ins_pipe( fpu_mem_reg_reg );
10213 %}
10214
10215
10216 instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
10217 predicate(UseSSE<=1);
10218 match(Set dst (ModD dst src));
10219 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
10220
10221 format %{ "DMOD $dst,$src" %}
10222 ins_cost(250);
10223 ins_encode(Push_Reg_Mod_D(dst, src),
10224 emitModD(),
10225 Push_Result_Mod_D(src),
10226 Pop_Reg_D(dst));
10227 ins_pipe( pipe_slow );
10228 %}
10229
10230 instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
10231 predicate(UseSSE>=2);
10232 match(Set dst (ModD src0 src1));
10233 effect(KILL rax, KILL cr);
10234
10235 format %{ "SUB ESP,8\t # DMOD\n"
10236 "\tMOVSD [ESP+0],$src1\n"
10237 "\tFLD_D [ESP+0]\n"
10238 "\tMOVSD [ESP+0],$src0\n"
10239 "\tFLD_D [ESP+0]\n"
10240 "loop:\tFPREM\n"
10241 "\tFWAIT\n"
10242 "\tFNSTSW AX\n"
10243 "\tSAHF\n"
10244 "\tJP loop\n"
10245 "\tFSTP_D [ESP+0]\n"
10246 "\tMOVSD $dst,[ESP+0]\n"
10247 "\tADD ESP,8\n"
10248 "\tFSTP ST0\t # Restore FPU Stack"
10249 %}
10250 ins_cost(250);
10251 ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
10252 ins_pipe( pipe_slow );
10253 %}
10254
10255 instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
10256 predicate (UseSSE<=1);
10257 match(Set dst (SinD src));
10258 ins_cost(1800);
10259 format %{ "DSIN $dst" %}
10260 opcode(0xD9, 0xFE);
10261 ins_encode( OpcP, OpcS );
10262 ins_pipe( pipe_slow );
10263 %}
10264
10265 instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
10266 predicate (UseSSE>=2);
10267 match(Set dst (SinD dst));
10268 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10269 ins_cost(1800);
10270 format %{ "DSIN $dst" %}
10271 opcode(0xD9, 0xFE);
10272 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10273 ins_pipe( pipe_slow );
10274 %}
10275
10276 instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
10277 predicate (UseSSE<=1);
10278 match(Set dst (CosD src));
10279 ins_cost(1800);
10280 format %{ "DCOS $dst" %}
10281 opcode(0xD9, 0xFF);
10282 ins_encode( OpcP, OpcS );
10283 ins_pipe( pipe_slow );
10284 %}
10285
10286 instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
10287 predicate (UseSSE>=2);
10288 match(Set dst (CosD dst));
10289 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10290 ins_cost(1800);
10291 format %{ "DCOS $dst" %}
10292 opcode(0xD9, 0xFF);
10293 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10294 ins_pipe( pipe_slow );
10295 %}
10296
10297 instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
10298 predicate (UseSSE<=1);
10299 match(Set dst(TanD src));
10300 format %{ "DTAN $dst" %}
10301 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
10302 Opcode(0xDD), Opcode(0xD8)); // fstp st
10303 ins_pipe( pipe_slow );
10304 %}
10305
10306 instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
10307 predicate (UseSSE>=2);
10308 match(Set dst(TanD dst));
10309 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10310 format %{ "DTAN $dst" %}
10311 ins_encode( Push_SrcXD(dst),
10312 Opcode(0xD9), Opcode(0xF2), // fptan
10313 Opcode(0xDD), Opcode(0xD8), // fstp st
10314 Push_ResultXD(dst) );
10315 ins_pipe( pipe_slow );
10316 %}
10317
10318 instruct atanD_reg(regD dst, regD src) %{
10319 predicate (UseSSE<=1);
10320 match(Set dst(AtanD dst src));
10321 format %{ "DATA $dst,$src" %}
10322 opcode(0xD9, 0xF3);
10323 ins_encode( Push_Reg_D(src),
10324 OpcP, OpcS, RegOpc(dst) );
10325 ins_pipe( pipe_slow );
10326 %}
10327
10328 instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10329 predicate (UseSSE>=2);
10330 match(Set dst(AtanD dst src));
10331 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10332 format %{ "DATA $dst,$src" %}
10333 opcode(0xD9, 0xF3);
10334 ins_encode( Push_SrcXD(src),
10335 OpcP, OpcS, Push_ResultXD(dst) );
10336 ins_pipe( pipe_slow );
10337 %}
10338
10339 instruct sqrtD_reg(regD dst, regD src) %{
10340 predicate (UseSSE<=1);
10341 match(Set dst (SqrtD src));
10342 format %{ "DSQRT $dst,$src" %}
10343 opcode(0xFA, 0xD9);
10344 ins_encode( Push_Reg_D(src),
10345 OpcS, OpcP, Pop_Reg_D(dst) );
10346 ins_pipe( pipe_slow );
10347 %}
10348
10349 instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10350 predicate (UseSSE<=1);
10351 match(Set Y (PowD X Y)); // Raise X to the Yth power
10352 effect(KILL rax, KILL rbx, KILL rcx);
10353 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10354 "FLD_D $X\n\t"
10355 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10356
10357 "FDUP \t\t\t# Q Q\n\t"
10358 "FRNDINT\t\t\t# int(Q) Q\n\t"
10359 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10360 "FISTP dword [ESP]\n\t"
10361 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10362 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10363 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10364 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10365 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10366 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10367 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10368 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10369 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10370 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10371 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10372 "MOV [ESP+0],0\n\t"
10373 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10374
10375 "ADD ESP,8"
10376 %}
10377 ins_encode( push_stack_temp_qword,
10378 Push_Reg_D(X),
10379 Opcode(0xD9), Opcode(0xF1), // fyl2x
10380 pow_exp_core_encoding,
10381 pop_stack_temp_qword);
10382 ins_pipe( pipe_slow );
10383 %}
10384
10385 instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
10386 predicate (UseSSE>=2);
10387 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
10388 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
10389 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10390 "MOVSD [ESP],$src1\n\t"
10391 "FLD FPR1,$src1\n\t"
10392 "MOVSD [ESP],$src0\n\t"
10393 "FLD FPR1,$src0\n\t"
10394 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10395
10396 "FDUP \t\t\t# Q Q\n\t"
10397 "FRNDINT\t\t\t# int(Q) Q\n\t"
10398 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10399 "FISTP dword [ESP]\n\t"
10400 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10401 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10402 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10403 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10404 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10405 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10406 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10407 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10408 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10409 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10410 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10411 "MOV [ESP+0],0\n\t"
10412 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10413
10414 "FST_D [ESP]\n\t"
10415 "MOVSD $dst,[ESP]\n\t"
10416 "ADD ESP,8"
10417 %}
10418 ins_encode( push_stack_temp_qword,
10419 push_xmm_to_fpr1(src1),
10420 push_xmm_to_fpr1(src0),
10421 Opcode(0xD9), Opcode(0xF1), // fyl2x
10422 pow_exp_core_encoding,
10423 Push_ResultXD(dst) );
10424 ins_pipe( pipe_slow );
10425 %}
10426
10427
// Fast-path exp: e^X computed as 2^(X*log2(e)) via FLDL2E/FMULP, then the
// shared pow_exp_core_encoding splits the exponent into integer and
// fractional parts (F2XM1 + exponent-scaling double built in EAX/EBX/ECX,
// hence the KILLs).
// FIX(review): the first format line was missing its trailing "\n\t"
// (present in powD_reg and expXD_reg), so the disassembly comment ran the
// "SUB ESP,8" line together with "FLDL2E". Format-string-only change; the
// emitted code is untouched.
10428 instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10429 predicate (UseSSE<=1);
10430 match(Set dpr1 (ExpD dpr1));
10431 effect(KILL rax, KILL rbx, KILL rcx);
10432 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10433 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10434 "FMULP \t\t\t# Q=X*log2(e)\n\t"
10435
10436 "FDUP \t\t\t# Q Q\n\t"
10437 "FRNDINT\t\t\t# int(Q) Q\n\t"
10438 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10439 "FISTP dword [ESP]\n\t"
10440 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10441 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10442 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10443 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10444 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10445 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10446 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10447 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10448 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10449 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10450 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10451 "MOV [ESP+0],0\n\t"
10452 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10453
10454 "ADD ESP,8"
10455 %}
10456 ins_encode( push_stack_temp_qword,
10457 Opcode(0xD9), Opcode(0xEA), // fldl2e
10458 Opcode(0xDE), Opcode(0xC9), // fmulp
10459 pow_exp_core_encoding,
10460 pop_stack_temp_qword);
10461 ins_pipe( pipe_slow );
10462 %}
10463
10464 instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10465 predicate (UseSSE>=2);
10466 match(Set dst (ExpD src));
10467 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
10468 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10469 "MOVSD [ESP],$src\n\t"
10470 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10471 "FMULP \t\t\t# Q=X*log2(e) X\n\t"
10472
10473 "FDUP \t\t\t# Q Q\n\t"
10474 "FRNDINT\t\t\t# int(Q) Q\n\t"
10475 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10476 "FISTP dword [ESP]\n\t"
10477 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10478 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10479 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10480 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10481 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10482 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10483 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10484 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10485 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10486 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10487 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10488 "MOV [ESP+0],0\n\t"
10489 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10490
10491 "FST_D [ESP]\n\t"
10492 "MOVSD $dst,[ESP]\n\t"
10493 "ADD ESP,8"
10494 %}
10495 ins_encode( Push_SrcXD(src),
10496 Opcode(0xD9), Opcode(0xEA), // fldl2e
10497 Opcode(0xDE), Opcode(0xC9), // fmulp
10498 pow_exp_core_encoding,
10499 Push_ResultXD(dst) );
10500 ins_pipe( pipe_slow );
10501 %}
10502
10503
10504
10505 instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
10506 predicate (UseSSE<=1);
10507 // The source Double operand on FPU stack
10508 match(Set dst (Log10D src));
10509 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10510 // fxch ; swap ST(0) with ST(1)
10511 // fyl2x ; compute log_10(2) * log_2(x)
10512 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10513 "FXCH \n\t"
10514 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10515 %}
10516 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10517 Opcode(0xD9), Opcode(0xC9), // fxch
10518 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10519
10520 ins_pipe( pipe_slow );
10521 %}
10522
10523 instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10524 predicate (UseSSE>=2);
10525 effect(KILL cr);
10526 match(Set dst (Log10D src));
10527 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10528 // fyl2x ; compute log_10(2) * log_2(x)
10529 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10530 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10531 %}
10532 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10533 Push_SrcXD(src),
10534 Opcode(0xD9), Opcode(0xF1), // fyl2x
10535 Push_ResultXD(dst));
10536
10537 ins_pipe( pipe_slow );
10538 %}
10539
10540 instruct logD_reg(regDPR1 dst, regDPR1 src) %{
10541 predicate (UseSSE<=1);
10542 // The source Double operand on FPU stack
10543 match(Set dst (LogD src));
10544 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10545 // fxch ; swap ST(0) with ST(1)
10546 // fyl2x ; compute log_e(2) * log_2(x)
10547 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10548 "FXCH \n\t"
10549 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10550 %}
10551 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10552 Opcode(0xD9), Opcode(0xC9), // fxch
10553 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10554
10555 ins_pipe( pipe_slow );
10556 %}
10557
10558 instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10559 predicate (UseSSE>=2);
10560 effect(KILL cr);
10561 // The source and result Double operands in XMM registers
10562 match(Set dst (LogD src));
10563 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10564 // fyl2x ; compute log_e(2) * log_2(x)
10565 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10566 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10567 %}
10568 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10569 Push_SrcXD(src),
10570 Opcode(0xD9), Opcode(0xF1), // fyl2x
10571 Push_ResultXD(dst));
10572 ins_pipe( pipe_slow );
10573 %}
10574
10575 //-------------Float Instructions-------------------------------
10576 // Float Math
10577
10578 // Code for float compare:
10579 // fcompp();
10580 // fwait(); fnstsw_ax();
10581 // sahf();
10582 // movl(dst, unordered_result);
10583 // jcc(Assembler::parity, exit);
10584 // movl(dst, less_result);
10585 // jcc(Assembler::below, exit);
10586 // movl(dst, equal_result);
10587 // jcc(Assembler::equal, exit);
10588 // movl(dst, greater_result);
10589 // exit:
10590
// ---- x87 (non-SSE) float compares ----
// These five instructs implement CmpF / CmpF3 when UseSSE == 0, using the
// legacy FPU stack.  The P6 forms rely on FUCOMIP (requires cmov support);
// the pre-P6 form goes through FNSTSW AX / SAHF, which is why it KILLs rax.
10591 // P6 version of float compare, sets condition codes in EFLAGS
10592 instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
10593   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10594   match(Set cr (CmpF src1 src2));
10595   effect(KILL rax);
10596   ins_cost(150);
10597   format %{ "FLD    $src1\n\t"
10598             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10599             "JNP    exit\n\t"
10600             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10601             "SAHF\n"
10602      "exit:\tNOP               // avoid branch to branch" %}
10603   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
// NOTE(review): Push_Reg_D is shared with the double-compare path; the FPU
// stack push is the same for float operands — confirm against enc_class defs.
10604   ins_encode( Push_Reg_D(src1),
10605               OpcP, RegOpc(src2),
10606               cmpF_P6_fixup );
10607   ins_pipe( pipe_slow );
10608 %}
10609 
// Cheaper variant for an eFlagsRegUCF consumer: no NaN fixup branch is
// emitted, so no rax kill and a lower cost (100 vs 150).
10610 instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
10611   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10612   match(Set cr (CmpF src1 src2));
10613   ins_cost(100);
10614   format %{ "FLD    $src1\n\t"
10615             "FUCOMIP ST,$src2  // P6 instruction" %}
10616   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10617   ins_encode( Push_Reg_D(src1),
10618               OpcP, RegOpc(src2));
10619   ins_pipe( pipe_slow );
10620 %}
10621 
10622 
10623 // Compare & branch
// Pre-P6 compare: FCOMp + FNSTSW AX + SAHF sequence, NaN forced to "LT".
10624 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
10625   predicate(UseSSE == 0);
10626   match(Set cr (CmpF src1 src2));
10627   effect(KILL rax);
10628   ins_cost(200);
10629   format %{ "FLD    $src1\n\t"
10630             "FCOMp  $src2\n\t"
10631             "FNSTSW AX\n\t"
10632             "TEST   AX,0x400\n\t"
10633             "JZ,s   flags\n\t"
10634             "MOV    AH,1\t# unordered treat as LT\n"
10635     "flags:\tSAHF" %}
10636   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10637   ins_encode( Push_Reg_D(src1),
10638               OpcP, RegOpc(src2),
10639               fpu_flags);
10640   ins_pipe( pipe_slow );
10641 %}
10642 
10643 // Compare vs zero into -1,0,1
// Uses FTST (D9 E4) against 0.0, then CmpF_Result materializes -1/0/1 in dst.
10644 instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
10645   predicate(UseSSE == 0);
10646   match(Set dst (CmpF3 src1 zero));
10647   effect(KILL cr, KILL rax);
10648   ins_cost(280);
10649   format %{ "FTSTF  $dst,$src1" %}
// Opcodes are emitted OpcS then OpcP, i.e. 0xD9 0xE4 = FTST.
10650   opcode(0xE4, 0xD9);
10651   ins_encode( Push_Reg_D(src1),
10652               OpcS, OpcP, PopFPU,
10653               CmpF_Result(dst));
10654   ins_pipe( pipe_slow );
10655 %}
10656 
10657 // Compare into -1,0,1
10658 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
10659   predicate(UseSSE == 0);
10660   match(Set dst (CmpF3 src1 src2));
10661   effect(KILL cr, KILL rax);
10662   ins_cost(300);
10663   format %{ "FCMPF  $dst,$src1,$src2" %}
10664   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10665   ins_encode( Push_Reg_D(src1),
10666               OpcP, RegOpc(src2),
10667               CmpF_Result(dst));
10668   ins_pipe( pipe_slow );
10669 %}
10670
// ---- SSE (XMM) float compares, UseSSE >= 1 ----
// UCOMISS sets ZF/PF/CF; the non-CF variants append emit_cmpfp_fixup to
// convert the unordered (PF=1) case into "below" by patching EFLAGS on the
// stack, so downstream unsigned-compare users see NaN as LT.
10671 // float compare and set condition codes in EFLAGS by XMM regs
10672 instruct cmpX_cc(eFlagsRegU cr, regX src1, regX src2) %{
10673   predicate(UseSSE>=1);
10674   match(Set cr (CmpF src1 src2));
10675   ins_cost(145);
10676   format %{ "UCOMISS $src1,$src2\n\t"
10677             "JNP,s   exit\n\t"
10678             "PUSHF\t# saw NaN, set CF\n\t"
10679             "AND     [rsp], #0xffffff2b\n\t"
10680             "POPF\n"
10681     "exit:" %}
10682   ins_encode %{
10683     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10684     emit_cmpfp_fixup(_masm);
10685   %}
10686   ins_pipe( pipe_slow );
10687 %}
10688 
// eFlagsRegUCF consumer tolerates the raw UCOMISS flags: no fixup needed.
10689 instruct cmpX_ccCF(eFlagsRegUCF cr, regX src1, regX src2) %{
10690   predicate(UseSSE>=1);
10691   match(Set cr (CmpF src1 src2));
10692   ins_cost(100);
10693   format %{ "UCOMISS $src1,$src2" %}
10694   ins_encode %{
10695     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10696   %}
10697   ins_pipe( pipe_slow );
10698 %}
10699 
// Memory-operand versions of the two instructs above (fold the LoadF).
10700 // float compare and set condition codes in EFLAGS by XMM regs
10701 instruct cmpX_ccmem(eFlagsRegU cr, regX src1, memory src2) %{
10702   predicate(UseSSE>=1);
10703   match(Set cr (CmpF src1 (LoadF src2)));
10704   ins_cost(165);
10705   format %{ "UCOMISS $src1,$src2\n\t"
10706             "JNP,s   exit\n\t"
10707             "PUSHF\t# saw NaN, set CF\n\t"
10708             "AND     [rsp], #0xffffff2b\n\t"
10709             "POPF\n"
10710     "exit:" %}
10711   ins_encode %{
10712     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10713     emit_cmpfp_fixup(_masm);
10714   %}
10715   ins_pipe( pipe_slow );
10716 %}
10717 
10718 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX src1, memory src2) %{
10719   predicate(UseSSE>=1);
10720   match(Set cr (CmpF src1 (LoadF src2)));
10721   ins_cost(100);
10722   format %{ "UCOMISS $src1,$src2" %}
10723   ins_encode %{
10724     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10725   %}
10726   ins_pipe( pipe_slow );
10727 %}
10728 
// Three-way compare (CmpF3) producing -1/0/1 in an integer register;
// emit_cmpfp3 handles the NaN -> -1 convention.
10729 // Compare into -1,0,1 in XMM
10730 instruct cmpX_reg(xRegI dst, regX src1, regX src2, eFlagsReg cr) %{
10731   predicate(UseSSE>=1);
10732   match(Set dst (CmpF3 src1 src2));
10733   effect(KILL cr);
10734   ins_cost(255);
10735   format %{ "UCOMISS $src1, $src2\n\t"
10736             "MOV     $dst, #-1\n\t"
10737             "JP,s    done\n\t"
10738             "JB,s    done\n\t"
10739             "SETNE   $dst\n\t"
10740             "MOVZB   $dst, $dst\n"
10741     "done:" %}
10742   ins_encode %{
10743     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10744     emit_cmpfp3(_masm, $dst$$Register);
10745   %}
10746   ins_pipe( pipe_slow );
10747 %}
10748 
10749 // Compare into -1,0,1 in XMM and memory
10750 instruct cmpX_regmem(xRegI dst, regX src1, memory src2, eFlagsReg cr) %{
10751   predicate(UseSSE>=1);
10752   match(Set dst (CmpF3 src1 (LoadF src2)));
10753   effect(KILL cr);
10754   ins_cost(275);
10755   format %{ "UCOMISS $src1, $src2\n\t"
10756             "MOV     $dst, #-1\n\t"
10757             "JP,s    done\n\t"
10758             "JB,s    done\n\t"
10759             "SETNE   $dst\n\t"
10760             "MOVZB   $dst, $dst\n"
10761     "done:" %}
10762   ins_encode %{
10763     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10764     emit_cmpfp3(_masm, $dst$$Register);
10765   %}
10766   ins_pipe( pipe_slow );
10767 %}
10768
// ---- x87 SubF / AddF ----
// Each op comes in two flavors selected by select_24_bit_instr(): the "24"
// variant spills the result to a stack slot (Pop_Mem_F) to force rounding to
// single precision; the plain variant keeps the result on the FPU stack.
10769 // Spill to obtain 24-bit precision
10770 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
10771   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10772   match(Set dst (SubF src1 src2));
10773 
10774   format %{ "FSUB   $dst,$src1 - $src2" %}
10775   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10776   ins_encode( Push_Reg_F(src1),
10777               OpcReg_F(src2),
10778               Pop_Mem_F(dst) );
10779   ins_pipe( fpu_mem_reg_reg );
10780 %}
10781 //
10782 // This instruction does not round to 24-bits
// Two-address form: FSUBP into dst (DE /5 = FSUBRP family with reg encoding).
10783 instruct subF_reg(regF dst, regF src) %{
10784   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10785   match(Set dst (SubF dst src));
10786 
10787   format %{ "FSUB   $dst,$src" %}
10788   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10789   ins_encode( Push_Reg_F(src),
10790               OpcP, RegOpc(dst) );
10791   ins_pipe( fpu_reg_reg );
10792 %}
10793 
10794 // Spill to obtain 24-bit precision
10795 instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
10796   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10797   match(Set dst (AddF src1 src2));
10798 
10799   format %{ "FADD   $dst,$src1,$src2" %}
10800   opcode(0xD8, 0x0); /* D8 C0+i */
// AddF is commutative, so src2 is pushed first here (cf. subF24_reg).
10801   ins_encode( Push_Reg_F(src2),
10802               OpcReg_F(src1),
10803               Pop_Mem_F(dst) );
10804   ins_pipe( fpu_mem_reg_reg );
10805 %}
10806 //
10807 // This instruction does not round to 24-bits
10808 instruct addF_reg(regF dst, regF src) %{
10809   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10810   match(Set dst (AddF dst src));
10811 
10812   format %{ "FLD    $src\n\t"
10813             "FADDp  $dst,ST" %}
10814   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10815   ins_encode( Push_Reg_F(src),
10816               OpcP, RegOpc(dst) );
10817   ins_pipe( fpu_reg_reg );
10818 %}
10819
// ---- SSE scalar float arithmetic (UseSSE >= 1) ----
// Uniform pattern for add/sub/mul/div: a reg-reg form, an immediate form
// that loads the constant from the constant table ($constantaddress), and a
// memory form that folds the LoadF.  All are two-address: dst op= src.
10820 // Add two single precision floating point values in xmm
10821 instruct addX_reg(regX dst, regX src) %{
10822   predicate(UseSSE>=1);
10823   match(Set dst (AddF dst src));
10824   format %{ "ADDSS  $dst,$src" %}
10825   ins_encode %{
10826     __ addss($dst$$XMMRegister, $src$$XMMRegister);
10827   %}
10828   ins_pipe( pipe_slow );
10829 %}
10830 
10831 instruct addX_imm(regX dst, immXF con) %{
10832   predicate(UseSSE>=1);
10833   match(Set dst (AddF dst con));
10834   format %{ "ADDSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10835   ins_encode %{
10836     __ addss($dst$$XMMRegister, $constantaddress($con));
10837   %}
10838   ins_pipe(pipe_slow);
10839 %}
10840 
10841 instruct addX_mem(regX dst, memory mem) %{
10842   predicate(UseSSE>=1);
10843   match(Set dst (AddF dst (LoadF mem)));
10844   format %{ "ADDSS  $dst,$mem" %}
10845   ins_encode %{
10846     __ addss($dst$$XMMRegister, $mem$$Address);
10847   %}
10848   ins_pipe( pipe_slow );
10849 %}
10850 
10851 // Subtract two single precision floating point values in xmm
10852 instruct subX_reg(regX dst, regX src) %{
10853   predicate(UseSSE>=1);
10854   match(Set dst (SubF dst src));
10855   ins_cost(150);
10856   format %{ "SUBSS  $dst,$src" %}
10857   ins_encode %{
10858     __ subss($dst$$XMMRegister, $src$$XMMRegister);
10859   %}
10860   ins_pipe( pipe_slow );
10861 %}
10862 
10863 instruct subX_imm(regX dst, immXF con) %{
10864   predicate(UseSSE>=1);
10865   match(Set dst (SubF dst con));
10866   ins_cost(150);
10867   format %{ "SUBSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10868   ins_encode %{
10869     __ subss($dst$$XMMRegister, $constantaddress($con));
10870   %}
10871   ins_pipe(pipe_slow);
10872 %}
10873 
10874 instruct subX_mem(regX dst, memory mem) %{
10875   predicate(UseSSE>=1);
10876   match(Set dst (SubF dst (LoadF mem)));
10877   ins_cost(150);
10878   format %{ "SUBSS  $dst,$mem" %}
10879   ins_encode %{
10880     __ subss($dst$$XMMRegister, $mem$$Address);
10881   %}
10882   ins_pipe( pipe_slow );
10883 %}
10884 
10885 // Multiply two single precision floating point values in xmm
10886 instruct mulX_reg(regX dst, regX src) %{
10887   predicate(UseSSE>=1);
10888   match(Set dst (MulF dst src));
10889   format %{ "MULSS  $dst,$src" %}
10890   ins_encode %{
10891     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
10892   %}
10893   ins_pipe( pipe_slow );
10894 %}
10895 
10896 instruct mulX_imm(regX dst, immXF con) %{
10897   predicate(UseSSE>=1);
10898   match(Set dst (MulF dst con));
10899   format %{ "MULSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10900   ins_encode %{
10901     __ mulss($dst$$XMMRegister, $constantaddress($con));
10902   %}
10903   ins_pipe(pipe_slow);
10904 %}
10905 
10906 instruct mulX_mem(regX dst, memory mem) %{
10907   predicate(UseSSE>=1);
10908   match(Set dst (MulF dst (LoadF mem)));
10909   format %{ "MULSS  $dst,$mem" %}
10910   ins_encode %{
10911     __ mulss($dst$$XMMRegister, $mem$$Address);
10912   %}
10913   ins_pipe( pipe_slow );
10914 %}
10915 
10916 // Divide two single precision floating point values in xmm
10917 instruct divX_reg(regX dst, regX src) %{
10918   predicate(UseSSE>=1);
10919   match(Set dst (DivF dst src));
10920   format %{ "DIVSS  $dst,$src" %}
10921   ins_encode %{
10922     __ divss($dst$$XMMRegister, $src$$XMMRegister);
10923   %}
10924   ins_pipe( pipe_slow );
10925 %}
10926 
10927 instruct divX_imm(regX dst, immXF con) %{
10928   predicate(UseSSE>=1);
10929   match(Set dst (DivF dst con));
10930   format %{ "DIVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10931   ins_encode %{
10932     __ divss($dst$$XMMRegister, $constantaddress($con));
10933   %}
10934   ins_pipe(pipe_slow);
10935 %}
10936 
10937 instruct divX_mem(regX dst, memory mem) %{
10938   predicate(UseSSE>=1);
10939   match(Set dst (DivF dst (LoadF mem)));
10940   format %{ "DIVSS  $dst,$mem" %}
10941   ins_encode %{
10942     __ divss($dst$$XMMRegister, $mem$$Address);
10943   %}
10944   ins_pipe( pipe_slow );
10945 %}
10946
// ---- SSE square root ----
// Float sqrt matches the idiom ConvD2F(SqrtD(ConvF2D x)): Java's float sqrt
// is specified via double sqrt, and SQRTSS of the single value produces the
// same correctly-rounded result, so the widen/narrow pair is elided.
10947 // Get the square root of a single precision floating point values in xmm
10948 instruct sqrtX_reg(regX dst, regX src) %{
10949   predicate(UseSSE>=1);
10950   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10951   ins_cost(150);
10952   format %{ "SQRTSS $dst,$src" %}
10953   ins_encode %{
10954     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
10955   %}
10956   ins_pipe( pipe_slow );
10957 %}
10958 
10959 instruct sqrtX_mem(regX dst, memory mem) %{
10960   predicate(UseSSE>=1);
10961   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
10962   ins_cost(150);
10963   format %{ "SQRTSS $dst,$mem" %}
10964   ins_encode %{
10965     __ sqrtss($dst$$XMMRegister, $mem$$Address);
10966   %}
10967   ins_pipe( pipe_slow );
10968 %}
10969 
// Double sqrt needs SSE2 (regXD / SQRTSD).
10970 // Get the square root of a double precision floating point values in xmm
10971 instruct sqrtXD_reg(regXD dst, regXD src) %{
10972   predicate(UseSSE>=2);
10973   match(Set dst (SqrtD src));
10974   ins_cost(150);
10975   format %{ "SQRTSD $dst,$src" %}
10976   ins_encode %{
10977     __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
10978   %}
10979   ins_pipe( pipe_slow );
10980 %}
10981 
10982 instruct sqrtXD_mem(regXD dst, memory mem) %{
10983   predicate(UseSSE>=2);
10984   match(Set dst (SqrtD (LoadD mem)));
10985   ins_cost(150);
10986   format %{ "SQRTSD $dst,$mem" %}
10987   ins_encode %{
10988     __ sqrtsd($dst$$XMMRegister, $mem$$Address);
10989   %}
10990   ins_pipe( pipe_slow );
10991 %}
10992
// ---- AbsF / NegF ----
// x87 forms operate implicitly on ST(0) (hence regFPR1 operands); SSE forms
// mask/flip the sign bit with a constant pool mask (ANDPS / XORPS).
// FABS = D9 E1; emitted OpcS then OpcP, so operands are (0xE1, 0xD9).
10993 instruct absF_reg(regFPR1 dst, regFPR1 src) %{
10994   predicate(UseSSE==0);
10995   match(Set dst (AbsF src));
10996   ins_cost(100);
10997   format %{ "FABS" %}
10998   opcode(0xE1, 0xD9);
10999   ins_encode( OpcS, OpcP );
11000   ins_pipe( fpu_reg_reg );
11001 %}
11002 
11003 instruct absX_reg(regX dst ) %{
11004   predicate(UseSSE>=1);
11005   match(Set dst (AbsF dst));
11006   ins_cost(150);
11007   format %{ "ANDPS  $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
11008   ins_encode %{
11009     __ andps($dst$$XMMRegister,
11010              ExternalAddress((address)float_signmask_pool));
11011   %}
11012   ins_pipe( pipe_slow );
11013 %}
11014 
// FCHS = D9 E0.
11015 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11016   predicate(UseSSE==0);
11017   match(Set dst (NegF src));
11018   ins_cost(100);
11019   format %{ "FCHS" %}
11020   opcode(0xE0, 0xD9);
11021   ins_encode( OpcS, OpcP );
11022   ins_pipe( fpu_reg_reg );
11023 %}
11024 
11025 instruct negX_reg( regX dst ) %{
11026   predicate(UseSSE>=1);
11027   match(Set dst (NegF dst));
11028   ins_cost(150);
11029   format %{ "XORPS  $dst,[0x80000000]\t# CHS F by sign flipping" %}
11030   ins_encode %{
11031     __ xorps($dst$$XMMRegister,
11032              ExternalAddress((address)float_signflip_pool));
11033   %}
11034   ins_pipe( pipe_slow );
11035 %}
11036
// ---- x87 AddF memory / cisc-spill variants (24-bit rounding) ----
// tertiary opcode 0xD9 with /0 is the FLD m32 used to bring the memory
// operand onto the FPU stack before the arithmetic op.
11037 // Cisc-alternate to addF_reg
11038 // Spill to obtain 24-bit precision
11039 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11040   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11041   match(Set dst (AddF src1 (LoadF src2)));
11042 
11043   format %{ "FLD    $src2\n\t"
11044             "FADD   ST,$src1\n\t"
11045             "FSTP_S $dst" %}
11046   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11047   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11048               OpcReg_F(src1),
11049               Pop_Mem_F(dst) );
11050   ins_pipe( fpu_mem_reg_mem );
11051 %}
11052 //
11053 // Cisc-alternate to addF_reg
11054 // This instruction does not round to 24-bits
11055 instruct addF_reg_mem(regF dst, memory src) %{
11056   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11057   match(Set dst (AddF dst (LoadF src)));
11058 
11059   format %{ "FADD   $dst,$src" %}
11060   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
11061   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
11062               OpcP, RegOpc(dst) );
11063   ins_pipe( fpu_reg_mem );
11064 %}
11065 
11066 // // Following two instructions for _222_mpegaudio
11067 // Spill to obtain 24-bit precision
// Same as addF24_reg_mem but with the memory operand on the left of AddF.
11068 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
11069   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11070   match(Set dst (AddF src1 src2));
11071 
11072   format %{ "FADD   $dst,$src1,$src2" %}
11073   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11074   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
11075               OpcReg_F(src2),
11076               Pop_Mem_F(dst) );
11077   ins_pipe( fpu_mem_reg_mem );
11078 %}
11079 
11080 // Cisc-spill variant
11081 // Spill to obtain 24-bit precision
// Both operands in memory: FLD one, then memory-form FADD (D8 /0) the other.
// set_instruction_start resets the relocation/disassembly boundary between
// the two emitted x87 instructions.
11082 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
11083   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11084   match(Set dst (AddF src1 (LoadF src2)));
11085 
11086   format %{ "FADD   $dst,$src1,$src2 cisc" %}
11087   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11088   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11089               set_instruction_start,
11090               OpcP, RMopc_Mem(secondary,src1),
11091               Pop_Mem_F(dst) );
11092   ins_pipe( fpu_mem_mem_mem );
11093 %}
11094 
11095 // Spill to obtain 24-bit precision
11096 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11097   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11098   match(Set dst (AddF src1 src2));
11099 
11100   format %{ "FADD   $dst,$src1,$src2" %}
11101   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
11102   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11103               set_instruction_start,
11104               OpcP, RMopc_Mem(secondary,src1),
11105               Pop_Mem_F(dst) );
11106   ins_pipe( fpu_mem_mem_mem );
11107 %}
11108
11109 
// ---- x87 AddF with a constant-table immediate ----
// fld_s($src$$reg - 1): FLD ST(i-1) — the -1 adjusts the ADL register
// encoding to the x87 stack index relative to TOS.
11110 // Spill to obtain 24-bit precision
11111 instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
11112   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11113   match(Set dst (AddF src con));
11114   format %{ "FLD    $src\n\t"
11115             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11116             "FSTP_S $dst" %}
11117   ins_encode %{
11118     __ fld_s($src$$reg - 1);   // FLD ST(i-1)
11119     __ fadd_s($constantaddress($con));
// Store-and-pop to the stack slot as a 32-bit single: this is the 24-bit round.
11120     __ fstp_s(Address(rsp, $dst$$disp));
11121   %}
11122   ins_pipe(fpu_mem_reg_con);
11123 %}
11124 //
11125 // This instruction does not round to 24-bits
11126 instruct addF_reg_imm(regF dst, regF src, immF con) %{
11127   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11128   match(Set dst (AddF src con));
11129   format %{ "FLD    $src\n\t"
11130             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11131             "FSTP   $dst" %}
11132   ins_encode %{
11133     __ fld_s($src$$reg - 1);   // FLD ST(i-1)
11134     __ fadd_s($constantaddress($con));
// fstp_d here is the register-form FSTP ST(i) (no narrowing store), keeping
// the unrounded value in the destination FPU register.
11135     __ fstp_d($dst$$reg);
11136   %}
11137   ins_pipe(fpu_reg_reg_con);
11138 %}
11139
// ---- x87 MulF variants ----
// Same 24-bit-spill vs no-round pairing as the add/sub families above.
11140 // Spill to obtain 24-bit precision
11141 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
11142   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11143   match(Set dst (MulF src1 src2));
11144 
11145   format %{ "FLD    $src1\n\t"
11146             "FMUL   $src2\n\t"
11147             "FSTP_S $dst" %}
11148   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
11149   ins_encode( Push_Reg_F(src1),
11150               OpcReg_F(src2),
11151               Pop_Mem_F(dst) );
11152   ins_pipe( fpu_mem_reg_reg );
11153 %}
11154 //
11155 // This instruction does not round to 24-bits
11156 instruct mulF_reg(regF dst, regF src1, regF src2) %{
11157   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11158   match(Set dst (MulF src1 src2));
11159 
11160   format %{ "FLD    $src1\n\t"
11161             "FMUL   $src2\n\t"
11162             "FSTP_S $dst" %}
11163   opcode(0xD8, 0x1); /* D8 C8+i */
11164   ins_encode( Push_Reg_F(src2),
11165               OpcReg_F(src1),
11166               Pop_Reg_F(dst) );
11167   ins_pipe( fpu_reg_reg_reg );
11168 %}
11169 
11170 
11171 // Spill to obtain 24-bit precision
11172 // Cisc-alternate to reg-reg multiply
11173 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11174   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11175   match(Set dst (MulF src1 (LoadF src2)));
11176 
11177   format %{ "FLD_S  $src2\n\t"
11178             "FMUL   $src1\n\t"
11179             "FSTP_S $dst" %}
11180   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF  D9 /0 */
11181   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11182               OpcReg_F(src1),
11183               Pop_Mem_F(dst) );
11184   ins_pipe( fpu_mem_reg_mem );
11185 %}
11186 //
11187 // This instruction does not round to 24-bits
11188 // Cisc-alternate to reg-reg multiply
11189 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
11190   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11191   match(Set dst (MulF src1 (LoadF src2)));
11192 
11193   format %{ "FMUL   $dst,$src1,$src2" %}
11194   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
11195   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11196               OpcReg_F(src1),
11197               Pop_Reg_F(dst) );
11198   ins_pipe( fpu_reg_reg_mem );
11199 %}
11200 
11201 // Spill to obtain 24-bit precision
11202 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11203   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11204   match(Set dst (MulF src1 src2));
11205 
11206   format %{ "FMUL   $dst,$src1,$src2" %}
11207   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
11208   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11209               set_instruction_start,
11210               OpcP, RMopc_Mem(secondary,src1),
11211               Pop_Mem_F(dst) );
11212   ins_pipe( fpu_mem_mem_mem );
11213 %}
11214 
11215 // Spill to obtain 24-bit precision
11216 instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
11217   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11218   match(Set dst (MulF src con));
11219 
11220   format %{ "FLD    $src\n\t"
11221             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11222             "FSTP_S $dst" %}
11223   ins_encode %{
11224     __ fld_s($src$$reg - 1);   // FLD ST(i-1)
11225     __ fmul_s($constantaddress($con));
11226     __ fstp_s(Address(rsp, $dst$$disp));
11227   %}
11228   ins_pipe(fpu_mem_reg_con);
11229 %}
11230 //
11231 // This instruction does not round to 24-bits
11232 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
11233   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11234   match(Set dst (MulF src con));
11235 
11236   format %{ "FLD    $src\n\t"
11237             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11238             "FSTP   $dst" %}
11239   ins_encode %{
11240     __ fld_s($src$$reg - 1);   // FLD ST(i-1)
11241     __ fmul_s($constantaddress($con));
// Register-form FSTP ST(i): result stays unrounded on the FPU stack.
11242     __ fstp_d($dst$$reg);
11243   %}
11244   ins_pipe(fpu_reg_reg_con);
11245 %}
11246
11247 
// ---- Fused macro patterns (only valid when not rounding to 24 bits) ----
// These match multi-node subtrees (load+mul, load+mul+add, mul+add, sub+div)
// into a single x87 sequence to avoid intermediate spills.
11248 //
11249 // MACRO1 -- subsume unshared load into mulF
11250 // This instruction does not round to 24-bits
11251 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
11252   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11253   match(Set dst (MulF (LoadF mem1) src));
11254 
11255   format %{ "FLD    $mem1    ===MACRO1===\n\t"
11256             "FMUL   ST,$src\n\t"
11257             "FSTP   $dst" %}
11258   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
11259   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11260               OpcReg_F(src),
11261               Pop_Reg_F(dst) );
11262   ins_pipe( fpu_reg_reg_mem );
11263 %}
11264 //
11265 // MACRO2 -- addF a mulF which subsumed an unshared load
11266 // This instruction does not round to 24-bits
11267 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
11268   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11269   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11270   ins_cost(95);
11271 
11272   format %{ "FLD    $mem1     ===MACRO2===\n\t"
11273             "FMUL   ST,$src1  subsume mulF left load\n\t"
11274             "FADD   ST,$src2\n\t"
11275             "FSTP   $dst" %}
11276   opcode(0xD9); /* LoadF D9 /0 */
11277   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11278               FMul_ST_reg(src1),
11279               FAdd_ST_reg(src2),
11280               Pop_Reg_F(dst) );
11281   ins_pipe( fpu_reg_mem_reg_reg );
11282 %}
11283 
11284 // MACRO3 -- addF a mulF
11285 // This instruction does not round to 24-bits.  It is a '2-address'
11286 // instruction in that the result goes back to src2.  This eliminates
11287 // a move from the macro; possibly the register allocator will have
11288 // to add it back (and maybe not).
11289 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
11290   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11291   match(Set src2 (AddF (MulF src0 src1) src2));
11292 
11293   format %{ "FLD    $src0     ===MACRO3===\n\t"
11294             "FMUL   ST,$src1\n\t"
11295             "FADDP  $src2,ST" %}
11296   opcode(0xD9); /* LoadF D9 /0 */
11297   ins_encode( Push_Reg_F(src0),
11298               FMul_ST_reg(src1),
11299               FAddP_reg_ST(src2) );
11300   ins_pipe( fpu_reg_reg_reg );
11301 %}
11302 
11303 // MACRO4 -- divF subF
11304 // This instruction does not round to 24-bits
11305 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
11306   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11307   match(Set dst (DivF (SubF src2 src1) src3));
11308 
11309   format %{ "FLD    $src2   ===MACRO4===\n\t"
11310             "FSUB   ST,$src1\n\t"
11311             "FDIV   ST,$src3\n\t"
11312             "FSTP  $dst" %}
11313   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11314   ins_encode( Push_Reg_F(src2),
11315               subF_divF_encode(src1,src3),
11316               Pop_Reg_F(dst) );
11317   ins_pipe( fpu_reg_reg_reg_reg );
11318 %}
11319
// ---- x87 DivF ----
11320 // Spill to obtain 24-bit precision
11321 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
11322   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11323   match(Set dst (DivF src1 src2));
11324 
11325   format %{ "FDIV   $dst,$src1,$src2" %}
// (comment fixed: secondary opcode 0x6 is the D8 /6 memory form, not DE /6)
11326   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
11327   ins_encode( Push_Reg_F(src1),
11328               OpcReg_F(src2),
11329               Pop_Mem_F(dst) );
11330   ins_pipe( fpu_mem_reg_reg );
11331 %}
11332 //
11333 // This instruction does not round to 24-bits
// Two-address form via FDIVP (DE F8+i).
11334 instruct divF_reg(regF dst, regF src) %{
11335   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11336   match(Set dst (DivF dst src));
11337 
11338   format %{ "FDIV   $dst,$src" %}
11339   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11340   ins_encode( Push_Reg_F(src),
11341               OpcP, RegOpc(dst) );
11342   ins_pipe( fpu_reg_reg );
11343 %}
11344
11345 
// ---- ModF (float remainder) ----
// All variants delegate to emitModD(), which loops FPREM until C2 clears;
// that loop reads status via FNSTSW AX, hence the EAX and EFLAGS kills.
11346 // Spill to obtain 24-bit precision
11347 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
11348   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11349   match(Set dst (ModF src1 src2));
11350   effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11351 
11352   format %{ "FMOD   $dst,$src1,$src2" %}
11353   ins_encode( Push_Reg_Mod_D(src1, src2),
11354               emitModD(),
11355               Push_Result_Mod_D(src2),
11356               Pop_Mem_F(dst));
11357   ins_pipe( pipe_slow );
11358 %}
11359 //
11360 // This instruction does not round to 24-bits
11361 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
11362   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11363   match(Set dst (ModF dst src));
11364   effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11365 
11366   format %{ "FMOD   $dst,$src" %}
11367   ins_encode(Push_Reg_Mod_D(dst, src),
11368              emitModD(),
11369              Push_Result_Mod_D(src),
11370              Pop_Reg_F(dst));
11371   ins_pipe( pipe_slow );
11372 %}
11373 
// SSE operands have no FPREM equivalent, so the XMM values are bounced
// through a 4-byte stack slot onto the x87 stack and back again.
11374 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
11375   predicate(UseSSE>=1);
11376   match(Set dst (ModF src0 src1));
11377   effect(KILL rax, KILL cr);
11378   format %{ "SUB    ESP,4\t # FMOD\n"
11379           "\tMOVSS  [ESP+0],$src1\n"
11380           "\tFLD_S  [ESP+0]\n"
11381           "\tMOVSS  [ESP+0],$src0\n"
11382           "\tFLD_S  [ESP+0]\n"
11383      "loop:\tFPREM\n"
11384           "\tFWAIT\n"
11385           "\tFNSTSW AX\n"
11386           "\tSAHF\n"
11387           "\tJP     loop\n"
11388           "\tFSTP_S [ESP+0]\n"
11389           "\tMOVSS  $dst,[ESP+0]\n"
11390           "\tADD    ESP,4\n"
11391           "\tFSTP   ST0\t # Restore FPU Stack"
11392     %}
11393   ins_cost(250);
11394   ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
11395   ins_pipe( pipe_slow );
11396 %}
11397
11398
11399 //----------Arithmetic Conversion Instructions---------------------------------
11400 // The conversions operations are all Alpha sorted.  Please keep it that way!
11401 
// Round an x87 register to IEEE single/double by storing it through memory;
// used both for RoundFloat/RoundDouble nodes and (via expand) for ConvD2F.
11402 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
11403   predicate(UseSSE==0);
11404   match(Set dst (RoundFloat src));
11405   ins_cost(125);
11406   format %{ "FST_S  $dst,$src\t# F-round" %}
11407   ins_encode( Pop_Mem_Reg_F(dst, src) );
11408   ins_pipe( fpu_mem_reg );
11409 %}
11410 
11411 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
11412   predicate(UseSSE<=1);
11413   match(Set dst (RoundDouble src));
11414   ins_cost(125);
11415   format %{ "FST_D  $dst,$src\t# D-round" %}
11416   ins_encode( Pop_Mem_Reg_D(dst, src) );
11417   ins_pipe( fpu_mem_reg );
11418 %}
11419
// ---- ConvD2F (double -> float narrowing) ----
// Three paths: pure x87 (expand to a rounding store), mixed x87 source with
// SSE float result (UseSSE==1), and pure SSE2 CVTSD2SS.
11420 // Force rounding to 24-bit precision and 6-bit exponent
11421 instruct convD2F_reg(stackSlotF dst, regD src) %{
11422   predicate(UseSSE==0);
11423   match(Set dst (ConvD2F src));
11424   format %{ "FST_S  $dst,$src\t# F-round" %}
11425   expand %{
11426     roundFloat_mem_reg(dst,src);
11427   %}
11428 %}
11429 
11430 // Force rounding to 24-bit precision and 6-bit exponent
11431 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
11432   predicate(UseSSE==1);
11433   match(Set dst (ConvD2F src));
11434   effect( KILL cr );
11435   format %{ "SUB    ESP,4\n\t"
11436             "FST_S  [ESP],$src\t# F-round\n\t"
11437             "MOVSS  $dst,[ESP]\n\t"
11438             "ADD    ESP,4" %}
11439   ins_encode %{
11440     __ subptr(rsp, 4);
// If the source is already FPR1 (top of stack) a plain FST suffices;
// otherwise load ST(i-1) to TOS first and use the popping store.
11441     if ($src$$reg != FPR1L_enc) {
11442       __ fld_s($src$$reg-1);
11443       __ fstp_s(Address(rsp, 0));
11444     } else {
11445       __ fst_s(Address(rsp, 0));
11446     }
11447     __ movflt($dst$$XMMRegister, Address(rsp, 0));
11448     __ addptr(rsp, 4);
11449   %}
11450   ins_pipe( pipe_slow );
11451 %}
11452 
11453 // Force rounding double precision to single precision
11454 instruct convXD2X_reg(regX dst, regXD src) %{
11455   predicate(UseSSE>=2);
11456   match(Set dst (ConvD2F src));
11457   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11458   ins_encode %{
11459     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
11460   %}
11461   ins_pipe( pipe_slow );
11462 %}
11463
// ---- ConvF2D (float -> double widening) ----
// NOTE(review): the format below prints "FST_S" on the widening path — this
// is display-only text for disassembly; the encoding is Pop_Reg_Reg_D.
11464 instruct convF2D_reg_reg(regD dst, regF src) %{
11465   predicate(UseSSE==0);
11466   match(Set dst (ConvF2D src));
11467   format %{ "FST_S  $dst,$src\t# D-round" %}
11468   ins_encode( Pop_Reg_Reg_D(dst, src));
11469   ins_pipe( fpu_reg_reg );
11470 %}
11471 
11472 instruct convF2D_reg(stackSlotD dst, regF src) %{
11473   predicate(UseSSE==1);
11474   match(Set dst (ConvF2D src));
11475   format %{ "FST_D  $dst,$src\t# D-round" %}
11476   expand %{
11477     roundDouble_mem_reg(dst,src);
11478   %}
11479 %}
11480 
// UseSSE==1: SSE float source, x87 double result — bounce through the stack.
11481 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
11482   predicate(UseSSE==1);
11483   match(Set dst (ConvF2D src));
11484   effect( KILL cr );
11485   format %{ "SUB    ESP,4\n\t"
11486             "MOVSS  [ESP] $src\n\t"
11487             "FLD_S  [ESP]\n\t"
11488             "ADD    ESP,4\n\t"
11489             "FSTP   $dst\t# D-round" %}
11490   ins_encode %{
11491     __ subptr(rsp, 4);
11492     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11493     __ fld_s(Address(rsp, 0));
11494     __ addptr(rsp, 4);
11495     __ fstp_d($dst$$reg);
11496   %}
11497   ins_pipe( pipe_slow );
11498 %}
11499 
11500 instruct convX2XD_reg(regXD dst, regX src) %{
11501   predicate(UseSSE>=2);
11502   match(Set dst (ConvF2D src));
11503   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11504   ins_encode %{
11505     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
11506   %}
11507   ins_pipe( pipe_slow );
11508 %}
11509
// ---- ConvD2I ----
// Java requires round-toward-zero and NaN -> 0.  Both paths convert, then
// compare against 0x80000000 (the hardware's "invalid" result) and call the
// d2i_wrapper stub on that slow path to apply exact Java semantics.
11510 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
11511 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11512   predicate(UseSSE<=1);
11513   match(Set dst (ConvD2I src));
11514   effect( KILL tmp, KILL cr );
11515   format %{ "FLD    $src\t# Convert double to int \n\t"
11516             "FLDCW  trunc mode\n\t"
11517             "SUB    ESP,4\n\t"
11518             "FISTp  [ESP + #0]\n\t"
11519             "FLDCW  std/24-bit mode\n\t"
11520             "POP    EAX\n\t"
11521             "CMP    EAX,0x80000000\n\t"
11522             "JNE,s  fast\n\t"
11523             "FLD_D  $src\n\t"
11524             "CALL   d2i_wrapper\n"
11525       "fast:" %}
11526   ins_encode( Push_Reg_D(src), D2I_encoding(src) );
11527   ins_pipe( pipe_slow );
11528 %}
11529 
11530 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
11531 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
11532   predicate(UseSSE>=2);
11533   match(Set dst (ConvD2I src));
11534   effect( KILL tmp, KILL cr );
11535   format %{ "CVTTSD2SI $dst, $src\n\t"
11536             "CMP    $dst,0x80000000\n\t"
11537             "JNE,s  fast\n\t"
11538             "SUB    ESP, 8\n\t"
11539             "MOVSD  [ESP], $src\n\t"
11540             "FLD_D  [ESP]\n\t"
11541             "ADD    ESP, 8\n\t"
11542             "CALL   d2i_wrapper\n"
11543       "fast:" %}
11544   ins_encode %{
11545     Label fast;
// CVTTSD2SI truncates; out-of-range / NaN inputs yield 0x80000000.
11546     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
11547     __ cmpl($dst$$Register, 0x80000000);
11548     __ jccb(Assembler::notEqual, fast);
// Slow path: pass the double on the x87 stack to the wrapper stub.
11549     __ subptr(rsp, 8);
11550     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11551     __ fld_d(Address(rsp, 0));
11552     __ addptr(rsp, 8);
11553     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11554     __ bind(fast);
11555   %}
11556   ins_pipe( pipe_slow );
11557 %}
11558
// x87 ConvD2L: FISTP with truncation into EDX:EAX (eADXRegL pair); the
// 0x80000000:00000000 sentinel result routes to the d2l_wrapper stub for
// exact Java overflow/NaN semantics.
11559 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11560   predicate(UseSSE<=1);
11561   match(Set dst (ConvD2L src));
11562   effect( KILL cr );
11563   format %{ "FLD    $src\t# Convert double to long\n\t"
11564             "FLDCW  trunc mode\n\t"
11565             "SUB    ESP,8\n\t"
11566             "FISTp  [ESP + #0]\n\t"
11567             "FLDCW  std/24-bit mode\n\t"
11568             "POP    EAX\n\t"
11569             "POP    EDX\n\t"
11570             "CMP    EDX,0x80000000\n\t"
11571             "JNE,s  fast\n\t"
11572             "TEST   EAX,EAX\n\t"
11573             "JNE,s  fast\n\t"
11574             "FLD    $src\n\t"
11575             "CALL   d2l_wrapper\n"
11576       "fast:" %}
11577   ins_encode( Push_Reg_D(src),  D2L_encoding(src) );
11578   ins_pipe( pipe_slow );
11579 %}
11580
11581 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// Spill the XMM double to the stack, reload onto the x87 stack, and do the
// truncating FISTP dance; the 0x8000000000000000 sentinel (EDX:EAX) routes
// to the d2l_wrapper stub for overflow/NaN corner cases.
11582 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
11583 predicate (UseSSE>=2);
11584 match(Set dst (ConvD2L src));
11585 effect( KILL cr );
11586 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11587 "MOVSD [ESP],$src\n\t"
11588 "FLD_D [ESP]\n\t"
11589 "FLDCW trunc mode\n\t"
11590 "FISTp [ESP + #0]\n\t"
11591 "FLDCW std/24-bit mode\n\t"
11592 "POP EAX\n\t"
11593 "POP EDX\n\t"
11594 "CMP EDX,0x80000000\n\t"
11595 "JNE,s fast\n\t"
11596 "TEST EAX,EAX\n\t"
11597 "JNE,s fast\n\t"
11598 "SUB ESP,8\n\t"
11599 "MOVSD [ESP],$src\n\t"
11600 "FLD_D [ESP]\n\t"
11601 "ADD ESP,8\n\t"
11602 "CALL d2l_wrapper\n"
// Slow path: sentinel seen — re-push the double for the stub call.
11620 __ cmpl(rdx, 0x80000000);
11621 __ jccb(Assembler::notEqual, fast);
11622 __ testl(rax, rax);
11623 __ jccb(Assembler::notEqual, fast);
11624 __ subptr(rsp, 8);
11625 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11626 __ fld_d(Address(rsp, 0));
11627 __ addptr(rsp, 8);
11628 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11629 __ bind(fast);
11630 %}
11631 ins_pipe( pipe_slow );
11632 %}
11633
11634 // Convert a double to an int. Java semantics require we do complex
11635 // manglations in the corner cases. So we set the rounding mode to
11636 // 'zero', store the darned double down as an int, and reset the
11637 // rounding mode to 'nearest'. The hardware stores a flag value down
11638 // if we would overflow or converted a NAN; we check for this and
11639 // go the slow path if needed. (The same scheme is reused here for float.)
11640 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11641 predicate(UseSSE==0);
11642 match(Set dst (ConvF2I src));
11643 effect( KILL tmp, KILL cr );
11644 format %{ "FLD $src\t# Convert float to int \n\t"
11645 "FLDCW trunc mode\n\t"
11646 "SUB ESP,4\n\t"
11647 "FISTp [ESP + #0]\n\t"
11648 "FLDCW std/24-bit mode\n\t"
11649 "POP EAX\n\t"
11650 "CMP EAX,0x80000000\n\t"
11651 "JNE,s fast\n\t"
11652 "FLD $src\n\t"
11653 "CALL d2i_wrapper\n"
11654 "fast:" %}
11655 // D2I_encoding works for F2I
11656 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11657 ins_pipe( pipe_slow );
11658 %}
11659
11660 // Convert a float in xmm to an int reg.
// CVTTSS2SI produces 0x80000000 on overflow/NaN; in that case reload the
// float onto the x87 stack and call d2i_wrapper (handles float input too).
11661 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
11662 predicate(UseSSE>=1);
11663 match(Set dst (ConvF2I src));
11664 effect( KILL tmp, KILL cr );
11665 format %{ "CVTTSS2SI $dst, $src\n\t"
11666 "CMP $dst,0x80000000\n\t"
11667 "JNE,s fast\n\t"
11668 "SUB ESP, 4\n\t"
11669 "MOVSS [ESP], $src\n\t"
11670 "FLD [ESP]\n\t"
11671 "ADD ESP, 4\n\t"
11672 "CALL d2i_wrapper\n"
11673 "fast:" %}
11674 ins_encode %{
11675 Label fast;
11676 __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11677 __ cmpl($dst$$Register, 0x80000000);
11678 __ jccb(Assembler::notEqual, fast);
11679 __ subptr(rsp, 4);
11680 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11681 __ fld_s(Address(rsp, 0));
11682 __ addptr(rsp, 4);
11683 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11684 __ bind(fast);
11685 %}
11686 ins_pipe( pipe_slow );
11687 %}
11688
// Convert float (x87) to long: truncating FISTP, sentinel check in EDX:EAX,
// slow path through d2l_wrapper for overflow/NaN (shares D2L_encoding).
11689 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11690 predicate(UseSSE==0);
11691 match(Set dst (ConvF2L src));
11692 effect( KILL cr );
11693 format %{ "FLD $src\t# Convert float to long\n\t"
11694 "FLDCW trunc mode\n\t"
11695 "SUB ESP,8\n\t"
11696 "FISTp [ESP + #0]\n\t"
11697 "FLDCW std/24-bit mode\n\t"
11698 "POP EAX\n\t"
11699 "POP EDX\n\t"
11700 "CMP EDX,0x80000000\n\t"
11701 "JNE,s fast\n\t"
11702 "TEST EAX,EAX\n\t"
11703 "JNE,s fast\n\t"
11704 "FLD $src\n\t"
11705 "CALL d2l_wrapper\n"
11706 "fast:" %}
11707 // D2L_encoding works for F2L
11708 ins_encode( Push_Reg_F(src), D2L_encoding(src) );
11709 ins_pipe( pipe_slow );
11710 %}
11711
11712 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// Spill the XMM single to the stack, reload via FLD_S, truncating FISTP,
// then sentinel check (EDX:EAX == 0x8000000000000000) gates the
// d2l_wrapper slow path.
11713 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
11714 predicate (UseSSE>=1);
11715 match(Set dst (ConvF2L src));
11716 effect( KILL cr );
11717 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11718 "MOVSS [ESP],$src\n\t"
11719 "FLD_S [ESP]\n\t"
11720 "FLDCW trunc mode\n\t"
11721 "FISTp [ESP + #0]\n\t"
11722 "FLDCW std/24-bit mode\n\t"
11723 "POP EAX\n\t"
11724 "POP EDX\n\t"
11725 "CMP EDX,0x80000000\n\t"
11726 "JNE,s fast\n\t"
11727 "TEST EAX,EAX\n\t"
11728 "JNE,s fast\n\t"
11729 "SUB ESP,4\t# Convert float to long\n\t"
11730 "MOVSS [ESP],$src\n\t"
11731 "FLD_S [ESP]\n\t"
11732 "ADD ESP,4\n\t"
11733 "CALL d2l_wrapper\n"
// Restore the standard (or 24-bit) FPU control word after truncation.
11745 } else {
11746 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11747 }
11748 // Load the converted long, adjust CPU stack
11749 __ pop(rax);
11750 __ pop(rdx);
11751 __ cmpl(rdx, 0x80000000);
11752 __ jccb(Assembler::notEqual, fast);
11753 __ testl(rax, rax);
11754 __ jccb(Assembler::notEqual, fast);
11755 __ subptr(rsp, 4);
11756 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11757 __ fld_s(Address(rsp, 0));
11758 __ addptr(rsp, 4);
11759 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11760 __ bind(fast);
11761 %}
11762 ins_pipe( pipe_slow );
11763 %}
11764
// int -> double on the x87 stack (no rounding needed: every int is exact
// in a double).
11765 instruct convI2D_reg(regD dst, stackSlotI src) %{
11766 predicate( UseSSE<=1 );
11767 match(Set dst (ConvI2D src));
11768 format %{ "FILD $src\n\t"
11769 "FSTP $dst" %}
11770 opcode(0xDB, 0x0); /* DB /0 */
11771 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
11772 ins_pipe( fpu_reg_mem );
11773 %}
11774
// int (GPR) -> double in xmm via scalar CVTSI2SD.
11775 instruct convI2XD_reg(regXD dst, eRegI src) %{
11776 predicate( UseSSE>=2 && !UseXmmI2D );
11777 match(Set dst (ConvI2D src));
11778 format %{ "CVTSI2SD $dst,$src" %}
11779 ins_encode %{
11780 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11781 %}
11782 ins_pipe( pipe_slow );
11783 %}
11784
// int loaded straight from memory -> double in xmm (folds the LoadI).
11785 instruct convI2XD_mem(regXD dst, memory mem) %{
11786 predicate( UseSSE>=2 );
11787 match(Set dst (ConvI2D (LoadI mem)));
11788 format %{ "CVTSI2SD $dst,$mem" %}
11789 ins_encode %{
11790 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11791 %}
11792 ins_pipe( pipe_slow );
11793 %}
11794
// int -> double staying in the xmm domain (MOVD + CVTDQ2PD); selected by
// the UseXmmI2D tuning flag.
11795 instruct convXI2XD_reg(regXD dst, eRegI src)
11796 %{
11797 predicate( UseSSE>=2 && UseXmmI2D );
11798 match(Set dst (ConvI2D src));
11799
11800 format %{ "MOVD $dst,$src\n\t"
11801 "CVTDQ2PD $dst,$dst\t# i2d" %}
11802 ins_encode %{
11803 __ movdl($dst$$XMMRegister, $src$$Register);
11804 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11805 %}
11806 ins_pipe(pipe_slow); // XXX
11807 %}
11808
// int from memory -> double on the x87 stack; only valid outside 24-bit
// rounding mode (no explicit rounding store is done).
11809 instruct convI2D_mem(regD dst, memory mem) %{
11810 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11811 match(Set dst (ConvI2D (LoadI mem)));
11812 format %{ "FILD $mem\n\t"
11813 "FSTP $dst" %}
11814 opcode(0xDB); /* DB /0 */
11815 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11816 Pop_Reg_D(dst));
11817 ins_pipe( fpu_reg_mem );
11818 %}
11819
11820 // Convert a byte to a float; no rounding step needed.
// Matches only when the input is (AndI x 255), i.e. an 8-bit value that is
// always exactly representable as a float.
11821 instruct conv24I2F_reg(regF dst, stackSlotI src) %{
11822 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11823 match(Set dst (ConvI2F src));
11824 format %{ "FILD $src\n\t"
11825 "FSTP $dst" %}
11826
11827 opcode(0xDB, 0x0); /* DB /0 */
11828 ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
11829 ins_pipe( fpu_reg_mem );
11830 %}
11831
11832 // In 24-bit mode, force exponent rounding by storing back out
11833 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
11834 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11835 match(Set dst (ConvI2F src));
11836 ins_cost(200);
11837 format %{ "FILD $src\n\t"
11838 "FSTP_S $dst" %}
11839 opcode(0xDB, 0x0); /* DB /0 */
11840 ins_encode( Push_Mem_I(src),
11841 Pop_Mem_F(dst));
11842 ins_pipe( fpu_mem_mem );
11843 %}
11844
11845 // In 24-bit mode, force exponent rounding by storing back out
// Same as above but folds the LoadI from memory.
11846 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
11847 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11848 match(Set dst (ConvI2F (LoadI mem)));
11849 ins_cost(200);
11850 format %{ "FILD $mem\n\t"
11851 "FSTP_S $dst" %}
11852 opcode(0xDB); /* DB /0 */
11853 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11854 Pop_Mem_F(dst));
11855 ins_pipe( fpu_mem_mem );
11856 %}
11857
11858 // This instruction does not round to 24-bits
11859 instruct convI2F_reg(regF dst, stackSlotI src) %{
11860 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11861 match(Set dst (ConvI2F src));
11862 format %{ "FILD $src\n\t"
11863 "FSTP $dst" %}
11864 opcode(0xDB, 0x0); /* DB /0 */
11865 ins_encode( Push_Mem_I(src),
11866 Pop_Reg_F(dst));
11867 ins_pipe( fpu_reg_mem );
11868 %}
11869
11870 // This instruction does not round to 24-bits
11871 instruct convI2F_mem(regF dst, memory mem) %{
11872 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11873 match(Set dst (ConvI2F (LoadI mem)));
11874 format %{ "FILD $mem\n\t"
11875 "FSTP $dst" %}
11876 opcode(0xDB); /* DB /0 */
11877 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11878 Pop_Reg_F(dst));
11879 ins_pipe( fpu_reg_mem );
11880 %}
11881
11882 // Convert an int to a float in xmm; no rounding step needed.
// Note precedence: UseSSE==1 || (UseSSE>=2 && !UseXmmI2F).
11883 instruct convI2X_reg(regX dst, eRegI src) %{
11884 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11885 match(Set dst (ConvI2F src));
11886 format %{ "CVTSI2SS $dst, $src" %}
11887 ins_encode %{
11888 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11889 %}
11890 ins_pipe( pipe_slow );
11891 %}
11892
// int -> float entirely in the xmm domain (MOVD + CVTDQ2PS); selected by
// the UseXmmI2F tuning flag.
11893 instruct convXI2X_reg(regX dst, eRegI src)
11894 %{
11895 predicate( UseSSE>=2 && UseXmmI2F );
11896 match(Set dst (ConvI2F src));
11897
11898 format %{ "MOVD $dst,$src\n\t"
11899 "CVTDQ2PS $dst,$dst\t# i2f" %}
11900 ins_encode %{
11901 __ movdl($dst$$XMMRegister, $src$$Register);
11902 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11903 %}
11904 ins_pipe(pipe_slow); // XXX
11905 %}
11906
11907 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
11908 match(Set dst (ConvI2L src));
11909 effect(KILL cr);
11910 ins_cost(375);
11911 format %{ "MOV $dst.lo,$src\n\t"
11912 "MOV $dst.hi,$src\n\t"
11913 "SAR $dst.hi,31" %}
11922 ins_cost(250);
11923 format %{ "MOV $dst.lo,$src\n\t"
11924 "XOR $dst.hi,$dst.hi" %}
11925 opcode(0x33); // XOR
11926 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11927 ins_pipe( ialu_reg_reg_long );
11928 %}
11929
11930 // Zero-extend long
// Matches (AndL src 0xFFFFFFFF): copy the low word and clear the high word.
11931 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11932 match(Set dst (AndL src mask) );
11933 effect( KILL flags );
11934 ins_cost(250);
11935 format %{ "MOV $dst.lo,$src.lo\n\t"
11936 "XOR $dst.hi,$dst.hi\n\t" %}
11937 opcode(0x33); // XOR
11938 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11939 ins_pipe( ialu_reg_reg_long );
11940 %}
11941
// long -> double via x87: push both halves, FILD the 64-bit int, round by
// storing to the destination stack slot.
11942 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11943 predicate (UseSSE<=1);
11944 match(Set dst (ConvL2D src));
11945 effect( KILL cr );
11946 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11947 "PUSH $src.lo\n\t"
11948 "FILD ST,[ESP + #0]\n\t"
11949 "ADD ESP,8\n\t"
11950 "FSTP_D $dst\t# D-round" %}
11951 opcode(0xDF, 0x5); /* DF /5 */
11952 ins_encode(convert_long_double(src), Pop_Mem_D(dst));
11953 ins_pipe( pipe_slow );
11954 %}
11955
// long -> double into xmm: x87 does the 64-bit FILD, result is stored to
// the stack and reloaded into the xmm register.
11956 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
11957 predicate (UseSSE>=2);
11958 match(Set dst (ConvL2D src));
11959 effect( KILL cr );
11960 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11961 "PUSH $src.lo\n\t"
11962 "FILD_D [ESP]\n\t"
11963 "FSTP_D [ESP]\n\t"
11964 "MOVSD $dst,[ESP]\n\t"
11965 "ADD ESP,8" %}
11966 opcode(0xDF, 0x5); /* DF /5 */
11967 ins_encode(convert_long_double2(src), Push_ResultXD(dst));
11968 ins_pipe( pipe_slow );
11969 %}
11970
// long -> float into xmm, same x87 round-trip with a single-precision store
// (FSTP_S performs the F-rounding).
11971 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
11972 predicate (UseSSE>=1);
11973 match(Set dst (ConvL2F src));
11974 effect( KILL cr );
11975 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11976 "PUSH $src.lo\n\t"
11977 "FILD_D [ESP]\n\t"
11978 "FSTP_S [ESP]\n\t"
11979 "MOVSS $dst,[ESP]\n\t"
11980 "ADD ESP,8" %}
11981 opcode(0xDF, 0x5); /* DF /5 */
11982 ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
11983 ins_pipe( pipe_slow );
11984 %}
11985
// long -> float to a stack slot (x87 path, no predicate: fallback rule).
11986 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11987 match(Set dst (ConvL2F src));
11988 effect( KILL cr );
11989 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11990 "PUSH $src.lo\n\t"
11991 "FILD ST,[ESP + #0]\n\t"
11992 "ADD ESP,8\n\t"
11993 "FSTP_S $dst\t# F-round" %}
11994 opcode(0xDF, 0x5); /* DF /5 */
11995 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
11996 ins_pipe( pipe_slow );
11997 %}
11998
// long -> int is just a copy of the low 32-bit half.
11999 instruct convL2I_reg( eRegI dst, eRegL src ) %{
12000 match(Set dst (ConvL2I src));
12001 effect( DEF dst, USE src );
12002 format %{ "MOV $dst,$src.lo" %}
12003 ins_encode(enc_CopyL_Lo(dst,src));
12004 ins_pipe( ialu_reg_reg );
12005 %}
12006
12007
// MoveF2I / MoveI2F: raw 32-bit bit-pattern moves between float and int
// views (Float.floatToRawIntBits / intBitsToFloat); no conversion occurs.
12008 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
12009 match(Set dst (MoveF2I src));
12010 effect( DEF dst, USE src );
12011 ins_cost(100);
12012 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
12013 ins_encode %{
12014 __ movl($dst$$Register, Address(rsp, $src$$disp));
12015 %}
12016 ins_pipe( ialu_reg_mem );
12017 %}
12018
// x87 float register -> int stack slot via a single-precision store.
12019 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
12020 predicate(UseSSE==0);
12021 match(Set dst (MoveF2I src));
12022 effect( DEF dst, USE src );
12023
12024 ins_cost(125);
12025 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
12026 ins_encode( Pop_Mem_Reg_F(dst, src) );
12027 ins_pipe( fpu_mem_reg );
12028 %}
12029
// xmm float -> int stack slot (MOVSS store).
12030 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
12031 predicate(UseSSE>=1);
12032 match(Set dst (MoveF2I src));
12033 effect( DEF dst, USE src );
12034
12035 ins_cost(95);
12036 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
12037 ins_encode %{
12038 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
12039 %}
12040 ins_pipe( pipe_slow );
12041 %}
12042
// xmm float -> GPR directly with MOVD (cheapest form, needs SSE2).
12043 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
12044 predicate(UseSSE>=2);
12045 match(Set dst (MoveF2I src));
12046 effect( DEF dst, USE src );
12047 ins_cost(85);
12048 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
12049 ins_encode %{
12050 __ movdl($dst$$Register, $src$$XMMRegister);
12051 %}
12052 ins_pipe( pipe_slow );
12053 %}
12054
// int GPR -> float stack slot (plain 32-bit store).
12055 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
12056 match(Set dst (MoveI2F src));
12057 effect( DEF dst, USE src );
12058
12059 ins_cost(100);
12060 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
12061 ins_encode %{
12062 __ movl(Address(rsp, $dst$$disp), $src$$Register);
12063 %}
12064 ins_pipe( ialu_mem_reg );
12065 %}
12066
12067
// int stack slot -> x87 float register (FLD m32real).
12068 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
12069 predicate(UseSSE==0);
12070 match(Set dst (MoveI2F src));
12071 effect(DEF dst, USE src);
12072
12073 ins_cost(125);
12074 format %{ "FLD_S $src\n\t"
12075 "FSTP $dst\t# MoveI2F_stack_reg" %}
12076 opcode(0xD9); /* D9 /0, FLD m32real */
12077 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12078 Pop_Reg_F(dst) );
12079 ins_pipe( fpu_reg_mem );
12080 %}
12081
// int stack slot -> xmm float (MOVSS load).
12082 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
12083 predicate(UseSSE>=1);
12084 match(Set dst (MoveI2F src));
12085 effect( DEF dst, USE src );
12086
12087 ins_cost(95);
12088 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
12089 ins_encode %{
12090 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
12091 %}
12092 ins_pipe( pipe_slow );
12093 %}
12094
// int GPR -> xmm float directly with MOVD (needs SSE2).
12095 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
12096 predicate(UseSSE>=2);
12097 match(Set dst (MoveI2F src));
12098 effect( DEF dst, USE src );
12099
12100 ins_cost(85);
12101 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
12102 ins_encode %{
12103 __ movdl($dst$$XMMRegister, $src$$Register);
12104 %}
12105 ins_pipe( pipe_slow );
12106 %}
12107
// MoveD2L / MoveL2D: raw 64-bit bit-pattern moves between double and long
// views (Double.doubleToRawLongBits / longBitsToDouble); no conversion.
// Double stack slot -> long register pair: two 32-bit loads (lo, then hi at
// +4).
12108 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
12109 match(Set dst (MoveD2L src));
12110 effect(DEF dst, USE src);
12111
12112 ins_cost(250);
12113 format %{ "MOV $dst.lo,$src\n\t"
12114 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
12115 opcode(0x8B, 0x8B);
12116 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12117 ins_pipe( ialu_mem_long_reg );
12118 %}
12119
// x87 double register -> long stack slot via a double-precision store.
12120 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
12121 predicate(UseSSE<=1);
12122 match(Set dst (MoveD2L src));
12123 effect(DEF dst, USE src);
12124
12125 ins_cost(125);
12126 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12127 ins_encode( Pop_Mem_Reg_D(dst, src) );
12128 ins_pipe( fpu_mem_reg );
12129 %}
12130
// xmm double -> long stack slot (MOVSD store).
12131 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
12132 predicate(UseSSE>=2);
12133 match(Set dst (MoveD2L src));
12134 effect(DEF dst, USE src);
12135 ins_cost(95);
12136 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12137 ins_encode %{
12138 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
12139 %}
12140 ins_pipe( pipe_slow );
12141 %}
12142
// xmm double -> long register pair without touching memory: MOVD the low
// word, PSHUFLW swaps word lanes so a second MOVD extracts the high word.
12143 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
12144 predicate(UseSSE>=2);
12145 match(Set dst (MoveD2L src));
12146 effect(DEF dst, USE src, TEMP tmp);
12147 ins_cost(85);
12148 format %{ "MOVD $dst.lo,$src\n\t"
12149 "PSHUFLW $tmp,$src,0x4E\n\t"
12150 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12151 ins_encode %{
12152 __ movdl($dst$$Register, $src$$XMMRegister);
12153 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
12154 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
12155 %}
12156 ins_pipe( pipe_slow );
12157 %}
12158
// Long register pair -> double stack slot: two 32-bit stores.
12159 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12160 match(Set dst (MoveL2D src));
12161 effect(DEF dst, USE src);
12162
12163 ins_cost(200);
12164 format %{ "MOV $dst,$src.lo\n\t"
12165 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12166 opcode(0x89, 0x89);
12167 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12168 ins_pipe( ialu_mem_long_reg );
12169 %}
12170
12171
// Long stack slot -> x87 double register (FLD m64real).
12172 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12173 predicate(UseSSE<=1);
12174 match(Set dst (MoveL2D src));
12175 effect(DEF dst, USE src);
12176 ins_cost(125);
12177
12178 format %{ "FLD_D $src\n\t"
12179 "FSTP $dst\t# MoveL2D_stack_reg" %}
12180 opcode(0xDD); /* DD /0, FLD m64real */
12181 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12182 Pop_Reg_D(dst) );
12183 ins_pipe( fpu_reg_mem );
12184 %}
12185
12186
// Long stack slot -> xmm double; MOVSD variant clears the upper half
// (preferred when UseXmmLoadAndClearUpper).
12187 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12188 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12189 match(Set dst (MoveL2D src));
12190 effect(DEF dst, USE src);
12191
12192 ins_cost(95);
12193 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12194 ins_encode %{
12195 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
12196 %}
12197 ins_pipe( pipe_slow );
12198 %}
12199
// Partial-register variant: movdbl() emits MOVLPD when
// !UseXmmLoadAndClearUpper, leaving the upper half untouched.
12200 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12201 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12202 match(Set dst (MoveL2D src));
12203 effect(DEF dst, USE src);
12204
12205 ins_cost(95);
12206 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12207 ins_encode %{
12208 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
12209 %}
12210 ins_pipe( pipe_slow );
12211 %}
12212
// Long register pair -> xmm double without memory: MOVD each half into xmm
// regs, then PUNPCKLDQ interleaves lo/hi into the 64-bit lane.
12213 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12214 predicate(UseSSE>=2);
12215 match(Set dst (MoveL2D src));
12216 effect(TEMP dst, USE src, TEMP tmp);
12217 ins_cost(85);
12218 format %{ "MOVD $dst,$src.lo\n\t"
12219 "MOVD $tmp,$src.hi\n\t"
12220 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12221 ins_encode %{
12222 __ movdl($dst$$XMMRegister, $src$$Register);
12223 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
12224 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
12225 %}
12226 ins_pipe( pipe_slow );
12227 %}
12228
12229 // Replicate scalar to packed byte (1 byte) values in xmm
// Widens the byte into word lanes (PUNPCKLBW with itself), then broadcasts
// the low word to the low 4 word lanes with PSHUFLW.
12230 instruct Repl8B_reg(regXD dst, regXD src) %{
12231 predicate(UseSSE>=2);
12232 match(Set dst (Replicate8B src));
12233 format %{ "MOVDQA $dst,$src\n\t"
12234 "PUNPCKLBW $dst,$dst\n\t"
12235 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12236 ins_encode %{
12237 if ($dst$$reg != $src$$reg) {
12238 __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
12239 }
12240 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
12241 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12242 %}
12243 ins_pipe( pipe_slow );
12244 %}
12245
12246 // Replicate scalar to packed byte (1 byte) values in xmm
// Same as above but the scalar arrives in a general-purpose register.
12247 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12248 predicate(UseSSE>=2);
12249 match(Set dst (Replicate8B src));
12250 format %{ "MOVD $dst,$src\n\t"
12251 "PUNPCKLBW $dst,$dst\n\t"
12252 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12253 ins_encode %{
12254 __ movdl($dst$$XMMRegister, $src$$Register);
12255 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
12256 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12257 %}
12258 ins_pipe( pipe_slow );
12259 %}
12260
12261 // Replicate scalar zero to packed byte (1 byte) values in xmm
12262 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12263 predicate(UseSSE>=2);
12264 match(Set dst (Replicate8B zero));
12265 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12266 ins_encode %{
12267 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12268 %}
12269 ins_pipe( fpu_reg_reg );
12270 %}
12271
12272 // Replicate scalar to packed short (2 byte) values in xmm
12273 instruct Repl4S_reg(regXD dst, regXD src) %{
12274 predicate(UseSSE>=2);
12275 match(Set dst (Replicate4S src));
12276 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12277 ins_encode %{
12278 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12279 %}
12280 ins_pipe( fpu_reg_reg );
12281 %}
12282
12283 // Replicate scalar to packed short (2 byte) values in xmm
12284 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12285 predicate(UseSSE>=2);
12286 match(Set dst (Replicate4S src));
12287 format %{ "MOVD $dst,$src\n\t"
12288 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12289 ins_encode %{
12290 __ movdl($dst$$XMMRegister, $src$$Register);
12291 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12292 %}
12293 ins_pipe( fpu_reg_reg );
12294 %}
12295
12296 // Replicate scalar zero to packed short (2 byte) values in xmm
12297 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12298 predicate(UseSSE>=2);
12299 match(Set dst (Replicate4S zero));
12300 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12301 ins_encode %{
12302 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12303 %}
12304 ins_pipe( fpu_reg_reg );
12305 %}
12306
12307 // Replicate scalar to packed char (2 byte) values in xmm
12308 instruct Repl4C_reg(regXD dst, regXD src) %{
12309 predicate(UseSSE>=2);
12310 match(Set dst (Replicate4C src));
12311 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12312 ins_encode %{
12313 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12314 %}
12315 ins_pipe( fpu_reg_reg );
12316 %}
12317
12318 // Replicate scalar to packed char (2 byte) values in xmm
12319 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12320 predicate(UseSSE>=2);
12321 match(Set dst (Replicate4C src));
12322 format %{ "MOVD $dst,$src\n\t"
12323 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12324 ins_encode %{
12325 __ movdl($dst$$XMMRegister, $src$$Register);
12326 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12327 %}
12328 ins_pipe( fpu_reg_reg );
12329 %}
12330
12331 // Replicate scalar zero to packed char (2 byte) values in xmm
12332 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12333 predicate(UseSSE>=2);
12334 match(Set dst (Replicate4C zero));
12335 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12336 ins_encode %{
12337 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12338 %}
12339 ins_pipe( fpu_reg_reg );
12340 %}
12341
12342 // Replicate scalar to packed integer (4 byte) values in xmm
12343 instruct Repl2I_reg(regXD dst, regXD src) %{
12344 predicate(UseSSE>=2);
12345 match(Set dst (Replicate2I src));
12346 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12347 ins_encode %{
12348 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12349 %}
12350 ins_pipe( fpu_reg_reg );
12351 %}
12352
12353 // Replicate scalar to packed integer (4 byte) values in xmm
12354 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12355 predicate(UseSSE>=2);
12356 match(Set dst (Replicate2I src));
12357 format %{ "MOVD $dst,$src\n\t"
12358 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12359 ins_encode %{
12360 __ movdl($dst$$XMMRegister, $src$$Register);
12361 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12362 %}
12363 ins_pipe( fpu_reg_reg );
12364 %}
12365
12366 // Replicate scalar zero to packed integer (4 byte) values in xmm
12367 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12368 predicate(UseSSE>=2);
12369 match(Set dst (Replicate2I zero));
12370 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12371 ins_encode %{
12372 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12373 %}
12374 ins_pipe( fpu_reg_reg );
12375 %}
12376
12377 // Replicate scalar to packed single precision floating point values in xmm
// Shuffle imm 0xe0 broadcasts element 0 to the two low dword lanes.
12378 instruct Repl2F_reg(regXD dst, regXD src) %{
12379 predicate(UseSSE>=2);
12380 match(Set dst (Replicate2F src));
12381 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12382 ins_encode %{
12383 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12384 %}
12385 ins_pipe( fpu_reg_reg );
12386 %}
12387
12388 // Replicate scalar to packed single precision floating point values in xmm
12389 instruct Repl2F_regX(regXD dst, regX src) %{
12390 predicate(UseSSE>=2);
12391 match(Set dst (Replicate2F src));
12392 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12393 ins_encode %{
12394 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12395 %}
12396 ins_pipe( fpu_reg_reg );
12397 %}
12398
12399 // Replicate scalar to packed single precision floating point values in xmm
12400 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12401 predicate(UseSSE>=2);
12402 match(Set dst (Replicate2F zero));
12403 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12404 ins_encode %{
12405 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12406 %}
12407 ins_pipe( fpu_reg_reg );
12408 %}
12409
12410 // =======================================================================
12411 // fast clearing of an array
// ClearArray: cnt is in doublewords; SHL ECX,1 converts to 32-bit word
// count, then REP STOS writes EAX (zeroed) through EDI.
12412 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12413 match(Set dummy (ClearArray cnt base));
12414 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12415 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12416 "XOR EAX,EAX\n\t"
12417 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12418 opcode(0,0x4);
12419 ins_encode( Opcode(0xD1), RegOpc(ECX),
12420 OpcRegReg(0x33,EAX,EAX),
12421 Opcode(0xF3), Opcode(0xAB) );
12422 ins_pipe( pipe_slow );
12423 %}
12424
// String.compareTo intrinsic; delegates to MacroAssembler::string_compare
// with fixed registers (EDI/ESI strings, ECX/EDX counts, EAX result).
12425 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
12426 eAXRegI result, regXD tmp1, eFlagsReg cr) %{
12427 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12428 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12429
12430 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
12431 ins_encode %{
12432 __ string_compare($str1$$Register, $str2$$Register,
12433 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12434 $tmp1$$XMMRegister);
12435 %}
12436 ins_pipe( pipe_slow );
12437 %}
12438
12439 // fast string equals
// char_arrays_equals(false, ...): the leading 'false' selects the
// string-equality variant (vs. array-equality below).
12440 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12441 regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
12442 match(Set result (StrEquals (Binary str1 str2) cnt));
12443 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12444
12445 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12446 ins_encode %{
12447 __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
12448 $cnt$$Register, $result$$Register, $tmp3$$Register,
12449 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12450 %}
12451 ins_pipe( pipe_slow );
12452 %}
12453
12454 // fast search of substring with known size.
// Constant substring length chooses between the "C8" variant (>= 8 chars,
// needle kept in xmm, no stack staging) and the general routine.
12455 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12456 eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12457 predicate(UseSSE42Intrinsics);
12458 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12459 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12460
12461 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
12462 ins_encode %{
12463 int icnt2 = (int)$int_cnt2$$constant;
12464 if (icnt2 >= 8) {
12465 // IndexOf for constant substrings with size >= 8 elements
12466 // which don't need to be loaded through stack.
12467 __ string_indexofC8($str1$$Register, $str2$$Register,
12468 $cnt1$$Register, $cnt2$$Register,
12469 icnt2, $result$$Register,
12470 $vec$$XMMRegister, $tmp$$Register);
12471 } else {
12472 // Small strings are loaded through stack if they cross page boundary.
12473 __ string_indexof($str1$$Register, $str2$$Register,
12474 $cnt1$$Register, $cnt2$$Register,
12475 icnt2, $result$$Register,
12476 $vec$$XMMRegister, $tmp$$Register);
12477 }
12478 %}
12479 ins_pipe( pipe_slow );
12480 %}
12481
// General (non-constant substring length) indexOf; -1 tells the helper the
// length is dynamic, in cnt2.
12482 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12483 eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
12484 predicate(UseSSE42Intrinsics);
12485 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12486 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12487
12488 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
12489 ins_encode %{
12490 __ string_indexof($str1$$Register, $str2$$Register,
12491 $cnt1$$Register, $cnt2$$Register,
12492 (-1), $result$$Register,
12493 $vec$$XMMRegister, $tmp$$Register);
12494 %}
12495 ins_pipe( pipe_slow );
12496 %}
12497
12498 // fast array equals
// char_arrays_equals(true, ...): 'true' selects the char-array variant,
// which also compares lengths (read into tmp3 by the helper).
12499 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12500 regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12501 %{
12502 match(Set result (AryEq ary1 ary2));
12503 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12504 //ins_cost(300);
12505
12506 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12507 ins_encode %{
12508 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
12509 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12510 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12511 %}
12512 ins_pipe( pipe_slow );
12513 %}
12514
12515 //----------Control Flow Instructions------------------------------------------
12516 // Signed compare Instructions
12517 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
12518 match(Set cr (CmpI op1 op2));
12519 effect( DEF cr, USE op1, USE op2 );
12520 format %{ "CMP $op1,$op2" %}
13306 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13307 ins_cost(250);
13308 format %{ "CMOV$cmp $dst,$src" %}
13309 opcode(0x0F,0x40);
13310 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13311 ins_pipe( pipe_cmov_mem );
13312 %}
13313
13314 // Compare 2 longs and CMOVE ints.
// Pointer CMOVE on a long LT/GE comparison; requires hardware CMOV. Note
// the explicit parentheses grouping the lt/ge test disjunction.
13315 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13316 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13317 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13318 ins_cost(200);
13319 format %{ "CMOV$cmp $dst,$src" %}
13320 opcode(0x0F,0x40);
13321 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13322 ins_pipe( pipe_cmov_reg );
13323 %}
13324
13325 // Compare 2 longs and CMOVE doubles
// FPU (UseSSE<=1) double CMOVE on a long LT/GE comparison; expands to the
// fcmovD_regS rule.
// FIX: parenthesize the lt/ge disjunction. The original
//   UseSSE<=1 && ..lt || ..ge
// parsed as (UseSSE<=1 && lt) || ge, so this FPU rule could also match a
// BoolTest::ge compare when UseSSE>=2, overlapping cmovXDD_reg_LTGE.
// Grouping now matches cmovPP_reg_LTGE above.
13326 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13327 predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13328 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13329 ins_cost(200);
13330 expand %{
13331 fcmovD_regS(cmp,flags,dst,src);
13332 %}
13333 %}
13334
13335 // Compare 2 longs and CMOVE doubles
// XMM (UseSSE>=2) double CMOVE on a long LT/GE comparison; expands to the
// fcmovXD_regS rule.
// FIX: parenthesize the lt/ge disjunction so UseSSE>=2 gates BOTH tests;
// previously (UseSSE>=2 && lt) || ge let this rule match ge even when
// UseSSE<2. Grouping now matches cmovPP_reg_LTGE above.
13336 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
13337 predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13338 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13339 ins_cost(200);
13340 expand %{
13341 fcmovXD_regS(cmp,flags,dst,src);
13342 %}
13343 %}
13344
// Compare 2 longs and CMOVE floats (x87 path).
13345 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
// Fix: parenthesize the BoolTest disjunction so UseSSE==0 guards both the
// LT and GE tests ('&&' binds tighter than '||'); otherwise the GE case
// matched regardless of UseSSE, overlapping cmovXX_reg_LTGE (UseSSE>=1).
13346 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13347 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13348 ins_cost(200);
13349 expand %{
13350 fcmovF_regS(cmp,flags,dst,src);
13351 %}
13352 %}
13353
// Compare 2 longs and CMOVE floats (SSE path).
13354 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
// Fix: parenthesize the BoolTest disjunction so UseSSE>=1 guards both the
// LT and GE tests ('&&' binds tighter than '||'); otherwise the GE case
// matched regardless of UseSSE, overlapping cmovFF_reg_LTGE (UseSSE==0).
13355 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13356 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13357 ins_cost(200);
13358 expand %{
13359 fcmovX_regS(cmp,flags,dst,src);
13360 %}
13361 %}
13362
13363 //======
13364 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13365 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
13366 match( Set flags (CmpL src zero ));
// Needs a scratch register to OR the two halves together.
13367 effect(TEMP tmp);
13368 ins_cost(200);
// ZF is set iff (src.lo | src.hi) == 0, i.e. the whole long is zero.
13369 format %{ "MOV $tmp,$src.lo\n\t"
13370 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13371 ins_encode( long_cmp_flags0( src, tmp ) );
13372 ins_pipe( ialu_reg_reg_long );
13373 %}
13374
13375 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13376 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13377 match( Set flags (CmpL src1 src2 ));
13378 ins_cost(200+300);
13379 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13434 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13435 ins_cost(250);
13436 format %{ "CMOV$cmp $dst,$src" %}
13437 opcode(0x0F,0x40);
13438 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13439 ins_pipe( pipe_cmov_mem );
13440 %}
13441
13442 // Compare 2 longs and CMOVE pointers (eRegP operands).
13443 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
// Only match when hardware CMOV exists and the long-compare test is EQ or NE
// (explicit parentheses group the || under the &&).
13444 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13445 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13446 ins_cost(200);
13447 format %{ "CMOV$cmp $dst,$src" %}
// 0x0F two-byte escape; 0x40+cc is CMOVcc, completed by enc_cmov.
13448 opcode(0x0F,0x40);
13449 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13450 ins_pipe( pipe_cmov_reg );
13451 %}
13452
13453 // Compare 2 longs and CMOVE doubles
13454 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
// Fix: parenthesize the BoolTest disjunction so UseSSE<=1 guards both the
// EQ and NE tests ('&&' binds tighter than '||'); otherwise the NE case
// matched regardless of UseSSE, overlapping cmovXDD_reg_EQNE (UseSSE>=2).
13455 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13456 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13457 ins_cost(200);
13458 expand %{
13459 fcmovD_regS(cmp,flags,dst,src);
13460 %}
13461 %}
13462
13463 // Compare 2 longs and CMOVE doubles
13464 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
// Fix: parenthesize the BoolTest disjunction so UseSSE>=2 guards both the
// EQ and NE tests ('&&' binds tighter than '||'); otherwise the NE case
// matched regardless of UseSSE, overlapping cmovDD_reg_EQNE (UseSSE<=1).
13465 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13466 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13467 ins_cost(200);
13468 expand %{
13469 fcmovXD_regS(cmp,flags,dst,src);
13470 %}
13471 %}
13472
// Compare 2 longs and CMOVE floats (x87 path).
13473 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
// Fix: parenthesize the BoolTest disjunction so UseSSE==0 guards both the
// EQ and NE tests ('&&' binds tighter than '||'); otherwise the NE case
// matched regardless of UseSSE, overlapping cmovXX_reg_EQNE (UseSSE>=1).
13474 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13475 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13476 ins_cost(200);
13477 expand %{
13478 fcmovF_regS(cmp,flags,dst,src);
13479 %}
13480 %}
13481
// Compare 2 longs and CMOVE floats (SSE path).
13482 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
// Fix: parenthesize the BoolTest disjunction so UseSSE>=1 guards both the
// EQ and NE tests ('&&' binds tighter than '||'); otherwise the NE case
// matched regardless of UseSSE, overlapping cmovFF_reg_EQNE (UseSSE==0).
13483 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13484 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13485 ins_cost(200);
13486 expand %{
13487 fcmovX_regS(cmp,flags,dst,src);
13488 %}
13489 %}
13490
13491 //======
13492 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13493 // Same as cmpL_reg_flags_LEGT except must negate src
13494 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
13495 match( Set flags (CmpL src zero ));
// Needs a scratch register zeroed and used as the left operand of the
// commuted compare (0 - src computed via CMP/SBB sets the flags).
13496 effect( TEMP tmp );
13497 ins_cost(300);
13498 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13499 "CMP $tmp,$src.lo\n\t"
13500 "SBB $tmp,$src.hi\n\t" %}
13501 ins_encode( long_cmp_flags3(src, tmp) );
13502 ins_pipe( ialu_reg_reg_long );
13503 %}
13504
13505 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13506 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13507 // requires a commuted test to get the same result.
13567 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13568 ins_cost(250);
13569 format %{ "CMOV$cmp $dst,$src" %}
13570 opcode(0x0F,0x40);
13571 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13572 ins_pipe( pipe_cmov_mem );
13573 %}
13574
13575 // Compare 2 longs and CMOVE ptrs.
// Uses cmpOp_commute because the LEGT flags are produced by a swapped compare.
13576 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
// Only match when hardware CMOV exists and the long-compare test is LE or GT
// (explicit parentheses group the || under the &&).
13577 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13578 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13579 ins_cost(200);
13580 format %{ "CMOV$cmp $dst,$src" %}
13581 opcode(0x0F,0x40);
13582 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13583 ins_pipe( pipe_cmov_reg );
13584 %}
13585
13586 // Compare 2 longs and CMOVE doubles
13587 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
// Fix: parenthesize the BoolTest disjunction so UseSSE<=1 guards both the
// LE and GT tests ('&&' binds tighter than '||'); otherwise the GT case
// matched regardless of UseSSE, overlapping cmovXDD_reg_LEGT (UseSSE>=2).
13588 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13589 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13590 ins_cost(200);
13591 expand %{
13592 fcmovD_regS(cmp,flags,dst,src);
13593 %}
13594 %}
13595
13596 // Compare 2 longs and CMOVE doubles
13597 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
// Fix: parenthesize the BoolTest disjunction so UseSSE>=2 guards both the
// LE and GT tests ('&&' binds tighter than '||'); otherwise the GT case
// matched regardless of UseSSE, overlapping cmovDD_reg_LEGT (UseSSE<=1).
13598 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13599 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13600 ins_cost(200);
13601 expand %{
13602 fcmovXD_regS(cmp,flags,dst,src);
13603 %}
13604 %}
13605
// Compare 2 longs and CMOVE floats (x87 path).
13606 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
// Fix: parenthesize the BoolTest disjunction so UseSSE==0 guards both the
// LE and GT tests ('&&' binds tighter than '||'); otherwise the GT case
// matched regardless of UseSSE, overlapping cmovXX_reg_LEGT (UseSSE>=1).
13607 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13608 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13609 ins_cost(200);
13610 expand %{
13611 fcmovF_regS(cmp,flags,dst,src);
13612 %}
13613 %}
13614
13615
// Compare 2 longs and CMOVE floats (SSE path).
13616 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
// Fix: parenthesize the BoolTest disjunction so UseSSE>=1 guards both the
// LE and GT tests ('&&' binds tighter than '||'); otherwise the GT case
// matched regardless of UseSSE, overlapping cmovFF_reg_LEGT (UseSSE==0).
13617 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13618 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13619 ins_cost(200);
13620 expand %{
13621 fcmovX_regS(cmp,flags,dst,src);
13622 %}
13623 %}
13624
13625
13626 // ============================================================================
13627 // Procedure Call/Return Instructions
13628 // Call Java Static Instruction
13629 // Note: If this code changes, the corresponding ret_addr_offset() and
13630 // compute_padding() functions will have to be adjusted.
13631 instruct CallStaticJavaDirect(method meth) %{
13632 match(CallStaticJava);
13633 predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
13634 effect(USE meth);
13635
13636 ins_cost(300);
13637 format %{ "CALL,static " %}
13638 opcode(0xE8); /* E8 cd */
13639 ins_encode( pre_call_FPU,
13640 Java_Static_Call( meth ),
13641 call_epilog,
|
1758 // bswap lo
1759 emit_opcode(cbuf, 0x0F);
1760 emit_cc(cbuf, 0xC8, destlo);
1761 // bswap hi
1762 emit_opcode(cbuf, 0x0F);
1763 emit_cc(cbuf, 0xC8, desthi);
1764 // xchg lo and hi
1765 emit_opcode(cbuf, 0x87);
1766 emit_rm(cbuf, 0x3, destlo, desthi);
1767 %}
1768
1769 enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ...
// ModRM byte with mod=11 (register direct); $secondary is the /digit opcode extension.
1770 emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1771 %}
1772
// Emit a CMOVcc opcode: $primary byte (0x0F escape at call sites) followed by
// $secondary (0x40) plus the condition code from the cmpOp operand.
1773 enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1774 $$$emit8$primary;
1775 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1776 %}
1777
// x87 FCMOVcc: two opcode bytes built from 0xDA00 + condition + (st(i) - 1).
1778 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1779 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1780 emit_d8(cbuf, op >> 8 );
1781 emit_d8(cbuf, op & 255);
1782 %}
1783
1784 // emulate a CMOV with a conditional branch around a MOV
1785 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1786 // Invert sense of branch from sense of CMOV (Jcc short is 0x70+cc; cc^1 flips it)
1787 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1788 emit_d8( cbuf, $brOffs$$constant );
1789 %}
1790
1791 enc_class enc_PartialSubtypeCheck( ) %{
1792 Register Redi = as_Register(EDI_enc); // result register
1793 Register Reax = as_Register(EAX_enc); // super class
1794 Register Recx = as_Register(ECX_enc); // killed
1795 Register Resi = as_Register(ESI_enc); // sub class
1796 Label miss;
1797
1798 MacroAssembler _masm(&cbuf);
2046 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2047 %}
2048
// ModRM for the low 32-bit halves of two long registers.
2049 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
2050 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2051 %}
2052
// ModRM for the high 32-bit halves of two long registers.
2053 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
2054 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2055 %}
2056
// ModRM pairing an int register with the high half of a long register.
2057 enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
2058 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2059 %}
2060
2061 enc_class Con32 (immI src) %{ // Con32(storeImmI)
2062 // Output immediate
2063 $$$emit32$src$$constant;
2064 %}
2065
// Emit a float constant as its raw 32-bit IEEE-754 pattern (x87 path).
2066 enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm
2067 // Output Float immediate bits
2068 jfloat jf = $src$$constant;
2069 int jf_as_bits = jint_cast( jf );
2070 emit_d32(cbuf, jf_as_bits);
2071 %}
2072
// Emit a float constant as its raw 32-bit IEEE-754 pattern (SSE path).
2073 enc_class Con32F_as_bits(immF src) %{ // storeX_imm
2074 // Output Float immediate bits
2075 jfloat jf = $src$$constant;
2076 int jf_as_bits = jint_cast( jf );
2077 emit_d32(cbuf, jf_as_bits);
2078 %}
2079
2080 enc_class Con16 (immI src) %{ // Con16(storeImmI)
2081 // Output immediate
2082 $$$emit16$src$$constant;
2083 %}
2084
// Emit an int constant as a raw 32-bit data word.
2085 enc_class Con_d32(immI src) %{
2086 emit_d32(cbuf,$src$$constant);
2087 %}
2088
// ModRM with mod=00, r/m=101: 32-bit absolute address form (disp32 only).
2089 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI)
2090 // Output immediate memory reference
2091 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2092 emit_d32(cbuf, 0x00);
2093 %}
2266 // jmp dst < src around move
2267 emit_opcode(cbuf,0x7C);
2268 emit_d8(cbuf,2);
2269 // move dst,src
2270 emit_opcode(cbuf,0x8B);
2271 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2272 %}
2273
// MAX of two int registers: compare, then conditionally skip the move.
2274 enc_class max_enc (eRegI dst, eRegI src) %{ // MAX
2275 // Compare dst,src (0x3B = CMP r32, r/m32)
2276 emit_opcode(cbuf,0x3B);
2277 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2278 // jmp dst > src around move (0x7F = JG rel8, skipping the 2-byte MOV)
2279 emit_opcode(cbuf,0x7F);
2280 emit_d8(cbuf,2);
2281 // move dst,src (0x8B = MOV r32, r/m32)
2282 emit_opcode(cbuf,0x8B);
2283 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2284 %}
2285
2286 enc_class enc_FPR_store(memory mem, regDPR src) %{
2287 // If src is FPR1, we can just FST to store it.
2288 // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2289 int reg_encoding = 0x2; // Just store (FST /2)
2290 int base = $mem$$base;
2291 int index = $mem$$index;
2292 int scale = $mem$$scale;
2293 int displace = $mem$$disp;
2294 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2295 if( $src$$reg != FPR1L_enc ) {
2296 reg_encoding = 0x3; // Store & pop (FSTP /3)
2297 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2298 emit_d8( cbuf, 0xC0-1+$src$$reg );
2299 }
2300 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
2301 emit_opcode(cbuf,$primary);
2302 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2303 %}
2304
2305 enc_class neg_reg(eRegI dst) %{
2306 // NEG $dst
2415 // MOV $dst.lo,$dst.hi
2416 emit_opcode( cbuf, 0x8B );
2417 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2418 // SAR $dst.hi,31
2419 emit_opcode(cbuf, 0xC1);
2420 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2421 emit_d8(cbuf, 0x1F );
2422 // small:
2423 // SHRD $dst.lo,$dst.hi,$shift
2424 emit_opcode(cbuf,0x0F);
2425 emit_opcode(cbuf,0xAD);
2426 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2427 // SAR $dst.hi,$shift"
2428 emit_opcode(cbuf,0xD3);
2429 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2430 %}
2431
2432
2433 // ----------------- Encodings for floating point unit -----------------
2434 // May leave result in FPU-TOS or FPU reg depending on opcodes
2435 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV
// $primary selects the arithmetic opcode; $secondary the /digit extension.
2436 $$$emit8$primary;
2437 emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2438 %}
2439
2440 // Pop argument in FPR0 with FSTP ST(0)
2441 enc_class PopFPU() %{
2442 emit_opcode( cbuf, 0xDD );
2443 emit_d8( cbuf, 0xD8 );
2444 %}
2445
2446 // !!!!! equivalent to Pop_Reg_F
2447 enc_class Pop_Reg_DPR( regDPR dst ) %{
2448 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2449 emit_d8( cbuf, 0xD8+$dst$$reg );
2450 %}
2451
// Push a double register onto the FPU stack with FLD.
2452 enc_class Push_Reg_DPR( regDPR dst ) %{
2453 emit_opcode( cbuf, 0xD9 );
2454 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
2455 %}
2456
// Multiply by a subnormal-bias constant loaded from a stub (strictfp scaling, part 1).
2457 enc_class strictfp_bias1( regDPR dst ) %{
2458 emit_opcode( cbuf, 0xDB ); // FLD m80real
2459 emit_opcode( cbuf, 0x2D );
2460 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2461 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2462 emit_opcode( cbuf, 0xC8+$dst$$reg );
2463 %}
2464
// Multiply by the inverse bias constant to undo strictfp_bias1 (part 2).
2465 enc_class strictfp_bias2( regDPR dst ) %{
2466 emit_opcode( cbuf, 0xDB ); // FLD m80real
2467 emit_opcode( cbuf, 0x2D );
2468 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2469 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2470 emit_opcode( cbuf, 0xC8+$dst$$reg );
2471 %}
2472
2473 // Special case for moving an integer register to a stack slot.
2474 enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2475 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2476 %}
2477
2478 // Special case for moving a register to a stack slot.
2479 enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2480 // Opcode already emitted
2481 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte
2482 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
2483 emit_d32(cbuf, $dst$$disp); // Displacement
2484 %}
2485
2486 // Push the integer in stackSlot 'src' onto FP-stack
2487 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
2488 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2489 %}
2490
2491 // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2492 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2493 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2494 %}
2495
2496 // Same as Pop_Mem_F except for opcode
2497 // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2498 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2499 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2500 %}
2501
// Pop FPU TOS into a double register with FSTP ST(i).
2502 enc_class Pop_Reg_FPR( regFPR dst ) %{
2503 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2504 emit_d8( cbuf, 0xD8+$dst$$reg );
2505 %}
2506
// Push a float register onto the FPU stack with FLD.
2507 enc_class Push_Reg_FPR( regFPR dst ) %{
2508 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2509 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2510 %}
2511
2512 // Push FPU's float to a stack-slot, and pop FPU-stack
2513 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
// /2 = FST (plain store); becomes /3 = FSTP if we had to FLD src first.
2514 int pop = 0x02;
2515 if ($src$$reg != FPR1L_enc) {
2516 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2517 emit_d8( cbuf, 0xC0-1+$src$$reg );
2518 pop = 0x03;
2519 }
2520 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
2521 %}
2522
2523 // Push FPU's double to a stack-slot, and pop FPU-stack
2524 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
// Same FST/FSTP selection as Pop_Mem_Reg_FPR, but with the 64-bit opcode 0xDD.
2525 int pop = 0x02;
2526 if ($src$$reg != FPR1L_enc) {
2527 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2528 emit_d8( cbuf, 0xC0-1+$src$$reg );
2529 pop = 0x03;
2530 }
2531 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
2532 %}
2533
2534 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2535 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2536 int pop = 0xD0 - 1; // -1 since we skip FLD
2537 if ($src$$reg != FPR1L_enc) {
2538 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
2539 emit_d8( cbuf, 0xC0-1+$src$$reg );
2540 pop = 0xD8;
2541 }
2542 emit_opcode( cbuf, 0xDD );
2543 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
2544 %}
2545
2546
// Load dst to FPU TOS; if src is not already FPR1, rotate it into FPR1 using
// fincstp/FXCH/fdecstp so a following modulo-style op sees the expected layout.
2547 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2548 // load dst in FPR0
2549 emit_opcode( cbuf, 0xD9 );
2550 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2551 if ($src$$reg != FPR1L_enc) {
2552 // fincstp
2553 emit_opcode (cbuf, 0xD9);
2554 emit_opcode (cbuf, 0xF7);
2555 // swap src with FPR1:
2556 // FXCH FPR1 with src
2557 emit_opcode(cbuf, 0xD9);
2558 emit_d8(cbuf, 0xC8-1+$src$$reg );
2559 // fdecstp
2560 emit_opcode (cbuf, 0xD9);
2561 emit_opcode (cbuf, 0xF6);
2562 }
2563 %}
2564
// Spill two XMM doubles through [rsp] onto the x87 stack (src1 first, so src0
// ends up on top) for x87-only operations such as FPREM.
2565 enc_class Push_ModD_encoding(regD src0, regD src1) %{
2566 MacroAssembler _masm(&cbuf);
2567 __ subptr(rsp, 8);
2568 __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2569 __ fld_d(Address(rsp, 0));
2570 __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2571 __ fld_d(Address(rsp, 0));
2572 %}
2573
// Float version of Push_ModD_encoding: 4-byte scratch slot, movflt/fld_s.
2574 enc_class Push_ModF_encoding(regF src0, regF src1) %{
2575 MacroAssembler _masm(&cbuf);
2576 __ subptr(rsp, 4);
2577 __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2578 __ fld_s(Address(rsp, 0));
2579 __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2580 __ fld_s(Address(rsp, 0));
2581 %}
2582
// Move the x87 TOS double back into an XMM register and release the scratch slot.
2583 enc_class Push_ResultD(regD dst) %{
2584 MacroAssembler _masm(&cbuf);
2585 __ fstp_d(Address(rsp, 0));
2586 __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2587 __ addptr(rsp, 8);
2588 %}
2589
// Move the x87 TOS float back into an XMM register; $d8 is the slot size to pop.
2590 enc_class Push_ResultF(regF dst, immI d8) %{
2591 MacroAssembler _masm(&cbuf);
2592 __ fstp_s(Address(rsp, 0));
2593 __ movflt($dst$$XMMRegister, Address(rsp, 0));
2594 __ addptr(rsp, $d8$$constant);
2595 %}
2596
// Spill one XMM double through a fresh stack slot onto the x87 stack.
2597 enc_class Push_SrcD(regD src) %{
2598 MacroAssembler _masm(&cbuf);
2599 __ subptr(rsp, 8);
2600 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2601 __ fld_d(Address(rsp, 0));
2602 %}
2603
// Reserve an 8-byte scratch slot on the CPU stack.
2604 enc_class push_stack_temp_qword() %{
2605 MacroAssembler _masm(&cbuf);
2606 __ subptr(rsp, 8);
2607 %}
2608
// Release the 8-byte scratch slot reserved by push_stack_temp_qword.
2609 enc_class pop_stack_temp_qword() %{
2610 MacroAssembler _masm(&cbuf);
2611 __ addptr(rsp, 8);
2612 %}
2613
// Copy an XMM double into the existing [rsp] scratch slot and FLD it
// (caller must have reserved the slot, e.g. via push_stack_temp_qword).
2614 enc_class push_xmm_to_fpr1(regD src) %{
2615 MacroAssembler _masm(&cbuf);
2616 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2617 __ fld_d(Address(rsp, 0));
2618 %}
2619
2620 // Compute X^Y using Intel's fast hardware instructions, if possible.
2621 // Otherwise return a NaN.
2622 enc_class pow_exp_core_encoding %{
2623 // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
2624 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
2625 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
2626 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
2627 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q)
2628 emit_opcode(cbuf,0x1C);
2629 emit_d8(cbuf,0x24);
2630 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
2631 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
2632 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
2633 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q)
2634 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2639 emit_rm(cbuf, 0x3, 0x0, EAX_enc);
2640 emit_d32(cbuf,1023);
2641 emit_opcode(cbuf,0x8B); // mov rbx,eax
2642 emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
2643 emit_opcode(cbuf,0xC1); // shl rax,20 - Slide to exponent position
2644 emit_rm(cbuf,0x3,0x4,EAX_enc);
2645 emit_d8(cbuf,20);
2646 emit_opcode(cbuf,0x85); // test rbx,ecx - check for overflow
2647 emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
2648 emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45); // CMOVne rax,ecx - overflow; stuff NAN into EAX
2649 emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
2650 emit_opcode(cbuf,0x89); // mov [esp+4],eax - Store as part of double word
2651 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
2652 emit_opcode(cbuf,0xC7); // mov [esp+0],0 - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
2653 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2654 emit_d32(cbuf,0);
2655 emit_opcode(cbuf,0xDC); // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
2656 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
2657 %}
2658
// Rotate the result in FPR1 down to src's slot (fincstp/FXCH/fdecstp) when
// src is not FPR1; the actual store is done by a following Pop_Reg/Pop_Mem.
2659 enc_class Push_Result_Mod_DPR( regDPR src) %{
2660 if ($src$$reg != FPR1L_enc) {
2661 // fincstp
2662 emit_opcode (cbuf, 0xD9);
2663 emit_opcode (cbuf, 0xF7);
2664 // FXCH FPR1 with src
2665 emit_opcode(cbuf, 0xD9);
2666 emit_d8(cbuf, 0xC8-1+$src$$reg );
2667 // fdecstp
2668 emit_opcode (cbuf, 0xD9);
2669 emit_opcode (cbuf, 0xF6);
2670 }
2671 // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2672 // // FSTP FPR$dst$$reg
2673 // emit_opcode( cbuf, 0xDD );
2674 // emit_d8( cbuf, 0xD8+$dst$$reg );
2675 %}
2676
// Copy FPU status word into EFLAGS and jump forward 5 bytes if parity clear
// (i.e. skip the fix-up code when the compare result was ordered).
2677 enc_class fnstsw_sahf_skip_parity() %{
2678 // fnstsw ax
2679 emit_opcode( cbuf, 0xDF );
2680 emit_opcode( cbuf, 0xE0 );
2681 // sahf
2682 emit_opcode( cbuf, 0x9E );
2683 // jnp ::skip
2684 emit_opcode( cbuf, 0x7B );
2685 emit_opcode( cbuf, 0x05 );
2686 %}
2687
// FPREM loop: the partial-remainder instruction must be re-run until the
// C2 status flag (surfaced via fnstsw/sahf as PF) reports completion.
2688 enc_class emitModDPR() %{
2689 // fprem must be iterative
2690 // :: loop
2691 // fprem
2692 emit_opcode( cbuf, 0xD9 );
2693 emit_opcode( cbuf, 0xF8 );
2694 // wait
2695 emit_opcode( cbuf, 0x9b );
2696 // fnstsw ax
2697 emit_opcode( cbuf, 0xDF );
2698 emit_opcode( cbuf, 0xE0 );
2699 // sahf
2700 emit_opcode( cbuf, 0x9E );
2701 // jp ::loop (near Jcc with rel32 = -12, back to the fprem)
2702 emit_opcode( cbuf, 0x0F );
2703 emit_opcode( cbuf, 0x8A );
2704 emit_opcode( cbuf, 0xF4 );
2705 emit_opcode( cbuf, 0xFF );
2706 emit_opcode( cbuf, 0xFF );
2707 emit_opcode( cbuf, 0xFF );
2708 %}
3548 %}
3549
3550
// POP EDX (0x5A) -- discard/restore the top CPU-stack word into EDX.
3551 enc_class enc_pop_rdx() %{
3552 emit_opcode(cbuf,0x5A);
3553 %}
3554
// Tail-jump to the shared rethrow stub; the displacement is PC-relative and
// registered for relocation.
3555 enc_class enc_rethrow() %{
3556 cbuf.set_insts_mark();
3557 emit_opcode(cbuf, 0xE9); // jmp entry
3558 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
3559 runtime_call_Relocation::spec(), RELOC_IMM32 );
3560 %}
3561
3562
3563 // Convert a double to an int. Java semantics require we do complex
3564 // manglelations in the corner cases. So we set the rounding mode to
3565 // 'zero', store the darned double down as an int, and reset the
3566 // rounding mode to 'nearest'. The hardware throws an exception which
3567 // patches up the correct value directly to the stack.
3568 enc_class DPR2I_encoding( regDPR src ) %{
3569 // Flip to round-to-zero mode. We attempted to allow invalid-op
3570 // exceptions here, so that a NAN or other corner-case value will
3571 // thrown an exception (but normal values get converted at full speed).
3572 // However, I2C adapters and other float-stack manglers leave pending
3573 // invalid-op exceptions hanging. We would have to clear them before
3574 // enabling them and that is more expensive than just testing for the
3575 // invalid value Intel stores down in the corner cases.
3576 emit_opcode(cbuf,0xD9); // FLDCW trunc
3577 emit_opcode(cbuf,0x2D);
3578 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3579 // Allocate a word
3580 emit_opcode(cbuf,0x83); // SUB ESP,4
3581 emit_opcode(cbuf,0xEC);
3582 emit_d8(cbuf,0x04);
3583 // Encoding assumes a double has been pushed into FPR0.
3584 // Store down the double as an int, popping the FPU stack
3585 emit_opcode(cbuf,0xDB); // FISTP [ESP]
3586 emit_opcode(cbuf,0x1C);
3587 emit_d8(cbuf,0x24);
3588 // Restore the rounding mode; mask the exception
3591 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3592 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3593 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3594
3595 // Load the converted int; adjust CPU stack
3596 emit_opcode(cbuf,0x58); // POP EAX
3597 emit_opcode(cbuf,0x3D); // CMP EAX,imm
3598 emit_d32 (cbuf,0x80000000); // 0x80000000
3599 emit_opcode(cbuf,0x75); // JNE around_slow_call
3600 emit_d8 (cbuf,0x07); // Size of slow_call
3601 // Push src onto stack slow-path
3602 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3603 emit_d8 (cbuf,0xC0-1+$src$$reg );
3604 // CALL directly to the runtime
3605 cbuf.set_insts_mark();
3606 emit_opcode(cbuf,0xE8); // Call into runtime
3607 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3608 // Carry on here...
3609 %}
3610
3611 enc_class DPR2L_encoding( regDPR src ) %{
3612 emit_opcode(cbuf,0xD9); // FLDCW trunc
3613 emit_opcode(cbuf,0x2D);
3614 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3615 // Allocate a word
3616 emit_opcode(cbuf,0x83); // SUB ESP,8
3617 emit_opcode(cbuf,0xEC);
3618 emit_d8(cbuf,0x08);
3619 // Encoding assumes a double has been pushed into FPR0.
3620 // Store down the double as a long, popping the FPU stack
3621 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3622 emit_opcode(cbuf,0x3C);
3623 emit_d8(cbuf,0x24);
3624 // Restore the rounding mode; mask the exception
3625 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3626 emit_opcode(cbuf,0x2D);
3627 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3628 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3629 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3630
3631 // Load the converted int; adjust CPU stack
3633 emit_opcode(cbuf,0x5A); // POP EDX
3634 emit_opcode(cbuf,0x81); // CMP EDX,imm
3635 emit_d8 (cbuf,0xFA); // rdx
3636 emit_d32 (cbuf,0x80000000); // 0x80000000
3637 emit_opcode(cbuf,0x75); // JNE around_slow_call
3638 emit_d8 (cbuf,0x07+4); // Size of slow_call
3639 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3640 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3641 emit_opcode(cbuf,0x75); // JNE around_slow_call
3642 emit_d8 (cbuf,0x07); // Size of slow_call
3643 // Push src onto stack slow-path
3644 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3645 emit_d8 (cbuf,0xC0-1+$src$$reg );
3646 // CALL directly to the runtime
3647 cbuf.set_insts_mark();
3648 emit_opcode(cbuf,0xE8); // Call into runtime
3649 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3650 // Carry on here...
3651 %}
3652
3653 enc_class FMul_ST_reg( eRegFPR src1 ) %{
3654 // Operand was loaded from memory into fp ST (stack top)
3655 // FMUL ST,$src /* D8 C8+i */
3656 emit_opcode(cbuf, 0xD8);
3657 emit_opcode(cbuf, 0xC8 + $src1$$reg);
3658 %}
3659
// Add st(src2) into TOS without popping.
3660 enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3661 // FADDP ST,src2 /* D8 C0+i */
3662 emit_opcode(cbuf, 0xD8);
3663 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3664 //could use FADDP src2,fpST /* DE C0+i */
3665 %}
3666
// Add TOS into st(src2) and pop the FPU stack.
3667 enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3668 // FADDP src2,ST /* DE C0+i */
3669 emit_opcode(cbuf, 0xDE);
3670 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3671 %}
3672
// TOS = (TOS - st(src1)) / st(src2), leaving the result on TOS.
3673 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3674 // Operand has been loaded into fp ST (stack top)
3675 // FSUB ST,$src1
3676 emit_opcode(cbuf, 0xD8);
3677 emit_opcode(cbuf, 0xE0 + $src1$$reg);
3678
3679 // FDIV
3680 emit_opcode(cbuf, 0xD8);
3681 emit_opcode(cbuf, 0xF0 + $src2$$reg);
3682 %}
3683
// TOS = (TOS + st(src1)) * st(src2), leaving the result on TOS.
3684 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3685 // Operand was loaded from memory into fp ST (stack top)
3686 // FADD ST,$src /* D8 C0+i */
3687 emit_opcode(cbuf, 0xD8);
3688 emit_opcode(cbuf, 0xC0 + $src1$$reg);
3689
3690 // FMUL ST,src2 /* D8 C*+i */
3691 emit_opcode(cbuf, 0xD8);
3692 emit_opcode(cbuf, 0xC8 + $src2$$reg);
3693 %}
3694
3695
// Like MulFAddF but the multiply pops: st(src2) *= TOS, then TOS is popped.
3696 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3697 // Operand was loaded from memory into fp ST (stack top)
3698 // FADD ST,$src /* D8 C0+i */
3699 emit_opcode(cbuf, 0xD8);
3700 emit_opcode(cbuf, 0xC0 + $src1$$reg);
3701
3702 // FMULP src2,ST /* DE C8+i */
3703 emit_opcode(cbuf, 0xDE);
3704 emit_opcode(cbuf, 0xC8 + $src2$$reg);
3705 %}
3706
3707 // Atomically load the volatile long
3708 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3709 emit_opcode(cbuf,0xDF);
3710 int rm_byte_opcode = 0x05;
3711 int base = $mem$$base;
3712 int index = $mem$$index;
3713 int scale = $mem$$scale;
3714 int displace = $mem$$disp;
3715 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
3716 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
// Long Immediate: exactly the low-32-bit mask 0xFFFFFFFF.
4109 operand immL_32bits() %{
4110 predicate(n->get_long() == 0xFFFFFFFFL);
4111 match(ConL);
4112 op_cost(0);
4113
4114 format %{ %}
4115 interface(CONST_INTER);
4116 %}
4117
4118 // Long Immediate: value representable as a signed 32-bit int
4119 operand immL32() %{
4120 predicate(n->get_long() == (int)(n->get_long()));
4121 match(ConL);
4122 op_cost(20);
4123
4124 format %{ %}
4125 interface(CONST_INTER);
4126 %}
4127
4128 //Double Immediate zero (x87 path, UseSSE<=1)
4129 operand immDPR0() %{
4130 // Do additional (and counter-intuitive) test against NaN to work around VC++
4131 // bug that generates code such that NaNs compare equal to 0.0
4132 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4133 match(ConD);
4134
4135 op_cost(5);
4136 format %{ %}
4137 interface(CONST_INTER);
4138 %}
4139
4140 // Double Immediate one (x87 path)
4141 operand immDPR1() %{
4142 predicate( UseSSE<=1 && n->getd() == 1.0 );
4143 match(ConD);
4144
4145 op_cost(5);
4146 format %{ %}
4147 interface(CONST_INTER);
4148 %}
4149
4150 // Double Immediate (any value, x87 path)
4151 operand immDPR() %{
4152 predicate(UseSSE<=1);
4153 match(ConD);
4154
4155 op_cost(5);
4156 format %{ %}
4157 interface(CONST_INTER);
4158 %}
4159
// Double Immediate (any value, SSE2 path)
4160 operand immD() %{
4161 predicate(UseSSE>=2);
4162 match(ConD);
4163
4164 op_cost(5);
4165 format %{ %}
4166 interface(CONST_INTER);
4167 %}
4168
4169 // Double Immediate zero (SSE2 path; bit-pattern test excludes -0.0 and NaN)
4170 operand immD0() %{
4171 // Do additional (and counter-intuitive) test against NaN to work around VC++
4172 // bug that generates code such that NaNs compare equal to 0.0 AND do not
4173 // compare equal to -0.0.
4174 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4175 match(ConD);
4176
4177 format %{ %}
4178 interface(CONST_INTER);
4179 %}
4180
4181 // Float Immediate zero (x87 path, UseSSE == 0)
4182 operand immFPR0() %{
4183 predicate(UseSSE == 0 && n->getf() == 0.0F);
4184 match(ConF);
4185
4186 op_cost(5);
4187 format %{ %}
4188 interface(CONST_INTER);
4189 %}
4190
4191 // Float Immediate one (x87 path)
4192 operand immFPR1() %{
4193 predicate(UseSSE == 0 && n->getf() == 1.0F);
4194 match(ConF);
4195
4196 op_cost(5);
4197 format %{ %}
4198 interface(CONST_INTER);
4199 %}
4200
4201 // Float Immediate (any value, x87 path)
4202 operand immFPR() %{
4203 predicate( UseSSE == 0 );
4204 match(ConF);
4205
4206 op_cost(5);
4207 format %{ %}
4208 interface(CONST_INTER);
4209 %}
4210
4211 // Float Immediate (any value, SSE path)
4212 operand immF() %{
4213 predicate(UseSSE >= 1);
4214 match(ConF);
4215
4216 op_cost(5);
4217 format %{ %}
4218 interface(CONST_INTER);
4219 %}
4220
4221 // Float Immediate zero. Zero and not -0.0 (bit-pattern test, SSE path)
4222 operand immF0() %{
4223 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4224 match(ConF);
4225
4226 op_cost(5);
4227 format %{ %}
4228 interface(CONST_INTER);
4229 %}
4230
4231 // Immediates for special shifts (sign extend)
4232
4233 // Constants for increment
// Matches the int constant 16 (used e.g. by 16-bit shift/sign-extend rules).
4234 operand immI_16() %{
4235 predicate( n->get_int() == 16 );
4236 match(ConI);
4237
4238 format %{ %}
4239 interface(CONST_INTER);
4240 %}
4241
4242 operand immI_24() %{
// Condition-flag operands for long (64-bit) compares; the suffix names the
// condition pair the flags are valid for after the long-compare idiom.
4578 operand flagsReg_long_LTGE() %{
4579 constraint(ALLOC_IN_RC(int_flags));
4580 match(RegFlags);
4581 format %{ "FLAGS_LTGE" %}
4582 interface(REG_INTER);
4583 %}
// Flags valid for EQ/NE long compares.
4584 operand flagsReg_long_EQNE() %{
4585 constraint(ALLOC_IN_RC(int_flags));
4586 match(RegFlags);
4587 format %{ "FLAGS_EQNE" %}
4588 interface(REG_INTER);
4589 %}
// Flags valid for LE/GT long compares.
4590 operand flagsReg_long_LEGT() %{
4591 constraint(ALLOC_IN_RC(int_flags));
4592 match(RegFlags);
4593 format %{ "FLAGS_LEGT" %}
4594 interface(REG_INTER);
4595 %}
4596
4597 // Float register operands
// x87 double register operands (only used when UseSSE < 2).
4598 operand regDPR() %{
4599 predicate( UseSSE < 2 );
4600 constraint(ALLOC_IN_RC(dbl_reg));
4601 match(RegD);
4602 match(regDPR1);
4603 match(regDPR2);
4604 format %{ %}
4605 interface(REG_INTER);
4606 %}
4607
// Pinned to FPU stack top (ST0).
4608 operand regDPR1(regDPR reg) %{
4609 predicate( UseSSE < 2 );
4610 constraint(ALLOC_IN_RC(dbl_reg0));
4611 match(reg);
4612 format %{ "FPR1" %}
4613 interface(REG_INTER);
4614 %}
4615
// Pinned to FPU stack slot ST1.
4616 operand regDPR2(regDPR reg) %{
4617 predicate( UseSSE < 2 );
4618 constraint(ALLOC_IN_RC(dbl_reg1));
4619 match(reg);
4620 format %{ "FPR2" %}
4621 interface(REG_INTER);
4622 %}
4623
// Any x87 double register except ST0.
4624 operand regnotDPR1(regDPR reg) %{
4625 predicate( UseSSE < 2 );
4626 constraint(ALLOC_IN_RC(dbl_notreg0));
4627 match(reg);
4628 format %{ %}
4629 interface(REG_INTER);
4630 %}
4631
4632 // XMM Double register operands
// XMM double register (UseSSE>=2); regD6/regD7 below are pinned subsets.
4633 operand regD() %{
4634 predicate( UseSSE>=2 );
4635 constraint(ALLOC_IN_RC(xdb_reg));
4636 match(RegD);
4637 match(regD6);
4638 match(regD7);
4639 format %{ %}
4640 interface(REG_INTER);
4641 %}
4642
4643 // XMM6 double register operands
4644 operand regD6(regD reg) %{
4645 predicate( UseSSE>=2 );
4646 constraint(ALLOC_IN_RC(xdb_reg6));
4647 match(reg);
4648 format %{ "XMM6" %}
4649 interface(REG_INTER);
4650 %}
4651
4652 // XMM7 double register operands
4653 operand regD7(regD reg) %{
4654 predicate( UseSSE>=2 );
4655 constraint(ALLOC_IN_RC(xdb_reg7));
4656 match(reg);
4657 format %{ "XMM7" %}
4658 interface(REG_INTER);
4659 %}
4660
4661 // Float register operands
// x87 float register (UseSSE < 2).
4662 operand regFPR() %{
4663 predicate( UseSSE < 2 );
4664 constraint(ALLOC_IN_RC(flt_reg));
4665 match(RegF);
4666 match(regFPR1);
4667 format %{ %}
4668 interface(REG_INTER);
4669 %}
4670
4671 // Float register operands
// x87 float pinned to stack top (ST0).
4672 operand regFPR1(regFPR reg) %{
4673 predicate( UseSSE < 2 );
4674 constraint(ALLOC_IN_RC(flt_reg0));
4675 match(reg);
4676 format %{ "FPR1" %}
4677 interface(REG_INTER);
4678 %}
4679
4680 // XMM register operands
// XMM float register (UseSSE>=1).
4681 operand regF() %{
4682 predicate( UseSSE>=1 );
4683 constraint(ALLOC_IN_RC(xmm_reg));
4684 match(RegF);
4685 format %{ %}
4686 interface(REG_INTER);
4687 %}
4688
4689
4690 //----------Memory Operands----------------------------------------------------
4691 // Direct Memory Operand
4692 operand direct(immP addr) %{
4693 match(addr);
4694
4695 format %{ "[$addr]" %}
4696 interface(MEMORY_INTER) %{
4697 base(0xFFFFFFFF);
4698 index(0x4);
4699 scale(0x0);
4700 disp($addr);
4701 %}
// Pipeline descriptions for conditional moves: resource/stage usage only,
// consumed by the scheduler; they carry no encoding.
5405 // Conditional move reg-mem
5406 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
5407 single_instruction;
5408 dst : S4(write);
5409 src : S3(read);
5410 cr : S3(read);
5411 DECODE : S0; // any decoder
5412 MEM : S3;
5413 %}
5414
5415 // Conditional move reg-reg long
5416 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
5417 single_instruction;
5418 dst : S4(write);
5419 src : S3(read);
5420 cr : S3(read);
5421 DECODE : S0(2); // any 2 decoders
5422 %}
5423
5424 // Conditional move double reg-reg
5425 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
5426 single_instruction;
5427 dst : S4(write);
5428 src : S3(read);
5429 cr : S3(read);
5430 DECODE : S0; // any decoder
5431 %}
5432
5433 // Float single-register operation (takes only a dst operand)
5434 pipe_class fpu_reg(regDPR dst) %{
5435 instruction_count(2);
5436 dst : S3(read);
5437 DECODE : S0(2); // any 2 decoders
5438 FPU : S3;
5439 %}
5440
5441 // Float reg-reg operation
5442 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
5443 instruction_count(2);
5444 dst : S4(write);
5445 src : S3(read);
5446 DECODE : S0(2); // any 2 decoders
5447 FPU : S3;
5448 %}
5449
5450 // Float reg-reg operation
// Three-register form: two source reads feeding one write.
5451 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
5452 instruction_count(3);
5453 dst : S4(write);
5454 src1 : S3(read);
5455 src2 : S3(read);
5456 DECODE : S0(3); // any 3 decoders
5457 FPU : S3(2);
5458 %}
5459
5460 // Float reg-reg operation
// Four-register form: three source reads feeding one write.
5461 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
5462 instruction_count(4);
5463 dst : S4(write);
5464 src1 : S3(read);
5465 src2 : S3(read);
5466 src3 : S3(read);
5467 DECODE : S0(4); // any 4 decoders
5468 FPU : S3(2);
5469 %}
5470
5471 // Float reg-mem-reg-reg operation (one memory source plus two register sources)
5472 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5473 instruction_count(4);
5474 dst : S4(write);
5475 src1 : S3(read);
5476 src2 : S3(read);
5477 src3 : S3(read);
5478 DECODE : S1(3); // any 3 decoders
5479 D0 : S0; // Big decoder only
5480 FPU : S3(2);
5481 MEM : S3;
5482 %}
5483
5484 // Float reg-mem operation
5485 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5486 instruction_count(2);
5487 dst : S5(write);
5488 mem : S3(read);
5489 D0 : S0; // big decoder only
5490 DECODE : S1; // any decoder for FPU POP
5491 FPU : S4;
5492 MEM : S3; // any mem
5493 %}
5494
5495 // Float reg-mem operation
// Reg-reg-mem form: one register source plus one memory source.
5496 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5497 instruction_count(3);
5498 dst : S5(write);
5499 src1 : S3(read);
5500 mem : S3(read);
5501 D0 : S0; // big decoder only
5502 DECODE : S1(2); // any decoder for FPU POP
5503 FPU : S4;
5504 MEM : S3; // any mem
5505 %}
5506
5507 // Float mem-reg operation
5508 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5509 instruction_count(2);
5510 src : S5(read);
5511 mem : S3(read);
5512 DECODE : S0; // any decoder for FPU PUSH
5513 D0 : S1; // big decoder only
5514 FPU : S4;
5515 MEM : S3; // any mem
5516 %}
5517
// Float mem-reg-reg operation (store fed by two register sources).
5518 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5519 instruction_count(3);
5520 src1 : S3(read);
5521 src2 : S3(read);
5522 mem : S3(read);
5523 DECODE : S0(2); // any decoder for FPU PUSH
5524 D0 : S1; // big decoder only
5525 FPU : S4;
5526 MEM : S3; // any mem
5527 %}
5528
// Float mem-reg-mem operation (store fed by a register and a memory source).
5529 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5530 instruction_count(3);
5531 src1 : S3(read);
5532 src2 : S3(read);
5533 mem : S4(read);
5534 DECODE : S0; // any decoder for FPU PUSH
5535 D0 : S0(2); // big decoder only
5536 FPU : S4;
5537 MEM : S3(2); // any mem
5538 %}
5539
// Float mem-to-mem operation.
5540 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5541 instruction_count(2);
5542 src1 : S3(read);
5543 dst : S4(read);
5544 D0 : S0(2); // big decoder only
5545 MEM : S3(2); // any mem
5546 %}
5547
// Float mem-mem-mem operation (two memory sources, one memory destination).
5548 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5549 instruction_count(3);
5550 src1 : S3(read);
5551 src2 : S3(read);
5552 dst : S4(read);
5553 D0 : S0(3); // big decoder only
5554 FPU : S4;
5555 MEM : S3(3); // any mem
5556 %}
5557
// Float mem-reg operation with a constant-table load.
5558 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5559 instruction_count(3);
5560 src1 : S4(read);
5561 mem : S4(read);
5562 DECODE : S0; // any decoder for FPU PUSH
5563 D0 : S0(2); // big decoder only
5564 FPU : S4;
5565 MEM : S3(2); // any mem
5566 %}
5567
5568 // Float load constant
5569 pipe_class fpu_reg_con(regDPR dst) %{
5570 instruction_count(2);
5571 dst : S5(write);
5572 D0 : S0; // big decoder only for the load
5573 DECODE : S1; // any decoder for FPU POP
5574 FPU : S4;
5575 MEM : S3; // any mem
5576 %}
5577
5578 // Float load constant
// Constant load combined with one register source.
5579 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5580 instruction_count(3);
5581 dst : S5(write);
5582 src : S3(read);
5583 D0 : S0; // big decoder only for the load
5584 DECODE : S1(2); // any decoder for FPU POP
5585 FPU : S4;
5586 MEM : S3; // any mem
5587 %}
5588
5589 // UnConditional branch
// Uses only the branch unit at stage S3.
5590 pipe_class pipe_jmp( label labl ) %{
5591 single_instruction;
5592 BR : S3;
5593 %}
5594
5595 // Conditional branch
5596 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5597 single_instruction;
5598 cr : S1(read);
5599 BR : S3;
6274 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
6275 %}
6276
6277 ins_pipe(ialu_reg_long_mem);
6278 %}
6279
6280 // Volatile Load Long. Must be atomic, so do 64-bit FILD
6281 // then store it down to the stack and reload on the int
6282 // side.
// x87 path (UseSSE<=1): FILD/FISTP gives a single 64-bit memory access.
6283 instruct loadL_volatile(stackSlotL dst, memory mem) %{
6284 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
6285 match(Set dst (LoadL mem));
6286
6287 ins_cost(200);
6288 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
6289 "FISTp $dst" %}
6290 ins_encode(enc_loadL_volatile(mem,dst));
6291 ins_pipe( fpu_reg_mem );
6292 %}
6293
// SSE2 path: a single MOVSD load is atomic; result lands in a stack slot.
6294 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
6295 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6296 match(Set dst (LoadL mem));
6297 effect(TEMP tmp);
6298 ins_cost(180);
6299 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6300 "MOVSD $dst,$tmp" %}
6301 ins_encode %{
6302 __ movdbl($tmp$$XMMRegister, $mem$$Address);
6303 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
6304 %}
6305 ins_pipe( pipe_slow );
6306 %}
6307
// SSE2 path into a GPR pair: split the 64-bit XMM value with MOVD/PSRLQ.
6308 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
6309 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6310 match(Set dst (LoadL mem));
6311 effect(TEMP tmp);
6312 ins_cost(160);
6313 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6314 "MOVD $dst.lo,$tmp\n\t"
6315 "PSRLQ $tmp,32\n\t"
6316 "MOVD $dst.hi,$tmp" %}
6317 ins_encode %{
6318 __ movdbl($tmp$$XMMRegister, $mem$$Address);
6319 __ movdl($dst$$Register, $tmp$$XMMRegister);
6320 __ psrlq($tmp$$XMMRegister, 32);
6321 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
6322 %}
6323 ins_pipe( pipe_slow );
6324 %}
6325
6326 // Load Range
6327 instruct loadRange(eRegI dst, memory mem) %{
6328 match(Set dst (LoadRange mem));
6341
6342 ins_cost(125);
6343 format %{ "MOV $dst,$mem" %}
6344 opcode(0x8B);
6345 ins_encode( OpcP, RegMem(dst,mem));
6346 ins_pipe( ialu_reg_mem );
6347 %}
6348
6349 // Load Klass Pointer
// Plain 32-bit MOV (opcode 0x8B = MOV r32, r/m32).
6350 instruct loadKlass(eRegP dst, memory mem) %{
6351 match(Set dst (LoadKlass mem));
6352
6353 ins_cost(125);
6354 format %{ "MOV $dst,$mem" %}
6355 opcode(0x8B);
6356 ins_encode( OpcP, RegMem(dst,mem));
6357 ins_pipe( ialu_reg_mem );
6358 %}
6359
6360 // Load Double
// x87 path: FLD m64real (DD /0) then pop into the target FPU register.
6361 instruct loadDPR(regDPR dst, memory mem) %{
6362 predicate(UseSSE<=1);
6363 match(Set dst (LoadD mem));
6364
6365 ins_cost(150);
6366 format %{ "FLD_D ST,$mem\n\t"
6367 "FSTP $dst" %}
6368 opcode(0xDD); /* DD /0 */
6369 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6370 Pop_Reg_DPR(dst) );
6371 ins_pipe( fpu_reg_mem );
6372 %}
6373
6374 // Load Double to XMM
6375 instruct loadD(regD dst, memory mem) %{
6376 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
6377 match(Set dst (LoadD mem));
6378 ins_cost(145);
6379 format %{ "MOVSD $dst,$mem" %}
6380 ins_encode %{
6381 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6382 %}
6383 ins_pipe( pipe_slow );
6384 %}
6385
// !UseXmmLoadAndClearUpper variant; presumably movdbl() emits MOVLPD here
// (matching the format string) based on that flag -- verify in MacroAssembler.
6386 instruct loadD_partial(regD dst, memory mem) %{
6387 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
6388 match(Set dst (LoadD mem));
6389 ins_cost(145);
6390 format %{ "MOVLPD $dst,$mem" %}
6391 ins_encode %{
6392 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6393 %}
6394 ins_pipe( pipe_slow );
6395 %}
6396
6397 // Load to XMM register (single-precision floating point)
6398 // MOVSS instruction
6399 instruct loadF(regF dst, memory mem) %{
6400 predicate(UseSSE>=1);
6401 match(Set dst (LoadF mem));
6402 ins_cost(145);
6403 format %{ "MOVSS $dst,$mem" %}
6404 ins_encode %{
6405 __ movflt ($dst$$XMMRegister, $mem$$Address);
6406 %}
6407 ins_pipe( pipe_slow );
6408 %}
6409
6410 // Load Float
// x87 path: FLD m32real (D9 /0) then pop into the target FPU register.
6411 instruct loadFPR(regFPR dst, memory mem) %{
6412 predicate(UseSSE==0);
6413 match(Set dst (LoadF mem));
6414
6415 ins_cost(150);
6416 format %{ "FLD_S ST,$mem\n\t"
6417 "FSTP $dst" %}
6418 opcode(0xD9); /* D9 /0 */
6419 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6420 Pop_Reg_FPR(dst) );
6421 ins_pipe( fpu_reg_mem );
6422 %}
6423
// Packed 64-bit vector loads into the low half of an XMM register (MOVQ).
6424 // Load Aligned Packed Byte to XMM register
6425 instruct loadA8B(regD dst, memory mem) %{
6426 predicate(UseSSE>=1);
6427 match(Set dst (Load8B mem));
6428 ins_cost(125);
6429 format %{ "MOVQ $dst,$mem\t! packed8B" %}
6430 ins_encode %{
6431 __ movq($dst$$XMMRegister, $mem$$Address);
6432 %}
6433 ins_pipe( pipe_slow );
6434 %}
6435
6436 // Load Aligned Packed Short to XMM register
6437 instruct loadA4S(regD dst, memory mem) %{
6438 predicate(UseSSE>=1);
6439 match(Set dst (Load4S mem));
6440 ins_cost(125);
6441 format %{ "MOVQ $dst,$mem\t! packed4S" %}
6442 ins_encode %{
6443 __ movq($dst$$XMMRegister, $mem$$Address);
6444 %}
6445 ins_pipe( pipe_slow );
6446 %}
6447
6448 // Load Aligned Packed Char to XMM register
6449 instruct loadA4C(regD dst, memory mem) %{
6450 predicate(UseSSE>=1);
6451 match(Set dst (Load4C mem));
6452 ins_cost(125);
6453 format %{ "MOVQ $dst,$mem\t! packed4C" %}
6454 ins_encode %{
6455 __ movq($dst$$XMMRegister, $mem$$Address);
6456 %}
6457 ins_pipe( pipe_slow );
6458 %}
6459
6460 // Load Aligned Packed Integer to XMM register
6461 instruct load2IU(regD dst, memory mem) %{
6462 predicate(UseSSE>=1);
6463 match(Set dst (Load2I mem));
6464 ins_cost(125);
6465 format %{ "MOVQ $dst,$mem\t! packed2I" %}
6466 ins_encode %{
6467 __ movq($dst$$XMMRegister, $mem$$Address);
6468 %}
6469 ins_pipe( pipe_slow );
6470 %}
6471
6472 // Load Aligned Packed Single to XMM
6473 instruct loadA2F(regD dst, memory mem) %{
6474 predicate(UseSSE>=1);
6475 match(Set dst (Load2F mem));
6476 ins_cost(145);
6477 format %{ "MOVQ $dst,$mem\t! packed2F" %}
6478 ins_encode %{
6479 __ movq($dst$$XMMRegister, $mem$$Address);
6480 %}
6481 ins_pipe( pipe_slow );
6482 %}
6483
6484 // Load Effective Address
// LEA with an 8-bit displacement addressing form (opcode 0x8D).
6485 instruct leaP8(eRegP dst, indOffset8 mem) %{
6486 match(Set dst mem);
6487
6488 ins_cost(110);
6489 format %{ "LEA $dst,$mem" %}
6490 opcode(0x8D);
6491 ins_encode( OpcP, RegMem(dst,mem));
6492 ins_pipe( ialu_reg_reg_fat );
6493 %}
6567 effect(KILL cr);
6568 ins_cost(200);
6569 format %{ "MOV $dst.lo,$src.lo\n\t"
6570 "MOV $dst.hi,$src.hi" %}
6571 opcode(0xB8);
6572 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6573 ins_pipe( ialu_reg_long_fat );
6574 %}
6575
// Long constant zero: XOR both halves (shorter than two MOV imm32);
// XOR clobbers EFLAGS, hence the KILL cr effect.
6576 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6577 match(Set dst src);
6578 effect(KILL cr);
6579 ins_cost(150);
6580 format %{ "XOR $dst.lo,$dst.lo\n\t"
6581 "XOR $dst.hi,$dst.hi" %}
6582 opcode(0x33,0x33);
6583 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6584 ins_pipe( ialu_reg_long );
6585 %}
6586
6587 // The instruction usage is guarded by predicate in operand immFPR().
// x87 float constant: FLD from the constant table, then pop into dst.
6588 instruct loadConFPR(regFPR dst, immFPR con) %{
6589 match(Set dst con);
6590 ins_cost(125);
6591 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6592 "FSTP $dst" %}
6593 ins_encode %{
6594 __ fld_s($constantaddress($con));
6595 __ fstp_d($dst$$reg);
6596 %}
6597 ins_pipe(fpu_reg_con);
6598 %}
6599
6600 // The instruction usage is guarded by predicate in operand immFPR0().
// Zero via FLDZ: no constant-table access needed.
6601 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6602 match(Set dst con);
6603 ins_cost(125);
6604 format %{ "FLDZ ST\n\t"
6605 "FSTP $dst" %}
6606 ins_encode %{
6607 __ fldz();
6608 __ fstp_d($dst$$reg);
6609 %}
6610 ins_pipe(fpu_reg_con);
6611 %}
6612
6613 // The instruction usage is guarded by predicate in operand immFPR1().
// One via FLD1: no constant-table access needed.
6614 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6615 match(Set dst con);
6616 ins_cost(125);
6617 format %{ "FLD1 ST\n\t"
6618 "FSTP $dst" %}
6619 ins_encode %{
6620 __ fld1();
6621 __ fstp_d($dst$$reg);
6622 %}
6623 ins_pipe(fpu_reg_con);
6624 %}
6625
6626 // The instruction usage is guarded by predicate in operand immF().
// SSE float constant: MOVSS from the constant table.
6627 instruct loadConF(regF dst, immF con) %{
6628 match(Set dst con);
6629 ins_cost(125);
6630 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6631 ins_encode %{
6632 __ movflt($dst$$XMMRegister, $constantaddress($con));
6633 %}
6634 ins_pipe(pipe_slow);
6635 %}
6636
6637 // The instruction usage is guarded by predicate in operand immF0().
// Float 0.0: XORPS dst with itself (cheaper than a memory load).
6638 instruct loadConF0(regF dst, immF0 src) %{
6639 match(Set dst src);
6640 ins_cost(100);
6641 format %{ "XORPS $dst,$dst\t# float 0.0" %}
6642 ins_encode %{
6643 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6644 %}
6645 ins_pipe(pipe_slow);
6646 %}
6647
6648 // The instruction usage is guarded by predicate in operand immDPR().
// x87 double constant: FLD from the constant table, then pop into dst.
6649 instruct loadConDPR(regDPR dst, immDPR con) %{
6650 match(Set dst con);
6651 ins_cost(125);
6652
6653 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6654 "FSTP $dst" %}
6655 ins_encode %{
6656 __ fld_d($constantaddress($con));
6657 __ fstp_d($dst$$reg);
6658 %}
6659 ins_pipe(fpu_reg_con);
6660 %}
6661
6662 // The instruction usage is guarded by predicate in operand immDPR0().
// Double zero via FLDZ: no constant-table access needed.
6663 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6664 match(Set dst con);
6665 ins_cost(125);
6666
6667 format %{ "FLDZ ST\n\t"
6668 "FSTP $dst" %}
6669 ins_encode %{
6670 __ fldz();
6671 __ fstp_d($dst$$reg);
6672 %}
6673 ins_pipe(fpu_reg_con);
6674 %}
6675
6676 // The instruction usage is guarded by predicate in operand immDPR1().
// Double one via FLD1: no constant-table access needed.
6677 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6678 match(Set dst con);
6679 ins_cost(125);
6680
6681 format %{ "FLD1 ST\n\t"
6682 "FSTP $dst" %}
6683 ins_encode %{
6684 __ fld1();
6685 __ fstp_d($dst$$reg);
6686 %}
6687 ins_pipe(fpu_reg_con);
6688 %}
6689
6690 // The instruction usage is guarded by predicate in operand immD().
// SSE2 double constant: MOVSD from the constant table.
6691 instruct loadConD(regD dst, immD con) %{
6692 match(Set dst con);
6693 ins_cost(125);
6694 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6695 ins_encode %{
6696 __ movdbl($dst$$XMMRegister, $constantaddress($con));
6697 %}
6698 ins_pipe(pipe_slow);
6699 %}
6700
6701 // The instruction usage is guarded by predicate in operand immD0().
// Double 0.0: XORPD dst with itself (cheaper than a memory load).
6702 instruct loadConD0(regD dst, immD0 src) %{
6703 match(Set dst src);
6704 ins_cost(100);
6705 format %{ "XORPD $dst,$dst\t# double 0.0" %}
6706 ins_encode %{
6707 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6708 %}
6709 ins_pipe( pipe_slow );
6710 %}
6711
6712 // Load Stack Slot
// MOV r32, [ESP+disp] (opcode 0x8B).
6713 instruct loadSSI(eRegI dst, stackSlotI src) %{
6714 match(Set dst src);
6715 ins_cost(125);
6716
6717 format %{ "MOV $dst,$src" %}
6718 opcode(0x8B);
6719 ins_encode( OpcP, RegMem(dst,src));
6720 ins_pipe( ialu_reg_mem );
6721 %}
6722
6726 ins_cost(200);
6727 format %{ "MOV $dst,$src.lo\n\t"
6728 "MOV $dst+4,$src.hi" %}
6729 opcode(0x8B, 0x8B);
6730 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6731 ins_pipe( ialu_mem_long_reg );
6732 %}
6733
6734 // Load Stack Slot
// Pointer variant of the stack-slot load (same MOV encoding).
6735 instruct loadSSP(eRegP dst, stackSlotP src) %{
6736 match(Set dst src);
6737 ins_cost(125);
6738
6739 format %{ "MOV $dst,$src" %}
6740 opcode(0x8B);
6741 ins_encode( OpcP, RegMem(dst,src));
6742 ins_pipe( ialu_reg_mem );
6743 %}
6744
6745 // Load Stack Slot
// x87 float from stack slot: FLD m32real then pop into dst.
6746 instruct loadSSF(regFPR dst, stackSlotF src) %{
6747 match(Set dst src);
6748 ins_cost(125);
6749
6750 format %{ "FLD_S $src\n\t"
6751 "FSTP $dst" %}
6752 opcode(0xD9); /* D9 /0, FLD m32real */
6753 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6754 Pop_Reg_FPR(dst) );
6755 ins_pipe( fpu_reg_mem );
6756 %}
6757
6758 // Load Stack Slot
// x87 double from stack slot: FLD m64real then pop into dst.
6759 instruct loadSSD(regDPR dst, stackSlotD src) %{
6760 match(Set dst src);
6761 ins_cost(125);
6762
6763 format %{ "FLD_D $src\n\t"
6764 "FSTP $dst" %}
6765 opcode(0xDD); /* DD /0, FLD m64real */
6766 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6767 Pop_Reg_DPR(dst) );
6768 ins_pipe( fpu_reg_mem );
6769 %}
6770
6771 // Prefetch instructions.
6772 // Must be safe to execute with invalid address (cannot fault).
6773
// No prefetch support available: match the node but emit nothing.
6774 instruct prefetchr0( memory mem ) %{
6775 predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
6776 match(PrefetchRead mem);
6777 ins_cost(0);
6778 size(0);
6779 format %{ "PREFETCHR (non-SSE is empty encoding)" %}
6780 ins_encode();
6781 ins_pipe(empty);
6782 %}
6783
6784 instruct prefetchr( memory mem ) %{
6785 predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3);
6786 match(PrefetchRead mem);
6787 ins_cost(100);
6982 ins_pipe(ialu_mem_reg);
6983 %}
6984
6985 // Volatile Store Long. Must be atomic, so move it into
6986 // the FP TOS and then do a 64-bit FIST. Has to probe the
6987 // target address before the store (for null-ptr checks)
6988 // so the memory operand is used twice in the encoding.
// x87 path (UseSSE<=1); CMP probe clobbers EFLAGS, hence KILL cr.
6989 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6990 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6991 match(Set mem (StoreL mem src));
6992 effect( KILL cr );
6993 ins_cost(400);
6994 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
6995 "FILD $src\n\t"
6996 "FISTp $mem\t # 64-bit atomic volatile long store" %}
6997 opcode(0x3B);
6998 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6999 ins_pipe( fpu_reg_mem );
7000 %}
7001
// SSE2 path from a stack slot: one MOVSD store is atomic.
7002 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
7003 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7004 match(Set mem (StoreL mem src));
7005 effect( TEMP tmp, KILL cr );
7006 ins_cost(380);
7007 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7008 "MOVSD $tmp,$src\n\t"
7009 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7010 ins_encode %{
7011 __ cmpl(rax, $mem$$Address);
7012 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
7013 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7014 %}
7015 ins_pipe( pipe_slow );
7016 %}
7017
// SSE2 path from a GPR pair: pack lo/hi into one XMM with PUNPCKLDQ,
// then store the 64 bits with a single MOVSD.
7018 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
7019 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7020 match(Set mem (StoreL mem src));
7021 effect( TEMP tmp2 , TEMP tmp, KILL cr );
7022 ins_cost(360);
7023 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7024 "MOVD $tmp,$src.lo\n\t"
7025 "MOVD $tmp2,$src.hi\n\t"
7026 "PUNPCKLDQ $tmp,$tmp2\n\t"
7027 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7028 ins_encode %{
7029 __ cmpl(rax, $mem$$Address);
7030 __ movdl($tmp$$XMMRegister, $src$$Register);
7031 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
7032 __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
7033 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7034 %}
7035 ins_pipe( pipe_slow );
7036 %}
7037
7038 // Store Pointer; for storing unknown oops and raw pointers
7076
7077 ins_cost(150);
7078 format %{ "MOV $mem,$src" %}
7079 opcode(0xC7); /* C7 /0 */
7080 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
7081 ins_pipe( ialu_mem_imm );
7082 %}
7083
7084 // Store Byte Immediate
// MOV r/m8, imm8 (opcode C6 /0).
7085 instruct storeImmB(memory mem, immI8 src) %{
7086 match(Set mem (StoreB mem src));
7087
7088 ins_cost(150);
7089 format %{ "MOV8 $mem,$src" %}
7090 opcode(0xC6); /* C6 /0 */
7091 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7092 ins_pipe( ialu_mem_imm );
7093 %}
7094
7095 // Store Aligned Packed Byte XMM register to memory
7096 instruct storeA8B(memory mem, regD src) %{
7097 predicate(UseSSE>=1);
7098 match(Set mem (Store8B mem src));
7099 ins_cost(145);
7100 format %{ "MOVQ $mem,$src\t! packed8B" %}
7101 ins_encode %{
7102 __ movq($mem$$Address, $src$$XMMRegister);
7103 %}
7104 ins_pipe( pipe_slow );
7105 %}
7106
7107 // Store Aligned Packed Char/Short XMM register to memory
7108 instruct storeA4C(memory mem, regD src) %{
7109 predicate(UseSSE>=1);
7110 match(Set mem (Store4C mem src));
7111 ins_cost(145);
7112 format %{ "MOVQ $mem,$src\t! packed4C" %}
7113 ins_encode %{
7114 __ movq($mem$$Address, $src$$XMMRegister);
7115 %}
7116 ins_pipe( pipe_slow );
7117 %}
7118
7119 // Store Aligned Packed Integer XMM register to memory
7120 instruct storeA2I(memory mem, regD src) %{
7121 predicate(UseSSE>=1);
7122 match(Set mem (Store2I mem src));
7123 ins_cost(145);
7124 format %{ "MOVQ $mem,$src\t! packed2I" %}
7125 ins_encode %{
7126 __ movq($mem$$Address, $src$$XMMRegister);
7127 %}
7128 ins_pipe( pipe_slow );
7129 %}
7130
7131 // Store CMS card-mark Immediate
// Byte-immediate store used for GC card marking (same C6 /0 encoding).
7132 instruct storeImmCM(memory mem, immI8 src) %{
7133 match(Set mem (StoreCM mem src));
7134
7135 ins_cost(150);
7136 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
7137 opcode(0xC6); /* C6 /0 */
7138 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7139 ins_pipe( ialu_mem_imm );
7140 %}
7141
7142 // Store Double
// x87 path: src must already be at FPU stack top (regDPR1); FST m64real.
7143 instruct storeDPR( memory mem, regDPR1 src) %{
7144 predicate(UseSSE<=1);
7145 match(Set mem (StoreD mem src));
7146
7147 ins_cost(100);
7148 format %{ "FST_D $mem,$src" %}
7149 opcode(0xDD); /* DD /2 */
7150 ins_encode( enc_FPR_store(mem,src) );
7151 ins_pipe( fpu_mem_reg );
7152 %}
7153
7154 // Store double does rounding on x86
// Absorbs an explicit RoundDouble: the FST itself performs the rounding.
7155 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
7156 predicate(UseSSE<=1);
7157 match(Set mem (StoreD mem (RoundDouble src)));
7158
7159 ins_cost(100);
7160 format %{ "FST_D $mem,$src\t# round" %}
7161 opcode(0xDD); /* DD /2 */
7162 ins_encode( enc_FPR_store(mem,src) );
7163 ins_pipe( fpu_mem_reg );
7164 %}
7165
7166 // Store XMM register to memory (double-precision floating points)
7167 // MOVSD instruction
7168 instruct storeD(memory mem, regD src) %{
7169 predicate(UseSSE>=2);
7170 match(Set mem (StoreD mem src));
7171 ins_cost(95);
7172 format %{ "MOVSD $mem,$src" %}
7173 ins_encode %{
7174 __ movdbl($mem$$Address, $src$$XMMRegister);
7175 %}
7176 ins_pipe( pipe_slow );
7177 %}
7178
7179 // Store XMM register to memory (single-precision floating point)
7180 // MOVSS instruction
7181 instruct storeF(memory mem, regF src) %{
7182 predicate(UseSSE>=1);
7183 match(Set mem (StoreF mem src));
7184 ins_cost(95);
7185 format %{ "MOVSS $mem,$src" %}
7186 ins_encode %{
7187 __ movflt($mem$$Address, $src$$XMMRegister);
7188 %}
7189 ins_pipe( pipe_slow );
7190 %}
7191
7192 // Store Aligned Packed Single Float XMM register to memory
7193 instruct storeA2F(memory mem, regD src) %{
7194 predicate(UseSSE>=1);
7195 match(Set mem (Store2F mem src));
7196 ins_cost(145);
7197 format %{ "MOVQ $mem,$src\t! packed2F" %}
7198 ins_encode %{
7199 __ movq($mem$$Address, $src$$XMMRegister);
7200 %}
7201 ins_pipe( pipe_slow );
7202 %}
7203
7204 // Store Float
// x87 path: src must already be at FPU stack top (regFPR1); FST m32real.
7205 instruct storeFPR( memory mem, regFPR1 src) %{
7206 predicate(UseSSE==0);
7207 match(Set mem (StoreF mem src));
7208
7209 ins_cost(100);
7210 format %{ "FST_S $mem,$src" %}
7211 opcode(0xD9); /* D9 /2 */
7212 ins_encode( enc_FPR_store(mem,src) );
7213 ins_pipe( fpu_mem_reg );
7214 %}
7215
7216 // Store Float does rounding on x86
// Absorbs an explicit RoundFloat: the FST itself performs the rounding.
7217 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
7218 predicate(UseSSE==0);
7219 match(Set mem (StoreF mem (RoundFloat src)));
7220
7221 ins_cost(100);
7222 format %{ "FST_S $mem,$src\t# round" %}
7223 opcode(0xD9); /* D9 /2 */
7224 ins_encode( enc_FPR_store(mem,src) );
7225 ins_pipe( fpu_mem_reg );
7226 %}
7227
7228 // Store Float does rounding on x86
// Absorbs a ConvD2F: storing a double as m32real narrows it in one step.
7229 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
7230 predicate(UseSSE<=1);
7231 match(Set mem (StoreF mem (ConvD2F src)));
7232
7233 ins_cost(100);
7234 format %{ "FST_S $mem,$src\t# D-round" %}
7235 opcode(0xD9); /* D9 /2 */
7236 ins_encode( enc_FPR_store(mem,src) );
7237 ins_pipe( fpu_mem_reg );
7238 %}
7239
7240 // Store immediate Float value (it is faster than store from FPU register)
7241 // The instruction usage is guarded by predicate in operand immFPR().
// Stores the float's raw 32-bit pattern with an integer MOV (C7 /0).
7242 instruct storeFPR_imm( memory mem, immFPR src) %{
7243 match(Set mem (StoreF mem src));
7244
7245 ins_cost(50);
7246 format %{ "MOV $mem,$src\t# store float" %}
7247 opcode(0xC7); /* C7 /0 */
7248 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
7249 ins_pipe( ialu_mem_imm );
7250 %}
7251
7252 // Store immediate Float value (it is faster than store from XMM register)
7253 // The instruction usage is guarded by predicate in operand immF().
// SSE analogue of the above; same integer-MOV trick.
7254 instruct storeF_imm( memory mem, immF src) %{
7255 match(Set mem (StoreF mem src));
7256
7257 ins_cost(50);
7258 format %{ "MOV $mem,$src\t# store float" %}
7259 opcode(0xC7); /* C7 /0 */
7260 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
7261 ins_pipe( ialu_mem_imm );
7262 %}
7263
7264 // Store Integer to stack slot
// MOV [ESP+disp], r32 (opcode 0x89).
7265 instruct storeSSI(stackSlotI dst, eRegI src) %{
7266 match(Set dst src);
7267
7268 ins_cost(100);
7269 format %{ "MOV $dst,$src" %}
7270 opcode(0x89);
7271 ins_encode( OpcPRegSS( dst, src ) );
7272 ins_pipe( ialu_mem_reg );
7273 %}
7274
7275 // Store Integer to stack slot
7276 instruct storeSSP(stackSlotP dst, eRegP src) %{
7277 match(Set dst src);
7278
7279 ins_cost(100);
7280 format %{ "MOV $dst,$src" %}
7538 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7539 // ins_cost(250);
7540 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
7541 // opcode(0x0F,0x40);
7542 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7543 // ins_pipe( pipe_cmov_mem );
7544 //%}
7545 //
7546 //// Conditional move
7547 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
7548 // predicate(VM_Version::supports_cmov() );
7549 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7550 // ins_cost(250);
7551 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
7552 // opcode(0x0F,0x40);
7553 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7554 // ins_pipe( pipe_cmov_mem );
7555 //%}
7556
7557 // Conditional move
// Real FCMOVcc (requires unsigned-style flags); dst pinned to ST0.
7558 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
7559 predicate(UseSSE<=1);
7560 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7561 ins_cost(200);
7562 format %{ "FCMOV$cop $dst,$src\t# double" %}
7563 opcode(0xDA);
7564 ins_encode( enc_cmov_dpr(cop,src) );
7565 ins_pipe( pipe_cmovDPR_reg );
7566 %}
7567
7568 // Conditional move
// Float variant of the FCMOVcc above.
7569 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
7570 predicate(UseSSE==0);
7571 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7572 ins_cost(200);
7573 format %{ "FCMOV$cop $dst,$src\t# float" %}
7574 opcode(0xDA);
7575 ins_encode( enc_cmov_dpr(cop,src) );
7576 ins_pipe( pipe_cmovDPR_reg );
7577 %}
7578
7579 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed case: emit an inverted conditional branch around an FPU move.
7580 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7581 predicate(UseSSE<=1);
7582 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7583 ins_cost(200);
7584 format %{ "Jn$cop skip\n\t"
7585 "MOV $dst,$src\t# double\n"
7586 "skip:" %}
7587 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7588 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7589 ins_pipe( pipe_cmovDPR_reg );
7590 %}
7591
7592 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Float variant of the branch-around-move idiom above.
7593 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7594 predicate(UseSSE==0);
7595 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7596 ins_cost(200);
7597 format %{ "Jn$cop skip\n\t"
7598 "MOV $dst,$src\t# float\n"
7599 "skip:" %}
7600 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7601 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7602 ins_pipe( pipe_cmovDPR_reg );
7603 %}
7604
7605 // No CMOVE with SSE/SSE2
// SSE float CMove: inverted short branch (jccb) around a MOVSS.
7606 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7607 predicate (UseSSE>=1);
7608 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7609 ins_cost(200);
7610 format %{ "Jn$cop skip\n\t"
7611 "MOVSS $dst,$src\t# float\n"
7612 "skip:" %}
7613 ins_encode %{
7614 Label skip;
7615 // Invert sense of branch from sense of CMOV
7616 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7617 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7618 __ bind(skip);
7619 %}
7620 ins_pipe( pipe_slow );
7621 %}
7622
7623 // No CMOVE with SSE/SSE2
// SSE2 double CMove: inverted short branch (jccb) around a MOVSD.
// Fix: format annotation said "# float" (copy-paste from the float variant)
// although this instruct handles CMoveD with MOVSD; corrected to "# double".
7624 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7625 predicate (UseSSE>=2);
7626 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7627 ins_cost(200);
7628 format %{ "Jn$cop skip\n\t"
7629 "MOVSD $dst,$src\t# double\n"
7630 "skip:" %}
7631 ins_encode %{
7632 Label skip;
7633 // Invert sense of branch from sense of CMOV
7634 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7635 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7636 __ bind(skip);
7637 %}
7638 ins_pipe( pipe_slow );
7639 %}
7640
7641 // unsigned version
// Unsigned-flags variant of the SSE float CMove emulation above; identical
// encoding, matched against eFlagsRegU instead of eFlagsReg.
7642 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7643 predicate (UseSSE>=1);
7644 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7645 ins_cost(200);
7646 format %{ "Jn$cop skip\n\t"
7647 "MOVSS $dst,$src\t# float\n"
7648 "skip:" %}
7649 ins_encode %{
7650 Label skip;
7651 // Invert sense of branch from sense of CMOV
7652 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7653 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7654 __ bind(skip);
7655 %}
7656 ins_pipe( pipe_slow );
7657 %}
7658
// Carry-flag-only compares (cmpOpUCF) reuse the unsigned emulation via expand.
7659 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7660 predicate (UseSSE>=1);
7661 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7662 ins_cost(200);
7663 expand %{
7664 fcmovF_regU(cop, cr, dst, src);
7665 %}
7666 %}
7667
7668 // unsigned version
// Unsigned-flags variant of the SSE2 double CMove emulation: branch with the
// inverted condition around a MOVSD.
// Fix: format comment said "# float" for a double (CMoveD/MOVSD) move.
7669 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7670 predicate (UseSSE>=2);
7671 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7672 ins_cost(200);
7673 format %{ "Jn$cop skip\n\t"
7674 "MOVSD $dst,$src\t# double\n"
7675 "skip:" %}
7676 ins_encode %{
7677 Label skip;
7678 // Invert sense of branch from sense of CMOV
7679 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7680 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7681 __ bind(skip);
7682 %}
7683 ins_pipe( pipe_slow );
7684 %}
7685
// Carry-flag-only compares (cmpOpUCF) reuse the unsigned double emulation.
7686 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7687 predicate (UseSSE>=2);
7688 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7689 ins_cost(200);
7690 expand %{
7691 fcmovD_regU(cop, cr, dst, src);
7692 %}
7693 %}
7694
// Long CMove on 32-bit x86: two CMOVcc instructions, one for each 32-bit half.
// Both halves use the same condition, so the pair moves all-or-nothing.
7695 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7696 predicate(VM_Version::supports_cmov() );
7697 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7698 ins_cost(200);
7699 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7700 "CMOV$cop $dst.hi,$src.hi" %}
7701 opcode(0x0F,0x40);
7702 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7703 ins_pipe( pipe_cmov_reg_long );
7704 %}
7705
7706 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7707 predicate(VM_Version::supports_cmov() );
7708 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7709 ins_cost(200);
7710 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7711 "CMOV$cop $dst.hi,$src.hi" %}
7901
7902 ins_cost(125);
7903 format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
7904 opcode(0x8B);
7905 ins_encode( OpcP, RegMem(dst,mem));
7906 ins_pipe( ialu_reg_mem );
7907 %}
7908
7909 // LoadLong-locked - same as a volatile long load when used with compare-swap
// x87 path: FILD/FISTP moves the 64-bit value atomically through the FPU.
7910 instruct loadLLocked(stackSlotL dst, memory mem) %{
7911 predicate(UseSSE<=1);
7912 match(Set dst (LoadLLocked mem));
7913
7914 ins_cost(200);
7915 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
7916 "FISTp $dst" %}
7917 ins_encode(enc_loadL_volatile(mem,dst));
7918 ins_pipe( fpu_reg_mem );
7919 %}
7920
// SSE2 path: a single 8-byte MOVSD is atomic; bounce through an XMM temp to a
// stack slot.
7921 instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
7922 predicate(UseSSE>=2);
7923 match(Set dst (LoadLLocked mem));
7924 effect(TEMP tmp);
7925 ins_cost(180);
7926 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
7927 "MOVSD $dst,$tmp" %}
7928 ins_encode %{
7929 __ movdbl($tmp$$XMMRegister, $mem$$Address);
7930 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
7931 %}
7932 ins_pipe( pipe_slow );
7933 %}
7934
// SSE2 path into a GPR pair: atomic 8-byte load into XMM, then split the
// halves out with MOVD / PSRLQ 32 / MOVD.
7935 instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
7936 predicate(UseSSE>=2);
7937 match(Set dst (LoadLLocked mem));
7938 effect(TEMP tmp);
7939 ins_cost(160);
7940 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
7941 "MOVD $dst.lo,$tmp\n\t"
7942 "PSRLQ $tmp,32\n\t"
7943 "MOVD $dst.hi,$tmp" %}
7944 ins_encode %{
7945 __ movdbl($tmp$$XMMRegister, $mem$$Address);
7946 __ movdl($dst$$Register, $tmp$$XMMRegister);
7947 __ psrlq($tmp$$XMMRegister, 32);
7948 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
7949 %}
7950 ins_pipe( pipe_slow );
7951 %}
7952
7953 // Conditional-store of the updated heap-top.
7954 // Used during allocation of the shared heap.
7955 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
9512 effect(KILL cr);
9513 ins_cost(600);
9514 size(18);
9515 format %{ "TEST $shift,32\n\t"
9516 "JEQ,s small\n\t"
9517 "MOV $dst.lo,$dst.hi\n\t"
9518 "SAR $dst.hi,31\n"
9519 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9520 "SAR $dst.hi,$shift" %}
9521 ins_encode( shift_right_arith_long( dst, shift ) );
9522 ins_pipe( pipe_slow );
9523 %}
9524
9525
9526 //----------Double Instructions------------------------------------------------
9527 // Double Math
9528
9529 // Compare & branch
9530
9531 // P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP writes EFLAGS directly; the fixup forces CF on NaN so an unordered
// result compares as "less than" (Java semantics for the signed flavor).
9532 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9533 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9534 match(Set cr (CmpD src1 src2));
9535 effect(KILL rax);
9536 ins_cost(150);
9537 format %{ "FLD $src1\n\t"
9538 "FUCOMIP ST,$src2 // P6 instruction\n\t"
9539 "JNP exit\n\t"
9540 "MOV ah,1 // saw a NaN, set CF\n\t"
9541 "SAHF\n"
9542 "exit:\tNOP // avoid branch to branch" %}
9543 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9544 ins_encode( Push_Reg_DPR(src1),
9545 OpcP, RegOpc(src2),
9546 cmpF_P6_fixup );
9547 ins_pipe( pipe_slow );
9548 %}
9549
// Carry-flag-only consumers don't need the NaN fixup, so skip it.
9550 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9551 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9552 match(Set cr (CmpD src1 src2));
9553 ins_cost(150);
9554 format %{ "FLD $src1\n\t"
9555 "FUCOMIP ST,$src2 // P6 instruction" %}
9556 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9557 ins_encode( Push_Reg_DPR(src1),
9558 OpcP, RegOpc(src2));
9559 ins_pipe( pipe_slow );
9560 %}
9561
9562 // Compare & branch
// Pre-P6 fallback: FCOMp leaves status in the FPU status word, which must be
// pulled through AX (FNSTSW/SAHF) to reach EFLAGS; hence KILL rax.
9563 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9564 predicate(UseSSE<=1);
9565 match(Set cr (CmpD src1 src2));
9566 effect(KILL rax);
9567 ins_cost(200);
9568 format %{ "FLD $src1\n\t"
9569 "FCOMp $src2\n\t"
9570 "FNSTSW AX\n\t"
9571 "TEST AX,0x400\n\t"
9572 "JZ,s flags\n\t"
9573 "MOV AH,1\t# unordered treat as LT\n"
9574 "flags:\tSAHF" %}
9575 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9576 ins_encode( Push_Reg_DPR(src1),
9577 OpcP, RegOpc(src2),
9578 fpu_flags);
9579 ins_pipe( pipe_slow );
9580 %}
9581
9582 // Compare vs zero into -1,0,1
// Three-way compare (CmpD3) against 0.0 using FTST; CmpF_Result materializes
// the -1/0/1 integer from the flags.
9583 instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9584 predicate(UseSSE<=1);
9585 match(Set dst (CmpD3 src1 zero));
9586 effect(KILL cr, KILL rax);
9587 ins_cost(280);
9588 format %{ "FTSTD $dst,$src1" %}
9589 opcode(0xE4, 0xD9);
9590 ins_encode( Push_Reg_DPR(src1),
9591 OpcS, OpcP, PopFPU,
9592 CmpF_Result(dst));
9593 ins_pipe( pipe_slow );
9594 %}
9595
9596 // Compare into -1,0,1
9597 instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9598 predicate(UseSSE<=1);
9599 match(Set dst (CmpD3 src1 src2));
9600 effect(KILL cr, KILL rax);
9601 ins_cost(300);
9602 format %{ "FCMPD $dst,$src1,$src2" %}
9603 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9604 ins_encode( Push_Reg_DPR(src1),
9605 OpcP, RegOpc(src2),
9606 CmpF_Result(dst));
9607 ins_pipe( pipe_slow );
9608 %}
9609
9610 // float compare and set condition codes in EFLAGS by XMM regs
// UCOMISD sets ZF/PF/CF; emit_cmpfp_fixup rewrites the flags on the stack so
// an unordered (NaN) result reads as CF=1 ("below"), matching Java compare
// semantics without clobbering a GPR.
9611 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9612 predicate(UseSSE>=2);
9613 match(Set cr (CmpD src1 src2));
9614 ins_cost(145);
9615 format %{ "UCOMISD $src1,$src2\n\t"
9616 "JNP,s exit\n\t"
9617 "PUSHF\t# saw NaN, set CF\n\t"
9618 "AND [rsp], #0xffffff2b\n\t"
9619 "POPF\n"
9620 "exit:" %}
9621 ins_encode %{
9622 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9623 emit_cmpfp_fixup(_masm);
9624 %}
9625 ins_pipe( pipe_slow );
9626 %}
9627
// Carry-flag-only consumers need no NaN fixup: bare UCOMISD.
9628 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9629 predicate(UseSSE>=2);
9630 match(Set cr (CmpD src1 src2));
9631 ins_cost(100);
9632 format %{ "UCOMISD $src1,$src2" %}
9633 ins_encode %{
9634 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9635 %}
9636 ins_pipe( pipe_slow );
9637 %}
9638
9639 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand (cisc) form of cmpD_cc, folding the LoadD.
9640 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9641 predicate(UseSSE>=2);
9642 match(Set cr (CmpD src1 (LoadD src2)));
9643 ins_cost(145);
9644 format %{ "UCOMISD $src1,$src2\n\t"
9645 "JNP,s exit\n\t"
9646 "PUSHF\t# saw NaN, set CF\n\t"
9647 "AND [rsp], #0xffffff2b\n\t"
9648 "POPF\n"
9649 "exit:" %}
9650 ins_encode %{
9651 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9652 emit_cmpfp_fixup(_masm);
9653 %}
9654 ins_pipe( pipe_slow );
9655 %}
9656
9657 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9658 predicate(UseSSE>=2);
9659 match(Set cr (CmpD src1 (LoadD src2)));
9660 ins_cost(100);
9661 format %{ "UCOMISD $src1,$src2" %}
9662 ins_encode %{
9663 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9664 %}
9665 ins_pipe( pipe_slow );
9666 %}
9667
9668 // Compare into -1,0,1 in XMM
// Three-way compare: emit_cmpfp3 turns the UCOMISD flags into -1/0/1 in dst,
// with NaN (PF set) folded into the -1 ("less") result.
9669 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9670 predicate(UseSSE>=2);
9671 match(Set dst (CmpD3 src1 src2));
9672 effect(KILL cr);
9673 ins_cost(255);
9674 format %{ "UCOMISD $src1, $src2\n\t"
9675 "MOV $dst, #-1\n\t"
9676 "JP,s done\n\t"
9677 "JB,s done\n\t"
9678 "SETNE $dst\n\t"
9679 "MOVZB $dst, $dst\n"
9680 "done:" %}
9681 ins_encode %{
9682 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9683 emit_cmpfp3(_masm, $dst$$Register);
9684 %}
9685 ins_pipe( pipe_slow );
9686 %}
9687
9688 // Compare into -1,0,1 in XMM and memory
9689 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9690 predicate(UseSSE>=2);
9691 match(Set dst (CmpD3 src1 (LoadD src2)));
9692 effect(KILL cr);
9693 ins_cost(275);
9694 format %{ "UCOMISD $src1, $src2\n\t"
9695 "MOV $dst, #-1\n\t"
9696 "JP,s done\n\t"
9697 "JB,s done\n\t"
9698 "SETNE $dst\n\t"
9699 "MOVZB $dst, $dst\n"
9700 "done:" %}
9701 ins_encode %{
9702 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9703 emit_cmpfp3(_masm, $dst$$Register);
9704 %}
9705 ins_pipe( pipe_slow );
9706 %}
9707
9708
// x87 double subtract: push src, then FSUBp into dst (DE /5 reverse-pop form).
9709 instruct subDPR_reg(regDPR dst, regDPR src) %{
9710 predicate (UseSSE <=1);
9711 match(Set dst (SubD dst src));
9712
9713 format %{ "FLD $src\n\t"
9714 "DSUBp $dst,ST" %}
9715 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
9716 ins_cost(150);
9717 ins_encode( Push_Reg_DPR(src),
9718 OpcP, RegOpc(dst) );
9719 ins_pipe( fpu_reg_reg );
9720 %}
9721
// Subtract with an explicit round-to-double via store to a stack slot
// (RoundDouble), needed to strip excess x87 80-bit precision.
9722 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9723 predicate (UseSSE <=1);
9724 match(Set dst (RoundDouble (SubD src1 src2)));
9725 ins_cost(250);
9726
9727 format %{ "FLD $src2\n\t"
9728 "DSUB ST,$src1\n\t"
9729 "FSTP_D $dst\t# D-round" %}
9730 opcode(0xD8, 0x5);
9731 ins_encode( Push_Reg_DPR(src2),
9732 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9733 ins_pipe( fpu_mem_reg_reg );
9734 %}
9735
9736
// Cisc form folding the memory load: FLD [src] (tertiary 0xDD /0), then FSUBp.
9737 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9738 predicate (UseSSE <=1);
9739 match(Set dst (SubD dst (LoadD src)));
9740 ins_cost(150);
9741
9742 format %{ "FLD $src\n\t"
9743 "DSUBp $dst,ST" %}
9744 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9745 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9746 OpcP, RegOpc(dst) );
9747 ins_pipe( fpu_reg_mem );
9748 %}
9749
// FABS operates on the top-of-stack only, so dst and src are both pinned to
// FPR1 (x87 ST(0)); the two-byte sequence is D9 E1.
9750 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9751 predicate (UseSSE<=1);
9752 match(Set dst (AbsD src));
9753 ins_cost(100);
9754 format %{ "FABS" %}
9755 opcode(0xE1, 0xD9);
9756 ins_encode( OpcS, OpcP );
9757 ins_pipe( fpu_reg_reg );
9758 %}
9759
// FCHS negates top-of-stack in place (D9 E0); same ST(0)-pinning as FABS.
9760 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9761 predicate(UseSSE<=1);
9762 match(Set dst (NegD src));
9763 ins_cost(100);
9764 format %{ "FCHS" %}
9765 opcode(0xE0, 0xD9);
9766 ins_encode( OpcS, OpcP );
9767 ins_pipe( fpu_reg_reg );
9768 %}
9769
// x87 double add: push src, FADDp into dst (DE /0).
9770 instruct addDPR_reg(regDPR dst, regDPR src) %{
9771 predicate(UseSSE<=1);
9772 match(Set dst (AddD dst src));
9773 format %{ "FLD $src\n\t"
9774 "DADD $dst,ST" %}
9775 size(4);
9776 ins_cost(150);
9777 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9778 ins_encode( Push_Reg_DPR(src),
9779 OpcP, RegOpc(dst) );
9780 ins_pipe( fpu_reg_reg );
9781 %}
9782
9783
// Add with explicit round-to-double via FSTP to a stack slot (strips x87
// 80-bit excess precision).
9784 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9785 predicate(UseSSE<=1);
9786 match(Set dst (RoundDouble (AddD src1 src2)));
9787 ins_cost(250);
9788
9789 format %{ "FLD $src2\n\t"
9790 "DADD ST,$src1\n\t"
9791 "FSTP_D $dst\t# D-round" %}
9792 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9793 ins_encode( Push_Reg_DPR(src2),
9794 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9795 ins_pipe( fpu_mem_reg_reg );
9796 %}
9797
9798
// Cisc form folding the memory load (FLD [src] via tertiary 0xDD /0).
9799 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9800 predicate(UseSSE<=1);
9801 match(Set dst (AddD dst (LoadD src)));
9802 ins_cost(150);
9803
9804 format %{ "FLD $src\n\t"
9805 "DADDp $dst,ST" %}
9806 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9807 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9808 OpcP, RegOpc(dst) );
9809 ins_pipe( fpu_reg_mem );
9810 %}
9811
9812 // add-to-memory
// Read-modify-write form: FLD [dst]; FADD src; FST [dst].  The
// set_instruction_start between add and store splits the emit for relocation
// bookkeeping.
9813 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9814 predicate(UseSSE<=1);
9815 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9816 ins_cost(150);
9817
9818 format %{ "FLD_D $dst\n\t"
9819 "DADD ST,$src\n\t"
9820 "FST_D $dst" %}
9821 opcode(0xDD, 0x0);
9822 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9823 Opcode(0xD8), RegOpc(src),
9824 set_instruction_start,
9825 Opcode(0xDD), RMopc_Mem(0x03,dst) );
9826 ins_pipe( fpu_reg_mem );
9827 %}
9828
// Add the constant 1.0 using FLD1 instead of a constant-table load.
9829 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9830 predicate(UseSSE<=1);
9831 match(Set dst (AddD dst con));
9832 ins_cost(125);
9833 format %{ "FLD1\n\t"
9834 "DADDp $dst,ST" %}
9835 ins_encode %{
9836 __ fld1();
9837 __ faddp($dst$$reg);
9838 %}
9839 ins_pipe(fpu_reg);
9840 %}
9841
// General double-immediate add from the constant table.  The predicate walks
// the match tree (_kids[1] is the immediate leaf) to exclude 0.0 and 1.0,
// which have cheaper dedicated forms.
9842 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9843 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9844 match(Set dst (AddD dst con));
9845 ins_cost(200);
9846 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9847 "DADDp $dst,ST" %}
9848 ins_encode %{
9849 __ fld_d($constantaddress($con));
9850 __ faddp($dst$$reg);
9851 %}
9852 ins_pipe(fpu_reg_mem);
9853 %}
9854
// Immediate add with round-to-double; _kids[0]->_kids[1] reaches the
// immediate leaf under the RoundDouble node.
9855 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9856 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9857 match(Set dst (RoundDouble (AddD src con)));
9858 ins_cost(200);
9859 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9860 "DADD ST,$src\n\t"
9861 "FSTP_D $dst\t# D-round" %}
9862 ins_encode %{
9863 __ fld_d($constantaddress($con));
9864 __ fadd($src$$reg);
9865 __ fstp_d(Address(rsp, $dst$$disp));
9866 %}
9867 ins_pipe(fpu_mem_reg_con);
9868 %}
9869
// x87 double multiply: push src, FMULp into dst (DE /1).
9870 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9871 predicate(UseSSE<=1);
9872 match(Set dst (MulD dst src));
9873 format %{ "FLD $src\n\t"
9874 "DMULp $dst,ST" %}
9875 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9876 ins_cost(150);
9877 ins_encode( Push_Reg_DPR(src),
9878 OpcP, RegOpc(dst) );
9879 ins_pipe( fpu_reg_reg );
9880 %}
9881
9882 // Strict FP instruction biases argument before multiply then
9883 // biases result to avoid double rounding of subnormals.
9884 //
9885 // scale arg1 by multiplying arg1 by 2^(-15360)
9886 // load arg2
9887 // multiply scaled arg1 by arg2
9888 // rescale product by 2^(15360)
9889 //
// ins_cost(1) deliberately wins selection over every other double multiply
// when the method is strictfp.
9890 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9891 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9892 match(Set dst (MulD dst src));
9893 ins_cost(1); // Select this instruction for all strict FP double multiplies
9894
9895 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
9896 "DMULp $dst,ST\n\t"
9897 "FLD $src\n\t"
9898 "DMULp $dst,ST\n\t"
9899 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
9900 "DMULp $dst,ST\n\t" %}
9901 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9902 ins_encode( strictfp_bias1(dst),
9903 Push_Reg_DPR(src),
9904 OpcP, RegOpc(dst),
9905 strictfp_bias2(dst) );
9906 ins_pipe( fpu_reg_reg );
9907 %}
9908
// Double-immediate multiply from the constant table; 0.0 and 1.0 excluded by
// the predicate (they fold away elsewhere).
9909 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9910 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9911 match(Set dst (MulD dst con));
9912 ins_cost(200);
9913 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9914 "DMULp $dst,ST" %}
9915 ins_encode %{
9916 __ fld_d($constantaddress($con));
9917 __ fmulp($dst$$reg);
9918 %}
9919 ins_pipe(fpu_reg_mem);
9920 %}
9921
9922
// Cisc form folding the memory load (FLD [src] via tertiary 0xDD /0).
9923 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9924 predicate( UseSSE<=1 );
9925 match(Set dst (MulD dst (LoadD src)));
9926 ins_cost(200);
9927 format %{ "FLD_D $src\n\t"
9928 "DMULp $dst,ST" %}
9929 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
9930 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9931 OpcP, RegOpc(dst) );
9932 ins_pipe( fpu_reg_mem );
9933 %}
9934
9935 //
9936 // Cisc-alternate to reg-reg multiply
// Three-address cisc form: load mem, multiply by src, pop into dst.
9937 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9938 predicate( UseSSE<=1 );
9939 match(Set dst (MulD src (LoadD mem)));
9940 ins_cost(250);
9941 format %{ "FLD_D $mem\n\t"
9942 "DMUL ST,$src\n\t"
9943 "FSTP_D $dst" %}
9944 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
9945 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9946 OpcReg_FPR(src),
9947 Pop_Reg_DPR(dst) );
9948 ins_pipe( fpu_reg_reg_mem );
9949 %}
9950
9951
9952 // MACRO3 -- addDPR a mulDPR
9953 // This instruction is a '2-address' instruction in that the result goes
9954 // back to src2. This eliminates a move from the macro; possibly the
9955 // register allocator will have to add it back (and maybe not).
// Fused multiply-add macro: FLD src0; FMUL src1; FADDP into src2.
9956 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9957 predicate( UseSSE<=1 );
9958 match(Set src2 (AddD (MulD src0 src1) src2));
9959 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
9960 "DMUL ST,$src1\n\t"
9961 "DADDp $src2,ST" %}
9962 ins_cost(250);
9963 opcode(0xDD); /* LoadD DD /0 */
9964 ins_encode( Push_Reg_FPR(src0),
9965 FMul_ST_reg(src1),
9966 FAddP_reg_ST(src2) );
9967 ins_pipe( fpu_reg_reg_reg );
9968 %}
9969
9970
9971 // MACRO3 -- subDPR a mulDPR
// Computes (src0*src1) - src2 into src2 via FSUBRP (DE E0+i, reversed
// subtract-and-pop, emitted with Opc_plus).
9972 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9973 predicate( UseSSE<=1 );
9974 match(Set src2 (SubD (MulD src0 src1) src2));
9975 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
9976 "DMUL ST,$src1\n\t"
9977 "DSUBRp $src2,ST" %}
9978 ins_cost(250);
9979 ins_encode( Push_Reg_FPR(src0),
9980 FMul_ST_reg(src1),
9981 Opcode(0xDE), Opc_plus(0xE0,src2));
9982 ins_pipe( fpu_reg_reg_reg );
9983 %}
9984
9985
// x87 double divide: push src, FDIVp into dst (DE /7).
9986 instruct divDPR_reg(regDPR dst, regDPR src) %{
9987 predicate( UseSSE<=1 );
9988 match(Set dst (DivD dst src));
9989
9990 format %{ "FLD $src\n\t"
9991 "FDIVp $dst,ST" %}
9992 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9993 ins_cost(150);
9994 ins_encode( Push_Reg_DPR(src),
9995 OpcP, RegOpc(dst) );
9996 ins_pipe( fpu_reg_reg );
9997 %}
9998
9999 // Strict FP instruction biases argument before division then
10000 // biases result, to avoid double rounding of subnormals.
10001 //
10002 // scale dividend by multiplying dividend by 2^(-15360)
10003 // load divisor
10004 // divide scaled dividend by divisor
10005 // rescale quotient by 2^(15360)
10006 //
// Fix: this instruct carried two conflicting predicate() statements (a plain
// "UseSSE<=1" followed by the strict-FP one) and ins_cost(01); merged into a
// single predicate and ins_cost(1), matching strictfp_mulDPR_reg above so the
// low cost selects this form for all strict FP double divides.
10007 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
10008 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10009 match(Set dst (DivD dst src));
10010 ins_cost(1); // Select this instruction for all strict FP double divides
10012
10013 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10014 "DMULp $dst,ST\n\t"
10015 "FLD $src\n\t"
10016 "FDIVp $dst,ST\n\t"
10017 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10018 "DMULp $dst,ST\n\t" %}
10019 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10020 ins_encode( strictfp_bias1(dst),
10021 Push_Reg_DPR(src),
10022 OpcP, RegOpc(dst),
10023 strictfp_bias2(dst) );
10024 ins_pipe( fpu_reg_reg );
10025 %}
10026
// Divide with explicit round-to-double; excluded for strict methods, which
// must take the biased strictfp form above.
10027 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10028 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10029 match(Set dst (RoundDouble (DivD src1 src2)));
10030
10031 format %{ "FLD $src1\n\t"
10032 "FDIV ST,$src2\n\t"
10033 "FSTP_D $dst\t# D-round" %}
10034 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10035 ins_encode( Push_Reg_DPR(src1),
10036 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10037 ins_pipe( fpu_mem_reg_reg );
10038 %}
10039
10040
// Double remainder via the FPREM loop inside emitModDPR(); partial remainders
// require iterating until C2 clears, which is why EAX and EFLAGS are killed.
10041 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10042 predicate(UseSSE<=1);
10043 match(Set dst (ModD dst src));
10044 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10045
10046 format %{ "DMOD $dst,$src" %}
10047 ins_cost(250);
10048 ins_encode(Push_Reg_Mod_DPR(dst, src),
10049 emitModDPR(),
10050 Push_Result_Mod_DPR(src),
10051 Pop_Reg_DPR(dst));
10052 ins_pipe( pipe_slow );
10053 %}
10054
// SSE2 double remainder: no XMM remainder instruction exists, so the operands
// are bounced through the stack onto the x87 unit, FPREM looped, and the
// result moved back to the XMM dst; the final FSTP ST0 rebalances the FPU
// stack.
10055 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10056 predicate(UseSSE>=2);
10057 match(Set dst (ModD src0 src1));
10058 effect(KILL rax, KILL cr);
10059
10060 format %{ "SUB ESP,8\t # DMOD\n"
10061 "\tMOVSD [ESP+0],$src1\n"
10062 "\tFLD_D [ESP+0]\n"
10063 "\tMOVSD [ESP+0],$src0\n"
10064 "\tFLD_D [ESP+0]\n"
10065 "loop:\tFPREM\n"
10066 "\tFWAIT\n"
10067 "\tFNSTSW AX\n"
10068 "\tSAHF\n"
10069 "\tJP loop\n"
10070 "\tFSTP_D [ESP+0]\n"
10071 "\tMOVSD $dst,[ESP+0]\n"
10072 "\tADD ESP,8\n"
10073 "\tFSTP ST0\t # Restore FPU Stack"
10074 %}
10075 ins_cost(250);
10076 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10077 ins_pipe( pipe_slow );
10078 %}
10079
// Transcendental ops.  The *DPR forms operate on the value already at x87
// ST(0) (dst and src both pinned to regDPR1); the *D forms wrap the same x87
// sequence with an XMM->stack->FPU round trip via Push_SrcD/Push_ResultD.
10080 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
10081 predicate (UseSSE<=1);
10082 match(Set dst (SinD src));
10083 ins_cost(1800);
10084 format %{ "DSIN $dst" %}
10085 opcode(0xD9, 0xFE);
10086 ins_encode( OpcP, OpcS );
10087 ins_pipe( pipe_slow );
10088 %}
10089
10090 instruct sinD_reg(regD dst, eFlagsReg cr) %{
10091 predicate (UseSSE>=2);
10092 match(Set dst (SinD dst));
10093 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10094 ins_cost(1800);
10095 format %{ "DSIN $dst" %}
10096 opcode(0xD9, 0xFE);
10097 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
10098 ins_pipe( pipe_slow );
10099 %}
10100
10101 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
10102 predicate (UseSSE<=1);
10103 match(Set dst (CosD src));
10104 ins_cost(1800);
10105 format %{ "DCOS $dst" %}
10106 opcode(0xD9, 0xFF);
10107 ins_encode( OpcP, OpcS );
10108 ins_pipe( pipe_slow );
10109 %}
10110
10111 instruct cosD_reg(regD dst, eFlagsReg cr) %{
10112 predicate (UseSSE>=2);
10113 match(Set dst (CosD dst));
10114 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10115 ins_cost(1800);
10116 format %{ "DCOS $dst" %}
10117 opcode(0xD9, 0xFF);
10118 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
10119 ins_pipe( pipe_slow );
10120 %}
10121
// FPTAN pushes tan(x) then 1.0; the trailing FSTP ST discards the 1.0.
10122 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
10123 predicate (UseSSE<=1);
10124 match(Set dst(TanD src));
10125 format %{ "DTAN $dst" %}
10126 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
10127 Opcode(0xDD), Opcode(0xD8)); // fstp st
10128 ins_pipe( pipe_slow );
10129 %}
10130
10131 instruct tanD_reg(regD dst, eFlagsReg cr) %{
10132 predicate (UseSSE>=2);
10133 match(Set dst(TanD dst));
10134 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10135 format %{ "DTAN $dst" %}
10136 ins_encode( Push_SrcD(dst),
10137 Opcode(0xD9), Opcode(0xF2), // fptan
10138 Opcode(0xDD), Opcode(0xD8), // fstp st
10139 Push_ResultD(dst) );
10140 ins_pipe( pipe_slow );
10141 %}
10142
// FPATAN (D9 F3) two-argument arctangent.
// NOTE(review): the "DATA" mnemonic in the format looks like a typo for
// "DATAN" — display-only, does not affect encoding; confirm before changing.
10143 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10144 predicate (UseSSE<=1);
10145 match(Set dst(AtanD dst src));
10146 format %{ "DATA $dst,$src" %}
10147 opcode(0xD9, 0xF3);
10148 ins_encode( Push_Reg_DPR(src),
10149 OpcP, OpcS, RegOpc(dst) );
10150 ins_pipe( pipe_slow );
10151 %}
10152
10153 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10154 predicate (UseSSE>=2);
10155 match(Set dst(AtanD dst src));
10156 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10157 format %{ "DATA $dst,$src" %}
10158 opcode(0xD9, 0xF3);
10159 ins_encode( Push_SrcD(src),
10160 OpcP, OpcS, Push_ResultD(dst) );
10161 ins_pipe( pipe_slow );
10162 %}
10163
// FSQRT (D9 FA): push src, take square root at ST(0), pop into dst.
10164 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10165 predicate (UseSSE<=1);
10166 match(Set dst (SqrtD src));
10167 format %{ "DSQRT $dst,$src" %}
10168 opcode(0xFA, 0xD9);
10169 ins_encode( Push_Reg_DPR(src),
10170 OpcS, OpcP, Pop_Reg_DPR(dst) );
10171 ins_pipe( pipe_slow );
10172 %}
10173
10174 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10175 predicate (UseSSE<=1);
10176 match(Set Y (PowD X Y)); // Raise X to the Yth power
10177 effect(KILL rax, KILL rbx, KILL rcx);
10178 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10179 "FLD_D $X\n\t"
10180 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10181
10182 "FDUP \t\t\t# Q Q\n\t"
10183 "FRNDINT\t\t\t# int(Q) Q\n\t"
10184 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10185 "FISTP dword [ESP]\n\t"
10186 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10187 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10188 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10189 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10190 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10191 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10192 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10193 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10194 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10195 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10196 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10197 "MOV [ESP+0],0\n\t"
10198 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10199
10200 "ADD ESP,8"
10201 %}
10202 ins_encode( push_stack_temp_qword,
10203 Push_Reg_DPR(X),
10204 Opcode(0xD9), Opcode(0xF1), // fyl2x
10205 pow_exp_core_encoding,
10206 pop_stack_temp_qword);
10207 ins_pipe( pipe_slow );
10208 %}
10209
10210 instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
10211 predicate (UseSSE>=2);
10212 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
10213 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
10214 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10215 "MOVSD [ESP],$src1\n\t"
10216 "FLD FPR1,$src1\n\t"
10217 "MOVSD [ESP],$src0\n\t"
10218 "FLD FPR1,$src0\n\t"
10219 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10220
10221 "FDUP \t\t\t# Q Q\n\t"
10222 "FRNDINT\t\t\t# int(Q) Q\n\t"
10223 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10224 "FISTP dword [ESP]\n\t"
10225 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10226 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10227 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10228 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10229 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10230 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10231 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10232 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10233 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10234 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10235 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10236 "MOV [ESP+0],0\n\t"
10237 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10238
10239 "FST_D [ESP]\n\t"
10240 "MOVSD $dst,[ESP]\n\t"
10241 "ADD ESP,8"
10242 %}
10243 ins_encode( push_stack_temp_qword,
10244 push_xmm_to_fpr1(src1),
10245 push_xmm_to_fpr1(src0),
10246 Opcode(0xD9), Opcode(0xF1), // fyl2x
10247 pow_exp_core_encoding,
10248 Push_ResultD(dst) );
10249 ins_pipe( pipe_slow );
10250 %}
10251
10252
// Fast-path exp: e^X = 2^(X*log2(e)) via FLDL2E/FMULP, then the shared
// pow_exp_core_encoding (same int/frac split and 2^int(Q) marshalling as the
// pow instructs above).  Operates in place on ST(0).
// Fix: the first format line was missing its trailing "\n\t", so the
// "SUB ESP,8" and "FLDL2E" lines ran together in -XX:+PrintOptoAssembly
// output (compare expD_reg below).
10253 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10254 predicate (UseSSE<=1);
10255 match(Set dpr1 (ExpD dpr1));
10256 effect(KILL rax, KILL rbx, KILL rcx);
10257 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10258 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10259 "FMULP \t\t\t# Q=X*log2(e)\n\t"
10260
10261 "FDUP \t\t\t# Q Q\n\t"
10262 "FRNDINT\t\t\t# int(Q) Q\n\t"
10263 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10264 "FISTP dword [ESP]\n\t"
10265 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10266 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10267 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10268 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10269 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10270 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10271 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10272 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10273 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10274 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10275 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10276 "MOV [ESP+0],0\n\t"
10277 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10278
10279 "ADD ESP,8"
10280 %}
10281 ins_encode( push_stack_temp_qword,
10282 Opcode(0xD9), Opcode(0xEA), // fldl2e
10283 Opcode(0xDE), Opcode(0xC9), // fmulp
10284 pow_exp_core_encoding,
10285 pop_stack_temp_qword);
10286 ins_pipe( pipe_slow );
10287 %}
10288
// SSE2 exp: XMM src is pushed onto the x87 stack (Push_SrcD), the same
// FLDL2E/FMULP + pow_exp_core_encoding sequence runs, and the result returns
// to dst via Push_ResultD.  tmp1 (ST(0)) is clobbered by the x87 work.
10289 instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10290 predicate (UseSSE>=2);
10291 match(Set dst (ExpD src));
10292 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
10293 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10294 "MOVSD [ESP],$src\n\t"
10295 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10296 "FMULP \t\t\t# Q=X*log2(e) X\n\t"
10297
10298 "FDUP \t\t\t# Q Q\n\t"
10299 "FRNDINT\t\t\t# int(Q) Q\n\t"
10300 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10301 "FISTP dword [ESP]\n\t"
10302 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10303 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10304 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10305 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10306 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10307 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10308 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10309 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10310 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10311 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10312 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10313 "MOV [ESP+0],0\n\t"
10314 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10315
10316 "FST_D [ESP]\n\t"
10317 "MOVSD $dst,[ESP]\n\t"
10318 "ADD ESP,8"
10319 %}
10320 ins_encode( Push_SrcD(src),
10321 Opcode(0xD9), Opcode(0xEA), // fldl2e
10322 Opcode(0xDE), Opcode(0xC9), // fmulp
10323 pow_exp_core_encoding,
10324 Push_ResultD(dst) );
10325 ins_pipe( pipe_slow );
10326 %}
10327
10328
10329
10330 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
10331 predicate (UseSSE<=1);
10332 // The source Double operand on FPU stack
10333 match(Set dst (Log10D src));
10334 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10335 // fxch ; swap ST(0) with ST(1)
10336 // fyl2x ; compute log_10(2) * log_2(x)
10337 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10338 "FXCH \n\t"
10339 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10340 %}
10341 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10342 Opcode(0xD9), Opcode(0xC9), // fxch
10343 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10344
10345 ins_pipe( pipe_slow );
10346 %}
10347
// log10(x) for SSE2: operands live in XMM registers, so Push_SrcD/Push_ResultD
// marshal the value through the x87 stack around FLDLG2 + FYL2X.
10348 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
10349 predicate (UseSSE>=2);
10350 effect(KILL cr);
10351 match(Set dst (Log10D src));
10352 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10353 // fyl2x ; compute log_10(2) * log_2(x)
10354 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10355 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10356 %}
10357 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10358 Push_SrcD(src),
10359 Opcode(0xD9), Opcode(0xF1), // fyl2x
10360 Push_ResultD(dst));
10361
10362 ins_pipe( pipe_slow );
10363 %}
10364
// Natural log on the x87 stack (no SSE): push ln(2), swap, then FYL2X yields
// ln(2)*log2(x) = ln(x).
10365 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
10366 predicate (UseSSE<=1);
10367 // The source Double operand on FPU stack
10368 match(Set dst (LogD src));
10369 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10370 // fxch ; swap ST(0) with ST(1)
10371 // fyl2x ; compute log_e(2) * log_2(x)
10372 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10373 "FXCH \n\t"
10374 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10375 %}
10376 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10377 Opcode(0xD9), Opcode(0xC9), // fxch
10378 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10379
10380 ins_pipe( pipe_slow );
10381 %}
10382
// Natural log for SSE2: XMM operands are moved through the x87 stack
// around FLDLN2 + FYL2X (no SSE equivalent exists).
10383 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
10384 predicate (UseSSE>=2);
10385 effect(KILL cr);
10386 // The source and result Double operands in XMM registers
10387 match(Set dst (LogD src));
10388 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10389 // fyl2x ; compute log_e(2) * log_2(x)
10390 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10391 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10392 %}
10393 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10394 Push_SrcD(src),
10395 Opcode(0xD9), Opcode(0xF1), // fyl2x
10396 Push_ResultD(dst));
10397 ins_pipe( pipe_slow );
10398 %}
10399
10400 //-------------Float Instructions-------------------------------
10401 // Float Math
10402
10403 // Code for float compare:
10404 // fcompp();
10405 // fwait(); fnstsw_ax();
10406 // sahf();
10407 // movl(dst, unordered_result);
10408 // jcc(Assembler::parity, exit);
10409 // movl(dst, less_result);
10410 // jcc(Assembler::below, exit);
10411 // movl(dst, equal_result);
10412 // jcc(Assembler::equal, exit);
10413 // movl(dst, greater_result);
10414 // exit:
10415
10416 // P6 version of float compare, sets condition codes in EFLAGS
// P6 float compare setting EFLAGS directly via FUCOMIP; the cmpF_P6_fixup
// tail forces CF on an unordered (NaN) result so NaN compares as "less than".
// EAX is killed by the fixup (SAHF path), hence the KILL rax effect.
10417 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10418 predicate(VM_Version::supports_cmov() && UseSSE == 0);
10419 match(Set cr (CmpF src1 src2));
10420 effect(KILL rax);
10421 ins_cost(150);
10422 format %{ "FLD $src1\n\t"
10423 "FUCOMIP ST,$src2 // P6 instruction\n\t"
10424 "JNP exit\n\t"
10425 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
10426 "SAHF\n"
10427 "exit:\tNOP // avoid branch to branch" %}
10428 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10429 ins_encode( Push_Reg_DPR(src1),
10430 OpcP, RegOpc(src2),
10431 cmpF_P6_fixup );
10432 ins_pipe( pipe_slow );
10433 %}
10434
// Cheaper P6 compare variant for users that only consume CF/ZF
// (eFlagsRegUCF): no NaN fixup sequence is emitted.
10435 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10436 predicate(VM_Version::supports_cmov() && UseSSE == 0);
10437 match(Set cr (CmpF src1 src2));
10438 ins_cost(100);
10439 format %{ "FLD $src1\n\t"
10440 "FUCOMIP ST,$src2 // P6 instruction" %}
10441 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10442 ins_encode( Push_Reg_DPR(src1),
10443 OpcP, RegOpc(src2));
10444 ins_pipe( pipe_slow );
10445 %}
10446
10447
10448 // Compare & branch
// Pre-P6 float compare: FCOMP leaves status in the FPU status word, which is
// pulled into AX (FNSTSW AX) and transferred to EFLAGS via SAHF; unordered
// results are forced to "less than" first. Kills EAX for the status transfer.
10449 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10450 predicate(UseSSE == 0);
10451 match(Set cr (CmpF src1 src2));
10452 effect(KILL rax);
10453 ins_cost(200);
10454 format %{ "FLD $src1\n\t"
10455 "FCOMp $src2\n\t"
10456 "FNSTSW AX\n\t"
10457 "TEST AX,0x400\n\t"
10458 "JZ,s flags\n\t"
10459 "MOV AH,1\t# unordered treat as LT\n"
10460 "flags:\tSAHF" %}
10461 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10462 ins_encode( Push_Reg_DPR(src1),
10463 OpcP, RegOpc(src2),
10464 fpu_flags);
10465 ins_pipe( pipe_slow );
10466 %}
10467
10468 // Compare vs zero into -1,0,1
// Three-way compare of a float against zero using FTST (D9 E4); the
// CmpF_Result helper materializes -1/0/1 into an integer register.
10469 instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10470 predicate(UseSSE == 0);
10471 match(Set dst (CmpF3 src1 zero));
10472 effect(KILL cr, KILL rax);
10473 ins_cost(280);
10474 format %{ "FTSTF $dst,$src1" %}
10475 opcode(0xE4, 0xD9); // emitted as OpcS (0xD9) then OpcP (0xE4) => FTST
10476 ins_encode( Push_Reg_DPR(src1),
10477 OpcS, OpcP, PopFPU,
10478 CmpF_Result(dst));
10479 ins_pipe( pipe_slow );
10480 %}
10481
10482 // Compare into -1,0,1
// Three-way compare of two FPU-stack floats into -1/0/1 (CmpF3);
// FCOMP sets the FPU status, CmpF_Result converts it to an int.
10483 instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10484 predicate(UseSSE == 0);
10485 match(Set dst (CmpF3 src1 src2));
10486 effect(KILL cr, KILL rax);
10487 ins_cost(300);
10488 format %{ "FCMPF $dst,$src1,$src2" %}
10489 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10490 ins_encode( Push_Reg_DPR(src1),
10491 OpcP, RegOpc(src2),
10492 CmpF_Result(dst));
10493 ins_pipe( pipe_slow );
10494 %}
10495
10496 // float compare and set condition codes in EFLAGS by XMM regs
// SSE float compare setting EFLAGS; emit_cmpfp_fixup patches the flags when
// UCOMISS reports unordered (PF set) so NaN behaves as "below".
10497 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10498 predicate(UseSSE>=1);
10499 match(Set cr (CmpF src1 src2));
10500 ins_cost(145);
10501 format %{ "UCOMISS $src1,$src2\n\t"
10502 "JNP,s exit\n\t"
10503 "PUSHF\t# saw NaN, set CF\n\t"
10504 "AND [rsp], #0xffffff2b\n\t"
10505 "POPF\n"
10506 "exit:" %}
10507 ins_encode %{
10508 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10509 emit_cmpfp_fixup(_masm); // normalize flags for the unordered case
10510 %}
10511 ins_pipe( pipe_slow );
10512 %}
10513
// Bare UCOMISS for consumers that tolerate the raw unordered flag pattern
// (eFlagsRegUCF): no NaN fixup, hence the lower cost.
10514 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10515 predicate(UseSSE>=1);
10516 match(Set cr (CmpF src1 src2));
10517 ins_cost(100);
10518 format %{ "UCOMISS $src1,$src2" %}
10519 ins_encode %{
10520 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10521 %}
10522 ins_pipe( pipe_slow );
10523 %}
10524
10525 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand variant of cmpF_cc: folds the LoadF into UCOMISS,
// with the same NaN flag fixup.
10526 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10527 predicate(UseSSE>=1);
10528 match(Set cr (CmpF src1 (LoadF src2)));
10529 ins_cost(165);
10530 format %{ "UCOMISS $src1,$src2\n\t"
10531 "JNP,s exit\n\t"
10532 "PUSHF\t# saw NaN, set CF\n\t"
10533 "AND [rsp], #0xffffff2b\n\t"
10534 "POPF\n"
10535 "exit:" %}
10536 ins_encode %{
10537 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10538 emit_cmpfp_fixup(_masm); // normalize flags for the unordered case
10539 %}
10540 ins_pipe( pipe_slow );
10541 %}
10542
// Memory-operand UCOMISS without NaN fixup, for CF-only flag users.
10543 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10544 predicate(UseSSE>=1);
10545 match(Set cr (CmpF src1 (LoadF src2)));
10546 ins_cost(100);
10547 format %{ "UCOMISS $src1,$src2" %}
10548 ins_encode %{
10549 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10550 %}
10551 ins_pipe( pipe_slow );
10552 %}
10553
10554 // Compare into -1,0,1 in XMM
// Three-way SSE compare into -1/0/1: UCOMISS sets flags, emit_cmpfp3
// materializes the result (NaN and below both yield -1 via the JP/JB path).
10555 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10556 predicate(UseSSE>=1);
10557 match(Set dst (CmpF3 src1 src2));
10558 effect(KILL cr);
10559 ins_cost(255);
10560 format %{ "UCOMISS $src1, $src2\n\t"
10561 "MOV $dst, #-1\n\t"
10562 "JP,s done\n\t"
10563 "JB,s done\n\t"
10564 "SETNE $dst\n\t"
10565 "MOVZB $dst, $dst\n"
10566 "done:" %}
10567 ins_encode %{
10568 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10569 emit_cmpfp3(_masm, $dst$$Register);
10570 %}
10571 ins_pipe( pipe_slow );
10572 %}
10573
10574 // Compare into -1,0,1 in XMM and memory
// Memory-operand variant of cmpF_reg: LoadF folded into UCOMISS,
// same -1/0/1 materialization via emit_cmpfp3.
10575 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10576 predicate(UseSSE>=1);
10577 match(Set dst (CmpF3 src1 (LoadF src2)));
10578 effect(KILL cr);
10579 ins_cost(275);
10580 format %{ "UCOMISS $src1, $src2\n\t"
10581 "MOV $dst, #-1\n\t"
10582 "JP,s done\n\t"
10583 "JB,s done\n\t"
10584 "SETNE $dst\n\t"
10585 "MOVZB $dst, $dst\n"
10586 "done:" %}
10587 ins_encode %{
10588 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10589 emit_cmpfp3(_masm, $dst$$Register);
10590 %}
10591 ins_pipe( pipe_slow );
10592 %}
10593
10594 // Spill to obtain 24-bit precision
// Float subtract with forced 24-bit rounding: result is spilled to a
// stack slot (Pop_Mem_FPR) so the store-as-single performs the rounding.
10595 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10596 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10597 match(Set dst (SubF src1 src2));
10598
10599 format %{ "FSUB $dst,$src1 - $src2" %}
10600 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10601 ins_encode( Push_Reg_FPR(src1),
10602 OpcReg_FPR(src2),
10603 Pop_Mem_FPR(dst) );
10604 ins_pipe( fpu_mem_reg_reg );
10605 %}
10606 //
10607 // This instruction does not round to 24-bits
// Two-address float subtract, no 24-bit rounding: push src, then
// FSUBP (DE /5) computes dst = dst - src and pops.
10608 instruct subFPR_reg(regFPR dst, regFPR src) %{
10609 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10610 match(Set dst (SubF dst src));
10611
10612 format %{ "FSUB $dst,$src" %}
10613 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
10614 ins_encode( Push_Reg_FPR(src),
10615 OpcP, RegOpc(dst) );
10616 ins_pipe( fpu_reg_reg );
10617 %}
10618
10619 // Spill to obtain 24-bit precision
// Float add with forced 24-bit rounding via spill-to-stack-slot
// (store as single rounds the 80-bit intermediate).
10620 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10621 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10622 match(Set dst (AddF src1 src2));
10623
10624 format %{ "FADD $dst,$src1,$src2" %}
10625 opcode(0xD8, 0x0); /* D8 C0+i */
10626 ins_encode( Push_Reg_FPR(src2),
10627 OpcReg_FPR(src1),
10628 Pop_Mem_FPR(dst) );
10629 ins_pipe( fpu_mem_reg_reg );
10630 %}
10631 //
10632 // This instruction does not round to 24-bits
// Two-address float add, no 24-bit rounding: push src then
// FADDP (DE /0) accumulates into dst and pops.
10633 instruct addFPR_reg(regFPR dst, regFPR src) %{
10634 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10635 match(Set dst (AddF dst src));
10636
10637 format %{ "FLD $src\n\t"
10638 "FADDp $dst,ST" %}
10639 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10640 ins_encode( Push_Reg_FPR(src),
10641 OpcP, RegOpc(dst) );
10642 ins_pipe( fpu_reg_reg );
10643 %}
10644
// Float absolute value: operand must already be in ST(0) (regFPR1),
// so only the two FABS opcode bytes (D9 E1) are emitted.
10645 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10646 predicate(UseSSE==0);
10647 match(Set dst (AbsF src));
10648 ins_cost(100);
10649 format %{ "FABS" %}
10650 opcode(0xE1, 0xD9); // emitted as OpcS (0xD9) then OpcP (0xE1)
10651 ins_encode( OpcS, OpcP );
10652 ins_pipe( fpu_reg_reg );
10653 %}
10654
// Float negate: operand constrained to ST(0) (regFPR1); emits FCHS (D9 E0).
10655 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10656 predicate(UseSSE==0);
10657 match(Set dst (NegF src));
10658 ins_cost(100);
10659 format %{ "FCHS" %}
10660 opcode(0xE0, 0xD9); // emitted as OpcS (0xD9) then OpcP (0xE0)
10661 ins_encode( OpcS, OpcP );
10662 ins_pipe( fpu_reg_reg );
10663 %}
10664
10665 // Cisc-alternate to addFPR_reg
10666 // Spill to obtain 24-bit precision
// Cisc-alternate add with one memory operand and forced 24-bit rounding:
// FLD from memory (tertiary D9 /0), add the register, spill to round.
10667 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10668 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10669 match(Set dst (AddF src1 (LoadF src2)));
10670
10671 format %{ "FLD $src2\n\t"
10672 "FADD ST,$src1\n\t"
10673 "FSTP_S $dst" %}
10674 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
10675 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10676 OpcReg_FPR(src1),
10677 Pop_Mem_FPR(dst) );
10678 ins_pipe( fpu_mem_reg_mem );
10679 %}
10680 //
10681 // Cisc-alternate to addFPR_reg
10682 // This instruction does not round to 24-bits
// Cisc-alternate two-address add with a memory operand, no 24-bit rounding:
// FLD mem (D9 /0) then FADDP into dst.
10683 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10684 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10685 match(Set dst (AddF dst (LoadF src)));
10686
10687 format %{ "FADD $dst,$src" %}
10688 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
10689 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10690 OpcP, RegOpc(dst) );
10691 ins_pipe( fpu_reg_mem );
10692 %}
10693
10694 // // Following two instructions for _222_mpegaudio
10695 // Spill to obtain 24-bit precision
// Mirror of addFPR24_reg_mem with the memory operand on the left
// (added for _222_mpegaudio); 24-bit rounding via stack-slot spill.
10696 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10697 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10698 match(Set dst (AddF src1 src2));
10699
10700 format %{ "FADD $dst,$src1,$src2" %}
10701 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
10702 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10703 OpcReg_FPR(src2),
10704 Pop_Mem_FPR(dst) );
10705 ins_pipe( fpu_mem_reg_mem );
10706 %}
10707
10708 // Cisc-spill variant
10709 // Spill to obtain 24-bit precision
// Cisc-spill add of two memory operands with 24-bit rounding:
// load src2 (D9 /0), FADD from src1 memory (D8 /0), spill to round.
10710 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10711 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10712 match(Set dst (AddF src1 (LoadF src2)));
10713
10714 format %{ "FADD $dst,$src1,$src2 cisc" %}
10715 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
10716 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10717 set_instruction_start,
10718 OpcP, RMopc_Mem(secondary,src1),
10719 Pop_Mem_FPR(dst) );
10720 ins_pipe( fpu_mem_mem_mem );
10721 %}
10722
10723 // Spill to obtain 24-bit precision
// Add of two memory operands with 24-bit rounding; same emission shape
// as addFPR24_mem_cisc but matching a plain (AddF src1 src2).
10724 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10725 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10726 match(Set dst (AddF src1 src2));
10727
10728 format %{ "FADD $dst,$src1,$src2" %}
10729 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
10730 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10731 set_instruction_start,
10732 OpcP, RMopc_Mem(secondary,src1),
10733 Pop_Mem_FPR(dst) );
10734 ins_pipe( fpu_mem_mem_mem );
10735 %}
10736
10737
10738 // Spill to obtain 24-bit precision
// Add a float constant (from the constant table) with 24-bit rounding:
// FSTP_S to the dst stack slot performs the rounding.
10739 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10740 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10741 match(Set dst (AddF src con));
10742 format %{ "FLD $src\n\t"
10743 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10744 "FSTP_S $dst" %}
10745 ins_encode %{
10746 __ fld_s($src$$reg - 1); // FLD ST(i-1)
10747 __ fadd_s($constantaddress($con));
10748 __ fstp_s(Address(rsp, $dst$$disp)); // store-as-single rounds to 24 bits
10749 %}
10750 ins_pipe(fpu_mem_reg_con);
10751 %}
10752 //
10753 // This instruction does not round to 24-bits
// Add a float constant without 24-bit rounding: result is popped back
// into an FPU register (fstp_d pops to ST(i)), keeping full precision.
10754 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10755 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10756 match(Set dst (AddF src con));
10757 format %{ "FLD $src\n\t"
10758 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10759 "FSTP $dst" %}
10760 ins_encode %{
10761 __ fld_s($src$$reg - 1); // FLD ST(i-1)
10762 __ fadd_s($constantaddress($con));
10763 __ fstp_d($dst$$reg); // pop to register; no rounding
10764 %}
10765 ins_pipe(fpu_reg_reg_con);
10766 %}
10767
10768 // Spill to obtain 24-bit precision
// Float multiply with forced 24-bit rounding: result spilled to a stack
// slot (Pop_Mem_FPR) so the single-precision store rounds it.
10769 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10770 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10771 match(Set dst (MulF src1 src2));
10772
10773 format %{ "FLD $src1\n\t"
10774 "FMUL $src2\n\t"
10775 "FSTP_S $dst" %}
10776 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10777 ins_encode( Push_Reg_FPR(src1),
10778 OpcReg_FPR(src2),
10779 Pop_Mem_FPR(dst) );
10780 ins_pipe( fpu_mem_reg_reg );
10781 %}
10782 //
10783 // This instruction does not round to 24-bits
// Float multiply without 24-bit rounding: the result is popped back into an
// FPU register (Pop_Reg_FPR), not stored to memory. The disassembly format
// previously printed "FSTP_S $dst", which denotes a single-precision memory
// store and contradicted both the register pop and the no-rounding contract
// (compare mulFPR24_reg, which really does Pop_Mem_FPR); print "FSTP" instead.
10784 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10785 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10786 match(Set dst (MulF src1 src2));
10787
10788 format %{ "FLD $src1\n\t"
10789 "FMUL $src2\n\t"
10790 "FSTP $dst" %}
10791 opcode(0xD8, 0x1); /* D8 C8+i */
10792 ins_encode( Push_Reg_FPR(src2),
10793 OpcReg_FPR(src1),
10794 Pop_Reg_FPR(dst) );
10795 ins_pipe( fpu_reg_reg_reg );
10796 %}
10797
10798
10799 // Spill to obtain 24-bit precision
10800 // Cisc-alternate to reg-reg multiply
// Cisc-alternate multiply with a memory operand and 24-bit rounding:
// FLD_S mem (D9 /0), FMUL register, spill to round.
10801 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10802 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10803 match(Set dst (MulF src1 (LoadF src2)));
10804
10805 format %{ "FLD_S $src2\n\t"
10806 "FMUL $src1\n\t"
10807 "FSTP_S $dst" %}
10808 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
10809 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10810 OpcReg_FPR(src1),
10811 Pop_Mem_FPR(dst) );
10812 ins_pipe( fpu_mem_reg_mem );
10813 %}
10814 //
10815 // This instruction does not round to 24-bits
10816 // Cisc-alternate to reg-reg multiply
// Cisc-alternate multiply with a memory operand, no 24-bit rounding:
// result popped back into a register (Pop_Reg_FPR).
10817 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10818 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10819 match(Set dst (MulF src1 (LoadF src2)));
10820
10821 format %{ "FMUL $dst,$src1,$src2" %}
10822 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
10823 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10824 OpcReg_FPR(src1),
10825 Pop_Reg_FPR(dst) );
10826 ins_pipe( fpu_reg_reg_mem );
10827 %}
10828
10829 // Spill to obtain 24-bit precision
// Multiply of two memory operands with 24-bit rounding:
// load src2 (D9 /0), FMUL from src1 memory (D8 /1), spill to round.
10830 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10831 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10832 match(Set dst (MulF src1 src2));
10833
10834 format %{ "FMUL $dst,$src1,$src2" %}
10835 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
10836 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10837 set_instruction_start,
10838 OpcP, RMopc_Mem(secondary,src1),
10839 Pop_Mem_FPR(dst) );
10840 ins_pipe( fpu_mem_mem_mem );
10841 %}
10842
10843 // Spill to obtain 24-bit precision
// Multiply by a float constant (constant table) with 24-bit rounding
// via single-precision store to the dst stack slot.
10844 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10845 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10846 match(Set dst (MulF src con));
10847
10848 format %{ "FLD $src\n\t"
10849 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10850 "FSTP_S $dst" %}
10851 ins_encode %{
10852 __ fld_s($src$$reg - 1); // FLD ST(i-1)
10853 __ fmul_s($constantaddress($con));
10854 __ fstp_s(Address(rsp, $dst$$disp)); // store-as-single rounds to 24 bits
10855 %}
10856 ins_pipe(fpu_mem_reg_con);
10857 %}
10858 //
10859 // This instruction does not round to 24-bits
// Multiply by a float constant without 24-bit rounding: result popped
// back into a register (fstp_d), keeping full precision.
10860 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10861 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10862 match(Set dst (MulF src con));
10863
10864 format %{ "FLD $src\n\t"
10865 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10866 "FSTP $dst" %}
10867 ins_encode %{
10868 __ fld_s($src$$reg - 1); // FLD ST(i-1)
10869 __ fmul_s($constantaddress($con));
10870 __ fstp_d($dst$$reg); // pop to register; no rounding
10871 %}
10872 ins_pipe(fpu_reg_reg_con);
10873 %}
10874
10875
10876 //
10877 // MACRO1 -- subsume unshared load into mulFPR
10878 // This instruction does not round to 24-bits
// MACRO1: fold an unshared LoadF directly into the multiply
// (FLD mem; FMUL ST,src; pop to dst). No 24-bit rounding.
10879 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10880 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10881 match(Set dst (MulF (LoadF mem1) src));
10882
10883 format %{ "FLD $mem1 ===MACRO1===\n\t"
10884 "FMUL ST,$src\n\t"
10885 "FSTP $dst" %}
10886 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
10887 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10888 OpcReg_FPR(src),
10889 Pop_Reg_FPR(dst) );
10890 ins_pipe( fpu_reg_reg_mem );
10891 %}
10892 //
10893 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10894 // This instruction does not round to 24-bits
// MACRO2: fused add of a multiply that itself subsumed an unshared load:
// FLD mem; FMUL ST,src1; FADD ST,src2; pop to dst. No 24-bit rounding.
10895 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10896 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10897 match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10898 ins_cost(95);
10899
10900 format %{ "FLD $mem1 ===MACRO2===\n\t"
10901 "FMUL ST,$src1 subsume mulFPR left load\n\t"
10902 "FADD ST,$src2\n\t"
10903 "FSTP $dst" %}
10904 opcode(0xD9); /* LoadF D9 /0 */
10905 ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10906 FMul_ST_reg(src1),
10907 FAdd_ST_reg(src2),
10908 Pop_Reg_FPR(dst) );
10909 ins_pipe( fpu_reg_mem_reg_reg );
10910 %}
10911
10912 // MACRO3 -- addFPR a mulFPR
10913 // This instruction does not round to 24-bits. It is a '2-address'
10914 // instruction in that the result goes back to src2. This eliminates
10915 // a move from the macro; possibly the register allocator will have
10916 // to add it back (and maybe not).
// MACRO3: fused multiply-add written as a 2-address form — the result is
// accumulated back into src2 via FADDP, avoiding an extra register move.
// No 24-bit rounding.
10917 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10918 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10919 match(Set src2 (AddF (MulF src0 src1) src2));
10920
10921 format %{ "FLD $src0 ===MACRO3===\n\t"
10922 "FMUL ST,$src1\n\t"
10923 "FADDP $src2,ST" %}
10924 opcode(0xD9); /* LoadF D9 /0 */
10925 ins_encode( Push_Reg_FPR(src0),
10926 FMul_ST_reg(src1),
10927 FAddP_reg_ST(src2) );
10928 ins_pipe( fpu_reg_reg_reg );
10929 %}
10930
10931 // MACRO4 -- divFPR subFPR
10932 // This instruction does not round to 24-bits
// MACRO4: fused (src2 - src1) / src3 on the FPU stack;
// subFPR_divFPR_encode emits the FSUB/FDIV pair. No 24-bit rounding.
10933 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10934 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10935 match(Set dst (DivF (SubF src2 src1) src3));
10936
10937 format %{ "FLD $src2 ===MACRO4===\n\t"
10938 "FSUB ST,$src1\n\t"
10939 "FDIV ST,$src3\n\t"
10940 "FSTP $dst" %}
10941 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10942 ins_encode( Push_Reg_FPR(src2),
10943 subFPR_divFPR_encode(src1,src3),
10944 Pop_Reg_FPR(dst) );
10945 ins_pipe( fpu_reg_reg_reg_reg );
10946 %}
10947
10948 // Spill to obtain 24-bit precision
// Float divide with forced 24-bit rounding via spill to a stack slot.
10949 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10950 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10951 match(Set dst (DivF src1 src2));
10952
10953 format %{ "FDIV $dst,$src1,$src2" %}
10954 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10955 ins_encode( Push_Reg_FPR(src1),
10956 OpcReg_FPR(src2),
10957 Pop_Mem_FPR(dst) );
10958 ins_pipe( fpu_mem_reg_reg );
10959 %}
10960 //
10961 // This instruction does not round to 24-bits
// Two-address float divide, no 24-bit rounding: push src, then
// FDIVP (DE /7) computes dst = dst / src and pops.
10962 instruct divFPR_reg(regFPR dst, regFPR src) %{
10963 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10964 match(Set dst (DivF dst src));
10965
10966 format %{ "FDIV $dst,$src" %}
10967 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10968 ins_encode( Push_Reg_FPR(src),
10969 OpcP, RegOpc(dst) );
10970 ins_pipe( fpu_reg_reg );
10971 %}
10972
10973
10974 // Spill to obtain 24-bit precision
// Float remainder (ModF) with 24-bit rounding: emitModDPR runs the
// FPREM loop (uses EAX/EFLAGS, hence the kills); result spilled to round.
10975 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10976 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10977 match(Set dst (ModF src1 src2));
10978 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10979
10980 format %{ "FMOD $dst,$src1,$src2" %}
10981 ins_encode( Push_Reg_Mod_DPR(src1, src2),
10982 emitModDPR(),
10983 Push_Result_Mod_DPR(src2),
10984 Pop_Mem_FPR(dst));
10985 ins_pipe( pipe_slow );
10986 %}
10987 //
10988 // This instruction does not round to 24-bits
// Float remainder without 24-bit rounding: same FPREM loop via emitModDPR,
// but the result is popped back into a register.
10989 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10990 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10991 match(Set dst (ModF dst src));
10992 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10993
10994 format %{ "FMOD $dst,$src" %}
10995 ins_encode(Push_Reg_Mod_DPR(dst, src),
10996 emitModDPR(),
10997 Push_Result_Mod_DPR(src),
10998 Pop_Reg_FPR(dst));
10999 ins_pipe( pipe_slow );
11000 %}
11001
// SSE float remainder: SSE has no FPREM, so both XMM operands are spilled
// to the stack, processed by the x87 FPREM loop (emitModDPR; FNSTSW/SAHF/JP
// retry until the partial remainder converges), then the result is moved
// back to XMM and the leftover FPU stack entry is popped.
11002 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
11003 predicate(UseSSE>=1);
11004 match(Set dst (ModF src0 src1));
11005 effect(KILL rax, KILL cr);
11006 format %{ "SUB ESP,4\t # FMOD\n"
11007 "\tMOVSS [ESP+0],$src1\n"
11008 "\tFLD_S [ESP+0]\n"
11009 "\tMOVSS [ESP+0],$src0\n"
11010 "\tFLD_S [ESP+0]\n"
11011 "loop:\tFPREM\n"
11012 "\tFWAIT\n"
11013 "\tFNSTSW AX\n"
11014 "\tSAHF\n"
11015 "\tJP loop\n"
11016 "\tFSTP_S [ESP+0]\n"
11017 "\tMOVSS $dst,[ESP+0]\n"
11018 "\tADD ESP,4\n"
11019 "\tFSTP ST0\t # Restore FPU Stack"
11020 %}
11021 ins_cost(250);
11022 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
11023 ins_pipe( pipe_slow );
11024 %}
11025
11026
11027 //----------Arithmetic Conversion Instructions---------------------------------
11028 // The conversions operations are all Alpha sorted. Please keep it that way!
11029
// Round an FPU-stack value to float precision by storing it as a
// single into a stack slot (Pop_Mem_Reg_FPR).
11030 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
11031 predicate(UseSSE==0);
11032 match(Set dst (RoundFloat src));
11033 ins_cost(125);
11034 format %{ "FST_S $dst,$src\t# F-round" %}
11035 ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11036 ins_pipe( fpu_mem_reg );
11037 %}
11038
// Round an FPU-stack value to double precision by storing it as a
// double into a stack slot.
11039 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
11040 predicate(UseSSE<=1);
11041 match(Set dst (RoundDouble src));
11042 ins_cost(125);
11043 format %{ "FST_D $dst,$src\t# D-round" %}
11044 ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11045 ins_pipe( fpu_mem_reg );
11046 %}
11047
11048 // Force rounding to 24-bit precision and 6-bit exponent
// ConvD2F on the FPU stack: expands to roundFloat_mem_reg, whose
// single-precision store performs the D->F rounding.
11049 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
11050 predicate(UseSSE==0);
11051 match(Set dst (ConvD2F src));
11052 format %{ "FST_S $dst,$src\t# F-round" %}
11053 expand %{
11054 roundFloat_mem_reg(dst,src);
11055 %}
11056 %}
11057
11058 // Force rounding to 24-bit precision and 6-bit exponent
// ConvD2F for UseSSE==1: source is on the FPU stack, destination is XMM.
// The value is rounded by a single-precision store to a scratch stack word,
// then loaded into the XMM register.
11059 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
11060 predicate(UseSSE==1);
11061 match(Set dst (ConvD2F src));
11062 effect( KILL cr );
11063 format %{ "SUB ESP,4\n\t"
11064 "FST_S [ESP],$src\t# F-round\n\t"
11065 "MOVSS $dst,[ESP]\n\t"
11066 "ADD ESP,4" %}
11067 ins_encode %{
11068 __ subptr(rsp, 4);
11069 if ($src$$reg != FPR1L_enc) {
11070 // src not at top of stack: load it to ST(0) first, then pop-store
11071 __ fld_s($src$$reg-1);
11072 __ fstp_s(Address(rsp, 0));
11073 } else {
11074 // src already in ST(0): store without popping
11075 __ fst_s(Address(rsp, 0));
11076 }
11077 __ movflt($dst$$XMMRegister, Address(rsp, 0));
11078 __ addptr(rsp, 4);
11079 %}
11080 ins_pipe( pipe_slow );
11081 %}
11080
11081 // Force rounding double precision to single precision
// ConvD2F for SSE2: single CVTSD2SS between XMM registers.
11082 instruct convD2F_reg(regF dst, regD src) %{
11083 predicate(UseSSE>=2);
11084 match(Set dst (ConvD2F src));
11085 format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11086 ins_encode %{
11087 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
11088 %}
11089 ins_pipe( pipe_slow );
11090 %}
11091
// ConvF2D on the FPU stack: a register-to-register pop via Pop_Reg_Reg_DPR
// (F->D widening is exact on the 80-bit stack).
// NOTE(review): the format prints "FST_S" although the result is a double
// register move — presumably cosmetic; verify against Pop_Reg_Reg_DPR.
11092 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
11093 predicate(UseSSE==0);
11094 match(Set dst (ConvF2D src));
11095 format %{ "FST_S $dst,$src\t# D-round" %}
11096 ins_encode( Pop_Reg_Reg_DPR(dst, src));
11097 ins_pipe( fpu_reg_reg );
11098 %}
11099
// ConvF2D for UseSSE==1 with an FPU-stack source: expands to
// roundDouble_mem_reg (store as double into a stack slot).
11100 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
11101 predicate(UseSSE==1);
11102 match(Set dst (ConvF2D src));
11103 format %{ "FST_D $dst,$src\t# D-round" %}
11104 expand %{
11105 roundDouble_mem_reg(dst,src);
11106 %}
11107 %}
11108
// ConvF2D for UseSSE==1: XMM float source, FPU-stack double destination.
// The float is spilled to a scratch stack word, loaded onto the x87 stack,
// and popped into the destination FPU register.
11109 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
11110 predicate(UseSSE==1);
11111 match(Set dst (ConvF2D src));
11112 effect( KILL cr );
11113 format %{ "SUB ESP,4\n\t"
11114 "MOVSS [ESP] $src\n\t"
11115 "FLD_S [ESP]\n\t"
11116 "ADD ESP,4\n\t"
11117 "FSTP $dst\t# D-round" %}
11118 ins_encode %{
11119 __ subptr(rsp, 4);
11120 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11121 __ fld_s(Address(rsp, 0));
11122 __ addptr(rsp, 4);
11123 __ fstp_d($dst$$reg);
11124 %}
11125 ins_pipe( pipe_slow );
11126 %}
11127
// ConvF2D for SSE2: single CVTSS2SD between XMM registers.
11128 instruct convF2D_reg(regD dst, regF src) %{
11129 predicate(UseSSE>=2);
11130 match(Set dst (ConvF2D src));
11131 format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11132 ins_encode %{
11133 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
11134 %}
11135 ins_pipe( pipe_slow );
11136 %}
11137
11138 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// ConvD2I on the FPU stack: FISTP in truncation mode; the sentinel
// 0x80000000 (FPU "invalid" result) triggers the d2i_wrapper slow call
// to get Java semantics for NaN/overflow.
11139 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
11140 predicate(UseSSE<=1);
11141 match(Set dst (ConvD2I src));
11142 effect( KILL tmp, KILL cr );
11143 format %{ "FLD $src\t# Convert double to int \n\t"
11144 "FLDCW trunc mode\n\t"
11145 "SUB ESP,4\n\t"
11146 "FISTp [ESP + #0]\n\t"
11147 "FLDCW std/24-bit mode\n\t"
11148 "POP EAX\n\t"
11149 "CMP EAX,0x80000000\n\t"
11150 "JNE,s fast\n\t"
11151 "FLD_D $src\n\t"
11152 "CALL d2i_wrapper\n"
11153 "fast:" %}
11154 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
11155 ins_pipe( pipe_slow );
11156 %}
11157
11158 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// ConvD2I for SSE2: CVTTSD2SI does the truncating convert; 0x80000000 is
// the hardware's "integer indefinite" sentinel, so on seeing it the double
// is pushed onto the x87 stack and d2i_wrapper fixes up NaN/overflow per
// Java semantics.
11159 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11160 predicate(UseSSE>=2);
11161 match(Set dst (ConvD2I src));
11162 effect( KILL tmp, KILL cr );
11163 format %{ "CVTTSD2SI $dst, $src\n\t"
11164 "CMP $dst,0x80000000\n\t"
11165 "JNE,s fast\n\t"
11166 "SUB ESP, 8\n\t"
11167 "MOVSD [ESP], $src\n\t"
11168 "FLD_D [ESP]\n\t"
11169 "ADD ESP, 8\n\t"
11170 "CALL d2i_wrapper\n"
11171 "fast:" %}
11172 ins_encode %{
11173 Label fast;
11174 __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
11175 __ cmpl($dst$$Register, 0x80000000); // integer-indefinite sentinel?
11176 __ jccb(Assembler::notEqual, fast);
11177 // Slow path: pass the operand on the FPU stack to the wrapper
11178 __ subptr(rsp, 8);
11179 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11180 __ fld_d(Address(rsp, 0));
11181 __ addptr(rsp, 8);
11182 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11183 __ bind(fast);
11184 %}
11185 ins_pipe( pipe_slow );
11186 %}
11186
// ConvD2L on the FPU stack: 64-bit FISTP in truncation mode into EDX:EAX;
// the long sentinel 0x80000000:00000000 routes to d2l_wrapper for
// Java NaN/overflow semantics.
11187 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
11188 predicate(UseSSE<=1);
11189 match(Set dst (ConvD2L src));
11190 effect( KILL cr );
11191 format %{ "FLD $src\t# Convert double to long\n\t"
11192 "FLDCW trunc mode\n\t"
11193 "SUB ESP,8\n\t"
11194 "FISTp [ESP + #0]\n\t"
11195 "FLDCW std/24-bit mode\n\t"
11196 "POP EAX\n\t"
11197 "POP EDX\n\t"
11198 "CMP EDX,0x80000000\n\t"
11199 "JNE,s fast\n\t"
11200 "TEST EAX,EAX\n\t"
11201 "JNE,s fast\n\t"
11202 "FLD $src\n\t"
11203 "CALL d2l_wrapper\n"
11204 "fast:" %}
11205 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
11206 ins_pipe( pipe_slow );
11207 %}
11208
11209 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11210 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11211 predicate (UseSSE>=2);
11212 match(Set dst (ConvD2L src));
11213 effect( KILL cr );
11214 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11215 "MOVSD [ESP],$src\n\t"
11216 "FLD_D [ESP]\n\t"
11217 "FLDCW trunc mode\n\t"
11218 "FISTp [ESP + #0]\n\t"
11219 "FLDCW std/24-bit mode\n\t"
11220 "POP EAX\n\t"
11221 "POP EDX\n\t"
11222 "CMP EDX,0x80000000\n\t"
11223 "JNE,s fast\n\t"
11224 "TEST EAX,EAX\n\t"
11225 "JNE,s fast\n\t"
11226 "SUB ESP,8\n\t"
11227 "MOVSD [ESP],$src\n\t"
11228 "FLD_D [ESP]\n\t"
11229 "ADD ESP,8\n\t"
11230 "CALL d2l_wrapper\n"
11248 __ cmpl(rdx, 0x80000000);
11249 __ jccb(Assembler::notEqual, fast);
11250 __ testl(rax, rax);
11251 __ jccb(Assembler::notEqual, fast);
11252 __ subptr(rsp, 8);
11253 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11254 __ fld_d(Address(rsp, 0));
11255 __ addptr(rsp, 8);
11256 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11257 __ bind(fast);
11258 %}
11259 ins_pipe( pipe_slow );
11260 %}
11261
11262 // Convert a double to an int. Java semantics require we do complex
11263 // manglations in the corner cases. So we set the rounding mode to
11264 // 'zero', store the darned double down as an int, and reset the
11265 // rounding mode to 'nearest'. The hardware stores a flag value down
11266 // if we would overflow or converted a NAN; we check for this
11267 // and go the slow path if needed.
// ConvF2I on the FPU stack: reuses the double path (DPR2I_encoding) since
// the x87 stack holds both at 80 bits; sentinel 0x80000000 routes to the
// d2i_wrapper slow call for Java NaN/overflow semantics.
11268 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
11269 predicate(UseSSE==0);
11270 match(Set dst (ConvF2I src));
11271 effect( KILL tmp, KILL cr );
11272 format %{ "FLD $src\t# Convert float to int \n\t"
11273 "FLDCW trunc mode\n\t"
11274 "SUB ESP,4\n\t"
11275 "FISTp [ESP + #0]\n\t"
11276 "FLDCW std/24-bit mode\n\t"
11277 "POP EAX\n\t"
11278 "CMP EAX,0x80000000\n\t"
11279 "JNE,s fast\n\t"
11280 "FLD $src\n\t"
11281 "CALL d2i_wrapper\n"
11282 "fast:" %}
11283 // DPR2I_encoding works for FPR2I
11284 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11285 ins_pipe( pipe_slow );
11286 %}
11287
11288 // Convert a float in xmm to an int reg.
// ConvF2I for SSE: CVTTSS2SI truncating convert; on the 0x80000000
// sentinel the float is marshalled onto the x87 stack and d2i_wrapper
// applies Java NaN/overflow semantics.
11289 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11290 predicate(UseSSE>=1);
11291 match(Set dst (ConvF2I src));
11292 effect( KILL tmp, KILL cr );
11293 format %{ "CVTTSS2SI $dst, $src\n\t"
11294 "CMP $dst,0x80000000\n\t"
11295 "JNE,s fast\n\t"
11296 "SUB ESP, 4\n\t"
11297 "MOVSS [ESP], $src\n\t"
11298 "FLD [ESP]\n\t"
11299 "ADD ESP, 4\n\t"
11300 "CALL d2i_wrapper\n"
11301 "fast:" %}
11302 ins_encode %{
11303 Label fast;
11304 __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11305 __ cmpl($dst$$Register, 0x80000000); // integer-indefinite sentinel?
11306 __ jccb(Assembler::notEqual, fast);
11307 // Slow path: pass the operand on the FPU stack to the wrapper
11308 __ subptr(rsp, 4);
11309 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11310 __ fld_s(Address(rsp, 0));
11311 __ addptr(rsp, 4);
11312 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11313 __ bind(fast);
11314 %}
11315 ins_pipe( pipe_slow );
11316 %}
11316
11317 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11318 predicate(UseSSE==0);
11319 match(Set dst (ConvF2L src));
11320 effect( KILL cr );
11321 format %{ "FLD $src\t# Convert float to long\n\t"
11322 "FLDCW trunc mode\n\t"
11323 "SUB ESP,8\n\t"
11324 "FISTp [ESP + #0]\n\t"
11325 "FLDCW std/24-bit mode\n\t"
11326 "POP EAX\n\t"
11327 "POP EDX\n\t"
11328 "CMP EDX,0x80000000\n\t"
11329 "JNE,s fast\n\t"
11330 "TEST EAX,EAX\n\t"
11331 "JNE,s fast\n\t"
11332 "FLD $src\n\t"
11333 "CALL d2l_wrapper\n"
11334 "fast:" %}
11335 // DPR2L_encoding works for FPR2L
11336 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11337 ins_pipe( pipe_slow );
11338 %}
11339
11340 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11341 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11342 predicate (UseSSE>=1);
11343 match(Set dst (ConvF2L src));
11344 effect( KILL cr );
11345 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11346 "MOVSS [ESP],$src\n\t"
11347 "FLD_S [ESP]\n\t"
11348 "FLDCW trunc mode\n\t"
11349 "FISTp [ESP + #0]\n\t"
11350 "FLDCW std/24-bit mode\n\t"
11351 "POP EAX\n\t"
11352 "POP EDX\n\t"
11353 "CMP EDX,0x80000000\n\t"
11354 "JNE,s fast\n\t"
11355 "TEST EAX,EAX\n\t"
11356 "JNE,s fast\n\t"
11357 "SUB ESP,4\t# Convert float to long\n\t"
11358 "MOVSS [ESP],$src\n\t"
11359 "FLD_S [ESP]\n\t"
11360 "ADD ESP,4\n\t"
11361 "CALL d2l_wrapper\n"
11373 } else {
11374 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11375 }
11376 // Load the converted long, adjust CPU stack
11377 __ pop(rax);
11378 __ pop(rdx);
11379 __ cmpl(rdx, 0x80000000);
11380 __ jccb(Assembler::notEqual, fast);
11381 __ testl(rax, rax);
11382 __ jccb(Assembler::notEqual, fast);
11383 __ subptr(rsp, 4);
11384 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11385 __ fld_s(Address(rsp, 0));
11386 __ addptr(rsp, 4);
11387 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11388 __ bind(fast);
11389 %}
11390 ins_pipe( pipe_slow );
11391 %}
11392
11393 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11394 predicate( UseSSE<=1 );
11395 match(Set dst (ConvI2D src));
11396 format %{ "FILD $src\n\t"
11397 "FSTP $dst" %}
11398 opcode(0xDB, 0x0); /* DB /0 */
11399 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11400 ins_pipe( fpu_reg_mem );
11401 %}
11402
11403 instruct convI2D_reg(regD dst, eRegI src) %{
11404 predicate( UseSSE>=2 && !UseXmmI2D );
11405 match(Set dst (ConvI2D src));
11406 format %{ "CVTSI2SD $dst,$src" %}
11407 ins_encode %{
11408 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11409 %}
11410 ins_pipe( pipe_slow );
11411 %}
11412
11413 instruct convI2D_mem(regD dst, memory mem) %{
11414 predicate( UseSSE>=2 );
11415 match(Set dst (ConvI2D (LoadI mem)));
11416 format %{ "CVTSI2SD $dst,$mem" %}
11417 ins_encode %{
11418 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11419 %}
11420 ins_pipe( pipe_slow );
11421 %}
11422
11423 instruct convXI2D_reg(regD dst, eRegI src)
11424 %{
11425 predicate( UseSSE>=2 && UseXmmI2D );
11426 match(Set dst (ConvI2D src));
11427
11428 format %{ "MOVD $dst,$src\n\t"
11429 "CVTDQ2PD $dst,$dst\t# i2d" %}
11430 ins_encode %{
11431 __ movdl($dst$$XMMRegister, $src$$Register);
11432 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11433 %}
11434 ins_pipe(pipe_slow); // XXX
11435 %}
11436
11437 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11438 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11439 match(Set dst (ConvI2D (LoadI mem)));
11440 format %{ "FILD $mem\n\t"
11441 "FSTP $dst" %}
11442 opcode(0xDB); /* DB /0 */
11443 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11444 Pop_Reg_DPR(dst));
11445 ins_pipe( fpu_reg_mem );
11446 %}
11447
11448 // Convert a byte to a float; no rounding step needed.
11449 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11450 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11451 match(Set dst (ConvI2F src));
11452 format %{ "FILD $src\n\t"
11453 "FSTP $dst" %}
11454
11455 opcode(0xDB, 0x0); /* DB /0 */
11456 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11457 ins_pipe( fpu_reg_mem );
11458 %}
11459
11460 // In 24-bit mode, force exponent rounding by storing back out
11461 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11462 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11463 match(Set dst (ConvI2F src));
11464 ins_cost(200);
11465 format %{ "FILD $src\n\t"
11466 "FSTP_S $dst" %}
11467 opcode(0xDB, 0x0); /* DB /0 */
11468 ins_encode( Push_Mem_I(src),
11469 Pop_Mem_FPR(dst));
11470 ins_pipe( fpu_mem_mem );
11471 %}
11472
11473 // In 24-bit mode, force exponent rounding by storing back out
11474 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11475 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11476 match(Set dst (ConvI2F (LoadI mem)));
11477 ins_cost(200);
11478 format %{ "FILD $mem\n\t"
11479 "FSTP_S $dst" %}
11480 opcode(0xDB); /* DB /0 */
11481 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11482 Pop_Mem_FPR(dst));
11483 ins_pipe( fpu_mem_mem );
11484 %}
11485
11486 // This instruction does not round to 24-bits
11487 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11488 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11489 match(Set dst (ConvI2F src));
11490 format %{ "FILD $src\n\t"
11491 "FSTP $dst" %}
11492 opcode(0xDB, 0x0); /* DB /0 */
11493 ins_encode( Push_Mem_I(src),
11494 Pop_Reg_FPR(dst));
11495 ins_pipe( fpu_reg_mem );
11496 %}
11497
11498 // This instruction does not round to 24-bits
11499 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11500 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11501 match(Set dst (ConvI2F (LoadI mem)));
11502 format %{ "FILD $mem\n\t"
11503 "FSTP $dst" %}
11504 opcode(0xDB); /* DB /0 */
11505 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11506 Pop_Reg_FPR(dst));
11507 ins_pipe( fpu_reg_mem );
11508 %}
11509
11510 // Convert an int to a float in xmm; no rounding step needed.
11511 instruct convI2F_reg(regF dst, eRegI src) %{
11512 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11513 match(Set dst (ConvI2F src));
11514 format %{ "CVTSI2SS $dst, $src" %}
11515 ins_encode %{
11516 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11517 %}
11518 ins_pipe( pipe_slow );
11519 %}
11520
11521 instruct convXI2F_reg(regF dst, eRegI src)
11522 %{
11523 predicate( UseSSE>=2 && UseXmmI2F );
11524 match(Set dst (ConvI2F src));
11525
11526 format %{ "MOVD $dst,$src\n\t"
11527 "CVTDQ2PS $dst,$dst\t# i2f" %}
11528 ins_encode %{
11529 __ movdl($dst$$XMMRegister, $src$$Register);
11530 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11531 %}
11532 ins_pipe(pipe_slow); // XXX
11533 %}
11534
11535 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
11536 match(Set dst (ConvI2L src));
11537 effect(KILL cr);
11538 ins_cost(375);
11539 format %{ "MOV $dst.lo,$src\n\t"
11540 "MOV $dst.hi,$src\n\t"
11541 "SAR $dst.hi,31" %}
11550 ins_cost(250);
11551 format %{ "MOV $dst.lo,$src\n\t"
11552 "XOR $dst.hi,$dst.hi" %}
11553 opcode(0x33); // XOR
11554 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11555 ins_pipe( ialu_reg_reg_long );
11556 %}
11557
11558 // Zero-extend long
11559 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11560 match(Set dst (AndL src mask) );
11561 effect( KILL flags );
11562 ins_cost(250);
11563 format %{ "MOV $dst.lo,$src.lo\n\t"
11564 "XOR $dst.hi,$dst.hi\n\t" %}
11565 opcode(0x33); // XOR
11566 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11567 ins_pipe( ialu_reg_reg_long );
11568 %}
11569
11570 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11571 predicate (UseSSE<=1);
11572 match(Set dst (ConvL2D src));
11573 effect( KILL cr );
11574 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11575 "PUSH $src.lo\n\t"
11576 "FILD ST,[ESP + #0]\n\t"
11577 "ADD ESP,8\n\t"
11578 "FSTP_D $dst\t# D-round" %}
11579 opcode(0xDF, 0x5); /* DF /5 */
11580 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11581 ins_pipe( pipe_slow );
11582 %}
11583
11584 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11585 predicate (UseSSE>=2);
11586 match(Set dst (ConvL2D src));
11587 effect( KILL cr );
11588 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11589 "PUSH $src.lo\n\t"
11590 "FILD_D [ESP]\n\t"
11591 "FSTP_D [ESP]\n\t"
11592 "MOVSD $dst,[ESP]\n\t"
11593 "ADD ESP,8" %}
11594 opcode(0xDF, 0x5); /* DF /5 */
11595 ins_encode(convert_long_double2(src), Push_ResultD(dst));
11596 ins_pipe( pipe_slow );
11597 %}
11598
11599 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11600 predicate (UseSSE>=1);
11601 match(Set dst (ConvL2F src));
11602 effect( KILL cr );
11603 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11604 "PUSH $src.lo\n\t"
11605 "FILD_D [ESP]\n\t"
11606 "FSTP_S [ESP]\n\t"
11607 "MOVSS $dst,[ESP]\n\t"
11608 "ADD ESP,8" %}
11609 opcode(0xDF, 0x5); /* DF /5 */
11610 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11611 ins_pipe( pipe_slow );
11612 %}
11613
11614 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11615 match(Set dst (ConvL2F src));
11616 effect( KILL cr );
11617 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11618 "PUSH $src.lo\n\t"
11619 "FILD ST,[ESP + #0]\n\t"
11620 "ADD ESP,8\n\t"
11621 "FSTP_S $dst\t# F-round" %}
11622 opcode(0xDF, 0x5); /* DF /5 */
11623 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11624 ins_pipe( pipe_slow );
11625 %}
11626
11627 instruct convL2I_reg( eRegI dst, eRegL src ) %{
11628 match(Set dst (ConvL2I src));
11629 effect( DEF dst, USE src );
11630 format %{ "MOV $dst,$src.lo" %}
11631 ins_encode(enc_CopyL_Lo(dst,src));
11632 ins_pipe( ialu_reg_reg );
11633 %}
11634
11635
11636 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
11637 match(Set dst (MoveF2I src));
11638 effect( DEF dst, USE src );
11639 ins_cost(100);
11640 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
11641 ins_encode %{
11642 __ movl($dst$$Register, Address(rsp, $src$$disp));
11643 %}
11644 ins_pipe( ialu_reg_mem );
11645 %}
11646
11647 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11648 predicate(UseSSE==0);
11649 match(Set dst (MoveF2I src));
11650 effect( DEF dst, USE src );
11651
11652 ins_cost(125);
11653 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
11654 ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11655 ins_pipe( fpu_mem_reg );
11656 %}
11657
11658 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11659 predicate(UseSSE>=1);
11660 match(Set dst (MoveF2I src));
11661 effect( DEF dst, USE src );
11662
11663 ins_cost(95);
11664 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
11665 ins_encode %{
11666 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11667 %}
11668 ins_pipe( pipe_slow );
11669 %}
11670
11671 instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
11672 predicate(UseSSE>=2);
11673 match(Set dst (MoveF2I src));
11674 effect( DEF dst, USE src );
11675 ins_cost(85);
11676 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
11677 ins_encode %{
11678 __ movdl($dst$$Register, $src$$XMMRegister);
11679 %}
11680 ins_pipe( pipe_slow );
11681 %}
11682
11683 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
11684 match(Set dst (MoveI2F src));
11685 effect( DEF dst, USE src );
11686
11687 ins_cost(100);
11688 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
11689 ins_encode %{
11690 __ movl(Address(rsp, $dst$$disp), $src$$Register);
11691 %}
11692 ins_pipe( ialu_mem_reg );
11693 %}
11694
11695
11696 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11697 predicate(UseSSE==0);
11698 match(Set dst (MoveI2F src));
11699 effect(DEF dst, USE src);
11700
11701 ins_cost(125);
11702 format %{ "FLD_S $src\n\t"
11703 "FSTP $dst\t# MoveI2F_stack_reg" %}
11704 opcode(0xD9); /* D9 /0, FLD m32real */
11705 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11706 Pop_Reg_FPR(dst) );
11707 ins_pipe( fpu_reg_mem );
11708 %}
11709
11710 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11711 predicate(UseSSE>=1);
11712 match(Set dst (MoveI2F src));
11713 effect( DEF dst, USE src );
11714
11715 ins_cost(95);
11716 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
11717 ins_encode %{
11718 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11719 %}
11720 ins_pipe( pipe_slow );
11721 %}
11722
11723 instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
11724 predicate(UseSSE>=2);
11725 match(Set dst (MoveI2F src));
11726 effect( DEF dst, USE src );
11727
11728 ins_cost(85);
11729 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
11730 ins_encode %{
11731 __ movdl($dst$$XMMRegister, $src$$Register);
11732 %}
11733 ins_pipe( pipe_slow );
11734 %}
11735
11736 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11737 match(Set dst (MoveD2L src));
11738 effect(DEF dst, USE src);
11739
11740 ins_cost(250);
11741 format %{ "MOV $dst.lo,$src\n\t"
11742 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11743 opcode(0x8B, 0x8B);
11744 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11745 ins_pipe( ialu_mem_long_reg );
11746 %}
11747
11748 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11749 predicate(UseSSE<=1);
11750 match(Set dst (MoveD2L src));
11751 effect(DEF dst, USE src);
11752
11753 ins_cost(125);
11754 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
11755 ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11756 ins_pipe( fpu_mem_reg );
11757 %}
11758
11759 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11760 predicate(UseSSE>=2);
11761 match(Set dst (MoveD2L src));
11762 effect(DEF dst, USE src);
11763 ins_cost(95);
11764 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
11765 ins_encode %{
11766 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11767 %}
11768 ins_pipe( pipe_slow );
11769 %}
11770
11771 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11772 predicate(UseSSE>=2);
11773 match(Set dst (MoveD2L src));
11774 effect(DEF dst, USE src, TEMP tmp);
11775 ins_cost(85);
11776 format %{ "MOVD $dst.lo,$src\n\t"
11777 "PSHUFLW $tmp,$src,0x4E\n\t"
11778 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11779 ins_encode %{
11780 __ movdl($dst$$Register, $src$$XMMRegister);
11781 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11782 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11783 %}
11784 ins_pipe( pipe_slow );
11785 %}
11786
11787 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11788 match(Set dst (MoveL2D src));
11789 effect(DEF dst, USE src);
11790
11791 ins_cost(200);
11792 format %{ "MOV $dst,$src.lo\n\t"
11793 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11794 opcode(0x89, 0x89);
11795 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11796 ins_pipe( ialu_mem_long_reg );
11797 %}
11798
11799
11800 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11801 predicate(UseSSE<=1);
11802 match(Set dst (MoveL2D src));
11803 effect(DEF dst, USE src);
11804 ins_cost(125);
11805
11806 format %{ "FLD_D $src\n\t"
11807 "FSTP $dst\t# MoveL2D_stack_reg" %}
11808 opcode(0xDD); /* DD /0, FLD m64real */
11809 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11810 Pop_Reg_DPR(dst) );
11811 ins_pipe( fpu_reg_mem );
11812 %}
11813
11814
11815 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11816 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11817 match(Set dst (MoveL2D src));
11818 effect(DEF dst, USE src);
11819
11820 ins_cost(95);
11821 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11822 ins_encode %{
11823 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11824 %}
11825 ins_pipe( pipe_slow );
11826 %}
11827
11828 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11829 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11830 match(Set dst (MoveL2D src));
11831 effect(DEF dst, USE src);
11832
11833 ins_cost(95);
11834 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11835 ins_encode %{
11836 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11837 %}
11838 ins_pipe( pipe_slow );
11839 %}
11840
11841 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11842 predicate(UseSSE>=2);
11843 match(Set dst (MoveL2D src));
11844 effect(TEMP dst, USE src, TEMP tmp);
11845 ins_cost(85);
11846 format %{ "MOVD $dst,$src.lo\n\t"
11847 "MOVD $tmp,$src.hi\n\t"
11848 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11849 ins_encode %{
11850 __ movdl($dst$$XMMRegister, $src$$Register);
11851 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11852 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11853 %}
11854 ins_pipe( pipe_slow );
11855 %}
11856
11857 // Replicate scalar to packed byte (1 byte) values in xmm
11858 instruct Repl8B_reg(regD dst, regD src) %{
11859 predicate(UseSSE>=2);
11860 match(Set dst (Replicate8B src));
11861 format %{ "MOVDQA $dst,$src\n\t"
11862 "PUNPCKLBW $dst,$dst\n\t"
11863 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11864 ins_encode %{
11865 if ($dst$$reg != $src$$reg) {
11866 __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
11867 }
11868 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
11869 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11870 %}
11871 ins_pipe( pipe_slow );
11872 %}
11873
11874 // Replicate scalar to packed byte (1 byte) values in xmm
11875 instruct Repl8B_eRegI(regD dst, eRegI src) %{
11876 predicate(UseSSE>=2);
11877 match(Set dst (Replicate8B src));
11878 format %{ "MOVD $dst,$src\n\t"
11879 "PUNPCKLBW $dst,$dst\n\t"
11880 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11881 ins_encode %{
11882 __ movdl($dst$$XMMRegister, $src$$Register);
11883 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
11884 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11885 %}
11886 ins_pipe( pipe_slow );
11887 %}
11888
11889 // Replicate scalar zero to packed byte (1 byte) values in xmm
11890 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11891 predicate(UseSSE>=2);
11892 match(Set dst (Replicate8B zero));
11893 format %{ "PXOR $dst,$dst\t! replicate8B" %}
11894 ins_encode %{
11895 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11896 %}
11897 ins_pipe( fpu_reg_reg );
11898 %}
11899
11900 // Replicate scalar to packed short (2 byte) values in xmm
11901 instruct Repl4S_reg(regD dst, regD src) %{
11902 predicate(UseSSE>=2);
11903 match(Set dst (Replicate4S src));
11904 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11905 ins_encode %{
11906 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11907 %}
11908 ins_pipe( fpu_reg_reg );
11909 %}
11910
11911 // Replicate scalar to packed short (2 byte) values in xmm
11912 instruct Repl4S_eRegI(regD dst, eRegI src) %{
11913 predicate(UseSSE>=2);
11914 match(Set dst (Replicate4S src));
11915 format %{ "MOVD $dst,$src\n\t"
11916 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11917 ins_encode %{
11918 __ movdl($dst$$XMMRegister, $src$$Register);
11919 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11920 %}
11921 ins_pipe( fpu_reg_reg );
11922 %}
11923
11924 // Replicate scalar zero to packed short (2 byte) values in xmm
11925 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11926 predicate(UseSSE>=2);
11927 match(Set dst (Replicate4S zero));
11928 format %{ "PXOR $dst,$dst\t! replicate4S" %}
11929 ins_encode %{
11930 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11931 %}
11932 ins_pipe( fpu_reg_reg );
11933 %}
11934
11935 // Replicate scalar to packed char (2 byte) values in xmm
11936 instruct Repl4C_reg(regD dst, regD src) %{
11937 predicate(UseSSE>=2);
11938 match(Set dst (Replicate4C src));
11939 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11940 ins_encode %{
11941 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11942 %}
11943 ins_pipe( fpu_reg_reg );
11944 %}
11945
11946 // Replicate scalar to packed char (2 byte) values in xmm
11947 instruct Repl4C_eRegI(regD dst, eRegI src) %{
11948 predicate(UseSSE>=2);
11949 match(Set dst (Replicate4C src));
11950 format %{ "MOVD $dst,$src\n\t"
11951 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11952 ins_encode %{
11953 __ movdl($dst$$XMMRegister, $src$$Register);
11954 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11955 %}
11956 ins_pipe( fpu_reg_reg );
11957 %}
11958
11959 // Replicate scalar zero to packed char (2 byte) values in xmm
11960 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11961 predicate(UseSSE>=2);
11962 match(Set dst (Replicate4C zero));
11963 format %{ "PXOR $dst,$dst\t! replicate4C" %}
11964 ins_encode %{
11965 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11966 %}
11967 ins_pipe( fpu_reg_reg );
11968 %}
11969
11970 // Replicate scalar to packed integer (4 byte) values in xmm
11971 instruct Repl2I_reg(regD dst, regD src) %{
11972 predicate(UseSSE>=2);
11973 match(Set dst (Replicate2I src));
11974 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11975 ins_encode %{
11976 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11977 %}
11978 ins_pipe( fpu_reg_reg );
11979 %}
11980
11981 // Replicate scalar to packed integer (4 byte) values in xmm
11982 instruct Repl2I_eRegI(regD dst, eRegI src) %{
11983 predicate(UseSSE>=2);
11984 match(Set dst (Replicate2I src));
11985 format %{ "MOVD $dst,$src\n\t"
11986 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11987 ins_encode %{
11988 __ movdl($dst$$XMMRegister, $src$$Register);
11989 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11990 %}
11991 ins_pipe( fpu_reg_reg );
11992 %}
11993
11994 // Replicate scalar zero to packed integer (4 byte) values in xmm
11995 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11996 predicate(UseSSE>=2);
11997 match(Set dst (Replicate2I zero));
11998 format %{ "PXOR $dst,$dst\t! replicate2I" %}
11999 ins_encode %{
12000 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12001 %}
12002 ins_pipe( fpu_reg_reg );
12003 %}
12004
12005 // Replicate scalar to packed single precision floating point values in xmm
12006 instruct Repl2F_reg(regD dst, regD src) %{
12007 predicate(UseSSE>=2);
12008 match(Set dst (Replicate2F src));
12009 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12010 ins_encode %{
12011 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12012 %}
12013 ins_pipe( fpu_reg_reg );
12014 %}
12015
12016 // Replicate scalar to packed single precision floating point values in xmm
12017 instruct Repl2F_regF(regD dst, regF src) %{
12018 predicate(UseSSE>=2);
12019 match(Set dst (Replicate2F src));
12020 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12021 ins_encode %{
12022 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12023 %}
12024 ins_pipe( fpu_reg_reg );
12025 %}
12026
12027 // Replicate scalar to packed single precision floating point values in xmm
12028 instruct Repl2F_immF0(regD dst, immF0 zero) %{
12029 predicate(UseSSE>=2);
12030 match(Set dst (Replicate2F zero));
12031 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12032 ins_encode %{
12033 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12034 %}
12035 ins_pipe( fpu_reg_reg );
12036 %}
12037
12038 // =======================================================================
12039 // fast clearing of an array
12040 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12041 match(Set dummy (ClearArray cnt base));
12042 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12043 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12044 "XOR EAX,EAX\n\t"
12045 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12046 opcode(0,0x4);
12047 ins_encode( Opcode(0xD1), RegOpc(ECX),
12048 OpcRegReg(0x33,EAX,EAX),
12049 Opcode(0xF3), Opcode(0xAB) );
12050 ins_pipe( pipe_slow );
12051 %}
12052
12053 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
12054 eAXRegI result, regD tmp1, eFlagsReg cr) %{
12055 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12056 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12057
12058 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
12059 ins_encode %{
12060 __ string_compare($str1$$Register, $str2$$Register,
12061 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12062 $tmp1$$XMMRegister);
12063 %}
12064 ins_pipe( pipe_slow );
12065 %}
12066
12067 // fast string equals
12068 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12069 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
12070 match(Set result (StrEquals (Binary str1 str2) cnt));
12071 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12072
12073 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12074 ins_encode %{
12075 __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
12076 $cnt$$Register, $result$$Register, $tmp3$$Register,
12077 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12078 %}
12079 ins_pipe( pipe_slow );
12080 %}
12081
12082 // fast search of substring with known size.
12083 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12084 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12085 predicate(UseSSE42Intrinsics);
12086 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12087 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12088
12089 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
12090 ins_encode %{
12091 int icnt2 = (int)$int_cnt2$$constant;
12092 if (icnt2 >= 8) {
12093 // IndexOf for constant substrings with size >= 8 elements
12094 // which don't need to be loaded through stack.
12095 __ string_indexofC8($str1$$Register, $str2$$Register,
12096 $cnt1$$Register, $cnt2$$Register,
12097 icnt2, $result$$Register,
12098 $vec$$XMMRegister, $tmp$$Register);
12099 } else {
12100 // Small strings are loaded through stack if they cross page boundary.
12101 __ string_indexof($str1$$Register, $str2$$Register,
12102 $cnt1$$Register, $cnt2$$Register,
12103 icnt2, $result$$Register,
12104 $vec$$XMMRegister, $tmp$$Register);
12105 }
12106 %}
12107 ins_pipe( pipe_slow );
12108 %}
12109
12110 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12111 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
12112 predicate(UseSSE42Intrinsics);
12113 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12114 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12115
12116 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
12117 ins_encode %{
12118 __ string_indexof($str1$$Register, $str2$$Register,
12119 $cnt1$$Register, $cnt2$$Register,
12120 (-1), $result$$Register,
12121 $vec$$XMMRegister, $tmp$$Register);
12122 %}
12123 ins_pipe( pipe_slow );
12124 %}
12125
12126 // fast array equals
12127 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12128 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12129 %{
12130 match(Set result (AryEq ary1 ary2));
12131 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12132 //ins_cost(300);
12133
12134 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12135 ins_encode %{
12136 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
12137 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12138 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12139 %}
12140 ins_pipe( pipe_slow );
12141 %}
12142
12143 //----------Control Flow Instructions------------------------------------------
12144 // Signed compare Instructions
12145 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
12146 match(Set cr (CmpI op1 op2));
12147 effect( DEF cr, USE op1, USE op2 );
12148 format %{ "CMP $op1,$op2" %}
12934 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12935 ins_cost(250);
12936 format %{ "CMOV$cmp $dst,$src" %}
12937 opcode(0x0F,0x40);
12938 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12939 ins_pipe( pipe_cmov_mem );
12940 %}
12941
12942 // Compare 2 longs and CMOVE ints.
12943 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12944 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12945 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12946 ins_cost(200);
12947 format %{ "CMOV$cmp $dst,$src" %}
12948 opcode(0x0F,0x40);
12949 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12950 ins_pipe( pipe_cmov_reg );
12951 %}
12952
12953 // Compare 2 longs and CMOVE doubles
12954 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12955 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12956 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12957 ins_cost(200);
12958 expand %{
12959 fcmovDPR_regS(cmp,flags,dst,src);
12960 %}
12961 %}
12962
12963 // Compare 2 longs and CMOVE doubles
12964 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12965 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12966 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12967 ins_cost(200);
12968 expand %{
12969 fcmovD_regS(cmp,flags,dst,src);
12970 %}
12971 %}
12972
12973 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12974 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12975 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12976 ins_cost(200);
12977 expand %{
12978 fcmovFPR_regS(cmp,flags,dst,src);
12979 %}
12980 %}
12981
12982 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12983 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12984 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12985 ins_cost(200);
12986 expand %{
12987 fcmovF_regS(cmp,flags,dst,src);
12988 %}
12989 %}
12990
12991 //======
12992 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
12993 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
12994 match( Set flags (CmpL src zero ));
12995 effect(TEMP tmp);
12996 ins_cost(200);
12997 format %{ "MOV $tmp,$src.lo\n\t"
12998 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12999 ins_encode( long_cmp_flags0( src, tmp ) );
13000 ins_pipe( ialu_reg_reg_long );
13001 %}
13002
13003 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13004 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13005 match( Set flags (CmpL src1 src2 ));
13006 ins_cost(200+300);
13007 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13062 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13063 ins_cost(250);
13064 format %{ "CMOV$cmp $dst,$src" %}
13065 opcode(0x0F,0x40);
13066 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13067 ins_pipe( pipe_cmov_mem );
13068 %}
13069
// Compare 2 longs and CMOVE ptrs (rule is eRegP, not ints).
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  // Requires hardware CMOV; also require that the Bool test really is
  // EQ or NE, the only tests flagsReg_long_EQNE can represent.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  // 0x0F 0x40+cc : CMOVcc r32, r/m32.
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13080
// Compare 2 longs and CMOVE doubles (x87/FPR variant, UseSSE<=1).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // Parenthesize the BoolTest disjunction: '&&' binds tighter than '||',
  // so the unparenthesized form let the BoolTest::ne arm match even when
  // UseSSE>=2, overlapping the SSE cmovDD_reg_EQNE rule below.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the FPR conditional-move pseudo instruction.
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13090
// Compare 2 longs and CMOVE doubles (SSE2 variant, UseSSE>=2).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // Parenthesize the BoolTest disjunction: '&&' binds tighter than '||',
  // so the unparenthesized form let the BoolTest::ne arm match regardless
  // of UseSSE, overlapping the x87 cmovDDPR_reg_EQNE rule above.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the XMM conditional-move pseudo instruction.
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13100
// Compare 2 longs (EQ/NE result in flags) and CMOVE floats (x87/FPR variant).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // Parenthesize the BoolTest disjunction: '&&' binds tighter than '||',
  // so the unparenthesized form let the BoolTest::ne arm match regardless
  // of UseSSE, overlapping the SSE cmovFF_reg_EQNE rule below.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the FPR conditional-move pseudo instruction.
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13109
// Compare 2 longs (EQ/NE result in flags) and CMOVE floats (SSE variant).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // Parenthesize the BoolTest disjunction: '&&' binds tighter than '||',
  // so the unparenthesized form let the BoolTest::ne arm match regardless
  // of UseSSE, overlapping the x87 cmovFFPR_reg_EQNE rule above.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the XMM conditional-move pseudo instruction.
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13118
//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  // tmp is clobbered: it holds the zero that 0 - src is computed into.
  effect( TEMP tmp );
  ins_cost(300);
  // Computes 0 - src as a 64-bit subtract (CMP on the low half, SBB on
  // the high half) purely for its flag effects, so that the commuted
  // LE/GT test can be read from the normal flags.
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}
13132
13133 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13134 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13135 // requires a commuted test to get the same result.
13195 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13196 ins_cost(250);
13197 format %{ "CMOV$cmp $dst,$src" %}
13198 opcode(0x0F,0x40);
13199 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13200 ins_pipe( pipe_cmov_mem );
13201 %}
13202
// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  // Requires hardware CMOV; also require that the Bool test really is
  // LE or GT. cmpOp_commute: the flag-producing compare was emitted with
  // swapped operands, so the condition code must be commuted to match.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  // 0x0F 0x40+cc : CMOVcc r32, r/m32.
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13213
// Compare 2 longs and CMOVE doubles (x87/FPR variant, UseSSE<=1).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // Parenthesize the BoolTest disjunction: '&&' binds tighter than '||',
  // so the unparenthesized form let the BoolTest::gt arm match even when
  // UseSSE>=2, overlapping the SSE cmovDD_reg_LEGT rule below.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the FPR conditional-move pseudo instruction.
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13223
// Compare 2 longs and CMOVE doubles (SSE2 variant, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // Parenthesize the BoolTest disjunction: '&&' binds tighter than '||',
  // so the unparenthesized form let the BoolTest::gt arm match regardless
  // of UseSSE, overlapping the x87 cmovDDPR_reg_LEGT rule above.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the XMM conditional-move pseudo instruction.
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13233
// Compare 2 longs (LE/GT, commuted test) and CMOVE floats (x87/FPR variant).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // Parenthesize the BoolTest disjunction: '&&' binds tighter than '||',
  // so the unparenthesized form let the BoolTest::gt arm match regardless
  // of UseSSE, overlapping the SSE cmovFF_reg_LEGT rule below.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the FPR conditional-move pseudo instruction.
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13242

// Compare 2 longs (LE/GT, commuted test) and CMOVE floats (SSE variant).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // Parenthesize the BoolTest disjunction: '&&' binds tighter than '||',
  // so the unparenthesized form let the BoolTest::gt arm match regardless
  // of UseSSE, overlapping the x87 cmovFFPR_reg_LEGT rule above.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the XMM conditional-move pseudo instruction.
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13252
13253
13254 // ============================================================================
13255 // Procedure Call/Return Instructions
13256 // Call Java Static Instruction
13257 // Note: If this code changes, the corresponding ret_addr_offset() and
13258 // compute_padding() functions will have to be adjusted.
13259 instruct CallStaticJavaDirect(method meth) %{
13260 match(CallStaticJava);
13261 predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
13262 effect(USE meth);
13263
13264 ins_cost(300);
13265 format %{ "CALL,static " %}
13266 opcode(0xE8); /* E8 cd */
13267 ins_encode( pre_call_FPU,
13268 Java_Static_Call( meth ),
13269 call_epilog,
|